├── skgstat
├── __version__.py
├── util
│   ├── __init__.py
│   ├── shannon.py
│   ├── cross_validation.py
│   ├── cross_variogram.py
│   ├── likelihood.py
│   └── uncertainty.py
├── data
│   ├── rf
│   │   ├── aniso.png
│   │   └── pancake.png
│   ├── samples
│   │   └── README.md
│   └── _loader.py
├── interfaces
│   ├── __init__.py
│   ├── pykrige.py
│   ├── gstools.py
│   └── variogram_estimator.py
├── __init__.py
├── tests
│   ├── test_isotropic.py
│   ├── test_plotting_backend.py
│   ├── __init__.py
│   ├── test_likelihood.py
│   ├── test_data_loader.py
│   ├── test_cross_utility.py
│   ├── test_util.py
│   ├── test_estimator.py
│   ├── test_stmodels.py
│   ├── test_metric_space.py
│   ├── test_directionalvariogram.py
│   ├── test_binning.py
│   ├── sample.csv
│   ├── test_models.py
│   ├── test_spacetimevariogram.py
│   └── test_kriging.py
├── plotting
│   ├── __init__.py
│   ├── variogram_dd_plot.py
│   ├── variogram_scattergram.py
│   ├── directtional_variogram.py
│   ├── stvariogram_plot2d.py
│   ├── variogram_location_trend.py
│   ├── stvariogram_plot3d.py
│   ├── stvariogram_marginal.py
│   └── variogram_plot.py
└── stmodels.py
├── example.png
├── requirements.unittest.3.10.txt
├── requirements.unittest.3.11.txt
├── requirements.unittest.3.12.txt
├── requirements.unittest.3.13.txt
├── requirements.unittest.3.9.txt
├── requirements.txt
├── docs
├── reference
│   ├── kriging.rst
│   ├── variogram.rst
│   ├── spacetimevariogram.rst
│   ├── data.rst
│   ├── reference.rst
│   ├── directionalvariogram.rst
│   ├── metric_space.rst
│   ├── util.rst
│   ├── binning.rst
│   ├── models.rst
│   └── estimator.rst
├── tutorials
│   ├── README.rst
│   └── data
│   │   └── tereno_fendt
│   │       └── meta_data_CosmicSense_JFC1_DE-Fen_SNdata.json
├── userguide
│   ├── userguide.rst
│   └── introduction.rst
├── technical
│   ├── technical.rst
│   ├── estimate_kriging.rst
│   └── direction.rst
├── Makefile
├── make.bat
├── install.rst
├── index.rst
├── getting_started.rst
├── sg_execution_times.rst
└── data
│   └── sample_sr.csv
├── requirements.rtd.txt
├── MANIFEST.in
├── .github
└── workflows
│   ├── pre-commit.yml
│   └── main.yml
├── .coveragerc
├── classifiers.txt
├── Dockerfile
├── .pre-commit-config.yaml
├── tutorials
├── README.rst
└── tereno_fendt
│   └── meta_data_CosmicSense_JFC1_DE-Fen_SNdata.json
├── RELEASE.md
├── LICENSE
├── setup.py
├── Dockerfile.legacy
├── .gitignore
└── README.rst

/skgstat/__version__.py:
--------------------------------------------------------------------------------
__version__ = '1.0.22'
--------------------------------------------------------------------------------
/skgstat/util/__init__.py:
--------------------------------------------------------------------------------
from .shannon import shannon_entropy
--------------------------------------------------------------------------------
/example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mmaelicke/scikit-gstat/HEAD/example.png
--------------------------------------------------------------------------------
/skgstat/data/rf/aniso.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mmaelicke/scikit-gstat/HEAD/skgstat/data/rf/aniso.png
--------------------------------------------------------------------------------
/skgstat/data/rf/pancake.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mmaelicke/scikit-gstat/HEAD/skgstat/data/rf/pancake.png
--------------------------------------------------------------------------------
/requirements.unittest.3.10.txt:
--------------------------------------------------------------------------------
pytest
pytest-cov
pytest-depends
pykrige
gstools>=1.3
plotly
--------------------------------------------------------------------------------
/requirements.unittest.3.11.txt:
--------------------------------------------------------------------------------
pytest
pytest-cov
pytest-depends
pykrige
gstools>=1.3
plotly
--------------------------------------------------------------------------------
/requirements.unittest.3.12.txt:
--------------------------------------------------------------------------------
pytest
pytest-cov
pytest-depends
pykrige
gstools>=1.3
plotly
--------------------------------------------------------------------------------
/requirements.unittest.3.13.txt:
--------------------------------------------------------------------------------
pytest
pytest-cov
pytest-depends
pykrige
gstools>=1.3
plotly
--------------------------------------------------------------------------------
/requirements.unittest.3.9.txt:
--------------------------------------------------------------------------------
pytest
pytest-cov
pytest-depends
pykrige
gstools>=1.3
plotly
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
setuptools
scipy
numpy
pandas
matplotlib
numba
scikit-learn
imageio
tqdm
--------------------------------------------------------------------------------
/docs/reference/kriging.rst:
--------------------------------------------------------------------------------
=============
Kriging Class
=============

.. autoclass:: skgstat.OrdinaryKriging
    :members:

    .. automethod:: __init__
--------------------------------------------------------------------------------
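
To complement the class reference above, a minimal usage sketch of ``OrdinaryKriging``. The sample loader, grid and parameter values are illustrative only:

.. code-block:: python

    import numpy as np
    import skgstat as skg

    # build and fit a variogram on one of the shipped samples
    coords, vals = skg.data.pancake(N=300, seed=42).get('sample')
    V = skg.Variogram(coords, vals, maxlag=500)

    # kriging interpolation on a 50x50 grid
    ok = skg.OrdinaryKriging(V, min_points=5, max_points=15)
    xx, yy = np.mgrid[0:499:50j, 0:499:50j]
    field = ok.transform(xx.flatten(), yy.flatten()).reshape(xx.shape)
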
/docs/reference/variogram.rst:
--------------------------------------------------------------------------------
===============
Variogram Class
===============

.. autoclass:: skgstat.Variogram
    :members:

    .. automethod:: __init__
--------------------------------------------------------------------------------
/skgstat/interfaces/__init__.py:
--------------------------------------------------------------------------------
from .variogram_estimator import VariogramEstimator
from .pykrige import pykrige_model, pykrige_params, pykrige_as_kwargs
from .gstools import skgstat_to_gstools
--------------------------------------------------------------------------------
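
The interfaces exported above can be used without touching the sub-modules directly. As a short sketch (requires ``gstools>=1.3`` to be installed; the sample is illustrative), a fitted variogram can be converted into a ``gstools`` covariance model:

.. code-block:: python

    import skgstat as skg
    from skgstat.interfaces import skgstat_to_gstools

    coords, vals = skg.data.pancake(N=150, seed=42).get('sample')
    V = skg.Variogram(coords, vals)

    # returns a gstools covariance model parametrized like the fitted variogram
    model = skgstat_to_gstools(V)
    print(model)
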
/docs/reference/spacetimevariogram.rst:
--------------------------------------------------------------------------------
========================
SpaceTimeVariogram class
========================

.. autoclass:: skgstat.SpaceTimeVariogram
    :members:

    .. automethod:: __init__
--------------------------------------------------------------------------------
/docs/tutorials/README.rst:
--------------------------------------------------------------------------------
Tutorials
=========

You can find a collection of all tutorials below. The numbering gives rough guidance
on how to work through the tutorials, although they do not strictly depend on each other.
--------------------------------------------------------------------------------
/requirements.rtd.txt:
--------------------------------------------------------------------------------
ipython>=6
autoapi
numpydoc
scipy
numpy
pandas
nose
matplotlib
shapely
sphinx_gallery
pydata_sphinx_theme
gstools>=1.3.0
plotly
nbformat
pickleshare
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
include README.rst
include LICENSE
include requirements.txt
include VERSION
include classifiers.txt
include TODO
include .coveragerc
include Dockerfile
include Dockerfile.legacy
graft skgstat/data/rf
graft skgstat/data/samples
--------------------------------------------------------------------------------
/docs/reference/data.rst:
--------------------------------------------------------------------------------
=================
Example data sets
=================

Datasets
--------

.. automodule:: skgstat.data
    :members: pancake, pancake_field, aniso, aniso_field

Utility Functions
-----------------

.. automodule:: skgstat.data._loader
    :members: field, get_sample
--------------------------------------------------------------------------------
/docs/reference/reference.rst:
--------------------------------------------------------------------------------
==============
Code Reference
==============

.. toctree::
    :maxdepth: 3
    :caption: Contents:

    variogram
    directionalvariogram
    spacetimevariogram
    binning
    estimator
    models
    kriging
    data
    metric_space
    util
--------------------------------------------------------------------------------
/docs/userguide/userguide.rst:
--------------------------------------------------------------------------------
==========
User Guide
==========

This user guide will help you get started with the ``scikit-gstat`` package
and gives a more general introduction to variogram analysis.

.. toctree::
    :maxdepth: 3
    :caption: Contents

    introduction
    variogram
    kriging
--------------------------------------------------------------------------------
/.github/workflows/pre-commit.yml:
--------------------------------------------------------------------------------
name: Linting and formatting (pre-commit)

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  pre-commit:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v4
      - uses: pre-commit/action@v3.0.0
--------------------------------------------------------------------------------
/docs/reference/directionalvariogram.rst:
--------------------------------------------------------------------------------
==========================
DirectionalVariogram Class
==========================

.. autoclass:: skgstat.DirectionalVariogram
    :members:

    .. automethod:: __init__
    .. automethod:: _calc_direction_mask_data
    .. automethod:: _triangle
    .. automethod:: _compass
    .. automethod:: _direction_mask
--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
[run]
omit =
    skgstat/tests/*
    skgstat/plotting/*
    docs/*
    setup.py

[report]
exclude_lines =
    pragma: no cover
    def __repr__
    def __str__
    if self\.debug
    if False:
    if 0:
    raise AssertionError
    raise NotImplementedError
    raise ModuleNotFoundError
    if __name__ == .__main__.:
--------------------------------------------------------------------------------
/classifiers.txt:
--------------------------------------------------------------------------------
Development Status :: 5 - Production/Stable
Intended Audience :: Science/Research
License :: OSI Approved :: MIT License
Natural Language :: English
Programming Language :: Python :: 3.6
Programming Language :: Python :: 3.7
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10
Topic :: Scientific/Engineering :: Information Analysis
--------------------------------------------------------------------------------
/docs/reference/metric_space.rst:
--------------------------------------------------------------------------------
=======================================
MetricSpace - Coordinate representation
=======================================

MetricSpace
===========

.. autoclass:: skgstat.MetricSpace
    :members:

    .. automethod:: __init__
    .. automethod:: find_closest

MetricSpacePair
===============

.. autoclass:: skgstat.MetricSpacePair
    :members:

    .. automethod:: __init__
    .. automethod:: find_closest
--------------------------------------------------------------------------------
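
As the class reference above suggests, a ``MetricSpace`` holds a pre-computed (optionally sparse) distance matrix that can be shared between several variograms. A minimal sketch with random toy coordinates:

.. code-block:: python

    import numpy as np
    import skgstat as skg

    rng = np.random.default_rng(42)
    coords = rng.random((150, 2)) * 100
    vals = rng.random(150)

    # pre-compute the distances once, optionally truncated at max_dist
    ms = skg.MetricSpace(coords, max_dist=60)

    # the MetricSpace can be passed in place of the coordinate array
    V1 = skg.Variogram(ms, vals)
    V2 = skg.Variogram(ms, vals, estimator='cressie')
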
/docs/technical/technical.rst:
--------------------------------------------------------------------------------
===============
Technical Notes
===============

This chapter collects a number of technical notes on using scikit-gstat.
These examples either give details on the implementation or guide
correct package usage. These are technical notes, not tutorials, and the
application of the shown examples might not make sense in every situation.

.. toctree::
    :maxdepth: 2
    :caption: Contents:

    fitting
    direction
    estimate_kriging
--------------------------------------------------------------------------------
/skgstat/data/samples/README.md:
--------------------------------------------------------------------------------
# Meuse dataset

Please note that the `./meuse.txt` is distributed under a GPL 3.0 license.
It is originally from https://cran.r-project.org/package=sp and if you
reuse the data, cite:

Pebesma EJ, Bivand RS (2005). “Classes and methods for spatial
data in R.” R News, 5(2), 9–13. https://CRAN.R-project.org/doc/Rnews/.

Bivand RS, Pebesma E, Gomez-Rubio V (2013). Applied spatial data
analysis with R, Second edition. Springer, NY. https://asdar-book.org/.
--------------------------------------------------------------------------------
/docs/reference/util.rst:
--------------------------------------------------------------------------------
=================
Utility Functions
=================

Shannon Entropy
---------------

.. autofunction:: skgstat.util.shannon.shannon_entropy

Cross Validation
----------------

.. autofunction:: skgstat.util.cross_validation.jacknife

Uncertainty Propagation
-----------------------

.. autofunction:: skgstat.util.uncertainty.propagate


Maximum Likelihood Estimation
-----------------------------

.. autofunction:: skgstat.util.likelihood.get_likelihood
--------------------------------------------------------------------------------
/skgstat/__init__.py:
--------------------------------------------------------------------------------
from .Variogram import Variogram
from .DirectionalVariogram import DirectionalVariogram
from .SpaceTimeVariogram import SpaceTimeVariogram
from .Kriging import OrdinaryKriging
from .MetricSpace import MetricSpace, MetricSpacePair, ProbabalisticMetricSpace, RasterEquidistantMetricSpace
from . import interfaces
from . import data
from . import util
from .util.cross_variogram import cross_variograms

# set some stuff
from .__version__ import __version__
__author__ = 'Mirko Maelicke <mirko.maelicke@kit.edu>'
__backend__ = 'matplotlib'
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
SPHINXPROJ    = SciKitGStat
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/docs/reference/binning.rst:
--------------------------------------------------------------------------------
=================
Binning functions
=================

SciKit-GStat implements a large number of binning functions,
which can be used to spatially aggregate the distance matrix
into lag classes, or bins.
There are a number of functions available, which usually accept
more than one method identifier:


.. autofunction:: skgstat.binning.even_width_lags

.. autofunction:: skgstat.binning.uniform_count_lags

.. autofunction:: skgstat.binning.auto_derived_lags

.. autofunction:: skgstat.binning.kmeans

.. autofunction:: skgstat.binning.ward

.. autofunction:: skgstat.binning.stable_entropy_lags
--------------------------------------------------------------------------------
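
In practice, these functions are rarely called directly; they are selected through the ``bin_func`` argument of ``Variogram``. A short sketch, using the method identifiers documented above (sample and lag count are illustrative):

.. code-block:: python

    import skgstat as skg

    coords, vals = skg.data.pancake(N=200, seed=42).get('sample')

    # equal-width lag classes
    V = skg.Variogram(coords, vals, bin_func='even', n_lags=15)
    print(V.bins)

    # switch to a cluster-based binning in place
    V.bin_func = 'kmeans'
    print(V.bins)
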
/skgstat/tests/test_isotropic.py:
--------------------------------------------------------------------------------
import os
import pandas as pd
import skgstat as skg
from numpy.testing import assert_array_almost_equal


def _get_pan_sample():
    df = pd.read_csv(os.path.join(os.path.dirname(__file__), 'pan_sample.csv'))
    return df[['x', 'y']].values, df.z.values


def test_maxlag_change():
    # get data
    c, v = _get_pan_sample()

    # create a Variogram with default settings
    default = skg.Variogram(c, v)
    maxlag = skg.Variogram(c, v, maxlag=default.bins[-1])

    assert_array_almost_equal(
        default.experimental,
        maxlag.experimental,
        decimal=1
    )
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
# specify a default Python version
ARG PYTHON_VERSION=3.8

# build from Python
FROM python:${PYTHON_VERSION}
LABEL maintainer="Mirko Mälicke"

# set a user
RUN adduser skguser
USER skguser
WORKDIR /home/skguser

# copy the tutorial
RUN mkdir tutorials
COPY --chown=skguser:skguser ./tutorials ./tutorials

# set the path
ENV PATH="/home/skguser/.local/bin:${PATH}"

# install scikit-gstat
RUN pip install scikit-gstat

# install optional dependencies
RUN pip install gstools pykrige
RUN pip install plotly
RUN pip install rise
RUN pip install jupyter

# open port 8888
EXPOSE 8888

CMD jupyter notebook --ip "0.0.0.0"
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
repos:
  # Fix common spelling mistakes
  - repo: https://github.com/codespell-project/codespell
    rev: v2.2.1
    hooks:
      - id: codespell
        args: [
          # Verly is a Name, coo references the SciPy coo sparse matrix
          '--ignore-words-list', 'verly,coo',
          '--write-changes',
          # 'nd,alos,inout',
          # '--ignore-regex', '\bhist\b',
          '--'
        ]
        types_or: [python, rst, markdown]
        files: ^(skgstat|docs|tutorials)/

  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v2.3.0
    hooks:
      - id: check-yaml
      - id: end-of-file-fixer
      - id: trailing-whitespace
--------------------------------------------------------------------------------
/skgstat/tests/test_plotting_backend.py:
--------------------------------------------------------------------------------
import pytest
from skgstat.plotting import backend

import matplotlib.pyplot as plt
import plotly.graph_objects as go


def test_backend_no_args():
    """
    The default backend should be 'matplotlib'
    """
    assert backend() == 'matplotlib'


@pytest.mark.depends(on=['test_backend_no_args'])
def test_raise_value_error():
    """
    Raise a value error by setting the wrong backend
    """
    with pytest.raises(ValueError):
        backend('not-a-backend')


@pytest.mark.depends(on=['test_raise_value_error'])
def test_change_plotting_backend():
    """
    Set the correct backend and check
    """
    # change to plotly
    backend('plotly')
    assert backend() == 'plotly'

    # change back
    backend('matplotlib')
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build

if "%1" == "" goto help

%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd
--------------------------------------------------------------------------------
/tutorials/README.rst:
--------------------------------------------------------------------------------
SciKit-GStat Tutorials
======================

This gallery contains tutorials that demonstrate how to use SciKit-GStat for various geostatistical tasks.
Each tutorial is designed to help you understand a specific aspect of the library.

Getting Started
---------------

Start with the basic tutorials to learn the fundamentals of SciKit-GStat:

* **Tutorial 1**: Getting Started - Basic concepts and your first variogram
* **Tutorial 2**: Estimators - Understanding different variogram estimators
* **Tutorial 3**: Variogram Models - Working with theoretical variogram models
* **Tutorial 4**: Plotting - Visualizing variograms and results
* **Tutorial 5**: Binning - Understanding and customizing distance binning
* **Tutorial 6**: GSTools Integration - Using SciKit-GStat with GSTools
* **Tutorial 7**: Maximum Likelihood - Fitting variograms using maximum likelihood estimation
--------------------------------------------------------------------------------
/docs/reference/models.rst:
--------------------------------------------------------------------------------
================
Variogram models
================

Scikit-GStat implements different theoretical variogram functions. These
model functions expect a single lag value or an array of lag values as input
data. Each function has at least a parameter `a` for the effective range and
a parameter `c0` for the sill. The nugget parameter `b` is optional and will
be set to :math:`b:=0` if not given.

Spherical model
~~~~~~~~~~~~~~~

.. autofunction:: skgstat.models.spherical


Exponential model
~~~~~~~~~~~~~~~~~

.. autofunction:: skgstat.models.exponential


Gaussian model
~~~~~~~~~~~~~~

.. autofunction:: skgstat.models.gaussian

Cubic model
~~~~~~~~~~~

.. autofunction:: skgstat.models.cubic


Stable model
~~~~~~~~~~~~

.. autofunction:: skgstat.models.stable


Matérn model
~~~~~~~~~~~~

.. autofunction:: skgstat.models.matern
--------------------------------------------------------------------------------
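
All models share the call signature described above. A short sketch evaluating the spherical model on an array of lags; the range, sill and nugget values are arbitrary:

.. code-block:: python

    import numpy as np
    from skgstat import models

    # lag values to evaluate the model on
    h = np.linspace(0, 100, 50)

    # effective range 40, sill 2.5, no nugget
    gamma = models.spherical(h, 40.0, 2.5)

    # the same model with a nugget of 0.5
    gamma_nugget = models.spherical(h, 40.0, 2.5, 0.5)
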
/skgstat/util/shannon.py:
--------------------------------------------------------------------------------
import numpy as np


def shannon_entropy(x, bins):
    """Shannon Entropy

    Calculates the Shannon Entropy, which is the most basic
    metric in information theory. It can be used to calculate
    the information content of discrete distributions.
    This can be used to estimate the intrinsic uncertainty of
    a sample, independent of the value range or variance, which
    makes it more comparable.

    Parameters
    ----------
    x : numpy.ndarray
        flat 1D array of the observations
    bins : list, int
        upper edges of the bins used to calculate the histogram
        of x.

    Returns
    -------
    h : float
        Shannon Entropy of x, given bins.
    """
    # histogram
    c, _ = np.histogram(x, bins=bins)

    # empirical probabilities
    p = c / np.sum(c) + 1e-15

    # map information function and return product
    return - np.fromiter(map(np.log2, p), dtype=float).dot(p)
--------------------------------------------------------------------------------
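
A short usage sketch for the function above; the sample and the bin edges are arbitrary:

.. code-block:: python

    import numpy as np
    from skgstat.util import shannon_entropy

    x = np.random.default_rng(42).normal(size=1000)

    # entropy of the sample over 15 equal-width bins
    edges = np.histogram_bin_edges(x, bins=15)
    print(shannon_entropy(x, edges))
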
/RELEASE.md:
--------------------------------------------------------------------------------
# Scikit-GStat

SciKit-Gstat is a scipy-styled variogram estimation and analysis module for geostatistics.
It includes classes for variogram estimation and ordinary kriging. More advanced use-cases
like directional variograms and space-time variograms are included as well.

## Citing
SciKit-GStat is published in [GMD](https://www.geoscientific-model-development.net/). Please
cite it like:

```plain
Mälicke, M.: SciKit-GStat 1.0: a SciPy-flavored geostatistical variogram estimation toolbox written in Python, Geosci. Model Dev., 15, 2505–2532, https://doi.org/10.5194/gmd-15-2505-2022, 2022.
```

or bibtex:

```
@Article{gmd-15-2505-2022,
AUTHOR = {M\"alicke, M.},
TITLE = {SciKit-GStat 1.0: a SciPy-flavored geostatistical variogram estimation toolbox written in Python},
JOURNAL = {Geoscientific Model Development},
VOLUME = {15},
YEAR = {2022},
NUMBER = {6},
PAGES = {2505--2532},
URL = {https://gmd.copernicus.org/articles/15/2505/2022/},
DOI = {10.5194/gmd-15-2505-2022}
}
```
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2017 Mirko Mälicke

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/skgstat/tests/__init__.py:
--------------------------------------------------------------------------------
"""
from skgstat.tests.estimator import TestEstimator
from skgstat.tests.models import TestModels, TestVariogramDecorator
from skgstat.tests.binning import TestEvenWidth, TestUniformCount
from skgstat.tests.Variogram import (
    TestVariogramInstatiation,
    TestVariogramArguments,
    TestVariogramFittingProcedure,
    TestVariogramQaulityMeasures,
    TestVariogramMethods,
    TestVariogramPlots,
)
from skgstat.tests.DirectionalVariogram import (
    TestDirectionalVariogramInstantiation,
    TestDirectionalVariogramMethods,
)
from skgstat.tests.SpaceTimeVariogram import (
    TestSpaceTimeVariogramInitialization,
    TestSpaceTimeVariogramArgumets,
    TestSpaceTimeVariogramPlots,
)
from skgstat.tests.kriging import (
    TestKrigingInstantiation,
    TestPerformance,
)
from skgstat.tests.interfaces import (
    TestVariogramEstimator,
    TestPyKrigeInterface,
    TestGstoolsInterface
)
from skgstat.tests.stmodels import (
    TestSumModel,
    TestProductModel,
    TestProductSumModel
)

import os
os.environ['SKG_SUPRESS'] = 'TRUE'
"""
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from setuptools import setup, find_packages


def readme():
    with open('README.rst') as f:
        return f.read().strip()


def version():
    with open('skgstat/__version__.py') as f:
        loc = dict()
        exec(f.read(), loc, loc)
        return loc['__version__']


def requirements():
    with open('requirements.txt') as f:
        return f.read().strip().split('\n')


def classifiers():
    with open('classifiers.txt') as f:
        return f.read().strip().split('\n')


setup(name='scikit-gstat',
      license='MIT License',
      version=version(),
      author='Mirko Maelicke',
      author_email='mirko.maelicke@kit.edu',
      description='Geostatistical expansion in the scipy style',
      long_description=readme(),
      long_description_content_type='text/x-rst',
      classifiers=classifiers(),
      install_requires=requirements(),
      test_suite='nose.collector',
      # test_require=['nose'],
      extras_require={"gstools": ["gstools>=1.3"]},
      packages=find_packages(),
      include_package_data=True,
      zip_safe=False
      )
--------------------------------------------------------------------------------
/Dockerfile.legacy:
--------------------------------------------------------------------------------
# NOTE: This image is not maintained anymore
# use the minimal jupyter notebook
FROM jupyter/minimal-notebook:ad3574d3c5c7

# build tutorials folder
USER root
RUN mkdir tutorials

# switch back to user
USER $NB_USER

# copy the tutorials content
COPY ./docs/tutorials ./tutorials

# install the latest version
COPY ./ ./scikit-gstat

# use the latest
RUN cd scikit-gstat && \
    pip install . && \
    cd ..

# the interfaces has two additional
# optional dependencies: pykrige and gstools
RUN pip install pykrige gstools

# add RISE
RUN conda install -c damianavila82 rise

# use a hashed pw if set
ARG PASSWD=nopass
RUN if [ "$PASSWD" = "nopass" ]; then \
    echo "c.NotebookApp.password = u''" >> /home/$NB_USER/.jupyter/jupyter_notebook_config.py && \
    echo "c.NotebookApp.token = u''" >> /home/$NB_USER/.jupyter/jupyter_notebook_config.py; else \
    echo "c.NotebookApp.password = u'$PASSWD'" >> /home/$NB_USER/.jupyter/jupyter_notebook_config.py; \
    fi

# switch to root
USER root

# remove the repo
RUN rm -rf scikit-gstat

# fix permissions
RUN fix-permissions $CONDA_DIR && \
    fix-permissions /home/$NB_USER

# switch back
USER $NB_USER
--------------------------------------------------------------------------------
/docs/install.rst:
--------------------------------------------------------------------------------
============
Installation
============


The package can be installed directly from the Python Package Index or GitHub.
The version on GitHub might be more recent, as only stable versions are
uploaded to the Python Package Index.

PyPI
----

The version from PyPI can directly be installed using pip

.. code-block:: bash

    pip install scikit-gstat


GitHub
------

The most recent version from GitHub can be installed like:

.. code-block:: bash

    git clone git@github.com:mmaelicke/scikit-gstat
    cd scikit-gstat
    pip install -e .


Conda-Forge
-----------

Since version `0.5.5`, SciKit-GStat is available on Conda-Forge.
You can install it like:

.. code-block:: bash

    conda install -c conda-forge scikit-gstat

Note
----

On Windows, you might run into problems installing all requirements
in a clean Python environment, especially if C++ redistributables are missing.
This can happen, e.g., on *bare* VMs, where the compilation of libraries
required by the scipy, numpy or numba packages fails.
In these cases, install the libraries first and then SciKit-GStat, or move to
the conda-forge package:

.. code-block:: bash

    conda install numpy scipy numba
--------------------------------------------------------------------------------
/skgstat/plotting/__init__.py:
--------------------------------------------------------------------------------
import skgstat

from .variogram_plot import matplotlib_variogram_plot, plotly_variogram_plot
from .variogram_scattergram import matplotlib_variogram_scattergram, plotly_variogram_scattergram
from .variogram_location_trend import matplotlib_location_trend, plotly_location_trend
from .variogram_dd_plot import matplotlib_dd_plot, plotly_dd_plot
from .directtional_variogram import matplotlib_pair_field, plotly_pair_field
from .stvariogram_plot3d import matplotlib_plot_3d, plotly_plot_3d
from .stvariogram_plot2d import matplotlib_plot_2d, plotly_plot_2d
from .stvariogram_marginal import matplotlib_marginal, plotly_marginal


ALLOWED_BACKENDS = [
    'matplotlib',
    'plotly'
]


def backend(name=None):
    """
    """
    if name is None:
        return skgstat.__backend__

    elif name not in ALLOWED_BACKENDS:
        raise ValueError(
            "'%s' is not an allowed plotting backend.\nOptions are: [%s]" %
            (name, ','.join(["'%s'" % _ for _ in ALLOWED_BACKENDS]))
        )

    elif name == 'plotly':
        try:
            import plotly
        except ImportError:
            print('You need to install plotly >=4.12.0 separately:\npip install plotly')
            return

    # we are good to set the new backend
    skgstat.__backend__ = name
--------------------------------------------------------------------------------
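
The ``backend()`` switch above is the only public entry point of the plotting sub-module that users typically need. A minimal sketch:

.. code-block:: python

    from skgstat.plotting import backend

    # query the current backend (defaults to 'matplotlib')
    print(backend())

    # switch to plotly; requires plotly >= 4.12 to be installed
    backend('plotly')

    # switch back
    backend('matplotlib')
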
/docs/reference/estimator.rst:
--------------------------------------------------------------------------------
===================
Estimator Functions
===================

Scikit-GStat implements various semi-variance estimators. These functions can
be found in the skgstat.estimators submodule. Each of these functions can be
used independently of the Variogram class. In this case, the estimator expects
an array of pairwise differences, not the observation values themselves, to
calculate the semi-variance.

Matheron
~~~~~~~~

.. autofunction:: skgstat.estimators.matheron

Cressie
~~~~~~~

.. autofunction:: skgstat.estimators.cressie

Dowd
~~~~

.. autofunction:: skgstat.estimators.dowd

Genton
~~~~~~

.. autofunction:: skgstat.estimators.genton

Shannon Entropy
~~~~~~~~~~~~~~~

.. autofunction:: skgstat.estimators.entropy


MinMax
~~~~~~

.. warning::

    This is an experimental semi-variance estimator. It is heavily influenced
    by extreme values and outliers. That behaviour is usually not desired in
    geostatistics.

.. autofunction:: skgstat.estimators.minmax


Percentile
~~~~~~~~~~

.. warning::

    This is an experimental semi-variance estimator. It uses just a
    percentile of the given pairwise differences and does not bear any
    information about their variance.

.. autofunction:: skgstat.estimators.percentile
--------------------------------------------------------------------------------
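
Since the estimators operate on pairwise differences rather than on the observations, they can be tried out in isolation. A sketch with made-up differences of a single lag class:

.. code-block:: python

    import numpy as np
    from skgstat import estimators

    # hypothetical pairwise differences within one lag class
    diffs = np.array([0.1, 0.4, 0.2, 0.8, 0.3])

    print(estimators.matheron(diffs))
    print(estimators.cressie(diffs))
    print(estimators.dowd(diffs))
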
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# dotenv
.env

# virtualenv
.venv
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

# PyCharm
.idea
cover
docs/_build
docs/savefig
notebooks
.vscode
container

# project specific
Playground.ipynb
in_progress
docs/auto_examples
docs/gen_modules
docs/*.png
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
====
Home
====

Welcome to SciKit GStat
=======================

`Download the docs as PDF `_


SciKit-Gstat is a scipy-styled module for variogram analysis.
The base class is called :class:`Variogram <skgstat.Variogram>`, which is probably the
only import needed. However, several other classes exist:

* :class:`DirectionalVariogram <skgstat.DirectionalVariogram>` for directional variograms
* :class:`SpaceTimeVariogram <skgstat.SpaceTimeVariogram>` for spatio-temporal variograms
* :class:`OrdinaryKriging <skgstat.OrdinaryKriging>` for interpolation
* :class:`MetricSpace <skgstat.MetricSpace>` for pre-computed spatial samples

The variogram classes have a similar interface and can compute experimental variograms
and fit theoretical variogram model functions.
The module makes use of a rich selection of semi-variance estimators, variogram model functions
and spatial binning functions, while being extensible at the same time.

How to cite
===========

In case you use SciKit-GStat in other software or scientific publications,
please reference this module. There is a `GMD <https://www.geoscientific-model-development.net/>`_ publication. Please cite it like:

    Mälicke, M.: SciKit-GStat 1.0: a SciPy-flavored geostatistical variogram estimation toolbox written in Python, Geosci. Model Dev., 15, 2505–2532, https://doi.org/10.5194/gmd-15-2505-2022, 2022.

The code itself is published and has a DOI. It can be cited as:

    Mirko Mälicke, Romain Hugonnet, Helge David Schneider, Sebastian Müller, Egil Möller, & Johan Van de Wauw. (2022). mmaelicke/scikit-gstat: Version 1.0 (v1.0.0). Zenodo. https://doi.org/10.5281/zenodo.5970098



.. toctree::
    :maxdepth: 3
    :caption: Contents:

    install
    getting_started
    userguide/userguide
    auto_examples/index
    technical/technical
    reference/reference
    changelog
--------------------------------------------------------------------------------
/docs/getting_started.rst:
--------------------------------------------------------------------------------
===============
Getting Started
===============


Load the class and data
-----------------------

The main class of scikit-gstat is the Variogram. It can directly be imported
from the module, called skgstat. The main class can easily be demonstrated on
the data module available with version `>=0.5.5`.

.. ipython:: python
    :okwarning:

    import skgstat as skg
    import numpy as np
    import matplotlib.pyplot as plt
    plt.style.use('ggplot')

    data = skg.data.pancake(N=500, seed=42)
    print(data.get('origin'))
    coordinates, values = data.get('sample')

The Variogram needs at least an array of coordinates and an array of values
on instantiation.

.. ipython:: python

    V = skg.Variogram(coordinates=coordinates, values=values)
    print(V)


Plot
----

The Variogram class has its own plotting method.

.. ipython:: python
    :okwarning:

    @savefig default_variogram.png width=7in
    V.plot()
    plt.close()

With version 0.2, the histogram plot can also be disabled. This is most
useful when the binning method for the lag classes is changed from `'even'`
step classes to a `'uniform'` distribution in the lag classes.

.. ipython:: python
    :okwarning:

    V.set_bin_func('uniform')
    @savefig variogram_uniform.png width=7in
    V.plot(hist=False)
    plt.close()

Mutating
--------

One of the main strengths of :class:`Variogram <skgstat.Variogram>` is its
ability to change arguments in place. Any dependent result or parameter
will be invalidated and re-calculated.
You can, e.g., increase the number of lag classes:

.. ipython:: python
    :okwarning:

    V.n_lags = 25
    V.maxlag = 500
    V.bin_func = 'kmeans'

    @savefig default_variogram_25lag.png width=7in
    V.plot()
    plt.close()

Note how the experimental variogram was updated and the model was
fitted to the new data automatically.
--------------------------------------------------------------------------------
/docs/sg_execution_times.rst:
--------------------------------------------------------------------------------

:orphan:

.. _sphx_glr_sg_execution_times:


Computation times
=================
**00:09.810** total execution time for 7 files **from all galleries**:

.. container::

    .. raw:: html



    .. list-table::
        :header-rows: 1
        :class: table table-striped sg-datatable

        * - Example
          - Time
          - Mem (MB)
        * - :ref:`sphx_glr_auto_examples_tutorial_01_getting_started.py` (``tutorials/tutorial_01_getting_started.py``)
          - 00:09.810
          - 0.0
        * - :ref:`sphx_glr_auto_examples_tutorial_02_estimators.py` (``tutorials/tutorial_02_estimators.py``)
          - 00:00.000
          - 0.0
        * - :ref:`sphx_glr_auto_examples_tutorial_03_variogram_models.py` (``tutorials/tutorial_03_variogram_models.py``)
          - 00:00.000
          - 0.0
        * - :ref:`sphx_glr_auto_examples_tutorial_04_plotting.py` (``tutorials/tutorial_04_plotting.py``)
          - 00:00.000
          - 0.0
        * - :ref:`sphx_glr_auto_examples_tutorial_05_binning.py` (``tutorials/tutorial_05_binning.py``)
          - 00:00.000
          - 0.0
        * - :ref:`sphx_glr_auto_examples_tutorial_06_gstools.py` (``tutorials/tutorial_06_gstools.py``)
          - 00:00.000
          - 0.0
        * - :ref:`sphx_glr_auto_examples_tutorial_07_maximum_likelihood_fit.py` (``tutorials/tutorial_07_maximum_likelihood_fit.py``)
          - 00:00.000
          - 0.0
--------------------------------------------------------------------------------
/skgstat/plotting/variogram_dd_plot.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt

try:
    import plotly.graph_objects as go
except ImportError:
    pass


def __calculate_plot_data(variogram):
    # get all distances and residual diffs
    dist = variogram.distance
    diff = variogram.pairwise_diffs

    return diff, dist


def matplotlib_dd_plot(variogram, ax=None, plot_bins=True, show=True):
    # get the plotting data
    _diff, _dist = __calculate_plot_data(variogram)

    # create the plot
    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=(8, 6))
    else:
        fig = ax.get_figure()

    # plot the bins
    if plot_bins:
        _bins = variogram.bins
        ax.vlines(_bins, 0, np.max(_diff), linestyle='--', lw=1, color='r')

    # plot
    ax.scatter(_dist, _diff, 8, color='b', marker='o', alpha=0.5)

    # set limits
    ax.set_ylim((0, np.max(_diff)))
    ax.set_xlim((0, np.max(_dist)))
    ax.set_xlabel('separating distance')
    ax.set_ylabel('pairwise difference')
    ax.set_title('Pairwise distance ~ difference')

    # show the plot
    if show:  # pragma: no cover
        fig.show()

    return fig


def plotly_dd_plot(variogram, fig=None, plot_bins=True, show=True):
    # get the plotting data
    _diff, _dist = __calculate_plot_data(variogram)

    # create a new Figure if needed
    if fig is None:
        fig = go.Figure()

    # plot
    fig.add_trace(
        go.Scattergl(
            x=_dist, y=_diff,
            mode='markers', marker=dict(color='blue', opacity=0.5)
        )
    )

    # plot the bins
    if plot_bins:
        for _bin in variogram.bins:
            fig.add_vline(x=_bin, line_dash='dash', line_color='red')

    # titles
    fig.update_layout(
        title='Pairwise distance ~ difference',
        xaxis_title='separating distance',
        yaxis_title='pairwise difference'
    )

    if show:
        fig.show()

    return fig
--------------------------------------------------------------------------------
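
These two functions back the distance-difference plot of the ``Variogram`` class and are usually not called directly. A hedged usage sketch, assuming the plot is exposed as ``Variogram.distance_difference_plot``:

.. code-block:: python

    import skgstat as skg

    coords, vals = skg.data.pancake(N=150, seed=42).get('sample')
    V = skg.Variogram(coords, vals)

    # matplotlib by default; switch the plotting backend for the plotly variant
    fig = V.distance_difference_plot(plot_bins=True, show=False)
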
/skgstat/tests/test_likelihood.py:
--------------------------------------------------------------------------------
import inspect

import numpy as np
from scipy.optimize import minimize

import skgstat as skg
from skgstat import models
import skgstat.util.likelihood as li


def test_wrapped_model_doc():
    """Test the docstring wrapping"""
    # create a wrapped spherical function
    wrapped = li._model_transformed(models.spherical, has_s=False)

    assert 'Autocorrelation function.' in wrapped.__doc__
    assert models.spherical.__doc__ in wrapped.__doc__


def test_wrapped_model_args():
    """
    Check that the number of model parameters is initialized correctly.
    """
    # get the two functions
    spherical = li._model_transformed(models.spherical, has_s=False)
    stable = li._model_transformed(models.stable, has_s=True)

    sig = inspect.signature(spherical)
    assert len(sig.parameters) == 2

    sig = inspect.signature(stable)
    assert len(sig.parameters) == 3


def test_build_A():
    """
    Test the autocorrelation matrix building.
    """
    # create a wrapped spherical function
    wrap_2 = li._model_transformed(models.spherical, has_s=False)
    wrap_3 = li._model_transformed(models.stable, has_s=True)

    # build the autocorrelation matrix
    A_5 = li._build_A(wrap_2, [1, 1, 0], np.arange(0, 1, 0.1))
    A_6 = li._build_A(wrap_3, [1, 1, 1, 1], np.arange(0, 1.5, 0.1))

    # check the matrix shape
    assert A_5.shape == (5, 5)
    assert A_6.shape == (6, 6)


def test_likelihood():
    """
    Call the likelihood function and make sure that it optimizes
    the pancake variogram
    """
    # build the variogram from the tutorial
    c, v = skg.data.pancake(300, seed=42).get('sample')
    vario = skg.Variogram(c, v, bin_func='scott', maxlag=0.7)

    # get the likelihood function
    like = li.get_likelihood(vario)

    # create the optimization attributes
    sep_mean = vario.distance.mean()
    sam_var = vario.values.var()

    # create initial guess
    p0 = np.array([sep_mean, sam_var, 0.1 * sam_var])

    # create the bounds to restrict optimization
    bounds = [[0, vario.bins[-1]], [0, 3*sam_var], [0, 2.9*sam_var]]

    # minimize the likelihood function
    res = minimize(like, p0, bounds=bounds, method='SLSQP')

    # the result and p0 should be different
    assert not np.allclose(res.x, p0, rtol=1e-3)
--------------------------------------------------------------------------------
/tutorials/tereno_fendt/meta_data_CosmicSense_JFC1_DE-Fen_SNdata.json:
--------------------------------------------------------------------------------
{
  "Description": "15 Minute SoilNet profile data for TERENO-Pre-Alpine DE-Fen (Fendt) site",

  "Citation": {
    "Info": "These files are part of the dataset published with the data-paper mentioned below.",
    "cite-as": "Fersch, B., Francke, T., Heistermann, M., Schrön, M., Döpper, V., Jakobi, J. et al. (2020): A dense network of cosmic-ray neutron sensors for soil moisture observation in a highly instrumented pre-alpine headwater catchment in Germany. Earth System Science Data. https://doi.org/10.5194/essd-2020-48"
  },

  "Provider": {
    "Name": "Benjamin Fersch",
    "Institution": "KIT Campus Alpin",
    "Email": "fersch@kit.edu",
    "Comment": ""
  },

  "SpaceTimeCoverage": {
    "StartDate": "2019-05-01",
    "EndDate": "2019-07-31",
    "RegionName": "TERENO, Peißenberg-Fendt, Germany",
    "BBox": "47.83016, 11.05866; 47.83414, 11.06292",
    "Latitude": "47.83289",
    "Longitude": "11.06073",
    "Elevation": "595 m ASL",
    "Weblink": "https://geoportal.bayern.de/bayernatlas/?zoom=15&lang=de&topic=ba&bgLayer=atkis&E=654197&N=5299736&catalogNodes=122,11&layers=luftbild&crosshair=marker"
  },

  "Source": {
    "Name": "Benjamin Fersch",
    "Institution": "KIT Campus Alpin",
    "LinkToOriginalSource": "https://www.imk-ifu.kit.edu/tereno.php"
  },


  "Variables": {
    "time": "time of measurement",
    "lat": "profile latitude coordinate",
    "lon": "profile longitude coordinate",
    "elev": "profile altitude",
    "depth": "soil sensor depth",
    "station_name": "profile name (id)",
    "eps_a": "soil permittivity (sensor group a)",
    "eps_b": "soil permittivity (sensor group b)",
    "vwc_a": "soil volumetric water content (sensor group a)",
    "vwc_b": "soil volumetric water content (sensor group b)",
    "T_a": "soil temperature (sensor group a)",
    "T_b": "soil temperature (sensor group b)"
  },

  "Units": {
    "time": "minutes since 2019-05-01 00:00:00",
    "lat": "degree north",
    "lon": "degree east",
    "elev": "m ASL",
    "depth": "cm",
    "station_name": "character",
    "eps_a": "-",
    "eps_b": "-",
    "vwc_a": "-",
    "vwc_b": "-",
    "T_a": "degree Celsius",
    "T_b": "degree Celsius"
  },

  "SpatialReferenceSystem": {
    "Name": "WGS 84",
    "EPSG": "4326"
  },


  "TemporalReferenceSystem": {
    "TimeZone": "UTC",
    "IntervalLength": "15 minutes",
    "IntervalAggregation": "instantaneous",
    "TimestampAtEndOfInterval": "FALSE"
  },

  "Remarks": "Each profile is equipped with 2 redundant sensors (a and b)\n Metadata also contained in NetCDF header."
}
--------------------------------------------------------------------------------
/docs/tutorials/data/tereno_fendt/meta_data_CosmicSense_JFC1_DE-Fen_SNdata.json:
--------------------------------------------------------------------------------
{
  "Description": "15 Minute SoilNet profile data for TERENO-Pre-Alpine DE-Fen (Fendt) site",

  "Citation": {
    "Info": "These files are part of the dataset published with the data-paper mentioned below.",
    "cite-as": "Fersch, B., Francke, T., Heistermann, M., Schrön, M., Döpper, V., Jakobi, J. et al. (2020): A dense network of cosmic-ray neutron sensors for soil moisture observation in a highly instrumented pre-alpine headwater catchment in Germany. Earth System Science Data. https://doi.org/10.5194/essd-2020-48"
  },

  "Provider": {
    "Name": "Benjamin Fersch",
    "Institution": "KIT Campus Alpin",
    "Email": "fersch@kit.edu",
    "Comment": ""
  },

  "SpaceTimeCoverage": {
    "StartDate": "2019-05-01",
    "EndDate": "2019-07-31",
    "RegionName": "TERENO, Peißenberg-Fendt, Germany",
    "BBox": "47.83016, 11.05866; 47.83414, 11.06292",
    "Latitude": "47.83289",
    "Longitude": "11.06073",
    "Elevation": "595 m ASL",
    "Weblink": "https://geoportal.bayern.de/bayernatlas/?zoom=15&lang=de&topic=ba&bgLayer=atkis&E=654197&N=5299736&catalogNodes=122,11&layers=luftbild&crosshair=marker"
  },

  "Source": {
    "Name": "Benjamin Fersch",
    "Institution": "KIT Campus Alpin",
    "LinkToOriginalSource": "https://www.imk-ifu.kit.edu/tereno.php"
  },


  "Variables": {
    "time": "time of measurement",
    "lat": "profile latitude coordinate",
    "lon": "profile longitude coordinate",
    "elev": "profile altitude",
    "depth": "soil sensor depth",
    "station_name": "profile name (id)",
    "eps_a": "soil permittivity (sensor group a)",
    "eps_b": "soil permittivity (sensor group b)",
    "vwc_a": "soil volumetric water content (sensor group a)",
    "vwc_b": "soil volumetric water content (sensor group b)",
    "T_a": "soil temperature (sensor group a)",
    "T_b": "soil temperature (sensor group b)"
  },

  "Units": {
    "time": "minutes since 2019-05-01 00:00:00",
    "lat": "degree north",
    "lon": "degree east",
    "elev": "m ASL",
    "depth": "cm",
    "station_name": "character",
    "eps_a": "-",
    "eps_b": "-",
    "vwc_a": "-",
    "vwc_b": "-",
    "T_a": "degree Celsius",
    "T_b": "degree Celsius"
  },

  "SpatialReferenceSystem": {
    "Name": "WGS 84",
    "EPSG": "4326"
  },


  "TemporalReferenceSystem": {
    "TimeZone": "UTC",
    "IntervalLength": "15 minutes",
    "IntervalAggregation": "instantaneous",
    "TimestampAtEndOfInterval": "FALSE"
  },

  "Remarks": "Each profile is equipped with 2 redundant sensors (a and b)\n Metadata also contained in NetCDF header."
}
--------------------------------------------------------------------------------
/skgstat/interfaces/pykrige.py:
--------------------------------------------------------------------------------
try:
    import pykrige
    PYKRIGE_AVAILABLE = True
except ImportError:
    PYKRIGE_AVAILABLE = False

import numpy as np


def __check_pykrige_available():  # pragma: no cover
    if not PYKRIGE_AVAILABLE:
        print('The pykrige interface needs pykrige installed.')
        print("Run 'pip install pykrige' to install it.")
        return False
    return True


def pykrige_model(variogram):
    """
    """
    # pykrige is available?
    if not __check_pykrige_available():
        return

    # get the fitted model
    model = variogram.fitted_model

    # define the model function
    def skgstat_model(parameters, lags):
        """Variogram model

        This function is an interface from scikit-gstat to pykrige.
        If you want to use a fitted skgstat.Variogram instance as a
        custom variogram model in pykrige, this is the already fitted
        function that can be passed as the `variogram_function` argument.
        Additionally, you need to set the `variogram_model` to `'custom'`.

        The skgstat.interfaces.pykrige module also has a pykrige_as_kwargs
        function, which returns all necessary keyword arguments for the
        pykrige class as a dictionary. You can just pass it using the double
        star operator.

        """
        if not isinstance(lags, np.ndarray):
            lags = np.asarray(lags, dtype=float)

        # get the semi-variances
        semivar = np.fromiter(map(model, lags.flatten()), dtype=float)

        # return
        return semivar.reshape(lags.shape)

    # return model
    return skgstat_model


def pykrige_params(variogram):
    """
    """
    # pykrige is available?
    if not __check_pykrige_available():
        return

    # get the parameters into the correct order.
    pars = variogram.parameters

    return [pars[1], pars[0], pars[2]]


def pykrige_as_kwargs(variogram, adjust_maxlag=False, adjust_nlags=False):
    """
    """
    # pykrige is available?
    if not __check_pykrige_available():
        return

    # as far as I get it, there is no maximum lag in pykrige.
    if adjust_maxlag:
        variogram.maxlag = None
    else:
        print('[WARNING]: If the maximum lag is not None, the variogram plots will differ.')

    # to work properly, variogram has to use a nugget
    variogram.use_nugget = True
    variogram.fit()

    # get the model
    model_func = pykrige_model(variogram)

    # get the parameters
    pars = pykrige_params(variogram)

    args = dict(
        variogram_model='custom',
        variogram_parameters=pars,
        variogram_function=model_func
    )

    if adjust_nlags:
        args['nlags'] = variogram.n_lags

    # return
    return args
--------------------------------------------------------------------------------
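
A hedged end-to-end sketch of the interface above; it requires ``pykrige`` to be installed, and the sample is illustrative:

.. code-block:: python

    import skgstat as skg
    from skgstat.interfaces.pykrige import pykrige_as_kwargs
    from pykrige.ok import OrdinaryKriging

    coords, vals = skg.data.pancake(N=150, seed=42).get('sample')
    V = skg.Variogram(coords, vals)

    # translate the fitted variogram into pykrige keyword arguments
    kwargs = pykrige_as_kwargs(V, adjust_maxlag=True)
    ok = OrdinaryKriging(coords[:, 0], coords[:, 1], vals, **kwargs)
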
[3.3, 1.2]], size=50)
 67 | 
 68 |     # test the data provider
 69 |     p = data.corr_variable(50, [1.0, 10.0], vars=None, cov=[[1.2, 3.3], [3.3, 1.2]], seed=42).get('sample')[1]
 70 | 
 71 |     assert_array_almost_equal(d, p, decimal=1)
 72 | 
 73 | 
 74 | def test_corr_var_derived():
 75 |     # Test random covariance generation
 76 |     vars = [1.2, 1.5]
 77 |     np.random.seed(42)
 78 |     cov = np.random.rand(2, 2)
 79 |     np.fill_diagonal(cov, vars)
 80 | 
 81 |     # generate test sample
 82 |     np.random.seed(42)
 83 |     d = np.random.multivariate_normal([1.0, 10.0], cov, size=50)
 84 | 
 85 |     p = data.corr_variable(50, [1.0, 10.0], vars=vars, cov=None, seed=42).get('sample')[1]
 86 | 
 87 |     assert_array_almost_equal(d, p, decimal=1)
 88 | 
 89 |     # test uniform covariance
 90 |     cov = np.ones((2, 2)) * 0.8
 91 |     np.fill_diagonal(cov, vars)
 92 | 
 93 |     # generate test sample
 94 |     np.random.seed(42)
 95 |     d = np.random.multivariate_normal([1.0, 10.0], cov, size=50)
 96 | 
 97 |     p = data.corr_variable(50, [1.0, 10.0], vars=vars, cov=0.8, seed=42).get('sample')[1]
 98 | 
 99 |     assert_array_almost_equal(d, p, decimal=1)
100 | 
101 | 
102 | def test_corr_var_matrix_error():
103 |     with pytest.raises(ValueError) as e:
104 |         data.corr_variable(50, [1.0, 2.0], cov='NotAllowed')
105 | 
106 |     assert 'uniform co-variance, or a co-variance matrix' in str(e.value)
107 | 
--------------------------------------------------------------------------------
/skgstat/util/cross_validation.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from itertools import cycle
  3 | 
  4 | from skgstat.Kriging import OrdinaryKriging
  5 | from skgstat.Variogram import Variogram
  6 | from skgstat.util.likelihood import get_likelihood
  7 | 
  8 | 
  9 | def _interpolate(idx: int, variogram) -> float:
 10 |     # get the data for this iteration
 11 |     c = np.delete(variogram.coordinates, idx, axis=0)
 12 |     v = np.delete(variogram.values, idx, axis=0)
 13 |     ok = OrdinaryKriging(variogram, coordinates=c, values=v)
 14 | 
 15 |     # interpolate Z[idx]
 16 |     Z = ok.transform(
 17 |         [variogram.coordinates[idx][0]],
 18 |         [variogram.coordinates[idx][1]]
 19 |     )
 20 | 
 21 |     return (Z - variogram.values[idx])[0]
 22 | 
 23 | 
 24 | def jacknife(
 25 |     variogram,
 26 |     n: int = None,
 27 |     metric: str = 'rmse',
 28 |     seed=None
 29 | ) -> float:
 30 |     """
 31 |     Leave-one-out cross validation of the given variogram
 32 |     model using the OrdinaryKriging instance.
 33 |     This method can be called using
 34 |     :func:`Variogram.cross_validate <skgstat.Variogram.cross_validate>`.
 35 | 
 36 |     Parameters
 37 |     ----------
 38 |     variogram : skgstat.Variogram
 39 |         The variogram instance to be validated
 40 |     n : int
 41 |         Number of points that should be used for cross validation.
 42 |         If None is given, all points are used (default).
 43 |     metric : str
 44 |         Metric used for cross validation. Can be one of
 45 |         ['rmse', 'mse', 'mae']
 46 | 
 47 |     Returns
 48 |     -------
 49 |     metric : float
 50 |         Cross-validation result. The value is given
 51 |         in the selected metric.
 52 | 
 53 |     """
 54 |     if metric.lower() not in ('rmse', 'mse', 'mae'):
 55 |         raise ValueError("metric has to be in ['rmse', 'mse', 'mae']")
 56 | 
 57 |     # shuffle the input coordinates
 58 |     rng = np.random.default_rng(seed=seed)
 59 |     size = n if n is not None else len(variogram.coordinates)
 60 |     indices = rng.choice(len(variogram.coordinates), replace=False, size=size)
 61 | 
 62 |     # TODO maybe multiprocessing?
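    # lazily map the leave-one-out interpolation over the selected indices;
    # cycle() re-uses the one variogram instance for every call to _interpolate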
 63 |     cross_val_map = map(_interpolate, indices, cycle([variogram]))
 64 | 
 65 |     # if no multiprocessing - use numpy
 66 |     deviations = np.fromiter(cross_val_map, dtype=float)
 67 | 
 68 |     if metric.lower() == 'rmse':
 69 |         return np.sqrt(np.nanmean(np.power(deviations, 2)))
 70 |     elif metric.lower() == 'mse':
 71 |         return np.nanmean(np.power(deviations, 2))
 72 |     else:
 73 |         # MAE
 74 |         return np.nansum(np.abs(deviations)) / len(deviations)
 75 | 
 76 | 
 77 | def aic(variogram: Variogram) -> float:
 78 |     like = get_likelihood(variogram)
 79 | 
 80 |     # get parameters
 81 |     params = variogram.parameters
 82 |     k = len(params)
 83 |     if params[-1] < 1e-6:
 84 |         k -= 1
 85 | 
 86 |     # get maximum log-likelihood
 87 |     log_like = like(params)
 88 | 
 89 |     # return AIC
 90 |     return 2 * k - 2 * log_like
 91 | 
 92 | 
 93 | def bic(variogram: Variogram) -> float:
 94 |     like = get_likelihood(variogram)
 95 | 
 96 |     # get parameters
 97 |     params = variogram.parameters
 98 |     k = len(params)
 99 |     if params[-1] < 1e-6:
100 |         k -= 1
101 | 
102 |     # get maximum log-likelihood
103 |     log_like = like(params)
104 | 
105 |     # return BIC = k * ln(n) - 2 * ln(L); n is assumed to be the number of lag classes
106 |     return k * np.log(len(variogram.bins)) - 2 * log_like
107 | 
--------------------------------------------------------------------------------
/skgstat/plotting/variogram_scattergram.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from scipy.spatial.distance import squareform
  3 | import matplotlib.pyplot as plt
  4 | 
  5 | try:
  6 |     import plotly.graph_objects as go
  7 | except ImportError:
  8 |     pass
  9 | 
 10 | 
 11 | def __calculate_plot_data(variogram):
 12 |     tails = []
 13 |     heads = []
 14 | 
 15 |     sq_lags = squareform(variogram.lag_groups())
 16 | 
 17 |     for h in np.unique(variogram.lag_groups()):
 18 |         # get head and tail
 19 |         x, y = np.where(sq_lags == h)
 20 | 
 21 |         # add
 22 |         tails.append(variogram.values[x].flatten())
 23 |         heads.append(variogram.values[y].flatten())
 24 | 
 25 |     return tails, heads
 26 | 
 27 | 
 28 | def matplotlib_variogram_scattergram(variogram, ax=None, show=True, single_color=True, **kwargs):
 29 |     # get the plot data
 30 |     tails, heads = __calculate_plot_data(variogram)
 31 | 
 32 |     # create a new figure or use the given
 33 |     if ax is None:
 34 |         fig, ax = plt.subplots(1, 1)
 35 |     else:
 36 |         fig = ax.get_figure()
 37 | 
 38 |     # some settings
 39 |     color = 'orange' if single_color else None
 40 | 
 41 |     # mark the mean tail (vertical) and mean head (horizontal) value with dashed lines
 42 |     h = np.concatenate(heads).ravel()
 43 |     t = np.concatenate(tails).ravel()
 44 |     ax.vlines(np.nanmean(t), np.nanmin(h), np.nanmax(h), linestyles='--', color='red', lw=kwargs.get('lw', 1.5))
 45 |     ax.hlines(np.nanmean(h), np.nanmin(t), np.nanmax(t), linestyles='--', color='red', lw=kwargs.get('lw', 1.5))
 46 | 
 47 |     # plot
 48 |     for tail, head in zip(tails, heads):
 49 |         ax.scatter(tail, head, kwargs.get('size', 8), marker='o', color=color)
 50 | 
 51 |     # annotate
 52 |     ax.set_ylabel('head')
 53 |     ax.set_xlabel('tail')
 54 | 
 55 |     # show the figure
 56 |     if show:  # pragma: no cover
 57 |         fig.show()
 58 | 
 59 |     return fig
 60 | 
 61 | 
 62 | def plotly_variogram_scattergram(variogram, fig=None, show=False, single_color=False, **kwargs):
 63 |     # get the plot data
 64 |     tails, heads = __calculate_plot_data(variogram)
 65 | 
 66 |     # create a new Figure if needed
 67 |     if fig is None:
 68 |         fig = go.Figure()
 69 | 
 70 |     # some arguments
 71 |     lw = kwargs.get('line_width', kwargs.get('lw', 1.5))
 72 |     ld = kwargs.get('line_dash', 'dash')
 73 |     color = 'orange' if single_color else None
 74 | 
 75 |     # add vertical and horizontal lines
 76 |     try:
 77 |         h = np.concatenate(heads).ravel()
 78 |         t = np.concatenate(tails).ravel()
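        # the dashed guide lines cross at the marginal means of all
        # tail (x-axis) and head (y-axis) values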
 79 |         fig.add_vline(x=np.nanmean(t), line_dash=ld, line_width=lw, line_color='red')
 80 |         fig.add_hline(y=np.nanmean(h), line_dash=ld, line_width=lw, line_color='red')
 81 |     except AttributeError:
 82 |         # add_hline and add_vline were added in plotly >= 4.12
 83 |         print("Can't plot lines, consider updating your plotly to >= 4.12")
 84 |         pass
 85 | 
 86 |     # do the plot
 87 |     for i, (tail, head) in enumerate(zip(tails, heads)):
 88 |         fig.add_trace(
 89 |             go.Scattergl(x=tail, y=head, mode='markers', marker=dict(size=kwargs.get('size', 4), color=color), name='Lag #%d' % i)
 90 |         )
 91 | 
 92 |     # add some titles
 93 |     fig.update_xaxes(title_text='Tail')
 94 |     fig.update_yaxes(title_text='Head')
 95 | 
 96 |     if single_color:
 97 |         fig.update_layout(showlegend=False)
 98 | 
 99 |     if show:
100 |         fig.show()
101 | 
102 |     return fig
--------------------------------------------------------------------------------
/skgstat/tests/test_cross_utility.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import warnings
 3 | 
 4 | import numpy as np
 5 | from numpy.testing import assert_array_almost_equal
 6 | 
 7 | from skgstat import Variogram, DirectionalVariogram
 8 | from skgstat.util.cross_variogram import cross_variograms
 9 | 
10 | 
11 | class TestCrossUtility(unittest.TestCase):
12 |     def setUp(self) -> None:
13 |         # ignore scipy runtime warnings as for this random data
14 |         # the covariance may not be positive-semidefinite
15 |         # this is caused by the multivariate_normal - thus no problem
16 |         # see here: https://stackoverflow.com/questions/41515522/numpy-positive-semi-definite-warning
17 |         warnings.simplefilter('ignore', category=RuntimeWarning)
18 | 
19 |         # set up default values, whenever c and v are not important
20 |         np.random.seed(42)
21 |         self.c = np.random.gamma(10, 4, (100, 2))
22 | 
23 |         # build the multivariate sample
24 |         means = [1, 10, 100, 1000]
25 |         cov = [[1, 0.8, 0.7, 0.6], [0.8, 1, 0.2, 0.2], [0.7, 0.2, 1.0, 0.2], [0.6, 0.2, 0.2, 1.0]]
26 | 
27 |         np.random.seed(42)
28 |         self.v = np.random.multivariate_normal(means, cov, size=100)
29 | 
30 |     def test_cross_matrix_shape(self):
31 |         """Test the shape of the cross-variogram matrix for 4 variables"""
32 |         mat = cross_variograms(self.c, self.v)
33 | 
34 |         # check shape
35 |         mat = np.asarray(mat, dtype='object')
36 |         self.assertEqual(mat.shape, (4, 4))
37 | 
38 |     def test_cross_matrix_diagonal(self):
39 |         """Test that the primary variograms are correct"""
40 |         # get the cross variogram matrix
41 |         mat = cross_variograms(self.c, self.v, maxlag='median')
42 | 
43 |         # calculate the first and third primary variogram
44 |         first = Variogram(self.c, self.v[:, 0], maxlag='median')
45 |         third = Variogram(self.c, self.v[:, 2], maxlag='median')
46 | 
47 |         # assert first empirical variogram
48 |         assert_array_almost_equal(mat[0][0].experimental, first.experimental, 2)
49 |         assert_array_almost_equal(mat[0][0].bins, first.bins, 1)
50 | 
51 |         # assert third empirical variogram
52 |         assert_array_almost_equal(mat[2][2].experimental, third.experimental, 2)
53 |         assert_array_almost_equal(mat[2][2].bins, third.bins, 1)
54 | 
55 |     def test_check_cross_variogram(self):
56 |         """Test two of the cross-variograms in the matrix"""
57 |         mat = cross_variograms(self.c, self.v, n_lags=15)
58 | 
59 |         # calculate two cross-variograms
60 |         first = Variogram(self.c, self.v[:, [1, 3]], n_lags=15)
61 |         second = Variogram(self.c, self.v[:, [0, 2]], n_lags=15)
62 | 
63 |         # assert first variogram
64 |         assert_array_almost_equal(mat[1][3].experimental, first.experimental, 2)
 65 |         assert_array_almost_equal(mat[1][3].bins, first.bins, 1)
 66 | 
 67 |         # assert second variogram
 68 |         assert_array_almost_equal(mat[0][2].experimental, second.experimental, 2)
 69 |         assert_array_almost_equal(mat[0][2].bins, second.bins, 1)
 70 | 
 71 |     def test_for_directional_variograms(self):
 72 |         """Check that DirectionalVariograms are also calculated correctly"""
 73 |         mat = cross_variograms(self.c, self.v, azimuth=90)
 74 | 
 75 |         mat = np.asarray(mat, dtype='object').flatten()
 76 | 
 77 |         self.assertTrue(all([isinstance(v, DirectionalVariogram) for v in mat]))
--------------------------------------------------------------------------------
/skgstat/tests/test_util.py:
--------------------------------------------------------------------------------
  1 | import pytest
  2 | import os
  3 | import numpy as np
  4 | import pandas as pd
  5 | 
  6 | from skgstat import Variogram
  7 | from skgstat import data
  8 | from skgstat.util import shannon_entropy
  9 | from skgstat.util.cross_validation import jacknife
 10 | from skgstat.util.uncertainty import propagate
 11 | 
 12 | 
 13 | # read the sample data
 14 | def get_sample() -> pd.DataFrame:
 15 |     df = pd.read_csv(os.path.join(os.path.dirname(__file__), 'pan_sample.csv'))
 16 |     return df
 17 | 
 18 | 
 19 | def test_shannon_entropy():
 20 |     np.random.seed(42)
 21 | 
 22 |     # calculate the entropy of the sample
 23 |     x = np.random.gamma(10, 15, size=1000)
 24 |     h = shannon_entropy(x, bins=15)
 25 | 
 26 |     assert np.abs(h - 2.943) < 0.001
 27 | 
 28 | 
 29 | def test_jacknife():
 30 |     # load the data sample
 31 |     df = get_sample()
 32 | 
 33 |     # create a Variogram
 34 |     V = Variogram(df[['x', 'y']].values, df.z.values, model='exponential', n_lags=25)
 35 | 
 36 |     rmse = V.cross_validate(n=30, seed=42)
 37 | 
 38 |     assert abs(rmse - 16.623) < 0.1
 39 | 
 40 | 
 41 | def test_jackknife_metrics():
 42 |     # load the data sample
 43 |     df = get_sample()
 44 | 
 45 |     # create a Variogram
 46 |     V = Variogram(df[['x', 'y']].values, df.z.values, model='exponential', n_lags=25)
 47 | 
 48 |     rmse = jacknife(V, n=50, metric='RMSE', seed=1312)
 49 |     mse = jacknife(V, n=50, metric='MSE', seed=1312)
 50 | 
 51 |     assert abs(np.sqrt(mse) - rmse) < 0.001
 52 | 
 53 |     mae = jacknife(V, n=50, metric='MAE', seed=13062018)
 54 | 
 55 |     assert abs(mae - 6.092) < 0.1
 56 | 
 57 | 
 58 | def test_unknown_cross_validation():
 59 |     # load the data sample
 60 |     df = get_sample()
 61 | 
 62 |     # create a Variogram
 63 |     V = Variogram(df[['x', 'y']].values, df.z.values, model='exponential', n_lags=25)
 64 | 
 65 |     with pytest.raises(AttributeError) as e:
 66 |         V.cross_validate(method='foobar')
 67 | 
 68 |     assert "'foobar' is not implemented" in str(e.value)
 69 | 
 70 | 
 71 | def test_uncertainty_propagation():
 72 |     # load a pancake variogram
 73 |     c, v = data.pancake().get('sample')
 74 | 
 75 |     V = Variogram(c, v, n_lags=15, obs_sigma=5)
 76 | 
 77 |     # now there should be a 15,3 shaped conf interval
 78 |     conf = V._experimental_conf_interval
 79 |     assert conf.shape[0] == 15
 80 |     assert conf.shape[1] == 3
 81 | 
 82 | 
 83 | def test_all_propagation_options():
 84 |     # load a pancake variogram
 85 |     c, v = data.pancake().get('sample')
 86 | 
 87 |     V = Variogram(c, v, n_lags=15)
 88 | 
 89 |     # propagation - experimental
 90 |     conf = propagate(V, 'values', sigma=5, evalf='experimental', num_iter=100)
 91 |     assert conf.shape == (15, 3)
 92 | 
 93 |     # propagation - model
 94 |     conf = propagate(V, 'values', sigma=5, evalf='model', num_iter=100)
 95 |     assert conf.shape == (100, 3)
 96 | 
 97 |     # propagation - parameter
 98 |     conf = propagate(V, 'values', sigma=5, evalf='parameter', num_iter=100)
 99 |     assert conf.shape == (3, 3)
100 | 
101 | # switch model 102 | V.model = 'stable' 103 | conf = propagate(V, 'values', sigma=5, evalf='parameter', num_iter=100) 104 | assert conf.shape == (4, 3) 105 | 106 | 107 | def test_propagate_many_targets(): 108 | # load a pancake variogram 109 | c, v = data.pancake().get('sample') 110 | 111 | V = Variogram(c, v, n_lags=12) 112 | 113 | # propagate many 114 | conf_list = propagate(V, 'values', sigma=10, evalf=['experimental', 'parameter'], num_iter=50) 115 | assert len(conf_list) == 2 116 | 117 | # unstack the list 118 | conf_exp, conf_par = conf_list 119 | assert conf_exp.shape == (12, 3) 120 | assert conf_par.shape == (3, 3) 121 | -------------------------------------------------------------------------------- /skgstat/tests/test_estimator.py: -------------------------------------------------------------------------------- 1 | """ 2 | """ 3 | 4 | import unittest 5 | 6 | import numpy as np 7 | 8 | from skgstat.estimators import matheron, cressie, dowd, genton 9 | from skgstat.estimators import minmax, percentile, entropy 10 | 11 | 12 | class TestEstimator(unittest.TestCase): 13 | def setUp(self): 14 | pass 15 | 16 | def test_matheron(self): 17 | # extract actual estimator 18 | e = matheron.py_func 19 | np.random.seed(42) 20 | 21 | self.assertAlmostEqual( 22 | e(np.random.normal(0, 1, 10000)), 23 | 0.50342, 24 | places=6 25 | ) 26 | 27 | def test_matheron_nan(self): 28 | # extract actual estimator 29 | e = matheron.py_func 30 | 31 | self.assertTrue(np.isnan(e(np.array([])))) 32 | 33 | def test_cressie(self): 34 | # extract actual estimator 35 | e = cressie.py_func 36 | 37 | np.random.seed(42) 38 | 39 | self.assertAlmostEqual( 40 | e(np.random.gamma(10, 4, 10000)), 41 | 1686.7519, 42 | places=4 43 | ) 44 | 45 | def test_cressie_nan(self): 46 | # extract actual estimator 47 | e = cressie.py_func 48 | 49 | self.assertTrue(np.isnan(e(np.array([])))) 50 | 51 | def test_dowd(self): 52 | np.random.seed(1306) 53 | x1 = np.random.weibull(14, 1000) 54 | np.random.seed(1312) 55 | x2 = np.random.gamma(10, 4, 100) 56 | 57 | # test 58 | self.assertAlmostEqual(dowd(x1), 1.0437, places=4) 59 | self.assertAlmostEqual(dowd(x2), 1585.48, places=2) 60 | 61 | def test_genton(self): 62 | # extract actual estimator 63 | e = genton.py_func 64 | 65 | np.random.seed(42) 66 | x1 = np.random.gamma(40, 2, 100) 67 | np.random.seed(42) 68 | x2 = np.random.gamma(30, 5, 1000) 69 | 70 | self.assertAlmostEqual(e(x1), 62.1, places=1) 71 | self.assertAlmostEqual(e(x2), 354.5, places=1) 72 | 73 | def test_genton_nan(self): 74 | # extract actual estimator 75 | e = genton.py_func 76 | 77 | # genton cannot be solved for only one element 78 | self.assertTrue(np.isnan(e(np.array([0.1])))) 79 | 80 | def test_minmax_skew(self): 81 | # heavily skewed gamma 82 | np.random.seed(1306) 83 | x = np.random.gamma(15, 20, 100) 84 | self.assertAlmostEqual(minmax(x), 1.5932, places=4) 85 | 86 | def test_minmax_pow(self): 87 | # L-stable pareto 88 | np.random.seed(2409) 89 | x = np.random.pareto(2, 10) 90 | self.assertAlmostEqual(minmax(x), 2.5, places=2) 91 | 92 | def test_percentile(self): 93 | np.random.seed(42) 94 | x = np.abs(np.random.normal(0, 1, 100000)) 95 | 96 | self.assertAlmostEqual(percentile(x), 0.67588, places=5) 97 | self.assertAlmostEqual(percentile(x, 20), 0.25277, places=5) 98 | 99 | def test_entropy_default_bins(self): 100 | np.random.seed(42) 101 | x = np.random.normal(5, 1, 10000) 102 | 103 | self.assertAlmostEqual(entropy(x, bins=None), 3.0, places=2) 104 | 105 | def test_entropy_custom_bins(self): 106 | 
np.random.seed(123456789)
107 |         x = np.random.gamma(10, 5, 10000)
108 | 
109 |         # custom bins
110 |         self.assertAlmostEqual(
111 |             entropy(x, [5, 15, 50, 51, 52, 53, 54, 55, 56, 100, 120, 150]),
112 |             1.82, places=2
113 |         )
114 | 
115 |         # default bins
116 |         self.assertAlmostEqual(entropy(x), 2.91, places=2)
117 | 
118 | 
119 | if __name__ == '__main__':
120 |     unittest.main()
121 | 
--------------------------------------------------------------------------------
/skgstat/util/cross_variogram.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Cross-variogram utility function. This module can be used to calculate
 3 | cross-variograms for more than two variables, by creating a variogram
 4 | for each combination of variables.
 5 | 
 6 | """
 7 | from typing import List
 8 | 
 9 | import numpy as np
10 | 
11 | from skgstat.Variogram import Variogram
12 | from skgstat.DirectionalVariogram import DirectionalVariogram
13 | 
14 | def cross_variograms(coordinates: np.ndarray, values: np.ndarray, **kwargs) -> List[List[Variogram]]:
15 |     """
16 |     Cross-variogram matrix calculation. Similar to a cross-correlation table.
17 |     For values of shape ``(n_samples, N)``, a
18 |     :class:`Variogram <skgstat.Variogram>` is calculated for each combination
19 |     of two columns using the cross-variogram option, filling a ``(N, N)`` matrix.
20 |     The diagonal of the *'matrix'* holds primary variograms (without cross option)
21 |     for the respective column.
22 |     The function accepts all keyword arguments that are also accepted by
23 |     :class:`Variogram <skgstat.Variogram>` and
24 |     :class:`DirectionalVariogram <skgstat.DirectionalVariogram>` and passes them
25 |     down to the respective class. The directional variogram will be used as
26 |     the base class if any of its specific arguments is present: azimuth, bandwidth
27 |     or tolerance.
28 | 
29 |     Parameters
30 |     ----------
31 |     coordinates : numpy.ndarray, MetricSpace
32 |         Array of shape (m, n). Will be used as m observation points of
33 |         n-dimensions. This variogram can be calculated on 1 - n
34 |         dimensional coordinates. In case a 1-dimensional array is passed,
35 |         a second array of same length containing only zeros will be
36 |         stacked to the passed one.
37 |         For very large datasets, you can set maxlag to only calculate
38 |         distances within the maximum lag in a sparse matrix.
39 |         Alternatively you can supply a MetricSpace (optionally with a
40 |         `max_dist` set for the same effect). This is useful if you're
41 |         creating many different variograms for different measured
42 |         parameters that are all measured at the same set of coordinates,
43 |         as distances will only be calculated once, instead of once per
44 |         variogram.
45 |     values : numpy.ndarray
46 |         Array of values observed at the given coordinates. The length of
47 |         the values array has to match the m dimension of the coordinates
48 |         array. Will be used to calculate the dependent variable of the
49 |         variogram.
50 |         If the values are of shape ``(n_samples, 2)``, a cross-variogram
51 |         will be calculated. This assumes the main variable and the
52 |         co-variable to be co-located under Markov-model 1 assumptions,
53 |         meaning the variables need to be conditionally independent.
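    Examples
    --------
    A minimal sketch (``coords`` is assumed to be an ``(n, 2)`` coordinate
    array and ``vals`` an ``(n, 3)`` array of three co-located variables)::

        mat = cross_variograms(coords, vals, n_lags=20)
        mat[1][1]  # primary variogram of the second variable
        mat[0][1]  # cross-variogram of the first and second variable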
54 | 55 | """ 56 | # turn input data to numpy arrays 57 | coordinates = np.asarray(coordinates) 58 | values = np.asarray(values) 59 | 60 | # check which base-class is needed 61 | if any([arg in kwargs for arg in ('azimuth', 'tolerance', 'bandwidth')]): 62 | BaseCls = DirectionalVariogram 63 | else: 64 | BaseCls = Variogram 65 | 66 | # create the output matrix 67 | cross_m = [] 68 | 69 | # get the number of variables 70 | N = values.shape[1] 71 | 72 | for i in range(N): 73 | # create a new row 74 | cross_row = [] 75 | for j in range(N): 76 | # check if this is a primary variogram 77 | if i == j: 78 | cross_row.append(BaseCls(coordinates, values[:, i], **kwargs)) 79 | else: 80 | # extract the two datasets 81 | v = values[:, [i, j]] 82 | 83 | # append the cross-variogram 84 | cross_row.append(BaseCls(coordinates, v, **kwargs)) 85 | 86 | # append 87 | cross_m.append(cross_row) 88 | 89 | return cross_m 90 | -------------------------------------------------------------------------------- /skgstat/plotting/directtional_variogram.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from matplotlib.collections import LineCollection 4 | from scipy.spatial.distance import squareform 5 | 6 | try: 7 | import plotly.graph_objects as go 8 | except ImportError: 9 | pass 10 | 11 | 12 | def __calculate_plot_data(variogram, points): 13 | # get the direction mask 14 | direction_mask = squareform(variogram._direction_mask()) 15 | 16 | # build a coordinate meshgrid 17 | n = len(variogram.coordinates) 18 | r = np.arange(n) 19 | x1, x2 = np.meshgrid(r, r) 20 | 21 | # handle the point pairs 22 | if isinstance(points, int): 23 | points = [points] 24 | if isinstance(points, (list, tuple)): 25 | point_mask = np.zeros((n, n), dtype=bool) 26 | point_mask[:, points] = True 27 | else: 28 | # use all points 29 | point_mask = np.ones((n, n), dtype=bool) 30 | 31 | start = variogram.coordinates[x1[direction_mask & point_mask]] 32 | end = variogram.coordinates[x2[direction_mask & point_mask]] 33 | 34 | # extract all lines 35 | lines = np.column_stack(( 36 | start.reshape(len(start), 1, 2), 37 | end.reshape(len(end), 1, 2) 38 | )) 39 | 40 | return lines 41 | 42 | 43 | def matplotlib_pair_field( 44 | variogram, ax=None, 45 | cmap='gist_rainbow', 46 | points='all', 47 | add_points=True, 48 | alpha=0.3, 49 | **kwargs 50 | ): 51 | # get the plot data 52 | lines = __calculate_plot_data(variogram, points) 53 | 54 | # align the colors 55 | colors = plt.cm.get_cmap(cmap)(np.linspace(0, 1, len(lines))) 56 | colors[:, 3] = alpha 57 | 58 | # get the figure and ax object 59 | if ax is None: 60 | figsize = kwargs.get('figsize', (8, 8)) 61 | fig, ax = plt.subplots(1, 1, figsize=figsize) 62 | else: 63 | fig = ax.get_figure() 64 | 65 | # plot 66 | lc = LineCollection(lines, colors=colors, linewidths=1) 67 | ax.add_collection(lc) 68 | 69 | # add coordinates 70 | if add_points: 71 | ax.scatter(variogram.coordinates[:, 0], variogram.coordinates[:, 1], 15, c='k') 72 | if isinstance(points, list): 73 | ax.scatter( 74 | variogram.coordinates[:, 0][points], 75 | variogram.coordinates[:, 1][points], 76 | 25, c='r' 77 | ) 78 | 79 | # finish plot 80 | ax.autoscale() 81 | ax.margins(0.1) 82 | 83 | return fig 84 | 85 | 86 | def plotly_pair_field( 87 | variogram, 88 | fig=None, 89 | points='all', 90 | add_points=True, 91 | alpha=0.3, 92 | **kwargs 93 | ): 94 | # get the plot data 95 | lines = __calculate_plot_data(variogram, points) 96 | 97 | # create a figure 
if none is passed 98 | if fig is None: 99 | fig = go.Figure() 100 | 101 | # plot all requested networks 102 | for line in lines: 103 | fig.add_trace( 104 | go.Scatter(x=line[:, 0], y=line[:, 1], mode='lines', opacity=alpha) 105 | ) 106 | 107 | # add the coordinates as well 108 | if add_points: 109 | x = variogram.coordinates[:, 0] 110 | y = variogram.coordinates[:, 1] 111 | fig.add_trace( 112 | go.Scatter( 113 | x=x, y=y, mode='markers', 114 | marker=dict(color='black', size=5), 115 | text=['Coord: #%d' % i for i in range(len(x))] 116 | ) 117 | ) 118 | if isinstance(points, (list, tuple)): 119 | fig.add_trace( 120 | go.Scatter( 121 | x=x[points], y=y[points], mode='markers', 122 | marker=dict(color='red', size=15), 123 | text=['Coordinate: #%d' % p for p in points] 124 | ) 125 | ) 126 | 127 | # get rid of the legend 128 | fig.update_layout(showlegend=False) 129 | 130 | return fig 131 | -------------------------------------------------------------------------------- /skgstat/plotting/stvariogram_plot2d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from scipy.ndimage import zoom 4 | from scipy.interpolate import griddata 5 | 6 | try: 7 | import plotly.graph_objects as go 8 | except ImportError: 9 | pass 10 | 11 | 12 | def matplotlib_plot_2d(stvariogram, kind='contour', ax=None, zoom_factor=100., levels=10, method='fast', **kwargs): 13 | # get or create the figure 14 | if ax is not None: 15 | fig = ax.get_figure() 16 | else: 17 | fig, ax = plt.subplots(1, 1, figsize=kwargs.get('figsize', (8, 8))) 18 | 19 | # prepare the meshgrid 20 | xx, yy = stvariogram.meshbins 21 | z = stvariogram.experimental.T 22 | x = xx.flatten() 23 | y = yy.flatten() 24 | 25 | xxi = zoom(xx, zoom_factor, order=1) 26 | yyi = zoom(yy, zoom_factor, order=1) 27 | 28 | # interpolation, either fast or precise 29 | if method.lower() == "fast": 30 | zi = zoom(z.reshape((stvariogram.t_lags, stvariogram.x_lags)), zoom_factor, order=1, prefilter=False) 31 | elif method.lower() == "precise": 32 | # zoom the meshgrid by linear interpolation 33 | # interpolate the semivariance 34 | zi = griddata((x, y), z, (xxi, yyi), method='linear') 35 | else: 36 | raise ValueError("method has to be one of ['fast', 'precise']") 37 | 38 | # get the bounds 39 | zmin = np.nanmin(zi) 40 | zmax = np.nanmax(zi) 41 | 42 | # get the plotting parameters 43 | lev = np.linspace(0, zmax, levels) 44 | c = kwargs.get('color', kwargs.get('c', 'k')) 45 | cmap = kwargs.get('cmap', 'RdYlBu_r') 46 | 47 | # plot 48 | if kind.lower() == 'contour': 49 | ax.contour(xxi, yyi, zi, colors=c, levels=lev, vmin=zmin * 1.1, vmax=zmax * 0.9, linewidths=kwargs.get('linewidths', 0.3)) 50 | elif kind.lower() == 'contourf': 51 | C = ax.contourf(xxi, yyi, zi, cmap=cmap, levels=lev, vmin=zmin *1.1, vmax=zmax * 0.9) 52 | if kwargs.get('colorbar', True): 53 | plt.colorbar(C, ax=ax) 54 | else: 55 | raise ValueError("%s is not a valid 2D plot" % kind) 56 | 57 | # some labels 58 | ax.set_xlabel(kwargs.get('xlabel', 'space')) 59 | ax.set_ylabel(kwargs.get('ylabel', 'time')) 60 | ax.set_xlim(kwargs.get('xlim', (0, stvariogram.xbins[-1]))) 61 | ax.set_ylim(kwargs.get('ylim', (0, stvariogram.tbins[-1]))) 62 | 63 | return fig 64 | 65 | 66 | def plotly_plot_2d(stvariogram, kind='contour', fig=None, **kwargs): 67 | # get base data 68 | x = stvariogram.xbins 69 | y = stvariogram.tbins 70 | z = stvariogram.experimental.reshape((len(x), len(y))).T 71 | 72 | # get settings 73 | showlabels = 
kwargs.get('showlabels', True) 74 | colorscale = kwargs.get('colorscale', 'Earth_r') 75 | smooth = kwargs.get('line_smoothing', 0.0) 76 | coloring = kwargs.get('coloring', 'heatmap') 77 | if kind == 'contour': 78 | coloring = 'lines' 79 | lw = kwargs.get('line_width', kwargs.get('lw', 2)) 80 | label_color = kwargs.get('label_color', 'black') 81 | else: 82 | label_color = kwargs.get('label_color', 'white') 83 | lw = kwargs.get('line_width', kwargs.get('lw', .3)) 84 | 85 | # get the figure 86 | if fig is None: 87 | fig = go.Figure() 88 | 89 | # do the plot 90 | fig.add_trace( 91 | go.Contour( 92 | x=x, 93 | y=y, 94 | z=z, 95 | line_smoothing=smooth, 96 | colorscale=colorscale, 97 | contours=dict( 98 | coloring=coloring, 99 | showlabels=showlabels, 100 | labelfont=dict( 101 | color=label_color, 102 | size=kwargs.get('label_size', 14) 103 | ) 104 | ), 105 | line_width=lw, 106 | colorbar=dict( 107 | title=f"semivariance ({stvariogram.estimator.__name__})", 108 | titleside='right' 109 | ) 110 | ) 111 | ) 112 | 113 | # update the labels 114 | fig.update_layout(scene=dict( 115 | xaxis_title=kwargs.get('xlabel', 'space'), 116 | yaxis_title=kwargs.get('ylabel', 'time') 117 | )) 118 | 119 | return fig 120 | -------------------------------------------------------------------------------- /skgstat/tests/test_stmodels.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | from numpy.testing import assert_array_almost_equal 5 | 6 | from skgstat import models, stmodels 7 | 8 | 9 | class TestSumModel(unittest.TestCase): 10 | def setUp(self): 11 | # spatial range = 10; spatial sill = 5 12 | self.Vx = lambda h: models.spherical(h, 10, 5) 13 | # temporal range = 5; temporal sill = 7 14 | self.Vt = lambda t: models.cubic(t, 5, 7) 15 | 16 | self.lags = np.array([ 17 | [1.2, 1.0], 18 | [5.0, 2.5], 19 | [10., 5.0], 20 | [12., 7.0], 21 | [4.2, 7.0], 22 | [12.0, 3.4] 23 | ]) 24 | 25 | def test_default(self): 26 | assert_array_almost_equal( 27 | [stmodels.sum(lag, self.Vx, self.Vt) for lag in self.lags], 28 | [2.37, 8.76, 12., 12., 9.96, 11.61], 29 | decimal=2 30 | ) 31 | 32 | def test_default_as_array(self): 33 | assert_array_almost_equal( 34 | stmodels.sum(self.lags, self.Vx, self.Vt), 35 | [2.37, 8.76, 12., 12., 9.96, 11.61], 36 | decimal=2 37 | ) 38 | 39 | 40 | class TestProductModel(unittest.TestCase): 41 | def setUp(self): 42 | # spatial range = 10; spatial sill = 5 43 | self.Vx = lambda h: models.spherical(h, 10, 5) 44 | # temporal range = 5; temporal sill = 7 45 | self.Vt = lambda t: models.cubic(t, 5, 7) 46 | 47 | self.lags = np.array([ 48 | [1.2, 1.0], 49 | [5.0, 2.5], 50 | [10., 5.0], 51 | [12., 7.0], 52 | [4.2, 7.0], 53 | [12.0, 3.4] 54 | ]) 55 | 56 | def test_default(self): 57 | assert_array_almost_equal( 58 | [stmodels.product(h, self.Vx, self.Vt, 5, 7) for h in self.lags], 59 | [12.34, 32.37, 35., 35., 35., 35.], 60 | decimal=2 61 | ) 62 | 63 | def test_default_as_array(self): 64 | assert_array_almost_equal( 65 | stmodels.product(self.lags, self.Vx, self.Vt, 5, 7), 66 | [12.34, 32.37, 35., 35., 35., 35.], 67 | decimal=2 68 | ) 69 | 70 | 71 | class TestProductSumModel(unittest.TestCase): 72 | def setUp(self): 73 | # spatial range = 10; spatial sill = 5 74 | self.Vx = lambda h: models.spherical(h, 10, 5) 75 | # temporal range = 5; temporal sill = 7 76 | self.Vt = lambda t: models.cubic(t, 5, 7) 77 | 78 | self.lags = np.array([ 79 | [1.2, 1.0], 80 | [5.0, 2.5], 81 | [10., 5.0], 82 | [12., 7.0], 83 | [4.2, 7.0], 84 | [12.0, 
3.4]
 85 |         ])
 86 | 
 87 |     def test_default(self):
 88 |         assert_array_almost_equal(
 89 |             [stmodels.product_sum(h, self.Vx, self.Vt,
 90 |                 k1=2.2, k2=2.3, k3=4.3, Cx=5, Ct=7) for h in self.lags],
 91 |             [35.55, 101.99, 118.6, 118.6, 113.92, 116.91],
 92 |             decimal=2
 93 |         )
 94 | 
 95 |     def test_default_as_array(self):
 96 |         assert_array_almost_equal(
 97 |             stmodels.product_sum(self.lags, self.Vx, self.Vt,
 98 |                 k1=2.2, k2=2.3, k3=4.3, Cx=5, Ct=7),
 99 |             [35.55, 101.99, 118.6, 118.6, 113.92, 116.91],
100 |             decimal=2
101 |         )
102 | 
103 |     def test_with_zero_ks(self):
104 |         assert_array_almost_equal(
105 |             stmodels.product_sum(self.lags, self.Vx, self.Vt,
106 |                 k1=0, k2=0, k3=0, Cx=5, Ct=7),
107 |             [0., 0., 0., 0., 0., 0.],
108 |             decimal=2
109 |         )
110 | 
111 |     def test_with_all_one(self):
112 |         assert_array_almost_equal(
113 |             stmodels.product_sum(self.lags, self.Vx, self.Vt,
114 |                 k1=1, k2=1, k3=1, Cx=5, Ct=7),
115 |             [14.71, 41.13, 47., 47., 44.96, 46.61],
116 |             decimal=2
117 |         )
118 | 
119 |     def test_as_product_model(self):
120 |         assert_array_almost_equal(
121 |             stmodels.product_sum(self.lags, self.Vx, self.Vt,
122 |                 k1=1, k2=0, k3=0, Cx=5, Ct=7),
123 |             stmodels.product(self.lags, self.Vx, self.Vt, 5, 7),
124 |             decimal=2
125 |         )
126 | 
127 | 
128 | 
129 | if __name__ == '__main__':
130 |     unittest.main()
131 | 
--------------------------------------------------------------------------------
/skgstat/plotting/variogram_location_trend.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | from scipy.optimize import curve_fit
 4 | from itertools import cycle
 5 | 
 6 | try:
 7 |     import plotly
 8 |     import plotly.graph_objects as go
 9 | except ImportError:
10 |     pass
11 | 
12 | 
13 | def __get_trend(variogram, fig, **kwargs):
14 |     # get the number of dimensions
15 |     N = variogram.coordinates.shape[1]
16 | 
17 |     # create the names
18 |     if N <= 3:
19 |         names = ['X', 'Y', 'Z'][:N]
20 |     else:
21 |         names = ['%d. dimension' % (_ + 1) for _ in range(N)]
22 | 
23 |     # cycle the default colors
24 |     colors = cycle(plotly.colors.qualitative.Plotly)
25 | 
26 |     # only linear trend analysis supported:
27 |     # TODO: this could be changed by kwargs...
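    # the fitted trend is a straight line y = m * x + b;
    # scipy's curve_fit estimates slope m and intercept b per coordinate dimension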
28 | def model(x, m, b): 29 | return m * x + b 30 | 31 | for dim in range(N): 32 | x, y = (variogram.values, variogram.coordinates[:, dim]) 33 | color = next(colors) 34 | 35 | # fit the model 36 | cof, cov = curve_fit(model, x, y) 37 | 38 | # apply the model 39 | xi = np.linspace(np.min(x), np.max(x), 100) 40 | yi = np.fromiter(map(lambda x: model(x, *cof), xi), dtype=float) 41 | 42 | # calculate R2 43 | y_star = np.fromiter(map(lambda x: model(x, *cof), x), dtype=float) 44 | r2 = 1 - (np.sum(np.power(y - y_star, 2)) / np.sum(np.power(y - np.mean(y), 2))) 45 | 46 | # add the trace 47 | fig.add_trace( 48 | go.Scatter( 49 | x=xi, 50 | y=yi, 51 | mode='lines+text', 52 | line=dict(dash='dash', width=0.7, color=color), 53 | name='%s trend' % names[dim], 54 | text=['y = %.2fx + %.2f [R²=%.2f]' % (cof[0], cof[1], r2) if i == 10 else '' for i, _ in enumerate(xi)], 55 | textfont_size=14, 56 | textposition='top center', 57 | textfont_color=color 58 | ) 59 | ) 60 | 61 | # after all traces are added, return 62 | return fig 63 | 64 | 65 | def matplotlib_location_trend(variogram, axes=None, show=True, **kwargs): 66 | N = len(variogram.coordinates[0]) 67 | 68 | # create the figure 69 | if axes is None: 70 | # derive the needed amount of col and row 71 | nrow = int(round(np.sqrt(N))) 72 | ncol = int(np.ceil(N / nrow)) 73 | fig, axes = plt.subplots(nrow, ncol, figsize=(ncol * 6, nrow * 6)) 74 | else: 75 | if not len(axes) == N: 76 | raise ValueError( 77 | 'The amount of passed axes does not fit the coordinate' + 78 | ' dimensionality of %d' % N 79 | ) 80 | fig = axes[0].get_figure() 81 | 82 | # plot 83 | for i in range(N): 84 | axes.flatten()[i].plot([_[i] for _ in variogram.coordinates], variogram.values, '.r') 85 | axes.flatten()[i].set_xlabel('%d-dimension' % (i + 1)) 86 | axes.flatten()[i].set_ylabel('value') 87 | 88 | # decrease margins 89 | plt.tight_layout() 90 | 91 | # show if needed 92 | if show: 93 | fig.show() 94 | 95 | return fig 96 | 97 | 98 | def plotly_location_trend(variogram, fig=None, show=True, **kwargs): 99 | N = len(variogram.coordinates[0]) 100 | if N <= 3: 101 | names = ['X', 'Y', 'Z'][:N] 102 | else: 103 | names = ['%d. 
dimension' % (_ + 1) for _ in range(N)]
104 | 
105 |     # check if a figure is needed
106 |     if fig is None:
107 |         fig = go.Figure()
108 | 
109 |     x = variogram.values
110 |     # switch to ScatterGL, if more than 5000 points will be plotted
111 |     if len(x) * N >= 5000:
112 |         GoCls = go.Scattergl
113 |     else:
114 |         GoCls = go.Scatter
115 | 
116 |     # plot
117 |     for i in range(N):
118 |         y = variogram.coordinates[:, i]
119 |         fig.add_trace(
120 |             GoCls(x=x, y=y, mode='markers', name=names[i])
121 |         )
122 | 
123 |     fig.update_xaxes(title_text='Value')
124 |     fig.update_yaxes(title_text='Coordinate dimension')
125 | 
126 |     # check if add_trend_line is given
127 |     if kwargs.get('add_trend_line', False):
128 |         fig = __get_trend(variogram, fig, **kwargs)
129 | 
130 |     # show figure if needed
131 |     if show:
132 |         fig.show()
133 | 
134 |     return fig
135 | 
--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
 1 | name: Test and build docs
 2 | 
 3 | on:
 4 |   - push
 5 |   - pull_request
 6 | 
 7 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
 8 | permissions:
 9 |   contents: read
10 |   pages: write
11 |   id-token: write
12 | 
13 | # Allow only one concurrent deployment
14 | concurrency:
15 |   group: "pages"
16 |   cancel-in-progress: true
17 | 
18 | jobs:
19 |   test:
20 |     name: Run Unittest
21 |     runs-on: ubuntu-22.04
22 |     strategy:
23 |       matrix:
24 |         python: ['3.9', '3.10', '3.11', '3.12', '3.13']
25 | 
26 |     steps:
27 |       - name: Checkout
28 |         uses: actions/checkout@master
29 |       - name: Install Python ${{ matrix.python }}
30 |         uses: actions/setup-python@master
31 |         with:
32 |           python-version: ${{ matrix.python }}
33 |       - name: Install SciKit-GStat
34 |         run: |
35 |           pip3 install -r requirements.txt
36 |           python3 setup.py install
37 |       - name: Install PyTest requirements
38 |         run: pip3 install -r "requirements.unittest.${{ matrix.python }}.txt"
39 |       - name: Run tests
40 |         run: pytest --cov-config=.coveragerc --cov=./ --cov-report=xml
41 |       - name: Upload coverage to codecov
42 |         uses: codecov/codecov-action@v1
43 |         with:
44 |           file: ./coverage.xml
45 |           env_vars: OS, PYTHON
46 | 
47 |   docs:
48 |     name: Build documentation
49 |     runs-on: ubuntu-latest
50 |     needs: test
51 |     if: github.ref == 'refs/heads/main'
52 | 
53 |     steps:
54 |       - name: Checkout
55 |         uses: actions/checkout@master
56 |       - name: Install Python
57 |         uses: actions/setup-python@master
58 |         with:
59 |           python-version: '3.12'
60 |       - name: Install SciKit-GStat
61 |         run: |
62 |           pip3 install -r requirements.txt
63 |           python3 setup.py install
64 |       - name: Install Sphinx requirements
65 |         run: pip3 install -r requirements.rtd.txt
66 |       - name: Setup Pages
67 |         id: pages
68 |         uses: actions/configure-pages@v4
69 |       - name: Build HTML docs
70 |         run: |
71 |           cd docs
72 |           make html
73 |         continue-on-error: true
74 |       - name: Upload artifact
75 |         uses: actions/upload-pages-artifact@v3
76 |         with:
77 |           path: docs/_build/html
78 | 
79 |   deploy:
80 |     environment:
81 |       name: github-pages
82 |       url: ${{ steps.deployment.outputs.page_url }}
83 |     runs-on: ubuntu-latest
84 |     needs: docs
85 |     if: github.ref == 'refs/heads/main'
86 |     steps:
87 |       - name: Deploy to GitHub Pages
88 |         id: deployment
89 |         uses: actions/deploy-pages@v4
90 | 
91 |   release:
92 |     name: Create Github release
93 |     runs-on: ubuntu-latest
94 |     needs: test
95 |     if: startsWith(github.event.ref, 'refs/tags/v') && endsWith(github.event.ref, '.0')
96 | 
97 |     steps:
98 |       - name: Checkout
99 |         uses: actions/checkout@master
100 | 
101 |       - name: Release
102 |         uses: softprops/action-gh-release@v1
103 |         with:
104 |           body_path: RELEASE.md
105 |           generate_release_notes: true
106 | 
107 |   publish:
108 |     name: Publish to PyPi
109 |     runs-on: ubuntu-latest
110 |     needs: test
111 |     if: startsWith(github.event.ref, 'refs/tags/v')
112 | 
113 |     steps:
114 |       - uses: actions/checkout@master
115 |       - name: Set up Python
116 |         uses: actions/setup-python@master
117 |         with:
118 |           python-version: '3.x'
119 |       - name: Install dependencies
120 |         run: |
121 |           python -m pip install --upgrade pip
122 |           pip install build
123 |       - name: Build package
124 |         run: python -m build
125 |       - name: Publish package
126 |         uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
127 |         with:
128 |           user: __token__
129 |           password: ${{ secrets.PYPI_TOKEN }}
130 | 
131 |   ci_develop:
132 |     name: Print Github Context for Development
133 |     runs-on: ubuntu-22.04
134 |     if: true
135 | 
136 |     steps:
137 |       - name: Dump GitHub context
138 |         env:
139 |           GITHUB_CONTEXT: ${{ toJson(github) }}
140 |         run: |
141 |           echo "$GITHUB_CONTEXT"
142 | 
--------------------------------------------------------------------------------
/docs/technical/estimate_kriging.rst:
--------------------------------------------------------------------------------
 1 | =====================
 2 | Kriging estimate mode
 3 | =====================
 4 | 
 5 | General
 6 | =======
 7 | 
 8 | Generally speaking, the kriging procedure for one unobserved point (poi) can be
 9 | broken down into three different steps.
10 | 
11 | 1. calculate the distance matrix between the poi and all observed locations
12 |    to determine the in-range points and apply the constraints on the minimum
13 |    and maximum number of points to be used.
14 | 2. build the kriging equation system by calculating the semi-variance for all
15 |    distances left over from step 1. Formulate the squareform matrix and add
16 |    the Lagrange multipliers.
17 | 3. Solve the kriging equation system, usually by matrix inversion.
18 | 
19 | Here, we try to optimize step 2 for performance. The basic idea is to
20 | estimate the semivariances instead of calculating them on each iteration.
21 | 
22 | Why not calculate?
23 | ==================
24 | 
25 | Calculating the semivariance for all elements in the kriging equation system
26 | gives us the best solution for the interpolation problem formulated by the
27 | respective variogram. The main point is that the distances for each
28 | unobserved location differ at least slightly from all other unobserved
29 | locations in a kriging modeling application. The variogram parameters,
30 | however, do not change; they are static within one model run. This is the
31 | property we exploit. The main advantage is that the effective range is
32 | constant in this setting. If we can now specify a precision at which we want
33 | to resolve the range, we can pre-calculate the corresponding semivariance
34 | values. In the time-critical iterative formulation of the kriging equation
35 | system, one then looks up the pre-calculated value for the closest distance.
36 | 
37 | What about precision?
38 | ---------------------
39 | 
40 | The precision is a hyperparameter. That means it is up to the user to decide
41 | how precise the kriging estimation itself can get, given an estimated
42 | kriging equation system. The main advantage is that the range and precision
43 | are constant values within the scope of a simulation and therefore the
44 | expected uncertainty can be calculated and the precision can be adjusted.
45 | This will take some effort to fine-tune the kriging instance, but it can
46 | yield results that are only numerically different, while improving the
47 | calculation time by an order of magnitude.
48 | 
49 | In terms of uncertainty, one can think of a variogram function, where the
50 | given lag distance is uncertain. This deviation can be calculated as:
51 | 
52 | .. math::
53 |   d = \frac{range}{precision}
54 | 
55 | and increasing the precision will obviously decrease the lag deviation.
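For example, a variogram with an effective range of 100 distance units,
resolved at a precision of 1000, tolerates a maximum lag deviation of
:math:`d = 100 / 1000 = 0.1` units; doubling the precision halves this
deviation.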
 56 | 
 57 | Example
 58 | =======
 59 | 
 60 | This example should illustrate the idea behind the estimation and show how
 61 | the precision value can influence the result. An arbitrary variogram is
 62 | created and then recalculated by the OrdinaryKriging routine to illustrate
 63 | the precision.
 64 | 
 65 | .. ipython:: python
 66 | 
 67 |     import matplotlib.pyplot as plt
 68 |     from skgstat import Variogram, OrdinaryKriging
 69 |     import numpy as np
 70 | 
 71 |     # create some random input
 72 |     np.random.seed(42)
 73 |     c = np.random.gamma(10, 4, size=(100,2))
 74 |     np.random.seed(42)
 75 |     v = np.random.normal(10, 2, size=100)
 76 | 
 77 |     V = Variogram(c, v, model='gaussian', normalize=False)
 78 |     ok = OrdinaryKriging(V, mode='exact')
 79 | 
 80 |     # exact calculation
 81 |     x = np.linspace(0, ok.range * 1.3, 120)
 82 |     y_c = list(map(ok.gamma_model, x))
 83 | 
 84 |     # estimation
 85 |     ok.mode = 'estimate'
 86 |     y_e = ok._estimate_matrix(x)
 87 | 
 88 |     plt.plot(x, y_c, '-b', label='exact variogram')
 89 |     @savefig krig_compare.png width=7in
 90 |     plt.plot(x, y_e, '-g', label='estimated variogram')
 91 |     plt.legend(loc='lower right')
 92 | 
 93 | 
 94 | There is almost no difference between the two lines and the result that can
 95 | be expected will be very similar, as the kriging equation system will yield
 96 | very similar weights to make the prediction.
 97 | 
 98 | If the precision is, however, chosen too coarse, there is a difference in the
 99 | reconstructed variogram. This way, the idea behind the estimation becomes
100 | quite obvious.
101 | 
102 | .. 
ipython:: python 103 | 104 | # make precision really small 105 | ok.precision = 10 106 | 107 | y_e2 = ok._estimate_matrix(x) 108 | 109 | plt.plot(x, y_c, '-b') 110 | @savefig krig_coarse.png width=7in 111 | plt.plot(x, y_e2, '-g') 112 | -------------------------------------------------------------------------------- /skgstat/plotting/stvariogram_plot3d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from mpl_toolkits.mplot3d import Axes3D 4 | 5 | try: 6 | import plotly.graph_objects as go 7 | except ImportError: 8 | pass 9 | 10 | 11 | def __calculate_plot_data(stvariogram, **kwargs): 12 | xx, yy = stvariogram.meshbins 13 | z = stvariogram.experimental 14 | # x = xx.flatten() 15 | # y = yy.flatten() 16 | 17 | # apply the model 18 | nx = kwargs.get('x_resolution', 100) 19 | nt = kwargs.get('t_resolution', 100) 20 | 21 | # model spacing 22 | _xx, _yy = np.mgrid[ 23 | 0:np.nanmax(stvariogram.xbins):nx * 1j, 24 | 0:np.nanmax(stvariogram.tbins):nt * 1j 25 | ] 26 | model = stvariogram.fitted_model 27 | lags = np.vstack((_xx.flatten(), _yy.flatten())).T 28 | # apply the model 29 | _z = model(lags) 30 | 31 | return xx.T, yy.T, z, _xx, _yy, _z 32 | 33 | 34 | def matplotlib_plot_3d(stvariogram, kind='scatter', ax=None, elev=30, azim=220, **kwargs): 35 | # get the data, spanned over a bin meshgrid 36 | xx, yy, z, _xx, _yy, _z = __calculate_plot_data(stvariogram, **kwargs) 37 | x = xx.flatten() 38 | y = yy.flatten() 39 | 40 | # some settings 41 | c = kwargs.get('color', kwargs.get('c', 'b')) 42 | cmap = kwargs.get('model_color', kwargs.get('cmap', 'terrain')) 43 | alpha = kwargs.get('alpha', 0.8) 44 | depthshade = kwargs.get('depthshade', False) 45 | 46 | # handle the axes 47 | if ax is not None: 48 | if not isinstance(ax, Axes3D): 49 | raise ValueError('The passed ax object is not an instance of mpl_toolkis.mplot3d.Axes3D.') 50 | fig = ax.get_figure() 51 | else: 52 | fig = plt.figure(figsize=kwargs.get('figsize', (10, 10))) 53 | ax = fig.add_subplot(111, projection='3d') 54 | 55 | # do the plot 56 | ax.view_init(elev=elev, azim=azim) 57 | if kind == 'surf': 58 | ax.plot_trisurf(x, y, z, color=c, alpha=alpha) 59 | elif kind == 'scatter': 60 | ax.scatter(x, y, z, c=c, depthshade=depthshade) 61 | else: 62 | raise ValueError('%s is not a valid 3D plot' % kind) 63 | 64 | 65 | # add the model 66 | if not kwargs.get('no_model', False): 67 | ax.plot_trisurf(_xx.flatten(), _yy.flatten(), _z, cmap=cmap, alpha=alpha) 68 | 69 | # labels: 70 | ax.set_xlabel('space') 71 | ax.set_ylabel('time') 72 | ax.set_zlabel('semivariance [%s]' % stvariogram.estimator.__name__) 73 | 74 | # return 75 | return fig 76 | 77 | 78 | def plotly_plot_3d(stvariogram, kind='scatter', fig=None, **kwargs): 79 | # get the data spanned over a bin meshgrid 80 | xx, yy, z, _xx, _yy, _z = __calculate_plot_data(stvariogram, **kwargs) 81 | 82 | # get some settings 83 | c = kwargs.get('color', kwargs.get('c', 'black')) 84 | cmap = kwargs.get('model_color', kwargs.get('colorscale', kwargs.get('cmap', 'Electric'))) 85 | alpha = kwargs.get('opacity', kwargs.get('alpha', 0.6)) 86 | 87 | # handle the figue 88 | if fig is None: 89 | fig = go.Figure() 90 | 91 | # do the plot 92 | if kind == 'surf': 93 | fig.add_trace( 94 | go.Surface( 95 | x=xx, 96 | y=yy, 97 | z=z.reshape(xx.shape), 98 | opacity=0.8 * alpha, 99 | colorscale=[[0, c], [1, c]], 100 | name='experimental variogram' 101 | ) 102 | ) 103 | elif kind == 'scatter' or kwargs.get('add_points', 
False):
104 |         fig.add_trace(
105 |             go.Scatter3d(
106 |                 x=xx.flatten(),
107 |                 y=yy.flatten(),
108 |                 z=z,
109 |                 mode='markers',
110 |                 opacity=1,
111 |                 marker=dict(color=c, size=kwargs.get('size', 4)),
112 |                 name='experimental variogram'
113 |             )
114 |         )
115 | 
116 |     # add the model
117 |     if not kwargs.get('no_model', False):
118 |         fig.add_trace(
119 |             go.Surface(
120 |                 x=_xx,
121 |                 y=_yy,
122 |                 z=_z.reshape(_xx.shape),
123 |                 opacity=min(1, alpha * 0.8),
124 |                 colorscale=cmap,
125 |                 name='%s model' % stvariogram.model.__name__
126 |             )
127 |         )
128 | 
129 |     # set some labels
130 |     fig.update_layout(scene=dict(
131 |         xaxis_title='space',
132 |         yaxis_title='time',
133 |         zaxis_title='semivariance [%s]' % stvariogram.estimator.__name__
134 |     ))
135 | 
136 |     # return
137 |     return fig
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | SciKit-GStat
  2 | ============
  3 | 
  4 | .. image:: https://img.shields.io/pypi/v/scikit-gstat?color=green&logo=pypi&logoColor=yellow&style=flat-square
       :alt: PyPI
  5 |    :target: https://pypi.org/project/scikit-gstat
  6 | 
  7 | .. image:: https://img.shields.io/github/v/release/mmaelicke/scikit-gstat?color=green&logo=github&style=flat-square
       :alt: GitHub release (latest by date)
  8 |    :target: https://github.com/mmaelicke/scikit-gstat
  9 | 
 10 | .. image:: https://github.com/mmaelicke/scikit-gstat/workflows/Test%20and%20build%20docs/badge.svg
 11 |    :target: https://github.com/mmaelicke/scikit-gstat/actions
 12 | 
 13 | .. image:: https://codecov.io/gh/mmaelicke/scikit-gstat/branch/master/graph/badge.svg
 14 |    :target: https://codecov.io/gh/mmaelicke/scikit-gstat
 15 |    :alt: Codecov
 16 | 
 17 | .. image:: https://zenodo.org/badge/98853365.svg
 18 |    :target: https://zenodo.org/badge/latestdoi/98853365
 19 | 
 20 | How to cite
 21 | -----------
 22 | 
 23 | In case you use SciKit-GStat in other software or scientific publications,
 24 | please reference this module. There is a `GMD <https://doi.org/10.5194/gmd-15-2505-2022>`_ publication. Please cite it like:
 25 | 
 26 |   Mälicke, M.: SciKit-GStat 1.0: a SciPy-flavored geostatistical variogram estimation toolbox written in Python, Geosci. Model Dev., 15, 2505–2532, https://doi.org/10.5194/gmd-15-2505-2022, 2022.
 27 | 
 28 | The code itself is published and has a DOI. It can be cited as:
 29 | 
 30 |   Mirko Mälicke, Romain Hugonnet, Helge David Schneider, Sebastian Müller, Egil Möller, & Johan Van de Wauw. (2022). mmaelicke/scikit-gstat: Version 1.0 (v1.0.0). Zenodo. https://doi.org/10.5281/zenodo.5970098
 31 | 
 32 | 
 33 | Full Documentation
 34 | ------------------
 35 | 
 36 | The full documentation can be found at: https://mmaelicke.github.io/scikit-gstat
 37 | 
 38 | Description
 39 | -----------
 40 | 
 41 | SciKit-Gstat is a scipy-styled analysis module for geostatistics. It includes
 42 | two base classes ``Variogram`` and ``OrdinaryKriging``. Additionally, various
 43 | variogram classes inheriting from ``Variogram`` are available for solving
 44 | directional or space-time related tasks.
 45 | The module makes use of a rich selection of semi-variance
 46 | estimators and variogram model functions, while being extensible at the same
 47 | time.
 48 | The estimators include:
 49 | 
 50 | - matheron
 51 | - cressie
 52 | - dowd
 53 | - genton
 54 | - entropy
 55 | - two experimental ones: quantiles, minmax
 56 | 
 57 | The models include:
 58 | 
 59 | - spherical
 60 | - exponential
 61 | - gaussian
 62 | - cubic
 63 | - stable
 64 | - matérn
 65 | 
 66 | with all of them in a nugget and no-nugget variation.
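Estimator and model are both selected by their name. As a minimal sketch
(``coordinates`` and ``values`` are placeholders for your own sample arrays,
e.g. loaded from ``skg.data``):

.. code-block:: python

    import skgstat as skg

    # pick the estimator and the model function by name
    V = skg.Variogram(coordinates, values, estimator='cressie', model='matern')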
All the estimators are
 67 | implemented using numba's jit decorator. The usage of numba might be subject
 68 | to change in future versions.
 69 | 
 70 | 
 71 | Installation
 72 | ~~~~~~~~~~~~
 73 | 
 74 | PyPI
 75 | ^^^^
 76 | .. code-block:: bash
 77 | 
 78 |   pip install scikit-gstat
 79 | 
 80 | **Note:** It can happen that the installation of numba or numpy fails when using pip, especially on Windows systems.
 81 | Usually, a missing DLL (see e.g. `#31 <https://github.com/mmaelicke/scikit-gstat/issues/31>`_) or Visual C++ redistributable is the reason.
 82 | 
 83 | GIT:
 84 | ^^^^
 85 | 
 86 | .. code-block:: bash
 87 | 
 88 |   git clone https://github.com/mmaelicke/scikit-gstat.git
 89 |   cd scikit-gstat
 90 |   pip install -r requirements.txt
 91 |   pip install -e .
 92 | 
 93 | Conda-Forge:
 94 | ^^^^^^^^^^^^
 95 | 
 96 | From version `0.5.5` on, `scikit-gstat` is also available on conda-forge.
 97 | Note that for versions `< 1.0` conda-forge will not always be up to date, but
 98 | from `1.0` on, each minor release will be available.
 99 | 
100 | .. code-block:: bash
101 | 
102 |   conda install -c conda-forge scikit-gstat
103 | 
104 | 
105 | Quickstart
106 | ----------
107 | 
108 | The `Variogram` class needs at least a list of coordinates and values.
109 | All other attributes are set by default.
110 | You can easily set up an example by using the `skgstat.data` sub-module,
111 | which includes a growing list of sample data.
112 | 
113 | .. code-block:: python
114 | 
115 |   import skgstat as skg
116 | 
117 |   # the data functions return a dict of 'sample' and 'description'
118 |   coordinates, values = skg.data.pancake(N=300).get('sample')
119 | 
120 |   V = skg.Variogram(coordinates=coordinates, values=values)
121 |   print(V)
122 | 
123 | .. code-block:: bash
124 | 
125 |   spherical Variogram
126 |   -------------------
127 |   Estimator:         matheron
128 |   Effective Range:   353.64
129 |   Sill:              1512.24
130 |   Nugget:            0.00
131 | 
132 | All variogram parameters can be changed in place and the class will automatically
133 | invalidate and update dependent results and parameters.
134 | 
135 | .. code-block:: python
136 | 
137 |   V.model = 'exponential'
138 |   V.n_lags = 15
139 |   V.maxlag = 500
140 | 
141 |   # plot - matplotlib and plotly are available backends
142 |   fig = V.plot()
143 | 
144 | .. image:: ./example.png
145 | 
--------------------------------------------------------------------------------
/skgstat/data/_loader.py:
--------------------------------------------------------------------------------
  1 | from typing import Union, Tuple, List
  2 | import os
  3 | from glob import glob
  4 | import numpy as np
  5 | import pandas as pd
  6 | 
  7 | # for python 3.6 compatibility
  8 | try:
  9 |     from imageio.v3 import imread
 10 | except ImportError:
 11 |     from imageio import imread
 12 | 
 13 | PATH = os.path.abspath(os.path.dirname(__file__))
 14 | 
 15 | 
 16 | def field_names() -> List[str]:
 17 |     """
 18 |     Get all available fields
 19 |     """
 20 |     fnames = glob(os.path.join(PATH, 'rf', '*.png'))
 21 |     basenames = [os.path.basename(name) for name in fnames]
 22 |     return [os.path.splitext(name)[0] for name in basenames]
 23 | 
 24 | 
 25 | def field(fname: str, band: Union[int, str] = 0) -> np.ndarray:
 26 |     """
 27 |     Load one of the fields stored in the ``rf`` folder
 28 |     and return it as a numpy array.
 29 | 
 30 |     Parameters
 31 |     ----------
 32 |     fname : str
 33 |         The filename (not path) of the field. The file
 34 |         extension can be omitted.
35 | band : int, str 36 | The band to use, can either be an integer or the 37 | literal ``'mean'``, which will average all bands 38 | 39 | Returns 40 | ------- 41 | field : numpy.ndarray 42 | The ndarray representing the requested band 43 | 44 | """ 45 | # append a png file extension if needed 46 | if not fname.endswith('.png'): 47 | fname += '.png' 48 | 49 | # read image 50 | # TODO: with imageio v3 this can be switched to imageio.imread - the result should be the same here 51 | img = imread(os.path.join(PATH, 'rf', fname)) 52 | 53 | # switch band 54 | if isinstance(band, int): 55 | if len(img.shape) > 2: 56 | return img[:, :, band] 57 | elif len(img.shape) == 2: 58 | return img 59 | elif band.lower() == 'mean': 60 | return np.mean(img, axis=2) 61 | 62 | raise AttributeError('band parameter is invalid') 63 | 64 | 65 | def get_sample( 66 | fname: str, 67 | N: int = 100, 68 | seed: int = None, 69 | band: Union[int, str] = 0 70 | ) -> Tuple[np.ndarray]: 71 | """ 72 | Sample one of the fields. The filename and band are passed down to 73 | :func:`field`. To return reproducible results the random Generator 74 | can be seeded. 75 | 76 | Parameters 77 | ---------- 78 | fname : str 79 | The filename (not path) of the field. The file 80 | extension can be omitted. 81 | N : int 82 | Sample size 83 | seed : int 84 | seed to use for the random generator. 85 | band : int, str 86 | The band to use, can either be an integer or the 87 | literal ``'mean'``, which will average all bands 88 | 89 | Returns 90 | ------- 91 | coordinates : numpy.ndarray 92 | Coordinate array of shape ``(N, 2)``. 93 | values : numpy.ndarray 94 | 1D array of the values at the coordinates 95 | 96 | """ 97 | # first get the image 98 | img = field(fname, band) 99 | 100 | # randomly sample points 101 | rng = np.random.default_rng(seed) 102 | 103 | # sample at random flattened indices without replace 104 | idx = rng.choice(np.multiply(*img.shape), replace=False, size=N) 105 | 106 | # build a meshgrid over the image 107 | _x, _y = np.meshgrid(*[range(dim) for dim in img.shape]) 108 | x = _x.flatten() 109 | y = _y.flatten() 110 | 111 | # get the coordinates and values 112 | coordinates = np.asarray([[x[i], y[i]] for i in idx]) 113 | values = np.asarray([img[c[0], c[1]] for c in coordinates]) 114 | 115 | return coordinates, values 116 | 117 | 118 | def read_sample_file(fname) -> pd.DataFrame: 119 | """ 120 | Return a sample from a sample-file as a 121 | pandas DataFrame 122 | 123 | Returns 124 | ------- 125 | df : pandas.DataFrame 126 | The file content 127 | 128 | """ 129 | # build the path 130 | path = os.path.join(PATH, 'samples', fname) 131 | return pd.read_csv(path) 132 | 133 | 134 | def sample_to_df(coordinates: np.ndarray, values: np.ndarray) -> pd.DataFrame: 135 | """ 136 | Turn the coordinates and values array into a pandas DataFrame. 137 | The columns will be called x[,y[,z]],v 138 | 139 | Parameters 140 | ---------- 141 | coordinates : numpy.ndarray 142 | Coordinate array of shape ``(N, 1), (N, 2)`` or ``(N, 3)``. 
143 | values : numpy.ndarray 144 | 1D array of the values at the coordinates 145 | 146 | Returns 147 | ------- 148 | df : pandas.DataFrame 149 | The data as a pandas DataFrame 150 | 151 | """ 152 | # check that the array sizes match 153 | if len(coordinates) != len(values): 154 | raise AttributeError('coordinates and values must have the same length') 155 | 156 | # check dimensions 157 | col_names = ['x'] 158 | if coordinates.ndim >= 2: 159 | col_names.append('y') 160 | if coordinates.ndim == 3: 161 | col_names.append('z') 162 | 163 | # build the dataframe 164 | df = pd.DataFrame(coordinates, columns=col_names) 165 | df['v'] = values 166 | return df 167 | -------------------------------------------------------------------------------- /skgstat/tests/test_metric_space.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import warnings 3 | import numpy as np 4 | import skgstat as skg 5 | import scipy 6 | 7 | # produce a random dataset 8 | np.random.seed(42) 9 | rcoords = np.random.gamma(40, 10, size=(500, 2)) 10 | np.random.seed(42) 11 | rvals = np.random.normal(10, 4, 500) 12 | 13 | def test_invalid_dist_func(): 14 | # instantiate metrix space 15 | ms = skg.MetricSpace(rcoords, dist_metric='euclidean') 16 | 17 | with pytest.raises(AttributeError) as e: 18 | skg.Variogram(ms, rvals, dist_func='cityblock') 19 | 20 | assert 'Distance metric' in e.value 21 | 22 | 23 | def test_sparse_matrix_no_warning(): 24 | # make a really sparse matrix 25 | sparse = skg.MetricSpace(rcoords, max_dist=5) 26 | 27 | # call triangular_distance_matrix without warning 28 | V = skg.Variogram(sparse, rvals) 29 | V.triangular_distance_matrix 30 | 31 | 32 | def test_dense_matrix_warning(): 33 | dense = skg.MetricSpace(rcoords) 34 | 35 | # check the warning 36 | with pytest.raises(RuntimeWarning) as w: 37 | V = skg.Variogram(dense, rvals) 38 | V.triangular_distance_matrix 39 | 40 | assert 'Only available' in w.value 41 | 42 | 43 | def test_unknown_metric(): 44 | with pytest.raises(ValueError) as e: 45 | skg.MetricSpace(rcoords, dist_metric='foobar') 46 | 47 | assert 'Unknown Distance Metric:' in e.value 48 | 49 | 50 | def test_tree_non_euklidean(): 51 | with pytest.raises(ValueError) as e: 52 | ms = skg.MetricSpace(rcoords, 'cityblock') 53 | ms.tree 54 | 55 | assert 'can only be constructed' in e.value 56 | 57 | 58 | def test_metric_pair_metrix(): 59 | c1 = np.random.gamma(100, 4, (300, 2)) 60 | c2 = np.random.gamma(50, 5, (100, 2)) 61 | ms1 = skg.MetricSpace(c1, dist_metric='cityblock') 62 | ms2 = skg.MetricSpace(c2, dist_metric='euclidean') 63 | 64 | with pytest.raises(ValueError) as e: 65 | skg.MetricSpacePair(ms1, ms2) 66 | 67 | assert 'same distance metric' in e.value 68 | 69 | 70 | def test_metric_pair_max_dist(): 71 | c1 = np.random.gamma(100, 4, (300, 2)) 72 | c2 = np.random.gamma(50, 5, (100, 2)) 73 | ms1 = skg.MetricSpace(c1, max_dist=50) 74 | ms2 = skg.MetricSpace(c2, max_dist=400) 75 | 76 | with pytest.raises(ValueError) as e: 77 | skg.MetricSpacePair(ms1, ms2) 78 | 79 | assert 'same max_dist' in e.value 80 | 81 | def test_raster_metric(): 82 | # Generate a gridded dataset 83 | shape = (100, 100) 84 | np.random.seed(42) 85 | vals = np.random.normal(0, 1, size=shape) 86 | 87 | # Coordinates 88 | x = np.arange(0, shape[0]) 89 | y = np.arange(0, shape[1]) 90 | xx, yy = np.meshgrid(x, y) 91 | 92 | # Flatten everything because we don't care about the 2D at this point 93 | coords = np.dstack((xx.flatten(), yy.flatten())).squeeze() 94 | vals = vals.flatten() 95 
| 96 | # Run the computation 97 | rems = skg.RasterEquidistantMetricSpace(coords, shape=shape, extent=(x[0],x[-1],y[0],y[-1]), samples=10, runs=10, 98 | rnd=42, verbose=True) 99 | 100 | # Minimal check of the output 101 | assert rems.max_dist == pytest.approx(140,rel=0.01) 102 | assert rems.res == pytest.approx(1, rel=0.0001) 103 | assert isinstance(rems.dists, scipy.sparse.csr_matrix) 104 | assert rems.dists.shape == (10000, 10000) 105 | 106 | # Check the random state provides the same final center 107 | assert all(rems._centers[-1] == np.array([62, 52])) 108 | 109 | # Check the interface with a Variogram object works 110 | with warnings.catch_warnings(): 111 | # this will throw a optimize warning on random data 112 | warnings.simplefilter('ignore') 113 | V = skg.Variogram(rems, vals) 114 | 115 | assert V.bin_count is not None 116 | # Check the variogram is always the same with the random state given 117 | assert V.experimental[0] == pytest.approx(0.89,0.01) 118 | 119 | # Check that the routines are robust to very few data points in the grid (e.g., from nodata values) 120 | coords_sub = coords[0::1000] 121 | vals_sub = vals[0::1000] 122 | rems_sub = skg.RasterEquidistantMetricSpace(coords_sub, shape=shape, extent=(x[0],x[-1],y[0],y[-1]), samples=100, runs=10, 123 | rnd=42) 124 | with warnings.catch_warnings(): 125 | # this will throw a optimize warning on random data 126 | warnings.simplefilter('ignore') 127 | V = skg.Variogram(rems_sub, vals_sub) 128 | 129 | # Check with a single isolated point possibly being used as center 130 | coords_sub = np.concatenate(([coords[0]], coords[-10:])) 131 | vals_sub = np.concatenate(([vals[0]], vals[-10:])) 132 | rems_sub = skg.RasterEquidistantMetricSpace(coords_sub, shape=shape, extent=(x[0],x[-1],y[0],y[-1]), samples=100, runs=11, 133 | rnd=42) 134 | with warnings.catch_warnings(): 135 | # this will throw a optimize warning on random data 136 | warnings.simplefilter('ignore') 137 | V = skg.Variogram(rems_sub, vals_sub) 138 | -------------------------------------------------------------------------------- /skgstat/tests/test_directionalvariogram.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | from numpy.testing import assert_array_almost_equal 5 | 6 | from skgstat import DirectionalVariogram, Variogram 7 | 8 | 9 | class TestDirectionalVariogramInstantiation(unittest.TestCase): 10 | def setUp(self): 11 | # set up default valures, whenever c,v are not important 12 | np.random.seed(1306) 13 | self.c = np.random.gamma(11, 2, (30, 2)) 14 | np.random.seed(1306) 15 | self.v = np.random.normal(5, 4, 30) 16 | 17 | def test_standard_settings(self): 18 | DV = DirectionalVariogram(self.c, self.v, normalize=True) 19 | 20 | assert_array_almost_equal(DV.describe()["normalized_effective_range"], 436., decimal=0) 21 | assert_array_almost_equal(DV.describe()["normalized_sill"], 2706., decimal=0) 22 | assert_array_almost_equal(DV.describe()["normalized_nugget"], 0., decimal=0) 23 | 24 | def test_azimuth(self): 25 | DV = DirectionalVariogram(self.c, self.v, azimuth=-45, normalize=True) 26 | 27 | assert_array_almost_equal(DV.describe()["normalized_effective_range"], 23.438, decimal=3) 28 | assert_array_almost_equal(DV.describe()["normalized_sill"], 219.406, decimal=3) 29 | assert_array_almost_equal(DV.describe()["normalized_nugget"], 0., decimal=3) 30 | 31 | def test_invalid_azimuth(self): 32 | with self.assertRaises(ValueError) as e: 33 | DirectionalVariogram(self.c, self.v, 
azimuth=360) 34 | self.assertEqual( 35 | str(e), 36 | 'The azimuth is an angle in degree and has ' 37 | 'to meet -180 <= angle <= 180' 38 | ) 39 | 40 | def test_tolerance(self): 41 | DV = DirectionalVariogram(self.c, self.v, tolerance=15, normalize=True) 42 | 43 | assert_array_almost_equal(DV.describe()["normalized_effective_range"], 435.7, decimal=1) 44 | assert_array_almost_equal(DV.describe()["normalized_sill"], 2722.1, decimal=1) 45 | assert_array_almost_equal(DV.describe()["normalized_nugget"], 0., decimal=1) 46 | 47 | def test_invalid_tolerance(self): 48 | with self.assertRaises(ValueError) as e: 49 | DirectionalVariogram(self.c, self.v, tolerance=-1) 50 | self.assertEqual( 51 | str(e), 52 | 'The tolerance is an angle in degree and has to ' 53 | 'meet 0 <= angle <= 360' 54 | ) 55 | 56 | def test_bandwidth(self): 57 | DV = DirectionalVariogram(self.c, self.v, bandwidth=12, normalize=True) 58 | 59 | for x, y in zip([DV.describe()[name] for name in ("normalized_effective_range", "normalized_sill", "normalized_nugget")], 60 | [435.733, 2715.865, 0]): 61 | self.assertAlmostEqual(x, y, places=3) 62 | 63 | def test_invalid_model(self): 64 | with self.assertRaises(ValueError) as e: 65 | DirectionalVariogram(self.c, self.v, directional_model='NotAModel') 66 | self.assertEqual( 67 | str(e), 68 | 'NotAModel is not a valid model.' 69 | ) 70 | 71 | def test_invalid_model_type(self): 72 | with self.assertRaises(ValueError) as e: 73 | DirectionalVariogram(self.c, self.v, directional_model=5) 74 | self.assertEqual( 75 | str(e), 76 | 'The directional model has to be identified by a ' 77 | 'model name, or it has to be the search area ' 78 | 'itself' 79 | ) 80 | 81 | def test_binning_change_nlags(self): 82 | DV = DirectionalVariogram(self.c, self.v, n_lags=5) 83 | 84 | self.assertEqual(DV.n_lags, 5) 85 | 86 | # go through the n_lags changing procedure 87 | DV.bin_func = 'scott' 88 | 89 | # with scott, there are 6 classes now 90 | self.assertEqual(DV.n_lags, 6) 91 | 92 | 93 | 94 | class Mock: 95 | def __init__(self, c=None, v=None): 96 | self._X = c 97 | self.values = v 98 | 99 | 100 | class TestDirectionalVariogramMethods(unittest.TestCase): 101 | def setUp(self): 102 | # set up default valures, whenever c,v are not important 103 | np.random.seed(11884) 104 | self.c = np.random.gamma(15, 3, (30, 2)) 105 | np.random.seed(11884) 106 | self.v = np.random.normal(9, 2, 30) 107 | 108 | 109 | def test_bin_func(self): 110 | DV = DirectionalVariogram(self.c, self.v, n_lags=4) 111 | V = Variogram(self.c, self.v, n_lags=4) 112 | 113 | for x, y in zip (DV.bins, V.bins): 114 | self.assertNotEqual(x, y) 115 | 116 | assert_array_almost_equal( 117 | np.array([12.3, 24.7, 37., 49.4]), DV.bins, decimal=1 118 | ) 119 | 120 | def test_directional_mask(self): 121 | a = np.array([[0, 0], [1, 2], [2, 1]]) 122 | 123 | # overwrite fit method 124 | class Test(DirectionalVariogram): 125 | def fit(*args, **kwargs): 126 | pass 127 | 128 | var = Test(a, np.random.normal(0, 1, size=3)) 129 | var._calc_direction_mask_data() 130 | 131 | assert_array_almost_equal( 132 | np.degrees(var._angles + np.pi)[:2], 133 | [63.4, 26.6], 134 | decimal=1 135 | ) 136 | 137 | 138 | if __name__ == '__main__': 139 | unittest.main() 140 | -------------------------------------------------------------------------------- /skgstat/tests/test_binning.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import sys 3 | 4 | import numpy as np 5 | from numpy.testing import assert_array_almost_equal 6 | 7 | 
from skgstat.binning import ( 8 | even_width_lags, 9 | uniform_count_lags, 10 | auto_derived_lags, 11 | kmeans, 12 | ward, 13 | stable_entropy_lags 14 | ) 15 | 16 | 17 | class TestEvenWidth(unittest.TestCase): 18 | @staticmethod 19 | def test_normal(): 20 | np.random.seed(42) 21 | bins, _ = even_width_lags(np.random.normal(5, 1, 1000), 4, None) 22 | 23 | assert_array_almost_equal( 24 | bins, 25 | np.array([2.21318287, 4.42636575, 6.63954862, 8.85273149]) 26 | ) 27 | 28 | @staticmethod 29 | def test_more_bins(): 30 | np.random.seed(42) 31 | 32 | bins, _ = even_width_lags(np.random.normal(5, 1, 1000), 10, None) 33 | 34 | assert_array_almost_equal( 35 | bins, 36 | np.array([0.88527315, 1.7705463, 2.65581945, 3.5410926, 4.42636575, 37 | 5.3116388, 6.19691204, 7.08218519, 7.96745834, 8.8527314]) 38 | ) 39 | 40 | @staticmethod 41 | def test_maxlag(): 42 | np.random.seed(42) 43 | bins, _ = even_width_lags(np.random.normal(5, 1, 1000), 4, 4.4) 44 | 45 | assert_array_almost_equal( 46 | bins, 47 | np.array([1.1, 2.2, 3.3, 4.4]) 48 | ) 49 | 50 | @staticmethod 51 | def test_too_large_maxlag(): 52 | np.random.seed(42) 53 | 54 | bins, n = even_width_lags(np.random.normal(5, 1, 1000), 4, 400) 55 | assert_array_almost_equal( 56 | bins, 57 | np.array([2.21318287, 4.42636575, 6.63954862, 8.85273149]) 58 | ) 59 | 60 | @staticmethod 61 | def test_median_split(): 62 | np.random.seed(42) 63 | bins, _ = even_width_lags(np.random.normal(5, 1, 1000), 2, None) 64 | assert_array_almost_equal( 65 | bins, 66 | np.array([4.42636575, 8.85273149]) 67 | ) 68 | 69 | 70 | class TestUniformCount(unittest.TestCase): 71 | def test_normal(self): 72 | np.random.seed(42) 73 | 74 | bins, _ = uniform_count_lags(np.random.normal(10, 2, 1000), 4, None) 75 | assert_array_almost_equal( 76 | bins, 77 | np.array([8.7048, 10.0506, 11.2959, 17.7055]), 78 | decimal=4 79 | ) 80 | 81 | 82 | class TestDerivedBins(unittest.TestCase): 83 | def test_auto(self): 84 | np.random.seed(42) 85 | 86 | bins, n = auto_derived_lags( 87 | np.random.normal(10, 2, 1000), 88 | 'sturges', 89 | None 90 | ) 91 | 92 | # sturges should find 11 classes 93 | self.assertTrue(n == 11) 94 | 95 | assert_array_almost_equal( 96 | bins, 97 | np.array([4.8, 6.1, 7.4, 8.7, 10., 11.3, 12.5, 13.8, 15.1, 16.4, 17.7]), 98 | decimal=1 99 | ) 100 | 101 | def test_skewed(self): 102 | np.random.seed(1312) 103 | 104 | bins, n = auto_derived_lags(np.random.gamma(10, 20, 500), 'doane', 100) 105 | 106 | # doane should condense to 6 here 107 | self.assertTrue(n == 6) 108 | 109 | assert_array_almost_equal( 110 | bins, 111 | np.array([75.6, 80.4, 85.3, 90.2, 95.1, 100.]), 112 | decimal=1 113 | ) 114 | 115 | 116 | class TestClusteringBins(unittest.TestCase): 117 | def test_kmeans(self): 118 | # Python 3.8 yields different results, not sure why 119 | if sys.version_info.minor >= 8: 120 | res = np.array([117.9, 281.1, 370.1, 459.9, 566.9, 759.8]) 121 | return True 122 | else: 123 | res = np.array([118.5, 283.2, 374.7, 467.9, 574.5, 762.5]) 124 | 125 | np.random.seed(1312) 126 | bins, _ = kmeans(np.random.gamma(10, 40, 500), 6, None) 127 | 128 | assert_array_almost_equal( 129 | res, 130 | bins, 131 | decimal=1 132 | ) 133 | 134 | def test_kmeans_convergence(self): 135 | with self.assertRaises(ValueError) as err: 136 | kmeans(np.array([1, 1, 1, 1, 1]), 3, None) 137 | 138 | self.assertTrue('KMeans failed to converge' in str(err.exception)) 139 | 140 | def test_ward(self): 141 | np.random.seed(1312) 142 | bins, _ = ward(np.random.gamma(10, 40, 500), 6, None) 143 | 144 | assert_array_almost_equal( 
145 | np.array([122.7, 283.7, 352.3, 422.7, 520.9, 660.4]), 146 | bins, 147 | decimal=1 148 | ) 149 | 150 | def test_ward_median(self): 151 | np.random.seed(1312) 152 | bins, _ = ward(np.random.gamma(10, 40, 500), 6, None, binning_agg_func='median') 153 | 154 | assert_array_almost_equal( 155 | np.array([126.2, 287.8, 354.6, 421.7, 517., 643.1]), 156 | bins, 157 | decimal=1 158 | ) 159 | 160 | def test_stable_entropy(self): 161 | np.random.seed(1312) 162 | d = np.random.gamma(1500, 40, 100) 163 | 164 | # run 165 | bins, _ = stable_entropy_lags(d, 6, None) 166 | 167 | assert_array_almost_equal( 168 | np.array([10964.6, 21929.3, 32893.9, 43858.6, 54823.2, 69077.2]), 169 | bins, 170 | decimal=1 171 | ) 172 | 173 | 174 | if __name__ == '__main__': 175 | unittest.main() 176 | -------------------------------------------------------------------------------- /skgstat/plotting/stvariogram_marginal.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | try: 5 | import plotly.graph_objects as go 6 | from plotly.subplots import make_subplots 7 | except ImportError: 8 | pass 9 | 10 | 11 | def __calc_plot_data(stvariogram, **kwargs): 12 | # get the marginal experimental variograms 13 | vx = stvariogram.XMarginal.experimental 14 | vy = stvariogram.TMarginal.experimental 15 | 16 | res = kwargs.get('model_resolution', 100) 17 | 18 | # get the model 19 | xx = np.linspace(0, stvariogram.xbins[-1], res) 20 | xy = np.linspace(0, stvariogram.tbins[-1], res) 21 | y_vx = stvariogram.XMarginal.transform(xx) 22 | y_vy = stvariogram.TMarginal.transform(xy) 23 | 24 | return xx, xy, y_vx, y_vy 25 | 26 | 27 | def matplotlib_marginal(stvariogram, axes=None, sharey=True, include_model=False, **kwargs): 28 | # check if an ax needs to be created 29 | if axes is None: 30 | fig, axes = plt.subplots(1, 2, figsize=kwargs.get('figsize', (12, 6)),sharey=sharey) 31 | else: 32 | if len(axes) != 2: 33 | raise ValueError('axes needs to an array of two AxesSubplot objects') 34 | fig = axes[0].get_figure() 35 | 36 | # get some settings 37 | x_style = kwargs.get('x_style', 'ok' if include_model else '-ob') 38 | t_style = kwargs.get('t_style', 'ok' if include_model else '-og') 39 | 40 | # handle the twin axes 41 | ax = axes[0] 42 | ax2 = axes[1] 43 | ax3 = ax2.twinx() 44 | ax3.get_shared_y_axes().join(ax3, ax) 45 | 46 | # plot the marginal experimental variogram 47 | ax.plot(stvariogram.xbins, stvariogram.XMarginal.experimental, x_style) 48 | ax3.plot(stvariogram.tbins, stvariogram.TMarginal.experimental, t_style) 49 | 50 | if include_model: 51 | xx, xy, y_vx, y_vy = __calc_plot_data(stvariogram, **kwargs) 52 | 53 | # plot 54 | ax.plot(xx, y_vx, '-b') 55 | ax3.plot(xy, y_vy, '-g') 56 | 57 | # set labels 58 | ax.set_xlabel('distance [spatial]') 59 | ax.set_ylabel('semivariance [%s]' % stvariogram.estimator.__name__) 60 | ax2.set_xlabel('distance [temporal]') 61 | if not sharey: 62 | ax3.set_ylabel('semivariance [%s]' % stvariogram.estimator.__name__) 63 | 64 | # set title and grid 65 | ax.set_title('spatial marginal variogram') 66 | ax2.set_title('temporal marginal variogram') 67 | 68 | ax.grid(which='major') 69 | ax2.grid(which='major') 70 | plt.tight_layout() 71 | 72 | # return 73 | return fig 74 | 75 | 76 | def plotly_marginal(stvariogram, fig=None, include_model=False, **kwargs): 77 | shared_yaxes = kwargs.get('sharey', kwargs.get('shared_yaxis', True)) 78 | # check if a figure needs to be created 79 | if fig is None: 80 | fig = 
go.Figure() 81 | try: 82 | fig.set_subplots(rows=1, cols=2, shared_yaxes=shared_yaxes) 83 | except ValueError: 84 | # figure has already subplots 85 | pass 86 | 87 | # get some settings 88 | x_color = kwargs.get('x_color', 'black' if include_model else 'green') 89 | t_color = kwargs.get('t_color', 'black' if include_model else 'blue') 90 | 91 | # plot the marginal experimental variogram 92 | fig.add_trace( 93 | go.Scatter( 94 | name='Spatial marginal variogram', 95 | x=stvariogram.xbins, 96 | y=stvariogram.XMarginal.experimental, 97 | mode='markers' if include_model else 'markers+lines', 98 | marker=dict(color=x_color), 99 | line=dict(color=x_color), 100 | ), row=1, col=1 101 | ) 102 | fig.add_trace( 103 | go.Scatter( 104 | name='Temporal marginal variogram', 105 | x=stvariogram.tbins, 106 | y=stvariogram.TMarginal.experimental, 107 | mode='markers' if include_model else 'markers+lines', 108 | marker=dict(color=t_color), 109 | line=dict(color=t_color) 110 | ), row=1, col=2 111 | ) 112 | 113 | # handle models 114 | if include_model: 115 | xx, yy, y_vx, y_vy = __calc_plot_data(stvariogram, **kwargs) 116 | 117 | # add the models 118 | fig.add_trace( 119 | go.Scatter( 120 | name='spatial %s model' % stvariogram.XMarginal.model.__name__, 121 | x=xx, 122 | y=y_vx, 123 | mode='lines', 124 | line=dict(color='blue') 125 | ), row=1, col=1 126 | ) 127 | fig.add_trace( 128 | go.Scatter( 129 | name='temporal %s model' % stvariogram.TMarginal.model.__name__, 130 | x=yy, 131 | y=y_vy, 132 | mode='lines', 133 | line=dict(color='green') 134 | ), row=1, col=2 135 | ) 136 | 137 | # update the layout 138 | fig.update_xaxes(title_text='distance [spatial]', row=1, col=1) 139 | fig.update_xaxes(title_text='distance [temporal]', row=1, col=2) 140 | fig.update_yaxes(title_text='semivariance [%s]' % stvariogram.estimator.__name__, row=1, col=1) 141 | if not shared_yaxes: 142 | fig.update_yaxes(title_text='semivariance [%s]' % stvariogram.estimator.__name__, row=1, col=2) 143 | 144 | fig.update_layout( 145 | legend=dict( 146 | orientation='h', 147 | x=0, 148 | y=1.05, 149 | xanchor='left', 150 | yanchor='bottom' 151 | ) 152 | ) 153 | 154 | return fig 155 | -------------------------------------------------------------------------------- /docs/technical/direction.rst: -------------------------------------------------------------------------------- 1 | ====================== 2 | Directional Variograms 3 | ====================== 4 | 5 | General 6 | ======= 7 | 8 | With version 0.2.2, directional variograms have been introduced. A 9 | directional variogram is a variogram where point pairs are only included into 10 | the semivariance calculation if they fulfill a specified spatial relation. 11 | This relation is expressed as a *search area* that identifies all 12 | *directional* points for a given specific point. SciKit-GStat refers to this 13 | point as *poi* (point of interest). The implementation is done by the 14 | :class:`DirectionalVariogram ` class. 15 | 16 | Understanding Search Area 17 | ========================= 18 | 19 | .. note:: 20 | 21 | The :class:`DirectionalVariogram ` is 22 | in general capable of handling n-dimensional coordinates. The application 23 | of directional dependency is, however, only applied to the first two 24 | dimensions. 25 | 26 | Understanding the search area of a directional is vital for using the 27 | :class:`DirectionalVariogram ` class. The 28 | search area is controlled by the 29 | :func:`directional_model ` 30 | property which determines the shape of the search area. 
The extend and 31 | orientation of this area is controlled by the parameters: 32 | 33 | - :func:`azimuth ` 34 | - :func:`tolerance ` 35 | - :func:`bandwidth ` 36 | 37 | As of this writing, SciKit-GStat supports three different search area shapes: 38 | 39 | - :func:`triangle ` (*default*) 40 | - :func:`circle ` 41 | - :func:`compass ` 42 | 43 | Additionally, the shape generation is controlled by the 44 | :func:`tolerance ` parameter 45 | (:func:`triangle `, 46 | :func:`compass `) and 47 | :func:`bandwidth ` parameter 48 | (:func:`triangle `, 49 | :func:`circle `). The 50 | :func:`azimuth ` is used to rotate the 51 | search area into a desired direction. An azimuth of 0° is heading East of the 52 | coordinate plane. Positive values for azimuth rotate the search area 53 | clockwise, negative values counter-clockwise. 54 | The :func:`tolerance ` specifies how 55 | far the angle (against 'x-axis') between two points can be off the azimuth to 56 | be still considered as a directional point pair. Based on this definition, 57 | two points at a larger distance would generally be allowed to differ more 58 | from azimuth in terms of coordinate distance. Therefore the 59 | :func:`bandwidth ` defines a maximum 60 | coordinate distance, a point can have from the azimuth line. 61 | The difference between the 62 | :func:`triangle ` and the 63 | :func:`compass ` search area is that 64 | the triangle uses the bandwidth and the compass does not. 65 | 66 | The :class:`DirectionalVariogram ` has a 67 | function to plot the effect of the search area. The method is called 68 | :func:`pair_field `. Using 69 | random coordinates, the visualization is shown below. 70 | 71 | .. ipython:: python 72 | :okwarning: 73 | 74 | from skgstat import DirectionalVariogram 75 | from skgstat.plotting import backend 76 | import numpy as np 77 | import matplotlib.pyplot as plt 78 | plt.style.use('ggplot') 79 | backend('matplotlib') 80 | np.random.seed(42) 81 | coords = np.random.gamma(15, 6, (40, 2)) 82 | np.random.seed(42) 83 | vals = np.random.normal(5,1, 40) 84 | 85 | DV = DirectionalVariogram(coords, vals, 86 | azimuth=0, 87 | tolerance=45, 88 | directional_model='triangle') 89 | 90 | @savefig dv1.png width=6in 91 | DV.pair_field(plt.gca()) 92 | 93 | The model can easily be changed, using the 94 | :func:`set_directional_model ` 95 | function: 96 | 97 | .. ipython:: python 98 | :okwarning: 99 | 100 | fig, axes = plt.subplots(1, 2, figsize=(8, 4)) 101 | 102 | DV.set_directional_model('triangle') 103 | DV.pair_field(plt.gca()) 104 | 105 | @savefig dv2.png width=8in 106 | DV.pair_field(plt.gca()) 107 | fig.show() 108 | 109 | DV.set_directional_model('compass') 110 | 111 | @savefig dv3.png width=8in 112 | DV.pair_field(plt.gca()) 113 | fig.show() 114 | 115 | Directional variograms 116 | ====================== 117 | 118 | In principle, the :class:`DirectionalVariogram ` 119 | can be used just like the :class:`Variogram ` base class. 120 | In fact :class:`DirectionalVariogram ` inherits 121 | most of the behaviour. All the functionality described in the previous 122 | sections is added to the basic :class:`Variogram `. 123 | All other methods and attributes can be used in the same way. 124 | 125 | .. warning:: 126 | 127 | In order to implement the directional dependency, some methods have been 128 | rewritten in :class:`DirectionalVariogram `. 
129 | Thus the following methods do **not** show the same behaviour: 130 | 131 | - :func:`DirectionalVariogram.bins ` 132 | - :func:`DirectionalVariogram._calc_groups ` 133 | -------------------------------------------------------------------------------- /skgstat/util/likelihood.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module implements a maximum likelihood function for variogram models. 3 | The definition is taken from [601]_: 4 | 5 | References 6 | ---------- 7 | [601] Lark, R. M. "Estimating variograms of soil properties by the 8 | method‐of‐moments and maximum likelihood." European Journal 9 | of Soil Science 51.4 (2000): 717-728. 10 | """ 11 | from typing import Callable, List 12 | from itertools import cycle 13 | 14 | import numpy as np 15 | from scipy.spatial.distance import squareform 16 | from scipy.linalg import inv, det 17 | 18 | from skgstat import Variogram 19 | 20 | 21 | DOC_TEMPLATE = """Autocorrelation function. 22 | This function calculates the spatial autocorrelation for any 23 | model function only, by setting nugget to 0 and sill to 1. 24 | This can be used to create an autocorreation matrix as used 25 | to derive a maximum likelihhod function for the model. 26 | 27 | Original documentation: 28 | {doc} 29 | """ 30 | 31 | LIKE_DOC = """ 32 | Negative log-likelihood function following [601]. 33 | This likelihood function is based on a ``{model_name}`` model. 34 | The sample of the variogram instance contained ``{n}`` points. 35 | 36 | References 37 | ---------- 38 | [601] Lark, R. M. "Estimating variograms of soil properties by the 39 | method‐of‐moments and maximum likelihood." European Journal 40 | of Soil Science 51.4 (2000): 717-728. 41 | """ 42 | 43 | 44 | def _model_transformed(model_func, has_s: bool = False) -> Callable: 45 | """ 46 | Transforms the model parameter input to fit likelihood function 47 | input parameters. 48 | The returned function can be used to create the log-likelihood 49 | function that has to be minimized. 50 | To build up the spatial autocorrelation matrix, the spatial 51 | autocorrelation has to be separated from nugget and sill. 52 | """ 53 | if has_s: 54 | def wrapped(h, r, s): 55 | return model_func(h, r, s=s, c0=1, b=0) 56 | else: 57 | def wrapped(h, r): 58 | return model_func(h, r, c0=1, b=0) 59 | 60 | # add the original docstring 61 | wrapped.__doc__ = DOC_TEMPLATE.format(doc=model_func.__doc__) 62 | wrapped.__name__ = f"autocorr_{model_func.__name__}" 63 | 64 | return wrapped 65 | 66 | 67 | def _build_A(transformed_func: Callable, params: List[float], dists: np.ndarray) -> np.ndarray: 68 | """ 69 | Builds the autocorrelation matrix for a given model function. 70 | """ 71 | if len(params) == 4: 72 | r, c0, s, b = params 73 | a = np.fromiter(map(transformed_func, dists, cycle([r]), cycle([s])), dtype=float) 74 | else: 75 | r, c0, b = params 76 | # calculate the upper triangle of A: 77 | a = np.fromiter(map(transformed_func, dists, cycle([r])), dtype=float) 78 | 79 | # build the full matrix 80 | A = squareform((c0 / (c0 + b)) * (1 - a)) 81 | 82 | # replace diagonal 0 with ones 83 | np.fill_diagonal(A, 1) 84 | 85 | return A 86 | 87 | 88 | def get_likelihood(variogram: Variogram) -> Callable: 89 | """ 90 | Returns the maximum likelihood function for the given variogram. 91 | The function takes a list of model parameters and returns the 92 | negative log-likelihood for this set of parameters. 
93 | The signature of the reutrned function has the interface as 94 | needed by :func:`scipy.optimize.minimize`. 95 | The parameters for the models are given as ``[r, c0, b]``, which 96 | are the effective range, sill and nugget. For the 97 | :func:`Matérn ` and 98 | :func:`Stable ` models, the parameters include 99 | an additional parameter ``s``, which is the smoothness parameter. 100 | 101 | Example 102 | ------- 103 | 104 | .. code-block:: python 105 | 106 | from scipy.optimize import minimize 107 | like = get_likelihood(variogram) 108 | res = minimize(like) 109 | 110 | It is, hwowever, highly recommended to provide an initial guess and 111 | specify bounds for the parameter space. 112 | 113 | """ 114 | # extract the current data 115 | values = variogram.values 116 | # TODO: there is a bug as this is not working: 117 | # dists = variogram.distance 118 | try: 119 | dists = squareform(variogram._X.dists.todense()) 120 | except AttributeError: 121 | dists = variogram.distance 122 | 123 | # get the transformed model func 124 | has_s = variogram.model.__name__ in ('matern', 'stable') 125 | transformed_func = _model_transformed(variogram.model, has_s=has_s) 126 | 127 | def likelihood(params: List[float]) -> float: 128 | # calculate A 129 | A = _build_A(transformed_func, params, dists) 130 | n = len(A) 131 | 132 | # invert A 133 | A_inv = inv(A) 134 | 135 | # build the 1 vector and t 136 | ones = np.ones((n, 1)) 137 | z = values.reshape(n, -1) 138 | 139 | # build the estimate matrix for field means 140 | m = inv(ones.T @ A_inv @ ones) @ (ones.T @ A_inv @ z) 141 | b = np.log((z - m).T @ A_inv @ (z - m)) 142 | 143 | # get the log of determinant of A 144 | D = np.log(det(A)) 145 | # np.log(0.0) is -inf, so we need to check for this 146 | if D == -np.inf: # pragma: no cover 147 | # print a warning and return np.inf to not use these parameters 148 | print("Warning: D = -inf, returning np.inf") 149 | return np.inf 150 | 151 | # finally log-likelihood of the model given parameters 152 | loglike = (n / 2)*np.log(2*np.pi) + (n / 2) - (n / 2) * np.log(n) + 0.5 * D + (n / 2) * b 153 | 154 | # this is actually a 1x1 matrix 155 | return loglike.flatten()[0] 156 | 157 | # add the docstring to the likelihood function 158 | likelihood.__doc__ = LIKE_DOC.format(model_name=variogram.model.__name__, n=len(values)) 159 | 160 | # return the likelikhood function 161 | return likelihood 162 | -------------------------------------------------------------------------------- /docs/data/sample_sr.csv: -------------------------------------------------------------------------------- 1 | x,y,z 2 | 94,20,-0.39444438053747594 3 | 82,37,-2.283663062708473 4 | 43,13,-0.5462130280740465 5 | 78,42,-3.681383697834789 6 | 50,28,0.5045382784070095 7 | 20,74,-1.5041741384342935 8 | 71,88,1.3503808134843678 9 | 7,9,-1.1638796197023304 10 | 24,69,-0.34051346127283 11 | 77,28,-1.1920728498239048 12 | 71,17,-1.2669163335465252 13 | 29,74,-0.7427182051736556 14 | 49,59,0.48750491266151463 15 | 100,12,-2.962001584534514 16 | 5,52,-1.9117585922891978 17 | 16,82,-0.8417505208808288 18 | 16,17,-1.4281827604814472 19 | 88,58,0.7782599543685997 20 | 52,49,0.1786421715317022 21 | 63,85,0.7522280061388416 22 | 26,13,-0.7991250342467555 23 | 19,36,0.6432892058776345 24 | 36,79,-1.5280696290326905 25 | 57,97,-0.8259657245833522 26 | 62,99,-0.06959396015894331 27 | 32,30,0.8022020533234546 28 | 39,59,-0.05292820972410184 29 | 29,90,-1.0204864471818211 30 | 99,32,-0.6091409530661445 31 | 44,6,0.22039696575581752 32 | 43,58,0.4599801140437183 33 | 
85,39,-2.31679305625176 34 | 70,54,0.5223039952706272 35 | 91,79,-0.3759977848107796 36 | 72,73,0.7416405213029846 37 | 26,14,-0.7804675204942965 38 | 74,27,-1.0553595338690305 39 | 95,81,-0.7486241434516918 40 | 57,64,1.7616136632222261 41 | 91,89,0.04677627985012556 42 | 16,15,-1.7478697007635944 43 | 43,64,0.16499520258020345 44 | 51,23,0.7437586449805585 45 | 48,15,-0.7098517053077216 46 | 44,62,0.6186416952805267 47 | 89,87,-0.16867483733838862 48 | 75,44,-3.514204440866991 49 | 28,60,-2.7366690143837133 50 | 40,15,-1.1281865226107042 51 | 93,11,-3.118152626954697 52 | 15,63,-1.2690922003864293 53 | 88,29,-0.18836588223815826 54 | 2,23,0.6799045629833986 55 | 46,96,0.21739536724147307 56 | 65,3,-1.4592660391212884 57 | 88,37,-1.107244883929112 58 | 65,95,0.2275609725399672 59 | 41,7,-0.2496356070766711 60 | 25,80,-1.1211940215729022 61 | 21,27,-0.09661807637216241 62 | 91,12,-2.406230668399388 63 | 32,81,-1.3500805459402252 64 | 65,83,0.5888676370398311 65 | 72,27,-0.9654962013539982 66 | 31,80,-1.456316317242763 67 | 34,77,-1.5715611436569268 68 | 75,52,-0.9936717510754695 69 | 83,57,-0.3972504171528985 70 | 53,54,0.9579444554514525 71 | 5,48,-0.9110466214925685 72 | 63,37,-1.668719852071336 73 | 52,88,0.30155665085569183 74 | 52,80,-0.11393389488718508 75 | 88,59,0.9498500062624038 76 | 82,40,-2.8630741280292114 77 | 88,97,-0.5724965644599087 78 | 16,39,0.3509623844845141 79 | 50,30,0.30712552582481484 80 | 94,10,-3.2346378326811025 81 | 32,38,0.4552089340350225 82 | 44,24,-0.7574018163971681 83 | 49,69,1.2356342827899252 84 | 80,28,-1.0178539036303835 85 | 44,97,0.04065055111399368 86 | 9,72,-1.5862874790244033 87 | 62,83,0.713159046633232 88 | 55,87,1.0014195983263146 89 | 73,88,0.9887212215545849 90 | 74,77,0.8412936419422302 91 | 41,10,-0.47373270525578615 92 | 80,70,0.1989277467301741 93 | 84,91,0.4362331855173341 94 | 19,41,0.008772448868853477 95 | 74,95,0.21200274251186702 96 | 53,77,-0.44142991018893873 97 | 77,71,0.3737558041580218 98 | 26,80,-1.3080066480679053 99 | 60,29,-0.8604750005960811 100 | 87,66,0.4825254242707143 101 | 49,26,0.2699907908494049 102 | 58,7,-1.7509555878489427 103 | 83,34,-1.9690554078818678 104 | 98,71,-0.3072215482324351 105 | 19,33,0.8195543425614512 106 | 69,37,-0.9869471073868279 107 | 77,16,-0.3032695851580103 108 | 17,5,-1.2189964119667087 109 | 12,3,-0.27257057240043464 110 | 36,10,-0.39356133696987433 111 | 60,92,0.8447638349149216 112 | 87,26,0.04083935755339871 113 | 42,88,-0.6016648813493206 114 | 95,68,-0.6153052578521497 115 | 26,73,0.10972407867717388 116 | 75,87,0.4837705304898816 117 | 8,72,-1.5017658843299335 118 | 72,38,-2.1548409371968544 119 | 91,53,-0.29640401021767016 120 | 17,43,-0.19065596156743903 121 | 78,44,-3.470794267709495 122 | 47,80,-0.15092944771479289 123 | 44,90,-0.2891096089347041 124 | 14,73,-1.6282848473826248 125 | 1,99,-2.7485938529435496 126 | 89,52,-1.0444974789331405 127 | 44,88,-0.23148281964714834 128 | 56,6,-1.2489821829912962 129 | 70,64,-0.1985023790465692 130 | 40,85,-0.6119284650766149 131 | 46,21,-1.0575872709845917 132 | 41,39,-0.49010823408513315 133 | 45,93,-0.14813508543192433 134 | 8,46,-0.6957694579956841 135 | 29,41,0.2925915080992926 136 | 32,98,-1.6807270769546596 137 | 80,100,-1.8149269806059642 138 | 65,94,0.4077290290404864 139 | 26,63,-1.475875594477606 140 | 49,6,0.05498998205253869 141 | 56,48,-0.22702813291513896 142 | 22,92,-0.8249706202797014 143 | 56,35,-0.9263564590999971 144 | 66,26,-0.8561138896895382 145 | 48,67,1.2573632103954488 146 | 100,75,0.06842200444814739 147 | 
81,75,0.5473099797622492 148 | 33,49,-1.0321496503984102 149 | 27,99,-1.8838634225902282 150 | 85,68,0.4802607641737733 151 | 82,71,0.3196478743792045 152 | 80,82,0.5695006144567061 153 | 65,13,-1.06228576338799 154 | 96,76,-0.3524376227895107 155 | 2,74,0.5690909889388418 156 | 85,31,-1.246452342132829 157 | 4,33,-0.7953412470737332 158 | 67,50,0.5625661264080491 159 | 55,83,0.24351590040811572 160 | 15,38,0.36411193448911594 161 | 71,100,-0.4536559855457971 162 | 83,1,0.07933936128007926 163 | 88,45,-2.7457207428781283 164 | 39,95,0.23219385937063947 165 | 25,6,-0.2788122287793571 166 | 100,49,0.23626716108476928 167 | 44,86,-0.09441530966039002 168 | 18,92,-1.0357957584175066 169 | 89,36,-0.38685322915900544 170 | 7,35,-1.0239078717951824 171 | 63,46,-0.34438971963090154 172 | 22,84,0.23396407807136554 173 | 68,34,-0.903632978192785 174 | 96,11,-3.434910889878696 175 | 87,38,-1.785802886697303 176 | 29,79,-1.5565194005756282 177 | 83,59,0.6088959380848571 178 | 82,99,-1.5779406861904615 179 | 65,17,-1.7956575523376384 180 | 11,54,-1.5236653807493452 181 | 70,100,-0.4958109890037088 182 | 89,27,0.08154828239071249 183 | 59,70,0.8365371624753974 184 | 57,19,-1.070650685896748 185 | 14,47,-0.8417245700628229 186 | 77,1,0.37906792145208323 187 | 64,6,-1.8521839941654208 188 | 81,29,-1.224269671546148 189 | 78,97,-0.6538359282489696 190 | 98,48,-0.20965379737882728 191 | 13,16,-0.24930407680322653 192 | 53,92,0.4209881693233034 193 | 82,7,0.19664362382501466 194 | 6,56,-1.9742588680541977 195 | 15,51,-1.1446294699147188 196 | 90,76,-0.6174841523716045 197 | 24,11,-1.4410912136779468 198 | 70,15,-1.3418968539974467 199 | 42,82,-0.037885272206712395 200 | 88,40,-2.0035919907455177 201 | 85,88,0.12049021523462489 202 | -------------------------------------------------------------------------------- /skgstat/tests/sample.csv: -------------------------------------------------------------------------------- 1 | x,y,z 2 | 94,20,-0.39444438053747594 3 | 82,37,-2.283663062708473 4 | 43,13,-0.5462130280740465 5 | 78,42,-3.681383697834789 6 | 50,28,0.5045382784070095 7 | 20,74,-1.5041741384342935 8 | 71,88,1.3503808134843678 9 | 7,9,-1.1638796197023304 10 | 24,69,-0.34051346127283 11 | 77,28,-1.1920728498239048 12 | 71,17,-1.2669163335465252 13 | 29,74,-0.7427182051736556 14 | 49,59,0.48750491266151463 15 | 100,12,-2.962001584534514 16 | 5,52,-1.9117585922891978 17 | 16,82,-0.8417505208808288 18 | 16,17,-1.4281827604814472 19 | 88,58,0.7782599543685997 20 | 52,49,0.1786421715317022 21 | 63,85,0.7522280061388416 22 | 26,13,-0.7991250342467555 23 | 19,36,0.6432892058776345 24 | 36,79,-1.5280696290326905 25 | 57,97,-0.8259657245833522 26 | 62,99,-0.06959396015894331 27 | 32,30,0.8022020533234546 28 | 39,59,-0.05292820972410184 29 | 29,90,-1.0204864471818211 30 | 99,32,-0.6091409530661445 31 | 44,6,0.22039696575581752 32 | 43,58,0.4599801140437183 33 | 85,39,-2.31679305625176 34 | 70,54,0.5223039952706272 35 | 91,79,-0.3759977848107796 36 | 72,73,0.7416405213029846 37 | 26,14,-0.7804675204942965 38 | 74,27,-1.0553595338690305 39 | 95,81,-0.7486241434516918 40 | 57,64,1.7616136632222261 41 | 91,89,0.04677627985012556 42 | 16,15,-1.7478697007635944 43 | 43,64,0.16499520258020345 44 | 51,23,0.7437586449805585 45 | 48,15,-0.7098517053077216 46 | 44,62,0.6186416952805267 47 | 89,87,-0.16867483733838862 48 | 75,44,-3.514204440866991 49 | 28,60,-2.7366690143837133 50 | 40,15,-1.1281865226107042 51 | 93,11,-3.118152626954697 52 | 15,63,-1.2690922003864293 53 | 88,29,-0.18836588223815826 54 | 
2,23,0.6799045629833986 55 | 46,96,0.21739536724147307 56 | 65,3,-1.4592660391212884 57 | 88,37,-1.107244883929112 58 | 65,95,0.2275609725399672 59 | 41,7,-0.2496356070766711 60 | 25,80,-1.1211940215729022 61 | 21,27,-0.09661807637216241 62 | 91,12,-2.406230668399388 63 | 32,81,-1.3500805459402252 64 | 65,83,0.5888676370398311 65 | 72,27,-0.9654962013539982 66 | 31,80,-1.456316317242763 67 | 34,77,-1.5715611436569268 68 | 75,52,-0.9936717510754695 69 | 83,57,-0.3972504171528985 70 | 53,54,0.9579444554514525 71 | 5,48,-0.9110466214925685 72 | 63,37,-1.668719852071336 73 | 52,88,0.30155665085569183 74 | 52,80,-0.11393389488718508 75 | 88,59,0.9498500062624038 76 | 82,40,-2.8630741280292114 77 | 88,97,-0.5724965644599087 78 | 16,39,0.3509623844845141 79 | 50,30,0.30712552582481484 80 | 94,10,-3.2346378326811025 81 | 32,38,0.4552089340350225 82 | 44,24,-0.7574018163971681 83 | 49,69,1.2356342827899252 84 | 80,28,-1.0178539036303835 85 | 44,97,0.04065055111399368 86 | 9,72,-1.5862874790244033 87 | 62,83,0.713159046633232 88 | 55,87,1.0014195983263146 89 | 73,88,0.9887212215545849 90 | 74,77,0.8412936419422302 91 | 41,10,-0.47373270525578615 92 | 80,70,0.1989277467301741 93 | 84,91,0.4362331855173341 94 | 19,41,0.008772448868853477 95 | 74,95,0.21200274251186702 96 | 53,77,-0.44142991018893873 97 | 77,71,0.3737558041580218 98 | 26,80,-1.3080066480679053 99 | 60,29,-0.8604750005960811 100 | 87,66,0.4825254242707143 101 | 49,26,0.2699907908494049 102 | 58,7,-1.7509555878489427 103 | 83,34,-1.9690554078818678 104 | 98,71,-0.3072215482324351 105 | 19,33,0.8195543425614512 106 | 69,37,-0.9869471073868279 107 | 77,16,-0.3032695851580103 108 | 17,5,-1.2189964119667087 109 | 12,3,-0.27257057240043464 110 | 36,10,-0.39356133696987433 111 | 60,92,0.8447638349149216 112 | 87,26,0.04083935755339871 113 | 42,88,-0.6016648813493206 114 | 95,68,-0.6153052578521497 115 | 26,73,0.10972407867717388 116 | 75,87,0.4837705304898816 117 | 8,72,-1.5017658843299335 118 | 72,38,-2.1548409371968544 119 | 91,53,-0.29640401021767016 120 | 17,43,-0.19065596156743903 121 | 78,44,-3.470794267709495 122 | 47,80,-0.15092944771479289 123 | 44,90,-0.2891096089347041 124 | 14,73,-1.6282848473826248 125 | 1,99,-2.7485938529435496 126 | 89,52,-1.0444974789331405 127 | 44,88,-0.23148281964714834 128 | 56,6,-1.2489821829912962 129 | 70,64,-0.1985023790465692 130 | 40,85,-0.6119284650766149 131 | 46,21,-1.0575872709845917 132 | 41,39,-0.49010823408513315 133 | 45,93,-0.14813508543192433 134 | 8,46,-0.6957694579956841 135 | 29,41,0.2925915080992926 136 | 32,98,-1.6807270769546596 137 | 80,100,-1.8149269806059642 138 | 65,94,0.4077290290404864 139 | 26,63,-1.475875594477606 140 | 49,6,0.05498998205253869 141 | 56,48,-0.22702813291513896 142 | 22,92,-0.8249706202797014 143 | 56,35,-0.9263564590999971 144 | 66,26,-0.8561138896895382 145 | 48,67,1.2573632103954488 146 | 100,75,0.06842200444814739 147 | 81,75,0.5473099797622492 148 | 33,49,-1.0321496503984102 149 | 27,99,-1.8838634225902282 150 | 85,68,0.4802607641737733 151 | 82,71,0.3196478743792045 152 | 80,82,0.5695006144567061 153 | 65,13,-1.06228576338799 154 | 96,76,-0.3524376227895107 155 | 2,74,0.5690909889388418 156 | 85,31,-1.246452342132829 157 | 4,33,-0.7953412470737332 158 | 67,50,0.5625661264080491 159 | 55,83,0.24351590040811572 160 | 15,38,0.36411193448911594 161 | 71,100,-0.4536559855457971 162 | 83,1,0.07933936128007926 163 | 88,45,-2.7457207428781283 164 | 39,95,0.23219385937063947 165 | 25,6,-0.2788122287793571 166 | 100,49,0.23626716108476928 167 | 
44,86,-0.09441530966039002 168 | 18,92,-1.0357957584175066 169 | 89,36,-0.38685322915900544 170 | 7,35,-1.0239078717951824 171 | 63,46,-0.34438971963090154 172 | 22,84,0.23396407807136554 173 | 68,34,-0.903632978192785 174 | 96,11,-3.434910889878696 175 | 87,38,-1.785802886697303 176 | 29,79,-1.5565194005756282 177 | 83,59,0.6088959380848571 178 | 82,99,-1.5779406861904615 179 | 65,17,-1.7956575523376384 180 | 11,54,-1.5236653807493452 181 | 70,100,-0.4958109890037088 182 | 89,27,0.08154828239071249 183 | 59,70,0.8365371624753974 184 | 57,19,-1.070650685896748 185 | 14,47,-0.8417245700628229 186 | 77,1,0.37906792145208323 187 | 64,6,-1.8521839941654208 188 | 81,29,-1.224269671546148 189 | 78,97,-0.6538359282489696 190 | 98,48,-0.20965379737882728 191 | 13,16,-0.24930407680322653 192 | 53,92,0.4209881693233034 193 | 82,7,0.19664362382501466 194 | 6,56,-1.9742588680541977 195 | 15,51,-1.1446294699147188 196 | 90,76,-0.6174841523716045 197 | 24,11,-1.4410912136779468 198 | 70,15,-1.3418968539974467 199 | 42,82,-0.037885272206712395 200 | 88,40,-2.0035919907455177 201 | 85,88,0.12049021523462489 202 | -------------------------------------------------------------------------------- /docs/userguide/introduction.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Introduction 3 | ============ 4 | 5 | General 6 | ======= 7 | 8 | This user guide part of ``scikit-gstat``'s documentation is meant to be an 9 | user guide to the functionality offered by the module along with a more 10 | general introduction to geostatistical concepts. The main use case is to hand 11 | this description to students learning geostatistics, whenever 12 | ``scikit-gstat`` is used. 13 | But before introducing variograms, the more general question what 14 | geostatistics actually are has to be answered. 15 | 16 | .. note:: 17 | 18 | This user guide is meant to be an **introduction** to geostatistics. In 19 | case you are already familiar with the topic, you can skip this section. 20 | 21 | What is geostatistics? 22 | ====================== 23 | 24 | The basic idea of geostatistics is to describe and estimate spatial 25 | covariance, or correlation, in a set of point data. 26 | While the main tool, the semi-variogram, is quite easy to implement and use, 27 | a lot of important assumptions are underlying it. 28 | The typical application is geostatistics is an interpolation. Therefore, 29 | although using point data, a basic concept is to understand this point data 30 | as a sample of a (spatially) continuous variable that can be described as a 31 | random field :math:`rf`, or to be more precise, a Gaussian random field in many 32 | cases. The most fundamental assumption in geostatistics is that any two values 33 | :math:`x_i` and :math:`x_{i + h}` are more similar, the smaller :math:`h` is, 34 | which is a separating distance on the random field. In other words: *close 35 | observation points will show higher covariances than distant points*. In case 36 | this most fundamental conceptual assumption does not hold for a specific 37 | variable, geostatistics will not be the correct tool to analyse and 38 | interpolate this variable. 39 | 40 | One of the most easiest approaches to interpolate point data is to use IDW 41 | (inverse distance weighting). This technique is implemented in almost any GIS 42 | software. The fundamental conceptual model can be described as: 43 | 44 | .. 
math:: 45 | Z_u = \frac{\sum_{i}^{N} w_i * Z(i)}{N} 46 | 47 | where :math:`Z_u` is the value of :math:`rf` at a non-observed location with 48 | :math:`N` observations around it. These observations get weighted by the weight 49 | :math:`w_i`, which can be calculated like: 50 | 51 | .. math:: 52 | w_i = \frac{1}{||\overrightarrow{ux_i}||} 53 | 54 | where :math:`u` is the unobserved point and :math:`x_i` is one of the 55 | sample points. Thus, :math:`||\overrightarrow{ux_i}||` is the 2-norm of 56 | the vector between the two points: the Euclidean distance in the coordinate 57 | space (which by no means has to be limited to the :math:`\mathbb{R}^2` case). 58 | 59 | This basically describes a concept, where a value of the random field is 60 | estimated by a distance-weighted mean of the surrounding points. As close 61 | points shall have a higher impact, the inverse distance is used and thus the 62 | name of **inverse distance weighting**. 63 | 64 | In the case of geostatistics this basic model still holds, but is extended. 65 | Instead of depending the weights exclusively on the separating distance, a 66 | weight will be derived from a variance over all values that are separated by 67 | a similar distance. This has the main advantage of incorporating the actual 68 | (co)variance found in the observations and basing the interpolation on this 69 | (co)variance, but comes at the cost of some strict assumptions about the 70 | statistical properties of the sample. Elaborating and assessing these 71 | assumptions is one of the main challenges of geostatistics. 72 | 73 | Geostatistical Tools 74 | ==================== 75 | 76 | Geostatistics is a wide field spanning a wide variety of disciplines, like 77 | geology, biology, hydrology or geomorphology. Each discipline defines their 78 | own set of tools, and apparently definitions, and progress is made until 79 | today. It is not the objective of ``scikit-gstat`` to be a comprehensive 80 | collection of all available tools. The objective is more to offer some 81 | common and also more sophisticated tools for variogram analysis. 82 | Thus, when using ``scikit-gstat``, you typically need another library for 83 | the actual application, like interpolation. In most cases that will be 84 | `gstools `_. 85 | However, one can split geostatistics into three main fields, each of it with its 86 | own tools: 87 | 88 | * **variography:** with the variogram being the main tool, the variography 89 | focuses on describing, visualizing and modelling covariance structures in 90 | space and time. 91 | * **kriging:** is a family of interpolation methods, that utilize a variogram to 92 | estimate the kriging weights as sketched above. 93 | * **geostatistical simulation:** is aiming on generate random fields that fit 94 | a given set of observations or a pre-defined variogram or covariance function. 95 | 96 | .. note:: 97 | 98 | I am not planning to implement tools from all three fields. 99 | You can rather use one of the interfaces, like :func:`Variogram.to_gstools ` 100 | to export a variogram to another library, that covers kriging and 101 | spatial random field generation in great detail. 102 | 103 | 104 | How to use this guide 105 | ===================== 106 | 107 | The main idea behind the user-guide is to introduce geostatistics at the example of 108 | SciKit-GStat. The module has a growing collection of data examples, that are used 109 | throughout the documentation. They can be loaded from the `data` submodule. 
110 | Each function will return a dictionary of the actual sample and a brief description. 111 | 112 | .. note:: 113 | 114 | Any data sample included has an origin and an owner. While they are all distributed 115 | under open licenses, you have to check the description for data ownership as all 116 | used licenses force you to attribute the owner. 117 | 118 | .. ipython:: python 119 | :okwarning: 120 | 121 | import skgstat as skg 122 | skg.data.aniso(N=20) 123 | 124 | These samples contain a coordinate and a value array. 125 | -------------------------------------------------------------------------------- /skgstat/interfaces/gstools.py: -------------------------------------------------------------------------------- 1 | """GSTools Interface.""" 2 | import numpy as np 3 | import warnings 4 | 5 | 6 | def stable_rescale(describe): 7 | """Get GSTools rescale parameter from sk-gstat stable model description.""" 8 | return np.power(3, 1 / describe["shape"]) 9 | 10 | 11 | MODEL_MAP = dict( 12 | spherical=dict(gs_cls="Spherical"), 13 | exponential=dict(gs_cls="Exponential", rescale=3.0), 14 | gaussian=dict(gs_cls="Gaussian", rescale=2.0), 15 | cubic=dict(gs_cls="Cubic"), 16 | stable=dict( 17 | gs_cls="Stable", arg_map={"alpha": "shape"}, rescale=stable_rescale 18 | ), 19 | matern=dict( 20 | gs_cls="Matern", arg_map={"nu": "smoothness"}, rescale=4.0 21 | ), 22 | ) 23 | 24 | 25 | def skgstat_to_gstools(variogram, **kwargs): 26 | """ 27 | Instantiate a corresponding GSTools :any:`CovModel `. 28 | 29 | By default, this will be an isotropic model. 30 | 31 | Parameters 32 | ---------- 33 | variogram : :any:`Variogram` 34 | Scikit-Gstat Variogram instance. 35 | **kwargs 36 | Keyword arguments forwarded to the instantiated GSTools CovModel. 37 | The default parameters 'dim', 'var', 'len_scale', 'nugget', 38 | 'rescale' and optional shape parameters will be extracted 39 | from the given Variogram but they can be overwritten here. 40 | 41 | Raises 42 | ------ 43 | ImportError 44 | When GSTools is not installed. 45 | ValueError 46 | When GSTools version is not v1.3 or greater. 47 | ValueError 48 | When given Variogram model is not supported ('harmonize'). 49 | 50 | Warns 51 | ----- 52 | Warning 53 | If the Variogram is a cross-variogram 54 | 55 | 56 | Returns 57 | ------- 58 | model : :any:`CovModel ` 59 | Corresponding GSTools covmodel. 60 | 61 | Note 62 | ---- 63 | In case you intend to use the 64 | :func:`coordinates ` 65 | in a GSTools workflow, you need to transpose the coordinate 66 | array like: 67 | 68 | >> cond_pos Variogram.coordinates.T 69 | 70 | """ 71 | # try to import gstools and notify user if not installed 72 | try: 73 | import gstools as gs 74 | except ImportError as e: # pragma: no cover 75 | raise ImportError("to_gstools: GSTools not installed.") from e 76 | 77 | # at least gstools>=1.3.0 is needed 78 | if list(map(int, gs.__version__.split(".")[:2])) < [1, 3]: # pragma: no cover 79 | raise ValueError("to_gstools: GSTools v1.3 or greater required.") 80 | 81 | # if Variogram is a cross-variogram warn the user 82 | if variogram.is_cross_variogram: 83 | warnings.warn("This instance is a cross-variogram!!" 
+ 84 | " GSTools.CovModel will most likely not handle this Variogram correctly.") 85 | 86 | 87 | # gstolls needs the spatial dimension 88 | kwargs.setdefault("dim", variogram.dim) 89 | 90 | # extract all needed settings 91 | describe = variogram.describe() 92 | 93 | # get the theoretical model name 94 | # name = describe["name"] 95 | name = describe['model'] 96 | 97 | if name not in MODEL_MAP: 98 | raise ValueError("skgstat_to_gstools: model not supported: " + name) 99 | gs_describe = MODEL_MAP[name] 100 | 101 | # set variogram parameters 102 | gs_describe.setdefault("rescale", 1.0) 103 | gs_describe.setdefault("arg_map", dict()) 104 | gs_kwargs = dict( 105 | var=float(describe["sill"] - describe["nugget"]), 106 | len_scale=float(describe["effective_range"]), 107 | nugget=float(describe["nugget"]), 108 | ) 109 | 110 | # some skgstat models need different rescale 111 | rescale = gs_describe["rescale"] 112 | gs_kwargs["rescale"] = rescale(describe) if callable(rescale) else rescale 113 | arg_map = gs_describe["arg_map"] 114 | for arg in arg_map: 115 | gs_kwargs[arg] = float(describe[arg_map[arg]]) 116 | 117 | # update the parameters 118 | gs_kwargs.update(kwargs) 119 | 120 | # get the model and return the CovModel 121 | gs_model = getattr(gs, gs_describe["gs_cls"]) 122 | return gs_model(**gs_kwargs) 123 | 124 | 125 | def skgstat_to_krige(variogram, **kwargs): 126 | """ 127 | Instantiate a GSTools Krige class. 128 | 129 | This can only export isotropic models. 130 | Note: the `fit_variogram` is always set to `False` 131 | 132 | Parameters 133 | ---------- 134 | variogram : skgstat.Variogram 135 | Scikit-GStat Variogram instamce 136 | **kwargs 137 | Keyword arguments forwarded to GSTools Krige. 138 | Refer to :any:`Krige ` to 139 | learn about all possible options. 140 | Note that the `fit_variogram` parameter will 141 | always be False. 142 | 143 | Raises 144 | ------ 145 | ImportError 146 | When GSTools is not installed. 147 | ValueError 148 | When GSTools version is not v1.3 or greater. 149 | ValueError 150 | When given Variogram model is not supported ('harmonize'). 151 | 152 | Warns 153 | ----- 154 | Warning 155 | If the Variogram is a cross-variogram 156 | 157 | Returns 158 | ------- 159 | :any:`Krige ` 160 | Instantiated GSTools Krige class. 
161 | 162 | See Also 163 | -------- 164 | gstools.Krige 165 | 166 | """ 167 | # try to import gstools and notify user if not installed 168 | try: 169 | import gstools as gs 170 | except ImportError as e: # pragma: no cover 171 | raise ImportError("to_gstools: GSTools not installed.") from e 172 | 173 | # at least gstools>=1.3.0 is needed 174 | if list(map(int, gs.__version__.split(".")[:2])) < [1, 3]: # pragma: no cover 175 | raise ValueError("to_gstools: GSTools v1.3 or greater required.") 176 | 177 | # convert variogram to a CovModel 178 | model = skgstat_to_gstools(variogram=variogram) 179 | 180 | # extract cond_pos and cond_vals 181 | cond_pos = variogram.coordinates.T 182 | cond_vals = variogram.values 183 | 184 | # disable the re-fitting of the variogram in gstools 185 | kwargs['fit_variogram'] = False 186 | 187 | # instantiate the Krige class 188 | krige = gs.krige.Krige(model, cond_pos, cond_vals, **kwargs) 189 | 190 | # return the class 191 | return krige 192 | -------------------------------------------------------------------------------- /skgstat/plotting/variogram_plot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | try: 5 | from plotly.subplots import make_subplots 6 | import plotly.graph_objects as go 7 | except ImportError: 8 | pass 9 | 10 | 11 | def __calculate_plot_data(variogram): 12 | # get the parameters 13 | _bins = variogram.bins 14 | _exp = variogram.experimental 15 | x = np.linspace(0, np.nanmax(_bins), 100) 16 | 17 | # apply the model 18 | y = variogram.transform(x) 19 | 20 | # handle the relative experimental variogram 21 | if variogram.normalized: 22 | _bins /= np.nanmax(_bins) 23 | y /= np.max(_exp) 24 | _exp /= np.nanmax(_exp) 25 | x /= np.nanmax(x) 26 | 27 | return x, y, _bins, _exp 28 | 29 | 30 | def matplotlib_variogram_plot( 31 | variogram, 32 | axes=None, 33 | grid=True, 34 | show=True, 35 | hist=True 36 | ): 37 | # get the plotting data 38 | x, y, _bins, _exp = __calculate_plot_data(variogram) 39 | 40 | # do the plotting 41 | if axes is None: 42 | if hist: 43 | fig = plt.figure(figsize=(8, 5)) 44 | ax1 = plt.subplot2grid((5, 1), (1, 0), rowspan=4) 45 | ax2 = plt.subplot2grid((5, 1), (0, 0), sharex=ax1) 46 | fig.subplots_adjust(hspace=0) 47 | else: 48 | fig, ax1 = plt.subplots(1, 1, figsize=(8, 4)) 49 | ax2 = None 50 | elif isinstance(axes, (list, tuple, np.ndarray)): 51 | ax1, ax2 = axes 52 | fig = ax1.get_figure() 53 | else: 54 | ax1 = axes 55 | ax2 = None 56 | fig = ax1.get_figure() 57 | 58 | # ------------------------ 59 | # plot Variograms model 60 | ax1.plot(_bins, _exp, '.b') 61 | ax1.plot(x, y, '-g') 62 | 63 | # ax limits 64 | if variogram.normalized: 65 | ax1.set_xlim([0, 1.05]) 66 | ax1.set_ylim([0, 1.05]) 67 | 68 | # grid settings 69 | if grid: 70 | ax1.grid(False) 71 | ax1.vlines( 72 | _bins, 73 | *ax1.axes.get_ybound(), 74 | colors=(.85, .85, .85), 75 | linestyles='dashed' 76 | ) 77 | 78 | # always print error bars above grid 79 | conf = variogram._experimental_conf_interval 80 | if conf is not None: 81 | lo = conf[:, 1] - conf[:, 0] 82 | up = conf[:, 2] - conf[:, 1] 83 | yerr = np.column_stack((lo, up)).T 84 | ax1.errorbar(_bins, _exp, fmt='.b', yerr=yerr) 85 | 86 | # annotation 87 | ax1.axes.set_ylabel('semivariance (%s)' % variogram._estimator.__name__) 88 | ax1.axes.set_xlabel('Lag (-)') 89 | 90 | # ------------------------ 91 | # plot histogram 92 | if ax2 is not None and hist: 93 | # calc the histogram 94 | _count = np.fromiter( 95 | 
(g.size for g in variogram.lag_classes()), dtype=int 96 | ) 97 | 98 | # set the sum of hist bar widths to 70% of the x-axis space 99 | w = (np.max(_bins) * 0.7) / len(_count) 100 | 101 | # plot 102 | ax2.bar(_bins, _count, width=w, align='center', color='red') 103 | 104 | # adjust 105 | plt.setp(ax2.axes.get_xticklabels(), visible=False) 106 | ax2.axes.set_yticks(ax2.axes.get_yticks()[1:]) 107 | 108 | # need a grid? 109 | if grid: # pragma: no cover 110 | ax2.grid(False) 111 | ax2.vlines( 112 | _bins, 113 | *ax2.axes.get_ybound(), 114 | colors=(.85, .85, .85), 115 | linestyles='dashed' 116 | ) 117 | 118 | # annotate 119 | ax2.axes.set_ylabel('N') 120 | 121 | # show the figure 122 | if show: # pragma: no cover 123 | fig.show() 124 | 125 | return fig 126 | 127 | 128 | def plotly_variogram_plot( 129 | variogram, 130 | fig=None, 131 | grid=True, 132 | show=True, 133 | hist=True 134 | ): 135 | # get the plotting data 136 | x, y, _bins, _exp = __calculate_plot_data(variogram) 137 | 138 | # create the figure 139 | if fig is None: 140 | if hist: 141 | fig = make_subplots( 142 | rows=5, cols=1, shared_xaxes=True, vertical_spacing=0.0, 143 | specs=[ 144 | [{}], [{'rowspan': 4}], [None], [None], [None] 145 | ] 146 | ) 147 | else: 148 | fig = make_subplots(rows=1, cols=1) 149 | elif isinstance(fig, go.Figure): 150 | pass 151 | else: 152 | raise ValueError('fig has to be None or a plotly.Figure.') 153 | 154 | # handle error bars on experimental; plotly expects the upward error in 'array' and the downward error in 'arrayminus' 155 | conf = variogram._experimental_conf_interval 156 | if conf is not None: 157 | error_y = dict( 158 | type='data', 159 | symmetric=False, 160 | array=conf[:, 2] - conf[:, 1], 161 | arrayminus=conf[:, 1] - conf[:, 0] 162 | ) 163 | else: 164 | error_y = None 165 | 166 | # main plot 167 | fig.add_trace( 168 | go.Scatter( 169 | x=_bins, 170 | y=_exp, 171 | error_y=error_y, 172 | mode='markers', 173 | marker=dict(color='blue'), 174 | name='Experimental' 175 | ), 176 | row=2 if hist else 1, col=1 177 | ) 178 | fig.add_trace( 179 | go.Scatter( 180 | x=x, 181 | y=y, 182 | mode='lines', 183 | marker=dict(color='green'), 184 | name='%s model' % variogram.model.__name__ 185 | ), 186 | row=2 if hist else 1, col=1 187 | ) 188 | 189 | # update axis title 190 | fig.update_xaxes(title_text='Lag [-]', row=2 if hist else 1, col=1) 191 | fig.update_yaxes( 192 | title_text='semivariance (%s)' % variogram.estimator.__name__, 193 | row=2 if hist else 1, col=1 194 | ) 195 | 196 | # hist 197 | if hist: 198 | # calculate 199 | _count = np.fromiter( 200 | (g.size for g in variogram.lag_classes()), 201 | dtype=int 202 | ) 203 | 204 | fig.add_trace( 205 | go.Bar( 206 | x=_bins, 207 | y=_count, 208 | marker=dict(color='red'), 209 | name='Histogram' 210 | ) 211 | ) 212 | 213 | # title 214 | fig.update_yaxes(title_text='# of pairs', row=1, col=1) 215 | 216 | if show: 217 | fig.show() 218 | 219 | return fig 220 | -------------------------------------------------------------------------------- /skgstat/tests/test_models.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for the theoretical variogram models in skgstat.models. 3 | """ 4 | import unittest 5 | 6 | import numpy as np 7 | from numpy.testing import assert_array_almost_equal 8 | 9 | from skgstat.models import spherical, exponential 10 | from skgstat.models import gaussian, cubic, stable, matern 11 | from skgstat.models import variogram 12 | 13 | 14 | class TestModels(unittest.TestCase): 15 | def setUp(self): 16 | self.h = np.array([5, 10, 30, 50, 100]) 17 | 18 | def test_spherical_default(self): 19 | # extract the actual function 20 | f =
spherical.py_func 21 | 22 | result = [13.75, 20.0, 20.0, 20.0, 20.0] 23 | 24 | model = list(map(f, self.h, [10]*5, [20]*5)) 25 | 26 | for r, m in zip(result, model): 27 | self.assertAlmostEqual(r, m, places=2) 28 | 29 | def test_spherical_nugget(self): 30 | # extract the actual function 31 | f = spherical.py_func 32 | 33 | result = [15.44, 27.56, 33.0, 34.0, 35.0] 34 | 35 | # calculate 36 | nuggets = [1, 2, 3, 4, 5] 37 | model = list(map(f, self.h, [15] * 5, [30] * 5, nuggets)) 38 | 39 | for r, m in zip(result, model): 40 | self.assertAlmostEqual(r, m, places=2) 41 | 42 | def test_exponential_default(self): 43 | # extract the actual function 44 | f = exponential.py_func 45 | 46 | result = [5.18, 9.02, 16.69, 19., 19.95] 47 | model = list(map(f, self.h, [50]*5, [20]*5)) 48 | 49 | for r, m in zip(result, model): 50 | self.assertAlmostEqual(r, m, places=2) 51 | 52 | def test_exponential_nugget(self): 53 | # extract the actual function 54 | f = exponential.py_func 55 | 56 | result = [7.64, 13.8, 26.31, 31.54, 34.8] 57 | 58 | # calculate 59 | nuggets = [1, 2, 3, 4, 5] 60 | model = list(map(f, self.h, [60] * 5, [30] * 5, nuggets)) 61 | 62 | for r, m in zip(result, model): 63 | self.assertAlmostEqual(r, m, places=2) 64 | 65 | def test_gaussian_default(self): 66 | # extract the actual function 67 | f = gaussian.py_func 68 | 69 | result = [0.96, 3.58, 16.62, 19.86, 20.] 70 | model = list(map(f, self.h, [45]*5, [20]*5)) 71 | 72 | for r, m in zip(result, model): 73 | self.assertAlmostEqual(r, m, places=2) 74 | 75 | def test_gaussian_nugget(self): 76 | # extract the actual function 77 | f = gaussian.py_func 78 | 79 | result = [1.82, 5.15, 21.96, 32.13, 35.] 80 | 81 | # calculate 82 | nuggets = [1, 2, 3, 4, 5] 83 | model = list(map(f, self.h, [60] * 5, [30] * 5, nuggets)) 84 | 85 | for r, m in zip(result, model): 86 | self.assertAlmostEqual(r, m, places=2) 87 | 88 | def _test_cubic_default(self): 89 | # extract the actual function 90 | f = cubic.py_func 91 | 92 | result = [6.13, 21.11, 88.12, 100., 100.] 93 | model = list(map(f, self.h, [50]*5, [100]*5)) 94 | 95 | for r, m in zip(result, model): 96 | self.assertAlmostEqual(r, m, places=2) 97 | 98 | def test_cubic_nugget(self): 99 | # extract the actual function 100 | f = cubic.py_func 101 | 102 | result = [11.81, 34.74, 73., 74., 75.] 
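        # Aside (hedged note, not part of the original test suite): the model
        # functions in skgstat.models appear to be numba-compiled, which is why
        # these tests unwrap them via ``.py_func`` to reach the plain-Python
        # implementation. The positional signature is (h, range, sill, nugget):
        _ = cubic.py_func(25.0, 30.0, 70.0, 3.0)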
103 | 104 | # calculate 105 | nuggets = [1, 2, 3, 4, 5] 106 | model = list(map(f, self.h, [30] * 5, [70] * 5, nuggets)) 107 | 108 | for r, m in zip(result, model): 109 | self.assertAlmostEqual(r, m, places=2) 110 | 111 | def test_stable_default(self): 112 | # extract the actual function 113 | f = stable.py_func 114 | 115 | result = [9.05, 23.53, 75.2, 95.02, 99.98] 116 | model = list(map(f, self.h, [50]*5, [100]*5, [1.5]*5)) 117 | 118 | for r, m in zip(result, model): 119 | self.assertAlmostEqual(r, m, places=2) 120 | 121 | def test_stable_nugget(self): 122 | # extract the actual function 123 | f = stable.py_func 124 | 125 | result = [8.77, 10.8, 12.75, 13.91, 14.99] 126 | 127 | # calculate 128 | nuggets = [1, 2, 3, 4, 5] 129 | model = list(map(f, self.h, [20] * 5, [10] * 5, [0.5] * 5, nuggets)) 130 | 131 | for r, m in zip(result, model): 132 | self.assertAlmostEqual(r, m, places=2) 133 | 134 | def test_matern_default(self): 135 | # extract the actual function 136 | f = matern.py_func 137 | 138 | result = [24.64, 43.2, 81.68, 94.09, 99.65] 139 | model = list(map(f, self.h, [50]*5, [100]*5, [0.50001]*5)) 140 | 141 | for r, m in zip(result, model): 142 | self.assertAlmostEqual(r, m, places=2) 143 | 144 | def test_matern_nugget(self): 145 | # extract the actual function 146 | f = matern.py_func 147 | 148 | result = [3.44, 8.52, 12.99, 14., 15.] 149 | 150 | # calculate 151 | nuggets = [1, 2, 3, 4, 5] 152 | model = list(map(f, self.h, [20] * 5, [9.99999] * 5, [8] * 5, nuggets)) 153 | 154 | for r, m in zip(result, model): 155 | self.assertAlmostEqual(r, m, places=2) 156 | 157 | def test_matern_r_switch(self): 158 | # run the default with an extreme s value 159 | 160 | # extract the actual function 161 | f = matern.py_func 162 | 163 | result = [24.64, 43.20, 81.68, 94.09, 99.65] 164 | 165 | model = list(map(f, self.h, [50]*5, [100]*5, [0.5]*5)) 166 | 167 | assert_array_almost_equal(result, model, decimal=2) 168 | 169 | 170 | class TestVariogramDecorator(unittest.TestCase): 171 | def test_scalar(self): 172 | @variogram 173 | def scalar_function(a, b): 174 | return a, b 175 | 176 | a, b = 1, 4 177 | self.assertEqual(scalar_function(1, 4), (a, b)) 178 | 179 | def test_list(self): 180 | @variogram 181 | def adder(l, a): 182 | return l + a 183 | 184 | res = [5, 8, 12] 185 | 186 | for r, c in zip(res, adder([1, 4, 8], 4)): 187 | self.assertEqual(r, c) 188 | 189 | def test_sum_spherical(self): 190 | @variogram 191 | def sum_spherical(h, r1, c1, r2, c2, b1=0, b2=0): 192 | return spherical(h, r1, c1, b1) + spherical(h, r2, c2, b2) 193 | 194 | # Parameters for the two spherical models 195 | params = [1, 0.3, 10, 0.7] 196 | 197 | # Values at which we'll evaluate the function and its expected result 198 | vals = [0, 1, 100] 199 | res = [0, 0.3 + spherical(1, 10, 0.7), 1] 200 | 201 | for r, c in zip(res, sum_spherical(vals, *params)): 202 | self.assertEqual(r, c) 203 | 204 | 205 | if __name__=='__main__': 206 | unittest.main() 207 | -------------------------------------------------------------------------------- /skgstat/tests/test_spacetimevariogram.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | from numpy.testing import assert_array_almost_equal 5 | import matplotlib.pyplot as plt 6 | 7 | from skgstat import SpaceTimeVariogram 8 | 9 | 10 | class TestSpaceTimeVariogramInitialization(unittest.TestCase): 11 | def setUp(self): 12 | np.random.seed(42) 13 | self.c = np.random.gamma(10, 6, (60, 3)) 14 | np.random.seed(42) 15 | 
self.v = np.random.normal(15, 4, (60, 8)) 16 | 17 | def test_default_init(self): 18 | V = SpaceTimeVariogram(self.c, self.v) 19 | 20 | # test first 5 and 21 | assert_array_almost_equal( 22 | V.experimental[:5], 23 | np.array([14.527, 16.275, 16.195, 14.464, 12.619]), 24 | decimal=3 25 | ) 26 | 27 | # and last 5 28 | assert_array_almost_equal( 29 | V.experimental[-5:], 30 | np.array([13.911, 10.76 , 10.623, 9.434, 15.402]), 31 | decimal=3 32 | ) 33 | 34 | def test_values_setter(self): 35 | V = SpaceTimeVariogram(self.c, self.v) 36 | 37 | # get the differences 38 | diff = V.values 39 | 40 | # delete and reset by setter 41 | V._values = None 42 | self.assertIsNone(V.values) 43 | V.values = self.v 44 | 45 | # assert 46 | assert_array_almost_equal(V.values, diff, decimal=5) 47 | 48 | def test_set_values_raises_AttributeError(self): 49 | V = SpaceTimeVariogram(self.c, self.v) 50 | 51 | with self.assertRaises(AttributeError) as e: 52 | V.set_values(['string', 'don\'t', 'work']) 53 | self.assertEqual( 54 | str(e.exception), 55 | 'values cannot be converted to a proper ' 56 | '(m,n) shaped array.' 57 | ) 58 | 59 | def test_set_values_raises_shape_error(self): 60 | V = SpaceTimeVariogram(self.c, self.v) 61 | 62 | with self.assertRaises(ValueError) as e: 63 | V.set_values(np.random.normal(10, 5, (55, 8))) 64 | self.assertEqual( 65 | str(e.exception), 'The values shape do not match coordinates.' 66 | ) 67 | 68 | def test_set_value_raises_timeseries_error(self): 69 | V = SpaceTimeVariogram(self.c, self.v) 70 | 71 | with self.assertRaises(ValueError) as e: 72 | V.set_values(np.random.normal(0, 1, (60, 1))) 73 | self.assertEqual( 74 | str(e.exception), 75 | 'A SpaceTimeVariogram needs more than one ' 76 | 'observation on the time axis.' 77 | ) 78 | 79 | 80 | class TestSpaceTimeVariogramArguments(unittest.TestCase): 81 | def setUp(self): 82 | np.random.seed(1306) 83 | self.c = np.random.gamma(20, 10, (60, 3)) 84 | np.random.seed(1306) 85 | self.v = np.random.power(5, (60, 5)) 86 | 87 | def test_xdist_func(self): 88 | # use Manhattan distance 89 | V = SpaceTimeVariogram(self.c, self.v, xdist_func='cityblock') 90 | 91 | self.assertEqual(V.xdistance.size, 1770) 92 | # test arbitrary elements 93 | assert_array_almost_equal( 94 | V.xdistance[[10, 15, 492, 1023, 1765]], 95 | np.array([184.245, 162.17 , 46.296, 138.417, 91.457]), 96 | decimal=3 97 | ) 98 | 99 | def test_xdist_func_raises_ValueError(self): 100 | with self.assertRaises(ValueError) as e: 101 | V = SpaceTimeVariogram(self.c, self.v) 102 | V.xdist_func = lambda x: x**2 103 | 104 | self.assertEqual( 105 | str(e.exception), 'For now only str arguments are supported.' 106 | ) 107 | 108 | def test_tdist_func_raises_ValueError(self): 109 | with self.assertRaises(ValueError) as e: 110 | V = SpaceTimeVariogram(self.c, self.v) 111 | V.tdist_func = 55.4 112 | 113 | self.assertEqual( 114 | str(e.exception), 'For now only str arguments are supported.' 115 | ) 116 | 117 | def test_x_lags(self): 118 | V = SpaceTimeVariogram(self.c, self.v) 119 | 120 | self.assertEqual(V.x_lags, 10) 121 | V.x_lags = 25 122 | self.assertEqual(len(V.xbins), 25) 123 | 124 | def test_x_lags_raises_ValueError(self): 125 | with self.assertRaises(ValueError) as e: 126 | SpaceTimeVariogram(self.c, self.v, x_lags=15.4) 127 | 128 | self.assertEqual( 129 | str(e.exception), 'Only integers are supported as lag counts.'
130 | ) 131 | 132 | def test_t_lags(self): 133 | V = SpaceTimeVariogram(self.c, self.v, t_lags=2) 134 | 135 | self.assertEqual(V.t_lags, 2) 136 | self.assertEqual(len(V.tbins), 2) 137 | V.t_lags = 'max' 138 | # this is still a bug, needs to be fixed one day 139 | self.assertEqual(V.t_lags, 4) 140 | self.assertEqual(len(V.tbins), 4) 141 | 142 | def test_t_lags_unknown(self): 143 | with self.assertRaises(ValueError) as e: 144 | SpaceTimeVariogram(self.c, self.v, t_lags='min') 145 | 146 | self.assertEqual( 147 | str(e.exception), "Only 'max' supported as string argument." 148 | ) 149 | 150 | def test_autoset_lag_bins(self): 151 | V = SpaceTimeVariogram(self.c, self.v, xbins='scott', tbins='fd') 152 | 153 | # test if the bins were set correctly 154 | self.assertTrue(V.x_lags == 20) 155 | self.assertTrue(V.t_lags == 2) 156 | 157 | assert_array_almost_equal( 158 | V.xbins, 159 | np.array([21.5, 34.8, 48., 61.3, 74.5, 87.8, 101.1, 114.3, 127.6, 160 | 140.8, 154.1, 167.4, 180.6, 193.9, 207.2, 220.4, 233.7, 246.9, 161 | 260.2, 273.5]), 162 | decimal=1 163 | ) 164 | 165 | def test_change_lag_method(self): 166 | V = SpaceTimeVariogram(self.c, self.v, x_lags=4, t_lags='max') 167 | 168 | self.assertTrue(V.x_lags == V.t_lags == 4) 169 | 170 | V.set_bin_func('sqrt', 'space') 171 | 172 | self.assertTrue(V.x_lags == 43) 173 | 174 | 175 | class TestSpaceTimeVariogramPlots(unittest.TestCase): 176 | def setUp(self): 177 | np.random.seed(42) 178 | self.c = np.random.gamma(14, 8, (50, 3)) 179 | np.random.seed(42) 180 | self.v = np.random.normal(10, 5, (50, 7)) 181 | 182 | def test_plot3d_scatter_default(self): 183 | pass 184 | 185 | def test_plot3d_wrong_axis(self): 186 | with self.assertRaises(ValueError) as e: 187 | SpaceTimeVariogram(self.c, self.v)._plot3d(ax=[plt.figure(), 55]) 188 | 189 | self.assertEqual( 190 | str(e.exception), 191 | 'The passed ax object is not an instance ' 192 | 'of mpl_toolkis.mplot3d.Axes3D.' 193 | ) 194 | 195 | 196 | if __name__ == '__main__': 197 | unittest.main() 198 | -------------------------------------------------------------------------------- /skgstat/interfaces/variogram_estimator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.base import BaseEstimator 3 | from sklearn.utils.validation import check_X_y 4 | 5 | 6 | class VariogramEstimator(BaseEstimator): 7 | def __init__(self, 8 | estimator='matheron', 9 | model='spherical', 10 | dist_func='euclidean', 11 | bin_func='even', 12 | normalize=True, 13 | fit_method='trf', 14 | fit_sigma=None, 15 | use_nugget=False, 16 | maxlag=None, 17 | n_lags=10, 18 | verbose=False, 19 | use_score='rmse', 20 | cross_validate=False, 21 | **kwargs 22 | ): 23 | r"""VariogramEstimator class 24 | 25 | Interface class for usage with scikit-learn. This class is intended 26 | for usage with the GridSearchCV or Pipeline classes of scikit-learn. 27 | 28 | The input parameters are the same as for the 29 | :class:`Variogram <skgstat.Variogram>` class. 30 | Refer to the documentation there. 31 | 32 | The only parameter specific to the Estimator class is the `use_score` 33 | attribute. This can be the root mean squared error (rmse), mean squared 34 | error (mse) or mean absolute error (mae). The Estimator can either calculate 35 | the score based on the model fit (model ~ experimental) or using a 36 | leave-one-out cross-validation of an OrdinaryKriging using the model. 37 | 38 | .. versionchanged:: 0.5.4 39 | Uses ['rmse', 'mse', 'mae'] as scores exclusively now.
40 | Therefore, either the fit of the Variogram or a cross validation 41 | can be used for scoring. 42 | 43 | Parameters 44 | ---------- 45 | use_score : str 46 | Scoring parameter to assess the Variogram fitting quality. 47 | Defaults to `'rmse'`, the root mean squared error. 48 | Can be changed to `['mse', 'mae']`. 49 | cross_validate : bool 50 | .. versionadded:: 0.5.4 51 | If True, the score will be calculated from a cross-validation of 52 | the variogram model in OrdinaryKriging, rather than the model fit. 53 | 54 | Keyword Arguments 55 | ----------------- 56 | cross_n : int 57 | .. versionadded:: 0.5.4 58 | If not None, this is the number of points (and iterations) used in 59 | cross validation. Does not have any effect if `cross_validate=False`. 60 | seed : int 61 | .. versionadded:: 0.5.4 62 | Will be passed down to the 63 | :func:`cross_validate <skgstat.Variogram.cross_validate>` 64 | method of Variogram. 65 | 66 | Note 67 | ---- 68 | The workflow of this class is a bit different from the Variogram class. 69 | The Variogram parameters are passed on instantiation. The actual data, 70 | coordinates and values, are then passed to the fit method, which 71 | returns a fitted instance of the model. The predict method takes 72 | **distance** values and *predicts* the semi-variance according to the 73 | fitted model. This is in line with the Estimators of sklearn, but 74 | breaks the guidelines in one point, as the X passed to fit and 75 | predict are in fact two different things (and of different shape). 76 | 77 | """ 78 | # store all the passed attributes. 79 | # they will be needed to create the Variogram 80 | self.estimator = estimator 81 | self.model = model 82 | self.dist_func = dist_func 83 | self.bin_func = bin_func 84 | self.normalize = normalize 85 | self.fit_method = fit_method 86 | self.fit_sigma = fit_sigma 87 | self.use_nugget = use_nugget 88 | self.maxlag = maxlag 89 | self.n_lags = n_lags 90 | self.verbose = verbose 91 | 92 | # add Estimator specific attributes 93 | self.use_score = use_score 94 | self.cross_validate = cross_validate 95 | self._kwargs = kwargs 96 | 97 | # This is a workaround due to circular imports 98 | from skgstat import Variogram 99 | self.VariogramCls = Variogram 100 | 101 | def fit(self, X, y): 102 | """Fit a model 103 | 104 | Fits a variogram to the given data. 105 | 106 | Parameters 107 | ---------- 108 | X : numpy.ndarray 109 | input data coordinates. Usually 2D or 3D data, 110 | but any dimensionality is allowed. 111 | y : numpy.ndarray 112 | observation values at the locations given in X.
113 | Has to be one-dimensional. 114 | 115 | Returns 116 | ------- 117 | variogram : VariogramEstimator 118 | A fitted instance of VariogramEstimator 119 | 120 | """ 121 | # check the input data 122 | X, y = check_X_y(X, y) 123 | 124 | # build the model 125 | self.variogram = self.VariogramCls( 126 | X, y, 127 | estimator=self.estimator, 128 | model=self.model, 129 | dist_func=self.dist_func, 130 | bin_func=self.bin_func, 131 | normalize=self.normalize, 132 | fit_method=self.fit_method, 133 | fit_sigma=self.fit_sigma, 134 | use_nugget=self.use_nugget, 135 | maxlag=self.maxlag, 136 | n_lags=self.n_lags 137 | ) 138 | 139 | # append the data 140 | self.X_ = X 141 | self.y_ = y 142 | 143 | # get the fitted model function 144 | self._model_func_ = self.variogram.fitted_model 145 | 146 | # append the variogram parameters 147 | d = self.variogram.describe() 148 | self.range_ = d['effective_range'] 149 | self.sill_ = d['sill'] 150 | self.nugget_ = d['nugget'] 151 | 152 | # return 153 | return self 154 | 155 | def predict(self, X): 156 | """Predict 157 | 158 | Predicting function. A prediction in this context is 159 | the estimation of semi-variance values for a given distance 160 | array. The X here is a 1D array of distances, **not coordinates**. 161 | 162 | """ 163 | return np.fromiter(map(self._model_func_, X.flatten()), dtype=float) 164 | 165 | def score(self, X=None, y=None): 166 | """Fit score 167 | .. versionchanged:: 0.5.4 168 | Can now use cross-validated scores 169 | 170 | Score ('rmse', 'mse', 'mae') based on the fitting. 171 | 172 | """ 173 | if self.cross_validate: 174 | # check if an n was given 175 | n = self._kwargs.get('cross_n') 176 | return self.variogram.cross_validate( 177 | n=n, 178 | metric=self.use_score, 179 | seed=self._kwargs.get('seed') 180 | ) 181 | else: 182 | # return the score 183 | return getattr(self.variogram, self.use_score) 184 | -------------------------------------------------------------------------------- /skgstat/stmodels.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | 3 | import numpy as np 4 | 5 | 6 | def stvariogram(func): 7 | @wraps(func) 8 | def wrapper(*args, **kwargs): 9 | st = args[0] 10 | if st.ndim == 2: 11 | new_args = args[1:] 12 | mapping = map(lambda lags: func(lags, *new_args, **kwargs), st) 13 | return np.fromiter(mapping, dtype=float) 14 | else: 15 | return func(*args, **kwargs) 16 | return wrapper 17 | 18 | 19 | @stvariogram 20 | def sum(lags, Vx, Vt): 21 | r"""Sum space-time model 22 | 23 | Separable space-time variogram model. This is the most basic model as the 24 | two marginal models of the space and time axis are simply summed up for 25 | each lag pair. Further, there are no fitting parameters. 26 | Please consider the notes before using this model. 27 | 28 | Parameters 29 | ---------- 30 | lags : tuple 31 | Tuple of the space (x) and time (t) lag given as tuple: (x, t) which 32 | will be used to calculate the dependent semivariance. 33 | Vx : skgstat.Variogram.fitted_model 34 | instance of the space marginal variogram with a fitted theoretical 35 | model sufficiently describing the marginal. If this model does not fit 36 | the experimental variogram, the space-time model fit will be poor as 37 | well. 38 | Vt : skgstat.Variogram.fitted_model 39 | instance of the time marginal variogram with a fitted theoretical 40 | model sufficiently describing the marginal.
If this model does not fit 41 | the experimental variogram, the space-time model fit will be poor as 42 | well. 43 | 44 | Returns 45 | ------- 46 | gamma : float 47 | The semi-variance modeled for the given lags. 48 | 49 | Notes 50 | ----- 51 | This model is implemented like: 52 | 53 | .. math:: 54 | \gamma (h,t) = \gamma_x (h) + \gamma_t (t) 55 | 56 | Where :math:`\gamma_x(h)` is the spatial marginal variogram and 57 | :math:`\gamma_t(t)` is the temporal marginal variogram. 58 | 59 | It is almost never a good idea to use this model, as it assumes 60 | the covariance field to be isotropic in space and time direction, 61 | which will hardly be true. Further, it might not be strictly definite as 62 | shown by [7]_, [8]_, [9]_. 63 | 64 | References 65 | ---------- 66 | .. [7] Myers, D. E., Journel, A. (1990), Variograms with Zonal 67 | Anisotropies and Non-Invertible Kriging Systems. 68 | Mathematical Geology 22, 779-785. 69 | .. [8] Dimitrakopoulos, R. and Lou, X. (1994), Spatiotemporal modeling: 70 | covariances and ordinary kriging systems, in R. Dimitrakopoulos, 71 | (ed.) Geostatistics for the next century, Kluwer Academic Publishers, 72 | Dordrecht, 88-93. 73 | 74 | """ 75 | h, t = lags 76 | return Vx(h) + Vt(t) 77 | 78 | 79 | @stvariogram 80 | def product(lags, Vx, Vt, Cx, Ct): 81 | r"""Product model 82 | 83 | Separable space-time variogram model. This model is based on the product 84 | of the marginal space and time models. 85 | 86 | Parameters 87 | ---------- 88 | lags : tuple 89 | Tuple of the space (x) and time (t) lag given as tuple: (x, t) which 90 | will be used to calculate the dependent semivariance. 91 | Vx : skgstat.Variogram.fitted_model 92 | instance of the space marginal variogram with a fitted theoretical 93 | model sufficiently describing the marginal. If this model does not fit 94 | the experimental variogram, the space-time model fit will be poor as 95 | well. 96 | Vt : skgstat.Variogram.fitted_model 97 | instance of the time marginal variogram with a fitted theoretical 98 | model sufficiently describing the marginal. If this model does not fit 99 | the experimental variogram, the space-time model fit will be poor as 100 | well. 101 | Cx : float 102 | Marginal space sill. 103 | Ct : float 104 | Marginal time sill. 105 | 106 | Returns 107 | ------- 108 | gamma : float 109 | The semi-variance modeled for the given lags. 110 | 111 | Notes 112 | ----- 113 | The product model is implemented following [14]_: 114 | 115 | .. math:: 116 | \gamma (h,t) = C_x \gamma_t(t) + C_t \gamma_x(h) - \gamma_x(h) \gamma_t(t) 117 | 118 | Where :math:`\gamma_x(h)` is the spatial marginal variogram and 119 | :math:`\gamma_t(t)` is the temporal marginal variogram. 120 | 121 | References 122 | ---------- 123 | .. [14] De Cesare, L., Myers, D., and Posa, D. (2001b), FORTRAN 77 programs 124 | for space-time modeling, Computers & Geosciences 28, 205-212. 125 | 126 | """ 127 | h, t = lags 128 | return Cx * Vt(t) + Ct * Vx(h) - Vx(h) * Vt(t) 129 | 130 | 131 | @stvariogram 132 | def product_sum(lags, Vx, Vt, k1, k2, k3, Cx, Ct): 133 | r"""Product-Sum space-time model 134 | 135 | Separable space-time variogram model, based on a combination of 'sum' and 136 | 'product' models. Both base models are based on separated marginal 137 | variograms for the space and time axis. 138 | 139 | Parameters 140 | ---------- 141 | lags : tuple 142 | Tuple of the space (x) and time (t) lag given as tuple: (x, t) which 143 | will be used to calculate the dependent semivariance.
144 | Vx : skgstat.Variogram.fitted_model 145 | instance of the space marginal variogram with a fitted theoretical 146 | model sufficiently describing the marginal. If this model does not fit 147 | the experimental variogram, the space-time model fit will be poor as 148 | well. 149 | Vt : skgstat.Variogram.fitted_model 150 | instance of the time marginal variogram with a fitted theoretical 151 | model sufficiently describing the marginal. If this model does not fit 152 | the experimental variogram, the space-time model fit will be poor as 153 | well. 154 | k1 : float 155 | Fitting parameter. k1 has to be positive or zero and may not be larger 156 | than all marginal sill values. 157 | k2 : float 158 | Fitting parameter. k2 has to be positive or zero and may not be larger 159 | than all marginal sill values. 160 | k3 : float 161 | Fitting parameter. k3 has to be positive and may not be larger than 162 | all marginal sill values. 163 | Cx : float 164 | Marginal space sill. 165 | Ct : float 166 | Marginal time sill. 167 | 168 | Returns 169 | ------- 170 | gamma : float 171 | The semi-variance modeled for the given lags. 172 | 173 | Notes 174 | ----- 175 | This model implements the product-sum model as suggested by 176 | De Cesare et al. [15]_, [16]_: 177 | 178 | .. math:: 179 | \gamma_{ST}(h_s, h_t) = [k_1 C_T(0) + k_2]\gamma_S(h_s) + 180 | [k_1 C_S(0) + k_3]\gamma_T(h_t) - k_1\gamma_S(h_s) \times \gamma_T(h_t) 181 | 182 | References 183 | ---------- 184 | .. [15] De Cesare, L., Myers, D. and Posa, D. (2001a), Product-sum 185 | covariance for space-time modeling, Environmetrics 12, 11-23. 186 | .. [16] De Cesare, L., Myers, D., and Posa, D. (2001b), FORTRAN 77 programs 187 | for space-time modeling, Computers & Geosciences 28, 205-212. 188 | 189 | """ 190 | h, t = lags 191 | return (k2 + k1*Ct)*Vx(h) + (k3 + k1*Cx) * Vt(t) - k1 * Vx(h) * Vt(t) 192 | -------------------------------------------------------------------------------- /skgstat/tests/test_kriging.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | from numpy.testing import assert_array_almost_equal 5 | from skgstat import Variogram, OrdinaryKriging 6 | 7 | 8 | class TestKrigingInstantiation(unittest.TestCase): 9 | def setUp(self): 10 | np.random.seed(42) 11 | self.c = np.random.gamma(10, 4, size=(50, 2)) 12 | np.random.seed(42) 13 | self.v = np.random.normal(10, 2, size=50) 14 | self.V = Variogram(self.c, self.v, model='gaussian', normalize=False) 15 | 16 | def test_coordinates_and_values(self): 17 | ok = OrdinaryKriging(self.V) 18 | assert_array_almost_equal(self.c, ok.coords.coords) 19 | 20 | def test_coordinates_with_duplicates(self): 21 | c = self.c.copy() 22 | 23 | # create two duplicates 24 | c[14] = c[42] 25 | c[8] = c[42] 26 | 27 | V = Variogram(c, self.v) 28 | ok = OrdinaryKriging(V) 29 | 30 | # two instances should be removed 31 | self.assertEqual(len(ok.coords), 50 - 2) 32 | 33 | def test_min_points_type_check(self): 34 | with self.assertRaises(ValueError) as e: 35 | OrdinaryKriging(self.V, min_points=4.0) 36 | 37 | self.assertEqual( 38 | str(e.exception), 'min_points has to be an integer.' 39 | ) 40 | 41 | def test_min_points_negative(self): 42 | with self.assertRaises(ValueError) as e: 43 | OrdinaryKriging(self.V, min_points=-2) 44 | 45 | self.assertEqual( 46 | str(e.exception), 'min_points can\'t be negative.'
47 | ) 48 | 49 | def test_min_points_larger_max_points(self): 50 | with self.assertRaises(ValueError) as e: 51 | OrdinaryKriging(self.V, min_points=10, max_points=5) 52 | 53 | self.assertEqual( 54 | str(e.exception), 'min_points can\'t be larger than max_points.' 55 | ) 56 | 57 | def test_max_points_type_check(self): 58 | with self.assertRaises(ValueError) as e: 59 | OrdinaryKriging(self.V, max_points=16.0) 60 | 61 | self.assertEqual( 62 | str(e.exception), 'max_points has to be an integer.' 63 | ) 64 | 65 | def test_max_points_negative(self): 66 | with self.assertRaises(ValueError) as e: 67 | ok = OrdinaryKriging(self.V, max_points=10) 68 | ok.max_points = -2 69 | 70 | self.assertEqual( 71 | str(e.exception), 'max_points can\'t be negative.' 72 | ) 73 | 74 | def test_max_points_smaller_min_points(self): 75 | with self.assertRaises(ValueError) as e: 76 | ok = OrdinaryKriging(self.V, min_points=3, max_points=5) 77 | ok.max_points = 2 78 | 79 | self.assertEqual( 80 | str(e.exception), 'max_points can\'t be smaller than min_points.' 81 | ) 82 | 83 | def test_mode_settings(self): 84 | # estimate mode 85 | ok = OrdinaryKriging(self.V, mode='estimate') 86 | self.assertIsNotNone(ok._prec_g) 87 | self.assertIsNotNone(ok._prec_dist) 88 | 89 | # exact mode 90 | ok.mode = 'exact' 91 | self.assertIsNone(ok._prec_g) 92 | self.assertIsNone(ok._prec_dist) 93 | 94 | def test_mode_unknown(self): 95 | with self.assertRaises(ValueError) as e: 96 | OrdinaryKriging(self.V, mode='foo') 97 | 98 | self.assertEqual( 99 | str(e.exception), "mode has to be one of 'exact', 'estimate'." 100 | ) 101 | 102 | def test_precision_TypeError(self): 103 | with self.assertRaises(TypeError) as e: 104 | OrdinaryKriging(self.V, precision='5.5') 105 | 106 | self.assertEqual( 107 | str(e.exception), 'precision has to be of type int' 108 | ) 109 | 110 | def test_precision_ValueError(self): 111 | with self.assertRaises(ValueError) as e: 112 | OrdinaryKriging(self.V, precision=0) 113 | 114 | self.assertEqual( 115 | str(e.exception), 'The precision has be be > 1' 116 | ) 117 | 118 | def test_solver_AttributeError(self): 119 | with self.assertRaises(AttributeError) as e: 120 | OrdinaryKriging(self.V, solver='peter') 121 | 122 | self.assertEqual( 123 | str(e.exception), "solver has to be ['inv', 'numpy', 'scipy']" 124 | ) 125 | 126 | 127 | class TestPerformance(unittest.TestCase): 128 | """ 129 | The TestPerformance class is not a real unit test; it always passes. 130 | It applies some benchmarking, which could be included in the testing 131 | framework once the OrdinaryKriging class is finalized. From that 132 | point on, new code should not harm the performance significantly.
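    A hedged example of how these benchmarks can be run in isolation
    (the dotted path assumes the package is importable):

        python -m unittest skgstat.tests.test_kriging.TestPerformance -v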
133 | """ 134 | def setUp(self): 135 | # define the target field 136 | def func(x, y): 137 | return np.sin(0.02 * np.pi * y) * np.cos(0.02 * np.pi * x) 138 | 139 | # create a grid 140 | self.grid_x, self.grid_y = np.mgrid[0:100:100j, 0:100:100j] 141 | 142 | # sample the field 143 | np.random.seed(42) 144 | self.x = np.random.randint(100, size=300) 145 | np.random.seed(1337) 146 | self.y = np.random.randint(100, size=300) 147 | self.z = func(self.x, self.y) 148 | 149 | # build the Variogram and Kriging class 150 | self.V = Variogram(list(zip(self.x, self.y)), self.z, 151 | model='exponential', 152 | n_lags=15, 153 | maxlag=0.4, 154 | normalize=False 155 | ) 156 | self.ok = OrdinaryKriging(self.V, min_points=2, max_points=5, perf=True) 157 | 158 | def _run_benchmark(self, points): 159 | xi = self.grid_x.flatten()[:points] 160 | yi = self.grid_y.flatten()[:points] 161 | 162 | # run 163 | res = self.ok.transform(xi, yi) 164 | self.ok.perf_dist *= 1000 165 | self.ok.perf_mat *= 1000 166 | self.ok.perf_solv *= 1000 167 | 168 | print('Benchmarking OrdinaryKriging...') 169 | print('-------------------------------') 170 | print('Points:', points) 171 | print('Solver:', self.ok.solver) 172 | print('Mode:', self.ok.mode) 173 | print('Build distance matrix: %.1f ms (%.4f ms each)' % 174 | (np.sum(self.ok.perf_dist), np.std(self.ok.perf_dist))) 175 | print('Build variogram matrix: %.1f ms (%.4f ms each)' % 176 | (np.sum(self.ok.perf_mat), np.std(self.ok.perf_mat))) 177 | print('Solve kriging matrix: %.1f ms (%.4f ms each)' % 178 | (np.sum(self.ok.perf_solv), np.std(self.ok.perf_solv))) 179 | print('---------------------------------------------') 180 | 181 | def test_200points_exact(self): 182 | self.ok.mode = 'exact' 183 | self.ok.solver = 'inv' 184 | self._run_benchmark(points=200) 185 | 186 | def test_2000points_exact(self): 187 | self.ok.mode = 'exact' 188 | self.ok.solver = 'inv' 189 | self._run_benchmark(points=2000) 190 | 191 | def test_200points_estimate(self): 192 | self.ok.mode = 'estimate' 193 | self.ok.solver = 'inv' 194 | self._run_benchmark(points=200) 195 | 196 | def test_2000points_estimate(self): 197 | self.ok.mode = 'estimate' 198 | self.ok.solver = 'inv' 199 | self._run_benchmark(points=2000) 200 | 201 | 202 | if __name__ == '__main__': # pragma: no cover 203 | unittest.main() 204 | -------------------------------------------------------------------------------- /skgstat/util/uncertainty.py: -------------------------------------------------------------------------------- 1 | """ 2 | Estimate uncertainties propagated through the Variogram 3 | using a MonteCarlo approach 4 | """ 5 | from typing import Union, List 6 | from skgstat import Variogram 7 | import numpy as np 8 | from tqdm import tqdm 9 | from joblib import Parallel, delayed 10 | 11 | 12 | def propagate( 13 | variogram: Variogram = None, 14 | source: Union[str, List[str]] = 'values', 15 | sigma: Union[float, List[float]] = 5, 16 | evalf: Union[str, List[str]] = 'experimental', 17 | verbose: bool = False, 18 | use_bounds: bool = False, 19 | **kwargs 20 | ): 21 | """ 22 | Uncertainty propagation for the variogram. 23 | For a given :class:`Variogram ` 24 | instance a source of error and scale of error 25 | distribution can be specified. The function will 26 | propagate the uncertainty into different parts of 27 | the :class:`Variogram ` and 28 | return the confidence intervals or error bounds. 29 | 30 | Parameters 31 | ---------- 32 | variogram : skgstat.Variogram 33 | The base variogram. 
The variogram parameters will 34 | be used as fixed arguments for the Monte Carlo 35 | simulation. 36 | source : list 37 | Source of uncertainty. This has to be an attribute 38 | of :class:`Variogram <skgstat.Variogram>`. Right 39 | now only ``'values'`` is really supported, anything 40 | else is untested. 41 | sigma : list 42 | Standard deviation of the error distribution. 43 | evalf : list 44 | Evaluation function. This specifies which part of 45 | the :class:`Variogram <skgstat.Variogram>` should be 46 | evaluated. Possible values are 47 | ``'experimental'`` for the experimental variogram, 48 | ``'model'`` for the fitted model and ``'parameter'`` 49 | for the variogram parameters. 50 | verbose : bool 51 | If True, a progress bar is printed to the console 52 | while the Monte Carlo simulation runs. 53 | Defaults to False. 54 | use_bounds : bool 55 | Shortcut to set the confidence interval bounds to the 56 | minimum and maximum value and thus return the error 57 | margins over a confidence interval. 58 | 59 | Keyword Arguments 60 | ----------------- 61 | distribution : str 62 | Any valid :any:`numpy.random` distribution function, that 63 | takes the scale as argument. 64 | Defaults to ``'normal'``. 65 | q : int 66 | Width (percentile) of the confidence interval. Has to be a 67 | number between 0 and 100. 0 will result in the minimum and 68 | maximum value as bounds. 100 turns both bounds into the 69 | median value. 70 | Defaults to ``10``. 71 | num_iter : int 72 | Number of iterations used in the Monte Carlo simulation. 73 | Defaults to ``500``. 74 | eval_at : int 75 | If evalf is set to model, the theoretical model gets evaluated 76 | at this many evenly spaced lags up to the maximum lag. 77 | Defaults to ``100``. 78 | n_jobs : int 79 | The evaluation can be performed in parallel. This will specify 80 | how many processes may be spawned in parallel. None will spawn 81 | only one (default). 82 | 83 | .. note:: 84 | This is an untested experimental feature. 85 | 86 | Returns 87 | ------- 88 | conf_interval : numpy.ndarray 89 | Confidence interval of the uncertainty propagation as 90 | [lower, median, upper]. If more than one evalf is given, a 91 | list of ndarrays will be returned. 92 | See notes for more details. 93 | 94 | Notes 95 | ----- 96 | For each member of the evaluated property, the lower and upper bound 97 | along with the median value is returned as ``[low, median, up]``. 98 | Thus the returned array has the shape ``(N, 3)``. 99 | N is the length of evaluated property, which is 100 | :func:`n_lags <skgstat.Variogram.n_lags>` for ``'experimental'``, 101 | the number of variogram parameters (``3``, or ``4`` for models 102 | with an additional shape parameter) for ``'parameter'``, 103 | and ``100`` for ``'model'`` as the model gets evaluated at 104 | 100 evenly spaced lags up to the maximum lag class. This amount 105 | can be changed using the eval_at parameter. 106 | 107 | If more than one evalf parameter is given, the Variogram will be 108 | evaluated at multiple steps and each one will be returned as a 109 | confidence interval. Thus if ``len(evalf) == 2``, a list containing 110 | two confidence interval matrices will be returned. 111 | The order is [experimental, parameter, model].
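    Examples
    --------
    A minimal, hedged usage sketch (synthetic data and made-up parameter
    values, not taken from the package documentation):

    >>> import numpy as np
    >>> from skgstat import Variogram
    >>> coords = np.random.random((50, 2)) * 100
    >>> vals = np.random.normal(10, 2, size=50)
    >>> V = Variogram(coords, vals, n_lags=15)
    >>> conf = propagate(V, source='values', sigma=2.0,
    ...                  evalf='experimental', num_iter=100, seed=42)
    >>> conf.shape   # one (low, median, up) triple per lag class
    (15, 3)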
112 | 113 | """ 114 | # handle error bounds shortcut 115 | if use_bounds: 116 | kwargs['q'] = 0 117 | 118 | # extract the MetricSpace to speed things up a bit 119 | metricSpace = variogram._X 120 | 121 | # get the source of error 122 | if isinstance(source, str): 123 | source = [source] 124 | 125 | if not isinstance(sigma, (list, tuple)): 126 | sigma = [sigma] 127 | 128 | if isinstance(evalf, str): 129 | evalf = [evalf] 130 | 131 | # get the static variogram parameters 132 | _var_opts = variogram.describe().get('params', {}) 133 | omit_names = [*source, 'verbose'] 134 | args = {k: v for k, v in _var_opts.items() if k not in omit_names} 135 | 136 | # add back the metric space 137 | args['coordinates'] = metricSpace 138 | 139 | # build the parameter field 140 | num_iter = kwargs.get('num_iter', 500) 141 | rng = np.random.default_rng(kwargs.get('seed')) 142 | dist = getattr(rng, kwargs.get('distribution', 'normal')) 143 | param_field = [] 144 | 145 | for it in range(num_iter): 146 | par = {**args} 147 | 148 | # add the noisy params 149 | for s, err in zip(source, sigma): 150 | obs = getattr(variogram, s) 151 | size = len(obs) if hasattr(obs, '__len__') else 1 152 | par[s] = dist(obs, err, size=size) 153 | 154 | # append to param field 155 | param_field.append(par) 156 | 157 | # define the eval function 158 | def func(par): 159 | vario = Variogram(**par) 160 | out = [] 161 | if 'experimental' in evalf: 162 | out.append(vario.experimental) 163 | if 'parameter' in evalf: 164 | out.append(vario.parameters) 165 | if 'model' in evalf: 166 | x = np.linspace(0, np.max(vario.bins), kwargs.get('eval_at', 100)) 167 | out.append(vario.fitted_model(x)) 168 | return out 169 | 170 | # build the worker 171 | worker = Parallel(n_jobs=kwargs.get('n_jobs')) 172 | if verbose: 173 | generator = (delayed(func)(par) for par in tqdm(param_field)) 174 | else: 175 | generator = (delayed(func)(par) for par in param_field) 176 | 177 | # run 178 | result = worker(generator) 179 | 180 | # split up conf intervals 181 | conf_intervals = [] 182 | 183 | for i in range(len(evalf)): 184 | # unpack 185 | res = [result[j][i] for j in range(len(result))] 186 | 187 | # create the result 188 | ql = int(kwargs.get('q', 10) / 2) 189 | qu = 100 - int(kwargs.get('q', 10) / 2) 190 | conf_intervals.append( 191 | np.column_stack(( 192 | np.percentile(res, ql, axis=0), 193 | np.median(res, axis=0), 194 | np.percentile(res, qu, axis=0) 195 | )) 196 | ) 197 | 198 | # return 199 | if len(conf_intervals) == 1: 200 | return conf_intervals[0] 201 | return conf_intervals 202 | --------------------------------------------------------------------------------