├── .gitattributes
├── .gitignore
├── .readthedocs.yml
├── LICENSE
├── MANIFEST.in
├── README.md
├── Structural Analysis.ipynb
├── docs
│   ├── Makefile
│   ├── basics
│   │   ├── cokriging.rst
│   │   ├── kriging.rst
│   │   └── simulation.rst
│   ├── conf.py
│   ├── doc_env.yml
│   ├── index.rst
│   ├── install.rst
│   └── tutorials
│       ├── EDA.ipynb
│       └── NST.ipynb
├── gslib_help
│   ├── cokb3d_help.md
│   ├── gam_params.md
│   ├── kb2d_help.md
│   ├── kt3d_help.md
│   ├── sgsim_help.md
│   └── super_block_search.md
├── img
│   ├── Grid_Definition.png
│   ├── behavior_near_origin.png
│   ├── cokb3d_params.png
│   ├── gam_params.png
│   ├── gamv_params.png
│   ├── graph_of_lag_model.png
│   ├── head_tail.png
│   ├── kb2d_params.png
│   └── super_block_search.png
├── parameters
│   ├── gam_write_parameters.py
│   ├── gamv_write_parameters.py
│   ├── krige2d_write_params.py
│   ├── krige3d_write_params.py
│   └── sgsim_write_params.py
├── pygeostatistics
│   ├── __init__.py
│   ├── _version.py
│   ├── cokrige.py
│   ├── eda.py
│   ├── gam.py
│   ├── gamv.py
│   ├── gslib_reader.py
│   ├── krige2d.py
│   ├── krige3d.py
│   ├── normal_score_transform.py
│   ├── sgsim.py
│   ├── super_block.py
│   ├── variogram_model.py
│   └── yaml_patch.py
├── setup.cfg
├── setup.py
├── testData
│   ├── test.gslib
│   ├── test_krige2d.par
│   ├── test_krige3d.par
│   ├── test_sgsim.par
│   ├── xihuSmall_sparse_gam.par
│   ├── xihuSmall_sparse_gamv.par
│   └── xihu_sparse.gslib
├── tests
│   ├── __init__.py
│   └── test_eda.py
└── versioneer.py

/.gitattributes:
--------------------------------------------------------------------------------
*.ipynb linguist-language=Python
pygeostatistics/_version.py export-subst
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# dotenv
.env

# virtualenv
.venv/
venv/
ENV/

# Spyder project settings
.spyderproject

# Rope project settings
.ropeproject

# VS code
.vscode/

# Numpy data
*.npy

# pytest
.pytest_cache/
--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
conda:
    file: docs/doc_env.yml

python:
    version: 3
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018 Yu Hao

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
include versioneer.py
include pygeostatistics/_version.py
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# pyGeoStatistics

![status](https://img.shields.io/badge/status-alpha-green.svg)
[![Documentation Status](https://readthedocs.org/projects/pygeostatistics/badge/?version=latest)](http://pygeostatistics.readthedocs.io/en/latest/?badge=latest)

A collection of python routines (accelerated with [Numba](https://github.com/numba/numba))
and jupyter notebooks for geostatistics,
which is immensely inspired by gslib (in Fortran).

# Usage

Every routine reads its parameters from a parameter file written in `json`.
All parameters, including input/output file paths, need to be specified in
these parameter files.

I've created scripts that assist in creating parameter files; they can be
found in the `\parameters` folder.

I tried to adhere to the naming convention of `gslib` when it comes to parameter
names.

Markdown files describing the parameters needed for each routine are in
`\gslib_help`.

## Example:

```Python
from pygeostatistics import Sgsim

sgsimulator = Sgsim("testData/test_sgsim.par")
sgsimulator.simulate()
```

# Routines

- `eda.py`: exploratory data analysis.

- [`nst.py`](#normal-score-transform-nstpy): apply normal score transform to data.

- `gam.py`: calculate variogram for regular data.

- `gamv.py`: calculate variogram for irregular data.

- `sa.ipynb`: interactive structural analysis.

- [`krige2d.py`](#2d-kriging-krige2dpy): kriging 2d data.

    - Simple Kriging
    - Ordinary Kriging

- [`krige3d.py`](#3d-kriging-krige3dpy): kriging 3d data.

    - Simple Kriging
    - Ordinary Kriging
    - Universal Kriging (Kriging with a Trend)
    - Kriging the Trend
    - Kriging with External Drift
    - SK with non-stationary drift

- [`sgsim.py`](#sequential-gaussian-simulation-sgsimpy): Sequential Gaussian Simulation.

# Other Utilities

- `super_block.py`: Class for performing super block search used in kriging.
    - used in `krige3d.py`
    - used in `sgsim.py`

- `normal_score_transform.py`: Class for NST used in Gaussian Simulation.
    - used in `sgsim.py`

# Documentation

For full documentation, including installation, tutorials and PDF documents, please see http://pygeostatistics.readthedocs.io/.
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS  =
SPHINXBUILD = sphinx-build
SPHINXPROJ  = pyGeoStatistics
SOURCEDIR   = .
BUILDDIR    = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/docs/basics/cokriging.rst:
--------------------------------------------------------------------------------
CoKriging
=========

Intro
-----

The term kriging is traditionally reserved for linear regression using data on
the same attribute as that being estimated. For example, an unsampled porosity
value :math:`z(u)` is estimated from neighboring porosity sample values defined on
the same volume support.

The term cokriging is reserved for linear regression that also uses data defined
on different attributes.
For example, the porosity values :math:`z(u)` may be estimated
from a combination of porosity samples and related acoustic data values.

In the case of a single secondary variable (:math:`Y`), the ordinary cokriging
estimator of :math:`Z(\mathbf{u})` is written:

.. math::
    Z_{COK}^{*}(\mathbf{u})
    =\sum_{{\alpha}_{1}=1}^{{n}_{1}}{{\lambda}_{{\alpha}_{1}}(\mathbf{u})Z({\mathbf{u}}_{{\alpha}_{1}})}
    +\sum_{{\alpha}_{2}=1}^{{n}_{2}}{{\lambda}_{{\alpha}_{2}}^{'}(\mathbf{u})Y({\mathbf{u}}_{{\alpha}_{2}}^{'})}

where the :math:`{\lambda}_{{\alpha}_{1}}` are the weights applied to the :math:`{n}_{1}`
:math:`z` samples and the :math:`{\lambda}_{{\alpha}_{2}}^{'}` are the weights applied to
the :math:`n_2` :math:`y` samples.

Kriging requires a model for the :math:`Z` covariance. Cokriging requires a joint
model for the matrix of covariance functions including the :math:`Z` covariance
:math:`C_{Z}(\mathbf{h})`, the :math:`Y` covariance :math:`C_{Y}(\mathbf{h})`, the cross :math:`Z-Y`
covariance :math:`C_{ZY}(\mathbf{h})=Cov\{Z(\mathbf{u}),Y(\mathbf{u+h})\}`, and the
cross :math:`Y-Z` covariance :math:`C_{YZ}(\mathbf{h})`.

The covariance matrix requires :math:`K^2` covariance functions when :math:`K` different
variables are considered in a cokriging exercise. The inference becomes
extremely demanding in terms of data, and the subsequent joint modeling is
particularly tedious. This is the main reason why cokriging has not been
extensively used in practice. Algorithms such as kriging with an external
drift and collocated cokriging have been developed to shortcut the tedious
inference and modeling process required by cokriging.

Ordinary Cokriging
------------------

The sum of the weights applied to the primary variable is set to one, and
the sum of the weights applied to any other variable is set to zero. In the
case of two variables, these two conditions are:

.. math::
    \begin{cases}
    \sum\limits_{{\alpha}_{1}}^{}{{\lambda}_{{\alpha}_{1}}(\mathbf{u})}=1\\
    \sum\limits_{{\alpha}_{2}}^{}{{\lambda}_{{\alpha}_{2}}^{'}(\mathbf{u})}=0
    \end{cases}

The problem with this traditional formalism is that the second condition tends
to severely limit the influence of the secondary variables.

Standardized Ordinary Cokriging
-------------------------------

Often, a better approach consists of creating new secondary variables with the
same mean as the primary variable. Then all the weights are constrained to
sum to one.

In the case of two variables, the expression could be written as:

.. math::
    Z_{COK}^{*}(\mathbf{u})
    =\sum_{{\alpha}_{1}=1}^{{n}_{1}}{{\lambda}_{{\alpha}_{1}}(\mathbf{u})Z({\mathbf{u}}_{{\alpha}_{1}})}
    +\sum_{{\alpha}_{2}=1}^{{n}_{2}}{{\lambda}_{{\alpha}_{2}}^{'}(\mathbf{u})[Y({\mathbf{u}}_{{\alpha}_{2}}^{'})+{m}_{Z}-{m}_{Y}]}

with a single condition:

.. math::
    \sum_{{\alpha}_{1}=1}^{{n}_{1}}{{\lambda}_{{\alpha}_{1}}}(\mathbf{u})+\sum_{{\alpha}_{2}=1}^{{n}_{2}}{{\lambda}_{{\alpha}_{2}}^{'}}(\mathbf{u})=1

where :math:`m_Z=E\{Z(u)\}` and :math:`m_Y=E\{Y(u)\}` are the stationary means
of :math:`Z` and :math:`Y`.

Simple Cokriging
----------------

There is no constraint on the weights.
Just like simple kriging, this version
of cokriging requires working on data residuals or, equivalently, on variables
whose means have all been standardized to zero. This is the case when applying
simple cokriging in an MG approach (the normal score transforms of each variable
have a stationary mean of zero).

Collocated Cokriging
--------------------

A reduced form of cokriging consists of retaining only the collocated
variable :math:`y(\mathbf{u})`, provided that it is available at all locations
:math:`\mathbf{u}` being estimated. The cokriging estimator is written as:

.. math::
    Z_{COK}^{*}(\mathbf{u})
    =\sum_{{\alpha}_{1}=1}^{{n}_{1}}{{\lambda}_{{\alpha}_{1}}(\mathbf{u})Z({\mathbf{u}}_{{\alpha}_{1}})}
    +{\lambda}^{'}(\mathbf{u})Y(\mathbf{u})

The corresponding cokriging system requires knowledge of only the :math:`Z`
covariance :math:`C_{Z}(\mathbf{h})` and the :math:`Z-Y` cross-covariance
:math:`C_{ZY}(\mathbf{h})`. The latter can be approximated through the following model:

.. math::
    C_{ZY}(\mathbf{h})=B\cdot C_{Z}(\mathbf{h}),\quad\forall \mathbf{h}

where :math:`B=\sqrt{C_Y(0)/C_Z(0)}\cdot{\rho}_{ZY}(0)`, :math:`C_Z(0)`, :math:`C_Y(0)` are
the variances of Z and Y, and :math:`{\rho}_{ZY}(0)` is the linear coefficient
of correlation of collocated z-y data.
--------------------------------------------------------------------------------
/docs/basics/kriging.rst:
--------------------------------------------------------------------------------
Kriging
=======

Intro
-----

Kriging is **"a collection of generalized linear regression techniques for
minimizing an estimation variance defined from a prior model for a covariance".**

Consider the estimate of an unsampled value :math:`z(\mathbf{u})` from
neighboring data values :math:`z({\mathbf{u}}_{\alpha}),\alpha=1,\dots,n`.
The RF model :math:`Z(\mathbf{u})` is stationary with mean :math:`m` and
covariance :math:`C(\mathbf{h})`. In its simplest form, also known as
**Simple Kriging (SK)**, the algorithm considers the following linear estimator:

.. math::
    {Z}_{SK}^{*}(\mathbf{u})=\sum_{\alpha=1}^{n}{\lambda}_{\alpha}(\mathbf{u})
    Z({\mathbf{u}}_{\alpha})+\left(1-\sum_{\alpha=1}^{n}{\lambda}_{\alpha}
    (\mathbf{u})\right)m

The weights :math:`{\lambda}_{\alpha}(\mathbf{u})` are determined to minimize
the error variance, also called the "estimation variance." That minimization
results in a set of normal equations:

.. math::
    \sum_{\beta=1}^{n}{\lambda}_{\beta}(\mathbf{u})C({\mathbf{u}}_{\beta}-{\mathbf{u}}_{\alpha})=C(\mathbf{u}-{\mathbf{u}}_{\alpha})\\
    \forall{\alpha}=1,\dots,n

The corresponding minimized estimation variance, or kriging variance, is:

.. math::
    {\sigma}_{SK}^{2}(\mathbf{u})=C(0)-\sum_{\alpha=1}^{n}{\lambda}_{\alpha}(\mathbf{u})C(\mathbf{u}-{\mathbf{u}}_{\alpha})\geq 0

**Ordinary Kriging (OK)** is the most commonly used variant of the previous
simple kriging algorithm, whereby the sum of the weights
:math:`\sum_{\alpha=1}^{n}{\lambda}_{\alpha}(\mathbf{u})` is constrained to equal 1.
This allows building an estimator :math:`Z_{OK}^{*}(\mathbf{u})` that does not
require prior knowledge of the stationary mean :math:`m`, yet remains unbiased in the
sense that :math:`E\{{Z}_{OK}^{*}(\mathbf{u})\}=E\{Z(\mathbf{u})\}`.
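
As a quick illustration of the normal equations above, the following minimal
sketch (illustrative only, not part of this package's API; it assumes an
isotropic exponential covariance model and made-up 2-D sample locations)
builds the data-to-data covariance matrix and solves for the simple kriging
weights and variance:

.. code:: python

    import numpy as np

    def exp_cov(h, sill=1.0, a=10.0):
        # assumed isotropic exponential covariance model C(h)
        return sill * np.exp(-3.0 * h / a)

    # hypothetical sample coordinates and target location (2-D)
    coords = np.array([[0.0, 0.0], [5.0, 0.0], [0.0, 5.0]])
    target = np.array([2.0, 2.0])

    # data-to-data covariance matrix and data-to-target covariance vector
    dist = np.linalg.norm(coords[:, None, :] - coords[None, :, :], axis=-1)
    sigma = exp_cov(dist)
    sigma_0 = exp_cov(np.linalg.norm(coords - target, axis=-1))

    weights = np.linalg.solve(sigma, sigma_0)        # SK weights
    sk_variance = exp_cov(0.0) - weights @ sigma_0   # SK variance

The same solve underlies OK, with the additional unbiasedness row and
Lagrange multiplier described below.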

**Non-linear kriging** is but linear kriging performed on some non-linear
transform of the z-data, e.g., the **log-transform**
:math:`\mathrm{ln}z` provided that :math:`z>0`, or the **indicator transform**
as defined in the relation:

.. math::
    I(\mathbf{u};z)=\begin{cases}
    1,\quad Z(\mathbf{u})\leq z\\
    0,\quad \text{otherwise}
    \end{cases}

Traditionally, kriging (**SK** or **OK**) has been performed to provide
a "best" linear unbiased estimate (**BLUE**) for unsampled values
:math:`z(\mathbf{u})`, with the kriging variance being used to define Gaussian-type
confidence intervals, e.g.,

.. math::
    \text{Prob}\{Z(\mathbf{u})\in [{z}_{SK}^{*}(\mathbf{u})\pm 2{\sigma}_{SK}(\mathbf{u})]\}\cong 0.95

**Unfortunately, kriging variances of this type, being independent of the data**
**values, provide only a comparison of alternative geometric data configurations.**
**Kriging variances are usually not measures of local estimation accuracy.**

The kriging algorithm has two characteristic properties that allow its
use in determining **posterior ccdfs**. These two characteristic properties
are the basis for, respectively, the **multi-Gaussian (MG) approach** and
the **indicator kriging (IK) approach** to determination of ccdfs:

1. The Multi-Gaussian Approach: If the RF model :math:`Z(\mathbf{u})` is
   multivariate Gaussian, then the simple kriging estimate and variance identify
   the mean and variance of the posterior ccdf. In addition, since that ccdf is
   Gaussian, it is fully determined by these two parameters. This remarkable
   result is at the basis of multi-Gaussian (MG) kriging and simulation.
   The MG approach is said to be **parametric** in the sense that it determines
   the ccdfs through their parameters (mean and variance). The MG algorithm is
   remarkably fast and trouble-free; its limitation is the reliance on the very
   specific and sometimes inappropriate properties of the Gaussian RF model.

2. The Indicator Kriging Approach: If the value to be estimated is the expected
   value (mean) of a distribution, then least-squares (LS) regression
   (i.e., kriging) is a priori the preferred algorithm. The reason is that
   the LS estimator of the variable :math:`Z(\mathbf{u})` is also the LS estimator of
   its conditional expectation :math:`E\{Z(\mathbf{u})\mid(n)\}`, that is, of the
   expected value of the ccdf. Instead of the variable :math:`Z(\mathbf{u})`,
   consider its binary indicator transform :math:`I(\mathbf{u};z)`.
   Kriging of the indicator RV :math:`I(\mathbf{u};z)` provides an estimate that
   is also the best LS estimate of the conditional expectation of :math:`I(\mathbf{u};z)`.
   Now, the conditional expectation of :math:`I(\mathbf{u};z)`
   is equal to the ccdf of :math:`Z(\mathbf{u})`, indeed:

.. math::
    \begin{array}{ll}
    E\{I(\mathbf{u};z)\mid (n)\}&=1\cdot \text{Prob}\{I(\mathbf{u};z)=1\mid (n)\}
    +0\cdot \text{Prob}\{I(\mathbf{u};z)=0\mid (n)\}\\
    &=1\cdot \text{Prob}\{Z(\mathbf{u})\leq z\mid (n)\}\\
    &\equiv F(\mathbf{u};z\mid (n))
    \end{array}

Thus the kriging algorithm applied to indicator data provides LS estimates
Note that indicator kriging (IK) is not aimed at estimating the 104 | unsampled value :math:`z(\mathbf{u})` or its indicator transform 105 | :math:`I(\mathbf{u};z)` but at providing a ccdf model of uncertainty about 106 | :math:`z(\mathbf{u})`. The IK algorithm is said to be **non-parametric** 107 | in the sense that is does not approach the ccdf through its parameters 108 | (mean and variance); rather, the ccdf values for various threshold values 109 | :math:`z` are estimated directly. 110 | 111 | Types of Kriging 112 | ---------------- 113 | 114 | +-----------------------------------+---------+--------------------------+--------------+ 115 | | Kriging Form | Mean | Drift Model | Prerequisite | 116 | +===================================+=========+==========================+==============+ 117 | | Simple Kriging (SK) | Known | None | Covariance | 118 | +-----------------------------------+---------+--------------------------+--------------+ 119 | | Ordinary Kriging (OK) | Unknown | Constant | Variogram | 120 | +-----------------------------------+---------+--------------------------+--------------+ 121 | | Universal Kriging (UK) | Unknown | Functions of coordinates | Variogram | 122 | +-----------------------------------+---------+--------------------------+--------------+ 123 | | Kriging with external drift (KED) | Unknown | External variable | Variogram | 124 | +-----------------------------------+---------+--------------------------+--------------+ 125 | 126 | 127 | Simple Kriging 128 | -------------- 129 | 130 | In its simplist form, also known as simple kriging (SK), the algorithm considers 131 | the following linear estimator: 132 | 133 | .. math:: 134 | Z_{SK}^{*}(\mathbf{u}) = \sum_{\alpha=1}^{n} \lambda_{\alpha}(\mathbf{u}) Z(\mathbf{u_{\alpha}}) + \left(1-\sum_{\alpha=1}^{n}\lambda_{\alpha}(\mathbf{u})\right) m 135 | 136 | The weights :math:`\lambda_{\alpha}` are determined to minimize the error 137 | variance, also called the "estimation vairiance." That minimization result 138 | in a set of normal equations known as *Simple Kriging System*: 139 | 140 | .. math:: 141 | \sum_{\beta=1}^{n} \lambda_{\beta}(\mathbf{u}) C(\mathbf{u_{\beta}}-\mathbf{u_{\alpha}})=C(\mathbf{u}-\mathbf{{u}_{\alpha}}),\\\forall \alpha=1, ... , n 142 | 143 | In matrix notation, we have 144 | 145 | .. math:: 146 | \boldsymbol{\Sigma}\boldsymbol{\lambda}=\boldsymbol{\sigma_{0}} 147 | 148 | where :math:`\boldsymbol{\Sigma}=[{\sigma}_{\alpha\beta}]` is the :math:`N\times N` 149 | matrix of data-to-data covariances, :math:`\boldsymbol{\sigma_{0}}=[{\sigma}_{\alpha0}]` 150 | is the N-vector of covariances between the data and the target, and :math:`\boldsymbol{\lambda}=[\lambda_\alpha]` 151 | is the N-vector of solutions. 152 | 153 | The corresponding minimized estimation variance, or kirging variance, is: 154 | 155 | .. math:: 156 | \sigma_{SK}^{2}(\mathbf{u}) = C(0) - \sum_{\lambda=1}^{n}\lambda_{\alpha}(\mathbf{u}) C(\mathbf{u}-\mathbf{u_{\alpha}}) \geq 0 157 | 158 | 159 | Ordinary Kriging (OK) 160 | --------------------- 161 | 162 | Ordinary Kriging (OK) filters the mean from the SK estimator by requiring that 163 | the kriging weights sum to one. This results in the following ordinary kriging 164 | estimator: 165 | 166 | .. math:: 167 | {Z}_{OK}^{*}(\mathbf{u})=\sum_{\alpha=1}^{n}{{\lambda}_{\alpha}^{(OK)}(\mathbf{u})Z({\mathbf{u}}_{\alpha})} 168 | 169 | and the sationary OK system: 170 | 171 | .. 
    \begin{cases}
    \sum_{\beta=1}^{n}{{\lambda}_{\beta}^{(OK)}(\mathbf{u}) C({\mathbf{u}}_{\beta}-{\mathbf{u}}_{\alpha})}+\mu(\mathbf{u})=C(\mathbf{u}-{\mathbf{u}}_{\alpha}),\quad \alpha=1,\dots,n \\
    \sum_{\beta=1}^{n}{{\lambda}_{\beta}^{(OK)}(\mathbf{u})}=1
    \end{cases}

In matrix notation, the above linear equations correspond to:

.. math::
    \begin{bmatrix}{C}_{11} & {C}_{12} & \cdots & {C}_{1N} & 1 \\ {C}_{21} & {C}_{22} & \cdots & {C}_{2N} & 1 \\ \vdots & \vdots & \ddots & \vdots & \vdots \\ {C}_{N1} & {C}_{N2} & \cdots & {C}_{NN} & 1 \\ 1 & 1 & \cdots & 1 & 0\end{bmatrix} \times \begin{bmatrix}{\lambda}_{1}\\{\lambda}_{2}\\ \vdots \\{\lambda}_{N}\\ \mu \end{bmatrix}
    = \begin{bmatrix}{C}_{10}\\{C}_{20}\\ \vdots \\{C}_{N0}\\ 1 \end{bmatrix}


The kriging variance is obtained by multiplying the first N equations of the
kriging system by :math:`\lambda_\alpha`, summing over :math:`\alpha`, and
then using the last equation. The result is the OK variance:

.. math::
    {\sigma}_{OK}^{2}=E{({Z}^{*}-{Z}_{0})}^{2}={\sigma}_{00}-\sum\limits_{\alpha}{{\lambda}_{\alpha}{\sigma}_{\alpha0}}-\mu

The linear system has a unique solution if and only if the covariance matrix
:math:`\boldsymbol{\Sigma}=[{\sigma}_{\alpha\beta}]` is strictly positive
definite, which is the case if we use a strictly positive definite covariance
function model and if all data locations are distinct.

Universal Kriging (UK) or Kriging with a Trend Model (KT)
---------------------------------------------------------

The general model, which Matheron (1969) named the *universal kriging* model
for reasons explained below, assumes that the mean function can be represented
as a response surface function

.. math::
    m(x)=\sum\limits_{\mathscr{l}=0}^{L}{{a}_{\mathscr{l}}{f}^{\mathscr{l}}(x)}

where the :math:`{f}^{\mathscr{l}}(x)` are known basis functions and :math:`{a}_{\mathscr{l}}`
are fixed but unknown coefficients. Usually the first basis function
(case :math:`\mathscr{l}=0`) is the constant function identically equal to 1,
which guarantees that the constant-mean case is included in the model.
The other functions are typically monomials of low degree in the coordinates
of x (in practice, the degree does not exceed two). In the case of monomials,
the superscript :math:`\mathscr{l}`, which is an index, has the meaning of a
power (in 1D, :math:`{f}^{\mathscr{l}}(x)={x}^{\mathscr{l}}`). Note that the
above function may be regarded as a local approximation to :math:`m(x)`; that
is, the coefficients :math:`{a}_{\mathscr{l}}` may vary in space but sufficiently
slowly to be considered constant within estimation neighborhoods.

The universal kriging model is the decomposition of the variable :math:`Z(x)`
into the sum:

.. math::
    Z(x)=m(x)+Y(x)

of a smooth deterministic function :math:`m(x)`, describing the systematic aspect of
the phenomenon, and called the drift, and a zero-mean random function :math:`Y(x)`,
called the residual and capturing its erratic fluctuations. Note that the drift
refers to a technically precise notion (the mean of the RF :math:`Z`),
whereas *trend* is a generic term designating a general tendency, a systematic
effect (besides, "trend" may imply an underlying driving force).
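
To make the monomial basis concrete, here is a minimal sketch (illustrative
only, assuming 2-D coordinates and a degree-one drift) of evaluating the basis
functions at each datum; it produces exactly the matrix :math:`\mathbf{F}`
that enters the UK system presented below:

.. code:: python

    import numpy as np

    def drift_matrix(coords):
        # columns are the basis functions f^l evaluated at each datum:
        # f^0 = 1 (constant term), f^1 = x, f^2 = y
        x, y = coords[:, 0], coords[:, 1]
        return np.column_stack([np.ones(len(coords)), x, y])

    coords = np.array([[0.0, 0.0], [5.0, 0.0], [0.0, 5.0]])
    F = drift_matrix(coords)  # shape (N, L+1); the column of ones alone gives OK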

In order to minimize :math:`E{({Z}^{*}-{Z}_{0})}^{2}`, we have to make
:math:`{[E({Z}^{*}-{Z}_{0})]}^{2}` zero whatever the unknown coefficients
:math:`{a}_{\mathscr{l}}`, which implies annihilating their factors in the above.
This leads to the set of L+1 conditions:

.. math::
    \sum\limits_{\alpha}{\lambda}_{\alpha}{f}_{\alpha}^{\mathscr{l}}={f}_{0}^{\mathscr{l}}, \quad \mathscr{l}=0,1,\dots,L

that Matheron (1969) called universality conditions, hence the name universal
kriging (UK). They express that the estimator :math:`{Z}^{*}` is unbiased for
all values of :math:`{a}_{\mathscr{l}}`.

The Universal Kriging System can be expressed as:

.. math::
    \begin{cases}
    \sum\limits_{\beta}{{\lambda}_{\beta}{\sigma}_{\alpha\beta}}+\sum\limits_{\mathscr{l}}{{\mu}_{\mathscr{l}}{f}_{\alpha}^{\mathscr{l}}}={\sigma}_{\alpha0}, &\quad \alpha=1,\dots,N\\
    \sum\limits_{\alpha}{{\lambda}_{\alpha}{f}_{\alpha}^{\mathscr{l}}}={f}_{0}^{\mathscr{l}}, &\quad \mathscr{l}=0,\dots,L
    \end{cases}

In matrix notation the system is of the form :math:`\mathbf{Aw=b}` with the following structure:

.. math::
    \begin{bmatrix}
    \boldsymbol{\Sigma} & \mathbf{F} \\
    {\mathbf{F}}^{'} & 0
    \end{bmatrix}
    \begin{bmatrix}
    \boldsymbol{\lambda} \\
    \boldsymbol{\mu}
    \end{bmatrix}
    =
    \begin{bmatrix}
    {\boldsymbol{\sigma}}_{0}\\
    {\mathbf{f}}_{0}
    \end{bmatrix}

where :math:`\boldsymbol{\Sigma}`, :math:`\boldsymbol{\lambda}` and
:math:`{\boldsymbol{\sigma}}_{0}` are defined as for simple kriging and where

.. math::
    \mathbf{F}=
    \begin{bmatrix}
    1&{f}_{1}^{1}&\cdots&{f}_{1}^{L}\\
    1&{f}_{2}^{1}&\cdots&{f}_{2}^{L}\\
    \vdots&\vdots&\ddots&\vdots\\
    1&{f}_{N}^{1}&\cdots&{f}_{N}^{L}
    \end{bmatrix}, \quad
    \boldsymbol{\mu}=
    \begin{bmatrix}
    {\mu}_{0}\\
    {\mu}_{1}\\
    \vdots\\
    {\mu}_{L}
    \end{bmatrix}, \quad
    {\mathbf{f}}_{0}=
    \begin{bmatrix}
    1 \\
    {f}_{0}^{1} \\
    \vdots\\
    {f}_{0}^{L}
    \end{bmatrix}

The :math:`1`\ s in the first column of :math:`\mathbf{F}` correspond to OK.


Kriging with an External Drift
------------------------------

Kriging with an external drift variable is an extension of UK. The trend model
is limited to two terms :math:`m(\mathbf{u})={a}_{0}+{a}_{1}{f}_{1}(\mathbf{u})`, with
the term :math:`{f}_{1}(\mathbf{u})` set equal to a secondary (external) variable.
The smooth variability of the secondary variable is deemed related to that of the
primary variable :math:`Z(\mathbf{u})` being estimated.

Let :math:`y(\mathbf{u})` be the secondary variable; the trend model is then:

.. math::
    E\{Z(\mathbf{u})\}=m(\mathbf{u})={a}_{0}+{a}_{1}y(\mathbf{u})

:math:`y(\mathbf{u})` is assumed to reflect the spatial trends of the :math:`z` variability
up to a linear rescaling of units (corresponding to the two parameters :math:`{a}_{0}`
and :math:`{a}_{1}`).

The estimate of the :math:`z` variable and the corresponding system of equations are
identical to the UK estimate and system with K=1 and
:math:`{f}_{1}(\mathbf{u})={y}(\mathbf{u})`:

.. math::
    Z_{UK}^{*}(\mathbf{u})=\sum_{\alpha=1}^{n}{{\lambda}_{\alpha}^{UK}(\mathbf{u})Z({\mathbf{u}}_{\alpha})}

.. math::
    \begin{cases}
    \sum_{\beta=1}^{n}{{\lambda}_{\beta}^{UK}(\mathbf{u})C({\mathbf{u}}_{\beta}-{\mathbf{u}}_{\alpha})} + {\mu}_{0}(\mathbf{u}) + {\mu}_{1}(\mathbf{u})y({\mathbf{u}}_{\alpha}) = C(\mathbf{u}-{\mathbf{u}}_{\alpha}) &\alpha=1,\dots,n\\
    \sum_{\beta=1}^{n}{{\lambda}_{\beta}^{UK}}=1\\
    \sum_{\beta=1}^{n}{{\lambda}_{\beta}^{UK}y({\mathbf{u}}_{\beta})}=y(\mathbf{u})
    \end{cases}

The fundamental hypothesis, namely that the trend relation
:math:`m(\mathbf{u})={a}_{0}+{a}_{1}y(\mathbf{u})` holds, must make physical sense.

Two conditions must be met before applying the external drift algorithm:
(1) the external variable must vary smoothly in space, otherwise the resulting
UK system may be unstable; and (2) the external variable must be known at all
locations :math:`{\mathbf{u}}_{\alpha}` of the primary data values and at all locations
:math:`\mathbf{u}` to be estimated.

Block Kriging
-------------

The linearity of the kriging algorithm allows direct estimation of *linear*
averages of the attribute :math:`z(\mathbf{u})`. For example, consider the
estimation of the block average defined as:

.. math::
    z_{V}(\mathbf{u})=\frac{1}{|V|}\int_{V(\mathbf{u})}{z({\mathbf{u}}')d{\mathbf{u}}'}\approx \frac{1}{N}\sum_{j=1}^{N}{z({\mathbf{u}}_{j}^{'})}

where :math:`V(\mathbf{u})` is a block of measure :math:`|V|` centered at u, and the
:math:`{\mathbf{u}}_{j}^{'}` are N points discretizing the volume :math:`V(\mathbf{u})`.

Point kriging and block kriging differ only in the right-hand side of the
kriging system. Each element in the right-hand-side vector is the average
covariance between the sample point and all points in the target block, instead
of just the covariance between the sample point and the target point.
--------------------------------------------------------------------------------
/docs/basics/simulation.rst:
--------------------------------------------------------------------------------
Simulation
==========

Simulation differs from kriging, or any interpolation algorithm, in two major aspects:

1. In most interpolation algorithms, including kriging, the goal is to provide
   a "best", hence unique, local estimate of the variable or any of its trend
   components without specific regard to the resulting spatial statistics of the
   estimates taken together. **In simulation, reproduction of global features
   (texture) and statistics (histogram, covariance) take precedence over local
   accuracy.** Kriging provides a set of local representations, say
   :math:`z^{*}(\mathbf{u}),\mathbf{u}\in A`, where local accuracy prevails.
   Simulation provides alternative global representations, :math:`z^{(l)}(u),u\in A`,
   where reproduction of patterns of spatial continuity prevails.

2. Except if a Gaussian model for errors is assumed, kriging provides only an
   incomplete measure of local accuracy, and no appreciation of joint accuracy
   when several locations are considered together.
   **Simulations are designed specifically to provide such measures of accuracy,
   both local and involving several locations.**
   These measures are given by the differences between the :math:`L` alternative
   simulated values at any location (local accuracy) or the :math:`L` alternative
   simulated fields (global or joint accuracy).

Different simulation algorithms impart different global statistics and spatial
features on each realization. For example, simulated categorical values can be
made to honor specific geometrical patterns as in *object-based simulation*, or
the covariance of simulated continuous values can be made to honor a prior
covariance model as in *Gaussian-related simulations*. A hybrid approach
could be considered to generate numerical models that reflect widely different
types of features. For example, one may start with an object-based process or
categorical *indicator simulation* to generate the geometric architecture of
the various lithofacies, follow with a Gaussian algorithm to simulate the
distribution of continuous petrophysical properties within each separate lithofacies,
and then use a simulated annealing process to modify locally the petrophysical
properties to match, say, well test data.
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import sys
sys.path.insert(0, os.path.abspath('..'))
# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.doctest',
    # 'sphinx.ext.todo',
    'sphinx.ext.intersphinx',
    'sphinx.ext.coverage',
    'sphinx.ext.mathjax',
    'sphinx.ext.ifconfig',
    'sphinx.ext.viewcode',
    'sphinx.ext.napoleon',
    'sphinx.ext.autosummary',
    'nbsphinx']

# numpydoc_class_members_toctree = False

napoleon_use_param = False
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffixes as a list of strings:
#
# from recommonmark.parser import CommonMarkParser

# source_parsers = {
#     '.md': CommonMarkParser,
# }

# source_suffix = ['.rst', '.md']
# source_suffix = '.rst'

# nbsphinx_prompt_width = 0
nbsphinx_prolog = """
.. raw:: html

"""


# The master toctree document.
master_doc = 'index'

# General information about the project.
project = 'pyGeoStatistics'
copyright = '2018, Yu Hao'
author = 'Yu Hao'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = '0.1'
# The full version, including alpha/beta/rc tags.
release = '0.1.0'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# These patterns also affect html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store',
                    '*/.ipynb_checkpoints']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False


# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
import sphinx_bootstrap_theme
html_theme = 'bootstrap'
html_theme_path = sphinx_bootstrap_theme.get_html_theme_path()

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#
html_theme_options = dict(
    bootstrap_version="3",
    bootswatch_theme="simplex",
    navbar_sidebarrel=False,
    source_link_position="footer",
    globaltoc_depth=2,
    navbar_links=[
        # ("Cookbook", "cookbook/index"),
        # ("API", "api/api")
        # ("Basics", "basics/index")
    ]
)

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# -- Options for HTMLHelp output ------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'pyGeoStatisticsdoc'


# -- Options for LaTeX output ---------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'pyGeoStatistics.tex', 'pyGeoStatistics Documentation',
     'Yu Hao', 'manual'),
]


# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'pyGeoStatistics', 'pyGeoStatistics Documentation',
     [author], 1)
]


# -- Options for Texinfo output -------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'pyGeoStatistics', 'pyGeoStatistics Documentation',
     author, 'pyGeoStatistics', 'One line description of project.',
     'Miscellaneous'),
]




# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {'https://docs.python.org/': None}
--------------------------------------------------------------------------------
/docs/doc_env.yml:
--------------------------------------------------------------------------------
# doc_env.yml
# Configuration file for creating a Conda Environment with dependencies needed for pyGeoStatistics.
# Create the environment by running the following command (after installing Miniconda):
#   $ conda env create --file doc_env.yml

name: doc_env

channels:
  - defaults

dependencies:
  - python=3.6
  - numpy
  - scipy
  - pandas
  - matplotlib
  - IPython
  - pip
  - pip:
    - nbsphinx
    - sphinx_bootstrap_theme
    - sphinx-issues
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
.. pyGeoStatistics documentation master file, created by
   sphinx-quickstart on Thu Oct 26 10:58:51 2017.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

===============
pyGeoStatistics
===============

Overview
========

A collection of python routines (accelerated with Numba) and jupyter notebooks
for geostatistics, which is immensely inspired by gslib (in Fortran).


Usage
=====
Every routine reads its parameters from a parameter file written in json. All parameters, including input/output file paths, need to be specified in these parameter files.

I've created scripts that assist in creating parameter files; they can be found in the \parameters folder.

I tried to adhere to the naming convention of gslib when it comes to parameter names.

Markdown files describing the parameters needed for each routine are in \gslib_help.

Example:
========
::

    from pygeostatistics import Sgsim

    sgsimulator = Sgsim("testData/test_sgsim.par")
    sgsimulator.simulate()

Contribute
==========
- Issue Tracker: https://github.com/whimian/pyGeoStatistics/issues

- Source Code: https://github.com/whimian/pyGeoStatistics

License
=======
`MIT `_

.. toctree::
    :maxdepth: 1
    :caption: Getting Started
    :hidden:

    install

.. toctree::
    :maxdepth: 1
    :caption: Geostatistics Basics
    :hidden:

    basics/kriging
    basics/cokriging
    basics/simulation

.. toctree::
    :maxdepth: 1
    :caption: Tutorials
    :hidden:

    tutorials/EDA
    tutorials/NST
--------------------------------------------------------------------------------
/docs/install.rst:
--------------------------------------------------------------------------------
============
Installation
============

|

Dependencies
============

- Python 3.6
- NumPy 1.8 (or greater)
- Numba
- SciPy 0.13 (or greater)
- matplotlib 1.3 (or greater)

*Optional:*

* IPython
* Jupyter Notebook

Installing Python
=================
The recommended way to install Python and the dependencies of this package is
using the conda package manager from Anaconda Inc. You may download and install
Miniconda from https://conda.io/miniconda which contains both Python and the
conda package manager.

Installing pyGeoStatistics
==========================
First, download the source code or clone from our Github repository.

pyGeoStatistics is recommended to be installed in a separate python environment,
which can be easily created with conda. For example, with the environments.yml file,
the following command will create an environment named pyGeoStatistics_env with
all of our dependencies installed.

.. code:: bash

    conda update conda
    conda env create --file environments.yml

Then, run the following command to install pyGeoStatistics.

.. code:: bash

    python setup.py install
--------------------------------------------------------------------------------
/gslib_help/cokb3d_help.md:
--------------------------------------------------------------------------------
# Description:

A cokriging program for points or blocks on a regular grid.

# Parameters:

- `datafl`: the input data in a simplified Geo-EAS formatted file.

- `nvar`: the number of variables (primary plus all secondary).
  For example, `nvar`=2 if there is only one secondary variable.

- `icolx`, `icoly`, `icolz` and `icolvr()`: the columns for the x, y and
  z coordinates, the primary variable to be kriged, and all secondary variables.

- `tmin` and `tmax`: all values (for all variables) strictly less than `tmin`
  and greater than or equal to `tmax` are ignored.

- `icolloc`: set to 1 if performing co-located cokriging with a gridded secondary
  variable, otherwise set to 0.

- `secfl`: if co-located cokriging, the file with
  the gridded secondary variable.

- `icolsec`: if co-located cokriging, the column number for the secondary
  variable in `secfl`.

- `idbg`: an integer debugging level between 0 and 3. The higher the
  debugging level the more output. Normally level 0 or 1 should be chosen.

- `dbgfl`: the debugging output is written to this file.

- `outfl`: the output grid is written to this file. The output file will
  contain both the kriging estimate and the kriging variance for all
  points/blocks. The output grid cycles fastest on x then y then z.

- `nx`, `xmn`, `xsiz`: definition of the grid system (x axis).

- `ny`, `ymn`, `ysiz`: definition of the grid system (y axis).

- `nz`, `zmn`, `zsiz`: definition of the grid system (z axis).

- `nxdis`, `nydis` and `nzdis`: the number of discretization points for
  a block.
  If `nxdis`, `nydis` and `nzdis` are set to 1 then point cokriging
  is performed.

- `ndmin`, `ndmaxp` and `ndmaxs`: the minimum and maximum number of primary
  data, and the maximum number of secondary data (regardless of which secondary
  variable) to use for kriging a block.

- `pradius_hmax`, `pradius_hmin` and `pradius_vert`:
  search radii for primary data

- `sradius_hmax`, `sradius_hmin` and `sradius_vert`:
  search radii for secondary data (same for all types)

- `sangle`, `sangle1` and `sangle2`: the angles defining the common
  orientation of the search ellipsoids for primary and secondary data

- `ktype`: the kriging type must be specified:
    - 0 = simple cokriging;
    - 1 = standardized ordinary cokriging with re-centered variables
      and a single unbiasedness constraint;
    - 2 = traditional ordinary cokriging.

- `mean()`: the means of the primary and all secondary variables are required
  input if either simple cokriging or standardized ordinary cokriging is used.
  The program calculates the data residuals from these means.

The direct and cross variograms may be specified in any order; they are
specified according to the variable number. Variable `1` is the primary
(regardless of its column ordering in the input data files)
and the secondary variables are numbered from `2` depending on their
ordered specification in `icolvr()`. It is unnecessary to specify
the `j` to `i` cross variogram if the `i` to `j` cross variogram
has been specified; the cross variogram is expected to be symmetric
(as from theory). For each `i` to `j` variogram the following are required:

- `nst` and `c0`: the number of variogram structures and the isotropic
  nugget constant. The nugget constant does not count as a structure.

- For each of the `nst` nested structures one must define `it`, the type of
  structure (the power model is not allowed);
    - `cc`, the c parameter;
    - `ang1`, `ang2`, `ang3`, the angles defining the geometric anisotropy;
    - `aa_hmax`, the maximum horizontal range;
    - `aa_hmin`, the minimum horizontal range; and
    - `aa_vert`, the vertical range.

# Application notes:

The construction of the cokriging matrix requires the **linear model of
coregionalization**. The input variogram parameters are checked for
positive definiteness. *The power model is not allowed.*

A specific search is done for secondary data (same for all secondary)
allowing the option of collocated cokriging.

A cokriging program for scattered points and cross validation is not provided;
programs `cokb3d` and `kt3d` could be combined for this purpose.
--------------------------------------------------------------------------------
/gslib_help/gam_params.md:
--------------------------------------------------------------------------------
`datafl`: the input data in a simplified Geo-EAS formatted file. The data are
ordered rowwise (X cycles fastest, then Y, then Z).

`nvar` and `ivar(1)` ... `ivar(nvar)`: the number of variables and their
columns in the data file.

`tmin` and `tmax`: all values, regardless of which variable, strictly less
than tmin and greater than or equal to tmax are ignored.

`outfl`: the output variograms are written to a single output file named outfl.
The output file contains the variograms ordered by direction and then by the
variogram type specified in the parameter file (the directions cycle fastest,
then the variogram number). For each variogram there is a one-line description
and then nlag lines, each with the following:

1. lag number (increasing from 1 to nlag).
2. average separation distance for the lag.
3. the *semivariogram* value (whatever type was specified).
4. number of pairs for the lag.
5. mean of the data contributing to the tail.
6. mean of the data contributing to the head.
7. the tail and head variances (for the correlogram).

`igrid`: the grid or realization number. Recall that realizations or grids are
written one after another; therefore, if igrid=2 the input file must contain
at least 2 nx ny nz values and the second set of nx ny nz values will be taken
as the second grid.

`nx`, `xmn`, `xsiz`: definition of the grid system (x axis)

`ny`, `ymn`, `ysiz`: definition of the grid system (y axis)

`nz`, `zmn`, `zsiz`: definition of the grid system (z axis)
--------------------------------------------------------------------------------
/gslib_help/kb2d_help.md:
--------------------------------------------------------------------------------
# Description:

This is a straightforward 2-D simple and ordinary kriging subroutine that can
be used as is or as a basis for custom kriging programs.


# Parameters:

- `datafl`: the input data in a simplified Geo-EAS formatted file.

- `icolx`, `icoly` and `icolvr`: the columns for the x and y coordinates, and
  the variable to be kriged.

- `tmin` and `tmax`: all values strictly less than `tmin` and greater than or
  equal to `tmax` are ignored.

- `idbg`: an integer debugging level between 0 and 3. The higher the debugging
  level, the more output. Normally level 0 or 1 should be chosen. If there are
  suspected problems, or if you would like to see the actual kriging matrices,
  level 2 or 3 can be chosen. It is advisable to restrict the actual number of
  points being estimated when the debugging level is high (the debugging file
  can become extremely large.)

- `dbgfl`: the debugging output is written to this file.

- `outfl`: the output grid is written to this file. The output file will contain
  both the kriging estimates and the kriging variance for all points/blocks. The
  output grid cycles fastest on *x* and then *y*.

- `nx`, `xmn`, `xsiz`: definition of the grid system (*x* axis).

- `ny`, `ymn`, `ysiz`: definition of the grid system (*y* axis).

- `nxdis` and `nydis`: the number of discretization points for a block. If both
  `nxdis` and `nydis` are set to 1, then point kriging is performed.

- `ndmin` and `ndmax`: the minimum and maximum number of data points to use for
  kriging a block.

- `radius`: the maximum isotropic search radius.

- `isk` and `skmean`: if `isk`=0, then simple kriging will be performed with a
  mean of `skmean`.

- `nst` and `c0`: the number of variogram structures and the isotropic nugget
  constant. The nugget constant does not count as a structure.

- For each of the `nst` nested structures one must define `it`, the type of
  structure; `cc`, the *c* parameter; `azm`, the azimuth of the direction of
  maximum continuity; `a_max`, the maximum range; and `a_min`, the minimum range.
  A detailed description of these
  parameters is given in section II.3.
--------------------------------------------------------------------------------
/gslib_help/kt3d_help.md:
--------------------------------------------------------------------------------
# Description:

The program kt3d provides a fairly advanced 3-D kriging program for points or
blocks by simple kriging (SK), ordinary kriging (OK), or kriging with a
polynomial trend model (KT) with up to nine monomial terms. The program works
in 2-D and is faster than kb2d if there are many data. One of the features
that makes this program fairly fast is the super block search.

# Parameters:

- `datafl`: the input data in a simplified Geo-EAS formatted file.

- `icolx`, `icoly`, `icolz`, `icolvr` and `icolsec`: the columns for the x, y,
  and z coordinates, the variable to be estimated, and the external drift
  variable (or non-stationary mean).

- `tmin` and `tmax`: all values strictly less than tmin and greater than or equal
  to tmax are ignored.

- `option`: set to 0 for kriging a grid of points or blocks, to 1 for cross
  validation with the data in datafl, and to 2 for jackknifing with data in the
  following file.

- `jackfl`: file with locations to perform estimation (jackknife option).

- `icolx`, `icoly`, `icolz`, `icolvr` and `icolsec`: the columns for the x, y,
  and z coordinates, the variable, and the secondary variable in `jackfl`.

- `idbg`: an integer debugging level between 0 and 3. The higher the debugging
  level the more output. The normal levels are 0 and 1, which summarize the
  results. Levels 2 and 3 provide all the kriging matrices and data used for
  the estimation of every point/block. It is recommended that a high debugging
  level not be used with a large grid.

- `dbgfl`: the debugging output is written to this file.

- `outfl`: the output grid is written to this file. The output contains the
  estimate and the kriging variance for every point/block on the grid, cycling
  fastest on x then y and finally z. Unestimated points are flagged with a large
  negative number (-999.). The parameter UNEST, in the source code, can be
  changed if a different number is preferred.

- `nx`, `xmn`, `xsiz`: definition of the grid system (x axis).

- `ny`, `ymn`, `ysiz`: definition of the grid system (y axis).

- `nz`, `zmn`, `zsiz`: definition of the grid system (z axis).

- `nxdis`, `nydis` and `nzdis`: the number of discretization points for a block.
  If nxdis, nydis and nzdis are all set to 1 then point kriging is performed.

- `ndmin` and `ndmax`: the minimum and maximum number of data points to use for
  kriging a block.

- `noct`: the maximum number to retain from an octant (an octant search is not
  used if `noct`=0)

- `radius_hmax`, `radius_hmin` and `radius_vert`: the search radii in the maximum
  horizontal direction, minimum horizontal direction, and vertical direction
  (see angles below).

- `sang1`, `sang2` and `sang3`: the angle parameters that describe the
  orientation of the search ellipsoid. See the discussion on anisotropy
  specification associated with Figure II.4.
65 |
66 | - `ikrige` and `skmean`:
67 |     - if `ikrige` is set to 0 then stationary simple kriging with mean `skmean` will be performed,
68 |     - if `ikrige` is set to 1 then ordinary kriging will be performed,
69 |     - if `ikrige` is set to 2 then non-stationary simple kriging with means taken from `secfl` will be performed,
70 |     - if `ikrige` is set to 3 then kriging with an external drift will be performed.
71 |     - Note that power law variogram models (`it`=4) are not allowed with simple kriging.
72 |
73 | - `idrif(i),i=1...9`: indicators for those drift terms to be included in the
74 | trend model. `idrif(i)` is set to 1 if the drift term number `i` should be
75 | included, and is set to zero if not (an illustrative fragment is sketched
76 | after the application notes below). The nine drift terms correspond to
77 | the following:
78 |
79 |     - `i = 1` linear drift in x
80 |     - `i = 2` linear drift in y
81 |     - `i = 3` linear drift in z
82 |     - `i = 4` quadratic drift in x
83 |     - `i = 5` quadratic drift in y
84 |     - `i = 6` quadratic drift in z
85 |     - `i = 7` cross quadratic drift in xy
86 |     - `i = 8` cross quadratic drift in xz
87 |     - `i = 9` cross quadratic drift in yz
88 |
89 | - `itrend`: indicator of whether to estimate the trend (`itrend`=1) or the
90 | variable (`itrend`=0). The trend may be kriged with ordinary kriging (all
91 | `idrif(i)` values set to 0) or with any combination of trend kriging (some
92 | `idrif(i)` terms set to 1).
93 |
94 | - `secfl`: a file for the gridded external drift variable. The external drift
95 | variable is needed at all grid locations to be estimated. The origin of the
96 | grid network, the number of nodes, and the spacing of the grid nodes should
97 | be exactly the same as the grid being kriged in kt3d. This variable is used
98 | only if `ikrige`=2 or 3.
99 |
100 | - `iseccol`: the column number in secfl for the gridded secondary variable.
101 | This variable is used if `ikrige`=2 or 3.
102 |
103 | - `nst` and `c0`: the number of variogram structures and the nugget constant.
104 | The nugget constant does not count as a structure.
105 |
106 | - For each of the `nst` nested structures one must define `it`, the type of
107 | structure; `cc`, the c parameter; `ang1`,`ang2`,`ang3`, the angles defining
108 | the geometric anisotropy; `aa_hmax`, the maximum horizontal range; `aa_hmin`,
109 | the minimum horizontal range; and `aa_vert`, the vertical range.
110 |
111 | # Application notes:
112 | - The program is set up so that a novice programmer can make changes to the form
113 | of the polynomial drift. The external drift concept has been incorporated,
114 | adding an additional unbiasedness constraint to the ordinary kriging system.
115 | When using an external drift, it is necessary to know the value of the drift
116 | variable at all data locations and all the locations that will be estimated
117 | (i.e., all grid nodes).
118 |
119 | - The program also allows simple kriging with non-stationary means read from
120 | an input file. The non-stationary means must be known at all data locations
121 | and all locations to be estimated.
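For instance, in this repository's JSON parameter convention (compare `parameters/krige3d_write_params.py`, which stores the drift indicators as the boolean list `idrift`), requesting a trend model with linear drift in x and y might look like the fragment below; the entries and values are illustrative placeholders only:

```python
# Hedged sketch of the trend-related entries of a kt3d-style parameter dict,
# following the boolean 'idrift' convention of parameters/krige3d_write_params.py.
PARAMS_FRAGMENT = {
    'ikrige': 1,      # ordinary kriging of the residual from the trend
    # drift indicators i = 1..9: linear x, linear y, linear z, quadratic x,
    # quadratic y, quadratic z, cross xy, cross xz, cross yz
    'idrift': [True, True, False, False, False, False, False, False, False],
    'itrend': False,  # False: estimate the variable; True: estimate the trend itself
}
```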
-------------------------------------------------------------------------------- /gslib_help/sgsim_help.md: -------------------------------------------------------------------------------- 1 | # Description:
2 |
3 | Sequential Gaussian simulation program
4 |
5 | # Parameters:
6 |
7 | - `datafl`: the input data in a simplified Geo-EAS formatted file. If this file does not exist then an unconditional simulation will be generated.
8 |
9 | - `icolx`, `icoly`, `icolz`, `icolvr`, `icolwt` and `icolsec`: the column numbers for
10 | the x, y and z coordinates, the variable to be simulated, the declustering
11 | weight, and the secondary variable (e.g., for external drift if used).
12 | One or two of the coordinate column numbers can be set to zero, which indicates
13 | that the simulation is 2-D or 1-D. For equal weighting, set `icolwt` to zero.
14 |
15 | - `tmin` and `tmax`: all values strictly less than `tmin`
16 | and strictly greater than `tmax` are ignored.
17 |
18 | - `itrans`: if set to 0 then no transformation will be performed;
19 | the variable is assumed already standard normal (the simulation results
20 | will also be left unchanged). If `itrans`=1, transformations are performed.
21 |
22 | - `transfl`: output file for the transformation table if transformation
23 | is required (`itrans`=1).
24 |
25 | - `ismooth`: if set to 0, then the data histogram, possibly with declustering
26 | weights, is used for transformation; if set to 1, then the data are transformed
27 | according to the values in another file (perhaps from histogram smoothing).
28 |
29 | - `smthfl`: file with the values to use for transformation to normal scores
30 | (if `ismooth` is set to 1).
31 |
32 | - `icolvr` and `icolwt`: columns in smthfl for the variable and the
33 | declustering weight (set to 1 and 2 if smthfl is the output from histsmth).
34 |
35 | - `zmin` and `zmax`: the minimum and maximum allowable data values.
36 | These are used in the back transformation procedure.
37 |
38 | - `ltail` and `ltpar` specify the back transformation implementation in
39 | the lower tail of the distribution:
40 |     - `ltail`=1 implements linear interpolation to the lower limit `zmin`,
41 |     - `ltail`=2 implements power model interpolation, with w=`ltpar`,
42 |       to the lower limit `zmin`.
43 |     - The middle class interpolation is linear.
44 |
45 | - `utail` and `utpar` specify the back transformation implementation in the
46 | upper tail of the distribution:
47 |     - `utail`=1 implements linear interpolation to the upper limit `zmax`,
48 |     - `utail`=2 implements power model interpolation, with w=`utpar`,
49 |       to the upper limit `zmax`,
50 |     - `utail`=4 implements hyperbolic model extrapolation with w=`utpar`.
51 |     - The hyperbolic tail extrapolation is limited by `zmax` (see the tail-interpolation sketch after the application notes).
52 |
53 | - `idbg`: an integer debugging level between 0 and 3.
54 | The larger the debugging level, the more information written out.
55 |
56 | - `dbgfl`: the file for the debugging output.
57 |
58 | - `outfl`: the output grid is written to this file. The output file will
59 | contain the results, cycling fastest on x, then y, then z, then simulation by simulation.
60 |
61 | - `nsim`: the number of simulations to generate.
62 |
63 | - `nx`, `xmn`, `xsiz`: definition of the grid system (x axis).
64 |
65 | - `ny`, `ymn`, `ysiz`: definition of the grid system (y axis).
66 |
67 | - `nz`, `zmn`, `zsiz`: definition of the grid system (z axis).
68 |
69 | - `seed`: random number seed (a large odd integer).
70 |
71 | - `ndmin` and `ndmax`: the minimum and maximum number of original data that
72 | should be used to simulate a grid node. If there are fewer than
73 | `ndmin` data points, the node is not simulated.
74 |
75 | - `ncnode`: the maximum number of previously simulated nodes to use
76 | for the simulation of another node.
77 |
78 | - `sstrat`: if set to 0, the data and previously simulated grid nodes are
79 | searched separately: the data are searched with a *super block* search and
80 | the previously simulated nodes are searched with a *spiral search*.
81 | If set to 1, the data are relocated to grid nodes, a spiral search is used,
82 | and the parameters `ndmin` and `ndmax` are not considered.
83 |
84 | - `multgrid`: a multiple grid simulation will be performed if this is set to 1
85 | (otherwise a standard spiral search for previously simulated nodes is considered).
86 |
87 | - `nmult`: the number of multiple grid refinements to consider
88 | (used only if multgrid is set to 1).
89 |
90 | - `noct`: the number of original data to use per octant. If this parameter is
91 | set less than or equal to 0, then it is not used; otherwise, it overrides
92 | the ndmax parameter, the data are partitioned into octants, and
93 | the closest `noct` data in each octant are retained for the simulation of a grid node.
94 |
95 | - `radius_hmax`, `radius_hmin` and `radius_vert`:
96 | the search radii in the maximum horizontal direction,
97 | minimum horizontal direction, and vertical direction (see angles below).
98 |
99 | - `sang1`, `sang2` and `sang3`:
100 | the angle parameters that describe the orientation of the search ellipsoid.
101 |
102 | - `ktype`: the kriging type (
103 | 0 = simple kriging,
104 | 1 = ordinary kriging,
105 | 2 = simple kriging with a locally varying mean,
106 | 3 = kriging with an external drift, or
107 | 4 = collocated cokriging with one secondary variable)
108 | used throughout the loop over all nodes.
109 | SK is required by theory; only in cases where the number of original data
110 | found in the neighborhood is large enough can OK be used without
111 | the risk of spreading data values beyond their range of influence.
112 |
113 | - `rho`: correlation coefficient to use for collocated cokriging
114 | (used only if ktype = 4).
115 |
116 | - `secfl`: the file for the locally varying mean, the external drift variable,
117 | or the secondary variable for collocated cokriging (the secondary variable
118 | must be gridded at the same resolution as the model being constructed by sgsim).
119 |
120 | - `nst` and `c0`: the number of semivariogram structures and the isotropic nugget constant.
121 | For each of the nst nested structures one must define
122 |     - `it`, the type of structure;
123 |     - `cc`, the c parameter;
124 |     - `ang1`, `ang2`, `ang3`, the angles defining the geometric anisotropy;
125 |     - `aa_hmax`, the maximum horizontal range;
126 |     - `aa_hmin`, the minimum horizontal range; and
127 |     - `aa_vert`, the vertical range.
128 |
129 | # Application notes:
130 |
131 | This program requires standard normal data and writes standard normal simulated values. Normal score transform and back transform are to be performed outside of this program.
132 | Recall that the power model is not a legitimate model for a multiGaussian phenomenon, and it is not allowed in `sgsim`.
133 | The semivariogram model is that of the normal scores. The kriging variance is directly interpreted as the variance of the conditional distribution; consequently, the nugget constant `c0` and `c` (sill) parameters should add to 1.0.
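The lower- and upper-tail options above are simple interpolation rules applied to the cdf table built from the (transformed) data. As a rough illustration, the sketch below implements the linear (`ltail`/`utail`=1) and power-model (`ltail`/`utail`=2) rules in the style of GSLIB's power interpolation routine; `powint` here is a hypothetical helper for illustration, not a function exported by `sgsim.py`:

```python
# Hedged sketch of the tail interpolation described above; `powint` is a
# hypothetical stand-in patterned on GSLIB's power interpolation routine.
def powint(xlow, xhigh, ylow, yhigh, x, w):
    """Power-model interpolation between (xlow, ylow) and (xhigh, yhigh).

    w = 1 reduces to linear interpolation (ltail/utail = 1); other values of
    w give the power-model interpolation used when ltail/utail = 2.
    """
    if xhigh == xlow:                       # degenerate interval
        return 0.5 * (ylow + yhigh)
    return ylow + (yhigh - ylow) * ((x - xlow) / (xhigh - xlow)) ** w

# Example: back transform a simulated cdf value of 0.01 that falls below the
# lowest table entry (cdf 0.05 at z = 1.2), down to zmin = 0.0 with w = ltpar = 2:
z = powint(0.0, 0.05, 0.0, 1.2, 0.01, w=2.0)   # -> 0.048
```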
134 | -------------------------------------------------------------------------------- /gslib_help/super_block_search.md: -------------------------------------------------------------------------------- 1 | # Super Block Search
2 |
3 | The super block search strategy is an efficient algorithm to be used in cases
4 | where many points are to be estimated, using local data neighborhoods, with
5 | the same set of original data. The algorithm calls for an initial
6 | classification and ordering of the data according to a regular network of
7 | parallelepipedic blocks. This grid network is independent of the grid
8 | network of points/blocks being estimated or simulated. Typically, the size
9 | of the search network is much larger than the final estimation or simulation
10 | grid node spacing.
11 |
12 | When estimating any one point, only those data within nearby super
13 | blocks have to be checked. A large number of data are thus quickly eliminated
14 | because they have been classified in super blocks beyond the search limits.
15 | This is illustrated in 2D on Figure II.7, where an 11 by 11 super block grid
16 | network has been established over an area containing 140 data points. When
17 | estimating a point anywhere within the dark gray super block, only those data
18 | within the solid black line need be considered. Note that all search resolution
19 | less than the size of a super block has been lost. Also note that the light
20 | gray region is defined by the search ellipse (circle in this case) with its
21 | center translated to every node to be estimated within the dark gray super
22 | block. All super blocks intersected by the light gray region must be considered
23 | to ensure that all nearby data are considered for estimation of any node within
24 | the central dark gray super block.
25 |
26 | ![super_block_search](img/super_block_search.png)
27 |
28 | The first task is to build a template of super blocks, centered at the super
29 | block that contains the node being estimated. For example, the template
30 | is the relative locations of all 21 blocks enclosed by the solid line on Figure
31 | II.7. With this template, the nearby super blocks are easily established
32 | when considering any new location.
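In this repository the strategy is implemented by `pygeostatistics.super_block.SuperBlockSearcher`. The sketch below is patterned on the `_create_searcher()` method of `cokrige.py` later in this repo; the grid numbers, radius, rotation matrix and the tiny data array are placeholders, and the attribute-based interface is assumed to behave exactly as `cokrige.py` uses it:

```python
# Hedged usage sketch of SuperBlockSearcher, mirroring cokrige.py's
# _create_searcher(); all numeric values here are illustrative placeholders.
import numpy as np
from pygeostatistics.super_block import SuperBlockSearcher

# a toy data set: a structured array with 'x', 'y', 'z' fields, as built
# by the read_data() methods in this package
data = np.array([(1.0, 2.0, 0.5), (10.0, 20.0, 0.5), (30.0, 5.0, 0.5)],
                dtype=[('x', 'f8'), ('y', 'f8'), ('z', 'f8')])

searcher = SuperBlockSearcher()
searcher.nx, searcher.xmn, searcher.xsiz = 50, 0.5, 1.0   # estimation grid, x
searcher.ny, searcher.ymn, searcher.ysiz = 50, 0.5, 1.0   # estimation grid, y
searcher.nz, searcher.zmn, searcher.zsiz = 1, 0.5, 1.0    # estimation grid, z
searcher.vr = data
searcher.MAXSB = (25, 25, 1)       # resolution of the super block network
searcher.rotmat = np.eye(3)        # isotropic search, no rotation
searcher.radsqd = 100.0 ** 2       # squared search radius
searcher.noct = 0                  # no octant search
searcher.setup()                   # classify the data into super blocks
searcher.pickup()                  # build the template of nearby blocks
data = data[searcher.sort_index]   # data must be ordered by super block
searcher.vr = data                 # keep the searcher's view in sync

searcher.search(10.0, 20.0, 0.5)   # gather candidate data near one location
nearby = searcher.close_samples[:searcher.nclose]
```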
33 | 34 | -------------------------------------------------------------------------------- /img/Grid_Definition.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whimian/pyGeoStatistics/e119c4e47c57e0dc1ba3ff13e45782d0e33e0c36/img/Grid_Definition.png -------------------------------------------------------------------------------- /img/behavior_near_origin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whimian/pyGeoStatistics/e119c4e47c57e0dc1ba3ff13e45782d0e33e0c36/img/behavior_near_origin.png -------------------------------------------------------------------------------- /img/cokb3d_params.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whimian/pyGeoStatistics/e119c4e47c57e0dc1ba3ff13e45782d0e33e0c36/img/cokb3d_params.png -------------------------------------------------------------------------------- /img/gam_params.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whimian/pyGeoStatistics/e119c4e47c57e0dc1ba3ff13e45782d0e33e0c36/img/gam_params.png -------------------------------------------------------------------------------- /img/gamv_params.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whimian/pyGeoStatistics/e119c4e47c57e0dc1ba3ff13e45782d0e33e0c36/img/gamv_params.png -------------------------------------------------------------------------------- /img/graph_of_lag_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whimian/pyGeoStatistics/e119c4e47c57e0dc1ba3ff13e45782d0e33e0c36/img/graph_of_lag_model.png -------------------------------------------------------------------------------- /img/head_tail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whimian/pyGeoStatistics/e119c4e47c57e0dc1ba3ff13e45782d0e33e0c36/img/head_tail.png -------------------------------------------------------------------------------- /img/kb2d_params.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whimian/pyGeoStatistics/e119c4e47c57e0dc1ba3ff13e45782d0e33e0c36/img/kb2d_params.png -------------------------------------------------------------------------------- /img/super_block_search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whimian/pyGeoStatistics/e119c4e47c57e0dc1ba3ff13e45782d0e33e0c36/img/super_block_search.png -------------------------------------------------------------------------------- /parameters/gam_write_parameters.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Thu Nov 03 20:53:02 2016 4 | """ 5 | __author__ = "yuhao" 6 | 7 | import os 8 | import json 9 | 10 | PARAMS = { 11 | 'datafl': 'testData/xihu_sparse.gslib', 12 | 'nvar': 1, 13 | 'ivar': [1, 2], 14 | 'tmin': -1.0e21, 15 | 'tmax': 1.0e21, 16 | 'outfl': 'gam.out', 17 | 'igrid': 1, 18 | 'nx': 15, 19 | 'xmn': 0.5, 20 | 'xsiz': 5, 21 | 'ny': 23, 22 | 'ymn': 0.5, 23 | 'ysiz': 5, 24 | 'nz': 161, 25 | 'zmn': 0.5, 26 | 'zsiz': 0.5, 27 | 'ndir': 2, 28 | 'nlag': 10, 29 | 'ixd': [1, 0], 30 | 'iyd': [0, 1], 31 | 'izd': [0, 
0], 32 | 'standardize': True, 33 | 'nvarg': 5, 34 | 'ivtail': [1, 1, 2, 2, 1], 35 | 'ivhead': [1, 1, 2, 2, 1], 36 | 'ivtype': [1, 3, 1, 3, 9] 37 | } 38 | 39 | PARENT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.path.pardir) 40 | PARAM_DIR = os.path.join(PARENT_DIR, 'testData') 41 | 42 | with open(os.path.join(PARAM_DIR, 'xihuSmall_sparse_gam.par'), 'w') as fout: 43 | fout.write(json.dumps(PARAMS, sort_keys=True, indent=4)) 44 | -------------------------------------------------------------------------------- /parameters/gamv_write_parameters.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Nov 06 18:19:28 2016 4 | """ 5 | __author__ = "yuhao" 6 | 7 | import os 8 | import json 9 | 10 | PARAMS = { 11 | 'datafl': 'testData/test.gslib', 12 | 'icolx': 1, 13 | 'icoly': 2, 14 | 'icolz': 0, 15 | 'nvar': 1, 16 | 'ivar': [3, 4], 17 | 'tmin': -1.0e21, 18 | 'tmax': 1.0e21, 19 | 'outfl': 'out.dat', 20 | 'nlag': 20, 21 | 'xlag': 500.0, 22 | 'xltol': 300.0, 23 | 'ndir': 1, 24 | 'azm': [0.0], # [0.0, 0.0, 90.], 25 | 'atol': [90.0], # [90.0, 22.5, 22.5], 26 | 'bandwh': [200.0], # [200.0, 200.0, 200.0], 27 | 'dip': [0.0], # [0.0, 0.0, 0.0], 28 | 'dtol': [90.0], # [90.0, 22.5, 22.5], 29 | 'bandwd': [200.0], # [200.0, 200.0, 200.0], 30 | 'standardize': False, 31 | 'nvarg': 3, 32 | 'ivtail': [1, 1, 2], 33 | 'ivhead': [1, 1, 2], 34 | 'ivtype': [1, 3, 1] 35 | } 36 | 37 | PARENT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.path.pardir) 38 | PARAM_DIR = os.path.join(PARENT_DIR, 'testData') 39 | 40 | with open(os.path.join(PARAM_DIR, 'xihuSmall_sparse_gamv.par'), 'w') as fout: 41 | fout.write(json.dumps(PARAMS, sort_keys=True, indent=4)) 42 | -------------------------------------------------------------------------------- /parameters/krige2d_write_params.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Nov 2016 4 | """ 5 | __author__ = "yuhao" 6 | 7 | import os 8 | import json 9 | 10 | PARAMS = { 11 | 'datafl': 'testData/test.gslib', 12 | 'icolx': 0, 13 | 'icoly': 1, 14 | 'icolvr': 3, 15 | 'tmin': -1.0e21, 16 | 'tmax': 1.0e21, 17 | 'idbg': 3, 18 | 'dbgfl': 'kb2d.dbg', 19 | 'outfl': 'out.dat', 20 | 'nx': 98, 21 | 'xmn': 100, 22 | 'xsiz': 200, 23 | 'ny': 79, 24 | 'ymn': 200, 25 | 'ysiz': 1.0, 26 | 'nxdis': 1, 27 | 'nydis': 1, 28 | 'ndmin': 3, 29 | 'ndmax': 10, 30 | 'radius': 12000, 31 | 'isk': 0, 32 | 'skmean': 14.69588, 33 | 'nst': 1, 34 | 'c0': 0.05, 35 | 'it': [0], 36 | 'cc': [0.65], 37 | 'azm': [90], 38 | 'a_max':[3715.9], 39 | 'a_min': [3715.9] 40 | } 41 | 42 | PARENT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.path.pardir) 43 | PARAM_DIR = os.path.join(PARENT_DIR, 'testData') 44 | 45 | with open(os.path.join(PARAM_DIR, 'test_krige2d.par'), 'w') as fout: 46 | fout.write(json.dumps(PARAMS, sort_keys=True, indent=4)) 47 | -------------------------------------------------------------------------------- /parameters/krige3d_write_params.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Nov 2016 4 | """ 5 | __author__ = "yuhao" 6 | 7 | import os 8 | import json 9 | 10 | PARAMS = { 11 | 'datafl': 'testData/test.gslib', 12 | 'icolx': 0, 13 | 'icoly': 1, 14 | 'icolz': 2, 15 | 'icolvr': 3, 16 | 'icolsec': 4, 17 | 18 | 'tmin': -1.0e21, 19 | 'tmax': 1.0e21, 20 | 21 | 'option': 0, 22 | 'jackfl': 
'jackfl.dat', 23 | 'jicolx': 0, 24 | 'jicoly': 1, 25 | 'jicolz': 2, 26 | 'jicolvr': 3, 27 | 'jicolsec': 4, 28 | 29 | 'idbg': 3, 30 | 'dbgfl': 'kt3d.dbg', 31 | 'outfl': 'out.dat', 32 | 33 | 'nx': 98, 34 | 'xmn': 100, 35 | 'xsiz': 200, 36 | 'ny': 79, 37 | 'ymn': 100, 38 | 'ysiz': 200, 39 | 'nz': 1, 40 | 'zmn': 0, 41 | 'zsiz': 200, 42 | 43 | 'nxdis': 1, 44 | 'nydis': 1, 45 | 'nzdis': 1, 46 | 47 | 'ndmin': 1, 48 | 'ndmax': 30, 49 | 50 | 'noct': 0, 51 | 'radius_hmax': 4000, 52 | 'radius_hmin': 4000, 53 | 'radius_vert': 0, 54 | 'sang1' : 0, 55 | 'sang2' : 0, 56 | 'sang3' : 0, 57 | 58 | 'ikrige': 0, 59 | 'skmean': 14.69588, 60 | 61 | 'idrift': [False, False, False, False, False, False, False, False, False], 62 | 'itrend': False, 63 | 'secfl': 'secfl.dat', 64 | 'iseccol': 3, 65 | 66 | 'nst': 1, 67 | 'c0': 0.05, 68 | 'it': [1], 69 | 'cc': [0.65], 70 | 'ang1': [0], 71 | 'ang2': [0], 72 | 'ang3': [0], 73 | 'aa_hmax': [3715.9], 74 | 'aa_hmin': [3715.9], 75 | 'aa_vert': [3715.9], 76 | } 77 | 78 | PARENT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.path.pardir) 79 | PARAM_DIR = os.path.join(PARENT_DIR, 'testData') 80 | 81 | with open(os.path.join(PARAM_DIR, 'test_krige3d.par'), 'w') as fout: 82 | fout.write(json.dumps(PARAMS, sort_keys=True, indent=4)) 83 | -------------------------------------------------------------------------------- /parameters/sgsim_write_params.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Apr 2 2017 4 | """ 5 | __author__ = "yuhao" 6 | 7 | import os 8 | import json 9 | 10 | PARAMS = { 11 | 'datafl': 'testData/test.gslib', 12 | 'icolx': 0, 13 | 'icoly': 1, 14 | 'icolz': -1, 15 | 'icolvr': 2, 16 | 'icolsec': -1, # for external drift if used 17 | 'icolwt': -1, # declustering weights 18 | 19 | # data limits 20 | 'tmin': -1.0e21, 21 | 'tmax': 1.0e21, 22 | 'itrans': True, # boolean 23 | # output file for transformation table if transformation is needed 24 | 'transfl': 'sgsim.trn', 25 | 'ismooth': False, # boolean 26 | # file with values used for transformation to normal scores 27 | 'smthfl': 'histsmth.out', 28 | 'icolsvr': 0, 29 | 'icolswt': 1, 30 | # allowable data values used for backtransform 31 | 'zmin': 0, 32 | 'zmax': 30, 33 | # lower and upper tail model specification for backtransform 34 | 'ltail': 1, 35 | 'ltpar': 0, 36 | 'utail': 1, 37 | 'utpar': 15, 38 | # debug and output data file 39 | 'idbg': 3, 40 | 'dbgfl': 'sgsim.dbg', 41 | 'outfl': 'sgsim.out', 42 | 'nsim': 3, # number of simulation 43 | # Grid definition 44 | 'nx': 98, 45 | 'xmn': 100, 46 | 'xsiz': 200, 47 | 'ny': 79, 48 | 'ymn': 100, 49 | 'ysiz': 200, 50 | 'nz': 1, 51 | 'zmn': 0, 52 | 'zsiz': 200, 53 | 'seed': 1, # random seed 54 | # maximum and minimum data points used in kriging 55 | 'ndmin': 1, 56 | 'ndmax': 30, 57 | 'nodmax': 12, # previously simulated nodes to use 58 | 'sstrat': 0, # search strategy 59 | 'multgrid': False, # boolean 60 | 'nmult': 2, # scalar 61 | 'noct': 0, # maximum number to retain from an octant 62 | # search radii 63 | 'radius_hmax': 4000, 64 | 'radius_hmin': 4000, 65 | 'radius_vert': 0, 66 | # search anisotropy angles 67 | 'sang1' : 0, 68 | 'sang2' : 0, 69 | 'sang3' : 0, 70 | # size of covariance lookup table size 71 | 'mxctx': 30, 72 | 'mxcty': 30, 73 | 'mxctz': 30, 74 | # kriging type 75 | 'ikrige': 0, 76 | # self.skmean = params['skmean'] 77 | 'rho': 0.7, # correlation coefficient for COCOK 78 | 'varred': 0.1, # variance reduction factor for COCOK 79 | 'secfl': 'ydata.dat', 
80 |     'icollvm': 4,
81 |     # Variography definition
82 |     'nst': 1,
83 |     'c0': 0.05,
84 |     'it': [1],
85 |     'cc': [0.65],
86 |     'ang1': [0],
87 |     'ang2': [0],
88 |     'ang3': [0],
89 |     'aa_hmax': [3715.9],
90 |     'aa_hmin': [3715.9],
91 |     'aa_vert': [3715.9]
92 | }
93 |
94 | PARENT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.path.pardir)
95 | PARAM_DIR = os.path.join(PARENT_DIR, 'testData')
96 |
97 | with open(os.path.join(PARAM_DIR, 'test_sgsim.par'), 'w') as fout:
98 |     fout.write(json.dumps(PARAMS, sort_keys=True, indent=4))
99 | -------------------------------------------------------------------------------- /pygeostatistics/__init__.py: -------------------------------------------------------------------------------- 1 | from .variogram_model import spherical, exponential, gaussian, power, hole_effect
2 | from .krige3d import Krige3d
3 | from .sgsim import Sgsim
4 | from .gamv import Gamv
5 |
6 | from ._version import get_versions
7 | __version__ = get_versions()['version']
8 | del get_versions
9 | -------------------------------------------------------------------------------- /pygeostatistics/_version.py: -------------------------------------------------------------------------------- 1 |
2 | # This file helps to compute a version number in source trees obtained from
3 | # git-archive tarball (such as those provided by githubs download-from-tag
4 | # feature). Distribution tarballs (built by setup.py sdist) and build
5 | # directories (produced by setup.py build) will contain a much shorter file
6 | # that just contains the computed version number.
7 |
8 | # This file is released into the public domain. Generated by
9 | # versioneer-0.18 (https://github.com/warner/python-versioneer)
10 |
11 | """Git implementation of _version.py."""
12 |
13 | import errno
14 | import os
15 | import re
16 | import subprocess
17 | import sys
18 |
19 |
20 | def get_keywords():
21 |     """Get the keywords needed to look up the version information."""
22 |     # these strings will be replaced by git during git-archive.
23 |     # setup.py/versioneer.py will grep for the variable names, so they must
24 |     # each be defined on a line of their own. _version.py will just call
25 |     # get_keywords().
26 | git_refnames = " (HEAD -> master)" 27 | git_full = "e119c4e47c57e0dc1ba3ff13e45782d0e33e0c36" 28 | git_date = "2020-07-26 10:58:27 +0800" 29 | keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} 30 | return keywords 31 | 32 | 33 | class VersioneerConfig: 34 | """Container for Versioneer configuration parameters.""" 35 | 36 | 37 | def get_config(): 38 | """Create, populate and return the VersioneerConfig() object.""" 39 | # these strings are filled in when 'setup.py versioneer' creates 40 | # _version.py 41 | cfg = VersioneerConfig() 42 | cfg.VCS = "git" 43 | cfg.style = "pep440" 44 | cfg.tag_prefix = "" 45 | cfg.parentdir_prefix = "None" 46 | cfg.versionfile_source = "pygeostatistics/_version.py" 47 | cfg.verbose = False 48 | return cfg 49 | 50 | 51 | class NotThisMethod(Exception): 52 | """Exception raised if a method is not valid for the current scenario.""" 53 | 54 | 55 | LONG_VERSION_PY = {} 56 | HANDLERS = {} 57 | 58 | 59 | def register_vcs_handler(vcs, method): # decorator 60 | """Decorator to mark a method as the handler for a particular VCS.""" 61 | def decorate(f): 62 | """Store f in HANDLERS[vcs][method].""" 63 | if vcs not in HANDLERS: 64 | HANDLERS[vcs] = {} 65 | HANDLERS[vcs][method] = f 66 | return f 67 | return decorate 68 | 69 | 70 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, 71 | env=None): 72 | """Call the given command(s).""" 73 | assert isinstance(commands, list) 74 | p = None 75 | for c in commands: 76 | try: 77 | dispcmd = str([c] + args) 78 | # remember shell=False, so use git.cmd on windows, not just git 79 | p = subprocess.Popen([c] + args, cwd=cwd, env=env, 80 | stdout=subprocess.PIPE, 81 | stderr=(subprocess.PIPE if hide_stderr 82 | else None)) 83 | break 84 | except EnvironmentError: 85 | e = sys.exc_info()[1] 86 | if e.errno == errno.ENOENT: 87 | continue 88 | if verbose: 89 | print("unable to run %s" % dispcmd) 90 | print(e) 91 | return None, None 92 | else: 93 | if verbose: 94 | print("unable to find command, tried %s" % (commands,)) 95 | return None, None 96 | stdout = p.communicate()[0].strip() 97 | if sys.version_info[0] >= 3: 98 | stdout = stdout.decode() 99 | if p.returncode != 0: 100 | if verbose: 101 | print("unable to run %s (error)" % dispcmd) 102 | print("stdout was %s" % stdout) 103 | return None, p.returncode 104 | return stdout, p.returncode 105 | 106 | 107 | def versions_from_parentdir(parentdir_prefix, root, verbose): 108 | """Try to determine the version from the parent directory name. 109 | 110 | Source tarballs conventionally unpack into a directory that includes both 111 | the project name and a version string. 
We will also support searching up 112 | two directory levels for an appropriately named parent directory 113 | """ 114 | rootdirs = [] 115 | 116 | for i in range(3): 117 | dirname = os.path.basename(root) 118 | if dirname.startswith(parentdir_prefix): 119 | return {"version": dirname[len(parentdir_prefix):], 120 | "full-revisionid": None, 121 | "dirty": False, "error": None, "date": None} 122 | else: 123 | rootdirs.append(root) 124 | root = os.path.dirname(root) # up a level 125 | 126 | if verbose: 127 | print("Tried directories %s but none started with prefix %s" % 128 | (str(rootdirs), parentdir_prefix)) 129 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 130 | 131 | 132 | @register_vcs_handler("git", "get_keywords") 133 | def git_get_keywords(versionfile_abs): 134 | """Extract version information from the given file.""" 135 | # the code embedded in _version.py can just fetch the value of these 136 | # keywords. When used from setup.py, we don't want to import _version.py, 137 | # so we do it with a regexp instead. This function is not used from 138 | # _version.py. 139 | keywords = {} 140 | try: 141 | f = open(versionfile_abs, "r") 142 | for line in f.readlines(): 143 | if line.strip().startswith("git_refnames ="): 144 | mo = re.search(r'=\s*"(.*)"', line) 145 | if mo: 146 | keywords["refnames"] = mo.group(1) 147 | if line.strip().startswith("git_full ="): 148 | mo = re.search(r'=\s*"(.*)"', line) 149 | if mo: 150 | keywords["full"] = mo.group(1) 151 | if line.strip().startswith("git_date ="): 152 | mo = re.search(r'=\s*"(.*)"', line) 153 | if mo: 154 | keywords["date"] = mo.group(1) 155 | f.close() 156 | except EnvironmentError: 157 | pass 158 | return keywords 159 | 160 | 161 | @register_vcs_handler("git", "keywords") 162 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 163 | """Get version information from git keywords.""" 164 | if not keywords: 165 | raise NotThisMethod("no keywords at all, weird") 166 | date = keywords.get("date") 167 | if date is not None: 168 | # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant 169 | # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 170 | # -like" string, which we must then edit to make compliant), because 171 | # it's been around since git-1.5.3, and it's too difficult to 172 | # discover which version we're using, or to work around using an 173 | # older one. 174 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 175 | refnames = keywords["refnames"].strip() 176 | if refnames.startswith("$Format"): 177 | if verbose: 178 | print("keywords are unexpanded, not using") 179 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 180 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 181 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 182 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 183 | TAG = "tag: " 184 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 185 | if not tags: 186 | # Either we're using git < 1.8.3, or there really are no tags. We use 187 | # a heuristic: assume all version tags have a digit. The old git %d 188 | # expansion behaves like git log --decorate=short and strips out the 189 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 190 | # between branches and tags. By ignoring refnames without digits, we 191 | # filter out many common branch names like "release" and 192 | # "stabilization", as well as "HEAD" and "master". 
193 | tags = set([r for r in refs if re.search(r'\d', r)]) 194 | if verbose: 195 | print("discarding '%s', no digits" % ",".join(refs - tags)) 196 | if verbose: 197 | print("likely tags: %s" % ",".join(sorted(tags))) 198 | for ref in sorted(tags): 199 | # sorting will prefer e.g. "2.0" over "2.0rc1" 200 | if ref.startswith(tag_prefix): 201 | r = ref[len(tag_prefix):] 202 | if verbose: 203 | print("picking %s" % r) 204 | return {"version": r, 205 | "full-revisionid": keywords["full"].strip(), 206 | "dirty": False, "error": None, 207 | "date": date} 208 | # no suitable tags, so version is "0+unknown", but full hex is still there 209 | if verbose: 210 | print("no suitable tags, using unknown + full revision id") 211 | return {"version": "0+unknown", 212 | "full-revisionid": keywords["full"].strip(), 213 | "dirty": False, "error": "no suitable tags", "date": None} 214 | 215 | 216 | @register_vcs_handler("git", "pieces_from_vcs") 217 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 218 | """Get version from 'git describe' in the root of the source tree. 219 | 220 | This only gets called if the git-archive 'subst' keywords were *not* 221 | expanded, and _version.py hasn't already been rewritten with a short 222 | version string, meaning we're inside a checked out source tree. 223 | """ 224 | GITS = ["git"] 225 | if sys.platform == "win32": 226 | GITS = ["git.cmd", "git.exe"] 227 | 228 | out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, 229 | hide_stderr=True) 230 | if rc != 0: 231 | if verbose: 232 | print("Directory %s not under git control" % root) 233 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 234 | 235 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 236 | # if there isn't one, this yields HEX[-dirty] (no NUM) 237 | describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", 238 | "--always", "--long", 239 | "--match", "%s*" % tag_prefix], 240 | cwd=root) 241 | # --long was added in git-1.5.5 242 | if describe_out is None: 243 | raise NotThisMethod("'git describe' failed") 244 | describe_out = describe_out.strip() 245 | full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 246 | if full_out is None: 247 | raise NotThisMethod("'git rev-parse' failed") 248 | full_out = full_out.strip() 249 | 250 | pieces = {} 251 | pieces["long"] = full_out 252 | pieces["short"] = full_out[:7] # maybe improved later 253 | pieces["error"] = None 254 | 255 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 256 | # TAG might have hyphens. 257 | git_describe = describe_out 258 | 259 | # look for -dirty suffix 260 | dirty = git_describe.endswith("-dirty") 261 | pieces["dirty"] = dirty 262 | if dirty: 263 | git_describe = git_describe[:git_describe.rindex("-dirty")] 264 | 265 | # now we have TAG-NUM-gHEX or HEX 266 | 267 | if "-" in git_describe: 268 | # TAG-NUM-gHEX 269 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 270 | if not mo: 271 | # unparseable. Maybe git-describe is misbehaving? 
272 | pieces["error"] = ("unable to parse git-describe output: '%s'" 273 | % describe_out) 274 | return pieces 275 | 276 | # tag 277 | full_tag = mo.group(1) 278 | if not full_tag.startswith(tag_prefix): 279 | if verbose: 280 | fmt = "tag '%s' doesn't start with prefix '%s'" 281 | print(fmt % (full_tag, tag_prefix)) 282 | pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" 283 | % (full_tag, tag_prefix)) 284 | return pieces 285 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 286 | 287 | # distance: number of commits since tag 288 | pieces["distance"] = int(mo.group(2)) 289 | 290 | # commit: short hex revision ID 291 | pieces["short"] = mo.group(3) 292 | 293 | else: 294 | # HEX: no tags 295 | pieces["closest-tag"] = None 296 | count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], 297 | cwd=root) 298 | pieces["distance"] = int(count_out) # total number of commits 299 | 300 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 301 | date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], 302 | cwd=root)[0].strip() 303 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 304 | 305 | return pieces 306 | 307 | 308 | def plus_or_dot(pieces): 309 | """Return a + if we don't already have one, else return a .""" 310 | if "+" in pieces.get("closest-tag", ""): 311 | return "." 312 | return "+" 313 | 314 | 315 | def render_pep440(pieces): 316 | """Build up version string, with post-release "local version identifier". 317 | 318 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 319 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 320 | 321 | Exceptions: 322 | 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] 323 | """ 324 | if pieces["closest-tag"]: 325 | rendered = pieces["closest-tag"] 326 | if pieces["distance"] or pieces["dirty"]: 327 | rendered += plus_or_dot(pieces) 328 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 329 | if pieces["dirty"]: 330 | rendered += ".dirty" 331 | else: 332 | # exception #1 333 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], 334 | pieces["short"]) 335 | if pieces["dirty"]: 336 | rendered += ".dirty" 337 | return rendered 338 | 339 | 340 | def render_pep440_pre(pieces): 341 | """TAG[.post.devDISTANCE] -- No -dirty. 342 | 343 | Exceptions: 344 | 1: no tags. 0.post.devDISTANCE 345 | """ 346 | if pieces["closest-tag"]: 347 | rendered = pieces["closest-tag"] 348 | if pieces["distance"]: 349 | rendered += ".post.dev%d" % pieces["distance"] 350 | else: 351 | # exception #1 352 | rendered = "0.post.dev%d" % pieces["distance"] 353 | return rendered 354 | 355 | 356 | def render_pep440_post(pieces): 357 | """TAG[.postDISTANCE[.dev0]+gHEX] . 358 | 359 | The ".dev0" means dirty. Note that .dev0 sorts backwards 360 | (a dirty tree will appear "older" than the corresponding clean one), 361 | but you shouldn't be releasing software with -dirty anyways. 362 | 363 | Exceptions: 364 | 1: no tags. 
0.postDISTANCE[.dev0] 365 | """ 366 | if pieces["closest-tag"]: 367 | rendered = pieces["closest-tag"] 368 | if pieces["distance"] or pieces["dirty"]: 369 | rendered += ".post%d" % pieces["distance"] 370 | if pieces["dirty"]: 371 | rendered += ".dev0" 372 | rendered += plus_or_dot(pieces) 373 | rendered += "g%s" % pieces["short"] 374 | else: 375 | # exception #1 376 | rendered = "0.post%d" % pieces["distance"] 377 | if pieces["dirty"]: 378 | rendered += ".dev0" 379 | rendered += "+g%s" % pieces["short"] 380 | return rendered 381 | 382 | 383 | def render_pep440_old(pieces): 384 | """TAG[.postDISTANCE[.dev0]] . 385 | 386 | The ".dev0" means dirty. 387 | 388 | Eexceptions: 389 | 1: no tags. 0.postDISTANCE[.dev0] 390 | """ 391 | if pieces["closest-tag"]: 392 | rendered = pieces["closest-tag"] 393 | if pieces["distance"] or pieces["dirty"]: 394 | rendered += ".post%d" % pieces["distance"] 395 | if pieces["dirty"]: 396 | rendered += ".dev0" 397 | else: 398 | # exception #1 399 | rendered = "0.post%d" % pieces["distance"] 400 | if pieces["dirty"]: 401 | rendered += ".dev0" 402 | return rendered 403 | 404 | 405 | def render_git_describe(pieces): 406 | """TAG[-DISTANCE-gHEX][-dirty]. 407 | 408 | Like 'git describe --tags --dirty --always'. 409 | 410 | Exceptions: 411 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 412 | """ 413 | if pieces["closest-tag"]: 414 | rendered = pieces["closest-tag"] 415 | if pieces["distance"]: 416 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 417 | else: 418 | # exception #1 419 | rendered = pieces["short"] 420 | if pieces["dirty"]: 421 | rendered += "-dirty" 422 | return rendered 423 | 424 | 425 | def render_git_describe_long(pieces): 426 | """TAG-DISTANCE-gHEX[-dirty]. 427 | 428 | Like 'git describe --tags --dirty --always -long'. 429 | The distance/hash is unconditional. 430 | 431 | Exceptions: 432 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 433 | """ 434 | if pieces["closest-tag"]: 435 | rendered = pieces["closest-tag"] 436 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 437 | else: 438 | # exception #1 439 | rendered = pieces["short"] 440 | if pieces["dirty"]: 441 | rendered += "-dirty" 442 | return rendered 443 | 444 | 445 | def render(pieces, style): 446 | """Render the given version pieces into the requested style.""" 447 | if pieces["error"]: 448 | return {"version": "unknown", 449 | "full-revisionid": pieces.get("long"), 450 | "dirty": None, 451 | "error": pieces["error"], 452 | "date": None} 453 | 454 | if not style or style == "default": 455 | style = "pep440" # the default 456 | 457 | if style == "pep440": 458 | rendered = render_pep440(pieces) 459 | elif style == "pep440-pre": 460 | rendered = render_pep440_pre(pieces) 461 | elif style == "pep440-post": 462 | rendered = render_pep440_post(pieces) 463 | elif style == "pep440-old": 464 | rendered = render_pep440_old(pieces) 465 | elif style == "git-describe": 466 | rendered = render_git_describe(pieces) 467 | elif style == "git-describe-long": 468 | rendered = render_git_describe_long(pieces) 469 | else: 470 | raise ValueError("unknown style '%s'" % style) 471 | 472 | return {"version": rendered, "full-revisionid": pieces["long"], 473 | "dirty": pieces["dirty"], "error": None, 474 | "date": pieces.get("date")} 475 | 476 | 477 | def get_versions(): 478 | """Get version information or return default if unable to do so.""" 479 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have 480 | # __file__, we can work backwards from there to the root. 
Some 481 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 482 | # case we can only use expanded keywords. 483 | 484 | cfg = get_config() 485 | verbose = cfg.verbose 486 | 487 | try: 488 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, 489 | verbose) 490 | except NotThisMethod: 491 | pass 492 | 493 | try: 494 | root = os.path.realpath(__file__) 495 | # versionfile_source is the relative path from the top of the source 496 | # tree (where the .git directory might live) to this file. Invert 497 | # this to find the root from __file__. 498 | for i in cfg.versionfile_source.split('/'): 499 | root = os.path.dirname(root) 500 | except NameError: 501 | return {"version": "0+unknown", "full-revisionid": None, 502 | "dirty": None, 503 | "error": "unable to find root of source tree", 504 | "date": None} 505 | 506 | try: 507 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) 508 | return render(pieces, cfg.style) 509 | except NotThisMethod: 510 | pass 511 | 512 | try: 513 | if cfg.parentdir_prefix: 514 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 515 | except NotThisMethod: 516 | pass 517 | 518 | return {"version": "0+unknown", "full-revisionid": None, 519 | "dirty": None, 520 | "error": "unable to compute version", "date": None} 521 | -------------------------------------------------------------------------------- /pygeostatistics/cokrige.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | A cokriging program for a points or blocks on a regular grid. 4 | 5 | Created on Fri Dec 2 2016 6 | """ 7 | from __future__ import division, print_function, absolute_import 8 | import yaml 9 | from itertools import product 10 | import time 11 | from collections import namedtuple 12 | import numpy as np 13 | from scipy import linalg 14 | import matplotlib.pyplot as plt 15 | 16 | from pygeostatistics.yaml_patch import loader_patched 17 | from pygeostatistics.super_block import SuperBlockSearcher 18 | 19 | __author__ = "yuhao" 20 | 21 | class Cokrige(object): 22 | def __init__(self, param_file): 23 | self.param_file = param_file 24 | self._read_params() 25 | self._check_params() 26 | self.property_name = None 27 | self.vr = None 28 | self.rotmat = None 29 | self.estimation = None 30 | self.estimation_variance = None 31 | 32 | self.xdb = None 33 | self.ydb = None 34 | self.zdb = None 35 | 36 | self._2d = False 37 | self.searcher = None 38 | self.const = None 39 | 40 | self._block_covariance = None 41 | self._unbias = None 42 | self.maxcov = None 43 | self._mdt = None 44 | 45 | self.resc = None 46 | 47 | self.nst = list() 48 | self.c0 = list() 49 | self.it = list() 50 | self.cc = list() 51 | self.ang1 = list() 52 | self.ang2 = list() 53 | self.ang3 = list() 54 | self.aa_hmax = list() 55 | self.aa_hmin = list() 56 | self.aa_vert = list() 57 | 58 | def _read_params(self): 59 | with open(self.param_file, "r") as fin: 60 | params = yaml.load(fin, Loader=loader_patched()) 61 | # data file definition 62 | self.datafl = params['datafl'] #: 'testData/test.gslib', 63 | self.nvr = params['nvar'] # number (primary + secondary) 64 | self.ixl = params['icolx'] #: 1, 65 | self.iyl = params['icoly'] #: 2, 66 | self.izl = params['icolz'] 67 | self.ivrl = params['icolvr'] # list 68 | # data limits 69 | self.tmin = params['tmin'] #: -1.0e21, 70 | self.tmax = params['tmax'] #: 1.0e21, 71 | # collocated cokriging or not 72 | self.icolloc = params['icolloc'] # boolean 73 | 74 | # 
definition of collocated data file
75 |         self.secfl = params['secfl']
76 |         self.iclcol = params['iclcol']
77 |
78 |         self.idbg = params['idbg']  #: 3,
79 |         self.dbgfl = params['dbgfl']  #: 'kb2d.dbg',
80 |         self.outfl = params['outfl']  #: 'out.dat',
81 |         # Grid definition
82 |         self.nx = params['nx']  #: 50,
83 |         self.xmn = params['xmn']  #: 0.5,
84 |         self.xsiz = params['xsiz']  #: 1.0,
85 |         self.ny = params['ny']  #: 50,
86 |         self.ymn = params['ymn']  #: 0.5,
87 |         self.ysiz = params['ysiz']  #: 1.0,
88 |         self.nz = params['nz']  #: 50,
89 |         self.zmn = params['zmn']  #: 0.5,
90 |         self.zsiz = params['zsiz']  #: 1.0,
91 |         # discretization definition
92 |         self.nxdis = params['nxdis']  #: 1,
93 |         self.nydis = params['nydis']  #: 1,
94 |         self.nzdis = params['nzdis']  #: 1,
95 |         # maximum and minimum data points used in kriging
96 |         self.ndmin = params['ndmin']  # for both
97 |         self.ndmaxp = params['ndmaxp']  # primary
98 |         self.ndmaxs = params['ndmaxs']  # secondary
99 |         # search radii for primary variable
100 |         self.pradius_hmax = params['radius_hmax']  # scalar
101 |         self.pradius_hmin = params['radius_hmin']  # scalar
102 |         self.pradius_vert = params['radius_vert']  # scalar
103 |         # search radii for secondary variables
104 |         self.sradius_hmax = params['radius_hmax']  # scalar
105 |         self.sradius_hmin = params['radius_hmin']  # scalar
106 |         self.sradius_vert = params['radius_vert']  # scalar
107 |         # search ellipsoid
108 |         self.sang1 = params['sang1']  # scalar
109 |         self.sang2 = params['sang2']  # scalar
110 |         self.sang3 = params['sang3']  # scalar
111 |         # kriging type
112 |         self.ktype = params['ikrige']
113 |         # mean values for primary and secondary variables
114 |         self.vmean = params['mean']  # list
115 |         # Variography definition
116 |         self.vario = params['vario']  # list of dictionaries
117 |
118 |     def _fill_check_covariance(self):
119 |         # one dict per variable pair (a repeated-list literal would alias a single dict)
120 |         self.variography = [dict() for _ in range(self.nvr * self.nvr)]
121 |         for var in self.vario:
122 |             self.variography[(var['i']-1) * self.nvr + (var['j']-1)] = var
123 |         # try to fill in the symmetric covariance elements
124 |         for i, j in product(range(self.nvr), range(self.nvr)):
125 |             idx1 = i + j * self.nvr
126 |             idx2 = j + i * self.nvr
127 |             if self.variography[idx1] == {} and self.variography[idx2] == {}:
128 |                 raise ValueError("need variogram between {},{}".format(i, j))
129 |             elif self.variography[idx1] == {}:
130 |                 self.variography[idx1] = self.variography[idx2]
131 |             elif self.variography[idx2] == {}:
132 |                 self.variography[idx2] = self.variography[idx1]
133 |         for var in self.variography:
134 |             self.nst.append(var['nst'])
135 |             self.c0.append(var['c0'])
136 |             for idx in range(var['nst']):
137 |                 self.it.append(var['it'][idx])  # structure types flattened per structure, like cc
138 |                 self.cc.append(var['cc'][idx])
139 |                 self.ang1.append(var['ang1'][idx])
140 |                 self.ang2.append(var['ang2'][idx])
141 |                 self.ang3.append(var['ang3'][idx])
142 |                 self.aa_hmax.append(var['aa_hmax'][idx])
143 |                 self.aa_hmin.append(var['aa_hmin'][idx])
144 |                 self.aa_vert.append(var['aa_vert'][idx])
145 |
146 |         # check linear model of coregionalization
147 |         # check definite positiveness
148 |
149 |     def _check_params(self):
150 |         # Check search radius
151 |         if self.pradius_hmax <= 0:
152 |             raise ValueError("pradius_hmax should be larger than zero.")
153 |         if self.sradius_hmax <= 0:
154 |             raise ValueError("sradius_hmax should be larger than zero.")
155 |         # Check data file definition
156 |         if self.ixl < 0 and self.nx > 1:
157 |             raise ValueError("WARNING: ixl=0 and nx>1 !")
158 |         if self.iyl < 0 and self.ny > 1:
159 |             raise ValueError("WARNING: iyl=0 and ny>1 !")
160 |         if self.izl < 0 and self.nz > 1:
161 |             raise ValueError("WARNING: izl=0 and nz>1 !")
162 |         if
self.ndmin <= 0:
163 |             raise ValueError("ndmin too small")
164 |         if self.ndmaxs/2 <= self.nvr and self.ktype == 2:
165 |             print('WARNING: with traditional ordinary cokriging the ' +\
166 |                   'sum of the weights applied to EACH secondary data ' +\
167 |                   'is zero. With ndmaxs set low and nvr large the ' +\
168 |                   'secondary data will not contribute to the estimate')
169 |
170 |     def read_data(self):
171 |         "Read a simplified Geo-EAS formatted file."
172 |         data_list = None
173 |         with open(self.datafl, 'r') as fin:
174 |             data_list = fin.readlines()
175 |         name = data_list[0].strip()
176 |         ncols = int(data_list[1].strip())
177 |         column_name = [item.strip() for item in data_list[2: ncols+2]]
178 |         self.property_name = [item for item in column_name
179 |                               if item not in ['x', 'y', 'z']]
180 |         if 'z' not in column_name:
181 |             self._2d = True
182 |             column_name.append('z')
183 |             data_list = [tuple(item.strip().split() + ['0'])
184 |                          for item in data_list[ncols+2:]]
185 |         else:
186 |             data_list = [tuple(item.strip().split())
187 |                          for item in data_list[ncols+2:]]
188 |         data_dtype = np.dtype({
189 |             'names': column_name,
190 |             'formats': ['f8'] * len(column_name)})
191 |         self.vr = np.array(data_list, dtype=data_dtype)
192 |
193 |     def _preprocess(self):
194 |         """create variables needed before performing kriging"""
195 |         # calculate dimensional constants
196 |         cokrige_const = namedtuple('Cokrige_const',
197 |                                    ['PMX', 'MAXNST', 'MAXSB', 'MAXDIS',
198 |                                     'MAXSAM', 'UNEST', 'MAXVAR', 'MAXARG',
199 |                                     'MAXCOK'])
200 |         maxsbx = 1
201 |         if self.nx > 1:
202 |             maxsbx = int(self.nx/2)
203 |             if maxsbx > 50:
204 |                 maxsbx = 50
205 |         maxsby = 1
206 |         if self.ny > 1:
207 |             maxsby = int(self.ny/2)
208 |             if maxsby > 50:
209 |                 maxsby = 50
210 |         maxsbz = 1
211 |         if self.nz > 1:
212 |             maxsbz = int(self.nz/2)
213 |             if maxsbz > 50:
214 |                 maxsbz = 50
215 |         self.const = cokrige_const(
216 |             PMX=999,
217 |             MAXNST=4,
218 |             MAXSB=(maxsbx, maxsby, maxsbz),
219 |             MAXDIS=self.nxdis * self.nydis * self.nzdis,
220 |             MAXSAM=self.ndmaxp + self.ndmaxs,
221 |             UNEST=np.nan,
222 |             MAXVAR=self.nvr,
223 |             MAXARG=self.nvr*self.nvr,
224 |             MAXCOK=(self.ndmaxp + self.ndmaxs)*self.nvr + self.nvr
225 |         )
226 |         # Calculate needed programming variables from input parameters
227 |         self.pradsqd = self.pradius_hmax * self.pradius_hmax
228 |         self.psanis1 = self.pradius_hmin / self.pradius_hmax
229 |         self.psanis2 = self.pradius_vert / self.pradius_hmax
230 |
231 |         self.sradsqd = self.sradius_hmax * self.sradius_hmax
232 |         self.ssanis1 = self.sradius_hmin / self.sradius_hmax
233 |         self.ssanis2 = self.sradius_vert / self.sradius_hmax
234 |
235 |         self.anis1 = np.array(self.aa_hmin) / \
236 |                      np.maximum(self.aa_hmax, np.finfo(float).eps)
237 |         self.anis2 = np.array(self.aa_vert) / \
238 |                      np.maximum(self.aa_hmax, np.finfo(float).eps)
239 |
240 |         self._fill_check_covariance()
241 |
242 |     def _set_rotation(self):
243 |         """
244 |         Set up rotation matrices for both anisotropy and searching.
245 |         self.rotmat is an array of 3x3 rotation matrices; the last matrices
246 |         in the array are the search-ellipsoid matrices.
247 |         """
248 |         ang1 = np.append(self.ang1, self.sang1)
249 |         ang2 = np.append(self.ang2, self.sang2)
250 |         ang3 = np.append(self.ang3, self.sang3)
251 |         anis1 = np.append(self.anis1, self.psanis1)
252 |         anis2 = np.append(self.anis2, self.psanis2)
253 |         anis1 = np.append(anis1, self.ssanis1)
254 |         anis2 = np.append(anis2, self.ssanis2)
255 |         self.rotmat = np.full((ang1.shape[0], 3, 3), np.nan)
256 |         def convert_ang1(ang):
257 |             if ang >= 0 and ang < 270:
258 |                 alpha = np.deg2rad(90 - ang)
259 |             else:
260 |                 alpha = np.deg2rad(450 - ang)
261 |             return alpha
262 |         v_convert = np.vectorize(convert_ang1)
263 |
264 |         alpha = v_convert(ang1)
265 |         beta = np.deg2rad(-ang2)
266 |         theta = np.deg2rad(ang3)
267 |
268 |         sina = np.sin(alpha)
269 |         sinb = np.sin(beta)
270 |         sint = np.sin(theta)
271 |         cosa = np.cos(alpha)
272 |         cosb = np.cos(beta)
273 |         cost = np.cos(theta)
274 |
275 |         afac1 = 1.0 / np.maximum(anis1, np.finfo(float).eps)
276 |         afac2 = 1.0 / np.maximum(anis2, np.finfo(float).eps)
277 |         self.rotmat[:, 0, 0] = cosb * cosa
278 |         self.rotmat[:, 0, 1] = cosb * sina
279 |         self.rotmat[:, 0, 2] = -sinb
280 |         self.rotmat[:, 1, 0] = afac1 * (-cost * sina + sint * sinb * cosa)
281 |         self.rotmat[:, 1, 1] = afac1 * (cost * cosa + sint * sinb * sina)
282 |         self.rotmat[:, 1, 2] = afac1 * (sint * cosb)
283 |         self.rotmat[:, 2, 0] = afac2 * (sint * sina + cost * sinb * cosa)
284 |         self.rotmat[:, 2, 1] = afac2 * (-sint * cosa + cost * sinb * sina)
285 |         self.rotmat[:, 2, 2] = afac2 * (cost * cosb)
286 |
287 |     def krige(self):
288 |         # (the covariance model is filled and checked inside _preprocess)
289 |         self._preprocess()
290 |         # Set up the rotation/anisotropy matrices needed for variogram
291 |         # and searching
292 |         self._set_rotation()
293 |         # compute maximum covariance for the rescaling factor:
294 |         self._max_covariance()
295 |         # Set up for super block searching:
296 |         print("Setting up Super Block Search...")
297 |         self._create_searcher()
298 |         # Set up discretization points per block
299 |         self._block_discretization()
300 |         # Find unbias value
301 |         self.unbias = self.maxcov
302 |
303 |         nxy = self.nx * self.ny
304 |         nloop = self.nx * self.ny * self.nz
305 |         print("Start working on the kriging...")
306 |         # time
307 |         t1 = time.time()
308 |         ts = 0
309 |         percent_od = 0
310 |         self.estimation = np.full((nloop,), np.nan)
311 |         self.estimation_variance = np.full((nloop,), np.nan)
312 |         # MAIN LOOP
313 |         for index in range(nloop):
314 |             self.iz = index // nxy
315 |             self.iy = (index - self.iz * nxy) // self.nx
316 |             self.ix = index - self.iz * nxy - self.iy * self.nx
317 |             xloc = self.xmn + self.ix * self.xsiz
318 |             yloc = self.ymn + self.iy * self.ysiz
319 |             zloc = self.zmn + self.iz * self.zsiz
320 |             # Search for proximity data
321 |             ts_1 = time.time()
322 |             self.searcher.search(xloc, yloc, zloc)
323 |             ts += time.time() - ts_1
324 |             # load nearest data in xa, ya, za, vra, iva
325 |             xa = list()
326 |             ya = list()
327 |             za = list()
328 |             vra = list()
329 |             iva = list()  # which variable
330 |             npri = 0  # number of primary data
331 |             nsec = 0  # number of secondary data
332 |             na = 0  # number of both kinds
333 |             for i in range(self.searcher.nclose):
334 |                 if npri == self.ndmaxp and nsec == self.ndmaxs:
335 |                     continue
336 |                 idx = self.searcher.close_samples[i]
337 |                 # Load primary data (keep values inside the trimming limits)
338 |                 prim = self.vr[self.property_name[0]][idx]
339 |                 if prim >= self.tmin and prim < self.tmax and \
340 |                         npri < self.ndmaxp:
341 |                     npri += 1
                    na += 1
342 |                     xa.append(self.vr['x'][idx] - xloc)
343 |                     ya.append(self.vr['y'][idx] - yloc)
344 |                     za.append(self.vr['z'][idx] - zloc)
345 |                     vra.append(prim)
346 |                     iva.append(0)
347 |                 # Load secondary data (keep values inside the trimming limits)
348 |                 sec1 = self.vr[self.property_name[1]][idx]
349 |                 if sec1 >= self.tmin and sec1 < self.tmax and \
350 |                         nsec < self.ndmaxs:
351 |                     nsec += 1
352 |                     na += 1
353 |                     xa.append(self.vr['x'][idx] - xloc)
354 |                     ya.append(self.vr['y'][idx] - yloc)
355 |                     za.append(self.vr['z'][idx] - zloc)
356 |                     if self.ktype != 2:
357 |                         vra.append(sec1 - self.vmean[1] - self.vmean[0])
358 |                     else:
359 |                         vra.append(sec1)
360 |                     iva.append(1)
361 |                 sec2 = self.vr[self.property_name[2]][idx]
362 |                 if sec2 >= self.tmin and sec2 < self.tmax and \
363 |                         nsec < self.ndmaxs:
364 |                     nsec += 1
365 |                     na += 1
366 |                     xa.append(self.vr['x'][idx] - xloc)
367 |                     ya.append(self.vr['y'][idx] - yloc)
368 |                     za.append(self.vr['z'][idx] - zloc)
369 |                     if self.ktype != 2:
370 |                         vra.append(sec2 - self.vmean[2] - self.vmean[0])
371 |                     else:
372 |                         vra.append(sec2)
373 |                     iva.append(2)
374 |                 sec3 = self.vr[self.property_name[3]][idx]
375 |                 if sec3 >= self.tmin and sec3 < self.tmax and \
376 |                         nsec < self.ndmaxs:
377 |                     nsec += 1
378 |                     na += 1
379 |                     xa.append(self.vr['x'][idx] - xloc)
380 |                     ya.append(self.vr['y'][idx] - yloc)
381 |                     za.append(self.vr['z'][idx] - zloc)
382 |                     if self.ktype != 2:
383 |                         vra.append(sec3 - self.vmean[3] - self.vmean[0])
384 |                     else:
385 |                         vra.append(sec3)
386 |                     iva.append(3)
387 |
388 |             est, estv = self._many_samples(xa, ya, za, vra, iva, na)
389 |             self.estimation[index] = est
390 |             self.estimation_variance[index] = estv
391 |             # print working percentage
392 |             percent = np.round(index/nloop*100, decimals=0)
393 |             dtime = time.time() - t1
394 |             if percent != percent_od:
395 |                 print("{}% ".format(percent) +\
396 |                       "."*20 + "{}s elapsed.".format(np.round(dtime, decimals=3)))
397 |             percent_od = percent
398 |         print("Kriging Finished.")
399 |         print("Time used for searching: {}s".format(ts))
400 |
401 |     def _many_samples(self, xa, ya, za, vra, iva, na):
402 |         if self.ktype == 0:
403 |             neq = na
404 |         elif self.ktype == 1:
405 |             neq = na + 1
406 |         elif self.ktype == 2:
407 |             neq = na + self.nvr
408 |         if (neq - na) > na or na < self.ndmin:
409 |             print("not enough data.")
410 |             return np.nan, np.nan
411 |         # left side
412 |         left = np.full((neq, neq), np.nan)
413 |         # fill the kriging matrix:
414 |         for i, j in product(range(na), range(na)):
415 |             if np.isnan(left[j, i]):
416 |                 left[i, j] = self._cova3((xa[i], ya[i], za[i]),
417 |                                          (xa[j], ya[j], za[j]),
418 |                                          iva[i] * self.nvr + iva[j])  # cross-variogram index
419 |             else:
420 |                 left[i, j] = left[j, i]
421 |         # TODO: build the right-hand side and solve the cokriging system
422 |         return np.nan, np.nan
423 |
424 |     @property
425 |     def block_covariance(self):
426 |         "return average covariance within block"
427 |         if self._block_covariance is None:
428 |             if self.ndb <= 1:  # point kriging
429 |                 self._block_covariance = self.unbias
430 |             else:
431 |                 cov = list()
432 |                 for x1, y1, z1 in zip(self.xdb, self.ydb, self.zdb):
433 |                     for x2, y2, z2 in zip(self.xdb, self.ydb, self.zdb):
434 |                         cov.append(self._cova3((x1, y1, z1), (x2, y2, z2), 0))  # primary variogram
435 |                 cov = np.array(cov).reshape((self.ndb, self.ndb))
436 |                 cov[np.diag_indices_from(cov)] -= self.c0[0]
437 |                 self._block_covariance = np.mean(cov)
438 |         return self._block_covariance
439 |
440 |     def _block_discretization(self):
441 |         self.nxdis = 1 if self.nxdis < 1 else self.nxdis
442 |         self.nydis = 1 if self.nydis < 1 else self.nydis
443 |         self.nzdis = 1 if self.nzdis < 1 else self.nzdis
444 |         self.ndb = self.nxdis * self.nydis * self.nzdis
445 |         if self.ndb > self.const.MAXDIS:
446 |             raise ValueError("Too many
    @property
    def block_covariance(self):
        "return the average covariance within a block"
        if self._block_covariance is None:
            if self.ndb <= 1:  # point kriging
                self._block_covariance = self.unbias
            else:
                cov = list()
                for x1, y1, z1 in zip(self.xdb, self.ydb, self.zdb):
                    for x2, y2, z2 in zip(self.xdb, self.ydb, self.zdb):
                        cov.append(self._cova3((x1, y1, z1), (x2, y2, z2), 0))
                cov = np.array(cov).reshape((self.ndb, self.ndb))
                cov[np.diag_indices_from(cov)] -= self.c0[0]
                self._block_covariance = np.mean(cov)
        return self._block_covariance

    def _block_discretization(self):
        self.nxdis = 1 if self.nxdis < 1 else self.nxdis
        self.nydis = 1 if self.nydis < 1 else self.nydis
        self.nzdis = 1 if self.nzdis < 1 else self.nzdis
        self.ndb = self.nxdis * self.nydis * self.nzdis
        if self.ndb > self.const.MAXDIS:
            raise ValueError("Too many discretization points")
        xdis = self.xsiz / max(self.nxdis, 1)
        ydis = self.ysiz / max(self.nydis, 1)
        zdis = self.zsiz / max(self.nzdis, 1)
        self.xdb = np.arange(0, self.nxdis, 1) * xdis + \
            (-0.5 * self.xsiz + 0.5 * xdis)
        self.ydb = np.arange(0, self.nydis, 1) * ydis + \
            (-0.5 * self.ysiz + 0.5 * ydis)
        self.zdb = np.arange(0, self.nzdis, 1) * zdis + \
            (-0.5 * self.zsiz + 0.5 * zdis)

    def _max_covariance(self):
        '''
        Calculate the maximum covariance of the primary variogram (used for
        zero distances and for power model covariance):
        '''
        self.maxcov = self.c0[0]
        for ist in range(self.nst[0]):
            if self.it[ist] == 4:
                self.maxcov += self.const.PMX
            else:
                self.maxcov += self.cc[ist]

    def _create_searcher(self):
        "Help create and initialize the searcher object"
        self.searcher = SuperBlockSearcher()
        # initialize required attributes
        # grid definition
        self.searcher.nx = self.nx
        self.searcher.xmn = self.xmn
        self.searcher.xsiz = self.xsiz
        self.searcher.ny = self.ny
        self.searcher.ymn = self.ymn
        self.searcher.ysiz = self.ysiz
        self.searcher.nz = self.nz
        self.searcher.zmn = self.zmn
        self.searcher.zsiz = self.zsiz
        # data
        self.searcher.vr = self.vr
        self.searcher.MAXSB = self.const.MAXSB
        # rotation matrix
        self.searcher.rotmat = self.rotmat[-1]
        self.searcher.radsqd = self.radsqd
        # octant search
        self.searcher.noct = self.noct
        # Setup
        self.searcher.setup()
        self.searcher.pickup()
        # sort data according to super block number
        self.vr = self.vr[self.searcher.sort_index]

    def _cova3(self, point1, point2, ivarg):
        """
        Parameters
        ----------
        point1, point2: tuple of 3
            coordinates of two points
        ivarg: {0, 1, 2, 3}
            0 for primary, 1, 2, 3 for secondary

        Returns
        -------
        cova: scalar
            covariance between (x1,y1,z1) and (x2,y2,z2)
        """
        # Calculate the maximum covariance of this variogram
        istart = sum(self.nst[:ivarg])
        cmax = self.c0[ivarg]
        for iss in range(self.nst[ivarg]):
            ist = istart + iss
            if self.it[ist] == 4:
                cmax += self.const.PMX
            else:
                cmax += self.cc[ist]
        # check for 'zero' distance, return cmax if so:
        hsqd = self._sqdist(point1, point2, self.rotmat[istart])
        if hsqd < np.finfo(float).eps:
            cova = cmax
            return cova

        # loop over all structures of this variogram
        cova = 0
        for ist in range(istart, istart + self.nst[ivarg]):
            if ist != istart:
                hsqd = self._sqdist(point1, point2, self.rotmat[ist])
            h = np.sqrt(hsqd)
            if self.it[ist] == 1:  # Spherical
                hr = h / self.aa_hmax[ist]
                if hr < 1:
                    cova += self.cc[ist] * (1 - hr * (1.5 - 0.5 * hr * hr))
            elif self.it[ist] == 2:  # Exponential
                cova += self.cc[ist] * np.exp(-3.0 * h / self.aa_hmax[ist])
            elif self.it[ist] == 3:  # Gaussian
                cova += self.cc[ist] * \
                    np.exp(-3.0 * (h / self.aa_hmax[ist]) *
                           (h / self.aa_hmax[ist]))
            elif self.it[ist] == 4:  # Power
                cova += cmax - self.cc[ist] * (h**(self.aa_hmax[ist]))
            elif self.it[ist] == 5:  # Hole Effect
                cova += self.cc[ist] * np.cos(h / self.aa_hmax[ist] * np.pi)
        return cova

    def _sqdist(self, point1, point2, rotmat):
        """
        This routine calculates the anisotropic distance between two points
        given the coordinates of each point and
a definition of the 549 | anisotropy. 550 | 551 | This method only consider a single anisotropy senario. 552 | 553 | Parameters 554 | ---------- 555 | point1 : tuple 556 | Coordinates of first point (x1,y1,z1) 557 | point2 : tuple 558 | Coordinates of second point (x2,y2,z2) 559 | rotmat : 3*3 ndarray 560 | matrix of rotation for this structure 561 | 562 | Returns 563 | ------- 564 | sqdist : scalar 565 | The squared distance accounting for the anisotropy 566 | and the rotation of coordinates (if any). 567 | """ 568 | dx = point1[0] - point2[0] 569 | dy = point1[1] - point2[1] 570 | dz = point1[2] - point2[2] 571 | sqdist = 0.0 572 | for i in range(3): 573 | cont = rotmat[i, 0] * dx + \ 574 | rotmat[i, 1] * dy + \ 575 | rotmat[i, 2] * dz 576 | sqdist += cont * cont 577 | return sqdist 578 | 579 | if __name__ == '__main__': 580 | test_cokrige = Cokrige("testData/test_cokrige.par") 581 | test_cokrige.read_data() 582 | test_cokrige.krige() 583 | -------------------------------------------------------------------------------- /pygeostatistics/eda.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Exploratory Data Analysis 4 | 5 | Created on Mon Nov 07 2016 6 | """ 7 | from __future__ import division, print_function 8 | import numpy as np 9 | import matplotlib.pyplot as plt 10 | from scipy.spatial.distance import pdist 11 | 12 | 13 | class EDA(): 14 | def __init__(self, datafile): 15 | self.datafl = datafile 16 | self.vr = None 17 | self.property_name = None 18 | self._2d = False 19 | 20 | def preview(self): 21 | print("Data File") 22 | print("---------") 23 | with open(self.datafl, 'r') as fin: 24 | for line in fin.readlines(20): 25 | print(line.strip()) 26 | 27 | def read(self): 28 | data_list = None 29 | with open(self.datafl, 'r') as fin: 30 | data_list = fin.readlines() 31 | self.name = data_list[0].strip() 32 | ncols = int(data_list[1].strip()) 33 | column_name = [item.strip() for item in data_list[2: ncols+2]] 34 | self.property_name = [item for item in column_name 35 | if item not in ['x', 'y', 'z']] 36 | if 'z' not in column_name: 37 | self._2d = True 38 | column_name.append('z') 39 | data_list = [tuple(item.strip().split() + ['0']) 40 | for item in data_list[ncols+2:]] 41 | else: 42 | data_list = [tuple(item.strip().split()) 43 | for item in data_list[ncols+2:]] 44 | data_dtype = np.dtype({ 45 | 'names': column_name, 46 | 'formats': ['f8'] * len(column_name)}) 47 | self.vr = np.array(data_list, dtype=data_dtype) 48 | 49 | def pdf(self, bins=15): 50 | hist, bin_edges = np.histogram(self.vr[self.property_name[0]], 51 | bins=bins) 52 | fig, ax = plt.subplots() 53 | ax.set_title("pdf") 54 | plt.bar(bin_edges[:-1], hist, width=bin_edges[1]-bin_edges[0], 55 | color='red', alpha=0.5) 56 | fig.show() 57 | 58 | def cdf(self): 59 | data = self.vr[self.property_name[0]] 60 | data = np.sort(data) 61 | cdf = np.arange(1, len(data) + 1) / len(data) 62 | fig, ax = plt.subplots() 63 | ax.set_title("cdf") 64 | ax.plot(data, cdf) 65 | fig.show() 66 | 67 | @property 68 | def maximum(self): 69 | return np.max(self.vr[self.property_name[0]]) 70 | 71 | @property 72 | def minimum(self): 73 | return np.min(self.vr[self.property_name[0]]) 74 | 75 | @property 76 | def mean(self): 77 | return np.mean(self.vr[self.property_name[0]]) 78 | 79 | @property 80 | def variance(self): 81 | return np.var(self.vr[self.property_name[0]]) 82 | 83 | @property 84 | def meadian(self): 85 | return np.median(self.vr[self.property_name[0]]) 86 | 87 | 
    @property
    def upper_quartile(self):
        # use the standard percentile definition; the data need not be
        # sorted beforehand
        return np.percentile(self.vr[self.property_name[0]], 75)

    @property
    def lower_quartile(self):
        return np.percentile(self.vr[self.property_name[0]], 25)

    @property
    def num(self):
        return self.vr.shape[0]

    def statistics(self):
        print("\nStatistics")
        print("-"*10)
        print("Number of Data: {}".format(self.num))
        print("Mean: {}".format(self.mean))
        print("Variance: {}".format(self.variance))
        print("-"*10)
        print("Minimum: {}".format(self.minimum))
        print("Lower Quartile: {}".format(self.lower_quartile))
        print("Median: {}".format(self.meadian))
        print("Upper Quartile: {}".format(self.upper_quartile))
        print("Maximum: {}".format(self.maximum))
        print("-"*10)

    def distance(self):
        num = self.vr.shape[0]
        dist = pdist(np.concatenate((self.vr['x'].reshape((num, 1)),
                                     self.vr['y'].reshape((num, 1))), axis=1))
        print("\nDistance\n"+"-"*8)
        print("Max distance: {}\nMin distance: {}".format(np.max(dist),
                                                          np.min(dist)))
        print("-"*8)
        print("X: {} - {}".format(np.min(self.vr['x']), np.max(self.vr['x'])))
        print("Y: {} - {}".format(np.min(self.vr['y']), np.max(self.vr['y'])))
        if self._2d is False:
            print("Z: {} - {}".format(np.min(self.vr['z']),
                                      np.max(self.vr['z'])))

    def view2d(self, pname=None):
        pname = self.property_name[0] if pname is None else pname
        if self._2d is False:
            print("3D data, use view3d() instead.")
        else:
            fig, ax = plt.subplots()
            abscissa = self.vr['x']
            ordinate = self.vr['y']
            sc = ax.scatter(abscissa, ordinate, c=self.vr[pname],
                            cmap='jet')
            ax.set(xlim=(np.min(abscissa), np.max(abscissa)),
                   ylim=(np.min(ordinate), np.max(ordinate)),
                   xlabel="X (m)", ylabel="Y (m)",
                   title="Data Scatter", aspect='equal',
                   facecolor='grey')
            fig.colorbar(sc)
            fig.show()

if __name__ == "__main__":
    eda = EDA("testData/test.gslib")
    # eda.preview()
    eda.read()
    eda.pdf()
    eda.cdf()
    eda.statistics()
    eda.distance()
    eda.view2d()
--------------------------------------------------------------------------------
/pygeostatistics/gam.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Compute Variogram on regularly spaced data

Created on Tue Nov 03 2016
"""
__author__ = "yuhao"

import yaml
import numpy as np
import matplotlib.pyplot as plt

from pygeostatistics.yaml_patch import loader_patched


class Gam():
    def __init__(self, param_file):
        self.param_file = param_file
        self._read_params()
        self._check_params()
        self.vr = None
        self.gam = None
        self.npair = None
        self.directions = None
        self.mean = None
        self.variance = None

    def _read_params(self):
        with open(self.param_file, "r") as fin:
            params = yaml.load(fin, Loader=loader_patched())
            self.datafl = 
params['datafl'] 32 | self.nvar = params['nvar'] 33 | # 'ivar' 34 | self.tmin = params['tmin'] 35 | self.tmax = params['tmax'] 36 | # 'outfl' 37 | # 'igrid' 38 | self.nx = params['nx'] 39 | self.xmn = params['xmn'] 40 | self.xsiz = params['xsiz'] 41 | self.ny = params['ny'] 42 | self.ymn = params['ymn'] 43 | self.ysiz = params['ysiz'] 44 | self.nz = params['nz'] 45 | self.zmn = params['zmn'] 46 | self.zsiz = params['zsiz'] 47 | self.ndir = params['ndir'] 48 | self.nlag = params['nlag'] 49 | self.ixd = params['ixd'] 50 | self.iyd = params['iyd'] 51 | self.izd = params['izd'] 52 | self.standardize = params['standardize'] 53 | self.nvarg = params['nvarg'] 54 | self.ivtail = params['ivtail'] 55 | self.ivhead = params['ivhead'] 56 | self.ivtype = params['ivtype'] 57 | 58 | def read_data(self): 59 | data_list = list() 60 | name = None 61 | ncols = None 62 | column_name = None 63 | data_dtype = None 64 | with open(self.datafl, 'r') as fin: 65 | name = fin.readline() 66 | ncols = int(fin.readline()) 67 | column_name = list() 68 | for i in range(ncols): 69 | column_name.append(fin.readline().rstrip('\n')) 70 | data_dtype = np.dtype({ 71 | 'names': column_name, 72 | 'formats': ['f8'] * ncols}) 73 | for line in fin: 74 | data = line.split() 75 | for i in range(ncols): 76 | data[i] = float(data[i]) 77 | data = tuple(data) 78 | data_list.append(data) 79 | input_data = np.array(data_list, dtype=data_dtype) 80 | self.vr = input_data[column_name[-1]] 81 | self.vr = self.vr.reshape((self.nx, self.ny, self.nz)) 82 | 83 | def _check_params(self): 84 | try: 85 | # self.datafl 86 | if not isinstance(type(self.nvar), int) != 'int' or self.nvar < 1: 87 | raise ValueError("wrong value with number of variables") 88 | # 'ivar' 89 | # self.tmin = params['tmin'] 90 | # self.tmax = params['tmax'] 91 | # # 'outfl' 92 | # # 'igrid' 93 | # self.nx = params['nx'] 94 | # self.xmn = params['xmn'] 95 | # self.xsiz = ['xsiz'] 96 | # self.ny = params['ny'] 97 | # self.ymn = params['ymn'] 98 | # self.ysiz = ['ysiz'] 99 | # self.nz = params['nz'] 100 | # self.zmn = params['zmn'] 101 | # self.zsiz = ['zsiz'] 102 | # self.ndir = ['ndir'] 103 | # self.nlag = ['nlag'] 104 | # self.ixd = params['ixd'] 105 | # self.iyd = params['iyd'] 106 | # self.izd = params['izd'] 107 | # # 'standardize':True, 108 | # self.nvarg = ['nvarg'] 109 | # self.ivtail = ['ivtail'] 110 | # self.ivhead = ['ivhead'] 111 | # self.ivtype = ['ivtype'] 112 | except Exception as inst: 113 | print(inst) 114 | 115 | def _preprocess(self): 116 | # put three input directional vector into a direction list 117 | self.directions = list() 118 | for ix, iy, iz in zip(self.ixd, self.iyd, self.izd): 119 | self.directions.append((ix, iy, iz)) 120 | self.mean = np.mean(self.vr) 121 | self.variance = np.var(self.vr) 122 | 123 | def gamma(self): 124 | """ 125 | This subroutine computes any of eight different measures of spatial 126 | continuity for regular spaced 3-D data. Missing values are allowed 127 | and the grid need not be cubic. 
128 | """ 129 | # initialize the summation arrays for each direction, variogram and lag 130 | self._preprocess() 131 | 132 | head_data = list() 133 | tail_data = list() 134 | for i in range(self.ndir): 135 | head_data.append(list()) 136 | tail_data.append(list()) 137 | for j in range(self.nlag): 138 | head_data[i].append(list()) 139 | tail_data[i].append(list()) 140 | # loop over all points on the grid 141 | coordination = np.meshgrid( 142 | np.arange(self.nx), np.arange(self.ny), np.arange(self.nz)) 143 | # loop over all points on the grid 144 | for ix, iy, iz in zip( 145 | coordination[0].flatten(), 146 | coordination[1].flatten(), 147 | coordination[2].flatten()): 148 | # loop over each direction 149 | for idir, (ixd, iyd, izd) in enumerate(self.directions): 150 | ix1, iy1, iz1 = ix + ixd, iy + iyd, iz + izd 151 | ilag = 0 152 | # add indexes of every eligible pair to 153 | # index_pairs[nlag][npairs] list 154 | while ix1 < self.nx and iy1 < self.ny and iz1 < self.nz: 155 | if ilag < self.nlag: 156 | head_value = self.vr[(ix, iy, iz)] 157 | tail_value = self.vr[(ix1, iy1, iz1)] 158 | # if head_value > s 159 | if not np.isnan(head_value) and \ 160 | not np.isnan(tail_value): 161 | head_data[idir][ilag].append(head_value) 162 | tail_data[idir][ilag].append(tail_value) 163 | else: 164 | break 165 | ilag = ilag + 1 166 | ix1 = ix1 + ixd 167 | iy1 = iy1 + iyd 168 | iz1 = iz1 + izd 169 | # after figure out all the index pairs, use the index to get data 170 | 171 | self.gam = np.zeros((self.ndir, self.nlag)) 172 | self.npair = np.zeros((self.ndir, self.nlag), dtype='>i4') 173 | # hm = np.zeros((self.ndir, self.nlag)) 174 | # tm = np.zeros((self.ndir, self.nlag)) 175 | # hv = np.zeros((self.ndir, self.nlag)) 176 | # tv = np.zeros((self.ndir, self.nlag)) 177 | 178 | for i, (head_lags, tail_lags) in enumerate(zip(head_data, tail_data)): 179 | for j, (head_lag, tail_lag) in enumerate(zip(head_lags, tail_lags)): 180 | self.npair[i][j] = len(head_lag) 181 | for hd, td in zip(head_lag, tail_lag): 182 | self.gam[i][j] = self.gam[i][j] + (hd - td)**2 183 | self.gam /= self.npair 184 | self.gam *= 0.5 # * self.gam 185 | if self.standardize is True: 186 | self.gam /= self.variance 187 | 188 | def graph(self): 189 | fig, axes = plt.subplots(nrows=self.ndir, ncols=1) 190 | for ax, data, (idx, idy, idz) in zip(axes, self.gam, self.directions): 191 | real_lag_value = np.sqrt((idx * self.xsiz)**2 + 192 | (idy * self.ysiz)**2 + 193 | (idz * self.zsiz)**2) 194 | temp = np.ones(self.nlag) * real_lag_value 195 | abscissa = temp * np.arange(1, self.nlag + 1) 196 | ax.plot(abscissa, data, linestyle='--', marker='s', 197 | fillstyle='none', color='black', markeredgecolor='blue') 198 | # ax.step(abscissa, data, label='pre (default)') 199 | # ax.scatter(abscissa, data, marker='D') 200 | ax.set_xlim(left=0, right=real_lag_value*(self.nlag+1)) 201 | ax.set_ylim(bottom=0) 202 | ax.set_title("directional vector [{}, {}, {}]".format( 203 | idx, idy, idz)) 204 | ax.set_ylabel("Variogram $\gamma(h)$") 205 | ax.set_xlabel("Distance") 206 | fig.tight_layout() 207 | plt.draw() 208 | 209 | if __name__ == "__main__": 210 | data_analysis = Gam("testData/xihuSmall_sparse_gam.par") 211 | data_analysis.read_data() 212 | data_analysis.gamma() 213 | data_analysis.graph() 214 | -------------------------------------------------------------------------------- /pygeostatistics/gamv.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Compute Variogram on irregularly 
spaced data 4 | 5 | Created on Sun Nov 06 2016 6 | """ 7 | __author__ = "yuhao" 8 | 9 | import yaml 10 | import numpy as np 11 | import matplotlib.pyplot as plt 12 | 13 | from pygeostatistics.yaml_patch import loader_patched 14 | 15 | 16 | class Gamv(): 17 | def __init__(self, param_file): 18 | self.param_file = param_file 19 | self._read_params() 20 | self._check_params() 21 | self.vr = None 22 | self.gam = None 23 | self.npair = None 24 | self.distance = None 25 | self.mean = None 26 | self.variance = None 27 | self.hm = None 28 | self.tm = None 29 | self.property_name = None 30 | self.lag_interval = None 31 | 32 | def _read_params(self): 33 | with open(self.param_file, "r") as fin: 34 | params = yaml.load(fin, Loader=loader_patched()) 35 | self.datafl = params['datafl'] 36 | self.icolx = params['icolx'] #: 1, 37 | self.icoly = params['icoly'] #: 2, 38 | self.icolz = params['icolz'] #: 0, 39 | self.nvar = params['nvar'] # : 2, 40 | self.ivar = params['ivar'] # : [3, 4], 41 | self.tmin = params['tmin'] # : -1.0e21, 42 | self.tmax = params['tmax'] # : 1.0e21, 43 | self.outfl = params['outfl'] # : 'out.dat', 44 | self.nlag = params['nlag'] # : 10, 45 | self.xlag = params['xlag'] # : 5.0, 46 | self.xltol = params['xltol'] # : 3.0, 47 | self.ndir = params['ndir'] # : 3, 48 | self.azm = params['azm'] # : [0.0, 0.0, 90.], 49 | self.atol = params['atol'] # : [90.0, 22.5, 22.5], 50 | self.bandwh = params['bandwh'] # : [50.0, 25.0, 25.0], 51 | self.dip = params['dip'] # : [0.0, 0.0, 0.0], 52 | self.dtol = params['dtol'] # : [90.0, 22.5, 22.5], 53 | self.bandwd = params['bandwd'] # : [50.0, 25.0, 25.0], 54 | self.standardize = params['standardize'] # : False, 55 | self.nvarg = params['nvarg'] # : 3, 56 | self.ivtail = params['ivtail'] # : [1, 1, 2], 57 | self.ivhead = params['ivhead'] # : [1, 1, 2], 58 | self.ivtype = params['ivtype'] # : [1, 3, 1] 59 | 60 | def _check_params(self): 61 | # # check lag tolerance 62 | # if self.xltol <= 0: 63 | # self.xltol = 0.5 * self.xlag 64 | # # check azimuth tolerance 65 | # for i, item in enumerate(self.atol): 66 | # if item <= 0: 67 | # self.atol[i] = 45.0 68 | # # check dip tolerance 69 | # for i, item in enumerate(self.dtol): 70 | # if item <= 0: 71 | # self.dtol[i] = 45.0 72 | if self.ndir != len(self.azm): 73 | raise ValueError('number of directions does not match provided \ 74 | azimuth.') 75 | 76 | def read_data(self): 77 | data_list = None 78 | with open(self.datafl, 'r') as fin: 79 | data_list = fin.readlines() 80 | name = data_list[0].strip() 81 | ncols = int(data_list[1].strip()) 82 | column_name = [item.strip() for item in data_list[2: ncols+2]] 83 | self.property_name = [item for item in column_name 84 | if item not in ['x', 'y', 'z']] 85 | if 'z' not in column_name: 86 | column_name.append('z') 87 | data_list = [tuple(item.strip().split() + ['0']) 88 | for item in data_list[ncols+2:]] 89 | else: 90 | data_list = [tuple(item.strip().split()) 91 | for item in data_list[ncols+2:]] 92 | data_dtype = np.dtype({ 93 | 'names': column_name, 94 | 'formats': ['f8'] * len(column_name)}) 95 | self.vr = np.array(data_list, dtype=data_dtype) 96 | 97 | def _preprocess(self): 98 | self.mean = np.mean(self.vr[self.property_name[0]]) 99 | self.variance = np.var(self.vr[self.property_name[0]]) 100 | # check lag tolerance 101 | if self.xltol <= 0: 102 | self.xltol = 0.5 * self.xlag 103 | # check azimuth tolerance 104 | # for i, item in enumerate(self.atol): 105 | # if item <= 0: 106 | # self.atol[i] = 45.0 107 | self.atol = [45.0 if item <= 0 else item for item in 
self.atol] 108 | # check dip tolerance 109 | # for i, item in enumerate(self.dtol): 110 | # if item <= 0: 111 | # self.dtol[i] = 45.0 112 | self.dtol = [45.0 if item <= 0 else item for item in self.dtol] 113 | 114 | def gamv(self): 115 | self._check_params() 116 | self._preprocess() 117 | self.gam = np.zeros((self.ndir, self.nlag+2)) 118 | self.npair = np.zeros((self.ndir, self.nlag+2), dtype='>i4') 119 | self.distance = np.zeros((self.ndir, self.nlag+2)) 120 | self.tm = np.zeros((self.ndir, self.nlag+2)) 121 | self.hm = np.zeros((self.ndir, self.nlag+2)) 122 | 123 | 124 | # tail_data = list() 125 | # head_data = list() 126 | # for i in range(self.ndir): 127 | # tail_data.append(list()) 128 | # head_data.append(list()) 129 | # for j in range(self.nlag+2): 130 | # tail_data[i].append(list()) 131 | # head_data[i].append(list()) 132 | # calculate the value interval of each lag 133 | # method 1 134 | # temp = np.arange(0, self.nlag+2, 1.) 135 | # temp -= 1 136 | # temp[0] = 0 137 | # temp *= self.xlag 138 | # length, = temp.shape 139 | # temp.reshape((length, 1)) 140 | # upper = temp + self.xltol 141 | # upper[0][0] = 0 142 | # lower = temp - self.xltol 143 | # lower[0][0] = 0 144 | # self.lag_interval = np.concatenate((lower, upper), axis=1) 145 | # method 2 146 | self.lag_interval = np.ones((self.nlag+2, 2)) 147 | self.lag_interval[:2, :] = 0 148 | self.lag_interval *= self.xlag 149 | scale = np.arange(0, self.nlag+2, 1) - 1 150 | self.lag_interval[:, 0] *= scale 151 | self.lag_interval[:, 1] *= scale 152 | self.lag_interval[:, 0] -= self.xltol 153 | self.lag_interval[0, 0] = 0.0 154 | self.lag_interval[:, 1] += self.xltol 155 | self.lag_interval[0, 1] = 0.0 156 | # The mathematical azimuth is measured counterclockwise from EW and 157 | # not clockwise from NS as the conventional azimuth is: 158 | # name it azmuth instead of azimuth 159 | azmuth = np.deg2rad(90.0 - np.array(self.azm)) 160 | uvxazm = np.cos(azmuth) 161 | uvyazm = np.sin(azmuth) 162 | csatol = np.cos(np.deg2rad(self.atol)) 163 | # for i, az in enumerate(self.atol): 164 | # if az == 90: 165 | # csatol[i] = 0 166 | # The declination is measured positive from vertical (up) rather than 167 | # negative down from horizontal: 168 | declin = np.deg2rad(90.0 - np.array(self.dip)) 169 | uvzdec = np.cos(declin) 170 | uvhdec = np.sin(declin) 171 | csdtol = np.cos(np.deg2rad(self.dtol)) 172 | # for i, di in enumerate(self.dtol): 173 | # if di == 90: 174 | # csdtol[i] = 0 175 | # square of maxmium distance 176 | dismxs = ((self.nlag + 0.5 - np.finfo('float').eps) * self.xlag)**2 177 | num_of_data = int(self.vr.shape[0]) 178 | ijlist = list() 179 | for i in range(num_of_data - 1): 180 | for j in range(i+1, num_of_data): 181 | ijlist.append((i, j)) 182 | for ijtuple in ijlist: 183 | i, j = ijtuple 184 | # i_coor = list(self.vr[i])[:-1] 185 | # j_coor = list(self.vr[j])[:-1] 186 | # calculate the lag corresponding to the current pair 187 | # dx = j_coor[0] - i_coor[0] 188 | # dy = j_coor[1] - i_coor[1] 189 | # dz = j_coor[2] - i_coor[2] 190 | dx = self.vr['x'][j] - self.vr['x'][i] 191 | dy = self.vr['y'][j] - self.vr['y'][i] 192 | dz = self.vr['z'][j] - self.vr['z'][i] 193 | # square of lag h 194 | hs = dx**2 + dy**2 + dz**2 195 | if hs > dismxs: 196 | # print("skip pair {},{} for maxdistance".format(i, j)) 197 | continue # skip to next pair 198 | h = np.sqrt(hs) 199 | # determine which lag 200 | lag_num = list() # could be in two lags 201 | for k, lg_in in enumerate(self.lag_interval): 202 | if h >= lg_in[0] and h <= lg_in[1]: 203 | 
lag_num.append(k)
            if len(lag_num) == 0:
                # print("skip pair {},{} for no lag".format(i, j))
                continue  # skip if we cannot find which lag

            for idir in range(self.ndir):
                # an omni-directional variogram accepts any azimuth/dip
                omni = self.atol[idir] >= 90.0
                # check for an acceptable azimuth angle:
                dxy = np.sqrt(max(dx**2 + dy**2, 0.0))
                if dxy < np.finfo('float').eps:
                    dcazm = 1.0
                else:
                    dcazm = (dx*uvxazm[idir] + dy*uvyazm[idir])/dxy
                if np.abs(dcazm) < csatol[idir]:
                    # print("skip pair {},{} for az".format(i, j))
                    continue
                # check the horizontal bandwidth criterion (maximum
                # deviation perpendicular to the specified azimuth):
                band = uvxazm[idir]*dy - uvyazm[idir]*dx
                if np.abs(band) > self.bandwh[idir] and not omni:
                    # print("skip pair {},{} for az bwd".format(i, j))
                    continue
                # check for an acceptable dip angle:
                if dcazm < 0:
                    dxy = -dxy
                if lag_num[0] == 0:
                    dcdec = 0
                else:
                    dcdec = (dxy*uvhdec[idir] + dz*uvzdec[idir])/h
                if np.abs(dcdec) < csdtol[idir]:
                    # print("skip pair {},{} for dip".format(i, j))
                    continue
                # check the vertical bandwidth criterion (maximum deviation
                # perpendicular to the specified dip direction):
                band = uvhdec[idir]*dz - uvzdec[idir]*dxy
                if np.abs(band) > self.bandwd[idir] and not omni:
                    # print("skip pair {},{} for dip bwd".format(i, j))
                    continue
                # this pair is acceptable; proceed to compute the variogram
                # and sort out which sample is the tail and which the head:
                if dcazm >= 0 and dcdec <= 0:
                    vrh = self.vr[self.property_name[0]][i]
                    vrt = self.vr[self.property_name[0]][j]
                    if omni:
                        vrtpr = self.vr[self.property_name[0]][i]
                        vrhpr = self.vr[self.property_name[0]][j]
                else:
                    vrh = self.vr[self.property_name[0]][j]
                    vrt = self.vr[self.property_name[0]][i]
                    if omni:
                        vrtpr = self.vr[self.property_name[0]][j]
                        vrhpr = self.vr[self.property_name[0]][i]
                # reject this pair on the basis of missing values:
                if vrt < self.tmin or vrh < self.tmin or \
                        vrt > self.tmax or vrh > self.tmax:
                    continue
                # COMPUTE THE APPROPRIATE "VARIOGRAM" MEASURE
                # ***Semivariogram***
                for ilag in lag_num:
                    self.npair[idir][ilag] += 1
                    self.distance[idir][ilag] += h
                    self.tm[idir][ilag] += vrt
                    self.hm[idir][ilag] += vrh
                    self.gam[idir][ilag] += (vrh - vrt)**2
                    if omni:
                        if vrtpr >= self.tmin and vrhpr >= self.tmin and \
                                vrtpr < self.tmax and vrhpr < self.tmax:
                            self.npair[idir][ilag] += 1
                            self.distance[idir][ilag] += h
                            self.tm[idir][ilag] += vrtpr
                            self.hm[idir][ilag] += vrhpr
                            self.gam[idir][ilag] += (vrhpr - vrtpr)**2
        self.gam /= self.npair
        if self.standardize is True:
            self.gam /= self.variance
        self.gam /= 2

        self.distance /= self.npair
        self.tm /= self.npair
        self.hm /= self.npair
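    # The measure accumulated above is the classical semivariogram,
    # gamma(h) = 1/(2*N(h)) * sum over pairs of (head - tail)**2.
    # A quick 1-D sanity check of the same formula (values illustrative):
    # >>> z = np.array([1.0, 3.0, 2.0, 5.0])
    # >>> d1 = z[1:] - z[:-1]              # all pairs one step apart
    # >>> gam1 = 0.5 * np.mean(d1 ** 2)    # semivariogram at lag 1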
    def graph(self):
        abscissa = np.arange(0, self.nlag+1, 1.0)
        abscissa *= self.xlag
        fig, axes = plt.subplots(nrows=self.ndir, ncols=1)
        if isinstance(axes, np.ndarray):  # more than one direction
            for i, ax in enumerate(axes):
                ordinate = np.insert(self.gam[i][2:], 0, None)
                ax.scatter(abscissa, ordinate)
                ax.set_title(r"Azimuth: {}$^\circ$({}$^\circ$), ".format(
                    self.azm[i], self.atol[i]) +
                             r"Dip: {}$^\circ$({}$^\circ$)".format(
                                 self.dip[i], self.dtol[i]))
                ax.set_ylim(bottom=0)
                ax.set_xlim(left=0)
                ax.grid()
        else:
            ordinate = np.insert(self.gam[0][2:], 0, None)
            axes.scatter(abscissa, ordinate)
            axes.set_title(r"Azimuth: {}$^\circ$({}$^\circ$), ".format(
                self.azm[0], self.atol[0]) +
                           r"Dip: {}$^\circ$({}$^\circ$)".format(
                               self.dip[0], self.dtol[0]))
            axes.set_ylim(bottom=0)
            axes.set_xlim(left=0)
            axes.grid()
        fig.tight_layout()
        # plt.draw()
        return fig, axes


if __name__ == "__main__":
    data_analysis = Gamv("testData/xihuSmall_sparse_gamv.par")
    data_analysis.read_data()
    data_analysis.gamv()
    data_analysis.graph()
--------------------------------------------------------------------------------
/pygeostatistics/gslib_reader.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Read gslib file format

Created on Wed Sep 5th 2018
"""
from __future__ import absolute_import, division, print_function

__author__ = "yuhao"

import numpy as np
import pandas as pd
from scipy.spatial.distance import pdist
from mpl_toolkits.mplot3d import Axes3D


class SpatialData(object):
    def __init__(self, file_path):
        self.datafl = file_path
        self.vr = None
        self.property_name = None
        self._2d = False
        self._read_data()

    def _read_data(self):
        """
        read gslib file
        """
        column_name = []
        with open(self.datafl, 'r') as fin:
            _ = fin.readline().strip()
            ncols = int(fin.readline().strip())
            for _ in range(ncols):
                column_name.append(fin.readline().strip())
        self.property_name = [item for item in column_name
                              if item not in ['x', 'y', 'z']]
        df = pd.read_csv(self.datafl, sep='\t', header=None, names=column_name,
                         skiprows=ncols+2)
        if 'z' not in column_name:
            self._2d = True
            column_name.append('z')
            df['z'] = 0
        self.df = df

        data_dtype = np.dtype({
            'names': column_name,
            'formats': ['f8'] * len(column_name)})

        self.vr = np.core.records.fromarrays(
            df.values.transpose(), dtype=data_dtype)
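    # Note: sep='\t' above assumes a tab-delimited file. For GSLIB files
    # delimited by runs of spaces, a whitespace separator is the usual
    # choice, e.g. (illustrative alternative):
    # >>> df = pd.read_csv(self.datafl, sep=r'\s+', header=None,
    # ...                  names=column_name, skiprows=ncols + 2)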
    def preview(self):
        return self.df.head(20)

    def pdf(self, ax, bins=15):
        hist, bin_edges = np.histogram(self.vr[self.property_name[0]],
                                       bins=bins)
        ax.set_title("pdf")
        ax.bar(bin_edges[:-1], hist, width=bin_edges[1]-bin_edges[0],
               color='red', alpha=0.5)

    def cdf(self, ax):
        data = self.vr[self.property_name[0]]
        data = np.sort(data)
        cdf = np.arange(1, len(data) + 1) / len(data)
        ax.set_title("cdf")
        ax.plot(data, cdf)

    @property
    def maximum(self):
        return self.df[self.property_name[0]].max()

    @property
    def minimum(self):
        return self.df[self.property_name[0]].min()

    @property
    def mean(self):
        return self.df[self.property_name[0]].mean()

    @property
    def variance(self):
        return self.df[self.property_name[0]].var()

    @property
    def median(self):
        return np.median(self.vr[self.property_name[0]])

    @property
    def upper_quartile(self):
        return self.df[self.property_name[0]].quantile(0.75)

    @property
    def lower_quartile(self):
        return self.df[self.property_name[0]].quantile(0.25)

    @property
    def num(self):
        return self.vr.shape[0]

    def distance(self):
        num = self.vr.shape[0]
        return pdist(np.concatenate((self.vr['x'].reshape((num, 1)),
                                     self.vr['y'].reshape((num, 1))), axis=1))

    @property
    def summary(self):
        return (
            "Summary\n"
            "-------\n"
            "Number of Points: {}\n"
            "Mean: {}\n"
            "Variance: {}\n"
            "Minimum: {}\n"
            "Lower Quartile: {}\n"
            "Median: {}\n"
            "Upper Quartile: {}\n"
            "Maximum: {}\n").format(
                self.num,
                self.mean,
                self.variance,
                self.minimum,
                self.lower_quartile,
                self.median,
                self.upper_quartile,
                self.maximum)

    def scatter(self, ax, prop=None):
        """
        Plot a scatter of the data points on a given axis

        Parameters
        ----------
        ax : AxesSubplot or Axes3DSubplot
            axis on which the scatter plot is drawn
        prop : str
            name of the property to display with a colormap
        """
        sc = None
        prop = self.property_name[0] if prop is None else prop

        if not self._2d and isinstance(ax, Axes3D):
            sc = ax.scatter(
                self.vr['x'], self.vr['y'], self.vr['z'],
                c=self.vr[prop])
        else:
            sc = ax.scatter(
                self.vr['x'], self.vr['y'], c=self.vr[prop])
        return sc
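if __name__ == '__main__':
    # Minimal usage sketch (assumes the bundled demo file; adjust the path
    # as needed).
    import matplotlib.pyplot as plt
    data = SpatialData("testData/test.gslib")
    print(data.summary)
    fig, ax = plt.subplots()
    sc = data.scatter(ax)
    fig.colorbar(sc)
    plt.show()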
--------------------------------------------------------------------------------
/pygeostatistics/krige2d.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
A straightforward 2D kriging program

Created on Fri Nov 11 2016
"""
__author__ = "yuhao"

import time
from itertools import product

import yaml
import numpy as np
from scipy import linalg
import matplotlib.pyplot as plt

from pygeostatistics.yaml_patch import loader_patched


class Krige2d():
    def __init__(self, param_file):
        self.param_file = param_file
        self._read_params()
        self._check_params()
        self.property_name = None
        self.vr = None
        self.maxcov = None
        self.rotmat = None
        self.estimation = None
        self.estimation_variance = None

        self.xdb = None
        self.ydb = None

        self._block_covariance = None
        self._unbias = None

        self._2d = False

    def _read_params(self):
        with open(self.param_file) as fin:
            params = yaml.load(fin, Loader=loader_patched())
            self.datafl = params['datafl']    #: 'testData/test.gslib',
            self.icolx = params['icolx']      #: 1,
            self.icoly = params['icoly']      #: 2,
            self.icolvr = params['icolvr']    #: 0,
            self.tmin = params['tmin']        #: -1.0e21,
            self.tmax = params['tmax']        #: 1.0e21,
            self.idbg = params['idbg']        #: 3,
            self.dbgfl = params['dbgfl']      #: 'kb2d.dbg',
            self.outfl = params['outfl']      #: 'out.dat',
            self.nx = params['nx']            #: 50,
            self.xmn = params['xmn']          #: 0.5,
            self.xsiz = params['xsiz']        #: 1.0,
            self.ny = params['ny']            #: 50,
            self.ymn = params['ymn']          #: 0.5,
            self.ysiz = params['ysiz']        #: 1.0,
            self.nxdis = params['nxdis']      #: 1,
            self.nydis = params['nydis']      #: 1,
            self.ndmin = params['ndmin']
            self.ndmax = params['ndmax']
            self.radius = params['radius']
            self.ktype = params['isk']
            self.skmean = params['skmean']
            self.nst = params['nst']          #: 1,
            self.c0 = params['c0']            #: 0,
            self.it = params['it']            #: [],
            self.cc = params['cc']            #: [],
            self.azm = params['azm']          #: [],
            self.a_max = params['a_max']      #: [],
            self.a_min = params['a_min']      #: []

    def read_data(self):
        data_list = None
        with open(self.datafl, 'r') as fin:
            data_list = fin.readlines()
        name = data_list[0].strip()
        ncols = int(data_list[1].strip())
        column_name = [item.strip() for item in data_list[2: ncols+2]]
        self.property_name = [item for item in column_name
                              if item not in ['x', 'y', 'z']]
        if 'z' not in column_name:
            self._2d = True
            column_name.append('z')
            data_list = [tuple(item.strip().split() + ['0'])
                         for item in data_list[ncols+2:]]
        else:
            data_list = [tuple(item.strip().split())
                         for item in data_list[ncols+2:]]
        data_dtype = np.dtype({
            'names': column_name,
            'formats': ['f8'] * len(column_name)})
        self.vr = np.array(data_list, dtype=data_dtype)

    def _check_params(self):
        for vtype, a_range in zip(self.it, self.a_max):
            if vtype not in np.arange(1, 6):
                raise ValueError("INVALID variogram number {}".format(vtype))
            if vtype == 4:
                if a_range < 0:
                    raise ValueError("INVALID power variogram")
                elif a_range > 2.0:
                    raise ValueError("INVALID power variogram")
            if vtype == 5:
                raise ValueError("Cannot handle this type of variogram.")

    def _rotation_matrix(self):
        azimuth = np.deg2rad(90.0 - np.array(self.ang))
        self.rotmat = np.zeros((4, self.nst))
        self.rotmat[0] = np.cos(azimuth)
        self.rotmat[1] = np.sin(azimuth)
        self.rotmat[2] = -np.sin(azimuth)
        self.rotmat[3] = np.cos(azimuth)

    def _max_covariance(self):
        PMX = 9999.0  # max value used for the power model
        self.maxcov = self.c0
        for kind, contri in zip(self.it, self.cc):
            if kind == 4:
                self.maxcov += PMX
            else:
                self.maxcov += contri

    def _cova2(self, x1, y1, x2, y2):
        "calculate covariance using the provided variogram model"
        PMX = 9999.0  # max value used for the power model
        dx = x2 - x1
        dy = y2 - y1
        # check for small distance
        if (dx*dx + dy*dy) < np.finfo("float").eps:
            return self.maxcov
        # for non-zero distance
        cova = 0.0
        for iss in range(self.nst):
            dx1 = dx*self.rotmat[0, iss] + dy*self.rotmat[1, iss]
            dy1 = (dx*self.rotmat[2, iss] + dy*self.rotmat[3, iss]) / \
                self.anis[iss]
            h = np.sqrt(np.maximum(dx1*dx1 + dy1*dy1, 0))
            if self.it[iss] == 1:  # spherical model
                hr = h/self.a_max[iss]
                if hr < 1:
                    cova += self.cc[iss] * (1 - hr * (1.5 - 0.5 * hr * hr))
            elif self.it[iss] == 2:  # exponential model
                cova += self.cc[iss]*np.exp(-3.0*h/self.a_max[iss])
            elif self.it[iss] == 3:  # gaussian model
                cova += self.cc[iss]*np.exp(-3.0 * h * h /
                                            (self.a_max[iss] * self.a_max[iss]))
            elif self.it[iss] == 4:  # power model
                cova += PMX - self.cc[iss]*(h**(self.a_max[iss]))
        return cova
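    # For orientation: _cova2 rotates the separation vector into the
    # direction of the azimuth and divides the minor-direction component by
    # the anisotropy ratio, so an anisotropic model reduces to an isotropic
    # one. Illustrative numbers for azm=0 (north) and anis=0.5:
    # a separation of 10 m along the azimuth stays h = 10, while 10 m
    # perpendicular to it becomes h = 10 / 0.5 = 20, i.e. the model range
    # in the minor direction is effectively halved.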
    def _block_discretization(self):
        """
        Set up the discretization points per block. Figure out how many
        are needed and the spacing, and fill the xdb and ydb arrays with
        the offsets relative to the block center.
        """
        xdis = self.xsiz / np.maximum(self.nxdis, 1.0)
        ydis = self.ysiz / np.maximum(self.nydis, 1.0)
        xloc = -0.5*(self.xsiz + xdis)
        yloc = -0.5*(self.ysiz + ydis)
        xdb_temp = np.arange(1, self.nxdis+1, 1) * xdis + xloc
        ydb_temp = np.arange(1, self.nydis+1, 1) * ydis + yloc
        xdb, ydb = np.meshgrid(xdb_temp, ydb_temp)
        self.xdb, self.ydb = xdb.flat, ydb.flat
        # xdb and ydb are nxdis * nydis arrays

    @property
    def unbias(self):
        "the unbiasedness constraint"
        if self._unbias is None:
            self._unbias = self._cova2(self.xdb[0], self.ydb[0],
                                       self.xdb[0], self.ydb[0])
        return self._unbias

    @property
    def block_covariance(self):
        "the block covariance"
        if self._block_covariance is None:
            self._block_covariance = 0
            if self.ndb <= 1:  # point kriging
                self._block_covariance = self.unbias
            else:  # block kriging
                cov = list()
                for x1, y1 in zip(self.xdb, self.ydb):
                    for x2, y2 in zip(self.xdb, self.ydb):
                        cov.append(self._cova2(x1, y1, x2, y2))
                cov = np.array(cov).reshape((self.ndb, self.ndb))
                cov[np.diag_indices_from(cov)] -= self.c0
                self._block_covariance = np.mean(cov)
        return self._block_covariance

    def _preprocess(self):
        self._read_params()
        # number of points in the discretization block
        self.ndb = self.nxdis * self.nydis

        self.anis = np.array(self.a_min)/np.array(self.a_max)
        self.ang = np.array(self.azm)

        self._rotation_matrix()
        self._max_covariance()
        self._block_discretization()
        self.block_kriging = True
        if self.nxdis == 1 and self.nydis == 1:
            self.block_kriging = False
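    # Example of the discretization offsets produced above: with
    # xsiz = ysiz = 1 and nxdis = nydis = 2, xdis = ydis = 0.5 and the four
    # offsets relative to the block centre are (+-0.25, +-0.25); with
    # nxdis = nydis = 1 the single offset is (0, 0) and point kriging is
    # used instead of block kriging.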
    def kd2d(self):
        self._preprocess()
        print("Start kriging...")
        # For each target point on the grid
        xloc_temp = np.arange(self.nx) * self.xsiz + self.xmn
        yloc_temp = np.arange(self.ny) * self.ysiz + self.ymn
        yloc_mesh, xloc_mesh = np.meshgrid(yloc_temp, xloc_temp)
        self.estimation = list()
        self.estimation_variance = list()
        num_of_points = self.nx*self.ny
        t1 = time.time()
        ts = 0
        percent_od = 0
        for idx, (xloc, yloc) in enumerate(zip(xloc_mesh.flat, yloc_mesh.flat)):
            ts_1 = time.time()
            # Find the nearest samples:
            nums, dist = self._search(xloc, yloc)
            ts += time.time() - ts_1
            # are there enough samples?
            if len(dist) < self.ndmin:
                print("Block {},{} not estimated.".format(
                    (xloc-self.xmn)/self.xsiz,
                    (yloc-self.ymn)/self.ysiz))
                self.estimation.append(np.nan)
                self.estimation_variance.append(np.nan)
                continue
            na = dist.shape[0]

            # Put coordinates and values of neighborhood samples into xa, ya, vra
            xa = self.vr['x'][nums]
            ya = self.vr['y'][nums]
            vra = self.vr[self.property_name[0]][nums]
            # handle the situation of only one sample:
            if na == 1:
                est, estv = self._one_sample(xloc, yloc, xa, ya, vra)
                self.estimation.append(est)
                self.estimation_variance.append(estv)
            else:  # many samples
                est, estv = self._many_sample(xloc, yloc, xa, ya, vra)
                self.estimation.append(est)
                self.estimation_variance.append(estv)

            percent = np.round(idx/num_of_points*100, decimals=0)
            dtime = time.time() - t1
            if percent != percent_od:
                print("{}% ".format(percent) +
                      "."*20 + "{}s elapsed.".format(np.round(dtime, decimals=3)))
                percent_od = percent
        print("Kriging finished.")
        print("Time used for searching: {}s".format(ts))
        self.estimation = np.array(self.estimation).reshape((self.nx, self.ny))
        self.estimation_variance = np.array(
            self.estimation_variance).reshape((self.nx, self.ny))

    def _search(self, xloc, yloc):
        "Return indexes of, and squared distances to, the ndmax nearest samples"
        dist = list()
        nums = list()
        # Scan all the samples and keep those within the search radius:
        for idd in range(self.vr.shape[0]):
            dx = self.vr['x'][idd] - xloc
            dy = self.vr['y'][idd] - yloc
            h2 = dx*dx + dy*dy
            if h2 > self.radius*self.radius:
                continue
            nums.append(idd)
            dist.append(h2)
        if len(dist) == 0:
            return np.array([]), np.array([])
        # Sort the accepted samples in increasing order of distance and
        # retain at most ndmax of them:
        dist = np.array(dist)
        nums = np.array(nums)
        sort_index = np.argsort(dist)
        dist = dist[sort_index][:self.ndmax]
        nums = nums[sort_index][:self.ndmax]
        return nums, dist

    def _one_sample(self, xloc, yloc, xa, ya, vra):
        # Left Hand Side Covariance:
        left = self._cova2(xa[0], ya[0], xa[0], ya[0])

        # Right Hand Side Covariance:
        xx = xa[0] - xloc
        yy = ya[0] - yloc
        if not self.block_kriging:  # point kriging
            right = self._cova2(xx, yy, self.xdb[0], self.ydb[0])
        else:  # block kriging
            right = 0.0
            for i in range(self.ndb):
                right += self._cova2(xx, yy, self.xdb[i], self.ydb[i])
                dx = xx - self.xdb[i]
                dy = yy - self.ydb[i]
                if dx*dx + dy*dy < np.finfo('float').eps:
                    right -= self.c0
            right /= self.ndb

        # Estimation
        if self.ktype == 0:  # Simple kriging
            # Solve for lambda
            s = right / self.block_covariance

            est = s * vra[0] + (1.0 - s) * self.skmean
            estv = self.block_covariance - s * right
            return est, estv
        else:  # Ordinary kriging
            est = vra[0]
            estv = self.block_covariance - 2.0 * right + left
            return est, estv

    def _many_sample(self, xloc, yloc, xa, ya, vra):
        "Solve the 
Kriging System with more than one sample" 330 | na = len(vra) 331 | # number of equations, for simple kriging there're na, 332 | # for ordinary there're na + 1 333 | neq = na + self.ktype 334 | 335 | # Establish left hand side covariance matrix: 336 | left = np.full((neq, neq), np.nan) 337 | for i, j in product(range(na), range(na)): 338 | if np.isnan(left[j, i]): 339 | left[i, j] = self._cova2(xa[i], ya[i], xa[j], ya[j]) 340 | else: 341 | left[i, j] = left[j, i] 342 | 343 | # Establish the Right Hand Side Covariance: 344 | right = list() 345 | 346 | for j in range(na): 347 | xx = xa[j] - xloc 348 | yy = ya[j] - yloc 349 | if not self.block_kriging: 350 | cb = self._cova2(xx, yy, self.xdb[0], self.ydb[0]) 351 | else: 352 | cb = 0.0 353 | for i in range(self.ndb): 354 | cb += self._cova2(xx, yy, self.xdb[i], self.ydb[i]) 355 | dx = xx - self.xdb[i] 356 | dy = yy - self.ydb[i] 357 | if dx*dx + dy*dy < np.finfo('float').eps: 358 | cb -= self.c0 359 | cb /= self.ndb 360 | right.append(cb) 361 | 362 | if self.ktype == 1: # for ordinary kriging 363 | # Set the unbiasedness constraint 364 | left[neq-1, :-1] = self.unbias 365 | left[:-1, neq-1] = self.unbias 366 | left[-1, -1] = 0 367 | right.append(self.unbias) 368 | 369 | # Solve the kriging system 370 | s = None 371 | try: 372 | s = linalg.solve(left, right) 373 | except linalg.LinAlgError as inst: 374 | print("Warning kb2d: singular matrix for block " + \ 375 | "{},{}".format((xloc-self.xmn)/self.xsiz, 376 | (yloc-self.ymn)/self.ysiz)) 377 | return np.nan, np.nan 378 | 379 | estv = self.block_covariance 380 | if self.ktype == 1: # ordinary kriging 381 | estv -= s[-1]*self.unbias # s[-1] is mu 382 | est = np.sum(s[:na]*vra[:na]) 383 | estv -= np.sum(s[:na]*right[:na]) 384 | if self.ktype == 0: # simple kriging 385 | est += (1 - np.sum(s[:na])) * self.skmean 386 | return est, estv 387 | 388 | def view(self, pname=None): 389 | pname = self.property_name[0] if pname is None else pname 390 | fig, ax = plt.subplots() 391 | im = ax.imshow(self.estimation.T, interpolation='nearest', 392 | origin='lower', 393 | extent=[self.xmn, 394 | self.xmn + (self.nx - 1)*self.xsiz, 395 | self.ymn, 396 | self.ymn + (self.ny - 1)*self.ysiz]) 397 | ax.set_xlabel("X (m)") 398 | ax.set_ylabel("Y (m)") 399 | ax.set_title("Estimation") 400 | ax.set_aspect('equal') 401 | fig.colorbar(im) 402 | fig.show() 403 | 404 | if __name__ == '__main__': 405 | test_krige = Krige2d("testData/test_krige2d.par") 406 | test_krige.read_data() 407 | test_krige.kd2d() 408 | test_krige.view() 409 | -------------------------------------------------------------------------------- /pygeostatistics/normal_score_transform.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Normal Score Transform 4 | 5 | Created on Tue Dec 6 2016 6 | """ 7 | from __future__ import division, print_function, absolute_import 8 | 9 | import numpy as np 10 | from numba import jit 11 | from scipy import interpolate 12 | 13 | 14 | class NormalScoreTransform(object): 15 | def __init__(self, data, weights, zmin, zmax, ltail, ltpar, utail, utpar): 16 | """ 17 | Perform Normal score transform of data. 
18 | 19 | Atrributes 20 | ---------- 21 | data: 1-d ndarray 22 | data to be transformed 23 | weights: 1-d ndarray 24 | declustering weights for transform 25 | zmin, zmax: float, float 26 | allowable values for back-transform 27 | ltail: {1, 2} 28 | option to handle values smaller than minimun data, lower tail 29 | ltpar: float 30 | parameter for lower tail option 31 | utail: int 32 | option to handle values larger than maximum data, upper tail 33 | utpar: float 34 | parameter for upper tail option 35 | """ 36 | self.data = np.array(data) # input data ndarray 37 | self.weights = np.array(weights) # input declustering weight ndarray 38 | 39 | self.transform_table = None 40 | self.zmin = zmin # allowable value for backtransform 41 | self.zmax = zmax # allowable value for backtransform 42 | self.ltail = ltail # option to handle values less than vrg[0]: 43 | self.ltpar = ltpar # parameter required for option ltail 44 | self.utail = utail # option to handle values greater than vrg[-1]: 45 | self.utpar = utpar # parameter required for option utail 46 | 47 | def _create_table(self): 48 | "create transformation lookup table" 49 | # sort input data by value 50 | sort_index = np.argsort(self.data) 51 | sorted_data = self.data[sort_index] 52 | sorted_weight = self.weights[sort_index] 53 | # compute cumulative probabilities 54 | weight_sum = np.sum(sorted_weight) 55 | cum_weight = np.cumsum(sorted_weight / weight_sum) 56 | cum_weight_old = np.append(np.array([0]), cum_weight[:-1]) 57 | average = 0.5 * (cum_weight + cum_weight_old) 58 | # calculate normal score value: 59 | score = [gauinv(element) for element in average] 60 | # create lookup table 61 | table = [(da, sc) for da, sc in zip(sorted_data, score)] 62 | self.transform_table = np.array(table, dtype=np.dtype({ 63 | 'names': ['value', 'score'], 64 | 'formats': ['f8'] * 2 65 | })) 66 | 67 | def create_transform_func(self): 68 | self._create_table() 69 | nrows = self.transform_table['value'].shape[0] 70 | self.forward_func = interpolate.interp1d( 71 | self.transform_table['value'].reshape((nrows,)), 72 | self.transform_table['score'].reshape((nrows,)), 73 | kind='linear', 74 | fill_value="extrapolate") 75 | 76 | self.back_func = interpolate.interp1d( 77 | self.transform_table['score'].reshape((nrows,)), 78 | self.transform_table['value'].reshape((nrows,)), 79 | kind='linear', 80 | fill_value="extrapolate") 81 | 82 | def transform(self, values): 83 | "transform data to normal score" 84 | return self.forward_func(values) 85 | 86 | def back_transform(self, scores): 87 | "transform normal score back to orginal data" 88 | values = np.full_like(scores, np.nan) 89 | 90 | lo_value = self.transform_table['value'][0] 91 | up_value = self.transform_table['value'][-1] 92 | lo_score = self.transform_table['score'][0] 93 | up_score = self.transform_table['score'][-1] 94 | # scores in normal range 95 | normal_mask = np.logical_and(scores <= up_score, scores >= lo_score) 96 | normal_scores = scores[normal_mask] 97 | values[normal_mask] = self.back_func(normal_scores) 98 | # scores in lower tail: 1=linear, 2=power 99 | lower_mask = scores < lo_score 100 | lower_scores = scores[lower_mask] 101 | temp = list() 102 | for sc in lower_scores: 103 | backtr = lo_value 104 | cdflo = gcum(lo_score) 105 | cdfbt = gcum(sc) 106 | if self.ltail == 1: # linear 107 | backtr = powint(0, cdflo, self.zmin, lo_value, cdfbt, 1) 108 | temp.append(backtr) 109 | elif self.ltail == 2: # power 110 | cpow = 1.0 / self.ltpar 111 | backtr = powint(0, cdflo, self.zmin, lo_value, cdfbt, cpow) 
112 | temp.append(backtr) 113 | values[lower_mask] = temp 114 | # scores in upper tail: 1=linear, 2=power, 4=hyperbolic 115 | upper_mask = scores > up_score 116 | upper_scores = scores[upper_mask] 117 | temp = list() 118 | for sc in upper_scores: 119 | backtr = up_value 120 | cdfhi = gcum(up_score) 121 | cdfbt = gcum(sc) # cdf value of the score to be back-transformed 122 | if self.utail == 1: # linear 123 | backtr = powint(cdfhi, 1.0, up_value, self.zmax, cdfbt, 1) 124 | temp.append(backtr) 125 | elif self.utail == 2: # power 126 | cpow = 1.0 / self.utpar 127 | backtr = powint(cdfhi, 1.0, up_value, self.zmax, cdfbt, cpow) 128 | temp.append(backtr) 129 | elif self.utail == 4: # hyperbolic 130 | l = (up_value**self.utpar) * (1 - gcum(up_score)) 131 | backtr = (l / (1 - gcum(sc)))**(1 / self.utpar) 132 | temp.append(backtr) 133 | values[upper_mask] = temp 134 | return values 135 | 136 | @jit(nopython=True) 137 | def gauinv(p): 138 | """ 139 | Computes the inverse of the standard normal cumulative distribution 140 | function with a numerical approximation. 141 | 142 | Parameters 143 | ---------- 144 | p : scalar, ndarray 145 | Cumulative probability funciton value 146 | 147 | Returns 148 | ------- 149 | xp : scalar, ndarray 150 | Quantile function value 151 | 152 | Notes 153 | ----- 154 | .. [1] Statistical Computing, by W.J. Kennedy, Jr. and James E. Gentle, 155 | 1980, p. 95. 156 | """ 157 | lim = 1.0e-10 158 | p0 = -0.322232431088 159 | p1 = -1.0 160 | p2 = -0.342242088547 161 | p3 = -0.0204231210245 162 | p4 = -0.0000453642210148 163 | q0 = 0.0993484626060 164 | q1 = 0.588581570495 165 | q2 = 0.531103462366 166 | q3 = 0.103537752850 167 | q4 = 0.0038560700634 168 | # check for an error situation 169 | if p < lim: 170 | return -1e10 171 | if p > 1 - lim: 172 | return 1e10 173 | pp = p 174 | if p > 0.5: 175 | pp = 1 - pp 176 | if p == 0.5: 177 | return 0 178 | y = np.sqrt(np.log(1.0/(pp*pp))) 179 | xp = y + ((((y*p4 + p3)*y + p2)*y + p1)*y + p0) / \ 180 | ((((y*q4 + q3)*y + q2)*y + q1)*y + q0) 181 | if p == pp: 182 | xp = -xp 183 | 184 | return xp 185 | 186 | @jit(nopython=True) 187 | def gcum(x): 188 | """ 189 | Evaluate the standard normal cdf given a normal deviate x. gcum is 190 | the area under a unit normal curve to the left of x. The results are 191 | accurate only to about 5 decimal places. 192 | """ 193 | z = -x if x < 0 else x 194 | t = 1. / (1. 
+ 0.2316419 * z) 195 | gcum = t*(0.31938153 + t*(-0.356563782 + t*(1.781477937 + \ 196 | t*(-1.821255978 + t*1.330274429)))) 197 | e2 = np.exp(-z*z/2.)*0.3989422803 if z <= 6 else 0 198 | gcum = 1.0 - e2 * gcum 199 | if x >= 0: 200 | return gcum 201 | else: 202 | return 1.0 - gcum 203 | 204 | @jit(nopython=True) 205 | def powint(xlow, xhigh, ylow, yhigh, value, power): 206 | "power interpolation" 207 | if xhigh-xlow < np.finfo(float).eps: 208 | return (yhigh + ylow) / 2.0 209 | else: 210 | return ylow + (yhigh - ylow) * \ 211 | (((value - xlow) / (xhigh - xlow))**power) 212 | -------------------------------------------------------------------------------- /pygeostatistics/super_block.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | class for performing Super Block Search 4 | 5 | Created on Tue Nov 22 2016 6 | """ 7 | from __future__ import division, print_function 8 | from itertools import product 9 | import numpy as np 10 | from numba import jit 11 | 12 | 13 | class SuperBlockSearcher(object): 14 | """ 15 | Class for performing Super Block Search 16 | 17 | This subroutine sets up a 3-D "super block" model and orders the data 18 | by super block number. The limits of the super block is set to the 19 | minimum and maximum limits of the grid; data outside are assigned to 20 | the nearest edge block. 21 | 22 | The idea is to establish a 3-D block network that contains all the 23 | relevant data. The data are then sorted by their index location in 24 | the search network, i.e., the index location is given after knowing 25 | the block index in each coordinate direction (ix,iy,iz): 26 | ii = (iz-1)*nxsup*nysup + (iy-1)*nxsup + ix 27 | An array, the same size as the number of super blocks, is constructed 28 | that contains the cumulative number of data in the model. With this 29 | array it is easy to quickly check what data are located near any given 30 | location. 31 | 32 | Parameters 33 | ---------- 34 | nx,xmn,xsiz Definition of the X grid being considered 35 | ny,ymn,ysiz Definition of the Y grid being considered 36 | nz,zmn,zsiz Definition of the Z grid being considered 37 | vr(nd) x, y, z, other variables 38 | MAXSB[X,Y,Z] Maximum size of super block network 39 | 40 | """ 41 | def __init__(self): 42 | # grid definition 43 | self.nx = None 44 | self.xmn = None 45 | self.xsiz = None 46 | self.ny = None 47 | self.ymn = None 48 | self.ysiz = None 49 | self.nz = None 50 | self.zmn = None 51 | self.zsiz = None 52 | # data 53 | self.vr = None # with x,y,z and variable and other secondary variable 54 | self.MAXSB = [] 55 | 56 | # rotation matrix 57 | self.rotmat = None # rotation matrix for searching!!! 58 | self.radsqd = None # squared search radius 59 | 60 | # octant search 61 | self.noct = None # the number of data noct to retain from each octant 62 | 63 | #To be calculated 64 | self.nisb = None # array with cumulative number of data in each super block. 
        # super block definitions
        self.nxsup = None
        self.xmnsup = None
        self.xsizsup = None
        self.nysup = None
        self.ymnsup = None
        self.ysizsup = None
        self.nzsup = None
        self.zmnsup = None
        self.zsizsup = None
        # super blocks to search
        self.nsbtosr = None   # Number of super blocks to search
        self.ixsbtosr = None  # X offsets for super blocks to search
        self.iysbtosr = None  # Y offsets for super blocks to search
        self.izsbtosr = None  # Z offsets for super blocks to search
        # points found within nearby super blocks
        self.nclose = None
        self.close_samples = None
        self.infoct = None
        # output sort_index
        self.sort_index = None

    def setup(self):
        """
        Variables estimated
        -------------------
        nisb()                 Array with cumulative number of data in each
                               super block.
        nxsup,xmnsup,xsizsup   Definition of the X super block grid
        nysup,ymnsup,ysizsup   Definition of the Y super block grid
        nzsup,zmnsup,zsizsup   Definition of the Z super block grid
        """
        # Establish the super block definition
        self.nxsup = min(self.nx, self.MAXSB[0])
        self.nysup = min(self.ny, self.MAXSB[1])
        self.nzsup = min(self.nz, self.MAXSB[2])

        self.xsizsup = self.nx * self.xsiz / self.nxsup
        self.ysizsup = self.ny * self.ysiz / self.nysup
        self.zsizsup = self.nz * self.zsiz / self.nzsup

        self.xmnsup = (self.xmn - 0.5 * self.xsiz) + 0.5 * self.xsizsup
        self.ymnsup = (self.ymn - 0.5 * self.ysiz) + 0.5 * self.ysizsup
        self.zmnsup = (self.zmn - 0.5 * self.zsiz) + 0.5 * self.zsizsup

        # partition the data into super blocks
        x_block = np.arange(self.xmnsup - 0.5 * self.xsizsup,
                            self.xmnsup + (self.nxsup + 1) * self.xsizsup + 1,
                            self.xsizsup)
        x_index = np.searchsorted(x_block, self.vr['x']) - 1

        y_block = np.arange(self.ymnsup - 0.5 * self.ysizsup,
                            self.ymnsup + (self.nysup + 1) * self.ysizsup + 1,
                            self.ysizsup)
        y_index = np.searchsorted(y_block, self.vr['y']) - 1

        z_block = np.arange(self.zmnsup - 0.5 * self.zsizsup,
                            self.zmnsup + (self.nzsup + 1) * self.zsizsup + 1,
                            self.zsizsup)
        z_index = np.searchsorted(z_block, self.vr['z']) - 1

        temp = np.zeros_like(self.vr['x'])
        self.nisb = np.zeros((self.nxsup*self.nysup*self.nzsup,))
        for idx, (ix, iy, iz) in enumerate(zip(x_index, y_index, z_index)):
            ii = super_flat_index(ix, iy, iz, self.nxsup, self.nysup)
            temp[idx] = ii
            self.nisb[ii] += 1

        # sort data by ascending super block number:
        self.sort_index = np.argsort(temp)
        self.vr = self.vr[self.sort_index]
        # set up nisb as cumulative counts (np.int is deprecated; use the
        # builtin int)
        self.nisb = np.cumsum(self.nisb, dtype=int)
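    # Flat super block numbering used above (the 0-based counterpart of the
    # formula in the class docstring): with nxsup = nysup = nzsup = 10, the
    # block (ix=2, iy=3, iz=1) maps to
    #     ii = iz*nxsup*nysup + iy*nxsup + ix = 1*100 + 3*10 + 2 = 132,
    # and nisb[ii] then gives the cumulative count of data up to that block.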
140 |     def pickup(self):
141 |         """
142 |         This subroutine establishes which super blocks must be searched given
143 |         that a point being estimated/simulated falls within a super block
144 |         centered at 0,0,0.
145 | 
146 |         Variables estimated
147 |         -------------------
148 |         nsbtosr    Number of super blocks to search
149 |         ixsbtosr   X offsets for super blocks to search
150 |         iysbtosr   Y offsets for super blocks to search
151 |         izsbtosr   Z offsets for super blocks to search
152 |         """
153 |         self.nsbtosr = 0
154 |         self.ixsbtosr = list()
155 |         self.iysbtosr = list()
156 |         self.izsbtosr = list()
157 |         float_max = np.finfo('float').max
158 |         self.nsbtosr, self.ixsbtosr, self.iysbtosr, self.izsbtosr = func_pickup(
159 |             self.nxsup, self.nysup, self.nzsup,
160 |             self.xsizsup, self.ysizsup, self.zsizsup,
161 |             self.rotmat, self.radsqd, float_max)
162 | 
163 |     def search(self, xloc, yloc, zloc):
164 |         """
165 |         Variables estimated
166 |         -------------------
167 |         nclose          Number of close data
168 |         close_samples   Indices of close data
169 |         infoct          Number of informed octants (only computed when
170 |                         performing an octant search)
171 |         """
172 |         ix, iy, iz = getindx(xloc, yloc, zloc,
173 |                              self.xmnsup, self.xsizsup, self.nxsup,
174 |                              self.ymnsup, self.ysizsup, self.nysup,
175 |                              self.zmnsup, self.zsizsup, self.nzsup)
176 | 
177 |         self.nclose, self.close_samples, distance = func_search(
178 |             xloc, yloc, zloc,
179 |             ix, iy, iz,
180 |             self.nsbtosr, self.ixsbtosr, self.iysbtosr, self.izsbtosr,
181 |             self.nxsup, self.nysup, self.nzsup,
182 |             self.nisb, self.rotmat, self.radsqd,
183 |             self.vr)
184 |         # perform octant search partition
185 |         if self.noct <= 0:
186 |             return
187 |         else:  # partition the data into octants
188 |             inoct = np.zeros((8,))
189 |             # pick up the closest samples in each octant
190 |             nt = self.noct * 8
191 |             na = 0
192 |             for j in range(self.nclose):
193 |                 i = int(self.close_samples[j])
194 |                 h = distance[j]
195 |                 dx = self.vr['x'][i] - xloc
196 |                 dy = self.vr['y'][i] - yloc
197 |                 dz = self.vr['z'][i] - zloc
198 |                 if dz >= 0:
199 |                     iq = 3
200 |                     if dx <= 0 and dy > 0:
201 |                         iq = 0
202 |                     if dx > 0 and dy >= 0:
203 |                         iq = 1
204 |                     if dx < 0 and dy <= 0:
205 |                         iq = 2
206 |                 else:
207 |                     iq = 7
208 |                     if dx <= 0 and dy > 0:
209 |                         iq = 4
210 |                     if dx > 0 and dy >= 0:
211 |                         iq = 5
212 |                     if dx < 0 and dy <= 0:
213 |                         iq = 6
214 |                 inoct[iq] += 1
215 | 
216 |                 if inoct[iq] <= self.noct:
217 |                     self.close_samples[na] = i
218 |                     distance[na] = h
219 |                     na += 1
220 |                     if na == nt:
221 |                         break
222 |             self.nclose = na
223 |             # number of octants from which samples are drawn
224 |             self.infoct = np.count_nonzero(inoct)
225 | 
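# A minimal driving sketch. The grid numbers are illustrative, taken from the
# parameter files under testData/; MAXSB and the search settings are example
# values for this sketch, not defaults of the module:
#
#     sbs = SuperBlockSearcher()
#     sbs.nx, sbs.xmn, sbs.xsiz = 98, 100.0, 200.0
#     sbs.ny, sbs.ymn, sbs.ysiz = 79, 100.0, 200.0
#     sbs.nz, sbs.zmn, sbs.zsiz = 1, 0.0, 200.0
#     sbs.MAXSB = [21, 21, 11]
#     sbs.vr = data                # structured array with 'x', 'y', 'z' fields
#     sbs.rotmat = np.eye(3)       # isotropic search
#     sbs.radsqd = 4000.0**2       # squared search radius
#     sbs.noct = 0                 # disable octant search
#     sbs.setup()                  # sorts sbs.vr; sbs.sort_index maps back
#     sbs.pickup()
#     sbs.search(5000.0, 5000.0, 100.0)
#     nearby = sbs.close_samples[:sbs.nclose]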
226 | @jit(nopython=True)
227 | def func_pickup(nxsup, nysup, nzsup,
228 |                 xsizsup, ysizsup, zsizsup,
229 |                 rotmat, radsqd, float_max):
230 |     nsbtosr = 0
231 |     ixsbtosr = []
232 |     iysbtosr = []
233 |     izsbtosr = []
234 |     # for i, j, k in product(range(-(nxsup-1), nxsup),
235 |     #                        range(-(nysup-1), nysup),
236 |     #                        range(-(nzsup-1), nzsup)):
237 |     for i in range(-(nxsup-1), nxsup):
238 |         for j in range(-(nysup-1), nysup):
239 |             for k in range(-(nzsup-1), nzsup):
240 |                 xo = i * xsizsup
241 |                 yo = j * ysizsup
242 |                 zo = k * zsizsup
243 |                 shortest = float_max
244 |                 # for i1, j1, k1 in product([-1, 1], [-1, 1], [-1, 1]):
245 |                 #     for i2, j2, k2 in product([-1, 1], [-1, 1], [-1, 1]):
246 |                 for i1 in [-1, 1]:
247 |                     for j1 in [-1, 1]:
248 |                         for k1 in [-1, 1]:
249 |                             for i2 in [-1, 1]:
250 |                                 for j2 in [-1, 1]:
251 |                                     for k2 in [-1, 1]:
252 |                                         xdis = (i1 - i2) * 0.5 * xsizsup + xo
253 |                                         ydis = (j1 - j2) * 0.5 * ysizsup + yo
254 |                                         zdis = (k1 - k2) * 0.5 * zsizsup + zo
255 |                                         hsqd = sqdist(
256 |                                             (0, 0, 0), (xdis, ydis, zdis),
257 |                                             rotmat)
258 |                                         shortest = hsqd if hsqd < shortest \
259 |                                             else shortest
260 |                 if shortest <= radsqd:
261 |                     nsbtosr += 1
262 |                     ixsbtosr.append(i)
263 |                     iysbtosr.append(j)
264 |                     izsbtosr.append(k)
265 |     return nsbtosr, ixsbtosr, iysbtosr, izsbtosr
266 | 
267 | @jit(nopython=True)
268 | def func_search(xloc, yloc, zloc,
269 |                 ix, iy, iz,
270 |                 nsbtosr, ixsbtosr, iysbtosr, izsbtosr,
271 |                 nxsup, nysup, nzsup,
272 |                 nisb, rotmat, radsqd,
273 |                 vr):
274 |     nclose = 0
275 |     close_samples = []
276 |     distance = []
277 |     # loop over all super blocks
278 |     for isup in range(nsbtosr):
279 |         ixsup = ix + ixsbtosr[isup]
280 |         iysup = iy + iysbtosr[isup]
281 |         izsup = iz + izsbtosr[isup]
282 |         if ixsup < 0 or ixsup >= nxsup or \
283 |            iysup < 0 or iysup >= nysup or \
284 |            izsup < 0 or izsup >= nzsup:
285 |             continue
286 |         # find the number of points within this super block
287 |         ii = super_flat_index(ixsup, iysup, izsup, nxsup, nysup)
288 |         # ii = self._super_flat_index(ixsup, iysup, izsup)
289 |         # start index of this block's data within the sorted array
290 |         if ii == 0:
291 |             nums = nisb[ii]
292 |             i = 0
293 |         else:
294 |             nums = nisb[ii] - nisb[ii - 1]
295 |             i = nisb[ii - 1]
296 |         # loop over all the data within this super block
297 |         for k in range(0, nums):
298 |             # hsqd = self.sqdist((xloc, yloc, zloc),
299 |             #                    (self.vr['x'][i], self.vr['y'][i],
300 |             #                     self.vr['z'][i]))
301 |             hsqd = sqdist(
302 |                 (xloc, yloc, zloc),
303 |                 (vr['x'][i], vr['y'][i], vr['z'][i]),
304 |                 # (vrx[i], vry[i], vrz[i]),
305 |                 rotmat)
306 |             if hsqd <= radsqd:
307 |                 nclose += 1
308 |                 close_samples.append(i)
309 |                 distance.append(hsqd)
310 |             # advance to the next datum even when it is out of range
311 |             i += 1
312 |     # sort nearby samples by distance
313 |     distance = np.array(distance)
314 |     close_samples = np.array(close_samples)
315 |     sort_index = np.argsort(distance)
316 |     close_samples = close_samples[sort_index]
317 |     return nclose, close_samples, distance[sort_index]
318 | 
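# A quick numeric check for sqdist (defined below): with an identity rotation
# matrix the anisotropic distance reduces to the plain squared Euclidean
# distance, e.g.
#
#     sqdist((0., 0., 0.), (1., 2., 2.), np.eye(3))   # -> 9.0
#
# A general rotmat encodes rotation plus per-axis scaling, so the ellipsoidal
# search neighbourhood defined by radsqd becomes spherical in rotated space.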
368 |     """
369 |     dx = point1[0] - point2[0]
370 |     dy = point1[1] - point2[1]
371 |     dz = point1[2] - point2[2]
372 |     sqdist = 0.0
373 |     for i in range(3):
374 |         cont = rotmat[i, 0] * dx + \
375 |                rotmat[i, 1] * dy + \
376 |                rotmat[i, 2] * dz
377 |         sqdist += cont * cont
378 |     return sqdist
379 | 
380 | @jit(nopython=True)
381 | def super_flat_index(ixsup, iysup, izsup, nxsup, nysup):
382 |     return ixsup + iysup * nxsup + izsup * nxsup * nysup
383 | 
--------------------------------------------------------------------------------
/pygeostatistics/variogram_model.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Fri Nov 2016
4 | 
5 | Five kinds of variogram models.
6 | 
7 | Note: the range parameter is named crange because range is a Python built-in.
8 | """
9 | from __future__ import division
10 | import numpy as np
11 | import matplotlib.pyplot as plt
12 | 
13 | 
14 | def spherical(lag, sill, crange):
15 |     if lag <= crange:
16 |         return sill*(1.5*(lag/crange) - 0.5*(lag/crange)**3)
17 |     else:
18 |         return sill
19 | 
20 | 
21 | def exponential(lag, sill, crange):
22 |     return sill*(1 - np.exp(-(3*lag/crange)))
23 | 
24 | 
25 | def gaussian(lag, sill, crange):
26 |     return sill*(1 - np.exp(-(3*lag**2/crange**2)))
27 | 
28 | 
29 | def power(lag, sill, omega):
30 |     return sill*lag**omega
31 | 
32 | 
33 | def hole_effect(lag, sill, crange):
34 |     return sill*(1-np.cos((lag/crange)*np.pi))
35 | 
36 | if __name__ == '__main__':
37 |     func = np.vectorize(exponential)
38 |     abscissa = np.arange(0, 100, 0.1)
39 |     ordinate = func(abscissa, 1, 40)
40 |     plt.plot(abscissa, ordinate)
41 |     plt.show()
--------------------------------------------------------------------------------
/pygeostatistics/yaml_patch.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Add a resolver to PyYAML for correctly parsing scientific notation.
4 | 
5 | Created on Wed Nov 21
6 | """
7 | from __future__ import absolute_import, division, print_function
8 | 
9 | __author__ = "yuhao"
10 | 
11 | import re
12 | import yaml
13 | 
14 | def loader_patched():
15 |     loader = yaml.SafeLoader
16 |     loader.add_implicit_resolver(
17 |         u'tag:yaml.org,2002:float',
18 |         re.compile(u'''^(?:
19 |      [-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)?
20 |     |[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+)
21 |     |\\.[0-9_]+(?:[eE][-+][0-9]+)?
22 | |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]* 23 | |[-+]?\\.(?:inf|Inf|INF) 24 | |\\.(?:nan|NaN|NAN))$''', re.X), 25 | list(u'-+0123456789.')) 26 | return loader 27 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [versioneer] 2 | VCS = git 3 | style = pep440 4 | versionfile_source = pygeostatistics/_version.py 5 | versionfile_build = pygeostatistics/_version.py 6 | tag_prefix = 7 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Created on Sep 8th 2018 4 | """ 5 | from distutils.core import setup 6 | from setuptools import find_packages 7 | import versioneer 8 | 9 | CLASSIFIERS = [ 10 | 'Development Status :: 4 - Beta', 11 | 'Intended Audience :: Developers', 12 | 'Intended Audience :: Science/Research', 13 | 'License :: OSI Approved :: MIT License', 14 | 'Programming Language :: Python', 15 | 'Topic :: Scientific/Engineering', 16 | 'Topic :: Scientific/Engineering :: Mathematics', 17 | 'Topic :: Scientific/Engineering :: Physics', 18 | 'Operating System :: Microsoft :: Windows', 19 | 'Operating System :: POSIX', 20 | 'Operating System :: Unix', 21 | 'Operating System :: MacOS', 22 | 'Natural Language :: English', 23 | ] 24 | 25 | with open("README.md") as f: 26 | LONG_DESCRIPTION = ''.join(f.readlines()) 27 | 28 | setup( 29 | name="pyGeoStatistics", 30 | version=versioneer.get_version(), 31 | cmdclass=versioneer.get_cmdclass(), 32 | install_requires=[ 33 | 'scipy', 34 | 'pandas', 35 | 'numba', 36 | 'matplotlib' 37 | ], 38 | packages=find_packages(exclude=['tests', 'testData']), 39 | author="Yu Hao", 40 | author_email="yuhao@live.cn", 41 | description="pyGeoStatistics: Geostatistics with Python", 42 | long_description=LONG_DESCRIPTION, 43 | license="MIT", 44 | keywords="geostatistics", 45 | url="https://github.com/whimian/pyGeoStatistics", 46 | download_url="https://github.com/whimian/pyGeoStatistics", 47 | classifiers=CLASSIFIERS, 48 | platforms=["Windows", "Linux", "Solaris", "Mac OS-X", "Unix"], 49 | zip_safe=False 50 | ) 51 | -------------------------------------------------------------------------------- /testData/test.gslib: -------------------------------------------------------------------------------- 1 | test 2 | 3 3 | x 4 | y 5 | por 6 | 12100.0 8300.0 14.6515 7 | 5300.0 8700.0 14.5093 8 | 3500.0 13900.0 14.0639 9 | 5100.0 1900.0 15.1084 10 | 9900.0 13700.0 13.919 11 | 2900.0 900.0 13.1304 12 | 7900.0 6700.0 14.5724 13 | 16900.0 4900.0 15.0814 14 | 18700.0 1500.0 13.91 15 | 2700.0 2100.0 13.4024 16 | 10700.0 5100.0 14.9395 17 | 7500.0 12900.0 15.2159 18 | 5500.0 11100.0 14.5777 19 | 9500.0 9100.0 14.2483 20 | 15300.0 3100.0 14.4281 21 | 4700.0 9700.0 15.2606 22 | 16700.0 15700.0 16.1859 23 | 19500.0 9700.0 14.2079 24 | 16900.0 13100.0 16.9583 25 | 900.0 3700.0 13.8354 26 | 500.0 11900.0 14.1859 27 | 9100.0 1300.0 14.0381 28 | 9100.0 13700.0 14.3685 29 | 9900.0 12900.0 13.4018 30 | 6300.0 100.0 15.8953 31 | 3700.0 5100.0 12.8667 32 | 16300.0 900.0 15.1039 33 | 18300.0 13500.0 15.7736 34 | 9500.0 6900.0 14.1333 35 | 17900.0 3100.0 13.3369 36 | 9900.0 15500.0 15.1362 37 | 7100.0 8900.0 15.0847 38 | 19300.0 7100.0 14.2498 39 | 2300.0 5700.0 12.6811 40 | 7300.0 8900.0 14.9384 41 | 13900.0 3700.0 15.6005 42 | 8500.0 10100.0 13.7796 43 | 8100.0 8700.0 15.2907 44 | 14700.0 11900.0 15.6881 45 
| 6300.0 2300.0 15.3677 46 | 11900.0 12900.0 14.3283 47 | 18100.0 7100.0 14.7374 48 | 11300.0 7100.0 15.0547 49 | 12500.0 3100.0 14.8889 50 | 2700.0 12700.0 14.436 51 | 2700.0 4300.0 12.1491 52 | 8500.0 11300.0 13.624 53 | 1500.0 900.0 14.188 54 | 7300.0 1300.0 14.9072 55 | 10700.0 4100.0 15.2029 56 | 7100.0 1900.0 15.3468 57 | 3900.0 8500.0 15.939 58 | 17100.0 6100.0 15.7269 59 | 14100.0 10100.0 15.3238 60 | 11500.0 4900.0 14.0445 61 | 13300.0 15700.0 14.4032 62 | 1900.0 12100.0 14.3586 63 | 15100.0 2900.0 14.6007 64 | 6500.0 900.0 16.1458 65 | 8900.0 6100.0 15.7727 66 | 4500.0 2300.0 13.6234 67 | 12900.0 10300.0 15.1024 68 | 10900.0 5700.0 15.3546 69 | 3500.0 700.0 13.8431 70 | 16300.0 3700.0 14.9427 71 | 900.0 5100.0 14.4139 72 | 12900.0 12900.0 13.6177 73 | 15300.0 9300.0 16.3787 74 | 7300.0 6900.0 14.258 75 | 16300.0 12500.0 15.7772 76 | 100.0 8900.0 14.6553 77 | 1700.0 11700.0 14.3627 78 | 17500.0 11100.0 15.9659 79 | 14900.0 8300.0 16.0095 80 | 8300.0 10900.0 13.9639 81 | 4100.0 14500.0 14.2649 82 | 11100.0 15300.0 15.7684 83 | 500.0 4900.0 14.591 84 | 13100.0 1500.0 15.1377 85 | 18900.0 1700.0 14.095 86 | 3500.0 7500.0 15.1486 87 | 3700.0 6900.0 13.9584 88 | 14500.0 13300.0 14.7381 89 | 4900.0 9100.0 15.0689 90 | 9700.0 5700.0 15.8042 91 | -------------------------------------------------------------------------------- /testData/test_krige2d.par: -------------------------------------------------------------------------------- 1 | { 2 | "a_max": [ 3 | 3715.9 4 | ], 5 | "a_min": [ 6 | 3715.9 7 | ], 8 | "azm": [ 9 | 0 10 | ], 11 | "c0": 0.05, 12 | "cc": [ 13 | 0.65 14 | ], 15 | "datafl": "testData/test.gslib", 16 | "dbgfl": "kb2d.dbg", 17 | "icolvr": 3, 18 | "icolx": 0, 19 | "icoly": 1, 20 | "idbg": 3, 21 | "isk": 0, 22 | "it": [ 23 | 1 24 | ], 25 | "ndmax": 50, 26 | "ndmin": 1, 27 | "nst": 1, 28 | "nx": 98, 29 | "nxdis": 1, 30 | "ny": 79, 31 | "nydis": 1, 32 | "outfl": "out.dat", 33 | "radius": 4000, 34 | "skmean": 14.69588, 35 | "tmax": 1e+21, 36 | "tmin": -1e+21, 37 | "xmn": 100, 38 | "xsiz": 200, 39 | "ymn": 100, 40 | "ysiz": 200 41 | } -------------------------------------------------------------------------------- /testData/test_krige3d.par: -------------------------------------------------------------------------------- 1 | { 2 | "aa_hmax": [ 3 | 3715.9 4 | ], 5 | "aa_hmin": [ 6 | 3715.9 7 | ], 8 | "aa_vert": [ 9 | 3715.9 10 | ], 11 | "ang1": [ 12 | 0 13 | ], 14 | "ang2": [ 15 | 0 16 | ], 17 | "ang3": [ 18 | 0 19 | ], 20 | "c0": 0.05, 21 | "cc": [ 22 | 0.65 23 | ], 24 | "datafl": "testData/test.gslib", 25 | "dbgfl": "kb2d.dbg", 26 | "icolsec": 4, 27 | "icolvr": 3, 28 | "icolx": 0, 29 | "icoly": 1, 30 | "icolz": 2, 31 | "idbg": 3, 32 | "idrift": [ 33 | false, 34 | false, 35 | false, 36 | false, 37 | false, 38 | false, 39 | false, 40 | false, 41 | false 42 | ], 43 | "ikrige": 0, 44 | "iseccol": 3, 45 | "it": [ 46 | 1 47 | ], 48 | "itrend": false, 49 | "jackfl": "jackfl.dat", 50 | "jicolsec": 4, 51 | "jicolvr": 3, 52 | "jicolx": 0, 53 | "jicoly": 1, 54 | "jicolz": 2, 55 | "ndmax": 30, 56 | "ndmin": 1, 57 | "noct": 0, 58 | "nst": 1, 59 | "nx": 98, 60 | "nxdis": 1, 61 | "ny": 79, 62 | "nydis": 1, 63 | "nz": 1, 64 | "nzdis": 1, 65 | "option": 0, 66 | "outfl": "out.dat", 67 | "radius_hmax": 4000, 68 | "radius_hmin": 4000, 69 | "radius_vert": 0, 70 | "sang1": 0, 71 | "sang2": 0, 72 | "sang3": 0, 73 | "secfl": "secfl.dat", 74 | "skmean": 14.69588, 75 | "tmax": 1e+21, 76 | "tmin": -1e+21, 77 | "xmn": 100, 78 | "xsiz": 200, 79 | "ymn": 100, 80 | "ysiz": 200, 81 | "zmn": 0, 82 | "zsiz": 
200 83 | } -------------------------------------------------------------------------------- /testData/test_sgsim.par: -------------------------------------------------------------------------------- 1 | { 2 | "aa_hmax": [ 3 | 3715.9 4 | ], 5 | "aa_hmin": [ 6 | 3715.9 7 | ], 8 | "aa_vert": [ 9 | 3715.9 10 | ], 11 | "ang1": [ 12 | 0 13 | ], 14 | "ang2": [ 15 | 0 16 | ], 17 | "ang3": [ 18 | 0 19 | ], 20 | "c0": 0.05, 21 | "cc": [ 22 | 0.65 23 | ], 24 | "datafl": "testData/test.gslib", 25 | "dbgfl": "sgsim.dbg", 26 | "icollvm": 4, 27 | "icolsec": -1, 28 | "icolsvr": 0, 29 | "icolswt": 1, 30 | "icolvr": 2, 31 | "icolwt": -1, 32 | "icolx": 0, 33 | "icoly": 1, 34 | "icolz": -1, 35 | "idbg": 3, 36 | "ikrige": 0, 37 | "ismooth": false, 38 | "it": [ 39 | 1 40 | ], 41 | "itrans": true, 42 | "ltail": 1, 43 | "ltpar": 0, 44 | "multgrid": false, 45 | "mxctx": 30, 46 | "mxcty": 30, 47 | "mxctz": 30, 48 | "ndmax": 30, 49 | "ndmin": 1, 50 | "nmult": 2, 51 | "noct": 0, 52 | "nodmax": 12, 53 | "nsim": 1, 54 | "nst": 1, 55 | "nx": 98, 56 | "ny": 79, 57 | "nz": 1, 58 | "outfl": "sgsim.out", 59 | "radius_hmax": 4000, 60 | "radius_hmin": 4000, 61 | "radius_vert": 0, 62 | "rho": 0.7, 63 | "sang1": 0, 64 | "sang2": 0, 65 | "sang3": 0, 66 | "secfl": "ydata.dat", 67 | "seed": 1, 68 | "smthfl": "histsmth.out", 69 | "sstrat": 0, 70 | "tmax": 1e+21, 71 | "tmin": -1e+21, 72 | "transfl": "sgsim.trn", 73 | "utail": 1, 74 | "utpar": 15, 75 | "varred": 0.1, 76 | "xmn": 100, 77 | "xsiz": 200, 78 | "ymn": 100, 79 | "ysiz": 200, 80 | "zmax": 30, 81 | "zmin": 0, 82 | "zmn": 0, 83 | "zsiz": 200 84 | } -------------------------------------------------------------------------------- /testData/xihuSmall_sparse_gam.par: -------------------------------------------------------------------------------- 1 | {"zsiz": 0.5, "xsiz": 5, "standardize": false, "nlag": 10, "ivhead": [1, 1, 2, 2, 1], "ysiz": 5, "ndir": 2, "datafl": "testData/xihu_sparse.gslib", "xmn": 0.5, "izd": [0, 0], "ixd": [1, 0], "nx": 15, "ny": 23, "nz": 161, "ivtype": [1, 3, 1, 3, 9], "ivtail": [1, 1, 2, 2, 1], "ymn": 0.5, "tmin": -1e+21, "nvarg": 5, "nvar": 1, "zmn": 0.5, "iyd": [0, 1], "ivar": [1, 2], "tmax": 1e+21, "igrid": 1, "outfl": "gam.out"} -------------------------------------------------------------------------------- /testData/xihuSmall_sparse_gamv.par: -------------------------------------------------------------------------------- 1 | { 2 | "atol": [ 3 | 90.0 4 | ], 5 | "azm": [ 6 | 0.0 7 | ], 8 | "bandwd": [ 9 | 200.0 10 | ], 11 | "bandwh": [ 12 | 200.0 13 | ], 14 | "datafl": "testData/test.gslib", 15 | "dip": [ 16 | 0.0 17 | ], 18 | "dtol": [ 19 | 90.0 20 | ], 21 | "icolx": 1, 22 | "icoly": 2, 23 | "icolz": 0, 24 | "ivar": [ 25 | 3, 26 | 4 27 | ], 28 | "ivhead": [ 29 | 1, 30 | 1, 31 | 2 32 | ], 33 | "ivtail": [ 34 | 1, 35 | 1, 36 | 2 37 | ], 38 | "ivtype": [ 39 | 1, 40 | 3, 41 | 1 42 | ], 43 | "ndir": 1, 44 | "nlag": 20, 45 | "nvar": 1, 46 | "nvarg": 3, 47 | "outfl": "out.dat", 48 | "standardize": false, 49 | "tmax": 1e+21, 50 | "tmin": -1e+21, 51 | "xlag": 500.0, 52 | "xltol": 300.0 53 | } -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whimian/pyGeoStatistics/e119c4e47c57e0dc1ba3ff13e45782d0e33e0c36/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_eda.py: 
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Tests for exploratory data analysis (EDA).
4 | 
5 | Created on Sep. 4th 2018
6 | """
7 | __author__ = "yuhao"
8 | 
9 | import pytest
10 | from pygeostatistics.eda import EDA
11 | 
12 | 
13 | def test__EDA():
14 |     eda = EDA("testData/test.gslib")
15 |     eda.read()
16 |     assert eda.maximum == 16.9583
17 |     assert eda.minimum == 12.1491
18 |     assert float("{:.4f}".format(eda.mean)) == 14.6959
19 |     assert float("{:.4f}".format(eda.variance)) == 0.7776
20 |     assert eda.meadian == 14.6515  # 'meadian' [sic] is the attribute name used by EDA
21 | 
--------------------------------------------------------------------------------
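A minimal usage sketch for pygeostatistics.yaml_patch (the parameter text below
is adapted from the tmax/tmin entries in the testData parameter files; it is an
illustration, not a file from the repository):

    import yaml
    from pygeostatistics.yaml_patch import loader_patched

    text = "tmax: 1e+21\ntmin: -1e+21\nradius: 4000\n"
    params = yaml.load(text, Loader=loader_patched())
    # PyYAML's stock SafeLoader follows the YAML 1.1 float regex, which
    # requires a decimal point, so "1e+21" would load as the string "1e+21";
    # the patched resolver makes it load as a float.
    assert isinstance(params["tmax"], float)
    assert params["tmin"] == -1e21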