├── tests
│   ├── __init__.py
│   ├── test_basic.py
│   └── data
│       └── 30-03-18.csv
├── polire
│   ├── gp
│   │   ├── __init__.py
│   │   ├── gp.py
│   │   └── tests
│   │       └── GP interpolation.ipynb
│   ├── idw
│   │   ├── __init__.py
│   │   ├── idw.py
│   │   └── tests
│   │       ├── IDW Initial.ipynb
│   │       └── Numpy+IDWTest.ipynb
│   ├── nsgp
│   │   ├── __init__.py
│   │   └── nsgp.py
│   ├── kriging
│   │   ├── __init__.py
│   │   ├── kriging.py
│   │   └── tests
│   │       └── Kriging Interpolation.ipynb
│   ├── random
│   │   ├── __init__.py
│   │   └── random.py
│   ├── spatial
│   │   ├── __init__.py
│   │   └── spatial.py
│   ├── spline
│   │   ├── __init__.py
│   │   └── bspline.py
│   ├── trend
│   │   ├── __init__.py
│   │   ├── polynomials.py
│   │   └── trend.py
│   ├── natural_neighbors
│   │   ├── __init__.py
│   │   └── natural_neighbors.py
│   ├── base
│   │   ├── __init__.py
│   │   └── base.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── distance.py
│   │   └── gridding.py
│   ├── custom
│   │   ├── __init__.py
│   │   └── custom.py
│   ├── preprocessing
│   │   ├── __init__.py
│   │   └── sptial_features.py
│   ├── constants.py
│   └── __init__.py
├── requirements.txt
├── requirements-dev.txt
├── .pre-commit-config.yaml
├── .gitignore
├── index.qmd
├── pyproject.toml
├── setup.py
├── setup.cfg
├── _quarto.yml
├── .github
│   └── workflows
│       ├── quarto_publish.yml
│       ├── auto_publish_pypi.yml
│       └── tests.yml
├── usage3.py
├── LICENSE
├── usage.py
└── README.md
/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/polire/gp/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/polire/idw/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/polire/nsgp/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/polire/kriging/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/polire/random/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/polire/spatial/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/polire/spline/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/polire/trend/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/polire/natural_neighbors/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/polire/base/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import Base
2 |
--------------------------------------------------------------------------------
/polire/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .gridding import *
2 |
--------------------------------------------------------------------------------
/polire/custom/__init__.py:
--------------------------------------------------------------------------------
1 | from .custom import CustomInterpolator
2 |
--------------------------------------------------------------------------------
/polire/preprocessing/__init__.py:
--------------------------------------------------------------------------------
1 | from .sptial_features import SpatialFeatures
2 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | matplotlib
2 | numpy
3 | pandas
4 | pykrige
5 | scipy
6 | seaborn
7 | Shapely
8 | xgboost
9 | # GPy
--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | pytest
2 | pytest-cov
3 | coveralls
4 | scikit-learn
5 | pre-commit
6 | xarray
7 | pooch
8 | jinja2
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/psf/black
3 | rev: 23.7.0
4 | hooks:
5 | - id: black
6 | args: [--line-length=79]
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | **/*.ipynb_checkpoints
2 | **/*.pyc
3 | .env/
4 | *__pycache__/
5 | .pytest_cache
6 | .vscode
7 | .Rhistory
8 | build
9 | dist
10 | polire.egg-info
11 | global_test.py
12 | /.quarto/
13 |
14 | # ignore docs
15 | docs/
--------------------------------------------------------------------------------
/polire/constants.py:
--------------------------------------------------------------------------------
1 | """This python script contains all the constants that
2 | might be needed in the various interpolation packages.
3 | """
4 |
5 | low_res = 10
6 | med_res = 100
7 | high_res = 1000
8 |
9 | RESOLUTION = {"low": low_res, "standard": med_res, "high": high_res}
10 |
--------------------------------------------------------------------------------
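The `RESOLUTION` mapping is what `Base.__init__` uses to turn the `resolution` string into a grid side length, so `predict_grid` returns a square array of that size. A minimal sketch (assuming `polire` is installed; the data is made up):

```python
import numpy as np
from polire import IDW

X = np.random.rand(20, 2)
y = np.random.rand(20)

# "low" -> 10x10 grid, "standard" -> 100x100, "high" -> 1000x1000
model = IDW(resolution="low")
model.fit(X, y)
Z = model.predict_grid()  # grid over the bounding box of X
assert Z.shape == (10, 10)
```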
/index.qmd:
--------------------------------------------------------------------------------
1 | ## Polire
2 |
3 | ```bash
4 | pip install polire
5 | ```
6 |
7 |
8 | The word "interpolation" has a Latin origin and is composed of two words - Inter, meaning between, and Polire, meaning to polish.
9 |
10 |
11 | Polire is a collection of several spatial interpolation algorithms.
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = [
3 | "setuptools>=50.0",
4 | "setuptools_scm[toml]>=6.0",
5 | "setuptools_scm_git_archive",
6 | "wheel>=0.33",
7 | "numpy>=1.16",
8 | "cython>=0.29",
9 | ]
10 |
11 | [tool.setuptools_scm]
12 | write_to = "polire/_version.py"
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import find_packages, setup
2 |
3 | with open("requirements.txt") as f:
4 | requirements = f.read().splitlines()
5 |
6 | setup(
7 | packages=find_packages(exclude=["docs"]),
8 | python_requires=">=3.8",
9 | install_requires=requirements,
10 | include_package_data=True,
11 | )
12 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | name = polire
3 | author = Zeel B Patel, Deepak Narayanan, Apoorv Agnihotri, Nipun Batra
4 | author_email = patel_zeel@iitgn.ac.in
5 | description = Spatial Interpolation in Python
6 | url = https://github.com/sustainability-lab/polire
7 | license = BSD 3-Clause License
8 | license_file = LICENSE
9 | long_description = file: README.md
10 | long_description_content_type = text/markdown
--------------------------------------------------------------------------------
/polire/__init__.py:
--------------------------------------------------------------------------------
1 | from .random.random import Random
2 | from .idw.idw import IDW
3 | from .spline.bspline import Spline
4 | from .trend.trend import Trend
5 | from .spatial.spatial import SpatialAverage
6 | from .natural_neighbors.natural_neighbors import NaturalNeighbor
7 | from .kriging.kriging import Kriging
8 |
9 | # from .gp.gp import GP
10 | from .custom.custom import CustomInterpolator
11 |
12 | # from .nsgp.nsgp import NSGP
13 |
--------------------------------------------------------------------------------
/_quarto.yml:
--------------------------------------------------------------------------------
1 | project:
2 | type: website
3 | output-dir: docs
4 |
5 | # render only the contents mentioned in the _quarto.yml file
6 |
7 |
8 | website:
9 | title: "Polire"
10 | sidebar:
11 | style: "docked"
12 | search: true
13 | contents:
14 | - section: "Introduction"
15 | path: "index.qmd"
16 |
17 | - section: "Examples"
18 | contents:
19 | - examples/all_in_one.ipynb
20 |
21 | execute:
22 | freeze: auto
--------------------------------------------------------------------------------
/.github/workflows/quarto_publish.yml:
--------------------------------------------------------------------------------
1 | on:
2 | workflow_dispatch:
3 | push:
4 | branches: master
5 |
6 | name: Quarto Publish
7 |
8 | jobs:
9 | build-deploy:
10 | runs-on: ubuntu-latest
11 | permissions:
12 | contents: write
13 | steps:
14 | - name: Check out repository
15 | uses: actions/checkout@v3
16 |
17 | - name: Set up Quarto
18 | uses: quarto-dev/quarto-actions/setup@v2
19 |
20 | - name: Render and Publish
21 | uses: quarto-dev/quarto-actions/publish@v2
22 | with:
23 | target: gh-pages
24 | env:
25 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--------------------------------------------------------------------------------
/polire/trend/polynomials.py:
--------------------------------------------------------------------------------
1 | """File containing polynomials supported by Trend interpolation
2 | Class.
3 | """
4 |
5 |
6 | def _create_polynomial(order):
7 | if order is None: # custom function by the user
8 | return None
9 |
10 | elif order == 0:
11 |
12 | def func(X, a):
13 | return a
14 |
15 | elif order == 1:
16 |
17 | def func(X, a, b, c):
18 | x1, x2 = X
19 | return a + b * x1 + c * x2
20 |
21 | elif order == 2:
22 |
23 | def func(X, a, b, c, d, e, f):
24 | x1, x2 = X
25 | return (
26 | a
27 | + b * x1
28 | + c * x2
29 | + d * (x1**2)
30 |                 + e * (x2**2)
31 | + f * x1 * x2
32 | )
33 |
34 | else:
35 | raise NotImplementedError(
36 | f"{order} order polynomial needs to be defined manually"
37 | )
38 |
39 | return func
40 |
--------------------------------------------------------------------------------
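These factories are what `Trend` hands to `scipy.optimize.curve_fit`; note the callable takes the coordinate pair as a single first argument. A small sketch fitting the order-1 polynomial directly (sample data is synthetic):

```python
import numpy as np
from scipy.optimize import curve_fit
from polire.trend.polynomials import _create_polynomial

func = _create_polynomial(order=1)  # func(X, a, b, c) = a + b*x1 + c*x2

x1, x2 = np.random.rand(50), np.random.rand(50)
y = 1.0 + 2.0 * x1 - 3.0 * x2  # noiseless plane

popt, _ = curve_fit(func, (x1, x2), y)
print(popt)  # approximately [1.0, 2.0, -3.0]
```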
/usage3.py:
--------------------------------------------------------------------------------
1 | # imports
2 | import seaborn as sns
3 | import matplotlib.pyplot as plt
4 | import numpy as np
5 |
6 | from polire import CustomInterpolator
7 | import xgboost
8 | from sklearn.ensemble import RandomForestRegressor
9 | from sklearn.linear_model import LinearRegression
10 | from sklearn.neighbors import KNeighborsRegressor
11 | from sklearn.gaussian_process import GaussianProcessRegressor
12 | from sklearn.gaussian_process.kernels import Matern
13 |
14 | # sample data
15 | X = [[0, 0], [0, 3], [3, 0], [3, 3]]
16 | y = [0, 1.5, 1.5, 3]
17 | X = np.array(X)
18 | y = np.array(y)
19 |
20 | for r in [
21 | CustomInterpolator(xgboost.XGBRegressor()),
22 | CustomInterpolator(RandomForestRegressor()),
23 |     CustomInterpolator(LinearRegression()),  # sklearn removed `normalize`
24 | CustomInterpolator(KNeighborsRegressor(n_neighbors=3, weights="distance")),
25 | CustomInterpolator(
26 | GaussianProcessRegressor(normalize_y=True, kernel=Matern())
27 | ),
28 | ]:
29 | r.fit(X, y)
30 | Z = r.predict_grid((0, 3), (0, 3)).reshape(100, 100)
31 | sns.heatmap(Z)
32 | plt.title(r)
33 | plt.show()
34 | plt.close()
35 |
--------------------------------------------------------------------------------
/.github/workflows/auto_publish_pypi.yml:
--------------------------------------------------------------------------------
1 | # This workflow will upload a Python package using Twine when a release is
2 | # created. For more information see the following link:
3 | # https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
4 |
5 | name: Publish to PyPI
6 |
7 | on:
8 | release:
9 | types: [published]
10 |
11 | jobs:
12 | deploy:
13 | runs-on: ubuntu-latest
14 |
15 | steps:
16 | - uses: actions/checkout@v2
17 |
18 | # Make sure tags are fetched so we can get a version.
19 | - run: |
20 | git fetch --prune --unshallow --tags
21 | - name: Set up Python
22 | uses: actions/setup-python@v2
23 | with:
24 | python-version: '3.x'
25 |
26 | - name: Install dependencies
27 | run: |
28 | python -m pip install --upgrade pip
29 | pip install -U setuptools 'setuptools_scm[toml]' setuptools_scm_git_archive wheel twine
30 | - name: Build and publish
31 | env:
32 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
33 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
34 |
35 | run: |
36 | python setup.py sdist
37 | twine upload dist/*
--------------------------------------------------------------------------------
/tests/test_basic.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 | from time import time
4 | from polire import (
5 | IDW,
6 | Spline,
7 | Trend,
8 | # GP,
9 | Kriging,
10 | NaturalNeighbor,
11 | SpatialAverage,
12 | CustomInterpolator,
13 | # NSGP,
14 | )
15 | from sklearn.linear_model import LinearRegression
16 |
17 | X = np.random.rand(20, 2)
18 | y = np.random.rand(20)
19 |
20 | X_new = np.random.rand(40, 2)
21 |
22 |
23 | @pytest.mark.parametrize(
24 | "model",
25 | [
26 | IDW(),
27 | Spline(),
28 | Trend(),
29 | # GP(),
30 | Kriging(),
31 | NaturalNeighbor(),
32 | SpatialAverage(),
33 | CustomInterpolator(LinearRegression()),
34 | # NSGP(),
35 | ],
36 | )
37 | def test_fit_predict(model):
38 | init = time()
39 | model.fit(X, y)
40 | y_new = model.predict(X_new)
41 |
42 | assert y_new.shape == (40,)
43 | print("Passed", "Time:", np.round(time() - init, 3), "seconds")
44 |
45 |
46 | @pytest.mark.skip(reason="Temporarily disabled")
47 | def test_nsgp():
48 | model = NSGP()
49 | init = time()
50 | model.fit(X, y, **{"ECM": X @ X.T})
51 | y_new = model.predict(X_new)
52 |
53 | assert y_new.shape == (40,)
54 |     assert not np.isnan(y_new).any()  # No NaN
55 | print("Passed", "Time:", np.round(time() - init, 3), "seconds")
56 |
--------------------------------------------------------------------------------
/polire/random/random.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from ..base import Base
4 |
5 |
6 | class Random(Base):
7 | """
8 | Class to randomly interpolate by picking values between maximum and
9 | minimum measurements.
10 |
11 | Note: Even if a point on the requested grid is present in
12 | the training set, we return a random value for it.
13 | """
14 |
15 | def __init__(self, resolution="standard", coordinate_type="Euclidean"):
16 | super().__init__(resolution, coordinate_type)
17 |
18 | def _fit(self, X, y):
19 | """Function for fitting random interpolation.
20 | This function is not supposed to be called directly.
21 | """
22 | self.ymax = max(y)
23 | self.ymin = min(y)
24 | return self
25 |
26 | def _predict_grid(self, x1lim, x2lim):
27 | """Function for random grid interpolation.
28 | This function is not supposed to be called directly.
29 | """
30 | return np.random.uniform(
31 | low=self.ymin,
32 | high=self.ymax,
33 | size=(self.resolution, self.resolution),
34 | )
35 |
36 | def _predict(self, X):
37 | """Function for random interpolation.
38 | This function is not supposed to be called directly.
39 | """
40 | return np.random.uniform(
41 | low=self.ymin, high=self.ymax, size=(X.shape[0])
42 | )
43 |
--------------------------------------------------------------------------------
/polire/utils/distance.py:
--------------------------------------------------------------------------------
1 | """
2 | A module to have different distance metrics for spatial interpolation
3 | """
4 | import numpy as np
5 | from scipy.spatial.distance import cdist
6 |
7 |
8 | def haversine(X1, X2):
9 | """
10 |     Great-circle distance between two point sets, vectorized.
11 | 
12 |     Arguments
13 |     ---------
14 |     X1 : array, shape (n, 2); X2 : array, shape (m, 2). Rows are
15 |     (longitude, latitude) in degrees. Returns an (n, m) matrix in km.
16 | """
17 |
18 | # Non-vectorized version
19 | # X1 = X1.reshape(1, 2)
20 | # difference = (X1 - X2) * np.pi / 180
21 | # test_point_lat = X1[:, 1] * np.pi / 180
22 | # training_locations_lat = X2[:, 1] * np.pi / 180
23 |
24 | # a = np.sin(difference[:, 0] / 2)**2 * np.cos(test_point_lat) * np.cos(training_locations_lat) +\
25 | # np.sin(difference[:, 1] / 2)**2
26 | # radius = 6371
27 | # c = 2 * np.arcsin(np.sqrt(a))
28 | # return radius * c
29 |
30 | # Vectorized code
31 | lon1, lat1, lon2, lat2 = map(
32 | np.radians,
33 | [X1[:, 0, None], X1[:, 1, None], X2[:, 0, None], X2[:, 1, None]],
34 | )
35 |
36 | dlon = lon2.T - lon1
37 | dlat = lat2.T - lat1
38 |
39 | a = (
40 | np.sin(dlat / 2.0) ** 2
41 | + np.cos(lat1) @ np.cos(lat2.T) * np.sin(dlon / 2.0) ** 2
42 | )
43 |
44 | c = 2 * np.arcsin(np.sqrt(a))
45 | km = 6371 * c
46 | return km
47 |
48 |
49 | def euclidean(X1, X2):
50 | # return np.linalg.norm(X1 - X2, 2, axis=1)
51 | return cdist(X1, X2)
52 |
--------------------------------------------------------------------------------
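A sanity check of the shape and unit conventions: `haversine` takes `(n, 2)` and `(m, 2)` arrays of (longitude, latitude) in degrees and returns an `(n, m)` matrix of great-circle distances in kilometres; one degree of longitude on the equator is about 6371 * pi / 180 ≈ 111.195 km:

```python
import numpy as np
from polire.utils.distance import euclidean, haversine

X1 = np.array([[0.0, 0.0]])             # one (lon, lat) point
X2 = np.array([[0.0, 0.0], [1.0, 0.0]])

d = haversine(X1, X2)                   # shape (1, 2), in km
assert np.isclose(d[0, 0], 0.0)
assert np.isclose(d[0, 1], 111.195, atol=0.01)

# euclidean is a thin wrapper over scipy's cdist
assert np.isclose(euclidean(X1, X2)[0, 1], 1.0)
```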
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2020, sustainability-lab
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | 3. Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and produce coveralls report.
2 | # Inspired from https://github.com/wesselb/stheno/blob/master/.github/workflows/ci.yml
3 |
4 | name: Tests
5 |
6 | on:
7 | push:
8 | branches: [ master ]
9 | pull_request:
10 | branches: [ master ]
11 |
12 | jobs:
13 | test:
14 | runs-on: ubuntu-latest
15 | strategy:
16 | fail-fast: false
17 | matrix:
18 | python-version: ["3.9", "3.10", "3.11"]
19 |
20 | steps:
21 | - uses: actions/checkout@v2
22 | - name: Set up Python ${{ matrix.python-version }}
23 | uses: actions/setup-python@v2
24 | with:
25 | python-version: ${{ matrix.python-version }}
26 | - name: Install dependencies
27 | run: |
28 | python -m pip install --upgrade pip
29 | pip install -r requirements.txt
30 | pip install -r requirements-dev.txt
31 | - name: Pre-commit
32 | continue-on-error: true
33 | run: |
34 | pre-commit run --all-files
35 | - name: Test
36 | run: |
37 | pytest -v --cov=polire --cov-report term-missing
38 | coveralls --service=github
39 | env:
40 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
41 |           COVERALLS_FLAG_NAME: ${{ matrix.python-version }}
42 | COVERALLS_PARALLEL: true
43 |
44 | finish:
45 | name: Finish Coveralls
46 | needs: test
47 | runs-on: ubuntu-latest
48 | steps:
49 | - name: Finish Coveralls
50 | uses: coverallsapp/github-action@v1
51 | with:
52 | github-token: ${{ secrets.github_token }}
53 | parallel-finished: true
--------------------------------------------------------------------------------
/polire/custom/custom.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from ..base import Base
4 |
5 |
6 | class CustomInterpolator(Base):
7 | """
8 | Class to interpolate by fitting a sklearn type Regressor to
9 | the given data.
10 |
11 | Parameters
12 | ----------
13 |     regressor: object,
14 |         An instantiated regressor used for interpolation. It should
15 |         follow the scikit-learn estimator API (fit/predict).
16 |         Example: sklearn.ensemble.RandomForestRegressor().
17 | 
18 |     resolution: str, optional
19 |         Grid resolution for predict_grid, one of "low", "standard"
20 |         or "high". Default = "standard".
21 | 
22 |     Attributes
23 |     ----------
24 |     reg : object
25 |         The regressor instance passed in.
26 | """
27 |
28 | def __init__(
29 | self, regressor, resolution="standard", coordinate_type="Euclidean"
30 | ):
31 | super().__init__(resolution, coordinate_type)
32 | self.reg = regressor
33 |
34 | def _fit(self, X, y):
35 | """Function for fitting.
36 | This function is not supposed to be called directly.
37 | """
38 | self.reg.fit(X, y)
39 | return self
40 |
41 | def _predict_grid(self, x1lim, x2lim):
42 | """Function for grid interpolation.
43 | This function is not supposed to be called directly.
44 | """
45 | # getting the boundaries for interpolation
46 | x1min, x1max = x1lim
47 | x2min, x2max = x2lim
48 |
49 | # building the grid
50 | x1 = np.linspace(x1min, x1max, self.resolution)
51 | x2 = np.linspace(x2min, x2max, self.resolution)
52 | X1, X2 = np.meshgrid(x1, x2)
53 | return self.reg.predict(np.asarray([X1.ravel(), X2.ravel()]).T)
54 |
55 | def _predict(self, X):
56 | """Function for interpolation on specific points.
57 | This function is not supposed to be called directly.
58 | """
59 | return self.reg.predict(X)
60 |
61 | def __repr__(self):
62 | return self.__class__.__name__ + "." + self.reg.__class__.__name__
63 |
--------------------------------------------------------------------------------
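A minimal sketch of wrapping a scikit-learn regressor; any estimator exposing `fit`/`predict` should work the same way:

```python
import numpy as np
from sklearn.neighbors import KNeighborsRegressor
from polire import CustomInterpolator

X = np.random.rand(30, 2)
y = np.random.rand(30)

model = CustomInterpolator(KNeighborsRegressor(n_neighbors=5))
model.fit(X, y)

y_new = model.predict(np.random.rand(10, 2))  # pointwise predictions
Z = model.predict_grid((0, 1), (0, 1))        # (100, 100) at "standard" resolution
print(model)  # CustomInterpolator.KNeighborsRegressor
```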
/polire/spatial/spatial.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy.spatial.distance import cdist
3 |
4 | from ..base import Base
5 | from ..utils.distance import euclidean, haversine
6 |
7 |
8 | class SpatialAverage(Base):
9 | """
10 |     Class to interpolate by averaging all training observations that
11 |     fall within a given radius of each query point.
12 |     Note that the radius must be in kilometres if you pass longitude and latitude (Geographic coordinates) as inputs.
13 | """
14 |
15 | def __init__(
16 | self,
17 | radius=100,
18 | resolution="standard",
19 | coordinate_type="Euclidean",
20 | **kwargs
21 | ):
22 | super().__init__(resolution, coordinate_type)
23 | self.radius = radius
24 | if self.coordinate_type == "Geographic":
25 | self.distance = haversine
26 | elif self.coordinate_type == "Euclidean":
27 | self.distance = euclidean
28 | else:
29 | raise NotImplementedError(
30 | "Only Geographic and Euclidean Coordinates are available"
31 | )
32 |
33 | def _fit(self, X, y):
34 | """Function for fitting.
35 | This function is not supposed to be called directly.
36 | """
37 | self.X = X
38 | self.y = y
39 | return self
40 |
41 | def _predict_grid(self, x1lim, x2lim):
42 | """Function for grid interpolation.
43 | This function is not supposed to be called directly.
44 | """
45 | # getting the boundaries for interpolation
46 | x1min, x1max = x1lim
47 | x2min, x2max = x2lim
48 |
49 | # building the grid
50 | x1 = np.linspace(x1min, x1max, self.resolution)
51 | x2 = np.linspace(x2min, x2max, self.resolution)
52 | X1, X2 = np.meshgrid(x1, x2)
53 | return self._predict(np.asarray([X1.ravel(), X2.ravel()]).T)
54 |
55 | def _predict(self, X):
56 | """Function for interpolation on specific points.
57 | This function is not supposed to be called directly.
58 | """
59 | return self._average(X)
60 |
61 | def _average(self, X):
62 | dist = self.distance(X, self.X)
63 | mask = self.radius >= dist
64 | return (self.y * mask).sum(axis=1) / mask.sum(axis=1)
65 |
--------------------------------------------------------------------------------
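A sketch of the radius semantics: with `coordinate_type="Euclidean"` the radius is in the units of `X`; with `"Geographic"` the columns are (longitude, latitude) and the radius is in kilometres. Only training points within the radius contribute to the average:

```python
import numpy as np
from polire import SpatialAverage

X = np.array([[0.0, 0.0], [1.0, 0.0], [10.0, 0.0]])
y = np.array([1.0, 3.0, 100.0])

model = SpatialAverage(radius=2)
model.fit(X, y)

# only the two points within distance 2 of (0.5, 0) are averaged
print(model.predict(np.array([[0.5, 0.0]])))  # [2.0]
```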
/tests/data/30-03-18.csv:
--------------------------------------------------------------------------------
1 | ,location,parameter,value,latitude,longitude
2 | 4087,"Jawaharlal Nehru Stadium, Delhi - DPCC",pm25,194.0,28.581197,77.234291
3 | 3930,"Sonia Vihar, Delhi - DPCC",pm25,267.0,28.739434,77.245721
4 | 4020,"Narela, Delhi - DPCC",pm25,273.0,28.822931,77.101961
5 | 4057,"Najafgarh, Delhi - DPCC",pm25,129.0,28.620806,76.991463
6 | 3998,"NSIT Dwarka, New Delhi - CPCB",pm25,176.0,28.609090000000002,77.03254129999999
7 | 4104,"Dwarka-Sector 8, Delhi - DPCC ",pm25,172.0,28.570859,77.072196
8 | 4009,"R K Puram, New Delhi - DPCC",pm25,168.0,28.5646102,77.1670103
9 | 4099,"IGI Airport Terminal - 3, New Delhi - IMD",pm25,105.0,28.562776300000003,77.1180053
10 | 4074,"Okhla Phase-2, Delhi - DPCC",pm25,203.0,28.530782,77.272404
11 | 4068,"Nehru Nagar, Delhi - DPCC",pm25,192.0,28.563827,77.26075
12 | 4039,"Pusa, New Delhi - IMD",pm25,95.0,28.610304,77.0996943
13 | 3947,"Lodhi Road, New Delhi - IMD",pm25,148.0,28.5918245,77.2273074
14 | 4004,"Rohini, Delhi - DPCC",pm25,203.0,28.732219,77.09211
15 | 4112,"Vivek Vihar, Delhi - DPCC",pm25,221.0,28.668672,77.317084
16 | 4103,"North Campus, DU, New Delhi - IMD",pm25,141.0,28.657381400000002,77.15854470000001
17 | 4078,"IHBAS, Dilshad Garden,New Delhi - CPCB",pm25,192.0,28.6802747,77.20115729999999
18 | 4015,"Major Dhyan Chand National Stadium, Delhi - DPCC",pm25,203.0,28.612561,77.237372
19 | 3959,"Patparganj, Delhi - DPCC",pm25,152.0,28.632707,77.305651
20 | 3960,"Shadipur, New Delhi - CPCB",pm25,185.0,28.651478100000002,77.1473105
21 | 3980,"Wazirpur, Delhi - DPCC",pm25,290.0,28.699254,77.16482
22 | 4041,"Jahangirpuri, Delhi - DPCC",pm25,273.0,28.728722,77.170221
23 | 4005,"Mandir Marg, New Delhi - DPCC",pm25,173.0,28.6372688,77.2005604
24 | 3970,"Burari Crossing, New Delhi - IMD",pm25,269.0,28.725650399999996,77.20115729999999
25 | 4089,"Punjabi Bagh, Delhi - DPCC",pm25,160.0,28.669119,77.136777
26 | 396386,"Dr. Karni Singh Shooting Range, Delhi - DPCC",pm25,78.0,28.49968,77.267246
27 | 4117,"ITO, New Delhi - CPCB",pm25,211.0,28.631694500000002,77.2494387
28 | 4072,"CRRI Mathura Road, New Delhi - IMD",pm25,252.0,28.5512005,77.27357370000001
29 | 4033,"Sirifort, New Delhi - CPCB",pm25,133.0,28.5504249,77.2159377
30 | 396397,"DTU, New Delhi - CPCB",pm25,77.0,28.7500499,77.1112615
31 | 4180,US Diplomatic Post: New Delhi,pm25,96.0,28.635759999999998,77.22445
32 |
--------------------------------------------------------------------------------
/polire/gp/gp.py:
--------------------------------------------------------------------------------
1 | """
2 | This is a module for GP Interpolation
3 | """
4 | import numpy as np
5 | from ..base import Base
6 | from GPy.models import GPRegression
7 | from GPy.kern import RBF
8 |
9 |
10 | class GP(Base):
11 | """A class that is declared for performing GP interpolation.
12 | GP interpolation (usually) works on the principle of finding the
13 | best unbiased predictor.
14 |
15 | Parameters
16 | ----------
17 |     kernel : GPy.kern.Kern, optional
18 |         The covariance kernel for the underlying GPy regression model
19 |         (https://github.com/SheffieldML/GPy). Defaults to a 2-dimensional
20 |         RBF kernel with ARD enabled.
21 |
22 | """
23 |
24 | def __init__(
25 | self,
26 |         kernel=None,  # a fresh RBF kernel is created per instance below
27 |     ):
28 |         super().__init__()
29 |         self.kernel = RBF(2, ARD=True) if kernel is None else kernel
30 |
31 | def _fit(self, X, y, n_restarts=5, verbose=False, random_state=None):
32 | """Fit method for GP Interpolation
33 | This function shouldn't be called directly.
34 | """
35 | np.random.seed(random_state)
36 | if len(y.shape) == 1:
37 | y = y.reshape(-1, 1)
38 | self.model = GPRegression(X, y, self.kernel)
39 | self.model.optimize_restarts(n_restarts, verbose=verbose)
40 | return self
41 |
42 | def _predict_grid(self, x1lim, x2lim):
43 |         """The function that is called to return the GP-interpolated data
44 |         over a grid. This method shouldn't be called directly"""
45 | lims = (*x1lim, *x2lim)
46 | x1min, x1max, x2min, x2max = lims
47 | x1 = np.linspace(x1min, x1max, self.resolution)
48 | x2 = np.linspace(x2min, x2max, self.resolution)
49 |
50 | X1, X2 = np.meshgrid(x1, x2)
51 | X = np.array([(i, j) for i, j in zip(X1.ravel(), X2.ravel())])
52 |
53 | predictions = self.model.predict(X)[0].reshape(len(x1), len(x2))
54 |
55 | return predictions.ravel()
56 |
57 | def _predict(self, X, return_variance=False):
58 |         """This function returns the GP-interpolated data in a pointwise
59 |         manner. This method shouldn't be called directly."""
60 |
61 | predictions, variance = self.model.predict(X)
62 | if return_variance:
63 | return predictions.ravel(), variance
64 | else:
65 | return predictions.ravel()
66 |
--------------------------------------------------------------------------------
/polire/utils/gridding.py:
--------------------------------------------------------------------------------
1 | """ Standard Utility Script for Gridding Data
2 | 1. Contains all the common functions that
3 | will be employed across various different interpolators
4 |
5 | """
6 | import numpy as np
7 | from scipy import spatial
8 |
9 |
10 | def make_grid(x, y, res, offset=0.2):
11 |     """This function returns the grid to perform interpolation on.
12 |     It is a module-level utility shared by the interpolator classes.
13 |
14 | Parameters
15 | ----------
16 | x: array-like, shape(n_samples,)
17 | The first coordinate values of all points where
18 | ground truth is available
19 | y: array-like, shape(n_samples,)
20 | The second coordinate values of all points where
21 | ground truth is available
22 | res: int
23 | The resolution value
24 | offset: float, optional
25 | A value between 0 and 0.5 that specifies the extra interpolation to be done
26 | Default is 0.2
27 |
28 | Returns
29 | -------
30 |     xx : {array-like, 2D}, shape (res, res)
31 |     yy : {array-like, 2D}, shape (res, res)
32 | """
33 | y_min = y.min() - offset
34 | y_max = y.max() + offset
35 | x_min = x.min() - offset
36 | x_max = x.max() + offset
37 | x_arr = np.linspace(x_min, x_max, res)
38 | y_arr = np.linspace(y_min, y_max, res)
39 | xx, yy = np.meshgrid(x_arr, y_arr)
40 | return xx, yy
41 |
42 |
43 | def find_closest(grid, X, l=2):
44 | """Function used to find the indices of the grid points closest
45 | to the passed points in X.
46 |
47 | Parameters
48 | ----------
49 | grid: {list of 2 arrays}, (shape(res, res), shape(res, res))
50 | This is generated by meshgrid.
51 |
52 | X: {array-like, 2D matrix}, shape(n_samples, 2)
53 | The set of points to which we need to provide closest points
54 | on the grid.
55 |
56 |     l: int, optional
57 |         The Minkowski `l`-norm for the nearest-neighbour query. `Default = 2`.
58 |
59 | Returns
60 | -------
61 | ix: array, shape(X.shape[0],)
62 | The index of the point closest to points in X.
63 |
64 | ref - https://stackoverflow.com/questions/10818546/finding-index-of-nearest-point-in-numpy-arrays-of-x-and-y-coordinates
65 | """
66 | points = np.asarray(
67 | [grid[0].ravel(), grid[1].ravel()]
68 |     ).T  # ravel returns a view where possible (no copy)
69 | kdtree = spatial.KDTree(points)
70 | ixs = [] # for containing the indices of closest points found on grid
71 |
72 | for point_ix in range(X.shape[0]):
73 | point = X[point_ix, :]
74 |         _, ix = kdtree.query(point, p=l)
75 | ixs.append(ix)
76 |
77 | return ixs
78 |
--------------------------------------------------------------------------------
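A small sketch of the two helpers together: `make_grid` builds a padded meshgrid over the data's bounding box, and `find_closest` maps arbitrary query points to indices into the flattened grid:

```python
import numpy as np
from polire.utils.gridding import find_closest, make_grid

x = np.array([0.0, 1.0, 2.0])
y = np.array([0.0, 1.0, 2.0])

xx, yy = make_grid(x, y, res=50, offset=0.2)  # 50x50 grid over [-0.2, 2.2]^2
assert xx.shape == (50, 50)

# index of the grid point nearest to each query point
ixs = find_closest((xx, yy), np.array([[1.0, 1.0], [0.0, 2.0]]))
nearest = np.c_[xx.ravel()[ixs], yy.ravel()[ixs]]
print(nearest)
```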
/polire/trend/trend.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy.optimize import curve_fit
3 |
4 | from ..base import Base
5 | from .polynomials import _create_polynomial
6 |
7 |
8 | class Trend(Base):
9 | """Class to interpolate by fitting a curve to the data points
10 | available using `scipy`'s `curve_fit`.
11 |
12 | Parameters
13 | ----------
14 | order: int, default 1
15 | Selects the order of the polynomial to best fit.
16 | Possible values 0 <= order <= 2.
17 |
18 | custom_poly: functor, default None
19 | If you would like to fit to your custom function,
20 | _set order to None_ and then pass a functor.
21 | See Example functor passing below
22 | .. highlight:: python
23 | .. code-block:: python
24 | def func(X, a, b, c):
25 | x1, x2 = X
26 | return np.log(a) + b*np.log(x1) + c*np.log(x2)
27 | t = Trend(order=None, custom_poly=func)
28 | ...
29 | """
30 |
31 | def __init__(
32 | self,
33 | order=1,
34 | custom_poly=None,
35 | resolution="standard",
36 | coordinate_type="Euclidean",
37 | ):
38 | super().__init__(resolution, coordinate_type)
39 | self.order = order
40 | # setting the polynomial to fit our data
41 |         func = _create_polynomial(order)
42 |         if func is not None:
43 |             self.func = func
44 |         elif custom_poly is not None:
45 |             self.func = custom_poly
46 |         else:
47 |             raise ValueError("Arguments passed are not valid")
48 |
49 | def _fit(self, X, y):
50 | """Function for fitting trend interpolation.
51 | This function is not supposed to be called directly.
52 | """
53 | # fitting the curve using scipy
54 | self.popt, self.pcov = curve_fit(self.func, (X[:, 0], X[:, 1]), y)
55 | return self
56 |
57 | def _predict_grid(self, x1lim, x2lim):
58 | """Function for trend interpolation.
59 | This function is not supposed to be called directly.
60 | """
61 | # getting the boundaries for interpolation
62 | x1min, x1max = x1lim
63 | x2min, x2max = x2lim
64 |
65 | # forming the grid
66 | x1 = np.linspace(x1min, x1max, self.resolution)
67 | x2 = np.linspace(x2min, x2max, self.resolution)
68 | X1, X2 = np.meshgrid(x1, x2)
69 | return self.func((X1, X2), *self.popt)
70 |
71 | def _predict(self, X):
72 |         """Function for trend interpolation.
73 | This function is not supposed to be called directly.
74 | """
75 | x1, x2 = X[:, 0], X[:, 1]
76 | return self.func((x1, x2), *self.popt)
77 |
--------------------------------------------------------------------------------
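A sketch of the custom-function path from the docstring: set `order=None` and pass any callable of the form `f((x1, x2), *params)` for `curve_fit` to optimize (the log model below is just an illustration):

```python
import numpy as np
from polire import Trend

X = np.random.rand(40, 2) + 1.0  # keep inputs positive for the log model
y = 0.5 + 2.0 * np.log(X[:, 0]) - 1.5 * np.log(X[:, 1])

def func(X, a, b, c):
    x1, x2 = X
    return a + b * np.log(x1) + c * np.log(x2)

model = Trend(order=None, custom_poly=func)
model.fit(X, y)
print(model.predict(np.array([[1.5, 1.5]])))
```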
/usage.py:
--------------------------------------------------------------------------------
1 | # imports
2 | import seaborn as sns
3 | import matplotlib.pyplot as plt
4 | import numpy as np
5 | import pandas as pd
6 | from GPy.kern import Matern32
7 |
8 | from polire import (
9 | Random,
10 | Trend,
11 | Spline,
12 | IDW,
13 | Kriging,
14 | SpatialAverage,
15 | NaturalNeighbor,
16 | )
17 | from polire.gp.gp import GP  # GP is not re-exported while GPy support is off
18 |
19 | # sample data
20 | X = [[0, 0], [0, 3], [3, 0], [3, 3]]
21 | y = [0, 1.5, 1.5, 3]
22 | X = np.array(X)
23 | y = np.array(y)
24 | regressors = [
25 | Random(),
26 | SpatialAverage(),
27 | Spline(kx=1, ky=1),
28 | Trend(),
29 | IDW(coordinate_type="Geographic"),
30 | Kriging(),
31 | GP(Matern32(input_dim=2)),
32 | ]
33 |
34 |
35 | def test_grid():
36 | # Gridded interpolation testing
37 | print("\nTesting on small dataset")
38 | for r in regressors:
39 | r.fit(X, y)
40 | y_pred = r.predict_grid()
41 | Z = y_pred
42 | sns.heatmap(Z)
43 | plt.title(r)
44 | plt.show()
45 | plt.close()
46 | print("\nTesting completed on a small dataset\n")
47 |
48 | print("\nTesting on a reasonable dataset")
49 |
50 | df = pd.read_csv("tests/data/30-03-18.csv")
51 | X1 = np.array(df[["longitude", "latitude"]])
52 | y1 = np.array(df["value"])
53 |
54 | for r in regressors:
55 | r.fit(X1, y1)
56 | y_pred = r.predict_grid()
57 | Z = y_pred
58 | sns.heatmap(Z)
59 | plt.title(r)
60 | plt.show()
61 | plt.close()
62 |
63 |
64 | def test_point():
65 | # Pointwise interpolation testing
66 | for r in regressors:
67 | r.fit(X, y)
68 | test_data = [
69 | [0, 0],
70 | [0, 3],
71 | [3, 0],
72 | [3, 3],
73 | [1, 1],
74 | [1.5, 1.5],
75 | [2, 2],
76 | [2.5, 2.5],
77 | [4, 4],
78 | ]
79 | y_pred = r.predict(np.array(test_data))
80 | print(r)
81 | print(y_pred)
82 |
83 |
84 | def test_nn():
85 | print("\nNatural Neighbors - Point Wise")
86 | nn = NaturalNeighbor()
87 | df = pd.read_csv("tests/data/30-03-18.csv")
88 | X = np.array(df[["longitude", "latitude"]])
89 | y = np.array(df["value"])
90 | nn.fit(X, y)
91 | test_data = [[77.16, 28.70], X[0]]
92 | y_pred = nn.predict(np.array(test_data))
93 | print(y_pred)
94 | del nn
95 | print("\nNatural Neighbors - Entire Grid")
96 | # Suggested by Apoorv as a temporary fix
97 | # Patience pays
98 | nn = NaturalNeighbor()
99 | nn.fit(X, y)
100 | y_pred = nn.predict_grid()
101 | print(y_pred)
102 | sns.heatmap(y_pred)
103 | plt.title(nn)
104 | plt.show()
105 | plt.close()
106 |
107 |
108 | if __name__ == "__main__":
109 | print("Testing Gridded Interpolation")
110 | test_grid()
111 | print("\nTesting Pointwise Interpolation")
112 | test_point()
113 | print("\nTesting Natural Neighbors")
114 | test_nn()
115 |
--------------------------------------------------------------------------------
/polire/spline/bspline.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy.interpolate import bisplrep, bisplev
3 |
4 |
5 | from ..base import Base
6 | from ..utils import find_closest
7 |
8 |
9 | class Spline(Base):
10 | """
11 | Class to use a bivariate B-spline to interpolate values.
12 | https://docs.scipy.org/doc/scipy-0.18.1/reference/generated/scipy.interpolate.bisplrep.html#scipy.interpolate.bisplrep
13 |
14 | Parameters
15 | ----------
16 | kx, ky: int, int, optional
17 | The degrees of the spline (1 <= kx, ky <= 5).
18 | Third order (kx=ky=3) is recommended.
19 |
20 | s : float, optional
21 | A non-negative smoothing factor. If weights correspond
22 | to the inverse of the standard-deviation of the errors
23 | in z, then a good s-value should be found in the
24 | range `(m-sqrt(2*m),m+sqrt(2*m))` where `m=len(x)`.
25 | """
26 |
27 | def __init__(
28 | self,
29 | kx=3,
30 | ky=3,
31 | s=None,
32 | resolution="standard",
33 | coordinate_type="Euclidean",
34 | ):
35 | super().__init__(resolution, coordinate_type)
36 | self.kx = kx
37 | self.ky = ky
38 | self.s = s
39 |
40 | def _fit(self, X, y):
41 | """The function call to fit the spline model on the given data.
42 | This function is not supposed to be called directly.
43 | """
44 | # fitting the curve
45 | # bisplrep returns details of the fitted curve
46 | # read bisplrep docs for more info about it's return values.
47 | self.tck = bisplrep(
48 | X[:, 0], X[:, 1], y, kx=self.kx, ky=self.ky, s=self.s
49 | )
50 | return self
51 |
52 | def _predict_grid(self, x1lim, x2lim):
53 | """The function to predict grid interpolation using the BSpline.
54 | This function is not supposed to be called directly.
55 | """
56 | # getting the boundaries for interpolation
57 | x1min, x1max = x1lim
58 | x2min, x2max = x2lim
59 |
60 | # interpolating over the grid
61 | # TODO Relook here, we might expect the result to be transpose
62 | return bisplev(
63 | np.linspace(x1min, x1max, self.resolution),
64 | np.linspace(x2min, x2max, self.resolution),
65 | self.tck,
66 | )
67 |
68 | def _predict(self, X):
69 | """The function to predict using the BSpline interpolation.
70 | This function is not supposed to be called directly.
71 | """
72 | results = []
73 | for ix in range(X.shape[0]):
74 | interpolated_y = bisplev(
75 | X[ix, 0], X[ix, 1], self.tck
76 | ).item() # one value returned
77 | results.append(interpolated_y)
78 |
79 | return np.array(results)
80 |
81 | # # form a grid
82 | # x1 = np.linspace(self.x1min_d, self.x1max_d, self.resolution),
83 | # x2 = np.linspace(self.x2min_d, self.x2max_d, self.resolution),
84 | # X1, X2 = np.meshgrid(x1, x2)
85 |
86 | # # be default run grid interpolation on the whole train data
87 | # interpolated_grid = bisplev(
88 | # x1, x2,
89 | # self.tck,
90 | # )
91 |
92 | # # find the closest points on the interpolated grid
93 | # ix = find_closest(grid=(X1, X2), X)
94 | # return interpolated_grid[ix] # TODO this can be wrong, must depend on
95 |
--------------------------------------------------------------------------------
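One practical caveat worth a sketch: `bisplrep` needs at least `(kx+1)*(ky+1)` points, so the default cubic spline fails on very small datasets; drop to a linear spline instead:

```python
import numpy as np
from polire import Spline

rng = np.random.default_rng(0)
X = rng.random((25, 2)) * 3.0
y = X[:, 0] + X[:, 1]  # a plane, easy for a linear spline

model = Spline(kx=1, ky=1)
model.fit(X, y)
print(model.predict(np.array([[1.0, 2.0]])))  # approximately [3.0]
```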
/README.md:
--------------------------------------------------------------------------------
1 | 
2 | [](https://github.com/psf/black)
3 | [](https://coveralls.io/github/sustainability-lab/polire?branch=master)
4 |
5 | ## Polire
6 |
7 | ```bash
8 | pip install polire
9 | ```
10 |
11 |
12 | The word "interpolation" has a Latin origin and is composed of two words - Inter, meaning between, and Polire, meaning to polish.
13 |
14 |
15 | This repository is a collection of several spatial interpolation algorithms.
16 |
17 |
18 | ## Examples
19 | Please refer to [the documentation](https://sustainability-lab.github.io/polire/) to check out practical examples on real datasets.
20 |
21 | ### Minimal example of interpolation
22 | ```python
23 | import numpy as np
24 | from polire import Kriging
25 |
26 | # Data
27 | X = np.random.rand(10, 2) # Spatial 2D points
28 | y = np.random.rand(10) # Observations
29 | X_new = np.random.rand(100, 2) # New spatial points
30 |
31 | # Fit
32 | model = Kriging()
33 | model.fit(X, y)
34 |
35 | # Predict
36 | y_new = model.predict(X_new)
37 | ```
38 |
39 | ### Supported Interpolation Methods
40 | ```python
41 | from polire import (
42 | Kriging, # Best spatial unbiased predictor
43 | GP, # Gaussian process interpolator from GPy
44 | IDW, # Inverse distance weighting
45 | SpatialAverage,
46 | Spline,
47 | Trend,
48 | Random, # Predict uniformly within the observation range, a reasonable baseline
49 | NaturalNeighbor,
50 | CustomInterpolator # Supports any regressor from Scikit-learn
51 | )
52 | ```
53 |
54 | ### Use GP kernels from GPy (temporarily unavailable)
55 | ```python
56 | from GPy.kern import Matern32 # or any other GPy kernel
57 |
58 | # GP model
59 | model = GP(Matern32(input_dim=2))
60 | ```
61 |
62 | ### Regressors from sklearn
63 | ```python
64 | from sklearn.linear_model import LinearRegression # or any Scikit-learn regressor
65 | from polire import GP, CustomInterpolator
66 |
67 | # Sklearn model
68 | model = CustomInterpolator(LinearRegression())
69 | ```
70 |
71 | ### Extract spatial features from spatio-temporal dataset
72 | ```python
73 | # X and X_new are numpy arrays whose first three columns are longitude, latitude and time.
74 | # y holds the observations corresponding to X
75 |
76 | from polire.preprocessing import SpatialFeatures
77 | spatial = SpatialFeatures(n_closest=10)
78 | Features = spatial.fit_transform(X, y)
79 | Features_new = spatial.transform(X_new)
80 | ```
81 |
82 | ## Citation
83 |
84 | If you use this library, please cite the following paper:
85 |
86 | ```
87 | @inproceedings{10.1145/3384419.3430407,
88 | author = {Narayanan, S Deepak and Patel, Zeel B and Agnihotri, Apoorv and Batra, Nipun},
89 | title = {A Toolkit for Spatial Interpolation and Sensor Placement},
90 | year = {2020},
91 | isbn = {9781450375900},
92 | publisher = {Association for Computing Machinery},
93 | address = {New York, NY, USA},
94 | url = {https://doi.org/10.1145/3384419.3430407},
95 | doi = {10.1145/3384419.3430407},
96 | booktitle = {Proceedings of the 18th Conference on Embedded Networked Sensor Systems},
97 | pages = {653–654},
98 | numpages = {2},
99 | location = {Virtual Event, Japan},
100 | series = {SenSys '20}
101 | }
102 | ```
--------------------------------------------------------------------------------
/polire/idw/idw.py:
--------------------------------------------------------------------------------
1 | """
2 | This is a module for inverse distance weighting (IDW) Spatial Interpolation
3 | """
4 | import numpy as np
5 | from ..utils.distance import haversine, euclidean
6 | from ..base import Base
7 |
8 |
9 | class IDW(Base):
10 | """A class that is declared for performing IDW Interpolation.
11 | For more information on how this method works, kindly refer to
12 | https://en.wikipedia.org/wiki/Inverse_distance_weighting
13 |
14 | Parameters
15 | ----------
16 | exponent : positive float, optional
17 |         Controls how quickly the influence of a data point decays
18 |         with distance: the higher the exponent, the faster the
19 |         decay. Default value is 2.
20 |
21 | Attributes
22 | ----------
23 |     interpolated_values : {array-like, 2D matrix}, shape(resolution, resolution)
24 | This contains all the interpolated values when the interpolation is performed
25 | over a grid, instead of interpolation over a set of points.
26 |
27 | X : {array-like, 2D matrix}, shape(n_samples, 2)
28 | Set of all the coordinates available for interpolation.
29 |
30 | y : array-like, shape(n_samples,)
31 | Set of all the available values at the specified X coordinates.
32 |
33 | result : array_like, shape(n_to_predict, )
34 | Set of all the interpolated values when interpolating over a given
35 | set of data points.
36 |
37 | """
38 |
39 | def __init__(
40 | self, exponent=2, resolution="standard", coordinate_type="Euclidean"
41 | ):
42 | super().__init__(resolution, coordinate_type)
43 | self.exponent = exponent
44 | self.interpolated_values = None
45 | self.X = None
46 | self.y = None
47 | self.result = None
48 | if self.coordinate_type == "Geographic":
49 | self.distance = haversine
50 | elif self.coordinate_type == "Euclidean":
51 | self.distance = euclidean
52 | else:
53 | raise NotImplementedError(
54 | "Only Geographic and Euclidean Coordinates are available"
55 | )
56 |
57 | def _fit(self, X, y):
58 | """This function is for the IDW Class.
59 | This is not expected to be called directly
60 | """
61 | self.X = X
62 | self.y = y
63 | return self
64 |
65 | def _predict_grid(self, x1lim, x2lim):
66 |         """Gridded interpolation for IDW. This function should not
67 | be called directly.
68 | """
69 | lims = (*x1lim, *x2lim)
70 | x1min, x1max, x2min, x2max = lims
71 | x1 = np.linspace(x1min, x1max, self.resolution)
72 | x2 = np.linspace(x2min, x2max, self.resolution)
73 | X1, X2 = np.meshgrid(x1, x2)
74 | return self._predict(np.array([X1.ravel(), X2.ravel()]).T)
75 |
76 | def _predict(self, X):
77 | """The function call to predict using the interpolated data
78 | in IDW interpolation. This should not be called directly.
79 | """
80 |
81 | dist = self.distance(self.X, X)
82 | weights = 1 / np.power(dist, self.exponent)
83 | result = (weights * self.y[:, None]).sum(axis=0) / weights.sum(axis=0)
84 |
85 | # if point is from train data, ground truth must not change
86 | for i in range(X.shape[0]):
87 | mask = np.equal(X[i], self.X).all(axis=1)
88 | if mask.any():
89 | result[i] = (self.y * mask).sum()
90 |
91 | return result
92 |
--------------------------------------------------------------------------------
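A sketch of the exact-reproduction behaviour implemented by the final loop in `_predict`: training points get their ground-truth value back, everything else is an inverse-distance weighted average (a divide-by-zero warning at zero distance is expected and then overwritten):

```python
import numpy as np
from polire import IDW

X = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]])
y = np.array([1.0, 2.0, 3.0, 4.0])

model = IDW(exponent=2)
model.fit(X, y)

print(model.predict(np.array([[0.0, 1.0]])))  # [2.0], a training point
print(model.predict(np.array([[0.5, 0.5]])))  # [2.5], equidistant from all four
```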
/polire/preprocessing/sptial_features.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from ..idw.idw import IDW
3 | from ..utils.distance import haversine, euclidean
4 |
5 |
6 | class SpatialFeatures:
7 | """Generate spatial features from N-closest locations
8 |
9 | Args:
10 | n_closest : 'N' closest locations
11 |
12 |         idw : Whether to include the IDW prediction as a feature
13 | 
14 |         idw_exponent : Exponent used by IDW (ignored if idw is False)
15 | 
16 |         coordinate_type : 'Euclidean' or 'Geographic'
17 | 
18 |         resolution : 'low', 'standard' or 'high' (ignored if idw is False)
19 | """
20 |
21 | def __init__(
22 | self,
23 | n_closest: int = 5,
24 | idw: bool = True,
25 | idw_exponent: float = 2,
26 | coordinate_type: str = "Euclidean",
27 | resolution: str = "standard",
28 | ) -> None:
29 | self.n_closest = n_closest
30 | self.idw = idw
31 | self.idw_exponent = idw_exponent
32 | self.coordinate_type = coordinate_type
33 | self.resolution = resolution
34 |         if self.coordinate_type == "Euclidean":
35 | self.distance = euclidean
36 | elif self.coordinate_type == "Geographic":
37 | self.distance = haversine
38 | else:
39 | raise NotImplementedError(
40 | '"'
41 | + self.coordinate_type
42 | + '" is not implemented yet or invalid'
43 | )
44 |
45 | def fit(self, X: np.ndarray, y: np.ndarray) -> object:
46 |         """Store the reference data used to generate features.
47 |
48 | Args:
49 | X : Reference X data (longitude, latitude, time, ...)
50 | y : Reference y data
51 |
52 | Returns:
53 | self
54 | """
55 | self.X = X
56 | self.y = y
57 |         return self
58 | def transform(self, X: np.ndarray) -> np.ndarray:
59 | """Transform features
60 |
61 | Args:
62 | X (np.ndarray): (longitude, latitude, time, ...)
63 |
64 | Raises:
65 | Exception: If not already fitted
66 |
67 | Returns:
68 | np.ndarray: Transformed features
69 | """
70 | try:
71 | self.X
72 | except AttributeError:
73 | raise Exception("Not fitted yet. first call the 'fit' method")
74 |
75 | Xflag = False
76 |         if X.shape == self.X.shape and np.all(X == self.X):
77 | Xflag = True
78 |
79 | F = (
80 | np.empty(
81 | (X.shape[0], (X.shape[1] - 3) + self.n_closest * 2 + self.idw)
82 | )
83 | * np.nan
84 | )
85 | for t in np.unique(X[:, 2]): # Iterating over time
86 | mask = X[:, 2] == t # rows with time t
87 | trn_mask = self.X[:, 2] == t
88 | X_local = X[mask]
89 | self_X_local = self.X[trn_mask]
90 |
91 | lonlat = X_local[:, :2] # locs
92 | self_lonlat = self_X_local[:, :2] # Reference locs
93 | dst = self.distance(lonlat, self_lonlat)
94 | if Xflag:
95 | idx = dst.argsort()[:, 1 : self.n_closest + 1]
96 | else:
97 | idx = dst.argsort()[:, : self.n_closest]
98 |
99 | # Feature set 1: closest distances
100 | f1 = dst[np.arange(lonlat.shape[0])[:, None], idx]
101 |
102 | self_y_local = self.y[trn_mask] # Train obs
103 | ymat = self_y_local[:, None].repeat(lonlat.shape[0], 1).T
104 | # Feature set 2: closest observations
105 | f2 = ymat[np.arange(lonlat.shape[0])[:, None], idx]
106 |
107 | if self.idw:
108 |
109 | def for_each_row(i):
110 | i = i[0]
111 | model = IDW(exponent=self.idw_exponent)
112 | model.resolution = self.resolution
113 | model.coordinate_type = self.coordinate_type
114 | model.fit(self_lonlat[idx[i]], self_y_local[idx[i]])
115 | return model.predict(lonlat[i][None, :])
116 |
117 | # Feature set 3: IDW observation
118 | f3 = np.apply_along_axis(
119 | for_each_row,
120 | axis=1,
121 | arr=np.arange(lonlat.shape[0]).reshape(-1, 1),
122 | )
123 | F[mask] = np.concatenate([X_local[:, 3:], f1, f2, f3], axis=1)
124 | else:
125 | F[mask] = np.concatenate([X_local[:, 3:], f1, f2], axis=1)
126 |
127 | return F
128 |
129 | def fit_transform(self, X: np.ndarray, y: np.ndarray):
130 | self.fit(X, y)
131 | return self.transform(X)
132 |
--------------------------------------------------------------------------------
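A sketch of the expected layout: rows of `X` are `(longitude, latitude, time, extra features...)`; the output stacks the extra columns with `n_closest` distances, `n_closest` neighbour observations and, when `idw=True`, one IDW prediction (synthetic single-timestamp data below):

```python
import numpy as np
from polire.preprocessing import SpatialFeatures

rng = np.random.default_rng(0)
n = 30
X = np.c_[rng.random(n), rng.random(n), np.zeros(n)]  # (lon, lat, t=0)
y = rng.random(n)

spatial = SpatialFeatures(n_closest=5)
features = spatial.fit_transform(X, y)
assert features.shape == (n, 11)  # 5 distances + 5 observations + 1 IDW

X_new = np.c_[rng.random(10), rng.random(10), np.zeros(10)]
features_new = spatial.transform(X_new)  # shape (10, 11)
```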
/polire/base/base.py:
--------------------------------------------------------------------------------
1 | from ..constants import RESOLUTION
2 |
3 |
4 | class Base:
5 | """A class that is declared for performing Interpolation.
6 |     This class should not be called directly; use one of its
7 | children.
8 | """
9 |
10 |     def __init__(self, resolution="standard", coordinate_type="Euclidean"):
11 |         self.resolution = RESOLUTION[resolution]
12 |         self.coordinate_type = coordinate_type
13 | self._fit_called = False
14 |
15 | def fit(self, X, y, **kwargs):
16 | """The function call to fit the model on the given data.
17 |
18 | Parameters
19 | ----------
20 |
21 | X: {array-like, 2D matrix}, shape(n_samples, 2)
22 | The set of all coordinates, where we have ground truth
23 | values
24 | y: array-like, shape(n_samples,)
25 | The set of all the ground truth values using which
26 | we perform interpolation
27 |
28 | Returns
29 | -------
30 |
31 | self : object
32 | Returns self
33 |
34 | """
35 | assert len(X.shape) == 2, "X must be a 2D array got shape = " + str(
36 | X.shape
37 | )
38 | # assert X.shape[1] == 2, "X can not have more than 2 dimensions"
39 | assert len(y.shape) == 1, "y should be a 1d array"
40 | assert y.shape[0] == X.shape[0], "X and y must be of the same size"
41 |
42 | # saving that fit was called
43 | self._fit_called = True
44 |
45 | # saving boundaries
46 | self.x1min_d = min(X[:, 0])
47 | self.x1max_d = max(X[:, 0])
48 | self.x2min_d = min(X[:, 1])
49 | self.x2max_d = max(X[:, 1])
50 | return self._fit(X, y, **kwargs) # calling child specific fit method
51 |
52 | def predict(self, X, **kwargs):
53 | """The function call to return interpolated data on specific
54 | points.
55 |
56 | Parameters
57 | ----------
58 |
59 | X: {array-like, 2D matrix}, shape(n_samples, 2)
60 |             The set of coordinates at which interpolated values
61 |             are required
62 |
63 | Returns
64 | -------
65 |
66 | y_pred : array-like, shape(n_samples,)
67 | The set of interpolated values for the points used to
68 | call the function.
69 | """
70 |
71 | assert len(X.shape) == 2, "X must be a 2D array got shape = " + str(
72 | X.shape
73 | )
74 | # assert X.shape[1] == 2, "X can not have more than 2 dimensions"
75 |
76 | # checking if model is fitted or not
77 | assert self._fit_called, "First call fit method to fit the model"
78 |
79 | # calling child specific _predict method
80 | return self._predict(X, **kwargs)
81 |
82 | def predict_grid(self, x1lim=None, x2lim=None, support_extrapolation=True):
83 | """Function to interpolate data on a grid of given size.
84 | 
85 | Parameters
86 | ----------
87 | x1lim: tuple(float, float),
88 | Upper and lower bound on 1st dimension for the interpolation.
89 |
90 | x2lim: tuple(float, float),
91 | Upper and lower bound on 2nd dimension for the interpolation.
92 |
93 | Returns
94 | -------
95 |         y: {array-like, 2D matrix}, shape(resolution, resolution)
96 |             Interpolated values on the requested grid.
97 | """
98 | # checking if model is fitted or not
99 | assert self._fit_called, "First call fit method to fit the model"
100 |
101 | # by default we interpolate over the whole grid
102 | if x1lim is None:
103 | x1lim = (self.x1min_d, self.x1max_d)
104 | if x2lim is None:
105 | x2lim = (self.x2min_d, self.x2max_d)
106 | (x1min, x1max) = x1lim
107 | (x2min, x2max) = x2lim
108 |
109 |         # optionally forbid requesting a grid beyond the data bounds
110 |         if not support_extrapolation:
111 |             assert x1min >= self.x1min_d, "Extrapolation not supported"
112 |             assert x1max <= self.x1max_d, "Extrapolation not supported"
113 |             assert x2min >= self.x2min_d, "Extrapolation not supported"
114 |             assert x2max <= self.x2max_d, "Extrapolation not supported"
115 |
116 | # calling child specific _predict_grid method
117 | pred_y = self._predict_grid(x1lim, x2lim)
118 | return pred_y.reshape(self.resolution, self.resolution)
119 |
120 | def __repr__(self):
121 | return self.__class__.__name__
122 |
123 | def _fit(self, X, y):
124 | raise NotImplementedError
125 |
126 | def _predict_grid(self, x1lim, x2lim):
127 | raise NotImplementedError
128 |
129 | def _predict(self, X):
130 | raise NotImplementedError
131 |
--------------------------------------------------------------------------------
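`Base` is a template: `fit`/`predict`/`predict_grid` do validation and bookkeeping while children implement `_fit`, `_predict` and `_predict_grid`. A minimal hypothetical child (not part of the package) to illustrate the contract:

```python
import numpy as np
from polire.base import Base
from polire.utils.distance import euclidean


class NearestNeighbour(Base):
    """Toy interpolator: copy the value of the closest training point."""

    def _fit(self, X, y):
        self.X, self.y = X, y
        return self

    def _predict(self, X):
        return self.y[euclidean(X, self.X).argmin(axis=1)]

    def _predict_grid(self, x1lim, x2lim):
        x1 = np.linspace(*x1lim, self.resolution)
        x2 = np.linspace(*x2lim, self.resolution)
        X1, X2 = np.meshgrid(x1, x2)
        return self._predict(np.c_[X1.ravel(), X2.ravel()])


model = NearestNeighbour().fit(np.random.rand(20, 2), np.random.rand(20))
Z = model.predict_grid()  # (100, 100) at "standard" resolution
```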
/polire/kriging/kriging.py:
--------------------------------------------------------------------------------
1 | """
2 | This is a module for Kriging Interpolation
3 | """
4 | import numpy as np
5 | from ..base import Base
6 | from pykrige.ok import OrdinaryKriging
7 | from pykrige.uk import UniversalKriging
8 |
9 |
10 | class Kriging(Base):
11 | """A class that is declared for performing Kriging interpolation.
12 | Kriging interpolation (usually) works on the principle of finding the
13 |     best unbiased predictor. Ordinary Kriging, for example, involves finding the
14 |     best unbiased linear predictor.
15 |
16 | Parameters
17 | ----------
18 | type : str, optional
19 | This parameter defines the type of Kriging under consideration. This
20 | implementation uses PyKrige package (https://github.com/bsmurphy/PyKrige).
21 | The user needs to choose between "Ordinary" and "Universal".
22 |
23 | plotting: boolean, optional
24 |         If True, the fitted semivariogram is plotted using PyKrige's inbuilt plotter.
25 |
26 | variogram_model : str, optional
27 | Specifies which variogram model to use; may be one of the following:
28 | linear, power, gaussian, spherical, exponential, hole-effect.
29 | Default is linear variogram model. To utilize a custom variogram model,
30 | specify 'custom'; you must also provide variogram_parameters and
31 | variogram_function. Note that the hole-effect model is only technically
32 | correct for one-dimensional problems.
33 |
34 |     require_variance : Boolean, optional
35 |         If True, the uncertainty (kriging variance) at the interpolated points is
36 |         computed and can be retrieved by calling the return_variance method of
37 |         this class. False is the default value.
38 |
39 |     nlags: int, optional
40 |         Number of lags to be considered for the semivariogram. As in PyKrige, the default is 6.
41 | """
42 |
43 | def __init__(
44 | self,
45 | type="Ordinary",
46 | plotting=False,
47 | variogram_model="linear",
48 | require_variance=False,
49 | resolution="standard",
50 | coordinate_type="Eucledian",
51 | nlags=6,
52 | ):
53 | super().__init__(resolution, coordinate_type)
54 | self.variogram_model = variogram_model
55 | self.ok = None
56 | self.uk = None
57 | self.type = type
58 | self.plotting = plotting
59 |         self.coordinate_type = None  # overwritten just below
60 | self.require_variance = require_variance
61 | self.variance = None
62 |
63 | if coordinate_type == "Eucledian":
64 | self.coordinate_type = "euclidean"
65 | else:
66 | self.coordinate_type = "geographic"
67 |
68 | self.nlags = nlags
69 |
70 | def _fit(self, X, y):
71 | """This method of the Kriging Class is used to fit Kriging interpolation model to
72 | the train data. This function shouldn't be called directly."""
73 | if self.type == "Ordinary":
74 | self.ok = OrdinaryKriging(
75 | X[:, 0],
76 | X[:, 1],
77 | y,
78 | variogram_model=self.variogram_model,
79 | enable_plotting=self.plotting,
80 | coordinates_type=self.coordinate_type,
81 | nlags=self.nlags,
82 | )
83 |
84 | elif self.type == "Universal":
85 | self.uk = UniversalKriging(
86 | X[:, 0],
87 | X[:, 1],
88 | y,
89 | variogram_model=self.variogram_model,
90 |                 enable_plotting=self.plotting, nlags=self.nlags,
91 | )
92 |
93 | else:
94 | raise ValueError(
95 | "Choose either Universal or Ordinary - Given argument is neither"
96 | )
97 |
98 | return self
99 |
100 | def _predict_grid(self, x1lim, x2lim):
101 | """The function that is called to return the interpolated data in Kriging Interpolation
102 | in a grid. This method shouldn't be called directly"""
103 | lims = (*x1lim, *x2lim)
104 | x1min, x1max, x2min, x2max = lims
105 | x1 = np.linspace(x1min, x1max, self.resolution)
106 | x2 = np.linspace(x2min, x2max, self.resolution)
107 |
108 | if self.ok is not None:
109 | predictions, self.variance = self.ok.execute(
110 | style="grid", xpoints=x1, ypoints=x2
111 | )
112 |
113 | else:
114 | predictions, self.variance = self.uk.execute(
115 | style="grid", xpoints=x1, ypoints=x2
116 | )
117 |
118 | return predictions
119 |
120 | def _predict(self, X):
121 | """This function should be called to return the interpolated data in kriging
122 | in a pointwise manner. This method shouldn't be called directly."""
123 | if self.ok is not None:
124 | predictions, self.variance = self.ok.execute(
125 | style="points", xpoints=X[:, 0], ypoints=X[:, 1]
126 | )
127 |
128 | else:
129 | predictions, self.variance = self.uk.execute(
130 | style="points", xpoints=X[:, 0], ypoints=X[:, 1]
131 | )
132 |
133 | return predictions
134 |
135 | def return_variance(self):
136 | """This method of the Kriging class returns the variance at the interpolated
137 | points if the user chooses to use this option at the beginning of the interpolation
138 | """
139 | if self.require_variance:
140 | return self.variance
141 |
142 | else:
143 |             print(
144 |                 "Variance was not requested while instantiating the object. Returning None"
145 |             )
146 | return None
147 |
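148 | # -------------------------------------------------------------------------
149 | # Illustrative usage sketch (not part of the module): fit Ordinary Kriging
150 | # on a small synthetic dataset and interpolate at new points. The synthetic
151 | # data below is hypothetical and exists only for demonstration.
152 | # -------------------------------------------------------------------------
153 | if __name__ == "__main__":
154 |     rng = np.random.RandomState(0)
155 |     X_train = rng.uniform(0, 10, size=(30, 2))  # 30 random 2-D locations
156 |     y_train = np.sin(X_train[:, 0]) + np.cos(X_train[:, 1])
157 |
158 |     model = Kriging(type="Ordinary", require_variance=True)
159 |     model.fit(X_train, y_train)
160 |
161 |     X_query = rng.uniform(0, 10, size=(5, 2))
162 |     print(model.predict(X_query))   # pointwise interpolation
163 |     print(model.return_variance())  # kriging variance at the query points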
--------------------------------------------------------------------------------
/polire/gp/tests/GP interpolation.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from pykrige import OrdinaryKriging"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 4,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "import pandas as pd\n",
19 | "import numpy as np"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 38,
25 | "metadata": {},
26 | "outputs": [],
27 | "source": []
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": 10,
32 | "metadata": {},
33 | "outputs": [],
34 | "source": [
35 | "ok = OrdinaryKriging(data[:,0],data[:,1],data[:,2])\n",
36 | "ok.ex"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": 43,
42 | "metadata": {},
43 | "outputs": [],
44 | "source": [
45 | "a,b = ok.execute('grid',x[0],y[:,0])"
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": 61,
51 | "metadata": {},
52 | "outputs": [],
53 | "source": [
54 | "from pykrige import OrdinaryKriging\n",
55 | "import pandas as pd\n",
56 | "import numpy as np\n",
57 | "\n",
58 | "def ordinary_kriging(dataset, resolution='standard', coordinate_type='euclidean',verbose='False',method='grid', isvariance = False):\n",
59 | " if coordinate_type == 'latlong_small':\n",
60 | " \"\"\"\n",
61 | " Assume that the Earth is a Sphere, and use polar coordinates\n",
62 | " $| \\vec{r_2}− \\vec{r_1}| ≈ \\text{R }\\times \\sqrt[]{(Lat_2 - Lat_1)^{2} + (Long_2 - Long_1)^{2}}$\n",
63 | " \"\"\"\n",
64 | " return \"To be done later\"\n",
65 | " if coordinate_type == 'latlong_large':\n",
66 | " \"\"\"\n",
67 | " Code to be written after understanding all the projections.\n",
68 | " \"\"\"\n",
69 | " return \"To be done later\"\n",
70 | " if coordinate_type==\"euclidean\":\n",
71 | " \n",
72 | " ok = OrdinaryKriging(dataset[:,0],dataset[:,1],dataset[:,2])\n",
73 | " X = dataset[:,0]\n",
74 | " y = dataset[:,1]\n",
75 | " \n",
76 | " if resolution=='high':\n",
77 | " xx,yy = make_grid(X,y,1000)\n",
78 | " \n",
79 | " elif resolution=='low':\n",
80 | " xx,yy = make_grid(X,y,10)\n",
81 | " \n",
82 | " elif resolution=='standard':\n",
83 | " xx,yy = make_grid(X,y,100)\n",
84 | " \n",
85 | " else:\n",
86 | " print('Value Error - Resolution can only be one of \\nhigh, low or standard')\n",
87 | " \n",
88 | " values, variances = ok.execute(method, xx[0], yy[:,0])\n",
89 | " \n",
90 | " if isvariance:\n",
91 | " return values, variances\n",
92 | " else:\n",
93 | " del variances\n",
94 | " return np.array(values)"
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": 62,
100 | "metadata": {},
101 | "outputs": [
102 | {
103 | "data": {
104 | "text/plain": [
105 | "array([[129.94984945, 129.7682324 , 129.58820662, ..., 159.34079485,\n",
106 | " 159.99175016, 160.63241067],\n",
107 | " [130.22090025, 130.03615966, 129.8529146 , ..., 159.9575165 ,\n",
108 | " 160.61228126, 161.25625641],\n",
109 | " [130.50105231, 130.31324536, 130.12683652, ..., 160.59265384,\n",
110 | " 161.25084023, 161.8977369 ],\n",
111 | " ...,\n",
112 | " [207.22133238, 207.82739139, 208.44615116, ..., 248.64646661,\n",
113 | " 248.3790241 , 248.11033441],\n",
114 | " [207.92838926, 208.53490708, 209.15376273, ..., 248.91678379,\n",
115 | " 248.65601627, 248.39371596],\n",
116 | " [208.61942088, 209.22595474, 209.84445913, ..., 249.17442481,\n",
117 | " 248.9203453 , 248.66446245]])"
118 | ]
119 | },
120 | "execution_count": 62,
121 | "metadata": {},
122 | "output_type": "execute_result"
123 | }
124 | ],
125 | "source": [
126 | "ordinary_kriging(data)"
127 | ]
128 | },
129 | {
130 | "cell_type": "markdown",
131 | "metadata": {},
132 | "source": [
133 | "* What does ok('points') really do?\n",
134 | "* Specifically test when points aren't really passed - they are let's say the point of an array\n",
135 | "* Returns the diagonal matrix of all these coordinates"
136 | ]
137 | },
138 | {
139 | "cell_type": "code",
140 | "execution_count": 63,
141 | "metadata": {
142 | "scrolled": true
143 | },
144 | "outputs": [
145 | {
146 | "data": {
147 | "text/plain": [
148 | "array([129.94984945, 130.03615966, 130.12683652, 130.22219703,\n",
149 | " 130.32258826, 130.42839089, 130.54002324, 130.65794596,\n",
150 | " 130.7826674 , 130.91474976, 131.05481629, 131.20355964,\n",
151 | " 131.36175158, 131.53025441, 131.71003442, 131.90217771,\n",
152 | " 132.107909 , 132.32861401, 132.56586607, 132.82145795,\n",
153 | " 133.0974399 , 133.39616477, 133.72034153, 134.07309736,\n",
154 | " 134.45804822, 134.87937482, 135.34189663, 135.85112772,\n",
155 | " 136.41328222, 137.03517039, 137.72388496, 138.48612122,\n",
156 | " 139.326921 , 140.24763047, 141.24300526, 142.29757046,\n",
157 | " 143.37881815, 144.38425962, 144.49187978, 143.1202101 ,\n",
158 | " 141.66667134, 140.45686022, 139.66795657, 142.48270308,\n",
159 | " 147.03665055, 151.8487008 , 156.90272514, 162.25791164,\n",
160 | " 168.04938768, 173.63870768, 180.93567147, 190.3440156 ,\n",
161 | " 199.86834472, 208.48375248, 215.75635742, 222.1915652 ,\n",
162 | " 228.08641413, 233.15249702, 236.89713686, 239.83524192,\n",
163 | " 242.45744315, 244.57483343, 245.52139699, 245.88236757,\n",
164 | " 246.12295211, 246.3306567 , 246.52369882, 246.70598807,\n",
165 | " 246.87792737, 247.03919426, 247.18952217, 247.3288843 ,\n",
166 | " 247.45749059, 247.57573348, 247.68412862, 247.78326467,\n",
167 | " 247.87376505, 247.95626051, 248.03137024, 248.09968963,\n",
168 | " 248.16178271, 248.21817801, 248.26936683, 248.31580309,\n",
169 | " 248.35790422, 248.39605277, 248.43059841, 248.46186013,\n",
170 | " 248.49012851, 248.51566797, 248.53871897, 248.55950011,\n",
171 | " 248.57821004, 248.59502931, 248.61012204, 248.62363741,\n",
172 | " 248.63571111, 248.64646661, 248.65601627, 248.66446245])"
173 | ]
174 | },
175 | "execution_count": 63,
176 | "metadata": {},
177 | "output_type": "execute_result"
178 | }
179 | ],
180 | "source": [
181 | "ordinary_kriging(data,method='points')"
182 | ]
183 | },
184 | {
185 | "cell_type": "code",
186 | "execution_count": null,
187 | "metadata": {},
188 | "outputs": [],
189 | "source": [
190 | "def make_grid(X,y,res):\n",
191 | " y_min = y.min()-0.2\n",
192 | " y_max = y.max()+0.2\n",
193 | " x_min = X.min()-0.2\n",
194 | " x_max = X.max()+0.2\n",
195 | " x_arr = np.linspace(x_min,x_max,res)\n",
196 | " y_arr = np.linspace(y_min,y_max,res)\n",
197 | " xx,yy = np.meshgrid(x_arr,y_arr) \n",
198 | " return xx,yy\n",
199 | "x, y = make_grid(data[:,0],data[:,1],100)"
200 | ]
201 | }
202 | ],
203 | "metadata": {
204 | "kernelspec": {
205 | "display_name": "Python 3",
206 | "language": "python",
207 | "name": "python3"
208 | },
209 | "language_info": {
210 | "codemirror_mode": {
211 | "name": "ipython",
212 | "version": 3
213 | },
214 | "file_extension": ".py",
215 | "mimetype": "text/x-python",
216 | "name": "python",
217 | "nbconvert_exporter": "python",
218 | "pygments_lexer": "ipython3",
219 | "version": "3.6.8"
220 | }
221 | },
222 | "nbformat": 4,
223 | "nbformat_minor": 2
224 | }
225 |
--------------------------------------------------------------------------------
/polire/kriging/tests/Kriging Interpolation.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from pykrige import OrdinaryKriging"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 4,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "import pandas as pd\n",
19 | "import numpy as np"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 38,
25 | "metadata": {},
26 | "outputs": [],
27 | "source": []
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": 10,
32 | "metadata": {},
33 | "outputs": [],
34 | "source": [
35 | "ok = OrdinaryKriging(data[:,0],data[:,1],data[:,2])\n",
36 | "ok.ex"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": 43,
42 | "metadata": {},
43 | "outputs": [],
44 | "source": [
45 | "a,b = ok.execute('grid',x[0],y[:,0])"
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": 61,
51 | "metadata": {},
52 | "outputs": [],
53 | "source": [
54 | "from pykrige import OrdinaryKriging\n",
55 | "import pandas as pd\n",
56 | "import numpy as np\n",
57 | "\n",
58 | "def ordinary_kriging(dataset, resolution='standard', coordinate_type='euclidean',verbose='False',method='grid', isvariance = False):\n",
59 | " if coordinate_type == 'latlong_small':\n",
60 | " \"\"\"\n",
61 | " Assume that the Earth is a Sphere, and use polar coordinates\n",
62 | " $| \\vec{r_2}− \\vec{r_1}| ≈ \\text{R }\\times \\sqrt[]{(Lat_2 - Lat_1)^{2} + (Long_2 - Long_1)^{2}}$\n",
63 | " \"\"\"\n",
64 | " return \"To be done later\"\n",
65 | " if coordinate_type == 'latlong_large':\n",
66 | " \"\"\"\n",
67 | " Code to be written after understanding all the projections.\n",
68 | " \"\"\"\n",
69 | " return \"To be done later\"\n",
70 | " if coordinate_type==\"euclidean\":\n",
71 | " \n",
72 | " ok = OrdinaryKriging(dataset[:,0],dataset[:,1],dataset[:,2])\n",
73 | " X = dataset[:,0]\n",
74 | " y = dataset[:,1]\n",
75 | " \n",
76 | " if resolution=='high':\n",
77 | " xx,yy = make_grid(X,y,1000)\n",
78 | " \n",
79 | " elif resolution=='low':\n",
80 | " xx,yy = make_grid(X,y,10)\n",
81 | " \n",
82 | " elif resolution=='standard':\n",
83 | " xx,yy = make_grid(X,y,100)\n",
84 | " \n",
85 | " else:\n",
86 | " print('Value Error - Resolution can only be one of \\nhigh, low or standard')\n",
87 | " \n",
88 | " values, variances = ok.execute(method, xx[0], yy[:,0])\n",
89 | " \n",
90 | " if isvariance:\n",
91 | " return values, variances\n",
92 | " else:\n",
93 | " del variances\n",
94 | " return np.array(values)"
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": 62,
100 | "metadata": {},
101 | "outputs": [
102 | {
103 | "data": {
104 | "text/plain": [
105 | "array([[129.94984945, 129.7682324 , 129.58820662, ..., 159.34079485,\n",
106 | " 159.99175016, 160.63241067],\n",
107 | " [130.22090025, 130.03615966, 129.8529146 , ..., 159.9575165 ,\n",
108 | " 160.61228126, 161.25625641],\n",
109 | " [130.50105231, 130.31324536, 130.12683652, ..., 160.59265384,\n",
110 | " 161.25084023, 161.8977369 ],\n",
111 | " ...,\n",
112 | " [207.22133238, 207.82739139, 208.44615116, ..., 248.64646661,\n",
113 | " 248.3790241 , 248.11033441],\n",
114 | " [207.92838926, 208.53490708, 209.15376273, ..., 248.91678379,\n",
115 | " 248.65601627, 248.39371596],\n",
116 | " [208.61942088, 209.22595474, 209.84445913, ..., 249.17442481,\n",
117 | " 248.9203453 , 248.66446245]])"
118 | ]
119 | },
120 | "execution_count": 62,
121 | "metadata": {},
122 | "output_type": "execute_result"
123 | }
124 | ],
125 | "source": [
126 | "ordinary_kriging(data)"
127 | ]
128 | },
129 | {
130 | "cell_type": "markdown",
131 | "metadata": {},
132 | "source": [
133 | "* What does ok('points') really do?\n",
134 | "* Specifically test when points aren't really passed - they are let's say the point of an array\n",
135 | "* Returns the diagonal matrix of all these coordinates"
136 | ]
137 | },
138 | {
139 | "cell_type": "code",
140 | "execution_count": 63,
141 | "metadata": {
142 | "scrolled": true
143 | },
144 | "outputs": [
145 | {
146 | "data": {
147 | "text/plain": [
148 | "array([129.94984945, 130.03615966, 130.12683652, 130.22219703,\n",
149 | " 130.32258826, 130.42839089, 130.54002324, 130.65794596,\n",
150 | " 130.7826674 , 130.91474976, 131.05481629, 131.20355964,\n",
151 | " 131.36175158, 131.53025441, 131.71003442, 131.90217771,\n",
152 | " 132.107909 , 132.32861401, 132.56586607, 132.82145795,\n",
153 | " 133.0974399 , 133.39616477, 133.72034153, 134.07309736,\n",
154 | " 134.45804822, 134.87937482, 135.34189663, 135.85112772,\n",
155 | " 136.41328222, 137.03517039, 137.72388496, 138.48612122,\n",
156 | " 139.326921 , 140.24763047, 141.24300526, 142.29757046,\n",
157 | " 143.37881815, 144.38425962, 144.49187978, 143.1202101 ,\n",
158 | " 141.66667134, 140.45686022, 139.66795657, 142.48270308,\n",
159 | " 147.03665055, 151.8487008 , 156.90272514, 162.25791164,\n",
160 | " 168.04938768, 173.63870768, 180.93567147, 190.3440156 ,\n",
161 | " 199.86834472, 208.48375248, 215.75635742, 222.1915652 ,\n",
162 | " 228.08641413, 233.15249702, 236.89713686, 239.83524192,\n",
163 | " 242.45744315, 244.57483343, 245.52139699, 245.88236757,\n",
164 | " 246.12295211, 246.3306567 , 246.52369882, 246.70598807,\n",
165 | " 246.87792737, 247.03919426, 247.18952217, 247.3288843 ,\n",
166 | " 247.45749059, 247.57573348, 247.68412862, 247.78326467,\n",
167 | " 247.87376505, 247.95626051, 248.03137024, 248.09968963,\n",
168 | " 248.16178271, 248.21817801, 248.26936683, 248.31580309,\n",
169 | " 248.35790422, 248.39605277, 248.43059841, 248.46186013,\n",
170 | " 248.49012851, 248.51566797, 248.53871897, 248.55950011,\n",
171 | " 248.57821004, 248.59502931, 248.61012204, 248.62363741,\n",
172 | " 248.63571111, 248.64646661, 248.65601627, 248.66446245])"
173 | ]
174 | },
175 | "execution_count": 63,
176 | "metadata": {},
177 | "output_type": "execute_result"
178 | }
179 | ],
180 | "source": [
181 | "ordinary_kriging(data,method='points')"
182 | ]
183 | },
184 | {
185 | "cell_type": "code",
186 | "execution_count": null,
187 | "metadata": {},
188 | "outputs": [],
189 | "source": [
190 | "def make_grid(X,y,res):\n",
191 | " y_min = y.min()-0.2\n",
192 | " y_max = y.max()+0.2\n",
193 | " x_min = X.min()-0.2\n",
194 | " x_max = X.max()+0.2\n",
195 | " x_arr = np.linspace(x_min,x_max,res)\n",
196 | " y_arr = np.linspace(y_min,y_max,res)\n",
197 | " xx,yy = np.meshgrid(x_arr,y_arr) \n",
198 | " return xx,yy\n",
199 | "x, y = make_grid(data[:,0],data[:,1],100)"
200 | ]
201 | }
202 | ],
203 | "metadata": {
204 | "kernelspec": {
205 | "display_name": "Python 3",
206 | "language": "python",
207 | "name": "python3"
208 | },
209 | "language_info": {
210 | "codemirror_mode": {
211 | "name": "ipython",
212 | "version": 3
213 | },
214 | "file_extension": ".py",
215 | "mimetype": "text/x-python",
216 | "name": "python",
217 | "nbconvert_exporter": "python",
218 | "pygments_lexer": "ipython3",
219 | "version": "3.6.8"
220 | }
221 | },
222 | "nbformat": 4,
223 | "nbformat_minor": 2
224 | }
225 |
--------------------------------------------------------------------------------
/polire/natural_neighbors/natural_neighbors.py:
--------------------------------------------------------------------------------
1 | """
2 | This is a module for Natural Neighbors Interpolation
3 | """
4 |
5 | import numpy as np
6 | from scipy.spatial import Voronoi, voronoi_plot_2d
7 | import matplotlib.pyplot as plt
8 | from ..base import Base
9 | from shapely.geometry import Point
10 | from shapely.geometry.polygon import Polygon
11 | from math import atan2
12 | from copy import deepcopy
13 |
14 |
15 | def is_row_in_array(row, arr):
16 | return list(row) in arr.tolist()
17 |
18 |
19 | def get_index(row, arr):
20 | t1 = np.where(arr[:, 0] == row[0])
21 | t2 = np.where(arr[:, 1] == row[1])
22 | index = np.intersect1d(t1, t2)[0]
23 | # If length of index exceeds one!! - Uniqueness Error
24 | return index
25 |
26 |
27 | def order_poly(vertices):
28 | """This function essentially is used to order the vertices
29 | of the Voronoi polygon in a clockwise manner. This ensures
30 | that Shapely doesn't produce Polygon objects that are potentially
31 | non-convex and non-zero area.
32 |
33 | Arguments
34 | ---------
35 | vertices : {array-like, 2D matrix}
36 | This contains the list of vertices of the Polygon to be sorted
37 |
38 | Returns
39 | -------
40 | new_vertices : {array-like, 2D matrix}
41 | All the vertices reordered in a clockwise manner
42 | """
43 | mean_x = np.mean(vertices[:, 0])
44 | mean_y = np.mean(vertices[:, 1])
45 |
46 | def condition(x):
47 | """This is the condition to be used while sorting. We convert the coordinates
48 | to Polar and sort the points
49 | """
50 | return atan2(x[0] - mean_x, x[1] - mean_y) * 180 / np.pi
51 |
52 | return sorted(vertices, key=condition)
53 |
54 |
55 | class NaturalNeighbor(Base):
56 | """Class used for natural neighbors interpolation. This method is an implementation first
57 | proposed by Sibson et al. [1] in 1981. We use the weights derived using the work in [1]
58 | and leave it for future addition, the use of Laplace Weights [2].
59 |
60 | Parameters
61 | ----------
62 | weights: str, optional
63 | This defines the type of weights to be used for natural neighbor interpolation.
64 |         We use Sibson weights, and plan to add Laplace weights in the future.
65 |         Default value is "sibson"
66 |
67 | display: Boolean, optional
68 |         True value displays the Voronoi tessellation to the user after fitting the model.
69 | Default value is False.
70 |
71 | Notes
72 | -----
73 |     This is for contributors:
74 |     Parts of this code rely on the assumption that
75 |     the data's ordering is used to find its Voronoi partitions.
76 |
77 | References
78 | ----------
79 | [1] Sibson, R. (1981). "A brief description of natural neighbor interpolation (Chapter 2)". In V. Barnett (ed.). Interpolating Multivariate Data. Chichester: John Wiley. pp. 21–36.
80 | [2] V.V. Belikov; V.D. Ivanov; V.K. Kontorovich; S.A. Korytnik; A.Y. Semenov (1997). "The non-Sibsonian interpolation: A new method of interpolation of the values of a function on an arbitrary set of points". Computational mathematics and mathematical physics. 37 (1): 9–15.
81 | [3] N.H. Christ; R. Friedberg, R.; T.D. Lee (1982). "Weights of links and plaquettes in a random lattice". Nuclear Physics B. 210 (3): 337–346.
82 | """
83 |
84 | def __init__(
85 | self,
86 | weights="sibson",
87 | display=False,
88 | resolution="standard",
89 | coordinate_type="Eucledian",
90 | ):
91 | super().__init__(resolution, coordinate_type)
92 | self.weights = weights
93 | self.X = None
94 | self.y = None
95 | self.result = None
96 | self.voronoi = None
97 | self.vertices = (
98 |             None  # This variable stores the Voronoi partition's vertices
99 | )
100 | self.vertex_poly_map = (
101 | dict()
102 | ) # This variable stores the polygon to data point map
103 | self.display = display
104 |
105 | def _fit(self, X, y):
106 | """This function is for the natural neighbors interpolation method.
107 | This is not expected to be called directly.
108 | """
109 | self.X = X
110 | self.y = y
111 | self.voronoi = Voronoi(X, incremental=True)
112 | self.vertices = self.voronoi.vertices
113 |
114 | self.vertex_poly_map = {i: 0 for i in range(len(X))}
115 |
116 | for i in range(len(self.X)):
117 | index = np.where(self.voronoi.point_region == i)[0][0]
118 | point = Point(self.X[index])
119 | region = self.voronoi.regions[i]
120 | if -1 not in region and region != []:
121 | # -1 corresponds to unbounded region - we can't have this in interpolation
122 | # and the function returns an empty list anyways
123 | # at least in the case of non-incremental NN
124 | p = Polygon(order_poly(self.vertices[region]))
125 | self.vertex_poly_map[index] = p
126 |         # Remove all the data points that do not contribute to natural neighbor interpolation
127 | for i in range(len(self.vertex_poly_map)):
128 | if self.vertex_poly_map[i] == 0:
129 | self.vertex_poly_map.pop(i, None)
130 |
131 | if self.display:
132 | voronoi_plot_2d(self.voronoi)
133 | plt.show()
134 | self.display = False
135 |
136 | return self
137 |
138 | def _predict_grid(self, x1lim, x2lim):
139 | """Gridded interpolation for natural neighbors interpolation. This function should not
140 | be called directly.
141 | """
142 | lims = (*x1lim, *x2lim)
143 | x1min, x1max, x2min, x2max = lims
144 | x1 = np.linspace(x1min, x1max, self.resolution)
145 | x2 = np.linspace(x2min, x2max, self.resolution)
146 | X1, X2 = np.meshgrid(x1, x2)
147 | return self._predict(np.array([X1.ravel(), X2.ravel()]).T)
148 |
149 | def _predict(self, X):
150 | """The function taht is called to predict the interpolated data in Natural Neighbors
151 | interpolation. This should not be called directly.
152 | If this method returns None, then we cannot interpolate because of the formed Voronoi
153 | Tesselation
154 | """
155 | result = np.zeros(len(X))
156 |         # Potentially create as many class objects as the
157 |         # length of the array to be predicted -
158 |         # not a bad idea if memory is not a constraint
159 | for index in range(len(X)):
160 | if is_row_in_array(X[index], self.X):
161 | idx = get_index(X[index], self.X)
162 | # Check if query data point already exists
163 | result[index] = self.y[idx]
164 |
165 | else:
166 |                 # QHull objects can't be pickled. Deepcopy doesn't work.
167 | # So we need to fit the model for each and every query data point.
168 | self._fit(self.X, self.y)
169 |
170 | vor = self.voronoi
171 | vor.add_points(np.array([X[index]]))
172 | vor.close()
173 |                 # We exploit the incremental processing of SciPy's Voronoi.
174 |                 # Re-fitting above ensures the original tessellation is preserved.
175 | new_regions = vor.regions
176 | new_vertices = vor.vertices
177 | final_regions = []
178 |
179 | for i in new_regions:
180 | if i != [] and -1 not in i:
181 | final_regions.append(i)
182 |
183 | new = [] # this stores the newly created voronoi partitions
184 | for i in range(len(new_vertices)):
185 | if new_vertices[i] not in self.vertices:
186 | new.append(new_vertices[i])
187 | new = np.array(new)
188 | if len(new) < 3:
189 |                     # We need at least a triangle to interpolate
190 | # Three new voronoi vertices form a triangle
191 | result[index] = np.nan
192 | continue
193 |
194 | weights = {} # Weights that we use for interpolation
195 | new_polygon = Polygon(order_poly(new))
196 | new_polygon_area = new_polygon.area
197 |
198 | for i in self.vertex_poly_map:
199 | if new_polygon.intersects(self.vertex_poly_map[i]):
200 | weights[i] = (
201 | new_polygon.intersection(self.vertex_poly_map[i])
202 | ).area / new_polygon_area
203 |
204 | prediction = np.array(
205 | [self.y[i] * weights[i] for i in weights]
206 | ).sum()
207 | result[index] = prediction
208 | del vor, weights, new_polygon, new_polygon_area
209 |
210 | return result
211 |
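212 | # -------------------------------------------------------------------------
213 | # Illustrative usage sketch (not part of the module): natural neighbor
214 | # interpolation of a toy dataset. The data below is hypothetical and exists
215 | # only for demonstration; a query whose new Voronoi cell is unbounded
216 | # comes back as NaN.
217 | # -------------------------------------------------------------------------
218 | if __name__ == "__main__":
219 |     rng = np.random.RandomState(0)
220 |     X_train = rng.uniform(0, 10, size=(50, 2))
221 |     y_train = X_train[:, 0] + X_train[:, 1]
222 |
223 |     model = NaturalNeighbor()
224 |     model.fit(X_train, y_train)
225 |
226 |     # Query a point well inside the convex hull of the training data
227 |     print(model.predict(np.array([[5.0, 5.0]])))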
--------------------------------------------------------------------------------
/polire/nsgp/nsgp.py:
--------------------------------------------------------------------------------
1 | from ..base import Base
2 | import numpy as np
3 | import multiprocessing as mp
4 | from GPy.kern import Matern32, Matern52, RBF, ExpQuad
5 | from scipy.optimize import least_squares
6 |
7 |
8 | class NSGP(Base):
9 | """
10 | A class to learn Nott and Dunsmuir's non-stationary kernel. For more information, refer to
11 | https://academic.oup.com/biomet/article-abstract/89/4/819/242307
12 |
13 | Parameters
14 | ------------
15 |
16 | N : int, default=10
17 | Number of nearby points to learn each kernel locally
18 |
19 |     eta : int, default=1
20 |         A hyperparameter used in the weighting function
21 |
22 |     kernel_name : str, default='m32', ('m32', 'm52', 'rbf' or 'expqd')
23 |         Type of kernel to be used
24 | """
25 |
26 | def __init__(self, N=10, eta=1, kernel_name="m32", verbose=True):
27 | super().__init__()
28 | self.__N = N + 1 # Number of datapoints for local kernel learning
29 | self.__eta = eta # Eta hyperparameter for weighting function
30 | self.__kernel_name = kernel_name
31 | self.__param_dict = {
32 | "N": self.__N,
33 | "eta": self.__eta,
34 | "kernel_name": self.__kernel_name,
35 | }
36 | self._KX_inv = None
37 |
38 | def get_all_params(self):
39 | """
40 | Returns class parameters
41 | """
42 | return self.__param_dict
43 |
44 | def get_param(self, param):
45 | """
46 | Returns the value of a parameter
47 | """
48 | return self.__param_dict[param]
49 |
50 | def __calculate_dmat(self):
51 | self.__dmat = np.zeros((self._X.shape[0], self._X.shape[0]))
52 | for i in range(self._X.shape[0]):
53 | for j in range(i, self._X.shape[0]):
54 | self.__dmat[i, j] = np.linalg.norm(self._X[i] - self._X[j])
55 | self.__dmat[j, i] = self.__dmat[i, j]
56 |
57 | def __get_close_locs(self):
58 | self.__calculate_dmat() # Distance matrix
59 | return [
60 | self.__dmat[i].argsort()[: self.__N]
61 | for i in range(self._X.shape[0])
62 | ]
63 |
64 | def __weight_func(self, S):
65 | return np.exp(-(1 / self.__eta) * ((S - self._X) ** 2).sum(axis=1))
66 |
67 | def _model(self, loc):
68 | def __D_z(sj):
69 | return self._Gamma[np.ix_(sj, sj)]
70 |
71 | def __obfunc(x):
72 | kernel = kern_dict[self.__kernel_name]
73 | kernel.variance = x[0]
74 | kernel.lengthscale = x[1:]
75 | kern_vals = kernel.K(self._X[self.__close_locs[loc]])
76 | term = (__D_z(self.__close_locs[loc]) - kern_vals) / kern_vals
77 | return np.sum(term**2)
78 |
79 | # ARD can be added
80 | kern_dict = {
81 | "m32": Matern32(
82 | input_dim=self._X.shape[1],
83 | active_dims=list(range(self._X.shape[1])),
84 | ARD=True,
85 | ),
86 | "m52": Matern52(
87 | input_dim=self._X.shape[1],
88 | active_dims=list(range(self._X.shape[1])),
89 | ARD=True,
90 | ),
91 | "rbf": RBF(
92 | input_dim=self._X.shape[1],
93 | active_dims=list(range(self._X.shape[1])),
94 | ARD=True,
95 | ),
96 | "expqd": ExpQuad(
97 | input_dim=self._X.shape[1],
98 | active_dims=list(range(self._X.shape[1])),
99 | ARD=True,
100 | ),
101 | }
102 |
103 | kernel = kern_dict[self.__kernel_name]
104 | params = least_squares(__obfunc, np.ones((self._X.shape[1] + 1))).x
105 | kernel.variance = params[0]
106 | kernel.lengthscale = params[1:]
107 | return kernel.K
108 |
109 | def _c_inv(self, kern_func):
110 | return np.linalg.pinv(kern_func(self._X))
111 |
112 | def __learnLocal(self):
113 | # self._verbose_print('Training local kernels. This may take a few moments')
114 |
115 | job = mp.Pool()
116 | self.__kernels = job.map(self._model, list(range(self._X.shape[0])))
117 | self.__C_inv = job.map(self._c_inv, self.__kernels)
118 | job.close()
119 |
120 | # self._verbose_print('Training complete')
121 |
122 | def _Kernel(self, S1, S2=None):
123 | """
124 | This function is for the NSGP Class.
125 | This is not expected to be called directly.
126 | """
127 | S2exists = True
128 |         if S2 is None or np.all(S1 == S2):
129 | S2exists = False
130 | S2 = S1
131 |
132 | assert S1.shape[1] == self._X.shape[1]
133 | assert S2.shape[1] == self._X.shape[1]
134 |
135 | # Calculating Weights & c_mats
136 | self.__v_s1 = np.zeros((S1.shape[0], self._X.shape[0]))
137 | self.__v_s2 = np.zeros((S2.shape[0], self._X.shape[0]))
138 | self.__c_mat_s1 = np.zeros(
139 | (self._X.shape[0], S1.shape[0], self._X.shape[0])
140 | )
141 | self.__c_mat_s2 = np.zeros(
142 | (self._X.shape[0], self._X.shape[0], S2.shape[0])
143 | )
144 | self.__c_mat_s1s2 = np.zeros(
145 | (self._X.shape[0], S1.shape[0], S2.shape[0])
146 | )
147 |
148 | for s1i, s1 in enumerate(S1):
149 | s_vec = self.__weight_func(s1)
150 | self.__v_s1[s1i, :] = s_vec / s_vec.sum()
151 | if S2exists:
152 | for s2i, s2 in enumerate(S2):
153 | s_vec = self.__weight_func(s2)
154 | self.__v_s2[s2i, :] = s_vec / s_vec.sum()
155 | for i in range(self._X.shape[0]):
156 | self.__c_mat_s1[i, :, :] = self.__kernels[i](S1, self._X)
157 | self.__c_mat_s2[i, :, :] = self.__kernels[i](self._X, S2)
158 | self.__c_mat_s1s2[i, :, :] = self.__kernels[i](S1, S2)
159 | else:
160 | self.__v_s2 = self.__v_s1
161 | for i in range(self._X.shape[0]):
162 | self.__c_mat_s1[i, :, :] = self.__kernels[i](S1, self._X)
163 | self.__c_mat_s2[i, :, :] = self.__c_mat_s1[i, :, :].T
164 | self.__c_mat_s1s2[i, :, :] = self.__kernels[i](S1)
165 |
166 | # Calculating main covariance function
167 | first_term = np.zeros((S1.shape[0], S2.shape[0]), dtype="float64")
168 | for i in range(self._X.shape[0]):
169 | for j in range(self._X.shape[0]):
170 | first_term += (
171 | self.__c_mat_s1[i, :, :]
172 | .dot(self.__C_inv[i])
173 | .dot(self._Gamma)
174 | .dot(self.__C_inv[j])
175 | .dot(self.__c_mat_s2[j, :, :])
176 | ) * (
177 | self.__v_s1[:, i]
178 | .reshape(-1, 1)
179 | .dot(self.__v_s2[:, j].reshape(1, -1))
180 | )
181 |
182 | second_term = np.zeros((S1.shape[0], S2.shape[0]))
183 | for i in range(self._X.shape[0]):
184 | second_term += np.sqrt(
185 | self.__v_s1[:, i]
186 | .reshape(-1, 1)
187 | .dot(self.__v_s2[:, i].reshape(1, -1))
188 | ) * (
189 | self.__c_mat_s1s2[i, :, :]
190 | - self.__c_mat_s1[i, :, :]
191 | .dot(self.__C_inv[i])
192 | .dot(self.__c_mat_s2[i, :, :])
193 | )
194 |
195 | return first_term + second_term
196 |
197 | def _fit(self, X, y, ECM):
198 | """
199 | This function is for the NSGP Class.
200 | This is not expected to be called directly.
201 | """
202 |
203 | self._Gamma = ECM # Empirical Covariance Matrix
204 |         assert isinstance(
205 |             self._Gamma, np.ndarray
206 |         ), "ECM must be a numpy array"
207 | assert self._Gamma.shape[0] == self._Gamma.shape[1] == X.shape[0], (
208 | "ECM must have ("
209 | + str(X.shape[0])
210 | + ", "
211 | + str(X.shape[0])
212 | + ") shape"
213 | )
214 |
215 |         self._X = X  # training features
216 | self._y = y # Training values
217 | self.__param_dict["X"] = X
218 | self.__param_dict["y"] = y
219 | self.__param_dict["ECM"] = self._Gamma
220 |
221 | # Get closest N locations for each train location
222 | self.__close_locs = self.__get_close_locs()
223 | self.__learnLocal() # Learning local kernels
224 | return self
225 |
226 | def _predict(self, X, return_cov=False):
227 | """
228 | This function is for the NSGP Class.
229 | This is not expected to be called directly.
230 | """
231 | if self._KX_inv is None:
232 | self._KX_inv = np.linalg.pinv(self._Kernel(self._X, self._X))
233 | KX_test = self._Kernel(X, self._X)
234 | pred_mean = (
235 | KX_test.dot(self._KX_inv).dot(self._y - self._y.mean())
236 | + self._y.mean()
237 | )
238 | if return_cov:
239 | pred_var = self._Kernel(X, X) - KX_test.dot(self._KX_inv).dot(
240 | KX_test.T
241 | )
242 | return (pred_mean, pred_var)
243 | return pred_mean
244 |
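245 | # -------------------------------------------------------------------------
246 | # Illustrative usage sketch (not part of the module): NSGP needs an
247 | # empirical covariance matrix (ECM) of shape (n, n) at fit time. With one
248 | # realisation per location a true ECM is not identifiable, so the rank-one
249 | # outer product below is a hypothetical stand-in, purely for demonstration.
250 | # We also assume the public fit method forwards the ECM keyword to _fit.
251 | # -------------------------------------------------------------------------
252 | if __name__ == "__main__":
253 |     rng = np.random.RandomState(0)
254 |     X_train = rng.uniform(0, 10, size=(25, 2))
255 |     y_train = np.sin(X_train[:, 0]) + rng.normal(scale=0.1, size=25)
256 |
257 |     residual = y_train - y_train.mean()
258 |     ecm = np.outer(residual, residual)  # hypothetical stand-in ECM
259 |
260 |     model = NSGP(N=5)
261 |     model.fit(X_train, y_train, ECM=ecm)
262 |     print(model.predict(X_train[:3]))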
--------------------------------------------------------------------------------
/polire/idw/tests/IDW Initial.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Inverse Distance Weighting (IDW) Interpolation"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "Let us suppose we have a data that shows the variation of one quantity of interest across space.\n",
15 | "This could be equivalently viewed as { ($\\vec{x_1}, y_1)$,$(\\vec{x_2}, y_2)$,$(\\vec{x_3}, y_3)$, ...}, where the $\\vec{x_i}$'s represent the coordinates of the points where we have data and the $y_i$'s are the actual data at those points.
\n",
16 | "We would like to perform an interpolation using these data points such that a few things are satisifed.\n",
17 | "1. The interpolation is exact - the value at the known data points is the same as the estimated value, and \n",
18 | "2. We would want far away points from a given source data point to receive less importance than nearby points.\n",
19 | "3. Wikipedia has an excellent article on IDW. I am linking it [here](https://en.wikipedia.org/wiki/Inverse_distance_weighting)."
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {},
25 | "source": [
26 | "We are using the following approximation for coordinate_type being latlong_small
\n",
27 | "$| \\vec{r_2}− \\vec{r_1}| ≈ \\text{R }\\times \\sqrt[]{(Lat_2 - Lat_1)^{2} + (Long_2 - Long_1)^{2}}$"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 1,
33 | "metadata": {},
34 | "outputs": [],
35 | "source": [
36 | "import numpy as np\n",
37 | "import pandas as pd\n",
38 | "df = pd.read_csv('../../testdata/30-03-18.csv')\n",
39 | "data = np.array(df[['longitude','latitude','value']])"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 2,
45 | "metadata": {},
46 | "outputs": [],
47 | "source": [
48 | "def make_grid(X,y,res):\n",
49 | " y_min = y.min()-0.2\n",
50 | " y_max = y.max()+0.2\n",
51 | " x_min = X.min()-0.2\n",
52 | " x_max = X.max()+0.2\n",
53 | " x_arr = np.linspace(x_min,x_max,res)\n",
54 | " y_arr = np.linspace(y_min,y_max,res)\n",
55 | " xx,yy = np.meshgrid(x_arr,y_arr) \n",
56 | " return xx,yy\n",
57 | "\n",
58 | "def idw(dataset, exponent = 2, resolution='standard', coordinate_type='euclidean',verbose='False'):\n",
59 | " \"\"\"\n",
60 | " Here X is the set of spatial locations - Usually assumed to be Lat-Long\n",
61 | " To be extended to higher dimenstions y - estimated value , exponenet - how\n",
62 | " much weight to assign to far off locations to be estimated for each data point, \n",
63 | " extent - interpolate over a grid - what is xmax xmin ymax ymin\n",
64 | " \"\"\"\n",
65 | " if coordinate_type == 'latlong_small':\n",
66 | " \"\"\"\n",
67 | " Assume that the Earth is a Sphere, and use polar coordinates\n",
68 | " $| \\vec{r_2}− \\vec{r_1}| ≈ \\text{R }\\times \\sqrt[]{(Lat_2 - Lat_1)^{2} + (Long_2 - Long_1)^{2}}$\n",
69 | " \"\"\"\n",
70 | " return \"To be done later\"\n",
71 | " if coordinate_type == 'latlong_large':\n",
72 | " \"\"\"\n",
73 | " Code to be written after understanding all the projections.\n",
74 | " \"\"\"\n",
75 | " return \"To be done later\"\n",
76 | " if coordinate_type==\"euclidean\":\n",
77 | " \n",
78 | "# print(dataset)\n",
79 | " X = dataset[:,0]\n",
80 | " y = dataset[:,1]\n",
81 | " if resolution=='high':\n",
82 | " xx,yy = make_grid(X,y,1000)\n",
83 | " \n",
84 | " if resolution=='low':\n",
85 | " xx,yy = make_grid(X,y,10)\n",
86 | " \n",
87 | " if resolution=='standard':\n",
88 | " xx,yy = make_grid(X,y,100)\n",
89 | " \n",
90 | " new = []\n",
91 | " new_arr = dataset\n",
92 | " for points in new_arr:\n",
93 | " mindist = np.inf\n",
94 | " val = 0\n",
95 | " for j in range(len(yy)):\n",
96 | " temp = yy[j][0]\n",
97 | " for i in range(len(xx[0])):\n",
98 | " dist = np.linalg.norm(np.array([xx[0][i],temp]) - points[:2])\n",
99 | " if dist"
342 | ]
343 | },
344 | "execution_count": 6,
345 | "metadata": {},
346 | "output_type": "execute_result"
347 | }
348 | ],
349 | "source": [
350 | "a = idw()\n",
351 | "import pandas as pd\n",
352 | "df = pd.read_csv('../../testdata/30-03-18.csv')\n",
353 | "data = np.array(df[['longitude','latitude','value']])\n",
354 | "a.fit(data[:,:2],data[:,2])"
355 | ]
356 | },
357 | {
358 | "cell_type": "code",
359 | "execution_count": 5,
360 | "metadata": {},
361 | "outputs": [
362 | {
363 | "data": {
364 | "text/plain": [
365 | "array([[171.89189189, 171.89597641, 171.90813547, ..., 173.89050472,\n",
366 | " 173.89261459, 173.89466512],\n",
367 | " [171.77142857, 171.77625338, 171.79060316, ..., 173.89585441,\n",
368 | " 173.89787202, 173.89983245],\n",
369 | " [171.63636364, 171.64211895, 171.65921778, ..., 173.9012935 ,\n",
370 | " 173.90321551, 173.90508269],\n",
371 | " ...,\n",
372 | " [174.49681529, 174.49676176, 174.49660126, ..., 174.24671184,\n",
373 | " 174.24416446, 174.24164382],\n",
374 | " [174.49056604, 174.49051451, 174.49035999, ..., 174.24671343,\n",
375 | " 174.24419773, 174.2417078 ],\n",
376 | " [174.48447205, 174.48442242, 174.48427358, ..., 174.2466762 ,\n",
377 | " 174.24419219, 174.24173298]])"
378 | ]
379 | },
380 | "execution_count": 5,
381 | "metadata": {},
382 | "output_type": "execute_result"
383 | }
384 | ],
385 | "source": [
386 | "a.interpolated_values"
387 | ]
388 | }
389 | ],
390 | "metadata": {
391 | "kernelspec": {
392 | "display_name": "Python 3",
393 | "language": "python",
394 | "name": "python3"
395 | },
396 | "language_info": {
397 | "codemirror_mode": {
398 | "name": "ipython",
399 | "version": 3
400 | },
401 | "file_extension": ".py",
402 | "mimetype": "text/x-python",
403 | "name": "python",
404 | "nbconvert_exporter": "python",
405 | "pygments_lexer": "ipython3",
406 | "version": "3.6.8"
407 | }
408 | },
409 | "nbformat": 4,
410 | "nbformat_minor": 2
411 | }
412 |
--------------------------------------------------------------------------------