├── tests ├── __init__.py ├── test_basic.py └── data │ └── 30-03-18.csv ├── polire ├── gp │ ├── __init__.py │ ├── gp.py │ └── tests │ │ └── GP interpolation.ipynb ├── idw │ ├── __init__.py │ ├── idw.py │ └── tests │ │ ├── IDW Initial.ipynb │ │ └── Numpy+IDWTest.ipynb ├── nsgp │ ├── __init__.py │ └── nsgp.py ├── kriging │ ├── __init__.py │ ├── kriging.py │ └── tests │ │ └── Kriging Interpolation.ipynb ├── random │ ├── __init__.py │ └── random.py ├── spatial │ ├── __init__.py │ └── spatial.py ├── spline │ ├── __init__.py │ └── bspline.py ├── trend │ ├── __init__.py │ ├── polynomials.py │ └── trend.py ├── natural_neighbors │ ├── __init__.py │ └── natural_neighbors.py ├── base │ ├── __init__.py │ └── base.py ├── utils │ ├── __init__.py │ ├── distance.py │ └── gridding.py ├── custom │ ├── __init__.py │ └── custom.py ├── preprocessing │ ├── __init__.py │ └── sptial_features.py ├── constants.py └── __init__.py ├── requirements.txt ├── requirements-dev.txt ├── .pre-commit-config.yaml ├── .gitignore ├── index.qmd ├── pyproject.toml ├── setup.py ├── setup.cfg ├── _quarto.yml ├── .github └── workflows │ ├── quarto_publish.yml │ ├── auto_publish_pypi.yml │ └── tests.yml ├── usage3.py ├── LICENSE ├── usage.py └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /polire/gp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /polire/idw/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /polire/nsgp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /polire/kriging/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /polire/random/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /polire/spatial/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /polire/spline/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /polire/trend/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /polire/natural_neighbors/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /polire/base/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Base 2 | -------------------------------------------------------------------------------- /polire/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .gridding import * 2 | 
--------------------------------------------------------------------------------
/polire/custom/__init__.py:
--------------------------------------------------------------------------------
1 | from .custom import CustomInterpolator
2 | 
--------------------------------------------------------------------------------
/polire/preprocessing/__init__.py:
--------------------------------------------------------------------------------
1 | from .sptial_features import SpatialFeatures
2 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | matplotlib
2 | numpy
3 | pandas
4 | pykrige
5 | scipy
6 | seaborn
7 | Shapely
8 | xgboost
9 | # GPy
--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | pytest
2 | pytest-cov
3 | coveralls
4 | scikit-learn
5 | pre-commit
6 | xarray
7 | pooch
8 | jinja2
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 |   - repo: https://github.com/psf/black
3 |     rev: 23.7.0
4 |     hooks:
5 |       - id: black
6 |         args: [--line-length=79]
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | **/*.ipynb_checkpoints
2 | **/*.pyc
3 | .env/
4 | *__pycache__/
5 | .pytest_cache
6 | .vscode
7 | .Rhistory
8 | build
9 | dist
10 | polire.egg-info
11 | global_test.py
12 | /.quarto/
13 | 
14 | # ignore docs
15 | docs/
--------------------------------------------------------------------------------
/polire/constants.py:
--------------------------------------------------------------------------------
1 | """This python script contains all the constants that
2 | might be needed in the various interpolation packages.
3 | """
4 | 
5 | low_res = 10
6 | med_res = 100
7 | high_res = 1000
8 | 
9 | RESOLUTION = {"low": low_res, "standard": med_res, "high": high_res}
--------------------------------------------------------------------------------
/index.qmd:
--------------------------------------------------------------------------------
1 | ## Polire
2 | 
3 | ```python
4 | pip install polire
5 | ```
6 | 
7 | 
8 | The word "interpolation" has a Latin origin and is composed of two words - Inter, meaning between, and Polire, meaning to polish.
9 | 
10 | 
11 | Polire is a collection of several spatial interpolation algorithms.
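12 | 
13 | A minimal example, mirroring the README (`Kriging` is one of the bundled interpolators; `predict_grid` interpolates over a regular grid spanning the training data):
14 | 
15 | ```python
16 | import numpy as np
17 | from polire import Kriging
18 | 
19 | # Data
20 | X = np.random.rand(10, 2)  # Spatial 2D points
21 | y = np.random.rand(10)  # Observations
22 | X_new = np.random.rand(100, 2)  # New spatial points
23 | 
24 | # Fit
25 | model = Kriging()
26 | model.fit(X, y)
27 | 
28 | # Predict pointwise, or over a grid
29 | y_new = model.predict(X_new)
30 | Z = model.predict_grid()  # shape (resolution, resolution)
31 | ```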
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = [
3 |     "setuptools>=50.0",
4 |     "setuptools_scm[toml]>=6.0",
5 |     "setuptools_scm_git_archive",
6 |     "wheel>=0.33",
7 |     "numpy>=1.16",
8 |     "cython>=0.29",
9 | ]
10 | 
11 | [tool.setuptools_scm]
12 | write_to = "polire/_version.py"
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import find_packages, setup
2 | 
3 | with open("requirements.txt") as f:
4 |     requirements = f.read().splitlines()
5 | 
6 | setup(
7 |     packages=find_packages(exclude=["docs"]),
8 |     python_requires=">=3.8",
9 |     install_requires=requirements,
10 |     include_package_data=True,
11 | )
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | name = polire
3 | author = Zeel B Patel, Deepak Narayanan, Apoorv Agnihotri, Nipun Batra
4 | author_email = patel_zeel@iitgn.ac.in
5 | description = Spatial Interpolation in Python
6 | url = https://github.com/sustainability-lab/polire
7 | license = BSD 3-Clause License
8 | license_file = LICENSE
9 | long_description = file: README.md
10 | long_description_content_type = text/markdown
--------------------------------------------------------------------------------
/polire/__init__.py:
--------------------------------------------------------------------------------
1 | from .random.random import Random
2 | from .idw.idw import IDW
3 | from .spline.bspline import Spline
4 | from .trend.trend import Trend
5 | from .spatial.spatial import SpatialAverage
6 | from .natural_neighbors.natural_neighbors import NaturalNeighbor
7 | from .kriging.kriging import Kriging
8 | 
9 | # from .gp.gp import GP
10 | from .custom.custom import CustomInterpolator
11 | 
12 | # from .nsgp.nsgp import NSGP
--------------------------------------------------------------------------------
/_quarto.yml:
--------------------------------------------------------------------------------
1 | project:
2 |   type: website
3 |   output-dir: docs
4 | 
5 | # render only the contents mentioned in the _quarto.yml file
6 | 
7 | 
8 | website:
9 |   title: "Polire"
10 |   sidebar:
11 |     style: "docked"
12 |     search: true
13 |     contents:
14 |       - section: "Introduction"
15 |         path: "index.qmd"
16 | 
17 |       - section: "Examples"
18 |         contents:
19 |           - examples/all_in_one.ipynb
20 | 
21 | execute:
22 |   freeze: auto
--------------------------------------------------------------------------------
/.github/workflows/quarto_publish.yml:
--------------------------------------------------------------------------------
1 | on:
2 |   workflow_dispatch:
3 |   push:
4 |     branches: master
5 | 
6 | name: Quarto Publish
7 | 
8 | jobs:
9 |   build-deploy:
10 |     runs-on: ubuntu-latest
11 |     permissions:
12 |       contents: write
13 |     steps:
14 |       - name: Check out repository
15 |         uses: actions/checkout@v3
16 | 
17 |       - name: Set up Quarto
18 |         uses: quarto-dev/quarto-actions/setup@v2
19 | 
20 |       - name: Render and Publish
21 |         uses: quarto-dev/quarto-actions/publish@v2
22 |         with:
23 |           target: gh-pages
24 |         env:
25 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--------------------------------------------------------------------------------
/polire/trend/polynomials.py:
--------------------------------------------------------------------------------
1 | """File containing polynomials supported by the Trend interpolation
2 | class.
3 | """
4 | 
5 | 
6 | def _create_polynomial(order):
7 |     if order is None:  # custom function by the user
8 |         return None
9 | 
10 |     elif order == 0:
11 | 
12 |         def func(X, a):
13 |             return a
14 | 
15 |     elif order == 1:
16 | 
17 |         def func(X, a, b, c):
18 |             x1, x2 = X
19 |             return a + b * x1 + c * x2
20 | 
21 |     elif order == 2:
22 | 
23 |         def func(X, a, b, c, d, e, f):
24 |             x1, x2 = X
25 |             return (
26 |                 a
27 |                 + b * x1
28 |                 + c * x2
29 |                 + d * (x1**2)
30 |                 + e * (x2**2)
31 |                 + f * x1 * x2
32 |             )
33 | 
34 |     else:
35 |         raise NotImplementedError(
36 |             f"{order} order polynomial needs to be defined manually"
37 |         )
38 | 
39 |     return func
40 | 
--------------------------------------------------------------------------------
/usage3.py:
--------------------------------------------------------------------------------
1 | # imports
2 | import seaborn as sns
3 | import matplotlib.pyplot as plt
4 | import numpy as np
5 | 
6 | from polire import CustomInterpolator
7 | import xgboost
8 | from sklearn.ensemble import RandomForestRegressor
9 | from sklearn.linear_model import LinearRegression
10 | from sklearn.neighbors import KNeighborsRegressor
11 | from sklearn.gaussian_process import GaussianProcessRegressor
12 | from sklearn.gaussian_process.kernels import Matern
13 | 
14 | # sample data
15 | X = [[0, 0], [0, 3], [3, 0], [3, 3]]
16 | y = [0, 1.5, 1.5, 3]
17 | X = np.array(X)
18 | y = np.array(y)
19 | 
20 | for r in [
21 |     CustomInterpolator(xgboost.XGBRegressor()),
22 |     CustomInterpolator(RandomForestRegressor()),
23 |     CustomInterpolator(LinearRegression()),
24 |     CustomInterpolator(KNeighborsRegressor(n_neighbors=3, weights="distance")),
25 |     CustomInterpolator(
26 |         GaussianProcessRegressor(normalize_y=True, kernel=Matern())
27 |     ),
28 | ]:
29 |     r.fit(X, y)
30 |     Z = r.predict_grid((0, 3), (0, 3)).reshape(100, 100)
31 |     sns.heatmap(Z)
32 |     plt.title(r)
33 |     plt.show()
34 |     plt.close()
35 | 
--------------------------------------------------------------------------------
/.github/workflows/auto_publish_pypi.yml:
--------------------------------------------------------------------------------
1 | # This workflow will upload a Python package using Twine when a release is
2 | # created. For more information see the following link:
3 | # https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
4 | 
5 | name: Publish to PyPI
6 | 
7 | on:
8 |   release:
9 |     types: [published]
10 | 
11 | jobs:
12 |   deploy:
13 |     runs-on: ubuntu-latest
14 | 
15 |     steps:
16 |       - uses: actions/checkout@v2
17 | 
18 |       # Make sure tags are fetched so we can get a version.
19 | - run: | 20 | git fetch --prune --unshallow --tags 21 | - name: Set up Python 22 | uses: actions/setup-python@v2 23 | with: 24 | python-version: '3.x' 25 | 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip 29 | pip install -U setuptools 'setuptools_scm[toml]' setuptools_scm_git_archive wheel twine 30 | - name: Build and publish 31 | env: 32 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 33 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 34 | 35 | run: | 36 | python setup.py sdist 37 | twine upload dist/* -------------------------------------------------------------------------------- /tests/test_basic.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from time import time 4 | from polire import ( 5 | IDW, 6 | Spline, 7 | Trend, 8 | # GP, 9 | Kriging, 10 | NaturalNeighbor, 11 | SpatialAverage, 12 | CustomInterpolator, 13 | # NSGP, 14 | ) 15 | from sklearn.linear_model import LinearRegression 16 | 17 | X = np.random.rand(20, 2) 18 | y = np.random.rand(20) 19 | 20 | X_new = np.random.rand(40, 2) 21 | 22 | 23 | @pytest.mark.parametrize( 24 | "model", 25 | [ 26 | IDW(), 27 | Spline(), 28 | Trend(), 29 | # GP(), 30 | Kriging(), 31 | NaturalNeighbor(), 32 | SpatialAverage(), 33 | CustomInterpolator(LinearRegression()), 34 | # NSGP(), 35 | ], 36 | ) 37 | def test_fit_predict(model): 38 | init = time() 39 | model.fit(X, y) 40 | y_new = model.predict(X_new) 41 | 42 | assert y_new.shape == (40,) 43 | print("Passed", "Time:", np.round(time() - init, 3), "seconds") 44 | 45 | 46 | @pytest.mark.skip(reason="Temporarily disabled") 47 | def test_nsgp(): 48 | model = NSGP() 49 | init = time() 50 | model.fit(X, y, **{"ECM": X @ X.T}) 51 | y_new = model.predict(X_new) 52 | 53 | assert y_new.shape == (40,) 54 | assert y_new.sum() == y_new.sum() # No NaN 55 | print("Passed", "Time:", np.round(time() - init, 3), "seconds") 56 | -------------------------------------------------------------------------------- /polire/random/random.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ..base import Base 4 | 5 | 6 | class Random(Base): 7 | """ 8 | Class to randomly interpolate by picking values between maximum and 9 | minimum measurements. 10 | 11 | Note: Even if a point on the requested grid is present in 12 | the training set, we return a random value for it. 13 | """ 14 | 15 | def __init__(self, resolution="standard", coordinate_type="Euclidean"): 16 | super().__init__(resolution, coordinate_type) 17 | 18 | def _fit(self, X, y): 19 | """Function for fitting random interpolation. 20 | This function is not supposed to be called directly. 21 | """ 22 | self.ymax = max(y) 23 | self.ymin = min(y) 24 | return self 25 | 26 | def _predict_grid(self, x1lim, x2lim): 27 | """Function for random grid interpolation. 28 | This function is not supposed to be called directly. 29 | """ 30 | return np.random.uniform( 31 | low=self.ymin, 32 | high=self.ymax, 33 | size=(self.resolution, self.resolution), 34 | ) 35 | 36 | def _predict(self, X): 37 | """Function for random interpolation. 38 | This function is not supposed to be called directly. 
39 | """ 40 | return np.random.uniform( 41 | low=self.ymin, high=self.ymax, size=(X.shape[0]) 42 | ) 43 | -------------------------------------------------------------------------------- /polire/utils/distance.py: -------------------------------------------------------------------------------- 1 | """ 2 | A module to have different distance metrics for spatial interpolation 3 | """ 4 | import numpy as np 5 | from scipy.spatial.distance import cdist 6 | 7 | 8 | def haversine(X1, X2): 9 | """ 10 | Arguments 11 | --------- 12 | One test point 13 | Multiple Train Points 14 | 15 | Long Lat Order 16 | """ 17 | 18 | # Non-vectorized version 19 | # X1 = X1.reshape(1, 2) 20 | # difference = (X1 - X2) * np.pi / 180 21 | # test_point_lat = X1[:, 1] * np.pi / 180 22 | # training_locations_lat = X2[:, 1] * np.pi / 180 23 | 24 | # a = np.sin(difference[:, 0] / 2)**2 * np.cos(test_point_lat) * np.cos(training_locations_lat) +\ 25 | # np.sin(difference[:, 1] / 2)**2 26 | # radius = 6371 27 | # c = 2 * np.arcsin(np.sqrt(a)) 28 | # return radius * c 29 | 30 | # Vectorized code 31 | lon1, lat1, lon2, lat2 = map( 32 | np.radians, 33 | [X1[:, 0, None], X1[:, 1, None], X2[:, 0, None], X2[:, 1, None]], 34 | ) 35 | 36 | dlon = lon2.T - lon1 37 | dlat = lat2.T - lat1 38 | 39 | a = ( 40 | np.sin(dlat / 2.0) ** 2 41 | + np.cos(lat1) @ np.cos(lat2.T) * np.sin(dlon / 2.0) ** 2 42 | ) 43 | 44 | c = 2 * np.arcsin(np.sqrt(a)) 45 | km = 6371 * c 46 | return km 47 | 48 | 49 | def euclidean(X1, X2): 50 | # return np.linalg.norm(X1 - X2, 2, axis=1) 51 | return cdist(X1, X2) 52 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2020, sustainability-lab 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | 
--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and produce coveralls report.
2 | # Inspired from https://github.com/wesselb/stheno/blob/master/.github/workflows/ci.yml
3 | 
4 | name: Tests
5 | 
6 | on:
7 |   push:
8 |     branches: [ master ]
9 |   pull_request:
10 |     branches: [ master ]
11 | 
12 | jobs:
13 |   test:
14 |     runs-on: ubuntu-latest
15 |     strategy:
16 |       fail-fast: false
17 |       matrix:
18 |         python-version: ["3.9", "3.10", "3.11"]
19 | 
20 |     steps:
21 |     - uses: actions/checkout@v2
22 |     - name: Set up Python ${{ matrix.python-version }}
23 |       uses: actions/setup-python@v2
24 |       with:
25 |         python-version: ${{ matrix.python-version }}
26 |     - name: Install dependencies
27 |       run: |
28 |         python -m pip install --upgrade pip
29 |         pip install -r requirements.txt
30 |         pip install -r requirements-dev.txt
31 |     - name: Pre-commit
32 |       continue-on-error: true
33 |       run: |
34 |         pre-commit run --all-files
35 |     - name: Test
36 |       run: |
37 |         pytest -v --cov=polire --cov-report term-missing
38 |         coveralls --service=github
39 |       env:
40 |         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
41 |         COVERALLS_FLAG_NAME: ${{ matrix.python-version }}
42 |         COVERALLS_PARALLEL: true
43 | 
44 |   finish:
45 |     name: Finish Coveralls
46 |     needs: test
47 |     runs-on: ubuntu-latest
48 |     steps:
49 |     - name: Finish Coveralls
50 |       uses: coverallsapp/github-action@v1
51 |       with:
52 |         github-token: ${{ secrets.github_token }}
53 |         parallel-finished: true
--------------------------------------------------------------------------------
/polire/custom/custom.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | from ..base import Base
4 | 
5 | 
6 | class CustomInterpolator(Base):
7 |     """
8 |     Class to interpolate by fitting a scikit-learn-style regressor to
9 |     the given data.
10 | 
11 |     Parameters
12 |     ----------
13 |     regressor: object
14 |         An instantiated scikit-learn-style regressor to use for
15 |         interpolation; it must implement fit(X, y) and predict(X).
16 |         Example: sklearn.ensemble.RandomForestRegressor()
17 | 
18 |     Attributes
19 |     ----------
20 |     reg : object
21 |         The regressor instance that was passed in.
22 |     """
23 | 
24 |     def __init__(
25 |         self, regressor, resolution="standard", coordinate_type="Euclidean"
26 |     ):
27 |         super().__init__(resolution, coordinate_type)
28 |         self.reg = regressor
29 | 
30 |     def _fit(self, X, y):
31 |         """Function for fitting.
32 |         This function is not supposed to be called directly.
33 |         """
34 |         self.reg.fit(X, y)
35 |         return self
36 | 
37 |     def _predict_grid(self, x1lim, x2lim):
38 |         """Function for grid interpolation.
39 |         This function is not supposed to be called directly.
40 |         """
41 |         # getting the boundaries for interpolation
42 |         x1min, x1max = x1lim
43 |         x2min, x2max = x2lim
44 | 
45 |         # building the grid
46 |         x1 = np.linspace(x1min, x1max, self.resolution)
47 |         x2 = np.linspace(x2min, x2max, self.resolution)
48 |         X1, X2 = np.meshgrid(x1, x2)
49 |         return self.reg.predict(np.asarray([X1.ravel(), X2.ravel()]).T)
50 | 
51 |     def _predict(self, X):
52 |         """Function for interpolation on specific points.
53 |         This function is not supposed to be called directly.
54 |         """
55 |         return self.reg.predict(X)
56 | 
57 |     def __repr__(self):
58 |         return self.__class__.__name__ + "." + self.reg.__class__.__name__
59 | 
--------------------------------------------------------------------------------
/polire/spatial/spatial.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy.spatial.distance import cdist
3 | 
4 | from ..base import Base
5 | from ..utils.distance import euclidean, haversine
6 | 
7 | 
8 | class SpatialAverage(Base):
9 |     """
10 |     Class to interpolate by averaging all observations that lie
11 |     within a given radius of each query point.
12 |     Note that the radius must be in kilometres if you pass longitude and latitude as inputs.
13 |     """
14 | 
15 |     def __init__(
16 |         self,
17 |         radius=100,
18 |         resolution="standard",
19 |         coordinate_type="Euclidean",
20 |         **kwargs
21 |     ):
22 |         super().__init__(resolution, coordinate_type)
23 |         self.radius = radius
24 |         if self.coordinate_type == "Geographic":
25 |             self.distance = haversine
26 |         elif self.coordinate_type == "Euclidean":
27 |             self.distance = euclidean
28 |         else:
29 |             raise NotImplementedError(
30 |                 "Only Geographic and Euclidean Coordinates are available"
31 |             )
32 | 
33 |     def _fit(self, X, y):
34 |         """Function for fitting.
35 |         This function is not supposed to be called directly.
36 |         """
37 |         self.X = X
38 |         self.y = y
39 |         return self
40 | 
41 |     def _predict_grid(self, x1lim, x2lim):
42 |         """Function for grid interpolation.
43 |         This function is not supposed to be called directly.
44 |         """
45 |         # getting the boundaries for interpolation
46 |         x1min, x1max = x1lim
47 |         x2min, x2max = x2lim
48 | 
49 |         # building the grid
50 |         x1 = np.linspace(x1min, x1max, self.resolution)
51 |         x2 = np.linspace(x2min, x2max, self.resolution)
52 |         X1, X2 = np.meshgrid(x1, x2)
53 |         return self._predict(np.asarray([X1.ravel(), X2.ravel()]).T)
54 | 
55 |     def _predict(self, X):
56 |         """Function for interpolation on specific points.
57 |         This function is not supposed to be called directly.
58 | """ 59 | return self._average(X) 60 | 61 | def _average(self, X): 62 | dist = self.distance(X, self.X) 63 | mask = self.radius >= dist 64 | return (self.y * mask).sum(axis=1) / mask.sum(axis=1) 65 | -------------------------------------------------------------------------------- /tests/data/30-03-18.csv: -------------------------------------------------------------------------------- 1 | ,location,parameter,value,latitude,longitude 2 | 4087,"Jawaharlal Nehru Stadium, Delhi - DPCC",pm25,194.0,28.581197,77.234291 3 | 3930,"Sonia Vihar, Delhi - DPCC",pm25,267.0,28.739434,77.245721 4 | 4020,"Narela, Delhi - DPCC",pm25,273.0,28.822931,77.101961 5 | 4057,"Najafgarh, Delhi - DPCC",pm25,129.0,28.620806,76.991463 6 | 3998,"NSIT Dwarka, New Delhi - CPCB",pm25,176.0,28.609090000000002,77.03254129999999 7 | 4104,"Dwarka-Sector 8, Delhi - DPCC ",pm25,172.0,28.570859,77.072196 8 | 4009,"R K Puram, New Delhi - DPCC",pm25,168.0,28.5646102,77.1670103 9 | 4099,"IGI Airport Terminal - 3, New Delhi - IMD",pm25,105.0,28.562776300000003,77.1180053 10 | 4074,"Okhla Phase-2, Delhi - DPCC",pm25,203.0,28.530782,77.272404 11 | 4068,"Nehru Nagar, Delhi - DPCC",pm25,192.0,28.563827,77.26075 12 | 4039,"Pusa, New Delhi - IMD",pm25,95.0,28.610304,77.0996943 13 | 3947,"Lodhi Road, New Delhi - IMD",pm25,148.0,28.5918245,77.2273074 14 | 4004,"Rohini, Delhi - DPCC",pm25,203.0,28.732219,77.09211 15 | 4112,"Vivek Vihar, Delhi - DPCC",pm25,221.0,28.668672,77.317084 16 | 4103,"North Campus, DU, New Delhi - IMD",pm25,141.0,28.657381400000002,77.15854470000001 17 | 4078,"IHBAS, Dilshad Garden,New Delhi - CPCB",pm25,192.0,28.6802747,77.20115729999999 18 | 4015,"Major Dhyan Chand National Stadium, Delhi - DPCC",pm25,203.0,28.612561,77.237372 19 | 3959,"Patparganj, Delhi - DPCC",pm25,152.0,28.632707,77.305651 20 | 3960,"Shadipur, New Delhi - CPCB",pm25,185.0,28.651478100000002,77.1473105 21 | 3980,"Wazirpur, Delhi - DPCC",pm25,290.0,28.699254,77.16482 22 | 4041,"Jahangirpuri, Delhi - DPCC",pm25,273.0,28.728722,77.170221 23 | 4005,"Mandir Marg, New Delhi - DPCC",pm25,173.0,28.6372688,77.2005604 24 | 3970,"Burari Crossing, New Delhi - IMD",pm25,269.0,28.725650399999996,77.20115729999999 25 | 4089,"Punjabi Bagh, Delhi - DPCC",pm25,160.0,28.669119,77.136777 26 | 396386,"Dr. Karni Singh Shooting Range, Delhi - DPCC",pm25,78.0,28.49968,77.267246 27 | 4117,"ITO, New Delhi - CPCB",pm25,211.0,28.631694500000002,77.2494387 28 | 4072,"CRRI Mathura Road, New Delhi - IMD",pm25,252.0,28.5512005,77.27357370000001 29 | 4033,"Sirifort, New Delhi - CPCB",pm25,133.0,28.5504249,77.2159377 30 | 396397,"DTU, New Delhi - CPCB",pm25,77.0,28.7500499,77.1112615 31 | 4180,US Diplomatic Post: New Delhi,pm25,96.0,28.635759999999998,77.22445 32 | -------------------------------------------------------------------------------- /polire/gp/gp.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is a module for GP Interpolation 3 | """ 4 | import numpy as np 5 | from ..base import Base 6 | from GPy.models import GPRegression 7 | from GPy.kern import RBF 8 | 9 | 10 | class GP(Base): 11 | """A class that is declared for performing GP interpolation. 12 | GP interpolation (usually) works on the principle of finding the 13 | best unbiased predictor. 14 | 15 | Parameters 16 | ---------- 17 | type : str, optional 18 | This parameter defines the type of Kriging under consideration. This 19 | implementation uses PyKrige package (https://github.com/bsmurphy/PyKrige). 
24 | 
25 |     """
26 | 
27 |     def __init__(
28 |         self,
29 |         kernel=RBF(2, ARD=True),
30 |     ):
31 |         super().__init__()
32 |         self.kernel = kernel
33 | 
34 |     def _fit(self, X, y, n_restarts=5, verbose=False, random_state=None):
35 |         """Fit method for GP Interpolation
36 |         This function shouldn't be called directly.
37 |         """
38 |         np.random.seed(random_state)
39 |         if len(y.shape) == 1:
40 |             y = y.reshape(-1, 1)
41 |         self.model = GPRegression(X, y, self.kernel)
42 |         self.model.optimize_restarts(n_restarts, verbose=verbose)
43 |         return self
44 | 
45 |     def _predict_grid(self, x1lim, x2lim):
46 |         """The function that is called to return the interpolated data in GP interpolation
47 |         in a grid. This method shouldn't be called directly"""
48 |         lims = (*x1lim, *x2lim)
49 |         x1min, x1max, x2min, x2max = lims
50 |         x1 = np.linspace(x1min, x1max, self.resolution)
51 |         x2 = np.linspace(x2min, x2max, self.resolution)
52 | 
53 |         X1, X2 = np.meshgrid(x1, x2)
54 |         X = np.array([(i, j) for i, j in zip(X1.ravel(), X2.ravel())])
55 | 
56 |         predictions = self.model.predict(X)[0].reshape(len(x1), len(x2))
57 | 
58 |         return predictions.ravel()
59 | 
60 |     def _predict(self, X, return_variance=False):
61 |         """This function should be called to return the interpolated data from the GP
62 |         in a pointwise manner. This method shouldn't be called directly."""
63 | 
64 |         predictions, variance = self.model.predict(X)
65 |         if return_variance:
66 |             return predictions.ravel(), variance
67 |         else:
68 |             return predictions.ravel()
69 | 
--------------------------------------------------------------------------------
/polire/utils/gridding.py:
--------------------------------------------------------------------------------
1 | """ Standard Utility Script for Gridding Data
2 | 1. Contains all the common functions that
3 | will be employed across various different interpolators
4 | 
5 | """
6 | import numpy as np
7 | from scipy import spatial
8 | 
9 | 
10 | def make_grid(x, y, res, offset=0.2):
11 |     """This function returns the grid to perform interpolation on.
12 |     This function is used inside the fit() attribute of the idw class.
13 | 
14 |     Parameters
15 |     ----------
16 |     x: array-like, shape(n_samples,)
17 |         The first coordinate values of all points where
18 |         ground truth is available
19 |     y: array-like, shape(n_samples,)
20 |         The second coordinate values of all points where
21 |         ground truth is available
22 |     res: int
23 |         The resolution value
24 |     offset: float, optional
25 |         A value between 0 and 0.5 that specifies the extra interpolation to be done
26 |         Default is 0.2
27 | 
28 |     Returns
29 |     -------
30 |     xx : {array-like, 2D}, shape (res, res)
31 |     yy : {array-like, 2D}, shape (res, res)
32 |     """
33 |     y_min = y.min() - offset
34 |     y_max = y.max() + offset
35 |     x_min = x.min() - offset
36 |     x_max = x.max() + offset
37 |     x_arr = np.linspace(x_min, x_max, res)
38 |     y_arr = np.linspace(y_min, y_max, res)
39 |     xx, yy = np.meshgrid(x_arr, y_arr)
40 |     return xx, yy
41 | 
42 | 
43 | def find_closest(grid, X, l=2):
44 |     """Function used to find the indices of the grid points closest
45 |     to the passed points in X.
46 | 
47 |     Parameters
48 |     ----------
49 |     grid: {list of 2 arrays}, (shape(res, res), shape(res, res))
50 |         This is generated by meshgrid.
51 | 
52 |     X: {array-like, 2D matrix}, shape(n_samples, 2)
53 |         The set of points to which we need to provide closest points
54 |         on the grid.
55 | 
56 |     l: int, optional
57 |         To decide the `l`th norm to use. `Default = 2`.
58 | 
59 |     Returns
60 |     -------
61 |     ixs: list, length X.shape[0]
62 |         The indices of the grid points closest to the points in X.
63 | 
64 |     ref - https://stackoverflow.com/questions/10818546/finding-index-of-nearest-point-in-numpy-arrays-of-x-and-y-coordinates
65 |     """
66 |     points = np.asarray(
67 |         [grid[0].ravel(), grid[1].ravel()]
68 |     ).T  # ravel returns a view, not a copy
69 |     kdtree = spatial.KDTree(points)
70 |     ixs = []  # for containing the indices of closest points found on grid
71 | 
72 |     for point_ix in range(X.shape[0]):
73 |         point = X[point_ix, :]
74 |         _, ix = kdtree.query(point, p=l)
75 |         ixs.append(ix)
76 | 
77 |     return ixs
78 | 
--------------------------------------------------------------------------------
/polire/trend/trend.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy.optimize import curve_fit
3 | 
4 | from ..base import Base
5 | from .polynomials import _create_polynomial
6 | 
7 | 
8 | class Trend(Base):
9 |     """Class to interpolate by fitting a curve to the data points
10 |     available using `scipy`'s `curve_fit`.
11 | 
12 |     Parameters
13 |     ----------
14 |     order: int, default 1
15 |         Selects the order of the polynomial to best fit.
16 |         Possible values 0 <= order <= 2.
17 | 
18 |     custom_poly: functor, default None
19 |         If you would like to fit to your custom function,
20 |         _set order to None_ and then pass a functor.
21 |         See the example of passing a functor below.
22 |         .. highlight:: python
23 |         .. code-block:: python
24 |             def func(X, a, b, c):
25 |                 x1, x2 = X
26 |                 return np.log(a) + b*np.log(x1) + c*np.log(x2)
27 |             t = Trend(order=None, custom_poly=func)
28 |             ...
29 |     """
30 | 
31 |     def __init__(
32 |         self,
33 |         order=1,
34 |         custom_poly=None,
35 |         resolution="standard",
36 |         coordinate_type="Euclidean",
37 |     ):
38 |         super().__init__(resolution, coordinate_type)
39 |         self.order = order
40 |         # setting the polynomial to fit our data
41 |         if _create_polynomial(order) is not None:
42 |             self.func = _create_polynomial(order)
43 |         else:
44 |             if custom_poly is not None:
45 |                 self.func = custom_poly
46 |             else:
47 |                 raise ValueError("Arguments passed are not valid")
48 | 
49 |     def _fit(self, X, y):
50 |         """Function for fitting trend interpolation.
51 |         This function is not supposed to be called directly.
52 |         """
53 |         # fitting the curve using scipy
54 |         self.popt, self.pcov = curve_fit(self.func, (X[:, 0], X[:, 1]), y)
55 |         return self
56 | 
57 |     def _predict_grid(self, x1lim, x2lim):
58 |         """Function for trend interpolation.
59 |         This function is not supposed to be called directly.
60 |         """
61 |         # getting the boundaries for interpolation
62 |         x1min, x1max = x1lim
63 |         x2min, x2max = x2lim
64 | 
65 |         # forming the grid
66 |         x1 = np.linspace(x1min, x1max, self.resolution)
67 |         x2 = np.linspace(x2min, x2max, self.resolution)
68 |         X1, X2 = np.meshgrid(x1, x2)
69 |         return self.func((X1, X2), *self.popt)
70 | 
71 |     def _predict(self, X):
72 |         """Function for trend interpolation.
73 |         This function is not supposed to be called directly.
74 | """ 75 | x1, x2 = X[:, 0], X[:, 1] 76 | return self.func((x1, x2), *self.popt) 77 | -------------------------------------------------------------------------------- /usage.py: -------------------------------------------------------------------------------- 1 | # imports 2 | import seaborn as sns 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import pandas as pd 6 | from GPy.kern import Matern32 7 | 8 | from polire import ( 9 | Random, 10 | Trend, 11 | Spline, 12 | IDW, 13 | Kriging, 14 | SpatialAverage, 15 | NaturalNeighbor, 16 | GP, 17 | ) 18 | 19 | # sample data 20 | X = [[0, 0], [0, 3], [3, 0], [3, 3]] 21 | y = [0, 1.5, 1.5, 3] 22 | X = np.array(X) 23 | y = np.array(y) 24 | regressors = [ 25 | Random(), 26 | SpatialAverage(), 27 | Spline(kx=1, ky=1), 28 | Trend(), 29 | IDW(coordinate_type="Geographic"), 30 | Kriging(), 31 | GP(Matern32(input_dim=2)), 32 | ] 33 | 34 | 35 | def test_grid(): 36 | # Gridded interpolation testing 37 | print("\nTesting on small dataset") 38 | for r in regressors: 39 | r.fit(X, y) 40 | y_pred = r.predict_grid() 41 | Z = y_pred 42 | sns.heatmap(Z) 43 | plt.title(r) 44 | plt.show() 45 | plt.close() 46 | print("\nTesting completed on a small dataset\n") 47 | 48 | print("\nTesting on a reasonable dataset") 49 | 50 | df = pd.read_csv("tests/data/30-03-18.csv") 51 | X1 = np.array(df[["longitude", "latitude"]]) 52 | y1 = np.array(df["value"]) 53 | 54 | for r in regressors: 55 | r.fit(X1, y1) 56 | y_pred = r.predict_grid() 57 | Z = y_pred 58 | sns.heatmap(Z) 59 | plt.title(r) 60 | plt.show() 61 | plt.close() 62 | 63 | 64 | def test_point(): 65 | # Pointwise interpolation testing 66 | for r in regressors: 67 | r.fit(X, y) 68 | test_data = [ 69 | [0, 0], 70 | [0, 3], 71 | [3, 0], 72 | [3, 3], 73 | [1, 1], 74 | [1.5, 1.5], 75 | [2, 2], 76 | [2.5, 2.5], 77 | [4, 4], 78 | ] 79 | y_pred = r.predict(np.array(test_data)) 80 | print(r) 81 | print(y_pred) 82 | 83 | 84 | def test_nn(): 85 | print("\nNatural Neighbors - Point Wise") 86 | nn = NaturalNeighbor() 87 | df = pd.read_csv("tests/data/30-03-18.csv") 88 | X = np.array(df[["longitude", "latitude"]]) 89 | y = np.array(df["value"]) 90 | nn.fit(X, y) 91 | test_data = [[77.16, 28.70], X[0]] 92 | y_pred = nn.predict(np.array(test_data)) 93 | print(y_pred) 94 | del nn 95 | print("\nNatural Neighbors - Entire Grid") 96 | # Suggested by Apoorv as a temporary fix 97 | # Patience pays 98 | nn = NaturalNeighbor() 99 | nn.fit(X, y) 100 | y_pred = nn.predict_grid() 101 | print(y_pred) 102 | sns.heatmap(y_pred) 103 | plt.title(nn) 104 | plt.show() 105 | plt.close() 106 | 107 | 108 | if __name__ == "__main__": 109 | print("Testing Gridded Interpolation") 110 | test_grid() 111 | print("\nTesting Pointwise Interpolation") 112 | test_point() 113 | print("\nTesting Natural Neighbors") 114 | test_nn() 115 | -------------------------------------------------------------------------------- /polire/spline/bspline.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.interpolate import bisplrep, bisplev 3 | 4 | 5 | from ..base import Base 6 | from ..utils import find_closest 7 | 8 | 9 | class Spline(Base): 10 | """ 11 | Class to use a bivariate B-spline to interpolate values. 12 | https://docs.scipy.org/doc/scipy-0.18.1/reference/generated/scipy.interpolate.bisplrep.html#scipy.interpolate.bisplrep 13 | 14 | Parameters 15 | ---------- 16 | kx, ky: int, int, optional 17 | The degrees of the spline (1 <= kx, ky <= 5). 18 | Third order (kx=ky=3) is recommended. 
19 | 20 | s : float, optional 21 | A non-negative smoothing factor. If weights correspond 22 | to the inverse of the standard-deviation of the errors 23 | in z, then a good s-value should be found in the 24 | range `(m-sqrt(2*m),m+sqrt(2*m))` where `m=len(x)`. 25 | """ 26 | 27 | def __init__( 28 | self, 29 | kx=3, 30 | ky=3, 31 | s=None, 32 | resolution="standard", 33 | coordinate_type="Euclidean", 34 | ): 35 | super().__init__(resolution, coordinate_type) 36 | self.kx = kx 37 | self.ky = ky 38 | self.s = s 39 | 40 | def _fit(self, X, y): 41 | """The function call to fit the spline model on the given data. 42 | This function is not supposed to be called directly. 43 | """ 44 | # fitting the curve 45 | # bisplrep returns details of the fitted curve 46 | # read bisplrep docs for more info about it's return values. 47 | self.tck = bisplrep( 48 | X[:, 0], X[:, 1], y, kx=self.kx, ky=self.ky, s=self.s 49 | ) 50 | return self 51 | 52 | def _predict_grid(self, x1lim, x2lim): 53 | """The function to predict grid interpolation using the BSpline. 54 | This function is not supposed to be called directly. 55 | """ 56 | # getting the boundaries for interpolation 57 | x1min, x1max = x1lim 58 | x2min, x2max = x2lim 59 | 60 | # interpolating over the grid 61 | # TODO Relook here, we might expect the result to be transpose 62 | return bisplev( 63 | np.linspace(x1min, x1max, self.resolution), 64 | np.linspace(x2min, x2max, self.resolution), 65 | self.tck, 66 | ) 67 | 68 | def _predict(self, X): 69 | """The function to predict using the BSpline interpolation. 70 | This function is not supposed to be called directly. 71 | """ 72 | results = [] 73 | for ix in range(X.shape[0]): 74 | interpolated_y = bisplev( 75 | X[ix, 0], X[ix, 1], self.tck 76 | ).item() # one value returned 77 | results.append(interpolated_y) 78 | 79 | return np.array(results) 80 | 81 | # # form a grid 82 | # x1 = np.linspace(self.x1min_d, self.x1max_d, self.resolution), 83 | # x2 = np.linspace(self.x2min_d, self.x2max_d, self.resolution), 84 | # X1, X2 = np.meshgrid(x1, x2) 85 | 86 | # # be default run grid interpolation on the whole train data 87 | # interpolated_grid = bisplev( 88 | # x1, x2, 89 | # self.tck, 90 | # ) 91 | 92 | # # find the closest points on the interpolated grid 93 | # ix = find_closest(grid=(X1, X2), X) 94 | # return interpolated_grid[ix] # TODO this can be wrong, must depend on 95 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Tests](https://github.com/sustainability-lab/polire/actions/workflows/tests.yml/badge.svg) 2 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) 3 | [![Coverage](https://coveralls.io/repos/github/sustainability-lab/polire/badge.svg?branch=master)](https://coveralls.io/github/sustainability-lab/polire?branch=master) 4 | 5 | ## Polire 6 | 7 | ```python 8 | pip install polire 9 | ``` 10 | 11 | 12 | The word "interpolation" has a Latin origin and is composed of two words - Inter, meaning between, and Polire, meaning to polish. 13 | 14 | 15 | This repository is a collection of several spatial interpolation algorithms. 16 | 17 | 18 | ## Examples 19 | Please refer to [the documentation](https://sustainability-lab.github.io/polire/) to check out practical examples on real datasets. 
20 | 
21 | ### Minimal example of interpolation
22 | ```python
23 | import numpy as np
24 | from polire import Kriging
25 | 
26 | # Data
27 | X = np.random.rand(10, 2) # Spatial 2D points
28 | y = np.random.rand(10) # Observations
29 | X_new = np.random.rand(100, 2) # New spatial points
30 | 
31 | # Fit
32 | model = Kriging()
33 | model.fit(X, y)
34 | 
35 | # Predict
36 | y_new = model.predict(X_new)
37 | ```
38 | 
39 | ### Supported Interpolation Methods
40 | ```python
41 | from polire import (
42 |     Kriging, # Best spatial unbiased predictor
43 |     GP, # Gaussian process interpolator from GPy
44 |     IDW, # Inverse distance weighting
45 |     SpatialAverage,
46 |     Spline,
47 |     Trend,
48 |     Random, # Predict uniformly within the observation range, a reasonable baseline
49 |     NaturalNeighbor,
50 |     CustomInterpolator # Supports any regressor from Scikit-learn
51 | )
52 | ```
53 | 
54 | ### Use GP kernels from GPy (temporarily unavailable)
55 | ```python
56 | from GPy.kern import Matern32 # or any other GPy kernel
57 | 
58 | # GP model
59 | model = GP(Matern32(input_dim=2))
60 | ```
61 | 
62 | ### Regressors from sklearn
63 | ```python
64 | from sklearn.linear_model import LinearRegression # or any Scikit-learn regressor
65 | from polire import CustomInterpolator
66 | 
67 | # Sklearn model
68 | model = CustomInterpolator(LinearRegression())
69 | ```
70 | 
71 | ### Extract spatial features from spatio-temporal dataset
72 | ```python
73 | # X and X_new are datasets as numpy arrays with the first three columns as longitude, latitude and time.
74 | # y is the set of observations corresponding to X
75 | 
76 | from polire.preprocessing import SpatialFeatures
77 | spatial = SpatialFeatures(n_closest=10)
78 | Features = spatial.fit_transform(X, y)
79 | Features_new = spatial.transform(X_new)
80 | ```
81 | 
82 | ## Citation
83 | 
84 | If you use this library, please cite the following paper:
85 | 
86 | ```
87 | @inproceedings{10.1145/3384419.3430407,
88 | author = {Narayanan, S Deepak and Patel, Zeel B and Agnihotri, Apoorv and Batra, Nipun},
89 | title = {A Toolkit for Spatial Interpolation and Sensor Placement},
90 | year = {2020},
91 | isbn = {9781450375900},
92 | publisher = {Association for Computing Machinery},
93 | address = {New York, NY, USA},
94 | url = {https://doi.org/10.1145/3384419.3430407},
95 | doi = {10.1145/3384419.3430407},
96 | booktitle = {Proceedings of the 18th Conference on Embedded Networked Sensor Systems},
97 | pages = {653–654},
98 | numpages = {2},
99 | location = {Virtual Event, Japan},
100 | series = {SenSys '20}
101 | }
102 | ```
--------------------------------------------------------------------------------
/polire/idw/idw.py:
--------------------------------------------------------------------------------
1 | """
2 | This is a module for inverse distance weighting (IDW) Spatial Interpolation
3 | """
4 | import numpy as np
5 | from ..utils.distance import haversine, euclidean
6 | from ..base import Base
7 | 
8 | 
9 | class IDW(Base):
10 |     """A class that is declared for performing IDW Interpolation.
11 |     For more information on how this method works, kindly refer to
12 |     https://en.wikipedia.org/wiki/Inverse_distance_weighting
13 | 
14 |     Parameters
15 |     ----------
16 |     exponent : positive float, optional
17 |         Controls how quickly the influence of a data point decays
18 |         with distance: the higher the exponent, the faster the
19 |         fall-off. Default value is 2.
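20 | 
21 |     Example (illustrative): a prediction is the weighted average
22 |     sum(w_i * y_i) / sum(w_i) with weights w_i = 1 / d_i**exponent,
23 |     where d_i is the distance from the query point to training point i:
24 | 
25 |         model = IDW(exponent=2)
26 |         model.fit(X, y)
27 |         y_new = model.predict(X_new)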
28 | 
29 |     Attributes
30 |     ----------
31 |     Interpolated Values : {array-like, 2D matrix}, shape(resolution, resolution)
32 |         This contains all the interpolated values when the interpolation is performed
33 |         over a grid, instead of interpolation over a set of points.
34 | 
35 |     X : {array-like, 2D matrix}, shape(n_samples, 2)
36 |         Set of all the coordinates available for interpolation.
37 | 
38 |     y : array-like, shape(n_samples,)
39 |         Set of all the available values at the specified X coordinates.
40 | 
41 |     result : array_like, shape(n_to_predict, )
42 |         Set of all the interpolated values when interpolating over a given
43 |         set of data points.
44 | 
45 |     """
46 | 
47 |     def __init__(
48 |         self, exponent=2, resolution="standard", coordinate_type="Euclidean"
49 |     ):
50 |         super().__init__(resolution, coordinate_type)
51 |         self.exponent = exponent
52 |         self.interpolated_values = None
53 |         self.X = None
54 |         self.y = None
55 |         self.result = None
56 |         if self.coordinate_type == "Geographic":
57 |             self.distance = haversine
58 |         elif self.coordinate_type == "Euclidean":
59 |             self.distance = euclidean
60 |         else:
61 |             raise NotImplementedError(
62 |                 "Only Geographic and Euclidean Coordinates are available"
63 |             )
64 | 
65 |     def _fit(self, X, y):
66 |         """Fit method for the IDW class.
67 |         This is not expected to be called directly.
68 |         """
69 |         self.X = X
70 |         self.y = y
71 |         return self
72 | 
73 |     def _predict_grid(self, x1lim, x2lim):
74 |         """Gridded interpolation for IDW. This function should not
75 |         be called directly.
76 |         """
77 |         lims = (*x1lim, *x2lim)
78 |         x1min, x1max, x2min, x2max = lims
79 |         x1 = np.linspace(x1min, x1max, self.resolution)
80 |         x2 = np.linspace(x2min, x2max, self.resolution)
81 |         X1, X2 = np.meshgrid(x1, x2)
82 |         return self._predict(np.array([X1.ravel(), X2.ravel()]).T)
83 | 
84 |     def _predict(self, X):
85 |         """The function call to predict using the interpolated data
86 |         in IDW interpolation. This should not be called directly.
87 |         """
88 | 
89 |         dist = self.distance(self.X, X)
90 |         weights = 1 / np.power(dist, self.exponent)
91 |         result = (weights * self.y[:, None]).sum(axis=0) / weights.sum(axis=0)
92 | 
93 |         # if point is from train data, ground truth must not change
94 |         for i in range(X.shape[0]):
95 |             mask = np.equal(X[i], self.X).all(axis=1)
96 |             if mask.any():
97 |                 result[i] = (self.y * mask).sum()
98 | 
99 |         return result
100 | 
--------------------------------------------------------------------------------
/polire/preprocessing/sptial_features.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from ..idw.idw import IDW
3 | from ..utils.distance import haversine, euclidean
4 | 
5 | 
6 | class SpatialFeatures:
7 |     """Generate spatial features from the N closest locations
8 | 
9 |     Args:
10 |         n_closest : number of closest locations ('N') to use
11 | 
12 |         idw : whether to use the IDW output as one of the features
13 | 
14 |         idw_exponent : exponent to be used in IDW (if idw is False, ignored)
15 | 
16 |         coordinate_type : 'Euclidean' or 'Geographic'
17 | 
18 |         resolution : 'low', 'standard' or 'high' (if idw is False, ignored)
19 |     """
20 | 
21 |     def __init__(
22 |         self,
23 |         n_closest: int = 5,
24 |         idw: bool = True,
25 |         idw_exponent: float = 2,
26 |         coordinate_type: str = "Euclidean",
27 |         resolution: str = "standard",
28 |     ) -> None:
29 |         self.n_closest = n_closest
30 |         self.idw = idw
31 |         self.idw_exponent = idw_exponent
32 |         self.coordinate_type = coordinate_type
33 |         self.resolution = resolution
34 |         if self.coordinate_type == "Euclidean":
35 |             self.distance = euclidean
36 |         elif self.coordinate_type == "Geographic":
37 |             self.distance = haversine
38 |         else:
39 |             raise NotImplementedError(
40 |                 '"'
41 |                 + self.coordinate_type
42 |                 + '" is not implemented yet or invalid'
43 |             )
44 | 
45 |     def fit(self, X: np.ndarray, y: np.ndarray) -> object:
46 |         """Store the reference data used to build features.
47 | 
48 |         Args:
49 |             X : Reference X data (longitude, latitude, time, ...)
50 |             y : Reference y data
51 | 
52 |         Returns:
53 |             self
54 |         """
55 |         self.X = X
56 |         self.y = y
57 |         return self
58 | 
59 |     def transform(self, X: np.ndarray) -> np.ndarray:
60 |         """Transform features
61 | 
62 |         Args:
63 |             X (np.ndarray): (longitude, latitude, time, ...)
64 | 
65 |         Raises:
66 |             Exception: If not already fitted
67 | 
68 |         Returns:
69 |             np.ndarray: Transformed features
70 |         """
71 |         try:
72 |             self.X
73 |         except AttributeError:
74 |             raise Exception("Not fitted yet. First call the 'fit' method")
75 | 
76 |         # flag whether we are transforming the training data itself
77 |         Xflag = False
78 |         if X.shape == self.X.shape and np.all(X == self.X):
79 |             Xflag = True
80 | 
81 |         F = (
82 |             np.empty(
83 |                 (X.shape[0], (X.shape[1] - 3) + self.n_closest * 2 + self.idw)
84 |             )
85 |             * np.nan
86 |         )
87 |         for t in np.unique(X[:, 2]):  # Iterating over time
88 |             mask = X[:, 2] == t  # rows with time t
89 |             trn_mask = self.X[:, 2] == t
90 |             X_local = X[mask]
91 |             self_X_local = self.X[trn_mask]
92 | 
93 |             lonlat = X_local[:, :2]  # locs
94 |             self_lonlat = self_X_local[:, :2]  # Reference locs
95 |             dst = self.distance(lonlat, self_lonlat)
96 |             if Xflag:
97 |                 idx = dst.argsort()[:, 1 : self.n_closest + 1]
98 |             else:
99 |                 idx = dst.argsort()[:, : self.n_closest]
100 | 
101 |             # Feature set 1: closest distances
102 |             f1 = dst[np.arange(lonlat.shape[0])[:, None], idx]
103 | 
104 |             self_y_local = self.y[trn_mask]  # Train obs
105 |             ymat = self_y_local[:, None].repeat(lonlat.shape[0], 1).T
106 |             # Feature set 2: closest observations
107 |             f2 = ymat[np.arange(lonlat.shape[0])[:, None], idx]
108 | 
109 |             if self.idw:
110 | 
111 |                 def for_each_row(i):
112 |                     i = i[0]
113 |                     model = IDW(exponent=self.idw_exponent)
114 |                     model.resolution = self.resolution
115 |                     model.coordinate_type = self.coordinate_type
116 |                     model.fit(self_lonlat[idx[i]], self_y_local[idx[i]])
117 |                     return model.predict(lonlat[i][None, :])
118 | 
119 |                 # Feature set 3: IDW observation
120 |                 f3 = np.apply_along_axis(
121 |                     for_each_row,
122 |                     axis=1,
123 |                     arr=np.arange(lonlat.shape[0]).reshape(-1, 1),
124 |                 )
125 |                 F[mask] = np.concatenate([X_local[:, 3:], f1, f2, f3], axis=1)
126 |             else:
127 |                 F[mask] = np.concatenate([X_local[:, 3:], f1, f2], axis=1)
128 | 
129 |         return F
130 | 
131 |     def fit_transform(self, X: np.ndarray, y: np.ndarray):
132 |         self.fit(X, y)
133 |         return self.transform(X)
134 | 
--------------------------------------------------------------------------------
/polire/base/base.py:
--------------------------------------------------------------------------------
1 | from ..constants import RESOLUTION
2 | 
3 | 
4 | class Base:
5 |     """A class that is declared for performing Interpolation.
6 |     This class should not be called directly; use one of its
7 |     children.
8 |     """
9 | 
10 |     def __init__(self, resolution="standard", coordinate_types="Euclidean"):
11 |         self.resolution = RESOLUTION[resolution]
12 |         self.coordinate_type = coordinate_types
13 |         self._fit_called = False
14 | 
15 |     def fit(self, X, y, **kwargs):
16 |         """The function call to fit the model on the given data.
17 | 
18 |         Parameters
19 |         ----------
20 | 
21 |         X: {array-like, 2D matrix}, shape(n_samples, 2)
22 |             The set of all coordinates, where we have ground truth
23 |             values
24 |         y: array-like, shape(n_samples,)
25 |             The set of all the ground truth values using which
26 |             we perform interpolation
27 | 
28 |         Returns
29 |         -------
30 | 
31 |         self : object
32 |             Returns self
33 | 
34 |         """
35 |         assert len(X.shape) == 2, "X must be a 2D array got shape = " + str(
36 |             X.shape
37 |         )
38 |         # assert X.shape[1] == 2, "X can not have more than 2 dimensions"
39 |         assert len(y.shape) == 1, "y should be a 1d array"
40 |         assert y.shape[0] == X.shape[0], "X and y must be of the same size"
41 | 
42 |         # saving that fit was called
43 |         self._fit_called = True
44 | 
45 |         # saving boundaries
46 |         self.x1min_d = min(X[:, 0])
47 |         self.x1max_d = max(X[:, 0])
48 |         self.x2min_d = min(X[:, 1])
49 |         self.x2max_d = max(X[:, 1])
50 |         return self._fit(X, y, **kwargs)  # calling child specific fit method
51 | 
52 |     def predict(self, X, **kwargs):
53 |         """The function call to return interpolated data on specific
54 |         points.
55 | 
56 |         Parameters
57 |         ----------
58 | 
59 |         X: {array-like, 2D matrix}, shape(n_samples, 2)
60 |             The set of coordinates at which interpolated values
61 |             are requested
62 | 
63 |         Returns
64 |         -------
65 | 
66 |         y_pred : array-like, shape(n_samples,)
67 |             The set of interpolated values for the points used to
68 |             call the function.
69 |         """
70 | 
71 |         assert len(X.shape) == 2, "X must be a 2D array got shape = " + str(
72 |             X.shape
73 |         )
74 |         # assert X.shape[1] == 2, "X can not have more than 2 dimensions"
75 | 
76 |         # checking if model is fitted or not
77 |         assert self._fit_called, "First call fit method to fit the model"
78 | 
79 |         # calling child specific _predict method
80 |         return self._predict(X, **kwargs)
81 | 
82 |     def predict_grid(self, x1lim=None, x2lim=None, support_extrapolation=True):
83 |         """Function to interpolate data on a grid of the given size.
84 | 
85 |         Parameters
86 |         ----------
87 |         x1lim: tuple(float, float),
88 |             Upper and lower bound on 1st dimension for the interpolation.
89 | 
90 |         x2lim: tuple(float, float),
91 |             Upper and lower bound on 2nd dimension for the interpolation.
92 | 
93 |         Returns
94 |         -------
95 |         y: {array-like, 2D matrix}, shape(resolution, resolution)
96 |             Interpolated values on the grid requested.
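97 | 
98 |         Example (illustrative, with any fitted child interpolator):
99 |             model.fit(X, y)
100 |             Z = model.predict_grid()  # shape (resolution, resolution)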
101 |         """
102 |         # checking if model is fitted or not
103 |         assert self._fit_called, "First call fit method to fit the model"
104 | 
105 |         # by default we interpolate over the whole grid
106 |         if x1lim is None:
107 |             x1lim = (self.x1min_d, self.x1max_d)
108 |         if x2lim is None:
109 |             x2lim = (self.x2min_d, self.x2max_d)
110 |         (x1min, x1max) = x1lim
111 |         (x2min, x2max) = x2lim
112 | 
113 |         # extrapolation isn't supported yet: the requested grid must
114 |         # lie within the fitted data bounds
115 |         if not support_extrapolation:
116 |             assert x1min >= self.x1min_d, "Extrapolation not supported"
117 |             assert x1max <= self.x1max_d, "Extrapolation not supported"
118 |             assert x2min >= self.x2min_d, "Extrapolation not supported"
119 |             assert x2max <= self.x2max_d, "Extrapolation not supported"
120 | 
121 |         # calling child specific _predict_grid method
122 |         pred_y = self._predict_grid(x1lim, x2lim)
123 |         return pred_y.reshape(self.resolution, self.resolution)
124 | 
125 |     def __repr__(self):
126 |         return self.__class__.__name__
127 | 
128 |     def _fit(self, X, y):
129 |         raise NotImplementedError
130 | 
131 |     def _predict_grid(self, x1lim, x2lim):
132 |         raise NotImplementedError
133 | 
134 |     def _predict(self, X):
135 |         raise NotImplementedError
136 | 
--------------------------------------------------------------------------------
/polire/kriging/kriging.py:
--------------------------------------------------------------------------------
1 | """
2 | This is a module for Kriging Interpolation
3 | """
4 | import numpy as np
5 | from ..base import Base
6 | from pykrige.ok import OrdinaryKriging
7 | from pykrige.uk import UniversalKriging
8 | 
9 | 
10 | class Kriging(Base):
11 |     """A class that is declared for performing Kriging interpolation.
12 |     Kriging interpolation (usually) works on the principle of finding the
13 |     best unbiased predictor. Ordinary Kriging, for example, involves finding the
14 |     best unbiased linear predictor.
15 | 
16 |     Parameters
17 |     ----------
18 |     type : str, optional
19 |         This parameter defines the type of Kriging under consideration. This
20 |         implementation uses the PyKrige package (https://github.com/bsmurphy/PyKrige).
21 |         The user needs to choose between "Ordinary" and "Universal".
22 | 
23 |     plotting: boolean, optional
24 |         This parameter plots the fit semivariogram. We use PyKrige's inbuilt plotter for the same.
25 | 
26 |     variogram_model : str, optional
27 |         Specifies which variogram model to use; may be one of the following:
28 |         linear, power, gaussian, spherical, exponential, hole-effect.
29 |         Default is linear variogram model. To utilize a custom variogram model,
30 |         specify 'custom'; you must also provide variogram_parameters and
31 |         variogram_function. Note that the hole-effect model is only technically
32 |         correct for one-dimensional problems.
33 | 
34 |     require_variance : Boolean, optional
35 |         This variable returns the uncertainty in the interpolated values using Kriging
36 |         interpolation. If this is True, kindly call the return_variance method of this class
37 |         to retrieve the computed variances. False is the default value.
38 | 
39 |     nlags: int, optional
40 |         Number of lags to be considered for the semivariogram. As in PyKrige, we set the default to 6.
41 | """ 42 |  43 | def __init__( 44 | self, 45 | type="Ordinary", 46 | plotting=False, 47 | variogram_model="linear", 48 | require_variance=False, 49 | resolution="standard", 50 | coordinate_type="Eucledian", 51 | nlags=6, 52 | ): 53 | super().__init__(resolution, coordinate_type) 54 | self.variogram_model = variogram_model 55 | self.ok = None 56 | self.uk = None 57 | self.type = type 58 | self.plotting = plotting 59 | self.coordinate_type = None # set properly just below 60 | self.require_variance = require_variance 61 | self.variance = None 62 |  63 | if coordinate_type == "Eucledian": 64 | self.coordinate_type = "euclidean" 65 | else: 66 | self.coordinate_type = "geographic" 67 |  68 | self.nlags = nlags 69 |  70 | def _fit(self, X, y): 71 | """This method of the Kriging class is used to fit a Kriging interpolation model to 72 | the training data. This method shouldn't be called directly.""" 73 | if self.type == "Ordinary": 74 | self.ok = OrdinaryKriging( 75 | X[:, 0], 76 | X[:, 1], 77 | y, 78 | variogram_model=self.variogram_model, 79 | enable_plotting=self.plotting, 80 | coordinates_type=self.coordinate_type, 81 | nlags=self.nlags, 82 | ) 83 |  84 | elif self.type == "Universal": 85 | self.uk = UniversalKriging( 86 | X[:, 0], 87 | X[:, 1], 88 | y, 89 | variogram_model=self.variogram_model, 90 | enable_plotting=self.plotting, nlags=self.nlags, 91 | ) 92 |  93 | else: 94 | raise ValueError( 95 | "Choose either Universal or Ordinary - the given argument is neither" 96 | ) 97 |  98 | return self 99 |  100 | def _predict_grid(self, x1lim, x2lim): 101 | """The method that returns the interpolated data from Kriging interpolation 102 | on a grid. This method shouldn't be called directly""" 103 | lims = (*x1lim, *x2lim) 104 | x1min, x1max, x2min, x2max = lims 105 | x1 = np.linspace(x1min, x1max, self.resolution) 106 | x2 = np.linspace(x2min, x2max, self.resolution) 107 |  108 | if self.ok is not None: 109 | predictions, self.variance = self.ok.execute( 110 | style="grid", xpoints=x1, ypoints=x2 111 | ) 112 |  113 | else: 114 | predictions, self.variance = self.uk.execute( 115 | style="grid", xpoints=x1, ypoints=x2 116 | ) 117 |  118 | return predictions 119 |  120 | def _predict(self, X): 121 | """The method that returns the interpolated data from Kriging 122 | in a pointwise manner. This method shouldn't be called directly.""" 123 | if self.ok is not None: 124 | predictions, self.variance = self.ok.execute( 125 | style="points", xpoints=X[:, 0], ypoints=X[:, 1] 126 | ) 127 |  128 | else: 129 | predictions, self.variance = self.uk.execute( 130 | style="points", xpoints=X[:, 0], ypoints=X[:, 1] 131 | ) 132 |  133 | return predictions 134 |  135 | def return_variance(self): 136 | """This method of the Kriging class returns the variance at the interpolated 137 | points, provided require_variance was set to True while instantiating the object. 138 | """ 139 | if self.require_variance: 140 | return self.variance 141 |  142 | else: 143 | print( 144 | "Variance not asked for, while instantiating the object.
Returning None" 145 | ) 146 | return None 147 | -------------------------------------------------------------------------------- /polire/gp/tests/GP interpolation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from pykrige import OrdinaryKriging" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 4, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd\n", 19 | "import numpy as np" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 38, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 10, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "ok = OrdinaryKriging(data[:,0],data[:,1],data[:,2])\n", 36 | "ok.ex" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 43, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "a,b = ok.execute('grid',x[0],y[:,0])" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 61, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "from pykrige import OrdinaryKriging\n", 55 | "import pandas as pd\n", 56 | "import numpy as np\n", 57 | "\n", 58 | "def ordinary_kriging(dataset, resolution='standard', coordinate_type='euclidean',verbose='False',method='grid', isvariance = False):\n", 59 | " if coordinate_type == 'latlong_small':\n", 60 | " \"\"\"\n", 61 | " Assume that the Earth is a Sphere, and use polar coordinates\n", 62 | " $| \\vec{r_2}− \\vec{r_1}| ≈ \\text{R }\\times \\sqrt[]{(Lat_2 - Lat_1)^{2} + (Long_2 - Long_1)^{2}}$\n", 63 | " \"\"\"\n", 64 | " return \"To be done later\"\n", 65 | " if coordinate_type == 'latlong_large':\n", 66 | " \"\"\"\n", 67 | " Code to be written after understanding all the projections.\n", 68 | " \"\"\"\n", 69 | " return \"To be done later\"\n", 70 | " if coordinate_type==\"euclidean\":\n", 71 | " \n", 72 | " ok = OrdinaryKriging(dataset[:,0],dataset[:,1],dataset[:,2])\n", 73 | " X = dataset[:,0]\n", 74 | " y = dataset[:,1]\n", 75 | " \n", 76 | " if resolution=='high':\n", 77 | " xx,yy = make_grid(X,y,1000)\n", 78 | " \n", 79 | " elif resolution=='low':\n", 80 | " xx,yy = make_grid(X,y,10)\n", 81 | " \n", 82 | " elif resolution=='standard':\n", 83 | " xx,yy = make_grid(X,y,100)\n", 84 | " \n", 85 | " else:\n", 86 | " print('Value Error - Resolution can only be one of \\nhigh, low or standard')\n", 87 | " \n", 88 | " values, variances = ok.execute(method, xx[0], yy[:,0])\n", 89 | " \n", 90 | " if isvariance:\n", 91 | " return values, variances\n", 92 | " else:\n", 93 | " del variances\n", 94 | " return np.array(values)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 62, 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "data": { 104 | "text/plain": [ 105 | "array([[129.94984945, 129.7682324 , 129.58820662, ..., 159.34079485,\n", 106 | " 159.99175016, 160.63241067],\n", 107 | " [130.22090025, 130.03615966, 129.8529146 , ..., 159.9575165 ,\n", 108 | " 160.61228126, 161.25625641],\n", 109 | " [130.50105231, 130.31324536, 130.12683652, ..., 160.59265384,\n", 110 | " 161.25084023, 161.8977369 ],\n", 111 | " ...,\n", 112 | " [207.22133238, 207.82739139, 208.44615116, ..., 248.64646661,\n", 113 | " 248.3790241 , 248.11033441],\n", 114 | " [207.92838926, 208.53490708, 209.15376273, ..., 248.91678379,\n", 115 | " 
248.65601627, 248.39371596],\n", 116 | " [208.61942088, 209.22595474, 209.84445913, ..., 249.17442481,\n", 117 | " 248.9203453 , 248.66446245]])" 118 | ] 119 | }, 120 | "execution_count": 62, 121 | "metadata": {}, 122 | "output_type": "execute_result" 123 | } 124 | ], 125 | "source": [ 126 | "ordinary_kriging(data)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "* What does ok('points') really do?\n", 134 | "* Specifically test when points aren't really passed - they are let's say the point of an array\n", 135 | "* Returns the diagonal matrix of all these coordinates" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 63, 141 | "metadata": { 142 | "scrolled": true 143 | }, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/plain": [ 148 | "array([129.94984945, 130.03615966, 130.12683652, 130.22219703,\n", 149 | " 130.32258826, 130.42839089, 130.54002324, 130.65794596,\n", 150 | " 130.7826674 , 130.91474976, 131.05481629, 131.20355964,\n", 151 | " 131.36175158, 131.53025441, 131.71003442, 131.90217771,\n", 152 | " 132.107909 , 132.32861401, 132.56586607, 132.82145795,\n", 153 | " 133.0974399 , 133.39616477, 133.72034153, 134.07309736,\n", 154 | " 134.45804822, 134.87937482, 135.34189663, 135.85112772,\n", 155 | " 136.41328222, 137.03517039, 137.72388496, 138.48612122,\n", 156 | " 139.326921 , 140.24763047, 141.24300526, 142.29757046,\n", 157 | " 143.37881815, 144.38425962, 144.49187978, 143.1202101 ,\n", 158 | " 141.66667134, 140.45686022, 139.66795657, 142.48270308,\n", 159 | " 147.03665055, 151.8487008 , 156.90272514, 162.25791164,\n", 160 | " 168.04938768, 173.63870768, 180.93567147, 190.3440156 ,\n", 161 | " 199.86834472, 208.48375248, 215.75635742, 222.1915652 ,\n", 162 | " 228.08641413, 233.15249702, 236.89713686, 239.83524192,\n", 163 | " 242.45744315, 244.57483343, 245.52139699, 245.88236757,\n", 164 | " 246.12295211, 246.3306567 , 246.52369882, 246.70598807,\n", 165 | " 246.87792737, 247.03919426, 247.18952217, 247.3288843 ,\n", 166 | " 247.45749059, 247.57573348, 247.68412862, 247.78326467,\n", 167 | " 247.87376505, 247.95626051, 248.03137024, 248.09968963,\n", 168 | " 248.16178271, 248.21817801, 248.26936683, 248.31580309,\n", 169 | " 248.35790422, 248.39605277, 248.43059841, 248.46186013,\n", 170 | " 248.49012851, 248.51566797, 248.53871897, 248.55950011,\n", 171 | " 248.57821004, 248.59502931, 248.61012204, 248.62363741,\n", 172 | " 248.63571111, 248.64646661, 248.65601627, 248.66446245])" 173 | ] 174 | }, 175 | "execution_count": 63, 176 | "metadata": {}, 177 | "output_type": "execute_result" 178 | } 179 | ], 180 | "source": [ 181 | "ordinary_kriging(data,method='points')" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "def make_grid(X,y,res):\n", 191 | " y_min = y.min()-0.2\n", 192 | " y_max = y.max()+0.2\n", 193 | " x_min = X.min()-0.2\n", 194 | " x_max = X.max()+0.2\n", 195 | " x_arr = np.linspace(x_min,x_max,res)\n", 196 | " y_arr = np.linspace(y_min,y_max,res)\n", 197 | " xx,yy = np.meshgrid(x_arr,y_arr) \n", 198 | " return xx,yy\n", 199 | "x, y = make_grid(data[:,0],data[:,1],100)" 200 | ] 201 | } 202 | ], 203 | "metadata": { 204 | "kernelspec": { 205 | "display_name": "Python 3", 206 | "language": "python", 207 | "name": "python3" 208 | }, 209 | "language_info": { 210 | "codemirror_mode": { 211 | "name": "ipython", 212 | "version": 3 213 | }, 214 | "file_extension": ".py", 215 | 
"mimetype": "text/x-python", 216 | "name": "python", 217 | "nbconvert_exporter": "python", 218 | "pygments_lexer": "ipython3", 219 | "version": "3.6.8" 220 | } 221 | }, 222 | "nbformat": 4, 223 | "nbformat_minor": 2 224 | } 225 | -------------------------------------------------------------------------------- /polire/kriging/tests/Kriging Interpolation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from pykrige import OrdinaryKriging" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 4, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd\n", 19 | "import numpy as np" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 38, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 10, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "ok = OrdinaryKriging(data[:,0],data[:,1],data[:,2])\n", 36 | "ok.ex" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 43, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "a,b = ok.execute('grid',x[0],y[:,0])" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 61, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "from pykrige import OrdinaryKriging\n", 55 | "import pandas as pd\n", 56 | "import numpy as np\n", 57 | "\n", 58 | "def ordinary_kriging(dataset, resolution='standard', coordinate_type='euclidean',verbose='False',method='grid', isvariance = False):\n", 59 | " if coordinate_type == 'latlong_small':\n", 60 | " \"\"\"\n", 61 | " Assume that the Earth is a Sphere, and use polar coordinates\n", 62 | " $| \\vec{r_2}− \\vec{r_1}| ≈ \\text{R }\\times \\sqrt[]{(Lat_2 - Lat_1)^{2} + (Long_2 - Long_1)^{2}}$\n", 63 | " \"\"\"\n", 64 | " return \"To be done later\"\n", 65 | " if coordinate_type == 'latlong_large':\n", 66 | " \"\"\"\n", 67 | " Code to be written after understanding all the projections.\n", 68 | " \"\"\"\n", 69 | " return \"To be done later\"\n", 70 | " if coordinate_type==\"euclidean\":\n", 71 | " \n", 72 | " ok = OrdinaryKriging(dataset[:,0],dataset[:,1],dataset[:,2])\n", 73 | " X = dataset[:,0]\n", 74 | " y = dataset[:,1]\n", 75 | " \n", 76 | " if resolution=='high':\n", 77 | " xx,yy = make_grid(X,y,1000)\n", 78 | " \n", 79 | " elif resolution=='low':\n", 80 | " xx,yy = make_grid(X,y,10)\n", 81 | " \n", 82 | " elif resolution=='standard':\n", 83 | " xx,yy = make_grid(X,y,100)\n", 84 | " \n", 85 | " else:\n", 86 | " print('Value Error - Resolution can only be one of \\nhigh, low or standard')\n", 87 | " \n", 88 | " values, variances = ok.execute(method, xx[0], yy[:,0])\n", 89 | " \n", 90 | " if isvariance:\n", 91 | " return values, variances\n", 92 | " else:\n", 93 | " del variances\n", 94 | " return np.array(values)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 62, 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "data": { 104 | "text/plain": [ 105 | "array([[129.94984945, 129.7682324 , 129.58820662, ..., 159.34079485,\n", 106 | " 159.99175016, 160.63241067],\n", 107 | " [130.22090025, 130.03615966, 129.8529146 , ..., 159.9575165 ,\n", 108 | " 160.61228126, 161.25625641],\n", 109 | " [130.50105231, 130.31324536, 130.12683652, ..., 160.59265384,\n", 110 | " 161.25084023, 161.8977369 ],\n", 111 | " ...,\n", 112 | " 
[207.22133238, 207.82739139, 208.44615116, ..., 248.64646661,\n", 113 | " 248.3790241 , 248.11033441],\n", 114 | " [207.92838926, 208.53490708, 209.15376273, ..., 248.91678379,\n", 115 | " 248.65601627, 248.39371596],\n", 116 | " [208.61942088, 209.22595474, 209.84445913, ..., 249.17442481,\n", 117 | " 248.9203453 , 248.66446245]])" 118 | ] 119 | }, 120 | "execution_count": 62, 121 | "metadata": {}, 122 | "output_type": "execute_result" 123 | } 124 | ], 125 | "source": [ 126 | "ordinary_kriging(data)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "* What does ok('points') really do?\n", 134 | "* Specifically test when points aren't really passed - they are let's say the point of an array\n", 135 | "* Returns the diagonal matrix of all these coordinates" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 63, 141 | "metadata": { 142 | "scrolled": true 143 | }, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/plain": [ 148 | "array([129.94984945, 130.03615966, 130.12683652, 130.22219703,\n", 149 | " 130.32258826, 130.42839089, 130.54002324, 130.65794596,\n", 150 | " 130.7826674 , 130.91474976, 131.05481629, 131.20355964,\n", 151 | " 131.36175158, 131.53025441, 131.71003442, 131.90217771,\n", 152 | " 132.107909 , 132.32861401, 132.56586607, 132.82145795,\n", 153 | " 133.0974399 , 133.39616477, 133.72034153, 134.07309736,\n", 154 | " 134.45804822, 134.87937482, 135.34189663, 135.85112772,\n", 155 | " 136.41328222, 137.03517039, 137.72388496, 138.48612122,\n", 156 | " 139.326921 , 140.24763047, 141.24300526, 142.29757046,\n", 157 | " 143.37881815, 144.38425962, 144.49187978, 143.1202101 ,\n", 158 | " 141.66667134, 140.45686022, 139.66795657, 142.48270308,\n", 159 | " 147.03665055, 151.8487008 , 156.90272514, 162.25791164,\n", 160 | " 168.04938768, 173.63870768, 180.93567147, 190.3440156 ,\n", 161 | " 199.86834472, 208.48375248, 215.75635742, 222.1915652 ,\n", 162 | " 228.08641413, 233.15249702, 236.89713686, 239.83524192,\n", 163 | " 242.45744315, 244.57483343, 245.52139699, 245.88236757,\n", 164 | " 246.12295211, 246.3306567 , 246.52369882, 246.70598807,\n", 165 | " 246.87792737, 247.03919426, 247.18952217, 247.3288843 ,\n", 166 | " 247.45749059, 247.57573348, 247.68412862, 247.78326467,\n", 167 | " 247.87376505, 247.95626051, 248.03137024, 248.09968963,\n", 168 | " 248.16178271, 248.21817801, 248.26936683, 248.31580309,\n", 169 | " 248.35790422, 248.39605277, 248.43059841, 248.46186013,\n", 170 | " 248.49012851, 248.51566797, 248.53871897, 248.55950011,\n", 171 | " 248.57821004, 248.59502931, 248.61012204, 248.62363741,\n", 172 | " 248.63571111, 248.64646661, 248.65601627, 248.66446245])" 173 | ] 174 | }, 175 | "execution_count": 63, 176 | "metadata": {}, 177 | "output_type": "execute_result" 178 | } 179 | ], 180 | "source": [ 181 | "ordinary_kriging(data,method='points')" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "def make_grid(X,y,res):\n", 191 | " y_min = y.min()-0.2\n", 192 | " y_max = y.max()+0.2\n", 193 | " x_min = X.min()-0.2\n", 194 | " x_max = X.max()+0.2\n", 195 | " x_arr = np.linspace(x_min,x_max,res)\n", 196 | " y_arr = np.linspace(y_min,y_max,res)\n", 197 | " xx,yy = np.meshgrid(x_arr,y_arr) \n", 198 | " return xx,yy\n", 199 | "x, y = make_grid(data[:,0],data[:,1],100)" 200 | ] 201 | } 202 | ], 203 | "metadata": { 204 | "kernelspec": { 205 | "display_name": "Python 3", 206 | "language": 
"python", 207 | "name": "python3" 208 | }, 209 | "language_info": { 210 | "codemirror_mode": { 211 | "name": "ipython", 212 | "version": 3 213 | }, 214 | "file_extension": ".py", 215 | "mimetype": "text/x-python", 216 | "name": "python", 217 | "nbconvert_exporter": "python", 218 | "pygments_lexer": "ipython3", 219 | "version": "3.6.8" 220 | } 221 | }, 222 | "nbformat": 4, 223 | "nbformat_minor": 2 224 | } 225 | -------------------------------------------------------------------------------- /polire/natural_neighbors/natural_neighbors.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is a module for Natural Neighbors Interpolation 3 | """ 4 | 5 | import numpy as np 6 | from scipy.spatial import Voronoi, voronoi_plot_2d 7 | import matplotlib.pyplot as plt 8 | from ..base import Base 9 | from shapely.geometry import Point 10 | from shapely.geometry.polygon import Polygon 11 | from math import atan2 12 | from copy import deepcopy 13 | 14 | 15 | def is_row_in_array(row, arr): 16 | return list(row) in arr.tolist() 17 | 18 | 19 | def get_index(row, arr): 20 | t1 = np.where(arr[:, 0] == row[0]) 21 | t2 = np.where(arr[:, 1] == row[1]) 22 | index = np.intersect1d(t1, t2)[0] 23 | # If length of index exceeds one!! - Uniqueness Error 24 | return index 25 | 26 | 27 | def order_poly(vertices): 28 | """This function essentially is used to order the vertices 29 | of the Voronoi polygon in a clockwise manner. This ensures 30 | that Shapely doesn't produce Polygon objects that are potentially 31 | non-convex and non-zero area. 32 | 33 | Arguments 34 | --------- 35 | vertices : {array-like, 2D matrix} 36 | This contains the list of vertices of the Polygon to be sorted 37 | 38 | Returns 39 | ------- 40 | new_vertices : {array-like, 2D matrix} 41 | All the vertices reordered in a clockwise manner 42 | """ 43 | mean_x = np.mean(vertices[:, 0]) 44 | mean_y = np.mean(vertices[:, 1]) 45 | 46 | def condition(x): 47 | """This is the condition to be used while sorting. We convert the coordinates 48 | to Polar and sort the points 49 | """ 50 | return atan2(x[0] - mean_x, x[1] - mean_y) * 180 / np.pi 51 | 52 | return sorted(vertices, key=condition) 53 | 54 | 55 | class NaturalNeighbor(Base): 56 | """Class used for natural neighbors interpolation. This method is an implementation first 57 | proposed by Sibson et al. [1] in 1981. We use the weights derived using the work in [1] 58 | and leave it for future addition, the use of Laplace Weights [2]. 59 | 60 | Parameters 61 | ---------- 62 | weights: str, optional 63 | This defines the type of weights to be used for natural neighbor interpolation. 64 | We use Sibson Weights, and plan to add Laplace weights in the future 65 | Default value is "sibson" 66 | 67 | display: Boolean, optional 68 | True value displays the voronoi tesselation to the user after fitting the model. 69 | Default value is False. 70 | 71 | Notes 72 | ----- 73 | This is for contributors: 74 | The way in which part of the code is used is in the assumption that 75 | we use the data's ordering to find its voronoi partitions. 76 | 77 | References 78 | ---------- 79 | [1] Sibson, R. (1981). "A brief description of natural neighbor interpolation (Chapter 2)". In V. Barnett (ed.). Interpolating Multivariate Data. Chichester: John Wiley. pp. 21–36. 80 | [2] V.V. Belikov; V.D. Ivanov; V.K. Kontorovich; S.A. Korytnik; A.Y. Semenov (1997). "The non-Sibsonian interpolation: A new method of interpolation of the values of a function on an arbitrary set of points". 
Computational mathematics and mathematical physics. 37 (1): 9–15. 81 | [3] N.H. Christ; R. Friedberg, R.; T.D. Lee (1982). "Weights of links and plaquettes in a random lattice". Nuclear Physics B. 210 (3): 337–346. 82 | """ 83 |  84 | def __init__( 85 | self, 86 | weights="sibson", 87 | display=False, 88 | resolution="standard", 89 | coordinate_type="Eucledian", 90 | ): 91 | super().__init__(resolution, coordinate_type) 92 | self.weights = weights 93 | self.X = None 94 | self.y = None 95 | self.result = None 96 | self.voronoi = None 97 | self.vertices = ( 98 | None # This variable stores the Voronoi partition's vertices 99 | ) 100 | self.vertex_poly_map = ( 101 | dict() 102 | ) # This variable stores the polygon to data point map 103 | self.display = display 104 |  105 | def _fit(self, X, y): 106 | """This function is for the natural neighbors interpolation method. 107 | This is not expected to be called directly. 108 | """ 109 | self.X = X 110 | self.y = y 111 | self.voronoi = Voronoi(X, incremental=True) 112 | self.vertices = self.voronoi.vertices 113 |  114 | self.vertex_poly_map = {i: 0 for i in range(len(X))} 115 |  116 | for i in range(len(self.X)): 117 | index = np.where(self.voronoi.point_region == i)[0][0] 118 | point = Point(self.X[index]) 119 | region = self.voronoi.regions[i] 120 | if -1 not in region and region != []: 121 | # -1 corresponds to an unbounded region - we can't have this in interpolation, 122 | # and the function returns an empty list anyway, 123 | # at least in the case of non-incremental NN 124 | p = Polygon(order_poly(self.vertices[region])) 125 | self.vertex_poly_map[index] = p 126 | # Remove all the data points that do not contribute to natural neighbor interpolation 127 | for i in range(len(self.vertex_poly_map)): 128 | if self.vertex_poly_map[i] == 0: 129 | self.vertex_poly_map.pop(i, None) 130 |  131 | if self.display: 132 | voronoi_plot_2d(self.voronoi) 133 | plt.show() 134 | self.display = False 135 |  136 | return self 137 |  138 | def _predict_grid(self, x1lim, x2lim): 139 | """Gridded interpolation for natural neighbors interpolation. This function should not 140 | be called directly. 141 | """ 142 | lims = (*x1lim, *x2lim) 143 | x1min, x1max, x2min, x2max = lims 144 | x1 = np.linspace(x1min, x1max, self.resolution) 145 | x2 = np.linspace(x2min, x2max, self.resolution) 146 | X1, X2 = np.meshgrid(x1, x2) 147 | return self._predict(np.array([X1.ravel(), X2.ravel()]).T) 148 |  149 | def _predict(self, X): 150 | """The function that is called to predict the interpolated data in natural neighbors 151 | interpolation. This should not be called directly. 152 | If an entry of the result is NaN, the formed Voronoi 153 | tessellation did not allow interpolation at that point 154 | """ 155 | result = np.zeros(len(X)) 156 | # We could potentially create as many class objects as the 157 | # length of the array to be predicted - 158 | # not a bad idea if memory is not a constraint 159 | for index in range(len(X)): 160 | if is_row_in_array(X[index], self.X): 161 | idx = get_index(X[index], self.X) 162 | # The query data point already exists, so return its known value 163 | result[index] = self.y[idx] 164 |  165 | else: 166 | # QHull objects can't be pickled and deepcopy doesn't work, 167 | # so we need to re-fit the model for each and every query data point. 168 | self._fit(self.X, self.y) 169 |  170 | vor = self.voronoi 171 | vor.add_points(np.array([X[index]])) 172 | vor.close() 173 | # We exploit the incremental processing of SciPy's Voronoi. 174 | # Re-fitting above ensures the original tessellation is preserved.
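# Sibson weights: the weight of training point i is the area of the
# intersection between the query point's newly inserted Voronoi cell
# and point i's original cell, normalised by the new cell's area.
# The code below extracts the new cell's vertices and computes
# exactly these intersection-area ratios with Shapely.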
175 | new_regions = vor.regions 176 | new_vertices = vor.vertices 177 | final_regions = [] 178 |  179 | for i in new_regions: 180 | if i != [] and -1 not in i: 181 | final_regions.append(i) 182 |  183 | new = [] # this stores the vertices of the newly created Voronoi cell 184 | for i in range(len(new_vertices)): 185 | if new_vertices[i] not in self.vertices: 186 | new.append(new_vertices[i]) 187 | new = np.array(new) 188 | if len(new) < 3: 189 | # We need at least a triangle to interpolate - 190 | # three new Voronoi vertices form a triangle 191 | result[index] = np.nan 192 | continue 193 |  194 | weights = {} # Weights that we use for interpolation 195 | new_polygon = Polygon(order_poly(new)) 196 | new_polygon_area = new_polygon.area 197 |  198 | for i in self.vertex_poly_map: 199 | if new_polygon.intersects(self.vertex_poly_map[i]): 200 | weights[i] = ( 201 | new_polygon.intersection(self.vertex_poly_map[i]) 202 | ).area / new_polygon_area 203 |  204 | prediction = np.array( 205 | [self.y[i] * weights[i] for i in weights] 206 | ).sum() 207 | result[index] = prediction 208 | del vor, weights, new_polygon, new_polygon_area 209 |  210 | return result 211 | -------------------------------------------------------------------------------- /polire/nsgp/nsgp.py: -------------------------------------------------------------------------------- 1 | from ..base import Base 2 | import numpy as np 3 | import multiprocessing as mp 4 | from GPy.kern import Matern32, Matern52, RBF, ExpQuad 5 | from scipy.optimize import least_squares 6 |  7 |  8 | class NSGP(Base): 9 | """ 10 | A class to learn Nott and Dunsmuir's non-stationary kernel. For more information, refer to 11 | https://academic.oup.com/biomet/article-abstract/89/4/819/242307 12 |  13 | Parameters 14 | ------------ 15 |  16 | N : int, default=10 17 | Number of nearby points used to learn each kernel locally 18 |  19 | eta : int, default=1 20 | A hyperparameter used in the weighting function 21 |  22 | kernel_name : str, default='m32' ('m32', 'm52', 'rbf' or 'expqd') 23 | Type of kernel to be used 24 | """ 25 |  26 | def __init__(self, N=10, eta=1, kernel_name="m32", verbose=True): 27 | super().__init__() 28 | self.__N = N + 1 # Number of data points for local kernel learning (+1 because the nearest neighbour of a point is itself) 29 | self.__eta = eta # Eta hyperparameter for the weighting function 30 | self.__kernel_name = kernel_name 31 | self.__param_dict = { 32 | "N": self.__N, 33 | "eta": self.__eta, 34 | "kernel_name": self.__kernel_name, 35 | } 36 | self._KX_inv = None 37 |  38 | def get_all_params(self): 39 | """ 40 | Returns class parameters 41 | """ 42 | return self.__param_dict 43 |  44 | def get_param(self, param): 45 | """ 46 | Returns the value of a parameter 47 | """ 48 | return self.__param_dict[param] 49 |  50 | def __calculate_dmat(self): 51 | self.__dmat = np.zeros((self._X.shape[0], self._X.shape[0])) 52 | for i in range(self._X.shape[0]): 53 | for j in range(i, self._X.shape[0]): 54 | self.__dmat[i, j] = np.linalg.norm(self._X[i] - self._X[j]) 55 | self.__dmat[j, i] = self.__dmat[i, j] 56 |  57 | def __get_close_locs(self): 58 | self.__calculate_dmat() # Distance matrix 59 | return [ 60 | self.__dmat[i].argsort()[: self.__N] 61 | for i in range(self._X.shape[0]) 62 | ] 63 |  64 | def __weight_func(self, S): 65 | return np.exp(-(1 / self.__eta) * ((S - self._X) ** 2).sum(axis=1)) 66 |  67 | def _model(self, loc): 68 | def __D_z(sj): 69 | return self._Gamma[np.ix_(sj, sj)] 70 |  71 | def __obfunc(x): 72 | kernel = kern_dict[self.__kernel_name] 73 | kernel.variance = x[0] 74 | kernel.lengthscale = x[1:] 75 | kern_vals = 
kernel.K(self._X[self.__close_locs[loc]]) 76 | term = (__D_z(self.__close_locs[loc]) - kern_vals) / kern_vals 77 | return np.sum(term**2) 78 | 79 | # ARD can be added 80 | kern_dict = { 81 | "m32": Matern32( 82 | input_dim=self._X.shape[1], 83 | active_dims=list(range(self._X.shape[1])), 84 | ARD=True, 85 | ), 86 | "m52": Matern52( 87 | input_dim=self._X.shape[1], 88 | active_dims=list(range(self._X.shape[1])), 89 | ARD=True, 90 | ), 91 | "rbf": RBF( 92 | input_dim=self._X.shape[1], 93 | active_dims=list(range(self._X.shape[1])), 94 | ARD=True, 95 | ), 96 | "expqd": ExpQuad( 97 | input_dim=self._X.shape[1], 98 | active_dims=list(range(self._X.shape[1])), 99 | ARD=True, 100 | ), 101 | } 102 | 103 | kernel = kern_dict[self.__kernel_name] 104 | params = least_squares(__obfunc, np.ones((self._X.shape[1] + 1))).x 105 | kernel.variance = params[0] 106 | kernel.lengthscale = params[1:] 107 | return kernel.K 108 | 109 | def _c_inv(self, kern_func): 110 | return np.linalg.pinv(kern_func(self._X)) 111 | 112 | def __learnLocal(self): 113 | # self._verbose_print('Training local kernels. This may take a few moments') 114 | 115 | job = mp.Pool() 116 | self.__kernels = job.map(self._model, list(range(self._X.shape[0]))) 117 | self.__C_inv = job.map(self._c_inv, self.__kernels) 118 | job.close() 119 | 120 | # self._verbose_print('Training complete') 121 | 122 | def _Kernel(self, S1, S2=None): 123 | """ 124 | This function is for the NSGP Class. 125 | This is not expected to be called directly. 126 | """ 127 | S2exists = True 128 | if np.all(S1 == S2) or S2 is None: 129 | S2exists = False 130 | S2 = S1 131 | 132 | assert S1.shape[1] == self._X.shape[1] 133 | assert S2.shape[1] == self._X.shape[1] 134 | 135 | # Calculating Weights & c_mats 136 | self.__v_s1 = np.zeros((S1.shape[0], self._X.shape[0])) 137 | self.__v_s2 = np.zeros((S2.shape[0], self._X.shape[0])) 138 | self.__c_mat_s1 = np.zeros( 139 | (self._X.shape[0], S1.shape[0], self._X.shape[0]) 140 | ) 141 | self.__c_mat_s2 = np.zeros( 142 | (self._X.shape[0], self._X.shape[0], S2.shape[0]) 143 | ) 144 | self.__c_mat_s1s2 = np.zeros( 145 | (self._X.shape[0], S1.shape[0], S2.shape[0]) 146 | ) 147 | 148 | for s1i, s1 in enumerate(S1): 149 | s_vec = self.__weight_func(s1) 150 | self.__v_s1[s1i, :] = s_vec / s_vec.sum() 151 | if S2exists: 152 | for s2i, s2 in enumerate(S2): 153 | s_vec = self.__weight_func(s2) 154 | self.__v_s2[s2i, :] = s_vec / s_vec.sum() 155 | for i in range(self._X.shape[0]): 156 | self.__c_mat_s1[i, :, :] = self.__kernels[i](S1, self._X) 157 | self.__c_mat_s2[i, :, :] = self.__kernels[i](self._X, S2) 158 | self.__c_mat_s1s2[i, :, :] = self.__kernels[i](S1, S2) 159 | else: 160 | self.__v_s2 = self.__v_s1 161 | for i in range(self._X.shape[0]): 162 | self.__c_mat_s1[i, :, :] = self.__kernels[i](S1, self._X) 163 | self.__c_mat_s2[i, :, :] = self.__c_mat_s1[i, :, :].T 164 | self.__c_mat_s1s2[i, :, :] = self.__kernels[i](S1) 165 | 166 | # Calculating main covariance function 167 | first_term = np.zeros((S1.shape[0], S2.shape[0]), dtype="float64") 168 | for i in range(self._X.shape[0]): 169 | for j in range(self._X.shape[0]): 170 | first_term += ( 171 | self.__c_mat_s1[i, :, :] 172 | .dot(self.__C_inv[i]) 173 | .dot(self._Gamma) 174 | .dot(self.__C_inv[j]) 175 | .dot(self.__c_mat_s2[j, :, :]) 176 | ) * ( 177 | self.__v_s1[:, i] 178 | .reshape(-1, 1) 179 | .dot(self.__v_s2[:, j].reshape(1, -1)) 180 | ) 181 | 182 | second_term = np.zeros((S1.shape[0], S2.shape[0])) 183 | for i in range(self._X.shape[0]): 184 | second_term += np.sqrt( 185 | 
self.__v_s1[:, i] 186 | .reshape(-1, 1) 187 | .dot(self.__v_s2[:, i].reshape(1, -1)) 188 | ) * ( 189 | self.__c_mat_s1s2[i, :, :] 190 | - self.__c_mat_s1[i, :, :] 191 | .dot(self.__C_inv[i]) 192 | .dot(self.__c_mat_s2[i, :, :]) 193 | ) 194 |  195 | return first_term + second_term 196 |  197 | def _fit(self, X, y, ECM): 198 | """ 199 | This function is for the NSGP Class. 200 | This is not expected to be called directly. 201 | """ 202 |  203 | self._Gamma = ECM # Empirical Covariance Matrix 204 | assert type(self._Gamma) == type( 205 | np.zeros((1, 1)) 206 | ), "ECM must be a numpy array" 207 | assert self._Gamma.shape[0] == self._Gamma.shape[1] == X.shape[0], ( 208 | "ECM must have (" 209 | + str(X.shape[0]) 210 | + ", " 211 | + str(X.shape[0]) 212 | + ") shape" 213 | ) 214 |  215 | self._X = X # training features 216 | self._y = y # training values 217 | self.__param_dict["X"] = X 218 | self.__param_dict["y"] = y 219 | self.__param_dict["ECM"] = self._Gamma 220 |  221 | # Get the closest N locations for each training location 222 | self.__close_locs = self.__get_close_locs() 223 | self.__learnLocal() # Learning local kernels 224 | return self 225 |  226 | def _predict(self, X, return_cov=False): 227 | """ 228 | This function is for the NSGP Class. 229 | This is not expected to be called directly. 230 | """ 231 | if self._KX_inv is None: 232 | self._KX_inv = np.linalg.pinv(self._Kernel(self._X, self._X)) 233 | KX_test = self._Kernel(X, self._X) 234 | pred_mean = ( 235 | KX_test.dot(self._KX_inv).dot(self._y - self._y.mean()) 236 | + self._y.mean() 237 | ) 238 | if return_cov: 239 | pred_var = self._Kernel(X, X) - KX_test.dot(self._KX_inv).dot( 240 | KX_test.T 241 | ) 242 | return (pred_mean, pred_var) 243 | return pred_mean 244 | -------------------------------------------------------------------------------- /polire/idw/tests/IDW Initial.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Inverse Distance Weighting (IDW) Interpolation" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Let us suppose we have a dataset that shows the variation of one quantity of interest across space.\n", 15 | "This could be equivalently viewed as { $(\vec{x_1}, y_1)$,$(\vec{x_2}, y_2)$,$(\vec{x_3}, y_3)$, ...}, where the $\vec{x_i}$'s represent the coordinates of the points where we have data and the $y_i$'s are the actual data at those points.
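Concretely, the standard IDW estimate at a query location $\vec{x}$ is $\hat{y}(\vec{x}) = \sum_i w_i(\vec{x}) y_i / \sum_i w_i(\vec{x})$ with weights $w_i(\vec{x}) = |\vec{x} - \vec{x_i}|^{-p}$ for some exponent $p > 0$; this is the textbook IDW estimator, stated here to make properties 1 and 2 below precise.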

\n", 16 | "We would like to perform an interpolation using these data points such that a few things are satisfied.\n", 17 | "1. The interpolation is exact - the value at the known data points is the same as the estimated value, and \n", 18 | "2. We would want points far away from a given source data point to receive less importance than nearby points.\n", 19 | "3. Wikipedia has an excellent article on IDW. I am linking it [here](https://en.wikipedia.org/wiki/Inverse_distance_weighting)." 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "We are using the following approximation when coordinate_type is latlong_small
\n", 27 | "$| \\vec{r_2}− \\vec{r_1}| ≈ \\text{R }\\times \\sqrt[]{(Lat_2 - Lat_1)^{2} + (Long_2 - Long_1)^{2}}$" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 1, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "import numpy as np\n", 37 | "import pandas as pd\n", 38 | "df = pd.read_csv('../../testdata/30-03-18.csv')\n", 39 | "data = np.array(df[['longitude','latitude','value']])" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 2, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "def make_grid(X,y,res):\n", 49 | " y_min = y.min()-0.2\n", 50 | " y_max = y.max()+0.2\n", 51 | " x_min = X.min()-0.2\n", 52 | " x_max = X.max()+0.2\n", 53 | " x_arr = np.linspace(x_min,x_max,res)\n", 54 | " y_arr = np.linspace(y_min,y_max,res)\n", 55 | " xx,yy = np.meshgrid(x_arr,y_arr) \n", 56 | " return xx,yy\n", 57 | "\n", 58 | "def idw(dataset, exponent = 2, resolution='standard', coordinate_type='euclidean',verbose='False'):\n", 59 | " \"\"\"\n", 60 | " Here X is the set of spatial locations - Usually assumed to be Lat-Long\n", 61 | " To be extended to higher dimenstions y - estimated value , exponenet - how\n", 62 | " much weight to assign to far off locations to be estimated for each data point, \n", 63 | " extent - interpolate over a grid - what is xmax xmin ymax ymin\n", 64 | " \"\"\"\n", 65 | " if coordinate_type == 'latlong_small':\n", 66 | " \"\"\"\n", 67 | " Assume that the Earth is a Sphere, and use polar coordinates\n", 68 | " $| \\vec{r_2}− \\vec{r_1}| ≈ \\text{R }\\times \\sqrt[]{(Lat_2 - Lat_1)^{2} + (Long_2 - Long_1)^{2}}$\n", 69 | " \"\"\"\n", 70 | " return \"To be done later\"\n", 71 | " if coordinate_type == 'latlong_large':\n", 72 | " \"\"\"\n", 73 | " Code to be written after understanding all the projections.\n", 74 | " \"\"\"\n", 75 | " return \"To be done later\"\n", 76 | " if coordinate_type==\"euclidean\":\n", 77 | " \n", 78 | "# print(dataset)\n", 79 | " X = dataset[:,0]\n", 80 | " y = dataset[:,1]\n", 81 | " if resolution=='high':\n", 82 | " xx,yy = make_grid(X,y,1000)\n", 83 | " \n", 84 | " if resolution=='low':\n", 85 | " xx,yy = make_grid(X,y,10)\n", 86 | " \n", 87 | " if resolution=='standard':\n", 88 | " xx,yy = make_grid(X,y,100)\n", 89 | " \n", 90 | " new = []\n", 91 | " new_arr = dataset\n", 92 | " for points in new_arr:\n", 93 | " mindist = np.inf\n", 94 | " val = 0\n", 95 | " for j in range(len(yy)):\n", 96 | " temp = yy[j][0]\n", 97 | " for i in range(len(xx[0])):\n", 98 | " dist = np.linalg.norm(np.array([xx[0][i],temp]) - points[:2])\n", 99 | " if dist" 342 | ] 343 | }, 344 | "execution_count": 6, 345 | "metadata": {}, 346 | "output_type": "execute_result" 347 | } 348 | ], 349 | "source": [ 350 | "a = idw()\n", 351 | "import pandas as pd\n", 352 | "df = pd.read_csv('../../testdata/30-03-18.csv')\n", 353 | "data = np.array(df[['longitude','latitude','value']])\n", 354 | "a.fit(data[:,:2],data[:,2])" 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "execution_count": 5, 360 | "metadata": {}, 361 | "outputs": [ 362 | { 363 | "data": { 364 | "text/plain": [ 365 | "array([[171.89189189, 171.89597641, 171.90813547, ..., 173.89050472,\n", 366 | " 173.89261459, 173.89466512],\n", 367 | " [171.77142857, 171.77625338, 171.79060316, ..., 173.89585441,\n", 368 | " 173.89787202, 173.89983245],\n", 369 | " [171.63636364, 171.64211895, 171.65921778, ..., 173.9012935 ,\n", 370 | " 173.90321551, 173.90508269],\n", 371 | " ...,\n", 372 | " [174.49681529, 174.49676176, 174.49660126, ..., 
174.24671184,\n", 373 | " 174.24416446, 174.24164382],\n", 374 | " [174.49056604, 174.49051451, 174.49035999, ..., 174.24671343,\n", 375 | " 174.24419773, 174.2417078 ],\n", 376 | " [174.48447205, 174.48442242, 174.48427358, ..., 174.2466762 ,\n", 377 | " 174.24419219, 174.24173298]])" 378 | ] 379 | }, 380 | "execution_count": 5, 381 | "metadata": {}, 382 | "output_type": "execute_result" 383 | } 384 | ], 385 | "source": [ 386 | "a.interpolated_values" 387 | ] 388 | } 389 | ], 390 | "metadata": { 391 | "kernelspec": { 392 | "display_name": "Python 3", 393 | "language": "python", 394 | "name": "python3" 395 | }, 396 | "language_info": { 397 | "codemirror_mode": { 398 | "name": "ipython", 399 | "version": 3 400 | }, 401 | "file_extension": ".py", 402 | "mimetype": "text/x-python", 403 | "name": "python", 404 | "nbconvert_exporter": "python", 405 | "pygments_lexer": "ipython3", 406 | "version": "3.6.8" 407 | } 408 | }, 409 | "nbformat": 4, 410 | "nbformat_minor": 2 411 | } 412 | --------------------------------------------------------------------------------
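To close, a minimal end-to-end sketch of the interpolation API implemented in polire/base/base.py above. It is illustrative only: the data are made up, and it assumes that Kriging and NaturalNeighbor are importable from the top-level polire namespace.

```python
import numpy as np
from polire import Kriging, NaturalNeighbor  # assumed top-level exports

rng = np.random.default_rng(0)
X = rng.uniform(0.0, 10.0, size=(50, 2))   # 50 scattered (x1, x2) sites
y = np.sin(X[:, 0]) + np.cos(X[:, 1])      # synthetic field at those sites

for model in (Kriging(type="Ordinary"), NaturalNeighbor()):
    model.fit(X, y)                        # Base.fit validates shapes, stores bounds
    pts = model.predict(np.array([[5.0, 5.0], [2.5, 7.5]]))
    grid = model.predict_grid()            # defaults to the data's bounding box
    print(model, pts.shape, grid.shape)    # e.g. Kriging (2,) (100, 100)
```

Both classes inherit fit, predict, and predict_grid from Base, so they are interchangeable in the loop; only their constructors differ.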