├── setup.cfg ├── takensembed ├── __init__.py └── takens_embed.py ├── README.md ├── LICENSE ├── setup.py ├── .gitignore └── examples └── CrossEmbedding.ipynb /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | -------------------------------------------------------------------------------- /takensembed/__init__.py: -------------------------------------------------------------------------------- 1 | from takens_embed import * 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | takens-embed 2 | ============ 3 | 4 | Using the Taken's embedding theorem to estimate the causality between variables. Based on Sugihara et al (2012) - Detecting Causality in Complex Ecosystems. 5 | 6 | Usage 7 | ----- 8 | General usage examples can be found in the examples folder. 9 | 10 | Installation with pip 11 | --------------------- 12 | Clone this repository, and in the terminal navigate to the folder above the repository. Then simply type: 13 | 14 | ``` 15 | pip install -e takens-embed 16 | ``` 17 | 18 | License 19 | ------- 20 | 21 | All code is Copyright (c) 2018, Sander Keemink. 22 | All rights reserved. 23 | 24 | This program is free software; you can redistribute it and/or 25 | modify it under the terms of the MIT License. 26 | 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Sander Keemink 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """Setup file.""" 2 | 3 | import os 4 | 5 | from distutils.core import setup 6 | 7 | NAME = 'takensembed' 8 | 9 | 10 | install_requires = [ 'numpy>=1.13.1', 11 | 'scipy>=0.19.1', 12 | 'future>=0.16.0', 13 | 'scikit-learn>=0.18.2'] 14 | 15 | 16 | def read(fname): 17 | """Read the readme file.""" 18 | return open(os.path.join(os.path.dirname(__file__), fname)).read() 19 | 20 | 21 | setup( 22 | name=NAME, 23 | install_requires=install_requires, 24 | version="0.1", 25 | author="Sander Keemink", 26 | author_email="swkeemink@scimail.eu", 27 | description="Uses Taken's embedding theorem to estimate causality" + 28 | " between variables. ", 29 | url="https://github.com/swkeemink/takens-embed", 30 | download_url="", 31 | package_dir={NAME: "./takensembed"}, 32 | packages=[NAME], 33 | license="MIT", 34 | long_description=read('README.md'), 35 | classifiers=[ 36 | "Natural Language :: English", 37 | "Programming Language :: Python", 38 | "Topic :: Scientific/Engineering" 39 | ] 40 | ) 41 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /takensembed/takens_embed.py: -------------------------------------------------------------------------------- 1 | """An implementation of the Taken's embedding theorem. 2 | 3 | Based on Sugihara et al (2012) - Detecting Causality in Complex Ecosystems. 4 | 5 | Author: Sander Keemink. 6 | """ 7 | import random 8 | import numpy as np 9 | from sklearn.neighbors import NearestNeighbors 10 | 11 | 12 | def get_delayed_manifold(data, tau=10, ndelay=3): 13 | """Get the delayed manifolds of the variables in data. 14 | 15 | Parameters 16 | ---------- 17 | data : array 18 | ntimepoints*nvariable data array 19 | tau : int, optional 20 | how many timepoints per delay step 21 | ndelay : int 22 | how many delay steps, optional 23 | 24 | Returns 25 | ------- 26 | array 27 | ndim*ndata*ndelay array 28 | 29 | """ 30 | N = data.shape[0] # number of data points 31 | ndim = data.shape[1] # number of input dimensions 32 | delayed_manifolds = np.zeros((ndim, N-ndelay*tau, ndelay)) 33 | for dim in range(ndim): 34 | for n in range(ndelay): 35 | delayed_manifolds[dim, :, n] = np.roll(data[:, dim], 36 | -n*tau)[:-ndelay*tau] 37 | return delayed_manifolds 38 | 39 | 40 | def findknearest(data, k): 41 | """Find k nearest neighbours. 42 | 43 | Parameters 44 | ---------- 45 | data : array 46 | Data to apply NearestNeighbors to 47 | (see sklearn.neighbors.NearestNeighbors) 48 | k : int 49 | How many neighbours to find. 50 | 51 | Returns 52 | ------- 53 | array 54 | distances to k nearest points 55 | array 56 | ids of k nearest points 57 | """ 58 | neigh = NearestNeighbors(n_neighbors=k+1) 59 | neigh.fit(data) 60 | dists, ids = neigh.kneighbors(data, n_neighbors=k+1) 61 | 62 | return dists[:, 1:], ids[:, 1:] 63 | 64 | 65 | def do_embedding(delayed_manifolds, rnge=None, 66 | randomize_coordinates=False): 67 | """Do embedding at different time-point-lengths. 68 | 69 | Parameters 70 | ---------- 71 | delayed_manifolds : array 72 | ndim*ndata*ndelay array with delayed single time courses 73 | rnge : array, optional 74 | At which time points to calculate the predictability of the variables 75 | randomize_coordinates : bool, optional 76 | If true, will randomize the delay coordinates as in Tajima et al (2015) 77 | 78 | Returns 79 | ------- 80 | array 81 | The correlations 82 | 83 | """ 84 | if rnge is None: 85 | rnge = range(20, 5000, 20) 86 | 87 | # randomize delay coordinates if requested 88 | if randomize_coordinates: 89 | delayed_randomized = np.copy(delayed_manifolds) 90 | for i in range(delayed_manifolds.shape[0]): 91 | R = np.random.normal(0, 1, (delayed_manifolds.shape[2], 92 | delayed_manifolds.shape[2])) 93 | for j in range(delayed_manifolds.shape[1]): 94 | delayed_randomized[i, j, :] = np.dot( 95 | R, delayed_manifolds[i, j, :]) 96 | delayed_manifolds = delayed_randomized 97 | 98 | # get some information about data size 99 | ndelay = delayed_manifolds.shape[2] 100 | ndims = delayed_manifolds.shape[0] 101 | N = delayed_manifolds.shape[1] 102 | 103 | # start analysis 104 | data = delayed_manifolds 105 | k = ndelay+3 # how many neighbours to find 106 | cors = np.zeros((ndims, ndims, len(rnge))) 107 | # loop over time lengths 108 | for i, l in enumerate(rnge): 109 | indices = random.sample(range(N), l) 110 | data_cut = data[:, indices, :] 111 | dists, ids, weights, preds = {}, {}, {}, {} 112 | # loop over actual dimensions 113 | for dim in range(ndims): 114 | # get nearest neighbours 115 | dists[dim], ids[dim] = findknearest(data_cut[dim, :, :], k) 116 | 117 | # get weights as per pop paper 118 | minim = dists[dim].min(axis=1) 119 | weights[dim] = np.exp(-dists[dim]/minim[:, None]) 120 | weights[dim] /= weights[dim].sum(axis=1)[:, None] 121 | # get predictions from cross embeddings for all dimension combinations 122 | for dim1 in range(ndims): # dimension to use to predict 123 | for dim2 in range(ndims): # dimension to predict 124 | points_to_use = data_cut[dim2, ids[dim1], 0] 125 | preds[dim1, dim2] = np.sum(weights[dim1][:, :]*points_to_use, 126 | axis=1) 127 | cors[dim1, dim2, i] = np.corrcoef(preds[dim1, dim2], 128 | data_cut[dim2, :, 0])[0, 1] 129 | 130 | return cors 131 | -------------------------------------------------------------------------------- /examples/CrossEmbedding.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### Cross Embedding\n", 8 | "This notebook gives a few example applications of the takensembed package.\n", 9 | "\n", 10 | "Imports first:" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "import numpy as np\n", 20 | "from swktools.plotting import plot3d\n", 21 | "import takensembed as te\n", 22 | "import holoviews as hv\n", 23 | "import matplotlib.pyplot as plt\n", 24 | "from scipy.integrate import odeint\n", 25 | "from sklearn.neighbors import NearestNeighbors\n", 26 | "hv.extension('matplotlib','bokeh')\n", 27 | "colors= hv.core.options.Cycle.default_cycles['default_colors']\n" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "Basis chaotic system from Sugihara et al (2012) - Detecting Causality in Complex Ecosystems" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "%%opts Curve {+axiswise}\n", 44 | "%%output backend='bokeh'\n", 45 | "# parameters\n", 46 | "rx = 3.8\n", 47 | "ry = 3.5\n", 48 | "bxy = .02\n", 49 | "byx = .1\n", 50 | "x1 = .4\n", 51 | "y1 = .2\n", 52 | "n = 3000\n", 53 | "sol = np.zeros((n,2))\n", 54 | "\n", 55 | "sol[0,0] = x1\n", 56 | "sol[0,1] = y1\n", 57 | "\n", 58 | "for i in range(1,n):\n", 59 | " sol[i,0] = sol[i-1,0]*(rx-rx*sol[i-1,0]-bxy*sol[i-1,1])\n", 60 | " sol[i,1] = sol[i-1,1]*(ry-ry*sol[i-1,1]-byx*sol[i-1,0])\n", 61 | "\n", 62 | "\n", 63 | "x = sol[:, 0]\n", 64 | "y = sol[:, 1]\n", 65 | "\n", 66 | "lorenzian = hv.Overlay([hv.Path(d) for d in zip(np.array_split(x, 1), np.array_split(y, 1))])\n", 67 | "lorenzian(style={'Path': dict(color=hv.Palette('Blues'), linewidth=1)})+hv.Curve(x)*hv.Curve(y)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "Show delay manifolds" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "%matplotlib nbagg\n", 84 | "tau = 1 # how many time steps to go back\n", 85 | "ndelay = 3 # how many dimensions to do for the delays\n", 86 | "delayed = te.get_delayed_manifold(sol, tau, ndelay)\n", 87 | "\n", 88 | "if ndelay == 3:\n", 89 | " fig = plt.figure()\n", 90 | " ntraj=20\n", 91 | " ax = fig.add_subplot(131, projection='3d')\n", 92 | " plot3d(delayed[0,::1,:], ntraj=ntraj, labels=['x(t)','x(t-tau)','x(t-2tau)'], ax=ax)\n", 93 | " ax = fig.add_subplot(132, projection='3d')\n", 94 | " plot3d(delayed[1,::1,:], ntraj=ntraj, labels=['y(t)','y(t-tau)','y(t-2tau)'], ax=ax)\n" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "Do cross-embedding" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "%%output backend='bokeh'\n", 111 | "reload(te)\n", 112 | "cors = te.do_embedding(delayed, range(7, 2500, 40))\n", 113 | "\n", 114 | "hv.Curve(cors[0,1,:], label='y|Mx')*hv.Curve(cors[1,0,:],label='x|My')" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "With randomized delay coordinates as in Tajima et al (2015)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "%%output backend='bokeh'\n", 131 | "\n", 132 | "cors = te.do_embedding(delayed, range(7, 2500, 40), True)\n", 133 | "\n", 134 | "hv.Curve(cors[0,1,:], label='y|Mx')*hv.Curve(cors[1,0,:],label='x|My')" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "### Do above for Lorentz attractor" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "%%output backend='bokeh' \n", 151 | "%%opts Curve {+axiswise}\n", 152 | "sigma = 10\n", 153 | "rho = 50\n", 154 | "beta = 8.0/3\n", 155 | "theta = 3 * np.pi / 4\n", 156 | "\n", 157 | "def lorenz(xyz, t):\n", 158 | " x, y, z = xyz\n", 159 | " x_dot = sigma * (y - x)\n", 160 | " y_dot = x * rho - x * z - y\n", 161 | " z_dot = x * y - beta* z\n", 162 | " return [x_dot, y_dot, z_dot]\n", 163 | "\n", 164 | "initial = (-10, -7, 35)\n", 165 | "t = np.arange(0, 10, 0.006)\n", 166 | "N = len(t)\n", 167 | "solution = odeint(lorenz, initial, t)\n", 168 | "# solution = np.random.poisson((solution+40)*10)\n", 169 | "x = solution[:, 0]\n", 170 | "y = solution[:, 1]\n", 171 | "z = solution[:, 2]\n", 172 | "xprime = np.cos(theta) * x - np.sin(theta) * y\n", 173 | "\n", 174 | "lorenzian = hv.Overlay([hv.Path(d) for d in zip(np.array_split(xprime, 1), np.array_split(z, 1))])\n", 175 | "fig = lorenzian(style={'Path': dict(color=hv.Palette('Blues'), linewidth=1)})\n", 176 | "fig+= hv.Curve(x, label='x')*hv.Curve(y, label='y')*hv.Curve(z, label='z')\n", 177 | "fig" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "fig = plt.figure()\n", 187 | "ax = fig.add_subplot(111, projection='3d')\n", 188 | "plot3d(solution[::1,:], ntraj=1, ax=ax)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": null, 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "tau = 10 # how many time steps to go back\n", 198 | "ndelay = 3 # how many dimensions to do for the delays\n", 199 | "delayed = te.get_delayed_manifold(solution, tau, ndelay)\n", 200 | "\n", 201 | "if ndelay == 3:\n", 202 | " fig = plt.figure()\n", 203 | " ntraj=20\n", 204 | " ax = fig.add_subplot(131, projection='3d')\n", 205 | " plot3d(delayed[0,::1,:], ntraj=ntraj, labels=['x(t)','x(t-tau)','x(t-2tau)'], ax=ax)\n", 206 | " ax = fig.add_subplot(132, projection='3d')\n", 207 | " plot3d(delayed[1,::1,:], ntraj=ntraj, labels=['y(t)','y(t-tau)','y(t-2tau)'], ax=ax)\n", 208 | " ax = fig.add_subplot(133, projection='3d')\n", 209 | " plot3d(delayed[2,::1,:], ntraj=ntraj, labels=['z(t)','z(t-tau)','z(t-2tau)'], ax=ax)\n" 210 | ] 211 | }, 212 | { 213 | "cell_type": "markdown", 214 | "metadata": {}, 215 | "source": [ 216 | "Normal cross-embedding" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [ 225 | "%%output backend='bokeh'\n", 226 | "cors = te.do_embedding(delayed, range(20, 1000, 20))\n", 227 | "fig = hv.Curve(cors[0,1,:], label='y|Mx')*hv.Curve(cors[1,0,:], label='x|My')\n", 228 | "fig+= hv.Curve(cors[0,2,:], label='z|Mx')*hv.Curve(cors[2,0,:], label='x|Mz')\n", 229 | "fig+= hv.Curve(cors[1,2,:], label='z|My')*hv.Curve(cors[2,1,:], label='y|Mz')\n", 230 | "fig" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "With randomized coordinates" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "%%output backend='bokeh'\n", 247 | "cors = te.do_embedding(delayed, range(20, 1000, 20),True)\n", 248 | "fig = hv.Curve(cors[0,1,:], label='y|Mx')*hv.Curve(cors[1,0,:], label='x|My')\n", 249 | "fig+= hv.Curve(cors[0,2,:], label='z|Mx')*hv.Curve(cors[2,0,:], label='x|Mz')\n", 250 | "fig+= hv.Curve(cors[1,2,:], label='z|My')*hv.Curve(cors[2,1,:], label='y|Mz')\n", 251 | "fig" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [] 260 | } 261 | ], 262 | "metadata": { 263 | "kernelspec": { 264 | "display_name": "Python 2", 265 | "language": "python", 266 | "name": "python2" 267 | }, 268 | "language_info": { 269 | "codemirror_mode": { 270 | "name": "ipython", 271 | "version": 2 272 | }, 273 | "file_extension": ".py", 274 | "mimetype": "text/x-python", 275 | "name": "python", 276 | "nbconvert_exporter": "python", 277 | "pygments_lexer": "ipython2", 278 | "version": "2.7.14" 279 | } 280 | }, 281 | "nbformat": 4, 282 | "nbformat_minor": 2 283 | } 284 | --------------------------------------------------------------------------------