'''
COPYRIGHT sebastiano.bontorin@unitn.it

Implementation of the Grassberger-Procaccia algorithm to estimate the
correlation dimension of the attractor reconstructed (via time-delay
embedding) from a timeseries of scalars.
'''

import numpy as np
from scipy import stats  # not used below; kept so the module namespace is unchanged
from scipy.spatial.distance import pdist

try:
    import matplotlib.pyplot as plt
except ImportError:  # matplotlib is only required when plot=True
    plt = None


def td_embedding(data,emb,tau):
    '''
    Time delay embedding of a timeseries of scalars
    Args:
        data: array-like of scalars (the timeseries)
        emb: (int) embedding dimension, has to be >= 1
        tau: (int) time delay between values in phase space reconstruction,
             has to be >= 1
    Returns:
        array with len(data) - (emb-1)*tau rows, the i-th row being
        [x[i],x[i+tau],x[i+2*tau],...,x[i + (emb-1)*tau]]
        The array has zero rows (shape (0, emb)) when the timeseries is
        too short for the requested embedding.
    Raises:
        ValueError: if emb < 1 or tau < 1
    '''
    if emb < 1 or tau < 1:
        raise ValueError("emb and tau have to be >= 1")

    # Plain Python sequences do not support indexing with an index array
    data = np.asarray(data)

    offsets = np.arange(emb) * tau
    n_vectors = max(len(data) - (emb - 1) * tau, 0)

    # Row i of the index matrix is i + [0, tau, 2*tau, ...]
    starts = np.arange(n_vectors)[:, np.newaxis]
    return data[starts + offsets]


def logarithmic_r(min_n, max_n, factor):
    '''
    Creates array of values distributed such as log(values) is an array of
    evenly spaced (space between values = log(factor)) values between
    log(min_n) and log(max_n)
    Args:
        min_n: minimum value ( > 0 )
        max_n: maximum value ( > min_n )
        factor: log(factor) is the space between values ( factor > 1 )
    Returns:
        min_n, min_n * factor, min_n * factor^2, ... min_n * factor^i
        where i is the largest integer such that min_n * factor^i <= max_n
        (up to floating point rounding)
    Raises:
        ValueError: if min_n <= 0, max_n <= min_n or factor <= 1
    '''
    if min_n <= 0:
        raise ValueError("arg1 has to be > 0")
    if max_n <= min_n:
        raise ValueError("arg1 has to be < arg2")
    if factor <= 1:
        raise ValueError("factor(arg3) has to be > 1")
    max_i = int(np.floor(np.log(1.0 * max_n / min_n) / np.log(factor)))
    return min_n * factor ** np.arange(max_i + 1)


def correlation_sums(orbit, r_vals):
    '''
    Correlation sum C(r) of a set of points for every distance r in r_vals:

        C(r) = 2 / (N * (N - 1)) * #{ pairs i < j : |x_i - x_j| < r }

    Only distinct pairs (i < j) are counted: a point is never paired with
    itself, otherwise every C(r) would be shifted by 2/(N-1).
    Args:
        orbit: array-like with one point per row
        r_vals: array-like of threshold distances
    Returns:
        array with one correlation sum per value of r_vals
    Raises:
        ValueError: if orbit holds fewer than two points
    '''
    points = np.asarray(orbit, dtype=float)
    n_points = len(points)
    if n_points < 2:
        raise ValueError("at least two points are needed to compute a correlation sum")

    # pdist wants a 2-D array; flattening each point keeps the euclidean
    # (Frobenius) distance unchanged
    points = points.reshape(n_points, -1)

    # Condensed vector of the N*(N-1)/2 euclidean distances between pairs i < j
    pair_distances = np.sort(pdist(points))

    # side='left' -> number of distances strictly smaller than each r
    n_close = np.searchsorted(pair_distances, r_vals, side='left')
    return 2.0 * n_close / (n_points * (n_points - 1))


def grassberg_procaccia(data,emb_dim,time_delay,plot = None):
    '''
    Implementation of the Grassberger-Procaccia algorithm to estimate the
    correlation dimension of a set of points in an m-dimensional space.

    This code takes in input a timeseries of scalar values and the embedding
    dimension + time delay necessary to perform a time-delay embedding in
    phase space to reconstruct the attractor

    Args:
        data: array-like of scalars - a timeseries
        emb_dim: (int) embedding dimension
        time_delay: (int) time delay between values in phase space reconstruction
    Kwargs:
        plot: if set to True: plots the logarithm of the correlation
              sums against the logarithm of the set of values of r considered
              in the algorithm (requires matplotlib)

    r is the scaling factor, it tells the threshold distance between points.
    if we have a plateau of local slopes means that we are in a scaling range.

    Returns:
        Correlation dimension (scalar)

    Raises:
        ValueError: if the embedding leaves fewer than two points, if the
                    timeseries has no spread (standard deviation not > 0),
                    or if fewer than two of the tested distances r contain
                    at least one pair of points
        ImportError: if plot is set but matplotlib is not installed
    '''

    data = np.asarray(data)

    # Phase space points reconstructed via time-delay embedding
    orbit = td_embedding(data, emb_dim, time_delay)
    if len(orbit) < 2:
        raise ValueError("timeseries is too short for this emb_dim and time_delay")

    # Timeseries standard deviation
    data_std = np.std(data)
    if not data_std > 0:
        raise ValueError("timeseries standard deviation has to be > 0")

    # Generate a series of r distances evenly spaced in log scale, these are
    # generated starting from the timeseries of scalars standard deviation.
    # The r distance is a scalar used to find the fraction of points in phase
    # space for which the euclidean distance between them is smaller than r
    r_vals = logarithmic_r(0.1 * data_std, 0.7 * data_std, 1.03)

    # Correlation sum for every r
    C_r = correlation_sums(orbit, r_vals)

    # log2(0) is -inf: drop the distances at which no pair of points is found.
    # C(r) never decreases with r, so what is left is a contiguous range of r.
    populated = C_r > 0
    if np.count_nonzero(populated) < 2:
        raise ValueError("not enough pairs of points within the tested distances r")
    log_r = np.log2(r_vals[populated])
    log_C = np.log2(C_r[populated])

    #strong assumption: the log-log plot is assumed to be a smooth, monotonic function,
    #hence the slope in the scaling region should be the maximum gradient ( in this case
    #is taken as the mean of the five largest gradients as they are calculated for every point )
    gradients = np.sort(np.gradient(log_C, log_r))
    D = np.mean(gradients[-5:])

    if plot:
        if plt is None:
            raise ImportError("matplotlib is required when plot is set")
        # plot the trend of C(r); both axes hold log2 values
        plt.plot(log_r, log_C)
        plt.xlabel("log2(r)")
        plt.ylabel("log2(C(r))")
        plt.title("Correlation sum in log2-log2 plot. Dimension D is "+str(round(D,2)))
        plt.show()

    return D
Please refer to: http://www.scholarpedia.org/article/Grassberger-Procaccia_algorithm for more information on the Grassberger-Procaccia algorithm and the correlation dimension 6 | 7 | Correlation dimension is a fractal dimension (such as Box-counting dimension or Hausdorff dimension) and it is characteristic of the set of points. If an attractor in phase space is being studied, once the attractor is completely unfolded in m dimensions, correlation dimension becomes an invariant and further embeddings in more dimensions do not influence its value, thus remaining constant. 8 | 9 | # Test Example 10 | 11 | Test of the `grassberg_procaccia` function (found in `GP_algorithm.py`) that implements the G-P algorithm, to estimate the correlation dimension of a set of points in an m-dimensional space. This code takes as input a timeseries of scalar values and the embedding dimension (m) + time delay (tau) necessary to perform a time-delay embedding in phase space to reconstruct the attractor. See: https://en.wikipedia.org/wiki/Takens%27s_theorem for more information regarding the phase space reconstruction. 12 | 13 | Function `grassberg_procaccia` is the main reference for the implementation. Some notes: 14 | 15 | - It assumes the user has only a timeseries of scalar values coming from a dynamical system 16 | - It assumes the time delay is known 17 | 18 | Additionally, this function can be used: 19 | 20 | - It can be used to find the ideal embedding dimension m in which to unfold the attractor 21 | - While this implementation was successfully tested with Lorenz and Henon attractors, with high-dimensional attractors coming from a stochastic-deterministic hybrid dynamics (with additive or even multiplicative noise) the correlation sum in log-log shows two different scaling regions. Thus, in that case, results cannot be trusted.
22 | 23 | 24 | # Test example of the implementation of Grassberger-Procaccia algorithm 25 | 26 | 27 | ```python 28 | import matplotlib.pyplot as plt 29 | import numpy as np 30 | 31 | import GP_algorithm as gp 32 | 33 | plt.rcParams['figure.figsize'] = [5, 5] 34 | ``` 35 | 36 | ## Generation of a timeseries from Lorenz attractor 37 | 38 | The following code for the timeseries generation from the Lorenz dynamical system was directly copied from https://matplotlib.org/stable/gallery/mplot3d/lorenz_attractor.html. © Copyright 2002 - 2012 John Hunter, Darren Dale, Eric Firing, Michael Droettboom and the Matplotlib development team; 2012 - 2021 The Matplotlib development team. 39 | 40 | 41 | ```python 42 | def lorenz(x, y, z, s=10, r=28, b=2.667): 43 | '''Given: 44 | x, y, z: a point of interest in three dimensional space 45 | s, r, b: parameters defining the lorenz attractor 46 | Returns: 47 | x_dot, y_dot, z_dot: values of the lorenz attractor's partial derivatives at the point x, y, z''' 48 | x_dot = s*(y - x) 49 | y_dot = r*x - y - x*z 50 | z_dot = x*y - b*z 51 | return x_dot, y_dot, z_dot 52 | 53 | dt = 0.01 54 | num_steps = 10000 55 | 56 | # Need one more for the initial values 57 | xs = np.empty(num_steps + 1) 58 | ys = np.empty(num_steps + 1) 59 | zs = np.empty(num_steps + 1) 60 | 61 | # Set initial values 62 | xs[0], ys[0], zs[0] = (0., 1., 1.05) 63 | 64 | # Step through "time", calculating the partial derivatives at the current point and using them to estimate the next point 65 | for i in range(num_steps): 66 | x_dot, y_dot, z_dot = lorenz(xs[i], ys[i], zs[i]) 67 | xs[i + 1] = xs[i] + (x_dot * dt) 68 | ys[i + 1] = ys[i] + (y_dot * dt) 69 | zs[i + 1] = zs[i] + (z_dot * dt) 70 | 71 | # Plot the attractor 72 | ax = plt.figure().add_subplot(projection='3d') 73 | ax.plot(xs, ys, zs, lw=0.5) 74 | ax.set_title("Lorenz Attractor") 75 | plt.show() 76 | ``` 77 | 78 | 79 | 80 | ![png](README_files/README_4_0.png) 81 | 82 | 83 | 84 | # X dimension timeseries 85 | 86 | To 
test the GP algorithm we will use the previously generated timeseries coming from the X dimension to reconstruct the phase space (under the assumption that we do not know the original deterministic model and have only a timeseries of scalars) and then estimate the correlation dimension 87 | 88 | 89 | ```python 90 | # plot only X dimension, the first 1500 steps 91 | 92 | plt.plot(range(num_steps)[:1500], xs[:1500]) 93 | plt.title("Lorenz Attractor - X dimension timeseries") 94 | plt.xlabel("Step in time") 95 | plt.ylabel("X") 96 | plt.show() 97 | ``` 98 | 99 | 100 | 101 | ![png](README_files/README_6_0.png) 102 | 103 | 104 | 105 | # Correlation Dimension of X timeseries 106 | 107 | Following Takens' theorem, an embedding dimension of 2*dim + 1 is sufficient for the reconstruction (dim being the dimension of the original system from which the timeseries comes). 108 | Correlation sum C(r) scales as r^{D}. Thus the correlation dimension can be estimated as the slope of C(r) in a log-log plot 109 | 110 | 111 | ```python 112 | # Test the function for a single value 113 | emb_dim = 3 114 | # Arbitrary time delay 115 | time_delay = 20 116 | timeseries = xs[:1500] 117 | 118 | # Algorithm execution to get the dimension 119 | D = gp.grassberg_procaccia(timeseries,emb_dim,time_delay,plot = True) 120 | ``` 121 | 122 | 123 | 124 | ![png](README_files/README_8_0.png) 125 | 126 | 127 | 128 | ## Finding the best embedding dimension for the Lorenz attractor using the correlation dimension 129 | 130 | We compute the correlation dimension for different candidate embedding dimensions for the timeseries X of scalar values coming from the original Lorenz system. We find that D reaches a plateau at embedding_dim equal to 3, as the original one (Also note that the dimension given by Takens’s theorem is only an upper limit.
A lower embedding dimension may suffice) 131 | 132 | 133 | ```python 134 | # Compute the correlation dimension for a set of different embedding dimensions of the timeseries 135 | Ds = [] 136 | 137 | for emb_dim in range(1,8): 138 | 139 | time_delay = 20 140 | timeseries = xs[:1500] 141 | 142 | D = gp.grassberg_procaccia(timeseries,emb_dim,time_delay,plot = False) 143 | Ds.append(D) 144 | 145 | ``` 146 | 147 | 148 | ```python 149 | # Plot 150 | 151 | plt.plot(range(1,8),Ds,'o-') 152 | plt.xlabel("Embedding dimension") 153 | plt.ylabel("D") 154 | plt.title("Correlation dimension D versus embedding dimension") 155 | plt.show() 156 | ``` 157 | 158 | 159 | 160 | ![png](README_files/README_11_0.png) 161 | 162 | 163 | -------------------------------------------------------------------------------- /README_files/README_11_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/notsebastiano/GP_algorithm/4950e284b611a74db2d2482acaa52ed1422b2b94/README_files/README_11_0.png -------------------------------------------------------------------------------- /README_files/README_4_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/notsebastiano/GP_algorithm/4950e284b611a74db2d2482acaa52ed1422b2b94/README_files/README_4_0.png -------------------------------------------------------------------------------- /README_files/README_6_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/notsebastiano/GP_algorithm/4950e284b611a74db2d2482acaa52ed1422b2b94/README_files/README_6_0.png -------------------------------------------------------------------------------- /README_files/README_8_0.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/notsebastiano/GP_algorithm/4950e284b611a74db2d2482acaa52ed1422b2b94/README_files/README_8_0.png -------------------------------------------------------------------------------- /Test_Example_notebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# README\n", 8 | "\n", 9 | "Implementation of the Grassberger-Procaccia algorithm to estimate the correlation dimension of a set. Please refer to: http://www.scholarpedia.org/article/Grassberger-Procaccia_algorithm for more information on the Grassberger-Procaccia algorithm and the correlation dimension\n", 10 | "\n", 11 | "Correlation dimension is a fractal dimension (such as Box-counting dimension or Hausdorff dimension) and it is characteristic of the set of points. If an attractor in phase space is being studied, once the attractor is completely unfolded in m dimensions, correlation dimension becomes an invariant and further embeddings in more dimensions do not influence its value, thus remaining constant.\n", 12 | "\n", 13 | "# Notebook implementation\n", 14 | "\n", 15 | "Test of the `grassberg_procaccia` function that implements the G-P algorithm, to estimate the correlation dimension of a set of points in an m-dimensional space. This code takes as input a timeseries of scalar values and the embedding dimension (m) + time delay (tau) necessary to perform a time-delay embedding in phase space to reconstruct the attractor. See: https://en.wikipedia.org/wiki/Takens%27s_theorem for more information regarding the phase space reconstruction.\n", 16 | "\n", 17 | "Function `grassberg_procaccia` is the main reference for the implementation.
Some notes:\n", 18 | "\n", 19 | "- It assumes the user has only a timeseries of scalar values coming from a dynamical system\n", 20 | "- It assumes the time delay is known\n", 21 | "\n", 22 | "Additionally, this function can be used:\n", 23 | "\n", 24 | "- It can be used to find the ideal embedding dimension m in which to unfold the attractor \n", 25 | "- While this implementation was successfully tested with Lorenz and Henon attractors, with high-dimensional attractors coming from a stochastic-deterministic hybrid dynamics (with additive or even multiplicative noise) the correlation sum in log-log shows two different scaling regions. Thus, in that case, results cannot be trusted.\n" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "# Test example of the implementation of Grassberger-Procaccia algorithm" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 1, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "import matplotlib.pyplot as plt\n", 42 | "import numpy as np\n", 43 | "\n", 44 | "import GP_algorithm as gp\n", 45 | "\n", 46 | "plt.rcParams['figure.figsize'] = [5, 5]" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "## Generation of a timeseries from Lorenz attractor\n", 54 | "\n", 55 | "The following code for the timeseries generation from the Lorenz dynamical system was directly copied from https://matplotlib.org/stable/gallery/mplot3d/lorenz_attractor.html. © Copyright 2002 - 2012 John Hunter, Darren Dale, Eric Firing, Michael Droettboom and the Matplotlib development team; 2012 - 2021 The Matplotlib development team." 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 2, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "image/png": "\n", 66 | "text/plain": [ 67 | "
" 68 | ] 69 | }, 70 | "metadata": { 71 | "needs_background": "light" 72 | }, 73 | "output_type": "display_data" 74 | } 75 | ], 76 | "source": [ 77 | "def lorenz(x, y, z, s=10, r=28, b=2.667):\n", 78 | " '''Given:\n", 79 | " x, y, z: a point of interest in three dimensional space\n", 80 | " s, r, b: parameters defining the lorenz attractor\n", 81 | " Returns:\n", 82 | " x_dot, y_dot, z_dot: values of the lorenz attractor's partial derivatives at the point x, y, z'''\n", 83 | " x_dot = s*(y - x)\n", 84 | " y_dot = r*x - y - x*z\n", 85 | " z_dot = x*y - b*z\n", 86 | " return x_dot, y_dot, z_dot\n", 87 | "\n", 88 | "dt = 0.01\n", 89 | "num_steps = 10000\n", 90 | "\n", 91 | "# Need one more for the initial values\n", 92 | "xs = np.empty(num_steps + 1)\n", 93 | "ys = np.empty(num_steps + 1)\n", 94 | "zs = np.empty(num_steps + 1)\n", 95 | "\n", 96 | "# Set initial values\n", 97 | "xs[0], ys[0], zs[0] = (0., 1., 1.05)\n", 98 | "\n", 99 | "# Step through \"time\", calculating the partial derivatives at the current point and using them to estimate the next point\n", 100 | "for i in range(num_steps):\n", 101 | " x_dot, y_dot, z_dot = lorenz(xs[i], ys[i], zs[i])\n", 102 | " xs[i + 1] = xs[i] + (x_dot * dt)\n", 103 | " ys[i + 1] = ys[i] + (y_dot * dt)\n", 104 | " zs[i + 1] = zs[i] + (z_dot * dt)\n", 105 | "\n", 106 | "# Plot the attractor\n", 107 | "ax = plt.figure().add_subplot(projection='3d')\n", 108 | "ax.plot(xs, ys, zs, lw=0.5)\n", 109 | "ax.set_title(\"Lorenz Attractor\")\n", 110 | "plt.show()" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "# X dimension timeseries\n", 118 | "\n", 119 | "To test the GP algorithm we will use the previously generated timeseries coming from the X dimension to reconstruct back the phase space (in the assumption we have data of which we don't know the original deterministic model, and we have only a timeseries of scalars) and then estimate the Correlation dimension" 120 | ] 121 | }, 122 
| { 123 | "cell_type": "code", 124 | "execution_count": 3, 125 | "metadata": {}, 126 | "outputs": [ 127 | { 128 | "data": { 129 | "image/png": "\n", 130 | "text/plain": [ 131 | "
" 132 | ] 133 | }, 134 | "metadata": { 135 | "needs_background": "light" 136 | }, 137 | "output_type": "display_data" 138 | } 139 | ], 140 | "source": [ 141 | "# plot only X dimension, the first 1500 steps\n", 142 | "\n", 143 | "plt.plot(range(num_steps)[:1500], xs[:1500])\n", 144 | "plt.title(\"Lorenz Attractor - X dimension timeseries\")\n", 145 | "plt.xlabel(\"Step in time\")\n", 146 | "plt.ylabel(\"X\")\n", 147 | "plt.show()" 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": {}, 153 | "source": [ 154 | "# Correlation Dimension of X timeseries\n", 155 | "\n", 156 | "Following Takens theorem, the necessary embedding dimension for the reconstruction is 2*dim (2 times the original dimension from which ).\n", 157 | "Correlation sum C(r) scales as r^{D}. Thus the correlation dimension can be reconstructed in a log log plot" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 4, 163 | "metadata": { 164 | "scrolled": true 165 | }, 166 | "outputs": [ 167 | { 168 | "data": { 169 | "image/png": "\n", 170 | "text/plain": [ 171 | "
" 172 | ] 173 | }, 174 | "metadata": { 175 | "needs_background": "light" 176 | }, 177 | "output_type": "display_data" 178 | } 179 | ], 180 | "source": [ 181 | "# Test the function for a single value\n", 182 | "emb_dim = 3\n", 183 | "# Arbitrary time delay\n", 184 | "time_delay = 20\n", 185 | "timeseries = xs[:1500]\n", 186 | "\n", 187 | "# Algortuhm execution to get the dimension\n", 188 | "D = gp.grassberg_procaccia(timeseries,emb_dim,time_delay,plot = True)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": {}, 194 | "source": [ 195 | "## Finding the best embedding dimension for the Lorenz attractor using the correlation dimension\n", 196 | "\n", 197 | "We compute the correlation dimension for different candidate embedding dimensions for the timeseries X of scalar values coming from the original lorentz system. We find that D reaches a plateau at embedding_dim equal to 3, as the original one (Also note that the dimension given by Takens’s theorem is only an upper limit. A lower embedding dimension may suffice)" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 5, 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "# Compute the correlation dimension for a set of different embedding dimensions of the timeseries\n", 207 | "Ds = []\n", 208 | "\n", 209 | "for emb_dim in range(1,8):\n", 210 | " \n", 211 | " time_delay = 20\n", 212 | " timeseries = xs[:1500]\n", 213 | " \n", 214 | " D = gp.grassberg_procaccia(timeseries,emb_dim,time_delay,plot = False)\n", 215 | " Ds.append(D)\n" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 6, 221 | "metadata": {}, 222 | "outputs": [ 223 | { 224 | "data": { 225 | "image/png": "\n", 226 | "text/plain": [ 227 | "
" 228 | ] 229 | }, 230 | "metadata": { 231 | "needs_background": "light" 232 | }, 233 | "output_type": "display_data" 234 | } 235 | ], 236 | "source": [ 237 | "# Plot\n", 238 | "\n", 239 | "plt.plot(range(1,8),Ds,'o-')\n", 240 | "plt.xlabel(\"Embedding dimension\")\n", 241 | "plt.ylabel(\"D\")\n", 242 | "plt.title(\"Correlation dimension D versus embedding dimension\")\n", 243 | "plt.show()" 244 | ] 245 | } 246 | ], 247 | "metadata": { 248 | "kernelspec": { 249 | "display_name": "Python 3 (ipykernel)", 250 | "language": "python", 251 | "name": "python3" 252 | }, 253 | "language_info": { 254 | "codemirror_mode": { 255 | "name": "ipython", 256 | "version": 3 257 | }, 258 | "file_extension": ".py", 259 | "mimetype": "text/x-python", 260 | "name": "python", 261 | "nbconvert_exporter": "python", 262 | "pygments_lexer": "ipython3", 263 | "version": "3.9.9" 264 | } 265 | }, 266 | "nbformat": 4, 267 | "nbformat_minor": 2 268 | } 269 | --------------------------------------------------------------------------------