├── License.md ├── PyIF ├── CPU_TE.py ├── GPU_TE.py ├── __init__.py ├── helper.py └── te_compute.py ├── README.md └── setup.py /License.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018-2019 University of Illinois 2 | All rights reserved. 3 | 4 | Developed by: Robert Brunner, Kelechi Ikegwu, Jacob Trauger, Tyson Trauger 5 | University of Illinois 6 | http://lcdm.illinois.edu 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal with the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 9 | 10 | Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimers. 11 | Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimers in the documentation and/or other materials provided with the distribution. 12 | Neither the names of Robert Brunner or the University of Illinois, nor the names of its contributors may be used to endorse or promote products derived from this Software without specific prior written permission. 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. 14 | -------------------------------------------------------------------------------- /PyIF/CPU_TE.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | from numba import njit,prange 4 | from scipy.special import digamma # to compute log derivative of gamma function 5 | 6 | 7 | @njit(nopython=True, parallel=True) 8 | def countsInKD(fn_cntX_XKY_arr, fn_cntX_XK_arr, fn_xdistXKY, fn_xdistXK, xkyPts, embedding, X): 9 | """ 10 | Computes counts for distances less than xdistXKY & xdistXK and stores them in cntX_XKY_arr and cntX_XK_arr respectively. 11 | 12 | Parameters 13 | ---------- 14 | fn_cntX_XKY_arr: array that holds how many points are within its respective XKY dist 15 | fn_cntX_XK_arr: array that holds how many points are within its respective XK dist 16 | fn_xdistXKY: array that holds the X distance in the xky space 17 | fn_xdistXK: array that holds the K distance 18 | embedding: integer containg number of lag periods to consider 19 | xkyPts: 2D array that holds points in the XKY subspace 20 | X: array that holds all X points 21 | 22 | 23 | Returns 24 | ------- 25 | No return values 26 | """ 27 | 28 | #Get distances 29 | for i in prange(len(xkyPts) - embedding): 30 | fn_cntX_XKY_arr[i] = 0 31 | fn_cntX_XK_arr[i] = 0 32 | point = xkyPts[i][0] 33 | for j in range(embedding, len(X)): 34 | difference = abs(point - X[j]) 35 | if difference <= fn_xdistXKY[i] and difference != 0: 36 | fn_cntX_XKY_arr[i] += 1 37 | if difference <= fn_xdistXK[i] and difference != 0: 38 | fn_cntX_XK_arr[i] += 1 39 | 40 | if fn_cntX_XKY_arr[i] == 0: 41 | fn_cntX_XKY_arr[i] = 1 42 | if fn_cntX_XK_arr[i] == 0: 43 | fn_cntX_XK_arr[i] = 1 44 | 45 | def compute(xkykdTree, kykdTree, xkkdTree, kkdTree, 46 | xkyPts, kyPts, xkPts, kPts, nPts, X, embedding=1, k=1): 47 | ''' 48 | Computes the TE 49 | 50 | Parameters 51 | ---------- 52 | xkykdTree: KD tree of the xky subspace 53 | kykdTree: KD tree of the ky subspace 54 | xkkdTree: KD tree of the xk subspace 55 | kkdTree: KD tree of the k subspace 56 | xkyPts: array of the points in the xky subspace 57 | kyPts: array of the points in the ky subspace 58 | xkPts: array of the xk subspace 59 | kPts: array of the k subspace 60 | nPts: total number of points 61 | X: array of the X points 62 | embedding: number of the wanted embedding value 63 | k: number of the nearest neighbors 64 | 65 | Returns 66 | ------- 67 | TE: The transfer entropy estimate 68 | ''' 69 | 70 | #variables to store the distance to the kth neighbor in different spaces 71 | tmpdist,xdistXKY,xdistXK,kydist,kdist = 0,0,0,0,0 72 | 73 | #counters for summing the digammas of the point counts. 74 | cntX_XKY, cntX_XK, cntKY_XKY, cntK_XK = 0,0,0,0 75 | 76 | # for each point in the XKY space, 77 | # Return the distance and indicies of k nearest neighbors 78 | dists, idxs = xkykdTree.query(xkyPts, k=k+1) 79 | idx = idxs[:, 1:] # Drop first index since it is a duplicate 80 | 81 | # Grab kth neighbor 82 | idx = idx[:, k-1] 83 | # Calculate the X distance and KY distance in xky space 84 | 85 | xdistXKY= np.absolute(np.subtract(xkyPts[:, 0], xkyPts[idx][:, 0])) 86 | kydist= np.absolute(np.subtract(xkyPts[:, 1:], xkyPts[idx][:, 1:])) 87 | 88 | # Take column with maximum distance 89 | kydist = np.amax(kydist, axis=1) 90 | 91 | # perform the same operations in the xk space 92 | 93 | # Returns distance and indicies of k nearest neighbors 94 | dists, idxs = xkkdTree.query(xkPts, k=k+1) 95 | idx = idxs[:, 1:] # Drop first index since it is a duplicate 96 | # Grab closest neighbors 97 | idx = idx[:, k-1] 98 | # Calculate the K distance and the XK distance. 99 | xdistXK= np.absolute(np.subtract(xkPts[:, 0], xkPts[idx][:, 0])) 100 | kdist = np.absolute(np.subtract(xkPts[:, 1:], xkPts[idx][:, 1:])) 101 | 102 | # Take column with maximum distance 103 | kdist = np.amax(kdist, axis=1) 104 | # temp counters 105 | Cnt1, Cnt2 = 0,0 106 | 107 | 108 | cntX_XKY_arr = np.zeros(len(xkyPts) - embedding, dtype="float") 109 | cntX_XK_arr = np.zeros(len(xkyPts) - embedding, dtype="float") 110 | 111 | countsInKD(cntX_XKY_arr, cntX_XK_arr, xdistXKY, xdistXK, xkyPts, embedding, X) 112 | vfunc = np.vectorize(digamma) 113 | cntX_XKY_arr = vfunc(cntX_XKY_arr) 114 | cntX_XK_arr = vfunc(cntX_XK_arr) 115 | 116 | 117 | cntX_XKY = np.sum(cntX_XKY_arr) 118 | cntX_XK = np.sum(cntX_XK_arr) 119 | 120 | #Count the number of points in the KY subspace, within the XKY distance: 121 | # Comparable to computeDistance[View] in compute_TE.cpp 122 | 123 | Cnt1 = kykdTree.query_radius(kyPts, kydist, count_only=True) - 1 124 | Cnt2 = kkdTree.query_radius(kPts, kdist, count_only=True) - 1 125 | # 126 | def digammaAtLeastOne(x): 127 | if x != 0: 128 | return digamma(x) 129 | else: 130 | return digamma(1) 131 | dvfunc = np.vectorize(digammaAtLeastOne) 132 | cntKY_XKY = np.sum(dvfunc(Cnt1)) 133 | cntK_XK = np.sum(dvfunc(Cnt2)) 134 | 135 | 136 | # The transfer entropy is the difference of the two mutual informations 137 | # If we define digK = digamma(k), digN = digamma(nPts); then the 138 | # Kraskov (2004) estimator for MI gives 139 | # TE = (digK - 1/k - (cntX_XKY + cntKY_XKY)/nPts + digN) - (digK - 1/k - (cntX_XK + cntK_XK)/nPts + digN) 140 | # which simplifies to: 141 | # TE = (cntX_XK + cntK_XK)/nPts - (cntX_XKY + cntKY_XKY)/nPts; 142 | # 143 | TE = (cntX_XK + cntK_XK)/nPts - (cntX_XKY + cntKY_XKY)/nPts 144 | return TE 145 | -------------------------------------------------------------------------------- /PyIF/GPU_TE.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numba import cuda 3 | from scipy.special import digamma # to compute log derivative of gamma function 4 | 5 | @cuda.jit 6 | def countsInKD(fn_cntX_XKY_arr, fn_cntX_XK_arr, fn_X, fn_xkyPts, fn_xdistXKY, fn_xdistXK, embedding): 7 | """ 8 | Computes counts for distances less than xdistXKY & xdistXK and stores them in cntX_XKY_arr and cntX_XK_arr respectively. 9 | 10 | Parameters 11 | ---------- 12 | fn_cntX_XKY_arr: array that holds how many points are within its respective XKY dist 13 | fn_cntX_XK_arr: array that holds how many points are within its respective XK dist 14 | fn_X: array that holds all X points 15 | fn_xkyPts: 2D array that holds points in the XKY subspace 16 | fn_xdistXKY: array that holds the X distance in the xky space 17 | fn_xdistXK: array that holds the K distance 18 | embedding: integer containg number of lag periods to consider 19 | Returns 20 | ------- 21 | No return values 22 | """ 23 | #Get distances 24 | i = cuda.blockDim.x * cuda.blockIdx.x + cuda.threadIdx.x 25 | ArCnt1 = 0 26 | ArCnt2 = 0 27 | point = fn_xkyPts[i][0] 28 | for j in range(embedding, len(fn_X)): 29 | difference = abs(point - fn_X[j]) 30 | if difference <= fn_xdistXKY[i] and difference != 0: 31 | ArCnt1 += 1 32 | if difference <= fn_xdistXK[i] and difference != 0: 33 | ArCnt2 += 1 34 | 35 | if ArCnt1 == 0: 36 | ArCnt1 = 1 37 | if ArCnt2 == 0: 38 | ArCnt2 = 1 39 | fn_cntX_XKY_arr[i] = ArCnt1 40 | fn_cntX_XK_arr[i] = ArCnt2 41 | 42 | 43 | def compute(xkykdTree, kykdTree, xkkdTree, kkdTree, 44 | xkyPts, kyPts, xkPts, kPts, nPts, X, embedding=1, k=1): 45 | ''' 46 | Computes the TE 47 | 48 | Parameters 49 | ---------- 50 | xkykdTree: KD tree of the xky subspace 51 | kykdTree: KD tree of the ky subspace 52 | xkkdTree: KD tree of the xk subspace 53 | kkdTree: KD tree of the k subspace 54 | xkyPts: array of the points in the xky subspace 55 | kyPts: array of the points in the ky subspace 56 | xkPts: array of the xk subspace 57 | kPts: array of the k subspace 58 | nPts: total number of points 59 | X: array of the X points 60 | embedding: number of the wanted embedding value 61 | k: number of the nearest neighbors 62 | 63 | Returns 64 | ------- 65 | TE: The transfer entropy estimate 66 | ''' 67 | 68 | #variables to store the distance to the kth neighbor in different spaces 69 | tmpdist,xdistXKY,xdistXK,kydist,kdist = 0,0,0,0,0 70 | 71 | #counters for summing the digammas of the point counts. 72 | cntX_XKY, cntX_XK, cntKY_XKY, cntK_XK = 0,0,0,0 73 | 74 | # for each point in the XKY space, 75 | # Return the distance and indicies of k nearest neighbors 76 | dists, idxs = xkykdTree.query(xkyPts, k=k+1) 77 | idx = idxs[:, 1:] # Drop first index since it is a duplicate 78 | 79 | # Grab kth neighbor 80 | idx = idx[:, k-1] 81 | # Calculate the X distance and KY distance in xky space 82 | 83 | xdistXKY= np.absolute(np.subtract(xkyPts[:, 0], xkyPts[idx][:, 0])) 84 | kydist= np.absolute(np.subtract(xkyPts[:, 1:], xkyPts[idx][:, 1:])) 85 | 86 | # Take column with maximum distance 87 | kydist = np.amax(kydist, axis=1) 88 | 89 | # perform the same operations in the xk space 90 | 91 | # Returns distance and indicies of k nearest neighbors 92 | dists, idxs = xkkdTree.query(xkPts, k=k+1) 93 | idx = idxs[:, 1:] # Drop first index since it is a duplicate 94 | # Grab closest neighbors 95 | idx = idx[:, k-1] 96 | # Calculate the K distance and the XK distance. 97 | xdistXK= np.absolute(np.subtract(xkPts[:, 0], xkPts[idx][:, 0])) 98 | kdist = np.absolute(np.subtract(xkPts[:, 1:], xkPts[idx][:, 1:])) 99 | 100 | # Take column with maximum distance 101 | kdist = np.amax(kdist, axis=1) 102 | # temp counters 103 | Cnt1, Cnt2 = 0,0 104 | 105 | 106 | cntX_XKY_arr = np.zeros(len(xkyPts) - embedding, dtype="float") 107 | cntX_XK_arr = np.zeros(len(xkyPts) - embedding, dtype="float") 108 | 109 | threadsPerBlock = 32 110 | blocksPerGrid = (len(xkyPts) - embedding + threadsPerBlock - 1) // threadsPerBlock 111 | 112 | countsInKD[blocksPerGrid, threadsPerBlock](cntX_XKY_arr, cntX_XK_arr, X, xkyPts, xdistXKY, xdistXK, embedding) 113 | 114 | vfunc = np.vectorize(digamma) 115 | cntX_XKY_arr = vfunc(cntX_XKY_arr) 116 | cntX_XK_arr = vfunc(cntX_XK_arr) 117 | 118 | 119 | cntX_XKY = np.sum(cntX_XKY_arr) 120 | cntX_XK = np.sum(cntX_XK_arr) 121 | 122 | #Count the number of points in the KY subspace, within the XKY distance: 123 | # 124 | # Comparable to computeDistance[View] in compute_TE.cpp 125 | 126 | Cnt1 = kykdTree.query_radius(kyPts, kydist, count_only=True) - 1 127 | Cnt2 = kkdTree.query_radius(kPts, kdist, count_only=True) - 1 128 | # 129 | def digammaAtLeastOne(x): 130 | if x != 0: 131 | return digamma(x) 132 | else: 133 | return digamma(1) 134 | dvfunc = np.vectorize(digammaAtLeastOne) 135 | cntKY_XKY = np.sum(dvfunc(Cnt1)) 136 | cntK_XK = np.sum(dvfunc(Cnt2)) 137 | 138 | 139 | # The transfer entropy is the difference of the two mutual informations 140 | # If we define digK = digamma(k), digN = digamma(nPts); then the 141 | # Kraskov (2004) estimator for MI gives 142 | # TE = (digK - 1/k - (cntX_XKY + cntKY_XKY)/nPts + digN) - (digK - 1/k - (cntX_XK + cntK_XK)/nPts + digN) 143 | # which simplifies to: 144 | # TE = (cntX_XK + cntK_XK)/nPts - (cntX_XKY + cntKY_XKY)/nPts; 145 | # 146 | TE = (cntX_XK + cntK_XK)/nPts - (cntX_XKY + cntKY_XKY)/nPts 147 | return TE 148 | print(TE) 149 | -------------------------------------------------------------------------------- /PyIF/__init__.py: -------------------------------------------------------------------------------- 1 | name = "PyIF" 2 | from . import te_compute 3 | -------------------------------------------------------------------------------- /PyIF/helper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def make_spaces(X, Y, embedding=1): 5 | ''' 6 | function to make 4 subspaces 7 | 8 | Parameters 9 | ---------- 10 | x: numpy array 11 | y: numpy array 12 | embedding: how far to look back when creatining spaces. Must be greater than or equal to 1. 13 | 14 | Returns 15 | ------- 16 | xky subspace: contains the X values, the embedding for X, and the Y Values 17 | ky subspace: contains X values and Y Values 18 | xk subspace: contains X values, the embedding values for X 19 | k subspace: contains only X Values 20 | ''' 21 | 22 | 23 | # A column for X & Y, along with the embedding 24 | dimxky = embedding + 2 25 | 26 | # A column for embedding and Y Value 27 | dimky = embedding + 1 28 | 29 | # A column for X & embedding values 30 | dimxk = embedding + 1 31 | 32 | # columns for embedding values 33 | dimk = embedding 34 | 35 | 36 | 37 | N = len(X) 38 | 39 | # Create numpy array of zeros for subspace 40 | xky_pts = np.zeros((N-embedding, dimxky)) 41 | ky_pts = np.zeros((N-embedding, dimky)) 42 | xk_pts = np.zeros((N-embedding, dimxk)) 43 | k_pts = np.zeros((N-embedding, dimk)) 44 | 45 | 46 | # Set the last column to the Y column. Start at the embedding index and take up to N - embedding values 47 | xky_pts[:, embedding+1] = Y.flatten()[embedding-1:][0:N-embedding] 48 | ky_pts[:, embedding] = Y.flatten()[embedding-1:][0:N-embedding] 49 | 50 | 51 | # start from embedding value and decrease to 0 52 | for i, j in enumerate(range(embedding, -1, -1)): 53 | 54 | # set first column to the X values from embedding to the length of the array 55 | # then take from 0 to N-embedding 56 | xky_pts[:, i] = X.flatten()[j:][0:N-embedding] 57 | xk_pts[:, i] = X.flatten()[j:][0:N-embedding] 58 | 59 | # This is so that there are no values of X in k or the ky subspaces. 60 | if i > 0: 61 | k_pts[:, i-1] = X.flatten()[j:][0:N-embedding] 62 | ky_pts[:, i-1] = X.flatten()[j:][0:N-embedding] 63 | 64 | # Repeat with next column from the embedding -1 to the length of the array 65 | # then take 0 to N-embedding 66 | return xky_pts, ky_pts, xk_pts, k_pts, N-embedding 67 | 68 | 69 | def safetyCheck(X,Y): 70 | ''' 71 | Checks for duplicate data and ends TE estimation if duplicate 72 | data points are found 73 | 74 | Parameters 75 | ---------- 76 | X: Array that holds the X values 77 | Y: Array that holds the Y values 78 | 79 | Returns 80 | ------- 81 | True if the safety check passes and False otherwise 82 | ''' 83 | checkDict = {} 84 | for i in range(len(X)): 85 | if checkDict.get((X[i],Y[i])) == None: 86 | checkDict[(X[i],Y[i])] = 1 87 | else: 88 | return False 89 | return True 90 | 91 | -------------------------------------------------------------------------------- /PyIF/te_compute.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.neighbors import KDTree # to compute distance 3 | 4 | from . import helper 5 | from . import GPU_TE as gte 6 | from . import CPU_TE as cte 7 | 8 | from nose.tools import assert_true 9 | 10 | def te_compute(X, Y, k=1, embedding=1, safetyCheck=False, GPU=False): 11 | ''' 12 | Parameters 13 | ---------- 14 | X: numpy array 15 | Y: numpy array 16 | k: number of nearest neighbors 17 | embedding: integer containing number of lag periods to consider 18 | safetyCheck: Boolean value when True, will check for unique values 19 | and abort estimation if duplicate values are found 20 | GPU: Boolean value that when set to true will use CUDA compatiable GPUs 21 | 22 | Returns 23 | ------- 24 | TE: Floating point value 25 | ''' 26 | 27 | assert_true(k>=1, msg="K should be greater than or equal to 1") 28 | assert_true(embedding >= 1, msg='The embedding must be greater than or equal to 1') 29 | assert_true(type(X) == np.ndarray, msg='X should be a numpy array') 30 | assert_true(type(Y) == np.ndarray, msg='Y should be a numpy array') 31 | assert_true(len(X) == len(Y), msg='The length of X & Y are not equal') 32 | 33 | 34 | 35 | if safetyCheck and (not helper.safetyCheck(X,Y)): 36 | print("Safety check failed. There are duplicates in the data.") 37 | return None 38 | 39 | # Make Spaces 40 | xkyPts, kyPts, xkPts, kPts, nPts = helper.make_spaces(X, Y, 41 | embedding=embedding) 42 | 43 | # Make Trees 44 | xkykdTree = KDTree(xkyPts, metric="chebyshev") 45 | kykdTree = KDTree(kyPts, metric="chebyshev") 46 | xkkdTree = KDTree(xkPts, metric="chebyshev") 47 | kkdTree = KDTree(kPts, metric="chebyshev") 48 | 49 | if GPU: 50 | TE = gte.compute(xkykdTree, kykdTree, xkkdTree, kkdTree, 51 | xkyPts, kyPts, xkPts, kPts, nPts, X, embedding=embedding, k=k) 52 | 53 | else: 54 | TE = cte.compute(xkykdTree, kykdTree, xkkdTree, kkdTree, 55 | xkyPts, kyPts, xkPts, kPts, nPts, X, embedding=embedding, k=k) 56 | 57 | 58 | return TE 59 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyIF 2 | 3 | An open source implementation to compute bi-variate Transfer Entropy. 4 | 5 | 6 | Then `from PyIF import te_compute` to import the needed functions. Then all that is needed then is to call the `te_compute` function. 7 | 8 | ## Installation 9 | 10 | To install PyIF using pip run the following command: 11 | ```bash 12 | pip install PyIF 13 | ``` 14 | 15 | To install a the development release of PyIF run the following command: 16 | ```bash 17 | pip install -e . 18 | ``` 19 | 20 | ## Example 21 | 22 | ``` python 23 | from PyIF import te_compute as te 24 | import numpy as np 25 | rand = np.random.RandomState(seed=23) 26 | 27 | X_1000 = rand.randn(1000, 1).flatten() 28 | Y_1000 = rand.randn(1000, 1).flatten() 29 | 30 | TE = te.te_compute(X_1000, Y_1000, k=1, embedding=1, safetyCheck=True, GPU=False) 31 | 32 | print(TE) 33 | ``` 34 | 35 | ## Arguments 36 | 37 | PyIF has 2 required arguments `X` and `Y` which should be numpy arrays with dimensions of N x 1. The following arguments are optional: 38 | 39 | - `k`: controls the number of neighbors used in KD-tree queries 40 | - `embedding`: controls how many lagged periods are used to estimate transfer entropy 41 | - `GPU`: a boolean argument that indicates if CUDA compatible GPUs should be used to estimate transfer entropy instead of your computer's CPUs. 42 | - `safetyCheck`: a boolean argument can be used to check for duplicates rows in your dataset. 43 | 44 | ## How to Cite PyIF 45 | 46 | K. M. Ikegwu, J. Trauger, J. McMullin and R. J. Brunner, "PyIF: A Fast and Light Weight Implementation to Estimate Bivariate Transfer Entropy for Big Data," 2020 SoutheastCon, Raleigh, NC, USA, 2020, pp. 1-6, doi: 10.1109/SoutheastCon44009.2020.9249650. -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r") as fh: 4 | long_description = fh.read() 5 | 6 | 7 | setuptools.setup( 8 | name='PyIF', 9 | version='0.1.1', 10 | author="Kelechi Ikegwu, Jacob Trauger, Robert Brunner", 11 | author_email="ikegwu2@illinois.edu, jtt2@illinois.edu, bigdog@illinois.edu", 12 | description="An open source implementation to compute bi-variate Transfer Entropy.", 13 | long_description=long_description, 14 | long_description_content_type="text/markdown", 15 | url="https://github.com/lcdm-uiuc/PyTE", 16 | packages=setuptools.find_packages(), 17 | classifiers=[ 18 | "Programming Language :: Python :: 3", 19 | "License :: OSI Approved :: University of Illinois/NCSA Open Source License", 20 | "Operating System :: OS Independent", 21 | ], 22 | ) 23 | --------------------------------------------------------------------------------