├── GNN2.py ├── Graph_layers.py ├── README.md ├── VAE2.py ├── brainprint.py ├── clustering.py ├── evaluation.py ├── graphfeatures.py └── pre_func.py /GNN2.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from graphfeatures import graph_norm 4 | import numpy as np 5 | import tensorflow as tf 6 | import time 7 | from copy import deepcopy 8 | from Graph_layers import GraphConvolution, Graph_diffpool,Graph_sagepool, Graph_globalpool, Graph_clustpool #Graph layers 9 | from Graph_layers import Graph_clustpool_2 10 | from clustering import A_binarize, creating_label 11 | from evaluation import EER_calculation 12 | #from keras.utils.vis_utils import plot_model 13 | 14 | import pyedflib #for importing EEG data 15 | import os 16 | 17 | subject_num = 109 18 | run_num = 14 19 | task_num = 6 20 | n = 64 21 | data_length = 9600 22 | def load_dataset(subject=None,task=2): 23 | if(subject==None): 24 | x = np.zeros((subject_num,run_num,n,data_length)) 25 | for k in range(subject_num): 26 | for i in range(run_num): 27 | if(i==4 and k==105): 28 | file_name = os.path.join('./datasetI/S'+'{0:03}'.format(k+1), 'S' +'{0:03}'.format(k+1)+'R'+'{0:02}'.format(i+1+4)+'.edf') 29 | f = pyedflib.EdfReader(file_name) 30 | for j in range(n): 31 | x[k,i,j, :] = f.readSignal(j)[data_length:data_length*2] 32 | else: 33 | file_name = os.path.join('./datasetI/S'+'{0:03}'.format(k+1), 'S' +'{0:03}'.format(k+1)+'R'+'{0:02}'.format(i+1)+'.edf') 34 | f = pyedflib.EdfReader(file_name) 35 | #n = f.signals_in_file 36 | #time = f.getNSamples()[0] 37 | #96th subject time is 9600 instead of 9760 38 | for j in range(n): 39 | x[k,i,j, :] = f.readSignal(j)[:data_length] 40 | if(i==1): 41 | signal_channel = f.getSignalLabels() 42 | f._close() 43 | del f 44 | else: 45 | x = np.zeros((run_num,n,data_length)) 46 | for i in range(run_num): 47 | file_name = os.path.join('./datasetI/S'+'{0:03}'.format(subject), 'S' +'{0:03}'.format(subject)+'R'+'{0:02}'.format(i+1)+'.edf') 48 | f = pyedflib.EdfReader(file_name) 49 | #n = f.signals_in_file 50 | #time = f.getNSamples()[0] 51 | #96th subject time is 9600 instead of 9760 52 | for j in range(n): 53 | x[i,j, :] = f.readSignal(j)[:data_length] 54 | if(i==1): 55 | signal_channel = f.getSignalLabels() 56 | f._close() 57 | del f 58 | 59 | return x, signal_channel 60 | 61 | win_size = 160 62 | step = 160*0+80 #1-window*alpha% 63 | Fs = 160 64 | Ts = 1/Fs 65 | 66 | Labels = np.linspace(1,subject_num,subject_num) #data label 67 | x_original_all, signal_channel = load_dataset() #import data for all subject icluding all tasks 68 | 69 | diffpool = False 70 | sagepool = False 71 | globalpool = False 72 | mypool = False 73 | Task = False; ntask = 6 74 | 75 | # Graph Convolutional Network Model 76 | class GCNModel(tf.keras.Model): 77 | def __init__(self, adj,adj_norm, num_features, num_nodes, features_nonzero, subject_num, **kwargs): 78 | super().__init__(**kwargs) 79 | self.input_dim = num_features 80 | self.features_nonzero = features_nonzero 81 | self.n_samples = num_nodes 82 | self.hd1 = 83 | self.hd2 = 84 | self.hd3 = 85 | self.subject_num = subject_num if(not(Task)) else ntask 86 | self.h1 = GraphConvolution(input_dim = self.input_dim, 87 | output_dim = self.hd1, num = 1, 88 | act = tf.nn.leaky_relu) #Convolutional Graph layer 89 | """ 90 | self.h2 = GraphConvolution(input_dim = self.hd1, 91 | output_dim = self.hd2, num = 2, 92 | act = lambda x: x) 93 | #""" 94 | """ 95 | self.h5 = 
GraphConvolution(input_dim = self.hd2, 96 | output_dim = self.hd2, num = 5, 97 | act = lambda x: x) 98 | #""" 99 | #""" 100 | self.h3 = GraphConvolution(input_dim = self.hd1, 101 | output_dim = self.hd3, num = 3, 102 | act = tf.nn.tanh) #leaky_relu 103 | #""" 104 | self.h4 = tf.keras.layers.Dense(self.subject_num) 105 | if(diffpool): #diffpool pooling layer 106 | self.p1 = Graph_diffpool(input_dim = self.hd1, 107 | output_dim = 48, num = 4, 108 | act = lambda x: x) 109 | elif(sagepool): #sage pooling layer 110 | self.p1 = Graph_sagepool(input_dim = self.hd1, num = 4, ratio = .25, 111 | act = lambda x: x) 112 | if(mypool): #pooling layer defined by myself 113 | #self.p1 = Graph_clustpool_2(adj,ratio=.25) 114 | self.p1 = Graph_clustpool(adj,48,cluster_type='sum') 115 | self.adj_pool = self.p1.adj_masking(adj_norm) 116 | #self.adj_pool = tf.matmul(adj_pool, adj_pool) 117 | """ 118 | feature = tf.ones((adj.shape[0],adj.shape[1],1)) 119 | x, adj2 = self.p1(feature,adj) 120 | self.p2 = Graph_clustpool_2(adj2,ratio=.25) 121 | #""" 122 | 123 | def call(self, inputs, adj, rate, adj_pool): 124 | adj = tf.matmul(adj, adj) 125 | x = self.h1(inputs, adj, rate) 126 | if(mypool): 127 | x, _ = self.p1(x, adj) 128 | #x = self.h2(x, adj, rate) 129 | #x = self.h5(x, adj, rate) 130 | if(diffpool): 131 | x, adj = self.p1(x, adj, rate) 132 | elif(sagepool): 133 | x, adj = self.p1(x, adj, rate) 134 | #x = self.h2(x, adj, rate) 135 | """ 136 | if(sagepool): 137 | x, adj = self.p2(x, adj, rate, .25) 138 | """ 139 | """ 140 | if(mypool): 141 | x, adj = self.p2(x, adj) 142 | #""" 143 | if(adj_pool==None): 144 | adj_pool = adj 145 | x = self.h3(x, adj_pool, rate) 146 | 147 | if(globalpool): 148 | x = Graph_globalpool(pool_method='max')(x) 149 | else: 150 | x = tf.keras.layers.Flatten()(x) 151 | x = self.h4(x) 152 | x = tf.nn.log_softmax(x, axis=1) 153 | return x 154 | 155 | lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay( 156 | initial_learning_rate=.4e-2, 157 | decay_steps=10000, 158 | decay_rate=0.9) 159 | #Model Optimizer 160 | class Optimizer(object): 161 | def __init__(self, subject_num): 162 | self.cce = tf.keras.losses.CategoricalCrossentropy() 163 | self.subject_num = subject_num if(not(Task)) else ntask 164 | self.optimizer = tf.keras.optimizers.Adam(learning_rate = lr_schedule) #RMSprop 165 | def train_step(self,y,x,adj,rate,adj2,model): 166 | with tf.GradientTape() as tape: #watch_accessed_variables=False 167 | tape.watch(model.trainable_variables) 168 | y_pred = model(x,adj,rate,adj2) 169 | y_true = tf.keras.utils.to_categorical(y-1, num_classes=self.subject_num) 170 | #loss = self.cce(y_true, y_pred) 171 | loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_pred,labels=y_true)) 172 | if(diffpool): 173 | loss += sum(model.losses) 174 | gradients = tape.gradient(loss, model.trainable_variables) 175 | opt_op = self.optimizer.apply_gradients(zip(gradients, model.trainable_variables)) 176 | return loss 177 | 178 | dataset7 = True #Two dataset 179 | if(dataset7): 180 | Binary=False 181 | else: 182 | Binary=True 183 | Part_channel = False #Consider part of the channels 184 | 185 | def Adj_matrix(train_x, test_x): 186 | if(Binary): 187 | #Convert weighted matrix to binary matrix with threshold 188 | percentile = 0.9 189 | adj_train = A_binarize(A_matrix=train_x,percent=percentile,sparse=True) 190 | adj_test = A_binarize(A_matrix=test_x,percent=percentile,sparse=True) 191 | else: 192 | adj_train = deepcopy(train_x) 193 | adj_test = deepcopy(test_x) 194 | if(Part_channel): 195 | 
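# Editor's note (descriptive comment, not in the original GNN2.py): this Part_channel branch references 'ztr',
# which is not defined anywhere in GNN2.py; it appears to be the latent embedding produced by VAE2.py, so this
# option presumably only works in a session where that embedding has already been computed.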
index = creating_label(ztr,y_train,subject_num,method='mean_sort') #dataset2_indices(signal_channel) 196 | adj_train = adj_train[:,:,index] 197 | adj_train = adj_train[:,index] 198 | adj_test = adj_test[:,:,index] 199 | adj_test = adj_test[:,index] 200 | return adj_train, adj_test 201 | 202 | FLAGS_features = False 203 | if not FLAGS_features: 204 | features_init_train = None 205 | else: 206 | features_init_train = deepcopy(ztr) 207 | if not FLAGS_features: 208 | features_init_test = None 209 | else: 210 | features_init_test = deepcopy(zte) 211 | 212 | verbose = True 213 | nb_run = 5 #5-fold cross validation 214 | accuracy = np.zeros((nb_run,1)) 215 | Computational_time = np.zeros((nb_run,1)) 216 | roc_auc = np.zeros((nb_run,1)) 217 | EER = np.zeros((nb_run,1)) 218 | num_epoch = np.zeros((nb_run,1)) 219 | full_time = np.zeros((nb_run,1)) 220 | 221 | for i in range(nb_run): 222 | t_start = time.time() 223 | subject_num = len(Labels) 224 | # Preprocessing EEG data 225 | if(not(dataset7)): 226 | train_x, test_x, y_train, y_test = preprocess_data(x_original_all[:,0],Labels,i,Fs,dataset2=False, 227 | filt=False,ICA=True,A_Matrix='cov') 228 | else: 229 | train_x, test_x, y_train, y_test = preprocess_data(x_original[:,:,Fs*9:],Labels,i,Fs, 230 | dataset2=False,filt=False,ICA=True,A_Matrix='plv',sec=30,sampling=False) 231 | adj_train, adj_test = Adj_matrix(train_x, test_x) 232 | 233 | # Preprocessing and initialization 234 | if verbose: 235 | print("Preprocessing and Initializing...") 236 | # Compute number of nodes 237 | num_nodes = adj_train.shape[1] 238 | # If features are not used, replace feature matrix by identity matrix 239 | I = (np.tile(np.eye(adj_train.shape[1]),adj_train.shape[0]).T).reshape(-1,adj_train.shape[1],adj_train.shape[1]) 240 | I_test = (np.tile(np.eye(adj_test.shape[1]),adj_test.shape[0]).T).reshape(-1,adj_test.shape[1],adj_test.shape[1]) 241 | if not FLAGS_features: 242 | features = np.ones((adj_train.shape[0],adj_train.shape[1],1)) 243 | #features = deepcopy(I) 244 | else: 245 | features = deepcopy(features_init_train) 246 | # Preprocessing on node features 247 | num_features = features.shape[2] 248 | features_nonzero = np.count_nonzero(features)//features.shape[0] 249 | # Normalization and preprocessing on adjacency matrix 250 | if(dataset7): 251 | adj_norm = adj_train 252 | adj_norm_test = adj_test 253 | else: 254 | adj_norm = graph_norm(adj_train) 255 | adj_norm_test = graph_norm(adj_test) 256 | #adj_norm = A[:len(adj_train)] 257 | #adj_norm_test = A[len(adj_train):] 258 | 259 | if not FLAGS_features: 260 | features_test = np.ones((adj_test.shape[0],adj_test.shape[1],1)) 261 | #features_test = deepcopy(I_test) 262 | else: 263 | features_test = deepcopy(features_init_test) 264 | 265 | rate_test = 0 266 | #model 267 | GCmodel = GCNModel(adj_norm,adj_norm,num_features,num_nodes,features_nonzero,subject_num) 268 | if(mypool): 269 | print('number of cluster: ',GCmodel.p1.n_cluster) 270 | adj_pool = GCmodel.adj_pool 271 | train_dataset = (tf.data.Dataset.from_tensor_slices((adj_norm,y_train,features,adj_pool)) 272 | .shuffle(len(adj_norm)).batch(64)) 273 | else: 274 | train_dataset = (tf.data.Dataset.from_tensor_slices((adj_norm,y_train,features)) 275 | .shuffle(len(adj_norm)).batch(64)) 276 | # Optimizer 277 | opt = Optimizer(subject_num) 278 | # Model training 279 | if verbose: 280 | print("Training...") 281 | prev_cost = 100000 282 | stop_val = 0 283 | stop_num = 15 #15 284 | FLAGS_shuffle = False 285 | nb_epochs = 50 286 | if(i==0): 287 | nb_epochs = 40 #80 288 | for 
epoch in range(nb_epochs): 289 | num_epoch[i] +=1 290 | t = time.time() 291 | # Compute average loss 292 | loss = 0 293 | if(mypool): 294 | for adj, label, x, adj2 in train_dataset: 295 | loss += opt.train_step(tf.cast(label,tf.float32),tf.cast(x,tf.float32), 296 | tf.cast(adj,tf.float32), 0.5, adj2, GCmodel) 297 | else: 298 | for adj, label, x in train_dataset: 299 | loss += opt.train_step(tf.cast(label,tf.float32),tf.cast(x,tf.float32), 300 | tf.cast(adj,tf.float32), 0.5, None, GCmodel) 301 | #loss = opt.train_step(adj_label,tf.cast(features,tf.float32),tf.cast(adj_norm,tf.float32), 0.5, model) 302 | avg_cost = loss.numpy() 303 | Computational_time[i] += (time.time() - t) 304 | if verbose: 305 | # Display epoch information 306 | print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(avg_cost), 307 | "time=", "{:.5f}".format(time.time() - t)) 308 | nb_epochs += 1 309 | #Stopping condition 310 | if(prev_cost <= avg_cost): 311 | stop_val += 1 312 | if (stop_val == stop_num): 313 | break 314 | else: 315 | stop_val = 0 316 | prev_cost = avg_cost 317 | 318 | if(mypool): 319 | pred = GCmodel(tf.cast(features_test,tf.float32), tf.cast(adj_norm_test,tf.float32), 320 | 0.0,GCmodel.p1.adj_masking(adj_norm_test)).numpy() 321 | else: 322 | pred = GCmodel(tf.cast(features_test,tf.float32), tf.cast(adj_norm_test,tf.float32), 323 | 0.0,None).numpy() 324 | test_pred = np.argmax(pred,axis=1) 325 | full_time[i] = time.time()-t_start 326 | accuracy[i] = 100 * np.sum(test_pred==(y_test-1)) / len(test_pred) 327 | print("accuracy: ", accuracy[i]) 328 | Computational_time[i] = Computational_time[i]/nb_epochs 329 | print("computational time for each epoch: ",Computational_time[i]) 330 | eer_num = subject_num if(not(Task)) else ntask 331 | eer, _, _, roc = EER_calculation(y_test,test_pred+1,eer_num) 332 | EER[i], roc_auc[i] = np.round(np.mean(eer),4),np.round(np.mean(roc),3) 333 | print("EER: {} and ROC: {}".format(EER[i],roc_auc[i])) 334 | 335 | print("final EER: {} and ROC: {}".format(np.round(np.mean(EER),4),np.round(np.mean(roc_auc),3))) 336 | print("final accuracy: ", np.round(np.mean(accuracy),3),np.round(np.var(accuracy),3)) 337 | print("final computation time: ",np.round(np.mean(Computational_time),3)) 338 | print("final num epochs: ",np.round(np.mean(num_epoch),3)) 339 | print("final full time: ",np.round(np.mean(full_time/60),3)) 340 | -------------------------------------------------------------------------------- /Graph_layers.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.keras.backend as bk 3 | from clustering import graph_clustering 4 | import numpy as np 5 | from copy import deepcopy 6 | 7 | Type = "float32" 8 | 9 | class GraphConvolution(tf.keras.layers.Layer): 10 | """ Graph convolution layer """ 11 | def __init__(self, input_dim, output_dim, num, act = tf.nn.relu, **kwargs): 12 | super().__init__(**kwargs) 13 | w_init = tf.random_normal_initializer() 14 | self.w = tf.Variable( name = 'weight'+str(num), 15 | initial_value=w_init(shape=(input_dim, output_dim), dtype=Type), 16 | trainable=True) 17 | b_init = tf.zeros_initializer() 18 | self.b = tf.Variable( name = 'bias'+str(num), 19 | initial_value=b_init(shape=(output_dim,), dtype=Type), trainable=True) 20 | self.act = act 21 | 22 | def call(self, inputs, adj, rate =0., normalize=False): 23 | x = tf.nn.dropout(inputs, rate = rate) 24 | x = tf.matmul(x, self.w) 25 | x = tf.matmul(adj, x) 26 | outputs = self.act(x + self.b) 27 | if normalize: 28 | 
x = tf.keras.utils.normalize(x) 29 | return outputs 30 | 31 | class GraphLinear(tf.keras.layers.Layer): 32 | """ Graph linear layer """ 33 | def __init__(self, input_dim, output_dim, num, act = tf.nn.relu, **kwargs): 34 | super().__init__(**kwargs) 35 | w_init = tf.random_normal_initializer() 36 | self.w = tf.Variable( name = 'weight'+str(num), 37 | initial_value=w_init(shape=(input_dim, output_dim), dtype=Type), 38 | trainable=True) 39 | b_init = tf.zeros_initializer() 40 | self.b = tf.Variable( name = 'bias'+str(num), 41 | initial_value=b_init(shape=(output_dim,), dtype=Type), trainable=True) 42 | self.act = act 43 | 44 | def call(self, inputs, normalize=False): 45 | x = tf.matmul(inputs, self.w) 46 | outputs = self.act(x + self.b) 47 | if normalize: 48 | x = tf.keras.utils.normalize(x) 49 | return outputs 50 | 51 | class Graph_diffpool(tf.keras.layers.Layer): 52 | """ Graph diff pooling layer """ 53 | def __init__(self, input_dim, output_dim, num, act = tf.nn.relu, **kwargs): 54 | super().__init__(**kwargs) 55 | self.h = GraphConvolution(input_dim = input_dim, 56 | output_dim = output_dim, num = num, 57 | act = act) 58 | 59 | def call(self, inputs, adj, rate, normalize=False): 60 | S = self.h(inputs, adj, rate, normalize) 61 | S = tf.nn.softmax(S,axis=-1) 62 | S_T = tf.transpose(S, perm=[0, 2, 1]) 63 | #loss 64 | LP_loss = adj - tf.matmul(S,S_T) 65 | LP_loss = tf.reduce_mean(tf.norm(LP_loss, axis=(-1, -2))) 66 | self.add_loss(LP_loss) 67 | entr = tf.negative(tf.reduce_sum(tf.multiply(S, bk.log(S + bk.epsilon())), axis=-1)) 68 | entr_loss = tf.reduce_mean(entr) 69 | self.add_loss(entr_loss) 70 | #new_output 71 | x = tf.matmul(S_T,inputs) 72 | adj = tf.matmul(adj,S) 73 | adj = tf.matmul(S_T,adj) 74 | return x, adj 75 | 76 | class Graph_sagepool(tf.keras.layers.Layer): 77 | """ Graph sage pooling layer """ 78 | def __init__(self, input_dim, num, ratio, act = tf.nn.relu, **kwargs): 79 | super().__init__(**kwargs) 80 | self.ratio = ratio 81 | self.h = GraphConvolution(input_dim = input_dim, 82 | output_dim = 1, num = num, 83 | act = act) 84 | 85 | def call(self, inputs, adj, rate, normalize=False): 86 | K = int(self.ratio * adj.shape[1]) 87 | n = adj.shape[1] 88 | num_nodes = n-K 89 | y = self.h(inputs, adj, rate, normalize) 90 | y = tf.reshape(y,[-1,adj.shape[1]]) 91 | y = tf.math.tanh(y) 92 | indices = tf.argsort(y,axis=-1) 93 | indices = indices[:,K:] 94 | u = tf.repeat(tf.reshape(tf.range(len(indices)),(-1,1)),indices.shape[1],axis=1) 95 | index = tf.concat([tf.reshape(u,[-1,1]),tf.reshape(indices,[-1,1])],1) 96 | mask = tf.scatter_nd(index, tf.reshape(tf.ones_like(indices),-1), tf.constant([len(adj),n])) 97 | x = tf.boolean_mask(inputs,mask) 98 | x = tf.math.multiply(x , tf.boolean_mask(tf.expand_dims(y,axis=2),mask)) 99 | x = tf.reshape(x,[-1,num_nodes,x.shape[1]]) 100 | adj = tf.boolean_mask(adj,mask,axis=0) 101 | adj = tf.reshape(adj,[-1,num_nodes,n]) 102 | adj = tf.transpose(adj, perm=[0, 2, 1]) 103 | adj = tf.boolean_mask(adj,mask,axis=0) 104 | adj = tf.reshape(adj,[-1,num_nodes,num_nodes]) 105 | return x, adj 106 | 107 | class Graph_globalpool(tf.keras.layers.Layer): 108 | """ Graph global pooling layer""" 109 | def __init__(self,pool_method='max',**kwargs): 110 | super().__init__(**kwargs) 111 | self.method = pool_method 112 | def call(self, inputs): 113 | if(self.method=='max'): 114 | return tf.reduce_max(inputs,axis=-1) 115 | elif(self.method=='mean'): 116 | return tf.reduce_mean(inputs,axis=-1) 117 | elif(self.method=='sum'): 118 | return tf.reduce_sum(inputs,axis=-1) 119 | 
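# --- Editor's illustrative sketch (not part of the original Graph_layers.py) ---
# The layers above act on batched graph tensors: node features of shape (batch, nodes, features)
# and adjacency matrices of shape (batch, nodes, nodes). The shapes and hidden size below are
# assumed placeholders for illustration only, not values taken from the repository:
#
#   feats = tf.ones((8, 64, 1))                    # 8 graphs, 64 EEG channels, 1 node feature
#   adj = tf.random.uniform((8, 64, 64))           # in the real pipeline this typically comes from graph_norm
#   gc = GraphConvolution(input_dim=1, output_dim=16, num=1, act=tf.nn.leaky_relu)
#   h = gc(feats, adj, rate=0.0)                   # -> (8, 64, 16): act(adj @ feats @ W + b)
#   g = Graph_globalpool(pool_method='max')(h)     # -> (8, 64): max over the feature axis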
120 | class InnerProductDecoder(tf.keras.layers.Layer): 121 | """Symmetric inner product decoder layer""" 122 | def __init__(self , act = tf.nn.sigmoid, **kwargs): 123 | super().__init__(**kwargs) 124 | self.act = act 125 | 126 | def call(self, inputs, rate = 0.): 127 | inputs = tf.nn.dropout(inputs, rate = rate) 128 | if (tf.shape(inputs).shape==3): 129 | x = tf.transpose(inputs, perm=[0, 2, 1]) 130 | else: 131 | x = tf.transpose(inputs) 132 | x = tf.matmul(inputs, x) 133 | """ 134 | if (tf.shape(inputs).shape==3): 135 | x = tf.reshape(x, [-1,x.shape[1]*x.shape[2]]) 136 | else: 137 | x = tf.reshape(x, [-1]) 138 | """ 139 | outputs = self.act(x) 140 | return outputs 141 | 142 | class Graph_clustpool_2(tf.keras.layers.Layer): 143 | """ Graph clustering pooling layer """ 144 | def __init__(self, adj, ratio, act = lambda x: x, **kwargs): 145 | super().__init__(**kwargs) 146 | self.n = adj.shape[1] 147 | self.act = act 148 | self.cluster_labels = graph_clustering(adj,'kmeans',2,ratio=ratio).reshape(1,-1) 149 | self.n_cluster = np.sum(self.cluster_labels) 150 | def adj_masking(self,adj): 151 | adj = tf.cast(adj,Type) 152 | mask = tf.repeat(self.cluster_labels,len(adj),axis=0) 153 | adj = tf.boolean_mask(adj,mask,axis=0) 154 | adj = tf.reshape(adj,[-1,self.n_cluster,self.n]) 155 | adj = tf.transpose(adj, perm=[0, 2, 1]) 156 | adj = tf.boolean_mask(adj,mask,axis=0) 157 | adj = tf.reshape(adj,[-1,self.n_cluster,self.n_cluster]) 158 | return adj 159 | def call(self, inputs, adj, normalize=False): 160 | mask = tf.repeat(self.cluster_labels,len(adj),axis=0) 161 | x = tf.boolean_mask(inputs,mask) 162 | x = tf.reshape(x,[-1,self.n_cluster,x.shape[1]]) 163 | return x, adj 164 | 165 | class Graph_clustpool(tf.keras.layers.Layer): 166 | """ Graph clustering pooling layer """ 167 | def __init__(self, adj, n_cluster, cluster_type = 'sum', num_sample=1, **kwargs): 168 | super().__init__(**kwargs) 169 | self.n_cluster = n_cluster 170 | self.cluster_type = cluster_type 171 | self.adj = adj 172 | self.clustering_method = 'kmeans' #'kmeans' 'Graclus' 173 | if(num_sample>1): 174 | Cluster = np.zeros((num_sample,self.adj.shape[1])) 175 | for i in range(num_sample): 176 | Cluster[i] = graph_clustering(self.adj,self.clustering_method,self.n_cluster).astype(int) 177 | for i in range(self.adj.shape[1]): 178 | Cluster[0,i] = np.bincount(Cluster[:,i].astype(int)).argmax() 179 | self.cluster_labels = Cluster[0] 180 | else: 181 | self.cluster_labels = graph_clustering(self.adj,self.clustering_method,self.n_cluster,Mean=False) 182 | self.n_cluster = len(Counter(self.cluster_labels).keys()) 183 | mask = np.zeros((self.adj.shape[1],self.n_cluster)) 184 | for i in range(self.n_cluster): 185 | mask[:,i] = np.equal(self.cluster_labels,i) 186 | self.mask = tf.cast(mask,dtype=Type) 187 | def adj_masking(self,adj): 188 | if(self.cluster_type=='sum'): 189 | adj = tf.einsum('ijk,kn->ijn',adj,self.mask) 190 | adj = tf.einsum('ijk,jn->ink',adj,self.mask) 191 | else: 192 | all_adj = tf.einsum('nij,ik->nikj',adj,self.mask) 193 | if(self.cluster_type=='max'): 194 | all_adj = tf.math.reduce_max(all_adj,axis=1) 195 | elif(self.cluster_type=='mean'): 196 | all_adj = tf.math.reduce_mean(all_adj,axis=1) 197 | all_adj = tf.einsum('nij,jk->nijk',all_adj,self.mask) 198 | if(self.cluster_type=='max'): 199 | adj = tf.math.reduce_max(all_adj,axis=2) 200 | elif(self.cluster_type=='mean'): 201 | adj = tf.math.reduce_mean(all_adj,axis=2) 202 | return adj 203 | #pytorch masking 204 | def masking(self, inputs, adj, labels): 205 | import torch 206 | from 
torch_scatter import scatter 207 | labels = torch.tensor(labels).type(torch.LongTensor) 208 | x = scatter(torch.tensor(inputs),labels,dim=1, reduce="mean") 209 | adj = scatter(torch.tensor(adj),labels,dim=-1, reduce="mean") 210 | adj = scatter(adj,labels,dim=1, reduce="mean") 211 | return tf.cast(x.numpy(),tf.float32), tf.cast(adj.numpy(),tf.float32) 212 | #@tf.function 213 | def call(self, inputs, adj, normalize=False): 214 | #x, adj = self.masking(inputs.numpy(),adj.numpy(),self.cluster_labels) 215 | if(self.cluster_type=='sum'): 216 | x = tf.einsum('ijk,jn->ink',inputs,self.mask) 217 | else: 218 | all_x = tf.einsum('nij,ik->nikj',inputs,self.mask) 219 | if(self.cluster_type=='max'): 220 | x = tf.math.reduce_max(all_x,axis=1) 221 | elif(self.cluster_type=='mean'): 222 | x = tf.math.reduce_mean(all_x,axis=1) 223 | return x, adj 224 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Graph-based Machine Learning for EEG data 2 | 3 | The models presented in this repository are mainly applied to biometric applications. For more information on this work, please read [Graph Variational Auto-Encoder for Deriving EEG-based Graph Embedding](https://www.sciencedirect.com/science/article/pii/S0031320321003848?dgcid=author). 4 | 5 | ## Previous work 6 | 7 | This repository also contains the code for the previous work [BrainPrint: EEG biometric identification based on analyzing brain connectivity graphs](https://www.sciencedirect.com/science/article/abs/pii/S0031320320301849). The code file is 'brainprint.py', and it uses the graph features implemented in 'graphfeatures.py'. This model converts EEG signals into graphs and manually derives features such as minimum distance and clustering coefficients. 8 | 9 | ## Current work 10 | 11 | Two novel machine learning methods for automatically deriving EEG signal features are presented. [GCNN](GNN2.py) contains the code for a graph convolutional neural network that derives brain graph features in a supervised setting. [GVAE](VAE2.py) is the corresponding code for a novel graph-based variational auto-encoder. The GVAE derives an unsupervised brain graph embedding. 12 | 13 | ## Prerequisites 14 | 15 | All code is written for Python 3 (https://www.python.org/) on a Linux platform. The TensorFlow version used is 2.3.1. 16 | 17 | The required packages are: tensorflow, sklearn, numpy, and networkx (os and time are part of the Python standard library). The scripts also import pyedflib (for reading the .edf EEG recordings), scipy, matplotlib, statsmodels, skimage, and, for the Graclus-based clustering options, torch, torch_scatter, torch_cluster, and torch_geometric. 18 | 19 | ### Clone this repository 20 | 21 | ``` 22 | git clone git@github.com:Tinbeh97/Graph_ML.git 23 | ``` 24 | ## Citation 25 | 26 | If you find this repository useful, please consider citing the following papers: 27 | 28 | [Tina Behrouzi and Dimitrios Hatzinakos. "Graph Variational Auto-Encoder for Deriving EEG-based Graph Embedding." 
Pattern Recognition (2021): 108202.](https://www.sciencedirect.com/science/article/pii/S0031320321003848?dgcid=author) 29 | 30 | [Understanding Power of Graph Convolutional Neural Network on Discriminating Human EEG Signal](https://drive.google.com/file/d/1erFzrMZ_Lrjznx3LkeNea7GiM6XV-lnJ/view) 31 | -------------------------------------------------------------------------------- /VAE2.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from graphfeatures import graph_norm 4 | import numpy as np 5 | import tensorflow as tf 6 | import time 7 | from copy import deepcopy 8 | from sklearn import svm 9 | from Graph_layers import GraphConvolution, GraphLinear, InnerProductDecoder #graph layers 10 | from clustering import A_binarize, creating_label 11 | from pre_func import dataset2_indices, preprocess_data 12 | from evaluation import EER_calculation 13 | from sklearn.neighbors import KNeighborsClassifier 14 | from sklearn.naive_bayes import MultinomialNB 15 | 16 | import pyedflib #for importing EEG data 17 | import os 18 | 19 | subject_num = 109 20 | run_num = 14 21 | task_num = 6 22 | n = 64 23 | data_length = 9600 24 | def load_dataset(subject=None,task=2): 25 | if(subject==None): 26 | x = np.zeros((subject_num,run_num,n,data_length)) 27 | for k in range(subject_num): 28 | for i in range(run_num): 29 | if(i==4 and k==105): 30 | file_name = os.path.join('./datasetI/S'+'{0:03}'.format(k+1), 'S' +'{0:03}'.format(k+1)+'R'+'{0:02}'.format(i+1+4)+'.edf') 31 | f = pyedflib.EdfReader(file_name) 32 | for j in range(n): 33 | x[k,i,j, :] = f.readSignal(j)[data_length:data_length*2] 34 | else: 35 | file_name = os.path.join('./datasetI/S'+'{0:03}'.format(k+1), 'S' +'{0:03}'.format(k+1)+'R'+'{0:02}'.format(i+1)+'.edf') 36 | f = pyedflib.EdfReader(file_name) 37 | #n = f.signals_in_file 38 | #time = f.getNSamples()[0] 39 | #96th subject time is 9600 instead of 9760 40 | for j in range(n): 41 | x[k,i,j, :] = f.readSignal(j)[:data_length] 42 | if(i==1): 43 | signal_channel = f.getSignalLabels() 44 | f._close() 45 | del f 46 | else: 47 | x = np.zeros((run_num,n,data_length)) 48 | for i in range(run_num): 49 | file_name = os.path.join('./datasetI/S'+'{0:03}'.format(subject), 'S' +'{0:03}'.format(subject)+'R'+'{0:02}'.format(i+1)+'.edf') 50 | f = pyedflib.EdfReader(file_name) 51 | #n = f.signals_in_file 52 | #time = f.getNSamples()[0] 53 | #96th subject time is 9600 instead of 9760 54 | for j in range(n): 55 | x[i,j, :] = f.readSignal(j)[:data_length] 56 | if(i==1): 57 | signal_channel = f.getSignalLabels() 58 | f._close() 59 | del f 60 | 61 | return x, signal_channel 62 | 63 | win_size = 160 64 | step = 160*0+80 #1-window*alpha% 65 | Fs = 160 66 | Ts = 1/Fs 67 | 68 | Labels = np.linspace(1,subject_num,subject_num) #data label 69 | x_original_all, signal_channel = load_dataset() #import data for all subject icluding all tasks 70 | 71 | loss_function = 3 #3 loss function is defined 72 | decoder_adj = True #include new decoder model 73 | 74 | def invlogit(z): #convert decoded adjancy matrix to original space 75 | return 1 - 1 /(1 + np.exp(z)) 76 | 77 | #Graph Variational Auto Encoder 78 | class GCNModelVAE(tf.keras.Model): 79 | def __init__(self, num_features, num_nodes, features_nonzero, **kwargs): 80 | super().__init__(**kwargs) 81 | self.input_dim = num_features 82 | self.features_nonzero = features_nonzero 83 | self.n_samples = num_nodes 84 | self.hidden_dim = 85 | self.hidden_dim2 = 86 | 
if(loss_function==1 or loss_function==3): 87 | self.dimension = 88 | else: 89 | self.dimension = 90 | self.hidden1 = GraphConvolution(input_dim = self.input_dim, 91 | output_dim = self.hidden_dim, num = 1, 92 | act = lambda x: x) #Convolutional layer 93 | """ 94 | self.hidden12 = GraphConvolution(input_dim = self.hidden_dim, 95 | output_dim = self.hidden_dim, num = 4, 96 | act = tf.nn.relu) 97 | #""" 98 | self.hidden2 = GraphConvolution(input_dim = self.hidden_dim, 99 | output_dim = self.dimension*2, num = 2, 100 | act = lambda x: x) 101 | self.d = InnerProductDecoder(act = lambda x: x) 102 | if(loss_function==1 or loss_function==3): 103 | if(decoder_adj): 104 | self.d1 = GraphConvolution(input_dim = 1, 105 | output_dim = self.n_samples, num = 3, 106 | act = lambda x: x) 107 | else: 108 | self.d1 = GraphConvolution(input_dim = self.n_samples, 109 | output_dim = self.n_samples, num = 3, 110 | act = lambda x: x) 111 | 112 | #encoder model 113 | def encoder(self, inputs, adj, rate): 114 | x = self.hidden1(inputs, adj, rate) 115 | #x = tf.keras.layers.BatchNormalization()(x) 116 | #x = self.hidden12(x, adj, rate) 117 | x = self.hidden2(x, adj, rate) 118 | mean, logvar = tf.split(x, num_or_size_splits=2, axis=2) 119 | return mean, logvar 120 | 121 | # reparameterization trick 122 | def reparameterize(self, mean, logvar): 123 | eps = tf.random.normal([self.n_samples, self.dimension]) 124 | return eps * (tf.exp(logvar)) + mean 125 | 126 | #decoder model 127 | def decoder(self, z, adj, rate=0., apply_sigmoid=False): 128 | logits = z 129 | logits = self.d(logits,0.) 130 | if(loss_function==1 or loss_function==3): 131 | if(decoder_adj): 132 | feature = tf.ones((logits.shape[0],logits.shape[1],1)) 133 | logits = self.d1(feature,logits,rate) 134 | else: 135 | logits = self.d1(logits,adj,rate) 136 | logits = tf.reshape(logits, [-1,self.n_samples*self.n_samples]) 137 | if apply_sigmoid: 138 | probs = tf.sigmoid(logits) 139 | return probs 140 | return logits 141 | 142 | lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay( 143 | initial_learning_rate=1e-4, 144 | decay_steps=10000, 145 | decay_rate=0.9) #learning rate 146 | 147 | #VAE optimizer model 148 | class OptimizerVAE(object): 149 | def __init__(self, model, num_nodes,num_features,norm): 150 | self.norm = norm 151 | self.num_nodes = num_nodes 152 | self.num_features = num_features 153 | self.optimizer = tf.keras.optimizers.Adam(learning_rate = lr_schedule) 154 | def log_normal_pdf(self,sample, mean, logsd, raxis=[1,2]): 155 | logvar = 2 * logsd 156 | log2pi = tf.math.log(2. * np.pi) 157 | out = tf.reduce_sum(-.5 * (tf.multiply((sample - mean) ** 2., tf.exp(-logvar)) + logvar + log2pi),axis=raxis) 158 | return out 159 | def bernoulli_log_density(self,logit,x): 160 | b = (x * 2) - 1 161 | return - tf.math.log(1 + tf.exp(-tf.multiply(b,logit))) 162 | def loss(self,y,x,adj,rate, model): 163 | mean, logvar = model.encoder(x,adj,rate) 164 | reparam = model.reparameterize(mean,logvar) 165 | reconstruct = model.decoder(reparam, adj, rate) 166 | preds_sub = tf.reshape(reconstruct, [-1,self.num_nodes,self.num_nodes]) 167 | logpz = self.log_normal_pdf(reparam, 0., 0.) 
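# Editor's note (descriptive comment, not in the original VAE2.py): log_normal_pdf returns the log-density of the
# sampled latent, summed over the node and latent dimensions (raxis=[1,2]). The call above evaluates it under the
# standard-normal prior (mean 0, log-std 0), while the call below evaluates it under the encoder's approximate
# posterior q(z|x); their difference enters the variational objective assembled in the return statements that follow.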
168 | logqz_x = self.log_normal_pdf(reparam, mean, logvar) 169 | if(loss_function==3): 170 | logpx_z = tf.reduce_sum(self.bernoulli_log_density(preds_sub,tf.cast(y,tf.float32)),[1,2]) 171 | return -tf.reduce_mean(logpx_z - ((logpz - logqz_x))) 172 | else: 173 | cross_ent = tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.cast(y,tf.float32), logits=tf.cast(preds_sub,tf.float32)) 174 | logpx_z = tf.reduce_sum(cross_ent, axis=[1, 2]) 175 | return tf.reduce_mean(logpx_z + ((logpz - logqz_x))) 176 | def loss2(self,y, x,adj,rate, model): 177 | mean, logvar = model.encoder(x,adj,rate) 178 | reparam = model.reparameterize(mean,logvar) 179 | reconstruct = model.decoder(reparam, adj, rate) 180 | preds_sub = tf.reshape(reconstruct, [-1,self.num_nodes,self.num_nodes]) 181 | cost = self.norm * tf.reduce_mean(tf.reduce_sum( 182 | tf.nn.sigmoid_cross_entropy_with_logits(labels = tf.cast(y,tf.float32), 183 | logits = preds_sub),[1,2])) 184 | kl = (0.5 / num_nodes) * \ 185 | tf.reduce_mean(tf.reduce_sum(1 \ 186 | + 2 * logvar \ 187 | - tf.square(mean) \ 188 | - tf.square(tf.exp(logvar)), [1,2])) 189 | cost -= kl 190 | return cost 191 | def train_step(self,y,x,adj,rate,model): 192 | with tf.GradientTape() as tape: 193 | if(loss_function== 3 or loss_function==1): 194 | cost = self.loss(y,x,adj,rate, model) 195 | else: 196 | cost = self.loss2(y,x,adj,rate, model) 197 | assert not np.any(np.isnan(cost.numpy())) 198 | gradients = tape.gradient(cost, model.trainable_variables) 199 | opt_op = self.optimizer.apply_gradients(zip(gradients, model.trainable_variables)) 200 | return cost 201 | 202 | channel7 = False # two dataset 203 | if(channel7): 204 | Binary = False 205 | else: 206 | Binary = True 207 | Part_channel = False; partial_subject = False; part_channel = False 208 | 209 | def Adj_matrix(train_x, test_x): 210 | if(Binary): 211 | #Change weighted matrix to binary matrix with threshold 212 | percentile = 0.75 213 | adj_train = A_binarize(A_matrix=train_x,percent=percentile,sparse=False) 214 | adj_test = A_binarize(A_matrix=test_x,percent=percentile,sparse=False) 215 | #sparse matrix 216 | else: 217 | adj_train = deepcopy(train_x) 218 | adj_test = deepcopy(test_x) 219 | #consider part of the graph 220 | if(Part_channel): 221 | index = creating_label(ztr,y_train,subject_num,method='mean_sort') 222 | adj_train = adj_train[:,:,index] 223 | adj_train = adj_train[:,index] 224 | adj_test = adj_test[:,:,index] 225 | adj_test = adj_test[:,index] 226 | print("sparsity: ",scipy.sparse.issparse(adj_train[9])) #check sparsity 227 | print("rank: ",np.linalg.matrix_rank(adj_train[9])) #check matrix rank 228 | return adj_train, adj_test 229 | 230 | FLAGS_features = False #include predefined feature or not 231 | if not FLAGS_features: 232 | features_init_train = None 233 | else: 234 | features_init_train = deepcopy(train_x) 235 | if not FLAGS_features: 236 | features_init_test = None 237 | else: 238 | features_init_test = deepcopy(test_x) 239 | 240 | verbose = True 241 | nb_run = 5 #5-fold cross validation 242 | accuracy = np.zeros((nb_run,1)) 243 | accuracy2 = np.zeros((nb_run,1)) 244 | Computational_time = np.zeros((nb_run,1)) 245 | num_epoch = np.zeros((nb_run,1)) 246 | full_time = np.zeros((nb_run,1)) 247 | roc_auc = np.zeros((nb_run,1)) 248 | EER = np.zeros((nb_run,1)) 249 | 250 | for i in range(nb_run): 251 | t_start = time.time() 252 | if verbose: 253 | print("Creating Adjacency matrix...") 254 | 255 | #EEG Data Preprocessing 256 | if(channel7): 257 | train_x, test_x, y_train, y_test = 
preprocess_data(x_original[:,:,Fs*9:],Labels,i,Fs, 258 | dataset2=False,filt=True,ICA=True,A_Matrix='cov',sec=1) 259 | else: 260 | train_x, test_x, y_train, y_test = preprocess_data(x_original_all[:,0],Labels,i,Fs,dataset2=False, 261 | filt=False,ICA=True,A_Matrix='cov',sec=12) 262 | #A_matrix = 'cov' 'plv' 'iplv' 'pli' 'AEC' 263 | adj_train, adj_test = Adj_matrix(train_x, test_x) #Creating brain graph 264 | #Initialization 265 | if verbose: 266 | print("Preprocessing and Initializing...") 267 | # Compute number of nodes 268 | num_nodes = adj_train.shape[1] 269 | # If features are not used, replace feature matrix by identity matrix 270 | I = (np.tile(np.eye(adj_train.shape[1]),adj_train.shape[0]).T).reshape(-1,adj_train.shape[1],adj_train.shape[1]) 271 | I_test = (np.tile(np.eye(adj_test.shape[1]),adj_test.shape[0]).T).reshape(-1,adj_test.shape[1],adj_test.shape[1]) 272 | if not FLAGS_features: 273 | features = np.ones((adj_train.shape[0],adj_train.shape[1],1)) 274 | #features = deepcopy(I) 275 | else: 276 | features = deepcopy(features_init_train) 277 | # Preprocessing on node features 278 | num_features = features.shape[2] 279 | features_nonzero = np.count_nonzero(features)//features.shape[0] 280 | # Normalization and preprocessing on adjacency matrix 281 | adj_norm = graph_norm(adj_train) 282 | adj_label = adj_train + I 283 | 284 | adj_norm_test = graph_norm(adj_test) 285 | adj_label_test = adj_test + I_test 286 | if not FLAGS_features: 287 | features_test = np.ones((adj_test.shape[0],adj_test.shape[1],1)) 288 | #features_test = deepcopy(I_test) 289 | else: 290 | features_test = deepcopy(features_init_test) 291 | #""" 292 | num_part = 19 293 | if(partial_subject): 294 | del_index = np.where(y_train>=num_part)[0] 295 | adj_train_par = np.delete(adj_train,del_index,axis=0) 296 | adj_n = np.delete(adj_norm,del_index,axis=0) 297 | adj_l = np.delete(adj_label,del_index,axis=0) 298 | feat = np.delete(features,del_index,axis=0) 299 | train_dataset = (tf.data.Dataset.from_tensor_slices((adj_n,adj_l,feat)) 300 | .shuffle(len(adj_n)).batch(64)) 301 | norm = adj_train_par.shape[1] * adj_train_par.shape[1] / float((adj_train_par.shape[1] * adj_train_par.shape[1] 302 | - (adj_train_par.sum()/adj_train_par.shape[0])) * 2) 303 | else: 304 | train_dataset = (tf.data.Dataset.from_tensor_slices((adj_norm,adj_label,features)) 305 | .shuffle(len(adj_norm)).batch(64)) 306 | norm = adj_train.shape[1] * adj_train.shape[1] / float((adj_train.shape[1] * adj_train.shape[1] 307 | - (adj_train.sum()/adj_train.shape[0])) * 2) 308 | rate_test = 0 309 | # VAE model 310 | VAEmodel = GCNModelVAE(num_features, num_nodes,features_nonzero) 311 | # Optimizer 312 | opt = OptimizerVAE(model = VAEmodel, num_nodes = num_nodes, 313 | num_features=num_features, norm=norm) 314 | # Model training 315 | if verbose: 316 | print("Training...") 317 | prev_cost = 100000 318 | stop_val = 0 319 | stop_num = 10 320 | FLAGS_shuffle = False 321 | for epoch in range(1000): 322 | t = time.time() 323 | # Compute average loss 324 | loss = 0 325 | for adj, label, x in train_dataset: 326 | loss += opt.train_step(label,tf.cast(x,tf.float32),tf.cast(adj,tf.float32), 0.5, VAEmodel) 327 | avg_cost = loss.numpy() / (len(adj_train)) 328 | if verbose: 329 | # Display epoch information 330 | print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(round(avg_cost,3)), 331 | "time=", "{:.5f}".format(round(time.time() - t,3))) 332 | Computational_time[i] += (time.time() - t) 333 | num_epoch[i] +=1 334 | #When to stop the iteration 335 | 
if(prev_cost < avg_cost): 336 | stop_val += 1 337 | if (stop_val == stop_num): 338 | break 339 | else: 340 | stop_val = 0 341 | prev_cost = avg_cost 342 | Computational_time[i] = Computational_time[i]/num_epoch[i] 343 | print("computational time for each epoch: ",np.round(Computational_time[i],3)) 344 | if(partial_subject and part_channel): 345 | test_index = np.where(y_test>=5)[0] 346 | n_partial = 32 347 | n = adj_train.shape[1] 348 | prev_norm = tf.cast((np.mean(adj_train,keepdims=True,axis=0)),tf.float32) 349 | A_test = np.tile(graph_norm(prev_norm),len(test_index)).reshape(-1,n,n) 350 | A_test[:,:n_partial,:n_partial] = graph_norm(adj_test[test_index,:n_partial,:n_partial]) 351 | adj_norm_test[test_index] = A_test 352 | meanr,logvarr = VAEmodel.encoder(tf.cast(features,tf.float32),tf.cast(adj_norm,tf.float32), 0.) 353 | ztr = VAEmodel.reparameterize(meanr,logvarr) 354 | meane,logvare = VAEmodel.encoder(tf.cast(features_test,tf.float32),tf.cast(adj_norm_test,tf.float32), 0.) 355 | zte = VAEmodel.reparameterize(meane,logvare) 356 | train_feature = deepcopy(ztr).numpy().reshape(len(ztr),-1) 357 | test_feature = deepcopy(zte).numpy().reshape(len(zte),-1) 358 | 359 | Class_method = "SVM" 360 | svm_prob = False 361 | if(Class_method == "KNN"): 362 | classifier = KNeighborsClassifier(n_neighbors=5) 363 | classifier.fit(train_feature, y_train) 364 | t = time.time() 365 | test_pred = classifier.predict(test_feature) 366 | print("testing time: ", time.time()-t) 367 | accuracy[i] = 100 * np.sum(test_pred==(y_test)) / len(test_pred) 368 | print("accuracy: ", np.round(accuracy[i],3)) 369 | elif(Class_method == "bayes"): 370 | mnb = MultinomialNB() 371 | test_pred = mnb.fit(train_feature-np.min(train_feature), y_train).predict(test_feature-np.min(test_feature)) 372 | accuracy2[i] = 100 * np.sum(test_pred==(y_test)) / len(test_pred) 373 | print("accuracy: ", np.round(accuracy2[i],3)) 374 | else: 375 | #SVM better than naive baise 376 | clf = svm.SVC(gamma='scale', probability=svm_prob) 377 | clf.fit(train_feature,y_train) 378 | t = time.time() 379 | test_pred = clf.predict(test_feature) 380 | print("testing time: ", time.time()-t) 381 | accuracy[i] = 100 * np.sum(test_pred==(y_test)) / len(test_pred) 382 | print("accuracy: ", np.round(accuracy[i],3)) 383 | full_time[i] = time.time()-t_start 384 | print("full time: ",np.round(full_time[i],3)) 385 | if(svm_prob and Class_method=="SVM"): 386 | test_pred_proba = clf.predict_proba(test_feature) 387 | eer, _, _, roc = EER_calculation(y_test,test_pred_proba,subject_num) 388 | EER[i], roc_auc[i] = np.round(np.mean(eer),4),np.round(np.mean(roc),3) 389 | print("EER: {} and ROC: {}".format(EER[i],roc_auc[i])) 390 | else: 391 | eer, _, _, roc = EER_calculation(y_test,test_pred,subject_num) 392 | EER[i], roc_auc[i] = np.round(np.mean(eer),4),np.round(np.mean(roc),3) 393 | print("EER: {} and ROC: {}".format(EER[i],roc_auc[i])) 394 | 395 | print("final EER: {} and ROC: {}".format(np.round(np.mean(EER),4),np.round(np.mean(roc_auc),3))) 396 | print("final accuracy: ", np.round(np.mean(accuracy),3),np.round(np.var(accuracy),3)) 397 | print("final computation time: ",np.round(np.mean(Computational_time),3)) 398 | print("final num epochs: ",np.round(np.mean(num_epoch),3)) 399 | print("final full time: ",np.round(np.mean(full_time/60),3)) 400 | 401 | """ 402 | Atr = VAEmodel.decoder(ztr,tf.cast(adj_norm,tf.float32),0.).numpy().reshape(-1,num_nodes,num_nodes) - I 403 | Atr = invlogit(Atr) 404 | Ate = 
VAEmodel.decoder(zte,tf.cast(adj_norm_test,tf.float32),0.).numpy().reshape(-1,num_nodes,num_nodes) - I_test 405 | Ate = invlogit(Ate) 406 | """ 407 | """ 408 | #saving and loading the model 409 | VAEmodel.save_weights('./VAE_model/bin10') 410 | VAEmodel = GCNModelVAE(num_features, num_nodes,features_nonzero) 411 | VAEmodel.load_weights('./VAE_model/bin10') 412 | 413 | import pickle 414 | pickle.dump(clf, open('./VAE_model/SVM', 'wb')) 415 | clf = pickle.load(open('./VAE_model/SVM', 'rb')) 416 | clf.support_vectors_.shape #space complexity 417 | """ 418 | -------------------------------------------------------------------------------- /brainprint.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import networkx.algorithms as nl 3 | from graphfeatures import matrix_feature 4 | from sklearn.metrics import confusion_matrix 5 | from copy import deepcopy 6 | from sklearn import svm 7 | import time 8 | from pre_func import dataset2_indices, preprocess_data 9 | from evaluation import EER_calculation 10 | 11 | def invlogit(z): 12 | return 1 - 1 /(1 + np.exp(z)) 13 | 14 | def Maha_dist(test_f, train_f): 15 | #train_f = train_f-np.mean(train_f,axis=1,keepdims=1) 16 | C = np.cov(train_f.T) 17 | C_inv = np.linalg.pinv(C) 18 | u = np.mean(train_f,axis=0,keepdims=1) 19 | v = test_f - np.mean(test_f,axis=1,keepdims=1) 20 | D = np.dot(np.dot((v-u) , C_inv) , (v-u).T) 21 | return D.diagonal() 22 | 23 | connected = 1 24 | VAE = False 25 | 26 | nb_run = 5 27 | accuracy = np.zeros((nb_run,1)) 28 | Computational_time = np.zeros((nb_run,1)) 29 | full_time = np.zeros((nb_run,1)) 30 | roc_auc = np.zeros((nb_run,1)) 31 | EER = np.zeros((nb_run,1)) 32 | 33 | for i in range(nb_run): 34 | t_begin = time.time() 35 | train_x, test_x, y_train, y_test = preprocess_data(physio_data[:,:n,:],Labels,i, 36 | Fs,dataset2=False,filt=False, 37 | ICA=True,A_Matrix='plv',sec=2) 38 | n = train_x.shape[2] 39 | print('training features') 40 | t_start = time.time() 41 | train_feature = matrix_feature(train_x,n,connected) 42 | Computational_time[i] = time.time()-t_start 43 | print("feature extraction time: ", Computational_time[i]) 44 | 45 | print('testing features') 46 | test_feature = matrix_feature(test_x,n,connected) 47 | 48 | discriminator = 'Maha_dist' 49 | if(discriminator=='Maha_dist'): 50 | sort_index = np.argsort(y_train) 51 | train_f_sort = train_feature[sort_index,:] 52 | num = len(sort_index)//subject_num 53 | pred = np.zeros((test_feature.shape[0],subject_num)) 54 | 55 | for j in range(subject_num): 56 | pred[:,j] = Maha_dist(test_feature,train_f_sort[j*num:(j+1)*num,:]) 57 | 58 | test_pred = np.argmin(pred,axis=1) 59 | accuracy[i] = 100 * np.sum(test_pred==(y_test-1)) / len(test_pred) 60 | c = confusion_matrix(y_test-1, test_pred) 61 | elif(discriminator=='SVM'): 62 | #clf = svm.SVC(probability=True,gamma='scale') 63 | clf = svm.SVC(gamma='scale') 64 | clf.fit(train_feature,y_train) 65 | test_pred = clf.predict(test_feature) 66 | accuracy = 100 * np.sum(test_pred==(y_test)) / len(test_pred) 67 | c = confusion_matrix(y_test, test_pred) 68 | else: 69 | raise Exception("non-existing model") 70 | 71 | 72 | print("accuracy: ", accuracy[i]) 73 | crr = np.sum(c.diagonal())/(np.sum(c)) 74 | print("CRR: ", crr) 75 | full_time[i] = time.time()-t_begin 76 | print("whole time: ",full_time[i]) 77 | eer, _, _, roc = EER_calculation((y_test-1),test_pred,subject_num) 78 | EER[i], roc_auc[i] = np.round(np.mean(eer),4),np.round(np.mean(roc),3) 79 | print("EER: {} and ROC: 
{}".format(EER[i],roc_auc[i])) 80 | 81 | print("final EER: {} and ROC: {}".format(np.round(np.mean(EER),4),np.round(np.mean(roc_auc),3))) 82 | print("final accuracy: ", np.round(np.mean(accuracy),3),np.round(np.var(accuracy),3)) 83 | print("final computation time: ",np.round(np.mean(Computational_time),3)) 84 | print("final full time: ",np.round(np.mean(full_time/60),3)) 85 | -------------------------------------------------------------------------------- /clustering.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import networkx as nx 3 | import numpy as np 4 | from scipy.sparse.csgraph import minimum_spanning_tree 5 | from graphfeatures import Degree 6 | import scipy.linalg as la 7 | from collections import Counter 8 | from sklearn.cluster import SpectralClustering, AffinityPropagation, AgglomerativeClustering 9 | from sklearn.cluster import KMeans 10 | from copy import deepcopy 11 | import random 12 | import os 13 | from torch_cluster import graclus_cluster 14 | import torch_geometric.utils as g_utils 15 | from scipy import sparse 16 | 17 | #loading matrices 18 | #Dict = '/Users/tina/Documents/EEG_graph_project/simulation/data' 19 | #train_matrix = np.load(Dict+'/cov.npy') 20 | 21 | def A_binarize(A_matrix,percent=0.25,Model='cov',sparse=True): 22 | #threshold 23 | if(A_matrix.ndim==3): 24 | n_subject = A_matrix.shape[0] 25 | n_nodes = A_matrix.shape[1] 26 | else: 27 | n_subject = 1 28 | n_nodes = A_matrix.shape[0] 29 | A_matrix = A_matrix.reshape(n_subject,n_nodes*n_nodes) 30 | if Model == 'pli': 31 | quant = 1-percent 32 | else: 33 | quant = percent 34 | thresh = np.quantile(A_matrix, quant, axis = 1, keepdims=1) 35 | #binary matrix 36 | if Model == 'pli': 37 | bA_matrix = (A_matrix < thresh) 38 | else: 39 | bA_matrix = (A_matrix >= thresh) 40 | if(not(sparse)): 41 | if(n_subject==1): 42 | return bA_matrix.reshape(n_nodes,n_nodes) 43 | return bA_matrix.reshape(-1,n_nodes,n_nodes) 44 | A = deepcopy(A_matrix) 45 | A[~bA_matrix] = 0 46 | if(n_subject==1): 47 | return A.reshape(n_nodes,n_nodes) 48 | return A.reshape(n_subject,n_nodes,n_nodes) 49 | 50 | def graph_representation(train_A,graph_num=None,Prop='cluster_C',plotting=True,sort=True,laplacian=False): 51 | if(graph_num==None): 52 | graph_num = random.randint(1,len(train_A))-1 53 | n = train_A.shape[1] 54 | if(Prop=='degree_D_first'): 55 | #degree distribution 56 | train_bA = A_binarize(train_A) 57 | if(plotting): plt.figure(); u = plt.hist(np.diag(Degree(train_bA[graph_num]))) 58 | m = (u[1][1:len(u[1])] - u[1][0:len(u[1])-1])/2 + u[1][0:len(u[1])-1] 59 | if(plotting): plt.bar(m,np.divide(u[0],m)) #p(K) = N_K/K 60 | return m,u 61 | elif(Prop=='degee_D'): 62 | m = Counter(np.sort(np.diag(Degree(train_bA[graph_num])))) 63 | u = np.divide(np.array(list(m.values())),np.array(list(m.keys()))) 64 | if(plotting): plt.bar(m.keys(), u); plt.show() 65 | return m,u 66 | elif(Prop=='cluster_C' or Prop=='cluster_C_avg'): 67 | #clustering coefficient 68 | A = nx.Graph(train_bA[graph_num]) 69 | if(Prop=='cluster_C_avg'): 70 | return nx.average_clustering(A) 71 | c = nx.clustering(A) #np.max(list(c.values())) 72 | return c 73 | elif(Prop=='Laplacian'): 74 | #Laplacian matrix5 75 | train_L = Degree(train_A)-train_A 76 | D_inv = Degree((np.sum(train_A,axis=2)**(-0.5)).reshape(train_A.shape[0],train_A.shape[1],1)) 77 | train_Lhat = D_inv * train_L * D_inv 78 | return train_Lhat 79 | elif(Prop=='Spectral'): 80 | #spectral 81 | if(laplacian): 82 | A = Degree(train_A)-train_A 83 
| else: 84 | A = train_A 85 | eigvals, eigvecs = la.eig(A[graph_num]) 86 | eigvals = eigvals.real #symmetric 87 | if(plotting): plt.plot(np.arange(64), np.sort(eigvals),'bo')#number of clusters 88 | #u = eigvecs.T @ np.diag(eigvals) @ eigvecs 89 | #np.allclose(A[95],u) #true 90 | if(sort): 91 | #sort based on the eigenvalues 92 | vecs = eigvecs[:,np.argsort(eigvals)] 93 | vals = eigvals[np.argsort(eigvals)] 94 | return vals, vecs 95 | return eigvals, eigvecs 96 | elif(Prop=='shortest_path_binary'): 97 | #shortest path for binary A 98 | G = nx.Graph(train_A[graph_num].reshape(n,n)) 99 | path = nx.shortest_path(G) #binary #max=4 100 | return path 101 | elif(Prop=='shortest_path_weighted'): 102 | #shortest path 103 | G = nx.Graph(train_A[graph_num].reshape(n,n)) 104 | path = nx.all_pairs_dijkstra_path(G) #weighted 105 | return path 106 | elif(Prop=='diameter'): 107 | #diameter of binary connected graph A 108 | G = nx.Graph(train_A[graph_num].reshape(n,n)) 109 | return nx.diameter(G) 110 | elif(Prop=='B_centrality'): 111 | G = nx.Graph(train_A[graph_num].reshape(n,n)) 112 | return nx.betweenness_centrality(G) 113 | elif(Prop=='D_centrality'): 114 | G = nx.Graph(train_A[graph_num].reshape(n,n)) 115 | return nx.degree_centrality(G) 116 | else: 117 | raise Exception("non-existing attribute") 118 | 119 | def graph_clustering(A_matrix,method,n_clusters,ratio=None,graph_num=None,plotting=True,Mean=False): 120 | if(graph_num==None): 121 | graph_num = random.randint(1,len(A_matrix))-1 122 | if(Mean): 123 | graph_num = 0; A_matrix = np.mean(A_matrix,axis=0,keepdims=True) 124 | n = A_matrix.shape[1] 125 | if(method=='kmeans'): 126 | #kmeans on first n vectors with nonzero eigenvalues 127 | _, vecs = graph_representation(train_A=A_matrix,graph_num=graph_num,Prop='Spectral',plotting=False) 128 | kmeans = KMeans(n_clusters=n_clusters) 129 | kmeans.fit(vecs[:,1:n_clusters].reshape(-1,n_clusters-1)) 130 | if(ratio==None): 131 | return kmeans.labels_ 132 | num = np.sum(kmeans.labels_) 133 | ind = 0 if num>(n//2) else 1 134 | prob = (kmeans.fit_transform(vecs[:,1:n_clusters].reshape(-1,n_clusters-1))) 135 | thresh = np.quantile(prob[:,ind], ratio) 136 | return (prob[:,ind] >= thresh) 137 | elif(method=='Spectral_clustering'): 138 | adjacency_matrix = A_matrix[graph_num].reshape(n,n) 139 | sc = SpectralClustering(n_clusters, affinity='precomputed', n_init=100, 140 | assign_labels='discretize') 141 | Class = sc.fit_predict(adjacency_matrix) 142 | if(plotting): 143 | Ab_matrix = A_binarize(A_matrix) 144 | G = nx.Graph(Ab_matrix[graph_num]) 145 | plt.figure(); nx.draw(G, node_size=200, pos=nx.spring_layout(G)); plt.show() 146 | plt.figure(); nx.draw(G, node_color=Class, node_size=200, pos=nx.spring_layout(G)); plt.show() 147 | return Class 148 | elif(method=='Affinity_propagation'): 149 | _, vecs = graph_representation(train_A=A_matrix,graph_num=graph_num,Prop='Spectral',plotting=False) 150 | clustering = AffinityPropagation().fit(vecs[:,1:n_clusters]) 151 | elif(method=='Agglomerative_clustering'): 152 | _, vecs = graph_representation(train_A=A_matrix,graph_num=graph_num,Prop='Spectral',plotting=False) 153 | clustering = AgglomerativeClustering(n_clusters=n_clusters).fit(vecs[:,1:n_clusters].reshape(-1,n_clusters-1)) 154 | elif(method=='Graclus'): 155 | sA = sparse.csr_matrix(A_matrix[graph_num]) 156 | edge_index, edge_weight = g_utils.from_scipy_sparse_matrix(sA) 157 | cluster = graclus_cluster(edge_index[0], edge_index[1], edge_weight) 158 | return cluster.numpy() 159 | else: 160 | raise Exception("non-existing 
clustering method") 161 | return clustering.labels_ 162 | 163 | def MST(train_A,graph_num=100,printing=False,method=2): 164 | #MST minimum spanning tree 165 | if(graph_num==None): 166 | graph_num = random.randint(1,len(train_A))-1 167 | n = train_A.shape[1] 168 | if(method==1): 169 | Tcsr = minimum_spanning_tree(train_A[graph_num].reshape(n,n)) 170 | E = Tcsr.toarray().astype(int) 171 | G3 = nx.Graph(Tcsr) 172 | return E, G3 173 | G2 = nx.Graph(train_A[graph_num].reshape(n,n)) 174 | T = nx.minimum_spanning_tree(G2) 175 | if(printing): print(sorted(T.edges(data=True))) 176 | return T 177 | 178 | def creating_label(features,y,subject_num,num_node = 20,method='mean_sort',s_num=None): 179 | features_cluster = [features[(y==(i+1))].numpy().reshape(features.shape[1],-1) for i in range(subject_num)] 180 | features_cluster = np.array(features_cluster) 181 | if(s_num==None): 182 | s_num = random.randint(1,len(features_cluster))-1 183 | if(method=='cluster'): 184 | kmeans = KMeans(n_clusters=num_node, random_state=0).fit(features_cluster[s_num]) 185 | label = np.array(kmeans.labels_) 186 | index = np.zeros((num_node),dtype=int) 187 | for i in range(num_node): 188 | index[i] = np.where(label==i)[0][0] 189 | elif(method=='mean_sort'): 190 | index = np.argsort(np.mean(features_cluster,axis=2))[s_num] 191 | index = np.sort(index[(len(index)-num_node):]) 192 | elif(method=='max_sort'): 193 | index = np.argsort(np.max(features_cluster,axis=2))[s_num] 194 | index = np.sort(index[(len(index)-num_node):]) 195 | else: 196 | index = np.sort(random.sample(range(features.shape[1]),num_node)) 197 | return index 198 | 199 | -------------------------------------------------------------------------------- /evaluation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.metrics import roc_curve, auc 3 | import tensorflow as tf 4 | import matplotlib.pyplot as plt 5 | 6 | def EER_calculation(y_test,test_pred,subject_num): 7 | y = tf.keras.utils.to_categorical(y_test-1, num_classes=subject_num) 8 | if(test_pred.ndim==1): 9 | y_pred = tf.keras.utils.to_categorical(test_pred-1, num_classes=subject_num) 10 | else: 11 | y_pred = test_pred 12 | fpr = dict() 13 | tpr = dict() 14 | roc_auc = np.zeros(subject_num) 15 | eer = np.zeros(subject_num) 16 | for i in range(subject_num): 17 | fpr[i], tpr[i], _ = roc_curve(y[:, i], y_pred[:, i]) 18 | roc_auc[i] = auc(fpr[i], tpr[i]) 19 | fnr = 1 - tpr[i] 20 | eer[i] = fnr[np.nanargmin(np.absolute((fnr - fpr[i])))] 21 | return eer, fpr, tpr, roc_auc -------------------------------------------------------------------------------- /graphfeatures.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import networkx.algorithms as nl 3 | import networkx as nx 4 | 5 | def Degree(M): 6 | if(M.ndim==3): 7 | d = np.sum(M, axis=2) 8 | D = np.zeros((d.shape[0], d.shape[1]*d.shape[1])) 9 | D[:, ::d.shape[1]+1] = d 10 | D = D.reshape(d.shape[0],d.shape[1],d.shape[1]) 11 | for i in range(d.shape[1]): 12 | D[:,i,i] = d[:,i] 13 | else: 14 | D = np.diag(np.sum(M,axis=1)) 15 | return D 16 | 17 | def global_eff(G): 18 | sp = dict(nl.shortest_paths.generic.shortest_path_length(G,weight='weight')) 19 | n = len(sp) 20 | n_eff = np.zeros(n) 21 | for i in range(n): 22 | sps = sp; sps[i][i] = 1000 23 | spi = (np.array(list(sps[i].values()))) 24 | n_eff[i] = 1 / np.min(spi) 25 | g_eff = np.mean(n_eff)/(n-1) 26 | 27 | return np.mean(g_eff) 28 | 29 | def matrix_feature(M,n,connected): 30 
| features = np.zeros((M.shape[0],6+n)) 31 | for i in range(M.shape[0]): 32 | G = nx.Graph(M[i]) 33 | #degree = np.sum(M[i],axis=1) 34 | #betw = np.array(list(nx.betweenness_centrality(G,weight='weight').values())) 35 | pr = np.array(list(nx.pagerank(G,weight='weight').values())) 36 | #clu = np.array(list(nx.clustering(G,weight='weight').values())) 37 | #eig = np.array(list(nx.eigenvector_centrality(G,weight='weight').values())) 38 | #nodal_par = np.array([degree,betw,pr,clu,eig]).reshape(-1) 39 | 40 | #transitivity 41 | tra = nl.cluster.transitivity(G) 42 | #modularity 43 | di_M = M[i]>0 44 | k = np.sum(di_M,axis=1) 45 | l = np.sum(k) 46 | mod = np.sum(np.multiply((M[i] - (k.T * k)/l),di_M)) / l 47 | if(connected==1): 48 | #path length 49 | path = nl.shortest_paths.generic.average_shortest_path_length(G,weight='weight') 50 | #diameter 51 | dia = nl.distance_measures.diameter(G) 52 | #radius 53 | ra = nl.distance_measures.radius(G) 54 | else: 55 | if(nx.is_connected(G)): 56 | path = nl.shortest_paths.generic.average_shortest_path_length(G,weight='weight') 57 | ra = nl.distance_measures.radius(G) 58 | dia = nl.distance_measures.diameter(G) 59 | else: 60 | path = 10000 61 | dia = 10000 62 | ra = 10000 63 | """ 64 | for g in nx.connected_component_subgraphs(G): 65 | path = np.maximum(nl.shortest_paths.generic.average_shortest_path_length(g),path) 66 | ra = np.maximum(nl.distance_measures.radius(g),ra) 67 | dia = np.maximum(nl.distance_measures.diameter(g),dia) 68 | """ 69 | #global efficiency 70 | gf = global_eff(G) 71 | global_par = np.array([tra,mod,path,gf,dia,ra]).reshape(-1) 72 | features[i,:] = np.concatenate((global_par,pr)) 73 | return features 74 | 75 | def node_features(M,n): 76 | features = np.zeros((M.shape[0],n,2)) 77 | for i in range(M.shape[0]): 78 | G = nx.Graph(M[i]) 79 | degree = np.sum(M[i],axis=1) 80 | clu = np.array(list(nx.clustering(G,weight='weight').values())) 81 | """ 82 | betw = np.array(list(nx.betweenness_centrality(G,weight='weight').values())) 83 | pr = np.array(list(nx.pagerank(G,weight='weight').values())) 84 | eig = np.array(list(nx.eigenvector_centrality(G,weight='weight').values())) 85 | """ 86 | features[i,:] = np.array([degree,clu]).reshape(n,-1) 87 | return features 88 | 89 | def graph_norm(adj): 90 | I = (np.tile(np.eye(adj.shape[1]),adj.shape[0]).T).reshape(-1,adj.shape[1],adj.shape[1]) 91 | adj_ = adj + I 92 | d = np.sum(adj_,axis=2)**(-0.5) 93 | D_inv = np.zeros((d.shape[0], d.shape[1]*d.shape[1])) 94 | D_inv[:, ::d.shape[1]+1] = d 95 | D_inv = D_inv.reshape(d.shape[0],d.shape[1],d.shape[1]) 96 | adj_normalized = np.einsum('ijk,ikm->ijm',adj,D_inv) 97 | adj_normalized = np.einsum('ijk,ikm->ijm',D_inv,adj_normalized) 98 | return np.round(adj_normalized,8) 99 | -------------------------------------------------------------------------------- /pre_func.py: -------------------------------------------------------------------------------- 1 | from scipy.signal import butter, lfilter, filtfilt 2 | from scipy import signal 3 | from sklearn import preprocessing 4 | import numpy as np 5 | from statsmodels.tsa.stattools import adfuller #for stationary check 6 | from sklearn.decomposition import FastICA 7 | from skimage import util 8 | from sklearn.utils import shuffle 9 | from sklearn.utils.testing import ignore_warnings 10 | from sklearn.exceptions import ConvergenceWarning 11 | 12 | def butter_bandpass(lowcut, highcut, fs, order=5): 13 | nyq = 0.5 * fs 14 | low = lowcut / nyq 15 | high = highcut / nyq 16 | b, a = butter(order, [low, high], btype='bandpass') 17 
| return b, a 18 | 19 | 20 | def bandpass_filter(data, freqband, filtertype, fs, order=5): 21 | if (freqband == 'delta'): 22 | lowcut = 0.5 23 | highcut = 4 24 | elif (freqband == 'theta'): 25 | lowcut = 4 26 | highcut = 8 27 | elif (freqband == 'alpha'): 28 | lowcut = 8 29 | highcut = 14 30 | elif (freqband == 'beta'): 31 | lowcut = 14 32 | highcut = 30 33 | elif (freqband == 'gamma'): 34 | lowcut = 30 35 | highcut = 45 36 | elif (freqband == 'all'): 37 | lowcut = .5 38 | highcut = 45 39 | if (filtertype == 'butter'): 40 | b, a = butter_bandpass(lowcut, highcut, fs, order=order) 41 | y = filtfilt(b, a, data) 42 | #y = lfilter(b, a, data) 43 | elif (filtertype == 'fir'): 44 | b = signal.firwin(order,[lowcut, highcut], pass_zero=False, nyq = 0.5*fs) 45 | y = lfilter(b, [1.0], data) 46 | return y 47 | 48 | def notch_filter(data, fs): 49 | f0 = 50.0 # Frequency to be removed from signal (Hz) 50 | Q = 30.0 # Quality factor 51 | b, a = signal.iirnotch(f0, Q, fs) 52 | y = lfilter(b, a, data) 53 | f0 = 60.0 # Frequency to be removed from signal (Hz) 54 | b, a = signal.iirnotch(f0, Q, fs) 55 | #y = lfilter(b, a, data) 56 | y = filtfilt(b, a, y) 57 | """ 58 | f0 = 120.0 # Frequency to be removed from signal (Hz) 59 | b, a = signal.iirnotch(f0, Q, fs) 60 | y = filtfilt(b, a, y) 61 | #""" 62 | return y 63 | 64 | def adj_matrix(train_features_n, test_features_n, win_size,n_sample_train,n_sample_test, n, A_Matrix='cov'): 65 | if (A_Matrix=='cov'): 66 | #covariance matrix 67 | x_train_cov = np.einsum('ijk,ilk->ijl',train_features_n,train_features_n) 68 | x_train = np.abs(x_train_cov) 69 | x_test_cov = np.einsum('ijk,ilk->ijl',test_features_n,test_features_n) 70 | x_test = np.abs(x_test_cov) 71 | elif(A_Matrix=='ICA'): 72 | """ 73 | x_train = [] 74 | for i in range(len(train_features_n)): 75 | transformer = FastICA(n_components=n,random_state=0, tol=0.0001) 76 | transformer.fit_transform(train_features_n[i].T) 77 | x_train.append(transformer.components_) 78 | """ 79 | @ignore_warnings(category=ConvergenceWarning) 80 | def func(x): 81 | transformer = FastICA(n_components=n,random_state=0, tol=0.0001) 82 | transformer.fit_transform(x.T) 83 | return transformer.components_ 84 | x_train = list(map(func, train_features_n)) 85 | #x_train = [transformer.components_ for i in range(len(train_features_n))] 86 | x_test = list(map(func, test_features_n)) 87 | x_train, x_test = np.array(x_train), np.array(x_test) 88 | else: 89 | #phase matrix 90 | H_train = signal.hilbert(train_features_n) 91 | phase_train = (np.angle(H_train)) 92 | H_test = signal.hilbert(test_features_n) 93 | phase_test = (np.angle(H_test)) #np.unwrap 94 | if(A_Matrix=='plv' or A_Matrix=='iplv'): 95 | #PLV_Sample 96 | x_train_plv = np.einsum('ijk,ilk->ijl',np.exp(phase_train*1j),np.exp(phase_train*-1j)) / (win_size - 1) 97 | if(A_Matrix=='iplv'): 98 | x_train = np.abs(x_train_plv.imag) 99 | else: 100 | x_train = np.abs(x_train_plv) 101 | x_test_plv = np.einsum('ijk,ilk->ijl',np.exp(phase_test*1j),np.exp(phase_test*-1j)) / (win_size - 1) 102 | if(A_Matrix=='iplv'): 103 | x_test = np.abs(x_test_plv.imag) 104 | else: 105 | x_test = np.abs(x_test_plv) 106 | elif(A_Matrix=='pli'): 107 | #PLI 108 | x_train_pli = np.zeros((n_sample_train,n,n)) 109 | for i in range(n): 110 | x_train_pli[:,i,:] = np.abs(np.mean(np.sign(phase_train[:,i,:].reshape(n_sample_train,1,win_size)-phase_train),axis=2)) 111 | x_train = x_train_pli 112 | x_test_pli = np.zeros((n_sample_test,n,n)) 113 | for i in range(n): 114 | x_test_pli[:,i,:] = 
np.abs(np.mean(np.sign(phase_test[:,i,:].reshape(n_sample_test,1,win_size)-phase_test),axis=2)) 115 | x_test = x_test_pli 116 | elif(A_Matrix=='AEC'): 117 | #AEC 118 | x_train_aec = np.abs(H_train) - np.mean(np.abs(H_train), axis=2, keepdims=1) 119 | x_train_aec = x_train_aec / np.sqrt(np.sum(x_train_aec**2, axis=2, keepdims=1)) #normalizing in time 120 | x_train = np.einsum('ijk,ilk->ijl',x_train_aec,x_train_aec) / (win_size - 1) 121 | x_test_aec = np.abs(H_test) - np.mean(np.abs(H_test), axis=2, keepdims=1) 122 | x_test_aec = x_test_aec / np.sqrt(np.sum(x_test_aec**2, axis=2, keepdims=1)) #normalizing in time 123 | x_test = np.einsum('ijk,ilk->ijl',x_test_aec,x_test_aec) / (win_size - 1) 124 | else: 125 | raise Exception("non-existing model") 126 | return x_train, x_test 127 | 128 | def normalizition(train_features,test_features,normalize,n,win_size): 129 | if(normalize=='maxmin'): 130 | train_features_n = (train_features - np.min(train_features,axis=2,keepdims=1))/(np.max(train_features,axis=2,keepdims=1)-np.min(train_features,axis=2,keepdims=1)) 131 | train_features_n = 2*train_features_n - 1 132 | test_features_n = (test_features - np.min(test_features,axis=2,keepdims=1))/(np.max(test_features,axis=2,keepdims=1)-np.min(test_features,axis=2,keepdims=1)) 133 | test_features_n = 2*test_features_n - 1 134 | elif(normalize=='l1' or normalize=='l2'): 135 | train_features_n = [preprocessing.normalize(train_features[:,:,i], norm = normalize) for i in range(win_size)] 136 | train_features_n = np.array(train_features_n).reshape(-1,n,win_size) 137 | test_features_n = [preprocessing.normalize(test_features[:,:,i], norm = normalize) for i in range(win_size)] 138 | test_features_n = np.array(test_features_n).reshape(-1,n,win_size) 139 | elif(normalize=='meanstd'): 140 | #(x-mean(x))/std(x) 141 | train_features_n = train_features - np.mean(train_features, axis=2, keepdims=1) 142 | train_features_n = train_features_n / np.sqrt(np.sum(train_features_n**2,axis=2,keepdims=1)) 143 | test_features_n = test_features - np.mean(test_features, axis = 2, keepdims=1) 144 | test_features_n = test_features_n / np.sqrt(np.sum(test_features_n**2,axis=2,keepdims=1)) 145 | else: 146 | train_features_n = train_features 147 | test_features_n = test_features 148 | return train_features_n, test_features_n 149 | 150 | def preprocess_data(x, Labels, K, Fs, dataset2=False, filt = False, ICA = True, 151 | sh = False, A_Matrix = 'cov', normalize='meanstd',sec=1, 152 | percent=.2,sampling=False): 153 | data_length = x.shape[2] 154 | n = x.shape[1] 155 | if(sampling): 156 | win_size = Fs 157 | step = Fs//2 158 | else: 159 | win_size = Fs*sec 160 | if(sec>1): 161 | step = Fs*(sec-1) 162 | else: 163 | step = sec*(Fs*0+Fs//2) #1-window*alpha% 164 | #ratio of number of train test #K-fold validation 165 | #(int((time/Fs)*0.8))*Fs 166 | if(dataset2): 167 | test_index = np.arange(int(.25*K*data_length),int(.25*(K+1)*data_length)) 168 | else: 169 | test_index = np.arange(int(percent*K*data_length),int(percent*(K+1)*data_length)) 170 | train_index = np.delete(np.arange(data_length),test_index) 171 | x_train = x[:,:,train_index] 172 | x_test = x[:,:,test_index] 173 | if(False): # adding noise 174 | noise = np.random.normal(0, 1, x_test.shape) 175 | x_test = x_test+noise 176 | 177 | subject_num = x.shape[0] 178 | #ICA 179 | if(ICA): 180 | #if(train_filtered.shape[0]>109): 181 | if(False): 182 | x_train = x_train.reshape(109,-1,n,x_train.shape[2]) 183 | x_test = x_test.reshape(109,-1,n,x_test.shape[2]) 184 | X_ICA_train = [] 185 | 
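# --- editor's note: per-window PLV (sketch, not part of pre_func.py) ----------------
# The 'plv'/'iplv' branch of adj_matrix above implements the phase-locking value
# PLV_ij = | (1/T) * sum_t exp(i*(phi_i(t) - phi_j(t))) | from the Hilbert phase
# (the code divides by win_size - 1 rather than the window length, which only
# rescales the matrix). Single-window sketch with illustrative names:
import numpy as np
from scipy.signal import hilbert

def plv_matrix(window, imaginary=False):
    # window: (channels, samples) array for one epoch
    phase = np.angle(hilbert(window, axis=-1))    # instantaneous phase per channel
    z = np.exp(1j * phase)                        # unit phasors
    plv = z @ z.conj().T / window.shape[-1]       # mean cross-channel phase-difference phasor
    return np.abs(plv.imag) if imaginary else np.abs(plv)
# Applied window-by-window and stacked, this matches the einsum computation above
# up to the T vs. T-1 normalization.
# -------------------------------------------------------------------------------------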
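# --- editor's note: AEC branch (sketch) ----------------------------------------------
# The 'AEC' branch mean-centres the Hilbert envelopes and scales them to unit norm
# over time, so the einsum already yields the Pearson correlation of the amplitude
# envelopes; the extra division by win_size - 1 only rescales every entry by the
# same factor. A compact single-window equivalent (illustrative name):
import numpy as np
from scipy.signal import hilbert

def aec_matrix(window):
    # window: (channels, samples) array for one epoch
    envelope = np.abs(hilbert(window, axis=-1))   # instantaneous amplitude
    return np.corrcoef(envelope)                  # channels treated as variables
# -------------------------------------------------------------------------------------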
X_ICA_test = [] 186 | for i in range(109): 187 | transformer = FastICA(n_components=n,random_state=0, max_iter=200, tol=0.0001) #1000 188 | X_ICA_train.append(transformer.fit_transform(x_train[i].reshape(-1,n))) 189 | X_ICA_test.append(transformer.transform(x_test[i].reshape(-1,n))) 190 | X_ICA_train = np.array(X_ICA_train).reshape(subject_num,n,-1) 191 | X_ICA_test = np.array(X_ICA_test).reshape(subject_num,n,-1) 192 | else: 193 | transformer = FastICA(n_components=n,random_state=0, max_iter=1000, tol=0.0001) #1000 194 | X_ICA_train = transformer.fit_transform(x_train.reshape(-1,n)) 195 | #transformer.components_ 196 | X_ICA_test = transformer.transform(x_test.reshape(-1,n)) 197 | X_ICA_train = X_ICA_train.reshape(subject_num,n,-1) 198 | X_ICA_test = X_ICA_test.reshape(subject_num,n,-1) 199 | else: 200 | X_ICA_train = x_train 201 | X_ICA_test = x_test 202 | 203 | if(filt): 204 | #60Hz filter 205 | train_filtered = notch_filter(X_ICA_train, Fs) 206 | test_filtered = notch_filter(X_ICA_test, Fs) 207 | #band pass filter #gamma, beta, alpha 208 | #train_filtered = bandpass_filter(train_filtered, 'alpha', 'fir', Fs, 100) 209 | train_filtered = bandpass_filter(train_filtered, 'beta', 'butter', Fs, 5) 210 | test_filtered = bandpass_filter(test_filtered, 'beta', 'butter', Fs, 5) 211 | else: 212 | #60Hz filter 213 | """ 214 | train_filtered = notch_filter(X_ICA_train, Fs) 215 | test_filtered = notch_filter(X_ICA_test, Fs) 216 | #""" 217 | """ 218 | train_filtered = bandpass_filter(train_filtered, 'all', 'butter', Fs, 3) 219 | test_filtered = bandpass_filter(test_filtered, 'all', 'butter', Fs, 3) 220 | #""" 221 | #""" 222 | train_filtered = X_ICA_train 223 | test_filtered = X_ICA_test 224 | #""" 225 | if(dataset2): 226 | signal.savgol_filter(x, Fs//2, 3) 227 | 228 | #windowing data using hamming window 229 | n_sample_train, _ = util.view_as_windows(x_train[0,0,:], window_shape=(win_size,), step=step).shape 230 | n_sample_test, _ = util.view_as_windows(x_test[0,0,:], window_shape=(win_size,), step=step).shape 231 | #fit size of data 232 | X_ICA_train = X_ICA_train[:,:,:((n_sample_train)*step+win_size-step)] 233 | X_ICA_test = X_ICA_test[:,:,:((n_sample_test)*step+win_size-step)] 234 | #win = signal.hamming(win_size) 235 | win = 1 236 | 237 | if(not(dataset2)): 238 | if(sampling): 239 | train_features = np.zeros((subject_num,n,win_size,n_sample_train)) 240 | test_features = np.zeros((subject_num,n,win_size,n_sample_test)) 241 | for i in range(0, X_ICA_train.shape[2]-step, step): 242 | train_features[:,:,:,i//step] = X_ICA_train[:,:,i : i + win_size] 243 | for i in range(0, X_ICA_test.shape[2]-step, step): 244 | test_features[:,:,:,i//step] = X_ICA_test[:,:,i : i + win_size] 245 | len_tr = 200 246 | len_te = 50 247 | index_train = np.random.randint(1, high=n_sample_train, size=(len_tr,sec), dtype='l') 248 | index_test = np.random.randint(1, high=n_sample_test, size=(len_te,sec), dtype='l') 249 | r_train_features = np.zeros((len_tr,subject_num,n,win_size)) 250 | r_test_features = np.zeros((len_te,subject_num,n,win_size)) 251 | for j in range(len_tr): 252 | r_train_features[j] = np.mean(train_features[:,:,:,index_train[j]],axis=3) 253 | if(jijl',tr1,tr2)) 310 | tr21 = np.abs(np.einsum('ijk,ilk->ijl',tr2,tr1)) 311 | train_x = np.concatenate((np.concatenate((tr11,tr12),axis=2),np.concatenate((tr21,tr22),axis=2)),axis=1) 312 | te12 = np.abs(np.einsum('ijk,ilk->ijl',te1,te2)) 313 | te21 = np.abs(np.einsum('ijk,ilk->ijl',te2,te1)) 314 | test_x = 
np.concatenate((np.concatenate((te11,te12),axis=2),np.concatenate((te21,te22),axis=2)),axis=1) 315 | 316 | return train_x, test_x, y_train, y_test 317 | 318 | def preprocess_data_task(x, Fs, ratio, filt = False, ICA = True, 319 | sh = False, A_Matrix = 'cov', normalize='meanstd'): 320 | data_length = x.shape[3] 321 | n = x.shape[2] 322 | win_size = Fs 323 | step = Fs*0+Fs//2 #1-window*alpha% 324 | num_train = int(np.ceil(len(x)*ratio)) 325 | x_train = x[:num_train,:] 326 | x_test = x[num_train:,:] 327 | if(x.shape[1]==14): 328 | Labels = np.concatenate((np.concatenate((np.arange(6),np.arange(2,6))),np.arange(2,6))) + 1 329 | else: 330 | Labels = np.arange(x.shape[1]) + 1 331 | Lables_train = np.tile(Labels,x_train.shape[0]) 332 | Lables_test = np.tile(Labels,x_test.shape[0]) 333 | x_train = x_train.reshape(-1,n,data_length) 334 | x_test = x_test.reshape(-1,n,data_length) 335 | 336 | if(filt): 337 | #60Hz filter 338 | train_filtered = notch_filter(x_train, Fs) 339 | test_filtered = notch_filter(x_test, Fs) 340 | #band pass filter 341 | #train_filtered = bandpass_filter(train_filtered, 'alpha', 'fir', Fs, 100) 342 | train_filtered = bandpass_filter(train_filtered, 'beta', 'butter', Fs, 5) 343 | test_filtered = bandpass_filter(test_filtered, 'beta', 'butter', Fs, 5) 344 | else: 345 | #60Hz filter 346 | """ 347 | train_filtered = notch_filter(x_train, Fs) 348 | test_filtered = notch_filter(x_test, Fs) 349 | #""" 350 | train_filtered = x_train 351 | test_filtered = x_test 352 | 353 | #ICA 354 | if(ICA): 355 | transformer = FastICA(n_components=n,random_state=0, max_iter=200, tol=0.0001) #1000 356 | X_ICA_train = transformer.fit_transform(train_filtered.reshape(-1,n)) 357 | X_ICA_train = X_ICA_train.reshape(-1,n,data_length) 358 | X_ICA_test = transformer.transform(test_filtered.reshape(-1,n)) 359 | X_ICA_test = X_ICA_test.reshape(-1,n,data_length) 360 | else: 361 | X_ICA_train = train_filtered 362 | X_ICA_test = test_filtered 363 | 364 | #windowing data using hamming window 365 | n_sample_train, _ = util.view_as_windows(x_train[0,0,:], window_shape=(win_size,), step=step).shape 366 | n_sample_test, _ = util.view_as_windows(x_test[0,0,:], window_shape=(win_size,), step=step).shape 367 | 368 | #win = signal.hamming(win_size) 369 | win = 1 370 | 371 | train_features = [X_ICA_train[:,:,i : i + win_size]*win for i in range(0, x_train.shape[2]-step, step)] 372 | train_features = np.asarray(train_features).reshape(-1,n,win_size) 373 | 374 | test_features = [X_ICA_test[:,:,i : i + win_size]*win for i in range(0, x_test.shape[2]-step, step)] 375 | test_features = np.asarray(test_features).reshape(-1,n,win_size) 376 | 377 | y_train = np.tile(Lables_train,n_sample_train) 378 | y_test = np.tile(Lables_test,n_sample_test) 379 | 380 | n_sample_train = train_features.shape[0] 381 | n_sample_test = test_features.shape[0] 382 | 383 | #shuffle data 384 | if(sh): 385 | train_features, y_train = shuffle(train_features, y_train) 386 | test_features, y_test = shuffle(test_features, y_test) 387 | 388 | #check whether stationary (p<0.05) 389 | result = adfuller(train_features[1,1,:]) 390 | print('ADF Statistic: %f' % result[0]) 391 | print('p-value: %f' % result[1]) 392 | print('Critical Values:') 393 | for key, value in result[4].items(): 394 | print('\t%s: %.3f' % (key, value)) 395 | 396 | #normalize data 397 | #normalize = 'meanstd' 'maxmin' 'l1' 'l2' 398 | train_features_n, test_features_n = normalizition(train_features,test_features,normalize,n,win_size) 399 | # create adjency matrix 400 | #A_Matrix = 'cov' 'plv' 
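# --- editor's note: fixed-length epoching (sketch) -----------------------------------
# The windowing above slices each recording into win_size-sample epochs with 50 %
# overlap (step = win_size // 2) and a rectangular window (win = 1). A compact
# equivalent built directly on skimage's view_as_windows is sketched below with
# illustrative names; edge handling at the very end of a recording may differ
# slightly from the loop above.
import numpy as np
from skimage import util

def epoch_signal(x, win_size, step):
    # x: (recordings, channels, time) array -> (n_win, recordings, channels, win_size)
    n_win = util.view_as_windows(x[0, 0, :], window_shape=(win_size,), step=step).shape[0]
    return np.stack([x[:, :, i * step : i * step + win_size] for i in range(n_win)])
# -------------------------------------------------------------------------------------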
'iplv' 'pli' 'AEC' 401 | train_x, test_x = adj_matrix(train_features_n, test_features_n, win_size,n_sample_train,n_sample_test, n, A_Matrix) 402 | 403 | return train_x, test_x, y_train, y_test 404 | 405 | def preprocess_data_BCI(x_train,x_test, Labels, Fs, filt = False, ICA = True, 406 | A_Matrix = 'cov', normalize='meanstd',sec=1,sampling=False): 407 | n = x_train.shape[1] 408 | if(sampling): 409 | win_size = Fs 410 | step = Fs//2 411 | else: 412 | win_size = Fs*sec 413 | step = sec*(Fs*0+Fs//2) 414 | subject_num = x_train.shape[0] 415 | 416 | if(filt): 417 | #60Hz filter 418 | train_filtered = notch_filter(x_train, Fs) 419 | test_filtered = notch_filter(x_test, Fs) 420 | #band pass filter 421 | #train_filtered = bandpass_filter(train_filtered, 'alpha', 'fir', Fs, 100) 422 | train_filtered = bandpass_filter(train_filtered, 'gamma', 'butter', Fs, 5) 423 | test_filtered = bandpass_filter(test_filtered, 'gamma', 'butter', Fs, 5) 424 | else: 425 | #60Hz filter 426 | train_filtered = notch_filter(x_train, Fs) 427 | test_filtered = notch_filter(x_test, Fs) 428 | #train_filtered = x_train 429 | #test_filtered = x_test 430 | #ICA 431 | if(ICA): 432 | transformer = FastICA(n_components=n,random_state=0, max_iter=1000, tol=0.0001) #1000 433 | X_ICA_train = transformer.fit_transform(train_filtered.reshape(-1,n)) 434 | X_ICA_train = X_ICA_train.reshape(subject_num,n,-1) 435 | X_ICA_test = transformer.transform(test_filtered.reshape(-1,n)) 436 | X_ICA_test = X_ICA_test.reshape(subject_num,n,-1) 437 | else: 438 | X_ICA_train = train_filtered 439 | X_ICA_test = test_filtered 440 | 441 | #windowing data using hamming window 442 | n_sample_train, _ = util.view_as_windows(x_train[0,0,:], window_shape=(win_size,), step=step).shape 443 | n_sample_test, _ = util.view_as_windows(x_test[0,0,:], window_shape=(win_size,), step=step).shape 444 | 445 | X_ICA_train = X_ICA_train[:,:,:((n_sample_train)*step+win_size-step)] 446 | X_ICA_test = X_ICA_test[:,:,:((n_sample_test)*step+win_size-step)] 447 | #win = signal.hamming(win_size) 448 | win = 1 449 | if(sampling): 450 | train_features = np.zeros((subject_num,n,win_size,n_sample_train)) 451 | test_features = np.zeros((subject_num,n,win_size,n_sample_test)) 452 | for i in range(0, X_ICA_train.shape[2]-step, step): 453 | train_features[:,:,:,i//step] = X_ICA_train[:,:,i : i + win_size] 454 | for i in range(0, X_ICA_test.shape[2]-step, step): 455 | test_features[:,:,:,i//step] = X_ICA_test[:,:,i : i + win_size] 456 | len_tr = 200 457 | len_te = 50 458 | index_train = np.random.randint(1, high=n_sample_train, size=(len_tr,sec), dtype='l') 459 | index_test = np.random.randint(1, high=n_sample_test, size=(len_te,sec), dtype='l') 460 | r_train_features = np.zeros((len_tr,subject_num,n,win_size)) 461 | r_test_features = np.zeros((len_te,subject_num,n,win_size)) 462 | for j in range(len_tr): 463 | r_train_features[j] = np.mean(train_features[:,:,:,index_train[j]],axis=3) 464 | if(j
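# --- editor's note: the `sampling` branch above (sketch; the listing is cut off) ----
# When sampling=True, each training example is built by averaging `sec` randomly
# chosen one-second windows per subject. The listing above breaks off inside the
# loop ("if(j"), so only the visible training-side step is sketched here; note that
# np.random.randint(1, high=n_sample_train, ...) above never selects window 0,
# which may or may not be intentional. Illustrative names throughout.
import numpy as np

def average_random_windows(windows, n_out, n_avg, rng=None):
    # windows: (subjects, channels, win_size, n_windows) stack of epochs
    rng = np.random.default_rng() if rng is None else rng
    idx = rng.integers(0, windows.shape[-1], size=(n_out, n_avg))   # draws from all windows
    return np.stack([windows[..., idx[j]].mean(axis=-1) for j in range(n_out)])
    # -> (n_out, subjects, channels, win_size), matching r_train_features above
# -------------------------------------------------------------------------------------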