├── loss.py ├── README.md ├── X_dawn.py ├── benchmark.py ├── baseline_18.py ├── model.py ├── EEGnet_train.py ├── train.py └── EEGModels.py /loss.py: -------------------------------------------------------------------------------- 1 | import keras.backend as K 2 | from keras.losses import binary_crossentropy 3 | def mean_squared_error_ignore_0(y_true, y_pred): 4 | """ loss function computing MSE of non-blank(!=0) in y_true 5 | Args: 6 | y_true(tftensor): true label 7 | y_pred(tftensor): predicted label 8 | return: 9 | MSE reconstruction error for loss computing 10 | """ 11 | loss = K.switch(K.equal(y_true, K.constant(0)),K.zeros(K.shape(y_true)),K.square(y_pred - y_true)) 12 | return K.mean(loss, axis=-1) 13 | 14 | def dummy(y_true, y_pred): 15 | """ 16 | return a tensor of zero 17 | """ 18 | return K.mean(K.zeros(K.shape(y_true))) 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ERPENet (Multi-task Autoencoder) for P300 EEG-Based BCI 2 | The event-related potential encoder network (ERPENet) is a multi-task autoencoder-based model, that can be applied to any ERP-related tasks. 3 | 4 | For more details, please refer to: https://ieeexplore.ieee.org/abstract/document/8723080 5 | 6 | 7 | ## Code Description (To be updated) ## 8 | model.py -- contains all model builders in Keras. 9 | train.py -- used to train the models. log file, tensorboard file, and best weights are kept. 10 | benchmark.py -- used to evaluate the trained model; need .hdf5(weight) from the train.py file as one of the input. 11 | X_dawn -- xDawn algorithm as one of the baseline. 12 | 13 | 14 | ## Citation ## 15 | Following citation format can be used for BibTex: 16 | 17 | @ARTICLE{8723080, 18 | author={A. {Ditthapron} and N. {Banluesombatkul} and S. {Ketrat} and E. {Chuangsuwanich} and T. {Wilaiprasitporn}}, 19 | journal={IEEE Access}, 20 | title={Universal Joint Feature Extraction for P300 EEG Classification Using Multi-Task Autoencoder}, 21 | year={2019}, 22 | volume={7}, 23 | pages={68415-68428}, 24 | doi={10.1109/ACCESS.2019.2919143}, 25 | } 26 | -------------------------------------------------------------------------------- /X_dawn.py: -------------------------------------------------------------------------------- 1 | ''' 2 | xDawn algorithm add name in file variable below 3 | ''' 4 | file = ['00','15','18','17','bci_comp_zeropad','erpbci_zeropad','300'] 5 | from sklearn.pipeline import make_pipeline 6 | from mne.decoding import Vectorizer 7 | from scipy import stats 8 | from sklearn.linear_model import LogisticRegression 9 | from sklearn.preprocessing import StandardScaler 10 | from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA 11 | from sklearn.model_selection import train_test_split 12 | from sklearn.model_selection import cross_val_score, StratifiedShuffleSplit,StratifiedKFold 13 | from pyriemann.estimation import ERPCovariances 14 | from pyriemann.tangentspace import TangentSpace 15 | from pyriemann.classification import MDM 16 | from pyriemann.spatialfilters import Xdawn 17 | import pandas as pd 18 | from collections import OrderedDict 19 | 20 | import numpy as np 21 | import itertools 22 | 23 | from numpy import unique 24 | from numpy import random 25 | 26 | channel_2d = [grid for grid in itertools.product(range(9), repeat=2)] 27 | 28 | def normalize(X_train,X_val): 29 | mu=[stats.tmean(X_train[:,d[0],d[1]]) for d in channel_2d] 30 | std=[stats.tstd(X_train[:,d[0],d[1]]) for d in channel_2d] 31 | for i in range(len(channel_2d)): 32 | if(std[i]!=0): 33 | X_train[:,channel_2d[i][0],channel_2d[i][1]]= (X_train[:,channel_2d[i][0],channel_2d[i][1]]-mu[i]) / std[i] 34 | X_val[:,channel_2d[i][0],channel_2d[i][1]]= (X_val[:,channel_2d[i][0],channel_2d[i][1]]-mu[i]) / std[i] 35 | return X_train,X_val 36 | 37 | results=0 38 | if __name__ == "__main__": 39 | f = [] 40 | auc = [] 41 | acc = [] 42 | methods = [] 43 | clfs = OrderedDict() 44 | clfs['Xdawn + RegLDA'] = make_pipeline(Xdawn(2), Vectorizer(), LDA(shrinkage='auto', solver='eigen')) 45 | 46 | for name in file: 47 | X1 = np.load('data/X_' + name + '.npy') 48 | y1 = np.load('data/Y_' + name + '.npy') 49 | X = X1[:,4:9,:,50:150] 50 | X = np.reshape(X,(-1,9*5,100)) 51 | y = y1.flatten() 52 | zero = np.sum(X,axis=-1)[0]!=0 53 | X = X[:,zero,:] 54 | cv = StratifiedKFold(n_splits=10, random_state=0) 55 | for m in clfs: 56 | print name,m 57 | res1 = cross_val_score(clfs[m], X, y, scoring='accuracy', cv=cv, n_jobs=-1) 58 | print name,m,res1[0] 59 | res2 = cross_val_score(clfs[m], X, y, scoring='roc_auc', cv=cv, n_jobs=-1) 60 | print name,m,res2[0] 61 | acc.extend(res1) 62 | auc.extend(res2) 63 | methods.extend([m]*len(res1)) 64 | f.extend([name]*len(res1)) 65 | 66 | results = pd.DataFrame(data=auc, columns=['AUC']) 67 | results['ACC'] = acc 68 | results['Method'] = methods 69 | results['file'] = f 70 | np.save('out',results) 71 | -------------------------------------------------------------------------------- /benchmark.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This file is used for benchmark the model 3 | execution format 4 | python baseline.py $X$ $Y$ $model$ $n_freeze$ $out$ 5 | ''' 6 | import numpy as np 7 | from tensorflow.keras.callbacks import CSVLogger, ModelCheckpoint, ReduceLROnPlateau 8 | from sklearn.model_selection import train_test_split 9 | import itertools 10 | from scipy import stats 11 | from tensorflow.keras.optimizers import RMSprop,Adam,Adadelta,SGD 12 | from tensorflow.keras import models 13 | from sklearn.utils import class_weight 14 | from tensorflow.keras.callbacks import TensorBoard 15 | from time import time 16 | 17 | from tensorflow.keras.models import load_model 18 | import tensorflow.keras.losses 19 | import sklearn.preprocessing as pre 20 | from sklearn.metrics import roc_auc_score 21 | from sklearn.metrics import roc_auc_score 22 | from numpy import unique 23 | from numpy import random 24 | import tensorflow as tf 25 | from tensorflow.keras.utils import multi_gpu_model 26 | import argparse 27 | 28 | epochs=800 29 | 30 | parser = argparse.ArgumentParser() 31 | parser.add_argument("X", help="path to X.npy", 32 | type=str) 33 | parser.add_argument("Y", help="path to Y.npy", 34 | type=str) 35 | parser.add_argument("model", help="path to __.hdf5", 36 | type=str) 37 | parser.add_argument("out", help="output text path", 38 | type=str) 39 | args = parser.parse_args() 40 | out = args.out 41 | #normalization 42 | channel_2d = [grid for grid in itertools.product(range(9), repeat=2)] 43 | def normalize(X_train,X_val): 44 | mu=[stats.tmean(X_train[:,d[0],d[1]]) for d in channel_2d] 45 | std=[stats.tstd(X_train[:,d[0],d[1]]) for d in channel_2d] 46 | for i in range(len(channel_2d)): 47 | if(std[i]!=0): 48 | X_train[:,channel_2d[i][0],channel_2d[i][1]]= (X_train[:,channel_2d[i][0],channel_2d[i][1]]-mu[i]) / std[i] 49 | X_val[:,channel_2d[i][0],channel_2d[i][1]]= (X_val[:,channel_2d[i][0],channel_2d[i][1]]-mu[i]) / std[i] 50 | return X_train,X_val 51 | 52 | def EEGnetFormat(X): 53 | X = np.reshape(X,(X.shape[0],1,X.shape[1] * X.shape[2],X.shape[3])) 54 | notZero = [i for i in range(45) if i not in [44,43,42,38,37,36,35,33,29,27]] 55 | return X[:,:,notZero,:] 56 | #load data 57 | X1 = np.load(args.X) 58 | y1 = np.load(args.Y) 59 | X_train, X_test, y_train, y_test = train_test_split(X1,y1,test_size=0.2,random_state=0) 60 | X_train,X_test = normalize(X_train,X_test) 61 | del X1,y1 62 | X_train = EEGnetFormat(X_train[:,4:9,:,50:150]) 63 | X_test = EEGnetFormat(X_test[:,4:9,:,50:150]) 64 | 65 | model = load_model(args.model) 66 | 67 | csv_logger = CSVLogger(out+'.log') 68 | tensorboard = TensorBoard(log_dir="../logs/{}_{}".format(out,time())) 69 | 70 | model.compile(optimizer=Adadelta(),loss=['binary_crossentropy'],metrics=['accuracy']) 71 | model.fit(x=X_train,y=y_train,batch_size=512,epochs=300,validation_data=(X_test,y_test)) 72 | 73 | pre = model.predict(X_test) 74 | print(roc_auc_score(y_test,pre)) 75 | 76 | -------------------------------------------------------------------------------- /baseline_18.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This file is used for benchmark the model 3 | execution format 4 | python baseline.py $X$ $Y$ $model$ $n_freeze$ $out$ 5 | ''' 6 | import numpy as np 7 | from model import direct_8,direct_8_dist,LSTM_2D,distributed_time,autoencoder_time,autoencoder_CNN,dense_1,hybrid_LSTM 8 | from keras.callbacks import CSVLogger, ModelCheckpoint, ReduceLROnPlateau 9 | from sklearn.model_selection import train_test_split 10 | import itertools 11 | from scipy import stats 12 | from keras.optimizers import RMSprop,Adam,Adadelta,SGD 13 | from keras import models 14 | from sklearn.utils import class_weight 15 | from keras.callbacks import TensorBoard 16 | from time import time 17 | from loss import mean_squared_error_ignore_0,dummy 18 | from keras.models import load_model 19 | import keras.losses 20 | import sklearn.preprocessing as pre 21 | from sklearn.metrics import roc_auc_score 22 | from sklearn.metrics import roc_auc_score 23 | from numpy import unique 24 | from numpy import random 25 | import tensorflow as tf 26 | from keras.utils import multi_gpu_model 27 | import argparse 28 | keras.losses.mean_squared_error_ignore_0 = mean_squared_error_ignore_0 29 | epochs=1000 30 | 31 | parser = argparse.ArgumentParser() 32 | parser.add_argument("X", help="path to X.npy", 33 | type=str) 34 | parser.add_argument("Y", help="path to Y.npy", 35 | type=str) 36 | parser.add_argument("model", help="path to __.hdf5", 37 | type=str) 38 | parser.add_argument("n_freeze", help="# of freeze layers(6 for baseline, 63 for the proposed model)", 39 | type=int) 40 | parser.add_argument("out", help="output text path", 41 | type=str) 42 | args = parser.parse_args() 43 | 44 | #normalization 45 | channel_2d = [grid for grid in itertools.product(range(9), repeat=2)] 46 | def normalize(X_train,X_val): 47 | mu=[stats.tmean(X_train[:,d[0],d[1]]) for d in channel_2d] 48 | std=[stats.tstd(X_train[:,d[0],d[1]]) for d in channel_2d] 49 | for i in range(len(channel_2d)): 50 | if(std[i]!=0): 51 | X_train[:,channel_2d[i][0],channel_2d[i][1]]= (X_train[:,channel_2d[i][0],channel_2d[i][1]]-mu[i]) / std[i] 52 | X_val[:,channel_2d[i][0],channel_2d[i][1]]= (X_val[:,channel_2d[i][0],channel_2d[i][1]]-mu[i]) / std[i] 53 | return X_train,X_val 54 | 55 | #load data 56 | X1 = np.load(args.X) 57 | y1 = np.load(args.Y) 58 | X_train, X_test, y_train, y_test = train_test_split(X1,y1,test_size=0.2,random_state=0) 59 | X_train,X_test = normalize(X_train,X_test) 60 | del X1,y1 61 | with tf.device('/cpu:0'): 62 | model = load_model(args.model,custom_objects={'mean_squared_error_ignore_0':mean_squared_error_ignore_0}) 63 | 64 | #chop out unnecessary data 65 | X_train = X_train[:,4:9,:,50:150] 66 | X_test = X_test[:,4:9,:,50:150] 67 | 68 | #freeze layer 69 | for i in range(args.n_freeze): 70 | model.layers[i].trainable = False 71 | 72 | 73 | csv_logger = CSVLogger(out+'.log') 74 | tensorboard = TensorBoard(log_dir="../logs/{}".format(time())) 75 | 76 | model.compile(optimizer=Adadelta(),loss=[mean_squared_error_ignore_0,'binary_crossentropy'],metrics=['accuracy'],loss_weights=[0,1.0]) 77 | model.fit(x=X_train,y=[X_train,y_train],batch_size=1024,epochs=300,validation_data=(X_test,[X_test,y_test])) 78 | 79 | re,pre = model.predict(X_test) 80 | print(roc_auc_score(y_test,pre)) 81 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | from keras.layers import Input, TimeDistributed, Conv2D, Flatten, Dense, Dropout, BatchNormalization, Activation, Reshape,GRU, LeakyReLU, Reshape,Permute,ConvLSTM2D,Conv1D ,UpSampling2D,Conv2DTranspose,ZeroPadding2D,UpSampling1D,Cropping1D,Lambda,LSTM,RepeatVector 2 | from keras.models import Model 3 | from keras import initializers 4 | import keras.backend as K 5 | import tensorflow as tf 6 | 7 | def hybrid_LSTM(depth=2,conv_size=16,dense_size=512,input_dim=(5,9,100,),dropoutRate=0.2): 8 | """ 9 | Autoencoder model builder composes of CNNs and a LSTM 10 | Args: 11 | depth (int): number of CNN blocks, each has 3 CNN layers with BN and a dropout 12 | conv_size (int): initial CNN filter size, doubled in each depth level 13 | dense_size (int): size of latent vector and a number of filters of ConvLSTM2D 14 | input_dim (tuple): input dimention, should be in (y_spatial,x_spatial,temporal) 15 | dropoutRate (float): dropout rate used in all nodes 16 | Return: 17 | keras model 18 | """ 19 | """Setup""" 20 | temp_filter = conv_size 21 | X = Input(shape=input_dim, name = 'input') 22 | X = Permute((3,1,2))(X) #move temporal axes to be first dim 23 | X = Reshape((100,5,9,1))(X) #reshape (,1) to be feature of each spatial 24 | 25 | """Encoder""" 26 | for i in range(depth): 27 | for j in range(3): 28 | if j == 0: #j==0 is first layer(j) of the CNN block(i); apply stride with double filter size 29 | X = TimeDistributed(Conv2D(2*temp_filter,(3,3),padding='same' ,strides=(2,2),data_format="channels_last"),name = 'encoder_'+str(i)+str(j)+'_timeConv2D')(X) 30 | else: 31 | X = TimeDistributed(Conv2D(temp_filter,(3,3), padding='same', data_format="channels_last"),name = 'encoder_'+str(i)+str(j)+'_timeConv2D')(X) 32 | X = BatchNormalization(name = 'encoder_'+str(i)+str(j)+'_BN')(X) 33 | X = LeakyReLU(alpha=0.1,name = 'encoder_'+str(i)+str(j)+'_relu')(X) 34 | X = Dropout(dropoutRate,name = 'encoder_'+str(i)+str(j)+'_drop')(X) 35 | temp_filter = int(temp_filter * 2) 36 | X = TimeDistributed(Flatten())(X) 37 | X = LSTM(dense_size, recurrent_dropout=dropoutRate ,return_sequences=False, implementation=2)(X) 38 | 39 | """Latent""" 40 | latent = X 41 | 42 | """Setup for decoder"""" 43 | X = RepeatVector(100)(X) 44 | temp_filter = temp_filter/2 45 | 46 | """Decoder""" 47 | X = LSTM(temp_filter*2*3, recurrent_dropout=dropoutRate ,return_sequences=True, implementation=2)(X) 48 | X = Reshape((100,2,3,temp_filter))(X) 49 | for i in range(depth): 50 | for j in range(3): 51 | if j == 0: 52 | X = TimeDistributed(UpSampling2D((2,2)),name = 'decoder_'+str(i)+str(j)+'_upsampling')(X) 53 | X = TimeDistributed(ZeroPadding2D(((1,0),(1,0))),name = 'decoder_'+str(i)+str(j)+'_padding')(X) 54 | X = TimeDistributed(Conv2D(temp_filter,(3,3),data_format="channels_last"),name = 'decoder_'+str(i)+str(j)+'_timeConv2D')(X) 55 | else: 56 | X = TimeDistributed(Conv2D(temp_filter,(3,3), padding='same', data_format="channels_last"),name = 'decoder_'+str(i)+str(j)+'_timeConv2D')(X) 57 | X = BatchNormalization(name = 'decoder_'+str(i)+str(j)+'_BN')(X) 58 | X = LeakyReLU(alpha=0.1,name = 'decoder_'+str(i)+str(j)+'_relu')(X) 59 | X = Dropout(dropoutRate,name = 'decoder_'+str(i)+str(j)+'_drop')(X) 60 | temp_filter = int(temp_filter / 2) 61 | X = TimeDistributed(Conv2D(1,(1,1), padding='same', data_format="channels_last"),name = 'decoder__timeConv2D')(X) 62 | X = Reshape((100,5,9))(X) 63 | X = Permute((2,3,1))(X) 64 | decoded = X 65 | X = latent 66 | X = Dense(1,name = 'Dense10',activation='sigmoid')(X) 67 | return Model(inputs = model_input, outputs = [decoded,X]) 68 | 69 | 70 | 71 | 72 | def baseline(input_dim): 73 | """ 74 | Baseline mode(AE-SLIC) model 75 | Args: 76 | input_dim(tuple): input dimention; need to be in flatten format (samples,feature) 77 | Return: 78 | keras model 79 | """ 80 | 81 | model_input = Input(shape=input_dim, name = 'input') 82 | X=model_input 83 | X = Dense(500,name = 'Encoder1')(X) 84 | X = Dense(250,name = 'Encoder2')(X) 85 | latent = X 86 | X = Dense(500,name = 'Decoder1')(X) 87 | X = Dense(250,name = 'Decoder2')(X) 88 | X = Dense(4500,name = 'Decoder3')(X) 89 | Output = X 90 | X = Dense(1,activation = 'sigmoid')(latent) 91 | return Model(inputs = model_input, outputs = [Output,X]) 92 | -------------------------------------------------------------------------------- /EEGnet_train.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This file is used to train the model with four different datasets 3 | execution format 4 | python train.py $X1$ $Y1$ ... $X4$ $Y4$ $model$ $out$ 5 | ''' 6 | 7 | from EEGModels import EEGNet 8 | import numpy as np 9 | 10 | 11 | from tensorflow.keras.callbacks import CSVLogger, ModelCheckpoint, ReduceLROnPlateau 12 | from sklearn.model_selection import train_test_split 13 | import itertools 14 | from scipy import stats 15 | from tensorflow.keras.optimizers import RMSprop,Adam,Adadelta,SGD 16 | from tensorflow.keras import models 17 | from sklearn.utils import class_weight 18 | from tensorflow.keras.callbacks import TensorBoard,LearningRateScheduler,EarlyStopping 19 | from time import time 20 | 21 | import tensorflow as tf 22 | from tensorflow.keras.utils import multi_gpu_model 23 | import tensorflow.keras.backend as K 24 | from tensorflow.keras.losses import binary_crossentropy 25 | from numpy import unique 26 | from numpy import random 27 | 28 | 29 | import argparse 30 | parser = argparse.ArgumentParser() 31 | parser.add_argument("X1", help="path to X.npy", 32 | type=str) 33 | parser.add_argument("Y1", help="path to Y.npy", 34 | type=str) 35 | parser.add_argument("X2", help="path to X.npy", 36 | type=str) 37 | parser.add_argument("Y2", help="path to Y.npy", 38 | type=str) 39 | parser.add_argument("X3", help="path to X.npy", 40 | type=str) 41 | parser.add_argument("Y3", help="path to Y.npy", 42 | type=str) 43 | parser.add_argument("X4", help="path to X.npy", 44 | type=str) 45 | parser.add_argument("Y4", help="path to Y.npy", 46 | type=str) 47 | parser.add_argument("X5", help="path to X.npy", 48 | type=str) 49 | parser.add_argument("Y5", help="path to Y.npy", 50 | type=str) 51 | parser.add_argument("out", help="output text path", 52 | type=str) 53 | args = parser.parse_args() 54 | out = args.out 55 | 56 | epochs=500 57 | 58 | #normalization 59 | channel_2d = [grid for grid in itertools.product(range(9), repeat=2)] 60 | def normalize(X_train,X_val): 61 | mu=[stats.tmean(X_train[:,d[0],d[1]]) for d in channel_2d] 62 | std=[stats.tstd(X_train[:,d[0],d[1]]) for d in channel_2d] 63 | for i in range(len(channel_2d)): 64 | if(std[i]!=0): 65 | X_train[:,channel_2d[i][0],channel_2d[i][1]]= (X_train[:,channel_2d[i][0],channel_2d[i][1]]-mu[i]) / std[i] 66 | X_val[:,channel_2d[i][0],channel_2d[i][1]]= (X_val[:,channel_2d[i][0],channel_2d[i][1]]-mu[i]) / std[i] 67 | # minn = [np.min(X_train[:,d[0],d[1]]) for d in channel_2d] 68 | # maxx = [np.max(X_train[:,d[0],d[1]]) for d in channel_2d] 69 | # for i in range(len(channel_2d)): 70 | # if(maxx[i]-minn[i]!=0): 71 | # X_train[:,channel_2d[i][0],channel_2d[i][1]]= (X_train[:,channel_2d[i][0],channel_2d[i][1]]-minn[i]) / (maxx[i]-minn[i]) 72 | # X_val[:,channel_2d[i][0],channel_2d[i][1]]= (X_val[:,channel_2d[i][0],channel_2d[i][1]]-minn[i]) / (maxx[i]-minn[i]) 73 | return X_train,X_val 74 | 75 | #train weight 76 | def weightFunction(y_train): 77 | weight = np.zeros(2) # class 78 | for i in y_train: 79 | weight[int(i)] +=1 80 | d = np.min(weight) 81 | weight = [temp/d for temp in weight] 82 | temp = np.empty_like(y_train) 83 | temp[y_train==0] = weight[0] 84 | temp[y_train==1] = weight[1] 85 | return temp 86 | #chop out unnecessary data 87 | def EEGnetFormat(X): 88 | X = np.reshape(X,(X.shape[0],1,X.shape[1] * X.shape[2],X.shape[3])) 89 | notZero = [i for i in range(45) if i not in [44,43,42,38,37,36,35,33,29,27]] 90 | return X[:,:,notZero,:] 91 | 92 | #load data 93 | X1 = np.load(args.X1) 94 | y1 = np.load(args.Y1) 95 | X1_train, X1_test, y1_train, y1_test = train_test_split(X1,y1,test_size=0.2,random_state=0) 96 | X1_train,X1_test = normalize(X1_train,X1_test) 97 | y1_train = np.reshape(y1_train,(-1,1)) 98 | y1_test = np.reshape(y1_test,(-1,1)) 99 | 100 | X1_train = EEGnetFormat(X1_train[:,4:9,:,50:150]) 101 | X1_test = EEGnetFormat(X1_test[:,4:9,:,50:150]) 102 | 103 | 104 | del X1,y1 105 | 106 | X2 = np.load(args.X2) 107 | y2 = np.load(args.Y2) 108 | X2_train, X2_test, y2_train, y2_test = train_test_split(X2,y2,test_size=0.2,random_state=0) 109 | X2_train, X2_test = normalize(X2_train,X2_test) 110 | y2_train = np.reshape(y2_train,(-1,1)) 111 | y2_test = np.reshape(y2_test,(-1,1)) 112 | 113 | X2_train = EEGnetFormat(X2_train[:,4:9,:,50:150]) 114 | X2_test = EEGnetFormat(X2_test[:,4:9,:,50:150]) 115 | 116 | 117 | del X2,y2 118 | 119 | 120 | X3 = np.load(args.X3) 121 | y3 = np.load(args.Y3) 122 | X3_train, X3_test, y3_train, y3_test = train_test_split(X3,y3,test_size=0.2,random_state=0) 123 | X3_train,X3_test = normalize(X3_train,X3_test) 124 | y3_train = np.reshape(y3_train,(-1,1)) 125 | y3_test = np.reshape(y3_test,(-1,1)) 126 | 127 | X3_train = EEGnetFormat(X3_train[:,4:9,:,50:150]) 128 | X3_test = EEGnetFormat(X3_test[:,4:9,:,50:150]) 129 | 130 | del X3,y3 131 | 132 | 133 | X4 = np.load(args.X4) 134 | y4 = np.load(args.Y4) 135 | X4_train, X4_test, y4_train, y4_test = train_test_split(X4,y4,test_size=0.2,random_state=0) 136 | X4_train,X4_test = normalize(X4_train,X4_test) 137 | y4_train = np.reshape(y4_train,(-1,1)) 138 | y4_test = np.reshape(y4_test,(-1,1)) 139 | 140 | X4_train = EEGnetFormat(X4_train[:,4:9,:,50:150]) 141 | X4_test = EEGnetFormat(X4_test[:,4:9,:,50:150]) 142 | 143 | del X4,y4 144 | 145 | X5 = np.load(args.X5) 146 | y5 = np.load(args.Y5) 147 | X5_train, X5_test, y5_train, y5_test = train_test_split(X5,y5,test_size=0.2,random_state=0) 148 | X5_train,X5_test = normalize(X5_train,X5_test) 149 | y5_train = np.reshape(y5_train,(-1,1)) 150 | y5_test = np.reshape(y5_test,(-1,1)) 151 | 152 | X5_train = EEGnetFormat(X5_train[:,4:9,:,50:150]) 153 | X5_test = EEGnetFormat(X5_test[:,4:9,:,50:150]) 154 | 155 | del X5,y5 156 | 157 | 158 | 159 | X_train = np.concatenate((X2_train,X1_train,X3_train,X4_train,X5_train)) 160 | del X1_train,X3_train,X4_train,X2_train,X5_train 161 | X_test = np.concatenate((X1_test,X2_test,X3_test,X4_test,X5_test)) 162 | del X1_test,X2_test,X3_test,X4_test,X5_test 163 | 164 | y_train = np.concatenate((y1_train,y2_train,y3_train,y4_train,y5_train)) 165 | del y1_train,y2_train,y3_train,y4_train,y5_train 166 | y_test = np.concatenate((y1_test,y2_test,y3_test,y4_test,y5_test)) 167 | del y1_test,y2_test,y3_test,y4_test,y5_test 168 | 169 | 170 | # X_train = X_train[:,4:9,:,50:150] 171 | # X_test = X_test[:,4:9,:,50:150] 172 | 173 | #format to match EEGnet 174 | 175 | 176 | # X_train = EEGnetFormat(X_train) 177 | # X_test = EEGnetFormat(X_test) 178 | 179 | 180 | model = EEGNet(nb_classes = 1, Chans = 35, Samples = 100) 181 | model.compile(optimizer='adam',loss=['binary_crossentropy'],metrics=['accuracy']) 182 | 183 | 184 | print(model.summary()) 185 | 186 | #train the model 187 | csv_logger = CSVLogger(out+'.log') 188 | filepath=out+".hdf5" 189 | tensorboard = TensorBoard(log_dir="../logs/{}_{}".format(out,time())) 190 | checkpointer = ModelCheckpoint(monitor='val_loss', filepath=filepath, verbose=1, save_best_only=True) 191 | early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=10) 192 | model.fit(x=X_train,y=y_train,batch_size=128,epochs=epochs,validation_data=(X_test,y_test),callbacks=[checkpointer,csv_logger,tensorboard,early_stop]) 193 | 194 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This file is used to train the model with four different datasets 3 | execution format 4 | python train.py $X1$ $Y1$ ... $X4$ $Y4$ $model$ $out$ 5 | ''' 6 | 7 | import numpy as np 8 | from model import hybrid_LSTM, baseline 9 | from keras.callbacks import CSVLogger, ModelCheckpoint, ReduceLROnPlateau 10 | from sklearn.model_selection import train_test_split 11 | import itertools 12 | from scipy import stats 13 | from keras.optimizers import RMSprop,Adam,Adadelta,SGD 14 | from keras import models 15 | from sklearn.utils import class_weight 16 | from keras.callbacks import TensorBoard,LearningRateScheduler 17 | from time import time 18 | from loss import mean_squared_error_ignore_0,dummy 19 | import tensorflow as tf 20 | from keras.utils import multi_gpu_model 21 | import keras.backend as K 22 | from keras.losses import binary_crossentropy 23 | from numpy import unique 24 | from numpy import random 25 | 26 | import argparse 27 | parser = argparse.ArgumentParser() 28 | parser.add_argument("X1", help="path to X.npy", 29 | type=str) 30 | parser.add_argument("Y1", help="path to Y.npy", 31 | type=str) 32 | parser.add_argument("X2", help="path to X.npy", 33 | type=str) 34 | parser.add_argument("Y2", help="path to Y.npy", 35 | type=str) 36 | parser.add_argument("X3", help="path to X.npy", 37 | type=str) 38 | parser.add_argument("Y3", help="path to Y.npy", 39 | type=str) 40 | parser.add_argument("X4", help="path to X.npy", 41 | type=str) 42 | parser.add_argument("Y4", help="path to Y.npy", 43 | type=str) 44 | parser.add_argument("X5", help="path to X.npy", 45 | type=str) 46 | parser.add_argument("Y5", help="path to Y.npy", 47 | type=str) 48 | parser.add_argument("model", help="model name", 49 | type=str) 50 | parser.add_argument("out", help="output text path", 51 | type=str) 52 | args = parser.parse_args() 53 | 54 | 55 | epochs=800 56 | 57 | #normalization 58 | channel_2d = [grid for grid in itertools.product(range(9), repeat=2)] 59 | def normalize(X_train,X_val): 60 | mu=[stats.tmean(X_train[:,d[0],d[1]]) for d in channel_2d] 61 | std=[stats.tstd(X_train[:,d[0],d[1]]) for d in channel_2d] 62 | for i in range(len(channel_2d)): 63 | if(std[i]!=0): 64 | X_train[:,channel_2d[i][0],channel_2d[i][1]]= (X_train[:,channel_2d[i][0],channel_2d[i][1]]-mu[i]) / std[i] 65 | X_val[:,channel_2d[i][0],channel_2d[i][1]]= (X_val[:,channel_2d[i][0],channel_2d[i][1]]-mu[i]) / std[i] 66 | # minn = [np.min(X_train[:,d[0],d[1]]) for d in channel_2d] 67 | # maxx = [np.max(X_train[:,d[0],d[1]]) for d in channel_2d] 68 | # for i in range(len(channel_2d)): 69 | # if(maxx[i]-minn[i]!=0): 70 | # X_train[:,channel_2d[i][0],channel_2d[i][1]]= (X_train[:,channel_2d[i][0],channel_2d[i][1]]-minn[i]) / (maxx[i]-minn[i]) 71 | # X_val[:,channel_2d[i][0],channel_2d[i][1]]= (X_val[:,channel_2d[i][0],channel_2d[i][1]]-minn[i]) / (maxx[i]-minn[i]) 72 | return X_train,X_val 73 | 74 | #train weight 75 | def weightFunction(y_train): 76 | weight = np.zeros(2) # class 77 | for i in y_train: 78 | weight[int(i)] +=1 79 | d = np.min(weight) 80 | weight = [temp/d for temp in weight] 81 | temp = np.empty_like(y_train) 82 | temp[y_train==0] = weight[0] 83 | temp[y_train==1] = weight[1] 84 | return temp 85 | #load data 86 | X1 = np.load(args.X1) 87 | y1 = np.load(args.Y1) 88 | X1_train, X1_test, y1_train, y1_test = train_test_split(X1,y1,test_size=0.2,random_state=0) 89 | X1_train,X1_test = normalize(X1_train,X1_test) 90 | y1_train = np.reshape(y1_train,(-1,1)) 91 | y1_test = np.reshape(y1_test,(-1,1)) 92 | w1 = weightFunction(y1_train) 93 | X1_w = np.multiply(np.reshape(np.repeat(len(X1_train),len(X1_train)),(-1,1)),w1) 94 | del X1,y1 95 | 96 | X2 = np.load(args.X2) 97 | y2 = np.load(args.Y2) 98 | X2_train, X2_test, y2_train, y2_test = train_test_split(X2,y2,test_size=0.2,random_state=0) 99 | X2_train, X2_test = normalize(X2_train,X2_test) 100 | y2_train = np.reshape(y2_train,(-1,1)) 101 | y2_test = np.reshape(y2_test,(-1,1)) 102 | w2 = weightFunction(y2_train) 103 | X2_w = np.multiply(np.reshape(np.repeat(len(X2_train),len(X2_train)),(-1,1)),w2) 104 | del X2,y2 105 | 106 | 107 | X3 = np.load(args.X3) 108 | y3 = np.load(args.Y3) 109 | X3_train, X3_test, y3_train, y3_test = train_test_split(X3,y3,test_size=0.2,random_state=0) 110 | X3_train,X3_test = normalize(X3_train,X3_test) 111 | y3_train = np.reshape(y3_train,(-1,1)) 112 | y3_test = np.reshape(y3_test,(-1,1)) 113 | w3 = weightFunction(y3_train) 114 | X3_w = np.multiply(np.reshape(np.repeat(len(X3_train),len(X3_train)),(-1,1)),w3) 115 | del X3,y3 116 | 117 | 118 | X4 = np.load(args.X4) 119 | y4 = np.load(args.Y4) 120 | X4_train, X4_test, y4_train, y4_test = train_test_split(X4,y4,test_size=0.2,random_state=0) 121 | X4_train,X4_test = normalize(X4_train,X4_test) 122 | y4_train = np.reshape(y4_train,(-1,1)) 123 | y4_test = np.reshape(y4_test,(-1,1)) 124 | w4 = weightFunction(y4_train) 125 | X4_w = np.multiply(np.reshape(np.repeat(len(X4_train),len(X4_train)),(-1,1)),w4) 126 | del X4,y4 127 | 128 | X5 = np.load(args.X5) 129 | y5 = np.load(args.Y5) 130 | X5_train, X5_test, y5_train, y5_test = train_test_split(X5,y5,test_size=0.2,random_state=0) 131 | X5_train,X5_test = normalize(X5_train,X5_test) 132 | y5_train = np.reshape(y5_train,(-1,1)) 133 | y5_test = np.reshape(y5_test,(-1,1)) 134 | w5 = weightFunction(y5_train) 135 | X5_w = np.multiply(np.reshape(np.repeat(len(X5_train),len(X5_train)),(-1,1)),w5) 136 | del X5,y5 137 | 138 | 139 | 140 | X_train = np.concatenate((X2_train,X1_train,X3_train,X4_train,X5_train)) 141 | X_weight = np.concatenate((X1_w,X2_w,X3_w,X4_w,X5_w)).flatten() 142 | X_weight = 1.0/X_weight*np.max(X_weight) 143 | del X1_train,X3_train,X4_train,X2_train,X5_train 144 | X_test = np.concatenate((X1_test,X2_test,X3_test,X4_test,X5_test)) 145 | del X1_test,X2_test,X3_test,X4_test,X5_test 146 | 147 | y_train = np.concatenate((y1_train,y2_train,y3_train,y4_train,y5_train)) 148 | del y1_train,y2_train,y3_train,y4_train,y5_train 149 | y_test = np.concatenate((y1_test,y2_test,y3_test,y4_test,y5_test)) 150 | del y1_test,y2_test,y3_test,y4_test,y5_test 151 | 152 | #chop out unnecessary data 153 | X_train = X_train[:,4:9,:,50:150] 154 | X_test = X_test[:,4:9,:,50:150] 155 | with tf.device('/cpu:0'): 156 | if args.model == 'hybrid_LSTM': 157 | model = hybrid_LSTM(depth=2,conv_size=8,dense_size=512,input_dim=(5,9,100,),dropoutRate=0.2) 158 | if args.model == 'baseline': 159 | model = baseline(input_dim=(4500,)) 160 | #baseline is FC layers not CNN, need to flatten 161 | X_train = np.reshape(X_train,(X_train.shape[0],-1)) 162 | X_test = np.reshape(X_test,(X_test.shape[0],-1)) 163 | 164 | model.compile(optimizer=SGD(lr=0.002,decay=1E-5),loss=[mean_squared_error_ignore_0,'binary_crossentropy'],metrics=['accuracy'],loss_weights=[0.4,0.6]) 165 | parallel_model = multi_gpu_model(model, gpus=2) 166 | parallel_model.__setattr__('callback_model',model) 167 | parallel_model.compile(optimizer=SGD(lr=0.002,decay=1E-5),loss=[mean_squared_error_ignore_0,'binary_crossentropy'],metrics=['accuracy'],loss_weights=[0.4,0.6]) 168 | 169 | 170 | print(model.summary()) 171 | 172 | #train the model 173 | csv_logger = CSVLogger(out+'.log') 174 | filepath="out"+{epoch:02d}+".hdf5" 175 | tensorboard = TensorBoard(log_dir="../logs/{}".format(time())) 176 | checkpointer = ModelCheckpoint(monitor='val_loss', filepath=filepath, verbose=1, save_best_only=True) 177 | parallel_model.fit(x=X_train,y=[X_train,y_train],batch_size=512,epochs=epochs,validation_data=(X_test,[X_test,y_test]),callbacks=[checkpointer,csv_logger,tensorboard],sample_weight=[X_weight,X_weight]) 178 | -------------------------------------------------------------------------------- /EEGModels.py: -------------------------------------------------------------------------------- 1 | import keras 2 | keras.backend.set_image_data_format('channels_first') 3 | """ 4 | ARL_EEGModels - A collection of Convolutional Neural Network models for EEG 5 | Signal Processing and Classification, using Keras and Tensorflow 6 | 7 | Requirements: 8 | (1) Tensorflow == 1.9.0 9 | (2) 'image_data_format' = 'channels_first' in keras.json config 10 | (3) Data shape = (trials, kernels, channels, samples), which for the 11 | input layer, will be (trials, 1, channels, samples). 12 | 13 | To use: 14 | 15 | (1) Place this file in the PYTHONPATH variable in your IDE (i.e.: Spyder) 16 | (2) Import the model as 17 | 18 | from EEGModels import EEGNet 19 | 20 | model = EEGNet(nb_classes = ..., Chans = ..., Samples = ...) 21 | 22 | (3) Then compile and fit the model 23 | 24 | model.compile(loss = ..., optimizer = ..., metrics = ...) 25 | fitted = model.fit(...) 26 | predicted = model.predict(...) 27 | 28 | 29 | Portions of this project are works of the United States Government and are not 30 | subject to domestic copyright protection under 17 USC Sec. 105. Those 31 | portions are released world-wide under the terms of the Creative Commons Zero 32 | 1.0 (CC0) license. 33 | 34 | Other portions of this project are subject to domestic copyright protection 35 | under 17 USC Sec. 105. Those portions are licensed under the Apache 2.0 36 | license. The complete text of the license governing this material is in 37 | the file labeled LICENSE.TXT that is a part of this project's official 38 | distribution. 39 | """ 40 | 41 | from tensorflow.keras.models import Model 42 | from tensorflow.keras.layers import Dense, Activation, Permute, Dropout 43 | from tensorflow.keras.layers import Conv2D, MaxPooling2D, AveragePooling2D 44 | from tensorflow.keras.layers import SeparableConv2D, DepthwiseConv2D 45 | from tensorflow.keras.layers import BatchNormalization 46 | from tensorflow.keras.layers import SpatialDropout2D 47 | from tensorflow.keras.regularizers import l1_l2 48 | from tensorflow.keras.layers import Input, Flatten 49 | from tensorflow.keras.constraints import max_norm 50 | from tensorflow.keras import backend as K 51 | 52 | 53 | def EEGNet(nb_classes, Chans = 64, Samples = 128, 54 | dropoutRate = 0.25, kernLength = 64, F1 = 4, 55 | D = 2, F2 = 8, norm_rate = 0.25, dropoutType = 'Dropout'): 56 | """ Keras Implementation of EEGNet 57 | http://iopscience.iop.org/article/10.1088/1741-2552/aace8c/meta 58 | 59 | Note that this implements the newest version of EEGNet and NOT the earlier 60 | version (version v1 and v2 on arxiv). We strongly recommend using this 61 | architecture as it performs much better and has nicer properties than 62 | our earlier version. For example: 63 | 64 | 1. Depthwise Convolutions to learn spatial filters within a 65 | temporal convolution. The use of the depth_multiplier option maps 66 | exactly to the number of spatial filters learned within a temporal 67 | filter. This matches the setup of algorithms like FBCSP which learn 68 | spatial filters within each filter in a filter-bank. This also limits 69 | the number of free parameters to fit when compared to a fully-connected 70 | convolution. 71 | 72 | 2. Separable Convolutions to learn how to optimally combine spatial 73 | filters across temporal bands. Separable Convolutions are Depthwise 74 | Convolutions followed by (1x1) Pointwise Convolutions. 75 | 76 | 77 | While the original paper used Dropout, we found that SpatialDropout2D 78 | sometimes produced slightly better results for classification of ERP 79 | signals. However, SpatialDropout2D significantly reduced performance 80 | on the Oscillatory dataset (SMR, BCI-IV Dataset 2A). We recommend using 81 | the default Dropout in most cases. 82 | 83 | Assumes the input signal is sampled at 128Hz. If you want to use this model 84 | for any other sampling rate you will need to modify the lengths of temporal 85 | kernels and average pooling size in blocks 1 and 2 as needed (double the 86 | kernel lengths for double the sampling rate, etc). Note that we haven't 87 | tested the model performance with this rule so this may not work well. 88 | 89 | The model with default parameters gives the EEGNet-4,2 model as discussed 90 | in the paper. This model should do pretty well in general, although as the 91 | paper discussed the EEGNet-8,2 (with 8 temporal kernels and 2 spatial 92 | filters per temporal kernel) can do slightly better on the SMR dataset. 93 | Other variations that we found to work well are EEGNet-4,1 and EEGNet-8,1. 94 | 95 | We set F2 = F1 * D (number of input filters = number of output filters) for 96 | the SeparableConv2D layer. We haven't extensively tested other values of this 97 | parameter (say, F2 < F1 * D for compressed learning, and F2 > F1 * D for 98 | overcomplete). We believe the main parameters to focus on are F1 and D. 99 | 100 | Inputs: 101 | 102 | nb_classes : int, number of classes to classify 103 | Chans, Samples : number of channels and time points in the EEG data 104 | dropoutRate : dropout fraction 105 | kernLength : length of temporal convolution in first layer. We found 106 | that setting this to be half the sampling rate worked 107 | well in practice. For the SMR dataset in particular 108 | since the data was high-passed at 4Hz we used a kernel 109 | length of 32. 110 | F1, F2 : number of temporal filters (F1) and number of pointwise 111 | filters (F2) to learn. Default: F1 = 4, F2 = F1 * D. 112 | D : number of spatial filters to learn within each temporal 113 | convolution. Default: D = 2 114 | dropoutType : Either SpatialDropout2D or Dropout, passed as a string. 115 | 116 | """ 117 | 118 | if dropoutType == 'SpatialDropout2D': 119 | dropoutType = SpatialDropout2D 120 | elif dropoutType == 'Dropout': 121 | dropoutType = Dropout 122 | else: 123 | raise ValueError('dropoutType must be one of SpatialDropout2D ' 124 | 'or Dropout, passed as a string.') 125 | 126 | input1 = Input(shape = (1, Chans, Samples)) 127 | 128 | ################################################################## 129 | block1 = Conv2D(F1, (1, kernLength), padding = 'same', 130 | input_shape = (1, Chans, Samples), 131 | use_bias = False)(input1) 132 | block1 = BatchNormalization(axis = 1)(block1) 133 | block1 = DepthwiseConv2D((Chans, 1), use_bias = False, 134 | depth_multiplier = D, 135 | depthwise_constraint = max_norm(1.))(block1) 136 | block1 = BatchNormalization(axis = 1)(block1) 137 | block1 = Activation('elu')(block1) 138 | block1 = AveragePooling2D((1, 4))(block1) 139 | block1 = dropoutType(dropoutRate)(block1) 140 | 141 | block2 = SeparableConv2D(F2, (1, 16), 142 | use_bias = False, padding = 'same')(block1) 143 | block2 = BatchNormalization(axis = 1)(block2) 144 | block2 = Activation('elu')(block2) 145 | block2 = AveragePooling2D((1, 8))(block2) 146 | block2 = dropoutType(dropoutRate)(block2) 147 | 148 | flatten = Flatten(name = 'flatten')(block2) 149 | 150 | dense = Dense(nb_classes, name = 'dense', 151 | kernel_constraint = max_norm(norm_rate))(flatten) 152 | softmax = Activation('sigmoid', name = 'softmax')(dense) 153 | 154 | return Model(inputs=input1, outputs=softmax) 155 | 156 | 157 | 158 | 159 | def EEGNet_SSVEP(nb_classes = 12, Chans = 8, Samples = 256, 160 | dropoutRate = 0.5, kernLength = 256, F1 = 96, 161 | D = 1, F2 = 96, dropoutType = 'Dropout'): 162 | """ SSVEP Variant of EEGNet, as used in [1]. 163 | 164 | Inputs: 165 | 166 | nb_classes : int, number of classes to classify 167 | Chans, Samples : number of channels and time points in the EEG data 168 | dropoutRate : dropout fraction 169 | kernLength : length of temporal convolution in first layer 170 | F1, F2 : number of temporal filters (F1) and number of pointwise 171 | filters (F2) to learn. 172 | D : number of spatial filters to learn within each temporal 173 | convolution. 174 | dropoutType : Either SpatialDropout2D or Dropout, passed as a string. 175 | 176 | 177 | [1]. Waytowich, N. et. al. (2018). Compact Convolutional Neural Networks 178 | for Classification of Asynchronous Steady-State Visual Evoked Potentials. 179 | Journal of Neural Engineering vol. 15(6). 180 | http://iopscience.iop.org/article/10.1088/1741-2552/aae5d8 181 | 182 | """ 183 | 184 | if dropoutType == 'SpatialDropout2D': 185 | dropoutType = SpatialDropout2D 186 | elif dropoutType == 'Dropout': 187 | dropoutType = Dropout 188 | else: 189 | raise ValueError('dropoutType must be one of SpatialDropout2D ' 190 | 'or Dropout, passed as a string.') 191 | 192 | input1 = Input(shape = (1, Chans, Samples)) 193 | 194 | ################################################################## 195 | block1 = Conv2D(F1, (1, kernLength), padding = 'same', 196 | input_shape = (1, Chans, Samples), 197 | use_bias = False)(input1) 198 | block1 = BatchNormalization(axis = 1)(block1) 199 | block1 = DepthwiseConv2D((Chans, 1), use_bias = False, 200 | depth_multiplier = D, 201 | depthwise_constraint = max_norm(1.))(block1) 202 | block1 = BatchNormalization(axis = 1)(block1) 203 | block1 = Activation('elu')(block1) 204 | block1 = AveragePooling2D((1, 4))(block1) 205 | block1 = dropoutType(dropoutRate)(block1) 206 | 207 | block2 = SeparableConv2D(F2, (1, 16), 208 | use_bias = False, padding = 'same')(block1) 209 | block2 = BatchNormalization(axis = 1)(block2) 210 | block2 = Activation('elu')(block2) 211 | block2 = AveragePooling2D((1, 8))(block2) 212 | block2 = dropoutType(dropoutRate)(block2) 213 | 214 | flatten = Flatten(name = 'flatten')(block2) 215 | 216 | dense = Dense(nb_classes, name = 'dense')(flatten) 217 | softmax = Activation('softmax', name = 'softmax')(dense) 218 | 219 | return Model(inputs=input1, outputs=softmax) 220 | 221 | 222 | 223 | def EEGNet_old(nb_classes, Chans = 64, Samples = 128, regRate = 0.0001, 224 | dropoutRate = 0.25, kernels = [(2, 32), (8, 4)], strides = (2, 4)): 225 | """ Keras Implementation of EEGNet_v1 (https://arxiv.org/abs/1611.08024v2) 226 | 227 | This model is the original EEGNet model proposed on arxiv 228 | https://arxiv.org/abs/1611.08024v2 229 | 230 | with a few modifications: we use striding instead of max-pooling as this 231 | helped slightly in classification performance while also providing a 232 | computational speed-up. 233 | 234 | Note that we no longer recommend the use of this architecture, as the new 235 | version of EEGNet performs much better overall and has nicer properties. 236 | 237 | Inputs: 238 | 239 | nb_classes : total number of final categories 240 | Chans, Samples : number of EEG channels and samples, respectively 241 | regRate : regularization rate for L1 and L2 regularizations 242 | dropoutRate : dropout fraction 243 | kernels : the 2nd and 3rd layer kernel dimensions (default is 244 | the [2, 32] x [8, 4] configuration) 245 | strides : the stride size (note that this replaces the max-pool 246 | used in the original paper) 247 | 248 | """ 249 | 250 | # start the model 251 | input_main = Input((1, Chans, Samples)) 252 | layer1 = Conv2D(16, (Chans, 1), input_shape=(1, Chans, Samples), 253 | kernel_regularizer = l1_l2(l1=regRate, l2=regRate))(input_main) 254 | layer1 = BatchNormalization(axis=1)(layer1) 255 | layer1 = Activation('elu')(layer1) 256 | layer1 = Dropout(dropoutRate)(layer1) 257 | 258 | permute_dims = 2, 1, 3 259 | permute1 = Permute(permute_dims)(layer1) 260 | 261 | layer2 = Conv2D(4, kernels[0], padding = 'same', 262 | kernel_regularizer=l1_l2(l1=0.0, l2=regRate), 263 | strides = strides)(permute1) 264 | layer2 = BatchNormalization(axis=1)(layer2) 265 | layer2 = Activation('elu')(layer2) 266 | layer2 = Dropout(dropoutRate)(layer2) 267 | 268 | layer3 = Conv2D(4, kernels[1], padding = 'same', 269 | kernel_regularizer=l1_l2(l1=0.0, l2=regRate), 270 | strides = strides)(layer2) 271 | layer3 = BatchNormalization(axis=1)(layer3) 272 | layer3 = Activation('elu')(layer3) 273 | layer3 = Dropout(dropoutRate)(layer3) 274 | 275 | flatten = Flatten(name = 'flatten')(layer3) 276 | 277 | dense = Dense(nb_classes, name = 'dense')(flatten) 278 | softmax = Activation('softmax', name = 'softmax')(dense) 279 | 280 | return Model(inputs=input_main, outputs=softmax) 281 | 282 | 283 | 284 | def DeepConvNet(nb_classes, Chans = 64, Samples = 256, 285 | dropoutRate = 0.5): 286 | """ Keras implementation of the Deep Convolutional Network as described in 287 | Schirrmeister et. al. (2017), Human Brain Mapping. 288 | 289 | This implementation assumes the input is a 2-second EEG signal sampled at 290 | 128Hz, as opposed to signals sampled at 250Hz as described in the original 291 | paper. We also perform temporal convolutions of length (1, 5) as opposed 292 | to (1, 10) due to this sampling rate difference. 293 | 294 | Note that we use the max_norm constraint on all convolutional layers, as 295 | well as the classification layer. We also change the defaults for the 296 | BatchNormalization layer. We used this based on a personal communication 297 | with the original authors. 298 | 299 | ours original paper 300 | pool_size 1, 2 1, 3 301 | strides 1, 2 1, 3 302 | conv filters 1, 5 1, 10 303 | 304 | Note that this implementation has not been verified by the original 305 | authors. 306 | 307 | """ 308 | 309 | # start the model 310 | input_main = Input((1, Chans, Samples)) 311 | block1 = Conv2D(25, (1, 5), 312 | input_shape=(1, Chans, Samples), 313 | kernel_constraint = max_norm(2., axis=(0,1,2)))(input_main) 314 | block1 = Conv2D(25, (Chans, 1), 315 | kernel_constraint = max_norm(2., axis=(0,1,2)))(block1) 316 | block1 = BatchNormalization(axis=1, epsilon=1e-05, momentum=0.1)(block1) 317 | block1 = Activation('elu')(block1) 318 | block1 = MaxPooling2D(pool_size=(1, 2), strides=(1, 2))(block1) 319 | block1 = Dropout(dropoutRate)(block1) 320 | 321 | block2 = Conv2D(50, (1, 5), 322 | kernel_constraint = max_norm(2., axis=(0,1,2)))(block1) 323 | block2 = BatchNormalization(axis=1, epsilon=1e-05, momentum=0.1)(block2) 324 | block2 = Activation('elu')(block2) 325 | block2 = MaxPooling2D(pool_size=(1, 2), strides=(1, 2))(block2) 326 | block2 = Dropout(dropoutRate)(block2) 327 | 328 | block3 = Conv2D(100, (1, 5), 329 | kernel_constraint = max_norm(2., axis=(0,1,2)))(block2) 330 | block3 = BatchNormalization(axis=1, epsilon=1e-05, momentum=0.1)(block3) 331 | block3 = Activation('elu')(block3) 332 | block3 = MaxPooling2D(pool_size=(1, 2), strides=(1, 2))(block3) 333 | block3 = Dropout(dropoutRate)(block3) 334 | 335 | block4 = Conv2D(200, (1, 5), 336 | kernel_constraint = max_norm(2., axis=(0,1,2)))(block3) 337 | block4 = BatchNormalization(axis=1, epsilon=1e-05, momentum=0.1)(block4) 338 | block4 = Activation('elu')(block4) 339 | block4 = MaxPooling2D(pool_size=(1, 2), strides=(1, 2))(block4) 340 | block4 = Dropout(dropoutRate)(block4) 341 | 342 | flatten = Flatten()(block4) 343 | 344 | dense = Dense(nb_classes, kernel_constraint = max_norm(0.5))(flatten) 345 | softmax = Activation('softmax')(dense) 346 | 347 | return Model(inputs=input_main, outputs=softmax) 348 | 349 | 350 | # need these for ShallowConvNet 351 | def square(x): 352 | return K.square(x) 353 | 354 | def log(x): 355 | return K.log(K.clip(x, min_value = 1e-7, max_value = 10000)) 356 | 357 | 358 | def ShallowConvNet(nb_classes, Chans = 64, Samples = 128, dropoutRate = 0.5): 359 | """ Keras implementation of the Shallow Convolutional Network as described 360 | in Schirrmeister et. al. (2017), Human Brain Mapping. 361 | 362 | Assumes the input is a 2-second EEG signal sampled at 128Hz. Note that in 363 | the original paper, they do temporal convolutions of length 25 for EEG 364 | data sampled at 250Hz. We instead use length 13 since the sampling rate is 365 | roughly half of the 250Hz which the paper used. The pool_size and stride 366 | in later layers is also approximately half of what is used in the paper. 367 | 368 | Note that we use the max_norm constraint on all convolutional layers, as 369 | well as the classification layer. We also change the defaults for the 370 | BatchNormalization layer. We used this based on a personal communication 371 | with the original authors. 372 | 373 | ours original paper 374 | pool_size 1, 35 1, 75 375 | strides 1, 7 1, 15 376 | conv filters 1, 13 1, 25 377 | 378 | Note that this implementation has not been verified by the original 379 | authors. We do note that this implementation reproduces the results in the 380 | original paper with minor deviations. 381 | """ 382 | 383 | # start the model 384 | input_main = Input((1, Chans, Samples)) 385 | block1 = Conv2D(40, (1, 13), 386 | input_shape=(1, Chans, Samples), 387 | kernel_constraint = max_norm(2., axis=(0,1,2)))(input_main) 388 | block1 = Conv2D(40, (Chans, 1), use_bias=False, 389 | kernel_constraint = max_norm(2., axis=(0,1,2)))(block1) 390 | block1 = BatchNormalization(axis=1, epsilon=1e-05, momentum=0.1)(block1) 391 | block1 = Activation(square)(block1) 392 | block1 = AveragePooling2D(pool_size=(1, 35), strides=(1, 7))(block1) 393 | block1 = Activation(log)(block1) 394 | block1 = Dropout(dropoutRate)(block1) 395 | flatten = Flatten()(block1) 396 | dense = Dense(nb_classes, kernel_constraint = max_norm(0.5))(flatten) 397 | softmax = Activation('softmax')(dense) 398 | 399 | return Model(inputs=input_main, outputs=softmax) 400 | 401 | 402 | --------------------------------------------------------------------------------