├── loss.py
├── README.md
├── X_dawn.py
├── benchmark.py
├── baseline_18.py
├── model.py
├── EEGnet_train.py
├── train.py
└── EEGModels.py


/loss.py:
--------------------------------------------------------------------------------
 1 | import keras.backend as K
 2 | from keras.losses import binary_crossentropy
 3 | def mean_squared_error_ignore_0(y_true, y_pred):
 4 | 	""" loss function computing MSE of non-blank(!=0) in y_true
 5 | 		Args:
 6 | 			y_true(tftensor): true label
 7 | 			y_pred(tftensor): predicted label
 8 | 		return:
 9 | 			MSE reconstruction error for loss computing
10 | 	"""
11 | 	loss = K.switch(K.equal(y_true, K.constant(0)),K.zeros(K.shape(y_true)),K.square(y_pred - y_true))
12 | 	return K.mean(loss, axis=-1)
13 | 
def dummy(y_true, y_pred):
    """Placeholder loss: the mean of a zero tensor shaped like y_true.

    y_pred is not used, so this loss always evaluates to 0.
    """
    zeros = K.zeros(K.shape(y_true))
    return K.mean(zeros)
19 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # ERPENet (Multi-task Autoencoder) for P300 EEG-Based BCI
 2 | The event-related potential encoder network (ERPENet) is a multi-task autoencoder-based model that can be applied to any ERP-related task.
 3 | 
 4 | For more details, please refer to: https://ieeexplore.ieee.org/abstract/document/8723080
 5 | 
 6 | 
 7 | ## Code Description (To be updated) ##
 8 | model.py -- contains all model builders in Keras.  
 9 | train.py -- used to train the models; the log file, TensorBoard file, and best weights are kept.  
10 | benchmark.py -- used to evaluate the trained model; needs the .hdf5 weight file produced by train.py as one of its inputs.  
11 | X_dawn.py -- the xDawn algorithm, used as one of the baselines.  
12 | 
13 | 
14 | ## Citation ##
15 | The following citation format can be used for BibTeX:
16 | 
17 |     @ARTICLE{8723080,
18 |     author={A. {Ditthapron} and N. {Banluesombatkul} and S. {Ketrat} and E. {Chuangsuwanich} and T. {Wilaiprasitporn}},
19 |     journal={IEEE Access},
20 |     title={Universal Joint Feature Extraction for P300 EEG Classification Using Multi-Task Autoencoder},
21 |     year={2019},
22 |     volume={7},
23 |     pages={68415-68428},
24 |     doi={10.1109/ACCESS.2019.2919143},
25 |     }
26 | 


--------------------------------------------------------------------------------
/X_dawn.py:
--------------------------------------------------------------------------------
'''
xDawn baseline. To evaluate another dataset, add its name to the `file` list
below; the __main__ block loads data/X_<name>.npy and data/Y_<name>.npy for
every name in the list.
'''
# Dataset name suffixes iterated over by the __main__ block.
file = ['00','15','18','17','bci_comp_zeropad','erpbci_zeropad','300']
 5 | from sklearn.pipeline import make_pipeline
 6 | from mne.decoding import Vectorizer
 7 | from scipy import stats
 8 | from sklearn.linear_model import LogisticRegression
 9 | from sklearn.preprocessing import StandardScaler
10 | from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
11 | from sklearn.model_selection import train_test_split
12 | from sklearn.model_selection import cross_val_score, StratifiedShuffleSplit,StratifiedKFold
13 | from pyriemann.estimation import ERPCovariances
14 | from pyriemann.tangentspace import TangentSpace
15 | from pyriemann.classification import MDM
16 | from pyriemann.spatialfilters import Xdawn
17 | import pandas as pd
18 | from collections import OrderedDict
19 | 
20 | import numpy as np
21 | import itertools
22 | 
23 | from numpy import unique
24 | from numpy import random
25 | 
# (row, col) index of every cell of the 9x9 channel grid.
channel_2d = list(itertools.product(range(9), repeat=2))


def normalize(X_train, X_val):
    """Z-score every cell of the 9x9 channel grid with training statistics.

    For each (row, col) cell, the mean and the sample standard deviation
    (ddof=1) are taken over all values of ``X_train[:, row, col]`` and applied
    to both arrays. Cells whose standard deviation is 0 are left untouched.

    The statistics are computed with NumPy over the whole cell so that each
    one is a scalar. ``scipy.stats.tstd`` defaults to ``axis=0`` and returns
    an array when the cell has trailing axes (e.g. time samples), which made
    the ``std != 0`` test raise.

    Args:
        X_train: array indexed as (trial, row, col, ...); modified in place.
            Expected to be floating point (integer arrays would truncate on
            assignment).
        X_val: array with the same layout, scaled with the training
            statistics; modified in place.
    Returns:
        tuple: the same (X_train, X_val) objects that were passed in.
    """
    for row, col in channel_2d:
        train_cell = X_train[:, row, col]
        mu = np.mean(train_cell)
        std = np.std(train_cell, ddof=1)
        if std != 0:
            X_train[:, row, col] = (train_cell - mu) / std
            X_val[:, row, col] = (X_val[:, row, col] - mu) / std
    return X_train, X_val
36 | 
results = 0
if __name__ == "__main__":
    # Per-fold scores and the method / dataset each score belongs to.
    f = []
    auc = []
    acc = []
    methods = []
    clfs = OrderedDict()
    clfs['Xdawn + RegLDA'] = make_pipeline(Xdawn(2), Vectorizer(), LDA(shrinkage='auto', solver='eigen'))

    for name in file:
        X1 = np.load('data/X_' + name + '.npy')
        y1 = np.load('data/Y_' + name + '.npy')
        # keep grid rows 4..8 and samples 50..149, then flatten the grid
        X = X1[:, 4:9, :, 50:150]
        X = np.reshape(X, (-1, 9 * 5, 100))
        y = y1.flatten()
        # keep only channels whose first-trial signal does not sum to zero
        zero = np.sum(X, axis=-1)[0] != 0
        X = X[:, zero, :]
        # No shuffling is requested, so random_state is omitted: it had no
        # effect on the splits and current scikit-learn rejects it unless
        # shuffle=True.
        cv = StratifiedKFold(n_splits=10)
        for m in clfs:
            print(name, m)
            res1 = cross_val_score(clfs[m], X, y, scoring='accuracy', cv=cv, n_jobs=-1)
            print(name, m, res1[0])
            res2 = cross_val_score(clfs[m], X, y, scoring='roc_auc', cv=cv, n_jobs=-1)
            print(name, m, res2[0])
            acc.extend(res1)
            auc.extend(res2)
            methods.extend([m] * len(res1))
            f.extend([name] * len(res1))

    results = pd.DataFrame(data=auc, columns=['AUC'])
    results['ACC'] = acc
    results['Method'] = methods
    results['file'] = f
    # Saved only when run as a script, so importing this module no longer
    # writes a placeholder out.npy.
    np.save('out', results)
71 | 


--------------------------------------------------------------------------------
/benchmark.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | This file is used to benchmark the model
 3 | execution format
 4 | python benchmark.py $X$ $Y$ $model$ $out$
 5 | '''
 6 | import numpy as np
 7 | from tensorflow.keras.callbacks import CSVLogger, ModelCheckpoint, ReduceLROnPlateau
 8 | from sklearn.model_selection import train_test_split
 9 | import itertools
10 | from scipy import stats
11 | from tensorflow.keras.optimizers import RMSprop,Adam,Adadelta,SGD
12 | from tensorflow.keras import models
13 | from sklearn.utils import class_weight
14 | from tensorflow.keras.callbacks import TensorBoard
15 | from time import time
16 | 
17 | from tensorflow.keras.models import load_model
18 | import tensorflow.keras.losses
19 | import sklearn.preprocessing as pre
20 | from sklearn.metrics import roc_auc_score
21 | from sklearn.metrics import roc_auc_score
22 | from numpy import unique
23 | from numpy import random
24 | import tensorflow as tf 
25 | from tensorflow.keras.utils import multi_gpu_model
26 | import argparse
27 | 
epochs = 800

# Command-line interface: four positional string arguments, in this order.
parser = argparse.ArgumentParser()
for arg_name, arg_help in (
    ("X", "path to X.npy"),
    ("Y", "path to Y.npy"),
    ("model", "path to __.hdf5"),
    ("out", "output text path"),
):
    parser.add_argument(arg_name, help=arg_help, type=str)
args = parser.parse_args()
out = args.out
#normalization
# (row, col) index of every cell of the 9x9 channel grid.
channel_2d = list(itertools.product(range(9), repeat=2))


def normalize(X_train, X_val):
    """Z-score every cell of the 9x9 channel grid with training statistics.

    For each (row, col) cell, the mean and the sample standard deviation
    (ddof=1) are taken over all values of ``X_train[:, row, col]`` and applied
    to both arrays. Cells whose standard deviation is 0 are left untouched.

    The statistics are computed with NumPy over the whole cell so that each
    one is a scalar. ``scipy.stats.tstd`` defaults to ``axis=0`` and returns
    an array when the cell has trailing axes (e.g. time samples), which made
    the ``std != 0`` test raise.

    Args:
        X_train: array indexed as (trial, row, col, ...); modified in place.
            Expected to be floating point (integer arrays would truncate on
            assignment).
        X_val: array with the same layout, scaled with the training
            statistics; modified in place.
    Returns:
        tuple: the same (X_train, X_val) objects that were passed in.
    """
    for row, col in channel_2d:
        train_cell = X_train[:, row, col]
        mu = np.mean(train_cell)
        std = np.std(train_cell, ddof=1)
        if std != 0:
            X_train[:, row, col] = (train_cell - mu) / std
            X_val[:, row, col] = (X_val[:, row, col] - mu) / std
    return X_train, X_val
51 | 
def EEGnetFormat(X):
    """Merge the two grid axes into one channel axis and drop ten fixed cells.

    Args:
        X: array indexed as (trial, rows, cols, samples). The fixed index list
            covers 0..44, so rows * cols is expected to be 45.
    Returns:
        Array of shape (trial, 1, 35, samples) holding the kept channels in
        their original order.
    """
    n_trials, n_rows, n_cols, n_samples = X.shape[:4]
    flat = np.reshape(X, (n_trials, 1, n_rows * n_cols, n_samples))
    # flattened grid positions that are removed (presumably unused cells -- TODO confirm)
    dropped = {27, 29, 33, 35, 36, 37, 38, 42, 43, 44}
    kept = [idx for idx in range(45) if idx not in dropped]
    return flat[:, :, kept, :]
#load data
X1 = np.load(args.X)
y1 = np.load(args.Y)
X_train, X_test, y_train, y_test = train_test_split(X1, y1, test_size=0.2, random_state=0)
X_train, X_test = normalize(X_train, X_test)
del X1, y1
# keep grid rows 4..8 and samples 50..149, then reshape for EEGNet
X_train = EEGnetFormat(X_train[:, 4:9, :, 50:150])
X_test = EEGnetFormat(X_test[:, 4:9, :, 50:150])

model = load_model(args.model)

csv_logger = CSVLogger(out + '.log')
tensorboard = TensorBoard(log_dir="../logs/{}_{}".format(out, time()))

model.compile(optimizer=Adadelta(), loss=['binary_crossentropy'], metrics=['accuracy'])
# The loggers above were created but never handed to fit(), so neither the
# CSV log nor the TensorBoard run was ever written.
model.fit(x=X_train, y=y_train, batch_size=512, epochs=300,
          validation_data=(X_test, y_test),
          callbacks=[csv_logger, tensorboard])

# named y_score so the `pre` alias of sklearn.preprocessing is not overwritten
y_score = model.predict(X_test)
print(roc_auc_score(y_test, y_score))
75 | 
76 | 


--------------------------------------------------------------------------------
/baseline_18.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | This file is used to benchmark the model
 3 | execution format
 4 | python baseline_18.py $X$ $Y$ $model$ $n_freeze$ $out$
 5 | '''
 6 | import numpy as np
 7 | from model import direct_8,direct_8_dist,LSTM_2D,distributed_time,autoencoder_time,autoencoder_CNN,dense_1,hybrid_LSTM
 8 | from keras.callbacks import CSVLogger, ModelCheckpoint, ReduceLROnPlateau
 9 | from sklearn.model_selection import train_test_split
10 | import itertools
11 | from scipy import stats
12 | from keras.optimizers import RMSprop,Adam,Adadelta,SGD
13 | from keras import models
14 | from sklearn.utils import class_weight
15 | from keras.callbacks import TensorBoard
16 | from time import time
17 | from loss import mean_squared_error_ignore_0,dummy
18 | from keras.models import load_model
19 | import keras.losses
20 | import sklearn.preprocessing as pre
21 | from sklearn.metrics import roc_auc_score
22 | from sklearn.metrics import roc_auc_score
23 | from numpy import unique
24 | from numpy import random
25 | import tensorflow as tf 
26 | from keras.utils import multi_gpu_model
27 | import argparse
# Expose the custom loss as an attribute of keras.losses (presumably so a
# saved model that refers to it by name can be deserialized -- TODO confirm).
keras.losses.mean_squared_error_ignore_0 = mean_squared_error_ignore_0
epochs = 1000

# Command-line interface: five positional arguments, in this order.
parser = argparse.ArgumentParser()
for arg_name, arg_help, arg_type in (
    ("X", "path to X.npy", str),
    ("Y", "path to Y.npy", str),
    ("model", "path to __.hdf5", str),
    ("n_freeze", "# of freeze layers(6 for baseline, 63 for the proposed model)", int),
    ("out", "output text path", str),
):
    parser.add_argument(arg_name, help=arg_help, type=arg_type)
args = parser.parse_args()
43 | 
#normalization
# (row, col) index of every cell of the 9x9 channel grid.
channel_2d = list(itertools.product(range(9), repeat=2))


def normalize(X_train, X_val):
    """Z-score every cell of the 9x9 channel grid with training statistics.

    For each (row, col) cell, the mean and the sample standard deviation
    (ddof=1) are taken over all values of ``X_train[:, row, col]`` and applied
    to both arrays. Cells whose standard deviation is 0 are left untouched.

    The statistics are computed with NumPy over the whole cell so that each
    one is a scalar. ``scipy.stats.tstd`` defaults to ``axis=0`` and returns
    an array when the cell has trailing axes (e.g. time samples), which made
    the ``std != 0`` test raise.

    Args:
        X_train: array indexed as (trial, row, col, ...); modified in place.
            Expected to be floating point (integer arrays would truncate on
            assignment).
        X_val: array with the same layout, scaled with the training
            statistics; modified in place.
    Returns:
        tuple: the same (X_train, X_val) objects that were passed in.
    """
    for row, col in channel_2d:
        train_cell = X_train[:, row, col]
        mu = np.mean(train_cell)
        std = np.std(train_cell, ddof=1)
        if std != 0:
            X_train[:, row, col] = (train_cell - mu) / std
            X_val[:, row, col] = (X_val[:, row, col] - mu) / std
    return X_train, X_val
54 | 
#load data
X1 = np.load(args.X)
y1 = np.load(args.Y)
X_train, X_test, y_train, y_test = train_test_split(X1, y1, test_size=0.2, random_state=0)
X_train, X_test = normalize(X_train, X_test)
del X1, y1
with tf.device('/cpu:0'):
    model = load_model(args.model, custom_objects={'mean_squared_error_ignore_0': mean_squared_error_ignore_0})

#chop out unnecessary data
X_train = X_train[:, 4:9, :, 50:150]
X_test = X_test[:, 4:9, :, 50:150]

#freeze layer
for i in range(args.n_freeze):
    model.layers[i].trainable = False

# `out` was used below without ever being assigned (NameError); take it from
# the parsed command-line arguments.
out = args.out
csv_logger = CSVLogger(out + '.log')
tensorboard = TensorBoard(log_dir="../logs/{}".format(time()))

# The reconstruction loss has weight 0, so only the classification output
# drives the update.
model.compile(optimizer=Adadelta(), loss=[mean_squared_error_ignore_0, 'binary_crossentropy'],
              metrics=['accuracy'], loss_weights=[0, 1.0])
# The loggers above were created but never handed to fit().
model.fit(x=X_train, y=[X_train, y_train], batch_size=1024, epochs=300,
          validation_data=(X_test, [X_test, y_test]),
          callbacks=[csv_logger, tensorboard])

# outputs are (reconstruction, classification score); only the score is evaluated
_reconstruction, y_score = model.predict(X_test)
print(roc_auc_score(y_test, y_score))
81 | 


--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
 1 | from keras.layers import Input, TimeDistributed, Conv2D, Flatten, Dense, Dropout, BatchNormalization, Activation, Reshape,GRU, LeakyReLU, Reshape,Permute,ConvLSTM2D,Conv1D	,UpSampling2D,Conv2DTranspose,ZeroPadding2D,UpSampling1D,Cropping1D,Lambda,LSTM,RepeatVector
 2 | from keras.models import Model
 3 | from keras import initializers
 4 | import keras.backend as K
 5 | import tensorflow as tf
 6 | 
 7 | def hybrid_LSTM(depth=2,conv_size=16,dense_size=512,input_dim=(5,9,100,),dropoutRate=0.2):
 8 | 	"""
 9 | 	Autoencoder model builder composes of CNNs and a LSTM
10 | 	Args:
11 | 		depth (int): number of CNN blocks, each has 3 CNN layers with BN and a dropout
12 | 		conv_size (int): initial CNN filter size, doubled in each depth level
13 | 		dense_size (int): size of latent vector and a number of filters of ConvLSTM2D
14 | 		input_dim (tuple): input dimention, should be in (y_spatial,x_spatial,temporal)
15 | 		dropoutRate (float): dropout rate used in all nodes
16 | 	Return:
17 | 		keras model
18 | 	"""
19 | 	"""Setup"""
20 | 	temp_filter = conv_size
21 | 	X = Input(shape=input_dim, name = 'input')
22 | 	X = Permute((3,1,2))(X)  #move temporal axes to be first dim
23 | 	X = Reshape((100,5,9,1))(X) #reshape (,1) to be feature of each spatial
24 | 
25 | 	"""Encoder"""
26 | 	for i in range(depth):
27 | 		for j in range(3):
28 | 			if j == 0: #j==0 is first layer(j) of the CNN block(i); apply stride with double filter size
29 | 				X = TimeDistributed(Conv2D(2*temp_filter,(3,3),padding='same' ,strides=(2,2),data_format="channels_last"),name = 'encoder_'+str(i)+str(j)+'_timeConv2D')(X)
30 | 			else:
31 | 				X = TimeDistributed(Conv2D(temp_filter,(3,3), padding='same', data_format="channels_last"),name = 'encoder_'+str(i)+str(j)+'_timeConv2D')(X)
32 | 			X = BatchNormalization(name = 'encoder_'+str(i)+str(j)+'_BN')(X)
33 | 			X = LeakyReLU(alpha=0.1,name = 'encoder_'+str(i)+str(j)+'_relu')(X)
34 | 			X = Dropout(dropoutRate,name = 'encoder_'+str(i)+str(j)+'_drop')(X)
35 | 		temp_filter = int(temp_filter * 2)
36 | 	X = TimeDistributed(Flatten())(X)
37 | 	X = LSTM(dense_size, recurrent_dropout=dropoutRate ,return_sequences=False, implementation=2)(X)
38 | 
39 | 	"""Latent"""
40 | 	latent = X
41 | 
42 | 	"""Setup for decoder""""
43 | 	X = RepeatVector(100)(X)
44 | 	temp_filter = temp_filter/2
45 | 
46 | 	"""Decoder"""
47 | 	X = LSTM(temp_filter*2*3, recurrent_dropout=dropoutRate ,return_sequences=True, implementation=2)(X)
48 | 	X = Reshape((100,2,3,temp_filter))(X)
49 | 	for i in range(depth):
50 | 		for j in range(3):
51 | 			if j == 0:
52 | 				X = TimeDistributed(UpSampling2D((2,2)),name = 'decoder_'+str(i)+str(j)+'_upsampling')(X)
53 | 				X = TimeDistributed(ZeroPadding2D(((1,0),(1,0))),name = 'decoder_'+str(i)+str(j)+'_padding')(X)
54 | 				X = TimeDistributed(Conv2D(temp_filter,(3,3),data_format="channels_last"),name = 'decoder_'+str(i)+str(j)+'_timeConv2D')(X)
55 | 			else:
56 | 				X = TimeDistributed(Conv2D(temp_filter,(3,3), padding='same', data_format="channels_last"),name = 'decoder_'+str(i)+str(j)+'_timeConv2D')(X)
57 | 			X = BatchNormalization(name = 'decoder_'+str(i)+str(j)+'_BN')(X)
58 | 			X = LeakyReLU(alpha=0.1,name = 'decoder_'+str(i)+str(j)+'_relu')(X)
59 | 			X = Dropout(dropoutRate,name = 'decoder_'+str(i)+str(j)+'_drop')(X)
60 | 		temp_filter = int(temp_filter / 2)
61 | 	X = TimeDistributed(Conv2D(1,(1,1), padding='same', data_format="channels_last"),name = 'decoder__timeConv2D')(X)
62 | 	X = Reshape((100,5,9))(X)
63 | 	X = Permute((2,3,1))(X)
64 | 	decoded = X
65 | 	X = latent
66 | 	X = Dense(1,name = 'Dense10',activation='sigmoid')(X)
67 | 	return Model(inputs = model_input, outputs = [decoded,X])
68 | 
69 | 
70 | 
71 | 
def baseline(input_dim):
    """
    Baseline (AE-SLIC) model: a fully connected autoencoder with a classifier
    attached to its latent layer
    Args:
        input_dim(tuple): input dimension; the data must be flattened to (samples,feature)
    Return:
        keras model with two outputs: the 4500-unit reconstruction and a
        sigmoid score computed from the 250-unit latent layer
    """
    model_input = Input(shape=input_dim, name='input')

    # encoder (Dense layers without an activation argument)
    encoded = Dense(500, name='Encoder1')(model_input)
    encoded = Dense(250, name='Encoder2')(encoded)

    # decoder
    decoded = Dense(500, name='Decoder1')(encoded)
    decoded = Dense(250, name='Decoder2')(decoded)
    decoded = Dense(4500, name='Decoder3')(decoded)

    # classifier head on the latent representation
    score = Dense(1, activation='sigmoid')(encoded)
    return Model(inputs=model_input, outputs=[decoded, score])
92 | 


--------------------------------------------------------------------------------
/EEGnet_train.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | This file is used to train EEGNet with five different datasets
  3 | execution format
  4 | python EEGnet_train.py $X1$ $Y1$ ... $X5$ $Y5$ $out$
  5 | '''
  6 | 
  7 | from EEGModels import EEGNet
  8 | import numpy as np
  9 | 
 10 | 
 11 | from tensorflow.keras.callbacks import CSVLogger, ModelCheckpoint, ReduceLROnPlateau
 12 | from sklearn.model_selection import train_test_split
 13 | import itertools
 14 | from scipy import stats
 15 | from tensorflow.keras.optimizers import RMSprop,Adam,Adadelta,SGD
 16 | from tensorflow.keras import models
 17 | from sklearn.utils import class_weight
 18 | from tensorflow.keras.callbacks import TensorBoard,LearningRateScheduler,EarlyStopping
 19 | from time import time
 20 | 
 21 | import tensorflow as tf
 22 | from tensorflow.keras.utils import multi_gpu_model
 23 | import tensorflow.keras.backend as K
 24 | from tensorflow.keras.losses import binary_crossentropy
 25 | from numpy import unique
 26 | from numpy import random 
 27 | 
 28 | 
 29 | import argparse
 30 | parser = argparse.ArgumentParser()
 31 | parser.add_argument("X1", help="path to X.npy",
 32 |                     type=str)
 33 | parser.add_argument("Y1", help="path to Y.npy",
 34 |                     type=str)
 35 | parser.add_argument("X2", help="path to X.npy",
 36 |                     type=str)
 37 | parser.add_argument("Y2", help="path to Y.npy",
 38 |                     type=str)
 39 | parser.add_argument("X3", help="path to X.npy",
 40 |                     type=str)
 41 | parser.add_argument("Y3", help="path to Y.npy",
 42 |                     type=str)
 43 | parser.add_argument("X4", help="path to X.npy",
 44 |                     type=str)
 45 | parser.add_argument("Y4", help="path to Y.npy",
 46 |                     type=str)
 47 | parser.add_argument("X5", help="path to X.npy",
 48 |                     type=str)
 49 | parser.add_argument("Y5", help="path to Y.npy",
 50 |                     type=str)
 51 | parser.add_argument("out", help="output text path",
 52 |                     type=str)
 53 | args = parser.parse_args()
 54 | out = args.out
 55 | 
 56 | epochs=500
 57 | 
 58 | #normalization
 59 | channel_2d = [grid for grid in itertools.product(range(9), repeat=2)]
 60 | def normalize(X_train,X_val):
 61 |     mu=[stats.tmean(X_train[:,d[0],d[1]])  for d in channel_2d]
 62 |     std=[stats.tstd(X_train[:,d[0],d[1]]) for d in channel_2d]
 63 |     for i in range(len(channel_2d)):
 64 |         if(std[i]!=0):
 65 |             X_train[:,channel_2d[i][0],channel_2d[i][1]]= (X_train[:,channel_2d[i][0],channel_2d[i][1]]-mu[i]) / std[i]
 66 |             X_val[:,channel_2d[i][0],channel_2d[i][1]]= (X_val[:,channel_2d[i][0],channel_2d[i][1]]-mu[i]) / std[i]
 67 |     # minn = [np.min(X_train[:,d[0],d[1]]) for d in channel_2d]
 68 |     # maxx = [np.max(X_train[:,d[0],d[1]]) for d in channel_2d]
 69 |     # for i in range(len(channel_2d)):
 70 |     #   if(maxx[i]-minn[i]!=0):
 71 |     #       X_train[:,channel_2d[i][0],channel_2d[i][1]]= (X_train[:,channel_2d[i][0],channel_2d[i][1]]-minn[i]) / (maxx[i]-minn[i])
 72 |     #       X_val[:,channel_2d[i][0],channel_2d[i][1]]= (X_val[:,channel_2d[i][0],channel_2d[i][1]]-minn[i]) / (maxx[i]-minn[i])
 73 |     return X_train,X_val
 74 | 
 75 | #train weight
 76 | def weightFunction(y_train):
 77 |     weight = np.zeros(2) # class
 78 |     for i in y_train:
 79 |       weight[int(i)] +=1 
 80 |     d = np.min(weight)
 81 |     weight = [temp/d for temp in weight]
 82 |     temp = np.empty_like(y_train)
 83 |     temp[y_train==0] = weight[0]
 84 |     temp[y_train==1] = weight[1]
 85 |     return temp
 86 | #chop out unnecessary data
 87 | def EEGnetFormat(X):
 88 |     X = np.reshape(X,(X.shape[0],1,X.shape[1] * X.shape[2],X.shape[3]))
 89 |     notZero = [i for i in range(45) if i not in [44,43,42,38,37,36,35,33,29,27]]
 90 |     return X[:,:,notZero,:]
 91 | 
 92 | #load data
 93 | X1 = np.load(args.X1)
 94 | y1 = np.load(args.Y1)
 95 | X1_train, X1_test, y1_train, y1_test = train_test_split(X1,y1,test_size=0.2,random_state=0)
 96 | X1_train,X1_test = normalize(X1_train,X1_test)
 97 | y1_train = np.reshape(y1_train,(-1,1))
 98 | y1_test = np.reshape(y1_test,(-1,1))
 99 | 
100 | X1_train = EEGnetFormat(X1_train[:,4:9,:,50:150])
101 | X1_test = EEGnetFormat(X1_test[:,4:9,:,50:150])
102 | 
103 | 
104 | del X1,y1
105 | 
106 | X2 = np.load(args.X2)
107 | y2 = np.load(args.Y2)
108 | X2_train, X2_test, y2_train, y2_test = train_test_split(X2,y2,test_size=0.2,random_state=0)
109 | X2_train, X2_test = normalize(X2_train,X2_test)
110 | y2_train = np.reshape(y2_train,(-1,1))
111 | y2_test = np.reshape(y2_test,(-1,1))
112 | 
113 | X2_train = EEGnetFormat(X2_train[:,4:9,:,50:150])
114 | X2_test = EEGnetFormat(X2_test[:,4:9,:,50:150])
115 | 
116 | 
117 | del X2,y2
118 | 
119 | 
120 | X3 = np.load(args.X3)
121 | y3 = np.load(args.Y3)
122 | X3_train, X3_test, y3_train, y3_test = train_test_split(X3,y3,test_size=0.2,random_state=0)
123 | X3_train,X3_test = normalize(X3_train,X3_test)
124 | y3_train = np.reshape(y3_train,(-1,1))
125 | y3_test = np.reshape(y3_test,(-1,1))
126 | 
127 | X3_train = EEGnetFormat(X3_train[:,4:9,:,50:150])
128 | X3_test = EEGnetFormat(X3_test[:,4:9,:,50:150])
129 | 
130 | del X3,y3
131 | 
132 | 
133 | X4 = np.load(args.X4)
134 | y4 = np.load(args.Y4)
135 | X4_train, X4_test, y4_train, y4_test = train_test_split(X4,y4,test_size=0.2,random_state=0)
136 | X4_train,X4_test = normalize(X4_train,X4_test)
137 | y4_train = np.reshape(y4_train,(-1,1))
138 | y4_test = np.reshape(y4_test,(-1,1))
139 | 
140 | X4_train = EEGnetFormat(X4_train[:,4:9,:,50:150])
141 | X4_test = EEGnetFormat(X4_test[:,4:9,:,50:150])
142 | 
143 | del X4,y4
144 | 
145 | X5 = np.load(args.X5)
146 | y5 = np.load(args.Y5)
147 | X5_train, X5_test, y5_train, y5_test = train_test_split(X5,y5,test_size=0.2,random_state=0)
148 | X5_train,X5_test = normalize(X5_train,X5_test)
149 | y5_train = np.reshape(y5_train,(-1,1))
150 | y5_test = np.reshape(y5_test,(-1,1))
151 | 
152 | X5_train = EEGnetFormat(X5_train[:,4:9,:,50:150])
153 | X5_test = EEGnetFormat(X5_test[:,4:9,:,50:150])
154 | 
155 | del X5,y5
156 | 
157 | 
158 | 
159 | X_train = np.concatenate((X2_train,X1_train,X3_train,X4_train,X5_train))
160 | del X1_train,X3_train,X4_train,X2_train,X5_train
161 | X_test = np.concatenate((X1_test,X2_test,X3_test,X4_test,X5_test))
162 | del X1_test,X2_test,X3_test,X4_test,X5_test
163 | 
164 | y_train = np.concatenate((y1_train,y2_train,y3_train,y4_train,y5_train))
165 | del y1_train,y2_train,y3_train,y4_train,y5_train
166 | y_test = np.concatenate((y1_test,y2_test,y3_test,y4_test,y5_test))
167 | del y1_test,y2_test,y3_test,y4_test,y5_test
168 | 
169 | 
170 | # X_train = X_train[:,4:9,:,50:150]
171 | # X_test = X_test[:,4:9,:,50:150]
172 | 
173 | #format to match EEGnet
174 | 
175 | 
176 | # X_train = EEGnetFormat(X_train)
177 | # X_test = EEGnetFormat(X_test)
178 | 
179 | 
180 | model  = EEGNet(nb_classes = 1, Chans = 35, Samples = 100)
181 | model.compile(optimizer='adam',loss=['binary_crossentropy'],metrics=['accuracy'])
182 | 
183 | 
184 | print(model.summary())
185 | 
186 | #train the model
187 | csv_logger = CSVLogger(out+'.log')
188 | filepath=out+".hdf5"
189 | tensorboard = TensorBoard(log_dir="../logs/{}_{}".format(out,time()))
190 | checkpointer = ModelCheckpoint(monitor='val_loss', filepath=filepath, verbose=1, save_best_only=True)
191 | early_stop = EarlyStopping(monitor='val_loss', min_delta=0, patience=10)
192 | model.fit(x=X_train,y=y_train,batch_size=128,epochs=epochs,validation_data=(X_test,y_test),callbacks=[checkpointer,csv_logger,tensorboard,early_stop]) 
193 | 
194 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | This file is used to train the model with five different datasets
  3 | execution format
  4 | python train.py $X1$ $Y1$ ... $X5$ $Y5$ $model$ $out$
  5 | '''
  6 | 
  7 | import numpy as np
  8 | from model import hybrid_LSTM, baseline
  9 | from keras.callbacks import CSVLogger, ModelCheckpoint, ReduceLROnPlateau
 10 | from sklearn.model_selection import train_test_split
 11 | import itertools
 12 | from scipy import stats
 13 | from keras.optimizers import RMSprop,Adam,Adadelta,SGD
 14 | from keras import models
 15 | from sklearn.utils import class_weight
 16 | from keras.callbacks import TensorBoard,LearningRateScheduler
 17 | from time import time
 18 | from loss import mean_squared_error_ignore_0,dummy
 19 | import tensorflow as tf
 20 | from keras.utils import multi_gpu_model
 21 | import keras.backend as K
 22 | from keras.losses import binary_crossentropy
 23 | from numpy import unique
 24 | from numpy import random 
 25 | 
 26 | import argparse
 27 | parser = argparse.ArgumentParser()
 28 | parser.add_argument("X1", help="path to X.npy",
 29 |                     type=str)
 30 | parser.add_argument("Y1", help="path to Y.npy",
 31 |                     type=str)
 32 | parser.add_argument("X2", help="path to X.npy",
 33 |                     type=str)
 34 | parser.add_argument("Y2", help="path to Y.npy",
 35 |                     type=str)
 36 | parser.add_argument("X3", help="path to X.npy",
 37 |                     type=str)
 38 | parser.add_argument("Y3", help="path to Y.npy",
 39 |                     type=str)
 40 | parser.add_argument("X4", help="path to X.npy",
 41 |                     type=str)
 42 | parser.add_argument("Y4", help="path to Y.npy",
 43 |                     type=str)
 44 | parser.add_argument("X5", help="path to X.npy",
 45 |                     type=str)
 46 | parser.add_argument("Y5", help="path to Y.npy",
 47 |                     type=str)
 48 | parser.add_argument("model", help="model name",
 49 |                     type=str)
 50 | parser.add_argument("out", help="output text path",
 51 |                     type=str)
 52 | args = parser.parse_args()
 53 | 
 54 | 
 55 | epochs=800
 56 | 
 57 | #normalization
 58 | channel_2d = [grid for grid in itertools.product(range(9), repeat=2)]
 59 | def normalize(X_train,X_val):
 60 |     mu=[stats.tmean(X_train[:,d[0],d[1]])  for d in channel_2d]
 61 |     std=[stats.tstd(X_train[:,d[0],d[1]]) for d in channel_2d]
 62 |     for i in range(len(channel_2d)):
 63 |         if(std[i]!=0):
 64 |             X_train[:,channel_2d[i][0],channel_2d[i][1]]= (X_train[:,channel_2d[i][0],channel_2d[i][1]]-mu[i]) / std[i]
 65 |             X_val[:,channel_2d[i][0],channel_2d[i][1]]= (X_val[:,channel_2d[i][0],channel_2d[i][1]]-mu[i]) / std[i]
 66 |     # minn = [np.min(X_train[:,d[0],d[1]]) for d in channel_2d]
 67 |     # maxx = [np.max(X_train[:,d[0],d[1]]) for d in channel_2d]
 68 |     # for i in range(len(channel_2d)):
 69 |     #   if(maxx[i]-minn[i]!=0):
 70 |     #       X_train[:,channel_2d[i][0],channel_2d[i][1]]= (X_train[:,channel_2d[i][0],channel_2d[i][1]]-minn[i]) / (maxx[i]-minn[i])
 71 |     #       X_val[:,channel_2d[i][0],channel_2d[i][1]]= (X_val[:,channel_2d[i][0],channel_2d[i][1]]-minn[i]) / (maxx[i]-minn[i])
 72 |     return X_train,X_val
 73 | 
 74 | #train weight
 75 | def weightFunction(y_train):
 76 |     weight = np.zeros(2) # class
 77 |     for i in y_train:
 78 |       weight[int(i)] +=1 
 79 |     d = np.min(weight)
 80 |     weight = [temp/d for temp in weight]
 81 |     temp = np.empty_like(y_train)
 82 |     temp[y_train==0] = weight[0]
 83 |     temp[y_train==1] = weight[1]
 84 |     return temp
 85 | #load data
 86 | X1 = np.load(args.X1)
 87 | y1 = np.load(args.Y1)
 88 | X1_train, X1_test, y1_train, y1_test = train_test_split(X1,y1,test_size=0.2,random_state=0)
 89 | X1_train,X1_test = normalize(X1_train,X1_test)
 90 | y1_train = np.reshape(y1_train,(-1,1))
 91 | y1_test = np.reshape(y1_test,(-1,1))
 92 | w1 = weightFunction(y1_train)
 93 | X1_w = np.multiply(np.reshape(np.repeat(len(X1_train),len(X1_train)),(-1,1)),w1)
 94 | del X1,y1
 95 | 
 96 | X2 = np.load(args.X2)
 97 | y2 = np.load(args.Y2)
 98 | X2_train, X2_test, y2_train, y2_test = train_test_split(X2,y2,test_size=0.2,random_state=0)
 99 | X2_train, X2_test = normalize(X2_train,X2_test)
100 | y2_train = np.reshape(y2_train,(-1,1))
101 | y2_test = np.reshape(y2_test,(-1,1))
102 | w2 = weightFunction(y2_train)
103 | X2_w = np.multiply(np.reshape(np.repeat(len(X2_train),len(X2_train)),(-1,1)),w2)
104 | del X2,y2
105 | 
106 | 
107 | X3 = np.load(args.X3)
108 | y3 = np.load(args.Y3)
109 | X3_train, X3_test, y3_train, y3_test = train_test_split(X3,y3,test_size=0.2,random_state=0)
110 | X3_train,X3_test = normalize(X3_train,X3_test)
111 | y3_train = np.reshape(y3_train,(-1,1))
112 | y3_test = np.reshape(y3_test,(-1,1))
113 | w3 = weightFunction(y3_train)
114 | X3_w = np.multiply(np.reshape(np.repeat(len(X3_train),len(X3_train)),(-1,1)),w3)
115 | del X3,y3
116 | 
117 | 
118 | X4 = np.load(args.X4)
119 | y4 = np.load(args.Y4)
120 | X4_train, X4_test, y4_train, y4_test = train_test_split(X4,y4,test_size=0.2,random_state=0)
121 | X4_train,X4_test = normalize(X4_train,X4_test)
122 | y4_train = np.reshape(y4_train,(-1,1))
123 | y4_test = np.reshape(y4_test,(-1,1))
124 | w4 = weightFunction(y4_train)
125 | X4_w = np.multiply(np.reshape(np.repeat(len(X4_train),len(X4_train)),(-1,1)),w4)
126 | del X4,y4
127 | 
128 | X5 = np.load(args.X5)
129 | y5 = np.load(args.Y5)
130 | X5_train, X5_test, y5_train, y5_test = train_test_split(X5,y5,test_size=0.2,random_state=0)
131 | X5_train,X5_test = normalize(X5_train,X5_test)
132 | y5_train = np.reshape(y5_train,(-1,1))
133 | y5_test = np.reshape(y5_test,(-1,1))
134 | w5 = weightFunction(y5_train)
135 | X5_w = np.multiply(np.reshape(np.repeat(len(X5_train),len(X5_train)),(-1,1)),w5)
136 | del X5,y5
137 | 
138 | 
139 | 
140 | X_train = np.concatenate((X2_train,X1_train,X3_train,X4_train,X5_train))
141 | X_weight = np.concatenate((X1_w,X2_w,X3_w,X4_w,X5_w)).flatten()
142 | X_weight = 1.0/X_weight*np.max(X_weight)
143 | del X1_train,X3_train,X4_train,X2_train,X5_train
144 | X_test = np.concatenate((X1_test,X2_test,X3_test,X4_test,X5_test))
145 | del X1_test,X2_test,X3_test,X4_test,X5_test
146 | 
147 | y_train = np.concatenate((y1_train,y2_train,y3_train,y4_train,y5_train))
148 | del y1_train,y2_train,y3_train,y4_train,y5_train
149 | y_test = np.concatenate((y1_test,y2_test,y3_test,y4_test,y5_test))
150 | del y1_test,y2_test,y3_test,y4_test,y5_test
151 | 
152 | #chop out unnecessary data
153 | X_train = X_train[:,4:9,:,50:150]
154 | X_test = X_test[:,4:9,:,50:150]
155 | with tf.device('/cpu:0'):
156 |     if args.model == 'hybrid_LSTM':
157 |         model = hybrid_LSTM(depth=2,conv_size=8,dense_size=512,input_dim=(5,9,100,),dropoutRate=0.2)
158 |     if args.model == 'baseline':
159 |         model = baseline(input_dim=(4500,))
160 |         #baseline is FC layers not CNN, need to flatten
161 |         X_train = np.reshape(X_train,(X_train.shape[0],-1))
162 |         X_test = np.reshape(X_test,(X_test.shape[0],-1))
163 | 
# Compile the template model and its two-GPU replica with identical settings:
# output 1 uses the zero-ignoring MSE loss (weight 0.4) and output 2 uses
# binary cross-entropy (weight 0.6). Each model gets its own SGD instance.
model.compile(optimizer=SGD(lr=0.002, decay=1E-5),
              loss=[mean_squared_error_ignore_0, 'binary_crossentropy'],
              metrics=['accuracy'],
              loss_weights=[0.4, 0.6])
parallel_model = multi_gpu_model(model, gpus=2)
# Attach the template model as `callback_model` on the replica.
# NOTE(review): presumably so callbacks such as ModelCheckpoint operate on the
# template model rather than the multi-GPU wrapper -- confirm against the
# installed Keras version.
parallel_model.callback_model = model
parallel_model.compile(optimizer=SGD(lr=0.002, decay=1E-5),
                       loss=[mean_squared_error_ignore_0, 'binary_crossentropy'],
                       metrics=['accuracy'],
                       loss_weights=[0.4, 0.6])


# summary() prints the table itself and returns None, so it is not wrapped in
# print() (that would emit an extra "None" line).
model.summary()
171 | 
#train the model
csv_logger = CSVLogger(out+'.log')
# The "{epoch:02d}" placeholder must live inside the string: ModelCheckpoint
# formats the path itself each time it saves. (The previous expression tried
# to concatenate a brace literal and did not parse.)
filepath = "out{epoch:02d}.hdf5"
tensorboard = TensorBoard(log_dir="../logs/{}".format(time()))
# Only keep checkpoints that improve the validation loss.
checkpointer = ModelCheckpoint(monitor='val_loss', filepath=filepath, verbose=1, save_best_only=True)
# Two targets per sample: the input itself for the first output and the label
# for the second; the same per-sample weights are applied to both outputs.
parallel_model.fit(x=X_train,
                   y=[X_train, y_train],
                   batch_size=512,
                   epochs=epochs,
                   validation_data=(X_test, [X_test, y_test]),
                   callbacks=[checkpointer, csv_logger, tensorboard],
                   sample_weight=[X_weight, X_weight])
178 | 


--------------------------------------------------------------------------------
/EEGModels.py:
--------------------------------------------------------------------------------
# The models in this file take inputs shaped (trials, kernels, channels,
# samples), i.e. the 'channels_first' layout, so that layout is forced here
# at import time.
# NOTE(review): this call configures the standalone `keras` backend, while the
# layers below are imported from `tensorflow.keras` -- confirm that the
# setting actually reaches the backend those layers use in the installed
# versions.
import keras
keras.backend.set_image_data_format('channels_first')
  3 | """
  4 |  ARL_EEGModels - A collection of Convolutional Neural Network models for EEG
  5 |  Signal Processing and Classification, using Keras and Tensorflow
  6 | 
  7 |  Requirements:
  8 |     (1) Tensorflow == 1.9.0
  9 |     (2) 'image_data_format' = 'channels_first' in keras.json config
 10 |     (3) Data shape = (trials, kernels, channels, samples), which for the 
 11 |         input layer, will be (trials, 1, channels, samples). 
 12 |     
 13 |  To use:
 14 |     
 15 |     (1) Place this file in the PYTHONPATH variable in your IDE (i.e.: Spyder)
 16 |     (2) Import the model as
 17 |         
 18 |         from EEGModels import EEGNet    
 19 |         
 20 |         model = EEGNet(nb_classes = ..., Chans = ..., Samples = ...)
 21 |         
 22 |     (3) Then compile and fit the model
 23 |     
 24 |         model.compile(loss = ..., optimizer = ..., metrics = ...)
 25 |         fitted    = model.fit(...)
 26 |         predicted = model.predict(...)
 27 | 
 28 | 
 29 |  Portions of this project are works of the United States Government and are not
 30 |  subject to domestic copyright protection under 17 USC Sec. 105.  Those 
 31 |  portions are released world-wide under the terms of the Creative Commons Zero 
 32 |  1.0 (CC0) license.  
 33 |  
 34 |  Other portions of this project are subject to domestic copyright protection 
 35 |  under 17 USC Sec. 105.  Those portions are licensed under the Apache 2.0 
 36 |  license.  The complete text of the license governing this material is in 
 37 |  the file labeled LICENSE.TXT that is a part of this project's official 
 38 |  distribution. 
 39 | """
 40 | 
 41 | from tensorflow.keras.models import Model
 42 | from tensorflow.keras.layers import Dense, Activation, Permute, Dropout
 43 | from tensorflow.keras.layers import Conv2D, MaxPooling2D, AveragePooling2D
 44 | from tensorflow.keras.layers import SeparableConv2D, DepthwiseConv2D
 45 | from tensorflow.keras.layers import BatchNormalization
 46 | from tensorflow.keras.layers import SpatialDropout2D
 47 | from tensorflow.keras.regularizers import l1_l2
 48 | from tensorflow.keras.layers import Input, Flatten
 49 | from tensorflow.keras.constraints import max_norm
 50 | from tensorflow.keras import backend as K
 51 | 
 52 | 
 53 | def EEGNet(nb_classes, Chans = 64, Samples = 128, 
 54 |              dropoutRate = 0.25, kernLength = 64, F1 = 4, 
 55 |              D = 2, F2 = 8, norm_rate = 0.25, dropoutType = 'Dropout'):
 56 |     """ Keras Implementation of EEGNet
 57 |     http://iopscience.iop.org/article/10.1088/1741-2552/aace8c/meta
 58 | 
 59 |     Note that this implements the newest version of EEGNet and NOT the earlier
 60 |     version (version v1 and v2 on arxiv). We strongly recommend using this
 61 |     architecture as it performs much better and has nicer properties than
 62 |     our earlier version. For example:
 63 |         
 64 |         1. Depthwise Convolutions to learn spatial filters within a 
 65 |         temporal convolution. The use of the depth_multiplier option maps 
 66 |         exactly to the number of spatial filters learned within a temporal
 67 |         filter. This matches the setup of algorithms like FBCSP which learn 
 68 |         spatial filters within each filter in a filter-bank. This also limits 
 69 |         the number of free parameters to fit when compared to a fully-connected
 70 |         convolution. 
 71 |         
 72 |         2. Separable Convolutions to learn how to optimally combine spatial
 73 |         filters across temporal bands. Separable Convolutions are Depthwise
 74 |         Convolutions followed by (1x1) Pointwise Convolutions. 
 75 |         
 76 |     
 77 |     While the original paper used Dropout, we found that SpatialDropout2D 
 78 |     sometimes produced slightly better results for classification of ERP 
 79 |     signals. However, SpatialDropout2D significantly reduced performance 
 80 |     on the Oscillatory dataset (SMR, BCI-IV Dataset 2A). We recommend using
 81 |     the default Dropout in most cases.
 82 |         
 83 |     Assumes the input signal is sampled at 128Hz. If you want to use this model
 84 |     for any other sampling rate you will need to modify the lengths of temporal
 85 |     kernels and average pooling size in blocks 1 and 2 as needed (double the 
 86 |     kernel lengths for double the sampling rate, etc). Note that we haven't 
 87 |     tested the model performance with this rule so this may not work well. 
 88 |     
 89 |     The model with default parameters gives the EEGNet-4,2 model as discussed
 90 |     in the paper. This model should do pretty well in general, although as the
 91 |     paper discussed the EEGNet-8,2 (with 8 temporal kernels and 2 spatial
 92 |     filters per temporal kernel) can do slightly better on the SMR dataset.
 93 |     Other variations that we found to work well are EEGNet-4,1 and EEGNet-8,1.
 94 | 
 95 |     We set F2 = F1 * D (number of input filters = number of output filters) for
 96 |     the SeparableConv2D layer. We haven't extensively tested other values of this
 97 |     parameter (say, F2 < F1 * D for compressed learning, and F2 > F1 * D for
 98 |     overcomplete). We believe the main parameters to focus on are F1 and D. 
 99 | 
100 |     Inputs:
101 |         
102 |       nb_classes      : int, number of classes to classify
103 |       Chans, Samples  : number of channels and time points in the EEG data
104 |       dropoutRate     : dropout fraction
105 |       kernLength      : length of temporal convolution in first layer. We found
106 |                         that setting this to be half the sampling rate worked
107 |                         well in practice. For the SMR dataset in particular
108 |                         since the data was high-passed at 4Hz we used a kernel
109 |                         length of 32.     
110 |       F1, F2          : number of temporal filters (F1) and number of pointwise
111 |                         filters (F2) to learn. Default: F1 = 4, F2 = F1 * D. 
112 |       D               : number of spatial filters to learn within each temporal
113 |                         convolution. Default: D = 2
114 |       dropoutType     : Either SpatialDropout2D or Dropout, passed as a string.
115 | 
116 |     """
117 |     
118 |     if dropoutType == 'SpatialDropout2D':
119 |         dropoutType = SpatialDropout2D
120 |     elif dropoutType == 'Dropout':
121 |         dropoutType = Dropout
122 |     else:
123 |         raise ValueError('dropoutType must be one of SpatialDropout2D '
124 |                          'or Dropout, passed as a string.')
125 |     
126 |     input1   = Input(shape = (1, Chans, Samples))
127 | 
128 |     ##################################################################
129 |     block1       = Conv2D(F1, (1, kernLength), padding = 'same',
130 |                                    input_shape = (1, Chans, Samples),
131 |                                    use_bias = False)(input1)
132 |     block1       = BatchNormalization(axis = 1)(block1)
133 |     block1       = DepthwiseConv2D((Chans, 1), use_bias = False, 
134 |                                    depth_multiplier = D,
135 |                                    depthwise_constraint = max_norm(1.))(block1)
136 |     block1       = BatchNormalization(axis = 1)(block1)
137 |     block1       = Activation('elu')(block1)
138 |     block1       = AveragePooling2D((1, 4))(block1)
139 |     block1       = dropoutType(dropoutRate)(block1)
140 |     
141 |     block2       = SeparableConv2D(F2, (1, 16),
142 |                                    use_bias = False, padding = 'same')(block1)
143 |     block2       = BatchNormalization(axis = 1)(block2)
144 |     block2       = Activation('elu')(block2)
145 |     block2       = AveragePooling2D((1, 8))(block2)
146 |     block2       = dropoutType(dropoutRate)(block2)
147 |         
148 |     flatten      = Flatten(name = 'flatten')(block2)
149 |     
150 |     dense        = Dense(nb_classes, name = 'dense', 
151 |                          kernel_constraint = max_norm(norm_rate))(flatten)
152 |     softmax      = Activation('sigmoid', name = 'softmax')(dense)
153 |     
154 |     return Model(inputs=input1, outputs=softmax)
155 | 
156 | 
157 | 
158 | 
def EEGNet_SSVEP(nb_classes=12, Chans=8, Samples=256,
                 dropoutRate=0.5, kernLength=256, F1=96,
                 D=1, F2=96, dropoutType='Dropout'):
    """Build the SSVEP variant of EEGNet, as used in [1].

    The layer stack is the same as in EEGNet: a temporal convolution and a
    depthwise convolution over all channels (block 1), a separable
    convolution (block 2), then a dense layer with a softmax activation.
    Unlike EEGNet, the dense layer carries no max-norm constraint.

    Inputs:

      nb_classes      : int, number of classes to classify
      Chans, Samples  : number of channels and time points in the EEG data
      dropoutRate     : dropout fraction
      kernLength      : length of the temporal convolution in the first layer
      F1, F2          : number of temporal filters (F1) and of pointwise
                        filters (F2) to learn.
      D               : number of spatial filters learned within each
                        temporal convolution.
      dropoutType     : 'SpatialDropout2D' or 'Dropout', passed as a string.

    Returns the Keras Model; raises ValueError for any other dropoutType.

    [1]. Waytowich, N. et. al. (2018). Compact Convolutional Neural Networks
    for Classification of Asynchronous Steady-State Visual Evoked Potentials.
    Journal of Neural Engineering vol. 15(6).
    http://iopscience.iop.org/article/10.1088/1741-2552/aae5d8
    """

    # Pick the dropout layer class up front; reject anything else before
    # building layers.
    if dropoutType == 'Dropout':
        drop_layer = Dropout
    elif dropoutType == 'SpatialDropout2D':
        drop_layer = SpatialDropout2D
    else:
        raise ValueError('dropoutType must be one of SpatialDropout2D '
                         'or Dropout, passed as a string.')

    in_shape = (1, Chans, Samples)
    eeg_in = Input(shape=in_shape)

    # ---- Block 1: temporal convolution, then depthwise spatial filters ----
    x = Conv2D(F1, (1, kernLength), padding='same',
               input_shape=in_shape, use_bias=False)(eeg_in)
    x = BatchNormalization(axis=1)(x)
    x = DepthwiseConv2D((Chans, 1), use_bias=False,
                        depth_multiplier=D,
                        depthwise_constraint=max_norm(1.))(x)
    x = BatchNormalization(axis=1)(x)
    x = Activation('elu')(x)
    x = AveragePooling2D((1, 4))(x)
    x = drop_layer(dropoutRate)(x)

    # ---- Block 2: separable convolution ----
    x = SeparableConv2D(F2, (1, 16), use_bias=False, padding='same')(x)
    x = BatchNormalization(axis=1)(x)
    x = Activation('elu')(x)
    x = AveragePooling2D((1, 8))(x)
    x = drop_layer(dropoutRate)(x)

    # ---- Classifier head ----
    x = Flatten(name='flatten')(x)
    x = Dense(nb_classes, name='dense')(x)
    out = Activation('softmax', name='softmax')(x)

    return Model(inputs=eeg_in, outputs=out)
220 | 
221 | 
222 | 
def EEGNet_old(nb_classes, Chans = 64, Samples = 128, regRate = 0.0001,
           dropoutRate = 0.25, kernels = [(2, 32), (8, 4)], strides = (2, 4)):
    """Build EEGNet_v1, the original arXiv model.

    Paper: https://arxiv.org/abs/1611.08024v2

    One modification relative to the paper: strided convolutions replace
    max-pooling, which helped classification slightly and is also faster.

    This architecture is no longer recommended; the newer EEGNet performs
    much better overall and has nicer properties.

    Inputs:

        nb_classes     : total number of final categories
        Chans, Samples : number of EEG channels and samples, respectively
        regRate        : regularization rate for L1 and L2 regularizations
        dropoutRate    : dropout fraction
        kernels        : kernel dimensions of the 2nd and 3rd layers; only
                         the first two entries are used (default is the
                         [2, 32] x [8, 4] configuration)
        strides        : the stride size of the 2nd and 3rd layers (replaces
                         the max-pool of the original paper)

    Returns the Keras Model.
    """

    in_shape = (1, Chans, Samples)
    eeg_in = Input(in_shape)

    # Layer 1: convolution spanning every channel, L1 + L2 regularised.
    x = Conv2D(16, (Chans, 1), input_shape=in_shape,
               kernel_regularizer=l1_l2(l1=regRate, l2=regRate))(eeg_in)
    x = BatchNormalization(axis=1)(x)
    x = Activation('elu')(x)
    x = Dropout(dropoutRate)(x)

    # Swap the first two non-batch axes before the next convolutions.
    x = Permute((2, 1, 3))(x)

    # Layers 2 and 3: identical strided conv stages that differ only in
    # kernel size; L2 regularisation only.
    for kernel_size in (kernels[0], kernels[1]):
        x = Conv2D(4, kernel_size, padding='same',
                   kernel_regularizer=l1_l2(l1=0.0, l2=regRate),
                   strides=strides)(x)
        x = BatchNormalization(axis=1)(x)
        x = Activation('elu')(x)
        x = Dropout(dropoutRate)(x)

    # Classifier head.
    x = Flatten(name='flatten')(x)
    x = Dense(nb_classes, name='dense')(x)
    out = Activation('softmax', name='softmax')(x)

    return Model(inputs=eeg_in, outputs=out)
281 | 
282 | 
283 | 
def DeepConvNet(nb_classes, Chans = 64, Samples = 256,
                dropoutRate = 0.5):
    """Build the Deep Convolutional Network of Schirrmeister et. al. (2017),
    Human Brain Mapping.

    The input is assumed to be a 2-second EEG signal sampled at 128Hz,
    rather than the 250Hz used in the original paper. Because of that
    difference the temporal convolutions have length (1, 5) instead of
    (1, 10).

    Every convolutional layer and the classification layer use a max_norm
    constraint, and the BatchNormalization defaults are changed; both
    choices follow a personal communication with the original authors.

                      ours        original paper
    pool_size        1, 2        1, 3
    strides          1, 2        1, 3
    conv filters     1, 5        1, 10

    This implementation has not been verified by the original authors.

    Inputs: nb_classes (number of output classes), Chans / Samples (number
    of EEG channels / time points), dropoutRate (dropout fraction).
    Returns the Keras Model.
    """

    def _norm_act_pool_drop(tensor):
        # Shared tail of every block: batch-norm, ELU, max-pool, dropout.
        tensor = BatchNormalization(axis=1, epsilon=1e-05, momentum=0.1)(tensor)
        tensor = Activation('elu')(tensor)
        tensor = MaxPooling2D(pool_size=(1, 2), strides=(1, 2))(tensor)
        return Dropout(dropoutRate)(tensor)

    in_shape = (1, Chans, Samples)
    eeg_in = Input(in_shape)

    # Block 1: temporal convolution followed by a convolution over all
    # channels.
    x = Conv2D(25, (1, 5),
               input_shape=in_shape,
               kernel_constraint=max_norm(2., axis=(0, 1, 2)))(eeg_in)
    x = Conv2D(25, (Chans, 1),
               kernel_constraint=max_norm(2., axis=(0, 1, 2)))(x)
    x = _norm_act_pool_drop(x)

    # Blocks 2-4: one temporal convolution each, doubling the filter count.
    for n_filters in (50, 100, 200):
        x = Conv2D(n_filters, (1, 5),
                   kernel_constraint=max_norm(2., axis=(0, 1, 2)))(x)
        x = _norm_act_pool_drop(x)

    # Classifier head.
    x = Flatten()(x)
    x = Dense(nb_classes, kernel_constraint=max_norm(0.5))(x)
    out = Activation('softmax')(x)

    return Model(inputs=eeg_in, outputs=out)
348 | 
349 | 
350 | # need these for ShallowConvNet
def square(x):
    """Element-wise square, used as an activation by ShallowConvNet."""
    squared = K.square(x)
    return squared
353 | 
def log(x):
    """Element-wise log of x, used as an activation by ShallowConvNet.

    x is first clipped to the range [1e-7, 10000] so the logarithm is never
    taken of zero or a negative value.
    """
    clipped = K.clip(x, min_value=1e-7, max_value=10000)
    return K.log(clipped)
356 | 
357 | 
def ShallowConvNet(nb_classes, Chans = 64, Samples = 128, dropoutRate = 0.5):
    """Build the Shallow Convolutional Network of Schirrmeister et. al.
    (2017), Human Brain Mapping.

    The input is assumed to be a 2-second EEG signal sampled at 128Hz. The
    original paper used temporal convolutions of length 25 on 250Hz data;
    length 13 is used here because the sampling rate is roughly half. The
    pool_size and stride of the later layers are likewise about half of the
    paper's values.

    All convolutional layers and the classification layer use a max_norm
    constraint, and the BatchNormalization defaults are changed; both
    choices follow a personal communication with the original authors.

                     ours        original paper
    pool_size        1, 35       1, 75
    strides          1, 7        1, 15
    conv filters     1, 13       1, 25

    This implementation has not been verified by the original authors, but
    it reproduces the results of the original paper with minor deviations.

    Inputs: nb_classes (number of output classes), Chans / Samples (number
    of EEG channels / time points), dropoutRate (dropout fraction).
    Returns the Keras Model.
    """

    in_shape = (1, Chans, Samples)
    eeg_in = Input(in_shape)

    # Temporal convolution, then a bias-free convolution over all channels.
    x = Conv2D(40, (1, 13),
               input_shape=in_shape,
               kernel_constraint=max_norm(2., axis=(0, 1, 2)))(eeg_in)
    x = Conv2D(40, (Chans, 1), use_bias=False,
               kernel_constraint=max_norm(2., axis=(0, 1, 2)))(x)
    x = BatchNormalization(axis=1, epsilon=1e-05, momentum=0.1)(x)

    # Square -> average-pool -> log, using the module-level helpers.
    x = Activation(square)(x)
    x = AveragePooling2D(pool_size=(1, 35), strides=(1, 7))(x)
    x = Activation(log)(x)
    x = Dropout(dropoutRate)(x)

    # Classifier head.
    x = Flatten()(x)
    x = Dense(nb_classes, kernel_constraint=max_norm(0.5))(x)
    out = Activation('softmax')(x)

    return Model(inputs=eeg_in, outputs=out)
400 | 
401 | 
402 | 


--------------------------------------------------------------------------------