├── utils.py
├── requirements.txt
├── imgs
│   └── data.png
├── merge_dataset.py
├── readme.md
├── physionet.py
├── model.py
├── test.py
└── train.py
/utils.py:
--------------------------------------------------------------------------------
import numpy
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
tensorflow
numpy
scipy
pandas
--------------------------------------------------------------------------------
/imgs/data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/horseee/PhysioNet/HEAD/imgs/data.png
--------------------------------------------------------------------------------
/merge_dataset.py:
--------------------------------------------------------------------------------
import numpy as np
from scipy.io import savemat
from physionet import load_physionet

import argparse


def merge_data(dir_path, test=0.2, train_file='train', test_file='test', shuffle=True):
    train_X, train_y, test_X, test_y, _, _ = load_physionet(dir_path=dir_path, test=test, vali=0, shuffle=shuffle)

    train_data = {'data': train_X, 'label': train_y}
    test_data = {'data': test_X, 'label': test_y}
    savemat(train_file, train_data)
    savemat(test_file, test_data)

    print("[!] Train set saved as %s.mat" % (train_file))
    print("[!] Test set saved as %s.mat" % (test_file))


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dir', type=str, default='training2017', help='the directory of the dataset')
    parser.add_argument('--test_set', type=float, default=0.2, help='the fraction of samples used as the test set')
    args = parser.parse_args()

    merge_data(args.dir, test=args.test_set)

if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
## ECG classification

#### Dataset
[physionet challenge 2017](https://www.physionet.org/challenge/2017/)
![vis](https://github.com/VainF/PhysioNet/blob/master/imgs/data.png)

#### Requirements
* tensorflow
* numpy
* scipy
* pandas

You can install the dependencies with the command `pip3 install -r requirements.txt`.
Both python2 and python3 work for this project, but we strongly suggest python3.

#### How to Run
1. Put the dataset in a folder (by default, `training2017`).
2. Run `merge_dataset.py` to create **train.mat** and **test.mat** (a quick way to inspect the generated files is sketched right after this list). Use the following command:
```
python3 merge_dataset.py --dir YOUR_TRAINING_SET_FOLDER_NAME
```
Use `python3 merge_dataset.py -h` if you need help.
3. Run `train.py`. The following parameters can be set on the command line:
* learning_rate
* epochs
* batch_size
* k_folder: True/False

To run k-fold validation, use the command `python3 train.py --k_folder True`. To only train the model, use `python3 train.py`.
Use `python3 train.py -h` if you need help.

4. After training, use `test.py` to measure accuracy and F1. The default checkpoint path is **checkpoints/**.
If you use another path, run `test.py` with the following command:
```
python3 test.py --check_point_folder YOUR_CHECKPOINT_FOLDER_PATH
```
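
After step 2, `train.mat` and `test.mat` hold the recordings as object arrays together with their labels. A minimal sketch for inspecting them (it only assumes scipy and numpy are installed, and mirrors how `train.py`/`test.py` read the files):
```
from scipy.io import loadmat

train_set = loadmat('train.mat')            # written by merge_dataset.py
X = train_set['data'][0]                    # object array, one variable-length recording per entry
y = train_set['label'][0].astype('int32')   # 0 = Normal, 1 = AF, 2 = Other, 3 = Noisy
print(len(X), y.shape, X[0].shape)
```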

#### Experiment result
The F1 score of our model is **0.82**. You may get a somewhat different number, because the training and test sets are chosen randomly.

--------------------------------------------------------------------------------
/physionet.py:
--------------------------------------------------------------------------------
import glob
import random
import pandas as pd
import numpy as np
from scipy.io import loadmat

def load_physionet(dir_path, test=0.2, vali=0, shuffle=True):
    "return train_X, train_y, test_X, test_y, valid_X, valid_y"
    if dir_path[-1] != '/': dir_path = dir_path + '/'
    ref = pd.read_csv(dir_path + 'REFERENCE.csv', header=None)
    label_id = {'N': 0, 'A': 1, 'O': 2, '~': 3}  # Normal, AF, Other, Noisy
    X = []
    y = []
    test_X = None
    test_y = None
    valid_X = None
    valid_y = None

    for index, row in ref.iterrows():
        file_prefix = row[0]
        mat_file = dir_path + file_prefix + '.mat'
        hea_file = dir_path + file_prefix + '.hea'
        data = loadmat(mat_file)['val']

        # normalize each recording to zero mean and unit variance
        data = data.squeeze()
        data = np.nan_to_num(data)
        data = data - np.mean(data)
        data = data / np.std(data)

        X.append(data)
        y.append(label_id[row[1]])

    data_n = len(y)
    print(data_n)

    X = np.array(X)
    y = np.array(y)

    if shuffle:
        shuffle_idx = list(range(data_n))
        random.shuffle(shuffle_idx)
        X = X[shuffle_idx]
        y = y[shuffle_idx]

    valid_n = int(vali * data_n)
    test_n = int(test * data_n)
    assert (valid_n + test_n <= data_n), "Dataset does not have enough samples!"

    # split: first valid_n samples -> validation, next test_n -> test, remainder -> train
    if vali > 0:
        valid_X = X[0:valid_n]
        valid_y = y[0:valid_n]

    if test > 0:
        test_X = X[valid_n: valid_n + test_n]
        test_y = y[valid_n: valid_n + test_n]

    if vali > 0 or test > 0:
        X = X[valid_n + test_n:]
        y = y[valid_n + test_n:]

    #print('Train: %d, Test: %d, Validation: %d (%s)' % ((data_n - valid_n - test_n), test_n, valid_n, 'shuffled' if shuffle else 'unshuffled'))
    return np.squeeze(X), np.squeeze(y), np.squeeze(test_X), np.squeeze(test_y), np.squeeze(valid_X), np.squeeze(valid_y)
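
# Usage sketch (illustrative; it mirrors the call in merge_dataset.py):
#   train_X, train_y, test_X, test_y, _, _ = load_physionet('training2017/', test=0.2, vali=0, shuffle=True)
# Recordings have different lengths, so the returned arrays are 1-D object arrays;
# labels are integers following label_id above.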
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
import tensorflow as tf

def ResNet(inputs, class_num=4):
    conv_ksize = 16
    conv_strides = 1
    conv_filters = 64
    dropout_rate = 0.5
    pool_size = 2
    pool_strides = 2

    def _residual_block(x, filters, kernel_size, strides, dropout_rate, grow=True, pool=False):
        # 1x1 convolution on the shortcut when the number of filters grows
        if grow:
            short_cut = tf.layers.conv1d(inputs=x, filters=filters, kernel_size=1, padding='VALID', strides=1)
        else:
            short_cut = tf.identity(x)

        x = tf.layers.batch_normalization(x)
        x = tf.nn.relu(x)
        x = tf.layers.dropout(x, rate=dropout_rate)
        x = tf.layers.conv1d(inputs=x, filters=filters, kernel_size=kernel_size, padding='SAME', strides=strides)

        x = tf.layers.batch_normalization(x)
        x = tf.nn.relu(x)
        x = tf.layers.dropout(x, rate=dropout_rate)
        x = tf.layers.conv1d(inputs=x, filters=filters, kernel_size=kernel_size, padding='SAME', strides=strides)

        if pool:
            short_cut = tf.layers.max_pooling1d(short_cut, pool_size=pool_size, strides=pool_strides)
            x = tf.layers.max_pooling1d(x, pool_size=pool_size, strides=pool_strides)
        x = x + short_cut
        print(x.shape)
        return x

    print(inputs.shape)
    x = tf.layers.conv1d(inputs=inputs, filters=conv_filters, kernel_size=conv_ksize, padding='SAME', strides=conv_strides)
    x = tf.layers.batch_normalization(x)
    x = tf.nn.relu(x)

    print(x.shape)

    short_cut = tf.identity(x)
    x = tf.layers.conv1d(inputs=x, filters=conv_filters, kernel_size=conv_ksize, padding='SAME', strides=conv_strides)
    x = tf.layers.batch_normalization(x)
    x = tf.nn.relu(x)
    x = tf.layers.conv1d(inputs=x, filters=conv_filters, kernel_size=conv_ksize, padding='SAME', strides=conv_strides)

    short_cut = tf.layers.max_pooling1d(short_cut, pool_size=pool_size, strides=pool_strides)
    x = tf.layers.max_pooling1d(x, pool_size=pool_size, strides=pool_strides)
    x = x + short_cut
    print(x.shape)

    k = 1
    p = False
    for i in range(15):
        if i % 4 == 0 and i > 0:
            k += 1
        x = _residual_block(x, filters=conv_filters*k, kernel_size=conv_ksize, strides=conv_strides, dropout_rate=dropout_rate, grow=(i % 4 == 0 and i > 0), pool=p)
        p = not p
    x = tf.layers.batch_normalization(x)
    x = tf.nn.relu(x)
    x = tf.contrib.layers.flatten(x)
    x = tf.layers.dense(x, units=class_num)
    #x = tf.layers.average_pooling1d(x, pool_size=x.get_shape().as_list()[1], strides=1)
    #x = tf.layers.flatten(x)
    print(x.shape)
    #x = tf.layers.dense(x, units=class_num)
    #print(x.shape)
    return x
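
# Usage sketch (illustrative, TF 1.x graph mode; 9000 = 300 Hz * 30 s, the cut_size used in train.py/test.py):
#   inputs = tf.placeholder(dtype='float32', shape=(None, 9000, 1))
#   logits = ResNet(inputs, class_num=4)   # -> Tensor of shape (None, 4) with unnormalized class scores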
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np

import argparse
import sys, os
import time

from model import ResNet
from scipy.io import loadmat

os.environ['CUDA_VISIBLE_DEVICES'] = '0'

parser = argparse.ArgumentParser()
parser.add_argument('--check_point_folder', type=str, default='checkpoints', help='the path to the checkpoint folder')
args = parser.parse_args()

test_set = loadmat('test.mat')
X = test_set['data'][0]
y = test_set['label'][0].astype('int32')

# cut if too long / pad with zeros if too short
cut_size = 300 * 30
n = len(X)
X_cut = np.zeros(shape=(n, cut_size))
for i in range(n):
    data_len = X[i].squeeze().shape[0]
    X_cut[i, :min(cut_size, data_len)] = X[i][0, :min(cut_size, data_len)]
X = X_cut

class_num = 4

# reconstruct model
test_input = tf.placeholder(dtype='float32', shape=(None, cut_size, 1))
res_net = ResNet(test_input, class_num=class_num)

tf_config = tf.ConfigProto()
tf_config.gpu_options.allow_growth = True
sess = tf.Session(config=tf_config)

sess.run(tf.global_variables_initializer())
saver = tf.train.Saver(tf.global_variables())

# restore model
if os.path.exists(args.check_point_folder + '/'):
    saver.restore(sess, args.check_point_folder + '/model')
    print('Model successfully restored from ' + args.check_point_folder + '/model')
else:
    print('Restore failed. No model found!')

test_len = len(X)
label_class = {0: 'N', 1: 'A', 2: 'O', 3: '~'}  # Normal, AF, Other, Noisy
PreCount = np.zeros(class_num)
RealCount = np.zeros(class_num)
CorrectCount = np.zeros(class_num)

start_time = time.time()
for i in range(test_len):
    res = sess.run([res_net], {test_input: X[i].reshape(-1, cut_size, 1)})
    #print(res)
    predicts = np.argmax(res[0], axis=1)
    #print('case %d: class = %s, predict = %s, ' % (i, label_class[y[i]], label_class[predicts[0]]))
    PreCount[predicts] = PreCount[predicts] + 1
    RealCount[y[i]] = RealCount[y[i]] + 1
    #print("%d %d" % (predicts[0], y[i]))
    if (predicts[0] == y[i]):
        CorrectCount[predicts] = CorrectCount[predicts] + 1
end_time = time.time()
# F1
F1_res = CorrectCount * 2 / (PreCount + RealCount)
print('F1n = %f, F1a = %f, F1o = %f, F1~ = %f' % (F1_res[0], F1_res[1], F1_res[2], F1_res[3]))
print('F1 = %f' % np.mean(F1_res))
# Accuracy
print('Accuracy = %f' % (np.sum(CorrectCount) / test_len))
# Precision
precision_rate = CorrectCount / PreCount
print('Precision: N = %f, A = %f, O = %f, ~ = %f' % (precision_rate[0], precision_rate[1], precision_rate[2], precision_rate[3]))
# Recall
recall_rate = CorrectCount / RealCount
print('Recall: N = %f, A = %f, O = %f, ~ = %f' % (recall_rate[0], recall_rate[1], recall_rate[2], recall_rate[3]))
# Time
print('Time: %fs' % ((end_time - start_time) / test_len))
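
# Note: the per-class score printed above is F1 = 2 * TP / (#predicted + #actual) for each class.
# If scikit-learn is available, it can be cross-checked with
# sklearn.metrics.f1_score(y_true, y_pred, average=None) after collecting all predictions.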
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np
import argparse
import sys, os
import random
from physionet import load_physionet
from model import ResNet
from scipy.io import loadmat


os.environ["CUDA_VISIBLE_DEVICES"] = "0"

def cut_and_pad(X, cut_size):
    n = len(X)
    X_cut = np.zeros(shape=(n, cut_size))
    for i in range(n):
        data_len = X[i].squeeze().shape[0]
        # cut if too long / pad with zeros if too short
        X_cut[i, :min(cut_size, data_len)] = X[i][0, :min(cut_size, data_len)]
    return X_cut

def to_one_hot(y, class_num=4):
    if isinstance(y, int):
        y_onehot = np.zeros((1, class_num))
        y_onehot[0, y] = 1
        return y_onehot
    elif isinstance(y, np.ndarray):
        y_onehot = np.zeros((y.shape[0], class_num))
        for i in range(y.shape[0]):
            y_onehot[i, y[i]] = 1
        return y_onehot

def get_sub_set(X, y, k, K_folder_or_not):
    if not K_folder_or_not:
        # simple 90/10 train/validation split
        k_dataset_len = int(len(X) * 0.9)
        train_X = X[:k_dataset_len]
        train_y = y[:k_dataset_len]
        valid_X = X[k_dataset_len:]
        valid_y = y[k_dataset_len:]
    else:
        # use the k-th fifth of the data as the validation fold
        k_dataset_len = int(len(X) / 5)
        if k == 0:
            valid_X = X[:k_dataset_len]
            valid_y = y[:k_dataset_len]
            train_X = X[k_dataset_len:]
            train_y = y[k_dataset_len:]
        else:
            print(k * k_dataset_len)
            valid_X = X[k*k_dataset_len: (k+1)*k_dataset_len]
            valid_y = y[k*k_dataset_len: (k+1)*k_dataset_len]
            train_X = np.concatenate((X[:k*k_dataset_len], X[(k+1)*k_dataset_len:]), axis=0)
            train_y = np.concatenate((y[:k*k_dataset_len], y[(k+1)*k_dataset_len:]), axis=0)
    return train_X, train_y, valid_X, valid_y

parser = argparse.ArgumentParser()
parser.add_argument('--learning_rate', type=float, default=0.0000002, help='learning rate')
parser.add_argument('--epochs', type=int, default=30000, help='epoch number')
parser.add_argument('--batch_size', type=int, default=16, help='batch size')
parser.add_argument('--k_folder', type=bool, default=False, help='whether to run k-fold validation')
args = parser.parse_args()
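
# Note: argparse converts a type=bool option with bool(), so any non-empty value
# (including "--k_folder False") is treated as True. Pass "--k_folder True" to enable
# k-fold validation and simply omit the flag to disable it.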
class_num = 4

training_set = loadmat('train.mat')
X = training_set['data'][0]
y = training_set['label'][0].astype('int32')

#cut_size_start = 300 * 3
cut_size = 300 * 30

X = cut_and_pad(X, cut_size)

#import matplotlib.pyplot as plt
#plt.plot(range(cut_size), X[0])
#plt.show()


# k-fold / train
if args.k_folder:
    low_border = 0
    high_border = 5
    F1_valid = np.zeros(5)
else:
    low_border = 0
    high_border = 1

for k in range(low_border, high_border):
    # get validation set
    train_X, train_y, valid_X, valid_y = get_sub_set(X, y, k, args.k_folder)
    y_onehot = to_one_hot(train_y)

    if args.k_folder:
        print("[!] kfolder_iter: %d, train: %d, validation: %d" % (k, len(train_X), len(valid_X)))
    else:
        print("[!] Training: %d, validation: %d" % (len(train_X), len(valid_X)))

    data_input = tf.placeholder(dtype='float32', shape=(None, cut_size, 1))
    label_input = tf.placeholder(dtype='float32', shape=(None, class_num))

    # build model
    logits = ResNet(data_input, class_num=class_num)
    loss = tf.losses.softmax_cross_entropy(label_input, logits)
    opt = tf.train.AdamOptimizer(learning_rate=args.learning_rate).minimize(loss)

    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    sess = tf.Session(config=tf_config)
    try: os.mkdir('checkpoints')
    except: pass
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(tf.global_variables())

    if not args.k_folder:
        try:
            if os.path.exists('checkpoints'):
                saver.restore(sess, 'checkpoints/model')
                print('Model restored from checkpoints')
            else:
                print('Restore failed, training new model!')
        except:
            print('Restore failed, training new model!')


    batch_size = args.batch_size
    epochs = args.epochs
    train_X = train_X.reshape(-1, cut_size, 1)
    valid_X = valid_X.reshape(-1, cut_size, 1)
    ep = 0
    while True:
        total_loss = []
        ep = ep + 1
        for itr in range(0, len(train_X), batch_size):
            # prepare data batch
            if itr + batch_size >= len(train_X):
                cat_n = itr + batch_size - len(train_X)
                cat_idx = random.sample(range(len(train_X)), cat_n)
                batch_inputs = np.concatenate((train_X[itr:], train_X[cat_idx]), axis=0)
                batch_labels = np.concatenate((y_onehot[itr:], y_onehot[cat_idx]), axis=0)
            else:
                batch_inputs = train_X[itr:itr+batch_size]
                batch_labels = y_onehot[itr:itr+batch_size]

            _, cur_loss = sess.run([opt, loss], {data_input: batch_inputs, label_input: batch_labels})
            total_loss.append(cur_loss)
            #if itr % 10 == 0:
            #    print(' iter %d, loss = %f' % (itr, cur_loss))
            #    saver.save(sess, args.ckpt)
        print('[*] epoch %d, average loss = %f' % (ep, np.mean(total_loss)))
        if not args.k_folder:
            saver.save(sess, 'checkpoints/model')

        # validation
        if ep % 5 == 0:  #and ep != 0:
            err = 0
            n = np.zeros(class_num)
            N = np.zeros(class_num)
            correct = np.zeros(class_num)
            valid_n = len(valid_X)
            for i in range(valid_n):
                res = sess.run([logits], {data_input: valid_X[i].reshape(-1, cut_size, 1)})
                # print(valid_y[i])
                # print(res)
                predicts = np.argmax(res[0], axis=1)
                n[predicts] = n[predicts] + 1
                N[valid_y[i]] = N[valid_y[i]] + 1
                if predicts[0] != valid_y[i]:
                    err += 1
                else:
                    correct[predicts] = correct[predicts] + 1
            print("[!] %d validation data, accuracy = %f" % (valid_n, 1.0 * (valid_n - err) / valid_n))
            res = 2.0 * correct / (N + n)
            print("[!] Normal = %f, Af = %f, Other = %f, Noisy = %f" % (res[0], res[1], res[2], res[3]))
            print("[!] F1 accuracy = %f" % np.mean(2.0 * correct / (N + n)))
            if args.k_folder:
                F1_valid[k] = np.mean(res)

        if np.mean(total_loss) < 0.2 and ep % 5 == 0:
            break

if args.k_folder:
    print("\n\n[!] k-fold validation finished! The F1 score for each fold is:")
    print("[!] 1: %f, 2: %f, 3: %f, 4: %f, 5: %f" % (F1_valid[0], F1_valid[1], F1_valid[2], F1_valid[3], F1_valid[4]))
    print("[!] Average is %f" % (np.mean(F1_valid)))
--------------------------------------------------------------------------------