├── utils.py
├── requirements.txt
├── imgs
│   └── data.png
├── merge_dataset.py
├── readme.md
├── physionet.py
├── model.py
├── test.py
└── train.py
/utils.py:
--------------------------------------------------------------------------------
import numpy
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
tensorflow
numpy
scipy
pandas
--------------------------------------------------------------------------------
/imgs/data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/horseee/PhysioNet/HEAD/imgs/data.png
--------------------------------------------------------------------------------
/merge_dataset.py:
--------------------------------------------------------------------------------
import numpy as np
from scipy.io import savemat
from physionet import load_physionet

import argparse


def merge_data(dir_path, test=0.2, train_file='train', test_file='test', shuffle=True):
    train_X, train_y, test_X, test_y, _, _ = load_physionet(dir_path=dir_path, test=test, vali=0, shuffle=shuffle)

    train_data = {'data': train_X, 'label': train_y}
    test_data = {'data': test_X, 'label': test_y}
    savemat(train_file, train_data)
    savemat(test_file, test_data)

    print("[!] Train set saved as %s.mat" % (train_file))
    print("[!] Test set saved as %s.mat" % (test_file))


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dir', type=str, default='training2017', help='the directory of the dataset')
    parser.add_argument('--test_set', type=float, default=0.2, help='the fraction of samples used as the test set')
    args = parser.parse_args()

    merge_data(args.dir, test=args.test_set)

if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
## ECG classification

#### Dataset
[physionet challenge 2017](https://www.physionet.org/challenge/2017/)
![vis](https://github.com/VainF/PhysioNet/blob/master/imgs/data.png)

#### Requirements
* tensorflow
* numpy
* scipy
* pandas

You can install the dependencies with the command `pip3 install -r requirements.txt`.
Both python2 and python3 work for this project, but we strongly suggest python3.

#### How to Run
1. Put the dataset in a folder (by default, `training2017`).
2. Run `merge_dataset.py` to create **train.mat** and **test.mat** (a quick way to inspect the generated files is sketched right after this list). Use the following command:
```
python3 merge_dataset.py --dir YOUR_TRAINING_SET_FOLDER_NAME
```
Use `python3 merge_dataset.py -h` if you need help.
3. Run `train.py`. The following parameters can be set on the command line:
* learning_rate
* epochs
* batch_size
* k_folder: True/False

To run k-fold validation, use the command `python3 train.py --k_folder True`. To only train the model, use `python3 train.py`.
Use `python3 train.py -h` if you need help.

4. After training, use `test.py` to measure accuracy and F1. The default checkpoint path is **checkpoints/**.
If you use another path, run `test.py` with the following command:
```
python3 test.py --check_point_folder YOUR_CHECKPOINT_FOLDER_PATH
```
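
After step 2, `train.mat` and `test.mat` hold the recordings as object arrays together with their labels. A minimal sketch for inspecting them (it only assumes scipy and numpy are installed, and mirrors how `train.py`/`test.py` read the files):
```
from scipy.io import loadmat

train_set = loadmat('train.mat')            # written by merge_dataset.py
X = train_set['data'][0]                    # object array, one variable-length recording per entry
y = train_set['label'][0].astype('int32')   # 0 = Normal, 1 = AF, 2 = Other, 3 = Noisy
print(len(X), y.shape, X[0].shape)
```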

#### Experiment result
The F1 score of our model is **0.82**. You may get a somewhat different number, because the training and test sets are chosen randomly.

--------------------------------------------------------------------------------
/physionet.py:
--------------------------------------------------------------------------------
import glob
import random
import pandas as pd
import numpy as np
from scipy.io import loadmat

def load_physionet(dir_path, test=0.2, vali=0, shuffle=True):
    "return train_X, train_y, test_X, test_y, valid_X, valid_y"
    if dir_path[-1] != '/': dir_path = dir_path + '/'
    ref = pd.read_csv(dir_path + 'REFERENCE.csv', header=None)
    label_id = {'N': 0, 'A': 1, 'O': 2, '~': 3}  # Normal, AF, Other, Noisy
    X = []
    y = []
    test_X = None
    test_y = None
    valid_X = None
    valid_y = None

    for index, row in ref.iterrows():
        file_prefix = row[0]
        mat_file = dir_path + file_prefix + '.mat'
        hea_file = dir_path + file_prefix + '.hea'
        data = loadmat(mat_file)['val']

        # normalize each recording to zero mean and unit variance
        data = data.squeeze()
        data = np.nan_to_num(data)
        data = data - np.mean(data)
        data = data / np.std(data)

        X.append(data)
        y.append(label_id[row[1]])

    data_n = len(y)
    print(data_n)

    X = np.array(X)
    y = np.array(y)

    if shuffle:
        shuffle_idx = list(range(data_n))
        random.shuffle(shuffle_idx)
        X = X[shuffle_idx]
        y = y[shuffle_idx]

    valid_n = int(vali * data_n)
    test_n = int(test * data_n)
    assert (valid_n + test_n <= data_n), "Dataset does not have enough samples!"

    # split: first valid_n samples -> validation, next test_n -> test, remainder -> train
    if vali > 0:
        valid_X = X[0:valid_n]
        valid_y = y[0:valid_n]

    if test > 0:
        test_X = X[valid_n: valid_n + test_n]
        test_y = y[valid_n: valid_n + test_n]

    if vali > 0 or test > 0:
        X = X[valid_n + test_n:]
        y = y[valid_n + test_n:]

    #print('Train: %d, Test: %d, Validation: %d (%s)' % ((data_n - valid_n - test_n), test_n, valid_n, 'shuffled' if shuffle else 'unshuffled'))
    return np.squeeze(X), np.squeeze(y), np.squeeze(test_X), np.squeeze(test_y), np.squeeze(valid_X), np.squeeze(valid_y)
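
# Usage sketch (illustrative; it mirrors the call in merge_dataset.py):
#   train_X, train_y, test_X, test_y, _, _ = load_physionet('training2017/', test=0.2, vali=0, shuffle=True)
# Recordings have different lengths, so the returned arrays are 1-D object arrays;
# labels are integers following label_id above.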
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
import tensorflow as tf

def ResNet(inputs, class_num=4):
    conv_ksize = 16
    conv_strides = 1
    conv_filters = 64
    dropout_rate = 0.5
    pool_size = 2
    pool_strides = 2

    def _residual_block(x, filters, kernel_size, strides, dropout_rate, grow=True, pool=False):
        # 1x1 convolution on the shortcut when the number of filters grows
        if grow:
            short_cut = tf.layers.conv1d(inputs=x, filters=filters, kernel_size=1, padding='VALID', strides=1)
        else:
            short_cut = tf.identity(x)

        x = tf.layers.batch_normalization(x)
        x = tf.nn.relu(x)
        x = tf.layers.dropout(x, rate=dropout_rate)
        x = tf.layers.conv1d(inputs=x, filters=filters, kernel_size=kernel_size, padding='SAME', strides=strides)

        x = tf.layers.batch_normalization(x)
        x = tf.nn.relu(x)
        x = tf.layers.dropout(x, rate=dropout_rate)
        x = tf.layers.conv1d(inputs=x, filters=filters, kernel_size=kernel_size, padding='SAME', strides=strides)

        if pool:
            short_cut = tf.layers.max_pooling1d(short_cut, pool_size=pool_size, strides=pool_strides)
            x = tf.layers.max_pooling1d(x, pool_size=pool_size, strides=pool_strides)
        x = x + short_cut
        print(x.shape)
        return x

    print(inputs.shape)
    x = tf.layers.conv1d(inputs=inputs, filters=conv_filters, kernel_size=conv_ksize, padding='SAME', strides=conv_strides)
    x = tf.layers.batch_normalization(x)
    x = tf.nn.relu(x)

    print(x.shape)

    short_cut = tf.identity(x)
    x = tf.layers.conv1d(inputs=x, filters=conv_filters, kernel_size=conv_ksize, padding='SAME', strides=conv_strides)
    x = tf.layers.batch_normalization(x)
    x = tf.nn.relu(x)
    x = tf.layers.conv1d(inputs=x, filters=conv_filters, kernel_size=conv_ksize, padding='SAME', strides=conv_strides)

    short_cut = tf.layers.max_pooling1d(short_cut, pool_size=pool_size, strides=pool_strides)
    x = tf.layers.max_pooling1d(x, pool_size=pool_size, strides=pool_strides)
    x = x + short_cut
    print(x.shape)

    k = 1
    p = False
    for i in range(15):
        if i % 4 == 0 and i > 0:
            k += 1
        x = _residual_block(x, filters=conv_filters*k, kernel_size=conv_ksize, strides=conv_strides, dropout_rate=dropout_rate, grow=(i % 4 == 0 and i > 0), pool=p)
        p = not p
    x = tf.layers.batch_normalization(x)
    x = tf.nn.relu(x)
    x = tf.contrib.layers.flatten(x)
    x = tf.layers.dense(x, units=class_num)
    #x = tf.layers.average_pooling1d(x, pool_size=x.get_shape().as_list()[1], strides=1)
    #x = tf.layers.flatten(x)
    print(x.shape)
    #x = tf.layers.dense(x, units=class_num)
    #print(x.shape)
    return x
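
# Usage sketch (illustrative, TF 1.x graph mode; 9000 = 300 Hz * 30 s, the cut_size used in train.py/test.py):
#   inputs = tf.placeholder(dtype='float32', shape=(None, 9000, 1))
#   logits = ResNet(inputs, class_num=4)   # -> Tensor of shape (None, 4) with unnormalized class scores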
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np

import argparse
import sys, os
import time

from model import ResNet
from scipy.io import loadmat

os.environ['CUDA_VISIBLE_DEVICES'] = '0'

parser = argparse.ArgumentParser()
parser.add_argument('--check_point_folder', type=str, default='checkpoints', help='the path to the checkpoint folder')
args = parser.parse_args()

test_set = loadmat('test.mat')
X = test_set['data'][0]
y = test_set['label'][0].astype('int32')

# cut if too long / pad with zeros if too short
cut_size = 300 * 30
n = len(X)
X_cut = np.zeros(shape=(n, cut_size))
for i in range(n):
    data_len = X[i].squeeze().shape[0]
    X_cut[i, :min(cut_size, data_len)] = X[i][0, :min(cut_size, data_len)]
X = X_cut

class_num = 4

# reconstruct model
test_input = tf.placeholder(dtype='float32', shape=(None, cut_size, 1))
res_net = ResNet(test_input, class_num=class_num)

tf_config = tf.ConfigProto()
tf_config.gpu_options.allow_growth = True
sess = tf.Session(config=tf_config)

sess.run(tf.global_variables_initializer())
saver = tf.train.Saver(tf.global_variables())

# restore model
if os.path.exists(args.check_point_folder + '/'):
    saver.restore(sess, args.check_point_folder + '/model')
    print('Model successfully restored from ' + args.check_point_folder + '/model')
else:
    print('Restore failed. No model found!')

test_len = len(X)
label_class = {0: 'N', 1: 'A', 2: 'O', 3: '~'}  # Normal, AF, Other, Noisy
PreCount = np.zeros(class_num)
RealCount = np.zeros(class_num)
CorrectCount = np.zeros(class_num)

start_time = time.time()
for i in range(test_len):
    res = sess.run([res_net], {test_input: X[i].reshape(-1, cut_size, 1)})
    #print(res)
    predicts = np.argmax(res[0], axis=1)
    #print('case %d: class = %s, predict = %s, ' % (i, label_class[y[i]], label_class[predicts[0]]))
    PreCount[predicts] = PreCount[predicts] + 1
    RealCount[y[i]] = RealCount[y[i]] + 1
    #print("%d %d" % (predicts[0], y[i]))
    if (predicts[0] == y[i]):
        CorrectCount[predicts] = CorrectCount[predicts] + 1
end_time = time.time()
# F1
F1_res = CorrectCount * 2 / (PreCount + RealCount)
print('F1n = %f, F1a = %f, F1o = %f, F1~ = %f' % (F1_res[0], F1_res[1], F1_res[2], F1_res[3]))
print('F1 = %f' % np.mean(F1_res))
# Accuracy
print('Accuracy = %f' % (np.sum(CorrectCount) / test_len))
# Precision
precision_rate = CorrectCount / PreCount
print('Precision: N = %f, A = %f, O = %f, ~ = %f' % (precision_rate[0], precision_rate[1], precision_rate[2], precision_rate[3]))
# Recall
recall_rate = CorrectCount / RealCount
print('Recall: N = %f, A = %f, O = %f, ~ = %f' % (recall_rate[0], recall_rate[1], recall_rate[2], recall_rate[3]))
# Time
print('Time: %fs' % ((end_time - start_time) / test_len))
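
# Note: the per-class score printed above is F1 = 2 * TP / (#predicted + #actual) for each class.
# If scikit-learn is available, it can be cross-checked with
# sklearn.metrics.f1_score(y_true, y_pred, average=None) after collecting all predictions.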
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np
import argparse
import sys, os
import random
from physionet import load_physionet
from model import ResNet
from scipy.io import loadmat


os.environ["CUDA_VISIBLE_DEVICES"] = "0"

def cut_and_pad(X, cut_size):
    n = len(X)
    X_cut = np.zeros(shape=(n, cut_size))
    for i in range(n):
        data_len = X[i].squeeze().shape[0]
        # cut if too long / pad with zeros if too short
        X_cut[i, :min(cut_size, data_len)] = X[i][0, :min(cut_size, data_len)]
    return X_cut

def to_one_hot(y, class_num=4):
    if isinstance(y, int):
        y_onehot = np.zeros((1, class_num))
        y_onehot[0, y] = 1
        return y_onehot
    elif isinstance(y, np.ndarray):
        y_onehot = np.zeros((y.shape[0], class_num))
        for i in range(y.shape[0]):
            y_onehot[i, y[i]] = 1
        return y_onehot

def get_sub_set(X, y, k, K_folder_or_not):
    if not K_folder_or_not:
        # simple 90/10 train/validation split
        k_dataset_len = int(len(X) * 0.9)
        train_X = X[:k_dataset_len]
        train_y = y[:k_dataset_len]
        valid_X = X[k_dataset_len:]
        valid_y = y[k_dataset_len:]
    else:
        # use the k-th fifth of the data as the validation fold
        k_dataset_len = int(len(X) / 5)
        if k == 0:
            valid_X = X[:k_dataset_len]
            valid_y = y[:k_dataset_len]
            train_X = X[k_dataset_len:]
            train_y = y[k_dataset_len:]
        else:
            print(k * k_dataset_len)
            valid_X = X[k*k_dataset_len: (k+1)*k_dataset_len]
            valid_y = y[k*k_dataset_len: (k+1)*k_dataset_len]
            train_X = np.concatenate((X[:k*k_dataset_len], X[(k+1)*k_dataset_len:]), axis=0)
            train_y = np.concatenate((y[:k*k_dataset_len], y[(k+1)*k_dataset_len:]), axis=0)
    return train_X, train_y, valid_X, valid_y

parser = argparse.ArgumentParser()
parser.add_argument('--learning_rate', type=float, default=0.0000002, help='learning rate')
parser.add_argument('--epochs', type=int, default=30000, help='epoch number')
parser.add_argument('--batch_size', type=int, default=16, help='batch size')
parser.add_argument('--k_folder', type=bool, default=False, help='whether to run k-fold validation')
args = parser.parse_args()
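
# Note: argparse converts a type=bool option with bool(), so any non-empty value
# (including "--k_folder False") is treated as True. Pass "--k_folder True" to enable
# k-fold validation and simply omit the flag to disable it.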
class_num = 4

training_set = loadmat('train.mat')
X = training_set['data'][0]
y = training_set['label'][0].astype('int32')

#cut_size_start = 300 * 3
cut_size = 300 * 30

X = cut_and_pad(X, cut_size)

#import matplotlib.pyplot as plt
#plt.plot(range(cut_size), X[0])
#plt.show()


# k-fold / train
if args.k_folder:
    low_border = 0
    high_border = 5
    F1_valid = np.zeros(5)
else:
    low_border = 0
    high_border = 1

for k in range(low_border, high_border):
    # get validation set
    train_X, train_y, valid_X, valid_y = get_sub_set(X, y, k, args.k_folder)
    y_onehot = to_one_hot(train_y)

    if args.k_folder:
        print("[!] kfolder_iter: %d, train: %d, validation: %d" % (k, len(train_X), len(valid_X)))
    else:
        print("[!] Training: %d, validation: %d" % (len(train_X), len(valid_X)))

    data_input = tf.placeholder(dtype='float32', shape=(None, cut_size, 1))
    label_input = tf.placeholder(dtype='float32', shape=(None, class_num))

    # build model
    logits = ResNet(data_input, class_num=class_num)
    loss = tf.losses.softmax_cross_entropy(label_input, logits)
    opt = tf.train.AdamOptimizer(learning_rate=args.learning_rate).minimize(loss)

    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    sess = tf.Session(config=tf_config)
    try: os.mkdir('checkpoints')
    except: pass
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(tf.global_variables())

    if not args.k_folder:
        try:
            if os.path.exists('checkpoints'):
                saver.restore(sess, 'checkpoints/model')
                print('Model restored from checkpoints')
            else:
                print('Restore failed, training new model!')
        except:
            print('Restore failed, training new model!')


    batch_size = args.batch_size
    epochs = args.epochs
    train_X = train_X.reshape(-1, cut_size, 1)
    valid_X = valid_X.reshape(-1, cut_size, 1)
    ep = 0
    while True:
        total_loss = []
        ep = ep + 1
        for itr in range(0, len(train_X), batch_size):
            # prepare data batch
            if itr + batch_size >= len(train_X):
                cat_n = itr + batch_size - len(train_X)
                cat_idx = random.sample(range(len(train_X)), cat_n)
                batch_inputs = np.concatenate((train_X[itr:], train_X[cat_idx]), axis=0)
                batch_labels = np.concatenate((y_onehot[itr:], y_onehot[cat_idx]), axis=0)
            else:
                batch_inputs = train_X[itr:itr+batch_size]
                batch_labels = y_onehot[itr:itr+batch_size]

            _, cur_loss = sess.run([opt, loss], {data_input: batch_inputs, label_input: batch_labels})
            total_loss.append(cur_loss)
            #if itr % 10 == 0:
            #    print(' iter %d, loss = %f' % (itr, cur_loss))
            #    saver.save(sess, args.ckpt)
        print('[*] epoch %d, average loss = %f' % (ep, np.mean(total_loss)))
        if not args.k_folder:
            saver.save(sess, 'checkpoints/model')

        # validation
        if ep % 5 == 0:  #and ep != 0:
            err = 0
            n = np.zeros(class_num)
            N = np.zeros(class_num)
            correct = np.zeros(class_num)
            valid_n = len(valid_X)
            for i in range(valid_n):
                res = sess.run([logits], {data_input: valid_X[i].reshape(-1, cut_size, 1)})
                # print(valid_y[i])
                # print(res)
                predicts = np.argmax(res[0], axis=1)
                n[predicts] = n[predicts] + 1
                N[valid_y[i]] = N[valid_y[i]] + 1
                if predicts[0] != valid_y[i]:
                    err += 1
                else:
                    correct[predicts] = correct[predicts] + 1
            print("[!] %d validation data, accuracy = %f" % (valid_n, 1.0 * (valid_n - err) / valid_n))
            res = 2.0 * correct / (N + n)
            print("[!] Normal = %f, Af = %f, Other = %f, Noisy = %f" % (res[0], res[1], res[2], res[3]))
            print("[!] F1 accuracy = %f" % np.mean(2.0 * correct / (N + n)))
            if args.k_folder:
                F1_valid[k] = np.mean(res)

        if np.mean(total_loss) < 0.2 and ep % 5 == 0:
            break

if args.k_folder:
    print("\n\n[!] k-fold validation finished! The F1 score for each fold is:")
    print("[!] 1: %f, 2: %f, 3: %f, 4: %f, 5: %f" % (F1_valid[0], F1_valid[1], F1_valid[2], F1_valid[3], F1_valid[4]))
    print("[!] Average is %f" % (np.mean(F1_valid)))
--------------------------------------------------------------------------------