├── Physiological signal challenge ├── README.md ├── answers.csv ├── codes │ ├── model.py │ ├── pre_processing.py │ ├── segmentation.py │ ├── training.py │ └── utils.py ├── cpsc2018.py ├── model │ ├── checkpoint │ ├── model.ckpt-207875.data-00000-of-00001 │ ├── model.ckpt-207875.index │ ├── model.ckpt-207875.meta │ └── readme.txt ├── readme.txt ├── score.txt ├── score_py3.py ├── training.ipynb ├── training_set │ └── readme.txt ├── validation.ipynb └── validation_set │ └── readme.txt └── README.md /Physiological signal challenge/README.md: -------------------------------------------------------------------------------- 1 | Repository for my China Physiological Signal Challenge 2018 Submission (Submisison ID: CPSC0204) 2 | 3 | Challenge website: http://www.icbeb.org/Challenge.html 4 | 5 | arXiv preprint of my paper: 6 | https://arxiv.org/ftp/arxiv/papers/1811/1811.02090.pdf 7 | -------------------------------------------------------------------------------- /Physiological signal challenge/answers.csv: -------------------------------------------------------------------------------- 1 | Recording,Result 2 | A0001,5 3 | A0002,1 4 | A0003,2 5 | A0004,2 6 | A0005,7 7 | A0006,5 8 | A0007,2 9 | A0008,8 10 | A0009,2 11 | A0010,5 12 | A0011,4 13 | A0012,7 14 | A0013,8 15 | A0014,8 16 | A0015,5 17 | A0016,1 18 | A0017,2 19 | A0018,4 20 | A0019,2 21 | A0020,1 22 | A0021,1 23 | A0022,5 24 | A0023,2 25 | A0024,7 26 | A0025,7 27 | A0026,5 28 | A0027,5 29 | A0028,5 30 | A0029,1 31 | A0030,1 32 | A0031,7 33 | A0032,9 34 | A0033,8 35 | A0034,9 36 | A0035,5 37 | A0036,9 38 | A0037,1 39 | A0038,1 40 | A0039,3 41 | A0040,6 42 | A0041,1 43 | A0042,3 44 | A0043,2 45 | A0044,4 46 | A0045,7 47 | A0046,9 48 | A0047,6 49 | A0048,8 50 | A0049,6 51 | A0050,7 52 | A0051,5 53 | A0052,7 54 | A0053,5 55 | A0054,9 56 | A0055,6 57 | A0056,8 58 | A0057,4 59 | A0058,9 60 | A0059,1 61 | A0060,8 62 | A0061,2 63 | A0062,6 64 | A0063,9 65 | A0064,2 66 | A0065,2 67 | A0066,5 68 | A0067,8 69 | A0068,5 70 | 
A0069,5 71 | A0070,3 72 | A0071,2 73 | A0072,9 74 | A0073,1 75 | A0074,4 76 | A0075,1 77 | A0076,8 78 | A0077,1 79 | A0078,5 80 | A0079,2 81 | A0080,6 82 | A0081,4 83 | A0082,4 84 | A0083,1 85 | A0084,7 86 | A0085,5 87 | A0086,2 88 | A0087,5 89 | A0088,7 90 | A0089,1 91 | A0090,1 92 | A0091,6 93 | A0092,7 94 | A0093,9 95 | A0094,1 96 | A0095,5 97 | A0096,5 98 | A0097,6 99 | A0098,5 100 | A0099,7 101 | A0100,8 102 | A0101,2 103 | A0102,3 104 | A0103,7 105 | A0104,3 106 | A0105,7 107 | A0106,8 108 | A0107,1 109 | A0108,6 110 | A0109,2 111 | A0110,8 112 | A0111,8 113 | A0112,5 114 | A0113,6 115 | A0114,5 116 | A0115,7 117 | A0116,5 118 | A0117,2 119 | A0118,3 120 | A0119,5 121 | A0120,9 122 | A0121,2 123 | A0122,2 124 | A0123,6 125 | A0124,6 126 | A0125,1 127 | A0126,2 128 | A0127,5 129 | A0128,8 130 | A0129,7 131 | A0130,7 132 | A0131,4 133 | A0132,3 134 | A0133,1 135 | A0134,5 136 | A0135,1 137 | A0136,6 138 | A0137,6 139 | A0138,5 140 | A0139,9 141 | A0140,8 142 | A0141,1 143 | A0142,5 144 | A0143,1 145 | A0144,7 146 | A0145,2 147 | A0146,7 148 | A0147,9 149 | A0148,8 150 | A0149,1 151 | A0150,2 152 | A0151,5 153 | A0152,8 154 | A0153,2 155 | A0154,8 156 | A0155,5 157 | A0156,7 158 | A0157,1 159 | A0158,7 160 | A0159,7 161 | A0160,5 162 | A0161,5 163 | A0162,7 164 | A0163,3 165 | A0164,1 166 | A0165,8 167 | A0166,1 168 | A0167,6 169 | A0168,1 170 | A0169,5 171 | A0170,1 172 | A0171,5 173 | A0172,3 174 | A0173,8 175 | A0174,9 176 | A0175,1 177 | A0176,1 178 | A0177,1 179 | A0178,5 180 | A0179,1 181 | A0180,1 182 | A0181,5 183 | A0182,7 184 | A0183,5 185 | A0184,2 186 | A0185,8 187 | A0186,2 188 | A0187,8 189 | A0188,3 190 | A0189,1 191 | A0190,6 192 | A0191,6 193 | A0192,1 194 | A0193,1 195 | A0194,8 196 | A0195,8 197 | A0196,8 198 | A0197,7 199 | A0198,2 200 | A0199,5 201 | A0200,5 202 | A0201,8 203 | A0202,3 204 | A0203,2 205 | A0204,7 206 | A0205,2 207 | A0206,1 208 | A0207,7 209 | A0208,1 210 | A0209,5 211 | A0210,1 212 | A0211,5 213 | A0212,3 214 | A0213,2 215 
| A0214,2 216 | A0215,5 217 | A0216,5 218 | A0217,2 219 | A0218,6 220 | A0219,5 221 | A0220,2 222 | A0221,1 223 | A0222,2 224 | A0223,3 225 | A0224,6 226 | A0225,5 227 | A0226,5 228 | A0227,7 229 | A0228,5 230 | A0229,1 231 | A0230,5 232 | A0231,2 233 | A0232,8 234 | A0233,1 235 | A0234,8 236 | A0235,1 237 | A0236,3 238 | A0237,6 239 | A0238,3 240 | A0239,3 241 | A0240,3 242 | A0241,7 243 | A0242,5 244 | A0243,3 245 | A0244,4 246 | A0245,6 247 | A0246,3 248 | A0247,2 249 | A0248,3 250 | A0249,5 251 | A0250,5 252 | A0251,6 253 | A0252,7 254 | A0253,4 255 | A0254,1 256 | A0255,5 257 | A0256,9 258 | A0257,2 259 | A0258,5 260 | A0259,5 261 | A0260,2 262 | A0261,5 263 | A0262,3 264 | A0263,3 265 | A0264,1 266 | A0265,6 267 | A0266,5 268 | A0267,2 269 | A0268,5 270 | A0269,7 271 | A0270,6 272 | A0271,2 273 | A0272,2 274 | A0273,5 275 | A0274,2 276 | A0275,6 277 | A0276,2 278 | A0277,7 279 | A0278,8 280 | A0279,2 281 | A0280,3 282 | A0281,1 283 | A0282,8 284 | A0283,1 285 | A0284,3 286 | A0285,1 287 | A0286,5 288 | A0287,1 289 | A0288,3 290 | A0289,6 291 | A0290,2 292 | A0291,4 293 | A0292,3 294 | A0293,5 295 | A0294,5 296 | A0295,6 297 | A0296,5 298 | A0297,8 299 | A0298,5 300 | A0299,6 301 | A0300,5 302 | -------------------------------------------------------------------------------- /Physiological signal challenge/codes/model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.contrib.layers import fully_connected 3 | import tensorflow.contrib.rnn as recurrent 4 | import numpy as np 5 | # 6 | def build_model_graph(Training_mode = False, input_size = 12, time_steps = 1000, hidden_size = 100, num_hidden = 2, output_size = 9, keep_prob = 0.5): 7 | # 8 | # place-holdes 9 | inputs = tf.placeholder(tf.float32, [None, time_steps, input_size]) 10 | labels = tf.placeholder(tf.int32, [None]) 11 | seq_length = tf.placeholder(tf.int32, [None]) 12 | # 13 | logits, accuracy = RNN_bidirectional(inputs, 
def load_model(model_dir, session):
    """Restore the latest checkpoint found in *model_dir* into *session*.

    Args:
        model_dir: directory containing the TensorFlow ``checkpoint`` file.
        session: the ``tf.Session`` whose variables are restored.

    Raises:
        FileNotFoundError: if *model_dir* holds no checkpoint state (the
            original code failed here with an opaque ``AttributeError``).
    """
    # the saver
    saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=1)
    checkpoint = tf.train.get_checkpoint_state(model_dir)
    if checkpoint is None or not checkpoint.model_checkpoint_path:
        raise FileNotFoundError(
            "no TensorFlow checkpoint found in {!r}".format(model_dir))
    saver.restore(session, checkpoint.model_checkpoint_path)


def RNN_bidirectional(input_tensor, label_tensor, length_tensor, Training_mode,
                      hidden_size, num_hidden, output_size, keep_prob):
    """Build the stacked bidirectional LSTM classifier.

    Args:
        input_tensor: batch of input sequences fed to the recurrent stack.
        label_tensor: integer class label per sequence (used for accuracy).
        length_tensor: valid length of every sequence in the batch.
        Training_mode: input dropout is applied only when this is ``True``.
        hidden_size: number of units in every LSTM cell.
        num_hidden: number of stacked layers per direction.
        output_size: number of output classes.
        keep_prob: input keep probability used while training.

    Returns:
        ``(logits, accuracy)`` where ``logits`` always has rank 2,
        ``[batch, output_size]``, and ``accuracy`` is the mean top-1 accuracy.
    """
    # NOTE(review): the dump this file came from lost its indentation; the
    # dense layer is assumed to live inside the "recurrent" variable scope.
    # Confirm against the variable names stored in the shipped checkpoint.
    with tf.variable_scope("recurrent", initializer=tf.contrib.layers.variance_scaling_initializer()):
        input_keep_prob = keep_prob if Training_mode is True else 1.0
        cell = tf.nn.rnn_cell.BasicLSTMCell
        # Keep the original construction order (all cells first, wrappers
        # afterwards) so the graph is built exactly as before.
        cells_fw = [cell(hidden_size) for _ in range(num_hidden)]
        cells_bw = [cell(hidden_size) for _ in range(num_hidden)]
        cells_fw = [tf.contrib.rnn.DropoutWrapper(c, input_keep_prob=input_keep_prob) for c in cells_fw]
        cells_bw = [tf.contrib.rnn.DropoutWrapper(c, input_keep_prob=input_keep_prob) for c in cells_bw]
        _, states_fw, states_bw = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
            cells_fw=cells_fw,
            cells_bw=cells_bw,
            inputs=input_tensor,
            sequence_length=length_tensor,
            dtype=tf.float32)
        # Last element of the final state of the top layer in each direction
        # (presumably the LSTM hidden state `h` -- TODO confirm).
        outputs_fw = states_fw[-1][-1]
        outputs_bw = states_bw[-1][-1]
        outputs = tf.concat([outputs_fw, outputs_bw], axis=1)
        # No tf.squeeze here: squeezing turned a batch of one sequence into a
        # rank-1 tensor, which breaks in_top_k below and any argmax(axis=1)
        # done by the caller.
        logits = fully_connected(outputs, output_size, activation_fn=None)
    #
    correct = tf.nn.in_top_k(logits, label_tensor, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    return logits, accuracy
def ecg_preprocessing(data, wfun, dcmp_levels, chop_levels, sampling_freq=500):
    """Denoise ECG data with wavelet thresholding, a high-pass and a notch.

    The signal is decomposed along its last axis, the ``chop_levels`` finest
    detail bands are zeroed, the signal is reconstructed, and then filtered
    with a 2 Hz first-order high-pass and a 49-51 Hz second-order band-stop.

    Args:
        data: array whose last axis is time.
        wfun: PyWavelets wavelet name (e.g. ``'sym8'``).
        dcmp_levels: requested decomposition depth; capped at the maximum
            depth the signal length allows.
        chop_levels: number of finest detail bands to discard.
        sampling_freq: sampling rate in Hz used to design the filters
            (defaults to the previously hard-coded 500).

    Returns:
        The cleaned signal, with the same shape as *data*.
    """
    n_samples = data.shape[-1]
    dcmp_levels = min(dcmp_levels, pywt.dwt_max_level(n_samples, pywt.Wavelet(wfun)))
    # Never zero the approximation band: with a short signal the depth may be
    # capped below chop_levels, which used to wipe the whole signal.
    chop_levels = min(chop_levels, dcmp_levels)
    coeffs = pywt.wavedec(data, wfun, mode='symmetric', level=dcmp_levels, axis=-1)
    # coeffs is [cA_n, cD_n, ..., cD_1]; negative indices >= -chop_levels are
    # the finest detail bands.
    coeffs_m = [np.zeros_like(coeffs[idx]) if idx >= -chop_levels else coeffs[idx]
                for idx in range(-dcmp_levels - 1, 0)]
    #
    data_recon = pywt.waverec(coeffs_m, wfun, mode='symmetric', axis=-1)
    # waverec can return one extra sample for odd-length input; trim it so the
    # output length always matches the input.
    data_recon = data_recon[..., :n_samples]
    #
    data_recon = butterworth_high_pass(data_recon, cut_off=2, order=1, sampling_freq=sampling_freq)
    #
    data_recon = butterworth_notch(data_recon, cut_off=[49, 51], order=2, sampling_freq=sampling_freq)
    #
    return data_recon


def _butterworth_filter(x, cut_off, order, sampling_freq, btype):
    """Design a Butterworth filter of the given type and run it causally over the last axis."""
    nyq_freq = sampling_freq / 2.0
    digital_cutoff = np.asarray(cut_off, dtype=float) / nyq_freq
    b, a = sig.butter(order, digital_cutoff, btype=btype)
    return sig.lfilter(b, a, x, axis=-1)


def butterworth_high_pass(x, cut_off, order, sampling_freq):
    """Apply a Butterworth high-pass filter along the last axis of *x*.

    Args:
        x: input signal(s), time on the last axis.
        cut_off: cut-off frequency in Hz.
        order: filter order.
        sampling_freq: sampling rate in Hz.

    Returns:
        The filtered signal, same shape as *x*.
    """
    return _butterworth_filter(x, cut_off, order, sampling_freq, 'highpass')


def butterworth_notch(x, cut_off, order, sampling_freq):
    """Apply a Butterworth band-stop filter along the last axis of *x*.

    Args:
        x: input signal(s), time on the last axis.
        cut_off: ``[low, high]`` edges of the rejected band in Hz.
        order: filter order.
        sampling_freq: sampling rate in Hz.

    Returns:
        The filtered signal, same shape as *x*.
    """
    return _butterworth_filter(x, cut_off, order, sampling_freq, 'bandstop')
def find_threshold(feature, window_size_for_threshold):
    """Return a detection threshold for *feature*.

    The feature is cut into consecutive windows of
    ``window_size_for_threshold`` samples; the result is the median of
    (median of the window minima, median of the window maxima).
    """
    window_size = window_size_for_threshold
    starts = np.arange(0, len(feature), window_size)
    # Slicing past the end is clipped, so the last window may be shorter.
    window_max = [np.max(feature[s:s + window_size]) for s in starts]
    window_min = [np.min(feature[s:s + window_size]) for s in starts]
    #
    return np.median([np.median(window_min), np.median(window_max)])


def _lobe_peak_locations(feature, thresh):
    """Return the index of the maximum of every above-threshold lobe of *feature*."""
    feature = np.asarray(feature)
    y_ = feature * (feature > thresh)
    transitions = np.diff((y_ != 0).astype(int))
    rising = np.where(transitions == 1)[0]     # last sample *before* a lobe
    falling = np.where(transitions == -1)[0]   # last sample *inside* a lobe
    #
    if len(rising) == 0 and len(falling) == 0:
        # No edge at all (nothing above threshold, or everything is).
        return np.array([], dtype=int)
    #
    starts = rising + 1
    ends = falling
    # The signal begins inside a lobe: that lobe starts at sample 0.
    if len(falling) and (len(rising) == 0 or rising[0] > falling[0]):
        starts = np.concatenate(([0], starts))
    # The signal ends inside a lobe: that lobe runs to the last sample.
    if len(rising) and (len(falling) == 0 or rising[-1] > falling[-1]):
        ends = np.concatenate((ends, [len(feature) - 1]))
    #
    # Absolute position = lobe start + offset of the maximum inside the lobe.
    # (The previous "+ left - 1" was a 1-based-indexing leftover that shifted
    # every peak one sample left and could produce index -1.)
    peaks = [s + np.argmax(y_[s:e + 1]) for s, e in zip(starts, ends)]
    return np.array(peaks, dtype=int)


def detect_local_maxima(feature, window_size_for_threshold):
    """Locate the maximum of each lobe of *feature* above an adaptive threshold.

    Args:
        feature: 1-D feature signal.
        window_size_for_threshold: window length passed to ``find_threshold``.

    Returns:
        Integer array of peak indices into *feature* (empty if no lobe edge
        is found).
    """
    thresh = find_threshold(feature, window_size_for_threshold)
    return _lobe_peak_locations(feature, thresh)


def detect_local_maxima_v2(feature, threshold):
    """Locate the maximum of each lobe of *feature* above a fixed *threshold*.

    Args:
        feature: 1-D feature signal.
        threshold: samples must exceed this value to belong to a lobe.

    Returns:
        Integer array of peak indices into *feature* (empty if no lobe edge
        is found).
    """
    return _lobe_peak_locations(feature, threshold)


def peak_refine(primary_locations, search_radius, count_threshold):
    """Merge clustered peak candidates into single peaks.

    Candidates are sorted; starting from each unprocessed candidate, all
    following candidates closer than ``search_radius`` form one cluster. A
    cluster is kept, as the rounded median of its members, only when it has
    more than ``count_threshold`` members.

    Args:
        primary_locations: candidate peak positions (any order).
        search_radius: maximum distance from the first member of a cluster.
        count_threshold: a cluster needs strictly more members than this.

    Returns:
        Integer array of refined peak positions in ascending order.
    """
    primary_locations = np.sort(primary_locations)
    N = len(primary_locations)
    #
    refined = []
    i = 0
    # Iterate up to and including the last candidate; stopping at N - 1
    # silently dropped a trailing single-member cluster.
    while i < N:
        begin = primary_locations[i]
        count = int(np.sum(np.abs(primary_locations[i:] - begin) < search_radius))
        # Always advance, even for a non-positive radius (count == 0).
        step = max(count, 1)
        if count > count_threshold:
            refined.append(np.median(primary_locations[i:i + step]))
        i += step
    #
    return np.round(np.array(refined)).astype(int)
def remove_false_peaks(peaks, remove_thres):
    """Drop one peak from every pair of peaks that are implausibly close.

    For each pair closer than ``remove_thres`` the distance from a
    neighbouring reference peak to both members is compared with the median
    peak-to-peak interval; the member whose distance deviates more is
    removed (ties remove the later member).

    Args:
        peaks: ascending peak positions.
        remove_thres: minimum allowed distance between consecutive peaks.

    Returns:
        *peaks* itself when nothing is removed, otherwise a new array
        without the rejected peaks.
    """
    peaks_proxy = list(peaks)
    intervals = np.diff(peaks_proxy)
    false_peak_loc = np.where(intervals < remove_thres)[0]
    #
    if len(false_peak_loc) == 0:
        return peaks
    #
    typical = np.median(intervals)  # loop-invariant, computed once
    last_pair = len(peaks_proxy) - 2
    to_delete = []
    #
    for loc in false_peak_loc:
        if loc != last_pair:
            # Reference is the peak following the pair.
            ref = peaks_proxy[loc + 2]
            d1 = ref - peaks_proxy[loc]
            d2 = ref - peaks_proxy[loc + 1]
        elif loc >= 1:
            # Pair sits at the end: reference is the preceding peak. The
            # distances must be positive to be comparable with the median
            # interval (they used to be computed with the opposite sign,
            # which always removed the last peak).
            ref = peaks_proxy[loc - 1]
            d1 = peaks_proxy[loc] - ref
            d2 = peaks_proxy[loc + 1] - ref
        else:
            # Only two peaks in total: there is no reference, drop the later.
            to_delete.append(loc + 1)
            continue
        #
        if abs(typical - d1) > abs(typical - d2):
            to_delete.append(loc)
        else:
            to_delete.append(loc + 1)
    #
    return np.delete(peaks, np.array(to_delete, dtype=int))
sig.decimate(ecg_segments, n = 60, q = 7, ftype = 'fir', axis = 2, zero_phase = True) 267 | ecg_labels = np.expand_dims(data_label, axis = 0).astype(int) # Rank-1 array 268 | ecg_seq_length = np.expand_dims(data.shape[1], axis = 0).astype(int) # Rank-1 array 269 | np.place(ecg_seq_length, ecg_seq_length > max_length, max_length) 270 | # 271 | if num_of_peaks == 5 or num_of_peaks == 6: 272 | # 273 | p = 2 274 | start = int(peaks[p] - peak_to_peak[p - 1] - round(0.9 * peak_to_peak[p - 2])) 275 | end = int(peaks[p] + peak_to_peak[p] + peak_to_peak[p + 1] + round(0.9 * peak_to_peak[p + 2])) 276 | # 277 | segment = data[:, start : end + 1] 278 | #segment = ecg_preprocessing(segment, 'sym8', 8, 3) 279 | segment = sig.decimate(segment, n = 60, q = 7, ftype = 'fir', axis = 1, zero_phase = True) 280 | ecg_segments = np.expand_dims(set_to_desired_length(segment, max_len = max_length), axis = 0).astype(int) # Rank-3 array 281 | # 282 | ecg_seq_length = np.expand_dims(segment.shape[1], axis = 0) # Rank-1 array 283 | np.place(ecg_seq_length, ecg_seq_length > max_length, max_length) 284 | # 285 | if data_label == 2 or data_label == 6 or data_label == 7: 286 | # 287 | feature_vector = extract_features(peaks[p - 1 : p + 3]) 288 | # 289 | if np.amin(feature_vector) / np.amax(feature_vector) >= .80: 290 | data_label = 1 291 | ecg_labels = np.expand_dims(data_label, axis = 0).astype(int) # Rank-1 array 292 | # 293 | if num_of_peaks >= 7: 294 | # initialize 295 | #no_of_segments = max(1, num_of_peaks - 3) 296 | no_of_segments = max(1, num_of_peaks - 7) 297 | ecg_segments = np.zeros([no_of_segments, 12, max_length]) 298 | ecg_labels = np.zeros([no_of_segments]) 299 | ecg_seq_length = np.zeros([no_of_segments]) 300 | # 301 | for p in np.arange(3, max(num_of_peaks - 4, 4)): 302 | start = int(peaks[p] - peak_to_peak[p - 1] - round(0.9 * peak_to_peak[p - 2])) 303 | end = int(peaks[p] + peak_to_peak[p] + peak_to_peak[p + 1] + round(0.9 * peak_to_peak[p + 2])) 304 | segment = data[:, start : 
end + 1] 305 | #segment = ecg_preprocessing(segment, 'sym8', 8, 3) 306 | segment = sig.decimate(segment, n = 60, q = 7, ftype = 'fir', axis = 1, zero_phase = True) 307 | seq_length = segment.shape[1] 308 | segment = set_to_desired_length(segment, max_len = max_length) 309 | ecg_segments[p - 3, :,:] = segment 310 | # 311 | ecg_seq_length[p - 3] = seq_length 312 | np.place(ecg_seq_length, ecg_seq_length > max_length, max_length) 313 | # 314 | if data_label == 2 or data_label == 6 or data_label == 7: 315 | # 316 | feature_vector = extract_features(peaks[p - 1 : p + 3]) 317 | # 318 | if np.amin(feature_vector) / np.amax(feature_vector) >= .80: 319 | ecg_labels[p - 3] = 1 320 | else: 321 | ecg_labels[p - 3] = data_label 322 | else: 323 | ecg_labels[p - 3] = data_label 324 | 325 | # 326 | return ecg_segments, ecg_labels.astype(int) - 1, ecg_seq_length.astype(int) 327 | # 328 | def extract_ecg_segments_v2(peaks, file_name, data_label = None, max_length = 1000): 329 | # 330 | # read data as float32, labels as int32, sequence length as int32 331 | data = sio.loadmat(file_name)['ECG'][0][0][2] 332 | data = data.astype(np.float32) 333 | data_org = data 334 | # clean the data first 335 | data = ecg_preprocessing(data, 'sym8', 8, 3) 336 | # 337 | num_of_peaks = len(peaks) 338 | no_channels = 12 339 | data_length = np.expand_dims(data.shape[1], axis = 0) 340 | # 341 | peak_to_peak = np.concatenate([np.expand_dims(peaks[0], axis = 0), np.diff(peaks), data_length - peaks[-1]]) 342 | # 343 | if num_of_peaks <= 4: 344 | data = (data - np.expand_dims(np.mean(data, axis = 1), axis = 1)) / (np.expand_dims(np.std(data, axis = 1), axis = 1) + EPS) 345 | data = sig.decimate(data, n = 60, q = 7, ftype = 'fir', axis = 1, zero_phase = True) 346 | ecg_segments = np.expand_dims(set_to_desired_length(data, max_len = max_length), axis = 0) # Rank-3 array 347 | #ecg_segments = ecg_preprocessing(ecg_segments, 'sym8', 8, 3) 348 | if data_label is not None: 349 | ecg_labels = 
np.expand_dims(data_label, axis = 0).astype(int) # Rank-1 array 350 | else: 351 | ecg_labels = None 352 | ecg_seq_length = np.expand_dims(data.shape[1], axis = 0).astype(int) # Rank-1 array 353 | np.place(ecg_seq_length, ecg_seq_length > max_length, max_length) 354 | # 355 | if num_of_peaks > 4: 356 | # initialize 357 | no_of_segments = max(1, num_of_peaks - 3) 358 | #no_of_segments = max(1, num_of_peaks - 7) 359 | ecg_segments = np.zeros([no_of_segments, 12, max_length]) 360 | if data_label is not None: 361 | ecg_labels = np.zeros([no_of_segments]) 362 | else: 363 | ecg_labels = None 364 | ecg_seq_length = np.zeros([no_of_segments]) 365 | # 366 | for p in np.arange(1, num_of_peaks - 2): 367 | start = int(peaks[p] - peak_to_peak[p] - round(0.9 * peak_to_peak[p - 1])) 368 | end = int(peaks[p] + peak_to_peak[p] + peak_to_peak[p + 1] + round(0.9 * peak_to_peak[p + 2])) 369 | segment = data[:, start : end + 1] 370 | segment = (segment - np.expand_dims(np.mean(segment, axis = 1), axis = 1)) / (np.expand_dims(np.std(segment, axis = 1), axis = 1) + EPS) 371 | #segment = ecg_preprocessing(segment, 'sym8', 8, 3) 372 | segment = sig.decimate(segment, n = 60, q = 7, ftype = 'fir', axis = 1, zero_phase = True) 373 | seq_length = segment.shape[1] 374 | segment = set_to_desired_length(segment, max_len = max_length) 375 | ecg_segments[p - 1, :,:] = segment 376 | # 377 | ecg_seq_length[p - 1] = seq_length 378 | np.place(ecg_seq_length, ecg_seq_length > max_length, max_length) 379 | # 380 | if ecg_labels is not None: 381 | if data_label == 2 or data_label == 6 or data_label == 7: 382 | # 383 | feature_vector = extract_features(peaks[p - 1 : p + 3]) 384 | # 385 | if np.amin(feature_vector) / np.amax(feature_vector) >= .80: 386 | ecg_labels[p - 1] = 1 387 | else: 388 | ecg_labels[p - 1] = data_label 389 | else: 390 | ecg_labels[p - 1] = data_label 391 | # 392 | if data_label == 7: 393 | #print('here') 394 | _, PVC_segments = special_PVC(peaks, data_org) 395 | #print(PVC_segments) 396 
def special_PVC(peaks, data):
    """Score every beat by template correlation and list the affected segments.

    For each peak a template is cut from *data* around the peak (0.9 of the
    neighbouring peak-to-peak interval on each side) and cross-correlated
    with the full recording, summed over all leads. The absolute
    correlations are mapped to (0, 1] with a Gaussian of the distance to the
    global maximum; peaks owning a value above 0.95 are candidates.

    Args:
        peaks: ascending peak positions (sample indices).
        data: array of shape ``(leads, samples)``.

    Returns:
        ``(corr_big, PVC_segments)``: the ``(len(peaks), samples)`` score
        matrix and the sorted unique int32 indices of the segments (out of
        ``len(peaks) - 3``) that contain a candidate peak.
    """
    peaks = np.asarray(peaks)
    n_leads, n_samples = data.shape[0], data.shape[-1]
    # Intervals: start-of-record -> first peak, peak -> peak, last peak -> end.
    p2p = np.concatenate([peaks[:1], np.diff(peaks), [n_samples - peaks[-1]]])
    corr_big = np.zeros([peaks.shape[-1], n_samples])
    #
    for peak_loc, peak in enumerate(peaks, 1):
        start = int(peak - round(0.9 * p2p[peak_loc - 1]))
        end = int(peak + round(0.9 * p2p[peak_loc]))
        # Keep the template window inside the recording; a negative start
        # would otherwise wrap around and give an empty template.
        start = max(start, 0)
        end = min(end, n_samples - 1)
        template = data[:, start:end + 1]
        #
        corr = np.zeros(n_samples)
        # Use every lead actually present instead of a hard-coded 12.
        for lead in range(n_leads):
            corr = corr + sig.correlate(data[lead, :], template[lead, :], mode='same')
        #
        corr_big[peak_loc - 1, :] = np.abs(corr)
    #
    # exp(-x) instead of 1 / exp(x): same value, no overflow for large x.
    corr_big = np.exp(-0.00001 * ((corr_big - np.amax(corr_big)) ** 2))
    #
    candidate_peaks = np.unique(np.where(corr_big > 0.95)[0])
    #
    P = len(peaks)
    K = P - 3
    if len(candidate_peaks) == 0:
        # np.concatenate rejects an empty list.
        return corr_big, np.array([], dtype=np.int32)
    # Peak k belongs to segments k-3 .. k, clipped to the valid range 0 .. K-1.
    PVC_segments = np.concatenate(
        [np.arange(start=max(0, k - 3), stop=min(k, K - 1) + 1, dtype=np.int32)
         for k in candidate_peaks])
    PVC_segments = np.unique(PVC_segments)
    #
    return corr_big, PVC_segments
def test_val_split_v2(data_path, train_percentage = 90):
    """Split the ``.mat`` files in *data_path* into training and validation lists.

    The file list is sorted before being shuffled with a fixed seed, so the
    split is the same on every machine (``glob`` returns files in arbitrary
    order, which made the seeded permutation non-reproducible).

    Args:
        data_path: directory that holds the ``*.mat`` recordings.
        train_percentage: share of files (0-100) put in the training list;
            the count is rounded up.

    Returns:
        ``(train_file_list, val_file_list)``, two lists of file paths.

    Raises:
        ValueError: if *train_percentage* is outside ``[0, 100]``.
    """
    if not 0 <= train_percentage <= 100:
        raise ValueError("train_percentage must be between 0 and 100")
    file_list = np.array(sorted(glob.glob(os.path.join(data_path, '*.mat'))))
    no_of_files = len(file_list)
    no_of_train = int(np.ceil(no_of_files * train_percentage / 100))
    # Seeding the global generator is kept on purpose: callers may rely on it.
    np.random.seed(20)
    index = np.random.permutation(no_of_files)
    train_file_list = list(file_list[index[:no_of_train]])
    val_file_list = list(file_list[index[no_of_train:]])
    #
    return (train_file_list, val_file_list)


# The name starts with "test", but this is a helper, not a unit test; keep
# pytest from trying to collect it.
test_val_split_v2.__test__ = False
def prediction_v2(labels):
    """Combine per-segment class ids into one prediction for the recording.

    Class ids 1, 5 and 6 take precedence: if any of them occurs at all, the
    most frequent of the three wins (ties resolved in the order 1, 5, 6).
    Otherwise the most frequent id overall is returned, the smallest one on
    a tie.

    Args:
        labels: sequence of integer class ids, one per segment.

    Returns:
        The selected class id as a Python ``int``.

    Raises:
        ValueError: if *labels* is empty.
    """
    labels = np.asarray(labels).ravel()
    if labels.size == 0:
        raise ValueError("labels must contain at least one element")
    #
    priority = (1, 5, 6)
    counts = {cls: int(np.count_nonzero(labels == cls)) for cls in priority}
    #
    if not any(counts.values()):
        # Plain majority vote. np.unique returns sorted values and argmax the
        # first maximum, i.e. the smallest most frequent id -- the same result
        # scipy.stats.mode gave, without depending on its return shape (which
        # changed across SciPy versions) or on scipy.stats being imported.
        values, freq = np.unique(labels, return_counts=True)
        return int(values[np.argmax(freq)])
    #
    return int(max(priority, key=counts.get))
def cpsc2018(record_base_path):
    """Classify every ``.mat`` recording in a directory and write ``answers.csv``.

    The model graph is built, the checkpoint in ``./model`` is restored, and
    for each recording the per-segment predictions are combined into one
    class in ``[1, 9]``. One ``Recording,Result`` row per file is written to
    ``answers.csv`` in the current working directory.

    Args:
        record_base_path: directory containing the ``.mat`` recordings.
    """
    ###########################INFERENCE PART################################

    ## Please process the ecg data, and output the classification result.
    ## result should be an integer number in [1, 9].

    inputs, _, seq_length, logits, _ = build_model_graph()
    model_dir = './model'

    # newline='' lets the csv module control line endings (avoids blank rows
    # on Windows); the with-block closes the file, no explicit close needed.
    with open('answers.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        # column name
        writer.writerow(['Recording', 'Result'])
        with tf.Session() as sess:
            load_model(model_dir, sess)
            # Sorted so the rows come out in a reproducible order.
            for mat_item in sorted(os.listdir(record_base_path)):
                if not mat_item.endswith('.mat') or mat_item.startswith('._'):
                    continue
                #
                segs, lens = process_data(os.path.join(record_base_path, mat_item))
                logits_val = sess.run(logits, feed_dict = {inputs: segs, seq_length: lens})
                # A recording with a single segment may come back as a rank-1
                # vector; argmax(axis=1) needs [segments, classes].
                logits_val = np.atleast_2d(logits_val)
                result = prediction_v2(np.argmax(logits_val, axis = 1)) + 1
                #
                ## If the classification result is an invalid number, the result will be determined as normal(1).
                if result > 9 or result < 1 or not str(result).isdigit():
                    result = 1
                # splitext removes exactly the extension; rstrip('.mat')
                # strips any trailing '.', 'm', 'a', 't' characters and can
                # eat into the record name itself.
                record_name = os.path.splitext(mat_item)[0]
                # write result
                writer.writerow([record_name, result])

    ###########################INFERENCE PART################################
signal challenge/model/model.ckpt-207875.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amostayed/Deep-learning/5115d16229ba3f0d3a94fb64758149d34ec2f245/Physiological signal challenge/model/model.ckpt-207875.data-00000-of-00001 -------------------------------------------------------------------------------- /Physiological signal challenge/model/model.ckpt-207875.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amostayed/Deep-learning/5115d16229ba3f0d3a94fb64758149d34ec2f245/Physiological signal challenge/model/model.ckpt-207875.index -------------------------------------------------------------------------------- /Physiological signal challenge/model/model.ckpt-207875.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amostayed/Deep-learning/5115d16229ba3f0d3a94fb64758149d34ec2f245/Physiological signal challenge/model/model.ckpt-207875.meta -------------------------------------------------------------------------------- /Physiological signal challenge/model/readme.txt: -------------------------------------------------------------------------------- 1 | Trained model 2 | -------------------------------------------------------------------------------- /Physiological signal challenge/readme.txt: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | %%%% The China Physiological Signal Challenge 2018: Automatic identification of the rhythm/morphology abnormalities in 12-lead ECGs. 
%%%%%% 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Second (final) Open-source Submission %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | %%%%%%%%%%%%%%%%% Multimedia and Augmented Reality Lab, College of Electrical Engineering and Computing Systems %%%%%%%%%%%%%%%%%%%%%%%%%%% 5 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% University of Cincinnati, Cincinnati, Ohio, USA %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 6 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Contributors: Ahmed Mostayed, Junye Luo, and Xingliang Shu %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 7 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Advisor: Dr. William G. Wee %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 8 | % DIRECTORY STRUCTURE: 9 | Root 10 | | 11 | notebooks 12 | | 13 | 14 | codes 15 | | 16 | (*.py) 17 | model (tensorflow chckpoint) 18 | | 19 | (*.index) 20 | (*.data) 21 | (*.meta) 22 | training_set 23 | | 24 | (*.mat) 25 | (*.csv) 26 | validation_set 27 | | 28 | (*.mat) 29 | (*.csv) 30 | note.txt 31 | cpsc2018.py 32 | score_py3.py 33 | README_en.txt 34 | % 35 | % DEVELOPMENT ENVIRONMENT: 36 | OS: Windows 37 | Python: Anaconda 4.2.0 64-bit (Python 3.5.2) 38 | Tensorflow GPU version 39 | % REQUIREMENTS: 40 | 1. Python 3.x! 41 | 2. Dependencies: 42 | TensorFlow (use pip to install) 43 | PyWavelets (https://pywavelets.readthedocs.io/en/latest/) (pip install PyWavelets) 44 | Numpy 45 | Scipy 46 | glob 47 | % INSTRUCTIONS: 48 | 1. Please put the evaluation data set (in .mat format) in the /validation_set directory 49 | 2. Name the annotation file for evaluation data "REFERENCE.csv" and put it in the /validation_set directory 50 | 3. Under Windows environment, run command: >python cpsc2018.py -p .\\validation_set\\ 51 | 4. Under Windows environment, run command: >python score_py3.py -r .\\validation_set\\REFERENCE.csv 52 | % 53 | % Additional notes: 54 | 1. Must have TensorFlow and PyWavelets installed to work 55 | 2. 
Should work fine with the latest release of TensorFlow (tested on CPU version) 56 | -------------------------------------------------------------------------------- /Physiological signal challenge/score.txt: -------------------------------------------------------------------------------- 1 | Total File Number: 300 2 | 3 | F11: 0.905 4 | F12: 0.978 5 | F13: 1.000 6 | F14: 1.000 7 | F15: 0.962 8 | F16: 0.873 9 | F17: 0.903 10 | F18: 0.938 11 | F19: 0.966 12 | 13 | F1: 0.947 14 | 15 | Faf: 0.978 16 | Fblock: 0.976 17 | Fpc: 0.889 18 | Fst: 0.946 19 | -------------------------------------------------------------------------------- /Physiological signal challenge/score_py3.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import cpsc2018 3 | import os 4 | import argparse 5 | import numpy as np 6 | import sys 7 | 8 | ''' 9 | cspc2018_challenge score 10 | Written by: Xingyao Wang, Feifei Liu, Chengyu Liu 11 | School of Instrument Science and Engineering 12 | Southeast University, China 13 | chengyu@seu.edu.cn 14 | ''' 15 | 16 | ''' 17 | Score the prediction answers by comparing answers.csv and REFERENCE.csv in validation_set folder, 18 | The scoring uses a F1 measure, which is an average of the nice F1 values from each classification 19 | type. The specific score rules will be found on http://www.icbeb.org/Challenge.html. 
20 | Matrix A follows the format as: 21 | Predicted 22 | Normal AF I-AVB LBBB RBBB PAC PVC STD STE 23 | Normal N11 N12 N13 N14 N15 N16 N17 N18 N19 24 | AF N21 N22 N23 N24 N25 N26 N27 N28 N29 25 | I-AVB N31 N32 N33 N34 N35 N36 N37 N38 N39 26 | LBBB N41 N42 N43 N44 N45 N46 N47 N48 N49 27 | Reference RBBB N51 N52 N53 N54 N55 N56 N57 N58 N59 28 | PAC N61 N62 N63 N64 N65 N66 N67 N68 N69 29 | PVC N71 N72 N73 N74 N75 N76 N77 N78 N79 30 | STD N81 N82 N83 N84 N85 N86 N87 N88 N89 31 | STE N91 N92 N93 N94 N95 N96 N97 N98 N99 32 | 33 | For each of the nine types, F1 is defined as: 34 | Normal: F11=2*N11/(N1x+Nx1) AF: F12=2*N22/(N2x+Nx2) I-AVB: F13=2*N33/(N3x+Nx3) LBBB: F14=2*N44/(N4x+Nx4) RBBB: F15=2*N55/(N5x+Nx5) 35 | PAC: F16=2*N66/(N6x+Nx6) PVC: F17=2*N77/(N7x+Nx7) STD: F18=2*N88/(N8x+Nx8) STE: F19=2*N99/(N9x+Nx9) 36 | 37 | The final challenge score is defined as: 38 | F1 = (F11+F12+F13+F14+F15+F16+F17+F18+F19)/9 39 | 40 | In addition, we also calculate the F1 measures for each of the four sub-abnormal types: 41 | AF: Faf=2*N22/(N2x+Nx2) Block: Fblock=2*(N33+N44+N55)/(N3x+Nx3+N4x+Nx4+N5x+Nx5) 42 | Premature contraction: Fpc=2*(N66+N77)/(N6x+Nx6+N7x+Nx7) ST-segment change: Fst=2*(N88+N99)/(N8x+Nx8+N9x+Nx9) 43 | 44 | The statistics of predicted answers and the final score are saved to score.txt in local path. 
def _group_f1(confusion, start, stop):
    """Return the F1 measure of the classes with indices ``start..stop-1``."""
    hits = np.trace(confusion[start:stop, start:stop])
    # Row sums are reference counts, column sums are prediction counts.
    # A group absent from both yields 0/0 = nan, as it always did.
    total = np.sum(confusion[start:stop, :]) + np.sum(confusion[:, start:stop])
    return 2 * hits / total


def score(answers_csv_path, reference_csv_path):
    """Score predictions against the reference labels.

    Builds the 9x9 confusion matrix (rows: reference class, columns:
    predicted class), prints the nine per-class F1 values, their mean and the
    four grouped F1 values, and writes the same report to ``score.txt`` in
    the current working directory.

    A prediction counts as correct when it equals any of the up to three
    reference labels of the recording; otherwise it is booked as a confusion
    with the first reference label.

    Parameters
    ----------
    answers_csv_path : str
        CSV file with the columns ``Recording`` and ``Result``.
    reference_csv_path : str
        CSV file with the columns ``Recording``, ``First_label``,
        ``Second_label`` and ``Third_label``.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If a recording in the answers file is missing from the reference.
    """
    answers = dict()
    reference = dict()
    # Builtin float/int replace the np.float/np.int aliases, which were
    # removed from NumPy and raise AttributeError there.
    A = np.zeros((9, 9), dtype=float)
    with open(answers_csv_path, newline='') as f:
        for row in csv.DictReader(f):
            answers.setdefault(row['Recording'], []).append(row['Result'])
    with open(reference_csv_path, newline='') as ref:
        for row in csv.DictReader(ref):
            reference.setdefault(row['Recording'], []).append(
                [row['First_label'], row['Second_label'], row['Third_label']])

    for key, results in answers.items():
        # When a recording is listed more than once the last answer wins.
        predict = int(results[-1])
        # Empty second/third labels are stored as 0, which is no class.
        value = [int(item) if item != '' else 0 for item in reference[key][0]]

        if predict in value:
            A[predict-1][predict-1] += 1
        else:
            A[value[0]-1][predict-1] += 1

    # F11 .. F19, one per class, in confusion-matrix order.
    per_class = [_group_f1(A, i, i + 1) for i in range(9)]
    F1 = sum(per_class) / 9

    ## following is calculating scores for 4 types: AF, Block, Premature contraction, ST-segment change.
    Faf = _group_f1(A, 1, 2)
    Fblock = _group_f1(A, 2, 5)
    Fpc = _group_f1(A, 5, 7)
    Fst = _group_f1(A, 7, 9)

    print('Total File Number: ', np.sum(A))

    for number, value in enumerate(per_class, start=1):
        print("F1%d: " % number, value)
    print("F1: ", F1)

    print("Faf: ", Faf)
    print("Fblock: ", Fblock)
    print("Fpc: ", Fpc)
    print("Fst: ", Fst)

    with open('score.txt', 'w') as score_file:
        print('Total File Number: %d\n' % (np.sum(A)), file=score_file)
        for number, value in enumerate(per_class, start=1):
            # A blank line separates the last per-class value from the mean.
            ending = '\n' if number == 9 else ''
            print('F1%d: %0.3f%s' % (number, value, ending), file=score_file)
        print('F1: %0.3f\n' % F1, file=score_file)
        print('Faf: %0.3f' % Faf, file=score_file)
        print('Fblock: %0.3f' % Fblock, file=score_file)
        print('Fpc: %0.3f' % Fpc, file=score_file)
        print('Fst: %0.3f' % Fst, file=score_file)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-r',
                        '--reference_path',
                        help='path saving reference file')

    args = parser.parse_args()
    score('answers.csv', args.reference_path)
/Physiological signal challenge/training.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Import necessary modules" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np\n", 19 | "import scipy as sp\n", 20 | "import scipy.io as sio\n", 21 | "import scipy.signal as sig\n", 22 | "import pywt\n", 23 | "import os\n", 24 | "import glob\n", 25 | "import itertools\n", 26 | "import matplotlib\n", 27 | "import pandas as pd\n", 28 | "import re\n", 29 | "import math\n", 30 | "import tensorflow as tf\n", 31 | "from tensorflow.contrib.layers import fully_connected\n", 32 | "import tensorflow.contrib.rnn as recurrent\n", 33 | "import sklearn.preprocessing\n", 34 | "#\n", 35 | "%matplotlib inline" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": { 41 | "collapsed": true 42 | }, 43 | "source": [ 44 | "Load utility codes" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": { 51 | "collapsed": true 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "from codes.pre_processing import *\n", 56 | "from codes.segmentation import *\n", 57 | "from codes.utils import *\n", 58 | "from codes.training import *\n", 59 | "from codes.model import *" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "Training/Validation split" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "collapsed": true 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "data_root_dir = '../training_set'\n", 78 | "train_file_list, val_file_list = test_val_split_v2(data_root_dir, train_percentage = 90)\n", 79 | "ref_file = os.path.join(data_root_dir, 'REFERENCE.csv')\n", 80 | "#\n", 81 | 
"list_of_training_files = np.array(train_file_list)\n", 82 | "list_of_validation_files = np.array(val_file_list)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": { 88 | "collapsed": false 89 | }, 90 | "source": [ 91 | "Define model graph" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": { 98 | "collapsed": true 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "# 1000 x 12 batches\n", 103 | "input_size = 12\n", 104 | "time_steps = 1000\n", 105 | "num_classes = 9\n", 106 | "hidden_size = 100\n", 107 | "num_hidden = 2\n", 108 | "output_size = 9\n", 109 | "keep_prob = 0.5 # dropout\n", 110 | "inputs = tf.placeholder(tf.float32, [None, time_steps, input_size])\n", 111 | "labels = tf.placeholder(tf.int32, [None])\n", 112 | "seq_length = tf.placeholder(tf.int32, [None])\n", 113 | "#\n", 114 | "is_training = True # set it to true at first\n", 115 | "#\n", 116 | "def RNN_bidirectional(input_tensor, Training):\n", 117 | " with tf.variable_scope(\"recurrent\", initializer = tf.contrib.layers.variance_scaling_initializer()):\n", 118 | " cell = tf.nn.rnn_cell.BasicLSTMCell\n", 119 | " cells_fw = [cell(hidden_size) for _ in range(num_hidden)]\n", 120 | " cells_bw = [cell(hidden_size) for _ in range(num_hidden)]\n", 121 | " cells_fw = [tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob = keep_prob if Training is True else 1.0) for cell in cells_fw]\n", 122 | " cells_bw = [tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob = keep_prob if Training is True else 1.0) for cell in cells_bw]\n", 123 | " _, states_fw, states_bw = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(\n", 124 | " cells_fw=cells_fw,\n", 125 | " cells_bw=cells_bw,\n", 126 | " inputs=input_tensor,\n", 127 | " sequence_length = seq_length,\n", 128 | " dtype=tf.float32)\n", 129 | " outputs_fw = tf.concat(states_fw[-1][-1], axis = 1)\n", 130 | " outputs_bw = tf.concat(states_bw[-1][-1], axis = 1)\n", 131 | " outputs = tf.concat([outputs_fw, 
outputs_bw], axis = 1)\n", 132 | " logits = tf.squeeze(fully_connected(outputs, output_size, activation_fn = None))\n", 133 | " #\n", 134 | " return logits" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "collapsed": false 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "logits = RNN_bidirectional(inputs, Training = is_training)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "Define the loss and training ops" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": { 159 | "collapsed": false 160 | }, 161 | "outputs": [], 162 | "source": [ 163 | "cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = labels, logits = logits)\n", 164 | "loss = tf.reduce_mean(cross_entropy)\n", 165 | "#\n", 166 | "optimizer = tf.train.AdamOptimizer(learning_rate = 0.001)\n", 167 | "gradients, variables = zip(*optimizer.compute_gradients(loss))\n", 168 | "# gradient clipping - makes the training more stable\n", 169 | "gradients = [\n", 170 | " None if gradient is None else tf.clip_by_norm(gradient, 5.0)\n", 171 | " for gradient in gradients]\n", 172 | "training_op = optimizer.apply_gradients(zip(gradients, variables))\n", 173 | "#\n", 174 | "correct = tf.nn.in_top_k(logits, labels, 1)\n", 175 | "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "Saver" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": { 189 | "collapsed": true 190 | }, 191 | "outputs": [], 192 | "source": [ 193 | "saver = tf.train.Saver(max_to_keep = 5, keep_checkpoint_every_n_hours = 1)\n", 194 | "save_dir = './model'\n", 195 | "#\n", 196 | "model_name_prefix = 'model.ckpt'\n", 197 | "if not os.path.exists(save_dir):\n", 198 | " os.makedirs(save_dir)" 199 | ] 200 | }, 201 | { 202 
| "cell_type": "markdown", 203 | "metadata": { 204 | "collapsed": true 205 | }, 206 | "source": [ 207 | "Initialize variables" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": { 214 | "collapsed": false 215 | }, 216 | "outputs": [], 217 | "source": [ 218 | "init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())" 219 | ] 220 | }, 221 | { 222 | "cell_type": "markdown", 223 | "metadata": {}, 224 | "source": [ 225 | "Begin training" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "metadata": { 232 | "collapsed": false 233 | }, 234 | "outputs": [], 235 | "source": [ 236 | "with tf.Session() as sess:\n", 237 | " \n", 238 | " sess.run(init_op)\n", 239 | " path = tf.train.get_checkpoint_state(save_dir)\n", 240 | " if path is None:\n", 241 | " global_step = 0\n", 242 | " else:\n", 243 | " global_step = int(path.model_checkpoint_path.split('-')[-1])\n", 244 | " #\n", 245 | " \n", 246 | " if path is None:\n", 247 | " sess.run(init_op)\n", 248 | " else:\n", 249 | " saver.restore(sess, path.model_checkpoint_path)\n", 250 | " \n", 251 | " for step in range(20000):\n", 252 | " #\n", 253 | " data_numpy, labels_numpy, seq_length_numpy = sample_batch_for_training(list_of_training_files, ref_file)\n", 254 | " _, loss_value, logits_value = sess.run([training_op, loss, logits], feed_dict = {inputs: data_numpy, labels: labels_numpy, seq_length: seq_length_numpy})\n", 255 | " #\n", 256 | " if step % 20 == 0:\n", 257 | " print('current iteration: {}'.format(step + global_step))\n", 258 | " print('loss value: {}'.format(loss_value))\n", 259 | " acc_train = sess.run(accuracy, feed_dict = {inputs: data_numpy, labels: labels_numpy, seq_length: seq_length_numpy})\n", 260 | " is_training = False # set to false before evaluating the test accuracy\n", 261 | " sub = np.random.randint(len(list_of_validation_files))\n", 262 | " data_val, labels_val, seq_length_val = 
sample_batch(list_of_validation_files[sub], ref_file, mode = 'training')\n", 263 | " acc_val = sess.run(accuracy, feed_dict = {inputs: data_val, labels: labels_val, seq_length: seq_length_val})\n", 264 | " is_training = True # set to true again for the subsequent iterations\n", 265 | " #\n", 266 | " print(step, \"Training accuracy:\", acc_train, \"Test accuracy\", acc_val)\n", 267 | " try:\n", 268 | " loss_list.append(loss_value)\n", 269 | " acc_train_list.append(acc_train)\n", 270 | " acc_test_list.append(acc_val)\n", 271 | " except:\n", 272 | " loss_list = list()\n", 273 | " acc_train_list = list()\n", 274 | " acc_test_list = list()\n", 275 | " loss_list.append(loss_value)\n", 276 | " acc_train_list.append(acc_train)\n", 277 | " acc_test_list.append(acc_val)\n", 278 | " save_path = saver.save(sess, os.path.join(save_dir, model_name_prefix), global_step = step + global_step + 1) # model count begins with 1\n", 279 | " print(\"Model saved in path: %s\" % save_path)\n", 280 | " \n", 281 | " if loss_value < 0.10:\n", 282 | " break" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | "Plot training curves" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": { 296 | "collapsed": false 297 | }, 298 | "outputs": [], 299 | "source": [ 300 | "import matplotlib.pyplot as plt\n", 301 | "plt.figure(num = 1)\n", 302 | "plt.plot(range(0, step, 20), acc_train_list, 'k', range(0, step, 20), acc_test_list, 'b')\n", 303 | "plt.figure(num = 2)\n", 304 | "plt.plot(range(0, step, 20), loss_list, 'r')" 305 | ] 306 | } 307 | ], 308 | "metadata": { 309 | "anaconda-cloud": {}, 310 | "kernelspec": { 311 | "display_name": "Python [conda root]", 312 | "language": "python", 313 | "name": "conda-root-py" 314 | }, 315 | "language_info": { 316 | "codemirror_mode": { 317 | "name": "ipython", 318 | "version": 3 319 | }, 320 | "file_extension": ".py", 321 | "mimetype": "text/x-python", 322 | "name": 
"python", 323 | "nbconvert_exporter": "python", 324 | "pygments_lexer": "ipython3", 325 | "version": "3.5.2" 326 | } 327 | }, 328 | "nbformat": 4, 329 | "nbformat_minor": 1 330 | } 331 | -------------------------------------------------------------------------------- /Physiological signal challenge/training_set/readme.txt: -------------------------------------------------------------------------------- 1 | Download the training dataset (matlab data format) and annotation file (REFERENCE.CSV) from: 2 | http://www.icbeb.org/Challenge.html 3 | and put them in this directory 4 | -------------------------------------------------------------------------------- /Physiological signal challenge/validation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import scipy as sp\n", 13 | "import scipy.io as sio\n", 14 | "import scipy.signal as sig\n", 15 | "import pywt\n", 16 | "import os\n", 17 | "import glob\n", 18 | "import itertools\n", 19 | "import matplotlib\n", 20 | "import pandas as pd\n", 21 | "import re\n", 22 | "import tensorflow as tf\n", 23 | "from tensorflow.contrib.layers import fully_connected\n", 24 | "import tensorflow.contrib.rnn as recurrent\n", 25 | "import sklearn.preprocessing\n", 26 | "import matplotlib.pyplot as plt\n", 27 | "#\n", 28 | "%matplotlib inline" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "collapsed": false 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "from codes.pre_processing import *\n", 40 | "from codes.segmentation import *\n", 41 | "from codes.utils import *\n", 42 | "from codes.training import *\n", 43 | "from codes.model import *" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "A utility 
fuction to plot confusion matrix: \n", 51 | "http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html#sphx-glr-auto-examples-model-selection-plot-confusion-matrix-py" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": { 58 | "collapsed": true 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "def plot_confusion_matrix(cm, classes,\n", 63 | " normalize=False,\n", 64 | " title='Confusion matrix',\n", 65 | " cmap=plt.cm.Blues):\n", 66 | " \"\"\"\n", 67 | " This function prints and plots the confusion matrix.\n", 68 | " Normalization can be applied by setting `normalize=True`.\n", 69 | " \"\"\"\n", 70 | " if normalize:\n", 71 | " cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]\n", 72 | " print(\"Normalized confusion matrix\")\n", 73 | " else:\n", 74 | " print('Confusion matrix, without normalization')\n", 75 | "\n", 76 | " print(cm)\n", 77 | "\n", 78 | " plt.imshow(cm, interpolation='nearest', cmap=cmap)\n", 79 | " plt.title(title)\n", 80 | " plt.colorbar()\n", 81 | " tick_marks = np.arange(len(classes))\n", 82 | " plt.xticks(tick_marks, classes, rotation=45)\n", 83 | " plt.yticks(tick_marks, classes)\n", 84 | "\n", 85 | " fmt = '.2f' if normalize else 'd'\n", 86 | " thresh = cm.max() / 2.\n", 87 | " for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):\n", 88 | " plt.text(j, i, format(cm[i, j], fmt),\n", 89 | " horizontalalignment=\"center\",\n", 90 | " color=\"white\" if cm[i, j] > thresh else \"black\")\n", 91 | "\n", 92 | " plt.tight_layout()\n", 93 | " plt.ylabel('True class')\n", 94 | " plt.xlabel('Predicted class')" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": { 101 | "collapsed": false 102 | }, 103 | "outputs": [], 104 | "source": [ 105 | "data_root_dir = '../training_set'\n", 106 | "train_file_list, val_file_list = test_val_split_v2(data_root_dir, train_percentage = 90)\n", 107 | "ref_file = 
os.path.join(data_root_dir, 'REFERENCE.csv')" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": { 114 | "collapsed": false 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "df = pd.read_csv(ref_file, delimiter = ',')\n", 119 | "#\n", 120 | "RECORDS = pd.Series.as_matrix(df.Recording)\n", 121 | "LABEL_1 = pd.Series.as_matrix(df.First_label)\n", 122 | "LABEL_2 = pd.Series.as_matrix(df.Second_label)\n", 123 | "LABEL_3 = pd.Series.as_matrix(df.Third_label)\n", 124 | "#\n", 125 | "N = len(RECORDS)" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": { 132 | "collapsed": false 133 | }, 134 | "outputs": [], 135 | "source": [ 136 | "# build the graph\n", 137 | "inputs, labels, seq_length, logits, accuracy = build_model_graph()" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": { 144 | "collapsed": true 145 | }, 146 | "outputs": [], 147 | "source": [ 148 | "model_dir = './model'" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": { 155 | "collapsed": false 156 | }, 157 | "outputs": [], 158 | "source": [ 159 | "# prediction\n", 160 | "# first some placeholders to keep the results\n", 161 | "sub_id = list()\n", 162 | "sub_actual = list()\n", 163 | "sub_predict = list()\n", 164 | "#\n", 165 | "with tf.Session() as sess:\n", 166 | " # load the model\n", 167 | " load_model(model_dir, sess)\n", 168 | " #\n", 169 | " for num in np.arange(len(val_file_list)):\n", 170 | " record = re.search('A[0-9]+', val_file_list[num]).group(0)\n", 171 | " sub_id.append(record)\n", 172 | " parent_label = LABEL_1[np.squeeze(np.where(RECORDS == record))]\n", 173 | " sub_actual.append(parent_label - 1)\n", 174 | " peaks, features = peak_detector_with_refinement(val_file_list[num], 'sym8', max_level, window_size, window_size_for_threshold, search_radius)\n", 175 | " segs, labs, lens = 
extract_ecg_segments_v2(peaks, val_file_list[num], parent_label, 1000)\n", 176 | " #segs = np.transpose(segs, axes = (0, 2, 1))\n", 177 | " logits_val = sess.run(logits, feed_dict = {inputs: segs, labels: labs, seq_length: lens})\n", 178 | " acc = sess.run(accuracy, feed_dict = {inputs: segs, labels: labs, seq_length: lens})\n", 179 | " sub_predict.append(prediction_v2(np.argmax(logits_val, axis = 1)))\n", 180 | " print('processed: ' + RECORDS[np.squeeze(np.where(RECORDS == record))])\n", 181 | "#\n", 182 | "data_dict = {'id': sub_id, 'actual_class': sub_actual, 'predicted_class': sub_predict}\n", 183 | "df = pd.DataFrame(data = data_dict) " 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": { 190 | "collapsed": false 191 | }, 192 | "outputs": [], 193 | "source": [ 194 | "from sklearn.metrics import f1_score, confusion_matrix\n", 195 | "#\n", 196 | "GT_labels = df['actual_class'].tolist()\n", 197 | "predict_labels = df['predicted_class'].tolist()\n", 198 | "#\n", 199 | "con_mtx = confusion_matrix(GT_labels, predict_labels, )" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": { 206 | "collapsed": false 207 | }, 208 | "outputs": [], 209 | "source": [ 210 | "print(f1_score(GT_labels, predict_labels, average = 'micro'))\n", 211 | "print(f1_score(GT_labels, predict_labels, average = 'weighted'))\n", 212 | "print(f1_score(GT_labels, predict_labels, average = None))" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": { 219 | "collapsed": true 220 | }, 221 | "outputs": [], 222 | "source": [ 223 | "plt.figure()\n", 224 | "class_names = ['Normal', 'AF', 'I-AVB', 'LBBB', 'RBBB', 'PAC', 'PVC', 'STD', 'STE']\n", 225 | "plot_confusion_matrix(con_mtx, classes=class_names, normalize=True,\n", 226 | " title='Confusion Matrix')" 227 | ] 228 | } 229 | ], 230 | "metadata": { 231 | "anaconda-cloud": {}, 232 | "kernelspec": { 233 | 
"display_name": "Python [conda root]", 234 | "language": "python", 235 | "name": "conda-root-py" 236 | }, 237 | "language_info": { 238 | "codemirror_mode": { 239 | "name": "ipython", 240 | "version": 3 241 | }, 242 | "file_extension": ".py", 243 | "mimetype": "text/x-python", 244 | "name": "python", 245 | "nbconvert_exporter": "python", 246 | "pygments_lexer": "ipython3", 247 | "version": "3.5.2" 248 | } 249 | }, 250 | "nbformat": 4, 251 | "nbformat_minor": 1 252 | } 253 | -------------------------------------------------------------------------------- /Physiological signal challenge/validation_set/readme.txt: -------------------------------------------------------------------------------- 1 | Put the validation data and annotation file (exactly same format as the training_set) in this directory 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Repository of my deep learning projects 2 | --------------------------------------------------------------------------------