├── Physiological signal challenge
    ├── README.md
    ├── answers.csv
    ├── codes
    │   ├── model.py
    │   ├── pre_processing.py
    │   ├── segmentation.py
    │   ├── training.py
    │   └── utils.py
    ├── cpsc2018.py
    ├── model
    │   ├── checkpoint
    │   ├── model.ckpt-207875.data-00000-of-00001
    │   ├── model.ckpt-207875.index
    │   ├── model.ckpt-207875.meta
    │   └── readme.txt
    ├── readme.txt
    ├── score.txt
    ├── score_py3.py
    ├── training.ipynb
    ├── training_set
    │   └── readme.txt
    ├── validation.ipynb
    └── validation_set
    │   └── readme.txt
└── README.md


/Physiological signal challenge/README.md:
--------------------------------------------------------------------------------
1 | Repository for my China Physiological Signal Challenge 2018 Submission (Submission ID: CPSC0204)
2 | 
3 | Challenge website: http://www.icbeb.org/Challenge.html
4 | 
5 | arXiv preprint of my paper:
6 | https://arxiv.org/ftp/arxiv/papers/1811/1811.02090.pdf
7 | 


--------------------------------------------------------------------------------
/Physiological signal challenge/answers.csv:
--------------------------------------------------------------------------------
  1 | Recording,Result
  2 | A0001,5
  3 | A0002,1
  4 | A0003,2
  5 | A0004,2
  6 | A0005,7
  7 | A0006,5
  8 | A0007,2
  9 | A0008,8
 10 | A0009,2
 11 | A0010,5
 12 | A0011,4
 13 | A0012,7
 14 | A0013,8
 15 | A0014,8
 16 | A0015,5
 17 | A0016,1
 18 | A0017,2
 19 | A0018,4
 20 | A0019,2
 21 | A0020,1
 22 | A0021,1
 23 | A0022,5
 24 | A0023,2
 25 | A0024,7
 26 | A0025,7
 27 | A0026,5
 28 | A0027,5
 29 | A0028,5
 30 | A0029,1
 31 | A0030,1
 32 | A0031,7
 33 | A0032,9
 34 | A0033,8
 35 | A0034,9
 36 | A0035,5
 37 | A0036,9
 38 | A0037,1
 39 | A0038,1
 40 | A0039,3
 41 | A0040,6
 42 | A0041,1
 43 | A0042,3
 44 | A0043,2
 45 | A0044,4
 46 | A0045,7
 47 | A0046,9
 48 | A0047,6
 49 | A0048,8
 50 | A0049,6
 51 | A0050,7
 52 | A0051,5
 53 | A0052,7
 54 | A0053,5
 55 | A0054,9
 56 | A0055,6
 57 | A0056,8
 58 | A0057,4
 59 | A0058,9
 60 | A0059,1
 61 | A0060,8
 62 | A0061,2
 63 | A0062,6
 64 | A0063,9
 65 | A0064,2
 66 | A0065,2
 67 | A0066,5
 68 | A0067,8
 69 | A0068,5
 70 | A0069,5
 71 | A0070,3
 72 | A0071,2
 73 | A0072,9
 74 | A0073,1
 75 | A0074,4
 76 | A0075,1
 77 | A0076,8
 78 | A0077,1
 79 | A0078,5
 80 | A0079,2
 81 | A0080,6
 82 | A0081,4
 83 | A0082,4
 84 | A0083,1
 85 | A0084,7
 86 | A0085,5
 87 | A0086,2
 88 | A0087,5
 89 | A0088,7
 90 | A0089,1
 91 | A0090,1
 92 | A0091,6
 93 | A0092,7
 94 | A0093,9
 95 | A0094,1
 96 | A0095,5
 97 | A0096,5
 98 | A0097,6
 99 | A0098,5
100 | A0099,7
101 | A0100,8
102 | A0101,2
103 | A0102,3
104 | A0103,7
105 | A0104,3
106 | A0105,7
107 | A0106,8
108 | A0107,1
109 | A0108,6
110 | A0109,2
111 | A0110,8
112 | A0111,8
113 | A0112,5
114 | A0113,6
115 | A0114,5
116 | A0115,7
117 | A0116,5
118 | A0117,2
119 | A0118,3
120 | A0119,5
121 | A0120,9
122 | A0121,2
123 | A0122,2
124 | A0123,6
125 | A0124,6
126 | A0125,1
127 | A0126,2
128 | A0127,5
129 | A0128,8
130 | A0129,7
131 | A0130,7
132 | A0131,4
133 | A0132,3
134 | A0133,1
135 | A0134,5
136 | A0135,1
137 | A0136,6
138 | A0137,6
139 | A0138,5
140 | A0139,9
141 | A0140,8
142 | A0141,1
143 | A0142,5
144 | A0143,1
145 | A0144,7
146 | A0145,2
147 | A0146,7
148 | A0147,9
149 | A0148,8
150 | A0149,1
151 | A0150,2
152 | A0151,5
153 | A0152,8
154 | A0153,2
155 | A0154,8
156 | A0155,5
157 | A0156,7
158 | A0157,1
159 | A0158,7
160 | A0159,7
161 | A0160,5
162 | A0161,5
163 | A0162,7
164 | A0163,3
165 | A0164,1
166 | A0165,8
167 | A0166,1
168 | A0167,6
169 | A0168,1
170 | A0169,5
171 | A0170,1
172 | A0171,5
173 | A0172,3
174 | A0173,8
175 | A0174,9
176 | A0175,1
177 | A0176,1
178 | A0177,1
179 | A0178,5
180 | A0179,1
181 | A0180,1
182 | A0181,5
183 | A0182,7
184 | A0183,5
185 | A0184,2
186 | A0185,8
187 | A0186,2
188 | A0187,8
189 | A0188,3
190 | A0189,1
191 | A0190,6
192 | A0191,6
193 | A0192,1
194 | A0193,1
195 | A0194,8
196 | A0195,8
197 | A0196,8
198 | A0197,7
199 | A0198,2
200 | A0199,5
201 | A0200,5
202 | A0201,8
203 | A0202,3
204 | A0203,2
205 | A0204,7
206 | A0205,2
207 | A0206,1
208 | A0207,7
209 | A0208,1
210 | A0209,5
211 | A0210,1
212 | A0211,5
213 | A0212,3
214 | A0213,2
215 | A0214,2
216 | A0215,5
217 | A0216,5
218 | A0217,2
219 | A0218,6
220 | A0219,5
221 | A0220,2
222 | A0221,1
223 | A0222,2
224 | A0223,3
225 | A0224,6
226 | A0225,5
227 | A0226,5
228 | A0227,7
229 | A0228,5
230 | A0229,1
231 | A0230,5
232 | A0231,2
233 | A0232,8
234 | A0233,1
235 | A0234,8
236 | A0235,1
237 | A0236,3
238 | A0237,6
239 | A0238,3
240 | A0239,3
241 | A0240,3
242 | A0241,7
243 | A0242,5
244 | A0243,3
245 | A0244,4
246 | A0245,6
247 | A0246,3
248 | A0247,2
249 | A0248,3
250 | A0249,5
251 | A0250,5
252 | A0251,6
253 | A0252,7
254 | A0253,4
255 | A0254,1
256 | A0255,5
257 | A0256,9
258 | A0257,2
259 | A0258,5
260 | A0259,5
261 | A0260,2
262 | A0261,5
263 | A0262,3
264 | A0263,3
265 | A0264,1
266 | A0265,6
267 | A0266,5
268 | A0267,2
269 | A0268,5
270 | A0269,7
271 | A0270,6
272 | A0271,2
273 | A0272,2
274 | A0273,5
275 | A0274,2
276 | A0275,6
277 | A0276,2
278 | A0277,7
279 | A0278,8
280 | A0279,2
281 | A0280,3
282 | A0281,1
283 | A0282,8
284 | A0283,1
285 | A0284,3
286 | A0285,1
287 | A0286,5
288 | A0287,1
289 | A0288,3
290 | A0289,6
291 | A0290,2
292 | A0291,4
293 | A0292,3
294 | A0293,5
295 | A0294,5
296 | A0295,6
297 | A0296,5
298 | A0297,8
299 | A0298,5
300 | A0299,6
301 | A0300,5
302 | 


--------------------------------------------------------------------------------
/Physiological signal challenge/codes/model.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from tensorflow.contrib.layers import fully_connected
 3 | import tensorflow.contrib.rnn as recurrent
 4 | import numpy as np
 5 | #
 6 | def build_model_graph(Training_mode = False, input_size = 12, time_steps = 1000, hidden_size = 100, num_hidden = 2, output_size = 9, keep_prob = 0.5):
 7 | 	#
 8 | 	# place-holdes
 9 | 	inputs = tf.placeholder(tf.float32, [None, time_steps, input_size])
10 | 	labels = tf.placeholder(tf.int32, [None])
11 | 	seq_length = tf.placeholder(tf.int32, [None])
12 | 	#
13 | 	logits, accuracy = RNN_bidirectional(inputs, labels, seq_length, Training_mode = Training_mode, hidden_size = hidden_size, num_hidden = num_hidden, output_size = output_size, keep_prob = keep_prob)
14 | 	#
15 | 	return inputs, labels, seq_length, logits, accuracy
16 | #
def load_model(model_dir, session):
    """Restore the most recent checkpoint found in `model_dir` into `session`.

    Args:
        model_dir: directory containing the `checkpoint` index file and the
            checkpoint data files.
        session: TensorFlow session whose graph variables are restored.

    Raises:
        ValueError: if `model_dir` holds no usable checkpoint state. The
            original code failed here with an AttributeError on None.
    """
    # the saver
    saver = tf.train.Saver(max_to_keep = 5, keep_checkpoint_every_n_hours = 1)
    ckpt_state = tf.train.get_checkpoint_state(model_dir)
    # get_checkpoint_state returns None when no checkpoint file is present.
    if ckpt_state is None or not ckpt_state.model_checkpoint_path:
        raise ValueError("No checkpoint found in model directory: %r" % (model_dir,))
    saver.restore(session, ckpt_state.model_checkpoint_path)
22 | #
def RNN_bidirectional(input_tensor, label_tensor, length_tensor, Training_mode, hidden_size, num_hidden, output_size, keep_prob):
    """Build a stacked bidirectional LSTM classifier and its top-1 accuracy op.

    Args:
        input_tensor: tensor fed as `inputs` to the stacked bidirectional RNN.
        label_tensor: integer targets compared against the logits.
        length_tensor: per-example `sequence_length` passed to the RNN.
        Training_mode: dropout on cell inputs is enabled only when this is
            exactly `True` (identity check); otherwise keep probability is 1.0.
        hidden_size: number of units of every BasicLSTMCell.
        num_hidden: number of stacked layers per direction.
        output_size: width of the final linear layer (number of logits).
        keep_prob: input keep probability used when dropout is enabled.

    Returns:
        (logits, accuracy): the squeezed output of the linear layer and the
        mean of the `in_top_k(..., 1)` indicator over the batch.
    """
    with tf.variable_scope("recurrent", initializer = tf.contrib.layers.variance_scaling_initializer()):
        cell = tf.nn.rnn_cell.BasicLSTMCell
        # One independent cell per layer and per direction.
        cells_fw = [cell(hidden_size) for _ in range(num_hidden)]
        cells_bw = [cell(hidden_size) for _ in range(num_hidden)]
        # Dropout is applied to the cell inputs only, and only in training mode.
        cells_fw = [tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob = keep_prob if Training_mode is True else 1.0) for cell in cells_fw]
        cells_bw = [tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob = keep_prob if Training_mode is True else 1.0) for cell in cells_bw]
        # Per-step outputs are discarded; only the final states are used.
        _, states_fw, states_bw = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
                cells_fw=cells_fw,
                cells_bw=cells_bw,
                inputs=input_tensor,
                sequence_length = length_tensor,
                dtype=tf.float32)
        # [-1] selects the last layer; the second [-1] selects the last field of
        # that layer's state tuple (presumably the LSTM hidden state `h` — confirm).
        outputs_fw = tf.concat(states_fw[-1][-1], axis = 1)
        outputs_bw = tf.concat(states_bw[-1][-1], axis = 1)
        # Forward and backward summaries are joined along the feature axis.
        outputs = tf.concat([outputs_fw, outputs_bw], axis = 1)
        # NOTE(review): tf.squeeze without an axis also removes a batch axis of
        # size 1, so a single-example batch would yield rank-1 logits — confirm
        # that callers (and in_top_k below) never see a batch of one.
        logits = tf.squeeze(fully_connected(outputs, output_size, activation_fn = None))
        #
        # Fraction of examples whose label has the highest logit.
        correct = tf.nn.in_top_k(logits, label_tensor, 1)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    return logits, accuracy
45 | #


--------------------------------------------------------------------------------
/Physiological signal challenge/codes/pre_processing.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import scipy as sp
 3 | import scipy.io as sio
 4 | import scipy.signal as sig
 5 | import pywt
 6 | import os
 7 | import glob
 8 | import itertools
 9 | #
def ecg_preprocessing(data, wfun, dcmp_levels, chop_levels):
    """Denoise `data` with a wavelet decomposition followed by two IIR filters.

    The signal is decomposed along its last axis, the last `chop_levels`
    coefficient arrays of the decomposition are replaced by zeros, the signal
    is reconstructed, and the result is passed through a first-order 2 Hz
    high-pass filter and a second-order 49-51 Hz band-stop filter (both
    configured for a 500 Hz sampling frequency).

    Args:
        data: array whose second axis (`shape[1]`) sets the maximum
            decomposition depth; filtering runs along the last axis.
        wfun: wavelet name understood by PyWavelets.
        dcmp_levels: requested decomposition depth (capped at the maximum
            depth PyWavelets allows for this signal length and wavelet).
        chop_levels: number of trailing coefficient arrays to zero out.

    Returns:
        The filtered reconstruction.
    """
    wavelet = pywt.Wavelet(wfun)
    deepest_allowed = pywt.dwt_max_level(data.shape[1], wavelet)
    levels = min(dcmp_levels, deepest_allowed)
    coeffs = pywt.wavedec(data, wfun, mode='symmetric', level = levels, axis = -1)
    #
    # Walk the levels + 1 coefficient arrays in order; everything from
    # position `first_zeroed` onwards is blanked.
    n_bands = levels + 1
    first_zeroed = n_bands - chop_levels
    coeffs_m = []
    for pos in range(n_bands):
        band = coeffs[pos - n_bands]
        coeffs_m.append(band if pos < first_zeroed else np.zeros_like(band))
    #
    cleaned = pywt.waverec(coeffs_m, wfun, mode='symmetric', axis = -1)
    # remove slow drift
    cleaned = butterworth_high_pass(cleaned, cut_off = 2, order = 1, sampling_freq = 500)
    # remove the 49-51 Hz band
    cleaned = butterworth_notch(cleaned, cut_off = [49, 51], order = 2, sampling_freq = 500)
    #
    return cleaned
24 | #
def butterworth_high_pass(x, cut_off, order, sampling_freq):
    """Apply a causal Butterworth high-pass filter along the last axis of `x`.

    Args:
        x: signal array.
        cut_off: cut-off frequency, in the same unit as `sampling_freq`.
        order: filter order.
        sampling_freq: sampling frequency of `x`.

    Returns:
        The filtered signal (single forward pass with `lfilter`).
    """
    half_rate = sampling_freq / 2
    # butter() expects the cut-off as a fraction of the Nyquist frequency
    normalized_cutoff = cut_off / half_rate
    numerator, denominator = sig.butter(order, normalized_cutoff, btype='highpass')
    return sig.lfilter(numerator, denominator, x, axis = -1)
34 | #
def butterworth_notch(x, cut_off, order, sampling_freq):
    """Apply a causal Butterworth band-stop filter along the last axis of `x`.

    Args:
        x: signal array.
        cut_off: pair of band edges, in the same unit as `sampling_freq`.
        order: filter order passed to `butter`.
        sampling_freq: sampling frequency of `x`.

    Returns:
        The filtered signal (single forward pass with `lfilter`).
    """
    half_rate = sampling_freq / 2
    # band edges expressed as fractions of the Nyquist frequency
    normalized_band = np.array(cut_off) / half_rate
    numerator, denominator = sig.butter(order, normalized_band, btype='bandstop')
    return sig.lfilter(numerator, denominator, x, axis = -1)
45 | #


--------------------------------------------------------------------------------
/Physiological signal challenge/codes/segmentation.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import scipy as sp
  3 | import scipy.io as sio
  4 | import scipy.signal as sig
  5 | import pywt
  6 | import os
  7 | import glob
  8 | import itertools
  9 | import pandas as pd
 10 | import re
 11 | #
 12 | from codes.pre_processing import ecg_preprocessing
 13 | #
 14 | EPS = np.finfo(float).eps
 15 | #
 16 | def wavelet_filtering(data, wfun, max_level = 8):
 17 |     #
 18 |     padsize = int((2 ** max_level) * np.ceil(data.shape[0] / (2 ** max_level)) - data.shape[0])
 19 |     #
 20 |     data_padded = np.pad(data, (0, padsize), 'constant', constant_values=(0, 0))
 21 |     #
 22 |     wave = pywt.swt(data_padded, wfun, level = max_level, start_level = 0, axis=0)
 23 |     #
 24 |     wave_m = [[np.zeros((data_padded.shape[0],), dtype = float) for j in range(2)] for i in range(max_level)]  #list
 25 |     wave_m[-4][1] = wave[-4][1]; wave_m[-5][1] = wave[-5][1]
 26 |     wave_m = [tuple(wave_m[i]) for i in range(max_level)]
 27 |     #
 28 |     data_const = pywt.iswt(wave_m, wfun)
 29 |     if padsize != 0:
 30 |         data_const = data_const[:-padsize]
 31 |     #
 32 |     return data_const
 33 |  #
 34 | def local_energy(data, window_size):
 35 |     #
 36 |     energy = sig.convolve(data ** 2, np.ones((window_size,), dtype = 'float')/window_size, mode='same')
 37 |     return energy
 38 | #
 39 | def find_threshold(feature, window_size_for_threshold):
 40 |     #
 41 |     window_size = window_size_for_threshold
 42 |     loc = np.arange(0, len(feature), window_size)
 43 |     LM = [np.max(feature[l : min(l + window_size, len(feature))]) for l in loc]
 44 |     lm = [np.min(feature[l : min(l + window_size, len(feature))]) for l in loc]
 45 |     #
 46 |     return np.median([np.median(lm), np.median(LM)])
 47 | 
 48 | #
 49 | def detect_local_maxima(feature, window_size_for_threshold):
 50 |     #
 51 |     thresh = find_threshold(feature, window_size_for_threshold)
 52 |     #
 53 |     y_ = feature * (feature > thresh)
 54 |     lobe_detector = y_ != 0
 55 |     lobe_detector = lobe_detector.astype(float)
 56 |     lobe_detector = np.diff(lobe_detector)
 57 |     #
 58 |     lobe_edges_left = np.where(lobe_detector == 1)[0]
 59 |     lobe_edges_right = np.where(lobe_detector == -1)[0]
 60 |     lobe_edges_left = lobe_edges_left
 61 |     lobe_edges_right = lobe_edges_right
 62 |     #
 63 |     if len(lobe_edges_left) == 0 and len(lobe_edges_right) == 1:
 64 |         lobe_edges_left = np.array([0])
 65 |     #
 66 |     if len(lobe_edges_left) == 1 and len(lobe_edges_right) == 0:
 67 |         lobe_edges_right = np.array([len(feature) - 1])
 68 |     #
 69 |     if len(lobe_edges_left) == 0 and len(lobe_edges_right) == 0:
 70 |         peak_loc = []
 71 |         return peak_loc
 72 |     #
 73 |     if lobe_edges_left[0] > lobe_edges_right[0]:
 74 |         lobe_edges_left =  np.concatenate(([0], lobe_edges_left))
 75 |     #
 76 |     if lobe_edges_left[-1] > lobe_edges_right[-1]:
 77 |         lobe_edges_right =  np.concatenate((lobe_edges_right, [len(feature) - 1]))
 78 |     #
 79 |     no_of_lobes = len(lobe_edges_left)
 80 |     peak_loc = [np.argmax(y_[lobe_edges_left[idx] : lobe_edges_right[idx] + 1]) for idx in range(no_of_lobes)] + lobe_edges_left - 1
 81 |     #
 82 |     return peak_loc
 83 | #
 84 | def detect_local_maxima_v2(feature, threshold):
 85 |     #
 86 |     thresh = threshold
 87 |     #
 88 |     y_ = feature * (feature > thresh)
 89 |     lobe_detector = y_ != 0
 90 |     lobe_detector = lobe_detector.astype(float)
 91 |     lobe_detector = np.diff(lobe_detector)
 92 |     #
 93 |     lobe_edges_left = np.where(lobe_detector == 1)[0]
 94 |     lobe_edges_right = np.where(lobe_detector == -1)[0]
 95 |     lobe_edges_left = lobe_edges_left
 96 |     lobe_edges_right = lobe_edges_right
 97 |     #
 98 |     if len(lobe_edges_left) == 0 and len(lobe_edges_right) == 1:
 99 |         lobe_edges_left = np.array([0])
100 |     #
101 |     if len(lobe_edges_left) == 1 and len(lobe_edges_right) == 0:
102 |         lobe_edges_right = np.array([len(feature) - 1])
103 |     #
104 |     if len(lobe_edges_left) == 0 and len(lobe_edges_right) == 0:
105 |         peak_loc = []
106 |         return peak_loc
107 |     #
108 |     if lobe_edges_left[0] > lobe_edges_right[0]:
109 |         lobe_edges_left =  np.concatenate(([0], lobe_edges_left))
110 |     #
111 |     if lobe_edges_left[-1] > lobe_edges_right[-1]:
112 |         lobe_edges_right =  np.concatenate((lobe_edges_right, [len(feature) - 1]))
113 |     #
114 |     no_of_lobes = len(lobe_edges_left)
115 |     peak_loc = [np.argmax(y_[lobe_edges_left[idx] : lobe_edges_right[idx] + 1]) for idx in range(no_of_lobes)]
116 |     peak_loc = peak_loc + lobe_edges_left - 1
117 |     #
118 |     return peak_loc
119 | #
def peak_refine(primary_locations, search_radius, count_threshold):
    """Merge clustered candidate locations into single peaks.

    The candidates are sorted and scanned from left to right. All candidates
    closer than `search_radius` to the first candidate of a cluster form that
    cluster; the cluster is kept (as the rounded median of its members) only
    if it has more than `count_threshold` members.

    Args:
        primary_locations: candidate sample indices (any order, may be empty).
        search_radius: maximum distance from the cluster's first member.
        count_threshold: a cluster must contain strictly more members than this.

    Returns:
        Integer array of refined peak locations, in increasing order.

    Fixes over the previous version: the scan stopped at `N - 1`, so a final
    candidate forming a cluster of its own was never examined (it was lost
    whenever `count_threshold` was 0); and a cluster size of zero (for example
    with a non-positive `search_radius`) no longer loops forever.
    """
    primary_locations = np.sort(primary_locations)
    N = len(primary_locations)
    #
    kept = []
    i = 0
    while i < N:
        begin = primary_locations[i]
        count = int(np.sum(np.abs(primary_locations[i:] - begin) < search_radius))
        # always advance by at least one candidate
        step = max(count, 1)
        if count > count_threshold:
            kept.append(np.median(primary_locations[i : i + step]))
        i = i + step
    #
    return np.round(np.array(kept, dtype = float)).astype(int)
138 | #
def peak_detector_basic(data, wfun, max_level, window_size, window_size_for_threshold, search_radius):
    """Detect peaks by pooling per-row detections over all rows of `data`.

    For every row the signal is wavelet-filtered, converted to a local-energy
    feature, and searched for local maxima. The candidates of all rows are
    pooled and merged by `peak_refine`; a cluster is kept when it has more
    members than half (rounded down) the number of rows that contain at least
    one non-zero sample.

    Returns:
        (peaks, features): refined peak locations and the per-row energy
        features, stored in an array shaped like `data`.
    """
    features = np.zeros_like(data)
    pooled_candidates = np.array([])
    #
    for ch in range(data.shape[0]):
        filtered = wavelet_filtering(data[ch, :], wfun, max_level)
        energy = local_energy(filtered, window_size)
        candidates = detect_local_maxima(energy, window_size_for_threshold)
        # newest candidates go in front, exactly as before
        pooled_candidates = np.concatenate((candidates, pooled_candidates))
        features[ch, :] = energy
    #
    active_rows = np.sum(np.any(data, axis = 1))
    count_thres = np.floor(active_rows / 2)
    #
    peaks = peak_refine(pooled_candidates, search_radius, count_thres)
    return peaks, features
158 | #
def retrieve_missing_peaks(peaks, features, missing_thres):
    """Search for additional peaks inside unusually long gaps between peaks.

    The start and end of the record are added as virtual peaks when the first
    or last real peak is more than `missing_thres` samples away from them.
    Every gap longer than `missing_thres` is then searched row by row (the
    central part of the gap, leaving one sixth of the gap out on each side)
    with a threshold derived from the feature values at the two gap borders.
    The candidates of all rows are merged with `peak_refine` (radius 100).

    Args:
        peaks: non-empty sequence of peak locations, in increasing order.
        features: 2-D feature array (rows x samples).
        missing_thres: gap length above which a gap is searched.

    Returns:
        `peaks` unchanged when no gap is long enough, otherwise a sorted
        integer array containing the old and the newly found peaks.

    Changes over the previous version (results are identical): a discarded
    expression statement that ran `detect_local_maxima_v2` a second time for
    every row was removed, and values that do not depend on the row or gap
    are now computed once.
    """
    peaks_proxy = list(peaks)
    n_samples = features.shape[1]
    #
    if n_samples - peaks[-1] > missing_thres:
        peaks_proxy.append(n_samples - 1)
    #
    if peaks[0] > missing_thres:
        peaks_proxy.insert(0, 0)
    #
    gap_starts = np.where(np.diff(peaks_proxy) > missing_thres)[0]
    if len(gap_starts) == 0:
        return peaks
    #
    # a cluster needs support from more than half of the non-empty rows
    count_thres = np.floor(np.sum(np.any(features, axis = 1)) / 2)
    extra_peaks = np.array([])
    #
    for gap in gap_starts:
        left = peaks_proxy[gap]
        right = peaks_proxy[gap + 1]
        # margin left out on both sides of the gap
        s = int(round((right - left) / 6))
        #
        peak_loc_ens = np.array([])
        for ch in np.arange(features.shape[0]):
            seg = features[ch, left + s : right - s + 1]
            threshold = features[ch, left] / 8 + features[ch, right] / 8
            # convert segment-relative indices back to record indices
            found = np.array(detect_local_maxima_v2(seg, threshold)) + left + s
            peak_loc_ens = np.concatenate((found, peak_loc_ens))
        #
        peak_loc_ens = peak_loc_ens.astype(int)
        extra_peaks = np.concatenate((extra_peaks, peak_refine(peak_loc_ens, 100, count_thres)))
    #
    peaks = np.concatenate((peaks, extra_peaks))
    return np.sort(peaks).astype(int)
191 | #
def remove_false_peaks(peaks, remove_thres):
    """Drop one peak of every pair that is closer together than `remove_thres`.

    For a too-close pair (k, k + 1) the two candidate outcomes are compared:
    the interval to a neighbouring peak that remains if peak k + 1 is removed
    (d1) and the one that remains if peak k is removed (d2). The peak whose
    removal leaves the interval closer to the median peak-to-peak interval is
    deleted. The following peak (k + 2) serves as neighbour; for the last pair
    the preceding peak (k - 1) is used instead.

    Args:
        peaks: peak locations in increasing order.
        remove_thres: minimum allowed distance between consecutive peaks.

    Returns:
        Array of the remaining peaks.

    Fix over the previous version: for the last pair d1 and d2 were computed
    with the wrong sign (neighbour minus peak, with the neighbour on the
    left), which made both negative and always deleted the final peak
    regardless of the median interval. With only two peaks there is no
    neighbour; the later peak is dropped, as before.
    """
    peaks_proxy = list(peaks)
    intervals = np.diff(peaks_proxy)
    false_peak_loc = np.where(intervals < remove_thres)[0]
    to_delete = []
    #
    if len(false_peak_loc) != 0:
        typical_interval = np.median(intervals)
        last_pair = len(peaks_proxy) - 2
        #
        for k in false_peak_loc:
            if k == last_pair:
                if k == 0:
                    # only two peaks in total: nothing to compare against
                    to_delete.append(k + 1)
                    continue
                neighbour = peaks_proxy[k - 1]
                d1 = peaks_proxy[k] - neighbour
                d2 = peaks_proxy[k + 1] - neighbour
            else:
                neighbour = peaks_proxy[k + 2]
                d1 = neighbour - peaks_proxy[k]
                d2 = neighbour - peaks_proxy[k + 1]
            #
            if abs(typical_interval - d1) > abs(typical_interval - d2):
                to_delete.append(k)
            else:
                to_delete.append(k + 1)
    #
    return np.delete(peaks, np.array(to_delete, dtype = int))
216 | #
def peak_detector_with_refinement(file_name, wfun = 'sym8', max_level = 8, window_size = 50, window_size_for_threshold = 1000, search_radius = 50):
    """Detect peaks in a recording file and clean up the detection.

    The signal array is read from `loadmat(file_name)['ECG'][0][0][2]`. After
    the basic multi-row detection, missing peaks are searched twice (first in
    gaps longer than 750 samples, then in gaps longer than 1.7 times the
    median peak distance), and finally peaks closer than 0.33 times the median
    peak distance are pruned.

    Returns:
        (peaks, features) as produced by the detection pipeline.
    """
    record = sio.loadmat(file_name)
    data = record['ECG'][0][0][2]
    #
    peaks, features = peak_detector_basic(data, wfun, max_level, window_size, window_size_for_threshold, search_radius)
    #
    # pass 1: fixed gap limit
    peaks = retrieve_missing_peaks(peaks, features, 750)
    #
    # pass 2: gap limit relative to the current median peak distance
    median_distance = np.median(np.diff(peaks))
    peaks = retrieve_missing_peaks(peaks, features, 1.7 * median_distance)
    #
    # pruning limit uses the median recomputed after pass 2
    median_distance = np.median(np.diff(peaks))
    peaks = remove_false_peaks(peaks, 0.33 * median_distance)
    #
    return peaks, features
233 | #
def set_to_desired_length(array, max_len = 1000):
    """Truncate or zero-pad the second axis of a 2-D array to `max_len`.

    Longer inputs keep their first `max_len` columns; shorter inputs are
    padded with zeros on the right; inputs of exactly `max_len` columns are
    returned as they are.
    """
    n_cols = array.shape[1]
    if n_cols > max_len:
        return array[:, :max_len]
    if n_cols < max_len:
        missing = max_len - n_cols
        return np.pad(array, [(0, 0), (0, missing)], mode='constant')
    return array
241 | #
def extract_features(peaks):
    """Return the distances between consecutive entries of `peaks`."""
    consecutive_distances = np.diff(peaks)
    return consecutive_distances
245 | #
def extract_ecg_segments(peaks, data, data_label, max_length):
    """Cut a filtered recording into peak-centred segments of fixed length.

    The recording is cast to float32 and cleaned with `ecg_preprocessing`.
    Depending on the number of detected peaks:

    * up to 4 peaks: the whole recording is one segment;
    * 5 or 6 peaks: one segment is cut around peak index 2;
    * 7 or more peaks: one segment is cut around every peak index from 3 up
      to (but excluding) `max(num_of_peaks - 4, 4)`.

    Segments are decimated by a factor of 7 and truncated or zero-padded to
    `max_length`. When `data_label` is 2, 6 or 7, a segment whose surrounding
    peak distances are nearly uniform (min / max >= 0.80) is labelled 1
    instead of `data_label`.

    Args:
        peaks: peak locations in increasing order.
        data: 2-D recording (rows x samples).
        data_label: label of the whole recording (1-based).
        max_length: number of samples of every returned segment.

    Returns:
        (ecg_segments, labels, seq_lengths): rank-3 segment array, labels
        shifted to start at 0, and the unpadded segment lengths capped at
        `max_length`, both as integer arrays.

    Fix over the previous version: in the 5/6-peak case the segment array was
    cast with `.astype(int)`, which truncated the filtered floating-point
    signal to whole numbers; the other two cases always returned floats. The
    cast is removed. The unused local `no_channels` is dropped as well.
    """
    # read data as float32, labels as int32, sequence length as int32
    data = data.astype(np.float32)
    # clean the data first
    data = ecg_preprocessing(data, 'sym8', 8, 3)
    #
    num_of_peaks = len(peaks)
    data_length = np.expand_dims(data.shape[1], axis = 0)
    #
    if num_of_peaks == 5:
        # with 5 peaks the distance from the last peak to the end of the
        # record is appended so that index p + 2 exists below
        peak_to_peak = np.concatenate([np.diff(peaks), data_length - peaks[-1]])
    else:
        peak_to_peak = np.diff(peaks)
    #
    #
    if num_of_peaks <= 4:
        ecg_segments = np.expand_dims(set_to_desired_length(data, max_len = max_length), axis = 0) # Rank-3 array
        #ecg_segments = ecg_preprocessing(ecg_segments, 'sym8', 8, 3)
        ecg_segments = sig.decimate(ecg_segments, n = 60, q = 7, ftype = 'fir', axis = 2, zero_phase = True)
        ecg_labels = np.expand_dims(data_label, axis = 0).astype(int) # Rank-1 array
        ecg_seq_length = np.expand_dims(data.shape[1], axis = 0).astype(int) # Rank-1 array
        np.place(ecg_seq_length, ecg_seq_length > max_length, max_length)
    #
    if num_of_peaks == 5 or num_of_peaks == 6:
        #
        p = 2
        start = int(peaks[p] - peak_to_peak[p - 1] - round(0.9 * peak_to_peak[p - 2]))
        end = int(peaks[p] + peak_to_peak[p] + peak_to_peak[p + 1] + round(0.9 * peak_to_peak[p + 2]))
        #
        segment = data[:,  start : end + 1]
        #segment = ecg_preprocessing(segment, 'sym8', 8, 3)
        segment = sig.decimate(segment, n = 60, q = 7, ftype = 'fir', axis = 1, zero_phase = True)
        # keep the floating-point samples (no integer cast)
        ecg_segments = np.expand_dims(set_to_desired_length(segment, max_len = max_length), axis = 0) # Rank-3 array
        #
        ecg_seq_length = np.expand_dims(segment.shape[1], axis = 0) # Rank-1 array
        np.place(ecg_seq_length, ecg_seq_length > max_length, max_length)
        #
        if data_label == 2 or data_label == 6 or data_label == 7:
            #
            feature_vector = extract_features(peaks[p - 1 : p + 3])
            #
            if np.amin(feature_vector) / np.amax(feature_vector) >= .80:
                data_label = 1
        ecg_labels = np.expand_dims(data_label, axis = 0).astype(int) # Rank-1 array
    #
    if num_of_peaks >= 7:
        # initialize
        #no_of_segments = max(1, num_of_peaks - 3)
        no_of_segments = max(1, num_of_peaks - 7)
        ecg_segments = np.zeros([no_of_segments, 12, max_length])
        ecg_labels = np.zeros([no_of_segments])
        ecg_seq_length = np.zeros([no_of_segments])
        #
        for p in np.arange(3, max(num_of_peaks - 4, 4)):
            start = int(peaks[p] - peak_to_peak[p - 1] - round(0.9 * peak_to_peak[p - 2]))
            end = int(peaks[p] + peak_to_peak[p] + peak_to_peak[p + 1] + round(0.9 * peak_to_peak[p + 2]))
            segment = data[:,  start : end + 1]
            #segment = ecg_preprocessing(segment, 'sym8', 8, 3)
            segment = sig.decimate(segment, n = 60, q = 7, ftype = 'fir', axis = 1, zero_phase = True)
            seq_length = segment.shape[1]
            segment = set_to_desired_length(segment, max_len = max_length)
            ecg_segments[p - 3, :,:] = segment
            #
            ecg_seq_length[p - 3] = seq_length
            np.place(ecg_seq_length, ecg_seq_length > max_length, max_length)
            #
            if data_label == 2 or data_label == 6 or data_label == 7:
                #
                feature_vector = extract_features(peaks[p - 1 : p + 3])
                #
                if np.amin(feature_vector) / np.amax(feature_vector) >= .80:
                    ecg_labels[p - 3] = 1
                else:
                    ecg_labels[p - 3] = data_label
            else:
                ecg_labels[p - 3] = data_label
    #
    # labels are returned 0-based
    return ecg_segments, ecg_labels.astype(int) - 1, ecg_seq_length.astype(int)
327 | #
def extract_ecg_segments_v2(peaks, file_name, data_label = None, max_length = 1000):
    """Load a recording and cut it into normalised, peak-centred segments.

    The signal array is read from `loadmat(file_name)['ECG'][0][0][2]`, cast to
    float32 and cleaned with `ecg_preprocessing`. With at most 4 peaks the
    whole recording becomes a single segment; with more peaks one segment is
    cut for every peak index p in 1 .. num_of_peaks - 3. Every segment is
    standardised per row (zero mean, unit standard deviation), decimated by a
    factor of 7 and truncated or zero-padded to `max_length`.

    Args:
        peaks: peak locations in increasing order (must not be empty, since
            peaks[0] and peaks[-1] are read unconditionally).
        file_name: path of the .mat recording.
        data_label: 1-based label of the recording, or None when no labels
            are wanted.
        max_length: number of samples of every returned segment.

    Returns:
        (ecg_segments, ecg_labels, ecg_seq_length): segments transposed to
        (segment, sample, row) order, 0-based integer labels (or None when
        `data_label` is None) and integer segment lengths capped at
        `max_length`.
    """
    #
    # read data as float32, labels as int32, sequence length as int32
    data = sio.loadmat(file_name)['ECG'][0][0][2]
    data = data.astype(np.float32)
    # unfiltered copy, used only by special_PVC further down
    data_org = data
    # clean the data first
    data = ecg_preprocessing(data, 'sym8', 8, 3)
    #
    num_of_peaks = len(peaks)
    no_channels = 12
    data_length = np.expand_dims(data.shape[1], axis = 0)
    #
    # peak_to_peak[0] is the distance from the record start to the first peak,
    # peak_to_peak[k] (k >= 1) is peaks[k] - peaks[k - 1], and the last entry
    # is the distance from the last peak to the record end.
    peak_to_peak = np.concatenate([np.expand_dims(peaks[0], axis = 0), np.diff(peaks), data_length - peaks[-1]])
    #
    if num_of_peaks <= 4:
        # too few peaks to segment: use the whole (standardised) recording
        data = (data - np.expand_dims(np.mean(data, axis = 1), axis = 1)) / (np.expand_dims(np.std(data, axis = 1), axis = 1) + EPS)
        data = sig.decimate(data, n = 60, q = 7, ftype = 'fir', axis = 1, zero_phase = True)
        ecg_segments = np.expand_dims(set_to_desired_length(data, max_len = max_length), axis = 0) # Rank-3 array
        #ecg_segments = ecg_preprocessing(ecg_segments, 'sym8', 8, 3)
        if data_label is not None:
        	ecg_labels = np.expand_dims(data_label, axis = 0).astype(int) # Rank-1 array
        else:
        	ecg_labels = None
        # length of the decimated recording, capped at max_length
        ecg_seq_length = np.expand_dims(data.shape[1], axis = 0).astype(int) # Rank-1 array
        np.place(ecg_seq_length, ecg_seq_length > max_length, max_length)
    #
    if num_of_peaks > 4:
        # initialize
        no_of_segments = max(1, num_of_peaks - 3)
        #no_of_segments = max(1, num_of_peaks - 7)
        ecg_segments = np.zeros([no_of_segments, 12, max_length])
        if data_label is not None:
        	ecg_labels = np.zeros([no_of_segments])
        else:
        	ecg_labels = None
        ecg_seq_length = np.zeros([no_of_segments])
        #
        # segment p - 1 is cut around peak p
        for p in np.arange(1, num_of_peaks - 2):
            start = int(peaks[p] - peak_to_peak[p] - round(0.9 * peak_to_peak[p - 1]))
            # NOTE(review): `start` uses the index-shifted peak_to_peak of this
            # version, while `end` still has the index pattern of
            # extract_ecg_segments (p, p + 1, p + 2), so it adds the interval
            # *before* peak p as well. Presumably the shipped model was trained
            # with this window — confirm before changing.
            end = int(peaks[p] + peak_to_peak[p] + peak_to_peak[p + 1] + round(0.9 * peak_to_peak[p + 2]))
            segment = data[:,  start : end + 1]
            # per-row standardisation; EPS guards against a zero standard deviation
            segment = (segment - np.expand_dims(np.mean(segment, axis = 1), axis = 1)) / (np.expand_dims(np.std(segment, axis = 1), axis = 1) + EPS)
            #segment = ecg_preprocessing(segment, 'sym8', 8, 3)
            segment = sig.decimate(segment, n = 60, q = 7, ftype = 'fir', axis = 1, zero_phase = True)
            # remember the unpadded length before fixing the segment size
            seq_length = segment.shape[1]
            segment = set_to_desired_length(segment, max_len = max_length)
            ecg_segments[p - 1, :,:] = segment
            #
            ecg_seq_length[p - 1] = seq_length
            np.place(ecg_seq_length, ecg_seq_length > max_length, max_length)
            #
            if ecg_labels is not None:
            	# For labels 2, 6 and 7 a segment whose three surrounding peak
            	# distances are nearly uniform (min / max >= 0.80) is labelled 1.
            	if data_label == 2 or data_label == 6 or data_label == 7:
            		#
            		feature_vector = extract_features(peaks[p - 1 : p + 3])
            		#
            		if np.amin(feature_vector) / np.amax(feature_vector) >= .80:
            			ecg_labels[p - 1] = 1
            		else:
            			ecg_labels[p - 1] = data_label
            	else:
                    ecg_labels[p - 1] = data_label
        #
        # Label 7 only: segments flagged by special_PVC (run on the unfiltered
        # data) get the original label back, unless every segment is flagged.
        if data_label == 7:
        	#print('here')
        	_, PVC_segments = special_PVC(peaks, data_org)
        	#print(PVC_segments)
        	if len(PVC_segments) < no_of_segments:
        		#print('here')
        		ecg_labels[PVC_segments] = data_label
			#	ecg_labels[PVC_segments] = data_label
    # reshape
    # swap the last two axes: (segment, row, sample) -> (segment, sample, row)
    ecg_segments = np.transpose(ecg_segments, axes = (0, 2, 1))
    if ecg_labels is not None:
    	# labels are returned 0-based
    	ecg_labels = ecg_labels.astype(int) - 1
    return ecg_segments, ecg_labels, ecg_seq_length.astype(int)
405 | #
def special_PVC(peaks, data):
    """Flag segments around peaks whose template correlates most strongly.

    For every peak a template is cut from `data` (0.9 of the neighbouring peak
    distance on either side), cross-correlated with the full recording row by
    row over the first 12 rows, and the absolute summed correlation is stored
    as one row of a matrix. The matrix is mapped to (0, 1] with a Gaussian-like
    function of the distance to its global maximum; peaks owning at least one
    value above 0.95 are candidates, and every candidate k marks the segment
    indices max(0, k - 3) .. min(k, len(peaks) - 4).

    Args:
        peaks: 1-D array of peak locations in increasing order.
        data: 2-D recording (rows x samples) with at least 12 rows.

    Returns:
        (corr_big, PVC_segments): the transformed correlation matrix
        (peaks x samples) and the sorted unique marked segment indices.
    """
    n_samples = data.shape[-1]
    tail = np.expand_dims(n_samples, axis = 0) - peaks[-1]
    # distance before the first peak, between peaks, and after the last peak
    p2p = np.concatenate([np.expand_dims(peaks[0], axis = 0), np.diff(peaks), tail])
    corr_big = np.zeros([peaks.shape[-1], n_samples])
    #
    for row, peak in enumerate(peaks):
        start = int(peak - round(0.9 * p2p[row]))
        end = int(peak + round(0.9 * p2p[row + 1]))
        template = data[:,  start : end + 1]
        #
        # accumulate the correlation over the 12 rows, in row order
        corr = np.zeros(n_samples)
        for lead in range(12):
            corr = corr + sig.correlate(data[lead, :], template[lead, :], mode='same')
        #
        corr_big[row, :] = np.abs(corr)
    #
    distance_to_max = corr_big - np.amax(corr_big)
    corr_big = 1 / np.exp(0.00001 * (distance_to_max ** 2))
    #
    candidate_peaks = np.unique(np.where(corr_big > 0.95)[0])
    #
    last_segment = len(peaks) - 3 - 1
    marked = [np.arange(start = max(0, k - 3), stop = min(k, last_segment) + 1, dtype = np.int32)
              for k in candidate_peaks]
    PVC_segments = np.unique(np.concatenate(marked))
    #
    return corr_big, PVC_segments
435 | #
def sample_batch(data_file_name, annotation_file_name, mode = 'evaluation'):
    """Turn one recording into segments, per-segment labels and sequence lengths.

    Parameters
    ----------
    data_file_name : path of the recording; in training mode its base name must
        contain a record id of the form ``A<digits>``.
    annotation_file_name : CSV file with ``Recording`` and ``First_label``
        columns. It is only read when ``mode == 'training'``.
    mode : ``'training'`` looks up the record's first label and passes it to the
        segment extractor; any other value passes ``None`` as the label.

    Returns
    -------
    (segments, labels, lengths) exactly as returned by ``extract_ecg_segments_v2``.

    Raises
    ------
    ValueError : in training mode, when no record id can be found in the file
        name or the record is not listed in the annotation file.
    """
    import os  # local import: the module's import header is not guaranteed to provide it
    #
    label = None
    if mode == 'training':
        # Search the base name only, so a directory such as "DATA2018" cannot
        # be mistaken for the record id.
        match = re.search('A[0-9]+', os.path.basename(data_file_name))
        if match is None:
            raise ValueError('no record id of the form A<digits> in %r' % (data_file_name,))
        record = match.group(0)
        #
        df = pd.read_csv(annotation_file_name, delimiter = ',')
        # `.values` replaces Series.as_matrix, which was removed in pandas 1.0.
        RECORDS = df.Recording.values
        LABELS = df.First_label.values
        #
        hits = np.where(RECORDS == record)[0]
        if hits.size == 0:
            raise ValueError('record %s is not listed in %s' % (record, annotation_file_name))
        # A single hit squeezes to a 0-d index, so `label` is a scalar as before.
        label = LABELS[np.squeeze(hits)]
    #
    peaks, _ = peak_detector_with_refinement(data_file_name)
    segments, labels, lengths = extract_ecg_segments_v2(peaks, data_file_name, label)
    #
    return segments, labels, lengths
453 | #
def process_data(file_name):
    """Prepare one recording for inference.

    Runs the peak detector on ``file_name`` and feeds the peaks to the segment
    extractor without a label. Returns ``(segments, lengths)``; the extractor's
    label output is discarded.
    """
    detected_peaks, _ = peak_detector_with_refinement(file_name)
    extracted = extract_ecg_segments_v2(detected_peaks, file_name)
    segments, _, seq_lengths = extracted
    return segments, seq_lengths


--------------------------------------------------------------------------------
/Physiological signal challenge/codes/training.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import scipy as sp
 3 | import os
 4 | import glob
 5 | #
 6 | from codes.segmentation import sample_batch
 7 | #
 8 | def test_val_split_v2(data_path, train_percentage = 90):
 9 |     file_list = np.array(glob.glob(os.path.join(data_path, '*.mat')))
10 |     no_of_files = len(file_list)
11 |     no_of_train = np.ceil(len(file_list) * train_percentage / 100).astype(np.int32)
12 |     no_of_val = len(file_list) - no_of_train  # not used
13 |     np.random.seed(20)
14 |     index = np.random.permutation(no_of_files)
15 |     train_file_list = list(file_list[index[:no_of_train]])
16 |     val_file_list = list(file_list[index[no_of_train:]])
17 |     #
18 |     return (train_file_list, val_file_list)
19 | #
def prediction(labels):
    """Collapse per-segment labels of one recording into a single label.

    Labels 1, 5 and 6 are treated as priority classes: they win over the plain
    majority vote when they occur often enough.
    NOTE(review): presumably the labels are zero-based class indices (the
    sibling ``prediction_v2`` is fed ``argmax`` output) -- confirm.

    Rules
    -----
    * If each priority class occurs fewer than 2 times, return the most
      frequent label (the smallest one on ties).
    * Otherwise take the most frequent priority class (ties resolved in the
      order 1, 5, 6). It is returned when it occurs more than twice, or when
      its two occurrences are adjacent; else the majority label is returned.

    Parameters
    ----------
    labels : non-empty 1-D sequence of integer labels.

    Returns
    -------
    int : the chosen label.

    Raises
    ------
    ValueError : if ``labels`` is empty.
    """
    labels = np.asarray(labels)
    if labels.size == 0:
        raise ValueError('labels must contain at least one element')
    # Majority vote without scipy.stats.mode, whose return shape changed across
    # SciPy releases. np.unique sorts its output and argmax returns the first
    # maximum, so ties resolve to the smallest label as scipy's mode does.
    values, counts = np.unique(labels, return_counts=True)
    majority = int(values[np.argmax(counts)])
    #
    priority = (1, 5, 6)
    occurrences = {cls: int(np.count_nonzero(labels == cls)) for cls in priority}
    if all(occurrences[cls] < 2 for cls in priority):
        return majority
    # max() keeps the first maximal element, i.e. priority order 1, 5, 6 on ties.
    candidate = max(priority, key=lambda cls: occurrences[cls])
    if occurrences[candidate] > 2:
        return candidate
    # Exactly two occurrences here: accept them only if they are neighbours.
    positions = np.where(labels == candidate)[0]
    if np.diff(positions)[0] == 1:
        return candidate
    return majority
45 | #
def prediction_v2(labels):
    """Collapse per-segment labels of one recording into a single label.

    Labels 1, 5 and 6 are priority classes: as soon as any of them occurs at
    least once, the most frequent of the three is returned (ties resolved in
    the order 1, 5, 6). Only when none of them occurs does the plain majority
    label win (the smallest label on ties).

    Parameters
    ----------
    labels : non-empty 1-D sequence of integer labels.

    Returns
    -------
    int : the chosen label.

    Raises
    ------
    ValueError : if ``labels`` is empty.
    """
    labels = np.asarray(labels)
    if labels.size == 0:
        raise ValueError('labels must contain at least one element')
    #
    priority = (1, 5, 6)
    occurrences = {cls: int(np.count_nonzero(labels == cls)) for cls in priority}
    if not any(occurrences[cls] for cls in priority):
        # Majority vote without scipy.stats.mode, whose return shape changed
        # across SciPy releases; np.unique is sorted and argmax takes the first
        # maximum, so ties go to the smallest label.
        values, counts = np.unique(labels, return_counts=True)
        return int(values[np.argmax(counts)])
    # max() keeps the first maximal element, i.e. priority order 1, 5, 6 on ties.
    return max(priority, key=lambda cls: occurrences[cls])
64 | #
def sample_batch_for_training(file_list, ref_file, files_per_batch = 10):
    """Build one shuffled training batch from randomly chosen recordings.

    Parameters
    ----------
    file_list : indexable collection of recording paths.
    ref_file : annotation CSV handed to ``sample_batch``.
    files_per_batch : number of distinct recordings to draw (default 10, the
        previously hard-coded value). It is capped at ``len(file_list)`` so a
        small file list no longer raises inside ``np.random.choice``.

    Returns
    -------
    (data, labels, seq_length) : the concatenated output of ``sample_batch``
    for the drawn recordings, permuted with one common random permutation.

    Raises
    ------
    ValueError : if ``file_list`` is empty or ``files_per_batch`` is below 1.
    """
    no_of_files = len(file_list)
    if no_of_files == 0:
        raise ValueError('file_list is empty; cannot sample a training batch')
    if files_per_batch < 1:
        raise ValueError('files_per_batch must be at least 1')
    #
    idx = np.random.choice(no_of_files, size = min(files_per_batch, no_of_files), replace = False)
    # Historical load order is kept (last drawn recording first, then the rest
    # in draw order) so a seeded run yields the same batch as before.
    load_order = [idx[-1]] + list(idx[:-1])
    #
    data_parts, label_parts, length_parts = [], [], []
    for file_index in load_order:
        segs, labs, lens = sample_batch(file_list[file_index], ref_file, mode = 'training')
        data_parts.append(segs)
        label_parts.append(labs)
        length_parts.append(lens)
    # Concatenate once instead of re-allocating the arrays for every recording.
    data = np.concatenate(data_parts)
    labels = np.concatenate(label_parts)
    seq_length = np.concatenate(length_parts)
    #
    index = np.random.permutation(labels.shape[-1])
    return data[index,:,:], labels[index], seq_length[index]
78 | #
79 | 


--------------------------------------------------------------------------------
/Physiological signal challenge/codes/utils.py:
--------------------------------------------------------------------------------
 1 | import matplotlib
 2 | import numpy as np
 3 | #
 4 | def utility_plot(data, peaks):
 5 |     #
 6 |     peaks = np.array(peaks, dtype = int)
 7 |     t = np.arange(data.shape[1])
 8 |     channels = data.shape[0]
 9 |     fig = matplotlib.pyplot.figure(figsize = (10,10))
10 |     #
11 |     for i in np.arange(1, channels + 1):
12 |         ax = matplotlib.pyplot.subplot(channels, 1, i)
13 |         matplotlib.pyplot.plot(t, data[i - 1,:])
14 |         matplotlib.pyplot.plot(peaks, data[i - 1, peaks], 'or')
15 |         ax.set_yticklabels([])
16 |         ax.set_xticklabels([])
17 | #


--------------------------------------------------------------------------------
/Physiological signal challenge/cpsc2018.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | import os
 3 | import argparse
 4 | import csv
 5 | import glob
 6 | from scipy import io
 7 | import numpy as np
 8 | import tensorflow as tensorflow
 9 | from tensorflow.contrib.layers import fully_connected
10 | import tensorflow.contrib.rnn as recurrent
11 | #
12 | from codes.pre_processing import *
13 | from codes.segmentation import *
14 | from codes.utils import *
15 | from codes.training import *
16 | from codes.model import *
17 | 
18 | '''
 19 | cpsc2018_challenge score
20 | 
21 | '''
22 | 
23 | '''
 24 | Save prediction answers to answers.csv in local path, the first column is recording name and the second
 25 | column is prediction label, for example:
 26 | Recording   Result
27 | B0001       1
28 | .           .
29 | .           .
30 | .           .
31 | '''
def cpsc2018(record_base_path):
    """Classify every ``.mat`` recording in a directory and write ``answers.csv``.

    The model graph is built, the checkpoint in ``./model`` is restored, and
    each recording is segmented, run through the network and reduced to one
    label with ``prediction_v2``. One row ``[recording name, label]`` is
    written per recording, after a ``Recording,Result`` header. Labels are
    integers in [1, 9]; anything outside that range is replaced by 1.

    Parameters
    ----------
    record_base_path : directory containing the ``.mat`` recordings. Files
        whose names start with ``._`` are ignored.

    Returns
    -------
    None. The result is the file ``answers.csv`` in the current directory.
    """
    # The module header binds the package under the name `tensorflow`; `tf`
    # was only reachable through a star import, so bind it explicitly.
    import tensorflow as tf
    ###########################INFERENCE PART################################

    ## Please process the ecg data, and output the classification result.
    ## result should be an integer number in [1, 9].

    inputs, labels, seq_length, logits, acuracy = build_model_graph()
    model_dir = './model'

    # newline='' is what the csv module expects; without it every row is
    # followed by an empty line on Windows.
    with open('answers.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        # column name
        writer.writerow(['Recording', 'Result'])
        with tf.Session() as sess:
            load_model(model_dir, sess)
            # Sorted so the row order does not depend on the file system.
            for mat_item in sorted(os.listdir(record_base_path)):
                if not mat_item.endswith('.mat') or mat_item.startswith('._'):
                    continue
                #
                segs, lens = process_data(os.path.join(record_base_path, mat_item))
                logits_val = sess.run(logits, feed_dict = {inputs: segs, seq_length: lens})
                # NOTE(review): atleast_2d guards against a graph that squeezes
                # the batch axis for single-segment records -- model.py is not
                # visible here, confirm whether that can happen.
                logits_val = np.atleast_2d(logits_val)
                # argmax yields zero-based classes; the challenge expects 1..9.
                result = int(prediction_v2(np.argmax(logits_val, axis = 1))) + 1
                #
                ## If the classification result is an invalid number, the result will be determined as normal(1).
                if result > 9 or result < 1:
                    result = 1
                # splitext removes only the extension; rstrip('.mat') would
                # also eat trailing 'm', 'a', 't' or '.' characters of the name.
                record_name = os.path.splitext(mat_item)[0]
                # write result
                writer.writerow([record_name, result])

    ###########################INFERENCE PART################################
66 | 
if __name__ == '__main__':
    # Command-line entry point: read the recordings directory from
    # -p/--recording_path and run inference on it.
    cli = argparse.ArgumentParser()
    cli.add_argument('-p', '--recording_path', help='path saving test record file')
    options = cli.parse_args()

    result = cpsc2018(record_base_path=options.recording_path)
76 | 


--------------------------------------------------------------------------------
/Physiological signal challenge/model/checkpoint:
--------------------------------------------------------------------------------
1 | model_checkpoint_path: "model.ckpt-207875"
2 | all_model_checkpoint_paths: "model.ckpt-207871"
3 | all_model_checkpoint_paths: "model.ckpt-207872"
4 | all_model_checkpoint_paths: "model.ckpt-207873"
5 | all_model_checkpoint_paths: "model.ckpt-207874"
6 | all_model_checkpoint_paths: "model.ckpt-207875"
7 | 


--------------------------------------------------------------------------------
/Physiological signal challenge/model/model.ckpt-207875.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amostayed/Deep-learning/5115d16229ba3f0d3a94fb64758149d34ec2f245/Physiological signal challenge/model/model.ckpt-207875.data-00000-of-00001


--------------------------------------------------------------------------------
/Physiological signal challenge/model/model.ckpt-207875.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amostayed/Deep-learning/5115d16229ba3f0d3a94fb64758149d34ec2f245/Physiological signal challenge/model/model.ckpt-207875.index


--------------------------------------------------------------------------------
/Physiological signal challenge/model/model.ckpt-207875.meta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amostayed/Deep-learning/5115d16229ba3f0d3a94fb64758149d34ec2f245/Physiological signal challenge/model/model.ckpt-207875.meta


--------------------------------------------------------------------------------
/Physiological signal challenge/model/readme.txt:
--------------------------------------------------------------------------------
1 | Trained model
2 | 


--------------------------------------------------------------------------------
/Physiological signal challenge/readme.txt:
--------------------------------------------------------------------------------
 1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 2 | %%%%  The China Physiological Signal Challenge 2018: Automatic identification of the rhythm/morphology abnormalities in 12-lead ECGs. %%%%%%
 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  Second (final) Open-source Submission %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 4 | %%%%%%%%%%%%%%%%%  Multimedia and Augmented Reality Lab, College of Electrical Engineering and Computing Systems %%%%%%%%%%%%%%%%%%%%%%%%%%%
 5 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  University of Cincinnati, Cincinnati, Ohio, USA %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 6 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Contributors: Ahmed Mostayed, Junye Luo, and Xingliang Shu %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 7 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Advisor: Dr. William G. Wee %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 8 | % DIRECTORY STRUCTURE:
 9 |                Root
10 |                   |
11 | 		  notebooks
12 | 	          |
13 |                         		  
14 | 		  codes
15 |                         |
16 |                         (*.py)
17 |                   model (tensorflow checkpoint)
18 |                         |
19 |                         (*.index)
20 |                         (*.data)
21 |                         (*.meta)
22 | 		  training_set
23 |                         |
24 | 	                (*.mat)
25 |                         (*.csv)
26 |                   validation_set
27 |                         |
28 | 	                (*.mat)
29 |                         (*.csv)
30 |                   note.txt
31 |                   cpsc2018.py
32 |                   score_py3.py
33 |                   README_en.txt
34 | % 
35 | % DEVELOPMENT ENVIRONMENT:
36 |                  OS: Windows
37 |                  Python: Anaconda 4.2.0 64-bit (Python 3.5.2)
38 |                  Tensorflow GPU version
39 | % REQUIREMENTS:
40 | 1. Python 3.x!
41 | 2. Dependencies: 
42 |                  TensorFlow (use pip to install)
43 |                  PyWavelets (https://pywavelets.readthedocs.io/en/latest/) (pip install PyWavelets)
44 |                  Numpy
45 | 		             Scipy
46 | 		             glob
47 | % INSTRUCTIONS:
48 | 1. Please put the evaluation data set (in .mat format) in the /validation_set directory
49 | 2. Name the annotation file for evaluation data "REFERENCE.csv" and put it in the /validation_set directory
50 | 3. Under Windows environment, run command: >python cpsc2018.py -p .\\validation_set\\
51 | 4. Under Windows environment, run command: >python score_py3.py -r .\\validation_set\\REFERENCE.csv
52 | %
53 | % Additional notes:
54 | 1. Must have TensorFlow and PyWavelets installed to work
55 | 2. Should work fine with the latest release of TensorFlow (tested on CPU version)
56 | 


--------------------------------------------------------------------------------
/Physiological signal challenge/score.txt:
--------------------------------------------------------------------------------
 1 | Total File Number: 300
 2 | 
 3 | F11: 0.905
 4 | F12: 0.978
 5 | F13: 1.000
 6 | F14: 1.000
 7 | F15: 0.962
 8 | F16: 0.873
 9 | F17: 0.903
10 | F18: 0.938
11 | F19: 0.966
12 | 
13 | F1: 0.947
14 | 
15 | Faf: 0.978
16 | Fblock: 0.976
17 | Fpc: 0.889
18 | Fst: 0.946
19 | 


--------------------------------------------------------------------------------
/Physiological signal challenge/score_py3.py:
--------------------------------------------------------------------------------
  1 | import csv
  2 | import cpsc2018
  3 | import os
  4 | import argparse
  5 | import numpy as np
  6 | import sys
  7 | 
  8 | '''
  9 | cpsc2018_challenge score
 10 | Written by:  Xingyao Wang, Feifei Liu, Chengyu Liu
 11 |              School of Instrument Science and Engineering
 12 |              Southeast University, China
 13 |              chengyu@seu.edu.cn
 14 | '''
 15 | 
 16 | '''
 17 | Score the prediction answers by comparing answers.csv and REFERENCE.csv in validation_set folder,
 18 | The scoring uses a F1 measure, which is an average of the nine F1 values from each classification
 19 | type. The specific score rules will be found on http://www.icbeb.org/Challenge.html.
 20 | Matrix A follows the format as:
 21 |                                      Predicted
 22 |                       Normal  AF  I-AVB  LBBB  RBBB  PAC  PVC  STD  STE
 23 |                Normal  N11   N12   N13   N14   N15   N16  N17  N18  N19
 24 |                AF      N21   N22   N23   N24   N25   N26  N27  N28  N29
 25 |                I-AVB   N31   N32   N33   N34   N35   N36  N37  N38  N39
 26 |                LBBB    N41   N42   N43   N44   N45   N46  N47  N48  N49
 27 | Reference      RBBB    N51   N52   N53   N54   N55   N56  N57  N58  N59
 28 |                PAC     N61   N62   N63   N64   N65   N66  N67  N68  N69
 29 |                PVC     N71   N72   N73   N74   N75   N76  N77  N78  N79
 30 |                STD     N81   N82   N83   N84   N85   N86  N87  N88  N89
 31 |                STE     N91   N92   N93   N94   N95   N96  N97  N98  N99
 32 | 
 33 | For each of the nine types, F1 is defined as:
 34 | Normal: F11=2*N11/(N1x+Nx1) AF: F12=2*N22/(N2x+Nx2) I-AVB: F13=2*N33/(N3x+Nx3) LBBB: F14=2*N44/(N4x+Nx4) RBBB: F15=2*N55/(N5x+Nx5)
 35 | PAC: F16=2*N66/(N6x+Nx6)    PVC: F17=2*N77/(N7x+Nx7)    STD: F18=2*N88/(N8x+Nx8)    STE: F19=2*N99/(N9x+Nx9)
 36 | 
 37 | The final challenge score is defined as:
 38 | F1 = (F11+F12+F13+F14+F15+F16+F17+F18+F19)/9
 39 | 
 40 | In addition, we also calculate the F1 measures for each of the four sub-abnormal types:
 41 |             AF: Faf=2*N22/(N2x+Nx2)                         Block: Fblock=2*(N33+N44+N55)/(N3x+Nx3+N4x+Nx4+N5x+Nx5)
 42 | Premature contraction: Fpc=2*(N66+N77)/(N6x+Nx6+N7x+Nx7)    ST-segment change: Fst=2*(N88+N99)/(N8x+Nx8+N9x+Nx9)
 43 | 
 44 | The statistics of the predicted answers and the final score are saved to score.txt in local path.
 45 | '''
 46 | 
 47 | def score(answers_csv_path, reference_csv_path):
 48 |     answers = dict()
 49 |     reference = dict()
 50 |     A = np.zeros((9, 9), dtype=np.float)
 51 |     with open(answers_csv_path) as f:
 52 |         reader = csv.DictReader(f)
 53 |         for row in reader:
 54 |             answers.setdefault(row['Recording'], []).append(row['Result'])
 55 |         f.close()
 56 |     with open(reference_csv_path) as ref:
 57 |         reader = csv.DictReader(ref)
 58 |         for row in reader:
 59 |             reference.setdefault(row['Recording'], []).append([row['First_label'], row['Second_label'], row['Third_label']])
 60 |         ref.close()
 61 | 
 62 |     for key in answers.keys():
 63 |         value = []
 64 |         for item in answers[key]:
 65 |             predict = np.int(item)
 66 |         for item in reference[key][0]:
 67 |             if item == '':
 68 |                 item = 0
 69 |             value.append(np.int(item))
 70 | 
 71 |         if predict in value:
 72 |             A[predict-1][predict-1] += 1
 73 |         else:
 74 |             A[value[0]-1][predict-1] += 1
 75 | 
 76 |     F11 = 2 * A[0][0] / (np.sum(A[0, :]) + np.sum(A[:, 0]))
 77 |     F12 = 2 * A[1][1] / (np.sum(A[1, :]) + np.sum(A[:, 1]))
 78 |     F13 = 2 * A[2][2] / (np.sum(A[2, :]) + np.sum(A[:, 2]))
 79 |     F14 = 2 * A[3][3] / (np.sum(A[3, :]) + np.sum(A[:, 3]))
 80 |     F15 = 2 * A[4][4] / (np.sum(A[4, :]) + np.sum(A[:, 4]))
 81 |     F16 = 2 * A[5][5] / (np.sum(A[5, :]) + np.sum(A[:, 5]))
 82 |     F17 = 2 * A[6][6] / (np.sum(A[6, :]) + np.sum(A[:, 6]))
 83 |     F18 = 2 * A[7][7] / (np.sum(A[7, :]) + np.sum(A[:, 7]))
 84 |     F19 = 2 * A[8][8] / (np.sum(A[8, :]) + np.sum(A[:, 8]))
 85 | 
 86 |     F1 = (F11+F12+F13+F14+F15+F16+F17+F18+F19) / 9
 87 | 
 88 |     ## following is calculating scores for 4 types: AF, Block, Premature contraction, ST-segment change.
 89 | 
 90 |     Faf = 2 * A[1][1] / (np.sum(A[1, :]) + np.sum(A[:, 1]))
 91 |     Fblock = 2 * (A[2][2] + A[3][3] + A[4][4]) / (np.sum(A[2:5, :]) + np.sum(A[:, 2:5]))
 92 |     Fpc = 2 * (A[5][5] + A[6][6]) / (np.sum(A[5:7, :]) + np.sum(A[:, 5:7]))
 93 |     Fst = 2 * (A[7][7] + A[8][8]) / (np.sum(A[7:9, :]) + np.sum(A[:, 7:9]))
 94 | 
 95 |     # print(A)
 96 |     print('Total File Number: ', np.sum(A))
 97 | 
 98 |     print("F11: ", F11)
 99 |     print("F12: ", F12)
100 |     print("F13: ", F13)
101 |     print("F14: ", F14)
102 |     print("F15: ", F15)
103 |     print("F16: ", F16)
104 |     print("F17: ", F17)
105 |     print("F18: ", F18)
106 |     print("F19: ", F19)
107 |     print("F1: ", F1)
108 | 
109 |     print("Faf: ", Faf)
110 |     print("Fblock: ", Fblock)
111 |     print("Fpc: ", Fpc)
112 |     print("Fst: ", Fst)
113 | 
114 |     with open('score.txt', 'w') as score_file:
115 |         # print (A, file=score_file)
116 |         print ('Total File Number: %d\n' %(np.sum(A)), file=score_file)
117 |         print ('F11: %0.3f' %F11, file=score_file)
118 |         print ('F12: %0.3f' %F12, file=score_file)
119 |         print ('F13: %0.3f' %F13, file=score_file)
120 |         print ('F14: %0.3f' %F14, file=score_file)
121 |         print ('F15: %0.3f' %F15, file=score_file)
122 |         print ('F16: %0.3f' %F16, file=score_file)
123 |         print ('F17: %0.3f' %F17, file=score_file)
124 |         print ('F18: %0.3f' %F18, file=score_file)
125 |         print ('F19: %0.3f\n' %F19, file=score_file)
126 |         print ('F1: %0.3f\n' %F1, file=score_file)
127 |         print ('Faf: %0.3f' %Faf, file=score_file)
128 |         print ('Fblock: %0.3f' %Fblock, file=score_file)
129 |         print ('Fpc: %0.3f' %Fpc, file=score_file)
130 |         print ('Fst: %0.3f' %Fst, file=score_file)
131 | 
132 |         score_file.close()
133 | 
if __name__ == '__main__':
    # Command-line entry point: score answers.csv (current directory) against
    # the reference file given with -r/--reference_path.
    cli = argparse.ArgumentParser()
    cli.add_argument('-r', '--reference_path', help='path saving reference file')
    options = cli.parse_args()
    score('answers.csv', options.reference_path)
142 | 


--------------------------------------------------------------------------------
/Physiological signal challenge/training.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "Import necessary modules"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": null,
 13 |    "metadata": {
 14 |     "collapsed": false
 15 |    },
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "import numpy as np\n",
 19 |     "import scipy as sp\n",
 20 |     "import scipy.io as sio\n",
 21 |     "import scipy.signal as sig\n",
 22 |     "import pywt\n",
 23 |     "import os\n",
 24 |     "import glob\n",
 25 |     "import itertools\n",
 26 |     "import matplotlib\n",
 27 |     "import pandas as pd\n",
 28 |     "import re\n",
 29 |     "import math\n",
 30 |     "import tensorflow as tf\n",
 31 |     "from tensorflow.contrib.layers import fully_connected\n",
 32 |     "import tensorflow.contrib.rnn as recurrent\n",
 33 |     "import sklearn.preprocessing\n",
 34 |     "#\n",
 35 |     "%matplotlib inline"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "markdown",
 40 |    "metadata": {
 41 |     "collapsed": true
 42 |    },
 43 |    "source": [
 44 |     "Load utility codes"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": null,
 50 |    "metadata": {
 51 |     "collapsed": true
 52 |    },
 53 |    "outputs": [],
 54 |    "source": [
 55 |     "from codes.pre_processing import *\n",
 56 |     "from codes.segmentation import *\n",
 57 |     "from codes.utils import *\n",
 58 |     "from codes.training import *\n",
 59 |     "from codes.model import *"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "markdown",
 64 |    "metadata": {},
 65 |    "source": [
 66 |     "Training/Validation split"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "code",
 71 |    "execution_count": null,
 72 |    "metadata": {
 73 |     "collapsed": true
 74 |    },
 75 |    "outputs": [],
 76 |    "source": [
 77 |     "data_root_dir = '../training_set'\n",
 78 |     "train_file_list, val_file_list = test_val_split_v2(data_root_dir, train_percentage = 90)\n",
 79 |     "ref_file = os.path.join(data_root_dir, 'REFERENCE.csv')\n",
 80 |     "#\n",
 81 |     "list_of_training_files = np.array(train_file_list)\n",
 82 |     "list_of_validation_files = np.array(val_file_list)"
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "markdown",
 87 |    "metadata": {
 88 |     "collapsed": false
 89 |    },
 90 |    "source": [
 91 |     "Define model graph"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "code",
 96 |    "execution_count": null,
 97 |    "metadata": {
 98 |     "collapsed": true
 99 |    },
100 |    "outputs": [],
101 |    "source": [
102 |     "# 1000 x 12 batches\n",
103 |     "input_size = 12\n",
104 |     "time_steps = 1000\n",
105 |     "num_classes = 9\n",
106 |     "hidden_size = 100\n",
107 |     "num_hidden = 2\n",
108 |     "output_size = 9\n",
109 |     "keep_prob = 0.5  # dropout\n",
110 |     "inputs = tf.placeholder(tf.float32, [None, time_steps, input_size])\n",
111 |     "labels = tf.placeholder(tf.int32, [None])\n",
112 |     "seq_length = tf.placeholder(tf.int32, [None])\n",
113 |     "#\n",
114 |     "is_training = True # set it to true at first\n",
115 |     "#\n",
116 |     "def RNN_bidirectional(input_tensor, Training):\n",
117 |     "    with tf.variable_scope(\"recurrent\", initializer = tf.contrib.layers.variance_scaling_initializer()):\n",
118 |     "        cell = tf.nn.rnn_cell.BasicLSTMCell\n",
119 |     "        cells_fw = [cell(hidden_size) for _ in range(num_hidden)]\n",
120 |     "        cells_bw = [cell(hidden_size) for _ in range(num_hidden)]\n",
121 |     "        cells_fw = [tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob = keep_prob if Training is True else 1.0) for cell in cells_fw]\n",
122 |     "        cells_bw = [tf.contrib.rnn.DropoutWrapper(cell, input_keep_prob = keep_prob if Training is True else 1.0) for cell in cells_bw]\n",
123 |     "        _, states_fw, states_bw = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(\n",
124 |     "                cells_fw=cells_fw,\n",
125 |     "                cells_bw=cells_bw,\n",
126 |     "                inputs=input_tensor,\n",
127 |     "                sequence_length = seq_length,\n",
128 |     "                dtype=tf.float32)\n",
129 |     "        outputs_fw = tf.concat(states_fw[-1][-1], axis = 1)\n",
130 |     "        outputs_bw = tf.concat(states_bw[-1][-1], axis = 1)\n",
131 |     "        outputs = tf.concat([outputs_fw, outputs_bw], axis = 1)\n",
132 |     "        logits = tf.squeeze(fully_connected(outputs, output_size, activation_fn = None))\n",
133 |     "        #\n",
134 |     "    return logits"
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "code",
139 |    "execution_count": null,
140 |    "metadata": {
141 |     "collapsed": false
142 |    },
143 |    "outputs": [],
144 |    "source": [
145 |     "logits = RNN_bidirectional(inputs, Training = is_training)"
146 |    ]
147 |   },
148 |   {
149 |    "cell_type": "markdown",
150 |    "metadata": {},
151 |    "source": [
152 |     "Define the loss and training ops"
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "code",
157 |    "execution_count": null,
158 |    "metadata": {
159 |     "collapsed": false
160 |    },
161 |    "outputs": [],
162 |    "source": [
163 |     "cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = labels, logits = logits)\n",
164 |     "loss = tf.reduce_mean(cross_entropy)\n",
165 |     "#\n",
166 |     "optimizer = tf.train.AdamOptimizer(learning_rate = 0.001)\n",
167 |     "gradients, variables = zip(*optimizer.compute_gradients(loss))\n",
168 |     "# gradient clipping - makes the training more stable\n",
169 |     "gradients = [\n",
170 |     "    None if gradient is None else tf.clip_by_norm(gradient, 5.0)\n",
171 |     "    for gradient in gradients]\n",
172 |     "training_op = optimizer.apply_gradients(zip(gradients, variables))\n",
173 |     "#\n",
174 |     "correct = tf.nn.in_top_k(logits, labels, 1)\n",
175 |     "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))"
176 |    ]
177 |   },
178 |   {
179 |    "cell_type": "markdown",
180 |    "metadata": {},
181 |    "source": [
182 |     "Saver"
183 |    ]
184 |   },
185 |   {
186 |    "cell_type": "code",
187 |    "execution_count": null,
188 |    "metadata": {
189 |     "collapsed": true
190 |    },
191 |    "outputs": [],
192 |    "source": [
193 |     "saver = tf.train.Saver(max_to_keep = 5, keep_checkpoint_every_n_hours = 1)\n",
194 |     "save_dir = './model'\n",
195 |     "#\n",
196 |     "model_name_prefix = 'model.ckpt'\n",
197 |     "if not os.path.exists(save_dir):\n",
198 |     "    os.makedirs(save_dir)"
199 |    ]
200 |   },
201 |   {
202 |    "cell_type": "markdown",
203 |    "metadata": {
204 |     "collapsed": true
205 |    },
206 |    "source": [
207 |     "Initialize variables"
208 |    ]
209 |   },
210 |   {
211 |    "cell_type": "code",
212 |    "execution_count": null,
213 |    "metadata": {
214 |     "collapsed": false
215 |    },
216 |    "outputs": [],
217 |    "source": [
218 |     "init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())"
219 |    ]
220 |   },
221 |   {
222 |    "cell_type": "markdown",
223 |    "metadata": {},
224 |    "source": [
225 |     "Begin training"
226 |    ]
227 |   },
228 |   {
229 |    "cell_type": "code",
230 |    "execution_count": null,
231 |    "metadata": {
232 |     "collapsed": false
233 |    },
234 |    "outputs": [],
235 |    "source": [
236 |     "with tf.Session()  as sess:\n",
237 |     "    \n",
238 |     "    sess.run(init_op)\n",
239 |     "    path = tf.train.get_checkpoint_state(save_dir)\n",
240 |     "    if path is None:\n",
241 |     "        global_step = 0\n",
242 |     "    else:\n",
243 |     "        global_step = int(path.model_checkpoint_path.split('-')[-1])\n",
244 |     "    #\n",
245 |     "    \n",
246 |     "    if path is None:\n",
247 |     "        sess.run(init_op)\n",
248 |     "    else:\n",
249 |     "        saver.restore(sess, path.model_checkpoint_path)\n",
250 |     "    \n",
251 |     "    for step in range(20000):\n",
252 |     "        #\n",
253 |     "        data_numpy, labels_numpy, seq_length_numpy = sample_batch_for_training(list_of_training_files, ref_file)\n",
254 |     "        _, loss_value, logits_value = sess.run([training_op, loss, logits], feed_dict = {inputs: data_numpy, labels: labels_numpy, seq_length: seq_length_numpy})\n",
255 |     "        #\n",
256 |     "        if step % 20 == 0:\n",
257 |     "            print('current iteration: {}'.format(step + global_step))\n",
258 |     "            print('loss value: {}'.format(loss_value))\n",
259 |     "            acc_train = sess.run(accuracy, feed_dict = {inputs: data_numpy, labels: labels_numpy, seq_length: seq_length_numpy})\n",
260 |     "            is_training = False  # set to false before evaluating the test accuracy\n",
261 |     "            sub = np.random.randint(len(list_of_validation_files))\n",
262 |     "            data_val, labels_val, seq_length_val = sample_batch(list_of_validation_files[sub], ref_file, mode = 'training')\n",
263 |     "            acc_val = sess.run(accuracy, feed_dict = {inputs: data_val, labels: labels_val, seq_length: seq_length_val})\n",
264 |     "            is_training = True  # set to true again for the subsequent iterations\n",
265 |     "            #\n",
266 |     "            print(step, \"Training accuracy:\", acc_train, \"Test accuracy\", acc_val)\n",
267 |     "            try:\n",
268 |     "                loss_list.append(loss_value)\n",
269 |     "                acc_train_list.append(acc_train)\n",
270 |     "                acc_test_list.append(acc_val)\n",
271 |     "            except:\n",
272 |     "                loss_list = list()\n",
273 |     "                acc_train_list = list()\n",
274 |     "                acc_test_list = list()\n",
275 |     "                loss_list.append(loss_value)\n",
276 |     "                acc_train_list.append(acc_train)\n",
277 |     "                acc_test_list.append(acc_val)\n",
278 |     "        save_path = saver.save(sess, os.path.join(save_dir, model_name_prefix), global_step = step + global_step + 1) # model count begins with 1\n",
279 |     "        print(\"Model saved in path: %s\" % save_path)\n",
280 |     "        \n",
281 |     "        if loss_value < 0.10:\n",
282 |     "            break"
283 |    ]
284 |   },
285 |   {
286 |    "cell_type": "markdown",
287 |    "metadata": {},
288 |    "source": [
289 |     "Plot training curves"
290 |    ]
291 |   },
292 |   {
293 |    "cell_type": "code",
294 |    "execution_count": null,
295 |    "metadata": {
296 |     "collapsed": false
297 |    },
298 |    "outputs": [],
299 |    "source": [
300 |     "import matplotlib.pyplot as plt\n",
301 |     "plt.figure(num = 1)\n",
302 |     "plt.plot(range(0, step, 20), acc_train_list, 'k', range(0, step, 20), acc_test_list, 'b')\n",
303 |     "plt.figure(num = 2)\n",
304 |     "plt.plot(range(0, step, 20), loss_list, 'r')"
305 |    ]
306 |   }
307 |  ],
308 |  "metadata": {
309 |   "anaconda-cloud": {},
310 |   "kernelspec": {
311 |    "display_name": "Python [conda root]",
312 |    "language": "python",
313 |    "name": "conda-root-py"
314 |   },
315 |   "language_info": {
316 |    "codemirror_mode": {
317 |     "name": "ipython",
318 |     "version": 3
319 |    },
320 |    "file_extension": ".py",
321 |    "mimetype": "text/x-python",
322 |    "name": "python",
323 |    "nbconvert_exporter": "python",
324 |    "pygments_lexer": "ipython3",
325 |    "version": "3.5.2"
326 |   }
327 |  },
328 |  "nbformat": 4,
329 |  "nbformat_minor": 1
330 | }
331 | 


--------------------------------------------------------------------------------
/Physiological signal challenge/training_set/readme.txt:
--------------------------------------------------------------------------------
1 | Download the training dataset (MATLAB data format) and the annotation file (REFERENCE.csv) from:
2 | http://www.icbeb.org/Challenge.html
3 | and put them in this directory.
4 | 


--------------------------------------------------------------------------------
/Physiological signal challenge/validation.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np\n",
 12 |     "import scipy as sp\n",
 13 |     "import scipy.io as sio\n",
 14 |     "import scipy.signal as sig\n",
 15 |     "import pywt\n",
 16 |     "import os\n",
 17 |     "import glob\n",
 18 |     "import itertools\n",
 19 |     "import matplotlib\n",
 20 |     "import pandas as pd\n",
 21 |     "import re\n",
 22 |     "import tensorflow as tf\n",
 23 |     "from tensorflow.contrib.layers import fully_connected\n",
 24 |     "import tensorflow.contrib.rnn as recurrent\n",
 25 |     "import sklearn.preprocessing\n",
 26 |     "import matplotlib.pyplot as plt\n",
 27 |     "#\n",
 28 |     "%matplotlib inline"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": null,
 34 |    "metadata": {
 35 |     "collapsed": false
 36 |    },
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "from codes.pre_processing import *\n",
 40 |     "from codes.segmentation import *\n",
 41 |     "from codes.utils import *\n",
 42 |     "from codes.training import *\n",
 43 |     "from codes.model import *"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "markdown",
 48 |    "metadata": {},
 49 |    "source": [
 50 |     "A utility function to plot a confusion matrix (from the scikit-learn example): \n",
 51 |     "http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html#sphx-glr-auto-examples-model-selection-plot-confusion-matrix-py"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "code",
 56 |    "execution_count": null,
 57 |    "metadata": {
 58 |     "collapsed": true
 59 |    },
 60 |    "outputs": [],
 61 |    "source": [
 62 |     "def plot_confusion_matrix(cm, classes,\n",
 63 |     "                          normalize=False,\n",
 64 |     "                          title='Confusion matrix',\n",
 65 |     "                          cmap=plt.cm.Blues):\n",
 66 |     "    \"\"\"\n",
 67 |     "    Print and plot the confusion matrix `cm` (rows: true class, columns: predicted class).\n",
 68 |     "    `classes` supplies the tick labels; `normalize=True` rescales every row to sum to 1.\n",
 69 |     "    Rows without any sample stay all-zero instead of turning into NaN.\n",
 70 |     "    \"\"\"\n",
 71 |     "    if normalize:\n",
 72 |     "        row_totals = cm.sum(axis=1)[:, np.newaxis]\n",
 73 |     "        # divide empty rows by 1: 0/0 would give NaN cells and a NaN colour threshold below\n",
 74 |     "        cm = cm.astype('float') / np.where(row_totals == 0, 1, row_totals)\n",
 75 |     "        print(\"Normalized confusion matrix\")\n",
 76 |     "    else:\n",
 77 |     "        print('Confusion matrix, without normalization')\n",
 78 |     "    print(cm)\n",
 79 |     "\n",
 80 |     "    plt.imshow(cm, interpolation='nearest', cmap=cmap)\n",
 81 |     "    plt.title(title)\n",
 82 |     "    plt.colorbar()\n",
 83 |     "    tick_marks = np.arange(len(classes))\n",
 84 |     "    plt.xticks(tick_marks, classes, rotation=45)\n",
 85 |     "    plt.yticks(tick_marks, classes)\n",
 86 |     "\n",
 87 |     "    fmt = '.2f' if normalize else 'd'\n",
 88 |     "    thresh = cm.max() / 2.  # cells above half the maximum value get white text\n",
 89 |     "    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):\n",
 90 |     "        plt.text(j, i, format(cm[i, j], fmt), horizontalalignment=\"center\",\n",
 91 |     "                 color=\"white\" if cm[i, j] > thresh else \"black\")\n",
 92 |     "    plt.tight_layout()\n",
 93 |     "    plt.ylabel('True class')\n",
 94 |     "    plt.xlabel('Predicted class')"
 95 |    ]
 96 |   },
 97 |   {
 98 |    "cell_type": "code",
 99 |    "execution_count": null,
100 |    "metadata": {
101 |     "collapsed": false
102 |    },
103 |    "outputs": [],
104 |    "source": [
105 |     "data_root_dir = './training_set'  # data folder sits next to this notebook (like './model'); see training_set/readme.txt\n",
106 |     "train_file_list, val_file_list = test_val_split_v2(data_root_dir, train_percentage = 90)  # presumably holds out the remaining 10% - confirm\n",
107 |     "ref_file = os.path.join(data_root_dir, 'REFERENCE.csv')  # annotation file with the per-recording labels"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": null,
113 |    "metadata": {
114 |     "collapsed": false
115 |    },
116 |    "outputs": [],
117 |    "source": [
118 |     "df = pd.read_csv(ref_file, delimiter = ',')\n",
119 |     "# Series.as_matrix() was removed in pandas 1.0; .values returns the same NumPy array on old and new versions\n",
120 |     "RECORDS = df.Recording.values\n",
121 |     "LABEL_1 = df.First_label.values\n",
122 |     "LABEL_2 = df.Second_label.values\n",
123 |     "LABEL_3 = df.Third_label.values\n",
124 |     "#\n",
125 |     "N = len(RECORDS)  # number of annotated recordings"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "code",
130 |    "execution_count": null,
131 |    "metadata": {
132 |     "collapsed": false
133 |    },
134 |    "outputs": [],
135 |    "source": [
136 |     "# build the graph\n",
137 |     "inputs, labels, seq_length, logits, accuracy = build_model_graph()"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "code",
142 |    "execution_count": null,
143 |    "metadata": {
144 |     "collapsed": true
145 |    },
146 |    "outputs": [],
147 |    "source": [
148 |     "model_dir = './model'"
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "code",
153 |    "execution_count": null,
154 |    "metadata": {
155 |     "collapsed": false
156 |    },
157 |    "outputs": [],
158 |    "source": [
159 |     "# prediction: classify every validation recording with the restored model\n",
160 |     "# result containers, one entry per recording\n",
161 |     "sub_id = list()\n",
162 |     "sub_actual = list()\n",
163 |     "sub_predict = list()\n",
164 |     "#\n",
165 |     "with tf.Session() as sess:\n",
166 |     "    # load the model\n",
167 |     "    load_model(model_dir, sess)\n",
168 |     "    #\n",
169 |     "    for val_file in val_file_list:\n",
170 |     "        record = re.search('A[0-9]+', val_file).group(0)  # recording id taken from the file name\n",
171 |     "        sub_id.append(record)\n",
172 |     "        record_idx = np.squeeze(np.where(RECORDS == record))  # position of this recording in the reference table\n",
173 |     "        parent_label = LABEL_1[record_idx]\n",
174 |     "        sub_actual.append(parent_label - 1)  # shift down by one (presumably 1-based file labels -> 0-based class indices)\n",
175 |     "        peaks, features = peak_detector_with_refinement(val_file, 'sym8', max_level, window_size, window_size_for_threshold, search_radius)\n",
176 |     "        segs, labs, lens = extract_ecg_segments_v2(peaks, val_file, parent_label, 1000)\n",
177 |     "        #segs = np.transpose(segs, axes = (0, 2, 1))\n",
178 |     "        logits_val, acc = sess.run([logits, accuracy], feed_dict = {inputs: segs, labels: labs, seq_length: lens})  # one forward pass for both fetches\n",
179 |     "        sub_predict.append(prediction_v2(np.argmax(logits_val, axis = 1)))\n",
180 |     "        print('processed: ' + RECORDS[record_idx])\n",
181 |     "#\n",
182 |     "data_dict = {'id': sub_id, 'actual_class': sub_actual, 'predicted_class': sub_predict}\n",
183 |     "df = pd.DataFrame(data = data_dict)"
184 |    ]
185 |   },
186 |   {
187 |    "cell_type": "code",
188 |    "execution_count": null,
189 |    "metadata": {
190 |     "collapsed": false
191 |    },
192 |    "outputs": [],
193 |    "source": [
194 |     "from sklearn.metrics import f1_score, confusion_matrix\n",
195 |     "#\n",
196 |     "GT_labels = df['actual_class'].tolist()\n",
197 |     "predict_labels = df['predicted_class'].tolist()\n",
198 |     "# pin the label set to the 9 class indices (0-8) so the matrix is always 9x9, even if a class is absent from this split\n",
199 |     "con_mtx = confusion_matrix(GT_labels, predict_labels, labels = list(range(9)))"
200 |    ]
201 |   },
202 |   {
203 |    "cell_type": "code",
204 |    "execution_count": null,
205 |    "metadata": {
206 |     "collapsed": false
207 |    },
208 |    "outputs": [],
209 |    "source": [
210 |     "# F1 scores: micro-averaged, weighted, then per class (average = None)\n",
211 |     "for averaging in ('micro', 'weighted', None):\n",
212 |     "    print(f1_score(GT_labels, predict_labels, average = averaging))"
213 |    ]
214 |   },
215 |   {
216 |    "cell_type": "code",
217 |    "execution_count": null,
218 |    "metadata": {
219 |     "collapsed": true
220 |    },
221 |    "outputs": [],
222 |    "source": [
223 |     "class_names = ['Normal', 'AF', 'I-AVB', 'LBBB', 'RBBB', 'PAC', 'PVC', 'STD', 'STE']  # tick labels, presumably in class-index order\n",
224 |     "plt.figure()\n",
225 |     "# row-normalised confusion matrix of the predictions collected above\n",
226 |     "plot_confusion_matrix(cm=con_mtx, classes=class_names, normalize=True, title='Confusion Matrix')"
227 |    ]
228 |   }
229 |  ],
230 |  "metadata": {
231 |   "anaconda-cloud": {},
232 |   "kernelspec": {
233 |    "display_name": "Python [conda root]",
234 |    "language": "python",
235 |    "name": "conda-root-py"
236 |   },
237 |   "language_info": {
238 |    "codemirror_mode": {
239 |     "name": "ipython",
240 |     "version": 3
241 |    },
242 |    "file_extension": ".py",
243 |    "mimetype": "text/x-python",
244 |    "name": "python",
245 |    "nbconvert_exporter": "python",
246 |    "pygments_lexer": "ipython3",
247 |    "version": "3.5.2"
248 |   }
249 |  },
250 |  "nbformat": 4,
251 |  "nbformat_minor": 1
252 | }
253 | 


--------------------------------------------------------------------------------
/Physiological signal challenge/validation_set/readme.txt:
--------------------------------------------------------------------------------
1 | Put the validation data and annotation file (in exactly the same format as the training_set) in this directory.
2 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Repository of my deep learning projects
2 | 


--------------------------------------------------------------------------------