├── data_cleaning_complete_set.py ├── data_cleaning_record.py ├── gui.py ├── merge_files.py ├── pacing_coord.py ├── patient.py ├── patientIds.py ├── plotter.py ├── readme.txt └── requirements.txt /data_cleaning_complete_set.py: -------------------------------------------------------------------------------- 1 | """ 2 | author: Varun Rajiv Mantri 3 | """ 4 | 5 | from matplotlib import pyplot as plt 6 | 7 | 8 | def file_reader(file_name): 9 | ''' 10 | This method reads in the data from a csv file 11 | :param file_name:name of the input file 12 | :return: 13 | ''' 14 | complete_data = [] 15 | with open(file_name) as file: 16 | for row in file: 17 | row = row.strip() 18 | row = row.split(",") 19 | complete_data.append(row) 20 | return complete_data 21 | 22 | 23 | def peak_finder(row, mean): 24 | ''' 25 | This method finds the peaks 26 | :param row: record under consideration 27 | :param mean: mean value for that record 28 | :return: peaks 29 | ''' 30 | peaks = [] 31 | previous = abs(float(row[0])) 32 | current = abs(float(row[1])) 33 | next_item = abs(float(row[2])) 34 | biggest_peak = 0 35 | for index in range(3, len(row) - 1): 36 | if current > previous and current > next_item and current > (mean + 5): 37 | peaks.append(index) 38 | previous = current 39 | current = abs(float(row[index])) 40 | if current > biggest_peak: 41 | biggest_peak = current 42 | next_item = abs(float(row[index + 1])) 43 | return peaks, biggest_peak 44 | 45 | 46 | def mean_calculator(row): 47 | mean = 0 48 | for value in row: 49 | mean = mean + float(value) 50 | mean = round(mean / len(row), 3) 51 | return mean 52 | 53 | 54 | def rejection_condition_two(complete_data): 55 | ''' 56 | This method looks for peaks and counts only those peaks that have sudden falls at precisely same location 57 | across more than 5 leads 58 | :param complete_data: Complete input data 59 | :return: Bad data list 60 | ''' 61 | number_of_sudden_falls = 0 62 | id = 0 63 | bad_list = [] 64 | for row in complete_data: 65 | lower_limit = 0 66 | upper_limit = 100 67 | leads = 0 68 | location_recorder = [False for _ in range(12)] 69 | index_recorder = [False for _ in range(12)] 70 | while leads < 12: 71 | max_peak = float("-inf") 72 | for index in range(lower_limit, upper_limit): 73 | if float(row[index]) > max_peak: 74 | max_peak = float(row[index]) 75 | location = index 76 | lower_limit = upper_limit 77 | upper_limit = upper_limit + 100 78 | 79 | mean = mean_calculator(row) 80 | # checking if the next value is the biggest 81 | if location + 1 < 1200: 82 | if float(row[location + 1]) < (mean + 5): 83 | location_recorder[leads] = True 84 | index_recorder[leads] = location - lower_limit 85 | if location - 1 >= 0: 86 | if float(row[location - 1]) < (mean + 5): 87 | location_recorder[leads] = True 88 | index_recorder[leads] = location - lower_limit 89 | leads = leads + 1 90 | max_count = 0 91 | for index in range(len(index_recorder) - 1): 92 | current = index_recorder[index] 93 | counter = 0 94 | for index_1 in range(index, len(index_recorder)): 95 | if current == index_recorder[index_1]: 96 | counter = counter + 1 97 | if max_count < counter: 98 | max_count = counter 99 | value = current 100 | counter = 0 101 | for index in range(len(index_recorder)): 102 | if index_recorder[index] == value: 103 | if location_recorder[index] == True: 104 | counter = counter + 1 105 | if counter >= 5: 106 | bad_list.append(id) 107 | id = id + 1 108 | return bad_list 109 | 110 | 111 | def rejection_condition_one(complete_data): 112 | ''' 113 | This method rejects the 
records 114 | :param complete_data: 115 | :return: correct and incorrect records list 116 | ''' 117 | record_id = 0 118 | incorrect_records_list = [] 119 | correct_records = [] 120 | flag = False 121 | for row in complete_data: 122 | mean = 0 123 | for value in row: 124 | mean = mean + float(value) 125 | mean = round(mean / len(row), 3) 126 | count = 0 127 | upper_limit = 100 128 | lower_limit = 0 129 | # finding peaks 130 | peaks, biggest_peak = peak_finder(row, mean) 131 | mid_value = (mean + biggest_peak) / 2 132 | while (count < 12): 133 | for i in range(lower_limit, lower_limit + 9): 134 | # cheking first five 135 | if abs(float(row[i])) >= mean + mid_value: 136 | flag = True 137 | break 138 | for i in range(upper_limit - 1, upper_limit - 10, -1): 139 | # cheking first five 140 | # print(upper_limit) 141 | if abs(float(row[i])) >= mean + mid_value: 142 | flag = True 143 | break 144 | if flag == True: 145 | break 146 | count = count + 1 147 | lower_limit = upper_limit 148 | upper_limit = upper_limit + 100 149 | for index in peaks: 150 | if round(float(row[index + 1])) == 0: 151 | flag = True 152 | elif round(float(row[index - 1])) == 0: 153 | flag = True 154 | if flag == True: 155 | incorrect_records_list.append(record_id) 156 | flag = False 157 | else: 158 | correct_records.append(record_id) 159 | record_id = record_id + 1 160 | return incorrect_records_list, correct_records 161 | 162 | 163 | def plotter(data, fig): 164 | lower_limit = 0 165 | upper_limit = 100 166 | figure_count = 1 167 | plt.figure(fig) 168 | row = 1 169 | col = 1 170 | for _ in range(12): 171 | temp = [] 172 | for index in range(lower_limit, upper_limit): 173 | temp.append(float(data[index])) 174 | plt.subplot(4, 3, figure_count) 175 | plt.plot(temp) 176 | plt.title("Lead" + str(figure_count)) 177 | lower_limit = upper_limit 178 | upper_limit = upper_limit + 100 179 | figure_count = figure_count + 1 180 | 181 | 182 | def combine(incorrect_record_list, bad_list): 183 | dicto = {} 184 | for item in incorrect_record_list: 185 | dicto[item] = True 186 | for item in bad_list: 187 | if item not in dicto.keys(): 188 | dicto[item] = True 189 | complete_list = [] 190 | for item in dicto.keys(): 191 | complete_list.append(item) 192 | return complete_list 193 | 194 | 195 | def compute(complete_data): 196 | incorrect_record_list, correct_records = rejection_condition_one(complete_data) 197 | bad_list = rejection_condition_two(complete_data) 198 | incorrect_record_list = combine(incorrect_record_list, bad_list) 199 | return incorrect_record_list 200 | 201 | # def main(): 202 | # complete_data=file_reader("train_x.csv") 203 | # 204 | # 205 | # 206 | # print("-----------------------------------") 207 | # print("Incorrect record ID's: ") 208 | # print(incorrect_record_list) 209 | # print("Correct record ID's:") 210 | # print(correct_records) 211 | # print("\n\nPercentage of records that are wrong:"+str(round((len(incorrect_record_list)/len(complete_data))*100,3))+"%") 212 | # print("-----------------------------------") 213 | # plotter(complete_data[correct_records[8]],0) 214 | # plt.title("Good Records") 215 | # plotter(complete_data[incorrect_record_list[8]], 1) 216 | # plt.title("Bad Records") 217 | # plt.show() 218 | # 219 | # 220 | # 221 | # main() 222 | -------------------------------------------------------------------------------- /data_cleaning_record.py: -------------------------------------------------------------------------------- 1 | """ 2 | author: Varun Rajiv Mantri 3 | """ 4 | 5 | from matplotlib import pyplot as 
plt 6 | 7 | 8 | def file_reader(file_name): 9 | ''' 10 | This method reads in the data from a csv file 11 | :param file_name:name of the input file 12 | :return: 13 | ''' 14 | complete_data = [] 15 | with open(file_name) as file: 16 | for row in file: 17 | row = row.strip() 18 | row = row.split(",") 19 | complete_data.append(row) 20 | return complete_data 21 | 22 | 23 | def peak_finder(row, mean): 24 | ''' 25 | This method finds the peaks 26 | :param row: record under consideration 27 | :param mean: mean value for that record 28 | :return: peaks 29 | ''' 30 | peaks = [] 31 | previous = abs(float(row[0])) 32 | current = abs(float(row[1])) 33 | next_item = abs(float(row[2])) 34 | biggest_peak = 0 35 | for index in range(3, len(row) - 1): 36 | if current > previous and current > next_item and current > (mean + 5): 37 | peaks.append(index) 38 | previous = current 39 | current = abs(float(row[index])) 40 | if current > biggest_peak: 41 | biggest_peak = current 42 | next_item = abs(float(row[index + 1])) 43 | return peaks, biggest_peak 44 | 45 | 46 | def mean_calculator(row): 47 | mean = 0 48 | for value in row: 49 | mean = mean + float(value) 50 | mean = round(mean / len(row), 3) 51 | return mean 52 | 53 | 54 | def rejection_condition_two(row): 55 | ''' 56 | This method looks for peaks and counts only those peaks that have sudden falls at precisely same location 57 | across more than 5 leads 58 | :param complete_data: Complete input data 59 | :return: Bad data list 60 | ''' 61 | lower_limit = 0 62 | upper_limit = 100 63 | leads = 0 64 | location_recorder = [False for _ in range(12)] 65 | index_recorder = [False for _ in range(12)] 66 | while leads < 12: 67 | max_peak = float("-inf") 68 | for index in range(lower_limit, upper_limit): 69 | if float(row[index]) > max_peak: 70 | max_peak = float(row[index]) 71 | location = index 72 | lower_limit = upper_limit 73 | upper_limit = upper_limit + 100 74 | 75 | mean = mean_calculator(row) 76 | # checking if the next value is the biggest 77 | if location + 1 < 1200: 78 | if float(row[location + 1]) < (mean + 5): 79 | location_recorder[leads] = True 80 | index_recorder[leads] = location - lower_limit 81 | if location - 1 >= 0: 82 | if float(row[location - 1]) < (mean + 5): 83 | location_recorder[leads] = True 84 | index_recorder[leads] = location - lower_limit 85 | leads = leads + 1 86 | max_count = 0 87 | for index in range(len(index_recorder) - 1): 88 | current = index_recorder[index] 89 | counter = 0 90 | for index_1 in range(index, len(index_recorder)): 91 | if current == index_recorder[index_1]: 92 | counter = counter + 1 93 | if max_count < counter: 94 | max_count = counter 95 | value = current 96 | counter = 0 97 | for index in range(len(index_recorder)): 98 | if index_recorder[index] == value: 99 | if location_recorder[index] == True: 100 | counter = counter + 1 101 | if counter >= 5: 102 | return True 103 | return False 104 | 105 | 106 | def rejection_condition_one(row): 107 | ''' 108 | This method rejects the records 109 | :param complete_data: 110 | :return: correct and incorrect records list 111 | ''' 112 | flag = False 113 | mean = 0 114 | for value in row: 115 | mean = mean + float(value) 116 | mean = round(mean / len(row), 3) 117 | count = 0 118 | upper_limit = 100 119 | lower_limit = 0 120 | # finding peaks 121 | peaks, biggest_peak = peak_finder(row, mean) 122 | mid_value = (mean + biggest_peak) / 2 123 | while (count < 12): 124 | for i in range(lower_limit, lower_limit + 9): 125 | # cheking first five 126 | if abs(float(row[i])) >= mean + 
mid_value: 127 | flag = True 128 | break 129 | for i in range(upper_limit - 1, upper_limit - 10, -1): 130 | # cheking first five 131 | # print(upper_limit) 132 | if abs(float(row[i])) >= mean + mid_value: 133 | flag = True 134 | break 135 | if flag == True: 136 | break 137 | count = count + 1 138 | lower_limit = upper_limit 139 | upper_limit = upper_limit + 100 140 | for index in peaks: 141 | if round(float(row[index + 1])) == 0: 142 | flag = True 143 | elif round(float(row[index - 1])) == 0: 144 | flag = True 145 | if flag == True: 146 | return True 147 | return False 148 | 149 | 150 | def plotter(data, fig): 151 | lower_limit = 0 152 | upper_limit = 100 153 | figure_count = 1 154 | plt.figure(fig) 155 | row = 1 156 | col = 1 157 | for _ in range(12): 158 | temp = [] 159 | for index in range(lower_limit, upper_limit): 160 | temp.append(float(data[index])) 161 | plt.subplot(4, 3, figure_count) 162 | plt.plot(temp) 163 | plt.title("Lead" + str(figure_count)) 164 | lower_limit = upper_limit 165 | upper_limit = upper_limit + 100 166 | figure_count = figure_count + 1 167 | 168 | 169 | def combine(incorrect_record_list, bad_list): 170 | dicto = {} 171 | for item in incorrect_record_list: 172 | dicto[item] = True 173 | for item in bad_list: 174 | if item not in dicto.keys(): 175 | dicto[item] = True 176 | complete_list = [] 177 | for item in dicto.keys(): 178 | complete_list.append(item) 179 | return complete_list 180 | 181 | 182 | def check_quality(record): 183 | status = rejection_condition_one(record) 184 | if status == False: 185 | status = rejection_condition_two(record) 186 | if status == False: 187 | return 'good' 188 | else: 189 | return 'bad' 190 | 191 | 192 | ''' 193 | def main(): 194 | complete_data=file_reader("train_x.csv") 195 | check_quality(record) 196 | incorrect_record_list,correct_records=rejection_condition_one(complete_data) 197 | bad_list=rejection_condition_two(complete_data) 198 | incorrect_record_list=combine(incorrect_record_list,bad_list) 199 | print("-----------------------------------") 200 | print("Incorrect record ID's: ") 201 | print(incorrect_record_list) 202 | print("Correct record ID's:") 203 | print(correct_records) 204 | print("\n\nPercentage of records that are wrong:"+str(round((len(incorrect_record_list)/len(complete_data))*100,3))+"%") 205 | print("-----------------------------------") 206 | plotter(complete_data[correct_records[8]],0) 207 | plt.title("Good Records") 208 | plotter(complete_data[incorrect_record_list[8]], 1) 209 | plt.title("Bad Records") 210 | plt.show() 211 | ''' 212 | 213 | 214 | # main() 215 | -------------------------------------------------------------------------------- /gui.py: -------------------------------------------------------------------------------- 1 | from tkinter import * 2 | from tkinter import Tk, Label, Button 3 | from plotter import plotter 4 | from matplotlib.figure import Figure 5 | from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg 6 | import patientIds 7 | import tkinter as tk 8 | # import yaml 9 | import os.path 10 | import patient 11 | import plotter 12 | import pickle 13 | import csv 14 | 15 | COLOR = 'BLACK' 16 | 17 | 18 | class Application(Frame): 19 | def __init__(self): 20 | super().__init__() 21 | self.initUI() 22 | self.dynamicFrames = [] 23 | self.dynamicPlots = [] 24 | self.initIndexes() 25 | self.coord = None 26 | self.label = None 27 | 28 | def initIndexes(self): 29 | self.coordIdx = 0 30 | self.pId = 0 31 | 32 | def patientObjects(self): 33 | patientObjects = None 34 | with 
open("data.pickle", 'rb') as f: 35 | unpickler = pickle.Unpickler(f) 36 | patientObjects = unpickler.load() 37 | return patientObjects 38 | 39 | def initializeGrid(self): 40 | self.grid(row=0, column=0, sticky=N + S + E + W) 41 | for row in range(0, 13): 42 | Grid.rowconfigure(self, row, weight=1) 43 | for column in range(0, 27): 44 | Grid.columnconfigure(self, column, weight=1) 45 | 46 | def initUI(self): 47 | self.master.title("ECG Validator") 48 | self.initializeGrid() 49 | patientIdFrame = patientIds.patientButtonFrame(self, 50 | self.patientObjects()) 51 | patientIdFrame.grid(row=0, 52 | column=0, 53 | columnspan=6, 54 | rowspan=7, 55 | sticky=W + E + N + S) 56 | 57 | def onClickPatient(self, patient): 58 | self.initIndexes() 59 | self.clearDynamicFrames(self.dynamicFrames) 60 | 61 | pacingSiteFrame = patientIds.pacingSiteFrame(self, patient) 62 | pacingSiteFrame.grid(row=7, 63 | column=0, 64 | columnspan=6, 65 | rowspan=7, 66 | sticky=W + E + N + S) 67 | 68 | self.dynamicFrames.append(pacingSiteFrame) 69 | 70 | def onClickCoordinate(self, coord): 71 | self.coord = None 72 | self.initIndexes() 73 | self.addButtonsBelowImage() 74 | self.showPlot(coord) 75 | 76 | def clearDynamicFrames(self, dynamicFrames): 77 | for frame in dynamicFrames: 78 | dynamicFrames.remove(frame) 79 | frame.grid_forget() 80 | frame.destroy() 81 | 82 | def showStat(self): 83 | frame = Frame(self) 84 | frame.grid() 85 | stat = Label(frame, text='hhh') 86 | Grid.rowconfigure(frame, 1, weight=1) 87 | Grid.columnconfigure(frame, 1, weight=1) 88 | frame.grid(row=1, column=6, rowspan=1, columnspan=21, sticky=W + E + N + S) 89 | 90 | def showPlot(self, coord): 91 | print('loading frame') 92 | self.showStatus(coord.samples_stat[self.coordIdx]) 93 | 94 | self.coord = coord 95 | subplot, a = plotter.plotter(coord.samples[self.coordIdx]) 96 | canvas = FigureCanvasTkAgg(subplot, master=self) 97 | canvas.show() 98 | self.clearDynamicFrames(self.dynamicPlots) 99 | frame = canvas.get_tk_widget() 100 | self.dynamicPlots.append(frame) 101 | 102 | Grid.rowconfigure(frame, 1, weight=1) 103 | Grid.columnconfigure(frame, 1, weight=1) 104 | 105 | frame.grid(row=1, 106 | column=6, 107 | columnspan=21, 108 | rowspan=11, 109 | sticky=W + E + N + S) 110 | print("done") 111 | 112 | def nextPlot(self): 113 | coord = self.coord 114 | if (self.coordIdx >= 0): 115 | if (coord.samples[self.coordIdx]): 116 | self.coordIdx += 1 117 | self.showPlot(coord) 118 | 119 | def prevPlot(self): 120 | coord = self.coord 121 | if (self.coordIdx >= 0): 122 | if (coord.samples[self.coordIdx]): 123 | print(coord.samples_stat[self.coordIdx]) 124 | print(coord.samples_index) 125 | self.coordIdx -= 1 126 | self.showPlot(coord) 127 | 128 | def showStatus(self, ltext): 129 | if (self.label == None): 130 | frame = Frame(self) 131 | frame.grid() 132 | 133 | Grid.rowconfigure(frame, 0, pad=3, weight=1) 134 | Grid.columnconfigure(frame, 0, pad=3, weight=1) 135 | self.label = Label(frame, text=ltext) 136 | self.label.grid(row=0, column=0, columnspan=21) 137 | frame.grid(row=0, column=6, rowspan=1, columnspan=21, sticky=W + E + N + S) 138 | else: 139 | self.label.config(text=ltext) 140 | 141 | def addButtonsBelowImage(self): 142 | frame = Frame(self) 143 | frame.grid() 144 | 145 | Grid.rowconfigure(frame, 0, pad=3, weight=1) 146 | for col in range(0, 24): 147 | Grid.columnconfigure(frame, col, pad=3, weight=1) 148 | 149 | prevButton = Button(frame, text="prev") 150 | nextButton = Button(frame, text="next", command=self.nextPlot) 151 | redothisrecord = lambda: 
self.create_redo_record() 152 | redothispace = lambda: self.create_redo_pace() 153 | statwrong = lambda: self.statwrong_file() 154 | 155 | redoThisRecordButton = Button(frame, text="Redo This Record", command=redothisrecord) 156 | redoEntireButton = Button(frame, text="Redo Entire Pacing Site", command=redothispace) 157 | statsWrongButton = Button(frame, text="Stats Wrong", command=statwrong) 158 | 159 | prevButton.grid(row=0, 160 | column=0, 161 | columnspan=5, 162 | sticky=W + E + N + S) 163 | 164 | redoThisRecordButton.grid(row=0, 165 | column=5, 166 | columnspan=5, 167 | sticky=W + E + N + S) 168 | 169 | redoEntireButton.grid(row=0, 170 | column=10, 171 | columnspan=5, 172 | sticky=W + E + N + S) 173 | 174 | statsWrongButton.grid(row=0, 175 | column=15, 176 | columnspan=5, 177 | sticky=W + E + N + S) 178 | 179 | nextButton.grid(row=0, 180 | column=20, 181 | columnspan=5, 182 | sticky=W + E + N + S) 183 | 184 | frame.grid(row=12, column=6, rowspan=2, columnspan=21, sticky=W + E + N + S) 185 | 186 | def create_redo_record(self): 187 | directory = './pacingsite' 188 | if not os.path.exists(directory): 189 | os.makedirs(directory) 190 | 191 | coord = self.coord 192 | filename = str(coord.file_name) + '.txt' 193 | with open(os.path.join(directory, filename), 'w') as f: 194 | f.write("Pacing Site :{}\n".format(coord.pacingSite)) 195 | f.write("File Name : file{}.big\n".format(coord.file_name)) 196 | f.write("Record : {}\n".format(coord.samples_stat[self.coordIdx])) 197 | f.write(' Record Index :{} \n'.format(coord.samples_index[self.coordIdx])) 198 | 199 | f.close() 200 | print(filename + ' created') 201 | newfile = [] 202 | with open('persons.csv', 'r') as file: 203 | reader = csv.reader(file) 204 | for row in reader: 205 | if int(row[2]) == int(coord.samples_index[self.coordIdx]): 206 | row.pop(4) 207 | row.append('Redo Record') 208 | newfile.append(row) 209 | 210 | else: 211 | newfile.append(row) 212 | 213 | with open('persons.csv', 'w') as csvfile: 214 | 215 | filewriter = csv.writer(csvfile, delimiter=',', 216 | lineterminator='\n') 217 | for row in newfile: 218 | filewriter.writerow(row) 219 | print("csv wrote") 220 | 221 | def create_redo_pace(self): 222 | directory = './record' 223 | if not os.path.exists(directory): 224 | os.makedirs(directory) 225 | coord = self.coord 226 | filename = str(coord.file_name) + '.txt' 227 | with open(os.path.join(directory, filename), 'w') as f: 228 | f.write("Pacing Site :{}\n".format(coord.pacingSite)) 229 | f.write("File Name : file{}.big\n".format(coord.file_name)) 230 | f.write('Stat:{}\n'.format(coord.stats)) 231 | f.write('Pace Start Index :{} \n'.format(coord.samples_index[self.coordIdx] - self.coordIdx)) 232 | f.write('Pace End Index : {} \n'.format( 233 | (coord.samples_index[self.coordIdx] - self.coordIdx) + len(coord.samples_index) - 1)) 234 | f.close() 235 | 236 | newfile = [] 237 | with open('persons.csv', 'r') as file: 238 | reader = csv.reader(file) 239 | for row in reader: 240 | if int(row[2]) in list(range(coord.samples_index[self.coordIdx] - self.coordIdx, 241 | coord.samples_index[self.coordIdx] - self.coordIdx + len( 242 | coord.samples_index))): 243 | row.pop(4) 244 | row.append('Redo Pacing Site') 245 | newfile.append(row) 246 | 247 | else: 248 | newfile.append(row) 249 | 250 | with open('persons.csv', 'w') as csvfile: 251 | 252 | filewriter = csv.writer(csvfile, delimiter=',', 253 | lineterminator='\n') 254 | for row in newfile: 255 | filewriter.writerow(row) 256 | print("csv wrote") 257 | 258 | print(filename + ' created') 259 
| 260 | def statwrong_file(self): 261 | directory = './statwrong' 262 | if not os.path.exists(directory): 263 | os.makedirs(directory) 264 | coord = self.coord 265 | filename = str(coord.file_name) + '.txt' 266 | with open(os.path.join(directory, filename), 'w') as f: 267 | f.write("Pacing Site :{}\n".format(coord.pacingSite)) 268 | f.write("File Name : file{}.big\n".format(coord.file_name)) 269 | f.write('Stat:{}\n'.format(coord.stats)) 270 | f.write("Record : {}\n".format(coord.samples_stat[self.coordIdx])) 271 | f.write('Pace Start Index :{} \n'.format(coord.samples_index[self.coordIdx])) 272 | 273 | f.close() 274 | print(filename + ' created') 275 | newfile = [] 276 | with open('persons.csv', 'r') as file: 277 | reader = csv.reader(file) 278 | for row in reader: 279 | if int(row[2]) == int(coord.samples_index[self.coordIdx]): 280 | row.pop(4) 281 | row.append('Stat wrong') 282 | newfile.append(row) 283 | 284 | else: 285 | newfile.append(row) 286 | 287 | with open('persons.csv', 'w') as csvfile: 288 | 289 | filewriter = csv.writer(csvfile, delimiter=',', 290 | lineterminator='\n') 291 | for row in newfile: 292 | filewriter.writerow(row) 293 | print("csv wrote") 294 | 295 | 296 | def main(): 297 | root = Tk() 298 | Grid.rowconfigure(root, 0, weight=1) 299 | Grid.columnconfigure(root, 0, weight=1) 300 | app = Application() 301 | root.mainloop() 302 | 303 | 304 | if __name__ == '__main__': 305 | main() 306 | -------------------------------------------------------------------------------- /merge_files.py: -------------------------------------------------------------------------------- 1 | import scipy.io 2 | import numpy as np 3 | from collections import defaultdict 4 | import pyexcel 5 | from patient import Patient 6 | import pickle 7 | import numpy as np 8 | import csv 9 | import sys 10 | 11 | 12 | def compute(filename_master, filename_corrected, filename_xlsx): 13 | data_mat, location_master = readfile_qrsData(filename_master, filename_corrected) 14 | 15 | data_xlsx, case_coord_data = readfile_xlsx(filename_xlsx) 16 | 17 | # find_correlation(data_mat, data_xlsx) 18 | coorelation = mapping(data_mat, data_xlsx) 19 | patient_objects = [] 20 | keyset = data_mat.keys() 21 | with open('persons.csv', 'w') as csvfile: 22 | filewriter = csv.writer(csvfile, delimiter=',') 23 | # filewriter.writerow(['PatientID','PacingID','Record','Status','Action']) 24 | for patient_id in sorted(keyset): 25 | object = Patient(patient_id, list(data_mat[patient_id].keys()), list(data_mat[patient_id].values()), 26 | coorelation[patient_id], data_mat[patient_id], data_xlsx, 27 | case_coord_data[coorelation[patient_id]], location_master, filewriter) 28 | patient_objects.append(object) 29 | 30 | with open("data.pickle", "wb") as f: 31 | pickle.dump(patient_objects, f) 32 | f.close() 33 | 34 | return patient_objects 35 | 36 | 37 | def find_correlation(data_mat, data_xlsx): 38 | correlation = {} 39 | for patient_id in data_mat.keys(): 40 | samples_count = len(data_mat[patient_id]) 41 | for case_number in data_xlsx.keys(): 42 | case_count = len(data_xlsx[case_number]) 43 | if case_count == samples_count: 44 | if patient_id in correlation.keys(): 45 | temp_list = correlation[patient_id] 46 | temp_list.append(case_number) 47 | correlation[patient_id] = temp_list 48 | else: 49 | correlation[patient_id] = [case_number] 50 | # for key in correlation.keys(): 51 | # print(key,correlation[key]) 52 | for patient_id in correlation.keys(): 53 | if len(correlation[patient_id]) > 2: 54 | # print(patient_id) 55 | correlated_list = 
correlation[patient_id] 56 | for case_number in correlated_list: 57 | matching = compare_pacingid(data_mat[patient_id], data_xlsx[case_number]) 58 | print(patient_id, case_number, matching) 59 | 60 | 61 | def compare_pacingid(mat_data, xlsx_data): 62 | matching = 0 63 | 64 | for pacing_id in mat_data.keys(): 65 | for case_pace in xlsx_data.keys(): 66 | pacing_id = [round(elem, 4) for elem in pacing_id] 67 | # print(case_pace,pacing_id) 68 | if case_pace == pacing_id: 69 | matching + 1 70 | return matching 71 | 72 | 73 | def mapping(data_mat, data_xlsx): 74 | correlation = {} 75 | final = {} 76 | for patient_id in data_mat.keys(): 77 | for pacing_coord in data_mat[patient_id].keys(): 78 | for case_number in data_xlsx.keys(): 79 | for case_coord in data_xlsx[case_number].keys(): 80 | if case_coord == pacing_coord: 81 | # print('{} ==== {}'.format(patient_id, case_number)) 82 | if patient_id in correlation.keys(): 83 | if case_number in correlation[patient_id].keys(): 84 | count = correlation[patient_id][case_number] 85 | correlation[patient_id][case_number] = count + 1 86 | else: 87 | correlation[patient_id][case_number] = 1 88 | else: 89 | correlation[patient_id] = {case_number: 1} 90 | for patient_id in correlation.keys(): 91 | for case_number in correlation[patient_id].keys(): 92 | 93 | if len(data_mat[patient_id]) == correlation[patient_id][case_number]: 94 | # print('{} {} {}=={} '.format(patient_id, case_number, correlation[patient_id][case_number], 95 | # len(data_mat[patient_id]))) 96 | final[patient_id] = case_number 97 | 98 | return final 99 | 100 | 101 | def readfile_xlsx(filename_xlsx): 102 | patient_data = {} 103 | case_coord_data = {} 104 | 105 | my_array = pyexcel.get_array(file_name=filename_xlsx) 106 | for record in my_array[2:1014]: 107 | case_number = record[2] 108 | # print(i) 109 | file_name = record[1] 110 | coord = [round(record[9], 4), round(record[10], 4), round(record[11], 4)] 111 | if case_number in patient_data.keys(): 112 | patient_data[case_number][tuple(coord)] = file_name 113 | case_coord_data[case_number][file_name] = coord 114 | 115 | else: 116 | patient_data[case_number] = {tuple(coord): file_name} 117 | case_coord_data[case_number] = {file_name: coord} 118 | 119 | return patient_data, case_coord_data 120 | 121 | 122 | def readfile_qrsData(filename_master, filename_corrected): 123 | mat = scipy.io.loadmat(filename_master) 124 | 125 | train_x = mat['train_x'] 126 | train_y = mat['train_y'] 127 | train_coord = mat['train_coord'] 128 | val_x = mat['val_x'] 129 | val_y = mat['val_y'] 130 | val_coord = mat['val_coord'] 131 | test_x = mat['test_x'] 132 | test_y = mat['test_y'] 133 | test_coord = mat['test_coord'] 134 | mean_x = mat['mean_x'] 135 | std_x = mat['std_x'] 136 | 137 | coord_corrected = scipy.io.loadmat(filename_corrected) 138 | size_test = 0 139 | data_coord_corrected = coord_corrected['data_coord'] 140 | size_train = len(train_x) 141 | size_test = len(test_x) 142 | size_val = len(val_x) 143 | 144 | data_coord_train = data_coord_corrected[size_val + size_test:size_val + size_test + size_train] 145 | data_coord_test = data_coord_corrected[size_val:size_val + size_test] 146 | data_coord_val = data_coord_corrected[:size_val] 147 | 148 | master_data_x = np.concatenate((train_x, test_x, val_x), axis=0) 149 | master_data_y = np.concatenate((train_y, test_y, val_y), axis=0) 150 | master_data_coord = np.concatenate((data_coord_train, data_coord_test, data_coord_val), axis=0) 151 | scipy.io.savemat("master_data.mat", 152 | {'x': master_data_x, 'y': 
master_data_y, 'coord': master_data_coord, 'mean': mean_x, 'std': std_x}) 153 | 154 | location_master = defaultdict(list) 155 | master_index = 0 156 | patient_data = defaultdict(list) 157 | patient_data, master_index, location_master = group_by_patientID(train_x, train_y, train_coord, data_coord_train, 158 | patient_data, master_index, location_master) 159 | patient_data, master_index, location_master = group_by_patientID(test_x, test_y, test_coord, data_coord_test, 160 | patient_data, master_index, location_master) 161 | patient_data, master_index, location_master = group_by_patientID(val_x, val_y, val_coord, data_coord_val, 162 | patient_data, master_index, location_master) 163 | 164 | # for data in patient_data.keys(): 165 | # print(data,len(location_master[data].keys()),len(patient_data[data].keys())) 166 | 167 | return patient_data, location_master 168 | 169 | 170 | def group_by_patientID(X, Y, coord, corrected_coord, patient_data, master_index, location_master): 171 | pacing_site = {} 172 | for i in range(len(X)): 173 | pacing_coord_raw = corrected_coord[i].tolist() 174 | pacing_coord = tuple([round(elem, 8) for elem in pacing_coord_raw]) 175 | if Y[i][1] in patient_data.keys(): 176 | if pacing_coord in patient_data[Y[i][1]].keys(): 177 | pacing_site_samples = patient_data[Y[i][1]][(pacing_coord)] 178 | sample_x = X[i].tolist() 179 | if len(pacing_site_samples) == 1200: 180 | merge = [pacing_site_samples, sample_x] 181 | patient_data[Y[i][1]][pacing_coord] = merge 182 | else: 183 | pacing_site_samples.append(sample_x) 184 | patient_data[Y[i][1]][pacing_coord] = pacing_site_samples 185 | 186 | if pacing_coord in location_master[Y[i][1]].keys(): 187 | # print('pacing present') 188 | index_list = location_master[Y[i][1]][pacing_coord] 189 | index_list.append(master_index) 190 | master_index += 1 191 | location_master[Y[i][1]][pacing_coord] = index_list 192 | else: 193 | # print('pacing not present') 194 | location_master[Y[i][1]][pacing_coord] = [master_index] 195 | master_index += 1 196 | 197 | else: 198 | 199 | samples = X[i].tolist() 200 | # pacing_site={(pacing_coord): samples} 201 | patient_data[Y[i][1]][pacing_coord] = samples 202 | 203 | # location_master[Y[i][1]] = {pacing_coord: [master_index]} 204 | # master_index += 1 205 | 206 | else: 207 | # print('patient present') 208 | samples = X[i].tolist() 209 | 210 | patient_data[Y[i][1]] = {pacing_coord: samples} 211 | 212 | location_master[Y[i][1]] = {pacing_coord: [master_index]} 213 | master_index += 1 214 | 215 | return patient_data, master_index, location_master 216 | 217 | 218 | if __name__ == '__main__': 219 | filename_master = sys.argv[1] 220 | filename_corrected = sys.argv[2] 221 | filename_xlsx = sys.argv[3] 222 | 223 | compute(filename_master, filename_corrected, filename_xlsx) 224 | -------------------------------------------------------------------------------- /pacing_coord.py: -------------------------------------------------------------------------------- 1 | from data_cleaning_complete_set import compute 2 | from data_cleaning_record import check_quality 3 | 4 | 5 | class Pacing_Coord: 6 | def __init__(self, pacing_site, pacing_site_samples, coord_filename_mapping, case_number, case_coord_data_list, 7 | location_list): 8 | # print('there') 9 | # print(pacing_site in location_list.keys()) 10 | self.pacingSite = self.find_serial_numbers(pacing_site, case_coord_data_list) 11 | incorrect_record_list = compute(pacing_site_samples) 12 | self.stats = round((len(incorrect_record_list) / len(pacing_site_samples)) * 100, 3) 
13 | self.samples = pacing_site_samples 14 | 15 | result = [] 16 | for pacing_site_sample in pacing_site_samples: 17 | status = check_quality(pacing_site_sample) 18 | result.append(status) 19 | self.samples_stat = result 20 | # print(location_list) 21 | self.samples_index = location_list[pacing_site] 22 | 23 | self.file_name = self.find_filename(coord_filename_mapping, pacing_site, case_number) 24 | 25 | def find_filename(self, coord_filename_mapping, pacing_site, case_number): 26 | 27 | return coord_filename_mapping[case_number][tuple(pacing_site)] 28 | 29 | def find_serial_numbers(self, pacing_site, case_coord_data_list): 30 | serial = [] 31 | # print(case_coord_data_list) 32 | for data in case_coord_data_list.keys(): 33 | # print(type(pacing_site),type(case_coord_data_list[data])) 34 | if pacing_site == tuple(case_coord_data_list[data]): 35 | serial.append(data) 36 | # print(pacing_site,serial) 37 | return serial 38 | -------------------------------------------------------------------------------- /patient.py: -------------------------------------------------------------------------------- 1 | from data_cleaning_complete_set import compute 2 | import itertools 3 | from pacing_coord import Pacing_Coord 4 | import csv 5 | 6 | 7 | class Patient: 8 | def __init__(self, patient_id, pacing_coord, pacing_samples, casenumber, pacing_coord_sample, 9 | xlsx_data, case_coord_data_list, location_master, filewriter): 10 | # print('here') 11 | self.id = patient_id 12 | pacing_samples = list(itertools.chain.from_iterable(pacing_samples)) 13 | 14 | self.pacing_samples = pacing_samples 15 | self.case_number = casenumber 16 | 17 | self.pacing_coord_samples = pacing_coord_sample 18 | 19 | incorrect_record_list = compute(pacing_samples) 20 | self.stat = round((len(incorrect_record_list) / len(pacing_samples)) * 100, 3) 21 | 22 | coords = [] 23 | for pacing_sample in pacing_coord: 24 | coords.append(Pacing_Coord(pacing_sample, pacing_coord_sample[pacing_sample], xlsx_data, casenumber, 25 | case_coord_data_list, location_master[patient_id])) 26 | self.pacing_coord = coords 27 | 28 | for coord in coords: 29 | for i in range(len(coord.samples_index)): 30 | filewriter.writerow( 31 | [patient_id, coord.pacingSite, coord.samples_index[i], coord.samples_stat[i], 'No Action']) 32 | -------------------------------------------------------------------------------- /patientIds.py: -------------------------------------------------------------------------------- 1 | from tkinter import * 2 | from tkinter import Tk, W, E, N, S 3 | from tkinter.ttk import Frame, Button, Entry, Style 4 | import math 5 | 6 | def patientButtonFrame(master, ids): 7 | frame = Frame(master) 8 | frame.grid() 9 | 10 | for row in range(0, 7): 11 | Grid.rowconfigure(frame, row, pad=3, weight=1) 12 | 13 | for col in range(0, 6): 14 | Grid.columnconfigure(frame, col, pad=3, weight=1) 15 | 16 | 17 | idx = 0 18 | for row in range(0, 7): 19 | for column in range(0, 6): 20 | if idx < len(ids): 21 | patient = ids[idx] 22 | idx += 1 23 | anonFunc = lambda patient=patient: master.onClickPatient(patient) 24 | button = Button(frame, 25 | text="{0}, stat: {1}".format(patient.id, patient.stat), 26 | command=anonFunc) 27 | button.grid(row=row, column=column, sticky= W+E+N+S) 28 | 29 | return frame 30 | 31 | def pacingSiteFrame(master, patient): 32 | frame = Frame(master) 33 | frame.grid() 34 | rows = math.ceil(len(patient.pacing_coord) / 6.0) 35 | for row in range(0, rows): 36 | Grid.rowconfigure(frame, row, pad=3, weight=1) 37 | for col in range(0, 6): 38 | 
Grid.columnconfigure(frame, col, pad=3, weight=1)
 39 | 
 40 |     idx = 0
 41 |     for row in range(0, rows):
 42 |         for col in range(0, 6):
 43 |             if idx < len(patient.pacing_coord):
 44 |                 coord = patient.pacing_coord[idx]
 45 |                 idx += 1
 46 |                 anonFunc = lambda coord=coord: master.onClickCoordinate(coord)
 47 |                 button = Button(frame,
 48 |                                 text="{0} stats: {1}".format(coord.pacingSite, coord.stats),
 49 |                                 command=anonFunc)
 50 |                 button.grid(row=row, column=col, sticky= W+E+N+S)
 51 | 
 52 |     return frame
 53 | 
--------------------------------------------------------------------------------
/plotter.py:
--------------------------------------------------------------------------------
  1 | from matplotlib import pyplot as plt
  2 | from matplotlib.figure import Figure
  3 | 
  4 | 
  5 | def plotter(data):
  6 |     plt = Figure(figsize=(5, 4), dpi=100)
  7 |     lower_limit = 0
  8 |     upper_limit = 100
  9 |     figure_count = 1
 10 | 
 11 |     for _ in range(12):
 12 |         temp = []
 13 |         for index in range(lower_limit, upper_limit):
 14 |             temp.append(float(data[index]))
 15 |         a = plt.add_subplot(4, 3, figure_count)
 16 |         a.plot(temp)
 17 |         lower_limit = upper_limit
 18 |         upper_limit = upper_limit + 100
 19 |         figure_count = figure_count + 1
 20 |     return plt, a
 21 | 
--------------------------------------------------------------------------------
/readme.txt:
--------------------------------------------------------------------------------
  1 | How to Use:
  2 | 
  3 | 1) Install the requirements:
  4 |    pip install -r requirements.txt
  5 | 
  6 | 2) python3 merge_files.py <master .mat file> <corrected-coordinates .mat file> <coordinates .xlsx file>
  7 | 
  8 |    This generates data.pickle, persons.csv and master_data.mat in the current directory.
  9 |    These files are used by the application. I have already run this step and generated the files.
 10 | 
 11 | 
 12 | 3) python3 gui.py
 13 | 
 14 |    This runs the application.
 15 |    Patient IDs 1-39 are listed, each with its percentage of "not good" instances.
 16 |    Clicking a patient ID lists all of that patient's unique pacing sites.
 17 | 
 18 |    Pacing sites are shown as buttons together with their statistics. Upon selection, each of the
 19 |    12 leads is plotted and three options for classifying that instance are listed:
 20 | 
 21 |    Redo Record      |---> creates a file with the details and
 22 |    Redo Pacing Site |     records the chosen action in persons.csv
 23 |    Stat Wrong       |
 24 | 
 25 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
  1 | cycler==0.10.0
  2 | et-xmlfile==1.0.1
  3 | jdcal==1.3
  4 | lml==0.0.1
  5 | matplotlib==2.1.0
  6 | numpy==1.13.3
  7 | openpyxl==2.4.9
  8 | pyexcel==0.5.6
  9 | pyexcel-io==0.5.4
 10 | pyexcel-xlsx==0.5.4
 11 | pyparsing==2.2.0
 12 | python-dateutil==2.6.1
 13 | pytz==2017.3
 14 | scipy==1.0.0
 15 | six==1.11.0
 16 | texttable==1.1.1
 17 | yml==0.0.1
 18 | 
--------------------------------------------------------------------------------
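
Usage sketch (not part of the original repository): a minimal, hypothetical example of loading the
data.pickle written by merge_files.py and inspecting one patient outside the GUI. The attributes used
below (id, stat, pacing_coord, samples, samples_stat, stats, file_name) come from patient.py and
pacing_coord.py, and check_quality mirrors how pacing_coord.py and gui.py already use these objects;
the patient and record indices are placeholders.

    import pickle
    from data_cleaning_record import check_quality

    # load the pickled list of Patient objects produced by merge_files.compute()
    with open("data.pickle", "rb") as f:
        patients = pickle.Unpickler(f).load()

    first = patients[0]              # placeholder index: any patient works
    print(first.id, first.stat)      # patient ID and its percentage of rejected records

    for coord in first.pacing_coord:
        # each Pacing_Coord keeps the raw 1200-sample records plus a 'good'/'bad' label per record
        print(coord.file_name, coord.stats, coord.samples_stat[0])

    # re-check a single record with the per-record cleaner
    print(check_quality(first.pacing_coord[0].samples[0]))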