├── data_cleaning_complete_set.py ├── data_cleaning_record.py ├── gui.py ├── merge_files.py ├── pacing_coord.py ├── patient.py ├── patientIds.py ├── plotter.py ├── readme.txt └── requirements.txt /data_cleaning_complete_set.py: -------------------------------------------------------------------------------- 1 | """ 2 | author: Varun Rajiv Mantri 3 | """ 4 | 5 | from matplotlib import pyplot as plt 6 | 7 | 8 | def file_reader(file_name): 9 | ''' 10 | This method reads in the data from a csv file 11 | :param file_name:name of the input file 12 | :return: 13 | ''' 14 | complete_data = [] 15 | with open(file_name) as file: 16 | for row in file: 17 | row = row.strip() 18 | row = row.split(",") 19 | complete_data.append(row) 20 | return complete_data 21 | 22 | 23 | def peak_finder(row, mean): 24 | ''' 25 | This method finds the peaks 26 | :param row: record under consideration 27 | :param mean: mean value for that record 28 | :return: peaks 29 | ''' 30 | peaks = [] 31 | previous = abs(float(row[0])) 32 | current = abs(float(row[1])) 33 | next_item = abs(float(row[2])) 34 | biggest_peak = 0 35 | for index in range(3, len(row) - 1): 36 | if current > previous and current > next_item and current > (mean + 5): 37 | peaks.append(index) 38 | previous = current 39 | current = abs(float(row[index])) 40 | if current > biggest_peak: 41 | biggest_peak = current 42 | next_item = abs(float(row[index + 1])) 43 | return peaks, biggest_peak 44 | 45 | 46 | def mean_calculator(row): 47 | mean = 0 48 | for value in row: 49 | mean = mean + float(value) 50 | mean = round(mean / len(row), 3) 51 | return mean 52 | 53 | 54 | def rejection_condition_two(complete_data): 55 | ''' 56 | This method looks for peaks and counts only those peaks that have sudden falls at precisely same location 57 | across more than 5 leads 58 | :param complete_data: Complete input data 59 | :return: Bad data list 60 | ''' 61 | number_of_sudden_falls = 0 62 | id = 0 63 | bad_list = [] 64 | for row in complete_data: 65 | lower_limit = 0 66 | upper_limit = 100 67 | leads = 0 68 | location_recorder = [False for _ in range(12)] 69 | index_recorder = [False for _ in range(12)] 70 | while leads < 12: 71 | max_peak = float("-inf") 72 | for index in range(lower_limit, upper_limit): 73 | if float(row[index]) > max_peak: 74 | max_peak = float(row[index]) 75 | location = index 76 | lower_limit = upper_limit 77 | upper_limit = upper_limit + 100 78 | 79 | mean = mean_calculator(row) 80 | # checking if the next value is the biggest 81 | if location + 1 < 1200: 82 | if float(row[location + 1]) < (mean + 5): 83 | location_recorder[leads] = True 84 | index_recorder[leads] = location - lower_limit 85 | if location - 1 >= 0: 86 | if float(row[location - 1]) < (mean + 5): 87 | location_recorder[leads] = True 88 | index_recorder[leads] = location - lower_limit 89 | leads = leads + 1 90 | max_count = 0 91 | for index in range(len(index_recorder) - 1): 92 | current = index_recorder[index] 93 | counter = 0 94 | for index_1 in range(index, len(index_recorder)): 95 | if current == index_recorder[index_1]: 96 | counter = counter + 1 97 | if max_count < counter: 98 | max_count = counter 99 | value = current 100 | counter = 0 101 | for index in range(len(index_recorder)): 102 | if index_recorder[index] == value: 103 | if location_recorder[index] == True: 104 | counter = counter + 1 105 | if counter >= 5: 106 | bad_list.append(id) 107 | id = id + 1 108 | return bad_list 109 | 110 | 111 | def rejection_condition_one(complete_data): 112 | ''' 113 | This method rejects the 
records 114 | :param complete_data: 115 | :return: correct and incorrect records list 116 | ''' 117 | record_id = 0 118 | incorrect_records_list = [] 119 | correct_records = [] 120 | flag = False 121 | for row in complete_data: 122 | mean = 0 123 | for value in row: 124 | mean = mean + float(value) 125 | mean = round(mean / len(row), 3) 126 | count = 0 127 | upper_limit = 100 128 | lower_limit = 0 129 | # finding peaks 130 | peaks, biggest_peak = peak_finder(row, mean) 131 | mid_value = (mean + biggest_peak) / 2 132 | while (count < 12): 133 | for i in range(lower_limit, lower_limit + 9): 134 | # cheking first five 135 | if abs(float(row[i])) >= mean + mid_value: 136 | flag = True 137 | break 138 | for i in range(upper_limit - 1, upper_limit - 10, -1): 139 | # cheking first five 140 | # print(upper_limit) 141 | if abs(float(row[i])) >= mean + mid_value: 142 | flag = True 143 | break 144 | if flag == True: 145 | break 146 | count = count + 1 147 | lower_limit = upper_limit 148 | upper_limit = upper_limit + 100 149 | for index in peaks: 150 | if round(float(row[index + 1])) == 0: 151 | flag = True 152 | elif round(float(row[index - 1])) == 0: 153 | flag = True 154 | if flag == True: 155 | incorrect_records_list.append(record_id) 156 | flag = False 157 | else: 158 | correct_records.append(record_id) 159 | record_id = record_id + 1 160 | return incorrect_records_list, correct_records 161 | 162 | 163 | def plotter(data, fig): 164 | lower_limit = 0 165 | upper_limit = 100 166 | figure_count = 1 167 | plt.figure(fig) 168 | row = 1 169 | col = 1 170 | for _ in range(12): 171 | temp = [] 172 | for index in range(lower_limit, upper_limit): 173 | temp.append(float(data[index])) 174 | plt.subplot(4, 3, figure_count) 175 | plt.plot(temp) 176 | plt.title("Lead" + str(figure_count)) 177 | lower_limit = upper_limit 178 | upper_limit = upper_limit + 100 179 | figure_count = figure_count + 1 180 | 181 | 182 | def combine(incorrect_record_list, bad_list): 183 | dicto = {} 184 | for item in incorrect_record_list: 185 | dicto[item] = True 186 | for item in bad_list: 187 | if item not in dicto.keys(): 188 | dicto[item] = True 189 | complete_list = [] 190 | for item in dicto.keys(): 191 | complete_list.append(item) 192 | return complete_list 193 | 194 | 195 | def compute(complete_data): 196 | incorrect_record_list, correct_records = rejection_condition_one(complete_data) 197 | bad_list = rejection_condition_two(complete_data) 198 | incorrect_record_list = combine(incorrect_record_list, bad_list) 199 | return incorrect_record_list 200 | 201 | # def main(): 202 | # complete_data=file_reader("train_x.csv") 203 | # 204 | # 205 | # 206 | # print("-----------------------------------") 207 | # print("Incorrect record ID's: ") 208 | # print(incorrect_record_list) 209 | # print("Correct record ID's:") 210 | # print(correct_records) 211 | # print("\n\nPercentage of records that are wrong:"+str(round((len(incorrect_record_list)/len(complete_data))*100,3))+"%") 212 | # print("-----------------------------------") 213 | # plotter(complete_data[correct_records[8]],0) 214 | # plt.title("Good Records") 215 | # plotter(complete_data[incorrect_record_list[8]], 1) 216 | # plt.title("Bad Records") 217 | # plt.show() 218 | # 219 | # 220 | # 221 | # main() 222 | -------------------------------------------------------------------------------- /data_cleaning_record.py: -------------------------------------------------------------------------------- 1 | """ 2 | author: Varun Rajiv Mantri 3 | """ 4 | 5 | from matplotlib import pyplot as 
plt 6 | 7 | 8 | def file_reader(file_name): 9 | ''' 10 | This method reads in the data from a csv file 11 | :param file_name:name of the input file 12 | :return: 13 | ''' 14 | complete_data = [] 15 | with open(file_name) as file: 16 | for row in file: 17 | row = row.strip() 18 | row = row.split(",") 19 | complete_data.append(row) 20 | return complete_data 21 | 22 | 23 | def peak_finder(row, mean): 24 | ''' 25 | This method finds the peaks 26 | :param row: record under consideration 27 | :param mean: mean value for that record 28 | :return: peaks 29 | ''' 30 | peaks = [] 31 | previous = abs(float(row[0])) 32 | current = abs(float(row[1])) 33 | next_item = abs(float(row[2])) 34 | biggest_peak = 0 35 | for index in range(3, len(row) - 1): 36 | if current > previous and current > next_item and current > (mean + 5): 37 | peaks.append(index) 38 | previous = current 39 | current = abs(float(row[index])) 40 | if current > biggest_peak: 41 | biggest_peak = current 42 | next_item = abs(float(row[index + 1])) 43 | return peaks, biggest_peak 44 | 45 | 46 | def mean_calculator(row): 47 | mean = 0 48 | for value in row: 49 | mean = mean + float(value) 50 | mean = round(mean / len(row), 3) 51 | return mean 52 | 53 | 54 | def rejection_condition_two(row): 55 | ''' 56 | This method looks for peaks and counts only those peaks that have sudden falls at precisely same location 57 | across more than 5 leads 58 | :param complete_data: Complete input data 59 | :return: Bad data list 60 | ''' 61 | lower_limit = 0 62 | upper_limit = 100 63 | leads = 0 64 | location_recorder = [False for _ in range(12)] 65 | index_recorder = [False for _ in range(12)] 66 | while leads < 12: 67 | max_peak = float("-inf") 68 | for index in range(lower_limit, upper_limit): 69 | if float(row[index]) > max_peak: 70 | max_peak = float(row[index]) 71 | location = index 72 | lower_limit = upper_limit 73 | upper_limit = upper_limit + 100 74 | 75 | mean = mean_calculator(row) 76 | # checking if the next value is the biggest 77 | if location + 1 < 1200: 78 | if float(row[location + 1]) < (mean + 5): 79 | location_recorder[leads] = True 80 | index_recorder[leads] = location - lower_limit 81 | if location - 1 >= 0: 82 | if float(row[location - 1]) < (mean + 5): 83 | location_recorder[leads] = True 84 | index_recorder[leads] = location - lower_limit 85 | leads = leads + 1 86 | max_count = 0 87 | for index in range(len(index_recorder) - 1): 88 | current = index_recorder[index] 89 | counter = 0 90 | for index_1 in range(index, len(index_recorder)): 91 | if current == index_recorder[index_1]: 92 | counter = counter + 1 93 | if max_count < counter: 94 | max_count = counter 95 | value = current 96 | counter = 0 97 | for index in range(len(index_recorder)): 98 | if index_recorder[index] == value: 99 | if location_recorder[index] == True: 100 | counter = counter + 1 101 | if counter >= 5: 102 | return True 103 | return False 104 | 105 | 106 | def rejection_condition_one(row): 107 | ''' 108 | This method rejects the records 109 | :param complete_data: 110 | :return: correct and incorrect records list 111 | ''' 112 | flag = False 113 | mean = 0 114 | for value in row: 115 | mean = mean + float(value) 116 | mean = round(mean / len(row), 3) 117 | count = 0 118 | upper_limit = 100 119 | lower_limit = 0 120 | # finding peaks 121 | peaks, biggest_peak = peak_finder(row, mean) 122 | mid_value = (mean + biggest_peak) / 2 123 | while (count < 12): 124 | for i in range(lower_limit, lower_limit + 9): 125 | # cheking first five 126 | if abs(float(row[i])) >= mean + 
mid_value: 127 | flag = True 128 | break 129 | for i in range(upper_limit - 1, upper_limit - 10, -1): 130 | # cheking first five 131 | # print(upper_limit) 132 | if abs(float(row[i])) >= mean + mid_value: 133 | flag = True 134 | break 135 | if flag == True: 136 | break 137 | count = count + 1 138 | lower_limit = upper_limit 139 | upper_limit = upper_limit + 100 140 | for index in peaks: 141 | if round(float(row[index + 1])) == 0: 142 | flag = True 143 | elif round(float(row[index - 1])) == 0: 144 | flag = True 145 | if flag == True: 146 | return True 147 | return False 148 | 149 | 150 | def plotter(data, fig): 151 | lower_limit = 0 152 | upper_limit = 100 153 | figure_count = 1 154 | plt.figure(fig) 155 | row = 1 156 | col = 1 157 | for _ in range(12): 158 | temp = [] 159 | for index in range(lower_limit, upper_limit): 160 | temp.append(float(data[index])) 161 | plt.subplot(4, 3, figure_count) 162 | plt.plot(temp) 163 | plt.title("Lead" + str(figure_count)) 164 | lower_limit = upper_limit 165 | upper_limit = upper_limit + 100 166 | figure_count = figure_count + 1 167 | 168 | 169 | def combine(incorrect_record_list, bad_list): 170 | dicto = {} 171 | for item in incorrect_record_list: 172 | dicto[item] = True 173 | for item in bad_list: 174 | if item not in dicto.keys(): 175 | dicto[item] = True 176 | complete_list = [] 177 | for item in dicto.keys(): 178 | complete_list.append(item) 179 | return complete_list 180 | 181 | 182 | def check_quality(record): 183 | status = rejection_condition_one(record) 184 | if status == False: 185 | status = rejection_condition_two(record) 186 | if status == False: 187 | return 'good' 188 | else: 189 | return 'bad' 190 | 191 | 192 | ''' 193 | def main(): 194 | complete_data=file_reader("train_x.csv") 195 | check_quality(record) 196 | incorrect_record_list,correct_records=rejection_condition_one(complete_data) 197 | bad_list=rejection_condition_two(complete_data) 198 | incorrect_record_list=combine(incorrect_record_list,bad_list) 199 | print("-----------------------------------") 200 | print("Incorrect record ID's: ") 201 | print(incorrect_record_list) 202 | print("Correct record ID's:") 203 | print(correct_records) 204 | print("\n\nPercentage of records that are wrong:"+str(round((len(incorrect_record_list)/len(complete_data))*100,3))+"%") 205 | print("-----------------------------------") 206 | plotter(complete_data[correct_records[8]],0) 207 | plt.title("Good Records") 208 | plotter(complete_data[incorrect_record_list[8]], 1) 209 | plt.title("Bad Records") 210 | plt.show() 211 | ''' 212 | 213 | 214 | # main() 215 | -------------------------------------------------------------------------------- /gui.py: -------------------------------------------------------------------------------- 1 | from tkinter import * 2 | from tkinter import Tk, Label, Button 3 | from plotter import plotter 4 | from matplotlib.figure import Figure 5 | from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg 6 | import patientIds 7 | import tkinter as tk 8 | # import yaml 9 | import os.path 10 | import patient 11 | import plotter 12 | import pickle 13 | import csv 14 | 15 | COLOR = 'BLACK' 16 | 17 | 18 | class Application(Frame): 19 | def __init__(self): 20 | super().__init__() 21 | self.initUI() 22 | self.dynamicFrames = [] 23 | self.dynamicPlots = [] 24 | self.initIndexes() 25 | self.coord = None 26 | self.label = None 27 | 28 | def initIndexes(self): 29 | self.coordIdx = 0 30 | self.pId = 0 31 | 32 | def patientObjects(self): 33 | patientObjects = None 34 | with 
open("data.pickle", 'rb') as f: 35 | unpickler = pickle.Unpickler(f) 36 | patientObjects = unpickler.load() 37 | return patientObjects 38 | 39 | def initializeGrid(self): 40 | self.grid(row=0, column=0, sticky=N + S + E + W) 41 | for row in range(0, 13): 42 | Grid.rowconfigure(self, row, weight=1) 43 | for column in range(0, 27): 44 | Grid.columnconfigure(self, column, weight=1) 45 | 46 | def initUI(self): 47 | self.master.title("ECG Validator") 48 | self.initializeGrid() 49 | patientIdFrame = patientIds.patientButtonFrame(self, 50 | self.patientObjects()) 51 | patientIdFrame.grid(row=0, 52 | column=0, 53 | columnspan=6, 54 | rowspan=7, 55 | sticky=W + E + N + S) 56 | 57 | def onClickPatient(self, patient): 58 | self.initIndexes() 59 | self.clearDynamicFrames(self.dynamicFrames) 60 | 61 | pacingSiteFrame = patientIds.pacingSiteFrame(self, patient) 62 | pacingSiteFrame.grid(row=7, 63 | column=0, 64 | columnspan=6, 65 | rowspan=7, 66 | sticky=W + E + N + S) 67 | 68 | self.dynamicFrames.append(pacingSiteFrame) 69 | 70 | def onClickCoordinate(self, coord): 71 | self.coord = None 72 | self.initIndexes() 73 | self.addButtonsBelowImage() 74 | self.showPlot(coord) 75 | 76 | def clearDynamicFrames(self, dynamicFrames): 77 | for frame in dynamicFrames: 78 | dynamicFrames.remove(frame) 79 | frame.grid_forget() 80 | frame.destroy() 81 | 82 | def showStat(self): 83 | frame = Frame(self) 84 | frame.grid() 85 | stat = Label(frame, text='hhh') 86 | Grid.rowconfigure(frame, 1, weight=1) 87 | Grid.columnconfigure(frame, 1, weight=1) 88 | frame.grid(row=1, column=6, rowspan=1, columnspan=21, sticky=W + E + N + S) 89 | 90 | def showPlot(self, coord): 91 | print('loading frame') 92 | self.showStatus(coord.samples_stat[self.coordIdx]) 93 | 94 | self.coord = coord 95 | subplot, a = plotter.plotter(coord.samples[self.coordIdx]) 96 | canvas = FigureCanvasTkAgg(subplot, master=self) 97 | canvas.show() 98 | self.clearDynamicFrames(self.dynamicPlots) 99 | frame = canvas.get_tk_widget() 100 | self.dynamicPlots.append(frame) 101 | 102 | Grid.rowconfigure(frame, 1, weight=1) 103 | Grid.columnconfigure(frame, 1, weight=1) 104 | 105 | frame.grid(row=1, 106 | column=6, 107 | columnspan=21, 108 | rowspan=11, 109 | sticky=W + E + N + S) 110 | print("done") 111 | 112 | def nextPlot(self): 113 | coord = self.coord 114 | if (self.coordIdx >= 0): 115 | if (coord.samples[self.coordIdx]): 116 | self.coordIdx += 1 117 | self.showPlot(coord) 118 | 119 | def prevPlot(self): 120 | coord = self.coord 121 | if (self.coordIdx >= 0): 122 | if (coord.samples[self.coordIdx]): 123 | print(coord.samples_stat[self.coordIdx]) 124 | print(coord.samples_index) 125 | self.coordIdx -= 1 126 | self.showPlot(coord) 127 | 128 | def showStatus(self, ltext): 129 | if (self.label == None): 130 | frame = Frame(self) 131 | frame.grid() 132 | 133 | Grid.rowconfigure(frame, 0, pad=3, weight=1) 134 | Grid.columnconfigure(frame, 0, pad=3, weight=1) 135 | self.label = Label(frame, text=ltext) 136 | self.label.grid(row=0, column=0, columnspan=21) 137 | frame.grid(row=0, column=6, rowspan=1, columnspan=21, sticky=W + E + N + S) 138 | else: 139 | self.label.config(text=ltext) 140 | 141 | def addButtonsBelowImage(self): 142 | frame = Frame(self) 143 | frame.grid() 144 | 145 | Grid.rowconfigure(frame, 0, pad=3, weight=1) 146 | for col in range(0, 24): 147 | Grid.columnconfigure(frame, col, pad=3, weight=1) 148 | 149 | prevButton = Button(frame, text="prev") 150 | nextButton = Button(frame, text="next", command=self.nextPlot) 151 | redothisrecord = lambda: 
self.create_redo_record() 152 | redothispace = lambda: self.create_redo_pace() 153 | statwrong = lambda: self.statwrong_file() 154 | 155 | redoThisRecordButton = Button(frame, text="Redo This Record", command=redothisrecord) 156 | redoEntireButton = Button(frame, text="Redo Entire Pacing Site", command=redothispace) 157 | statsWrongButton = Button(frame, text="Stats Wrong", command=statwrong) 158 | 159 | prevButton.grid(row=0, 160 | column=0, 161 | columnspan=5, 162 | sticky=W + E + N + S) 163 | 164 | redoThisRecordButton.grid(row=0, 165 | column=5, 166 | columnspan=5, 167 | sticky=W + E + N + S) 168 | 169 | redoEntireButton.grid(row=0, 170 | column=10, 171 | columnspan=5, 172 | sticky=W + E + N + S) 173 | 174 | statsWrongButton.grid(row=0, 175 | column=15, 176 | columnspan=5, 177 | sticky=W + E + N + S) 178 | 179 | nextButton.grid(row=0, 180 | column=20, 181 | columnspan=5, 182 | sticky=W + E + N + S) 183 | 184 | frame.grid(row=12, column=6, rowspan=2, columnspan=21, sticky=W + E + N + S) 185 | 186 | def create_redo_record(self): 187 | directory = './pacingsite' 188 | if not os.path.exists(directory): 189 | os.makedirs(directory) 190 | 191 | coord = self.coord 192 | filename = str(coord.file_name) + '.txt' 193 | with open(os.path.join(directory, filename), 'w') as f: 194 | f.write("Pacing Site :{}\n".format(coord.pacingSite)) 195 | f.write("File Name : file{}.big\n".format(coord.file_name)) 196 | f.write("Record : {}\n".format(coord.samples_stat[self.coordIdx])) 197 | f.write(' Record Index :{} \n'.format(coord.samples_index[self.coordIdx])) 198 | 199 | f.close() 200 | print(filename + ' created') 201 | newfile = [] 202 | with open('persons.csv', 'r') as file: 203 | reader = csv.reader(file) 204 | for row in reader: 205 | if int(row[2]) == int(coord.samples_index[self.coordIdx]): 206 | row.pop(4) 207 | row.append('Redo Record') 208 | newfile.append(row) 209 | 210 | else: 211 | newfile.append(row) 212 | 213 | with open('persons.csv', 'w') as csvfile: 214 | 215 | filewriter = csv.writer(csvfile, delimiter=',', 216 | lineterminator='\n') 217 | for row in newfile: 218 | filewriter.writerow(row) 219 | print("csv wrote") 220 | 221 | def create_redo_pace(self): 222 | directory = './record' 223 | if not os.path.exists(directory): 224 | os.makedirs(directory) 225 | coord = self.coord 226 | filename = str(coord.file_name) + '.txt' 227 | with open(os.path.join(directory, filename), 'w') as f: 228 | f.write("Pacing Site :{}\n".format(coord.pacingSite)) 229 | f.write("File Name : file{}.big\n".format(coord.file_name)) 230 | f.write('Stat:{}\n'.format(coord.stats)) 231 | f.write('Pace Start Index :{} \n'.format(coord.samples_index[self.coordIdx] - self.coordIdx)) 232 | f.write('Pace End Index : {} \n'.format( 233 | (coord.samples_index[self.coordIdx] - self.coordIdx) + len(coord.samples_index) - 1)) 234 | f.close() 235 | 236 | newfile = [] 237 | with open('persons.csv', 'r') as file: 238 | reader = csv.reader(file) 239 | for row in reader: 240 | if int(row[2]) in list(range(coord.samples_index[self.coordIdx] - self.coordIdx, 241 | coord.samples_index[self.coordIdx] - self.coordIdx + len( 242 | coord.samples_index))): 243 | row.pop(4) 244 | row.append('Redo Pacing Site') 245 | newfile.append(row) 246 | 247 | else: 248 | newfile.append(row) 249 | 250 | with open('persons.csv', 'w') as csvfile: 251 | 252 | filewriter = csv.writer(csvfile, delimiter=',', 253 | lineterminator='\n') 254 | for row in newfile: 255 | filewriter.writerow(row) 256 | print("csv wrote") 257 | 258 | print(filename + ' created') 259 
| 260 | def statwrong_file(self): 261 | directory = './statwrong' 262 | if not os.path.exists(directory): 263 | os.makedirs(directory) 264 | coord = self.coord 265 | filename = str(coord.file_name) + '.txt' 266 | with open(os.path.join(directory, filename), 'w') as f: 267 | f.write("Pacing Site :{}\n".format(coord.pacingSite)) 268 | f.write("File Name : file{}.big\n".format(coord.file_name)) 269 | f.write('Stat:{}\n'.format(coord.stats)) 270 | f.write("Record : {}\n".format(coord.samples_stat[self.coordIdx])) 271 | f.write('Pace Start Index :{} \n'.format(coord.samples_index[self.coordIdx])) 272 | 273 | f.close() 274 | print(filename + ' created') 275 | newfile = [] 276 | with open('persons.csv', 'r') as file: 277 | reader = csv.reader(file) 278 | for row in reader: 279 | if int(row[2]) == int(coord.samples_index[self.coordIdx]): 280 | row.pop(4) 281 | row.append('Stat wrong') 282 | newfile.append(row) 283 | 284 | else: 285 | newfile.append(row) 286 | 287 | with open('persons.csv', 'w') as csvfile: 288 | 289 | filewriter = csv.writer(csvfile, delimiter=',', 290 | lineterminator='\n') 291 | for row in newfile: 292 | filewriter.writerow(row) 293 | print("csv wrote") 294 | 295 | 296 | def main(): 297 | root = Tk() 298 | Grid.rowconfigure(root, 0, weight=1) 299 | Grid.columnconfigure(root, 0, weight=1) 300 | app = Application() 301 | root.mainloop() 302 | 303 | 304 | if __name__ == '__main__': 305 | main() 306 | -------------------------------------------------------------------------------- /merge_files.py: -------------------------------------------------------------------------------- 1 | import scipy.io 2 | import numpy as np 3 | from collections import defaultdict 4 | import pyexcel 5 | from patient import Patient 6 | import pickle 7 | import numpy as np 8 | import csv 9 | import sys 10 | 11 | 12 | def compute(filename_master, filename_corrected, filename_xlsx): 13 | data_mat, location_master = readfile_qrsData(filename_master, filename_corrected) 14 | 15 | data_xlsx, case_coord_data = readfile_xlsx(filename_xlsx) 16 | 17 | # find_correlation(data_mat, data_xlsx) 18 | coorelation = mapping(data_mat, data_xlsx) 19 | patient_objects = [] 20 | keyset = data_mat.keys() 21 | with open('persons.csv', 'w') as csvfile: 22 | filewriter = csv.writer(csvfile, delimiter=',') 23 | # filewriter.writerow(['PatientID','PacingID','Record','Status','Action']) 24 | for patient_id in sorted(keyset): 25 | object = Patient(patient_id, list(data_mat[patient_id].keys()), list(data_mat[patient_id].values()), 26 | coorelation[patient_id], data_mat[patient_id], data_xlsx, 27 | case_coord_data[coorelation[patient_id]], location_master, filewriter) 28 | patient_objects.append(object) 29 | 30 | with open("data.pickle", "wb") as f: 31 | pickle.dump(patient_objects, f) 32 | f.close() 33 | 34 | return patient_objects 35 | 36 | 37 | def find_correlation(data_mat, data_xlsx): 38 | correlation = {} 39 | for patient_id in data_mat.keys(): 40 | samples_count = len(data_mat[patient_id]) 41 | for case_number in data_xlsx.keys(): 42 | case_count = len(data_xlsx[case_number]) 43 | if case_count == samples_count: 44 | if patient_id in correlation.keys(): 45 | temp_list = correlation[patient_id] 46 | temp_list.append(case_number) 47 | correlation[patient_id] = temp_list 48 | else: 49 | correlation[patient_id] = [case_number] 50 | # for key in correlation.keys(): 51 | # print(key,correlation[key]) 52 | for patient_id in correlation.keys(): 53 | if len(correlation[patient_id]) > 2: 54 | # print(patient_id) 55 | correlated_list = 
correlation[patient_id] 56 | for case_number in correlated_list: 57 | matching = compare_pacingid(data_mat[patient_id], data_xlsx[case_number]) 58 | print(patient_id, case_number, matching) 59 | 60 | 61 | def compare_pacingid(mat_data, xlsx_data): 62 | matching = 0 63 | 64 | for pacing_id in mat_data.keys(): 65 | for case_pace in xlsx_data.keys(): 66 | pacing_id = [round(elem, 4) for elem in pacing_id] 67 | # print(case_pace,pacing_id) 68 | if case_pace == pacing_id: 69 | matching + 1 70 | return matching 71 | 72 | 73 | def mapping(data_mat, data_xlsx): 74 | correlation = {} 75 | final = {} 76 | for patient_id in data_mat.keys(): 77 | for pacing_coord in data_mat[patient_id].keys(): 78 | for case_number in data_xlsx.keys(): 79 | for case_coord in data_xlsx[case_number].keys(): 80 | if case_coord == pacing_coord: 81 | # print('{} ==== {}'.format(patient_id, case_number)) 82 | if patient_id in correlation.keys(): 83 | if case_number in correlation[patient_id].keys(): 84 | count = correlation[patient_id][case_number] 85 | correlation[patient_id][case_number] = count + 1 86 | else: 87 | correlation[patient_id][case_number] = 1 88 | else: 89 | correlation[patient_id] = {case_number: 1} 90 | for patient_id in correlation.keys(): 91 | for case_number in correlation[patient_id].keys(): 92 | 93 | if len(data_mat[patient_id]) == correlation[patient_id][case_number]: 94 | # print('{} {} {}=={} '.format(patient_id, case_number, correlation[patient_id][case_number], 95 | # len(data_mat[patient_id]))) 96 | final[patient_id] = case_number 97 | 98 | return final 99 | 100 | 101 | def readfile_xlsx(filename_xlsx): 102 | patient_data = {} 103 | case_coord_data = {} 104 | 105 | my_array = pyexcel.get_array(file_name=filename_xlsx) 106 | for record in my_array[2:1014]: 107 | case_number = record[2] 108 | # print(i) 109 | file_name = record[1] 110 | coord = [round(record[9], 4), round(record[10], 4), round(record[11], 4)] 111 | if case_number in patient_data.keys(): 112 | patient_data[case_number][tuple(coord)] = file_name 113 | case_coord_data[case_number][file_name] = coord 114 | 115 | else: 116 | patient_data[case_number] = {tuple(coord): file_name} 117 | case_coord_data[case_number] = {file_name: coord} 118 | 119 | return patient_data, case_coord_data 120 | 121 | 122 | def readfile_qrsData(filename_master, filename_corrected): 123 | mat = scipy.io.loadmat(filename_master) 124 | 125 | train_x = mat['train_x'] 126 | train_y = mat['train_y'] 127 | train_coord = mat['train_coord'] 128 | val_x = mat['val_x'] 129 | val_y = mat['val_y'] 130 | val_coord = mat['val_coord'] 131 | test_x = mat['test_x'] 132 | test_y = mat['test_y'] 133 | test_coord = mat['test_coord'] 134 | mean_x = mat['mean_x'] 135 | std_x = mat['std_x'] 136 | 137 | coord_corrected = scipy.io.loadmat(filename_corrected) 138 | size_test = 0 139 | data_coord_corrected = coord_corrected['data_coord'] 140 | size_train = len(train_x) 141 | size_test = len(test_x) 142 | size_val = len(val_x) 143 | 144 | data_coord_train = data_coord_corrected[size_val + size_test:size_val + size_test + size_train] 145 | data_coord_test = data_coord_corrected[size_val:size_val + size_test] 146 | data_coord_val = data_coord_corrected[:size_val] 147 | 148 | master_data_x = np.concatenate((train_x, test_x, val_x), axis=0) 149 | master_data_y = np.concatenate((train_y, test_y, val_y), axis=0) 150 | master_data_coord = np.concatenate((data_coord_train, data_coord_test, data_coord_val), axis=0) 151 | scipy.io.savemat("master_data.mat", 152 | {'x': master_data_x, 'y': 
master_data_y, 'coord': master_data_coord, 'mean': mean_x, 'std': std_x}) 153 | 154 | location_master = defaultdict(list) 155 | master_index = 0 156 | patient_data = defaultdict(list) 157 | patient_data, master_index, location_master = group_by_patientID(train_x, train_y, train_coord, data_coord_train, 158 | patient_data, master_index, location_master) 159 | patient_data, master_index, location_master = group_by_patientID(test_x, test_y, test_coord, data_coord_test, 160 | patient_data, master_index, location_master) 161 | patient_data, master_index, location_master = group_by_patientID(val_x, val_y, val_coord, data_coord_val, 162 | patient_data, master_index, location_master) 163 | 164 | # for data in patient_data.keys(): 165 | # print(data,len(location_master[data].keys()),len(patient_data[data].keys())) 166 | 167 | return patient_data, location_master 168 | 169 | 170 | def group_by_patientID(X, Y, coord, corrected_coord, patient_data, master_index, location_master): 171 | pacing_site = {} 172 | for i in range(len(X)): 173 | pacing_coord_raw = corrected_coord[i].tolist() 174 | pacing_coord = tuple([round(elem, 8) for elem in pacing_coord_raw]) 175 | if Y[i][1] in patient_data.keys(): 176 | if pacing_coord in patient_data[Y[i][1]].keys(): 177 | pacing_site_samples = patient_data[Y[i][1]][(pacing_coord)] 178 | sample_x = X[i].tolist() 179 | if len(pacing_site_samples) == 1200: 180 | merge = [pacing_site_samples, sample_x] 181 | patient_data[Y[i][1]][pacing_coord] = merge 182 | else: 183 | pacing_site_samples.append(sample_x) 184 | patient_data[Y[i][1]][pacing_coord] = pacing_site_samples 185 | 186 | if pacing_coord in location_master[Y[i][1]].keys(): 187 | # print('pacing present') 188 | index_list = location_master[Y[i][1]][pacing_coord] 189 | index_list.append(master_index) 190 | master_index += 1 191 | location_master[Y[i][1]][pacing_coord] = index_list 192 | else: 193 | # print('pacing not present') 194 | location_master[Y[i][1]][pacing_coord] = [master_index] 195 | master_index += 1 196 | 197 | else: 198 | 199 | samples = X[i].tolist() 200 | # pacing_site={(pacing_coord): samples} 201 | patient_data[Y[i][1]][pacing_coord] = samples 202 | 203 | # location_master[Y[i][1]] = {pacing_coord: [master_index]} 204 | # master_index += 1 205 | 206 | else: 207 | # print('patient present') 208 | samples = X[i].tolist() 209 | 210 | patient_data[Y[i][1]] = {pacing_coord: samples} 211 | 212 | location_master[Y[i][1]] = {pacing_coord: [master_index]} 213 | master_index += 1 214 | 215 | return patient_data, master_index, location_master 216 | 217 | 218 | if __name__ == '__main__': 219 | filename_master = sys.argv[1] 220 | filename_corrected = sys.argv[2] 221 | filename_xlsx = sys.argv[3] 222 | 223 | compute(filename_master, filename_corrected, filename_xlsx) 224 | -------------------------------------------------------------------------------- /pacing_coord.py: -------------------------------------------------------------------------------- 1 | from data_cleaning_complete_set import compute 2 | from data_cleaning_record import check_quality 3 | 4 | 5 | class Pacing_Coord: 6 | def __init__(self, pacing_site, pacing_site_samples, coord_filename_mapping, case_number, case_coord_data_list, 7 | location_list): 8 | # print('there') 9 | # print(pacing_site in location_list.keys()) 10 | self.pacingSite = self.find_serial_numbers(pacing_site, case_coord_data_list) 11 | incorrect_record_list = compute(pacing_site_samples) 12 | self.stats = round((len(incorrect_record_list) / len(pacing_site_samples)) * 100, 3) 
13 | self.samples = pacing_site_samples 14 | 15 | result = [] 16 | for pacing_site_sample in pacing_site_samples: 17 | status = check_quality(pacing_site_sample) 18 | result.append(status) 19 | self.samples_stat = result 20 | # print(location_list) 21 | self.samples_index = location_list[pacing_site] 22 | 23 | self.file_name = self.find_filename(coord_filename_mapping, pacing_site, case_number) 24 | 25 | def find_filename(self, coord_filename_mapping, pacing_site, case_number): 26 | 27 | return coord_filename_mapping[case_number][tuple(pacing_site)] 28 | 29 | def find_serial_numbers(self, pacing_site, case_coord_data_list): 30 | serial = [] 31 | # print(case_coord_data_list) 32 | for data in case_coord_data_list.keys(): 33 | # print(type(pacing_site),type(case_coord_data_list[data])) 34 | if pacing_site == tuple(case_coord_data_list[data]): 35 | serial.append(data) 36 | # print(pacing_site,serial) 37 | return serial 38 | -------------------------------------------------------------------------------- /patient.py: -------------------------------------------------------------------------------- 1 | from data_cleaning_complete_set import compute 2 | import itertools 3 | from pacing_coord import Pacing_Coord 4 | import csv 5 | 6 | 7 | class Patient: 8 | def __init__(self, patient_id, pacing_coord, pacing_samples, casenumber, pacing_coord_sample, 9 | xlsx_data, case_coord_data_list, location_master, filewriter): 10 | # print('here') 11 | self.id = patient_id 12 | pacing_samples = list(itertools.chain.from_iterable(pacing_samples)) 13 | 14 | self.pacing_samples = pacing_samples 15 | self.case_number = casenumber 16 | 17 | self.pacing_coord_samples = pacing_coord_sample 18 | 19 | incorrect_record_list = compute(pacing_samples) 20 | self.stat = round((len(incorrect_record_list) / len(pacing_samples)) * 100, 3) 21 | 22 | coords = [] 23 | for pacing_sample in pacing_coord: 24 | coords.append(Pacing_Coord(pacing_sample, pacing_coord_sample[pacing_sample], xlsx_data, casenumber, 25 | case_coord_data_list, location_master[patient_id])) 26 | self.pacing_coord = coords 27 | 28 | for coord in coords: 29 | for i in range(len(coord.samples_index)): 30 | filewriter.writerow( 31 | [patient_id, coord.pacingSite, coord.samples_index[i], coord.samples_stat[i], 'No Action']) 32 | -------------------------------------------------------------------------------- /patientIds.py: -------------------------------------------------------------------------------- 1 | from tkinter import * 2 | from tkinter import Tk, W, E, N, S 3 | from tkinter.ttk import Frame, Button, Entry, Style 4 | import math 5 | 6 | def patientButtonFrame(master, ids): 7 | frame = Frame(master) 8 | frame.grid() 9 | 10 | for row in range(0, 7): 11 | Grid.rowconfigure(frame, row, pad=3, weight=1) 12 | 13 | for col in range(0, 6): 14 | Grid.columnconfigure(frame, col, pad=3, weight=1) 15 | 16 | 17 | idx = 0 18 | for row in range(0, 7): 19 | for column in range(0, 6): 20 | if idx < len(ids): 21 | patient = ids[idx] 22 | idx += 1 23 | anonFunc = lambda patient=patient: master.onClickPatient(patient) 24 | button = Button(frame, 25 | text="{0}, stat: {1}".format(patient.id, patient.stat), 26 | command=anonFunc) 27 | button.grid(row=row, column=column, sticky= W+E+N+S) 28 | 29 | return frame 30 | 31 | def pacingSiteFrame(master, patient): 32 | frame = Frame(master) 33 | frame.grid() 34 | rows = math.ceil(len(patient.pacing_coord) / 6.0) 35 | for row in range(0, rows): 36 | Grid.rowconfigure(frame, row, pad=3, weight=1) 37 | for col in range(0, 6): 38 | 
Grid.columnconfigure(frame, col, pad=3, weight=1)
 39 | 
 40 |     idx = 0
 41 |     for row in range(0, rows):
 42 |         for col in range(0, 6):
 43 |             if idx < len(patient.pacing_coord):
 44 |                 coord = patient.pacing_coord[idx]
 45 |                 idx += 1
 46 |                 anonFunc = lambda coord=coord: master.onClickCoordinate(coord)
 47 |                 button = Button(frame,
 48 |                                 text="{0} stats: {1}".format(coord.pacingSite, coord.stats),
 49 |                                 command=anonFunc)
 50 |                 button.grid(row=row, column=col, sticky= W+E+N+S)
 51 | 
 52 |     return frame
 53 | 
--------------------------------------------------------------------------------
/plotter.py:
--------------------------------------------------------------------------------
  1 | from matplotlib import pyplot as plt
  2 | from matplotlib.figure import Figure
  3 | 
  4 | 
  5 | def plotter(data):
  6 |     plt = Figure(figsize=(5, 4), dpi=100)
  7 |     lower_limit = 0
  8 |     upper_limit = 100
  9 |     figure_count = 1
 10 | 
 11 |     for _ in range(12):
 12 |         temp = []
 13 |         for index in range(lower_limit, upper_limit):
 14 |             temp.append(float(data[index]))
 15 |         a = plt.add_subplot(4, 3, figure_count)
 16 |         a.plot(temp)
 17 |         lower_limit = upper_limit
 18 |         upper_limit = upper_limit + 100
 19 |         figure_count = figure_count + 1
 20 |     return plt, a
 21 | 
--------------------------------------------------------------------------------
/readme.txt:
--------------------------------------------------------------------------------
  1 | How to Use:
  2 | 
  3 | 1) Install the requirements:
  4 |    pip install -r requirements.txt
  5 | 
  6 | 2) python3 merge_files.py <master .mat file> <corrected-coordinates .mat file> <coordinates .xlsx file>
  7 | 
  8 |    This generates data.pickle, persons.csv and master_data.mat in the current directory.
  9 |    These files are used by the application. I have already run this step and generated the files.
 10 | 
 11 | 
 12 | 3) python3 gui.py
 13 | 
 14 |    This runs the application.
 15 |    Patient IDs 1-39 are listed, each with its percentage of "not good" instances.
 16 |    Clicking a patient ID lists all of that patient's unique pacing sites.
 17 | 
 18 |    Pacing sites are shown as buttons together with their statistics. Upon selection, each of the
 19 |    12 leads is plotted and three options for classifying that instance are listed:
 20 | 
 21 |    Redo Record      |---> creates a file with the details and
 22 |    Redo Pacing Site |     records the chosen action in persons.csv
 23 |    Stat Wrong       |
 24 | 
 25 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
  1 | cycler==0.10.0
  2 | et-xmlfile==1.0.1
  3 | jdcal==1.3
  4 | lml==0.0.1
  5 | matplotlib==2.1.0
  6 | numpy==1.13.3
  7 | openpyxl==2.4.9
  8 | pyexcel==0.5.6
  9 | pyexcel-io==0.5.4
 10 | pyexcel-xlsx==0.5.4
 11 | pyparsing==2.2.0
 12 | python-dateutil==2.6.1
 13 | pytz==2017.3
 14 | scipy==1.0.0
 15 | six==1.11.0
 16 | texttable==1.1.1
 17 | yml==0.0.1
 18 | 
--------------------------------------------------------------------------------
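
Usage sketch (not part of the original repository): a minimal, hypothetical example of loading the
data.pickle written by merge_files.py and inspecting one patient outside the GUI. The attributes used
below (id, stat, pacing_coord, samples, samples_stat, stats, file_name) come from patient.py and
pacing_coord.py, and check_quality mirrors how pacing_coord.py and gui.py already use these objects;
the patient and record indices are placeholders.

    import pickle
    from data_cleaning_record import check_quality

    # load the pickled list of Patient objects produced by merge_files.compute()
    with open("data.pickle", "rb") as f:
        patients = pickle.Unpickler(f).load()

    first = patients[0]              # placeholder index: any patient works
    print(first.id, first.stat)      # patient ID and its percentage of rejected records

    for coord in first.pacing_coord:
        # each Pacing_Coord keeps the raw 1200-sample records plus a 'good'/'bad' label per record
        print(coord.file_name, coord.stats, coord.samples_stat[0])

    # re-check a single record with the per-record cleaner
    print(check_quality(first.pacing_coord[0].samples[0]))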