├── .gitignore ├── GEMuseXMLReader.py ├── README.md └── __pycache__ └── GEMuseXMLReader.cpython-37.pyc /.gitignore: -------------------------------------------------------------------------------- 1 | *.XML 2 | *.XML.swp -------------------------------------------------------------------------------- /GEMuseXMLReader.py: -------------------------------------------------------------------------------- 1 | import xmltodict 2 | import xlwt 3 | import traceback 4 | import numpy as np 5 | import pandas as pd 6 | from time import gmtime, strftime 7 | import argparse 8 | import json 9 | import re 10 | from functools import reduce 11 | import os 12 | 13 | 14 | 15 | __author__ = "Daniel Osorio" 16 | __credits__ = ["Daniel Osorio"] 17 | __version__ = "1.0.0" 18 | __maintainer__ = "Daniel Osorio" 19 | __email__ = "vdosavh@gmail.com" 20 | __status__ = "Production" 21 | 22 | class GEMuseXMLReader: 23 | def __init__(self, path): 24 | try: 25 | with open(path, 'rb') as fd: 26 | self.dic = xmltodict.parse(fd.read().decode('utf8')) 27 | self.__path = path 28 | self.__patientInfoNode = self.dic['sapphire']['dcarRecord']['patientInfo'] 29 | self.__ecgNode = self.__patientInfoNode['visit']['order']['ecgResting']['params']['ecg']['wav']['ecgWaveformMXG'] 30 | self.header = self.__makeHeaderDic() 31 | self.__makeDataArray() 32 | self.__makeStructuredArray() 33 | 34 | except Exception: 35 | print(traceback.print_exc()) 36 | # 37 | 38 | def __makeHeaderDic(self): 39 | patientInfo = self.__patientInfoHeader() 40 | deviceInfo = self.__deviceInfoHeader() 41 | acquisitionInfo = self.__aquisitionInfoHeader() 42 | return {'PatientInfo': patientInfo, 'DeviceInfo': deviceInfo, 'AcquisitionInfo': acquisitionInfo} 43 | 44 | 45 | def __patientInfoHeader(self): 46 | if('unknownID' in self.__patientInfoNode.keys() or 'name' not in self.__patientInfoNode.keys()): 47 | given_name = 'Unknown' 48 | family_name = 'Unknown' 49 | id = 'Unknown' 50 | else: 51 | given_name = 
self.__patientInfoNode['name']['given']['@V'] 52 | family_name = self.__patientInfoNode['name']['family']['@V'] 53 | id = self.__patientInfoNode['identifier']['id']['@V'] 54 | gender = self.__patientInfoNode['gender']['@V'] 55 | race = self.__patientInfoNode['raceCode']['@V'] 56 | pacemaker = self.__patientInfoNode['visit']['order']['testInfo']['hasPacemaker']['@V'] 57 | return {'Given_Name': given_name, 'Family_Name': family_name, 'ID': id, 'Gender': gender, 'Race': race, 'Pacemaker': pacemaker} 58 | 59 | 60 | def __deviceInfoHeader(self): 61 | deviceModel = self.__patientInfoNode['visit']['order']['device']['modelID']['@V'] 62 | deviceName = self.__patientInfoNode['visit']['order']['device']['deviceName']['@V'] 63 | deviceSerial = self.__patientInfoNode['visit']['order']['device']['serialID']['@V'] 64 | return {'DeviceModel': deviceModel, 'DeviceName': deviceName, 'DeviceSerial': deviceSerial} 65 | 66 | 67 | def __aquisitionInfoHeader(self): 68 | acquisitionDate = self.__patientInfoNode['visit']['order']['testInfo']['acquisitionDateTime']['@V'] 69 | LeadAmplitudeUnitsPerBit = self.__ecgNode['@S'] 70 | LeadAmplitudeUnits = self.__ecgNode['@U'] 71 | Res = self.__ecgNode['@INV'] 72 | filters = self.__getFilterInfo() 73 | sampleRate = {'SampleRate': self.__ecgNode['sampleRate']['@V'], 'Units': self.__ecgNode['sampleRate']['@U']} 74 | leadsInformation = self.__getLeadInfo() 75 | return {'Resolution': Res, 'AcquisitionDate': acquisitionDate, 'LeadAmplitudeUnitsPerBit': LeadAmplitudeUnitsPerBit, 'LeadAmplitudeUnits': LeadAmplitudeUnits, 'Filters': filters, 'SampleRate': sampleRate, 'LeadsInformation': leadsInformation} 76 | 77 | 78 | def __getFilterInfo(self): 79 | highPassNode = self.__ecgNode['filters']['highPass'] 80 | highPass = {'Frequency': highPassNode['frequency']['@V'], 'Units': highPassNode['frequency']['@U'], 'Order': highPassNode['order']['@V']} 81 | LowPassNode = self.__ecgNode['filters']['lowPass'] 82 | lowPass = {'Frequency': 
LowPassNode['frequency']['@V'], 'Units': LowPassNode['frequency']['@U'], 'Order': LowPassNode['order']['@V']} 83 | algorithms = [] 84 | algorithmsNodes = self.__ecgNode['filters']['algorithm'] 85 | for i in algorithmsNodes: 86 | if(i == 'name'): 87 | algorithms.append({'Name': algorithmsNodes['name']['@V'], 'Purpose': algorithmsNodes['purpose']['@V']}) 88 | break 89 | else: 90 | algorithms.append({'Name': i['name']['@V'], 'Purpose': i['purpose']['@V']}) 91 | 92 | return {'HighPass': highPass, 'LowPass': lowPass, 'Algorithms': algorithms} 93 | 94 | 95 | def __getLeadInfo(self): 96 | leadsNames = [] 97 | leadsLabels = [] 98 | for i in self.__ecgNode['ecgWaveform']: 99 | leadsNames.append(i['@lead']) 100 | leadsLabels.append(i['@label']) 101 | self.__numberOfSamples = i['@asizeVT'] 102 | self.__leadsNames = leadsNames 103 | return {'LeadsNames': leadsNames, 'LeadsLabels': leadsLabels, 'NumberOfSamples': self.__numberOfSamples} 104 | 105 | 106 | def __makeDataArray(self): 107 | self.dataArray = np.zeros((int(self.__numberOfSamples), len(self.__leadsNames)), dtype=int) 108 | for i in range(0, len(self.__ecgNode['ecgWaveform'])): 109 | self.dataArray[:, i] = list(map(int, self.__ecgNode['ecgWaveform'][i]['@V'].split(' '))) 110 | 111 | 112 | def __makeStructuredArray(self): 113 | self.dataObject = {} 114 | for i in range(0, len(self.__ecgNode['ecgWaveform'])): 115 | self.dataObject[self.__leadsNames[i]] = self.dataArray[:, i] 116 | 117 | self.dataFrame = pd.DataFrame(self.dataObject) 118 | 119 | self.__data_string = self.dataFrame.to_string(header=False) 120 | self.__data_string = re.sub(' +',',', self.__data_string) 121 | self.__header_string = 'nSeq ' 122 | self.__header_string += reduce((lambda x, y: x + ' ' + y), self.__leadsNames) 123 | self.header['AcquisitionInfo']['HeaderString'] = self.__header_string 124 | def getLead(self, lead): 125 | return self.dataFrame[[lead]] 126 | 127 | 128 | def __makeOSHeader(self): 129 | self.__OSHeader = {'00:00:00:00:00:00': {}} 130 
| self.__OSHeader['00:00:00:00:00:00']['sensor'] = ['RAW'] * len(self.__ecgNode['ecgWaveform']) 131 | self.__OSHeader['00:00:00:00:00:00']['device name'] = self.header['DeviceInfo']['DeviceName'] 132 | self.__OSHeader['00:00:00:00:00:00']['column'] = self.__header_string.split(' ') 133 | self.__OSHeader['00:00:00:00:00:00']['sync interval'] = 0 134 | self.__OSHeader['00:00:00:00:00:00']['time'] = (self.header['AcquisitionInfo']['AcquisitionDate'].split('T')[1]+'0').strip() 135 | self.__OSHeader['00:00:00:00:00:00']['date'] = (self.header['AcquisitionInfo']['AcquisitionDate'].split('T')[0]).strip() 136 | self.__OSHeader['00:00:00:00:00:00']['comments'] = '' 137 | self.__OSHeader['00:00:00:00:00:00']['device connection'] = 'BTH00:00:00:00:00:00' 138 | self.__OSHeader['00:00:00:00:00:00']['channels'] = list(range(1, 1+len(self.__ecgNode['ecgWaveform']))) 139 | self.__OSHeader['00:00:00:00:00:00']['mode'] = 0 140 | self.__OSHeader['00:00:00:00:00:00']['digital IO'] = [] 141 | self.__OSHeader['00:00:00:00:00:00']['firmware version'] = 770 142 | self.__OSHeader['00:00:00:00:00:00']['device'] = 'virtual_plux' 143 | self.__OSHeader['00:00:00:00:00:00']['position'] = 0 144 | self.__OSHeader['00:00:00:00:00:00']['sampling rate'] = int(self.header['AcquisitionInfo']['SampleRate']['SampleRate']) 145 | self.__OSHeader['00:00:00:00:00:00']['label'] = self.__leadsNames 146 | self.__OSHeader['00:00:00:00:00:00']['resolution'] = [int(self.header['AcquisitionInfo']['Resolution']).bit_length()] * len(self.__ecgNode['ecgWaveform']) 147 | self.__OSHeader['00:00:00:00:00:00']['special'] = [{}, {}, {}, {}, {}] 148 | return json.dumps(self.__OSHeader) 149 | 150 | def saveHeader(self, filename): 151 | temp = open('.{}{}_header.json'.format(os.sep, filename), 'w') 152 | temp.write(json.dumps(self.header)) 153 | temp.close() 154 | 155 | 156 | def saveToCSV(self, filename=None): 157 | if(filename==None): 158 | filename = 'GEMuseXML' + strftime("%Y-%m-%d_%H-%M-%S", gmtime()) 159 | temp = 
open('.{}{}.csv'.format(os.sep, filename), 'w') 160 | temp.write('# ' + self.__header_string + '\n') 161 | temp.write(self.__data_string) 162 | temp.close() 163 | 164 | 165 | def saveToPandasCSV(self, filename=None, header=True): 166 | if(filename==None): 167 | filename = 'GEMuseXML' + strftime("%Y-%m-%d_%H-%M-%S", gmtime()) 168 | self.dataFrame.to_csv('.{}{}_pandas.csv'.format(os.sep, filename)) 169 | if(header): 170 | self.saveHeader(filename) 171 | 172 | 173 | def saveToJson(self, filename=None, header=True): 174 | if(filename==None): 175 | filename = 'GEMuseXML' + strftime("%Y-%m-%d_%H-%M-%S", gmtime()) 176 | tempDic = {'Header': self.header, 'Data': {}} 177 | for i in range(0, len(self.__ecgNode['ecgWaveform'])): 178 | tempDic['Data'][self.__ecgNode['ecgWaveform'][i]['@lead']] = list(map(int, self.__ecgNode['ecgWaveform'][i]['@V'].split(' '))) 179 | temp = open('.{}{}.json'.format(os.sep, filename), 'w') 180 | temp.write(json.dumps(tempDic)) 181 | temp.close() 182 | 183 | 184 | def saveToExcel(self, filename=None, header=True): 185 | if(filename==None): 186 | filename = 'GEMuseXML' + strftime("%Y-%m-%d_%H-%M-%S", gmtime()) 187 | self.dataFrame.to_excel('.{}{}.xls'.format(os.sep, filename)) 188 | if(header): 189 | self.saveHeader(filename) 190 | 191 | 192 | def saveNumpyArray(self, filename=None, header=True): 193 | if(filename==None): 194 | filename = 'GEMuseXML' + strftime("%Y-%m-%d_%H-%M-%S", gmtime()) 195 | np.save('.{}{}.npy'.format(os.sep, filename), self.dataArray) 196 | if(header): 197 | self.saveHeader(filename) 198 | 199 | 200 | def saveToOPS(self, filename=None): 201 | if(filename==None): 202 | filename = 'GEMuseXML' + strftime("%Y-%m-%d_%H-%M-%S", gmtime()) 203 | temp = open('.{}{}.txt'.format(os.sep, filename), 'w') 204 | temp.write('# OpenSignals Text File Format\n') 205 | temp.write('# ' + self.__makeOSHeader() + '\n') 206 | temp.write('# EndOfHeaders\n') 207 | temp.write(self.dataFrame.to_string(header=False)) 208 | temp.close() 209 | 210 | 211 
| if __name__ == "__main__": 212 | 213 | def parseArgParser(file, arg, type): 214 | if(arg == ' '): 215 | filename = None 216 | else: 217 | filename = arg 218 | 219 | if(type == 'csv'): 220 | file.saveToCSV(filename) 221 | if(type == 'pcsv'): 222 | file.saveToPandasCSV(filename) 223 | elif(type == 'ops'): 224 | file.saveToOPS(filename) 225 | elif(type == 'json'): 226 | file.saveToJson(filename) 227 | elif(type == 'excel'): 228 | file.saveToExcel(filename) 229 | elif(type == 'numpy'): 230 | file.saveNumpyArray(filename) 231 | elif(type == 'all'): 232 | file.saveToCSV(filename) 233 | file.saveToPandasCSV(filename, False) 234 | file.saveToOPS(filename) 235 | file.saveToJson(filename, False) 236 | file.saveToExcel(filename, False) 237 | file.saveNumpyArray(filename) 238 | 239 | 240 | parser = argparse.ArgumentParser() 241 | parser.add_argument('file', help="file path") 242 | parser.add_argument("-csv", help="convert to csv", nargs='?', const=' ') 243 | parser.add_argument("-pcsv", help="convert to pandas csv", nargs='?', const=' ') 244 | parser.add_argument("-ops", help="convert to opensignals formated txt", nargs='?', const=' ') 245 | parser.add_argument("-x", '--excel', help="convert to excel", nargs='?', const=' ') 246 | parser.add_argument("-np", '--numpy', help="convert to numpy", nargs='?', const=' ') 247 | parser.add_argument("-json", help="convert to json", nargs='?', const=' ') 248 | parser.add_argument("-all", help="convert to csv, excel, numpy and json", nargs='?', const=' ') 249 | args = parser.parse_args() 250 | 251 | file = GEMuseXMLReader(args.file) 252 | 253 | if args.csv: 254 | parseArgParser(file, args.csv, 'csv') 255 | 256 | if args.pcsv: 257 | parseArgParser(file, args.pcsv, 'pcsv') 258 | 259 | if args.ops: 260 | parseArgParser(file, args.ops, 'ops') 261 | 262 | if args.excel: 263 | parseArgParser(file, args.excel, 'excel') 264 | 265 | if args.numpy: 266 | parseArgParser(file, args.numpy, 'numpy') 267 | 268 | if args.json: 269 | parseArgParser(file, 
args.json, 'json') 270 | 271 | if args.all: 272 | parseArgParser(file, args.all, 'all') 273 | 274 | 275 | 276 | 277 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GEMuseXMLReader 2 |      Python class for reading GE® MUSE® XML files. Returns a header with most of the file configurations and the lead's data is available as a Numpy array or a Pandas data frame. 3 | 4 | ## Dependencies 5 |      This reader needs the following libraries to parse the XML files: 6 | 7 | - xmltodict (To read and create a dictionary from the XML file) 8 | - numpy (To save the data from the leads into an array) 9 | - pandas (To save the data from the leads into a structured data frame) 10 | - xlwt (To save the lead data in a Excel file) 11 | 12 | ## Usage 13 |      The GEMuseXMLReader can be used in two different approaches: 14 | 15 | - as a converter 16 | - as a python class for accessing the data 17 | ---- 18 | ###      Converting XML file 19 | 20 | 21 |      The GEMuseXMLReader can be called in the command line to convert the XML into a CSV, JSON, Excel, Numpy object, or all. Paired with the converted file is a JSON with the header information. 22 | 23 | ``` 24 | python3 GEMuseXMLReader.py 'filename.XML' [arguments] 25 | ``` 26 | ####                Arguments 27 | 28 | - [-csv [CSV]] - Convert the XML to a CSV. Output filename is optional. 29 | - [-pcsv [PCSV]] - Convert the XML to a CSV (Pandas formated). Output filename is optional. 30 | - [-ops [OPS]] - Convert the XML to a txt compatible with PLUX's opensignals. Output filename is optional. 31 | - [-x [EXCEL]] - Convert the XML to a Excel. Output filename is optional. 32 | - [-np [NUMPY]] - Convert the XML to a Numpy. Output filename is optional. 33 | - [-json [JSON]] - Convert the XML to a JSON. Output filename is optional. 34 | - [-all [ALL]] - Convert the XML to a CSV, Excel, Numpy and JSON. 
Output filename is optional.

----
### Python class

The GEMuseXMLReader can also be imported by another python script and used to convert the XML files, providing the data in either a Numpy array or a Pandas data frame.

```python
from GEMuseXMLReader import GEMuseXMLReader

GEMuseData = GEMuseXMLReader('filename.XML')

GEMuseData.header ## Header containing the patient, device and acquisition session parameters

GEMuseData.dataObject ## Dictionary containing the data separated by lead

GEMuseData.dataFrame ## Pandas data frame containing the acquisition data

GEMuseData.dataArray ## Numpy matrix containing the acquisition data
```

## Header

The header is structured as follows:

* Header
    * PatientInfo
        * Given_Name
        * Family_Name
        * ID
        * Gender
        * Race
        * Pacemaker

    * DeviceInfo
        * DeviceModel
        * DeviceName
        * DeviceSerial

    * AcquisitionInfo
        * AcquisitionDate
        * LeadAmplitudeUnitsPerBit
        * LeadAmplitudeUnits
        * Resolution
        * Filters
            * HighPass
            * LowPass
            * Algorithms*
                * Name
                * Purpose
        * SampleRate
        * LeadsInformation
            * LeadsNames
            * LeadsLabels
            * NumberOfSamples
        * HeaderString

\* Could be more than one.


--------------------------------------------------------------------------------
/__pycache__/GEMuseXMLReader.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DFNOsorio/GEMuseXMLReader/d3cea694d05ebf2b8e39b5976d791e3f4c6c947d/__pycache__/GEMuseXMLReader.cpython-37.pyc
--------------------------------------------------------------------------------