├── .gitignore
├── GEMuseXMLReader.py
├── README.md
└── __pycache__
    └── GEMuseXMLReader.cpython-37.pyc


/.gitignore:
--------------------------------------------------------------------------------
1 | *.XML
2 | *.XML.swp


--------------------------------------------------------------------------------
/GEMuseXMLReader.py:
--------------------------------------------------------------------------------
  1 | import xmltodict
  2 | import xlwt
  3 | import traceback
  4 | import numpy as np
  5 | import pandas as pd
  6 | from time import gmtime, strftime
  7 | import argparse
  8 | import json
  9 | import re
 10 | from functools import reduce
 11 | import os
 12 | 
 13 | 
 14 | 
 15 | __author__ = "Daniel Osorio"
 16 | __credits__ = ["Daniel Osorio"]
 17 | __version__ = "1.0.0"
 18 | __maintainer__ = "Daniel Osorio"
 19 | __email__ = "vdosavh@gmail.com"
 20 | __status__ = "Production"
 21 | 
 22 | class GEMuseXMLReader:
 23 |     def __init__(self, path):
 24 |         try:
 25 |             with open(path, 'rb') as fd:
 26 |                 self.dic = xmltodict.parse(fd.read().decode('utf8'))
 27 |             self.__path = path
 28 |             self.__patientInfoNode = self.dic['sapphire']['dcarRecord']['patientInfo']
 29 |             self.__ecgNode = self.__patientInfoNode['visit']['order']['ecgResting']['params']['ecg']['wav']['ecgWaveformMXG']
 30 |             self.header = self.__makeHeaderDic()
 31 |             self.__makeDataArray()
 32 |             self.__makeStructuredArray()
 33 | 
 34 |         except Exception: 
 35 |             print(traceback.print_exc())
 36 | # 
 37 |     
 38 |     def __makeHeaderDic(self):
 39 |         patientInfo = self.__patientInfoHeader()
 40 |         deviceInfo = self.__deviceInfoHeader()
 41 |         acquisitionInfo = self.__aquisitionInfoHeader()
 42 |         return {'PatientInfo': patientInfo, 'DeviceInfo': deviceInfo, 'AcquisitionInfo': acquisitionInfo}
 43 | 
 44 | 
 45 |     def __patientInfoHeader(self):
 46 |         if('unknownID' in self.__patientInfoNode.keys() or 'name' not in self.__patientInfoNode.keys()):
 47 |             given_name = 'Unknown'
 48 |             family_name = 'Unknown'
 49 |             id = 'Unknown'
 50 |         else:
 51 |             given_name = self.__patientInfoNode['name']['given']['@V']
 52 |             family_name = self.__patientInfoNode['name']['family']['@V']
 53 |             id = self.__patientInfoNode['identifier']['id']['@V']
 54 |         gender = self.__patientInfoNode['gender']['@V']
 55 |         race = self.__patientInfoNode['raceCode']['@V']
 56 |         pacemaker = self.__patientInfoNode['visit']['order']['testInfo']['hasPacemaker']['@V']
 57 |         return {'Given_Name': given_name, 'Family_Name': family_name, 'ID': id, 'Gender': gender, 'Race': race, 'Pacemaker': pacemaker}
 58 | 
 59 |     
 60 |     def __deviceInfoHeader(self):
 61 |         deviceModel = self.__patientInfoNode['visit']['order']['device']['modelID']['@V']
 62 |         deviceName = self.__patientInfoNode['visit']['order']['device']['deviceName']['@V']
 63 |         deviceSerial = self.__patientInfoNode['visit']['order']['device']['serialID']['@V']
 64 |         return {'DeviceModel': deviceModel, 'DeviceName': deviceName, 'DeviceSerial': deviceSerial}
 65 | 
 66 |     
 67 |     def __aquisitionInfoHeader(self):
 68 |         acquisitionDate = self.__patientInfoNode['visit']['order']['testInfo']['acquisitionDateTime']['@V']
 69 |         LeadAmplitudeUnitsPerBit = self.__ecgNode['@S']
 70 |         LeadAmplitudeUnits = self.__ecgNode['@U']
 71 |         Res = self.__ecgNode['@INV']
 72 |         filters = self.__getFilterInfo()
 73 |         sampleRate = {'SampleRate': self.__ecgNode['sampleRate']['@V'], 'Units': self.__ecgNode['sampleRate']['@U']}
 74 |         leadsInformation = self.__getLeadInfo()
 75 |         return {'Resolution': Res, 'AcquisitionDate': acquisitionDate, 'LeadAmplitudeUnitsPerBit': LeadAmplitudeUnitsPerBit, 'LeadAmplitudeUnits': LeadAmplitudeUnits, 'Filters': filters, 'SampleRate': sampleRate, 'LeadsInformation': leadsInformation}
 76 | 
 77 |     
 78 |     def __getFilterInfo(self):
 79 |         highPassNode = self.__ecgNode['filters']['highPass']
 80 |         highPass = {'Frequency': highPassNode['frequency']['@V'], 'Units': highPassNode['frequency']['@U'], 'Order': highPassNode['order']['@V']}
 81 |         LowPassNode = self.__ecgNode['filters']['lowPass']
 82 |         lowPass = {'Frequency': LowPassNode['frequency']['@V'], 'Units': LowPassNode['frequency']['@U'], 'Order': LowPassNode['order']['@V']}
 83 |         algorithms = []
 84 |         algorithmsNodes = self.__ecgNode['filters']['algorithm']
 85 |         for i in algorithmsNodes:
 86 |             if(i == 'name'):
 87 |                 algorithms.append({'Name': algorithmsNodes['name']['@V'], 'Purpose': algorithmsNodes['purpose']['@V']})
 88 |                 break
 89 |             else:
 90 |                 algorithms.append({'Name': i['name']['@V'], 'Purpose': i['purpose']['@V']})
 91 | 
 92 |         return {'HighPass': highPass, 'LowPass': lowPass, 'Algorithms': algorithms}
 93 | 
 94 | 
 95 |     def __getLeadInfo(self):
 96 |         leadsNames = []
 97 |         leadsLabels = []
 98 |         for i in self.__ecgNode['ecgWaveform']:
 99 |             leadsNames.append(i['@lead'])
100 |             leadsLabels.append(i['@label'])
101 |             self.__numberOfSamples = i['@asizeVT']
102 |         self.__leadsNames = leadsNames
103 |         return {'LeadsNames': leadsNames, 'LeadsLabels': leadsLabels, 'NumberOfSamples': self.__numberOfSamples}
104 | 
105 | 
106 |     def __makeDataArray(self):
107 |         self.dataArray = np.zeros((int(self.__numberOfSamples), len(self.__leadsNames)), dtype=int)
108 |         for i in range(0, len(self.__ecgNode['ecgWaveform'])):
109 |             self.dataArray[:, i] = list(map(int, self.__ecgNode['ecgWaveform'][i]['@V'].split(' ')))
110 | 
111 | 
112 |     def __makeStructuredArray(self):
113 |         self.dataObject = {}
114 |         for i in range(0, len(self.__ecgNode['ecgWaveform'])):
115 |             self.dataObject[self.__leadsNames[i]] = self.dataArray[:, i]
116 |         
117 |         self.dataFrame = pd.DataFrame(self.dataObject)
118 |         
119 |         self.__data_string = self.dataFrame.to_string(header=False)
120 |         self.__data_string = re.sub(' +',',', self.__data_string)
121 |         self.__header_string = 'nSeq '
122 |         self.__header_string += reduce((lambda x, y: x + ' ' + y), self.__leadsNames)
123 |         self.header['AcquisitionInfo']['HeaderString'] = self.__header_string
124 |     def getLead(self, lead):
125 |         return self.dataFrame[[lead]]
126 | 
127 | 
128 |     def __makeOSHeader(self):
129 |         self.__OSHeader = {'00:00:00:00:00:00': {}}
130 |         self.__OSHeader['00:00:00:00:00:00']['sensor'] = ['RAW'] * len(self.__ecgNode['ecgWaveform'])
131 |         self.__OSHeader['00:00:00:00:00:00']['device name'] = self.header['DeviceInfo']['DeviceName']
132 |         self.__OSHeader['00:00:00:00:00:00']['column'] = self.__header_string.split(' ')
133 |         self.__OSHeader['00:00:00:00:00:00']['sync interval'] = 0
134 |         self.__OSHeader['00:00:00:00:00:00']['time'] = (self.header['AcquisitionInfo']['AcquisitionDate'].split('T')[1]+'0').strip()
135 |         self.__OSHeader['00:00:00:00:00:00']['date'] = (self.header['AcquisitionInfo']['AcquisitionDate'].split('T')[0]).strip()
136 |         self.__OSHeader['00:00:00:00:00:00']['comments'] = ''
137 |         self.__OSHeader['00:00:00:00:00:00']['device connection'] = 'BTH00:00:00:00:00:00'
138 |         self.__OSHeader['00:00:00:00:00:00']['channels'] = list(range(1, 1+len(self.__ecgNode['ecgWaveform'])))
139 |         self.__OSHeader['00:00:00:00:00:00']['mode'] = 0
140 |         self.__OSHeader['00:00:00:00:00:00']['digital IO'] = []
141 |         self.__OSHeader['00:00:00:00:00:00']['firmware version'] = 770
142 |         self.__OSHeader['00:00:00:00:00:00']['device'] = 'virtual_plux'
143 |         self.__OSHeader['00:00:00:00:00:00']['position'] = 0
144 |         self.__OSHeader['00:00:00:00:00:00']['sampling rate'] = int(self.header['AcquisitionInfo']['SampleRate']['SampleRate'])
145 |         self.__OSHeader['00:00:00:00:00:00']['label'] = self.__leadsNames
146 |         self.__OSHeader['00:00:00:00:00:00']['resolution'] = [int(self.header['AcquisitionInfo']['Resolution']).bit_length()] * len(self.__ecgNode['ecgWaveform'])
147 |         self.__OSHeader['00:00:00:00:00:00']['special'] = [{}, {}, {}, {}, {}]
148 |         return json.dumps(self.__OSHeader)
149 | 
150 |     def saveHeader(self, filename):
151 |         temp = open('.{}{}_header.json'.format(os.sep, filename), 'w')
152 |         temp.write(json.dumps(self.header))
153 |         temp.close()
154 | 
155 | 
156 |     def saveToCSV(self, filename=None):
157 |         if(filename==None):
158 |             filename = 'GEMuseXML' + strftime("%Y-%m-%d_%H-%M-%S", gmtime())
159 |         temp = open('.{}{}.csv'.format(os.sep, filename), 'w')
160 |         temp.write('# ' + self.__header_string + '\n')
161 |         temp.write(self.__data_string)
162 |         temp.close()
163 |     
164 | 
165 |     def saveToPandasCSV(self, filename=None, header=True):
166 |         if(filename==None):
167 |             filename = 'GEMuseXML' + strftime("%Y-%m-%d_%H-%M-%S", gmtime())
168 |         self.dataFrame.to_csv('.{}{}_pandas.csv'.format(os.sep, filename))
169 |         if(header):
170 |             self.saveHeader(filename)
171 | 
172 | 
173 |     def saveToJson(self, filename=None, header=True):
174 |         if(filename==None):
175 |             filename = 'GEMuseXML' + strftime("%Y-%m-%d_%H-%M-%S", gmtime())
176 |         tempDic = {'Header': self.header, 'Data': {}}
177 |         for i in range(0, len(self.__ecgNode['ecgWaveform'])):
178 |             tempDic['Data'][self.__ecgNode['ecgWaveform'][i]['@lead']] = list(map(int, self.__ecgNode['ecgWaveform'][i]['@V'].split(' ')))
179 |         temp = open('.{}{}.json'.format(os.sep, filename), 'w')
180 |         temp.write(json.dumps(tempDic))
181 |         temp.close()
182 | 
183 |     
184 |     def saveToExcel(self, filename=None, header=True):
185 |         if(filename==None):
186 |             filename = 'GEMuseXML' + strftime("%Y-%m-%d_%H-%M-%S", gmtime())
187 |         self.dataFrame.to_excel('.{}{}.xls'.format(os.sep, filename))
188 |         if(header):
189 |             self.saveHeader(filename)
190 |     
191 | 
192 |     def saveNumpyArray(self, filename=None, header=True):
193 |         if(filename==None):
194 |             filename = 'GEMuseXML' + strftime("%Y-%m-%d_%H-%M-%S", gmtime())
195 |         np.save('.{}{}.npy'.format(os.sep, filename), self.dataArray)
196 |         if(header):
197 |             self.saveHeader(filename)
198 | 
199 | 
200 |     def saveToOPS(self, filename=None):
201 |         if(filename==None):
202 |             filename = 'GEMuseXML' + strftime("%Y-%m-%d_%H-%M-%S", gmtime())
203 |         temp = open('.{}{}.txt'.format(os.sep, filename), 'w')
204 |         temp.write('# OpenSignals Text File Format\n')
205 |         temp.write('# ' + self.__makeOSHeader() + '\n')
206 |         temp.write('# EndOfHeaders\n')
207 |         temp.write(self.dataFrame.to_string(header=False))
208 |         temp.close()
209 | 
210 | 
if __name__ == "__main__":

    def parseArgParser(reader, arg, kind):
        """Run the export selected by *kind* on *reader*.

        *arg* is the value argparse stored for the option: the user-supplied
        output name, or the single-space placeholder when the option was given
        without a value.
        """
        if arg == ' ':
            # Resolve the default name once so that every file written for a
            # single option (notably -all and its header JSON) shares the same
            # timestamp instead of each save call computing its own.
            filename = 'GEMuseXML' + strftime("%Y-%m-%d_%H-%M-%S", gmtime())
        else:
            filename = arg

        if kind == 'csv':
            reader.saveToCSV(filename)
        elif kind == 'pcsv':
            reader.saveToPandasCSV(filename)
        elif kind == 'ops':
            reader.saveToOPS(filename)
        elif kind == 'json':
            reader.saveToJson(filename)
        elif kind == 'excel':
            reader.saveToExcel(filename)
        elif kind == 'numpy':
            reader.saveNumpyArray(filename)
        elif kind == 'all':
            reader.saveToCSV(filename)
            reader.saveToPandasCSV(filename, False)
            reader.saveToOPS(filename)
            reader.saveToJson(filename, False)
            reader.saveToExcel(filename, False)
            # Only the last exporter keeps header=True so the header JSON is
            # written exactly once.
            reader.saveNumpyArray(filename)

    parser = argparse.ArgumentParser()
    parser.add_argument('file', help="file path")
    # const=' ' marks "option given without a filename"; it is truthy, unlike
    # the None stored when the option is absent.
    parser.add_argument("-csv", help="convert to csv", nargs='?', const=' ')
    parser.add_argument("-pcsv", help="convert to pandas csv", nargs='?', const=' ')
    parser.add_argument("-ops", help="convert to opensignals formated txt", nargs='?', const=' ')
    parser.add_argument("-x", '--excel', help="convert to excel", nargs='?', const=' ')
    parser.add_argument("-np", '--numpy', help="convert to numpy", nargs='?', const=' ')
    parser.add_argument("-json", help="convert to json", nargs='?', const=' ')
    parser.add_argument("-all", help="convert to csv, pandas csv, opensignals txt, json, excel and numpy", nargs='?', const=' ')
    args = parser.parse_args()

    reader = GEMuseXMLReader(args.file)

    # Same processing order as the option list above.
    for kind in ('csv', 'pcsv', 'ops', 'excel', 'numpy', 'json', 'all'):
        value = getattr(args, kind)
        if value:
            parseArgParser(reader, value, kind)
274 |     
275 | 
276 | 
277 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # GEMuseXMLReader
 2 | &nbsp;&nbsp;&nbsp;&nbsp; Python class for reading GE&reg; MUSE&reg; XML files. It returns a header with most of the file's configuration, and the lead data is available as a NumPy array or a pandas data frame.
 3 | 
 4 | ## Dependencies
 5 | &nbsp;&nbsp;&nbsp;&nbsp; This reader needs the following libraries to parse the XML files:
 6 | 
 7 | - xmltodict (To read and create a dictionary from the XML file)
 8 | - numpy (To save the data from the leads into an array)
 9 | - pandas (To save the data from the leads into a structured data frame)
10 | - xlwt (To save the lead data in an Excel file)
11 | 
12 | ## Usage
13 | &nbsp;&nbsp;&nbsp;&nbsp; The GEMuseXMLReader can be used in two different approaches:
14 | 
15 | - as a converter
16 | - as a python class for accessing the data
17 | ----
18 | ### &nbsp;&nbsp;&nbsp;&nbsp; Converting XML file
19 | 
20 | 
21 | &nbsp;&nbsp;&nbsp;&nbsp; The GEMuseXMLReader can be called in the command line to convert the XML into a CSV, JSON, Excel, Numpy object, or all. Paired with the converted file is a JSON with the header information.
22 | 
23 | ```
24 | python3 GEMuseXMLReader.py 'filename.XML' [arguments]
25 | ``` 
26 | #### &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Arguments
27 | 
28 | - [-csv [CSV]] - Convert the XML to a CSV. Output filename is optional.
29 | - [-pcsv [PCSV]] - Convert the XML to a CSV (Pandas formatted). Output filename is optional.
30 | - [-ops [OPS]] - Convert the XML to a txt compatible with PLUX's opensignals. Output filename is optional.
31 | - [-x [EXCEL]] - Convert the XML to an Excel file. Output filename is optional.
32 | - [-np [NUMPY]] - Convert the XML to a Numpy. Output filename is optional.
33 | - [-json [JSON]] - Convert the XML to a JSON. Output filename is optional.
34 | - [-all [ALL]] - Convert the XML to all supported formats: CSV, Pandas CSV, OpenSignals txt, JSON, Excel and Numpy. Output filename is optional.
35 | ----
36 | ### &nbsp;&nbsp;&nbsp;&nbsp; Python class
37 | 
38 | &nbsp;&nbsp;&nbsp;&nbsp; The GEMuseXMLReader can also be imported by another Python script and used to convert the XML files, providing the data as either a NumPy array or a pandas data frame.
39 | 
40 | ```python
41 | from GEMuseXMLReader import GEMuseXMLReader
42 | 
43 | GEMuseData = GEMuseXMLReader('filename.XML')
44 | 
45 | GEMuseData.header ## Header containing the patient, device and acquisition session parameters
46 | 
47 | GEMuseData.dataObject ## Dictionary containing the data separated by lead
48 | 
49 | GEMuseData.dataFrame ## Pandas data frame containing the acquisition data
50 | 
51 | GEMuseData.dataArray ## Numpy matrix containing the acquisition data
52 | ``` 
53 | 
54 | ## Header
55 | 
56 | &nbsp;&nbsp;&nbsp;&nbsp; The header is structured as follows:
57 | 
58 | * Header
59 |     * PatientInfo
60 |         * Given_Name
61 |         * Family_Name
62 |         * ID
63 |         * Gender
64 |         * Race
65 |         * Pacemaker
66 | 
67 |     * DeviceInfo
68 |         * DeviceModel
69 |         * DeviceName
70 |         * DeviceSerial
71 | 
72 |     * AcquisitionInfo
73 |         * AcquisitionDate
74 |         * LeadAmplitudeUnitsPerBit
75 |         * LeadAmplitudeUnits
76 |         * Resolution
77 |         * Filters
78 |             * HighPass
79 |             * LowPass
80 |             * Algorithms*
81 |                 * Name
82 |                 * Purpose
83 |         * SampleRate
84 |         * LeadsInformation
85 |             * LeadsNames
86 |             * LeadsLabels
87 |             * NumberOfSamples
88 |         * HeaderString
89 | 
90 | \* Algorithms: there may be more than one entry.
91 | 
92 | 


--------------------------------------------------------------------------------
/__pycache__/GEMuseXMLReader.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DFNOsorio/GEMuseXMLReader/d3cea694d05ebf2b8e39b5976d791e3f4c6c947d/__pycache__/GEMuseXMLReader.cpython-37.pyc


--------------------------------------------------------------------------------