├── LICENSE
├── lidcXmlHelper.py
├── lidc_data_to_nifti.py
├── readme.md
└── template.pf

/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018 DKFZ, Division of Medical Image Computing

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/lidcXmlHelper.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Wed Feb 21 12:28:19 2018

@author: m.goetz@dkfz-heidelberg.de
"""
import xml.etree.ElementTree as ET

def create_xml_tree(filepath):
    """
    Method to ignore the namespaces if ElementTree is used.
    Necessary because ElementTree, by default, extends
    tag names by the namespace, but the namespaces used in the
    LIDC-IDRI dataset are not consistent.
    Solution based on https://stackoverflow.com/questions/13412496/python-elementtree-module-how-to-ignore-the-namespace-of-xml-files-to-locate-ma

    Used instead of ET.fromstring(xml).
    """
    it = ET.iterparse(filepath)
    for _, el in it:
        if '}' in el.tag:
            el.tag = el.tag.split('}', 1)[1]  # strip all namespaces
        for at in list(el.attrib.keys()):  # strip namespaces of attributes too
            if '}' in at:
                newat = at.split('}', 1)[1]
                el.attrib[newat] = el.attrib[at]
                del el.attrib[at]
    return it.root

def get_study_uid(root):
    result = None
    try:
        result = root.find("ResponseHeader/StudyInstanceUID").text
    except AttributeError:
        pass
    return result

def get_series_uid(root):
    result = None
    try:
        result = root.find("ResponseHeader/SeriesInstanceUid").text
    except AttributeError:
        pass
    return result

def read_nodule_property(nodule_tree, tag):
    """
    Reads a specified property of a nodule. If the property is not specified
    in the corresponding part of the XML tree (nodule_tree), -1 is returned.
    """
    try:
        result = str(nodule_tree.find("characteristics/" + tag).text)
        return result
    except AttributeError:
        return str(-1)
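
# Example usage (sketch): how these helpers are combined in lidc_data_to_nifti.py.
# The XML file name below is a placeholder for one of the LIDC-IDRI annotation files.
if __name__ == "__main__":
    root = create_xml_tree("example_lidc_annotation.xml")
    print(get_study_uid(root), get_series_uid(root))
    for session in root.iter("readingSession"):
        for nodule in session.iter("unblindedReadNodule"):
            print(read_nodule_property(nodule, "malignancy"))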
--------------------------------------------------------------------------------
/lidc_data_to_nifti.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Wed Feb 21 12:28:19 2018

@author: Michael Goetz (m.goetz@dkfz-heidelberg.de)
"""

import glob
import os
import subprocess
import SimpleITK as sitk
import numpy as np
import lidcXmlHelper as xmlHelper

# Path to the command line tools of MITK Phenotyping
path_to_executables=r"E:\Tools\MITK Phenotyping 2018-10-18\bin"
# Path to the folder that contains the LIDC-IDRI DICOM files
path_to_dicoms = r"P:\Goetz\Datenkollektive\Lungendaten\Nodules_LIDC_IDRI\DICOM"
# Path to the folder that contains the LIDC-IDRI XML files
path_to_xmls= r"P:\Goetz\Datenkollektive\Lungendaten\Nodules_LIDC_IDRI\XML\tcia-lidc-xml"
path_to_xmls= r"P:\Goetz\Datenkollektive\Lungendaten\Nodules_LIDC_IDRI\XML2"
# Output path where the generated NRRD and NIFTI files will be saved
path_to_nrrds = r"P:\Goetz\Datenkollektive\Lungendaten\Nodules_LIDC_IDRI\new_nrrd_2"
# Output path where the generated Planar Figures will be saved
path_to_planars= r"P:\Goetz\Datenkollektive\Lungendaten\Nodules_LIDC_IDRI\new_planars_2"
# Output path to the CSV file that will contain the nodule characteristics. An existing file will be appended.
path_to_characteristics=r"P:\Goetz\Datenkollektive\Lungendaten\Nodules_LIDC_IDRI\characteristics_2.csv"
# Output path to an error file where errors will be logged. An existing file will be appended.
path_to_error_file=r"W:\Old\LungImages\LIDC-IDRI\conversion_error_2.txt"

planar_template=r"template.pf"

list_of_appendix=['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']

def write_error(msg, errorfile=path_to_error_file):
    """
    A simple error logging method. Errors should be reported using this function.
    All errors are then logged in the file specified by the global variable
    'path_to_error_file' if no other file is specified.

    The error message is also printed to the console.
    """
    a=open(errorfile,'a')
    a.write(str(msg) + "\n")
    a.close()
    print("ERROR:",msg)

def get_dicom_from_study_uid(study_uid, series_uid):
    """
    Find the folder containing the DICOMs that corresponds to a given study id
    or a study id and a series id.

    Returns:
        The path to one DICOM matching the given IDs
        The number of DICOMs that have been found.
    """
    if series_uid is not None:
        search_path=os.path.join(path_to_dicoms, "*","*"+study_uid+"*","*"+series_uid+"*","*.dcm")
    else:
        search_path=os.path.join(path_to_dicoms, "*","*"+study_uid+"*","*","*.dcm")
    paths=glob.glob(search_path)
    if len(paths) > 0:
        return paths[0], len(paths)
    else:
        return [], 0
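
# The glob patterns above assume the "Classic Directory Name" layout described in
# readme.md, i.e. the DICOM files are stored as
#   path_to_dicoms/<patient folder>/<study UID folder>/<series UID folder>/*.dcm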

def create_nrrd_from_dicoms(image, patient_id):
    """
    Reads a folder that contains multiple DICOM files and
    converts the input into a single nrrd file using a command line
    app from MITK or MITK Phenotyping.

    Input:
    * path to one DICOM file (the others are found automatically)
    * Patient ID

    Output:
    Creates a single nrrd file with the path: $target_path / patient_id + '_ct_scan.nrrd'

    """
    target_path = os.path.join(path_to_nrrds, patient_id)
    target_name = os.path.join(target_path, patient_id+"_ct_scan.nrrd")
    os.makedirs(target_path, exist_ok=True)
    cmd_string=r"MitkCLDicom2Nrrd "+\
                "-i \"" + image + "\"" \
                " -o \"" + target_name + "\""
    print(cmd_string)
    a=subprocess.Popen(cmd_string,shell=True,cwd=path_to_executables)
    a.wait()
    return target_name

def get_spacing_and_origin(file):
    """ Reads a nrrd file, extracts spacing and origin using SimpleITK, and returns them """
    image=sitk.ReadImage(file)
    spacing=image.GetSpacing()
    origin=image.GetOrigin()
    return spacing, origin

def create_planarfigure_for_session(session, spacing, origin, patient_id, session_id):
    """
    Given one reading session of an expert and the corresponding patient id, the given
    contours are converted into multiple planar figures.
    Each nodule gets an ID that is unique for ALL nodules from all images / reading sessions.

    The planar figures are saved in a path following this structure:
    path_to_planars/<patient_id>/<patient_id>_<session_id>_<nodule_id>_<roi_id>.pf

    with the following properties:
    * path_to_planars : Globally specified folder
    * <patient_id> : Unique Patient ID consisting of patient number and an appendix
    * <session_id> : Number of the reading session / expert. Unique to the given patient only.
    * <nodule_id> : A globally unique ID of the given nodule
    * <roi_id> : A nodule-wide unique, consecutive number of the current ROI. (Each planar figure contains the annotation of a single slice)
    """
    # Obtaining the code of the radiologist. Replacing underscores (_) as they are later used to
    # encode different IDs in the resulting file name.
    radiologist=str(session.find("servicingRadiologistID").text).replace("_","-")

    # Reading each nodule in the given session and creating planar figures for it (if large enough)
    global nodule_id
    for nodule in session.iter('unblindedReadNodule'):
        create_planarfigures_for_nodule(nodule, spacing, origin, patient_id, session_id, radiologist)
        nodule_id = nodule_id + 1
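
# For example (illustrative values): the third ROI of the nodule with the global
# id 4, drawn in reading session 1 of patient 0001a, ends up in
#   path_to_planars/0001a/0001a_1_00000004_2.pf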

def create_planarfigures_for_nodule(nodule, spacing, origin, patient_id, session_id, radiologist):
    """
    Extracts the properties of a given nodule, saves them to the CSV file specified in the
    global variable 'path_to_characteristics' and saves all contours of that
    nodule as planar figures.

    Each contour is given a consecutive number.
    """
    global nodule_id
    nodule_str="{:08n}".format(nodule_id)

    # Extract the properties of the nodule
    subtlety=xmlHelper.read_nodule_property(nodule, 'subtlety')
    internalStructure=xmlHelper.read_nodule_property(nodule, 'internalStructure')
    calcification=xmlHelper.read_nodule_property(nodule, 'calcification')
    sphericity=xmlHelper.read_nodule_property(nodule, 'sphericity')
    margin=xmlHelper.read_nodule_property(nodule, 'margin')
    lobulation=xmlHelper.read_nodule_property(nodule, 'lobulation')
    spiculation=xmlHelper.read_nodule_property(nodule, 'spiculation')
    texture=xmlHelper.read_nodule_property(nodule, 'texture')
    malignancy=xmlHelper.read_nodule_property(nodule, 'malignancy')

    # Save the characteristics and specifics of the nodule to the global CSV file
    global path_to_characteristics
    with open(path_to_characteristics,"a") as file:
        file.write(";".join([str(patient_id),str(session_id),str(radiologist),str(nodule_str),subtlety,internalStructure,calcification,sphericity,margin,lobulation,spiculation,texture,malignancy])+"\n")

    # Extract all ROIs specified within the nodule
    roi_id=0
    for roi in nodule.iter('roi'):
        create_planarfigures_for_roi(roi, spacing, origin, patient_id, session_id, nodule_str, roi_id)
        roi_id=roi_id+1

def create_planarfigures_for_roi(roi, spacing, origin, patient_id, session_id, nodule_id, roi_id):
    """
    Given the section of XML that specifies a certain ROI, this function creates a
    planar figure file out of it.

    The planar figure is saved in a path following this structure:
    path_to_planars/<patient_id>/<patient_id>_<session_id>_<nodule_id>_<roi_id>.pf

    with the following properties:
    * path_to_planars : Globally specified folder
    * <patient_id> : Unique Patient ID consisting of patient number and an appendix
    * <session_id> : Number of the reading session / expert. Unique to the given patient only.
    * <nodule_id> : A globally unique ID of the given nodule
    * <roi_id> : A nodule-wide unique, consecutive number of the current ROI.

    """
    # All ROIs are within a single Z-plane, so the z-position needs to be obtained only once
    z_position = roi.find("imageZposition").text

    # Create the file name and ensure that the corresponding folder exists to prevent write errors
    target_path = os.path.join(path_to_planars, patient_id)
    target_name = os.path.join(target_path, patient_id+"_"+str(session_id)+"_"+str(nodule_id)+"_"+str(roi_id)+".pf")
    os.makedirs(target_path, exist_ok=True)

    # Convert the given edge information into an XML part describing the planar figure.
    # NOTE: the exact vertex element written here is an assumption; its tag and attribute
    # names have to match what the %%points%% placeholder in template.pf expects.
    vertex_string=""
    edge_id=0
    for edge in roi.iter('edgeMap'):
        x=float(edge[0].text)*spacing[0]
        y=float(edge[1].text)*spacing[1]
        vertex_string=vertex_string+"        <Vertex id=\""+str(edge_id)+"\" x=\""+str(x)+"\" y=\""+str(y)+"\"/>\n"
        edge_id=edge_id+1

    # If less than two points are defined, it is not a complete mesh. This happens
    # if the lesion was too small, so the experts didn't draw spatial annotations.
    if edge_id < 2:
        return None

    # Read the template, replace the corresponding placeholders and
    # save the result as a new planar figure
    with open(planar_template,"r") as file:
        template=file.read()
    template=template.replace("%%origin_z%%", str(z_position))
    template=template.replace("%%origin_x%%", str(origin[0]))
    template=template.replace("%%origin_y%%", str(origin[1]))
    template=template.replace("%%points%%", vertex_string)
    with open(target_name,"w") as file:
        file.write(template)
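
# Note: template.pf is expected to contain the literal placeholders %%origin_x%%,
# %%origin_y%%, %%origin_z%% and %%points%%, which are substituted above for every
# ROI before the filled-in planar figure is written to disk.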

def convert_planar_figures_to_masks(image, patient_id):
    """ Finds all planar figures for a single patient and converts them to segmentations that match the CT of the patient """
    for planar_figure in glob.glob(os.path.join(path_to_planars,patient_id,"*.pf")):
        create_mask_for_planar_figure(image, patient_id, planar_figure)

def create_mask_for_planar_figure(image, patient_id, planar_figure):
    """
    Create a segmentation file from a planar figure, using the corresponding CT file.

    All mask files are saved in a folder with the structure
    path_to_nrrds/<patient_id>/planar_masks/<planar figure name>.nrrd
    """
    # Create the new filename
    file_name=os.path.basename(planar_figure)
    target_path = os.path.join(path_to_nrrds, patient_id,"planar_masks")
    target_name = os.path.join(target_path, file_name.replace(".pf",".nrrd"))
    os.makedirs(target_path, exist_ok=True)

    cmd_string=r"MitkCLPlanarFigureToNrrd "+\
                "-i \"" + image + "\"" \
                " -p \"" + planar_figure + "\"" \
                " -o \"" + target_name + "\""
    #print(cmd_string)
    a=subprocess.Popen(cmd_string,shell=True,cwd=path_to_executables)
    a.wait()
    return target_name

def merge_planar_figures_per_nodule(image, patient_id):
    """
    There are two problems with the segmentations generated from the planar figures,
    both caused by the way the original data is presented. First, the
    segmentation of each nodule is split into multiple files, as the corresponding
    ROIs are given as slice-wise contours. Second, corresponding annotations
    of the same nodule are not identified, as lesions share no common id between
    different experts.

    This method matches segmentations that are from the same rater and combines
    them. It also tries to identify multiple segmentations of the same nodule by
    different raters by looking at the overlap of the segmentations.

    It is assumed that two segmentations cover the same nodule if they
    overlap by more than 10 voxels.

    The newly created segmentation has the format
    path_to_nrrds/<patient_id>/<patient_id>_<session_id>_<nodule_id>_<true_nodule_id>.nii.gz
    with the following properties:
    * path_to_nrrds : Globally specified folder
    * <patient_id> : Unique Patient ID consisting of patient number and an appendix
    * <session_id> : Number of the reading session / expert. Unique to the given patient only.
    * <nodule_id> : A globally unique ID of the given nodule
    * <true_nodule_id> : The globally minimal ID of the nodule. All masks of this nodule should share the same True Nodule ID.
    """
    # Load all masks as numpy arrays and store them in a dictionary.
    # The keys of the dictionary match the (preliminary) mask id
    origin_path = os.path.join(path_to_nrrds, patient_id,"planar_masks","*.nrrd")
    images={}
    arrays={}
    for mask in glob.glob(origin_path):
        mask_name=os.path.basename(mask)
        mask_limits=mask_name.split("_")
        # The first three parts of the file name (Patient ID, Session ID, and Nodule ID)
        # identify whether a given ROI belongs to a certain nodule. (ROI ID is ignored)
        mask_id=mask_limits[0]+"_"+mask_limits[1]+"_"+mask_limits[2]
        # If no array with the mask_id is available, create one
        if mask_id not in images.keys():
            image=sitk.ReadImage(mask)
            images[mask_id]=image
            array=sitk.GetArrayFromImage(image)
            arrays[mask_id]=array
        # If a planar figure belonging to the given nodule already exists, add
        # the new one to the old one (i.e. merge both segmentations)
        else:
            image=sitk.ReadImage(mask)
            array=sitk.GetArrayFromImage(image)
            arrays[mask_id]=arrays[mask_id]+array

    for key,idx in zip(images.keys(),range(len(images.keys()))):
        # If values larger than 1 are present in a segmentation, there are
        # overlaps between two segmentations for this nodule. This should not happen
        # but occurs due to errors in the original XML files
        if len(arrays[key][arrays[key]>1])>1:
            write_error("Failed due to wrong segmentations: " + key)
            continue
        # Identify the smallest global nodule ID for the given nodule.
        # It is assumed that two segmentations cover the same nodule if more than
        # 10 voxels are covered by both segmentations. The global nodule id is
        # the smallest nodule id for each nodule
        own_id=int(key.split("_")[2])
        minimum_id=own_id
        for k2 in arrays.keys():
            mask=(arrays[key]*arrays[k2])==1
            if len(arrays[key][mask])>10:
                new_id=int(k2.split("_")[2])
                minimum_id=min(minimum_id, new_id)
        # Save the newly created segmentation
        minimum_id="{:08n}".format(minimum_id)
        image=sitk.GetImageFromArray(arrays[key])
        image.CopyInformation(images[key])
        key_parts=key.split("_")
        new_key=key_parts[0]+"_"+key_parts[1]+"_"+key_parts[2]+"_"+str(minimum_id)
        sitk.WriteImage(image, os.path.join(path_to_nrrds, patient_id,new_key+".nii.gz"))
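
# Illustration (hypothetical IDs): if the merged masks 0001a_0_00000004 and
# 0001a_2_00000011 overlap by more than 10 voxels, they are considered the same
# nodule and both are written with the true nodule id 00000004, i.e. as
#   0001a_0_00000004_00000004.nii.gz and 0001a_2_00000011_00000004.nii.gz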

def parse_xml_file(file):
    # Create an XML tree, using our own method to remove namespaces
    root=xmlHelper.create_xml_tree(file)

    # Find the Study and Series UIDs if possible
    study_uid=xmlHelper.get_study_uid(root)
    series_uid=xmlHelper.get_series_uid(root)
    print(file)
    print(study_uid, series_uid)
    if study_uid is None:
        write_error("Failed to find Study UID: " + file)
        return

    # Find the DICOMs matching the study and series UID.
    # Assuming that all DICOMs belonging to a study/series UID are in one folder.
    dicom_path, no_of_dicoms=get_dicom_from_study_uid(study_uid, series_uid)
    if no_of_dicoms < 10:
        print(dicom_path)
        print("No DICOMs found for file:",file)
        return
    print(dicom_path)
    # Files are saved in a folder with the structure $PatientID/$StudyID/$SeriesID/$DicomName
    # Removing StudyID, SeriesID and DICOM name gives the patient ID
    long_patient_id=os.path.basename(os.path.dirname(os.path.dirname(os.path.dirname(dicom_path))))
    patient_id=long_patient_id.replace("LIDC-IDRI-","")

    # For some patients, more than one scan is provided (for example due to multiple
    # time points). To ensure that each time point is converted only once, an appendix
    # is added to the patient_id, so that multiple time points can be distinguished.
    for appendix in list_of_appendix:
        target_path = os.path.join(path_to_nrrds, patient_id+appendix)
        if not os.path.exists(target_path):
            patient_id =patient_id+appendix
            print(patient_id)
            break

    # Create a nrrd file from the DICOMs and read spacing and origin.
    nrrd_file=create_nrrd_from_dicoms(dicom_path, patient_id)
    spacing, origin = get_spacing_and_origin(nrrd_file)

    # Create multiple planar figures for each reading session.
    # Each session represents the result of a different expert.
    # Each session gets a session ID that is unique for the given patient ID.
    # The same session id for different patients does not necessarily correspond to the same expert.
    print("Creating Planar Figure")
    session_id=0
    for session in root.iter('readingSession'):
        create_planarfigure_for_session(session, spacing, origin, patient_id, session_id)
        session_id=session_id+1
    convert_planar_figures_to_masks(nrrd_file, patient_id)
    print("Merging Planar Figures")
    merge_planar_figures_per_nodule(nrrd_file, patient_id)


nodule_id = 0
# Ensure the output folder for the characteristics file exists and write the CSV
# header once per run; the rows for each nodule are appended by parse_xml_file.
os.makedirs(os.path.dirname(path_to_characteristics), exist_ok=True)
with open(path_to_characteristics,"a") as file:
    file.write(";".join(["Patient_ID","Session_ID","Radiologist","Nodule_Str","subtlety","internalStructure","calcification","sphericity","margin","lobulation","spiculation","texture","malignancy"])+"\n")

for xml_file in glob.glob(os.path.join(path_to_xmls,"*","*.xml")):
    print(xml_file)
    try:
        parse_xml_file(xml_file)
    except Exception:
        write_error("Unspecific error in file : " + xml_file)

--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.2249217.svg)](https://doi.org/10.5281/zenodo.2249217)

LIDC Data processing scripts
============================
Copyright © German Cancer Research Center (DKFZ), Division of Medical Image Computing (MIC).


The scripts within this repository can be used to convert the LIDC-IDRI data. After running the main script,
the image and segmentation data are available in NIfTI/NRRD format and the nodule characteristics are available
in a single CSV file.

If you are using these scripts for your publication, please cite them as

Michael Goetz, "MIC-DKFZ/LIDC-IDRI-processing: Release 1.0.1", DOI: 10.5281/zenodo.2249217


## Requirements
The scripts use some standard Python libraries (glob, os, subprocess, numpy, and xml) as well as the Python library SimpleITK.
Additionally, some command line tools from MITK are used. They can either be obtained by building MITK and enabling
the classification module or by installing [MITK Phenotyping](http://mitk.org/Phenotyping), which contains all
necessary command line tools.

## Basic Usage
* Download the data from the [LIDC-IDRI](https://wiki.cancerimagingarchive.net/display/Public/LIDC-IDRI) website. Required are the image DICOM files and the describing XML files (Radiologist Annotations/Segmentations (XML format)). When you download the data using the NBIA Data Retriever, select the "Classic Directory Name" option instead of the default option "Descriptive Directory Name".
* If not already done, build or download and install [MITK Phenotyping](http://mitk.org/Phenotyping)
* Adapt the paths in the file "lidc_data_to_nifti.py" (see the sketch below)
* Run the script "lidc_data_to_nifti.py"

The following input paths need to be defined:
* path_to_executables : Path where the command line tools of MITK Phenotyping can be found
* path_to_dicoms : Folder which contains the DICOM image files (not the segmentation DICOMs)
* path_to_xmls : Folder that contains the XML files which describe the nodules

The following output paths need to be defined:
* path_to_nrrds : Folder that will contain the created NRRD / NIfTI files
* path_to_planars : Folder that will contain the planar figures for each subject
* path_to_characteristics : Path to a CSV file where the characteristics of the nodules will be stored. If the file exists, the new content will be appended.
* path_to_error_file : Path to an error file where error messages are written to. Existing files will be appended.
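
The configuration block at the top of "lidc_data_to_nifti.py" defines these variables as plain module-level strings; a minimal sketch (the paths shown here are placeholders and must be replaced with the locations on your system) looks like this:

```python
# All paths below are placeholders -- adapt them to your system.
path_to_executables     = r"C:\Tools\MITK Phenotyping\bin"       # MITK Phenotyping command line tools
path_to_dicoms          = r"D:\LIDC-IDRI\DICOM"                  # LIDC-IDRI DICOM images
path_to_xmls            = r"D:\LIDC-IDRI\XML\tcia-lidc-xml"      # LIDC-IDRI annotation XML files
path_to_nrrds           = r"D:\LIDC-IDRI\nrrd"                   # output: NRRD / NIfTI files
path_to_planars         = r"D:\LIDC-IDRI\planar_figures"         # output: planar figures
path_to_characteristics = r"D:\LIDC-IDRI\characteristics.csv"    # output: nodule characteristics CSV
path_to_error_file      = r"D:\LIDC-IDRI\conversion_errors.txt"  # output: error log
```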

## Output / Result

The output created by this script consists of NRRD files containing whole DICOM series (i.e. the
complete 3D CT images), NIfTI (.nii.gz) files of the nodule segmentations (3D), and NRRD and Planar
Figure (.pf) files containing slice-wise segmentations of the nodules.

The data are stored in subfolders named after the `<patient id>`. The numerical part of the
`<patient id>` matches the Patient ID that is used in the LIDC-IDRI DICOM folder. However, since
some patients come with more than one CT image, a single letter is appended to the `<patient id>`,
so that each CT scan has a unique `<patient id>`. For example, the folder "LIDC-IDRI-0129" may contain
two CT images, which will then have the `<patient id>`s "0129a" and "0129b".

There are up to four reading sessions given for each patient and image. The `<session id>` is a one-digit number indicating
the order of the reading session FOR THE GIVEN IMAGE. According to the corresponding publication, each session
was done by one of 12 experts. However, it is not possible to ensure that two images were
annotated by the same expert. Therefore, two images might be annotated by different experts even
if they have the same `<session id>`.

Each combination of nodule and expert has a unique 8-digit `<nodule id>`, for example 00000358. This ID is unique across all
created segmentations of nodules and experts. This means that two segmentations of the
same nodule will have different `<nodule id>`s. In contrast to this, the 8-digit `<true nodule id>` is the
same for all segmentations of the same nodule. It is defined as the minimum `<nodule id>` of all
segmentations of a given nodule.

The `<roi id>` is an id which is unique within the set of planar figures or 2D segmentations
of a single nodule. It is used to differentiate multiple planes of segmentations of the same object.

Based on these definitions, the following files are created:
* path_to_nrrds/`<patient id>`/`<patient id>`_ct_scan.nrrd : A NRRD file containing the 3D CT image
* path_to_nrrds/`<patient id>`/`<patient id>`_`<session id>`_`<nodule id>`_`<true nodule id>`.nii.gz : NIfTI files containing the segmentations of the nodules
* path_to_nrrds/`<patient id>`/planar_masks/`<patient id>`_`<session id>`_`<nodule id>`_`<roi id>`.nrrd : NRRD files containing a single plane of the nodule segmentations
* path_to_planars/`<patient id>`/`<patient id>`_`<session id>`_`<nodule id>`_`<roi id>`.pf : Planar figure files containing a single plane of the nodule segmentations

In addition, the characteristics of the nodules are saved in the file specified in path_to_characteristics
(one semicolon-separated line per nodule and reading session, see the example below), and errors occurring during the whole process are recorded in path_to_error_file.
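
The characteristics file contains one header line followed by one row per annotated nodule and reading session. The header below matches the one written by the script; the two data rows and the radiologist codes are purely illustrative:

```
Patient_ID;Session_ID;Radiologist;Nodule_Str;subtlety;internalStructure;calcification;sphericity;margin;lobulation;spiculation;texture;malignancy
0129a;0;anon-1;00000358;5;1;6;4;4;3;2;5;4
0129a;2;anon-3;00000371;4;1;6;3;5;2;2;5;3
```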

## Limitations
The script was developed on Windows. It should be possible to run it on Linux; however, this has never
been tested. Problems may be caused by the subprocess calls (calling the executables of MITK Phenotyping).

Also, the script was developed for my own research and is not extensively tested. It may therefore contain
limitations I am not aware of.

I developed this script when no DICOM SEG files for LIDC-IDRI were available online, so it relies on the
XML description, which might not be the best solution. Feel free to extend it or write a new solution
which makes use of the now available DICOM SEG objects.

## Further questions
If you have suggestions or questions, you can reach the author (Michael Goetz) at m.goetz@dkfz-heidelberg.de

## Licence

Copyright (c) 2003-2019 German Cancer Research Center,
Division of Medical Image Computing
All rights reserved.

Redistribution and use in source and binary forms, with or
without modification, are permitted provided that the
following conditions are met:

* Redistributions of source code must retain the above
  copyright notice, this list of conditions and the
  following disclaimer.

* Redistributions in binary form must reproduce the above
  copyright notice, this list of conditions and the
  following disclaimer in the documentation and/or other
  materials provided with the distribution.

* Neither the name of the German Cancer Research Center,
  nor the names of its contributors may be used to endorse
  or promote products derived from this software without
  specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

--------------------------------------------------------------------------------
/template.pf:
--------------------------------------------------------------------------------
[template.pf: MITK planar figure XML template; its markup is not preserved in this dump.
It contains the placeholders %%origin_x%%, %%origin_y%%, %%origin_z%% and %%points%%,
which are filled in by lidc_data_to_nifti.py.]

            %%points%%

--------------------------------------------------------------------------------