├── LICENSE
├── lidcXmlHelper.py
├── lidc_data_to_nifti.py
├── readme.md
└── template.pf

/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018 DKFZ, Division of Medical Image Computing

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/lidcXmlHelper.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Wed Feb 21 12:28:19 2018

@author: m.goetz@dkfz-heidelberg.de
"""
import xml.etree.ElementTree as ET

def create_xml_tree(filepath):
    """
    Method to ignore the namespaces if ElementTree is used.
    Necessary because ElementTree, by default, extends
    tag names by the namespace, but the namespaces used in the
    LIDC-IDRI dataset are not consistent.
    Solution based on https://stackoverflow.com/questions/13412496/python-elementtree-module-how-to-ignore-the-namespace-of-xml-files-to-locate-ma

    Used instead of ET.fromstring(xml).
    """
    it = ET.iterparse(filepath)
    for _, el in it:
        if '}' in el.tag:
            el.tag = el.tag.split('}', 1)[1]  # strip all namespaces
        for at in list(el.attrib.keys()):  # strip namespaces of attributes too
            if '}' in at:
                newat = at.split('}', 1)[1]
                el.attrib[newat] = el.attrib[at]
                del el.attrib[at]
    return it.root

def get_study_uid(root):
    result = None
    try:
        result = root.find("ResponseHeader/StudyInstanceUID").text
    except AttributeError:
        pass
    return result

def get_series_uid(root):
    result = None
    try:
        result = root.find("ResponseHeader/SeriesInstanceUid").text
    except AttributeError:
        pass
    return result

def read_nodule_property(nodule_tree, tag):
    """
    Reads a specified property of a nodule. If the property is not specified
    in the corresponding part of the XML tree (nodule_tree), -1 is returned.
    """
    try:
        result = str(nodule_tree.find("characteristics/" + tag).text)
        return result
    except AttributeError:
        return str(-1)
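
# Example usage (sketch): how these helpers are combined in lidc_data_to_nifti.py.
# The XML file name below is a placeholder for one of the LIDC-IDRI annotation files.
if __name__ == "__main__":
    root = create_xml_tree("example_lidc_annotation.xml")
    print(get_study_uid(root), get_series_uid(root))
    for session in root.iter("readingSession"):
        for nodule in session.iter("unblindedReadNodule"):
            print(read_nodule_property(nodule, "malignancy"))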
--------------------------------------------------------------------------------
/lidc_data_to_nifti.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Wed Feb 21 12:28:19 2018

@author: Michael Goetz (m.goetz@dkfz-heidelberg.de)
"""

import glob
import os
import subprocess
import SimpleITK as sitk
import numpy as np
import lidcXmlHelper as xmlHelper

# Path to the command line tools of MITK Phenotyping
path_to_executables=r"E:\Tools\MITK Phenotyping 2018-10-18\bin"
# Path to the folder that contains the LIDC-IDRI DICOM files
path_to_dicoms = r"P:\Goetz\Datenkollektive\Lungendaten\Nodules_LIDC_IDRI\DICOM"
# Path to the folder that contains the LIDC-IDRI XML files
path_to_xmls= r"P:\Goetz\Datenkollektive\Lungendaten\Nodules_LIDC_IDRI\XML\tcia-lidc-xml"
path_to_xmls= r"P:\Goetz\Datenkollektive\Lungendaten\Nodules_LIDC_IDRI\XML2"
# Output path where the generated NRRD and NIFTI files will be saved
path_to_nrrds = r"P:\Goetz\Datenkollektive\Lungendaten\Nodules_LIDC_IDRI\new_nrrd_2"
# Output path where the generated Planar Figures will be saved
path_to_planars= r"P:\Goetz\Datenkollektive\Lungendaten\Nodules_LIDC_IDRI\new_planars_2"
# Output path to the CSV file that will contain the nodule characteristics. An existing file will be appended.
path_to_characteristics=r"P:\Goetz\Datenkollektive\Lungendaten\Nodules_LIDC_IDRI\characteristics_2.csv"
# Output path to an error file where errors will be logged. An existing file will be appended.
path_to_error_file=r"W:\Old\LungImages\LIDC-IDRI\conversion_error_2.txt"

planar_template=r"template.pf"

list_of_appendix=['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']

def write_error(msg, errorfile=path_to_error_file):
    """
    A simple error logging method. Errors should be reported using this function.
    All errors are then logged in the file specified by the global variable
    'path_to_error_file' if no other file is specified.

    The error message is also printed to the console.
    """
    a=open(errorfile,'a')
    a.write(str(msg) + "\n")
    a.close()
    print("ERROR:",msg)

def get_dicom_from_study_uid(study_uid, series_uid):
    """
    Find the folder containing the DICOMs that corresponds to a given study id
    or a study id and a series id.

    Returns:
        The path to one DICOM matching the given IDs
        The number of DICOMs that have been found.
    """
    if series_uid is not None:
        search_path=os.path.join(path_to_dicoms, "*","*"+study_uid+"*","*"+series_uid+"*","*.dcm")
    else:
        search_path=os.path.join(path_to_dicoms, "*","*"+study_uid+"*","*","*.dcm")
    paths=glob.glob(search_path)
    if len(paths) > 0:
        return paths[0], len(paths)
    else:
        return [], 0
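
# The glob patterns above assume the "Classic Directory Name" layout described in
# readme.md, i.e. the DICOM files are stored as
#   path_to_dicoms/<patient folder>/<study UID folder>/<series UID folder>/*.dcm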

def create_nrrd_from_dicoms(image, patient_id):
    """
    Reads a folder that contains multiple DICOM files and
    converts the input into a single nrrd file using a command line
    app from MITK or MITK Phenotyping.

    Input:
    * path to one DICOM file (the others are found automatically)
    * Patient ID

    Output:
    Creates a single nrrd file with the path: $target_path / patient_id + '_ct_scan.nrrd'

    """
    target_path = os.path.join(path_to_nrrds, patient_id)
    target_name = os.path.join(target_path, patient_id+"_ct_scan.nrrd")
    os.makedirs(target_path, exist_ok=True)
    cmd_string=r"MitkCLDicom2Nrrd "+\
                "-i \"" + image + "\"" \
                " -o \"" + target_name + "\""
    print(cmd_string)
    a=subprocess.Popen(cmd_string,shell=True,cwd=path_to_executables)
    a.wait()
    return target_name

def get_spacing_and_origin(file):
    """ Reads a nrrd file, extracts spacing and origin using SimpleITK, and returns them """
    image=sitk.ReadImage(file)
    spacing=image.GetSpacing()
    origin=image.GetOrigin()
    return spacing, origin

def create_planarfigure_for_session(session, spacing, origin, patient_id, session_id):
    """
    Given one reading session of an expert and the corresponding patient id, the given
    contours are converted into multiple planar figures.
    Each nodule gets an ID that is unique for ALL nodules from all images / reading sessions.

    The planar figures are saved in a path following this structure:
    path_to_planars/<patient_id>/<patient_id>_<session_id>_<nodule_id>_<roi_id>.pf

    with the following properties:
    * path_to_planars : Globally specified folder
    * <patient_id> : Unique Patient ID consisting of patient number and an appendix
    * <session_id> : Number of the reading session / expert. Unique to the given patient only.
    * <nodule_id> : A globally unique ID of the given nodule
    * <roi_id> : A nodule-wide unique, consecutive number of the current ROI. (Each planar figure contains the annotation of a single slice)
    """
    # Obtaining the code of the radiologist. Replacing underscores (_) as they are later used to
    # encode different IDs in the resulting file name.
    radiologist=str(session.find("servicingRadiologistID").text).replace("_","-")

    # Reading each nodule in the given session and creating planar figures for it (if large enough)
    global nodule_id
    for nodule in session.iter('unblindedReadNodule'):
        create_planarfigures_for_nodule(nodule, spacing, origin, patient_id, session_id, radiologist)
        nodule_id = nodule_id + 1
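
# For example (illustrative values): the third ROI of the nodule with the global
# id 4, drawn in reading session 1 of patient 0001a, ends up in
#   path_to_planars/0001a/0001a_1_00000004_2.pf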

def create_planarfigures_for_nodule(nodule, spacing, origin, patient_id, session_id, radiologist):
    """
    Extracts the properties of a given nodule, saves them to the CSV file specified in the
    global variable 'path_to_characteristics' and saves all contours of that
    nodule as planar figures.

    Each contour is given a consecutive number.
    """
    global nodule_id
    nodule_str="{:08n}".format(nodule_id)

    # Extract the properties of the nodule
    subtlety=xmlHelper.read_nodule_property(nodule, 'subtlety')
    internalStructure=xmlHelper.read_nodule_property(nodule, 'internalStructure')
    calcification=xmlHelper.read_nodule_property(nodule, 'calcification')
    sphericity=xmlHelper.read_nodule_property(nodule, 'sphericity')
    margin=xmlHelper.read_nodule_property(nodule, 'margin')
    lobulation=xmlHelper.read_nodule_property(nodule, 'lobulation')
    spiculation=xmlHelper.read_nodule_property(nodule, 'spiculation')
    texture=xmlHelper.read_nodule_property(nodule, 'texture')
    malignancy=xmlHelper.read_nodule_property(nodule, 'malignancy')

    # Save the characteristics and specifics of the nodule to the global CSV file
    global path_to_characteristics
    with open(path_to_characteristics,"a") as file:
        file.write(";".join([str(patient_id),str(session_id),str(radiologist),str(nodule_str),subtlety,internalStructure,calcification,sphericity,margin,lobulation,spiculation,texture,malignancy])+"\n")

    # Extract all ROIs specified within the nodule
    roi_id=0
    for roi in nodule.iter('roi'):
        create_planarfigures_for_roi(roi, spacing, origin, patient_id, session_id, nodule_str, roi_id)
        roi_id=roi_id+1

def create_planarfigures_for_roi(roi, spacing, origin, patient_id, session_id, nodule_id, roi_id):
    """
    Given the section of XML that specifies a certain ROI, this function creates a
    planar figure file out of it.

    The planar figure is saved in a path following this structure:
    path_to_planars/<patient_id>/<patient_id>_<session_id>_<nodule_id>_<roi_id>.pf

    with the following properties:
    * path_to_planars : Globally specified folder
    * <patient_id> : Unique Patient ID consisting of patient number and an appendix
    * <session_id> : Number of the reading session / expert. Unique to the given patient only.
    * <nodule_id> : A globally unique ID of the given nodule
    * <roi_id> : A nodule-wide unique, consecutive number of the current ROI.

    """
    # All ROIs are within a single Z-plane, so the z-position needs to be obtained only once
    z_position = roi.find("imageZposition").text

    # Create the file name and ensure that the corresponding folder exists to prevent write errors
    target_path = os.path.join(path_to_planars, patient_id)
    target_name = os.path.join(target_path, patient_id+"_"+str(session_id)+"_"+str(nodule_id)+"_"+str(roi_id)+".pf")
    os.makedirs(target_path, exist_ok=True)

    # Convert the given edge information into an XML part describing the planar figure.
    # NOTE: the exact vertex element written here is an assumption; its tag and attribute
    # names have to match what the %%points%% placeholder in template.pf expects.
    vertex_string=""
    edge_id=0
    for edge in roi.iter('edgeMap'):
        x=float(edge[0].text)*spacing[0]
        y=float(edge[1].text)*spacing[1]
        vertex_string=vertex_string+"        <Vertex id=\""+str(edge_id)+"\" x=\""+str(x)+"\" y=\""+str(y)+"\"/>\n"
        edge_id=edge_id+1

    # If less than two points are defined, it is not a complete mesh. This happens
    # if the lesion was too small, so the experts didn't draw spatial annotations.
    if edge_id < 2:
        return None

    # Read the template, replace the corresponding placeholders and
    # save the result as a new planar figure
    with open(planar_template,"r") as file:
        template=file.read()
    template=template.replace("%%origin_z%%", str(z_position))
    template=template.replace("%%origin_x%%", str(origin[0]))
    template=template.replace("%%origin_y%%", str(origin[1]))
    template=template.replace("%%points%%", vertex_string)
    with open(target_name,"w") as file:
        file.write(template)
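
# Note: template.pf is expected to contain the literal placeholders %%origin_x%%,
# %%origin_y%%, %%origin_z%% and %%points%%, which are substituted above for every
# ROI before the filled-in planar figure is written to disk.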

def convert_planar_figures_to_masks(image, patient_id):
    """ Finds all planar figures for a single patient and converts them to segmentations that match the CT of the patient """
    for planar_figure in glob.glob(os.path.join(path_to_planars,patient_id,"*.pf")):
        create_mask_for_planar_figure(image, patient_id, planar_figure)

def create_mask_for_planar_figure(image, patient_id, planar_figure):
    """
    Create a segmentation file from a planar figure, using the corresponding CT file.

    All mask files are saved in a folder with the structure
    path_to_nrrds/<patient_id>/planar_masks/<planar figure name>.nrrd
    """
    # Create the new filename
    file_name=os.path.basename(planar_figure)
    target_path = os.path.join(path_to_nrrds, patient_id,"planar_masks")
    target_name = os.path.join(target_path, file_name.replace(".pf",".nrrd"))
    os.makedirs(target_path, exist_ok=True)

    cmd_string=r"MitkCLPlanarFigureToNrrd "+\
                "-i \"" + image + "\"" \
                " -p \"" + planar_figure + "\"" \
                " -o \"" + target_name + "\""
    #print(cmd_string)
    a=subprocess.Popen(cmd_string,shell=True,cwd=path_to_executables)
    a.wait()
    return target_name

def merge_planar_figures_per_nodule(image, patient_id):
    """
    There are two problems with the segmentations generated from the planar figures,
    both caused by the way the original data is presented. First, the
    segmentation of each nodule is split into multiple files, as the corresponding
    ROIs are given as slice-wise contours. Second, corresponding annotations
    of the same nodule are not identified, as lesions share no common id between
    different experts.

    This method matches segmentations that are from the same rater and combines
    them. It also tries to identify multiple segmentations of the same nodule by
    different raters by looking at the overlap of the segmentations.

    It is assumed that two segmentations cover the same nodule if they
    overlap by more than 10 voxels.

    The newly created segmentation has the format
    path_to_nrrds/<patient_id>/<patient_id>_<session_id>_<nodule_id>_<true_nodule_id>.nii.gz
    with the following properties:
    * path_to_nrrds : Globally specified folder
    * <patient_id> : Unique Patient ID consisting of patient number and an appendix
    * <session_id> : Number of the reading session / expert. Unique to the given patient only.
    * <nodule_id> : A globally unique ID of the given nodule
    * <true_nodule_id> : The globally minimal ID of the nodule. All masks of this nodule should share the same True Nodule ID.
    """
    # Load all masks as numpy arrays and store them in a dictionary.
    # The keys of the dictionary match the (preliminary) mask id
    origin_path = os.path.join(path_to_nrrds, patient_id,"planar_masks","*.nrrd")
    images={}
    arrays={}
    for mask in glob.glob(origin_path):
        mask_name=os.path.basename(mask)
        mask_limits=mask_name.split("_")
        # The first three parts of the file name (Patient ID, Session ID, and Nodule ID)
        # identify whether a given ROI belongs to a certain nodule. (ROI ID is ignored)
        mask_id=mask_limits[0]+"_"+mask_limits[1]+"_"+mask_limits[2]
        # If no array with the mask_id is available, create one
        if mask_id not in images.keys():
            image=sitk.ReadImage(mask)
            images[mask_id]=image
            array=sitk.GetArrayFromImage(image)
            arrays[mask_id]=array
        # If a planar figure belonging to the given nodule already exists, add
        # the new one to the old one (i.e. merge both segmentations)
        else:
            image=sitk.ReadImage(mask)
            array=sitk.GetArrayFromImage(image)
            arrays[mask_id]=arrays[mask_id]+array

    for key,idx in zip(images.keys(),range(len(images.keys()))):
        # If values larger than 1 are present in a segmentation, there are
        # overlaps between two segmentations for this nodule. This should not happen
        # but occurs due to errors in the original XML files
        if len(arrays[key][arrays[key]>1])>1:
            write_error("Failed due to wrong segmentations: " + key)
            continue
        # Identify the smallest global nodule ID for the given nodule.
        # It is assumed that two segmentations cover the same nodule if more than
        # 10 voxels are covered by both segmentations. The global nodule id is
        # the smallest nodule id for each nodule
        own_id=int(key.split("_")[2])
        minimum_id=own_id
        for k2 in arrays.keys():
            mask=(arrays[key]*arrays[k2])==1
            if len(arrays[key][mask])>10:
                new_id=int(k2.split("_")[2])
                minimum_id=min(minimum_id, new_id)
        # Save the newly created segmentation
        minimum_id="{:08n}".format(minimum_id)
        image=sitk.GetImageFromArray(arrays[key])
        image.CopyInformation(images[key])
        key_parts=key.split("_")
        new_key=key_parts[0]+"_"+key_parts[1]+"_"+key_parts[2]+"_"+str(minimum_id)
        sitk.WriteImage(image, os.path.join(path_to_nrrds, patient_id,new_key+".nii.gz"))
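
# Illustration (hypothetical IDs): if the merged masks 0001a_0_00000004 and
# 0001a_2_00000011 overlap by more than 10 voxels, they are considered the same
# nodule and both are written with the true nodule id 00000004, i.e. as
#   0001a_0_00000004_00000004.nii.gz and 0001a_2_00000011_00000004.nii.gz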

def parse_xml_file(file):
    # Create an XML tree, using our own method to remove namespaces
    root=xmlHelper.create_xml_tree(file)

    # Find the Study and Series UIDs if possible
    study_uid=xmlHelper.get_study_uid(root)
    series_uid=xmlHelper.get_series_uid(root)
    print(file)
    print(study_uid, series_uid)
    if study_uid is None:
        write_error("Failed to find Study UID: " + file)
        return

    # Find the DICOMs matching the study and series UID.
    # Assuming that all DICOMs belonging to a study/series UID are in one folder.
    dicom_path, no_of_dicoms=get_dicom_from_study_uid(study_uid, series_uid)
    if no_of_dicoms < 10:
        print(dicom_path)
        print("No DICOMs found for file:",file)
        return
    print(dicom_path)
    # Files are saved in a folder with the structure $PatientID/$StudyID/$SeriesID/$DicomName
    # Removing StudyID, SeriesID and DICOM name gives the patient ID
    long_patient_id=os.path.basename(os.path.dirname(os.path.dirname(os.path.dirname(dicom_path))))
    patient_id=long_patient_id.replace("LIDC-IDRI-","")

    # For some patients, more than one scan is provided (for example due to multiple
    # time points). To ensure that each time point is converted only once, an appendix
    # is added to the patient_id, so that multiple time points can be distinguished.
    for appendix in list_of_appendix:
        target_path = os.path.join(path_to_nrrds, patient_id+appendix)
        if not os.path.exists(target_path):
            patient_id =patient_id+appendix
            print(patient_id)
            break

    # Create a nrrd file from the DICOMs and read spacing and origin.
    nrrd_file=create_nrrd_from_dicoms(dicom_path, patient_id)
    spacing, origin = get_spacing_and_origin(nrrd_file)

    # Create multiple planar figures for each reading session.
    # Each session represents the result of a different expert.
    # Each session gets a session ID that is unique for the given patient ID.
    # The same session id for different patients does not necessarily correspond to the same expert.
    print("Creating Planar Figure")
    session_id=0
    for session in root.iter('readingSession'):
        create_planarfigure_for_session(session, spacing, origin, patient_id, session_id)
        session_id=session_id+1
    convert_planar_figures_to_masks(nrrd_file, patient_id)
    print("Merging Planar Figures")
    merge_planar_figures_per_nodule(nrrd_file, patient_id)


nodule_id = 0
# Ensure the output folder for the characteristics file exists and write the CSV
# header once per run; the rows for each nodule are appended by parse_xml_file.
os.makedirs(os.path.dirname(path_to_characteristics), exist_ok=True)
with open(path_to_characteristics,"a") as file:
    file.write(";".join(["Patient_ID","Session_ID","Radiologist","Nodule_Str","subtlety","internalStructure","calcification","sphericity","margin","lobulation","spiculation","texture","malignancy"])+"\n")

for xml_file in glob.glob(os.path.join(path_to_xmls,"*","*.xml")):
    print(xml_file)
    try:
        parse_xml_file(xml_file)
    except Exception:
        write_error("Unspecific error in file : " + xml_file)

--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.2249217.svg)](https://doi.org/10.5281/zenodo.2249217)

LIDC Data processing scripts
============================
Copyright © German Cancer Research Center (DKFZ), Division of Medical Image Computing (MIC).


The scripts within this repository can be used to convert the LIDC-IDRI data. After running the main script,
the image and segmentation data are available in NIfTI/NRRD format and the nodule characteristics are available
in a single CSV file.

If you are using these scripts for your publication, please cite them as

Michael Goetz, "MIC-DKFZ/LIDC-IDRI-processing: Release 1.0.1", DOI: 10.5281/zenodo.2249217


## Requirements
The scripts use some standard Python libraries (glob, os, subprocess, numpy, and xml) as well as the Python library SimpleITK.
Additionally, some command line tools from MITK are used. They can either be obtained by building MITK and enabling
the classification module or by installing [MITK Phenotyping](http://mitk.org/Phenotyping), which contains all
necessary command line tools.

## Basic Usage
* Download the data from the [LIDC-IDRI](https://wiki.cancerimagingarchive.net/display/Public/LIDC-IDRI) website. Required are the image DICOM files and the describing XML files (Radiologist Annotations/Segmentations (XML format)). When you download the data using the NBIA Data Retriever, select the "Classic Directory Name" option instead of the default option "Descriptive Directory Name".
* If not already done, build or download and install [MITK Phenotyping](http://mitk.org/Phenotyping)
* Adapt the paths in the file "lidc_data_to_nifti.py" (see the sketch below)
* Run the script "lidc_data_to_nifti.py"

The following input paths need to be defined:
* path_to_executables : Path where the command line tools of MITK Phenotyping can be found
* path_to_dicoms : Folder which contains the DICOM image files (not the segmentation DICOMs)
* path_to_xmls : Folder that contains the XML files which describe the nodules

The following output paths need to be defined:
* path_to_nrrds : Folder that will contain the created NRRD / NIfTI files
* path_to_planars : Folder that will contain the planar figures for each subject
* path_to_characteristics : Path to a CSV file where the characteristics of the nodules will be stored. If the file exists, the new content will be appended.
* path_to_error_file : Path to an error file where error messages are written to. Existing files will be appended.
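
The configuration block at the top of "lidc_data_to_nifti.py" defines these variables as plain module-level strings; a minimal sketch (the paths shown here are placeholders and must be replaced with the locations on your system) looks like this:

```python
# All paths below are placeholders -- adapt them to your system.
path_to_executables     = r"C:\Tools\MITK Phenotyping\bin"       # MITK Phenotyping command line tools
path_to_dicoms          = r"D:\LIDC-IDRI\DICOM"                  # LIDC-IDRI DICOM images
path_to_xmls            = r"D:\LIDC-IDRI\XML\tcia-lidc-xml"      # LIDC-IDRI annotation XML files
path_to_nrrds           = r"D:\LIDC-IDRI\nrrd"                   # output: NRRD / NIfTI files
path_to_planars         = r"D:\LIDC-IDRI\planar_figures"         # output: planar figures
path_to_characteristics = r"D:\LIDC-IDRI\characteristics.csv"    # output: nodule characteristics CSV
path_to_error_file      = r"D:\LIDC-IDRI\conversion_errors.txt"  # output: error log
```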

## Output / Result

The output created by this script consists of NRRD files containing whole DICOM series (i.e. the
complete 3D CT images), NIfTI (.nii.gz) files of the nodule segmentations (3D), and NRRD and Planar
Figure (.pf) files containing slice-wise segmentations of the nodules.

The data are stored in subfolders named after the `<patient id>`. The numerical part of the
`<patient id>` matches the Patient ID that is used in the LIDC-IDRI DICOM folder. However, since
some patients come with more than one CT image, a single letter is appended to the `<patient id>`,
so that each CT scan has a unique `<patient id>`. For example, the folder "LIDC-IDRI-0129" may contain
two CT images, which will then have the `<patient id>`s "0129a" and "0129b".

There are up to four reading sessions given for each patient and image. The `<session id>` is a one-digit number indicating
the order of the reading session FOR THE GIVEN IMAGE. According to the corresponding publication, each session
was done by one of 12 experts. However, it is not possible to ensure that two images were
annotated by the same expert. Therefore, two images might be annotated by different experts even
if they have the same `<session id>`.

Each combination of nodule and expert has a unique 8-digit `<nodule id>`, for example 00000358. This ID is unique across all
created segmentations of nodules and experts. This means that two segmentations of the
same nodule will have different `<nodule id>`s. In contrast to this, the 8-digit `<true nodule id>` is the
same for all segmentations of the same nodule. It is defined as the minimum `<nodule id>` of all
segmentations of a given nodule.

The `<roi id>` is an id which is unique within the set of planar figures or 2D segmentations
of a single nodule. It is used to differentiate multiple planes of segmentations of the same object.

Based on these definitions, the following files are created:
* path_to_nrrds/`<patient id>`/`<patient id>`_ct_scan.nrrd : A NRRD file containing the 3D CT image
* path_to_nrrds/`<patient id>`/`<patient id>`_`<session id>`_`<nodule id>`_`<true nodule id>`.nii.gz : NIfTI files containing the segmentations of the nodules
* path_to_nrrds/`<patient id>`/planar_masks/`<patient id>`_`<session id>`_`<nodule id>`_`<roi id>`.nrrd : NRRD files containing a single plane of the nodule segmentations
* path_to_planars/`<patient id>`/`<patient id>`_`<session id>`_`<nodule id>`_`<roi id>`.pf : Planar figure files containing a single plane of the nodule segmentations

In addition, the characteristics of the nodules are saved in the file specified in path_to_characteristics
(one semicolon-separated line per nodule and reading session, see the example below), and errors occurring during the whole process are recorded in path_to_error_file.
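
The characteristics file contains one header line followed by one row per annotated nodule and reading session. The header below matches the one written by the script; the two data rows and the radiologist codes are purely illustrative:

```
Patient_ID;Session_ID;Radiologist;Nodule_Str;subtlety;internalStructure;calcification;sphericity;margin;lobulation;spiculation;texture;malignancy
0129a;0;anon-1;00000358;5;1;6;4;4;3;2;5;4
0129a;2;anon-3;00000371;4;1;6;3;5;2;2;5;3
```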

## Limitations
The script was developed on Windows. It should be possible to run it on Linux; however, this has never
been tested. Problems may be caused by the subprocess calls (calling the executables of MITK Phenotyping).

Also, the script was developed for my own research and is not extensively tested. It may therefore contain
limitations I am not aware of.

I developed this script when no DICOM SEG files for LIDC-IDRI were available online, so it relies on the
XML description, which might not be the best solution. Feel free to extend it or write a new solution
which makes use of the now available DICOM SEG objects.

## Further questions
If you have suggestions or questions, you can reach the author (Michael Goetz) at m.goetz@dkfz-heidelberg.de

## Licence

Copyright (c) 2003-2019 German Cancer Research Center,
Division of Medical Image Computing
All rights reserved.

Redistribution and use in source and binary forms, with or
without modification, are permitted provided that the
following conditions are met:

* Redistributions of source code must retain the above
  copyright notice, this list of conditions and the
  following disclaimer.

* Redistributions in binary form must reproduce the above
  copyright notice, this list of conditions and the
  following disclaimer in the documentation and/or other
  materials provided with the distribution.

* Neither the name of the German Cancer Research Center,
  nor the names of its contributors may be used to endorse
  or promote products derived from this software without
  specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

--------------------------------------------------------------------------------
/template.pf:
--------------------------------------------------------------------------------
[template.pf: MITK planar figure XML template; its markup is not preserved in this dump.
It contains the placeholders %%origin_x%%, %%origin_y%%, %%origin_z%% and %%points%%,
which are filled in by lidc_data_to_nifti.py.]

            %%points%%

--------------------------------------------------------------------------------