├── README.md └── dvh_analysis.py /README.md: -------------------------------------------------------------------------------- 1 | # DICOM dose parameter extraction 2 | Script for automated extraction of dosimetric parameters from multiple RT DICOM files. 3 | Note: The code is not optimised for speed and the directory structure must be as follows: 4 | 5 | - Parent directory 6 | - Patient Folder 1 7 | - RS file 8 | - RD file 9 | - RP file 10 | - Patient Folder 2 11 | - RS file 12 | - RD file 13 | - RP file.... 14 | 15 | ## Instructions for use 16 | Contained within this repository is the Python module "dvh_analysis.py" which can be run on a directory to extract dosimetric parameters from DICOM RT files. 17 | Dependencies are: 18 | - pandas 19 | - numpy 20 | - tkinter 21 | - dicompyler-core 22 | 23 | 24 | 25 | ## Example usage 26 | ```python 27 | # import the module 28 | import dvh_analysis 29 | 30 | # generate and store results 31 | my_res = dvh_analysis.dicom_dvh_stats_multi(stats='import', # allow import of file with list of dosimetric statistics to extract 32 | include_body=False, # excludes the body contour from DVH statistic generation (for speed) 33 | struct_labels=True,# allow import of file containing user-defined structure labels to add to output 34 | save_df=True, # automatically save the generated pandas dataframe 35 | user_structures=None) # list of structures to analyse. None ==> analyses all structures. 36 | ``` 37 | -------------------------------------------------------------------------------- /dvh_analysis.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # ## Process developed to analyse multiple DICOM files and extract DVH information 5 | # All files must be stored as follows: 6 | # - Parent directory 7 | # - Patient Folder 1 8 | # - RS file 9 | # - RD file 10 | # - RP file 11 | # - Patient Folder 2 12 | # - RS file 13 | # - RD file 14 | # - RP file.... 
#
# The DVH analysis is based on the dicompyler-core module (which is based on pydicom).
# Notebook with examples: https://github.com/bastula/dicom-notebooks/blob/master/dicompyler-core_usage.ipynb
#
# - The user can specify the parent directory and then all sub directories are analysed.
# - The user must specify which DVH statistics are required (can be from an excel/text/csv file).
# - Additional structure labels can be added to allow simpler comparison between different plans
#   which may have different original structure names.
#   *Note that all structure names are converted to lowercase*.
# - Results are output in CSV format and contain patient id, plan name, structure name and the
#   dvh statistics requested.
#

# In[1]:

import pandas as pd
import numpy as np

## matplotlib is only used by the commented-out plotting examples at the end of
## the file, so a missing installation must not stop the analysis from running.
try:
    import matplotlib.pyplot as plt
except ImportError:
    plt = None
#get_ipython().magic('matplotlib inline')

import itertools as it
from dicompylercore import dicomparser, dvhcalc#, dvh
import os
import pydicom

## for choosing a directory
import tkinter as tk
from tkinter import filedialog

## for exiting on error (no oncentra dose grid)
import sys


## the three DICOM RT file types needed to produce DVH statistics for a patient
_RT_FILE_TYPES = ('rtss', 'rtdose', 'rtplan')

## file extensions which are read with pandas.read_excel
_EXCEL_EXTENSIONS = ('.xlsx', '.xls', '.xlm', '.xlsm')


# In[2]:

def dir_open(title = 'Select Directory...'):
    """Get a directory string using the OS file chooser dialog."""
    ## the root window is left visible while the dialog is open (the original
    ## author observed freezes otherwise) and destroyed afterwards so repeated
    ## dialogs do not accumulate Tk roots.
    root = tk.Tk()
    try:
        return filedialog.askdirectory(title = title)
    finally:
        root.destroy()


def file_open(title = 'Select File...'):
    """Get a file string using the OS file chooser dialog."""
    root = tk.Tk()
    try:
        return filedialog.askopenfilename(title = title)
    finally:
        root.destroy()


def file_save(title = 'Save File As...',filetypes = (("csv","*.csv"),("all files","*.*")), initialfile=''):
    """Get a save-file string using the OS file chooser dialog.

    title       -- dialog title.
    filetypes   -- accepted for backward compatibility but currently ignored;
                   the dialog always offers CSV and all files.
    initialfile -- file name pre-filled in the dialog.
    """
    ftypes = [('CSV', '.csv'), ('All files', '*')]
    root = tk.Tk()
    try:
        return filedialog.asksaveasfilename(title = title, filetypes = ftypes,
                                            defaultextension='.csv', initialfile=initialfile)
    finally:
        root.destroy()


# In[3]:

def save_df_to_csv(df,prompt=False,save_dir='',save_name = None):
    """Save the dataframe as csv.

    df        -- dataframe to save.
    prompt    -- if true, ask the user where to save via a dialog; nothing is
                 saved if the dialog is closed without choosing a file.
    save_dir  -- directory used when not prompting (normally the parent
                 directory of the analysis).
    save_name -- file name to use; defaults to 'results-<date>-<time>.csv'.
    """
    if save_name is None:
        ## no need to import a date module as pandas can provide the timestamp
        the_date = pd.Timestamp("now").strftime("%Y%m%d-%H%M")
        save_name = 'results-' + str(the_date) + '.csv'

    if prompt:
        save_name = file_save(title = 'Save Results As...', initialfile = save_name)
        if not save_name:  ## user closed before saving
            return
    else:
        save_name = os.path.join(save_dir, save_name)

    df.to_csv(save_name)
    print('Results saved:', save_name)


# In[4]:

def swap_dict(the_dict):
    """Swap key,value pairs in a dict. Values must be unique (and hashable) to work."""
    return {value: key for key, value in the_dict.items()}


# In[5]:

def dicom_type(file):
    """Return the type of the dicom file as reported by dicompyler-core.

    The rest of this module relies on the values 'rtss', 'rtdose' and 'rtplan'.
    """
    return dicomparser.DicomParser(file).GetSOPClassUID()


# In[6]:

def get_rt_files(directory):
    """Return {'rtss': path, 'rtdose': path, 'rtplan': path} for the files in directory.

    A key is absent if no file of that type is found. If several files of the
    same type exist, the last one in sorted name order is used.
    """
    pt_rt_file_dict = {}
    for file_name in sorted(os.listdir(directory)):
        full_file_dir = os.path.join(directory, file_name)
        ## sub-directories cannot be parsed as DICOM files
        if not os.path.isfile(full_file_dir):
            continue
        d_type = dicom_type(full_file_dir)
        if d_type in _RT_FILE_TYPES:
            pt_rt_file_dict[d_type] = full_file_dir

    return pt_rt_file_dict


# In[7]:

def structure_dict(file):
    """Return {structure_name: structure_id} for an RT structure set file.

    file is the complete file path. Structure names are converted to lowercase.
    Returns None (after printing a message) if the file is not an RT structure set.
    """
    ## parse once and reuse the parser for both the type check and the structures
    parser = dicomparser.DicomParser(file)
    if parser.GetSOPClassUID() != 'rtss':
        print('File is not an RT Structure set:',file)
        return None

    ## GetStructures returns {number: {'id': ..., 'name': ..., ...}}
    return {info['name'].lower(): info['id'] for info in parser.GetStructures().values()}


# In[8]:

def get_dicom_plan_info(file):
    """Get info from the dicom plan file.

    Results are returned as a dictionary so they can be easily expanded as
    required. Returns None (after printing a message) if the file is not an RTPLAN.
    """
    if dicom_type(file) != 'rtplan':
        print('Should provide an RTPLAN filetype')
        return None

    ds = pydicom.dcmread(file)

    ## Try and keep this function the same for all manufacturers. Use separate
    ## functions to get manufacturer specific info.
    ## pydicom looks up keyword names in its DICOM dictionary to find the tag:
    ## https://github.com/pydicom/pydicom/blob/master/pydicom/_dicom_dict.py
    ## Names are more understandable to most people, but tags can also be used.
    dicom_info = {}

    ## values specified by name
    dicom_info['study_date'] = ds.StudyDate  ## study is the CT info
    dicom_info['study_time'] = ds.StudyTime

    ## this does not seem to appear in Variseed files, so fall back to None
    dicom_info['study_description'] = getattr(ds, 'StudyDescription', None)

    dicom_info['patient_id'] = ds.PatientID
    dicom_info['patient_name'] = ds.PatientName
    dicom_info['plan_label'] = ds.RTPlanLabel  ## plan name
    dicom_info['plan_date'] = ds.RTPlanDate
    dicom_info['plan_time'] = ds.RTPlanTime

    ## values can also be specified by dicom tag - need .value to retrieve the actual value
    dicom_info['manufacturer'] = ds[0x0008, 0x0070].value

    return dicom_info


# In[9]:

def get_pt_id(file):
    """Get patient ID from dicom file."""
    return dicomparser.DicomParser(file).GetDemographics()['id']


# In[10]:

def all_equal(iterable):
    """Return True if all the elements are equal to each other (or there are none).

    Based on itertools recipe: https://docs.python.org/3/library/itertools.html#recipes
    Used to ensure same patient IDs for generating DVH from files.
    """
    groups = it.groupby(iterable)
    return next(groups, True) and not next(groups, False)


# In[11]:

def all_rt_files_exist(directory):
    """Check that rtss, rtdose and rtplan files exist in directory with matching patient IDs.

    Returns (ok, rt_files) where ok is True only if all three file types exist
    and the patient IDs of the files found all match, and rt_files is the dict
    returned by get_rt_files. Problems are printed.
    """
    rt_files = get_rt_files(directory)

    rt_exist = [rt_type for rt_type in _RT_FILE_TYPES if rt_type in rt_files]
    rt_non_exist = [rt_type for rt_type in _RT_FILE_TYPES if rt_type not in rt_files]

    all_exist = not rt_non_exist
    if not all_exist:
        print('The following filetypes do not exist:',rt_non_exist)

    ## extract the ids for the files which exist and check they all match
    rt_files_exist_pt_ids = [get_pt_id(rt_files[rt_type]) for rt_type in rt_exist]
    all_ids_match = bool(all_equal(rt_files_exist_pt_ids))
    if not all_ids_match:
        print('Mis-matched patient IDs identified:',rt_files_exist_pt_ids)

    return (all_exist and all_ids_match), rt_files


# In[12]:

def get_dicom_dose(file):
    """Return the prescribed dose from an rtplan file. This is needed for DVH analysis.

    For non-Oncentra plans the dicompyler-core 'rxdose' is divided by 100
    (conversion to Gy). Oncentra plans are read from a private tag instead.
    Returns None (after printing a message) if a non RTPlan file is supplied.
    """
    if dicom_type(file) != 'rtplan':
        print('Prescription cannot be obtained from non RTPlan DICOM file. Returned None.')
        return None

    ## oncentra seems to not store the prescribed dose in the same way, so deal with separately
    if is_oncentra(file):
        print('Oncentra File - Attempting to determine prescription...')
        return get_oncentra_dose(file)

    return dicomparser.DicomParser(file).GetPlan()['rxdose']/100  ## convert to Gy for use


# In[13]:

# In[14]:

def check_oncentra_dose_exists(file):
    """Return True if the dose grid scaling tag (3004,000e) is present in the file."""
    dose_scaling_factor_tag = (0x3004, 0x000e)
    try:
        get_dicom_tag_info(file, dose_scaling_factor_tag)
    except KeyError:
        ## No dose grid within Oncentra file - a 3D dose grid calculation is needed
        return False
    return True


# In[15]:

def get_dicom_tag_info(file,tag):
    """Get info from dicom file using a dicom tag specified as a tuple (0x0000,0x0000).

    Reading the file each time a piece of info is needed is not particularly
    efficient, so try not to use this too much. Raises KeyError if the tag is absent.
    """
    return pydicom.dcmread(file)[tag].value


# In[16]:

def is_oncentra(rt_plan):
    """Determine if the RTPLAN file indicates that it is from Oncentra.

    Uses tag (0008,1090) Manufacturer's Model Name. A file without this tag is
    treated as not being from Oncentra.
    Used to allow the prescribed dose to be determined from a private tag.
    """
    try:
        model_name = pydicom.dcmread(rt_plan)[0x00081090].value
    except KeyError:
        return False
    return model_name == 'Oncentra'


def get_oncentra_dose(my_file):
    """Get dose from private Oncentra Tag (3007, 1000).

    Exits via sys.exit if the file contains no dose grid.
    """
    omp_dose_tag = (0x3007,0x1000)
    oncentra_dose = get_dicom_tag_info(my_file,omp_dose_tag)
    if not check_oncentra_dose_exists(my_file):
        sys.exit('No Dose Grid within Oncentra File - do 3D Dose Grid Calculation. Aborting Calculation')
    return oncentra_dose



# In[18]:

def calc_dvh(rtss,rtdose,structure_id,px):
    """Calculate a dvh object for a structure and assign the prescription to it.

    rtss, rtdose -- paths of the structure set and dose files.
    structure_id -- id of the structure within the structure set.
    px           -- prescription, stored on the dvh as rx_dose.
    """
    dvh = dvhcalc.get_dvh(rtss,rtdose,structure_id)
    dvh.rx_dose = px
    return dvh


# In[19]:

def non_int_stat(dvh,stat_string):
    """Return the dvh statistic for integer and non-integer values, e.g. 'D90', 'D0.1cc', 'V50Gy'.

    The first letter gives the type (V/D), trailing letters give optional units
    (cc/Gy) and the text in between is the numeric value.
    Exits via sys.exit for an unknown type or unknown units; raises ValueError
    if the numeric part cannot be read.
    """
    stat_string = stat_string.strip().lower()  ## make all lowercase for simplicity

    ## get the stat type based on first letter (V/D); slicing copes with an empty string
    stat_type = stat_string[:1]
    if stat_type not in ('v', 'd'):
        sys.exit('Unknown dose statistic type: ' + stat_type)

    ## get the units from the trailing letters, if any (walk the reversed string, then reverse back)
    end_string = "".join(it.takewhile(str.isalpha, reversed(stat_string)))[::-1]
    units = end_string or None
    if units is not None and units not in ('cc', 'gy'):
        sys.exit('Unknown dose statistic units: ' + units)

    ## remove the stat type and the units to leave the numerical value
    numeric_text = stat_string[len(stat_type):len(stat_string)-len(end_string)]
    try:
        stat_numeric = float(numeric_text)
    except ValueError:
        raise ValueError('Could not read a numeric value from dose statistic: ' + stat_string) from None

    ## V... uses volume_constraint, D... uses dose_constraint, with the obtained units
    if stat_type == 'v':
        return dvh.volume_constraint(stat_numeric, units)
    return dvh.dose_constraint(stat_numeric, units)


# In[20]:

def get_statistics(dvh,stat_list):
    """Get multiple statistics from a dvh and return them in a dict.

    The dict always contains 'volume': (value, units). Each requested statistic
    maps to (value, units), or to None if the structure volume is 0 or NaN.
    'mean', 'max', 'min' and 'median' (case-insensitive) are handled specially;
    anything else is interpreted by non_int_stat.
    """
    volume = dvh.volume
    dvh_stats = {'volume': (volume, dvh.volume_units)}

    ## check if valid volume from the calcs
    valid_vol = not (volume == 0 or np.isnan(volume))

    def _median():
        d50 = dvh.statistic('D50')
        return (d50.value, d50.units)

    ## getters are only evaluated for the statistics actually requested
    special_stats = {'mean': lambda: (dvh.mean, dvh.dose_units),
                     'max': lambda: (dvh.max, dvh.dose_units),
                     'min': lambda: (dvh.min, dvh.dose_units),
                     'median': _median}

    ## record the dosimetric statistics
    for stat in stat_list:
        if not valid_vol:
            dvh_stats[stat] = None
            continue

        getter = special_stats.get(stat.lower())
        if getter is not None:
            dvh_stats[stat] = getter()
        else:
            dvh_stat = non_int_stat(dvh,stat)
            dvh_stats[stat] = (dvh_stat.value,dvh_stat.units)
    return dvh_stats


# In[65]:

def dicom_dvh_stats_single(directory=None, stats=None, structures=None, output='df', include_body=True,verbose=True,save_df=False,user_structures=None):
    """Get dvh statistics for the structures in the DICOM files in the given directory.

    directory       -- string 'C:\\\\.....'; a dialog is shown if None.
    stats           -- list of stats: ['D90', 'V100'....]; a default list is used if None.
    structures      -- list of structure IDs: [1,2,5....]. None gets stats for all
                       structures. IDs not available for analysis are reported and skipped.
    output          -- 'df' returns a dataframe (required when processing multiple
                       dicom sets); anything else returns the raw results dict
                       {(sub_dir, patient_id, plan_name): {structure: stats}}.
    include_body    -- if false, the 'body' structure is excluded.
    verbose         -- if true, show structure filtering and per-structure progress.
    save_df         -- if true (and output is 'df'), pickle the dataframe next to
                       the directory as '<directory>.pkl'.
    user_structures -- None for all structures, a list of structure names to keep,
                       or 'mapping' to choose a file containing the list.

    Couch structures and any structure whose name contains 'pseudo' are always excluded.
    Raises KeyError if the directory lacks an rtss, rtdose or rtplan file.
    """
    if directory is None:
        directory = dir_open()

    ## list of default stats. Has to be set as None in the signature as called from multi patient version too
    if stats is None:
        stats = ['D98','D90','V100','V10', 'V20', 'D2cc', 'max', 'min', 'mean', 'median']

    ## get the DICOM file paths into dictionary for use.
    ## the function also checks if the IDs match (and prints a warning if not)
    _, dir_files = all_rt_files_exist(directory)
    missing_types = [rt_type for rt_type in _RT_FILE_TYPES if rt_type not in dir_files]
    if missing_types:
        raise KeyError('Required RT DICOM file type(s) missing from ' + str(directory)
                       + ': ' + ', '.join(missing_types))

    ## get the structure names {name:id}
    structure_names = structure_dict(dir_files['rtss'])

    ## remove body (if required), couch structures and any obvious pseudos
    excluded = {'couchsurface', 'couchinterior'}
    if not include_body:
        excluded.add('body')
    structure_names = {name: struct_id for name, struct_id in structure_names.items()
                       if name not in excluded and 'pseudo' not in name}

    if verbose:
        print('***')
        print(structure_names)

    ## limit to only the user specified structure names
    if isinstance(user_structures, str) and user_structures == 'mapping':
        the_structs = import_structure_list(file_open(title='Select File Containing Structure List...'))
    else:
        the_structs = user_structures
    if the_structs is not None:
        wanted = {str(name).lower() for name in the_structs}
        structure_names = {name: struct_id for name, struct_id in structure_names.items()
                           if name.lower() in wanted}

    if verbose:
        print(structure_names)
        print('*********')

    ## swap the structure dict to give {id:name}
    all_structures = swap_dict(structure_names)

    ## if structure IDs not specified, then get data for all structures
    if structures is None:
        structures = list(all_structures)
    else:
        unavailable = [struct_id for struct_id in structures if struct_id not in all_structures]
        if unavailable:
            print('The structure IDs', unavailable, 'are not available for analysis and will be skipped.')
        ## build a new list so the caller's list is not modified
        structures = [struct_id for struct_id in structures if struct_id in all_structures]

    ## get the dvh for each structure and extract the required statistics
    pt_id = get_pt_id(dir_files['rtplan'])
    plan_name = get_dicom_plan_info(dir_files['rtplan'])['plan_label']
    prescription = get_dicom_dose(dir_files['rtplan'])

    print('Patient id:',pt_id, '\nPlan name:',plan_name, '\nGetting DVH statistics', stats,)

    all_dvh_stats = {}
    tot_structs = len(structures)
    for struct_num, structure_id in enumerate(structures, start=1):
        structure_name = all_structures[structure_id]
        ## display full progress if required
        if verbose:
            print('Patient id: ' + str(pt_id) + ', Plan name: ' + str(plan_name) +
                  ', Processing structure:' + str(struct_num) + ' of ' + str(tot_structs) + ' (' + structure_name + ')')
        struct_dvh = calc_dvh(dir_files['rtss'],dir_files['rtdose'],structure_id,prescription)
        all_dvh_stats[structure_name] = get_statistics(struct_dvh,stats)

    print('Patient Completed')

    ## normalise so a trailing separator does not give an empty directory name
    norm_directory = os.path.normpath(directory)
    dir_string_output = os.path.basename(norm_directory)

    results_dict = {(dir_string_output,pt_id,plan_name): all_dvh_stats}

    if output != 'df':
        return results_dict

    results_output = pt_stats_df(results_dict)
    if save_df:
        pickle_save_dir = norm_directory + '.pkl'
        print(pickle_save_dir)
        results_output.to_pickle(pickle_save_dir)

    return results_output


# In[66]:

def pt_stats_df(results_dict):
    """Format the results dict for one patient into a more user friendly dataframe.

    results_dict is {(sub_dir, patient_id, plan_name): {structure: {stat: (value, units)}}};
    only its first key is used. The dataframe has one row per structure, the
    columns sub_dir, patient_id, plan_name and structure, and for every statistic
    the original (value, units) column plus '<stat>_val' and '<stat>_unit'.
    Identifier columns are stripped and lowercased; all headers are lowercased.
    """
    key = next(iter(results_dict))
    id_columns = ['sub_dir','structure', 'patient_id', 'plan_name']

    ## get the patient results into a df with the structure name as a normal column
    results_df = pd.DataFrame.from_dict(results_dict[key],orient='index')
    results_df.index.rename('structure',inplace=True)
    results_df.reset_index(inplace=True,drop=False)

    ## insert the identifiers as separate columns at the start of the df
    results_df.insert(0,'plan_name',key[2])
    results_df.insert(0,'patient_id',key[1])
    results_df.insert(0,'sub_dir',key[0])

    ## split the statistic results into values and units for simpler analysis/formatting.
    ## entries which are not (value, units) pairs (e.g. None for an invalid volume) become NaN
    stat_headings = [heading for heading in results_df.columns.values if heading not in id_columns]
    for heading in stat_headings:
        entries = list(results_df[heading])
        results_df[heading + '_val'] = [entry[0] if isinstance(entry, (tuple, list)) else np.nan
                                        for entry in entries]
        results_df[heading + '_unit'] = [entry[1] if isinstance(entry, (tuple, list)) else np.nan
                                         for entry in entries]

    ## convert pt_id, plan_name, structure columns to lowercase and remove surrounding whitespace
    for column in ('patient_id', 'plan_name', 'structure'):
        results_df[column] = results_df[column].str.strip().str.lower()

    ## set all headers to lowercase
    results_df.columns = results_df.columns.str.lower()

    return results_df


# In[67]:

def get_sub_dirs(parent_dir=None):
    """Return (path, sub_dirs) for the first-level sub-directories of parent_dir.

    A dialog is shown if parent_dir is None. sub_dirs are built as path + '/' + name.
    """
    if parent_dir is None:
        parent_dir = dir_open("Select Directory Containing Subdirectories to Analyse...")

    ## only the first level of the walk is needed
    path, dirs, _ = next(os.walk(parent_dir))

    sub_dirs = [path + '/' + dir_name for dir_name in dirs]

    return (path,sub_dirs)


# In[68]:

def dicom_dvh_stats_multi(parent_directory=None, stats='prompt', structures=None,
                          save_results=True, save_prompt=False, include_body=True, limit=None,
                          struct_labels=False, verbose=True,save_df=False,user_structures=None):
    """Analyse every sub-directory of a parent directory and combine the results.

    parent_directory -- directory containing one folder per patient; dialog if None.
    stats            -- 'prompt' (type them in), 'import' (choose a file), a list of
                        stats, a single stat string, or None for the default list.
    structures       -- list of structure IDs passed to dicom_dvh_stats_single.
    save_results     -- if true, save the combined results as csv.
    save_prompt      -- if true, ask where to save; otherwise save in the parent directory.
    include_body     -- if false, the body structure is excluded.
    limit            -- maximum number of sub-directories to analyse, e.g. limit=2.
    struct_labels    -- if true, choose a file of structure labels to merge into the results.
    verbose          -- if true, show structures being processed.
    save_df          -- if true, pickle each patient's dataframe.
    user_structures  -- None, a list of structure names, or 'mapping' to choose a file.

    Returns the combined dataframe. Raises ValueError if there are no
    sub-directories to analyse.
    """
    path, sub_dirs = get_sub_dirs(parent_dir = parent_directory)

    ## can limit number of directories checked. Default is no limit.
    if limit is not None:
        sub_dirs = sub_dirs[:limit]
    if not sub_dirs:
        raise ValueError('No sub-directories to analyse in: ' + str(path))

    ## can get list of structure labels from user to add to results.
    struct_label_file = None
    if struct_labels:
        struct_label_file = file_open(title='Select File Containing Structure Labels...')

    ## get the dvh stats from the user if required. (Only not required if they set stats=None)
    if isinstance(stats, str):
        if stats == 'import':
            stats = import_dvh_stats(file_open(title='Select File Containing DVH Stats to Return...'))
        elif stats == 'prompt':
            stats = prompt_dvh_stat_list()
        else:
            ## a single statistic given as a plain string
            stats = [stats]

    ## a list supplied by the caller is passed through unchanged
    if isinstance(user_structures, str) and user_structures == 'mapping':
        user_struct_list = import_structure_list(file_open(title='Select File Containing Structure List...'))
    else:
        user_struct_list = user_structures

    ## get the results from each directory and combine them into one df
    all_data_list = []
    num_sub_dirs = len(sub_dirs)
    for sub_dir_count, sub_dir in enumerate(sub_dirs, start=1):
        print('Processing Directory ' + str(sub_dir_count) + ' of ' + str(num_sub_dirs) + ', ' + sub_dir)
        sub_dir_results = dicom_dvh_stats_single(directory=sub_dir, stats=stats, structures=structures, output='df',
                                                 include_body=include_body, verbose=verbose,save_df=save_df,user_structures=user_struct_list)
        all_data_list.append(sub_dir_results)

    all_results = pd.concat(all_data_list,axis=0)
    print('Dosimetric data extracted from all directories')

    if struct_labels:
        print('Adding struct_label structure names')
        all_results = add_struct_label_plan_id(all_results, struct_label_file)
    ## also covers a label file which has no 'struct_label' column
    if 'struct_label' not in all_results.columns:
        all_results['struct_label'] = np.nan

    ## move the struct_label col to a better location in the df
    cols = all_results.columns.tolist()
    cols.insert(3, cols.pop(cols.index('struct_label')))
    all_results = all_results.reindex(columns= cols)

    ## save the results
    if save_results:
        save_df_to_csv(all_results,prompt=save_prompt,save_dir=path)

    ## df of results is returned
    return all_results


#%%

def prompt_dvh_stat_list(prompt="Enter DVH Stats Seperate by a comma. e.g. D90,V100,D2cc: "):
    """Get the stats typed in by the user and return them as a list (empty entries are dropped)."""
    print('\a')
    entries = input(prompt).replace(' ', '').split(',')
    return [entry for entry in entries if entry]

# In[69]:

def _read_tabular_file(file, description, **read_kwargs):
    """Read an excel, .csv or .txt file into a dataframe based on its extension.

    description completes the error message raised (ValueError) for any other extension.
    """
    filetype = os.path.splitext(file)[1].lower()
    if filetype in _EXCEL_EXTENSIONS:
        return pd.read_excel(file, index_col=None, **read_kwargs)
    if filetype == '.csv':
        return pd.read_csv(file, index_col=False, **read_kwargs)
    if filetype == '.txt':
        return pd.read_table(file, index_col=False, **read_kwargs)
    raise ValueError('Can currently only import .csv, .txt, or excel filetypes for ' + description)


def _first_column_as_list(df):
    """Return the first column of a header-less dataframe as a list of stripped strings, skipping blanks."""
    return df[0].dropna().astype(str).str.strip().tolist()


def import_dvh_stats(file):
    """Import the DVH stats to calculate and return a list of these.

    The stats should be on separate rows in the file, with no header.
    Raises ValueError for an unsupported file type.
    """
    return _first_column_as_list(_read_tabular_file(file, 'DVH stats', header=None))

# %%
def import_structure_list(file):
    """Import the structures as a list.

    The structure names should be on separate rows in the file, with no header.
    Raises ValueError for an unsupported file type.
    """
    return _first_column_as_list(_read_tabular_file(file, 'structure names', header=None))


# In[70]:

def import_struct_label_file(file):
    """Import the structure label file into a correctly formatted dataframe.

    Supported filetypes are: excel, csv, txt. The file must have 'patient_id'
    and 'structure' columns and may have a 'plan_name' column; these are
    stripped and lowercased, and patient IDs are converted to strings to match
    the DICOM data. Raises ValueError for an unsupported file type.
    """
    df = _read_tabular_file(file, 'structure labels')

    ## do some tidying - lower case and remove whitespace
    df['patient_id'] = df['patient_id'].astype(str).str.strip().str.lower()
    df['structure'] = df['structure'].str.strip().str.lower()

    ## plan_name might not exist if all unique patients
    if 'plan_name' in df.columns:
        df['plan_name'] = df['plan_name'].str.strip().str.lower()

    return df


# In[71]:

def add_struct_label_plan_id(results_df,struct_label_file):
    """Merge the structure label file into the results dataframe.

    If the label file has no plan_name column, the plan name is inferred from
    the results for each patient. This is only reliable if every patient has a
    single plan name; otherwise a warning is printed and no labels will match.
    Returns the results with the label file's columns merged on
    patient_id, plan_name and structure.
    """
    ## read in the label file and keep only patients present in the results.
    ## copy so that columns can be added without touching the imported frame
    df_structs = import_struct_label_file(struct_label_file)
    unique_pts = results_df['patient_id'].unique()
    df_structs = df_structs[df_structs['patient_id'].isin(unique_pts)].copy()

    ## if plan names do not exist, then add them from the results if they are unique for each patient
    if 'plan_name' not in df_structs.columns:
        print('Determing plan names from results df')

        ## store the plan names in a dict {pt_id: plan_names} for each patient.
        plan_names_dict = {
            patient: results_df.loc[results_df['patient_id'] == patient, 'plan_name'].unique()
            for patient in unique_pts
        }

        ## each patient should have exactly 1 plan name. Warn user if not.
        unique_plan_names = True
        for patient, pt_plan_names in plan_names_dict.items():
            if len(pt_plan_names) != 1:
                print('Patient', patient,
                      'does not have unique plan names. These must be specified within the struct_label structures file.')
                unique_plan_names = False

        if unique_plan_names:
            df_structs['plan_name'] = [plan_names_dict[patient][0]
                                       for patient in df_structs['patient_id'].values]
            if len(df_structs) != 0:  ## the .str accessor cannot be used if nothing matched
                df_structs['plan_name'] = df_structs['plan_name'].str.strip().str.lower()
            print('Unique plan names added to struct_label structure data for use')
        else:
            df_structs['plan_name'] = 'No matching plan names'
            print('No matching plan names to add from structure mapping file')

    ## check the label df only contains unique rows to ensure no ambiguity in results
    if df_structs.duplicated().any():
        print('struct_label Structure Dataframe contains duplicates. Results may be ambiguous.')

    ## add the struct_label names to the results using the merge functionality,
    ## then drop label rows which did not correspond to any result
    merged_df = results_df.merge(df_structs, how='outer', on=['patient_id', 'plan_name', 'structure'])
    merged_df = merged_df[~merged_df['sub_dir'].isnull()]

    return merged_df


# In[72]:

#my_results = dicom_dvh_stats_multi(save_results=True, save_prompt=True, stats='import',
#                                   limit=2 ,struct_labels=True, verbose=True, include_body=False)


# In[74]:

#my_results.head()


# In[59]:

#simple_results = dicom_dvh_stats_multi()


# ## Analysis of Results.
# - Make use of pandas to demonstrate simplicity of extracting data.
831 | 832 | # In[75]: 833 | 834 | ## produce a boxplot of each measured dvh statistic 835 | 836 | ## produce a list of stats to plot (only want the values, and not volume) 837 | #cols = list(my_results.columns) 838 | ## keep only items with '_val' at the end 839 | #cols = [x for x in cols if '_val' in x] 840 | ## remove any other unnecessary things from the list 841 | #to_remove = ['volume_val'] 842 | #cols = [x for x in cols if x not in to_remove] 843 | #print(cols) 844 | 845 | #my_results[cols].plot.box(figsize=(10,4)) 846 | #plt.show() 847 | 848 | 849 | # ## Get non-integer stats 850 | # e.g D0.1cc 851 | # - probably best to check the string passed and see if there is a decimal. 852 | # - Then should be able to determine method to use: https://groups.google.com/d/msg/dicompyler/EMnyhcEg4_Y/4P1wIcJ3AQAJ 853 | 854 | # In[265]: 855 | 856 | 857 | # In[268]: 858 | 859 | #abc = calc_dvh(ss,dose,3,get_dicom_dose(plan)) 860 | 861 | 862 | # In[351]: 863 | 864 | ## D90 (D90Gy is odd...? Does it even mean anything? Have I ever used it...?) 865 | #print(abc.D9) 866 | #print(abc.statistic('D9')) 867 | #print(abc.dose_constraint(9.6)) 868 | #print('-') 869 | 870 | #print(abc.D2cc) 871 | #print(abc.statistic('D2cc')) 872 | #print(abc.dose_constraint(2.2, volume_units='cc')) 873 | #print('-') 874 | 875 | #print(abc.V50) 876 | #print(abc.statistic('V50')) 877 | #print(abc.volume_constraint(50.3)) 878 | #print('-') 879 | 880 | #print(abc.V50Gy) 881 | #print(abc.statistic('V50Gy')) 882 | #print(abc.volume_constraint(50, dose_units='Gy')) 883 | #print('-') 884 | 885 | 886 | # In[ ]: 887 | 888 | #Dx = dose_constraint(x) 889 | #Dxcc = dose_constraint(x, volume_units='cc') 890 | #Vx = volume_constraint(x) 891 | #VxGy = volume_constraint(x, dose_units='Gy') 892 | 893 | 894 | # In[447]: 895 | 896 | #abc.statistic('V50Gy') 897 | 898 | 899 | # In[ ]: 900 | 901 | 902 | 903 | --------------------------------------------------------------------------------