├── .gitignore
├── CHANGELOG.md
├── IQDM
│   ├── __init__.py
│   ├── main.py
│   ├── parsers
│   │   ├── __init__.py
│   │   ├── delta4.py
│   │   ├── parser.py
│   │   └── sncpatient.py
│   ├── pdf_to_text.py
│   ├── pdf_to_text_data.py
│   ├── trending.py
│   ├── trending_arccheck.py
│   ├── trending_delta4.py
│   └── utilities.py
├── LICENSE
├── MANIFEST.in
├── README.md
└── setup.py

/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | .vscode/
3 | .idea/
4 | *__pycache__*
5 | build/
6 | dist/
7 | *.egg-info
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Change log of IQDM
2 |
3 | v0.3.2 (TBD)
4 | --------------------
5 | - [Issue 17] IN PROGRESS: Allow only new reports to be processed
6 |
7 | v0.3.1 (2020.01.21)
8 | --------------------
9 | - [Misc] IQDM is FINALLY using a change log
10 | - [Issue 19] Add option to assume day first for ambiguous dates
11 | - [Trending] Allow installed IQDM to launch trending from terminal
--------------------------------------------------------------------------------
/IQDM/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cutright/IMRT-QA-Data-Miner/85abf9dc66a139c02574c386377f46f0944c5893/IQDM/__init__.py
--------------------------------------------------------------------------------
/IQDM/main.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | main program for IMRT QA PDF report parser
4 | Created on Thu May 30 2019
5 | @author: Dan Cutright, PhD
6 | """
7 |
8 | from __future__ import print_function
9 | from os.path import isdir, isfile, join, splitext, basename, dirname
10 | from os import walk, listdir
11 | from datetime import datetime
12 | from IQDM.parsers.parser import ReportParser
13 | from IQDM.utilities import DELIMITER, is_file_name_found_in_processed_files, get_processed_files
14 | from IQDM.pdf_to_text import convert_pdf_to_txt
15 | import argparse
16 | from pathvalidate import sanitize_filename
17 | import subprocess
18 |
19 |
20 | CURRENT_VERSION = '0.3.1'
21 |
22 | SCRIPT_DIR = dirname(__file__)
23 |
24 |
25 | def pdf_to_qa_result(abs_file_path):
26 |     """
27 |     Given an absolute file path, convert the file to text and parse it
28 |     :param abs_file_path: file to be converted to text
29 |     :return: csv row to be written to the csv file, report type, column headers for the csv
30 |     :rtype: tuple
31 |     """
32 |
33 |     text = convert_pdf_to_txt(abs_file_path)
34 |
35 |     report_obj = ReportParser(text)
36 |     if report_obj.report is not None:
37 |         return report_obj.csv + DELIMITER + abs_file_path, report_obj.report_type, report_obj.columns
38 |
39 |
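# Illustrative usage sketch (not from the repository): how pdf_to_qa_result is
# typically consumed. The report path below is a placeholder.
from IQDM.main import pdf_to_qa_result
from IQDM.utilities import DELIMITER

result = pdf_to_qa_result('/path/to/qa_report.pdf')
if result is not None:  # None means no class in REPORT_CLASSES matched the text
    csv_row, report_type, columns = result
    print(report_type)  # e.g. 'delta4' or 'sncpatient'
    # csv_row carries one value per column, plus the source file path appended
    print(dict(zip(columns, csv_row.split(DELIMITER))))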
40 | def process_files(init_directory, ignore_extension=False, output_file=None, output_dir=None, no_recursive_search=False,
41 |                   process_all=True, results_dir=None):
42 |     """
43 |     Given an initial directory, process all pdf files into parser classes, write their csv property to results_file
44 |     :param init_directory: initial scanning directory
45 |     :param ignore_extension: if you'd like to catch pdf files that are missing the .pdf extension, set to True
46 |     :type ignore_extension: bool
47 |     :param output_file: user specified output file name, report type will be prepended to this value
48 |     :param output_dir: user specified output directory, default value is the local directory
49 |     :param no_recursive_search: to ignore sub-directories, set to True
50 |     :type no_recursive_search: bool
51 |     :param process_all: if False, skip any file already listed in the results csv files found in the local directory or the specified results_dir
52 |     :type process_all: bool
53 |     :param results_dir: directory containing results files
54 |     :type results_dir: str
55 |     """
56 |
57 |     if process_all:
58 |         ignored_files = []
59 |     else:
60 |         results_dir = [results_dir, ''][results_dir is None]
61 |         ignored_files = get_processed_files(results_dir, no_recursive_search=no_recursive_search)
62 |
63 |     time_stamp = str(datetime.now()).replace(':', '-').replace('.', '-')
64 |     if output_file is None:
65 |         output_file = "results_%s.csv" % time_stamp
66 |
67 |     if no_recursive_search:
68 |         for file_name in listdir(init_directory):
69 |             if not is_file_name_found_in_processed_files(file_name, init_directory, ignored_files):
70 |                 if ignore_extension or splitext(file_name)[1].lower() == '.pdf':
71 |                     file_path = join(init_directory, file_name)
72 |                     process_file(file_path, output_file, output_dir)
73 |             else:
74 |                 print('File previously processed: %s' % join(init_directory, file_name))
75 |     else:
76 |         for dirName, subdirList, fileList in walk(init_directory):  # iterate through files and all sub-directories
77 |             for file_name in fileList:
78 |                 if not is_file_name_found_in_processed_files(file_name, init_directory, ignored_files):
79 |                     if ignore_extension or splitext(file_name)[1].lower() == '.pdf':
80 |                         file_path = join(dirName, file_name)
81 |                         process_file(file_path, output_file, output_dir)
82 |                 else:
83 |                     print('File previously processed: %s' % join(dirName, file_name))
84 |
85 |
86 | def process_file(file_path, output_file, output_dir):
87 |     try:
88 |         row, report_type, columns = pdf_to_qa_result(file_path)  # process file
89 |     except Exception as e:
90 |         print(str(e))
91 |         print('Skipping: %s' % file_path)
92 |         return
93 |
94 |     current_file = "%s_%s" % (report_type, output_file)  # prepend report type to file name
95 |     if output_dir:
96 |         current_file = join(output_dir, current_file)
97 |     if row:
98 |         if not isfile(current_file):  # if file doesn't exist, need to write columns
99 |             with open(current_file, 'w') as csv:
100 |                 csv.write(DELIMITER.join(columns) + '\n')
101 |         with open(current_file, "a") as csv:  # write the processed data
102 |             csv.write(row + '\n')
103 |     print("Processed: %s" % file_path)
104 |
105 |
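# Illustrative usage sketch (not from the repository): invoking the miner
# programmatically instead of through the IQDM console script. Both paths
# below are placeholders.
from IQDM.main import process_files

process_files('/path/to/qa/reports',
              ignore_extension=False,      # only scan files ending in .pdf
              output_dir='/path/to/output',
              no_recursive_search=False,   # walk sub-directories as well
              process_all=True)            # re-process previously seen reports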
106 | def main():
107 |
108 |     cmd_parser = argparse.ArgumentParser(description="Command line interface for IQDM")
109 |     cmd_parser.add_argument('-ie', '--ignore-extension',
110 |                             dest='ignore_extension',
111 |                             help='Script will check all files, not just ones with .pdf extensions',
112 |                             default=False,
113 |                             action='store_true')
114 |     cmd_parser.add_argument('-od', '--output-dir',
115 |                             dest='output_dir',
116 |                             help='Output stored in local directory by default, specify otherwise here',
117 |                             default=None)
118 |     cmd_parser.add_argument('-rd', '--results-dir',
119 |                             dest='results_dir',
120 |                             help='Results assumed to be stored in local directory by default, specify otherwise here',
121 |                             default=None)
122 |     cmd_parser.add_argument('-all', '--process-all',
123 |                             dest='process_all',
124 |                             help='Process all identified report files, otherwise only new reports will be analyzed',
125 |                             default=False,
126 |                             action='store_true')
127 |     cmd_parser.add_argument('-of', '--output-file',
128 |                             dest='output_file',
129 |                             help='Output will be saved as <report_type>_results_<time-stamp>.csv by default. '
130 |                                  'Define this tag to customize the file name after <report_type>_',
131 |                             default=None)
132 |     cmd_parser.add_argument('-ver', '--version',
133 |                             dest='print_version',
134 |                             help='Print the IQDM version',
135 |                             default=False,
136 |                             action='store_true')
137 |     cmd_parser.add_argument('-nr', '--no-recursive-search',
138 |                             dest='no_recursive_search',
139 |                             help='Include this flag to skip sub-directories',
140 |                             default=False,
141 |                             action='store_true')
142 |     cmd_parser.add_argument('-df', '--day-first',
143 |                             dest='day_first',
144 |                             help='Assume day first for ambiguous dates in trending dashboard',
145 |                             default=False,
146 |                             action='store_true')
147 |     cmd_parser.add_argument('-p', '--port',
148 |                             dest='port',
149 |                             help='Specify port of trending dashboard webserver',
150 |                             default='5006')
151 |     cmd_parser.add_argument('-wo', '--allow-websocket-origin',
152 |                             dest='websocket_origin',
153 |                             help='Allow a websocket origin other than localhost, see bokeh documentation',
154 |                             default=None)
155 |     cmd_parser.add_argument('file_path', nargs='?',
156 |                             help='Initiate scan if directory, launch dashboard if results file')
157 |     args = cmd_parser.parse_args()
158 |
159 |     # if args.file_path and len(args.file_path) > 2:
160 |     #     print("Too many arguments provided. Please only provide the initial scanning directory after IQDM")
161 |     #     return
162 |
163 |     path = args.file_path
164 |     if not path or len(path) < 2:
165 |         if args.print_version:
166 |             print('IMRT-QA-Data-Miner: IQDM v%s' % CURRENT_VERSION)
167 |             return
168 |         else:
169 |             print('Initial directory or results file for trending not provided!')
170 |             return
171 |
172 |     if not isdir(path):
173 |         if isfile(path) and splitext(path)[1].lower() == '.csv':
174 |             if basename(path).startswith('delta4_results_'):
175 |                 trend_path = join(SCRIPT_DIR, 'trending.py')
176 |             elif basename(path).startswith('sncpatient_results_'):
177 |                 trend_path = join(SCRIPT_DIR, 'trending_arccheck.py')
178 |             else:
179 |                 print('Did you provide an IQDM results csv?')
180 |                 return
181 |             try:
182 |                 day_first = ['false', 'true'][args.day_first]  # must pass a string in subprocess.run()
183 |                 cmd = ['bokeh', 'serve', trend_path, '--port', args.port]
184 |                 if args.websocket_origin:
185 |                     cmd.extend(['--allow-websocket-origin', args.websocket_origin])
186 |                 cmd.extend(['--args', path, day_first])
187 |                 subprocess.run(cmd)
188 |             except KeyboardInterrupt:
189 |                 pass
190 |             return  # dashboard session ended; do not fall through to scanning
191 |         else:
192 |             print("%s is not a valid or accessible directory" % path)
193 |             return
194 |
195 |     output_file, print_file_name_change = None, False
196 |     if args.output_file:
197 |         output_file = sanitize_filename(args.output_file)
198 |         if output_file not in args.output_file:
199 |             print_file_name_change = True
200 |
201 |     process_files(args.file_path,
202 |                   ignore_extension=args.ignore_extension,
203 |                   output_file=output_file,
204 |                   output_dir=args.output_dir,
205 |                   no_recursive_search=args.no_recursive_search,
206 |                   process_all=args.process_all,
207 |                   results_dir=args.results_dir)
208 |
209 |     if args.print_version:
210 |         print('IMRT-QA-Data-Miner: IQDM v%s' % CURRENT_VERSION)
211 |
212 |     if print_file_name_change:
213 |         print('Output file name was changed to <report_type>_%s' % output_file)
214 |
215 |
216 | if __name__ == '__main__':
217 |     main()
218 |
--------------------------------------------------------------------------------
/IQDM/parsers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cutright/IMRT-QA-Data-Miner/85abf9dc66a139c02574c386377f46f0944c5893/IQDM/parsers/__init__.py -------------------------------------------------------------------------------- /IQDM/parsers/delta4.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | main program for IMRT QA PDF report parser 4 | Created on Thu May 30 2019 5 | @author: Dan Cutright, PhD 6 | """ 7 | 8 | from IQDM.utilities import are_all_strings_in_text, get_csv 9 | from dateutil.parser import parse as date_parser 10 | 11 | 12 | # So far I've only come across Composite and Fraction as beam name place holders for the composite row 13 | COMPOSITE_BEAM_NAMES = ['Composite', 'Fraction'] 14 | 15 | # If you provide your possible energies here, script will do a global search instead of trying to parse the table 16 | # Table parsing was difficult, but this seems to work consistently. For example, if '6 MV, FFF' is found anywhere 17 | # in the PDF, the energy will be assumed to be 6 MV, FFF and then stop looking through the other options, therefore, 18 | # the order of ENERGY_OPTIONS is important. Set ENERGY_OPTIONS to None or [] to skip this feature 19 | ENERGY_OPTIONS = ['6 MV, FFF', '6 MV', '10 MV, FFF', '10 MV'] 20 | 21 | 22 | class Delta4Report: 23 | def __init__(self): 24 | self.report_type = 'delta4' 25 | self.columns = ['Patient Name', 'Patient ID', 'Plan Date', 'Energy', 'Daily Corr', 'Norm Dose', 'Dev', 'DTA', 26 | 'Gamma-Index', 'Dose Dev', 'Radiation Dev', 'Gamma Pass Criteria', 'Gamma Dose Criteria', 27 | 'Gamma Dist Criteria', 'Beam Count'] 28 | self.identifiers = ['ScandiDos AB', 'Treatment Summary', 'Acceptance Limits', 'Daily corr', 29 | 'Selected Detectors', 'Parameter Definitions & Acceptance Criteria, Detectors'] 30 | 31 | self.treatment_summary_columns = ['Beam', 'Gantry', 'Energy', 'Daily Corr', 'Norm Dose', 32 | 'Dev', 'DTA', 'Gamma-Index', 'Dose Dev'] 33 | self.data = {} 34 | self.index_start = {} 35 | self.index_end = {} 36 | self.text = None 37 | 38 | def process_data(self, text_data): 39 | self.text = text_data.split('\n') 40 | 41 | # Patient information 42 | if 'PRE-TREATMENT REPORT' in self.text[3]: 43 | self.data['patient_name'] = self.text[0] 44 | self.data['patient_id'] = self.text[1] 45 | elif 'Clinic' not in self.text[2]: 46 | self.data['patient_name'] = self.text[2] 47 | self.data['patient_id'] = self.text[3] 48 | else: 49 | if 'Treatment Summary' in self.text: 50 | tx_sum_index = self.text.index('Treatment Summary') 51 | self.data['patient_name'] = self.text[tx_sum_index-3] 52 | self.data['patient_id'] = self.text[tx_sum_index-2] 53 | else: 54 | self.data['patient_name'] = 'Not found' 55 | self.data['patient_id'] = 'Not found' 56 | 57 | # Beam 58 | self.index_start['Beam'] = self.get_index_of_next_text_block(self.get_string_index_in_text('°')) 59 | self.index_end['Beam'] = self.get_index_of_next_text_block(self.index_start['Beam']) - 1 60 | if self.text[self.index_start['Beam']] == 'Gantry': 61 | self.index_start['Beam'] = self.get_index_of_next_text_block(self.index_end['Beam']) 62 | self.index_end['Beam'] = self.get_index_of_next_text_block(self.index_start['Beam']) - 1 63 | self.data['Beam'] = self.get_data_block('Beam') 64 | for composite_name_option in COMPOSITE_BEAM_NAMES: 65 | if composite_name_option in self.data['Beam'][0]: 66 | self.data['Beam'].pop(0) 67 | 68 | # Gantry 69 | self.index_start['Gantry'] = self.get_index_of_next_text_block(self.index_end['Beam']) 70 | 
self.index_end['Gantry'] = self.get_index_of_next_text_block(self.index_start['Gantry']) - 1
71 |         self.data['Gantry'] = ['N/A'] + self.get_data_block('Gantry')
72 |         for composite_name_option in COMPOSITE_BEAM_NAMES:
73 |             if composite_name_option in self.data['Gantry']:
74 |                 self.data['Gantry'].pop(self.data['Gantry'].index(composite_name_option))
75 |
76 |         energy_override = []  # sometimes the energy is on the same line as the gantry
77 |         for i, row in enumerate(self.data['Gantry']):
78 |             self.data['Gantry'][i] = row.replace('\xc2', '').replace('\xb0', '')
79 |             energy_override.append(None)
80 |             row_split = row.split(' ')
81 |             if len(row_split) > 3:
82 |                 energy_override[-1] = ' '.join(row_split[-2:])
83 |                 self.data['Gantry'][i] = self.data['Gantry'][i].replace(energy_override[-1], '').strip()
84 |
85 |         # Dose and analysis
86 |         self.index_start['Analysis'] = self.get_string_index_in_text('Daily corr Norm') + 2
87 |         self.index_end['Analysis'] = self.get_index_of_next_text_block(self.index_start['Analysis']) - 1
88 |         analysis_data_block = self.text[self.index_start['Analysis']:self.index_end['Analysis']]
89 |         analysis_data = []
90 |         while analysis_data_block:
91 |             row = analysis_data_block.pop(0)
92 |
93 |             # Sometimes the Norm Dose and the other analysis data aren't in the same string,
94 |             # and sometimes they appear in a different order. Ensure they are in the same string, with Norm Dose first.
95 |             if 'Gy' not in row:
96 |                 row = "%s %s" % (analysis_data_block.pop(0), row)
97 |             if '%' not in row:
98 |                 row = "%s %s" % (row, analysis_data_block.pop(0))
99 |
100 |             if 'Gy' in row and '%' in row:
101 |                 row = row.split('%')
102 |                 split = ['Gy', 'cGy']['cGy' in row[0]]  # Report may be in cGy or Gy
103 |                 data = [row[0].split(split)[0].strip(),
104 |                         row[0].split(split)[1].strip()]
105 |                 data.extend(row[1:-1])
106 |                 analysis_data.append(data)
107 |
108 |         self.data['Norm Dose'] = [row[0] for row in analysis_data]
109 |         self.data['Dev'] = [row[1].strip() for row in analysis_data]
110 |         self.data['DTA'] = [row[2].strip() for row in analysis_data]
111 |         self.data['Gamma-Index'] = [row[3].strip() for row in analysis_data]
112 |         self.data['Dose Dev'] = [row[4].strip() for row in analysis_data]
113 |
114 |         try:
115 |             self.data['Norm Dose'][0] = float(self.data['Norm Dose'][0])
116 |         except (ValueError, IndexError):
117 |             pass
118 |
119 |         if 'factor' in self.data['Dev'][0]:
120 |             self.data['Dev'][0] = self.data['Dev'][0].replace('factor', '').strip()
121 |
122 |         # Daily Correction Factor
123 |         self.index_start['Daily Corr'] = self.get_index_of_next_text_block(self.index_end['Analysis'])
124 |         if 'Det within acceptance' in self.text[self.index_start['Daily Corr']]:
125 |             self.index_start['Daily Corr'] = self.get_index_of_next_text_block(self.index_start['Daily Corr'])
126 |         if 'index dose dev' in self.text[self.index_start['Daily Corr']]:
127 |             self.index_start['Daily Corr'] = self.get_index_of_next_text_block(self.index_start['Daily Corr'])
128 |         if 'factor' in self.text[self.index_start['Daily Corr']]:
129 |             self.index_start['Daily Corr'] = self.get_index_of_next_text_block(self.index_start['Daily Corr'])
130 |         self.index_end['Daily Corr'] = self.get_index_of_next_text_block(self.index_start['Daily Corr']) - 1
131 |         self.data['Daily Corr'] = ['N/A'] + self.get_data_block('Daily Corr')
132 |         for i, row in enumerate(self.data['Daily Corr']):
133 |             if not row.isdigit():
134 |                 self.data['Daily Corr'][i] = row[-5:]
135 |
136 |         # Energy
137 |         self.data['Energy'] = None
138 |         if ENERGY_OPTIONS:
139 |             for energy_option in ENERGY_OPTIONS:
140 |                 if 
self.data['Energy'] is None and energy_option in text_data: 141 | self.data['Energy'] = [energy_option.replace(',', '')] * len(energy_override) 142 | if self.data['Energy'] is None: 143 | self.index_start['Energy'] = self.get_index_of_next_text_block(self.index_end['Daily Corr']) 144 | if 'dose dev' in self.text[self.index_start['Energy']]: 145 | self.index_start['Energy'] = self.get_index_of_next_text_block(self.index_start['Energy']) 146 | self.index_end['Energy'] = self.get_index_of_next_text_block(self.index_start['Energy']) - 1 147 | self.data['Energy'] = ['N/A'] + self.get_data_block('Energy') 148 | if any(energy_override): # replace values with overrides found in Gantry code block 149 | for i, override in enumerate(energy_override): 150 | if override is not None: 151 | if len(self.data['Energy']) > i: 152 | self.data['Energy'][i] = override 153 | 154 | # Gamma Criteria 155 | self.index_start['Gamma Criteria'] = self.text.index('Parameter Definitions & Acceptance Criteria, Detectors') 156 | self.index_start['Acceptance Limits'] = self.text.index('Acceptance Limits') 157 | self.index_end['Gamma Criteria'] = self.index_start['Acceptance Limits'] - 1 158 | self.index_end['Acceptance Limits'] = self.get_index_of_next_text_block(self.index_start['Acceptance Limits']) - 1 159 | 160 | for row in self.get_data_block('Gamma Criteria'): 161 | if 'mm' in row: 162 | temp = row.split('mm')[0].strip() 163 | try: 164 | float(temp) 165 | self.data['gamma_dist'] = temp 166 | except: 167 | pass 168 | elif '±' in row: 169 | self.data['gamma_dose'] = row.split('±')[1].replace('%', '') 170 | 171 | self.data['gamma_pass'] = self.get_data_block('Acceptance Limits')[-1].split('%')[0] 172 | 173 | @property 174 | def radiation_device(self): 175 | for row in self.text: 176 | if row.startswith('Radiation Device: '): 177 | return row.replace('Radiation Device: ', '') 178 | return None 179 | 180 | @property 181 | def measured_date(self): 182 | index_of_first_date = self.get_index_of_first_date() 183 | date_candidate_1 = self.text[index_of_first_date].split(' ')[0] 184 | date_candidate_2 = self.text[index_of_first_date+2].split(' ')[0] 185 | try: 186 | return str(date_parser(date_candidate_1)).split(' ')[0] 187 | except: 188 | try: 189 | return str(date_parser(date_candidate_2)).split(' ')[0] 190 | except: 191 | pass 192 | return None 193 | 194 | def get_index_of_first_date(self): 195 | for i, row in enumerate(self.text): 196 | if are_all_strings_in_text(row, ['/', ':', 'M']) or \ 197 | are_all_strings_in_text(row, ['.', ':', 'M']): 198 | try: 199 | date_parser(row.split(' ')[0].strip()) 200 | return i 201 | except: 202 | pass 203 | return None 204 | 205 | def get_string_index_in_text(self, string, start_index=0): 206 | for i, row in enumerate(self.text[start_index:]): 207 | if string in row: 208 | return i 209 | return None 210 | 211 | def get_index_of_next_text_block(self, start_index): 212 | for i, row in enumerate(self.text[start_index:]): 213 | if row.strip() == '': 214 | return i + start_index + 1 215 | return None 216 | 217 | def get_data_block(self, data_type): 218 | return self.text[self.index_start[data_type]:self.index_end[data_type]] 219 | 220 | @property 221 | def summary_data(self): 222 | try: 223 | daily_corr = sum([float(f) for f in self.data['Daily Corr'] if f != 'N/A']) / (len(self.data['Daily Corr']) - 1) 224 | except: 225 | print('WARNING: Could not process daily corr for %s - %s' % 226 | (self.data['patient_name'], self.data['patient_id'])) 227 | daily_corr = 1. 
228 |
229 |         return {'Patient Name': self.data['patient_name'],
230 |                 'Patient ID': self.data['patient_id'],
231 |                 'Plan Date': self.measured_date,
232 |                 'Energy': '/'.join(list(set([e for e in self.data['Energy'] if e != 'N/A']))),
233 |                 'Daily Corr': daily_corr,
234 |                 'Norm Dose': self.data['Norm Dose'][0],
235 |                 'Dev': float(self.data['Dev'][0]),
236 |                 'DTA': float(self.data['DTA'][0]),
237 |                 'Gamma-Index': float(self.data['Gamma-Index'][0]),
238 |                 'Dose Dev': float(self.data['Dose Dev'][0]),
239 |                 'Radiation Dev': self.radiation_device,
240 |                 'Gamma Pass Criteria': float(self.data['gamma_pass']),
241 |                 'Gamma Dose Criteria': float(self.data['gamma_dose']),
242 |                 'Gamma Dist Criteria': float(self.data['gamma_dist']),
243 |                 'Beam Count': len(self.data['Beam'])}
244 |
245 |     @property
246 |     def csv(self):
247 |         return get_csv(self.summary_data, self.columns)
248 |
--------------------------------------------------------------------------------
/IQDM/parsers/parser.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | report parser selection for the IMRT QA PDF report parser
4 | Created on Thu May 30 2019
5 | @author: Dan Cutright, PhD
6 | """
7 |
8 | from IQDM.utilities import are_all_strings_in_text
9 | from IQDM.parsers.delta4 import Delta4Report
10 | from IQDM.parsers.sncpatient import SNCPatientReport
11 |
12 | # These classes will be checked in ReportParser.get_report()
13 | REPORT_CLASSES = [Delta4Report, SNCPatientReport]
14 |
15 |
16 | class ReportParser:
17 |     """
18 |     This class determines which Report class to use and subsequently processes the data.
19 |
20 |     Use of this class requires that each report class listed in REPORT_CLASSES contain the following properties:
21 |         identifiers: a list of strings that collectively are uniquely found in a report type
22 |         columns: a list of strings indicating the columns of the csv to be output
23 |         csv: a string of values for each column, delimited with DELIMITER in utilities.py
24 |         report_type: a string describing the report; this will be used in the results filename created in main.py
25 |
26 |     Each report class must also provide the following method:
27 |         process_data(text_data): processing of the data does not occur until this is called
28 |
29 |     If ReportParser.report is None, the input text was not identified to be any of the report classes listed in
30 |     REPORT_CLASSES
31 |     """
32 |     def __init__(self, text):
33 |         self.report = self.get_report(text)
34 |         if self.report:
35 |             self.columns = self.report.columns
36 |             self.csv = self.report.csv
37 |             self.report_type = self.report.report_type
38 |
39 |     @staticmethod
40 |     def get_report(text):
41 |         for report_class in REPORT_CLASSES:
42 |             rc = report_class()  # initialize class to access identifiers
43 |             if are_all_strings_in_text(text, rc.identifiers):
44 |                 rc.process_data(text)  # parse the text data
45 |                 return rc
46 |         return None
47 |
--------------------------------------------------------------------------------
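# Illustrative sketch (not a file in the repository): a minimal report class
# satisfying the ReportParser contract described above. The vendor name,
# identifiers, and parsing logic are hypothetical placeholders.
from IQDM.utilities import get_csv


class MinimalExampleReport:
    def __init__(self):
        self.report_type = 'example_vendor'               # used in the results file name
        self.columns = ['Patient ID', 'Plan Date']        # csv header order
        self.identifiers = ['Example Vendor QA Report']   # strings unique to this report type
        self.data = {}

    def process_data(self, text_data):
        # A real parser walks text_data line by line; this placeholder just
        # stores dummy values so the csv property has something to emit.
        self.data = {'Patient ID': 'unknown', 'Plan Date': 'unknown'}

    @property
    def summary_data(self):
        return {key: self.data.get(key, 'n/a') for key in self.columns}

    @property
    def csv(self):
        return get_csv(self.summary_data, self.columns)


# Registering the class is then a one-line change in parser.py:
# REPORT_CLASSES = [Delta4Report, SNCPatientReport, MinimalExampleReport]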
/IQDM/parsers/sncpatient.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | SNC Patient Report class
4 | Created on Fri Jun 21 2019
5 | @author: Dan Cutright, PhD
6 | @contributor: Marc J.P. Chamberland, PhD
7 | """
8 |
9 | from IQDM.utilities import get_csv
10 | import re
11 |
12 |
13 | class SNCPatientReport:
14 |     def __init__(self):
15 |         self.report_type = 'sncpatient'
16 |         self.columns = ['Patient Last Name', 'Patient First Name', 'Patient ID', 'Plan Date', 'Energy', 'Angle', 'Dose Type', 'Difference (%)', 'Distance (mm)',
17 |                         'Threshold (%)', 'Meas Uncertainty', 'Analysis Type', 'Total Points', 'Passed', 'Failed',
18 |                         '% Passed', 'Min', 'Max', 'Average', 'Std Dev', 'X offset (mm)', 'Y offset (mm)', 'Notes']
19 |         self.identifiers = ['QA File Parameter', 'Threshold', 'Notes', 'Reviewed By :', 'SSD', 'Depth', 'Energy']
20 |         self.text = None
21 |         self.data = {}
22 |
23 |     def process_data(self, text_data):
24 |         self.text = text_data.split('\n')
25 |         self.data['date'], self.data['hospital'] = [], []
26 |         for row in self.text:
27 |             if row.find('Date: ') > -1:
28 |                 self.data['date'] = row.split('Date: ', 1)[-1]  # take text after the label (str.strip would remove characters, not the prefix)
29 |             if row.find('Hospital Name: ') > -1:
30 |                 self.data['hospital'] = row.split('Hospital Name: ', 1)[-1]
31 |
32 |             if self.data['date'] and self.data['hospital']:
33 |                 break
34 |
35 |         self.data['qa_file_parameter'] = self.get_group_results('QA File Parameter')
36 |
37 |         x_offset = '0'
38 |         y_offset = '0'
39 |         try:
40 |             plan_index = self.text.index('Plan')
41 |             if self.text[plan_index + 2].find('CAX') > -1:
42 |                 x_offset, y_offset = re.findall(r'[-+]?[.]?[\d]+(?:,\d\d\d)*[\.]?\d*(?:[eE][-+]?\d+)?',
43 |                                                 self.text[plan_index + 2])
44 |         except ValueError:
45 |             pass
46 |
47 |         self.data['cax_offset'] = {'X offset': str(x_offset), 'Y offset': str(y_offset)}
48 |
49 |         # Dose Comparison Block
50 |         try:
51 |             self.text.index('Absolute Dose Comparison')
52 |             self.data['dose_comparison_type'] = 'Absolute Dose Comparison'
53 |         except ValueError:
54 |             self.data['dose_comparison_type'] = 'Relative Comparison'
55 |         self.data['dose_comparison'] = self.get_group_results(self.data['dose_comparison_type'])
56 |         if '% Diff' in list(self.data['dose_comparison']):  # Alternate for Difference (%) in some versions of the report
57 |             self.data['dose_comparison']['Difference (%)'] = self.data['dose_comparison']['% Diff']
58 |         if 'Threshold' in list(self.data['dose_comparison']):  # Alternate for Threshold (%) in some versions of the report
59 |             self.data['dose_comparison']['Threshold (%)'] = self.data['dose_comparison']['Threshold']
60 |
61 |         # Summary Analysis Block
62 |         try:
63 |             self.text.index('Summary (Gamma Analysis)')
64 |             self.data['analysis_type'] = 'Gamma'
65 |         except ValueError:
66 |             try:
67 |                 self.text.index('Summary (DTA Analysis)')
68 |                 self.data['analysis_type'] = 'DTA'
69 |             except ValueError:
70 |                 self.data['analysis_type'] = 'GC'  # Gradient Correction
71 |         self.data['summary'] = self.get_group_results('Summary (%s Analysis)' % self.data['analysis_type'])
72 |
73 |         # Gamma Index Summary Block
74 |         try:
75 |             self.text.index('Gamma Index Summary')
76 |             self.data['gamma_stats'] = self.get_gamma_statistics('Gamma Index Summary')
77 |         except ValueError:
78 |             self.data['gamma_stats'] = {'Minimum': 'n/a', 'Maximum': 'n/a', 'Average': 'n/a', 'Stdv': 'n/a'}
79 |
80 |         self.data['notes'] = self.text[self.text.index('Notes') + 1]
81 |
82 |     def get_gamma_statistics(self, stats_delimiter):
83 |         gamma_stats = {}
84 |         stats_fields = ['Minimum', 'Maximum', 'Average', 'Stdv']
85 |
86 |         group_start = self.text.index(stats_delimiter)
87 |
88 |         for field in stats_fields:
89 |             field_start = self.text[group_start:-1].index(field) + 1
90 |             gamma_stats[field] = self.text[group_start:-1][field_start]
91 |
92 |         return gamma_stats
93 |
94 |     def get_group_results(self, data_group):
95 |         """
96 |         SNC Patient reports contain three blocks of results. data_group may be among the following:
97 |             'QA File Parameter'
98 |             'Absolute Dose Comparison' or 'Relative Comparison'
99 |             'Gamma' or 'DTA'
100 |         """
101 |         group_start = self.text.index(data_group)
102 |         var_name_start = group_start + 1
103 |         data_start = self.text[var_name_start:-1].index('') + 1 + var_name_start
104 |         data_count = data_start - var_name_start
105 |
106 |         # If the patient name is too long, the pdf parsing can get offset
107 |         if self.text[data_start] == 'Set1':
108 |             data_start += 1
109 |
110 |         group_results = {}
111 |         for i in range(data_count):
112 |             if self.text[var_name_start+i]:
113 |                 group_results[self.text[var_name_start+i]] = self.text[data_start+i].replace(' : ', '')
114 |
115 |         return group_results
116 |
117 |     @property
118 |     def summary_data(self):
119 |         """
120 |         Collect the parsed data into a dictionary with keys corresponding to columns
121 |         :return: parsed data
122 |         :rtype: dict
123 |         """
124 |         patient_name = self.data['qa_file_parameter']['Patient Name'].replace('^', ' ').split(', ')
125 |         if len(patient_name) > 1:
126 |             last_name = patient_name[0].title()
127 |             first_name = patient_name[1].title()
128 |         elif len(patient_name) == 1:
129 |             last_name = patient_name[0].title()
130 |             first_name = 'n/a'
131 |         else:
132 |             last_name = 'n/a'
133 |             first_name = 'n/a'
134 |
135 |         return {'Patient Last Name': last_name,
136 |                 'Patient First Name': first_name,
137 |                 'Patient ID': self.data['qa_file_parameter']['Patient ID'],
138 |                 'Plan Date': self.data['qa_file_parameter']['Plan Date'],
139 |                 'Energy': self.data['qa_file_parameter']['Energy'],
140 |                 'Angle': self.data['qa_file_parameter']['Angle'],
141 |                 'Dose Type': self.data['dose_comparison_type'],
142 |                 'Difference (%)': self.data['dose_comparison']['Difference (%)'],
143 |                 'Distance (mm)': self.data['dose_comparison']['Distance (mm)'],
144 |                 'Threshold (%)': self.data['dose_comparison']['Threshold (%)'],
145 |                 'Meas Uncertainty': self.data['dose_comparison']['Meas Uncertainty'],
146 |                 'Analysis Type': self.data['analysis_type'],
147 |                 'Total Points': self.data['summary']['Total Points'],
148 |                 'Passed': self.data['summary']['Passed'],
149 |                 'Failed': 
self.data['summary']['Failed'], 150 | '% Passed': self.data['summary']['% Passed'], 151 | 'Min': self.data['gamma_stats']['Minimum'], 152 | 'Max': self.data['gamma_stats']['Maximum'], 153 | 'Average': self.data['gamma_stats']['Average'], 154 | 'Std Dev': self.data['gamma_stats']['Stdv'], 155 | 'X offset (mm)': self.data['cax_offset']['X offset'], 156 | 'Y offset (mm)':self.data['cax_offset']['Y offset'], 157 | 'Notes': self.data['notes']} 158 | 159 | @property 160 | def csv(self): 161 | return get_csv(self.summary_data, self.columns) 162 | -------------------------------------------------------------------------------- /IQDM/pdf_to_text.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | The following code is from StackOverflow 4 | https://stackoverflow.com/questions/26494211/extracting-text-from-a-pdf-file-using-pdfminer-in-python 5 | Web page accessed on May 30, 2019 6 | """ 7 | 8 | from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter 9 | from pdfminer.converter import TextConverter 10 | from pdfminer.layout import LAParams 11 | from pdfminer.pdfpage import PDFPage 12 | try: 13 | from io import StringIO 14 | except ImportError: 15 | from cStringIO import StringIO # python 2 16 | 17 | 18 | def convert_pdf_to_txt(path): 19 | rsrcmgr = PDFResourceManager() 20 | retstr = StringIO() 21 | laparams = LAParams() 22 | device = TextConverter(rsrcmgr, retstr, laparams=laparams) 23 | fp = open(path, 'rb') 24 | interpreter = PDFPageInterpreter(rsrcmgr, device) 25 | password = "" 26 | maxpages = 0 27 | caching = True 28 | pagenos = set() 29 | 30 | for page in PDFPage.get_pages(fp, pagenos, maxpages=maxpages, password=password, caching=caching, 31 | check_extractable=True): 32 | interpreter.process_page(page) 33 | 34 | text = retstr.getvalue() 35 | 36 | fp.close() 37 | device.close() 38 | retstr.close() 39 | return text 40 | -------------------------------------------------------------------------------- /IQDM/pdf_to_text_data.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Code adapted from Mark Amery's answer at: 4 | https://stackoverflow.com/questions/22898145/how-to-extract-text-and-text-coordinates-from-a-pdf-file 5 | Accessed August 8, 2019 6 | """ 7 | 8 | from pdfminer.pdfparser import PDFParser 9 | from pdfminer.pdfdocument import PDFDocument 10 | from pdfminer.pdfpage import PDFPage 11 | from pdfminer.pdfpage import PDFTextExtractionNotAllowed 12 | from pdfminer.pdfinterp import PDFResourceManager 13 | from pdfminer.pdfinterp import PDFPageInterpreter 14 | from pdfminer.pdfdevice import PDFDevice 15 | from pdfminer.layout import LAParams 16 | from pdfminer.converter import PDFPageAggregator 17 | import pdfminer 18 | 19 | 20 | class CustomPDFParser: 21 | def __init__(self, file_path, verbose=False): 22 | self.page = [] 23 | self.file_path = file_path 24 | self.convert_pdf_to_text(verbose=verbose) 25 | self.data = [] 26 | 27 | def print(self): 28 | for p, page in enumerate(self.page): 29 | print("Page %s" % (p+1)) 30 | page.print() 31 | 32 | def print_block(self, page, index): 33 | self.page[page].print_block(index) 34 | 35 | def get_block_data(self, page, index): 36 | return self.page[page].get_block_data(index) 37 | 38 | def get_block_data_with_y(self, page, y): 39 | return self.page[page].get_block_data_with_y(y) 40 | 41 | def convert_pdf_to_text(self, verbose=False): 42 | 43 | # Open a PDF file. 
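# Overview of the pdfminer pipeline below: PDFParser reads the raw bytes,
# PDFDocument exposes the parsed document structure, and a PDFPageAggregator
# paired with a PDFPageInterpreter renders each page into layout objects
# (e.g. LTTextBoxHorizontal) that PDFPageParser then walks to collect text
# along with its x/y coordinates.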
44 | fp = open(self.file_path, 'rb') 45 | 46 | # Create a PDF parser object associated with the file object. 47 | parser = PDFParser(fp) 48 | 49 | # Create a PDF document object that stores the document structure. 50 | # Password for initialization as 2nd parameter 51 | document = PDFDocument(parser) 52 | 53 | # Check if the document allows text extraction. If not, abort. 54 | if not document.is_extractable: 55 | raise PDFTextExtractionNotAllowed 56 | 57 | # Create a PDF resource manager object that stores shared resources. 58 | rsrcmgr = PDFResourceManager() 59 | 60 | # Create a PDF device object. 61 | device = PDFDevice(rsrcmgr) 62 | 63 | # BEGIN LAYOUT ANALYSIS 64 | # Set parameters for analysis. 65 | laparams = LAParams() 66 | 67 | # Create a PDF page aggregator object. 68 | device = PDFPageAggregator(rsrcmgr, laparams=laparams) 69 | 70 | # Create a PDF interpreter object. 71 | interpreter = PDFPageInterpreter(rsrcmgr, device) 72 | 73 | # loop over all pages in the document 74 | for p, page in enumerate(PDFPage.create_pages(document)): 75 | # read the page into a layout object 76 | interpreter.process_page(page) 77 | layout = device.get_result() 78 | 79 | # extract text from this object 80 | page_data = {'x': [], 'y': [], 'text': []} 81 | self.page.append(PDFPageParser(layout._objs, page_data, verbose=verbose)) 82 | 83 | 84 | class PDFPageParser: 85 | def __init__(self, lt_objs, page_data, verbose=False): 86 | self.lt_objs = lt_objs 87 | self.data = page_data 88 | self.verbose = verbose 89 | 90 | self.parse_obj(lt_objs) 91 | self.sort_all_data_by_y() 92 | self.sub_sort_all_data_by_x() 93 | 94 | def parse_obj(self, lt_objs): 95 | # loop over the object list 96 | for obj in lt_objs: 97 | # if it's a textbox, print text and location 98 | if isinstance(obj, pdfminer.layout.LTTextBoxHorizontal): 99 | if self.verbose: 100 | print("%6d, %6d, %s" % (obj.bbox[0], obj.bbox[1], obj.get_text().replace('\n', '_'))) 101 | self.data['x'].append(round(obj.bbox[0], 2)) 102 | self.data['y'].append(round(obj.bbox[1], 2)) 103 | # self.data['text'].append(obj.get_text().replace('\n', '_')) 104 | self.data['text'].append(obj.get_text()) 105 | # if it's a container, recurse 106 | elif isinstance(obj, pdfminer.layout.LTFigure): 107 | self.parse_obj(obj._objs) 108 | 109 | def sort_all_data_by_y(self): 110 | self.sort_all_data('y', reverse=True) 111 | 112 | def sub_sort_all_data_by_x(self): 113 | for y in set(self.data['y']): 114 | # for a given y, collect all indices, y, and text values with given y 115 | indices, x, text = [], [], [] 116 | for i, y_ in enumerate(self.data['y']): 117 | if y_ == y: 118 | indices.append(i) 119 | x.append(self.data['x'][i]) 120 | text.append(self.data['text'][i]) 121 | 122 | for sort_index, data_index in enumerate(self.get_sorted_indices(x)): 123 | self.data['x'][indices[sort_index]] = x[data_index] 124 | self.data['text'][indices[sort_index]] = text[data_index] 125 | 126 | def sort_all_data(self, sort_key, reverse=False): 127 | sorted_indices = self.get_sorted_indices(self.data[sort_key], reverse=reverse) 128 | 129 | for key in list(self.data): 130 | self.data[key] = [self.data[key][i] for i in sorted_indices] 131 | 132 | @staticmethod 133 | def get_sorted_indices(some_list, reverse=False): 134 | return [i[0] for i in sorted(enumerate(some_list), key=lambda x: x[1], reverse=reverse)] 135 | 136 | def get_coordinates(self, index): 137 | return [self.data[key][index] for key in ['x', 'y']] 138 | 139 | def print(self): 140 | for index, text in enumerate(self.data['text']): 141 | 
coord = self.get_coordinates(index) 142 | print("x:%s\ty:%s\n%s" % (coord[0], coord[1], text)) 143 | 144 | def print_block(self, index): 145 | coord = self.get_coordinates(index) 146 | print("x:%s\ty:%s\n%s" % (coord[0], coord[1], (self.data['text'][index]))) 147 | 148 | def get_block_data(self, index): 149 | coord = self.get_coordinates(index) 150 | return coord[0], coord[1], self.data['text'][index] 151 | 152 | def get_block_data_with_y(self, y, exact=False): 153 | tolerance = 20 154 | block_data = [] 155 | for i, data in enumerate(self.data['text']): 156 | if exact: 157 | if int(self.data['y'][i]) == y: 158 | block_data.append(data) 159 | else: 160 | if y + tolerance > int(self.data['y'][i]) > y - tolerance: 161 | block_data.append(data) 162 | return block_data 163 | -------------------------------------------------------------------------------- /IQDM/trending.py: -------------------------------------------------------------------------------- 1 | from bokeh.io import curdoc 2 | from IQDM.trending_delta4 import TrendingDashboard as TrendDelta4 3 | import sys 4 | 5 | 6 | FILE_PATH = sys.argv[1] 7 | DAY_FIRST = {'true': True, 'false': False}[sys.argv[2]] 8 | if 'delta4' in FILE_PATH: 9 | dashboard = TrendDelta4(FILE_PATH, day_first=DAY_FIRST) 10 | curdoc().add_root(dashboard.layout) 11 | curdoc().title = "Delta 4 Trending" 12 | 13 | else: # sncpatient 14 | pass 15 | -------------------------------------------------------------------------------- /IQDM/trending_arccheck.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # trending_arccheck.py 5 | """ 6 | Bokeh server script to analyze a delta4_results csv from IQDM 7 | """ 8 | # Copyright (c) 2019 9 | # Dan Cutright, PhD 10 | # Medical Physicist 11 | # University of Chicago Medical Center 12 | # This file is part of IMRT QA Data Miner, partial based on code from DVH Analytics 13 | 14 | from bokeh.io import curdoc 15 | from bokeh.plotting import figure 16 | from bokeh.models import HoverTool, ColumnDataSource, Select, Div, TextInput, Legend, Spacer 17 | from bokeh.layouts import column, row 18 | from bokeh.models.widgets import DatePicker, CheckboxButtonGroup 19 | import numpy as np 20 | from IQDM.utilities import collapse_into_single_dates, moving_avg, get_control_limits, import_csv 21 | import sys 22 | 23 | FILE_PATH = sys.argv[1] 24 | DAY_FIRST = day_first = {'true': True, 'false': False}[sys.argv[2]] 25 | 26 | 27 | class Plot: 28 | def __init__(self, data): 29 | 30 | self.data = data 31 | self.source = {key: {'plot': ColumnDataSource(data=dict(x=[], y=[])), 32 | 'trend': ColumnDataSource(data=dict(x=[], y=[])), 33 | 'bound': ColumnDataSource(data=dict(x=[], y=[])), 34 | 'patch': ColumnDataSource(data=dict(x=[], y=[])), 35 | 'hist': ColumnDataSource(data=dict(x=[], y=[]))} for key in [1, 2]} 36 | 37 | self.ichart = None 38 | 39 | self.__set_x() 40 | self.__create_figure() 41 | self.__add_plot_data() 42 | self.__add_histogram_data() 43 | self.__add_hover() 44 | self.__add_legend() 45 | self.__set_plot_attr() 46 | 47 | def __create_figure(self): 48 | 49 | self.fig = figure(plot_width=1000, plot_height=375, x_axis_type='datetime') 50 | self.fig.xaxis.axis_label_text_font_size = "17pt" 51 | self.fig.yaxis.axis_label_text_font_size = "17pt" 52 | self.fig.xaxis.major_label_text_font_size = "15pt" 53 | self.fig.yaxis.major_label_text_font_size = "15pt" 54 | 55 | def __add_hover(self): 56 | self.fig.add_tools(HoverTool(tooltips=[("Plan Date", 
"@x{%F}"), 57 | ("Patient", "@id"), 58 | ("y", "@y"), 59 | ('Gamma Crit', "@gamma_crit"), 60 | ('Gamma Pass', '@gamma_index'), 61 | ('file', '@file_name')], 62 | formatters={'x': 'datetime'}, 63 | renderers=[self.plot_data_1])) 64 | 65 | def __set_plot_attr(self): 66 | self.fig.title.align = 'center' 67 | 68 | def __set_x(self): 69 | self.x = self.data['date_time_obj'] 70 | 71 | def __add_plot_data(self): 72 | self.plot_data_1 = self.fig.circle('x', 'y', source=self.source[1]['plot'], color='blue', size=8, alpha=0.4) 73 | self.plot_trend_1 = self.fig.line('x', 'y', source=self.source[1]['trend'], line_color='black', line_width=4) 74 | self.plot_avg_1 = self.fig.line('x', 'avg', source=self.source[1]['bound'], line_color='black') 75 | self.plot_patch_1 = self.fig.patch('x', 'y', source=self.source[1]['patch'], color='blue', alpha=0.2) 76 | 77 | # self.plot_data_2 = self.fig.circle('x', 'y', source=self.source[2]['plot'], color='red', size=4, alpha=0.3) 78 | # self.plot_trend_2 = self.fig.line('x', 'y', source=self.source[2]['trend'], line_color='black', line_width=4) 79 | # self.plot_avg_2 = self.fig.line('x', 'avg', source=self.source[2]['bound'], line_color='black') 80 | # self.plot_patch_2 = self.fig.patch('x', 'y', source=self.source[2]['patch'], color='red', alpha=0.2) 81 | 82 | def __add_legend(self): 83 | # Set the legend 84 | legend_plot = Legend(items=[("Data 1 ", [self.plot_data_1]), 85 | ("Avg 1 ", [self.plot_avg_1]), 86 | ("Rolling Avg 1 ", [self.plot_trend_1]), 87 | ("Percentile Region 1 ", [self.plot_patch_1]) 88 | ], 89 | orientation='horizontal') 90 | 91 | # Add the layout outside the plot, clicking legend item hides the line 92 | self.fig.add_layout(legend_plot, 'above') 93 | self.fig.legend.click_policy = "hide" 94 | 95 | def __add_histogram_data(self): 96 | self.histogram = figure(tools="", plot_width=1000, plot_height=275) 97 | # self.histogram.xaxis.axis_label_text_font_size = self.options.PLOT_AXIS_LABEL_FONT_SIZE 98 | # self.histogram.yaxis.axis_label_text_font_size = self.options.PLOT_AXIS_LABEL_FONT_SIZE 99 | # self.histogram.xaxis.major_label_text_font_size = self.options.PLOT_AXIS_MAJOR_LABEL_FONT_SIZE 100 | # self.histogram.yaxis.major_label_text_font_size = self.options.PLOT_AXIS_MAJOR_LABEL_FONT_SIZE 101 | # self.histogram.min_border_left = self.options.MIN_BORDER 102 | # self.histogram.min_border_bottom = self.options.MIN_BORDER 103 | self.vbar_1 = self.histogram.vbar(x='x', width='width', bottom=0, top='top', source=self.source[1]['hist'], alpha=0.5, color='blue') 104 | # self.vbar_2 = self.histogram.vbar(x='x', width='width', bottom=0, top='top', source=self.source[2]['hist'], alpha=0.5, color='red') 105 | 106 | self.histogram.xaxis.axis_label = "" 107 | self.histogram.yaxis.axis_label = "Frequency" 108 | 109 | self.histogram.xaxis.axis_label_text_font_size = "17pt" 110 | self.histogram.yaxis.axis_label_text_font_size = "17pt" 111 | self.histogram.xaxis.major_label_text_font_size = "15pt" 112 | self.histogram.yaxis.major_label_text_font_size = "15pt" 113 | 114 | def update_source(self, attr, old, new): 115 | for source_key in [1, 2]: 116 | new_data = {key: [] for key in ['x', 'y', 'id', 'gamma_crit', 'file_name', 'gamma_index']} 117 | active_gamma = [gamma_options[a] for a in checkbox_button_group.active] 118 | # if select_linac[source_key] != 'None': 119 | for i in range(len(self.x)): 120 | # if select_linac[source_key].value == 'All' or self.data['Radiation Dev'][i] == select_linac[source_key].value: 121 | if end_date_picker.value > self.x[i] > 
start_date_picker.value: 122 | gamma_crit = "%s%%/%smm" % (self.data['Difference (%)'][i], self.data['Distance (mm)'][i]) 123 | if 'Any' in active_gamma or gamma_crit in active_gamma: 124 | try: 125 | new_data['y'].append(float(self.data[select_y.value][i])) 126 | except ValueError: 127 | continue 128 | 129 | new_data['x'].append(self.x[i]) 130 | new_data['id'].append(self.data['Patient ID'][i]) 131 | new_data['gamma_crit'].append(gamma_crit) 132 | new_data['file_name'].append(self.data['file_name'][i]) 133 | new_data['gamma_index'].append('%s%%' % self.data['% Passed'][i]) 134 | # new_data['daily_corr'].append(self.data['Daily Corr'][i]) 135 | # new_data['dta'].append('%s%%' % self.data['DTA'][i]) 136 | 137 | try: 138 | y = new_data['y'] 139 | text[source_key].text = "Linac %s: Min: %0.3f | Low: %0.3f | Mean: %0.3f | Median: %0.3f | Upper: %0.3f | Max: %0.3f" % \ 140 | (source_key, np.min(y), np.percentile(y, 25), np.sum(y)/len(y), np.percentile(y, 50), np.percentile(y, 75), np.max(y)) 141 | except: 142 | text[source_key].text = "Linac %s" % source_key 143 | 144 | self.source[source_key]['plot'].data = new_data 145 | 146 | self.fig.yaxis.axis_label = select_y.value 147 | self.fig.xaxis.axis_label = 'Plan Date' 148 | 149 | self.update_histogram(source_key, bin_size=20) 150 | self.update_trend(source_key, int(float(avg_len_input.value)), float(percentile_input.value)) 151 | self.ichart.update_plot() 152 | 153 | def update_histogram(self, source_key, bin_size=10): 154 | width_fraction = 0.9 155 | hist, bins = np.histogram(self.source[source_key]['plot'].data['y'], bins=bin_size) 156 | width = [width_fraction * (bins[1] - bins[0])] * bin_size 157 | center = (bins[:-1] + bins[1:]) / 2. 158 | self.source[source_key]['hist'].data = {'x': center, 'top': hist, 'width': width} 159 | 160 | self.histogram.xaxis.axis_label = select_y.value 161 | 162 | def update_trend(self, source_key, avg_len, percentile): 163 | x = self.source[source_key]['plot'].data['x'] 164 | y = self.source[source_key]['plot'].data['y'] 165 | if x and y: 166 | x_len = len(x) 167 | 168 | data_collapsed = collapse_into_single_dates(x, y) 169 | x_trend, y_trend = moving_avg(data_collapsed, avg_len) 170 | 171 | y_np = np.array(self.source[source_key]['plot'].data['y']) 172 | upper_bound = float(np.percentile(y_np, 50. + percentile / 2.)) 173 | average = float(np.percentile(y_np, 50)) 174 | lower_bound = float(np.percentile(y_np, 50. 
- percentile / 2.)) 175 | 176 | self.source[source_key]['trend'].data = {'x': x_trend, 177 | 'y': y_trend, 178 | 'mrn': ['Avg'] * len(x_trend)} 179 | self.source[source_key]['bound'].data = {'x': [x[0], x[-1]], 180 | 'mrn': ['Series Avg'] * 2, 181 | 'upper': [upper_bound] * 2, 182 | 'avg': [average] * 2, 183 | 'lower': [lower_bound] * 2, 184 | 'y': [average] * 2} 185 | self.source[source_key]['patch'].data = {'x': [x[0], x[-1], x[-1], x[0]], 186 | 'y': [upper_bound, upper_bound, lower_bound, lower_bound]} 187 | else: 188 | self.source[source_key]['trend'].data = {'x': [], 189 | 'y': [], 190 | 'mrn': []} 191 | self.source[source_key]['bound'].data = {'x': [], 192 | 'mrn': [], 193 | 'upper': [], 194 | 'avg': [], 195 | 'lower': [], 196 | 'y': []} 197 | self.source[source_key]['patch'].data = {'x': [], 198 | 'y': []} 199 | 200 | 201 | class PlotControlChart: 202 | """ 203 | Generate plot for Control Chart frame 204 | """ 205 | def __init__(self, main_plot): 206 | 207 | self.main_plot = main_plot 208 | 209 | self.y_axis_label = '' 210 | self.source = {'plot': ColumnDataSource(data=dict(x=[], y=[], mrn=[], color=[], alpha=[], dates=[], 211 | gamma_index=[], daily_corr=[], gamma_crit=[], dta=[])), 212 | 'center_line': ColumnDataSource(data=dict(x=[], y=[], mrn=[])), 213 | 'ucl_line': ColumnDataSource(data=dict(x=[], y=[], mrn=[])), 214 | 'lcl_line': ColumnDataSource(data=dict(x=[], y=[], mrn=[])), 215 | 'bound': ColumnDataSource(data=dict(x=[], mrn=[], upper=[], avg=[], lower=[])), 216 | 'patch': ColumnDataSource(data=dict(x=[], y=[]))} 217 | 218 | self.figure = figure(plot_width=1000, plot_height=375) 219 | self.figure.xaxis.axis_label = "Study #" 220 | self.figure.xaxis.axis_label_text_font_size = "17pt" 221 | self.figure.yaxis.axis_label_text_font_size = "17pt" 222 | self.figure.xaxis.major_label_text_font_size = "15pt" 223 | self.figure.yaxis.major_label_text_font_size = "15pt" 224 | 225 | self.__add_plot_data() 226 | self.__add_hover() 227 | self.__create_divs() 228 | self.__add_legend() 229 | 230 | def __add_plot_data(self): 231 | self.plot_data = self.figure.circle('x', 'y', source=self.source['plot'], 232 | size=8, color='color', alpha='alpha') 233 | self.plot_data_line = self.figure.line('x', 'y', source=self.source['plot'], color='blue', 234 | line_dash='solid') 235 | self.plot_patch = self.figure.patch('x', 'y', color='blue', source=self.source['patch'], alpha=0.1) 236 | self.plot_center_line = self.figure.line('x', 'y', source=self.source['center_line'], alpha=1, color='black', 237 | line_dash='solid') 238 | self.plot_lcl_line = self.figure.line('x', 'y', source=self.source['lcl_line'], alpha=1, color='red', line_dash='dashed') 239 | self.plot_ucl_line = self.figure.line('x', 'y', source=self.source['ucl_line'], alpha=1, color='red', line_dash='dashed') 240 | 241 | def __add_hover(self): 242 | self.figure.add_tools(HoverTool(show_arrow=True, 243 | tooltips=[('ID', '@mrn'), 244 | ('Date', '@dates{%F}'), 245 | ('Study', '@x'), 246 | ('Value', '@y{0.2f}'), 247 | ("y", "@y"), 248 | ('Gamma Crit', "@gamma_crit"), 249 | ('Gamma Pass', '@gamma_index'), 250 | ('file', '@file_name') 251 | ], 252 | formatters={'dates': 'datetime'}, 253 | renderers=[self.plot_data])) 254 | 255 | def __add_legend(self): 256 | # Set the legend 257 | legend_plot = Legend(items=[("Charting Variable ", [self.plot_data]), 258 | ("Charting Variable Line ", [self.plot_data_line]), 259 | ('Center Line ', [self.plot_center_line]), 260 | ('UCL ', [self.plot_ucl_line]), 261 | ('LCL ', [self.plot_lcl_line])], 262 | 
orientation='horizontal') 263 | 264 | # Add the layout outside the plot, clicking legend item hides the line 265 | self.figure.add_layout(legend_plot, 'above') 266 | self.figure.legend.click_policy = "hide" 267 | 268 | def __create_divs(self): 269 | self.div_center_line = Div(text='', width=175) 270 | self.div_ucl = Div(text='', width=175) 271 | self.div_lcl = Div(text='', width=175) 272 | 273 | def update_plot(self): 274 | 275 | self.y_axis_label = select_y.value 276 | self.figure.yaxis.axis_label = self.y_axis_label 277 | 278 | y = self.main_plot.source[1]['plot'].data['y'] 279 | mrn = self.main_plot.source[1]['plot'].data['id'] 280 | dates = self.main_plot.source[1]['plot'].data['x'] 281 | gamma_crit = self.main_plot.source[1]['plot'].data['gamma_crit'] 282 | gamma_index = self.main_plot.source[1]['plot'].data['gamma_index'] 283 | # daily_corr = self.main_plot.source[1]['plot'].data['daily_corr'] 284 | # dta = self.main_plot.source[1]['plot'].data['dta'] 285 | file_name = self.main_plot.source[1]['plot'].data['file_name'] 286 | x = list(range(len(dates))) 287 | 288 | center_line, ucl, lcl = get_control_limits(y) 289 | 290 | if select_y.value in ['% Passed', 'Gamma-Index', 'DTA'] and ucl > 100: 291 | ucl = 100 292 | 293 | colors = ['red', 'blue'] 294 | alphas = [0.3, 0.4] 295 | color = [colors[ucl >= value >= lcl] for value in y] 296 | alpha = [alphas[ucl >= value >= lcl] for value in y] 297 | 298 | self.source['plot'].data = {'x': x, 'y': y, 'mrn': mrn, 'gamma_crit': gamma_crit, 'gamma_index': gamma_index, 299 | 'color': color, 'alpha': alpha, 300 | 'dates': dates, 'file_name': file_name} 301 | 302 | self.source['patch'].data = {'x': [x[0], x[-1], x[-1], x[0]], 303 | 'y': [ucl, ucl, lcl, lcl]} 304 | self.source['center_line'].data = {'x': [min(x), max(x)], 305 | 'y': [center_line] * 2, 306 | 'mrn': ['center line'] * 2} 307 | 308 | self.source['lcl_line'].data = {'x': [min(x), max(x)], 309 | 'y': [lcl] * 2, 310 | 'mrn': ['center line'] * 2} 311 | self.source['ucl_line'].data = {'x': [min(x), max(x)], 312 | 'y': [ucl] * 2, 313 | 'mrn': ['center line'] * 2} 314 | 315 | self.div_center_line.text = "Center line: %0.3f" % center_line 316 | self.div_ucl.text = "UCL: %0.3f" % ucl 317 | self.div_lcl.text = "LCL: %0.3f" % lcl 318 | 319 | def clear_div(self): 320 | self.div_center_line.text = "Center line:" 321 | self.div_ucl.text = "UCL:" 322 | self.div_lcl.text = "LCL:" 323 | 324 | 325 | data = import_csv(FILE_PATH, day_first=DAY_FIRST) 326 | plot = Plot(data) 327 | ichart = PlotControlChart(plot) 328 | plot.ichart = ichart 329 | ignored_y = ['Patient Last Name', 'Patient First Name', 'Patient ID', 'Plan Date', 'Dose Type', 'Radiation Dev', 330 | 'Energy', 'file_name', 'Meas Uncertainty', 'Analysis Type', 'Notes'] 331 | y_options = [option for option in list(data) if option not in ignored_y] 332 | select_y = Select(title='Y-variable:', value='% Passed', options=y_options) 333 | select_y.on_change('value', plot.update_source) 334 | 335 | # linacs = list(set(data['Radiation Dev'])) 336 | # linacs.sort() 337 | # linacs.insert(0, 'All') 338 | # linacs.append('None') 339 | # select_linac = {key: Select(title='Linac %s:' % key, value='All', options=['All'], width=250) for key in [1, 2]} 340 | # select_linac[2].value = 'None' 341 | # select_linac[1].on_change('value', plot.update_source) 342 | # select_linac[2].on_change('value', plot.update_source) 343 | 344 | avg_len_input = TextInput(title='Avg. 
Len:', value='10', width=100) 345 | avg_len_input.on_change('value', plot.update_source) 346 | 347 | percentile_input = TextInput(title='Percentile:', value='90', width=100) 348 | percentile_input.on_change('value', plot.update_source) 349 | 350 | 351 | start_date_picker = DatePicker(title='Start Date:', value=plot.x[0]) 352 | end_date_picker = DatePicker(title='End Date:', value=plot.x[-1]) 353 | start_date_picker.on_change('value', plot.update_source) 354 | end_date_picker.on_change('value', plot.update_source) 355 | 356 | gamma_options = ['5.0%/3.0mm', '3.0%/3.0mm', '3.0%/2.0mm', 'Any'] 357 | checkbox_button_group = CheckboxButtonGroup(labels=gamma_options, active=[3]) 358 | checkbox_button_group.on_change('active', plot.update_source) 359 | 360 | text = {key: Div() for key in [1, 2]} 361 | 362 | plot.update_source(None, None, None) 363 | 364 | layout = column(row(select_y, avg_len_input, percentile_input), 365 | row(start_date_picker, end_date_picker), 366 | row(Div(text='Gamma Criteria: '), checkbox_button_group), 367 | text[1], 368 | text[2], 369 | row(Spacer(width=10), plot.fig), 370 | Spacer(height=50), 371 | row(Spacer(width=10), plot.histogram), 372 | Spacer(height=50), 373 | row(Spacer(width=10), ichart.figure), 374 | row(ichart.div_center_line, ichart.div_ucl, ichart.div_lcl)) 375 | 376 | 377 | curdoc().add_root(layout) 378 | curdoc().title = "ArcCheck Trending" 379 | -------------------------------------------------------------------------------- /IQDM/trending_delta4.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # trending_delta4.py 5 | """ 6 | Bokeh server script to analyze a delta4_results csv from IQDM 7 | """ 8 | # Copyright (c) 2019 9 | # Dan Cutright, PhD 10 | # Medical Physicist 11 | # University of Chicago Medical Center 12 | # This file is part of IMRT QA Data Miner, partial based on code from DVH Analytics 13 | 14 | from bokeh.plotting import figure 15 | from bokeh.models import HoverTool, ColumnDataSource, Select, Div, TextInput, Legend, Spacer 16 | from bokeh.layouts import column, row 17 | from bokeh.models.widgets import DatePicker, CheckboxButtonGroup 18 | import numpy as np 19 | from IQDM.utilities import collapse_into_single_dates, moving_avg, get_control_limits, import_csv 20 | 21 | GROUPS = [1, 2] 22 | COLORS = {1: 'blue', 2: 'red'} 23 | 24 | # TODO: Generalize for different parsers 25 | MAIN_PLOT_KEYS = ['x', 'y', 'id', 'gamma_crit', 'file_name', 'gamma_index', 'daily_corr', 'dta'] 26 | 27 | 28 | class TrendingDashboard: 29 | def __init__(self, file_path, day_first=False): 30 | 31 | self.data = import_csv(file_path, day_first=day_first) 32 | 33 | self.__create_sources() 34 | self.__set_x() 35 | self.__create_figures() 36 | self.__set_properties() 37 | self.__create_divs() 38 | self.__add_plot_data() 39 | self.__add_histogram_data() 40 | self.__add_ichart_data() 41 | self.__add_hover() 42 | self.__add_legend() 43 | 44 | self.__create_widgets() 45 | self.__bind_widgets() 46 | self.__do_layout() 47 | 48 | self.update() 49 | 50 | def __create_sources(self): 51 | self.source = {grp: {'plot': ColumnDataSource(data={key: [] for key in MAIN_PLOT_KEYS}), 52 | 'trend': ColumnDataSource(data=dict(x=[], y=[])), 53 | 'bound': ColumnDataSource(data=dict(x=[], y=[])), 54 | 'patch': ColumnDataSource(data=dict(x=[], y=[])), 55 | 'hist': ColumnDataSource(data=dict(x=[], y=[]))} for grp in GROUPS} 56 | 57 | self.ichart_source = {grp: {'plot': ColumnDataSource(data=dict(x=[], 
57 |         self.ichart_source = {grp: {'plot': ColumnDataSource(data=dict(x=[], y=[], mrn=[], color=[], alpha=[], dates=[],
58 |                                                                         gamma_index=[], daily_corr=[], gamma_crit=[],
59 |                                                                         dta=[])),
60 |                                     'center_line': ColumnDataSource(data=dict(x=[], y=[], mrn=[])),
61 |                                     'ucl_line': ColumnDataSource(data=dict(x=[], y=[], mrn=[])),
62 |                                     'lcl_line': ColumnDataSource(data=dict(x=[], y=[], mrn=[])),
63 |                                     'bound': ColumnDataSource(data=dict(x=[], mrn=[], upper=[], avg=[], lower=[])),
64 |                                     'patch': ColumnDataSource(data=dict(x=[], y=[]))} for grp in GROUPS}
65 | 
66 |     def __set_x(self):
67 |         self.x = self.data['date_time_obj']
68 | 
69 |     def __create_figures(self):
70 | 
71 |         self.fig = figure(plot_width=1000, plot_height=375, x_axis_type='datetime')
72 |         self.histogram = figure(tools="", plot_width=1000, plot_height=275)
73 |         self.ichart = figure(plot_width=1000, plot_height=375)
74 | 
75 |     def __set_properties(self):
76 |         self.fig.xaxis.axis_label_text_font_size = "17pt"
77 |         self.fig.yaxis.axis_label_text_font_size = "17pt"
78 |         self.fig.xaxis.major_label_text_font_size = "15pt"
79 |         self.fig.yaxis.major_label_text_font_size = "15pt"
80 | 
81 |         self.histogram.xaxis.axis_label_text_font_size = "17pt"
82 |         self.histogram.yaxis.axis_label_text_font_size = "17pt"
83 |         self.histogram.xaxis.major_label_text_font_size = "15pt"
84 |         self.histogram.yaxis.major_label_text_font_size = "15pt"
85 | 
86 |         self.ichart.xaxis.axis_label = "Study #"
87 |         self.ichart.xaxis.axis_label_text_font_size = "17pt"
88 |         self.ichart.yaxis.axis_label_text_font_size = "17pt"
89 |         self.ichart.xaxis.major_label_text_font_size = "15pt"
90 |         self.ichart.yaxis.major_label_text_font_size = "15pt"
91 | 
92 |     def __add_plot_data(self):
93 |         self.plot_data = {grp: self.fig.circle('x', 'y', source=self.source[grp]['plot'],
94 |                                                color=COLORS[grp], size=4, alpha=0.4) for grp in GROUPS}
95 |         self.plot_trend = {grp: self.fig.line('x', 'y', source=self.source[grp]['trend'],
96 |                                               line_color='black', line_width=4) for grp in GROUPS}
97 |         self.plot_avg = {grp: self.fig.line('x', 'avg', source=self.source[grp]['bound'],
98 |                                             line_color='black') for grp in GROUPS}
99 |         self.plot_patch = {grp: self.fig.patch('x', 'y', source=self.source[grp]['patch'],
100 |                                                color=COLORS[grp], alpha=0.2) for grp in GROUPS}
101 | 
102 |     def __add_histogram_data(self):
103 |         self.vbar = {grp: self.histogram.vbar(x='x', width='width', bottom=0, top='top',
104 |                                               source=self.source[grp]['hist'], alpha=0.5, color=COLORS[grp])
105 |                      for grp in GROUPS}
106 | 
107 |         self.histogram.xaxis.axis_label = ""
108 |         self.histogram.yaxis.axis_label = "Frequency"
109 | 
110 |     def __add_ichart_data(self):
111 |         self.ichart_data = {grp: self.ichart.circle('x', 'y', source=self.ichart_source[grp]['plot'],
112 |                                                      size=4, color='color', alpha='alpha') for grp in GROUPS}
113 |         self.ichart_data_line = {grp: self.ichart.line('x', 'y', source=self.ichart_source[grp]['plot'],
114 |                                                         color=COLORS[grp], line_dash='solid') for grp in GROUPS}
115 |         self.ichart_patch = {grp: self.ichart.patch('x', 'y', color=COLORS[grp],
116 |                                                      source=self.ichart_source[grp]['patch'],
117 |                                                      alpha=0.1) for grp in GROUPS}
118 |         self.ichart_center_line = {grp: self.ichart.line('x', 'y', source=self.ichart_source[grp]['center_line'],
119 |                                                           alpha=1, color='black', line_dash='solid') for grp in GROUPS}
120 |         self.ichart_lcl_line = {grp: self.ichart.line('x', 'y', source=self.ichart_source[grp]['lcl_line'], alpha=1,
121 |                                                        color='red', line_dash='dashed') for grp in GROUPS}
122 |         self.ichart_ucl_line = {grp: self.ichart.line('x', 'y', source=self.ichart_source[grp]['ucl_line'], alpha=1,
123 |                                                        color='red', line_dash='dashed') for grp in GROUPS}
124 | 
125 |     def __add_legend(self):
126 |         # Main TrendingDashboard
127 |         group_items = {grp: [("Data %s " % grp, [self.plot_data[grp]]),
128 |                              ("Avg %s " % grp, [self.plot_avg[grp]]),
129 |                              ("Rolling Avg %s " % grp, [self.plot_trend[grp]]),
130 |                              ("Percentile Region %s " % grp, [self.plot_patch[grp]])] for grp in GROUPS}
131 |         items = group_items[GROUPS[0]]
132 |         if len(GROUPS) > 1:
133 |             for grp in GROUPS[1:]:
134 |                 items.extend(group_items[grp])
135 |         legend_plot = Legend(items=items, orientation='horizontal')
136 |         self.fig.add_layout(legend_plot, 'above')
137 |         self.fig.legend.click_policy = "hide"
138 | 
139 |         # Control Chart
140 |         group_items = {grp: [("Value %s " % grp, [self.ichart_data[grp]]),
141 |                              ("Line %s" % grp, [self.ichart_data_line[grp]]),
142 |                              ('Center %s' % grp, [self.ichart_center_line[grp]]),
143 |                              ('UCL %s' % grp, [self.ichart_ucl_line[grp]]),
144 |                              ('LCL %s' % grp, [self.ichart_lcl_line[grp]]),
145 |                              ('In Ctrl %s' % grp, [self.ichart_patch[grp]])] for grp in GROUPS}
146 |         items = group_items[GROUPS[0]]
147 |         if len(GROUPS) > 1:
148 |             for grp in GROUPS[1:]:
149 |                 items.extend(group_items[grp])
150 |         legend_ichart = Legend(items=items, orientation='horizontal')
151 |         self.ichart.add_layout(legend_ichart, 'above')
152 |         self.ichart.legend.click_policy = "hide"
153 | 
154 |     def __add_hover(self):
155 |         self.fig.add_tools(HoverTool(tooltips=[("Plan Date", "@x{%F}"),
156 |                                                ("Patient", "@id"),
157 |                                                ("y", "@y"),
158 |                                                ('Gamma Crit', "@gamma_crit"),
159 |                                                ('Gamma Pass', '@gamma_index'),
160 |                                                ('DTA', '@dta'),
161 |                                                ('Daily Corr', '@daily_corr'),
162 |                                                ('file', '@file_name')],
163 |                                      formatters={'x': 'datetime'},
164 |                                      renderers=[self.plot_data[grp] for grp in GROUPS]))
165 | 
166 |         self.histogram.add_tools(HoverTool(show_arrow=True, line_policy='next', mode='vline',
167 |                                            tooltips=[("Bin Center", "@x"),
168 |                                                      ('Frequency', '@top')],
169 |                                            renderers=[self.vbar[grp] for grp in GROUPS]))
170 | 
171 |         self.ichart.add_tools(HoverTool(show_arrow=True,
172 |                                         tooltips=[('ID', '@mrn'),
173 |                                                   ('Date', '@dates{%F}'),
174 |                                                   ('Study', '@x'),
175 |                                                   ('Value', '@y{0.2f}'),
176 |                                                   ("y", "@y"),
177 |                                                   ('Gamma Crit', "@gamma_crit"),
178 |                                                   ('Gamma Pass', '@gamma_index'),
179 |                                                   ('DTA', '@dta'),
180 |                                                   ('Daily Corr', '@daily_corr'),
181 |                                                   ('file', '@file_name')
182 |                                                   ],
183 |                                         formatters={'dates': 'datetime'},
184 |                                         renderers=[self.ichart_data[grp] for grp in GROUPS]))
185 | 
186 |     def __create_divs(self):
187 |         self.div_summary = {grp: Div() for grp in GROUPS}
188 |         self.div_center_line = {grp: Div(text='', width=175) for grp in GROUPS}
189 |         self.div_ucl = {grp: Div(text='', width=175) for grp in GROUPS}
190 |         self.div_lcl = {grp: Div(text='', width=175) for grp in GROUPS}
191 | 
192 |     def __create_widgets(self):
193 |         ignored_y = ['Patient Name', 'Patient ID', 'Plan Date', 'Radiation Dev', 'Energy', 'file_name', 'date_time_obj']
194 |         y_options = [option for option in list(self.data) if option not in ignored_y]
195 |         self.select_y = Select(title='Y-variable:', value='Dose Dev', options=y_options)
196 | 
197 |         linacs = list(set(self.data['Radiation Dev']))
198 |         linacs.sort()
199 |         linacs.insert(0, 'All')
200 |         linacs.append('None')
201 |         self.select_linac = {grp: Select(title='Linac %s:' % grp, value='All', options=linacs, width=250)
202 |                              for grp in GROUPS}
203 |         self.select_linac[2].value = 'None'
204 | 
205 |         energies = list(set(self.data['Energy']))
206 |         energies.sort()
207 |         energies.insert(0, 'Any')
208 |         self.select_energies = {grp: Select(title='Energy %s:' % grp, value='Any', options=energies, width=250)
209 |                                 for grp in GROUPS}
210 | 
211 |         self.avg_len_input = TextInput(title='Avg. Len:', value='10', width=100)
212 | 
213 |         self.percentile_input = TextInput(title='Percentile:', value='90', width=100)
214 | 
215 |         self.bins_input = TextInput(title='Bins:', value='20', width=100)
216 | 
217 |         self.start_date_picker = DatePicker(title='Start Date:', value=self.x[0])
218 |         self.end_date_picker = DatePicker(title='End Date:', value=self.x[-1])
219 | 
220 |         self.gamma_options = ['5.0%/3.0mm', '3.0%/3.0mm', '3.0%/2.0mm', 'Any']
221 |         self.checkbox_button_group = CheckboxButtonGroup(labels=self.gamma_options, active=[3])
222 | 
223 |     def __bind_widgets(self):
224 | 
225 |         self.select_y.on_change('value', self.update_source_ticker)
226 |         for grp in GROUPS:
227 |             self.select_linac[grp].on_change('value', self.update_source_ticker)
228 |             self.select_energies[grp].on_change('value', self.update_source_ticker)
229 |         self.avg_len_input.on_change('value', self.update_source_ticker)
230 |         self.percentile_input.on_change('value', self.update_source_ticker)
231 |         self.bins_input.on_change('value', self.update_source_ticker)
232 |         self.start_date_picker.on_change('value', self.update_source_ticker)
233 |         self.end_date_picker.on_change('value', self.update_source_ticker)
234 |         self.checkbox_button_group.on_change('active', self.update_source_ticker)
235 | 
236 |     def __do_layout(self):
237 |         # TODO: Generalize for 1 or 2 groups
238 |         self.layout = column(row(self.select_y, self.select_linac[1], self.select_linac[2], self.avg_len_input,
239 |                                  self.percentile_input, self.bins_input),
240 |                              row(self.select_energies[1], self.select_energies[2]),
241 |                              row(self.start_date_picker, self.end_date_picker),
242 |                              row(Div(text='Gamma Criteria: '), self.checkbox_button_group),
243 |                              self.div_summary[1],
244 |                              self.div_summary[2],
245 |                              row(Spacer(width=10), self.fig),
246 |                              Spacer(height=50),
247 |                              row(Spacer(width=10), self.histogram),
248 |                              Spacer(height=50),
249 |                              row(Spacer(width=10), self.ichart),
250 |                              row(self.div_center_line[1], self.div_ucl[1], self.div_lcl[1]),
251 |                              row(self.div_center_line[2], self.div_ucl[2], self.div_lcl[2]))
252 | 
253 |     def update_source_ticker(self, attr, old, new):
254 |         self.update()
255 | 
256 |     def update(self):
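        # Descriptive note: a report row is plotted for this group only if it passes all four
        # widget filters below -- linac (select_linac), date window (the date pickers), gamma
        # criteria (checkbox_button_group), and energy (select_energies). Rows whose selected
        # y-value cannot be cast to float are skipped entirely.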
257 |         for grp in GROUPS:
258 |             new_data = {key: [] for key in MAIN_PLOT_KEYS}
259 |             active_gamma = [self.gamma_options[a] for a in self.checkbox_button_group.active]
260 |             if self.select_linac[grp].value != 'None':
261 |                 for i in range(len(self.x)):
262 |                     if self.select_linac[grp].value == 'All' or \
263 |                             self.data['Radiation Dev'][i] == self.select_linac[grp].value:
264 |                         if self.end_date_picker.value > self.x[i] > self.start_date_picker.value:
265 |                             gamma_crit = "%s%%/%smm" % (self.data['Gamma Dose Criteria'][i],
266 |                                                         self.data['Gamma Dist Criteria'][i])
267 |                             if 'Any' in active_gamma or gamma_crit in active_gamma:
268 |                                 if self.select_energies[grp].value == 'Any' or \
269 |                                         self.data['Energy'][i] == self.select_energies[grp].value:
270 | 
271 |                                     try:
272 |                                         new_data['y'].append(float(self.data[self.select_y.value][i]))
273 |                                     except ValueError:
274 |                                         continue
275 |                                     new_data['x'].append(self.x[i])
276 |                                     new_data['id'].append(self.data['Patient ID'][i])
277 |                                     new_data['gamma_crit'].append(gamma_crit)
278 |                                     new_data['file_name'].append(self.data['file_name'][i])
279 |                                     new_data['gamma_index'].append('%s%%' % self.data['Gamma-Index'][i])
280 |                                     new_data['daily_corr'].append(self.data['Daily Corr'][i])
281 |                                     new_data['dta'].append('%s%%' % self.data['DTA'][i])
282 | 
283 |             try:
284 |                 y = new_data['y']
285 |                 self.div_summary[grp].text = "Linac %s: Min: %0.3f | Low: %0.3f | " \
286 |                                              "Mean: %0.3f | Median: %0.3f | Upper: %0.3f | " \
287 |                                              "Max: %0.3f" % \
288 |                                              (grp, np.min(y), np.percentile(y, 25), np.mean(y),
289 |                                               np.percentile(y, 50), np.percentile(y, 75), np.max(y))
290 |             except Exception:
291 |                 self.div_summary[grp].text = "Linac %s" % grp
292 | 
293 |             self.source[grp]['plot'].data = new_data
294 | 
295 |             self.fig.yaxis.axis_label = self.select_y.value
296 |             self.fig.xaxis.axis_label = 'Plan Date'
297 | 
298 |             self.update_histogram(grp)
299 |             self.update_trend(grp, int(float(self.avg_len_input.value)), float(self.percentile_input.value))
300 |             self.update_ichart()
301 | 
302 |     def update_histogram(self, group):
303 |         width_fraction = 0.9
304 |         try:
305 |             bin_size = int(self.bins_input.value)
306 |         except ValueError:
307 |             bin_size = 20
308 |             self.bins_input.value = str(bin_size)
309 |         hist, bins = np.histogram(self.source[group]['plot'].data['y'], bins=bin_size)
310 |         width = [width_fraction * (bins[1] - bins[0])] * bin_size
311 |         center = (bins[:-1] + bins[1:]) / 2.
312 |         if set(hist) != {0}:
313 |             self.source[group]['hist'].data = {'x': center, 'top': hist, 'width': width}
314 |         else:
315 |             self.source[group]['hist'].data = {'x': [], 'top': [], 'width': []}
316 | 
317 |         self.histogram.xaxis.axis_label = self.select_y.value
318 | 
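    # A quick worked example for the percentile band in update_trend() below: with the
    # default percentile_input of 90, the band spans from the 5th percentile (50 - 90/2)
    # to the 95th percentile (50 + 90/2) of the currently filtered y-values, with the
    # median (50th percentile) drawn as the 'avg' line between them.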
319 |     def update_trend(self, source_key, avg_len, percentile):
320 |         x = self.source[source_key]['plot'].data['x']
321 |         y = self.source[source_key]['plot'].data['y']
322 |         if x and y:
323 |             data_collapsed = collapse_into_single_dates(x, y)
324 |             x_trend, y_trend = moving_avg(data_collapsed, avg_len)
325 | 
326 |             y_np = np.array(self.source[source_key]['plot'].data['y'])
327 |             upper_bound = float(np.percentile(y_np, 50. + percentile / 2.))
328 |             average = float(np.percentile(y_np, 50))
329 |             lower_bound = float(np.percentile(y_np, 50. - percentile / 2.))
330 | 
331 |             self.source[source_key]['trend'].data = {'x': x_trend, 'y': y_trend, 'mrn': ['Avg'] * len(x_trend)}
332 |             self.source[source_key]['bound'].data = {'x': [x[0], x[-1]],
333 |                                                      'mrn': ['Series Avg'] * 2,
334 |                                                      'upper': [upper_bound] * 2,
335 |                                                      'avg': [average] * 2,
336 |                                                      'lower': [lower_bound] * 2,
337 |                                                      'y': [average] * 2}
338 |             self.source[source_key]['patch'].data = {'x': [x[0], x[-1], x[-1], x[0]],
339 |                                                      'y': [upper_bound, upper_bound, lower_bound, lower_bound]}
340 |         else:
341 |             self.source[source_key]['trend'].data = {'x': [], 'y': [], 'mrn': []}
342 |             self.source[source_key]['bound'].data = {'x': [], 'mrn': [], 'upper': [], 'avg': [], 'lower': [], 'y': []}
343 |             self.source[source_key]['patch'].data = {'x': [], 'y': []}
344 | 
345 |     def update_ichart(self):
346 |         self.ichart.yaxis.axis_label = self.select_y.value
347 | 
348 |         for grp in GROUPS:
349 |             y = self.source[grp]['plot'].data['y']
350 |             mrn = self.source[grp]['plot'].data['id']
351 |             dates = self.source[grp]['plot'].data['x']
352 |             gamma_crit = self.source[grp]['plot'].data['gamma_crit']
353 |             gamma_index = self.source[grp]['plot'].data['gamma_index']
354 |             daily_corr = self.source[grp]['plot'].data['daily_corr']
355 |             dta = self.source[grp]['plot'].data['dta']
356 |             file_name = self.source[grp]['plot'].data['file_name']
357 |             x = list(range(len(dates)))
358 | 
359 |             center_line, ucl, lcl = get_control_limits(y)
360 | 
361 |             if self.select_y.value in ['Gamma-Index', 'DTA'] and ucl > 100:
362 |                 ucl = 100
363 | 
364 |             colors = ['red', 'blue']
365 |             alphas = [0.3, 0.4]
366 |             color = [colors[ucl >= value >= lcl] for value in y]
367 |             alpha = [alphas[ucl >= value >= lcl] for value in y]
368 | 
369 |             self.ichart_source[grp]['plot'].data = {'x': x, 'y': y, 'mrn': mrn, 'gamma_crit': gamma_crit,
370 |                                                     'gamma_index': gamma_index, 'daily_corr': daily_corr, 'dta': dta,
371 |                                                     'color': color, 'alpha': alpha, 'dates': dates,
372 |                                                     'file_name': file_name}
373 | 
374 |             if len(x) > 1:
375 |                 self.ichart_source[grp]['patch'].data = {'x': [x[0], x[-1], x[-1], x[0]],
376 |                                                          'y': [ucl, ucl, lcl, lcl]}
377 |                 self.ichart_source[grp]['center_line'].data = {'x': [min(x), max(x)],
378 |                                                                'y': [center_line] * 2,
379 |                                                                'mrn': ['center line'] * 2}
380 | 
381 |                 self.ichart_source[grp]['lcl_line'].data = {'x': [min(x), max(x)],
382 |                                                             'y': [lcl] * 2,
383 |                                                             'mrn': ['LCL line'] * 2}
384 |                 self.ichart_source[grp]['ucl_line'].data = {'x': [min(x), max(x)],
385 |                                                             'y': [ucl] * 2,
386 |                                                             'mrn': ['UCL line'] * 2}
387 | 
388 |                 self.div_center_line[grp].text = "Center line: %0.3f" % center_line
389 |                 self.div_ucl[grp].text = "UCL: %0.3f" % ucl
390 |                 self.div_lcl[grp].text = "LCL: %0.3f" % lcl
391 |             else:
392 |                 self.ichart_source[grp]['patch'].data = {'x': [], 'y': []}
393 |                 self.ichart_source[grp]['center_line'].data = {'x': [], 'y': [], 'mrn': []}
394 |                 self.ichart_source[grp]['lcl_line'].data = {'x': [], 'y': [], 'mrn': []}
395 |                 self.ichart_source[grp]['ucl_line'].data = {'x': [], 'y': [], 'mrn': []}
396 | 
397 |                 self.div_center_line[grp].text = "Center line:"
398 |                 self.div_ucl[grp].text = "UCL:"
399 |                 self.div_lcl[grp].text = "LCL:"
400 | 
--------------------------------------------------------------------------------
/IQDM/utilities.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | utility functions for the IMRT QA PDF report parser
4 | Created on Thu May 30 2019
5 | @author: Dan Cutright, PhD
6 | """
7 | 
8 | from os.path import isdir, join, splitext, normpath
9 | from os import walk, listdir
10 | import zipfile
11 | from datetime import datetime
12 | from dateutil.parser import parse as date_parser
13 | import numpy as np
14 | import codecs
15 | 
16 | DELIMITER = ','  # delimiter for the csv output file for process_files
17 | ALTERNATE = '^'  # replace the delimiter character with this so not to confuse csv file parsing
18 | 
19 | 
20 | def are_all_strings_in_text(text, list_of_strings):
21 |     """
22 |     :param text: output from convert_pdf_to_text
23 |     :type text: str
24 |     :param list_of_strings: a list of strings used to identify document type
25 |     :type list_of_strings: list of str
26 |     :return: Will return true if every string in list_of_strings is found in the text data
27 |     :rtype: bool
28 |     """
29 |     for str_to_find in list_of_strings:
30 |         if str_to_find not in text:
31 |             return False
32 |     return True
33 | 
34 | 
35 | #############################################################
36 | # CSV related functions
37 | #############################################################
38 | def get_csv(data, columns):
39 |     """
40 |     Convert a dictionary of data into a row for a csv file
41 |     :param data: a dictionary with values with str representations
42 |     :type data: dict
43 |     :param columns: a list of keys dictating the order of the csv
44 |     :type columns: list
45 |     :return: a csv string delimited by DELIMITER
46 |     :rtype: str
47 |     """
48 |     clean_csv = [str(data[column]).replace(DELIMITER, ALTERNATE) for column in columns]
49 |     return DELIMITER.join(clean_csv)
50 | 
51 | 
52 | def load_csv_file(file_path):
53 |     with codecs.open(file_path, 'r', encoding='utf-8', errors='ignore') as doc:
54 |         return [line.split(',') for line in doc]
55 | 
56 | 
57 | def import_csv(file_path, day_first=False):
58 |     raw_data = load_csv_file(file_path)
59 |     keys = raw_data.pop(0)  # remove column header row
60 |     keys = [key.strip() for key in keys if key.strip()] + ['file_name']
61 |     data = {key: [] for key in keys}
62 |     for row in raw_data:
63 |         for col, key in enumerate(keys):
64 |             data[key].append(row[col])
65 | 
66 |     sorted_data = {key: [] for key in keys}
67 |     sorted_data['date_time_obj'] = []
68 | 
69 |     date_time_objs = get_date_times(data, day_first=day_first)
70 | 
71 |     for i in get_sorted_indices(date_time_objs):
72 |         for key in keys:
73 |             sorted_data[key].append(data[key][i])
74 |         sorted_data['date_time_obj'].append(date_time_objs[i])
75 | 
76 |     return sorted_data
77 | 
78 | 
79 | def get_file_names_from_csv_file(file_path):
80 |     raw_data = load_csv_file(file_path)
81 |     column_headers = raw_data.pop(0)  # remove column header row
82 |     fp_start = len(column_headers)
83 |     file_names = []
84 |     for row in raw_data:
85 |         file_name_fields = [value for value in row[fp_start:]]
86 |         file_name = ','.join(file_name_fields)
87 |         file_names.append(normpath(file_name.strip()))
88 |     return file_names
89 | 
90 | 
91 | #############################################################
92 | # Plotting and Stat related functions
93 | #############################################################
94 | def collapse_into_single_dates(x, y):
95 |     """
96 |     Function used for a time plot to convert multiple values into one value, while retaining enough information
97 |     to perform a moving average over time
98 |     :param x: a list of dates in ascending order
99 |     :param y: a list of values, one per date, supporting the '+' operator
100 |     :return: a unique list of dates, the sum of y for each date, and the number of original points for each date
101 |     :rtype: dict
102 |     """
103 | 
104 |     # average daily data and keep track of points per day
105 |     x_collapsed = [x[0]]
106 |     y_collapsed = [y[0]]
107 |     w_collapsed = [1]
108 |     for n in range(1, len(x)):
109 |         if x[n] == x_collapsed[-1]:
110 |             y_collapsed[-1] = (y_collapsed[-1] + y[n])
111 |             w_collapsed[-1] += 1
112 |         else:
113 |             x_collapsed.append(x[n])
114 |             y_collapsed.append(y[n])
115 |             w_collapsed.append(1)
116 | 
117 |     return {'x': x_collapsed, 'y': y_collapsed, 'w': w_collapsed}
118 | 
119 | 
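# Worked example of the two-step trend pipeline (hypothetical values): with
# x = [d1, d1, d2] and y = [1, 3, 5], collapse_into_single_dates() returns
# {'x': [d1, d2], 'y': [4, 5], 'w': [2, 1]} -- daily sums plus point counts.
# moving_avg() below then averages the daily means (4/2 = 2 and 5/1 = 5), so a
# 2-point window yields x = [d2], y = [(2 + 5) / 2] = [3.5].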
120 | def moving_avg(xyw, avg_len):
121 |     """
122 |     Calculate a moving average for a given averaging length
123 |     :param xyw: output from collapse_into_single_dates
124 |     :type xyw: dict
125 |     :param avg_len: average of these number of points, i.e., look-back window
126 |     :type avg_len: int
127 |     :return: list of x values, list of y values
128 |     :rtype: tuple
129 |     """
130 |     cumsum, moving_aves, x_final = [0], [], []
131 | 
132 |     for i, y in enumerate(xyw['y'], 1):
133 |         cumsum.append(cumsum[i - 1] + y / xyw['w'][i - 1])
134 |         if i >= avg_len:
135 |             moving_ave = (cumsum[i] - cumsum[i - avg_len]) / avg_len
136 |             moving_aves.append(moving_ave)
137 |     x_final = [xyw['x'][i] for i in range(avg_len - 1, len(xyw['x']))]
138 | 
139 |     return x_final, moving_aves
140 | 
141 | 
142 | def get_sorted_indices(some_list):
143 |     try:
144 |         return [i[0] for i in sorted(enumerate(some_list), key=lambda x: x[1])]
145 |     except TypeError:  # can't sort if a mix of str and float
146 |         try:
147 |             temp_data = [[value, -float('inf')][value == 'None'] for value in some_list]
148 |             return [i[0] for i in sorted(enumerate(temp_data), key=lambda x: x[1])]
149 |         except TypeError:
150 |             temp_data = [str(value) for value in some_list]
151 |             return [i[0] for i in sorted(enumerate(temp_data), key=lambda x: x[1])]
152 | 
153 | 
154 | def get_date_times(data, datetime_key='Plan Date', row_id_key='Patient ID', day_first=False):
155 |     dates = []
156 |     for i, date_str in enumerate(data[datetime_key]):
157 |         try:
158 |             dates.append(date_parser(date_str, dayfirst=day_first).date())
159 |         except ValueError:
160 |             print('ERROR: Could not parse the following into a date: %s' % date_str)
161 |             print("\tPatient ID: %s" % data[row_id_key][i])
162 |             print("\tUsing today's date instead")
163 |             dates.append(datetime.today().date())
164 |     return dates
165 | 
166 | 
167 | def get_control_limits(y):
168 |     """
169 |     Calculate control limits for Control Chart
170 |     :param y: data
171 |     :type y: list
172 |     :return: center line, upper control limit, and lower control limit
173 |     """
174 |     y = np.array(y)
175 | 
176 |     center_line = np.mean(y)
177 |     avg_moving_range = np.mean(np.absolute(np.diff(y)))
178 | 
179 |     scalar_d = 1.128
180 | 
181 |     ucl = center_line + 3 * avg_moving_range / scalar_d
182 |     lcl = center_line - 3 * avg_moving_range / scalar_d
183 | 
184 |     return center_line, ucl, lcl
185 | 
186 | 
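# Worked example for get_control_limits() above (hypothetical data): for
# y = [98, 99, 97, 100], center_line = 98.5 and the moving ranges are
# |99-98|, |97-99|, |100-97| = [1, 2, 3], so avg_moving_range = 2.0.
# scalar_d = 1.128 is the standard control chart d2 constant for a moving
# range of two observations, giving UCL = 98.5 + 3 * 2.0 / 1.128 ~= 103.82
# and LCL = 98.5 - 3 * 2.0 / 1.128 ~= 93.18.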
#############################################################
# File related functions
#############################################################
def extract_files_from_zipped_files(init_directory, extract_to_path, extension='.pdf'):
    """
    Function to extract .pdf files from zipped files
    :param init_directory: initial top-level directory to walk through
    :type init_directory: str
    :param extract_to_path: directory to extract pdfs into
    :type extract_to_path: str
    :param extension: file extension of file type to extract, set to None to extract all files
    :type extension: str or None
    """
    for dirName, subdirList, fileList in walk(init_directory):  # iterate through files and all sub-directories
        for fileName in fileList:
            if splitext(fileName)[1].lower() == '.zip':
                zip_file_path = join(dirName, fileName)
                with zipfile.ZipFile(zip_file_path, 'r') as z:
                    for file_name in z.namelist():
                        if not isdir(file_name) and (extension is None or splitext(file_name)[1].lower() == extension):
                            temp_path = join(extract_to_path)
                            z.extract(file_name, path=temp_path)


def find_latest_results(init_directory, no_recursive_search=False):
    """
    Find the most recent IQDM results csv file within the provided directory
    :param init_directory: initial scan directory
    :type init_directory: str
    :param no_recursive_search: set to True to ignore subdirectories
    :type no_recursive_search: bool
    :return: a dictionary like {report_type: {'time_stamp': datetime, 'file_path': str}}
    :rtype: dict
    """
    results = {}
    if no_recursive_search:
        process_result_csvs(listdir(init_directory), results)
    else:
        for dirName, subdirList, fileList in walk(init_directory):  # iterate through files and all sub-directories
            process_result_csvs(fileList, results, directory_name=dirName)
    return results


def process_result_csvs(file_list, results, directory_name=None):
    """
    Parse each file for report type and time stamp, edit results with the latest file_path for each report_type
    :param file_list: files to be parsed
    :type file_list: list
    :param results: results dict from find_latest_results()
    :type results: dict
    :param directory_name: optionally specify the directory
    :type directory_name: str
    """
    for file_name in file_list:
        fn = splitext(file_name)[0].lower()
        ext = splitext(file_name)[1].lower()
        if ext == '.csv' and '_results_' in fn:
            try:
                result_info = file_name.split('_')
                report_type = result_info[0]
                time_stamp = result_info[2].replace(ext, '')
                time_stamp = datetime.strptime(time_stamp[:-7], '%Y-%m-%d %H-%M-%S')

                if report_type and (report_type not in results.keys()
                                    or results[report_type]['time_stamp'] < time_stamp):
                    if directory_name is None:
                        file_path = file_name
                    else:
                        file_path = join(directory_name, file_name)
                    results[report_type] = {'time_stamp': time_stamp, 'file_path': file_path}
            except Exception:
                continue


def get_processed_files(init_directory, no_recursive_search=False):
    processed = []
    if no_recursive_search:
        get_file_names_from_result_csvs(listdir(init_directory), processed)
    else:
        for dirName, subdirList, fileList in walk(init_directory):  # iterate through files and all sub-directories
            get_file_names_from_result_csvs(fileList, processed, directory_name=dirName)
    return list(set(processed))


def get_file_names_from_result_csvs(file_list, processed, directory_name=None):
    for file_name in file_list:
        fn = splitext(file_name)[0].lower()
        ext = splitext(file_name)[1].lower()
        if ext == '.csv' and '_results_' in fn:
            if directory_name is None:
                file_path = file_name
            else:
                file_path = join(directory_name, file_name)
            try:
                file_names = get_file_names_from_csv_file(file_path)
                processed.extend(file_names)
            except Exception:
                continue


def is_file_name_found_in_processed_files(file_name, directory, processed_files):
    for processed_file in processed_files:
        if normpath(file_name) in processed_file or normpath(join(directory, file_name)) == processed_file:
            return True
    return False

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2019 Dan Cutright
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include IQDM *.py
2 | include README.md
3 | include LICENSE
4 | include setup.py
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # IMRT-QA-Data-Miner
2 | Scans a directory for IMRT QA results.
3 | 
4 | 
5 | ### THIS PROJECT HAS MOVED
6 | This project has since been sponsored by the AAPM's IMRT Working Group.
7 | There is a new project here: https://github.com/IQDM/IQDM-PDF
8 | 
9 | The new version does not have plotting built in, but we plan to use
10 | [DVHA Stats](https://github.com/cutright/DVHA-Stats) to take care of that
11 | once enough testing has been done, and more vendors are added.
12 | 
13 | 
14 | ### Install
15 | ~~~~
16 | pip install iqdm
17 | ~~~~
18 | 
19 | ### How to run
20 | To scan a directory for IMRT QA report files and generate a results .csv file:
21 | ~~~~
22 | iqdm <path-to-directory>
23 | ~~~~
24 | To launch a trending dashboard (and open the resulting link):
25 | ~~~~
26 | iqdm <path-to-results-csv>
27 | ~~~~
28 | 
29 | Screenshot of dashboard:
30 | 
31 | 
32 | 
33 | ### Command line usage
34 | ~~~~
35 | usage: iqdm [-h] [-ie] [-od OUTPUT_DIR] [-rd RESULTS_DIR] [-all]
36 |             [-of OUTPUT_FILE] [-ver] [-nr] [-df] [-p PORT]
37 |             [-wo WEBSOCKET_ORIGIN]
38 |             [file_path]
39 | 
40 | Command line interface for IQDM
41 | 
42 | positional arguments:
43 |   file_path             Initiate scan if directory, launch dashboard if
44 |                         results file
45 | 
46 | optional arguments:
47 |   -h, --help            show this help message and exit
48 |   -ie, --ignore-extension
49 |                         Script will check all files, not just ones with .pdf
50 |                         extensions
51 |   -od OUTPUT_DIR, --output-dir OUTPUT_DIR
52 |                         Output stored in local directory by default, specify
53 |                         otherwise here
54 |   -rd RESULTS_DIR, --results-dir RESULTS_DIR
55 |                         Results assumed to be stored in local directory by
56 |                         default, specify otherwise here
57 |   -all, --process-all   Process all identified report files, otherwise only
58 |                         new reports will be analyzed
59 |   -of OUTPUT_FILE, --output-file OUTPUT_FILE
60 |                         Output will be saved as <report_type>_results_<time-stamp>.csv
61 |                         by default. Define this tag to customize
62 |                         file name after <report_type>_
63 |   -ver, --version       Print the IQDM version
64 |   -nr, --no-recursive-search
65 |                         Include this flag to skip sub-directories
66 |   -df, --day-first      Assume day first for ambiguous dates in trending
67 |                         dashboard
68 |   -p PORT, --port PORT  Specify port of trending dashboard webserver
69 |   -wo WEBSOCKET_ORIGIN, --allow-websocket-origin WEBSOCKET_ORIGIN
70 |                         Allow a websocket origin other than localhost, see
71 |                         bokeh documentation
72 | ~~~~
73 | 
74 | ### Notes
75 | This script was written specifically for SNC Patient and Delta4, but I'd be happy to include support for other vendors
76 | if someone could provide some anonymized example reports.
77 | 
78 | ### Vendor Compatibility
79 | * **[Sun Nuclear](http://sunnuclear.com)**: *SNC Patient*
80 |     * ArcCheck compatibility contributed by [Marc Chamberland](https://github.com/mchamberland)
81 | * **[ScandiDos](http://scandidos.com)**: *Delta4*
82 | This is still in beta, but the reported csv data is largely correct (the reported energy might be off). The class parses much
83 | more data (including individual beam results), but that data is not yet included in the csv output, nor has it been validated.
84 | 
85 | 
86 | ### Contributing
87 | If you'd like to contribute code to support a new vendor, please create a new python file in the parsers directory
88 | containing a new class. This class should include the following to be compatible (a minimal sketch follows this list):
89 | 
90 | * **PROPERTIES**
91 |     * **identifiers**
92 |         this is a list of strings that collectively and uniquely are found in a report type
93 |     * **columns**
94 |         a list of strings indicating the columns of the csv to be output
95 |     * **csv**
96 |         a string of values for each column, delimited with DELIMITER in utilities.py
97 |     * **report_type**
98 |         a string succinctly describing the report, this will be used in the results filename created in main.py
99 | 
100 | * **METHODS**
101 |     * **process_data(text_data)**
102 |         processing the data does not occur until this is called
103 | 
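Below is a minimal, hypothetical sketch of such a class. The vendor name, identifiers, and
column values are placeholders, not a real parser; see delta4.py or sncpatient.py in the
parsers directory for working examples.

~~~~
from IQDM.utilities import get_csv


class ExampleVendorReport:

    # strings that, together, uniquely identify this report type
    identifiers = ['Example Vendor', 'QA Report']
    # column order for the output csv
    columns = ['Patient Name', 'Patient ID', 'Plan Date']
    # prepended to the results csv file name by main.py
    report_type = 'example_vendor'

    def process_data(self, text_data):
        # a real parser would extract these values from the converted pdf text;
        # the assignments below are placeholders
        self.data = {'Patient Name': 'ANONYMOUS',
                     'Patient ID': '000000',
                     'Plan Date': '2019-05-30'}

    @property
    def csv(self):
        return get_csv(self.data, self.columns)
~~~~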
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 | 
3 | requires = [
4 |     'pdfminer.six',
5 |     'pdfminer > 19',
6 |     'numpy',
7 |     'python-dateutil',
8 |     'chardet == 3.0.4',
9 |     'pathvalidate',
10 |     'bokeh'
11 | ]
12 | 
13 | with open('README.md', 'r') as doc:
14 |     long_description = doc.read()
15 | 
16 | setup(
17 |     name='IQDM',
18 |     include_package_data=True,
19 |     packages=find_packages(),
20 |     version='0.3.1',
21 |     description='Scans a directory for IMRT QA results',
22 |     author='Dan Cutright',
23 |     author_email='dan.cutright@gmail.com',
24 |     url='https://github.com/cutright/IMRT-QA-Data-Miner/',
25 |     download_url='https://github.com/cutright/IMRT-QA-Data-Miner/archive/master.zip',
26 |     license="MIT License",
27 |     keywords=['radiation therapy', 'qa', 'research'],
28 |     classifiers=[],
29 |     install_requires=requires,
30 |     entry_points={
31 |         'console_scripts': [
32 |             'IQDM=IQDM.main:main',
33 |         ],
34 |     },
35 |     long_description=long_description,
36 |     long_description_content_type="text/markdown"
37 | )
--------------------------------------------------------------------------------