├── .gitignore
├── CHANGELOG.md
├── IQDM
│   ├── __init__.py
│   ├── main.py
│   ├── parsers
│   │   ├── __init__.py
│   │   ├── delta4.py
│   │   ├── parser.py
│   │   └── sncpatient.py
│   ├── pdf_to_text.py
│   ├── pdf_to_text_data.py
│   ├── trending.py
│   ├── trending_arccheck.py
│   ├── trending_delta4.py
│   └── utilities.py
├── LICENSE
├── MANIFEST.in
├── README.md
└── setup.py

/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | .vscode/
3 | .idea/
4 | *__pycache__*
5 | build/
6 | dist/
7 | *.egg-info
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Change log of IQDM
2 |
3 | v0.3.2 (TBD)
4 | --------------------
5 | - [Issue 17] IN PROGRESS: Allow only new reports to be processed
6 |
7 | v0.3.1 (2020.01.21)
8 | --------------------
9 | - [Misc] IQDM is FINALLY using a change log
10 | - [Issue 19] Add option to assume day first for ambiguous dates
11 | - [Trending] Allow installed IQDM to launch trending from terminal
--------------------------------------------------------------------------------
/IQDM/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cutright/IMRT-QA-Data-Miner/85abf9dc66a139c02574c386377f46f0944c5893/IQDM/__init__.py
--------------------------------------------------------------------------------
/IQDM/main.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | main program for IMRT QA PDF report parser
4 | Created on Thu May 30 2019
5 | @author: Dan Cutright, PhD
6 | """
7 |
8 | from __future__ import print_function
9 | from os.path import isdir, isfile, join, splitext, basename, dirname
10 | from os import walk, listdir
11 | from datetime import datetime
12 | from IQDM.parsers.parser import ReportParser
13 | from IQDM.utilities import DELIMITER, is_file_name_found_in_processed_files, get_processed_files
14 | from IQDM.pdf_to_text import convert_pdf_to_txt
15 | import argparse
16 | from pathvalidate import sanitize_filename
17 | import subprocess
18 |
19 |
20 | CURRENT_VERSION = '0.3.1'
21 |
22 | SCRIPT_DIR = dirname(__file__)
23 |
24 |
25 | def pdf_to_qa_result(abs_file_path):
26 |     """
27 |     Given an absolute file path, convert the file to text and parse it
28 |     :param abs_file_path: file to be converted to text
29 |     :return: csv row to be written to the csv file, report type, column headers for the csv
30 |     :rtype: tuple
31 |     """
32 |
33 |     text = convert_pdf_to_txt(abs_file_path)
34 |
35 |     report_obj = ReportParser(text)
36 |     if report_obj.report is not None:
37 |         return report_obj.csv + DELIMITER + abs_file_path, report_obj.report_type, report_obj.columns
38 |
39 |
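# Illustrative usage sketch (not from the repository): how pdf_to_qa_result is
# typically consumed. The report path below is a placeholder.
from IQDM.main import pdf_to_qa_result
from IQDM.utilities import DELIMITER

result = pdf_to_qa_result('/path/to/qa_report.pdf')
if result is not None:  # None means no class in REPORT_CLASSES matched the text
    csv_row, report_type, columns = result
    print(report_type)  # e.g. 'delta4' or 'sncpatient'
    # csv_row carries one value per column, plus the source file path appended
    print(dict(zip(columns, csv_row.split(DELIMITER))))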
40 | def process_files(init_directory, ignore_extension=False, output_file=None, output_dir=None, no_recursive_search=False,
41 |                   process_all=True, results_dir=None):
42 |     """
43 |     Given an initial directory, process all pdf files into parser classes, write their csv property to results_file
44 |     :param init_directory: initial scanning directory
45 |     :param ignore_extension: if you'd like to catch pdf files that are missing the .pdf extension, set to True
46 |     :type ignore_extension: bool
47 |     :param output_file: user specified output file name, report type will be prepended to this value
48 |     :param output_dir: user specified output directory, default value is the local directory
49 |     :param no_recursive_search: to ignore sub-directories, set to True
50 |     :type no_recursive_search: bool
51 |     :param process_all: if False, skip any file already listed in the results csv files found in the local directory or the specified results_dir
52 |     :type process_all: bool
53 |     :param results_dir: directory containing results files
54 |     :type results_dir: str
55 |     """
56 |
57 |     if process_all:
58 |         ignored_files = []
59 |     else:
60 |         results_dir = [results_dir, ''][results_dir is None]
61 |         ignored_files = get_processed_files(results_dir, no_recursive_search=no_recursive_search)
62 |
63 |     time_stamp = str(datetime.now()).replace(':', '-').replace('.', '-')
64 |     if output_file is None:
65 |         output_file = "results_%s.csv" % time_stamp
66 |
67 |     if no_recursive_search:
68 |         for file_name in listdir(init_directory):
69 |             if not is_file_name_found_in_processed_files(file_name, init_directory, ignored_files):
70 |                 if ignore_extension or splitext(file_name)[1].lower() == '.pdf':
71 |                     file_path = join(init_directory, file_name)
72 |                     process_file(file_path, output_file, output_dir)
73 |             else:
74 |                 print('File previously processed: %s' % join(init_directory, file_name))
75 |     else:
76 |         for dirName, subdirList, fileList in walk(init_directory):  # iterate through files and all sub-directories
77 |             for file_name in fileList:
78 |                 if not is_file_name_found_in_processed_files(file_name, init_directory, ignored_files):
79 |                     if ignore_extension or splitext(file_name)[1].lower() == '.pdf':
80 |                         file_path = join(dirName, file_name)
81 |                         process_file(file_path, output_file, output_dir)
82 |                 else:
83 |                     print('File previously processed: %s' % join(dirName, file_name))
84 |
85 |
86 | def process_file(file_path, output_file, output_dir):
87 |     try:
88 |         row, report_type, columns = pdf_to_qa_result(file_path)  # process file
89 |     except Exception as e:
90 |         print(str(e))
91 |         print('Skipping: %s' % file_path)
92 |         return
93 |
94 |     current_file = "%s_%s" % (report_type, output_file)  # prepend report type to file name
95 |     if output_dir:
96 |         current_file = join(output_dir, current_file)
97 |     if row:
98 |         if not isfile(current_file):  # if file doesn't exist, need to write columns
99 |             with open(current_file, 'w') as csv:
100 |                 csv.write(DELIMITER.join(columns) + '\n')
101 |         with open(current_file, "a") as csv:  # write the processed data
102 |             csv.write(row + '\n')
103 |     print("Processed: %s" % file_path)
104 |
105 |
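# Illustrative usage sketch (not from the repository): invoking the miner
# programmatically instead of through the IQDM console script. Both paths
# below are placeholders.
from IQDM.main import process_files

process_files('/path/to/qa/reports',
              ignore_extension=False,      # only scan files ending in .pdf
              output_dir='/path/to/output',
              no_recursive_search=False,   # walk sub-directories as well
              process_all=True)            # re-process previously seen reports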
106 | def main():
107 |
108 |     cmd_parser = argparse.ArgumentParser(description="Command line interface for IQDM")
109 |     cmd_parser.add_argument('-ie', '--ignore-extension',
110 |                             dest='ignore_extension',
111 |                             help='Script will check all files, not just ones with .pdf extensions',
112 |                             default=False,
113 |                             action='store_true')
114 |     cmd_parser.add_argument('-od', '--output-dir',
115 |                             dest='output_dir',
116 |                             help='Output stored in local directory by default, specify otherwise here',
117 |                             default=None)
118 |     cmd_parser.add_argument('-rd', '--results-dir',
119 |                             dest='results_dir',
120 |                             help='Results assumed to be stored in local directory by default, specify otherwise here',
121 |                             default=None)
122 |     cmd_parser.add_argument('-all', '--process-all',
123 |                             dest='process_all',
124 |                             help='Process all identified report files, otherwise only new reports will be analyzed',
125 |                             default=False,
126 |                             action='store_true')
127 |     cmd_parser.add_argument('-of', '--output-file',
128 |                             dest='output_file',
129 |                             help='Output will be saved as <report_type>_results_<time-stamp>.csv by default. '
130 |                                  'Define this tag to customize the file name after <report_type>_',
131 |                             default=None)
132 |     cmd_parser.add_argument('-ver', '--version',
133 |                             dest='print_version',
134 |                             help='Print the IQDM version',
135 |                             default=False,
136 |                             action='store_true')
137 |     cmd_parser.add_argument('-nr', '--no-recursive-search',
138 |                             dest='no_recursive_search',
139 |                             help='Include this flag to skip sub-directories',
140 |                             default=False,
141 |                             action='store_true')
142 |     cmd_parser.add_argument('-df', '--day-first',
143 |                             dest='day_first',
144 |                             help='Assume day first for ambiguous dates in trending dashboard',
145 |                             default=False,
146 |                             action='store_true')
147 |     cmd_parser.add_argument('-p', '--port',
148 |                             dest='port',
149 |                             help='Specify port of trending dashboard webserver',
150 |                             default='5006')
151 |     cmd_parser.add_argument('-wo', '--allow-websocket-origin',
152 |                             dest='websocket_origin',
153 |                             help='Allow a websocket origin other than localhost, see bokeh documentation',
154 |                             default=None)
155 |     cmd_parser.add_argument('file_path', nargs='?',
156 |                             help='Initiate scan if directory, launch dashboard if results file')
157 |     args = cmd_parser.parse_args()
158 |
159 |     # if args.file_path and len(args.file_path) > 2:
160 |     #     print("Too many arguments provided. Please only provide the initial scanning directory after IQDM")
161 |     #     return
162 |
163 |     path = args.file_path
164 |     if not path or len(path) < 2:
165 |         if args.print_version:
166 |             print('IMRT-QA-Data-Miner: IQDM v%s' % CURRENT_VERSION)
167 |             return
168 |         else:
169 |             print('Initial directory or results file for trending not provided!')
170 |             return
171 |
172 |     if not isdir(path):
173 |         if isfile(path) and splitext(path)[1].lower() == '.csv':
174 |             if basename(path).startswith('delta4_results_'):
175 |                 trend_path = join(SCRIPT_DIR, 'trending.py')
176 |             elif basename(path).startswith('sncpatient_results_'):
177 |                 trend_path = join(SCRIPT_DIR, 'trending_arccheck.py')
178 |             else:
179 |                 print('Did you provide an IQDM results csv?')
180 |                 return
181 |             try:
182 |                 day_first = ['false', 'true'][args.day_first]  # must pass a string in subprocess.run()
183 |                 cmd = ['bokeh', 'serve', trend_path, '--port', args.port]
184 |                 if args.websocket_origin:
185 |                     cmd.extend(['--allow-websocket-origin', args.websocket_origin])
186 |                 cmd.extend(['--args', path, day_first])
187 |                 subprocess.run(cmd)
188 |             except KeyboardInterrupt:
189 |                 pass
190 |             return  # dashboard session ended; do not fall through to scanning
191 |         else:
192 |             print("%s is not a valid or accessible directory" % path)
193 |             return
194 |
195 |     output_file, print_file_name_change = None, False
196 |     if args.output_file:
197 |         output_file = sanitize_filename(args.output_file)
198 |         if output_file not in args.output_file:
199 |             print_file_name_change = True
200 |
201 |     process_files(args.file_path,
202 |                   ignore_extension=args.ignore_extension,
203 |                   output_file=output_file,
204 |                   output_dir=args.output_dir,
205 |                   no_recursive_search=args.no_recursive_search,
206 |                   process_all=args.process_all,
207 |                   results_dir=args.results_dir)
208 |
209 |     if args.print_version:
210 |         print('IMRT-QA-Data-Miner: IQDM v%s' % CURRENT_VERSION)
211 |
212 |     if print_file_name_change:
213 |         print('Output file name was changed to <report_type>_%s' % output_file)
214 |
215 |
216 | if __name__ == '__main__':
217 |     main()
218 |
--------------------------------------------------------------------------------
/IQDM/parsers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cutright/IMRT-QA-Data-Miner/85abf9dc66a139c02574c386377f46f0944c5893/IQDM/parsers/__init__.py -------------------------------------------------------------------------------- /IQDM/parsers/delta4.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | main program for IMRT QA PDF report parser 4 | Created on Thu May 30 2019 5 | @author: Dan Cutright, PhD 6 | """ 7 | 8 | from IQDM.utilities import are_all_strings_in_text, get_csv 9 | from dateutil.parser import parse as date_parser 10 | 11 | 12 | # So far I've only come across Composite and Fraction as beam name place holders for the composite row 13 | COMPOSITE_BEAM_NAMES = ['Composite', 'Fraction'] 14 | 15 | # If you provide your possible energies here, script will do a global search instead of trying to parse the table 16 | # Table parsing was difficult, but this seems to work consistently. For example, if '6 MV, FFF' is found anywhere 17 | # in the PDF, the energy will be assumed to be 6 MV, FFF and then stop looking through the other options, therefore, 18 | # the order of ENERGY_OPTIONS is important. Set ENERGY_OPTIONS to None or [] to skip this feature 19 | ENERGY_OPTIONS = ['6 MV, FFF', '6 MV', '10 MV, FFF', '10 MV'] 20 | 21 | 22 | class Delta4Report: 23 | def __init__(self): 24 | self.report_type = 'delta4' 25 | self.columns = ['Patient Name', 'Patient ID', 'Plan Date', 'Energy', 'Daily Corr', 'Norm Dose', 'Dev', 'DTA', 26 | 'Gamma-Index', 'Dose Dev', 'Radiation Dev', 'Gamma Pass Criteria', 'Gamma Dose Criteria', 27 | 'Gamma Dist Criteria', 'Beam Count'] 28 | self.identifiers = ['ScandiDos AB', 'Treatment Summary', 'Acceptance Limits', 'Daily corr', 29 | 'Selected Detectors', 'Parameter Definitions & Acceptance Criteria, Detectors'] 30 | 31 | self.treatment_summary_columns = ['Beam', 'Gantry', 'Energy', 'Daily Corr', 'Norm Dose', 32 | 'Dev', 'DTA', 'Gamma-Index', 'Dose Dev'] 33 | self.data = {} 34 | self.index_start = {} 35 | self.index_end = {} 36 | self.text = None 37 | 38 | def process_data(self, text_data): 39 | self.text = text_data.split('\n') 40 | 41 | # Patient information 42 | if 'PRE-TREATMENT REPORT' in self.text[3]: 43 | self.data['patient_name'] = self.text[0] 44 | self.data['patient_id'] = self.text[1] 45 | elif 'Clinic' not in self.text[2]: 46 | self.data['patient_name'] = self.text[2] 47 | self.data['patient_id'] = self.text[3] 48 | else: 49 | if 'Treatment Summary' in self.text: 50 | tx_sum_index = self.text.index('Treatment Summary') 51 | self.data['patient_name'] = self.text[tx_sum_index-3] 52 | self.data['patient_id'] = self.text[tx_sum_index-2] 53 | else: 54 | self.data['patient_name'] = 'Not found' 55 | self.data['patient_id'] = 'Not found' 56 | 57 | # Beam 58 | self.index_start['Beam'] = self.get_index_of_next_text_block(self.get_string_index_in_text('°')) 59 | self.index_end['Beam'] = self.get_index_of_next_text_block(self.index_start['Beam']) - 1 60 | if self.text[self.index_start['Beam']] == 'Gantry': 61 | self.index_start['Beam'] = self.get_index_of_next_text_block(self.index_end['Beam']) 62 | self.index_end['Beam'] = self.get_index_of_next_text_block(self.index_start['Beam']) - 1 63 | self.data['Beam'] = self.get_data_block('Beam') 64 | for composite_name_option in COMPOSITE_BEAM_NAMES: 65 | if composite_name_option in self.data['Beam'][0]: 66 | self.data['Beam'].pop(0) 67 | 68 | # Gantry 69 | self.index_start['Gantry'] = self.get_index_of_next_text_block(self.index_end['Beam']) 70 | 
self.index_end['Gantry'] = self.get_index_of_next_text_block(self.index_start['Gantry']) - 1
71 |         self.data['Gantry'] = ['N/A'] + self.get_data_block('Gantry')
72 |         for composite_name_option in COMPOSITE_BEAM_NAMES:
73 |             if composite_name_option in self.data['Gantry']:
74 |                 self.data['Gantry'].pop(self.data['Gantry'].index(composite_name_option))
75 |
76 |         energy_override = []  # sometimes the energy is on the same line as the gantry
77 |         for i, row in enumerate(self.data['Gantry']):
78 |             self.data['Gantry'][i] = row.replace('\xc2', '').replace('\xb0', '')
79 |             energy_override.append(None)
80 |             row_split = row.split(' ')
81 |             if len(row_split) > 3:
82 |                 energy_override[-1] = ' '.join(row_split[-2:])
83 |                 self.data['Gantry'][i] = self.data['Gantry'][i].replace(energy_override[-1], '').strip()
84 |
85 |         # Dose and analysis
86 |         self.index_start['Analysis'] = self.get_string_index_in_text('Daily corr Norm') + 2
87 |         self.index_end['Analysis'] = self.get_index_of_next_text_block(self.index_start['Analysis']) - 1
88 |         analysis_data_block = self.text[self.index_start['Analysis']:self.index_end['Analysis']]
89 |         analysis_data = []
90 |         while analysis_data_block:
91 |             row = analysis_data_block.pop(0)
92 |
93 |             # Sometimes the Norm Dose and the other analysis data aren't in the same string,
94 |             # and sometimes they appear in a different order. Ensure they are in the same string, with Norm Dose first.
95 |             if 'Gy' not in row:
96 |                 row = "%s %s" % (analysis_data_block.pop(0), row)
97 |             if '%' not in row:
98 |                 row = "%s %s" % (row, analysis_data_block.pop(0))
99 |
100 |             if 'Gy' in row and '%' in row:
101 |                 row = row.split('%')
102 |                 split = ['Gy', 'cGy']['cGy' in row[0]]  # Report may be in cGy or Gy
103 |                 data = [row[0].split(split)[0].strip(),
104 |                         row[0].split(split)[1].strip()]
105 |                 data.extend(row[1:-1])
106 |                 analysis_data.append(data)
107 |
108 |         self.data['Norm Dose'] = [row[0] for row in analysis_data]
109 |         self.data['Dev'] = [row[1].strip() for row in analysis_data]
110 |         self.data['DTA'] = [row[2].strip() for row in analysis_data]
111 |         self.data['Gamma-Index'] = [row[3].strip() for row in analysis_data]
112 |         self.data['Dose Dev'] = [row[4].strip() for row in analysis_data]
113 |
114 |         try:
115 |             self.data['Norm Dose'][0] = float(self.data['Norm Dose'][0])
116 |         except (ValueError, IndexError):
117 |             pass
118 |
119 |         if 'factor' in self.data['Dev'][0]:
120 |             self.data['Dev'][0] = self.data['Dev'][0].replace('factor', '').strip()
121 |
122 |         # Daily Correction Factor
123 |         self.index_start['Daily Corr'] = self.get_index_of_next_text_block(self.index_end['Analysis'])
124 |         if 'Det within acceptance' in self.text[self.index_start['Daily Corr']]:
125 |             self.index_start['Daily Corr'] = self.get_index_of_next_text_block(self.index_start['Daily Corr'])
126 |         if 'index dose dev' in self.text[self.index_start['Daily Corr']]:
127 |             self.index_start['Daily Corr'] = self.get_index_of_next_text_block(self.index_start['Daily Corr'])
128 |         if 'factor' in self.text[self.index_start['Daily Corr']]:
129 |             self.index_start['Daily Corr'] = self.get_index_of_next_text_block(self.index_start['Daily Corr'])
130 |         self.index_end['Daily Corr'] = self.get_index_of_next_text_block(self.index_start['Daily Corr']) - 1
131 |         self.data['Daily Corr'] = ['N/A'] + self.get_data_block('Daily Corr')
132 |         for i, row in enumerate(self.data['Daily Corr']):
133 |             if not row.isdigit():
134 |                 self.data['Daily Corr'][i] = row[-5:]
135 |
136 |         # Energy
137 |         self.data['Energy'] = None
138 |         if ENERGY_OPTIONS:
139 |             for energy_option in ENERGY_OPTIONS:
140 |                 if 
self.data['Energy'] is None and energy_option in text_data: 141 | self.data['Energy'] = [energy_option.replace(',', '')] * len(energy_override) 142 | if self.data['Energy'] is None: 143 | self.index_start['Energy'] = self.get_index_of_next_text_block(self.index_end['Daily Corr']) 144 | if 'dose dev' in self.text[self.index_start['Energy']]: 145 | self.index_start['Energy'] = self.get_index_of_next_text_block(self.index_start['Energy']) 146 | self.index_end['Energy'] = self.get_index_of_next_text_block(self.index_start['Energy']) - 1 147 | self.data['Energy'] = ['N/A'] + self.get_data_block('Energy') 148 | if any(energy_override): # replace values with overrides found in Gantry code block 149 | for i, override in enumerate(energy_override): 150 | if override is not None: 151 | if len(self.data['Energy']) > i: 152 | self.data['Energy'][i] = override 153 | 154 | # Gamma Criteria 155 | self.index_start['Gamma Criteria'] = self.text.index('Parameter Definitions & Acceptance Criteria, Detectors') 156 | self.index_start['Acceptance Limits'] = self.text.index('Acceptance Limits') 157 | self.index_end['Gamma Criteria'] = self.index_start['Acceptance Limits'] - 1 158 | self.index_end['Acceptance Limits'] = self.get_index_of_next_text_block(self.index_start['Acceptance Limits']) - 1 159 | 160 | for row in self.get_data_block('Gamma Criteria'): 161 | if 'mm' in row: 162 | temp = row.split('mm')[0].strip() 163 | try: 164 | float(temp) 165 | self.data['gamma_dist'] = temp 166 | except: 167 | pass 168 | elif '±' in row: 169 | self.data['gamma_dose'] = row.split('±')[1].replace('%', '') 170 | 171 | self.data['gamma_pass'] = self.get_data_block('Acceptance Limits')[-1].split('%')[0] 172 | 173 | @property 174 | def radiation_device(self): 175 | for row in self.text: 176 | if row.startswith('Radiation Device: '): 177 | return row.replace('Radiation Device: ', '') 178 | return None 179 | 180 | @property 181 | def measured_date(self): 182 | index_of_first_date = self.get_index_of_first_date() 183 | date_candidate_1 = self.text[index_of_first_date].split(' ')[0] 184 | date_candidate_2 = self.text[index_of_first_date+2].split(' ')[0] 185 | try: 186 | return str(date_parser(date_candidate_1)).split(' ')[0] 187 | except: 188 | try: 189 | return str(date_parser(date_candidate_2)).split(' ')[0] 190 | except: 191 | pass 192 | return None 193 | 194 | def get_index_of_first_date(self): 195 | for i, row in enumerate(self.text): 196 | if are_all_strings_in_text(row, ['/', ':', 'M']) or \ 197 | are_all_strings_in_text(row, ['.', ':', 'M']): 198 | try: 199 | date_parser(row.split(' ')[0].strip()) 200 | return i 201 | except: 202 | pass 203 | return None 204 | 205 | def get_string_index_in_text(self, string, start_index=0): 206 | for i, row in enumerate(self.text[start_index:]): 207 | if string in row: 208 | return i 209 | return None 210 | 211 | def get_index_of_next_text_block(self, start_index): 212 | for i, row in enumerate(self.text[start_index:]): 213 | if row.strip() == '': 214 | return i + start_index + 1 215 | return None 216 | 217 | def get_data_block(self, data_type): 218 | return self.text[self.index_start[data_type]:self.index_end[data_type]] 219 | 220 | @property 221 | def summary_data(self): 222 | try: 223 | daily_corr = sum([float(f) for f in self.data['Daily Corr'] if f != 'N/A']) / (len(self.data['Daily Corr']) - 1) 224 | except: 225 | print('WARNING: Could not process daily corr for %s - %s' % 226 | (self.data['patient_name'], self.data['patient_id'])) 227 | daily_corr = 1. 
228 |
229 |         return {'Patient Name': self.data['patient_name'],
230 |                 'Patient ID': self.data['patient_id'],
231 |                 'Plan Date': self.measured_date,
232 |                 'Energy': '/'.join(list(set([e for e in self.data['Energy'] if e != 'N/A']))),
233 |                 'Daily Corr': daily_corr,
234 |                 'Norm Dose': self.data['Norm Dose'][0],
235 |                 'Dev': float(self.data['Dev'][0]),
236 |                 'DTA': float(self.data['DTA'][0]),
237 |                 'Gamma-Index': float(self.data['Gamma-Index'][0]),
238 |                 'Dose Dev': float(self.data['Dose Dev'][0]),
239 |                 'Radiation Dev': self.radiation_device,
240 |                 'Gamma Pass Criteria': float(self.data['gamma_pass']),
241 |                 'Gamma Dose Criteria': float(self.data['gamma_dose']),
242 |                 'Gamma Dist Criteria': float(self.data['gamma_dist']),
243 |                 'Beam Count': len(self.data['Beam'])}
244 |
245 |     @property
246 |     def csv(self):
247 |         return get_csv(self.summary_data, self.columns)
248 |
--------------------------------------------------------------------------------
/IQDM/parsers/parser.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | report parser selection for the IMRT QA PDF report parser
4 | Created on Thu May 30 2019
5 | @author: Dan Cutright, PhD
6 | """
7 |
8 | from IQDM.utilities import are_all_strings_in_text
9 | from IQDM.parsers.delta4 import Delta4Report
10 | from IQDM.parsers.sncpatient import SNCPatientReport
11 |
12 | # These classes will be checked in ReportParser.get_report()
13 | REPORT_CLASSES = [Delta4Report, SNCPatientReport]
14 |
15 |
16 | class ReportParser:
17 |     """
18 |     This class determines which Report class to use and subsequently processes the data.
19 |
20 |     Use of this class requires that each report class listed in REPORT_CLASSES contain the following properties:
21 |         identifiers: a list of strings that collectively are uniquely found in a report type
22 |         columns: a list of strings indicating the columns of the csv to be output
23 |         csv: a string of values for each column, delimited with DELIMITER in utilities.py
24 |         report_type: a string describing the report; this will be used in the results filename created in main.py
25 |
26 |     Each report class must also provide the following method:
27 |         process_data(text_data): processing of the data does not occur until this is called
28 |
29 |     If ReportParser.report is None, the input text was not identified to be any of the report classes listed in
30 |     REPORT_CLASSES
31 |     """
32 |     def __init__(self, text):
33 |         self.report = self.get_report(text)
34 |         if self.report:
35 |             self.columns = self.report.columns
36 |             self.csv = self.report.csv
37 |             self.report_type = self.report.report_type
38 |
39 |     @staticmethod
40 |     def get_report(text):
41 |         for report_class in REPORT_CLASSES:
42 |             rc = report_class()  # initialize class to access identifiers
43 |             if are_all_strings_in_text(text, rc.identifiers):
44 |                 rc.process_data(text)  # parse the text data
45 |                 return rc
46 |         return None
47 |
--------------------------------------------------------------------------------
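# Illustrative sketch (not a file in the repository): a minimal report class
# satisfying the ReportParser contract described above. The vendor name,
# identifiers, and parsing logic are hypothetical placeholders.
from IQDM.utilities import get_csv


class MinimalExampleReport:
    def __init__(self):
        self.report_type = 'example_vendor'               # used in the results file name
        self.columns = ['Patient ID', 'Plan Date']        # csv header order
        self.identifiers = ['Example Vendor QA Report']   # strings unique to this report type
        self.data = {}

    def process_data(self, text_data):
        # A real parser walks text_data line by line; this placeholder just
        # stores dummy values so the csv property has something to emit.
        self.data = {'Patient ID': 'unknown', 'Plan Date': 'unknown'}

    @property
    def summary_data(self):
        return {key: self.data.get(key, 'n/a') for key in self.columns}

    @property
    def csv(self):
        return get_csv(self.summary_data, self.columns)


# Registering the class is then a one-line change in parser.py:
# REPORT_CLASSES = [Delta4Report, SNCPatientReport, MinimalExampleReport]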
/IQDM/parsers/sncpatient.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | SNC Patient Report class
4 | Created on Fri Jun 21 2019
5 | @author: Dan Cutright, PhD
6 | @contributor: Marc J.P. Chamberland, PhD
7 | """
8 |
9 | from IQDM.utilities import get_csv
10 | import re
11 |
12 |
13 | class SNCPatientReport:
14 |     def __init__(self):
15 |         self.report_type = 'sncpatient'
16 |         self.columns = ['Patient Last Name', 'Patient First Name', 'Patient ID', 'Plan Date', 'Energy', 'Angle', 'Dose Type', 'Difference (%)', 'Distance (mm)',
17 |                         'Threshold (%)', 'Meas Uncertainty', 'Analysis Type', 'Total Points', 'Passed', 'Failed',
18 |                         '% Passed', 'Min', 'Max', 'Average', 'Std Dev', 'X offset (mm)', 'Y offset (mm)', 'Notes']
19 |         self.identifiers = ['QA File Parameter', 'Threshold', 'Notes', 'Reviewed By :', 'SSD', 'Depth', 'Energy']
20 |         self.text = None
21 |         self.data = {}
22 |
23 |     def process_data(self, text_data):
24 |         self.text = text_data.split('\n')
25 |         self.data['date'], self.data['hospital'] = [], []
26 |         for row in self.text:
27 |             if row.find('Date: ') > -1:
28 |                 self.data['date'] = row.split('Date: ', 1)[-1]  # take text after the label (str.strip would remove characters, not the prefix)
29 |             if row.find('Hospital Name: ') > -1:
30 |                 self.data['hospital'] = row.split('Hospital Name: ', 1)[-1]
31 |
32 |             if self.data['date'] and self.data['hospital']:
33 |                 break
34 |
35 |         self.data['qa_file_parameter'] = self.get_group_results('QA File Parameter')
36 |
37 |         x_offset = '0'
38 |         y_offset = '0'
39 |         try:
40 |             plan_index = self.text.index('Plan')
41 |             if self.text[plan_index + 2].find('CAX') > -1:
42 |                 x_offset, y_offset = re.findall(r'[-+]?[.]?[\d]+(?:,\d\d\d)*[\.]?\d*(?:[eE][-+]?\d+)?',
43 |                                                 self.text[plan_index + 2])
44 |         except ValueError:
45 |             pass
46 |
47 |         self.data['cax_offset'] = {'X offset': str(x_offset), 'Y offset': str(y_offset)}
48 |
49 |         # Dose Comparison Block
50 |         try:
51 |             self.text.index('Absolute Dose Comparison')
52 |             self.data['dose_comparison_type'] = 'Absolute Dose Comparison'
53 |         except ValueError:
54 |             self.data['dose_comparison_type'] = 'Relative Comparison'
55 |         self.data['dose_comparison'] = self.get_group_results(self.data['dose_comparison_type'])
56 |         if '% Diff' in list(self.data['dose_comparison']):  # Alternate for Difference (%) in some versions of the report
57 |             self.data['dose_comparison']['Difference (%)'] = self.data['dose_comparison']['% Diff']
58 |         if 'Threshold' in list(self.data['dose_comparison']):  # Alternate for Threshold (%) in some versions of the report
59 |             self.data['dose_comparison']['Threshold (%)'] = self.data['dose_comparison']['Threshold']
60 |
61 |         # Summary Analysis Block
62 |         try:
63 |             self.text.index('Summary (Gamma Analysis)')
64 |             self.data['analysis_type'] = 'Gamma'
65 |         except ValueError:
66 |             try:
67 |                 self.text.index('Summary (DTA Analysis)')
68 |                 self.data['analysis_type'] = 'DTA'
69 |             except ValueError:
70 |                 self.data['analysis_type'] = 'GC'  # Gradient Correction
71 |         self.data['summary'] = self.get_group_results('Summary (%s Analysis)' % self.data['analysis_type'])
72 |
73 |         # Gamma Index Summary Block
74 |         try:
75 |             self.text.index('Gamma Index Summary')
76 |             self.data['gamma_stats'] = self.get_gamma_statistics('Gamma Index Summary')
77 |         except ValueError:
78 |             self.data['gamma_stats'] = {'Minimum': 'n/a', 'Maximum': 'n/a', 'Average': 'n/a', 'Stdv': 'n/a'}
79 |
80 |         self.data['notes'] = self.text[self.text.index('Notes') + 1]
81 |
82 |     def get_gamma_statistics(self, stats_delimiter):
83 |         gamma_stats = {}
84 |         stats_fields = ['Minimum', 'Maximum', 'Average', 'Stdv']
85 |
86 |         group_start = self.text.index(stats_delimiter)
87 |
88 |         for field in stats_fields:
89 |             field_start = self.text[group_start:-1].index(field) + 1
90 |             gamma_stats[field] = self.text[group_start:-1][field_start]
91 |
92 |         return gamma_stats
93 |
94 |     def get_group_results(self, data_group):
95 |         """
96 |         SNC Patient reports contain three blocks of results. data_group may be among the following:
97 |             'QA File Parameter'
98 |             'Absolute Dose Comparison' or 'Relative Comparison'
99 |             'Gamma' or 'DTA'
100 |         """
101 |         group_start = self.text.index(data_group)
102 |         var_name_start = group_start + 1
103 |         data_start = self.text[var_name_start:-1].index('') + 1 + var_name_start
104 |         data_count = data_start - var_name_start
105 |
106 |         # If the patient name is too long, the pdf parsing can get offset
107 |         if self.text[data_start] == 'Set1':
108 |             data_start += 1
109 |
110 |         group_results = {}
111 |         for i in range(data_count):
112 |             if self.text[var_name_start+i]:
113 |                 group_results[self.text[var_name_start+i]] = self.text[data_start+i].replace(' : ', '')
114 |
115 |         return group_results
116 |
117 |     @property
118 |     def summary_data(self):
119 |         """
120 |         Collect the parsed data into a dictionary with keys corresponding to columns
121 |         :return: parsed data
122 |         :rtype: dict
123 |         """
124 |         patient_name = self.data['qa_file_parameter']['Patient Name'].replace('^', ' ').split(', ')
125 |         if len(patient_name) > 1:
126 |             last_name = patient_name[0].title()
127 |             first_name = patient_name[1].title()
128 |         elif len(patient_name) == 1:
129 |             last_name = patient_name[0].title()
130 |             first_name = 'n/a'
131 |         else:
132 |             last_name = 'n/a'
133 |             first_name = 'n/a'
134 |
135 |         return {'Patient Last Name': last_name,
136 |                 'Patient First Name': first_name,
137 |                 'Patient ID': self.data['qa_file_parameter']['Patient ID'],
138 |                 'Plan Date': self.data['qa_file_parameter']['Plan Date'],
139 |                 'Energy': self.data['qa_file_parameter']['Energy'],
140 |                 'Angle': self.data['qa_file_parameter']['Angle'],
141 |                 'Dose Type': self.data['dose_comparison_type'],
142 |                 'Difference (%)': self.data['dose_comparison']['Difference (%)'],
143 |                 'Distance (mm)': self.data['dose_comparison']['Distance (mm)'],
144 |                 'Threshold (%)': self.data['dose_comparison']['Threshold (%)'],
145 |                 'Meas Uncertainty': self.data['dose_comparison']['Meas Uncertainty'],
146 |                 'Analysis Type': self.data['analysis_type'],
147 |                 'Total Points': self.data['summary']['Total Points'],
148 |                 'Passed': self.data['summary']['Passed'],
149 |                 'Failed': 
self.data['summary']['Failed'], 150 | '% Passed': self.data['summary']['% Passed'], 151 | 'Min': self.data['gamma_stats']['Minimum'], 152 | 'Max': self.data['gamma_stats']['Maximum'], 153 | 'Average': self.data['gamma_stats']['Average'], 154 | 'Std Dev': self.data['gamma_stats']['Stdv'], 155 | 'X offset (mm)': self.data['cax_offset']['X offset'], 156 | 'Y offset (mm)':self.data['cax_offset']['Y offset'], 157 | 'Notes': self.data['notes']} 158 | 159 | @property 160 | def csv(self): 161 | return get_csv(self.summary_data, self.columns) 162 | -------------------------------------------------------------------------------- /IQDM/pdf_to_text.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | The following code is from StackOverflow 4 | https://stackoverflow.com/questions/26494211/extracting-text-from-a-pdf-file-using-pdfminer-in-python 5 | Web page accessed on May 30, 2019 6 | """ 7 | 8 | from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter 9 | from pdfminer.converter import TextConverter 10 | from pdfminer.layout import LAParams 11 | from pdfminer.pdfpage import PDFPage 12 | try: 13 | from io import StringIO 14 | except ImportError: 15 | from cStringIO import StringIO # python 2 16 | 17 | 18 | def convert_pdf_to_txt(path): 19 | rsrcmgr = PDFResourceManager() 20 | retstr = StringIO() 21 | laparams = LAParams() 22 | device = TextConverter(rsrcmgr, retstr, laparams=laparams) 23 | fp = open(path, 'rb') 24 | interpreter = PDFPageInterpreter(rsrcmgr, device) 25 | password = "" 26 | maxpages = 0 27 | caching = True 28 | pagenos = set() 29 | 30 | for page in PDFPage.get_pages(fp, pagenos, maxpages=maxpages, password=password, caching=caching, 31 | check_extractable=True): 32 | interpreter.process_page(page) 33 | 34 | text = retstr.getvalue() 35 | 36 | fp.close() 37 | device.close() 38 | retstr.close() 39 | return text 40 | -------------------------------------------------------------------------------- /IQDM/pdf_to_text_data.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Code adapted from Mark Amery's answer at: 4 | https://stackoverflow.com/questions/22898145/how-to-extract-text-and-text-coordinates-from-a-pdf-file 5 | Accessed August 8, 2019 6 | """ 7 | 8 | from pdfminer.pdfparser import PDFParser 9 | from pdfminer.pdfdocument import PDFDocument 10 | from pdfminer.pdfpage import PDFPage 11 | from pdfminer.pdfpage import PDFTextExtractionNotAllowed 12 | from pdfminer.pdfinterp import PDFResourceManager 13 | from pdfminer.pdfinterp import PDFPageInterpreter 14 | from pdfminer.pdfdevice import PDFDevice 15 | from pdfminer.layout import LAParams 16 | from pdfminer.converter import PDFPageAggregator 17 | import pdfminer 18 | 19 | 20 | class CustomPDFParser: 21 | def __init__(self, file_path, verbose=False): 22 | self.page = [] 23 | self.file_path = file_path 24 | self.convert_pdf_to_text(verbose=verbose) 25 | self.data = [] 26 | 27 | def print(self): 28 | for p, page in enumerate(self.page): 29 | print("Page %s" % (p+1)) 30 | page.print() 31 | 32 | def print_block(self, page, index): 33 | self.page[page].print_block(index) 34 | 35 | def get_block_data(self, page, index): 36 | return self.page[page].get_block_data(index) 37 | 38 | def get_block_data_with_y(self, page, y): 39 | return self.page[page].get_block_data_with_y(y) 40 | 41 | def convert_pdf_to_text(self, verbose=False): 42 | 43 | # Open a PDF file. 
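# Overview of the pdfminer pipeline below: PDFParser reads the raw bytes,
# PDFDocument exposes the parsed document structure, and a PDFPageAggregator
# paired with a PDFPageInterpreter renders each page into layout objects
# (e.g. LTTextBoxHorizontal) that PDFPageParser then walks to collect text
# along with its x/y coordinates.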
44 | fp = open(self.file_path, 'rb') 45 | 46 | # Create a PDF parser object associated with the file object. 47 | parser = PDFParser(fp) 48 | 49 | # Create a PDF document object that stores the document structure. 50 | # Password for initialization as 2nd parameter 51 | document = PDFDocument(parser) 52 | 53 | # Check if the document allows text extraction. If not, abort. 54 | if not document.is_extractable: 55 | raise PDFTextExtractionNotAllowed 56 | 57 | # Create a PDF resource manager object that stores shared resources. 58 | rsrcmgr = PDFResourceManager() 59 | 60 | # Create a PDF device object. 61 | device = PDFDevice(rsrcmgr) 62 | 63 | # BEGIN LAYOUT ANALYSIS 64 | # Set parameters for analysis. 65 | laparams = LAParams() 66 | 67 | # Create a PDF page aggregator object. 68 | device = PDFPageAggregator(rsrcmgr, laparams=laparams) 69 | 70 | # Create a PDF interpreter object. 71 | interpreter = PDFPageInterpreter(rsrcmgr, device) 72 | 73 | # loop over all pages in the document 74 | for p, page in enumerate(PDFPage.create_pages(document)): 75 | # read the page into a layout object 76 | interpreter.process_page(page) 77 | layout = device.get_result() 78 | 79 | # extract text from this object 80 | page_data = {'x': [], 'y': [], 'text': []} 81 | self.page.append(PDFPageParser(layout._objs, page_data, verbose=verbose)) 82 | 83 | 84 | class PDFPageParser: 85 | def __init__(self, lt_objs, page_data, verbose=False): 86 | self.lt_objs = lt_objs 87 | self.data = page_data 88 | self.verbose = verbose 89 | 90 | self.parse_obj(lt_objs) 91 | self.sort_all_data_by_y() 92 | self.sub_sort_all_data_by_x() 93 | 94 | def parse_obj(self, lt_objs): 95 | # loop over the object list 96 | for obj in lt_objs: 97 | # if it's a textbox, print text and location 98 | if isinstance(obj, pdfminer.layout.LTTextBoxHorizontal): 99 | if self.verbose: 100 | print("%6d, %6d, %s" % (obj.bbox[0], obj.bbox[1], obj.get_text().replace('\n', '_'))) 101 | self.data['x'].append(round(obj.bbox[0], 2)) 102 | self.data['y'].append(round(obj.bbox[1], 2)) 103 | # self.data['text'].append(obj.get_text().replace('\n', '_')) 104 | self.data['text'].append(obj.get_text()) 105 | # if it's a container, recurse 106 | elif isinstance(obj, pdfminer.layout.LTFigure): 107 | self.parse_obj(obj._objs) 108 | 109 | def sort_all_data_by_y(self): 110 | self.sort_all_data('y', reverse=True) 111 | 112 | def sub_sort_all_data_by_x(self): 113 | for y in set(self.data['y']): 114 | # for a given y, collect all indices, y, and text values with given y 115 | indices, x, text = [], [], [] 116 | for i, y_ in enumerate(self.data['y']): 117 | if y_ == y: 118 | indices.append(i) 119 | x.append(self.data['x'][i]) 120 | text.append(self.data['text'][i]) 121 | 122 | for sort_index, data_index in enumerate(self.get_sorted_indices(x)): 123 | self.data['x'][indices[sort_index]] = x[data_index] 124 | self.data['text'][indices[sort_index]] = text[data_index] 125 | 126 | def sort_all_data(self, sort_key, reverse=False): 127 | sorted_indices = self.get_sorted_indices(self.data[sort_key], reverse=reverse) 128 | 129 | for key in list(self.data): 130 | self.data[key] = [self.data[key][i] for i in sorted_indices] 131 | 132 | @staticmethod 133 | def get_sorted_indices(some_list, reverse=False): 134 | return [i[0] for i in sorted(enumerate(some_list), key=lambda x: x[1], reverse=reverse)] 135 | 136 | def get_coordinates(self, index): 137 | return [self.data[key][index] for key in ['x', 'y']] 138 | 139 | def print(self): 140 | for index, text in enumerate(self.data['text']): 141 | 
coord = self.get_coordinates(index) 142 | print("x:%s\ty:%s\n%s" % (coord[0], coord[1], text)) 143 | 144 | def print_block(self, index): 145 | coord = self.get_coordinates(index) 146 | print("x:%s\ty:%s\n%s" % (coord[0], coord[1], (self.data['text'][index]))) 147 | 148 | def get_block_data(self, index): 149 | coord = self.get_coordinates(index) 150 | return coord[0], coord[1], self.data['text'][index] 151 | 152 | def get_block_data_with_y(self, y, exact=False): 153 | tolerance = 20 154 | block_data = [] 155 | for i, data in enumerate(self.data['text']): 156 | if exact: 157 | if int(self.data['y'][i]) == y: 158 | block_data.append(data) 159 | else: 160 | if y + tolerance > int(self.data['y'][i]) > y - tolerance: 161 | block_data.append(data) 162 | return block_data 163 | -------------------------------------------------------------------------------- /IQDM/trending.py: -------------------------------------------------------------------------------- 1 | from bokeh.io import curdoc 2 | from IQDM.trending_delta4 import TrendingDashboard as TrendDelta4 3 | import sys 4 | 5 | 6 | FILE_PATH = sys.argv[1] 7 | DAY_FIRST = {'true': True, 'false': False}[sys.argv[2]] 8 | if 'delta4' in FILE_PATH: 9 | dashboard = TrendDelta4(FILE_PATH, day_first=DAY_FIRST) 10 | curdoc().add_root(dashboard.layout) 11 | curdoc().title = "Delta 4 Trending" 12 | 13 | else: # sncpatient 14 | pass 15 | -------------------------------------------------------------------------------- /IQDM/trending_arccheck.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # trending_arccheck.py 5 | """ 6 | Bokeh server script to analyze a delta4_results csv from IQDM 7 | """ 8 | # Copyright (c) 2019 9 | # Dan Cutright, PhD 10 | # Medical Physicist 11 | # University of Chicago Medical Center 12 | # This file is part of IMRT QA Data Miner, partial based on code from DVH Analytics 13 | 14 | from bokeh.io import curdoc 15 | from bokeh.plotting import figure 16 | from bokeh.models import HoverTool, ColumnDataSource, Select, Div, TextInput, Legend, Spacer 17 | from bokeh.layouts import column, row 18 | from bokeh.models.widgets import DatePicker, CheckboxButtonGroup 19 | import numpy as np 20 | from IQDM.utilities import collapse_into_single_dates, moving_avg, get_control_limits, import_csv 21 | import sys 22 | 23 | FILE_PATH = sys.argv[1] 24 | DAY_FIRST = day_first = {'true': True, 'false': False}[sys.argv[2]] 25 | 26 | 27 | class Plot: 28 | def __init__(self, data): 29 | 30 | self.data = data 31 | self.source = {key: {'plot': ColumnDataSource(data=dict(x=[], y=[])), 32 | 'trend': ColumnDataSource(data=dict(x=[], y=[])), 33 | 'bound': ColumnDataSource(data=dict(x=[], y=[])), 34 | 'patch': ColumnDataSource(data=dict(x=[], y=[])), 35 | 'hist': ColumnDataSource(data=dict(x=[], y=[]))} for key in [1, 2]} 36 | 37 | self.ichart = None 38 | 39 | self.__set_x() 40 | self.__create_figure() 41 | self.__add_plot_data() 42 | self.__add_histogram_data() 43 | self.__add_hover() 44 | self.__add_legend() 45 | self.__set_plot_attr() 46 | 47 | def __create_figure(self): 48 | 49 | self.fig = figure(plot_width=1000, plot_height=375, x_axis_type='datetime') 50 | self.fig.xaxis.axis_label_text_font_size = "17pt" 51 | self.fig.yaxis.axis_label_text_font_size = "17pt" 52 | self.fig.xaxis.major_label_text_font_size = "15pt" 53 | self.fig.yaxis.major_label_text_font_size = "15pt" 54 | 55 | def __add_hover(self): 56 | self.fig.add_tools(HoverTool(tooltips=[("Plan Date", 
"@x{%F}"), 57 | ("Patient", "@id"), 58 | ("y", "@y"), 59 | ('Gamma Crit', "@gamma_crit"), 60 | ('Gamma Pass', '@gamma_index'), 61 | ('file', '@file_name')], 62 | formatters={'x': 'datetime'}, 63 | renderers=[self.plot_data_1])) 64 | 65 | def __set_plot_attr(self): 66 | self.fig.title.align = 'center' 67 | 68 | def __set_x(self): 69 | self.x = self.data['date_time_obj'] 70 | 71 | def __add_plot_data(self): 72 | self.plot_data_1 = self.fig.circle('x', 'y', source=self.source[1]['plot'], color='blue', size=8, alpha=0.4) 73 | self.plot_trend_1 = self.fig.line('x', 'y', source=self.source[1]['trend'], line_color='black', line_width=4) 74 | self.plot_avg_1 = self.fig.line('x', 'avg', source=self.source[1]['bound'], line_color='black') 75 | self.plot_patch_1 = self.fig.patch('x', 'y', source=self.source[1]['patch'], color='blue', alpha=0.2) 76 | 77 | # self.plot_data_2 = self.fig.circle('x', 'y', source=self.source[2]['plot'], color='red', size=4, alpha=0.3) 78 | # self.plot_trend_2 = self.fig.line('x', 'y', source=self.source[2]['trend'], line_color='black', line_width=4) 79 | # self.plot_avg_2 = self.fig.line('x', 'avg', source=self.source[2]['bound'], line_color='black') 80 | # self.plot_patch_2 = self.fig.patch('x', 'y', source=self.source[2]['patch'], color='red', alpha=0.2) 81 | 82 | def __add_legend(self): 83 | # Set the legend 84 | legend_plot = Legend(items=[("Data 1 ", [self.plot_data_1]), 85 | ("Avg 1 ", [self.plot_avg_1]), 86 | ("Rolling Avg 1 ", [self.plot_trend_1]), 87 | ("Percentile Region 1 ", [self.plot_patch_1]) 88 | ], 89 | orientation='horizontal') 90 | 91 | # Add the layout outside the plot, clicking legend item hides the line 92 | self.fig.add_layout(legend_plot, 'above') 93 | self.fig.legend.click_policy = "hide" 94 | 95 | def __add_histogram_data(self): 96 | self.histogram = figure(tools="", plot_width=1000, plot_height=275) 97 | # self.histogram.xaxis.axis_label_text_font_size = self.options.PLOT_AXIS_LABEL_FONT_SIZE 98 | # self.histogram.yaxis.axis_label_text_font_size = self.options.PLOT_AXIS_LABEL_FONT_SIZE 99 | # self.histogram.xaxis.major_label_text_font_size = self.options.PLOT_AXIS_MAJOR_LABEL_FONT_SIZE 100 | # self.histogram.yaxis.major_label_text_font_size = self.options.PLOT_AXIS_MAJOR_LABEL_FONT_SIZE 101 | # self.histogram.min_border_left = self.options.MIN_BORDER 102 | # self.histogram.min_border_bottom = self.options.MIN_BORDER 103 | self.vbar_1 = self.histogram.vbar(x='x', width='width', bottom=0, top='top', source=self.source[1]['hist'], alpha=0.5, color='blue') 104 | # self.vbar_2 = self.histogram.vbar(x='x', width='width', bottom=0, top='top', source=self.source[2]['hist'], alpha=0.5, color='red') 105 | 106 | self.histogram.xaxis.axis_label = "" 107 | self.histogram.yaxis.axis_label = "Frequency" 108 | 109 | self.histogram.xaxis.axis_label_text_font_size = "17pt" 110 | self.histogram.yaxis.axis_label_text_font_size = "17pt" 111 | self.histogram.xaxis.major_label_text_font_size = "15pt" 112 | self.histogram.yaxis.major_label_text_font_size = "15pt" 113 | 114 | def update_source(self, attr, old, new): 115 | for source_key in [1, 2]: 116 | new_data = {key: [] for key in ['x', 'y', 'id', 'gamma_crit', 'file_name', 'gamma_index']} 117 | active_gamma = [gamma_options[a] for a in checkbox_button_group.active] 118 | # if select_linac[source_key] != 'None': 119 | for i in range(len(self.x)): 120 | # if select_linac[source_key].value == 'All' or self.data['Radiation Dev'][i] == select_linac[source_key].value: 121 | if end_date_picker.value > self.x[i] > 
start_date_picker.value: 122 | gamma_crit = "%s%%/%smm" % (self.data['Difference (%)'][i], self.data['Distance (mm)'][i]) 123 | if 'Any' in active_gamma or gamma_crit in active_gamma: 124 | try: 125 | new_data['y'].append(float(self.data[select_y.value][i])) 126 | except ValueError: 127 | continue 128 | 129 | new_data['x'].append(self.x[i]) 130 | new_data['id'].append(self.data['Patient ID'][i]) 131 | new_data['gamma_crit'].append(gamma_crit) 132 | new_data['file_name'].append(self.data['file_name'][i]) 133 | new_data['gamma_index'].append('%s%%' % self.data['% Passed'][i]) 134 | # new_data['daily_corr'].append(self.data['Daily Corr'][i]) 135 | # new_data['dta'].append('%s%%' % self.data['DTA'][i]) 136 | 137 | try: 138 | y = new_data['y'] 139 | text[source_key].text = "Linac %s: Min: %0.3f | Low: %0.3f | Mean: %0.3f | Median: %0.3f | Upper: %0.3f | Max: %0.3f" % \ 140 | (source_key, np.min(y), np.percentile(y, 25), np.sum(y)/len(y), np.percentile(y, 50), np.percentile(y, 75), np.max(y)) 141 | except: 142 | text[source_key].text = "Linac %s" % source_key 143 | 144 | self.source[source_key]['plot'].data = new_data 145 | 146 | self.fig.yaxis.axis_label = select_y.value 147 | self.fig.xaxis.axis_label = 'Plan Date' 148 | 149 | self.update_histogram(source_key, bin_size=20) 150 | self.update_trend(source_key, int(float(avg_len_input.value)), float(percentile_input.value)) 151 | self.ichart.update_plot() 152 | 153 | def update_histogram(self, source_key, bin_size=10): 154 | width_fraction = 0.9 155 | hist, bins = np.histogram(self.source[source_key]['plot'].data['y'], bins=bin_size) 156 | width = [width_fraction * (bins[1] - bins[0])] * bin_size 157 | center = (bins[:-1] + bins[1:]) / 2. 158 | self.source[source_key]['hist'].data = {'x': center, 'top': hist, 'width': width} 159 | 160 | self.histogram.xaxis.axis_label = select_y.value 161 | 162 | def update_trend(self, source_key, avg_len, percentile): 163 | x = self.source[source_key]['plot'].data['x'] 164 | y = self.source[source_key]['plot'].data['y'] 165 | if x and y: 166 | x_len = len(x) 167 | 168 | data_collapsed = collapse_into_single_dates(x, y) 169 | x_trend, y_trend = moving_avg(data_collapsed, avg_len) 170 | 171 | y_np = np.array(self.source[source_key]['plot'].data['y']) 172 | upper_bound = float(np.percentile(y_np, 50. + percentile / 2.)) 173 | average = float(np.percentile(y_np, 50)) 174 | lower_bound = float(np.percentile(y_np, 50. 
- percentile / 2.)) 175 | 176 | self.source[source_key]['trend'].data = {'x': x_trend, 177 | 'y': y_trend, 178 | 'mrn': ['Avg'] * len(x_trend)} 179 | self.source[source_key]['bound'].data = {'x': [x[0], x[-1]], 180 | 'mrn': ['Series Avg'] * 2, 181 | 'upper': [upper_bound] * 2, 182 | 'avg': [average] * 2, 183 | 'lower': [lower_bound] * 2, 184 | 'y': [average] * 2} 185 | self.source[source_key]['patch'].data = {'x': [x[0], x[-1], x[-1], x[0]], 186 | 'y': [upper_bound, upper_bound, lower_bound, lower_bound]} 187 | else: 188 | self.source[source_key]['trend'].data = {'x': [], 189 | 'y': [], 190 | 'mrn': []} 191 | self.source[source_key]['bound'].data = {'x': [], 192 | 'mrn': [], 193 | 'upper': [], 194 | 'avg': [], 195 | 'lower': [], 196 | 'y': []} 197 | self.source[source_key]['patch'].data = {'x': [], 198 | 'y': []} 199 | 200 | 201 | class PlotControlChart: 202 | """ 203 | Generate plot for Control Chart frame 204 | """ 205 | def __init__(self, main_plot): 206 | 207 | self.main_plot = main_plot 208 | 209 | self.y_axis_label = '' 210 | self.source = {'plot': ColumnDataSource(data=dict(x=[], y=[], mrn=[], color=[], alpha=[], dates=[], 211 | gamma_index=[], daily_corr=[], gamma_crit=[], dta=[])), 212 | 'center_line': ColumnDataSource(data=dict(x=[], y=[], mrn=[])), 213 | 'ucl_line': ColumnDataSource(data=dict(x=[], y=[], mrn=[])), 214 | 'lcl_line': ColumnDataSource(data=dict(x=[], y=[], mrn=[])), 215 | 'bound': ColumnDataSource(data=dict(x=[], mrn=[], upper=[], avg=[], lower=[])), 216 | 'patch': ColumnDataSource(data=dict(x=[], y=[]))} 217 | 218 | self.figure = figure(plot_width=1000, plot_height=375) 219 | self.figure.xaxis.axis_label = "Study #" 220 | self.figure.xaxis.axis_label_text_font_size = "17pt" 221 | self.figure.yaxis.axis_label_text_font_size = "17pt" 222 | self.figure.xaxis.major_label_text_font_size = "15pt" 223 | self.figure.yaxis.major_label_text_font_size = "15pt" 224 | 225 | self.__add_plot_data() 226 | self.__add_hover() 227 | self.__create_divs() 228 | self.__add_legend() 229 | 230 | def __add_plot_data(self): 231 | self.plot_data = self.figure.circle('x', 'y', source=self.source['plot'], 232 | size=8, color='color', alpha='alpha') 233 | self.plot_data_line = self.figure.line('x', 'y', source=self.source['plot'], color='blue', 234 | line_dash='solid') 235 | self.plot_patch = self.figure.patch('x', 'y', color='blue', source=self.source['patch'], alpha=0.1) 236 | self.plot_center_line = self.figure.line('x', 'y', source=self.source['center_line'], alpha=1, color='black', 237 | line_dash='solid') 238 | self.plot_lcl_line = self.figure.line('x', 'y', source=self.source['lcl_line'], alpha=1, color='red', line_dash='dashed') 239 | self.plot_ucl_line = self.figure.line('x', 'y', source=self.source['ucl_line'], alpha=1, color='red', line_dash='dashed') 240 | 241 | def __add_hover(self): 242 | self.figure.add_tools(HoverTool(show_arrow=True, 243 | tooltips=[('ID', '@mrn'), 244 | ('Date', '@dates{%F}'), 245 | ('Study', '@x'), 246 | ('Value', '@y{0.2f}'), 247 | ("y", "@y"), 248 | ('Gamma Crit', "@gamma_crit"), 249 | ('Gamma Pass', '@gamma_index'), 250 | ('file', '@file_name') 251 | ], 252 | formatters={'dates': 'datetime'}, 253 | renderers=[self.plot_data])) 254 | 255 | def __add_legend(self): 256 | # Set the legend 257 | legend_plot = Legend(items=[("Charting Variable ", [self.plot_data]), 258 | ("Charting Variable Line ", [self.plot_data_line]), 259 | ('Center Line ', [self.plot_center_line]), 260 | ('UCL ', [self.plot_ucl_line]), 261 | ('LCL ', [self.plot_lcl_line])], 262 | 
orientation='horizontal') 263 | 264 | # Add the layout outside the plot, clicking legend item hides the line 265 | self.figure.add_layout(legend_plot, 'above') 266 | self.figure.legend.click_policy = "hide" 267 | 268 | def __create_divs(self): 269 | self.div_center_line = Div(text='', width=175) 270 | self.div_ucl = Div(text='', width=175) 271 | self.div_lcl = Div(text='', width=175) 272 | 273 | def update_plot(self): 274 | 275 | self.y_axis_label = select_y.value 276 | self.figure.yaxis.axis_label = self.y_axis_label 277 | 278 | y = self.main_plot.source[1]['plot'].data['y'] 279 | mrn = self.main_plot.source[1]['plot'].data['id'] 280 | dates = self.main_plot.source[1]['plot'].data['x'] 281 | gamma_crit = self.main_plot.source[1]['plot'].data['gamma_crit'] 282 | gamma_index = self.main_plot.source[1]['plot'].data['gamma_index'] 283 | # daily_corr = self.main_plot.source[1]['plot'].data['daily_corr'] 284 | # dta = self.main_plot.source[1]['plot'].data['dta'] 285 | file_name = self.main_plot.source[1]['plot'].data['file_name'] 286 | x = list(range(len(dates))) 287 | 288 | center_line, ucl, lcl = get_control_limits(y) 289 | 290 | if select_y.value in ['% Passed', 'Gamma-Index', 'DTA'] and ucl > 100: 291 | ucl = 100 292 | 293 | colors = ['red', 'blue'] 294 | alphas = [0.3, 0.4] 295 | color = [colors[ucl >= value >= lcl] for value in y] 296 | alpha = [alphas[ucl >= value >= lcl] for value in y] 297 | 298 | self.source['plot'].data = {'x': x, 'y': y, 'mrn': mrn, 'gamma_crit': gamma_crit, 'gamma_index': gamma_index, 299 | 'color': color, 'alpha': alpha, 300 | 'dates': dates, 'file_name': file_name} 301 | 302 | self.source['patch'].data = {'x': [x[0], x[-1], x[-1], x[0]], 303 | 'y': [ucl, ucl, lcl, lcl]} 304 | self.source['center_line'].data = {'x': [min(x), max(x)], 305 | 'y': [center_line] * 2, 306 | 'mrn': ['center line'] * 2} 307 | 308 | self.source['lcl_line'].data = {'x': [min(x), max(x)], 309 | 'y': [lcl] * 2, 310 | 'mrn': ['center line'] * 2} 311 | self.source['ucl_line'].data = {'x': [min(x), max(x)], 312 | 'y': [ucl] * 2, 313 | 'mrn': ['center line'] * 2} 314 | 315 | self.div_center_line.text = "Center line: %0.3f" % center_line 316 | self.div_ucl.text = "UCL: %0.3f" % ucl 317 | self.div_lcl.text = "LCL: %0.3f" % lcl 318 | 319 | def clear_div(self): 320 | self.div_center_line.text = "Center line:" 321 | self.div_ucl.text = "UCL:" 322 | self.div_lcl.text = "LCL:" 323 | 324 | 325 | data = import_csv(FILE_PATH, day_first=DAY_FIRST) 326 | plot = Plot(data) 327 | ichart = PlotControlChart(plot) 328 | plot.ichart = ichart 329 | ignored_y = ['Patient Last Name', 'Patient First Name', 'Patient ID', 'Plan Date', 'Dose Type', 'Radiation Dev', 330 | 'Energy', 'file_name', 'Meas Uncertainty', 'Analysis Type', 'Notes'] 331 | y_options = [option for option in list(data) if option not in ignored_y] 332 | select_y = Select(title='Y-variable:', value='% Passed', options=y_options) 333 | select_y.on_change('value', plot.update_source) 334 | 335 | # linacs = list(set(data['Radiation Dev'])) 336 | # linacs.sort() 337 | # linacs.insert(0, 'All') 338 | # linacs.append('None') 339 | # select_linac = {key: Select(title='Linac %s:' % key, value='All', options=['All'], width=250) for key in [1, 2]} 340 | # select_linac[2].value = 'None' 341 | # select_linac[1].on_change('value', plot.update_source) 342 | # select_linac[2].on_change('value', plot.update_source) 343 | 344 | avg_len_input = TextInput(title='Avg. 
Len:', value='10', width=100) 345 | avg_len_input.on_change('value', plot.update_source) 346 | 347 | percentile_input = TextInput(title='Percentile:', value='90', width=100) 348 | percentile_input.on_change('value', plot.update_source) 349 | 350 | 351 | start_date_picker = DatePicker(title='Start Date:', value=plot.x[0]) 352 | end_date_picker = DatePicker(title='End Date:', value=plot.x[-1]) 353 | start_date_picker.on_change('value', plot.update_source) 354 | end_date_picker.on_change('value', plot.update_source) 355 | 356 | gamma_options = ['5.0%/3.0mm', '3.0%/3.0mm', '3.0%/2.0mm', 'Any'] 357 | checkbox_button_group = CheckboxButtonGroup(labels=gamma_options, active=[3]) 358 | checkbox_button_group.on_change('active', plot.update_source) 359 | 360 | text = {key: Div() for key in [1, 2]} 361 | 362 | plot.update_source(None, None, None) 363 | 364 | layout = column(row(select_y, avg_len_input, percentile_input), 365 | row(start_date_picker, end_date_picker), 366 | row(Div(text='Gamma Criteria: '), checkbox_button_group), 367 | text[1], 368 | text[2], 369 | row(Spacer(width=10), plot.fig), 370 | Spacer(height=50), 371 | row(Spacer(width=10), plot.histogram), 372 | Spacer(height=50), 373 | row(Spacer(width=10), ichart.figure), 374 | row(ichart.div_center_line, ichart.div_ucl, ichart.div_lcl)) 375 | 376 | 377 | curdoc().add_root(layout) 378 | curdoc().title = "ArcCheck Trending" 379 | -------------------------------------------------------------------------------- /IQDM/trending_delta4.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # trending_delta4.py 5 | """ 6 | Bokeh server script to analyze a delta4_results csv from IQDM 7 | """ 8 | # Copyright (c) 2019 9 | # Dan Cutright, PhD 10 | # Medical Physicist 11 | # University of Chicago Medical Center 12 | # This file is part of IMRT QA Data Miner, partial based on code from DVH Analytics 13 | 14 | from bokeh.plotting import figure 15 | from bokeh.models import HoverTool, ColumnDataSource, Select, Div, TextInput, Legend, Spacer 16 | from bokeh.layouts import column, row 17 | from bokeh.models.widgets import DatePicker, CheckboxButtonGroup 18 | import numpy as np 19 | from IQDM.utilities import collapse_into_single_dates, moving_avg, get_control_limits, import_csv 20 | 21 | GROUPS = [1, 2] 22 | COLORS = {1: 'blue', 2: 'red'} 23 | 24 | # TODO: Generalize for different parsers 25 | MAIN_PLOT_KEYS = ['x', 'y', 'id', 'gamma_crit', 'file_name', 'gamma_index', 'daily_corr', 'dta'] 26 | 27 | 28 | class TrendingDashboard: 29 | def __init__(self, file_path, day_first=False): 30 | 31 | self.data = import_csv(file_path, day_first=day_first) 32 | 33 | self.__create_sources() 34 | self.__set_x() 35 | self.__create_figures() 36 | self.__set_properties() 37 | self.__create_divs() 38 | self.__add_plot_data() 39 | self.__add_histogram_data() 40 | self.__add_ichart_data() 41 | self.__add_hover() 42 | self.__add_legend() 43 | 44 | self.__create_widgets() 45 | self.__bind_widgets() 46 | self.__do_layout() 47 | 48 | self.update() 49 | 50 | def __create_sources(self): 51 | self.source = {grp: {'plot': ColumnDataSource(data={key: [] for key in MAIN_PLOT_KEYS}), 52 | 'trend': ColumnDataSource(data=dict(x=[], y=[])), 53 | 'bound': ColumnDataSource(data=dict(x=[], y=[])), 54 | 'patch': ColumnDataSource(data=dict(x=[], y=[])), 55 | 'hist': ColumnDataSource(data=dict(x=[], y=[]))} for grp in GROUPS} 56 | 57 | self.ichart_source = {grp: {'plot': ColumnDataSource(data=dict(x=[], 
57 |         self.ichart_source = {grp: {'plot': ColumnDataSource(data=dict(x=[], y=[], mrn=[], color=[], alpha=[], dates=[],
58 |                                                                         gamma_index=[], daily_corr=[], gamma_crit=[],
59 |                                                                         dta=[])),
60 |                                     'center_line': ColumnDataSource(data=dict(x=[], y=[], mrn=[])),
61 |                                     'ucl_line': ColumnDataSource(data=dict(x=[], y=[], mrn=[])),
62 |                                     'lcl_line': ColumnDataSource(data=dict(x=[], y=[], mrn=[])),
63 |                                     'bound': ColumnDataSource(data=dict(x=[], mrn=[], upper=[], avg=[], lower=[])),
64 |                                     'patch': ColumnDataSource(data=dict(x=[], y=[]))} for grp in GROUPS}
65 | 
66 |     def __set_x(self):
67 |         self.x = self.data['date_time_obj']
68 | 
69 |     def __create_figures(self):
70 | 
71 |         self.fig = figure(plot_width=1000, plot_height=375, x_axis_type='datetime')
72 |         self.histogram = figure(tools="", plot_width=1000, plot_height=275)
73 |         self.ichart = figure(plot_width=1000, plot_height=375)
74 | 
75 |     def __set_properties(self):
76 |         self.fig.xaxis.axis_label_text_font_size = "17pt"
77 |         self.fig.yaxis.axis_label_text_font_size = "17pt"
78 |         self.fig.xaxis.major_label_text_font_size = "15pt"
79 |         self.fig.yaxis.major_label_text_font_size = "15pt"
80 | 
81 |         self.histogram.xaxis.axis_label_text_font_size = "17pt"
82 |         self.histogram.yaxis.axis_label_text_font_size = "17pt"
83 |         self.histogram.xaxis.major_label_text_font_size = "15pt"
84 |         self.histogram.yaxis.major_label_text_font_size = "15pt"
85 | 
86 |         self.ichart.xaxis.axis_label = "Study #"
87 |         self.ichart.xaxis.axis_label_text_font_size = "17pt"
88 |         self.ichart.yaxis.axis_label_text_font_size = "17pt"
89 |         self.ichart.xaxis.major_label_text_font_size = "15pt"
90 |         self.ichart.yaxis.major_label_text_font_size = "15pt"
91 | 
92 |     def __add_plot_data(self):
93 |         self.plot_data = {grp: self.fig.circle('x', 'y', source=self.source[grp]['plot'],
94 |                                                color=COLORS[grp], size=4, alpha=0.4) for grp in GROUPS}
95 |         self.plot_trend = {grp: self.fig.line('x', 'y', source=self.source[grp]['trend'],
96 |                                               line_color='black', line_width=4) for grp in GROUPS}
97 |         self.plot_avg = {grp: self.fig.line('x', 'avg', source=self.source[grp]['bound'],
98 |                                             line_color='black') for grp in GROUPS}
99 |         self.plot_patch = {grp: self.fig.patch('x', 'y', source=self.source[grp]['patch'],
100 |                                                color=COLORS[grp], alpha=0.2) for grp in GROUPS}
101 | 
102 |     def __add_histogram_data(self):
103 |         self.vbar = {grp: self.histogram.vbar(x='x', width='width', bottom=0, top='top',
104 |                                               source=self.source[grp]['hist'], alpha=0.5, color=COLORS[grp])
105 |                      for grp in GROUPS}
106 | 
107 |         self.histogram.xaxis.axis_label = ""
108 |         self.histogram.yaxis.axis_label = "Frequency"
109 | 
110 |     def __add_ichart_data(self):
111 |         self.ichart_data = {grp: self.ichart.circle('x', 'y', source=self.ichart_source[grp]['plot'],
112 |                                                      size=4, color='color', alpha='alpha') for grp in GROUPS}
113 |         self.ichart_data_line = {grp: self.ichart.line('x', 'y', source=self.ichart_source[grp]['plot'],
114 |                                                         color=COLORS[grp], line_dash='solid') for grp in GROUPS}
115 |         self.ichart_patch = {grp: self.ichart.patch('x', 'y', color=COLORS[grp],
116 |                                                      source=self.ichart_source[grp]['patch'],
117 |                                                      alpha=0.1) for grp in GROUPS}
118 |         self.ichart_center_line = {grp: self.ichart.line('x', 'y', source=self.ichart_source[grp]['center_line'],
119 |                                                           alpha=1, color='black', line_dash='solid') for grp in GROUPS}
120 |         self.ichart_lcl_line = {grp: self.ichart.line('x', 'y', source=self.ichart_source[grp]['lcl_line'], alpha=1,
121 |                                                        color='red', line_dash='dashed') for grp in GROUPS}
122 |         self.ichart_ucl_line = {grp: self.ichart.line('x', 'y', source=self.ichart_source[grp]['ucl_line'], alpha=1,
123 |                                                        color='red', line_dash='dashed') for grp in GROUPS}
124 | 
125 |     def __add_legend(self):
126 |         # Main TrendingDashboard
127 |         group_items = {grp: [("Data %s " % grp, [self.plot_data[grp]]),
128 |                              ("Avg %s " % grp, [self.plot_avg[grp]]),
129 |                              ("Rolling Avg %s " % grp, [self.plot_trend[grp]]),
130 |                              ("Percentile Region %s " % grp, [self.plot_patch[grp]])] for grp in GROUPS}
131 |         items = group_items[GROUPS[0]]
132 |         if len(GROUPS) > 1:
133 |             for grp in GROUPS[1:]:
134 |                 items.extend(group_items[grp])
135 |         legend_plot = Legend(items=items, orientation='horizontal')
136 |         self.fig.add_layout(legend_plot, 'above')
137 |         self.fig.legend.click_policy = "hide"
138 | 
139 |         # Control Chart
140 |         group_items = {grp: [("Value %s " % grp, [self.ichart_data[grp]]),
141 |                              ("Line %s" % grp, [self.ichart_data_line[grp]]),
142 |                              ('Center %s' % grp, [self.ichart_center_line[grp]]),
143 |                              ('UCL %s' % grp, [self.ichart_ucl_line[grp]]),
144 |                              ('LCL %s' % grp, [self.ichart_lcl_line[grp]]),
145 |                              ('In Ctrl %s' % grp, [self.ichart_patch[grp]])] for grp in GROUPS}
146 |         items = group_items[GROUPS[0]]
147 |         if len(GROUPS) > 1:
148 |             for grp in GROUPS[1:]:
149 |                 items.extend(group_items[grp])
150 |         legend_ichart = Legend(items=items, orientation='horizontal')
151 |         self.ichart.add_layout(legend_ichart, 'above')
152 |         self.ichart.legend.click_policy = "hide"
153 | 
154 |     def __add_hover(self):
155 |         self.fig.add_tools(HoverTool(tooltips=[("Plan Date", "@x{%F}"),
156 |                                                ("Patient", "@id"),
157 |                                                ("y", "@y"),
158 |                                                ('Gamma Crit', "@gamma_crit"),
159 |                                                ('Gamma Pass', '@gamma_index'),
160 |                                                ('DTA', '@dta'),
161 |                                                ('Daily Corr', '@daily_corr'),
162 |                                                ('file', '@file_name')],
163 |                                      formatters={'x': 'datetime'},
164 |                                      renderers=[self.plot_data[grp] for grp in GROUPS]))
165 | 
166 |         self.histogram.add_tools(HoverTool(show_arrow=True, line_policy='next', mode='vline',
167 |                                            tooltips=[("Bin Center", "@x"),
168 |                                                      ('Frequency', '@top')],
169 |                                            renderers=[self.vbar[grp] for grp in GROUPS]))
170 | 
171 |         self.ichart.add_tools(HoverTool(show_arrow=True,
172 |                                         tooltips=[('ID', '@mrn'),
173 |                                                   ('Date', '@dates{%F}'),
174 |                                                   ('Study', '@x'),
175 |                                                   ('Value', '@y{0.2f}'),
176 |                                                   ("y", "@y"),
177 |                                                   ('Gamma Crit', "@gamma_crit"),
178 |                                                   ('Gamma Pass', '@gamma_index'),
179 |                                                   ('DTA', '@dta'),
180 |                                                   ('Daily Corr', '@daily_corr'),
181 |                                                   ('file', '@file_name')
182 |                                                   ],
183 |                                         formatters={'dates': 'datetime'},
184 |                                         renderers=[self.ichart_data[grp] for grp in GROUPS]))
185 | 
186 |     def __create_divs(self):
187 |         self.div_summary = {grp: Div() for grp in GROUPS}
188 |         self.div_center_line = {grp: Div(text='', width=175) for grp in GROUPS}
189 |         self.div_ucl = {grp: Div(text='', width=175) for grp in GROUPS}
190 |         self.div_lcl = {grp: Div(text='', width=175) for grp in GROUPS}
191 | 
192 |     def __create_widgets(self):
193 |         ignored_y = ['Patient Name', 'Patient ID', 'Plan Date', 'Radiation Dev', 'Energy', 'file_name', 'date_time_obj']
194 |         y_options = [option for option in list(self.data) if option not in ignored_y]
195 |         self.select_y = Select(title='Y-variable:', value='Dose Dev', options=y_options)
196 | 
197 |         linacs = list(set(self.data['Radiation Dev']))
198 |         linacs.sort()
199 |         linacs.insert(0, 'All')
200 |         linacs.append('None')
201 |         self.select_linac = {grp: Select(title='Linac %s:' % grp, value='All', options=linacs, width=250)
202 |                              for grp in GROUPS}
203 |         self.select_linac[2].value = 'None'
204 | 
205 |         energies = list(set(self.data['Energy']))
206 |         energies.sort()
207 |         energies.insert(0, 'Any')
208 |         self.select_energies = {grp: Select(title='Energy %s:' % grp, value='Any', options=energies, width=250)
209 |                                 for grp in GROUPS}
210 | 
211 |         self.avg_len_input = TextInput(title='Avg. Len:', value='10', width=100)
212 | 
213 |         self.percentile_input = TextInput(title='Percentile:', value='90', width=100)
214 | 
215 |         self.bins_input = TextInput(title='Bins:', value='20', width=100)
216 | 
217 |         self.start_date_picker = DatePicker(title='Start Date:', value=self.x[0])
218 |         self.end_date_picker = DatePicker(title='End Date:', value=self.x[-1])
219 | 
220 |         self.gamma_options = ['5.0%/3.0mm', '3.0%/3.0mm', '3.0%/2.0mm', 'Any']
221 |         self.checkbox_button_group = CheckboxButtonGroup(labels=self.gamma_options, active=[3])
222 | 
223 |     def __bind_widgets(self):
224 | 
225 |         self.select_y.on_change('value', self.update_source_ticker)
226 |         for grp in GROUPS:
227 |             self.select_linac[grp].on_change('value', self.update_source_ticker)
228 |             self.select_energies[grp].on_change('value', self.update_source_ticker)
229 |         self.avg_len_input.on_change('value', self.update_source_ticker)
230 |         self.percentile_input.on_change('value', self.update_source_ticker)
231 |         self.bins_input.on_change('value', self.update_source_ticker)
232 |         self.start_date_picker.on_change('value', self.update_source_ticker)
233 |         self.end_date_picker.on_change('value', self.update_source_ticker)
234 |         self.checkbox_button_group.on_change('active', self.update_source_ticker)
235 | 
236 |     def __do_layout(self):
237 |         # TODO: Generalize for 1 or 2 groups
238 |         self.layout = column(row(self.select_y, self.select_linac[1], self.select_linac[2], self.avg_len_input,
239 |                                  self.percentile_input, self.bins_input),
240 |                              row(self.select_energies[1], self.select_energies[2]),
241 |                              row(self.start_date_picker, self.end_date_picker),
242 |                              row(Div(text='Gamma Criteria: '), self.checkbox_button_group),
243 |                              self.div_summary[1],
244 |                              self.div_summary[2],
245 |                              row(Spacer(width=10), self.fig),
246 |                              Spacer(height=50),
247 |                              row(Spacer(width=10), self.histogram),
248 |                              Spacer(height=50),
249 |                              row(Spacer(width=10), self.ichart),
250 |                              row(self.div_center_line[1], self.div_ucl[1], self.div_lcl[1]),
251 |                              row(self.div_center_line[2], self.div_ucl[2], self.div_lcl[2]))
252 | 
253 |     def update_source_ticker(self, attr, old, new):
254 |         self.update()
255 | 
256 |     def update(self):
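        # Descriptive note: a report row is plotted for this group only if it passes all four
        # widget filters below -- linac (select_linac), date window (the date pickers), gamma
        # criteria (checkbox_button_group), and energy (select_energies). Rows whose selected
        # y-value cannot be cast to float are skipped entirely.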
257 |         for grp in GROUPS:
258 |             new_data = {key: [] for key in MAIN_PLOT_KEYS}
259 |             active_gamma = [self.gamma_options[a] for a in self.checkbox_button_group.active]
260 |             if self.select_linac[grp].value != 'None':
261 |                 for i in range(len(self.x)):
262 |                     if self.select_linac[grp].value == 'All' or \
263 |                             self.data['Radiation Dev'][i] == self.select_linac[grp].value:
264 |                         if self.end_date_picker.value > self.x[i] > self.start_date_picker.value:
265 |                             gamma_crit = "%s%%/%smm" % (self.data['Gamma Dose Criteria'][i],
266 |                                                         self.data['Gamma Dist Criteria'][i])
267 |                             if 'Any' in active_gamma or gamma_crit in active_gamma:
268 |                                 if self.select_energies[grp].value == 'Any' or \
269 |                                         self.data['Energy'][i] == self.select_energies[grp].value:
270 | 
271 |                                     try:
272 |                                         new_data['y'].append(float(self.data[self.select_y.value][i]))
273 |                                     except ValueError:
274 |                                         continue
275 |                                     new_data['x'].append(self.x[i])
276 |                                     new_data['id'].append(self.data['Patient ID'][i])
277 |                                     new_data['gamma_crit'].append(gamma_crit)
278 |                                     new_data['file_name'].append(self.data['file_name'][i])
279 |                                     new_data['gamma_index'].append('%s%%' % self.data['Gamma-Index'][i])
280 |                                     new_data['daily_corr'].append(self.data['Daily Corr'][i])
281 |                                     new_data['dta'].append('%s%%' % self.data['DTA'][i])
282 | 
283 |             try:
284 |                 y = new_data['y']
285 |                 self.div_summary[grp].text = "Linac %s: Min: %0.3f | Low: %0.3f | " \
286 |                                              "Mean: %0.3f | Median: %0.3f | Upper: %0.3f | " \
287 |                                              "Max: %0.3f" % \
288 |                                              (grp, np.min(y), np.percentile(y, 25), np.mean(y),
289 |                                               np.percentile(y, 50), np.percentile(y, 75), np.max(y))
290 |             except Exception:
291 |                 self.div_summary[grp].text = "Linac %s" % grp
292 | 
293 |             self.source[grp]['plot'].data = new_data
294 | 
295 |             self.fig.yaxis.axis_label = self.select_y.value
296 |             self.fig.xaxis.axis_label = 'Plan Date'
297 | 
298 |             self.update_histogram(grp)
299 |             self.update_trend(grp, int(float(self.avg_len_input.value)), float(self.percentile_input.value))
300 |             self.update_ichart()
301 | 
302 |     def update_histogram(self, group):
303 |         width_fraction = 0.9
304 |         try:
305 |             bin_size = int(self.bins_input.value)
306 |         except ValueError:
307 |             bin_size = 20
308 |             self.bins_input.value = str(bin_size)
309 |         hist, bins = np.histogram(self.source[group]['plot'].data['y'], bins=bin_size)
310 |         width = [width_fraction * (bins[1] - bins[0])] * bin_size
311 |         center = (bins[:-1] + bins[1:]) / 2.
312 |         if set(hist) != {0}:
313 |             self.source[group]['hist'].data = {'x': center, 'top': hist, 'width': width}
314 |         else:
315 |             self.source[group]['hist'].data = {'x': [], 'top': [], 'width': []}
316 | 
317 |         self.histogram.xaxis.axis_label = self.select_y.value
318 | 
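    # A quick worked example for the percentile band in update_trend() below: with the
    # default percentile_input of 90, the band spans from the 5th percentile (50 - 90/2)
    # to the 95th percentile (50 + 90/2) of the currently filtered y-values, with the
    # median (50th percentile) drawn as the 'avg' line between them.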
319 |     def update_trend(self, source_key, avg_len, percentile):
320 |         x = self.source[source_key]['plot'].data['x']
321 |         y = self.source[source_key]['plot'].data['y']
322 |         if x and y:
323 |             data_collapsed = collapse_into_single_dates(x, y)
324 |             x_trend, y_trend = moving_avg(data_collapsed, avg_len)
325 | 
326 |             y_np = np.array(self.source[source_key]['plot'].data['y'])
327 |             upper_bound = float(np.percentile(y_np, 50. + percentile / 2.))
328 |             average = float(np.percentile(y_np, 50))
329 |             lower_bound = float(np.percentile(y_np, 50. - percentile / 2.))
330 | 
331 |             self.source[source_key]['trend'].data = {'x': x_trend, 'y': y_trend, 'mrn': ['Avg'] * len(x_trend)}
332 |             self.source[source_key]['bound'].data = {'x': [x[0], x[-1]],
333 |                                                      'mrn': ['Series Avg'] * 2,
334 |                                                      'upper': [upper_bound] * 2,
335 |                                                      'avg': [average] * 2,
336 |                                                      'lower': [lower_bound] * 2,
337 |                                                      'y': [average] * 2}
338 |             self.source[source_key]['patch'].data = {'x': [x[0], x[-1], x[-1], x[0]],
339 |                                                      'y': [upper_bound, upper_bound, lower_bound, lower_bound]}
340 |         else:
341 |             self.source[source_key]['trend'].data = {'x': [], 'y': [], 'mrn': []}
342 |             self.source[source_key]['bound'].data = {'x': [], 'mrn': [], 'upper': [], 'avg': [], 'lower': [], 'y': []}
343 |             self.source[source_key]['patch'].data = {'x': [], 'y': []}
344 | 
345 |     def update_ichart(self):
346 |         self.ichart.yaxis.axis_label = self.select_y.value
347 | 
348 |         for grp in GROUPS:
349 |             y = self.source[grp]['plot'].data['y']
350 |             mrn = self.source[grp]['plot'].data['id']
351 |             dates = self.source[grp]['plot'].data['x']
352 |             gamma_crit = self.source[grp]['plot'].data['gamma_crit']
353 |             gamma_index = self.source[grp]['plot'].data['gamma_index']
354 |             daily_corr = self.source[grp]['plot'].data['daily_corr']
355 |             dta = self.source[grp]['plot'].data['dta']
356 |             file_name = self.source[grp]['plot'].data['file_name']
357 |             x = list(range(len(dates)))
358 | 
359 |             center_line, ucl, lcl = get_control_limits(y)
360 | 
361 |             if self.select_y.value in ['Gamma-Index', 'DTA'] and ucl > 100:
362 |                 ucl = 100
363 | 
364 |             colors = ['red', 'blue']
365 |             alphas = [0.3, 0.4]
366 |             color = [colors[ucl >= value >= lcl] for value in y]
367 |             alpha = [alphas[ucl >= value >= lcl] for value in y]
368 | 
369 |             self.ichart_source[grp]['plot'].data = {'x': x, 'y': y, 'mrn': mrn, 'gamma_crit': gamma_crit,
370 |                                                     'gamma_index': gamma_index, 'daily_corr': daily_corr, 'dta': dta,
371 |                                                     'color': color, 'alpha': alpha, 'dates': dates,
372 |                                                     'file_name': file_name}
373 | 
374 |             if len(x) > 1:
375 |                 self.ichart_source[grp]['patch'].data = {'x': [x[0], x[-1], x[-1], x[0]],
376 |                                                          'y': [ucl, ucl, lcl, lcl]}
377 |                 self.ichart_source[grp]['center_line'].data = {'x': [min(x), max(x)],
378 |                                                                'y': [center_line] * 2,
379 |                                                                'mrn': ['center line'] * 2}
380 | 
381 |                 self.ichart_source[grp]['lcl_line'].data = {'x': [min(x), max(x)],
382 |                                                             'y': [lcl] * 2,
383 |                                                             'mrn': ['LCL line'] * 2}
384 |                 self.ichart_source[grp]['ucl_line'].data = {'x': [min(x), max(x)],
385 |                                                             'y': [ucl] * 2,
386 |                                                             'mrn': ['UCL line'] * 2}
387 | 
388 |                 self.div_center_line[grp].text = "Center line: %0.3f" % center_line
389 |                 self.div_ucl[grp].text = "UCL: %0.3f" % ucl
390 |                 self.div_lcl[grp].text = "LCL: %0.3f" % lcl
391 |             else:
392 |                 self.ichart_source[grp]['patch'].data = {'x': [], 'y': []}
393 |                 self.ichart_source[grp]['center_line'].data = {'x': [], 'y': [], 'mrn': []}
394 |                 self.ichart_source[grp]['lcl_line'].data = {'x': [], 'y': [], 'mrn': []}
395 |                 self.ichart_source[grp]['ucl_line'].data = {'x': [], 'y': [], 'mrn': []}
396 | 
397 |                 self.div_center_line[grp].text = "Center line:"
398 |                 self.div_ucl[grp].text = "UCL:"
399 |                 self.div_lcl[grp].text = "LCL:"
400 | 
--------------------------------------------------------------------------------
/IQDM/utilities.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | utility functions for the IMRT QA PDF report parser
4 | Created on Thu May 30 2019
5 | @author: Dan Cutright, PhD
6 | """
7 | 
8 | from os.path import isdir, join, splitext, normpath
9 | from os import walk, listdir
10 | import zipfile
11 | from datetime import datetime
12 | from dateutil.parser import parse as date_parser
13 | import numpy as np
14 | import codecs
15 | 
16 | DELIMITER = ','  # delimiter for the csv output file for process_files
17 | ALTERNATE = '^'  # replace the delimiter character with this so not to confuse csv file parsing
18 | 
19 | 
20 | def are_all_strings_in_text(text, list_of_strings):
21 |     """
22 |     :param text: output from convert_pdf_to_text
23 |     :type text: str
24 |     :param list_of_strings: a list of strings used to identify document type
25 |     :type list_of_strings: list of str
26 |     :return: Will return true if every string in list_of_strings is found in the text data
27 |     :rtype: bool
28 |     """
29 |     for str_to_find in list_of_strings:
30 |         if str_to_find not in text:
31 |             return False
32 |     return True
33 | 
34 | 
35 | #############################################################
36 | # CSV related functions
37 | #############################################################
38 | def get_csv(data, columns):
39 |     """
40 |     Convert a dictionary of data into a row for a csv file
41 |     :param data: a dictionary with values with str representations
42 |     :type data: dict
43 |     :param columns: a list of keys dictating the order of the csv
44 |     :type columns: list
45 |     :return: a csv string delimited by DELIMITER
46 |     :rtype: str
47 |     """
48 |     clean_csv = [str(data[column]).replace(DELIMITER, ALTERNATE) for column in columns]
49 |     return DELIMITER.join(clean_csv)
50 | 
51 | 
52 | def load_csv_file(file_path):
53 |     with codecs.open(file_path, 'r', encoding='utf-8', errors='ignore') as doc:
54 |         return [line.split(',') for line in doc]
55 | 
56 | 
57 | def import_csv(file_path, day_first=False):
58 |     raw_data = load_csv_file(file_path)
59 |     keys = raw_data.pop(0)  # remove column header row
60 |     keys = [key.strip() for key in keys if key.strip()] + ['file_name']
61 |     data = {key: [] for key in keys}
62 |     for row in raw_data:
63 |         for col, key in enumerate(keys):
64 |             data[key].append(row[col])
65 | 
66 |     sorted_data = {key: [] for key in keys}
67 |     sorted_data['date_time_obj'] = []
68 | 
69 |     date_time_objs = get_date_times(data, day_first=day_first)
70 | 
71 |     for i in get_sorted_indices(date_time_objs):
72 |         for key in keys:
73 |             sorted_data[key].append(data[key][i])
74 |         sorted_data['date_time_obj'].append(date_time_objs[i])
75 | 
76 |     return sorted_data
77 | 
78 | 
79 | def get_file_names_from_csv_file(file_path):
80 |     raw_data = load_csv_file(file_path)
81 |     column_headers = raw_data.pop(0)  # remove column header row
82 |     fp_start = len(column_headers)
83 |     file_names = []
84 |     for row in raw_data:
85 |         file_name_fields = [value for value in row[fp_start:]]
86 |         file_name = ','.join(file_name_fields)
87 |         file_names.append(normpath(file_name.strip()))
88 |     return file_names
89 | 
90 | 
91 | #############################################################
92 | # Plotting and Stat related functions
93 | #############################################################
94 | def collapse_into_single_dates(x, y):
95 |     """
96 |     Function used for a time plot to convert multiple values into one value, while retaining enough information
97 |     to perform a moving average over time
98 |     :param x: a list of dates in ascending order
99 |     :param y: a list of values, one per date, supporting the '+' operator
100 |     :return: a unique list of dates, the sum of y for each date, and the number of original points for each date
101 |     :rtype: dict
102 |     """
103 | 
104 |     # average daily data and keep track of points per day
105 |     x_collapsed = [x[0]]
106 |     y_collapsed = [y[0]]
107 |     w_collapsed = [1]
108 |     for n in range(1, len(x)):
109 |         if x[n] == x_collapsed[-1]:
110 |             y_collapsed[-1] = (y_collapsed[-1] + y[n])
111 |             w_collapsed[-1] += 1
112 |         else:
113 |             x_collapsed.append(x[n])
114 |             y_collapsed.append(y[n])
115 |             w_collapsed.append(1)
116 | 
117 |     return {'x': x_collapsed, 'y': y_collapsed, 'w': w_collapsed}
118 | 
119 | 
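# Worked example of the two-step trend pipeline (hypothetical values): with
# x = [d1, d1, d2] and y = [1, 3, 5], collapse_into_single_dates() returns
# {'x': [d1, d2], 'y': [4, 5], 'w': [2, 1]} -- daily sums plus point counts.
# moving_avg() below then averages the daily means (4/2 = 2 and 5/1 = 5), so a
# 2-point window yields x = [d2], y = [(2 + 5) / 2] = [3.5].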
120 | def moving_avg(xyw, avg_len):
121 |     """
122 |     Calculate a moving average for a given averaging length
123 |     :param xyw: output from collapse_into_single_dates
124 |     :type xyw: dict
125 |     :param avg_len: average of these number of points, i.e., look-back window
126 |     :type avg_len: int
127 |     :return: list of x values, list of y values
128 |     :rtype: tuple
129 |     """
130 |     cumsum, moving_aves, x_final = [0], [], []
131 | 
132 |     for i, y in enumerate(xyw['y'], 1):
133 |         cumsum.append(cumsum[i - 1] + y / xyw['w'][i - 1])
134 |         if i >= avg_len:
135 |             moving_ave = (cumsum[i] - cumsum[i - avg_len]) / avg_len
136 |             moving_aves.append(moving_ave)
137 |     x_final = [xyw['x'][i] for i in range(avg_len - 1, len(xyw['x']))]
138 | 
139 |     return x_final, moving_aves
140 | 
141 | 
142 | def get_sorted_indices(some_list):
143 |     try:
144 |         return [i[0] for i in sorted(enumerate(some_list), key=lambda x: x[1])]
145 |     except TypeError:  # can't sort if a mix of str and float
146 |         try:
147 |             temp_data = [[value, -float('inf')][value == 'None'] for value in some_list]
148 |             return [i[0] for i in sorted(enumerate(temp_data), key=lambda x: x[1])]
149 |         except TypeError:
150 |             temp_data = [str(value) for value in some_list]
151 |             return [i[0] for i in sorted(enumerate(temp_data), key=lambda x: x[1])]
152 | 
153 | 
154 | def get_date_times(data, datetime_key='Plan Date', row_id_key='Patient ID', day_first=False):
155 |     dates = []
156 |     for i, date_str in enumerate(data[datetime_key]):
157 |         try:
158 |             dates.append(date_parser(date_str, dayfirst=day_first).date())
159 |         except ValueError:
160 |             print('ERROR: Could not parse the following into a date: %s' % date_str)
161 |             print("\tPatient ID: %s" % data[row_id_key][i])
162 |             print("\tUsing today's date instead")
163 |             dates.append(datetime.today().date())
164 |     return dates
165 | 
166 | 
167 | def get_control_limits(y):
168 |     """
169 |     Calculate control limits for Control Chart
170 |     :param y: data
171 |     :type y: list
172 |     :return: center line, upper control limit, and lower control limit
173 |     """
174 |     y = np.array(y)
175 | 
176 |     center_line = np.mean(y)
177 |     avg_moving_range = np.mean(np.absolute(np.diff(y)))
178 | 
179 |     scalar_d = 1.128
180 | 
181 |     ucl = center_line + 3 * avg_moving_range / scalar_d
182 |     lcl = center_line - 3 * avg_moving_range / scalar_d
183 | 
184 |     return center_line, ucl, lcl
185 | 
186 | 
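# Worked example for get_control_limits() above (hypothetical data): for
# y = [98, 99, 97, 100], center_line = 98.5 and the moving ranges are
# |99-98|, |97-99|, |100-97| = [1, 2, 3], so avg_moving_range = 2.0.
# scalar_d = 1.128 is the standard control chart d2 constant for a moving
# range of two observations, giving UCL = 98.5 + 3 * 2.0 / 1.128 ~= 103.82
# and LCL = 98.5 - 3 * 2.0 / 1.128 ~= 93.18.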
#############################################################
# File related functions
#############################################################
def extract_files_from_zipped_files(init_directory, extract_to_path, extension='.pdf'):
    """
    Function to extract .pdf files from zipped files
    :param init_directory: initial top-level directory to walk through
    :type init_directory: str
    :param extract_to_path: directory to extract pdfs into
    :type extract_to_path: str
    :param extension: file extension of file type to extract, set to None to extract all files
    :type extension: str or None
    """
    for dirName, subdirList, fileList in walk(init_directory):  # iterate through files and all sub-directories
        for fileName in fileList:
            if splitext(fileName)[1].lower() == '.zip':
                zip_file_path = join(dirName, fileName)
                with zipfile.ZipFile(zip_file_path, 'r') as z:
                    for file_name in z.namelist():
                        if not isdir(file_name) and (extension is None or splitext(file_name)[1].lower() == extension):
                            temp_path = join(extract_to_path)
                            z.extract(file_name, path=temp_path)


def find_latest_results(init_directory, no_recursive_search=False):
    """
    Find the most recent IQDM results csv file within the provided directory
    :param init_directory: initial scan directory
    :type init_directory: str
    :param no_recursive_search: set to True to ignore subdirectories
    :type no_recursive_search: bool
    :return: a dictionary like {report_type: {'time_stamp': datetime, 'file_path': str}}
    :rtype: dict
    """
    results = {}
    if no_recursive_search:
        process_result_csvs(listdir(init_directory), results)
    else:
        for dirName, subdirList, fileList in walk(init_directory):  # iterate through files and all sub-directories
            process_result_csvs(fileList, results, directory_name=dirName)
    return results


def process_result_csvs(file_list, results, directory_name=None):
    """
    Parse each file for report type and time stamp, edit results with the latest file_path for each report_type
    :param file_list: files to be parsed
    :type file_list: list
    :param results: results dict from find_latest_results()
    :type results: dict
    :param directory_name: optionally specify the directory
    :type directory_name: str
    """
    for file_name in file_list:
        fn = splitext(file_name)[0].lower()
        ext = splitext(file_name)[1].lower()
        if ext == '.csv' and '_results_' in fn:
            try:
                result_info = file_name.split('_')
                report_type = result_info[0]
                time_stamp = result_info[2].replace(ext, '')
                time_stamp = datetime.strptime(time_stamp[:-7], '%Y-%m-%d %H-%M-%S')

                if report_type and (report_type not in results.keys()
                                    or results[report_type]['time_stamp'] < time_stamp):
                    if directory_name is None:
                        file_path = file_name
                    else:
                        file_path = join(directory_name, file_name)
                    results[report_type] = {'time_stamp': time_stamp, 'file_path': file_path}
            except Exception:
                continue


def get_processed_files(init_directory, no_recursive_search=False):
    processed = []
    if no_recursive_search:
        get_file_names_from_result_csvs(listdir(init_directory), processed)
    else:
        for dirName, subdirList, fileList in walk(init_directory):  # iterate through files and all sub-directories
            get_file_names_from_result_csvs(fileList, processed, directory_name=dirName)
    return list(set(processed))


def get_file_names_from_result_csvs(file_list, processed, directory_name=None):
    for file_name in file_list:
        fn = splitext(file_name)[0].lower()
        ext = splitext(file_name)[1].lower()
        if ext == '.csv' and '_results_' in fn:
            if directory_name is None:
                file_path = file_name
            else:
                file_path = join(directory_name, file_name)
            try:
                file_names = get_file_names_from_csv_file(file_path)
                processed.extend(file_names)
            except Exception:
                continue


def is_file_name_found_in_processed_files(file_name, directory, processed_files):
    for processed_file in processed_files:
        if normpath(file_name) in processed_file or normpath(join(directory, file_name)) == processed_file:
            return True
    return False

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2019 Dan Cutright
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include IQDM *.py
2 | include README.md
3 | include LICENSE
4 | include setup.py
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # IMRT-QA-Data-Miner
2 | Scans a directory for IMRT QA results.
3 | 
4 | 
5 | ### THIS PROJECT HAS MOVED
6 | This project has since been sponsored by the AAPM's IMRT Working Group.
7 | There is a new project here: https://github.com/IQDM/IQDM-PDF
8 | 
9 | The new version does not have plotting built in, but we plan to use
10 | [DVHA Stats](https://github.com/cutright/DVHA-Stats) to take care of that
11 | once enough testing has been done, and more vendors are added.
12 | 
13 | 
14 | ### Install
15 | ~~~~
16 | pip install iqdm
17 | ~~~~
18 | 
19 | ### How to run
20 | To scan a directory for IMRT QA report files and generate a results .csv file:
21 | ~~~~
22 | iqdm <path-to-directory>
23 | ~~~~
24 | To launch a trending dashboard (and open the resulting link):
25 | ~~~~
26 | iqdm <path-to-results-csv>
27 | ~~~~
28 | 
29 | Screenshot of dashboard:
30 | 
31 | 
32 | 
33 | ### Command line usage
34 | ~~~~
35 | usage: iqdm [-h] [-ie] [-od OUTPUT_DIR] [-rd RESULTS_DIR] [-all]
36 |             [-of OUTPUT_FILE] [-ver] [-nr] [-df] [-p PORT]
37 |             [-wo WEBSOCKET_ORIGIN]
38 |             [file_path]
39 | 
40 | Command line interface for IQDM
41 | 
42 | positional arguments:
43 |   file_path             Initiate scan if directory, launch dashboard if
44 |                         results file
45 | 
46 | optional arguments:
47 |   -h, --help            show this help message and exit
48 |   -ie, --ignore-extension
49 |                         Script will check all files, not just ones with .pdf
50 |                         extensions
51 |   -od OUTPUT_DIR, --output-dir OUTPUT_DIR
52 |                         Output stored in local directory by default, specify
53 |                         otherwise here
54 |   -rd RESULTS_DIR, --results-dir RESULTS_DIR
55 |                         Results assumed to be stored in local directory by
56 |                         default, specify otherwise here
57 |   -all, --process-all   Process all identified report files, otherwise only
58 |                         new reports will be analyzed
59 |   -of OUTPUT_FILE, --output-file OUTPUT_FILE
60 |                         Output will be saved as <report_type>_results_<time-stamp>.csv
61 |                         by default. Define this tag to customize
62 |                         file name after <report_type>_
63 |   -ver, --version       Print the IQDM version
64 |   -nr, --no-recursive-search
65 |                         Include this flag to skip sub-directories
66 |   -df, --day-first      Assume day first for ambiguous dates in trending
67 |                         dashboard
68 |   -p PORT, --port PORT  Specify port of trending dashboard webserver
69 |   -wo WEBSOCKET_ORIGIN, --allow-websocket-origin WEBSOCKET_ORIGIN
70 |                         Allow a websocket origin other than localhost, see
71 |                         bokeh documentation
72 | ~~~~
73 | 
74 | ### Notes
75 | This script was written specifically for SNC Patient and Delta4, but I'd be happy to include support for other vendors
76 | if someone could provide some anonymized example reports.
77 | 
78 | ### Vendor Compatibility
79 | * **[Sun Nuclear](http://sunnuclear.com)**: *SNC Patient*
80 |     * ArcCheck compatibility contributed by [Marc Chamberland](https://github.com/mchamberland)
81 | * **[ScandiDos](http://scandidos.com)**: *Delta4*
82 | This is still in beta, but the reported csv data is largely correct (the reported energy might be off). The class parses much
83 | more data (including individual beam results), but that data is not yet included in the csv output, nor has it been validated.
84 | 
85 | 
86 | ### Contributing
87 | If you'd like to contribute code to support a new vendor, please create a new python file in the parsers directory
88 | containing a new class. This class should include the following to be compatible (a minimal sketch follows this list):
89 | 
90 | * **PROPERTIES**
91 |     * **identifiers**
92 |         this is a list of strings that collectively and uniquely are found in a report type
93 |     * **columns**
94 |         a list of strings indicating the columns of the csv to be output
95 |     * **csv**
96 |         a string of values for each column, delimited with DELIMITER in utilities.py
97 |     * **report_type**
98 |         a string succinctly describing the report, this will be used in the results filename created in main.py
99 | 
100 | * **METHODS**
101 |     * **process_data(text_data)**
102 |         processing the data does not occur until this is called
103 | 
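Below is a minimal, hypothetical sketch of such a class. The vendor name, identifiers, and
column values are placeholders, not a real parser; see delta4.py or sncpatient.py in the
parsers directory for working examples.

~~~~
from IQDM.utilities import get_csv


class ExampleVendorReport:

    # strings that, together, uniquely identify this report type
    identifiers = ['Example Vendor', 'QA Report']
    # column order for the output csv
    columns = ['Patient Name', 'Patient ID', 'Plan Date']
    # prepended to the results csv file name by main.py
    report_type = 'example_vendor'

    def process_data(self, text_data):
        # a real parser would extract these values from the converted pdf text;
        # the assignments below are placeholders
        self.data = {'Patient Name': 'ANONYMOUS',
                     'Patient ID': '000000',
                     'Plan Date': '2019-05-30'}

    @property
    def csv(self):
        return get_csv(self.data, self.columns)
~~~~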
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 | 
3 | requires = [
4 |     'pdfminer.six',
5 |     'pdfminer > 19',
6 |     'numpy',
7 |     'python-dateutil',
8 |     'chardet == 3.0.4',
9 |     'pathvalidate',
10 |     'bokeh'
11 | ]
12 | 
13 | with open('README.md', 'r') as doc:
14 |     long_description = doc.read()
15 | 
16 | setup(
17 |     name='IQDM',
18 |     include_package_data=True,
19 |     packages=find_packages(),
20 |     version='0.3.1',
21 |     description='Scans a directory for IMRT QA results',
22 |     author='Dan Cutright',
23 |     author_email='dan.cutright@gmail.com',
24 |     url='https://github.com/cutright/IMRT-QA-Data-Miner/',
25 |     download_url='https://github.com/cutright/IMRT-QA-Data-Miner/archive/master.zip',
26 |     license="MIT License",
27 |     keywords=['radiation therapy', 'qa', 'research'],
28 |     classifiers=[],
29 |     install_requires=requires,
30 |     entry_points={
31 |         'console_scripts': [
32 |             'IQDM=IQDM.main:main',
33 |         ],
34 |     },
35 |     long_description=long_description,
36 |     long_description_content_type="text/markdown"
37 | )
--------------------------------------------------------------------------------