├── .DS_Store ├── .gitignore ├── Code ├── Analysis.py ├── FinalCode.ipynb ├── LLM.py ├── Processing.py └── Testing.py ├── LICENSE ├── Output ├── CPT_MetricDistribution_.png ├── CPT_MetricDistribution__man.png ├── CPT_STS_Score_distributions.png ├── CPT_STS_Score_distributions_error.png ├── CPT_repeats_table.xlsx ├── CPT_repeats_table_man.xlsx ├── Figure1.png ├── ICD10CM_MetricDistribution_.png ├── ICD10CM_MetricDistribution__man.png ├── ICD10CM_STS_Score_distributions.png ├── ICD10CM_STS_Score_distributions_error.png ├── ICD10CM_repeats_table.xlsx ├── ICD10CM_repeats_table_man.xlsx ├── ICD9CM_MetricDistribution_.png ├── ICD9CM_MetricDistribution__man.png ├── ICD9CM_STS_Score_distributions.png ├── ICD9CM_STS_Score_distributions_error.png ├── ICD9CM_repeats_table.xlsx ├── ICD9CM_repeats_table_man.xlsx ├── STS_Score_distributions.png ├── code_histogram.png ├── combined_match_rate_bar_chart.png ├── combined_match_rate_plot.png ├── descriptionpairs_man.xlsx ├── descriptionpairs_man_Ali.xlsx ├── descriptionpairs_man_Eyal.xlsx ├── descriptionpairs_man_test.xlsx ├── metrics_tables.xlsx ├── metrics_tables_man.xlsx ├── metrics_tables_man_nomatch.xlsx ├── metrics_tables_nomatch.xlsx ├── metrics_tables_unmatchedonly.xlsx ├── repeats_table.xlsx ├── repeats_table_formatted.xlsx ├── repeats_table_formatted_trim.xlsx ├── repeats_table_man.xlsx └── repeats_table_man_formatted.xlsx ├── README.md └── Raw ├── 2023_DHS_Code_List_Addendum_12_01_2022.txt ├── 2023_DHS_Code_List_Addendum_12_01_2022.xlsx ├── CMS32_DESC_LONG_SHORT_DX.xlsx ├── Section111ValidICD9-Jan2024.xlsx ├── codes_addenda_2023.txt ├── icd10cm_codes_2023.txt └── icd10cm_codes_addenda_2023.txt /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore directories 2 | DO_NOT_PUBLISH/ 3 | Output/Intermediate/ 4 | 5 | # Ignore system files 6 | *.DS_Store 7 | 8 | # Ignore Python environment and cache files 9 | *.env 10 | .venv/ 11 | __pycache__/ 12 | *.pytest_cache/ 13 | 14 | # Ignore specific file types 15 | *.parquet 16 | *.csv 17 | 18 | # Ignore specific Excel files 19 | *codes_man_text.xlsx 20 | *codes_man.xlsx 21 | *codes.xlsx 22 | *responses.xlsx 23 | *responses_man.xlsx 24 | *responses_man_text.xlsx 25 | 26 | # Ignore VSCode settings 27 | .vscode/ 28 | -------------------------------------------------------------------------------- /Code/Analysis.py: -------------------------------------------------------------------------------- 1 | import os, json, re, pickle, subprocess 2 | from tqdm.notebook import tqdm as tqdm 3 | from tqdm import tqdm as tqdm_conc 4 | from functools import lru_cache 5 | from concurrent.futures import ThreadPoolExecutor, as_completed 6 | 7 | import pandas as pd 8 | import numpy as np 9 | 10 | import mercury as mr 11 | import matplotlib.pyplot as plt 12 | import matplotlib.gridspec as gridspec 13 | import seaborn as sns 14 | 15 | import scipy.stats 16 | from scipy.stats import chi2 17 | from scipy.sparse import csr_matrix 18 | from sklearn.linear_model import LogisticRegression 19 | from sklearn.metrics.pairwise import cosine_similarity 20 | from sklearn.metrics import cohen_kappa_score 21 | from bert_score import score 22 | from icdcodex import icd2vec, hierarchy 23 | import pandas as pd 24 | 
from nltk.translate import meteor
25 | from nltk import word_tokenize
26 | #import nltk
27 | #nltk.download('punkt')
28 | 
29 | API_KEY = '' #UMLS REST API KEY
30 | 
31 | os.chdir('/Users/alis/Library/CloudStorage/OneDrive-Personal/Desktop/_Research/Ongoing_Projects/Submitted/ICD_Code_Paper')
32 | 
33 | def code_histogram():
34 |     fontsize=10
35 |     fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(12, 4), dpi=300)
36 | 
37 |     for i, (codesystem, ax) in enumerate(zip(['ICD9CM', 'ICD10CM', 'CPT'], axes.flatten())):
38 |         df = pd.read_parquet(f"Output/Intermediate/{codesystem}prompts.parquet")
39 | 
40 |         if codesystem == 'ICD9CM':
41 |             codesystem_title = 'ICD-9-CM'
42 |         elif codesystem == 'ICD10CM':
43 |             codesystem_title = 'ICD-10-CM'
44 |         else:
45 |             codesystem_title = codesystem
46 | 
47 |         df = df[[f'{codesystem}_code', f'{codesystem}_codedesc', f'{codesystem}_count']]
48 |         bins = np.logspace(0, 6.25, num=28)
49 | 
50 |         # Plotting in the respective subplot
51 |         ax.hist(df[f'{codesystem}_count'], bins=bins, edgecolor='black')
52 |         ax.set_xscale('log')
53 |         ax.set_xlim(1,1000000)
54 | 
55 |         # Set labels and title with specific font size
56 |         ax.set_xlabel('Frequency of each code', fontsize=fontsize)
57 |         ax.set_ylabel('Number of codes in bin', fontsize=fontsize)
58 |         ax.set_title(f'{codesystem_title}', fontsize=fontsize)
59 | 
60 |         # Set the font size for tick labels
61 |         for label in (ax.get_xticklabels() + ax.get_yticklabels()):
62 |             label.set_fontsize(fontsize)
63 | 
64 |     plt.tight_layout()
65 |     plt.savefig(f"Output/code_histogram.png", format='png', dpi=300)
66 | 
67 |     # Show the plot
68 |     plt.show()
69 | 
70 | def convert_to_nested_json(d):
71 |     nested_json = {}
72 |     for key, value in d.items():
73 |         if isinstance(key, tuple):
74 |             temp = nested_json
75 |             for item in key[:-1]:  # Iterate over the tuple except for the last element
76 |                 temp = temp.setdefault(item, {})  # Create nested dictionaries
77 |             temp[key[-1]] = value  # Set the value for the last element
78 |         else:
79 |             nested_json[key] = value
80 |     return nested_json
81 | 
82 | def displayJSONpretty(json_file):
83 |     mr.JSON(json_file)
84 | 
85 | def getcolpercent(df_result, column, model, analysis, metric):
86 |     analysis[metric][model] = str(round(df_result[column].mean()*100,2)) + "%"
87 | 
88 | def codedigit_check(code, gen_code):
89 |     match_count = 0
90 |     total_count = 0
91 |     match_list = []
92 | 
93 |     for i, orig_char in enumerate(code):
94 |         if orig_char != ".":
95 |             total_count += 1
96 |             if isinstance(gen_code, str):
97 |                 if i < len(gen_code):
98 |                     gen_char = gen_code[i]
99 |                     if orig_char == gen_char:
100 |                         match_count += 1
101 |                         match_list.append(total_count)
102 |     return match_count/total_count, match_list, total_count
103 | 
104 | def get_billable_code_dict():
105 |     ### ICD9 import ###
106 |     df_icd9 = pd.read_excel('Raw/CMS32_DESC_LONG_SHORT_DX.xlsx', engine='openpyxl', usecols=["DIAGNOSIS CODE","LONG DESCRIPTION"], converters={'DIAGNOSIS CODE':str,'LONG DESCRIPTION':str})
107 | 
108 |     ### ICD 10 CM import ###
109 |     df_icd10cm = pd.read_fwf('Raw/icd10cm_codes_2023.txt', colspecs=[(0,7),(8,400)], header=None, converters={0:str, 1: str})
110 |     df_icd10cm_addendum = pd.read_fwf('Raw/icd10cm_codes_addenda_2023.txt', colspecs='infer', infer_nrows=100, header=None, dtype=str)
111 | 
112 |     # add addenda
113 |     mask_add = df_icd10cm_addendum[0] == "Add:"
114 |     added_data = df_icd10cm_addendum.loc[mask_add, [1, 2]]
115 |     added_data = added_data.rename(columns={1:0, 2:1})
116 |     df_icd10cm = pd.concat([df_icd10cm, added_data], ignore_index=True)
117
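    # The addendum rows handled below patch the base 2023 code table:
    # "Delete:" rows remove retired codes, and "Revise to:" rows overwrite
    # the description of an existing code.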
| 118 | mask_del = df_icd10cm_addendum[0] == "Delete:" 119 | delete_values = df_icd10cm_addendum.loc[mask_del, 1] 120 | df_icd10cm = df_icd10cm.loc[~df_icd10cm[0].isin(delete_values)] 121 | 122 | mask_rev = df_icd10cm_addendum.iloc[:, 0] == "Revise to:" 123 | 124 | for _, row in df_icd10cm_addendum[mask_rev].iterrows(): 125 | mask_rev2 = df_icd10cm.iloc[:, 0] == row.iloc[1] 126 | df_icd10cm.loc[mask_rev2, df_icd10cm.columns[1]] = row.iloc[2] 127 | 128 | # rename columns for dataframes import from text files 129 | df_icd10cm.rename(columns={0:"DIAGNOSIS CODE", 1:"LONG DESCRIPTION"}, inplace=True) 130 | 131 | ### CPT import ### 132 | 133 | billable_dict ={} 134 | billable_dict['ICD9CM'] = dict(zip(df_icd9["DIAGNOSIS CODE"], df_icd9["LONG DESCRIPTION"])) 135 | billable_dict['ICD10CM'] = dict(zip(df_icd10cm["DIAGNOSIS CODE"], df_icd10cm["LONG DESCRIPTION"])) 136 | # billable_dict['CPT'] = 137 | return billable_dict 138 | 139 | def quickanalyze(model_list, file_path, suffix=""): 140 | analysis_dict={} 141 | df_results_dict={} 142 | billable_dict = get_billable_code_dict() 143 | 144 | with open(file_path, "rb") as file: 145 | results_dict = pickle.load(file) 146 | 147 | for codesystem in ["ICD9CM", "ICD10CM", "CPT"]: 148 | analysis = {'ExactMatch': {}, 'BillableCode':{}, 'LengthMatch':{}, 'DigitMatch_Ovr': {}, 'DigitMatch_Ind':{}}#, 'Part_Code': {}} 149 | df_result = pd.read_parquet(f"Output/Intermediate/{codesystem}prompts{suffix}.parquet") 150 | df_result = df_result.fillna("") 151 | 152 | for model in model_list: 153 | model_name_clean = model.replace(".", "").split(":")[0] 154 | df_result[model_name_clean] = results_dict[(codesystem, model_name_clean)] 155 | 156 | fmatch = model_name_clean + "_ExactMatch" 157 | df_result[fmatch] = (df_result[model_name_clean] == df_result[f"{codesystem}_code"]).astype(int) 158 | getcolpercent(df_result, fmatch, model_name_clean, analysis, "ExactMatch") 159 | 160 | if codesystem != "CPT": 161 | bmatch = model_name_clean + "_BillableCode" 162 | df_result[bmatch] = df_result[model_name_clean].apply(lambda x: 1 if x is not None and x.replace(".", "") in billable_dict[codesystem].keys() else 0) 163 | getcolpercent(df_result, bmatch, model_name_clean, analysis, "BillableCode") 164 | 165 | lmatch = model_name_clean + "_LengthMatch" 166 | df_result[lmatch] = (df_result[model_name_clean].str.len() == df_result[f"{codesystem}_code"].str.len()).astype(int) 167 | getcolpercent(df_result, lmatch, model_name_clean, analysis, "LengthMatch") 168 | 169 | dmatch = model_name_clean + "_DigitMatch_Ovr" 170 | df_result[dmatch], df_result[f'{dmatch}_list'], df_result['code_length'] = zip(*df_result.apply(lambda row: codedigit_check(row[f"{codesystem}_code"], row[model_name_clean]), axis=1)) 171 | getcolpercent(df_result, dmatch, model_name_clean, analysis, "DigitMatch_Ovr") 172 | 173 | digit_match = df_result[f'{dmatch}_list'].explode() 174 | analysis["DigitMatch_Ind"][model_name_clean] = { 175 | key: f"{round(value / (df_result['code_length'] >= key).sum() * 100, 2):.2f}%" 176 | if (df_result['code_length'] >= key).sum() !=0 177 | else '0.00%' 178 | for key, value in digit_match.value_counts().items() 179 | } 180 | 181 | #pmatch = model_name_clean+"_Part_Code" 182 | #df_result[pmatch] = (df_result[model_name_clean].str.split('.').str[0] == df_result[f"{codesystem}_code"].str.split('.').str[0]).astype(int) 183 | #getcolpercent(df_result, pmatch, model_name_clean, analysis, "Part_Code") 184 | 185 | df_results_dict[codesystem] = df_result 186 | analysis_dict[codesystem] = analysis 
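    # Illustrative shape of analysis_dict as saved below (values hypothetical,
    # shown only to document the structure):
    #   {"ICD9CM": {"ExactMatch": {"gpt-4-0314": "45.32%"},
    #               "DigitMatch_Ind": {"gpt-4-0314": {1: "91.20%", 2: "74.55%"}},
    #               ...},
    #    "ICD10CM": {...}, "CPT": {...}}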
187 | 188 | with open(f"Output/Intermediate/analysis{suffix}.json", "w") as file: 189 | json.dump(analysis_dict, file) 190 | 191 | with open(f"Output/Intermediate/df_analysis{suffix}.pkl", "wb") as file: 192 | pickle.dump(df_results_dict, file) 193 | 194 | return analysis_dict, df_results_dict 195 | 196 | global_embeddings = None 197 | global global_dict 198 | global_dict = {} 199 | 200 | def load_embeddings(file_path): 201 | global global_embeddings 202 | df = pd.read_parquet(file_path) 203 | df.set_index(df.columns[0], inplace=True) 204 | 205 | # Convert the DataFrame to a SciPy sparse matrix 206 | sparse_matrix = csr_matrix(df.values) 207 | 208 | # Create a Series to map IDs to row indices 209 | id_to_row = {id_val: idx for idx, id_val in enumerate(df.index)} 210 | global_embeddings = sparse_matrix, id_to_row 211 | 212 | return global_embeddings 213 | 214 | def create_icdcodexvectors(overwrite=False, suffix=""): 215 | global global_dict 216 | file_path = f'Output/Intermediate/icdcodex_dict{suffix}.pkl' 217 | 218 | model_list = ["gpt-3.5-turbo-0301", 219 | "gpt-3.5-turbo-0613", 220 | "gpt-3.5-turbo-1106", 221 | "gpt-4-0314", 222 | "gpt-4-0613", 223 | "gpt-4-1106-preview", 224 | "gemini-pro", 225 | "meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3", 226 | ] 227 | 228 | #Check if the file exists 229 | if os.path.isfile(file_path) and overwrite == False: 230 | with open(file_path, 'rb') as file: 231 | global_dict = pickle.load(file) 232 | else: 233 | for codesystem in ["ICD9CM", "ICD10CM"]: 234 | df = pd.read_parquet(f'Output/Intermediate/{codesystem}_parsed{suffix}.parquet') 235 | 236 | model_names_clean = [name.replace(".", "").split(":")[0] for name in model_list] 237 | id_cols = [f"{codesystem}_code"] + model_names_clean 238 | embedder = icd2vec.Icd2Vec(workers=-1) 239 | 240 | if codesystem == "ICD9CM": 241 | code_list = pd.concat([df[col].astype(str).str.replace(".", "") for col in id_cols]).unique() 242 | embedder.fit(*hierarchy.icd9()) 243 | elif codesystem == "ICD10CM": 244 | code_list = pd.concat([df[col].astype(str) for col in id_cols]).unique() 245 | embedder.fit(*hierarchy.icd10cm(version="2023")) 246 | 247 | global_dict[codesystem] = {} 248 | 249 | for code in tqdm_conc(code_list): 250 | try: 251 | vector = embedder.to_vec([code]) 252 | global_dict[codesystem][code] = vector[0].flatten() 253 | except KeyError: 254 | global_dict[codesystem][code] = None 255 | 256 | # Save global_dict to a file 257 | with open(file_path, 'wb') as file: 258 | pickle.dump(global_dict, file) 259 | 260 | return global_dict 261 | 262 | def get_vector(embeddings, id_val): 263 | sparse_matrix, id_to_row = embeddings 264 | row_index = id_to_row.get(id_val) 265 | return sparse_matrix[row_index].toarray().flatten() if row_index is not None else None 266 | 267 | @lru_cache(maxsize=None) # Caching for repeated vector pair calculations 268 | def calc_cosine_sim(id_pairs, metric, codesystem): 269 | id1, id2 = id_pairs 270 | vector1, vector2 = None, None 271 | 272 | if metric == "icdcodex" and codesystem != "CPT": 273 | if codesystem == "ICD9CM": 274 | id1 = id1.replace(".", "") 275 | id2 = id2.replace(".", "") 276 | 277 | vector1 = global_dict[codesystem].get(id1) 278 | vector2 = global_dict[codesystem].get(id2) 279 | 280 | elif metric == "cui2vec" and codesystem != "CPT": 281 | vector1 = get_vector(global_embeddings, id1) 282 | vector2 = get_vector(global_embeddings, id2) 283 | 284 | if vector1 is not None and vector2 is not None: 285 | cosine_distance = 
cosine_similarity(vector1.reshape(1, -1), vector2.reshape(1, -1))[0][0] 286 | if metric == "icdcodex": 287 | cosine_distance = (cosine_distance + 1) / 2 # Normalizing to the range of 0 to 1 288 | similarity = round(cosine_distance, 3) 289 | else: 290 | similarity = pd.NA 291 | 292 | return similarity 293 | 294 | @lru_cache(maxsize=None) # Caching for repeated vector pair calculations 295 | def calc_meteor_score(pair): 296 | desc1, desc2 = pair 297 | score = round(meteor([word_tokenize(desc1)], word_tokenize(desc2)), 3) 298 | return score 299 | 300 | def calc_score(result_dict, df, codesystem, model_name, score_map, metric): 301 | model_name_clean = model_name.replace(".", "").split(":")[0] 302 | df_result = pd.DataFrame() 303 | 304 | if metric == "icdcodex": 305 | vector1_col = f"{codesystem}_code" 306 | vector2_col = f"{model_name_clean}{score_map[metric]}" 307 | elif metric == "bertscore" or metric == "meteor": 308 | vector1_col = f"{codesystem}_codedesc" 309 | vector2_col = f"{model_name_clean}{score_map[metric]}" 310 | elif metric=="cui2vec": 311 | vector1_col = f"{codesystem}{score_map[metric]}" 312 | vector2_col = f"{model_name_clean}{score_map[metric]}" 313 | 314 | # Filter out rows where either code is None 315 | valid_pairs = df[[vector1_col, vector2_col]].dropna() 316 | 317 | # Pre-calculate similarities for unique pairs 318 | unique_pairs = valid_pairs.drop_duplicates() 319 | precalculated_results = {} 320 | 321 | if metric == "bertscore": 322 | refs = unique_pairs[vector1_col].astype(str).tolist() 323 | cands = unique_pairs[vector2_col].astype(str).tolist() 324 | 325 | if cands and refs: 326 | P, R, F1 = score(cands, refs, lang='en-sci', verbose=False) 327 | F1_values = F1.numpy() 328 | 329 | for i, pair in enumerate(unique_pairs.itertuples(index=False, name=None)): 330 | precalculated_results[pair] = F1_values[i] 331 | 332 | elif metric == "meteor": 333 | precalculated_results = { 334 | pair: calc_meteor_score(pair) 335 | for pair in unique_pairs.itertuples(index=False, name=None) 336 | } 337 | 338 | else: 339 | precalculated_results = { 340 | pair: calc_cosine_sim(pair, metric, codesystem) 341 | for pair in unique_pairs.itertuples(index=False, name=None) 342 | } 343 | 344 | # Apply precalculated results 345 | df_result[f'{model_name_clean}_{metric}'] = df.apply( 346 | lambda row: precalculated_results.get((row[vector1_col], row[vector2_col])), axis=1 347 | ) 348 | 349 | df_result[f'{model_name_clean}_{metric}'] = pd.to_numeric(df_result[f'{model_name_clean}_{metric}'], errors='coerce') 350 | df_result[f'{model_name_clean}_{metric}'] = df_result[f'{model_name_clean}_{metric}'].apply(lambda x: round(x, 3) if pd.notna(x) else x) 351 | df_result[f'{model_name_clean}_{metric}'] = df_result[f'{model_name_clean}_{metric}'].round(3) 352 | 353 | result_dict[codesystem][f"{metric} vectors"][model_name_clean] = int(df_result[f'{model_name_clean}_{metric}'].count()) 354 | 355 | mean_value = float(df_result[f'{model_name_clean}_{metric}'].mean()) 356 | result_dict[codesystem][metric][model_name_clean] = round(mean_value,3) 357 | 358 | return df_result 359 | 360 | def get_score(result_dict, datasets, model_list, metric, suffix=""): 361 | print(metric) 362 | 363 | score_map = {"cui2vec":"_CUI", "bertscore":"_desc", "icdcodex":"", "meteor":"_desc"} 364 | 365 | for codesystem in ["ICD9CM", "ICD10CM", "CPT"]: 366 | df = pd.read_parquet(f'Output/Intermediate/{codesystem}_parsed{suffix}.parquet') 367 | 368 | if codesystem not in datasets: 369 | datasets[codesystem] = df.copy() 370 | 371 | if 
codesystem not in result_dict: 372 | result_dict[codesystem] = {} 373 | 374 | if metric not in result_dict[codesystem]: 375 | result_dict[codesystem][metric] = {} 376 | result_dict[codesystem][f"{metric} vectors"] = {} 377 | 378 | with ThreadPoolExecutor(max_workers=len(model_list)) as executor: 379 | future_to_model = {executor.submit(calc_score, result_dict, df, codesystem, model_name, score_map, metric): model_name for model_name in model_list} 380 | 381 | for future in tqdm_conc(as_completed(future_to_model), total=len(future_to_model), desc=f"Processing {codesystem}"): 382 | df_result = future.result() 383 | model_name_clean = future_to_model[future].replace(".", "").split(":")[0] 384 | datasets[codesystem][f'{model_name_clean}_{metric}'] = df_result[f'{model_name_clean}_{metric}'] 385 | datasets[codesystem].loc[(datasets[codesystem][f'{model_name_clean}_desc'] == datasets[codesystem][f'{codesystem}_codedesc']), f'{model_name_clean}_{metric}'] = 1 386 | 387 | with open(f"Output/Intermediate/analysis{suffix}_automatedscores.json", "w") as file: 388 | json.dump(result_dict, file) 389 | 390 | with open(f"Output/Intermediate/df_analysis{suffix}_automatedscores.pkl", "wb") as file: 391 | pickle.dump(datasets, file) 392 | 393 | return result_dict, datasets 394 | 395 | def run_meteor(java_command): 396 | try: 397 | result = subprocess.run(java_command, capture_output=True, text=True) 398 | return result 399 | except Exception as e: 400 | print(f"Error running subprocess: {e}") 401 | 402 | def process_model(df, codesystem, model_name): 403 | model_name_clean = model_name.replace(".", "").split(":")[0] 404 | model_name_clean_filepath = model_name.replace(".", "").split("/")[0] 405 | 406 | cand_col = f'{model_name_clean}_desc' 407 | ref_col = f'{codesystem}_codedesc' 408 | 409 | meteor_csv_path = f"{os.getcwd()}/Output/Intermediate/meteor" 410 | meteor_jar_path = f"{os.getcwd()}/Raw/meteor-1.5/meteor-1.5.jar" 411 | model_filepath = f'{meteor_csv_path}/{codesystem}_{model_name_clean_filepath}.txt' 412 | ref_filepath = f'{meteor_csv_path}/{codesystem}_{model_name_clean_filepath}_ref.txt' 413 | 414 | valid_pairs = df[[ref_col, cand_col]].dropna().drop_duplicates() 415 | valid_pairs.reset_index(drop=True, inplace=True) 416 | 417 | valid_pairs[ref_col].to_csv(ref_filepath, index=False, encoding='utf-8') 418 | valid_pairs[cand_col].to_csv(model_filepath, index=False, encoding='utf-8') 419 | 420 | java_command = ["java", "-Xmx1G", "-jar", meteor_jar_path, 421 | model_filepath, ref_filepath, 422 | "-l", "en", "-norm"] 423 | 424 | result = run_meteor(java_command) 425 | 426 | return model_name_clean, result, valid_pairs 427 | 428 | def meteor_15(result_dict, datasets, model_list, suffix): 429 | print("Running meteor15") 430 | for codesystem in ["ICD9CM", "ICD10CM", "CPT"]: 431 | 432 | ref_col = f'{codesystem}_codedesc' 433 | 434 | df = pd.read_parquet(f'Output/Intermediate/{codesystem}_parsed{suffix}.parquet') 435 | 436 | if codesystem not in datasets: 437 | datasets[codesystem] = df 438 | 439 | if codesystem not in result_dict: 440 | result_dict[codesystem] = {} 441 | if "meteor15" not in result_dict[codesystem]: 442 | result_dict[codesystem]["meteor15"] = {} 443 | result_dict[codesystem][f"meteor15 vectors"] = {} 444 | 445 | df_result = pd.DataFrame() 446 | df_result[f'{codesystem}_codedesc'] = datasets[codesystem][f'{codesystem}_codedesc'].copy() 447 | 448 | # Parallel execution 449 | with ThreadPoolExecutor(max_workers=len(model_list)) as executor: 450 | futures = {executor.submit(process_model, df, 
codesystem, model_name): model_name for model_name in model_list} 451 | 452 | for future in tqdm_conc(as_completed(futures), total=len(futures), desc=f"Processing {codesystem}"): 453 | model_name_clean, result, pairs = future.result() 454 | 455 | metric_col = f'{model_name_clean}_meteor15' 456 | cand_col =f'{model_name_clean}_desc' 457 | 458 | if result.returncode == 0: 459 | output = result.stdout 460 | 461 | # Regular expression to find segment scores 462 | pattern = r"Segment (\d+) score:\t([0-9.]+)" 463 | matches = re.findall(pattern, output) 464 | segment_scores = {int(segment)-2: round(float(score),3) for segment, score in matches} 465 | 466 | pairs[metric_col] = pd.Series(segment_scores) 467 | 468 | count = int(pairs[metric_col].count()) 469 | mean_value = float(pairs[metric_col].mean()) 470 | 471 | result_dict[codesystem][f"meteor15 vectors"][model_name_clean] = count 472 | result_dict[codesystem]["meteor15"][model_name_clean] = round(mean_value,3) 473 | 474 | metric_dict = pairs.groupby([ref_col, cand_col])[metric_col].apply(lambda x: x.iloc[0]).to_dict() 475 | 476 | datasets[codesystem][metric_col] = datasets[codesystem].apply(lambda row: metric_dict.get((row[ref_col], row[cand_col]), None), axis=1) 477 | datasets[codesystem].loc[(datasets[codesystem][f'{model_name_clean}_desc'] == datasets[codesystem][f'{codesystem}_codedesc']), metric_col] = 1 478 | 479 | with open(f"Output/Intermediate/analysis{suffix}_automatedscores.json", "w") as file: 480 | json.dump(result_dict, file) 481 | 482 | with open(f"Output/Intermediate/df_analysis{suffix}_automatedscores.pkl", "wb") as file: 483 | pickle.dump(datasets, file) 484 | 485 | return result_dict, datasets 486 | 487 | def frequencychart(model_list, suffix=""): 488 | with open(f"Output/Intermediate/df_analysis{suffix}.pkl", "rb") as file: 489 | df_results_dict = pickle.load(file) 490 | 491 | sns.set(style="whitegrid") 492 | plt.rcParams.update({'font.size': 14}) 493 | 494 | # Prepare an empty DataFrame for the aggregated data 495 | aggregated_data = pd.DataFrame() 496 | 497 | for model_name in model_list: 498 | model_name_clean = model_name.replace(".", "").split(":")[0] 499 | fmatch_col_name = model_name_clean + "_Full_Code" 500 | 501 | for codesystem in ["ICD9CM", "ICD10CM", "CPT"]: 502 | df_result = df_results_dict[codesystem].fillna("") 503 | df_result[fmatch_col_name] = (df_result[model_name_clean] == df_result[f"{codesystem}_code"]).astype(int) 504 | 505 | bins = np.logspace(0, 7, num=8) 506 | 507 | # Assign bins to the 'count' column 508 | 509 | bin_categories = ["<10$^1$", "10$^1$-10$^2$", "10$^2$-10$^3$", "10$^3$-10$^4$", "10$^4$-10$^5$", ">10$^5$", ""] 510 | 511 | df_result['bin'] = pd.cut(df_result[f'{codesystem}_count'], bins=bins, labels=bin_categories, right=False, include_lowest=True) 512 | df_result['bin'] = pd.Categorical(df_result['bin'], categories=bin_categories) 513 | 514 | # Calculate mean and count for each bin 515 | aggregated_stats = df_result.groupby('bin', observed=False)[fmatch_col_name].agg(['mean', 'count', 'sem']).reset_index(drop=True) 516 | aggregated_stats['code system'] = codesystem 517 | aggregated_stats['model'] = model_name_clean 518 | aggregated_stats['bin'] = df_result['bin'].cat.categories 519 | 520 | # Append to the aggregated data 521 | aggregated_data = pd.concat([aggregated_data, aggregated_stats], ignore_index=True) 522 | 523 | # Plotting - one subplot per model 524 | n_models = len(model_list) 525 | n_cols = 2 # for example, 2 columns 526 | n_rows = n_models // n_cols + (n_models % n_cols > 
0) 527 | 528 | plt.figure(figsize=(12 * n_cols, 6 * n_rows)) # Adjust the figure size 529 | 530 | for i, model_name in enumerate(model_list): 531 | model_name_clean = model_name.replace(".", "").split(":")[0] 532 | ax = plt.subplot(n_rows, n_cols, i + 1) 533 | 534 | # Filter data for the current model 535 | model_data = aggregated_data[~aggregated_data['mean'].isna()] 536 | model_data = model_data[model_data['model'] == model_name_clean] 537 | model_data = model_data[['model', 'bin', "code system", 'mean', 'count', 'sem']] 538 | model_data['mean'] = model_data['mean'] 539 | model_data = model_data[model_data['count'] >= 10] 540 | model_data = model_data.reset_index(drop=True) 541 | 542 | # Rename 'mean' column to fmatch for plotting 543 | fmatch = model_name_clean + "_Full_Code" 544 | model_data.rename(columns={'mean': fmatch}, inplace=True) 545 | 546 | # Create a bar plot 547 | barplot = sns.barplot(x='bin', y=fmatch, hue='code system', data=model_data, ax=ax, errorbar=None) 548 | 549 | for j in range(len(barplot.patches)): 550 | patch = barplot.patches[j] 551 | x = patch.get_x() + patch.get_width() / 2 552 | y = patch.get_height() 553 | if j < len(model_data): 554 | sem = model_data.iloc[j % len(model_data)]['sem'] 555 | ax.errorbar(x, y, yerr=sem, fmt='none', color='black', capsize=5) 556 | 557 | for j, p in enumerate(barplot.patches): 558 | if p.get_width() != 0: 559 | x = p.get_x() + p.get_width() / 2 560 | y = p.get_height() 561 | 562 | sem = model_data.iloc[j]['sem'] 563 | ax.errorbar(x, y, yerr=sem, fmt='none', color='black', capsize=5) 564 | 565 | #count = model_data.iloc[j]['count'] 566 | #ax.text(x, + 0.01, count, ha='center', va='bottom', color='black', fontsize=10) 567 | 568 | ax.set_xlabel('Annual MSHS Code Frequency') 569 | ax.set_ylabel('Exact Code Match Rate') 570 | ax.set_title(f'{model_name_clean}') 571 | ax.legend(loc='upper left') 572 | ax.set_ylim(0, 1) 573 | 574 | # Adjust layout and save plot 575 | plt.tight_layout() 576 | plt.subplots_adjust(hspace=0.25) 577 | plt.savefig(f"Output/combined_match_rate_bar_chart{suffix}.png", format='png', dpi=300) 578 | 579 | # Show the plot 580 | plt.show() 581 | 582 | def manual_analysis(filepath1, filepath2): 583 | df_dict = {} 584 | results_dict = {} 585 | analysis_dict ={} 586 | 587 | with open(f"Output/Intermediate/df_analysis_man_automatedscores.pkl", "rb") as file: 588 | code_datasets = pickle.load(file) 589 | 590 | for codesystem in ["ICD9CM", "ICD10CM", "CPT"]: 591 | df_merge = pd.DataFrame() 592 | df_dict[codesystem] = {} 593 | df1 = pd.read_excel(filepath1,sheet_name=codesystem) 594 | df2 = pd.read_excel(filepath2,sheet_name=codesystem) 595 | 596 | df_merge = pd.merge(df1,df2, on=['codedesc','desc'], how='outer') 597 | df_merge = df_merge.rename(columns={"code": "Eyal", "Column1": "Ali"}) 598 | df_merge['Eyal'] = df_merge['Eyal'].astype(int) 599 | df_merge['Ali'] = df_merge['Ali'].astype(int) 600 | df_merge['Avg_STS'] = (df_merge['Eyal'] + df_merge['Ali'] )/ 2 601 | 602 | for col in ['Eyal','Ali']: 603 | df_merge.loc[(df_merge[col] == 0) , f'{col}_simple'] = 0 604 | df_merge.loc[(df_merge[col] >= 1) & (df_merge[col] <= 3), f'{col}_simple'] = 1 605 | df_merge.loc[(df_merge[col] >= 4) , f'{col}_simple'] = 2 606 | 607 | df_merge['Avg_STS_simple'] = (df_merge['Eyal_simple'] + df_merge['Ali_simple'] )/ 2 608 | 609 | df_orig = pd.read_parquet(f"Output/Intermediate/{codesystem}_parsed_man.parquet") 610 | 611 | model_list = ["gpt-35-turbo-0301", 612 | "gpt-35-turbo-0613", 613 | "gpt-35-turbo-1106", 614 | "gpt-4-0314", 615 | 
"gpt-4-0613", 616 | "gpt-4-1106-preview", 617 | "gemini-pro", 618 | "meta/llama-2-70b-chat", 619 | ] 620 | 621 | df_results = code_datasets[codesystem].copy() 622 | df_results = df_results.loc[:,~df_results.columns.duplicated()] 623 | 624 | scores_list = [] 625 | analysis_dict[codesystem] = {'Avg_STS': {}, 'Avg_STS_simple':{}, 'cui2vec':{},'cui2vec vectors':{}, 'icdcodex': {},'icdcodex vectors': {}, 'meteor15':{},'meteor15 vectors':{}, 'bertscore':{},'bertscore vectors':{}} 626 | 627 | # add manual scores to score datasets. 628 | for model_name in model_list: 629 | 630 | manual_columns = [f'{codesystem}_codedesc', 631 | f'{model_name}_desc', 632 | f'{model_name}_Eyal', 633 | f'{model_name}_Eyal_simple', 634 | f'{model_name}_Ali', 635 | f'{model_name}_Ali_simple', 636 | f'{model_name}_Avg_STS', 637 | f'{model_name}_Avg_STS_simple', 638 | f'{model_name}_cui2vec', 639 | f'{model_name}_icdcodex', 640 | f'{model_name}_meteor15', 641 | f'{model_name}_bertscore'] 642 | 643 | results_columns = [f'{codesystem}_codedesc', 644 | f'{model_name}_desc' , 645 | f'{model_name}_cui2vec', 646 | f'{model_name}_icdcodex', 647 | f'{model_name}_meteor15', 648 | f'{model_name}_bertscore'] 649 | 650 | orig_columns = [f'{codesystem}_code', 651 | f'{codesystem}_codedesc', 652 | f'{codesystem}_count', 653 | 'probability', 654 | f'{model_name}', 655 | f'{model_name}_desc'] 656 | 657 | df_merge_model = df_merge.rename(columns={'codedesc':f'{codesystem}_codedesc', 658 | 'desc':f'{model_name}_desc', 659 | "Eyal": f"{model_name}_Eyal", 660 | "Eyal_simple": f"{model_name}_Eyal_simple", 661 | "Ali":f"{model_name}_Ali", 662 | "Ali_simple":f"{model_name}_Ali_simple", 663 | "Avg_STS":f'{model_name}_Avg_STS', 664 | "Avg_STS_simple":f'{model_name}_Avg_STS_simple'}) 665 | 666 | df_merge_model = pd.merge(df_merge_model, df_results[results_columns], on=[f'{codesystem}_codedesc',f'{model_name}_desc'], how="inner") 667 | 668 | scores_list.extend(zip(*[df_merge_model[col] for col in manual_columns])) 669 | 670 | manual_col_simple = [col.split("_", 1)[1] for col in manual_columns] 671 | 672 | df_final = pd.merge(df_orig[orig_columns], df_merge_model[manual_columns], on=[f'{codesystem}_codedesc',f'{model_name}_desc'], how="left") 673 | 674 | metric_dict = {"Avg_STS":f'{model_name}_Avg_STS', 675 | "Avg_STS_simple":f'{model_name}_Avg_STS_simple', 676 | "cui2vec":f'{model_name}_cui2vec', 677 | "icdcodex":f'{model_name}_icdcodex', 678 | "meteor15":f'{model_name}_meteor15', 679 | "bertscore":f'{model_name}_bertscore'} 680 | 681 | df_final.loc[(df_final[f'{model_name}_desc'] == df_final[f'{codesystem}_codedesc']) , f'{model_name}_Eyal'] = 5 682 | df_final.loc[(df_final[f'{model_name}_desc'] == df_final[f'{codesystem}_codedesc']) , f'{model_name}_Ali'] = 5 683 | df_final.loc[(df_final[f'{model_name}_desc'] == df_final[f'{codesystem}_codedesc']) , f'{model_name}_Avg_STS'] = 5 684 | df_final.loc[(df_final[f'{model_name}_desc'] == df_final[f'{codesystem}_codedesc']) , f'{model_name}_Eyal_simple'] = 3 685 | df_final.loc[(df_final[f'{model_name}_desc'] == df_final[f'{codesystem}_codedesc']) , f'{model_name}_Ali_simple'] = 3 686 | df_final.loc[(df_final[f'{model_name}_desc'] == df_final[f'{codesystem}_codedesc']) , f'{model_name}_Avg_STS_simple'] = 3 687 | 688 | for metric, metric_col in metric_dict.items(): 689 | if "STS" not in metric: 690 | df_final.loc[(df_final[f'{model_name}_desc'] == df_final[f'{codesystem}_codedesc']) , metric_col] = 1 691 | 692 | analysis_dict[codesystem][metric][model_name] = str(round(df_final[metric_col].mean(),3)) 693 
| if "STS" not in metric: 694 | analysis_dict[codesystem][f'{metric} vectors'][model_name] = str(df_final[metric_col].count()) 695 | 696 | df_dict[codesystem][model_name] = df_final 697 | 698 | ## CORRELATION CALCULATION ## 699 | # create a scores dataframe with unique description pairs 700 | df_results_all = pd.DataFrame() 701 | df_results_all[manual_col_simple] = pd.DataFrame(scores_list, columns=manual_col_simple) 702 | df_results_all.drop_duplicates(inplace=True) 703 | df_results_all['codedesc'].dropna(inplace=True) 704 | df_results_all = df_results_all[df_results_all['codedesc'] != df_results_all['desc']] 705 | 706 | kappa = cohen_kappa_score(df_merge['Eyal'], df_merge['Ali']) 707 | kappa_simple = cohen_kappa_score(df_merge['Eyal_simple'], df_merge['Ali_simple']) 708 | 709 | # Calculate Pearson correlation between manual and automated scores 710 | correlation_dict ={} 711 | for var in ['Avg_STS', "Avg_STS_simple", "Ali_simple", "Eyal_simple"]: 712 | if "ICD" in codesystem: 713 | correlation_matrix = df_results_all[[var, 'cui2vec', 'icdcodex', 'meteor15', 'bertscore']].corr(method='pearson') 714 | correlation = correlation_matrix.loc[var, ['cui2vec', 'icdcodex', 'meteor15', 'bertscore']] 715 | 716 | else: 717 | correlation_matrix = df_results_all[['Avg_STS', 'meteor15', 'bertscore']].corr(method='pearson') 718 | correlation = correlation_matrix.loc[f'Avg_STS', ['meteor15', 'bertscore']] 719 | 720 | correlation_dict[var] = correlation 721 | 722 | results_dict[codesystem] = {"Kappa":round(kappa,3), "Correlation":correlation_dict['Avg_STS'], 723 | "Kappa, simple":round(kappa_simple,3), "Correlation, simple":correlation_dict['Avg_STS_simple'], 724 | "Correlation, Ali":correlation_dict['Ali_simple'], "Correlation, Eyal":correlation_dict['Eyal_simple']} 725 | 726 | with open(f"Output/Intermediate/df_analysis_manauto.pkl", "wb") as file: 727 | pickle.dump(df_dict, file) 728 | 729 | with open(f"Output/Intermediate/analysis_manauto.json", "w") as file: 730 | json.dump(analysis_dict, file) 731 | 732 | return results_dict, df_dict 733 | 734 | def create_metric_table_old(suffix): 735 | ##load data 736 | # simple metrics 737 | with open(f"Output/Intermediate/df_analysis{suffix}.pkl", "rb") as file: 738 | df_results1 = pickle.load(file) 739 | 740 | with open(f"Output/Intermediate/analysis{suffix}.json", "r") as file: 741 | result_dict1 = json.load(file) 742 | 743 | # automated metrics 744 | if suffix == "_man": 745 | with open(f"Output/Intermediate/df_analysis_manauto.pkl", "rb") as file: 746 | df_results2 = pickle.load(file) 747 | 748 | with open(f"Output/Intermediate/analysis_manauto.json", "r") as file: 749 | result_dict2 = json.load(file) 750 | 751 | if suffix == "": 752 | with open(f"Output/Intermediate/df_analysis_automatedscores.pkl", "rb") as file: 753 | df_results2 = pickle.load(file) 754 | 755 | with open(f"Output/Intermediate/analysis_automatedscores.json", "r") as file: 756 | result_dict2 = json.load(file) 757 | 758 | model_list = ["gpt-35-turbo-0301", 759 | "gpt-35-turbo-0613", 760 | "gpt-35-turbo-1106", 761 | "gpt-4-0314", 762 | "gpt-4-0613", 763 | "gpt-4-1106-preview", 764 | "gemini-pro", 765 | "meta/llama-2-70b-chat", 766 | ] 767 | 768 | # merge data 769 | df_merge = {} 770 | for codesystem in ["ICD9CM", "ICD10CM", "CPT"]: 771 | df_merge[codesystem] = {} 772 | 773 | for model_name in model_list: 774 | if suffix == "_man": 775 | col_list2 = [f'{codesystem}_code', 776 | f'{codesystem}_codedesc', 777 | f'{model_name}', 778 | f'{model_name}_desc', 779 | f'{model_name}_Avg_STS', 780 | 
f'{model_name}_cui2vec', 781 | f'{model_name}_icdcodex', 782 | f'{model_name}_meteor15', 783 | f'{model_name}_bertscore'] 784 | 785 | if suffix == "": 786 | col_list2 = [f'{codesystem}_code', 787 | f'{codesystem}_codedesc', 788 | f'{model_name}', 789 | f'{model_name}_desc', 790 | f'{model_name}_cui2vec', 791 | f'{model_name}_icdcodex', 792 | f'{model_name}_meteor15', 793 | f'{model_name}_bertscore'] 794 | 795 | if codesystem != "CPT": 796 | col_list1 = [f'{codesystem}_code', 797 | f'{model_name}', 798 | f'{model_name}_ExactMatch', 799 | f'{model_name}_BillableCode', 800 | f'{model_name}_LengthMatch', 801 | f'{model_name}_DigitMatch_Ovr', 802 | f'{model_name}_DigitMatch_Ovr_list'] 803 | else: 804 | col_list1 = [f'{codesystem}_code', 805 | f'{model_name}', 806 | f'{model_name}_ExactMatch', 807 | f'{model_name}_LengthMatch', 808 | f'{model_name}_DigitMatch_Ovr', 809 | f'{model_name}_DigitMatch_Ovr_list'] 810 | 811 | if suffix == "_man": 812 | merged_df = pd.merge(df_results1[codesystem][col_list1], 813 | df_results2[codesystem][model_name][col_list2], 814 | on=[f'{codesystem}_code', model_name], 815 | how="inner") 816 | if suffix == "": 817 | merged_df = pd.merge(df_results1[codesystem][col_list1], 818 | df_results2[codesystem][col_list2], 819 | on=[f'{codesystem}_code', model_name], 820 | how="inner") 821 | 822 | df_merge[codesystem][model_name] = merged_df 823 | 824 | for codesystem in result_dict2: 825 | for metric in result_dict2[codesystem]: 826 | result_dict1[codesystem][metric] = result_dict2[codesystem][metric] 827 | 828 | metrics = {} 829 | models = set() 830 | for codesystem, metrics in result_dict1.items(): 831 | for metric, values in metrics.items(): 832 | if not metric.endswith("vectors"): # Exclude vector count entries for column headers 833 | models.update(values.keys()) 834 | 835 | # Convert the set of models to a list to maintain order 836 | models_list = sorted(list(models)) 837 | 838 | # Initialize an empty DataFrame 839 | df = pd.DataFrame(index=pd.MultiIndex.from_product([result_dict1.keys(), metrics.keys()], names=['Code System', 'Metric']), columns=models_list) 840 | 841 | # Populate Table 842 | for codesystem, metrics in result_dict1.items(): 843 | for metric, values in metrics.items(): 844 | if metric.endswith("vectors") or metric.endswith("simple"): 845 | continue 846 | vector_metric = metric + " vectors" # Assuming vector counts follow this naming convention 847 | for model, score in values.items(): 848 | # Retrieve corresponding vector count 849 | vector_count = result_dict1[codesystem].get(vector_metric, {}).get(model, None) 850 | total_count = df_merge[codesystem][model][f'{model}_desc'].count() 851 | 852 | # Format cell as "score (vector count / valid codes)" 853 | cell_value = f"{score} ({round(int(vector_count)/total_count*100,1)}%)" if vector_count is not None else str(score) 854 | df.at[(codesystem, metric), model] = cell_value 855 | 856 | df = df[~df.map(lambda x: 'nan' in str(x)).any(axis=1)] 857 | 858 | # Optional: formatting for publication (adjust as needed) 859 | styled_df = df.style.set_table_styles([{ 860 | 'selector': 'th', 861 | 'props': [('font-size', '10pt'), ('text-align', 'center')] 862 | }]) 863 | styled_df= styled_df.set_properties(**{ 864 | 'text-align': 'center', 865 | 'font-size': '9pt' 866 | }) 867 | 868 | styled_df.to_excel(f"Output/metrics_tables{suffix}.xlsx") 869 | 870 | return df 871 | 872 | def create_metric_table(suffix, error_analysis=False): 873 | ##load data 874 | # simple metrics 875 | with 
open(f"Output/Intermediate/df_analysis{suffix}.pkl", "rb") as file: 876 | df_results1 = pickle.load(file) 877 | 878 | # automated metrics 879 | if suffix == "_man": 880 | with open(f"Output/Intermediate/df_analysis_manauto.pkl", "rb") as file: 881 | df_results2 = pickle.load(file) 882 | 883 | if suffix == "": 884 | with open(f"Output/Intermediate/df_analysis_automatedscores.pkl", "rb") as file: 885 | df_results2 = pickle.load(file) 886 | 887 | # merge data 888 | df_merge = {} 889 | result_dict1 = {} 890 | model_list = ["gpt-35-turbo-0301", 891 | "gpt-35-turbo-0613", 892 | "gpt-35-turbo-1106", 893 | "gpt-4-0314", 894 | "gpt-4-0613", 895 | "gpt-4-1106-preview", 896 | "gemini-pro", 897 | "meta/llama-2-70b-chat", 898 | ] 899 | 900 | for codesystem in ["ICD9CM", "ICD10CM", "CPT"]: 901 | df_merge[codesystem] = {} 902 | result_dict1[codesystem] = {'ExactMatch':{}, 903 | 'BillableCode':{}, 904 | 'LengthMatch':{}, 905 | 'DigitMatch_Ovr':{}, 906 | 'DigitMatch_Ind':{}, 907 | 'Avg_STS': {}, 908 | 'Avg_STS_simple':{}, 909 | 'cui2vec':{}, 910 | 'cui2vec vectors':{}, 911 | 'icdcodex': {}, 912 | 'icdcodex vectors': {}, 913 | 'meteor15':{}, 914 | 'meteor15 vectors':{}, 915 | 'bertscore':{}, 916 | 'bertscore vectors':{}} 917 | 918 | for model_name in model_list: 919 | #Column List 1 920 | col_list1 = [f'{codesystem}_code', 921 | f'{model_name}', 922 | f'{model_name}_ExactMatch'] 923 | 924 | if codesystem != "CPT": 925 | col_list1.append(f'{model_name}_BillableCode') 926 | 927 | col_list1.extend([ 928 | f'{model_name}_LengthMatch', 929 | f'{model_name}_DigitMatch_Ovr', 930 | f'{model_name}_DigitMatch_Ovr_list', 931 | 'code_length']) 932 | 933 | #Column List 1 934 | col_list2 = [f'{codesystem}_code', 935 | f'{codesystem}_codedesc', 936 | f'{model_name}', 937 | f'{model_name}_desc'] 938 | 939 | if suffix == "_man": 940 | col_list2.append(f'{model_name}_Avg_STS') 941 | 942 | col_list2.extend([f'{model_name}_cui2vec', 943 | f'{model_name}_icdcodex', 944 | f'{model_name}_meteor15', 945 | f'{model_name}_bertscore']) 946 | 947 | if suffix == "_man": 948 | merged_df = pd.merge(df_results1[codesystem][col_list1], 949 | df_results2[codesystem][model_name][col_list2], 950 | on=[f'{codesystem}_code', model_name], 951 | how="inner") 952 | if suffix == "": 953 | merged_df = pd.merge(df_results1[codesystem][col_list1], 954 | df_results2[codesystem][col_list2], 955 | on=[f'{codesystem}_code', model_name], 956 | how="inner") 957 | 958 | if error_analysis == True: 959 | merged_df = merged_df[merged_df[f'{codesystem}_codedesc'] != merged_df[f'{model_name}_desc']] 960 | 961 | df_merge[codesystem][model_name] = merged_df 962 | 963 | # Metric List 964 | metric_dict ={'ExactMatch':f'{model_name}_ExactMatch'} 965 | 966 | if codesystem!= "CPT": 967 | metric_dict.update({'BillableCode':f'{model_name}_BillableCode'}) 968 | 969 | metric_dict.update({'LengthMatch':f'{model_name}_LengthMatch', 970 | 'DigitMatch_Ovr':f'{model_name}_DigitMatch_Ovr', 971 | 'DigitMatch_Ovr_list':f'{model_name}_DigitMatch_Ovr_list'}) 972 | 973 | if suffix=="_man": 974 | metric_dict.update({'Avg_STS':f'{model_name}_Avg_STS'}) 975 | 976 | Automated_metrics = {'cui2vec':f'{model_name}_cui2vec', 977 | 'icdcodex':f'{model_name}_icdcodex', 978 | 'meteor15':f'{model_name}_meteor15', 979 | 'bertscore':f'{model_name}_bertscore'} 980 | 981 | metric_dict.update(Automated_metrics) 982 | 983 | for metric, metric_col in metric_dict.items(): 984 | if metric in ['ExactMatch','BillableCode','LengthMatch','DigitMatch_Ovr']: 985 | value = 
str(round(merged_df[metric_col].mean()*100,1)) + "%" 986 | if error_analysis==True and metric =='ExactMatch': 987 | continue 988 | result_dict1[codesystem][metric][model_name] = value 989 | if metric == "DigitMatch_Ovr_list": 990 | digit_match = merged_df[metric_col].explode() 991 | result_dict1[codesystem]['DigitMatch_Ind'][model_name] = { 992 | key: (str(round(value / (merged_df['code_length'] >= key).sum() * 100, 1)) + "%") 993 | if (merged_df['code_length'] >= key).sum() != 0 994 | else '0.0%' 995 | for key, value in digit_match.value_counts().items() 996 | } 997 | elif metric == "Avg_STS": 998 | value = round(merged_df[metric_col].mean(),1) 999 | result_dict1[codesystem][metric][model_name] = value 1000 | elif metric in Automated_metrics.keys(): 1001 | vector_count = merged_df[metric_col].count() 1002 | value = round(merged_df[metric_col].mean(),3) 1003 | result_dict1[codesystem][metric][model_name] = value 1004 | result_dict1[codesystem][f'{metric} vectors'][model_name] = vector_count 1005 | 1006 | metrics = {} 1007 | models = set() 1008 | for codesystem, metrics in result_dict1.items(): 1009 | for metric, values in metrics.items(): 1010 | if not metric.endswith("vectors"): # Exclude vector count entries for column headers 1011 | models.update(values.keys()) 1012 | 1013 | # Convert the set of models to a list to maintain order 1014 | models_list = sorted(list(models)) 1015 | 1016 | # Initialize an empty DataFrame 1017 | df = pd.DataFrame(index=pd.MultiIndex.from_product([result_dict1.keys(), metrics.keys()], names=['Code System', 'Metric']), columns=models_list) 1018 | 1019 | # Populate Table 1020 | for codesystem, metrics in result_dict1.items(): 1021 | for metric, values in metrics.items(): 1022 | if metric.endswith("vectors") or metric.endswith("simple"): 1023 | continue 1024 | vector_metric = metric + " vectors" # Assuming vector counts follow this naming convention 1025 | for model, score in values.items(): 1026 | if metric.endswith("Ind"): 1027 | sorted_digit_match = sorted(score.items(), key=lambda x: x[0]) 1028 | score = "\n".join([f"{key}: {value}" for key, value in sorted_digit_match]) 1029 | # Retrieve corresponding vector count 1030 | vector_count = None 1031 | #vector_count = result_dict1[codesystem].get(vector_metric, {}).get(model, None) 1032 | total_count = df_merge[codesystem][model][f'{model}_desc'].count() 1033 | 1034 | # Format cell as "score (vector count / valid codes)" 1035 | cell_value = f"{score} ({round(int(vector_count)/total_count*100,1)}%)" if vector_count is not None else str(score) 1036 | if (metric.endswith("cui2vec") or metric.endswith("icdcodex")) and codesystem == "CPT": 1037 | continue 1038 | df.at[(codesystem, metric), model] = cell_value 1039 | 1040 | df = df[~df.map(lambda x: 'nan' in str(x)).any(axis=1)] 1041 | 1042 | # Optional: formatting for publication (adjust as needed) 1043 | styled_df = df.style.set_table_styles([{ 1044 | 'selector': 'th', 1045 | 'props': [('font-size', '10pt'), ('text-align', 'center')] 1046 | }]) 1047 | styled_df= styled_df.set_properties(**{ 1048 | 'text-align': 'center', 1049 | 'font-size': '9pt' 1050 | }) 1051 | if error_analysis == True: 1052 | styled_df.to_excel(f"Output/metrics_tables{suffix}_nomatch.xlsx") 1053 | else: 1054 | styled_df.to_excel(f"Output/metrics_tables{suffix}.xlsx") 1055 | 1056 | return df 1057 | 1058 | def sts_score_dist_fig(error_analysis=False): 1059 | # Load your DataFrame 1060 | with open(f"Output/Intermediate/df_analysis_manauto.pkl", "rb") as file: 1061 | df_dict = pickle.load(file) 1062 
| 1063 | # Define the models and code systems 1064 | models = [ 1065 | "gpt-35-turbo-0301", 1066 | "gpt-35-turbo-0613", 1067 | "gpt-35-turbo-1106", 1068 | "gpt-4-0314", 1069 | "gpt-4-0613", 1070 | "gpt-4-1106-preview", 1071 | "gemini-pro", 1072 | "meta/llama-2-70b-chat", 1073 | ] 1074 | code_systems = ['ICD9CM', 'ICD10CM', 'CPT'] 1075 | 1076 | model_dict = {"gpt-35-turbo-0301":"gpt-3.5-turbo-0301", 1077 | "gpt-35-turbo-0613":"gpt-3.5-turbo-0613", 1078 | "gpt-35-turbo-1106":"gpt-3.5-turbo-1106", 1079 | "meta/llama-2-70b-chat":"llama-2-70b-chat"} 1080 | 1081 | # Plot settings 1082 | num_models = len(models) 1083 | cols = 4 # Adjust as needed 1084 | rows_per_system = num_models // cols + (num_models % cols > 0) 1085 | 1086 | # Determine common axis limits (optional, adjust as needed) 1087 | x_min, x_max = 0, 5 1088 | 1089 | # Iterate over each code system and model 1090 | for codesystem in code_systems: 1091 | plt.figure(figsize=(12, 2 * rows_per_system)) # Adjust figure size as needed 1092 | 1093 | for i, model in enumerate(models): 1094 | if model in df_dict[codesystem]: 1095 | df = df_dict[codesystem][model] 1096 | if error_analysis == True: 1097 | df = df[df[f'{codesystem}_codedesc'] != df[f'{model}_desc']] 1098 | columns_with_avg_STS = [col for col in df.columns if 'Avg_STS' in col and "simple" not in col] 1099 | 1100 | # Aggregate Avg_STS scores from all columns for this model 1101 | all_avg_STS_scores = df[columns_with_avg_STS].values.flatten() 1102 | all_avg_STS_scores = all_avg_STS_scores[~pd.isna(all_avg_STS_scores)] # Remove NaN values 1103 | 1104 | model = model_dict.get(model, model) 1105 | 1106 | # Plotting the histogram in a subplot 1107 | plt.subplot(rows_per_system, cols, i + 1) 1108 | plt.hist(all_avg_STS_scores, bins=20, edgecolor='black') # Adjust bins as needed 1109 | plt.title(model) 1110 | plt.xlabel('Average STS Score') 1111 | plt.ylabel('Frequency') 1112 | plt.xlim(x_min, x_max) 1113 | 1114 | if error_analysis == True: 1115 | suffix = "_error" 1116 | else: 1117 | suffix = "" 1118 | 1119 | plt.suptitle(f'Distribution of {codesystem} STS Scores for Each Model') 1120 | plt.tight_layout(rect=[0, 0.03, 1, 0.95]) # Adjust layout with room for subtitle 1121 | plt.savefig(f"Output/{codesystem}_STS_Score_distributions{suffix}.png", format='png', dpi=300) 1122 | plt.show() 1123 | 1124 | def code_features_dist(suffix=""): 1125 | with open(f"Output/Intermediate/df_analysis{suffix}.pkl", "rb") as file: 1126 | df_results = pickle.load(file) 1127 | 1128 | model_list = ["gpt-35-turbo-0301", 1129 | "gpt-35-turbo-0613", 1130 | "gpt-35-turbo-1106", 1131 | "gpt-4-0314", 1132 | "gpt-4-0613", 1133 | "gpt-4-1106-preview", 1134 | "gemini-pro", 1135 | "meta/llama-2-70b-chat", 1136 | ] 1137 | codesystems = ['ICD9CM', 'ICD10CM', 'CPT'] 1138 | 1139 | # Create the figure with subplots 1140 | n_models = len(model_list) 1141 | 1142 | for codesystem in codesystems: 1143 | if codesystem == "ICD9CM": 1144 | height_ratios = [1.0, 2.0, 4.0] # Height ratios for 3 rows 1145 | elif codesystem == "ICD10CM": 1146 | height_ratios = [1.4, 2.0, 4.0] # Height ratios for 3 rows 1147 | if codesystem == "CPT": 1148 | height_ratios = [0.5, 2.2, 5.0] # Height ratios for 3 rows 1149 | 1150 | gs = gridspec.GridSpec(nrows=3, ncols=n_models, height_ratios=height_ratios) 1151 | 1152 | fig = plt.figure(figsize=(n_models * 3, sum(height_ratios) * 1)) # Adjust the figsize 1153 | 1154 | df_results[codesystem]['description_length'] = df_results[codesystem][f'{codesystem}_codedesc'].str.len() 1155 | if codesystem=="CPT": 1156 | 
df_results[codesystem]['description_length_25'] = df_results[codesystem]['description_length']/20 1157 | divide_25 = [(i, i + 20) for i in range(0, 2001, 20)] 1158 | df_results[codesystem]['description_length_bin'] = [val for value in df_results[codesystem]['description_length'] for (lower_bound, upper_bound), val in zip(divide_25, range(0, 2001, 20)) if lower_bound <= value < upper_bound] 1159 | df_results[codesystem]['description_length_bin'] = df_results[codesystem]['description_length_bin'].apply(lambda x: x if x <= 400 else 400) 1160 | else: 1161 | df_results[codesystem]['description_length_10'] = df_results[codesystem]['description_length']/10 1162 | divide_10 = [(i, i + 10) for i in range(0, 401, 10)] 1163 | df_results[codesystem]['description_length_bin'] = [val for value in df_results[codesystem]['description_length'] for (lower_bound, upper_bound), val in zip(divide_10, range(0, 401, 10)) if lower_bound <= value < upper_bound] 1164 | df_results[codesystem]['description_length_bin'] = df_results[codesystem]['description_length_bin'].apply(lambda x: x if x <= 150 else 150) 1165 | 1166 | df_results[codesystem][f'{codesystem}_count_log'] = np.log10(df_results[codesystem][f'{codesystem}_count']) 1167 | df_results[codesystem][f'{codesystem}_count_log_bin'] = np.searchsorted([10**i for i in range(9)], df_results[codesystem][f'{codesystem}_count']) 1168 | 1169 | for i, feature in enumerate(['code_length', f'{codesystem}_count_log_bin', 'description_length_bin']): 1170 | for j, model_name in enumerate(model_list): 1171 | ax = plt.subplot(gs[i, j]) 1172 | df_regress = df_results[codesystem].copy() 1173 | df_regress['ExactMatch'] = df_regress[f'{model_name}_ExactMatch'] 1174 | 1175 | df_match_N = df_regress[df_regress['ExactMatch'] == 0] 1176 | df_match_Y = df_regress[df_regress['ExactMatch'] == 1] 1177 | 1178 | dim_dict = {"":{("ICD9CM","code_length"):(6000,4000,2000), 1179 | ("ICD9CM",'ICD9CM_count_log_bin'):(3000,3000,1500), 1180 | ("ICD9CM",'description_length_bin'):(2000,2000,1000), 1181 | ("ICD10CM","code_length"):(5000,5000,2500), 1182 | ("ICD10CM",'ICD10CM_count_log_bin'):(9000,6000,3000), 1183 | ("ICD10CM",'description_length_bin'):(3000,3000,1500), 1184 | ("CPT","code_length"):(4500,4500,1500), 1185 | ("CPT",'CPT_count_log_bin'):(1500,1000,500), 1186 | ("CPT",'description_length_bin'):(750,500,250) 1187 | }, 1188 | "_man":{('ICD9CM', 'code_length'): (100, 150, 50), 1189 | ('ICD9CM', 'ICD9CM_count_log_bin'): (100, 150, 50), 1190 | ('ICD9CM', 'description_length_bin'): (75, 75, 25), 1191 | ('ICD10CM', 'code_length'): (100, 150, 50), 1192 | ('ICD10CM', 'ICD10CM_count_log_bin'): (100, 150, 50), 1193 | ('ICD10CM', 'description_length_bin'): (50, 75, 25), 1194 | ('CPT', 'code_length'): (250, 250, 125), 1195 | ('CPT', 'CPT_count_log_bin'): (100, 150, 50), 1196 | ('CPT', 'description_length_bin'): (60, 60, 30)}} 1197 | 1198 | 1199 | max_count_N, max_count_Y, tick_interval = dim_dict[suffix][(codesystem,feature)] 1200 | 1201 | sorted_categories = sorted(df_regress[feature].unique(), reverse=True) 1202 | if codesystem == "CPT" and feature == "code_length": 1203 | bar_width = 0.5 1204 | else: 1205 | bar_width = 0.9 1206 | 1207 | # Count the occurrences for each category 1208 | count_0 = df_match_N[feature].value_counts().reindex(sorted_categories, fill_value=0) 1209 | count_1 = df_match_Y[feature].value_counts().reindex(sorted_categories, fill_value=0) 1210 | 1211 | ratio={} 1212 | for category in sorted_categories: 1213 | if count_0[category] == 0 and count_1[category] > 0: 1214 | 
ratio[str(category)] = 1.0 1215 | elif count_1[category] == 0 and count_0[category] > 0: 1216 | ratio[str(category)] = 0.0 1217 | else: 1218 | total_count = count_0[category] + count_1[category] 1219 | ratio[str(category)] = count_1[category] / total_count if total_count > 0 else 0.0 1220 | 1221 | # Plotting for ExactMatch == 1 1222 | sns.countplot(y=feature, data=df_match_Y, ax=ax, color='green', order=sorted_categories, width=bar_width) 1223 | ax.set_xlim(-max_count_N, max_count_Y) 1224 | # Set x-ticks after defining the limits 1225 | ticks_Y = np.arange(0, max_count_Y + tick_interval, tick_interval) 1226 | negative_ticks_Y = -np.arange(0, max_count_N + tick_interval, tick_interval)[1:] 1227 | combined_ticks_Y = np.concatenate([negative_ticks_Y, ticks_Y]) 1228 | 1229 | ax.set_xlim(-max_count_N, max_count_Y) 1230 | ax.set_xticks(combined_ticks_Y) 1231 | ax.set_xticklabels([f"{int(abs(tick))}" for tick in combined_ticks_Y]) 1232 | 1233 | ax.set_xlabel('Code Count') 1234 | ax.set_ylabel('') 1235 | 1236 | # Creating an inverted plot for ExactMatch == 0 1237 | ax1 = ax.twiny() 1238 | sns.countplot(y=feature, data=df_match_N, ax=ax1, color='red', order=sorted_categories, width=bar_width) 1239 | 1240 | ticks_N = np.arange(0, max_count_N + tick_interval, tick_interval) 1241 | negative_ticks_N = -np.arange(0, max_count_Y + tick_interval, tick_interval)[1:] 1242 | combined_ticks_N = np.concatenate([negative_ticks_N, ticks_N]) 1243 | 1244 | ax1.set_xlim(-max_count_Y, max_count_N) 1245 | ax1.invert_xaxis() 1246 | ax1.set_xticks(combined_ticks_N) 1247 | ax1.set_xticklabels([f"{int(abs(tick))}" for tick in combined_ticks_N]) 1248 | ax1.set_xlabel('') 1249 | 1250 | sorted_categories = sorted(df_regress[feature].unique(), reverse=True) 1251 | 1252 | for a,category in enumerate(sorted_categories): 1253 | # Find patches in both ax and ax1 that correspond to the current category 1254 | patches_ax = [p for p in ax.patches if ax.get_yticklabels()[int(p.get_y() + p.get_height() / 2)].get_text() == str(category)] 1255 | patches_ax1 = [p for p in ax1.patches if ax1.get_yticklabels()[int(p.get_y() + p.get_height() / 2)].get_text() == str(category)] 1256 | 1257 | # Determine which patch to use 1258 | if patches_ax: 1259 | patch = patches_ax[0] 1260 | width = patch.get_width() 1261 | elif patches_ax1: 1262 | patch = patches_ax1[0] 1263 | if count_1[category] > 0: 1264 | pass 1265 | last_category = sorted_categories[a-1] 1266 | last_patches_ax = [p for p in ax.patches if ax.get_yticklabels()[int(p.get_y() + p.get_height() / 2)].get_text() == str(last_category)] 1267 | last_patch = last_patches_ax[0] 1268 | width = count_1[category] / count_1[last_category] * last_patch.get_width() 1269 | else: 1270 | width = 0 1271 | else: 1272 | # Skip this category if there are no patches in either axis 1273 | continue 1274 | 1275 | 1276 | 1277 | height = patch.get_height() 1278 | y = patch.get_y() 1279 | 1280 | # Use the ratio dictionary to get the value 1281 | value = ratio.get(str(category), 0.0) # Default to 0 if the category is not in the ratio dict 1282 | 1283 | ax.annotate(f'{float(value):.2f}', # Format to two decimal places 1284 | (patch.get_x() + width, y + height / 2), 1285 | xytext=(5, 0), # 5 points offset 1286 | textcoords='offset points', 1287 | ha='left', 1288 | va='center') 1289 | 1290 | # Set titles, labels, etc. 
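                # (Only the top row gets a model-name title and only the first
                # column gets a feature label, so the grid reads as a table of
                # models across code features.)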
1291 | if i == 0: 1292 | ax.set_title(model_name) 1293 | if j == 0: 1294 | if feature == "code_length": 1295 | ax.set_ylabel("Code Length\n(digits)") 1296 | elif feature == f'{codesystem}_count_log_bin': 1297 | ax.set_ylabel("Code Frequency\n(log)") 1298 | elif feature == 'description_length_bin': 1299 | ax.set_ylabel("Description Length\n(characters)") 1300 | 1301 | # # Logistic Regression Analysis 1302 | # if codesystem=="CPT": 1303 | # features = [f'{codesystem}_count_log', 'code_length', 'description_length_25'] 1304 | # else: 1305 | # features = [f'{codesystem}_count_log', 'code_length', 'description_length_10'] 1306 | # X = df_regress[features] 1307 | # y = df_regress['ExactMatch'] 1308 | # model = LogisticRegression() 1309 | # model.fit(X, y) 1310 | 1311 | # # Getting the estimated coefficients 1312 | # coefficients = model.coef_[0] 1313 | 1314 | # # Calculating odds ratios 1315 | # odds_ratios = np.exp(coefficients) 1316 | 1317 | # # Calculating standard errors and other statistics 1318 | # standard_errors = np.sqrt(np.diag(np.linalg.inv(np.dot(X.T, X)))) 1319 | # wald_stats = coefficients / standard_errors 1320 | # p_values = chi2.sf(wald_stats**2, 1) 1321 | # conf_intervals = pd.DataFrame(index=features, columns=["lower", "upper"]) 1322 | # for i in range(X.shape[1]): 1323 | # conf_intervals.iloc[i] = np.exp(coefficients[i] - 1.96 * standard_errors[i]), np.exp(coefficients[i] + 1.96 * standard_errors[i]) 1324 | 1325 | # odds_ratios_with_confidence_intervals = pd.DataFrame({ 1326 | # "Odds Ratio": odds_ratios, 1327 | # "Lower CI (95%)": conf_intervals["lower"], 1328 | # "Upper CI (95%)": conf_intervals["upper"], 1329 | # "p-value": p_values 1330 | # }, index=features) 1331 | 1332 | # print(f"Code System: {codesystem}, Model: {model_name}") 1333 | # print(odds_ratios_with_confidence_intervals) 1334 | # print() 1335 | 1336 | print(codesystem) 1337 | plt.tight_layout(rect=[0, 0.03, 1, 0.95]) # Adjust the rect if the title is overlapping 1338 | plt.savefig(f"Output/{codesystem}_MetricDistribution_{suffix}.png", format='png', dpi=300) 1339 | plt.show() -------------------------------------------------------------------------------- /Code/LLM.py: -------------------------------------------------------------------------------- 1 | import os 2 | import asyncio 3 | from aiolimiter import AsyncLimiter 4 | import re 5 | import pickle 6 | from tqdm.notebook import tqdm as tqdm 7 | from tqdm.asyncio import tqdm as async_tqdm 8 | 9 | import pandas as pd 10 | from langchain.chat_models import ChatOpenAI 11 | from langchain_google_genai import ChatGoogleGenerativeAI 12 | from langchain.llms import Replicate 13 | from langchain_experimental.chat_models import Llama2Chat 14 | from langchain.schema.output_parser import StrOutputParser 15 | 16 | import getpass 17 | 18 | API_KEY = '' #UMLS REST API KEY 19 | 20 | os.chdir('/Users/alis/Library/CloudStorage/OneDrive-Personal/Desktop/_Research/Ongoing_Projects/Submitted/ICD_Code_Paper') 21 | 22 | desc_pattern = re.compile(r'<(.*?)>') 23 | cpt_pattern = re.compile(r'\d+') 24 | icd9cm_pattern = re.compile(r'([\d]{3})[.]?[\d]{0,2}|[A-Z][\d]{2}[.]?[\d]{0,2}') 25 | icd9cm_pattern2 = re.compile(r'\d{4,5}') 26 | icd10cm_pattern = re.compile(r'([A-Z][\d]{2})[.]?[\d]{0,3}[A-Z]?') 27 | 28 | ## LLM Functions ## 29 | def extractdesc(input_string): 30 | pattern = desc_pattern 31 | match = re.search(pattern, str(input_string)) 32 | if match: 33 | return match.group(1) 34 | else: 35 | return None 36 | 37 | def extractcode(input_string, codesystem): 38 | pattern = None 
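    # Illustrative behavior of the patterns above (examples hypothetical):
    #   extractcode("The CPT code is 99213", "CPT")      -> "99213"
    #   extractcode("25000", "ICD9CM")                   -> "250.00"  (dot re-inserted)
    #   extractcode("E11.9, Type 2 diabetes", "ICD10CM") -> "E11.9"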
39 | if codesystem == "CPT": 40 | pattern = cpt_pattern 41 | elif codesystem == "ICD9CM": 42 | pattern = icd9cm_pattern 43 | 44 | # Check if it matches icd9cm_pattern2 45 | match2 = icd9cm_pattern2.search(str(input_string)) 46 | if match2: 47 | matched_string = match2.group() 48 | 49 | # Add a '.' after the 3rd digit if the matched string's length is more than 3 50 | if len(matched_string) > 3: 51 | return matched_string[:3] + '.' + matched_string[3:] 52 | 53 | elif codesystem == "ICD10CM": 54 | pattern = icd10cm_pattern 55 | 56 | if pattern: 57 | match = pattern.search(str(input_string)) 58 | if match: 59 | return match.group() 60 | else: 61 | return None 62 | 63 | def load_cache(filename): 64 | try: 65 | with open(filename, "rb") as file: 66 | return pickle.load(file) 67 | except FileNotFoundError: 68 | return {} 69 | 70 | def save_cache(data, filename): 71 | with open(filename, 'wb') as pickle_file: 72 | pickle.dump(data, pickle_file) 73 | 74 | async def async_invoke(chain, prompt_text, index, limiter, semaphore, api_timeout=10, max_attempts=3): 75 | attempt = 0 76 | while attempt < max_attempts: 77 | try: 78 | async with limiter: 79 | async with semaphore: 80 | await asyncio.sleep(0.05) 81 | response = await asyncio.wait_for(chain.ainvoke(prompt_text), api_timeout) 82 | return response 83 | except asyncio.TimeoutError: 84 | print(f"Timed out description: {index} {extractdesc(prompt_text)}") 85 | except Exception as e: 86 | print(f"An error occurred: {str(e)}") 87 | finally: 88 | attempt += 1 89 | 90 | async def generate_responses_concurrently(model_name, temperature, max_tokens, code_datasets, codesystem, batch_size, raw_path, RESULTS_CACHE): 91 | model_rps_limits = { 92 | "gpt": 160, 93 | "gemini": 1, 94 | "llama": 10 95 | } 96 | 97 | model_concurrency_limits = { 98 | "gpt": 400, 99 | "gemini": 10, 100 | "llama": 8 101 | } 102 | 103 | if "gpt" in model_name: 104 | llm = ChatOpenAI(openai_api_key=os.getenv("OPENAI_API_KEY"), model_name=model_name, temperature=temperature, max_tokens=max_tokens) 105 | model_fam="gpt" 106 | elif "gemini" in model_name: 107 | if "GOOGLE_API_KEY" not in os.environ: 108 | os.environ["GOOGLE_API_KEY"] = getpass.getpass("Provide your Google API Key") 109 | llm = ChatGoogleGenerativeAI(google_api_key=os.getenv("GOOGLE_API_KEY"), model=model_name, temperature=temperature, max_tokens=max_tokens) 110 | model_fam="gemini" 111 | elif "llama" in model_name: 112 | if "REPLICATE_API_TOKEN" not in os.environ: 113 | os.environ["REPLICATE_API_TOKEN"] = getpass.getpass("Provide your Replicate API Token") 114 | llm = Llama2Chat(llm=Replicate(model=model_name, model_kwargs={"temperature":temperature, "max_new_tokens": 30})) 115 | model_fam="llama" 116 | 117 | chain = llm | StrOutputParser() 118 | semaphore = asyncio.Semaphore(model_concurrency_limits[model_fam]) 119 | limiter = AsyncLimiter(model_rps_limits[model_fam], 1) 120 | 121 | # Determine the starting batch index based on saved progress 122 | model_name_clean = model_name.replace(".", "").split(":")[0] 123 | start_index = len(RESULTS_CACHE.get((codesystem, model_name_clean), [])) 124 | 125 | total = len(code_datasets[codesystem][f"{codesystem}_prompt"]) 126 | for i in async_tqdm(range(start_index, total, batch_size), desc="Processing batches"): 127 | batch_start = i 128 | batch_end = min(i + batch_size, total) 129 | batch = code_datasets[codesystem][f"{codesystem}_prompt"][batch_start:batch_end] 130 | tasks = [async_invoke(chain, prompt, i, limiter, semaphore) for i, prompt in enumerate(batch, start=batch_start)] 131 | responses_batch = await asyncio.gather(*tasks) 132 | 133 | if (codesystem, model_name_clean) in RESULTS_CACHE: 134 | RESULTS_CACHE[(codesystem, model_name_clean)].extend(responses_batch) 135 | else: 136 | RESULTS_CACHE[(codesystem, model_name_clean)] = responses_batch 137 | save_cache(RESULTS_CACHE, raw_path) 138 | 139 | #print(i, responses_batch) 140 | 141 | return RESULTS_CACHE[(codesystem, model_name_clean)] 142 | 143 | 144 | async def run_llms(model_list, code_datasets, suffix): 145 | codesystem_list = code_datasets.keys() 146 | temp = 0.2 147 | raw_path = f"Output/Intermediate/results{suffix}.pkl" 148 | clean_path = f"Output/Intermediate/results{suffix}_clean.pkl" 149 | 150 | RESULTS_CACHE = load_cache(raw_path) 151 | RESULTS_CACHE_CLEAN = {} 152 | 153 | for codesystem in codesystem_list: 154 | for model_name in model_list: 155 | model_name_clean = model_name.replace(".", "").split(":")[0] 156 | 157 | print("\n", codesystem, model_name_clean) 158 | 159 | #RESULTS_CACHE[(codesystem, model_name_clean)] = [] # Uncomment if you want to overwrite results 160 | 161 | retry_attempts = 3 162 | while retry_attempts > 0: 163 | try: 164 | responses = await generate_responses_concurrently(model_name=model_name, 165 | temperature=temp, 166 | max_tokens=50, 167 | code_datasets=code_datasets, 168 | codesystem=codesystem, 169 | batch_size=100, 170 | raw_path=raw_path, 171 | RESULTS_CACHE=RESULTS_CACHE 172 | ) 173 | break 174 | except Exception as e: 175 | print(f"An error occurred: {str(e)}") 176 | retry_attempts -= 1 177 | if retry_attempts == 0: 178 | print(f"Retry attempts exhausted for {codesystem} {model_name}") 179 | 180 | for key in RESULTS_CACHE.keys(): 181 | code_system = key[0] 182 | extracted_codes = pd.Series(RESULTS_CACHE[key]).apply(lambda r: extractcode(r, code_system)).str.rstrip('.') 183 | RESULTS_CACHE_CLEAN[key] = extracted_codes.tolist() 184 | save_cache(RESULTS_CACHE_CLEAN, clean_path) 185 | print("Clean cache saved") 186 | 187 | save_cache(RESULTS_CACHE, raw_path) 188 | for key in RESULTS_CACHE.keys(): 189 | code_system = key[0] 190 | extracted_codes = pd.Series(RESULTS_CACHE[key]).apply(lambda r: extractcode(r, code_system)).str.rstrip('.') 191 | RESULTS_CACHE_CLEAN[key] = extracted_codes.tolist() 192 | save_cache(RESULTS_CACHE_CLEAN, clean_path) 193 | print("Clean cache saved") 194 | -------------------------------------------------------------------------------- /Code/Processing.py: -------------------------------------------------------------------------------- 1 | import os 2 | import aiohttp 3 | import asyncio 4 | from aiolimiter import AsyncLimiter 5 | import pickle 6 | import string 7 | from tqdm.notebook import tqdm as tqdm 8 | from tqdm.asyncio import tqdm as async_tqdm 9 | 10 | import pandas as pd 11 | import numpy as np 12 | 13 | 14 | 15 | API_KEY = '' #UMLS REST API KEY 16 | 17 | os.chdir('/Users/alis/Library/CloudStorage/OneDrive-Personal/Desktop/_Research/Ongoing_Projects/Submitted/ICD_Code_Paper') 18 | 19 | limiter = AsyncLimiter(20, 1) # 20 calls per second 20 | 21 | def load_umls_cache(filename="Output/Intermediate/UMLS_CACHE.pkl"): 22 | try: 23 | with open(filename, "rb") as file: 24 | return pickle.load(file) 25 | except FileNotFoundError: 26 | return {} 27 | 28 | UMLS_CACHE = load_umls_cache() 29 | 30 | async def getCUI_desc(code, system, session): 31 | 32 | if (code, system) in UMLS_CACHE: 33 | result = UMLS_CACHE.get((code, system)) 34 | if pd.notna(result[1]): 35 | umls_cui = result[0] 36 | source_desc = result[1] 37 | return source_desc,
umls_cui, None, code 38 | # else: 39 | # return np.nan, np.nan, None, code 40 | 41 | async with limiter: 42 | try: 43 | url = f'https://uts-ws.nlm.nih.gov/rest/content/current/source/{system}/{code}/atoms/preferred?apiKey={API_KEY}' 44 | async with session.get(url) as response: 45 | response.raise_for_status() 46 | output = await response.json() 47 | umls_cui = output['result']['concept'].split('/')[-1] 48 | source_desc = output['result']['name'] 49 | UMLS_CACHE[(code, system)] = (umls_cui, source_desc) 50 | return source_desc, umls_cui, None, code 51 | except Exception as e: 52 | UMLS_CACHE[(code, system)] = (np.nan, np.nan) 53 | return np.nan, np.nan, e, code 54 | 55 | def save_desc_cache(): 56 | with open('Output/Intermediate/UMLS_CACHE.pkl', 'wb') as pickle_file: 57 | pickle.dump(UMLS_CACHE, pickle_file) 58 | print("Cache saved") 59 | 60 | ## PROMPT TEMPLATE CREATION ## 61 | async def prompt_dataset_with_langchain(file_path): 62 | session = None 63 | try: 64 | session = aiohttp.ClientSession() 65 | df = pd.read_excel(file_path, engine='openpyxl', index_col=0) 66 | basefilename = os.path.splitext(os.path.basename(file_path))[0] 67 | codesystem = basefilename.upper().translate(str.maketrans('', '', string.punctuation)) 68 | 69 | code_col = {"ICD9CM":"DiagnosisValue","ICD10CM":"DiagnosisValue","CPT":"CptCode"} 70 | desc_col = {"ICD9CM":"DisplayString","ICD10CM":"DisplayString","CPT":"Name"} 71 | 72 | tasks = {df.index[i]: getCUI_desc(row[code_col[codesystem]], codesystem, session) for i, (index, row) in enumerate(df.iterrows())} 73 | progress_bar = async_tqdm(total=len(tasks), desc="Processing API Calls") 74 | 75 | error_messages = [] 76 | code_description_pairs = [] 77 | 78 | for index in df.index: 79 | desc, cui, err, code = await tasks[index] 80 | if err: 81 | error_messages.append(f"\tNo result, error {err}: {code} {desc}") 82 | else: 83 | code_description_pairs.append((index, code, desc)) 84 | progress_bar.update(1) 85 | 86 | progress_bar.close() 87 | 88 | code_description_pairs.sort(key=lambda x: x[0]) 89 | 90 | examples = {"ICD9CM":"045.10","ICD10CM":"M24.131","CPT":"84120"} 91 | 92 | # Creating DataFrame 93 | data = [{ 94 | f"{codesystem}_code": code, 95 | f"{codesystem}_EHRdesc": df.at[index, desc_col[codesystem]], 96 | f"{codesystem}_codedesc": desc, 97 | f"{codesystem}_prompt": f"What is the most correct {codesystem} billing code for this description: <{desc}>. \ 98 | \nOnly generate a single, VALID {codesystem} billing code. Do not explain. ALWAYS respond in the following format: \ 99 | \nCode: {examples.get(codesystem)}", 100 | f"{codesystem}_count": df.at[index, "count"] 101 | } for index, code, desc in code_description_pairs if desc is not None] 102 | 103 | df = pd.DataFrame(data) 104 | 105 | # Print errors after progress bar setup 106 | for error_message in error_messages: 107 | print(error_message) 108 | 109 | if codesystem == "CPT": 110 | regex_pattern = r'^\d{5}$' 111 | df = df[df['CPT_code'].str.match(regex_pattern, na=False)] 112 | df = df.dropna(subset=[f'{codesystem}_codedesc']) 113 | 114 | df.reset_index(drop=True, inplace=True) 115 | 116 | return df 117 | 118 | except asyncio.CancelledError: 119 | # Handle cancellation outside task execution 120 | print("Operation cancelled by user. 
Cleaning up...") 121 | finally: 122 | save_desc_cache() 123 | if session and not session.closed: 124 | await session.close() 125 | 126 | def sample_optimized(df, num_samples, random_state, df2=None): 127 | df = df.copy() 128 | 129 | system = df.columns[1].split('_')[0] 130 | count_col = f'{system}_count' 131 | code_col = f'{system}_code' 132 | 133 | if df2 is not None: 134 | df = df[~df[code_col].isin(df2[code_col])] 135 | 136 | # Convert counts to probabilities 137 | df['probability'] = df[count_col] / df[count_col].sum() 138 | 139 | # Sample rows based on the probability, ensuring uniqueness 140 | sample_df = df.drop_duplicates(subset=[code_col]).sample(n=num_samples, weights='probability', replace=False, random_state=random_state) 141 | sample_df.reset_index(drop=True,inplace=True) 142 | 143 | return sample_df 144 | 145 | async def process_gen_codes(df, codesystem, results_dict): 146 | async with aiohttp.ClientSession() as session: 147 | # Create tasks for the initial codes 148 | initial_tasks = [asyncio.ensure_future(getCUI_desc(code, codesystem, session)) 149 | for code in df[f'{codesystem}_code']] 150 | 151 | # Wait for all initial tasks to complete 152 | initial_responses = await async_tqdm.gather(*initial_tasks, desc=f"Processing {codesystem} original") 153 | 154 | # Process the initial responses and populate the dataframe 155 | for idx, response in zip(df.index, initial_responses): 156 | source_desc, umls_cui, _, _ = response 157 | df.at[idx, f'{codesystem}_CUI'] = umls_cui 158 | 159 | # Now handle the additional getCUI_desc calls for different models 160 | model_list = ["gpt-35-turbo-0301", 161 | "gpt-35-turbo-0613", 162 | "gpt-35-turbo-1106", 163 | "gpt-4-0314", 164 | "gpt-4-0613", 165 | "gpt-4-1106-preview", 166 | "gemini-pro", 167 | "meta/llama-2-70b-chat", 168 | ] 169 | 170 | for model_name in model_list: 171 | model_name_clean = model_name.replace(".", "").split(":")[0] 172 | df[model_name_clean] = results_dict[(codesystem, model_name_clean)] 173 | df[f'{model_name_clean}_CUI'] = pd.Series(dtype='object') 174 | df[f'{model_name_clean}_desc'] = pd.Series(dtype='object') 175 | 176 | model_tasks = [asyncio.ensure_future(getCUI_desc(code, codesystem, session)) 177 | for code in df[model_name_clean]] 178 | 179 | model_responses = await async_tqdm.gather(*model_tasks, desc=f"Processing {codesystem} {model_name}") 180 | 181 | for idx, response in zip(df.index, model_responses): 182 | source_desc, umls_cui, _, _ = response 183 | df.at[idx, f'{model_name_clean}_CUI'] = umls_cui 184 | df.at[idx, f'{model_name_clean}_desc'] = source_desc 185 | 186 | # Save the cache after processing 187 | save_desc_cache() 188 | 189 | return df 190 | 191 | def semantic_pairs_manual(excel_filename, suffix): 192 | with pd.ExcelWriter(excel_filename, engine='openpyxl') as writer: 193 | man_datasets = {} 194 | 195 | for codesystem in ['ICD9CM', 'ICD10CM', 'CPT']: 196 | df = pd.read_parquet(f"Output/Intermediate/{codesystem}_parsed{suffix}.parquet") 197 | df_new = pd.DataFrame() 198 | pairs_list = [] 199 | 200 | model_list = [ 201 | "gpt-35-turbo-0301", 202 | "gpt-35-turbo-0613", 203 | "gpt-35-turbo-1106", 204 | "gpt-4-0314", 205 | "gpt-4-0613", 206 | "gpt-4-1106-preview", 207 | "gemini-pro", 208 | "meta/llama-2-70b-chat", 209 | ] 210 | 211 | for model_name in model_list: 212 | model_name_clean = model_name.replace(".", "") 213 | pairs_list.extend(zip(df[f'{codesystem}_codedesc'], df[f"{model_name_clean}_desc"])) 214 | 215 | # Create a new DataFrame with 'codedesc' and 'desc' columns 216 | 
df_new[['codedesc', 'desc']] = pd.DataFrame(pairs_list, columns=['codedesc', 'desc']) 217 | 218 | df_new.drop_duplicates(inplace=True) 219 | df_new.dropna(inplace=True) 220 | df_new = df_new[df_new['codedesc'] != df_new['desc']] 221 | 222 | df_new.to_excel(writer, sheet_name=codesystem, index=False) 223 | 224 | # Add df_new to man_datasets 225 | man_datasets[codesystem] = df_new 226 | 227 | ### CREATE BILLABLE ICD CODE DICTIONARIES ### 228 | 229 | ## ICD9 import ## 230 | df_icd9 = pd.read_excel('Raw/CMS32_DESC_LONG_SHORT_DX.xlsx', engine='openpyxl', usecols=["DIAGNOSIS CODE","LONG DESCRIPTION"], converters={'DIAGNOSIS CODE':str,'LONG DESCRIPTION':str}) 231 | 232 | ## ICD 10 CM import ## 233 | df_icd10cm = pd.read_fwf('Raw/icd10cm_codes_2023.txt', colspecs=[(0,7),(8,400)], header=None, converters={0:str, 1: str}) 234 | df_icd10cm_addendum = pd.read_fwf('Raw/icd10cm_codes_addenda_2023.txt', colspecs='infer', infer_nrows=100, header=None, dtype=str) 235 | 236 | # add addenda 237 | mask_add = df_icd10cm_addendum[0] == "Add:" 238 | added_data = df_icd10cm_addendum.loc[mask_add, [1, 2]] 239 | added_data = added_data.rename(columns={1:0, 2:1}) 240 | df_icd10cm = pd.concat([df_icd10cm, added_data], ignore_index=True) 241 | 242 | mask_del = df_icd10cm_addendum[0] == "Delete:" 243 | delete_values = df_icd10cm_addendum.loc[mask_del, 1] 244 | df_icd10cm = df_icd10cm.loc[~df_icd10cm[0].isin(delete_values)] 245 | 246 | mask_rev = df_icd10cm_addendum.iloc[:, 0] == "Revise to:" 247 | 248 | for _, row in df_icd10cm_addendum[mask_rev].iterrows(): 249 | mask_rev2 = df_icd10cm.iloc[:, 0] == row.iloc[1] 250 | df_icd10cm.loc[mask_rev2, df_icd10cm.columns[1]] = row.iloc[2] 251 | 252 | # rename columns for dataframes imported from text files 253 | df_icd10cm.rename(columns={0:"DIAGNOSIS CODE", 1:"LONG DESCRIPTION"}, inplace=True) 254 | 255 | # set dict 256 | ICD_9_dict = dict(zip(df_icd9["DIAGNOSIS CODE"], df_icd9["LONG DESCRIPTION"])) 257 | ICD_10_CM_dict = dict(zip(df_icd10cm["DIAGNOSIS CODE"], df_icd10cm["LONG DESCRIPTION"])) -------------------------------------------------------------------------------- /Code/Testing.py: -------------------------------------------------------------------------------- 1 | ### TEST BLOCK ### 2 | 3 | import os 4 | import re 5 | import requests 6 | from tqdm.notebook import tqdm as tqdm 7 | from tqdm.asyncio import tqdm as async_tqdm 8 | 9 | import pandas as pd 10 | import numpy as np 11 | 12 | from langchain.chat_models import ChatOpenAI 13 | from langchain_google_genai import ChatGoogleGenerativeAI 14 | from langchain.llms import Replicate 15 | from langchain_experimental.chat_models import Llama2Chat 16 | from langchain.schema.output_parser import StrOutputParser 17 | 18 | import mercury as mr 19 | 20 | import getpass 21 | 22 | 23 | API_KEY = '' #UMLS REST API KEY 24 | 25 | os.chdir('/Users/alis/Library/CloudStorage/OneDrive-Personal/Desktop/_Research/Ongoing_Projects/Submitted/ICD_Code_Paper') 26 | 27 | def getCUI_desc(code, system): 28 | try: 29 | url = f'https://uts-ws.nlm.nih.gov/rest/content/current/source/{system}/{code}/atoms/preferred?apiKey={API_KEY}' 30 | response = requests.get(url) 31 | response.raise_for_status() 32 | output = response.json() 33 | umls_cui = output['result']['concept'].split('/')[-1] 34 | source_desc = output['result']['name'] 35 | return source_desc, code 36 | except Exception as e: 37 | return np.nan, code 38 | 39 | desc_pattern = re.compile(r'<(.*?)>') 40 | cpt_pattern = re.compile(r'\d+') 41 | icd9cm_pattern =
re.compile(r'([\d]{3})[.]?[\d]{0,2}|[A-Z][\d]{2}[.]?[\d]{0,2}') 42 | icd9cm_pattern2 = re.compile(r'\d{4,5}') 43 | icd10cm_pattern = re.compile(r'([A-Z][\d]{2})[.]?[\d]{0,3}[A-Z]?') 44 | 45 | 46 | def extractcode(input_string, codesystem): 47 | pattern = None 48 | if codesystem == "CPT": 49 | pattern = cpt_pattern 50 | elif codesystem == "ICD9CM": 51 | pattern = icd9cm_pattern 52 | 53 | # Check if it matches icd9cm_pattern2 54 | match2 = icd9cm_pattern2.search(str(input_string)) 55 | if match2: 56 | matched_string = match2.group() 57 | 58 | # Add a '.' after the 3rd digit if the matched string's length is more than 3 59 | if len(matched_string) > 3: 60 | return matched_string[:3] + '.' + matched_string[3:] 61 | 62 | elif codesystem == "ICD10CM": 63 | pattern = icd10cm_pattern 64 | 65 | if pattern: 66 | match = pattern.search(str(input_string)) 67 | if match: 68 | return match.group() 69 | else: 70 | return None 71 | 72 | def create_progress_bar(percentage, width=25): 73 | """Create a text-based progress bar.""" 74 | filled = int(width * percentage) 75 | bar = '[' + '#' * filled + '-' * (width - filled) + ']' 76 | return bar 77 | 78 | def llm_generate(df_prompt, model_name, temperature, max_tokens, print_output=False): 79 | codesystem = df_prompt.columns[1].split('_')[0] 80 | 81 | if "gpt" in model_name: 82 | llm = ChatOpenAI(openai_api_key=os.getenv("OPENAI_API_KEY"), model_name=model_name, temperature=temperature, max_tokens=max_tokens) 83 | elif "gemini" in model_name: 84 | google_api_key = os.getenv("GOOGLE_API_KEY") or getpass.getpass("Provide your Google API Key") 85 | llm = ChatGoogleGenerativeAI(google_api_key=google_api_key, model=model_name, temperature=temperature, max_tokens=max_tokens) 86 | elif "llama" in model_name: 87 | replicate_api_token = os.getenv("REPLICATE_API_TOKEN") or getpass.getpass("Provide your Replicate API Token") 88 | llm = Llama2Chat(llm=Replicate(model=model_name, model_kwargs={"temperature":temperature, "max_new_tokens": 30})) 89 | 90 | chain = llm | StrOutputParser() 91 | 92 | prompt_column = f"{codesystem}_prompt" 93 | responses = [] 94 | total_prompts = len(df_prompt) 95 | 96 | for index, prompt in enumerate(df_prompt[prompt_column]): 97 | if print_output == False: 98 | progress_percentage = (index) / total_prompts 99 | progress_bar = create_progress_bar(progress_percentage) 100 | print(f'\rLLM running: {progress_bar} {index}/{total_prompts}', end='') 101 | 102 | response = extractcode(chain.invoke(prompt),codesystem) 103 | responses.append(response) 104 | 105 | if print_output == False: 106 | progress_percentage = (index + 1) / total_prompts 107 | progress_bar = create_progress_bar(progress_percentage) 108 | print(f'\rLLM running: {progress_bar} {index + 1}/{total_prompts}', end='') 109 | 110 | if print_output: 111 | print(response) 112 | if print_output == False: 113 | print() 114 | 115 | return responses 116 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Nadkarni-Lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, 
subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Output/CPT_MetricDistribution_.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/CPT_MetricDistribution_.png -------------------------------------------------------------------------------- /Output/CPT_MetricDistribution__man.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/CPT_MetricDistribution__man.png -------------------------------------------------------------------------------- /Output/CPT_STS_Score_distributions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/CPT_STS_Score_distributions.png -------------------------------------------------------------------------------- /Output/CPT_STS_Score_distributions_error.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/CPT_STS_Score_distributions_error.png -------------------------------------------------------------------------------- /Output/CPT_repeats_table.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/CPT_repeats_table.xlsx -------------------------------------------------------------------------------- /Output/CPT_repeats_table_man.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/CPT_repeats_table_man.xlsx -------------------------------------------------------------------------------- /Output/Figure1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/Figure1.png -------------------------------------------------------------------------------- /Output/ICD10CM_MetricDistribution_.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/ICD10CM_MetricDistribution_.png -------------------------------------------------------------------------------- /Output/ICD10CM_MetricDistribution__man.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/ICD10CM_MetricDistribution__man.png -------------------------------------------------------------------------------- /Output/ICD10CM_STS_Score_distributions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/ICD10CM_STS_Score_distributions.png -------------------------------------------------------------------------------- /Output/ICD10CM_STS_Score_distributions_error.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/ICD10CM_STS_Score_distributions_error.png -------------------------------------------------------------------------------- /Output/ICD10CM_repeats_table.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/ICD10CM_repeats_table.xlsx -------------------------------------------------------------------------------- /Output/ICD10CM_repeats_table_man.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/ICD10CM_repeats_table_man.xlsx -------------------------------------------------------------------------------- /Output/ICD9CM_MetricDistribution_.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/ICD9CM_MetricDistribution_.png -------------------------------------------------------------------------------- /Output/ICD9CM_MetricDistribution__man.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/ICD9CM_MetricDistribution__man.png -------------------------------------------------------------------------------- /Output/ICD9CM_STS_Score_distributions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/ICD9CM_STS_Score_distributions.png -------------------------------------------------------------------------------- /Output/ICD9CM_STS_Score_distributions_error.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/ICD9CM_STS_Score_distributions_error.png -------------------------------------------------------------------------------- /Output/ICD9CM_repeats_table.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/ICD9CM_repeats_table.xlsx -------------------------------------------------------------------------------- /Output/ICD9CM_repeats_table_man.xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/ICD9CM_repeats_table_man.xlsx -------------------------------------------------------------------------------- /Output/STS_Score_distributions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/STS_Score_distributions.png -------------------------------------------------------------------------------- /Output/code_histogram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/code_histogram.png -------------------------------------------------------------------------------- /Output/combined_match_rate_bar_chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/combined_match_rate_bar_chart.png -------------------------------------------------------------------------------- /Output/combined_match_rate_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/combined_match_rate_plot.png -------------------------------------------------------------------------------- /Output/descriptionpairs_man.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/descriptionpairs_man.xlsx -------------------------------------------------------------------------------- /Output/descriptionpairs_man_Ali.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/descriptionpairs_man_Ali.xlsx -------------------------------------------------------------------------------- /Output/descriptionpairs_man_Eyal.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/descriptionpairs_man_Eyal.xlsx -------------------------------------------------------------------------------- /Output/descriptionpairs_man_test.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/descriptionpairs_man_test.xlsx -------------------------------------------------------------------------------- /Output/metrics_tables.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/metrics_tables.xlsx -------------------------------------------------------------------------------- /Output/metrics_tables_man.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/metrics_tables_man.xlsx 
-------------------------------------------------------------------------------- /Output/metrics_tables_man_nomatch.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/metrics_tables_man_nomatch.xlsx -------------------------------------------------------------------------------- /Output/metrics_tables_nomatch.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/metrics_tables_nomatch.xlsx -------------------------------------------------------------------------------- /Output/metrics_tables_unmatchedonly.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/metrics_tables_unmatchedonly.xlsx -------------------------------------------------------------------------------- /Output/repeats_table.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/repeats_table.xlsx -------------------------------------------------------------------------------- /Output/repeats_table_formatted.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/repeats_table_formatted.xlsx -------------------------------------------------------------------------------- /Output/repeats_table_formatted_trim.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/repeats_table_formatted_trim.xlsx -------------------------------------------------------------------------------- /Output/repeats_table_man.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/repeats_table_man.xlsx -------------------------------------------------------------------------------- /Output/repeats_table_man_formatted.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Output/repeats_table_man_formatted.xlsx -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LLM_CodeQuery # 2 | 3 | Code Repository for "Generative Large Language Models are Poor Medical Coders: A Benchmarking Analysis of Medical Code Querying" 4 | -------------------------------------------------------------------------------- /Raw/2023_DHS_Code_List_Addendum_12_01_2022.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Raw/2023_DHS_Code_List_Addendum_12_01_2022.txt -------------------------------------------------------------------------------- /Raw/2023_DHS_Code_List_Addendum_12_01_2022.xlsx: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Raw/2023_DHS_Code_List_Addendum_12_01_2022.xlsx -------------------------------------------------------------------------------- /Raw/CMS32_DESC_LONG_SHORT_DX.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Raw/CMS32_DESC_LONG_SHORT_DX.xlsx -------------------------------------------------------------------------------- /Raw/Section111ValidICD9-Jan2024.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nadkarni-Lab/LLM_CodeQuery/1e1fef888d6950217dcfcb0cfe7f175c4b870278/Raw/Section111ValidICD9-Jan2024.xlsx -------------------------------------------------------------------------------- /Raw/codes_addenda_2023.txt: -------------------------------------------------------------------------------- 1 | Add: 02LW0DJ Occlusion of Thoracic Aorta, Descending with Intraluminal Device, Temporary, Open Approach 2 | Add: 04L00DJ Occlusion of Abdominal Aorta with Intraluminal Device, Temporary, Open Approach 3 | Add: 0P530Z3 Destruction of Cervical Vertebra using Laser Interstitial Thermal Therapy, Open Approach 4 | Add: 0P533Z3 Destruction of Cervical Vertebra using Laser Interstitial Thermal Therapy, Percutaneous Approach 5 | Add: 0P534Z3 Destruction of Cervical Vertebra using Laser Interstitial Thermal Therapy, Percutaneous Endoscopic Approach 6 | Add: 0P540Z3 Destruction of Thoracic Vertebra using Laser Interstitial Thermal Therapy, Open Approach 7 | Add: 0P543Z3 Destruction of Thoracic Vertebra using Laser Interstitial Thermal Therapy, Percutaneous Approach 8 | Add: 0P544Z3 Destruction of Thoracic Vertebra using Laser Interstitial Thermal Therapy, Percutaneous Endoscopic Approach 9 | Add: 0Q500Z3 Destruction of Lumbar Vertebra using Laser Interstitial Thermal Therapy, Open Approach 10 | Add: 0Q503Z3 Destruction of Lumbar Vertebra using Laser Interstitial Thermal Therapy, Percutaneous Approach 11 | Add: 0Q504Z3 Destruction of Lumbar Vertebra using Laser Interstitial Thermal Therapy, Percutaneous Endoscopic Approach 12 | Add: 0Q510Z3 Destruction of Sacrum using Laser Interstitial Thermal Therapy, Open Approach 13 | Add: 0Q513Z3 Destruction of Sacrum using Laser Interstitial Thermal Therapy, Percutaneous Approach 14 | Add: 0Q514Z3 Destruction of Sacrum using Laser Interstitial Thermal Therapy, Percutaneous Endoscopic Approach 15 | Add: 302A3H0 Transfusion of Autologous Whole Blood into Bone Marrow, Percutaneous Approach 16 | Add: 302A3H1 Transfusion of Nonautologous Whole Blood into Bone Marrow, Percutaneous Approach 17 | Add: 302A3J0 Transfusion of Autologous Serum Albumin into Bone Marrow, Percutaneous Approach 18 | Add: 302A3J1 Transfusion of Nonautologous Serum Albumin into Bone Marrow, Percutaneous Approach 19 | Add: 302A3K0 Transfusion of Autologous Frozen Plasma into Bone Marrow, Percutaneous Approach 20 | Add: 302A3K1 Transfusion of Nonautologous Frozen Plasma into Bone Marrow, Percutaneous Approach 21 | Add: 302A3L0 Transfusion of Autologous Fresh Plasma into Bone Marrow, Percutaneous Approach 22 | Add: 302A3L1 Transfusion of Nonautologous Fresh Plasma into Bone Marrow, Percutaneous Approach 23 | Add: 302A3N0 Transfusion of Autologous Red Blood Cells into Bone Marrow, Percutaneous Approach 24 | Add: 302A3N1 
Transfusion of Nonautologous Red Blood Cells into Bone Marrow, Percutaneous Approach 25 | Add: 302A3P0 Transfusion of Autologous Frozen Red Cells into Bone Marrow, Percutaneous Approach 26 | Add: 302A3P1 Transfusion of Nonautologous Frozen Red Cells into Bone Marrow, Percutaneous Approach 27 | Add: 302A3R0 Transfusion of Autologous Platelets into Bone Marrow, Percutaneous Approach 28 | Add: 302A3R1 Transfusion of Nonautologous Platelets into Bone Marrow, Percutaneous Approach 29 | Add: XW013G6 Introduction of REGN-COV2 Monoclonal Antibody into Subcutaneous Tissue, Percutaneous Approach, New Technology Group 6 30 | Add: XW0DXK8 Introduction of Sabizabulin into Mouth and Pharynx, External Approach, New Technology Group 8 31 | Add: XW0G7K8 Introduction of Sabizabulin into Upper GI, Via Natural or Artificial Opening, New Technology Group 8 32 | Add: XW0H7K8 Introduction of Sabizabulin into Lower GI, Via Natural or Artificial Opening, New Technology Group 8 33 | Add: XW133J8 Transfusion of Exagamglogene Autotemcel into Peripheral Vein, Percutaneous Approach, New Technology Group 8 34 | Add: XW143J8 Transfusion of Exagamglogene Autotemcel into Central Vein, Percutaneous Approach, New Technology Group 8 -------------------------------------------------------------------------------- /Raw/icd10cm_codes_addenda_2023.txt: -------------------------------------------------------------------------------- 1 | Add: T74A1XA Adult financial abuse, confirmed, initial encounter 2 | Add: T74A1XD Adult financial abuse, confirmed, subsequent encounter 3 | Add: T74A1XS Adult financial abuse, confirmed, sequela 4 | Add: T74A2XA Child financial abuse, confirmed, initial encounter 5 | Add: T74A2XD Child financial abuse, confirmed, subsequent encounter 6 | Add: T74A2XS Child financial abuse, confirmed, sequela 7 | Add: T76A1XA Adult financial abuse, suspected, initial encounter 8 | Add: T76A1XD Adult financial abuse, suspected, subsequent encounter 9 | Add: T76A1XS Adult financial abuse, suspected, sequela 10 | Add: T76A2XA Child financial abuse, suspected, initial encounter 11 | Add: T76A2XD Child financial abuse, suspected, subsequent encounter 12 | Add: T76A2XS Child financial abuse, suspected, sequela 13 | Delete: Y0701 Husband, perpetrator of maltreatment and neglect 14 | Add: Y07010 Husband, current, perpetrator of maltreatment and neglect 15 | Add: Y07011 Husband, former, perpetrator of maltreatment and neglect 16 | Delete: Y0702 Wife, perpetrator of maltreatment and neglect 17 | Add: Y07020 Wife, current, perpetrator of maltreatment and neglect 18 | Add: Y07021 Wife, former, perpetrator of maltreatment and neglect 19 | Delete: Y0703 Male partner, perpetrator of maltreatment and neglect 20 | Add: Y07030 Male partner, current, perpetrator of maltreatment and neglect 21 | Add: Y07031 Male partner, former, perpetrator of maltreatment and neglect 22 | Delete: Y0704 Female partner, perpetrator of maltreatment and neglect 23 | Add: Y07040 Female partner, current, perpetrator of maltreatment and neglect 24 | Add: Y07041 Female partner, former, perpetrator of maltreatment and neglect 25 | Add: Y07050 Non-binary partner, current, perpetrator of maltreatment and neglect 26 | Add: Y07051 Non-binary partner, former, perpetrator of maltreatment and neglect 27 | Add: Y0744 Child, perpetrator of maltreatment and neglect 28 | Add: Y0745 Grandchild, perpetrator of maltreatment and neglect 29 | Add: Y0746 Grandparent, perpetrator of maltreatment and neglect 30 | Add: Y0747 Parental sibling, perpetrator of maltreatment and 
neglect 31 | Add: Y0754 Acquaintance or friend, perpetrator of maltreatment and neglect 32 | Add: Z556 Problems related to health literacy 33 | Add: Z5881 Basic services unavailable in physical environment 34 | Add: Z5889 Other problems related to physical environment 35 | Delete: Z591 Inadequate housing 36 | Add: Z5910 Inadequate housing, unspecified 37 | Add: Z5911 Inadequate housing environmental temperature 38 | Add: Z5912 Inadequate housing utilities 39 | Add: Z5919 Other inadequate housing 40 | Revise from: Z5987 Material hardship 41 | Revise to: Z5987 Material hardship due to limited financial resources, not elsewhere classified 42 | Add: Z62814 Personal history of child financial abuse 43 | Add: Z62815 Personal history of intimate partner abuse in childhood 44 | Delete: Z9114 Patient's other noncompliance with medication regimen 45 | Add: Z91141 Patient's other noncompliance with medication regimen due to financial hardship 46 | Add: Z91148 Patient's other noncompliance with medication regimen for other reason 47 | Delete: Z9115 Patient's noncompliance with renal dialysis 48 | Add: Z91151 Patient's noncompliance with renal dialysis due to financial hardship 49 | Add: Z91158 Patient's noncompliance with renal dialysis for other reason 50 | Add: Z91413 Personal history of adult financial abuse 51 | Add: Z91414 Personal history of adult intimate partner abuse 52 | 53 | --------------------------------------------------------------------------------
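A minimal end-to-end sketch of how the modules above fit together, for orientation only: the input workbook name, its DiagnosisValue/DisplayString/count columns, the model choice, and the script-style event loop below are illustrative assumptions, not files or settings shipped in this repository. Inside a notebook you would await main() directly instead of calling asyncio.run.

import asyncio

import Processing  # Code/Processing.py
import LLM         # Code/LLM.py

async def main():
    # Build the prompt dataset for one code system; the workbook name and its
    # DiagnosisValue / DisplayString / count columns are hypothetical stand-ins.
    code_datasets = {"ICD10CM": await Processing.prompt_dataset_with_langchain("Raw/ICD10CM.xlsx")}

    # Query one model over every prompt; raw and parsed responses are
    # checkpointed under Output/Intermediate/results_demo.pkl between batches.
    await LLM.run_llms(["gpt-4-0613"], code_datasets, suffix="_demo")

asyncio.run(main())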