├── Submit_model ├── model1.mdl ├── model2.mdl ├── model3.mdl ├── model4.mdl └── model5.mdl ├── setup.sh ├── LICENSE ├── shap_explain.py ├── .gitignore ├── README.md ├── test.py ├── model_train.py ├── feature_engineering.py └── evaluate_sepsis_score.py /Submit_model/model1.mdl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meicheng-SEU/EASP/HEAD/Submit_model/model1.mdl -------------------------------------------------------------------------------- /Submit_model/model2.mdl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meicheng-SEU/EASP/HEAD/Submit_model/model2.mdl -------------------------------------------------------------------------------- /Submit_model/model3.mdl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meicheng-SEU/EASP/HEAD/Submit_model/model3.mdl -------------------------------------------------------------------------------- /Submit_model/model4.mdl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meicheng-SEU/EASP/HEAD/Submit_model/model4.mdl -------------------------------------------------------------------------------- /Submit_model/model5.mdl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Meicheng-SEU/EASP/HEAD/Submit_model/model5.mdl -------------------------------------------------------------------------------- /setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | url=https://archive.physionet.org/users/shared/challenge-2019/ 4 | 5 | mkdir data prediction label xgb_model 6 | 7 | cd data 8 | mkdir all_dataset 9 | 10 | curl -O $url/training_setA.zip 11 | unzip training_setA.zip 12 | curl -O $url/training_setB.zip 13 | unzip training_setB.zip 14 | 15 | cd .. 16 | 17 | python build_datasets.py 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Meicheng-SEU 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /shap_explain.py: -------------------------------------------------------------------------------- 1 | import xgboost as xgb 2 | import shap 3 | import numpy as np, os, sys 4 | from feature_engineering import data_process 5 | 6 | def shap_value(input_data, k_fold, model_path): 7 | shap.initjs() 8 | all_shap_values = np.zeros((input_data.shape[0], input_data.shape[1])) 9 | dat = xgb.DMatrix(input_data) 10 | for k in range(k_fold): 11 | file_name = './' + model_path + '/' + 'model{}.mdl'.format(k + 1) 12 | xgb_model = xgb.Booster(model_file = file_name) 13 | explainer = shap.TreeExplainer(xgb_model) 14 | shap_values = explainer.shap_values(dat) 15 | all_shap_values = all_shap_values + shap_values 16 | 17 | return all_shap_values / 5 18 | 19 | if __name__ == "__main__": 20 | if len(sys.argv) != 2: 21 | raise Exception('Include the model directory as arguments, ' 22 | 'e.g., python shap_explain.py Submit_model') 23 | 24 | data_path = "./data/all_dataset/" 25 | train_nosepsis = np.load('./data/train_nosepsis.npy') 26 | train_sepsis = np.load('./data/train_sepsis.npy') 27 | 28 | train_set = np.append(train_sepsis, train_nosepsis) 29 | features, labels = data_process(train_set, data_path) 30 | 31 | xgb_model_path = sys.argv[1] 32 | shap_data = shap_value(features, k_fold = 5, model_path = xgb_model_path) 33 | shap.summary_plot(shap_data, features, max_display = 20, plot_type = "bar") 34 | shap.summary_plot(shap_data, features, max_display = 20, plot_type = "dot") 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # EASP 2 | An Explainable Artificial Intelligence Predictor for Early Detection of Sepsis. The highest entry score from ***SailOcean*** in the PhysioNet/Computing in Cardiology Challenge 2019. 3 | 4 | ## Brief Introduction 5 | The PhysioNet/Computing in Cardiology Challenge 2019 facilitated the development of automated, open-source algorithms for the early detection of sepsis from clinical data. Details see (https://physionet.org/content/challenge-2019/1.0.0/). 6 | 7 | We proposed an Explainable Artificial-intelligence Sepsis Predictor (EASP) to predict sepsis risk hour-by-hour, and focused on its interpretability for the clinical EHR data sourced from ICU patients. Final results show that EASP achieved best performance in the challenge. 8 | 9 | ## Data 10 | These instructions go through the training and evaluation of our model on the Physionet 2019 challenge public database (https://archive.physionet.org/users/shared/challenge-2019/). 11 | 12 | To download and build the datasets run: 13 | 14 | ./setup.sh 15 | 16 | ## Training 17 | To train a model use the following command: 18 | 19 | python model_train.py 20 | 21 | Note that the model is saved in directory of 'xgb_model' 22 | 23 | ## Evaluation 24 | After training the model, you can make predictions and then yield the model performance. 25 | 26 | python test.py xgb_model 27 | 28 | Or you can directly use our trained model for quick verification using the following command. 29 | 30 | python test.py Submit_model 31 | 32 | ## Explanation 33 | Impacts of features on risk output were quantified by Shapley values to obtain instant interpretability for the developed EASP model. 34 | 35 | python shap_explain.py xgb_model 36 | or 37 | python shap_explain.py Submit_model 38 | 39 | ## Citation and Reference 40 | This work has been published in ***Critical Care Medicine***. 41 | 42 | [An Explainable Artificial Intelligence Predictor for Early Detection of Sepsis](https://journals.lww.com/ccmjournal/Fulltext/2020/11000/An_Explainable_Artificial_Intelligence_Predictor.37.aspx) 43 | 44 | Conference Paper published in ***2019 Computing in Cardiology Conference*** is as follows. 45 | 46 | [Early Prediction of Sepsis Using Multi-Feature Fusion Based XGBoost Learning and Bayesian Optimization](https://www.researchgate.net/publication/338628580_Early_Prediction_of_Sepsis_Using_Multi-Feature_Fusion_Based_XGBoost_Learning_and_Bayesian_Optimization) 47 | 48 | ## Feadback 49 | If you have any questions or suggestions on this work, please e-mail meicheng@seu.edu.cn 50 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np, os, sys 3 | import xgboost as xgb 4 | from evaluate_sepsis_score import evaluate_sepsis_score 5 | from feature_engineering import feature_extraction 6 | 7 | def save_challenge_predictions(file, scores, labels): 8 | with open(file, 'w') as f: 9 | f.write('PredictedProbability|PredictedLabel\n') 10 | for (s, l) in zip(scores, labels): 11 | f.write('%g|%d\n' % (s, l)) 12 | 13 | def save_challenge_testlabel(file, labels): 14 | with open(file, 'w') as f: 15 | f.write('SepsisLabel\n') 16 | for l in labels: 17 | f.write('%d\n' % l) 18 | 19 | def load_model_predict(X_test, k_fold, path): 20 | "ensemble the five XGBoost models by averaging their output probabilities" 21 | test_pred = np.zeros((X_test.shape[0], k_fold)) 22 | X_test = xgb.DMatrix(X_test) 23 | for k in range(k_fold): 24 | model_path_name = path + 'model{}.mdl'.format(k+1) 25 | xgb_model = xgb.Booster(model_file = model_path_name) 26 | y_test_pred = xgb_model.predict(X_test) 27 | test_pred[:, k] = y_test_pred 28 | test_pred = pd.DataFrame(test_pred) 29 | result_pro = test_pred.mean(axis=1) 30 | 31 | return result_pro 32 | 33 | def predict(data_set, 34 | data_dir, 35 | save_prediction_dir, 36 | save_label_dir, 37 | model_path, 38 | risk_threshold 39 | ): 40 | for psv in data_set: 41 | patient = pd.read_csv(os.path.join(data_dir, psv), sep='|') 42 | features, labels = feature_extraction(patient) 43 | 44 | predict_pro = load_model_predict(features, k_fold = 5, path = './' + model_path + '/') 45 | PredictedProbability = np.array(predict_pro) 46 | PredictedLabel = [0 if i <= risk_threshold else 1 for i in predict_pro] 47 | 48 | save_prediction_name = save_prediction_dir + psv 49 | save_challenge_predictions(save_prediction_name, PredictedProbability, PredictedLabel) 50 | save_testlabel_name = save_label_dir + psv 51 | save_challenge_testlabel(save_testlabel_name, labels) 52 | 53 | if __name__ == "__main__": 54 | if len(sys.argv) != 2: 55 | raise Exception('Include the model directory as arguments, ' 56 | 'e.g., python test.py Submit_model') 57 | 58 | test_set = np.load('./data/test_set.npy') 59 | test_data_path = "./data/all_dataset/" 60 | prediction_directory = './prediction/' 61 | label_directory = './label/' 62 | model_path = sys.argv[1] 63 | 64 | predict(test_set, test_data_path, prediction_directory, label_directory, model_path, 0.525) 65 | 66 | auroc, auprc, accuracy, f_measure, utility = evaluate_sepsis_score(label_directory, prediction_directory) 67 | output_string = 'AUROC|AUPRC|Accuracy|F-measure|Utility\n{}|{}|{}|{}|{}'.format( 68 | auroc, auprc, accuracy, f_measure, utility) 69 | print(output_string) 70 | -------------------------------------------------------------------------------- /model_train.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.model_selection import KFold 3 | from sklearn.metrics import roc_auc_score, accuracy_score 4 | import xgboost as xgb 5 | from hyperopt import STATUS_OK, hp, fmin, tpe 6 | from feature_engineering import data_process 7 | 8 | def BO_TPE(X_train, y_train, X_val, y_val): 9 | "Hyperparameter optimization" 10 | train = xgb.DMatrix(X_train, label=y_train) 11 | val = xgb.DMatrix(X_val, label=y_val) 12 | X_val_D = xgb.DMatrix(X_val) 13 | 14 | def objective(params): 15 | xgb_model = xgb.train(params, dtrain=train, num_boost_round=1000, evals=[(val, 'eval')], 16 | verbose_eval=False, early_stopping_rounds=80) 17 | y_vd_pred = xgb_model.predict(X_val_D, ntree_limit=xgb_model.best_ntree_limit) 18 | y_val_class = [0 if i <= 0.5 else 1 for i in y_vd_pred] 19 | 20 | acc = accuracy_score(y_val, y_val_class) 21 | loss = 1 - acc 22 | 23 | return {'loss': loss, 'params': params, 'status': STATUS_OK} 24 | 25 | max_depths = [3, 4] 26 | learning_rates = [0.01, 0.02, 0.04, 0.06, 0.08, 0.1, 0.15, 0.2] 27 | subsamples = [0.5, 0.6, 0.7, 0.8, 0.9] 28 | colsample_bytrees = [0.5, 0.6, 0.7, 0.8, 0.9] 29 | reg_alphas = [0.0, 0.005, 0.01, 0.05, 0.1] 30 | reg_lambdas = [0.8, 1, 1.5, 2, 4] 31 | 32 | space = { 33 | 'max_depth': hp.choice('max_depth', max_depths), 34 | 'learning_rate': hp.choice('learning_rate', learning_rates), 35 | 'subsample': hp.choice('subsample', subsamples), 36 | 'colsample_bytree': hp.choice('colsample_bytree', colsample_bytrees), 37 | 'reg_alpha': hp.choice('reg_alpha', reg_alphas), 38 | 'reg_lambda': hp.choice('reg_lambda', reg_lambdas), 39 | } 40 | 41 | best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=20) 42 | 43 | best_param = {'max_depth': max_depths[(best['max_depth'])], 44 | 'learning_rate': learning_rates[(best['learning_rate'])], 45 | 'subsample': subsamples[(best['subsample'])], 46 | 'colsample_bytree': colsample_bytrees[(best['colsample_bytree'])], 47 | 'reg_alpha': reg_alphas[(best['reg_alpha'])], 48 | 'reg_lambda': reg_lambdas[(best['reg_lambda'])] 49 | } 50 | 51 | return best_param 52 | 53 | def train_model(k, X_train, y_train, X_val, y_val, save_model_dir): 54 | print('*************************************************************') 55 | print('{}th training ..............'.format(k + 1)) 56 | print('Hyperparameters optimization') 57 | best_param = BO_TPE(X_train, y_train, X_val, y_val) 58 | xgb_model = xgb.XGBClassifier(max_depth = best_param['max_depth'], 59 | eta = best_param['learning_rate'], 60 | n_estimators = 1000, 61 | subsample = best_param['subsample'], 62 | colsample_bytree = best_param['colsample_bytree'], 63 | reg_alpha = best_param['reg_alpha'], 64 | reg_lambda = best_param['reg_lambda'], 65 | objective = "binary:logistic" 66 | ) 67 | 68 | xgb_model.fit(X_train, y_train, eval_set=[(X_val, y_val)], eval_metric='error', 69 | early_stopping_rounds=80, verbose=False) 70 | 71 | y_tr_pred = (xgb_model.predict_proba(X_train, ntree_limit=xgb_model.best_ntree_limit))[:, 1] 72 | train_auc = roc_auc_score(y_train, y_tr_pred) 73 | print('training dataset AUC: ' + str(train_auc)) 74 | y_tr_class = [0 if i <= 0.5 else 1 for i in y_tr_pred] 75 | acc = accuracy_score(y_train, y_tr_class) 76 | print('training dataset acc: ' + str(acc)) 77 | 78 | y_vd_pred = (xgb_model.predict_proba(X_val, ntree_limit=xgb_model.best_ntree_limit))[:, 1] 79 | valid_auc = roc_auc_score(y_val, y_vd_pred) 80 | print('validation dataset AUC: ' + str(valid_auc)) 81 | y_val_class = [0 if i <= 0.5 else 1 for i in y_vd_pred] 82 | acc = accuracy_score(y_val, y_val_class) 83 | print('validation dataset acc: ' + str(acc)) 84 | print('************************************************************') 85 | # save the model 86 | save_model_path = save_model_dir + 'model{}.mdl'.format(k + 1) 87 | xgb_model.get_booster().save_model(fname=save_model_path) 88 | 89 | def downsample(data_set, data_dir): 90 | """ 91 | Using our feature extraction approach will result in over 1 million hours of data in the training process. 92 | However, only roughly 1.8% of these data corresponds to a positive outcome. 93 | Consequently, in order to deal with the serious class imbalance, a systematic way is provided by 94 | down sampling the excessive data instances of the majority class in each cross validation. 95 | """ 96 | x, y = data_process(data_set, data_dir) 97 | index_0 = np.where(y == 0)[0] 98 | index_1 = np.where(y == 1)[0] 99 | 100 | index = index_0[len(index_1): -1] 101 | x_del = np.delete(x, index, 0) 102 | y_del = np.delete(y, index, 0) 103 | index = [i for i in range(len(y_del))] 104 | np.random.shuffle(index) 105 | x_del = x_del[index] 106 | y_del = y_del[index] 107 | 108 | return x_del, y_del 109 | 110 | if __name__ == "__main__": 111 | data_path = "./data/all_dataset/" 112 | train_nosepsis = np.load('./data/train_nosepsis.npy') 113 | train_sepsis = np.load('./data/train_sepsis.npy') 114 | 115 | # 5-fold cross validation was implemented and five XGBoost models were produced 116 | kfold = KFold(n_splits=5, shuffle=True, random_state=np.random.seed(12306)) 117 | for (k, (train0_index, val0_index)), (k, (train1_index, val1_index)) in \ 118 | zip(enumerate(kfold.split(train_nosepsis)), enumerate(kfold.split(train_sepsis))): 119 | 120 | train_set = np.append(train_nosepsis[train0_index], train_sepsis[train1_index]) 121 | x_train, y_train = downsample(train_set, data_path) 122 | 123 | val_set = np.append(train_nosepsis[val0_index], train_sepsis[val1_index]) 124 | x_val, y_val = downsample(val_set, data_path) 125 | 126 | train_model(k, x_train, y_train, x_val, y_val, save_model_dir = './xgb_model/') 127 | -------------------------------------------------------------------------------- /feature_engineering.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import numpy as np 4 | from itertools import chain 5 | 6 | sep_index = ['BaseExcess', 'HCO3', 'FiO2', 'pH', 'PaCO2', 'SaO2', 'AST', 7 | 'BUN', 'Alkalinephos', 'Calcium', 'Chloride', 'Creatinine', 8 | 'Glucose', 'Lactate', 'Magnesium', 'Phosphate', 'Potassium', 9 | 'Bilirubin_total', 'Hct', 'Hgb', 'PTT', 'WBC', 'Platelets'] 10 | con_index = ['HR', 'O2Sat', 'Temp', 'SBP', 'MAP', 'DBP', 'Resp', 'EtCO2'] 11 | 12 | def feature_informative_missingness(case, sep_columns): 13 | """ 14 | informative missingness features reflecting measurement frequency 15 | or time interval of raw variables 16 | differential features, defined by calculating the difference between 17 | the current record and the previous measurement value 18 | :param case: one patient's EHR data 19 | :param sep_columns: selected variables 20 | :return: calculated features 21 | """ 22 | temp_data = np.array(case) 23 | for sep_column in sep_columns: 24 | sep_data = np.array(case[sep_column]) 25 | nan_pos = np.where(~np.isnan(sep_data))[0] 26 | # Measurement frequency sequence 27 | interval_f1 = sep_data.copy() 28 | # Measurement time interval 29 | interval_f2 = sep_data.copy() 30 | if len(nan_pos) == 0: 31 | interval_f1[:] = 0 32 | temp_data = np.column_stack((temp_data, interval_f1)) 33 | interval_f2[:] = -1 34 | temp_data = np.column_stack((temp_data, interval_f2)) 35 | else: 36 | interval_f1[: nan_pos[0]] = 0 37 | for p in range(len(nan_pos)-1): 38 | interval_f1[nan_pos[p]: nan_pos[p+1]] = p + 1 39 | interval_f1[nan_pos[-1]:] = len(nan_pos) 40 | temp_data = np.column_stack((temp_data, interval_f1)) 41 | 42 | interval_f2[:nan_pos[0]] = -1 43 | for q in range(len(nan_pos) - 1): 44 | length = nan_pos[q+1] - nan_pos[q] 45 | for l in range(length): 46 | interval_f2[nan_pos[q] + l] = l 47 | 48 | length = len(case) - nan_pos[-1] 49 | for l in range(length): 50 | interval_f2[nan_pos[-1] + l] = l 51 | temp_data = np.column_stack((temp_data, interval_f2)) 52 | 53 | # Differential features 54 | diff_f = sep_data.copy() 55 | diff_f = diff_f.astype(float) 56 | if len(nan_pos) <= 1: 57 | diff_f[:] = np.NaN 58 | temp_data = np.column_stack((temp_data, diff_f)) 59 | else: 60 | diff_f[:nan_pos[1]] = np.NaN 61 | for p in range(1, len(nan_pos)-1): 62 | diff_f[nan_pos[p] : nan_pos[p+1]] = sep_data[nan_pos[p]] - sep_data[nan_pos[p-1]] 63 | diff_f[nan_pos[-1]:] = sep_data[nan_pos[-1]] - sep_data[nan_pos[-2]] 64 | temp_data = np.column_stack((temp_data, diff_f)) 65 | 66 | return temp_data 67 | 68 | def feature_slide_window(temp, con_index): 69 | """ 70 | Calculate dynamic statistics in a six-hour sliding window 71 | :param temp: data after using a forward-filling strategy 72 | :param con_index: selected variables 73 | :return: time-series features 74 | """ 75 | sepdata = temp[:, con_index] 76 | max_values = [[0 for col in range(len(sepdata))] 77 | for row in range(len(con_index))] 78 | min_values = [[0 for col in range(len(sepdata))] 79 | for row in range(len(con_index))] 80 | mean_values = [[0 for col in range(len(sepdata))] 81 | for row in range(len(con_index))] 82 | median_values = [[0 for col in range(len(sepdata))] 83 | for row in range(len(con_index))] 84 | std_values = [[0 for col in range(len(sepdata))] 85 | for row in range(len(con_index))] 86 | diff_std_values = [[0 for col in range(len(sepdata))] 87 | for row in range(len(con_index))] 88 | 89 | for i in range(len(sepdata)): 90 | if i < 6: 91 | win_data = sepdata[0:i + 1] 92 | for ii in range(6 - i): 93 | win_data = np.row_stack((win_data, sepdata[i])) 94 | else: 95 | win_data = sepdata[i - 6: i + 1] 96 | 97 | for j in range(len(con_index)): 98 | dat = win_data[:, j] 99 | if len(np.where(~np.isnan(dat))[0]) == 0: 100 | max_values[j][i] = np.nan 101 | min_values[j][i] = np.nan 102 | mean_values[j][i] = np.nan 103 | median_values[j][i] = np.nan 104 | std_values[j][i] = np.nan 105 | diff_std_values[j][i] = np.nan 106 | else: 107 | max_values[j][i] = np.nanmax(dat) 108 | min_values[j][i] = np.nanmin(dat) 109 | mean_values[j][i] = np.nanmean(dat) 110 | median_values[j][i] = np.nanmedian(dat) 111 | std_values[j][i] = np.nanstd(dat) 112 | diff_std_values[j][i] = np.std(np.diff(dat)) 113 | 114 | win_features = list(chain(max_values, min_values, mean_values, 115 | median_values, std_values, diff_std_values)) 116 | win_features = (np.array(win_features)).T 117 | 118 | return win_features 119 | 120 | def feature_empiric_score(dat): 121 | """ 122 | empiric features scoring for 123 | heart rate (HR), systolic blood pressure (SBP), mean arterial pressure (MAP), 124 | respiration rate (Resp), temperature (Temp), creatinine, platelets and total bilirubin 125 | according to the scoring systems of NEWS, SOFA and qSOFA 126 | """ 127 | scores = np.zeros((len(dat), 8)) 128 | for ii in range(len(dat)): 129 | HR = dat[ii, 0] 130 | if HR == np.nan: 131 | HR_score = np.nan 132 | elif (HR <= 40) | (HR >= 131): 133 | HR_score = 3 134 | elif 111 <= HR <= 130: 135 | HR_score = 2 136 | elif (41 <= HR <= 50) | (91 <= HR <= 110): 137 | HR_score = 1 138 | else: 139 | HR_score = 0 140 | scores[ii, 0] = HR_score 141 | 142 | Temp = dat[ii, 2] 143 | if Temp == np.nan: 144 | Temp_score = np.nan 145 | elif Temp <= 35: 146 | Temp_score = 3 147 | elif Temp >= 39.1: 148 | Temp_score = 2 149 | elif (35.1 <= Temp <= 36.0) | (38.1 <= Temp <= 39.0): 150 | Temp_score = 1 151 | else: 152 | Temp_score = 0 153 | scores[ii, 1] = Temp_score 154 | 155 | Resp = dat[ii, 6] 156 | if Resp == np.nan: 157 | Resp_score = np.nan 158 | elif (Resp < 8) | (Resp > 25): 159 | Resp_score = 3 160 | elif 21 <= Resp <= 24: 161 | Resp_score = 2 162 | elif 9 <= Resp <= 11: 163 | Resp_score = 1 164 | else: 165 | Resp_score = 0 166 | scores[ii, 2] = Resp_score 167 | 168 | Creatinine = dat[ii, 19] 169 | if Creatinine == np.nan: 170 | Creatinine_score = np.nan 171 | elif Creatinine < 1.2: 172 | Creatinine_score = 0 173 | elif Creatinine < 2: 174 | Creatinine_score = 1 175 | elif Creatinine < 3.5: 176 | Creatinine_score = 2 177 | else: 178 | Creatinine_score = 3 179 | scores[ii, 3] = Creatinine_score 180 | 181 | MAP = dat[ii, 4] 182 | if MAP == np.nan: 183 | MAP_score = np.nan 184 | elif MAP >= 70: 185 | MAP_score = 0 186 | else: 187 | MAP_score = 1 188 | scores[ii, 4] = MAP_score 189 | 190 | SBP = dat[ii, 3] 191 | Resp = dat[ii, 6] 192 | if SBP + Resp == np.nan: 193 | qsofa = np.nan 194 | elif (SBP <= 100) & (Resp >= 22): 195 | qsofa = 1 196 | else: 197 | qsofa = 0 198 | scores[ii, 5] = qsofa 199 | 200 | Platelets = dat[ii, 30] 201 | if Platelets == np.nan: 202 | Platelets_score = np.nan 203 | elif Platelets <= 50: 204 | Platelets_score = 3 205 | elif Platelets <= 100: 206 | Platelets_score = 2 207 | elif Platelets <= 150: 208 | Platelets_score = 1 209 | else: 210 | Platelets_score = 0 211 | scores[ii, 6] = Platelets_score 212 | 213 | Bilirubin = dat[ii, 25] 214 | if Bilirubin == np.nan: 215 | Bilirubin_score = np.nan 216 | elif Bilirubin < 1.2: 217 | Bilirubin_score = 0 218 | elif Bilirubin < 2: 219 | Bilirubin_score = 1 220 | elif Bilirubin < 6: 221 | Bilirubin_score = 2 222 | else: 223 | Bilirubin_score = 3 224 | scores[ii, 7] = Bilirubin_score 225 | 226 | return scores 227 | 228 | def feature_extraction(case): 229 | labels = np.array(case['SepsisLabel']) 230 | # drop three variables due to their massive missing values 231 | pid = case.drop(columns=['Bilirubin_direct', 'TroponinI', 'Fibrinogen', 'SepsisLabel']) 232 | 233 | temp_data = feature_informative_missingness(pid, con_index + sep_index) 234 | temp = pd.DataFrame(temp_data) 235 | # Missing values used a forward-filling strategy 236 | temp = temp.fillna(method='ffill') 237 | # 62 informative missingness features, 31 differential features 238 | # and 37 raw variables 239 | feature_A = np.array(temp) 240 | # Statistics in a six-hour window for the selected measurements 241 | # [0, 1, 3, 4, 6] = ['HR', 'O2Sat', 'SBP', 'MAP', 'Resp'] 242 | # 30 statistical features in the window 243 | feature_B = feature_slide_window(feature_A, [0, 1, 3, 4, 6]) 244 | # 8 empiric features 245 | feature_C = feature_empiric_score(feature_A) 246 | # A total of 168 features were obtained 247 | features = np.column_stack((feature_A, feature_B, feature_C)) 248 | 249 | return features, labels 250 | 251 | def data_process(data_set, data_path_dir): 252 | """ 253 | Feature matrix across all patients in the data_set 254 | """ 255 | frames_features = [] 256 | frames_labels = [] 257 | for psv in data_set: 258 | patient = pd.read_csv(os.path.join(data_path_dir, psv), sep='|') 259 | features, labels = feature_extraction(patient) 260 | features = pd.DataFrame(features) 261 | labels = pd.DataFrame(labels) 262 | frames_features.append(features) 263 | frames_labels.append(labels) 264 | 265 | dat_features = np.array(pd.concat(frames_features)) 266 | dat_labels = (np.array(pd.concat(frames_labels)))[:, 0] 267 | 268 | index = [i for i in range(len(dat_labels))] 269 | np.random.shuffle(index) 270 | dat_features = dat_features[index] 271 | dat_labels = dat_labels[index] 272 | 273 | return dat_features, dat_labels 274 | -------------------------------------------------------------------------------- /evaluate_sepsis_score.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This file contains functions for evaluating algorithms for the 2019 PhysioNet/ 4 | # CinC Challenge. You can run it as follows: 5 | # 6 | # python evaluate_sepsis_score.py labels predictions scores.psv 7 | # 8 | # where 'labels' is a directory containing files with labels, 'predictions' is a 9 | # directory containing files with predictions, and 'scores.psv' (optional) is a 10 | # collection of scores for the predictions. 11 | 12 | ################################################################################ 13 | 14 | # The evaluate_scores function computes a normalized utility score for a cohort 15 | # of patients along with several traditional scoring metrics. 16 | # 17 | # Inputs: 18 | # 'label_directory' is a directory of pipe-delimited text files containing a 19 | # binary vector of labels for whether a patient is not septic (0) or septic 20 | # (1) for each time interval. 21 | # 22 | # 'prediction_directory' is a directory of pipe-delimited text files, where 23 | # the first column of the file gives the predicted probability that the 24 | # patient is septic at each time, and the second column of the file is a 25 | # binarized version of this vector. Note that there must be a prediction for 26 | # every label. 27 | # 28 | # Outputs: 29 | # 'auroc' is the area under the receiver operating characteristic curve 30 | # (AUROC). 31 | # 32 | # 'auprc' is the area under the precision recall curve (AUPRC). 33 | # 34 | # 'accuracy' is accuracy. 35 | # 36 | # 'f_measure' is F-measure. 37 | # 38 | # 'normalized_observed_utility' is a normalized utility-based measure that we 39 | # created for the Challenge. This score is normalized so that a perfect score 40 | # is 1 and no positive predictions is 0. 41 | # 42 | # Example: 43 | # Omitted due to length. See the below examples. 44 | 45 | import numpy as np, os, os.path, sys, warnings 46 | 47 | def evaluate_sepsis_score(label_directory, prediction_directory): 48 | # Set parameters. 49 | label_header = 'SepsisLabel' 50 | prediction_header = 'PredictedLabel' 51 | probability_header = 'PredictedProbability' 52 | 53 | dt_early = -12 54 | dt_optimal = -6 55 | dt_late = 3 56 | 57 | max_u_tp = 1 58 | min_u_fn = -2 59 | u_fp = -0.05 60 | u_tn = 0 61 | 62 | # Find label and prediction files. 63 | label_files = [] 64 | for f in os.listdir(label_directory): 65 | g = os.path.join(label_directory, f) 66 | if os.path.isfile(g) and not f.lower().startswith('.') and f.lower().endswith('psv'): 67 | label_files.append(g) 68 | label_files = sorted(label_files) 69 | 70 | prediction_files = [] 71 | for f in os.listdir(prediction_directory): 72 | g = os.path.join(prediction_directory, f) 73 | if os.path.isfile(g) and not f.lower().startswith('.') and f.lower().endswith('psv'): 74 | prediction_files.append(g) 75 | prediction_files = sorted(prediction_files) 76 | 77 | if len(label_files) != len(prediction_files): 78 | raise Exception('Numbers of label and prediction files must be the same.') 79 | 80 | # Load labels and predictions. 81 | num_files = len(label_files) 82 | cohort_labels = [] 83 | cohort_predictions = [] 84 | cohort_probabilities = [] 85 | 86 | for k in range(num_files): 87 | labels = load_column(label_files[k], label_header, '|') 88 | predictions = load_column(prediction_files[k], prediction_header, '|') 89 | probabilities = load_column(prediction_files[k], probability_header, '|') 90 | 91 | # Check labels and predictions for errors. 92 | if not (len(labels) == len(predictions) and len(predictions) == len(probabilities)): 93 | raise Exception('Numbers of labels and predictions for a file must be the same.') 94 | 95 | num_rows = len(labels) 96 | 97 | for i in range(num_rows): 98 | if labels[i] not in (0, 1): 99 | raise Exception('Labels must satisfy label == 0 or label == 1.') 100 | 101 | if predictions[i] not in (0, 1): 102 | raise Exception('Predictions must satisfy prediction == 0 or prediction == 1.') 103 | 104 | if not 0 <= probabilities[i] <= 1: 105 | warnings.warn('Probabilities do not satisfy 0 <= probability <= 1.') 106 | 107 | if 0 < np.sum(predictions) < num_rows: 108 | min_probability_positive = np.min(probabilities[predictions == 1]) 109 | max_probability_negative = np.max(probabilities[predictions == 0]) 110 | 111 | if min_probability_positive <= max_probability_negative: 112 | warnings.warn('Predictions are inconsistent with probabilities, i.e., a positive prediction has a lower (or equal) probability than a negative prediction.') 113 | 114 | # Record labels and predictions. 115 | cohort_labels.append(labels) 116 | cohort_predictions.append(predictions) 117 | cohort_probabilities.append(probabilities) 118 | 119 | # Compute AUC, accuracy, and F-measure. 120 | labels = np.concatenate(cohort_labels) 121 | predictions = np.concatenate(cohort_predictions) 122 | probabilities = np.concatenate(cohort_probabilities) 123 | 124 | auroc, auprc = compute_auc(labels, probabilities) 125 | accuracy, f_measure = compute_accuracy_f_measure(labels, predictions) 126 | 127 | # Compute utility. 128 | observed_utilities = np.zeros(num_files) 129 | best_utilities = np.zeros(num_files) 130 | worst_utilities = np.zeros(num_files) 131 | inaction_utilities = np.zeros(num_files) 132 | 133 | for k in range(num_files): 134 | labels = cohort_labels[k] 135 | num_rows = len(labels) 136 | observed_predictions = cohort_predictions[k] 137 | best_predictions = np.zeros(num_rows) 138 | worst_predictions = np.zeros(num_rows) 139 | inaction_predictions = np.zeros(num_rows) 140 | 141 | if np.any(labels): 142 | t_sepsis = np.argmax(labels) - dt_optimal 143 | best_predictions[max(0, t_sepsis + dt_early) : min(t_sepsis + dt_late + 1, num_rows)] = 1 144 | worst_predictions = 1 - best_predictions 145 | 146 | observed_utilities[k] = compute_prediction_utility(labels, observed_predictions, dt_early, dt_optimal, dt_late, max_u_tp, min_u_fn, u_fp, u_tn) 147 | best_utilities[k] = compute_prediction_utility(labels, best_predictions, dt_early, dt_optimal, dt_late, max_u_tp, min_u_fn, u_fp, u_tn) 148 | worst_utilities[k] = compute_prediction_utility(labels, worst_predictions, dt_early, dt_optimal, dt_late, max_u_tp, min_u_fn, u_fp, u_tn) 149 | inaction_utilities[k] = compute_prediction_utility(labels, inaction_predictions, dt_early, dt_optimal, dt_late, max_u_tp, min_u_fn, u_fp, u_tn) 150 | 151 | unnormalized_observed_utility = np.sum(observed_utilities) 152 | unnormalized_best_utility = np.sum(best_utilities) 153 | unnormalized_worst_utility = np.sum(worst_utilities) 154 | unnormalized_inaction_utility = np.sum(inaction_utilities) 155 | 156 | normalized_observed_utility = (unnormalized_observed_utility - unnormalized_inaction_utility) / (unnormalized_best_utility - unnormalized_inaction_utility) 157 | 158 | return auroc, auprc, accuracy, f_measure, normalized_observed_utility 159 | 160 | # The load_column function loads a column from a table. 161 | # 162 | # Inputs: 163 | # 'filename' is a string containing a filename. 164 | # 165 | # 'header' is a string containing a header. 166 | # 167 | # Outputs: 168 | # 'column' is a vector containing a column from the file with the given 169 | # header. 170 | # 171 | # Example: 172 | # Omitted. 173 | 174 | def load_column(filename, header, delimiter): 175 | column = [] 176 | with open(filename, 'r') as f: 177 | for i, l in enumerate(f): 178 | arrs = l.strip().split(delimiter) 179 | if i == 0: 180 | try: 181 | j = arrs.index(header) 182 | except: 183 | raise Exception('{} must contain column with header {} containing numerical entries.'.format(filename, header)) 184 | else: 185 | if len(arrs[j]): 186 | column.append(float(arrs[j])) 187 | return np.array(column) 188 | 189 | # The compute_auc function computes AUROC and AUPRC as well as other summary 190 | # statistics (TP, FP, FN, TN, TPR, TNR, PPV, NPV, etc.) that can be exposed 191 | # from this function. 192 | # 193 | # Inputs: 194 | # 'labels' is a binary vector, where labels[i] == 0 if the patient is not 195 | # labeled as septic at time i and labels[i] == 1 if the patient is labeled as 196 | # septic at time i. 197 | # 198 | # 'predictions' is a probability vector, where predictions[i] gives the 199 | # predicted probability that the patient is septic at time i. Note that there 200 | # must be a prediction for every label, i.e, len(labels) == 201 | # len(predictions). 202 | # 203 | # Outputs: 204 | # 'auroc' is a scalar that gives the AUROC of the algorithm using its 205 | # predicted probabilities, where specificity is interpolated for intermediate 206 | # sensitivity values. 207 | # 208 | # 'auprc' is a scalar that gives the AUPRC of the algorithm using its 209 | # predicted probabilities, where precision is a piecewise constant function of 210 | # recall. 211 | # 212 | # Example: 213 | # In [1]: labels = [0, 0, 0, 0, 1, 1] 214 | # In [2]: predictions = [0.3, 0.4, 0.6, 0.7, 0.8, 0.8] 215 | # In [3]: auroc, auprc = compute_auc(labels, predictions) 216 | # In [4]: auroc 217 | # Out[4]: 1.0 218 | # In [5]: auprc 219 | # Out[5]: 1.0 220 | 221 | def compute_auc(labels, predictions, check_errors=True): 222 | # Check inputs for errors. 223 | if check_errors: 224 | if len(predictions) != len(labels): 225 | raise Exception('Numbers of predictions and labels must be the same.') 226 | 227 | for label in labels: 228 | if not label in (0, 1): 229 | raise Exception('Labels must satisfy label == 0 or label == 1.') 230 | 231 | for prediction in predictions: 232 | if not 0 <= prediction <= 1: 233 | warnings.warn('Predictions do not satisfy 0 <= prediction <= 1.') 234 | 235 | # Find prediction thresholds. 236 | thresholds = np.unique(predictions)[::-1] 237 | if thresholds[0] != 1: 238 | thresholds = np.insert(thresholds, 0, 1) 239 | if thresholds[-1] == 0: 240 | thresholds = thresholds[:-1] 241 | 242 | n = len(labels) 243 | m = len(thresholds) 244 | 245 | # Populate contingency table across prediction thresholds. 246 | tp = np.zeros(m) 247 | fp = np.zeros(m) 248 | fn = np.zeros(m) 249 | tn = np.zeros(m) 250 | 251 | # Find indices that sort the predicted probabilities from largest to 252 | # smallest. 253 | idx = np.argsort(predictions)[::-1] 254 | 255 | i = 0 256 | for j in range(m): 257 | # Initialize contingency table for j-th prediction threshold. 258 | if j == 0: 259 | tp[j] = 0 260 | fp[j] = 0 261 | fn[j] = np.sum(labels) 262 | tn[j] = n - fn[j] 263 | else: 264 | tp[j] = tp[j - 1] 265 | fp[j] = fp[j - 1] 266 | fn[j] = fn[j - 1] 267 | tn[j] = tn[j - 1] 268 | 269 | # Update contingency table for i-th largest predicted probability. 270 | while i < n and predictions[idx[i]] >= thresholds[j]: 271 | if labels[idx[i]]: 272 | tp[j] += 1 273 | fn[j] -= 1 274 | else: 275 | fp[j] += 1 276 | tn[j] -= 1 277 | i += 1 278 | 279 | # Summarize contingency table. 280 | tpr = np.zeros(m) 281 | tnr = np.zeros(m) 282 | ppv = np.zeros(m) 283 | npv = np.zeros(m) 284 | 285 | for j in range(m): 286 | if tp[j] + fn[j]: 287 | tpr[j] = tp[j] / (tp[j] + fn[j]) 288 | else: 289 | tpr[j] = 1 290 | if fp[j] + tn[j]: 291 | tnr[j] = tn[j] / (fp[j] + tn[j]) 292 | else: 293 | tnr[j] = 1 294 | if tp[j] + fp[j]: 295 | ppv[j] = tp[j] / (tp[j] + fp[j]) 296 | else: 297 | ppv[j] = 1 298 | if fn[j] + tn[j]: 299 | npv[j] = tn[j] / (fn[j] + tn[j]) 300 | else: 301 | npv[j] = 1 302 | 303 | # Compute AUROC as the area under a piecewise linear function with TPR / 304 | # sensitivity (x-axis) and TNR / specificity (y-axis) and AUPRC as the area 305 | # under a piecewise constant with TPR / recall (x-axis) and PPV / precision 306 | # (y-axis). 307 | auroc = 0 308 | auprc = 0 309 | for j in range(m-1): 310 | auroc += 0.5 * (tpr[j + 1] - tpr[j]) * (tnr[j + 1] + tnr[j]) 311 | auprc += (tpr[j + 1] - tpr[j]) * ppv[j + 1] 312 | 313 | return auroc, auprc 314 | 315 | # The compute_accuracy_f_measure function computes the accuracy and F-measure 316 | # for a patient. 317 | # 318 | # Inputs: 319 | # 'labels' is a binary vector, where labels[i] == 0 if the patient is not 320 | # labeled as septic at time i and labels[i] == 1 if the patient is labeled as 321 | # septic at time i. 322 | # 323 | # 'predictions' is a binary vector, where predictions[i] == 0 if the patient 324 | # is not predicted to be septic at time i and predictions[i] == 1 if the 325 | # patient is predicted to be septic at time i. Note that there must be a 326 | # prediction for every label, i.e, len(labels) == len(predictions). 327 | # 328 | # Output: 329 | # 'accuracy' is a scalar that gives the accuracy of the predictions using its 330 | # binarized predictions. 331 | # 332 | # 'f_measure' is a scalar that gives the F-measure of the predictions using its 333 | # binarized predictions. 334 | # 335 | # Example: 336 | # In [1]: labels = [0, 0, 0, 0, 1, 1] 337 | # In [2]: predictions = [0, 0, 1, 1, 1, 1] 338 | # In [3]: accuracy, f_measure = compute_accuracy_f_measure(labels, predictions) 339 | # In [4]: accuracy 340 | # Out[4]: 0.666666666667 341 | # In [5]: f_measure 342 | # Out[5]: 0.666666666667 343 | 344 | def compute_accuracy_f_measure(labels, predictions, check_errors=True): 345 | # Check inputs for errors. 346 | if check_errors: 347 | if len(predictions) != len(labels): 348 | raise Exception('Numbers of predictions and labels must be the same.') 349 | 350 | for label in labels: 351 | if not label in (0, 1): 352 | raise Exception('Labels must satisfy label == 0 or label == 1.') 353 | 354 | for prediction in predictions: 355 | if not prediction in (0, 1): 356 | raise Exception('Predictions must satisfy prediction == 0 or prediction == 1.') 357 | 358 | # Populate contingency table. 359 | n = len(labels) 360 | tp = 0 361 | fp = 0 362 | fn = 0 363 | tn = 0 364 | 365 | for i in range(n): 366 | if labels[i] and predictions[i]: 367 | tp += 1 368 | elif not labels[i] and predictions[i]: 369 | fp += 1 370 | elif labels[i] and not predictions[i]: 371 | fn += 1 372 | elif not labels[i] and not predictions[i]: 373 | tn += 1 374 | 375 | # Summarize contingency table. 376 | if tp + fp + fn + tn: 377 | accuracy = float(tp + tn) / float(tp + fp + fn + tn) 378 | else: 379 | accuracy = 1.0 380 | 381 | if 2 * tp + fp + fn: 382 | f_measure = float(2 * tp) / float(2 * tp + fp + fn) 383 | else: 384 | f_measure = 1.0 385 | 386 | return accuracy, f_measure 387 | 388 | # The compute_prediction_utility function computes the total time-dependent 389 | # utility for a patient. 390 | # 391 | # Inputs: 392 | # 'labels' is a binary vector, where labels[i] == 0 if the patient is not 393 | # labeled as septic at time i and labels[i] == 1 if the patient is labeled as 394 | # septic at time i. 395 | # 396 | # 'predictions' is a binary vector, where predictions[i] == 0 if the patient 397 | # is not predicted to be septic at time i and predictions[i] == 1 if the 398 | # patient is predicted to be septic at time i. Note that there must be a 399 | # prediction for every label, i.e, len(labels) == len(predictions). 400 | # 401 | # Output: 402 | # 'utility' is a scalar that gives the total time-dependent utility of the 403 | # algorithm using its binarized predictions. 404 | # 405 | # Example: 406 | # In [1]: labels = [0, 0, 0, 0, 1, 1] 407 | # In [2]: predictions = [0, 0, 1, 1, 1, 1] 408 | # In [3]: utility = compute_prediction_utility(labels, predictions) 409 | # In [4]: utility 410 | # Out[4]: 3.388888888888889 411 | 412 | def compute_prediction_utility(labels, predictions, dt_early=-12, dt_optimal=-6, dt_late=3.0, max_u_tp=1, min_u_fn=-2, u_fp=-0.05, u_tn=0, check_errors=True): 413 | # Check inputs for errors. 414 | if check_errors: 415 | if len(predictions) != len(labels): 416 | raise Exception('Numbers of predictions and labels must be the same.') 417 | 418 | for label in labels: 419 | if not label in (0, 1): 420 | raise Exception('Labels must satisfy label == 0 or label == 1.') 421 | 422 | for prediction in predictions: 423 | if not prediction in (0, 1): 424 | raise Exception('Predictions must satisfy prediction == 0 or prediction == 1.') 425 | 426 | if dt_early >= dt_optimal: 427 | raise Exception('The earliest beneficial time for predictions must be before the optimal time.') 428 | 429 | if dt_optimal >= dt_late: 430 | raise Exception('The optimal time for predictions must be before the latest beneficial time.') 431 | 432 | # Does the patient eventually have sepsis? 433 | if np.any(labels): 434 | is_septic = True 435 | t_sepsis = np.argmax(labels) - dt_optimal 436 | else: 437 | is_septic = False 438 | t_sepsis = float('inf') 439 | 440 | n = len(labels) 441 | 442 | # Define slopes and intercept points for utility functions of the form 443 | # u = m * t + b. 444 | m_1 = float(max_u_tp) / float(dt_optimal - dt_early) 445 | b_1 = -m_1 * dt_early 446 | m_2 = float(-max_u_tp) / float(dt_late - dt_optimal) 447 | b_2 = -m_2 * dt_late 448 | m_3 = float(min_u_fn) / float(dt_late - dt_optimal) 449 | b_3 = -m_3 * dt_optimal 450 | 451 | # Compare predicted and true conditions. 452 | u = np.zeros(n) 453 | for t in range(n): 454 | if t <= t_sepsis + dt_late: 455 | # TP 456 | if is_septic and predictions[t]: 457 | if t <= t_sepsis + dt_optimal: 458 | u[t] = max(m_1 * (t - t_sepsis) + b_1, u_fp) 459 | elif t <= t_sepsis + dt_late: 460 | u[t] = m_2 * (t - t_sepsis) + b_2 461 | # FP 462 | elif not is_septic and predictions[t]: 463 | u[t] = u_fp 464 | # FN 465 | elif is_septic and not predictions[t]: 466 | if t <= t_sepsis + dt_optimal: 467 | u[t] = 0 468 | elif t <= t_sepsis + dt_late: 469 | u[t] = m_3 * (t - t_sepsis) + b_3 470 | # TN 471 | elif not is_septic and not predictions[t]: 472 | u[t] = u_tn 473 | 474 | # Find total utility for patient. 475 | return np.sum(u) 476 | 477 | if __name__ == '__main__': 478 | auroc, auprc, accuracy, f_measure, utility = evaluate_sepsis_score(sys.argv[1], sys.argv[2]) 479 | 480 | output_string = 'AUROC|AUPRC|Accuracy|F-measure|Utility\n{}|{}|{}|{}|{}'.format(auroc, auprc, accuracy, f_measure, utility) 481 | if len(sys.argv) > 3: 482 | with open(sys.argv[3], 'w') as f: 483 | f.write(output_string) 484 | else: 485 | print(output_string) 486 | --------------------------------------------------------------------------------