├── README.md └── Code ├── 4.ROC_PRC_Test.R ├── 1.GridSearch.py ├── 9.BrierScore_Test.R ├── 8.BrierScore_Training.R ├── README ├── 5.Evaluate_Performance_Training_Pan.py ├── 2.RandomForestClassifier.py ├── 11.Cindex_Test.R ├── 10.Cindex_Training.R ├── 3.ROC_PRC_Training.R ├── 7.Evaluate_Performance_Test.py ├── 6.Evaluate_Performance_Training.py ├── 13.Survival_Test.R └── 12.Survival_Training.R /README.md: -------------------------------------------------------------------------------- 1 | Code to perform the analysis presented in the manuscript "An integrated clinical-genetic model for accurate prediction of immune checkpoint blockade efficacy across multiple cancer types". 2 | -------------------------------------------------------------------------------- /Code/4.ROC_PRC_Test.R: -------------------------------------------------------------------------------- 1 | library(ggplot2) 2 | library(precrec) 3 | library(pROC) 4 | 5 | data <- read.table('Test_RF_Prob.txt', header=T, sep='\t') 6 | ## Generate ROC and PRC, Pan-cancer 7 | scores1 <- join_scores(data$RF16_prob, data$RF11_prob, data$TMB) 8 | msmdat <- mmdata(scores1, data$Response) 9 | msmdat2 <- mmdata(scores1, data$Response, modnames = c("RF16", "RF11", "TMB")) 10 | mscurves <- evalmod(msmdat2) 11 | autoplot(mscurves) 12 | mscurves 13 | 14 | ## Generate ROC and PRC, Melanoma 15 | data2 <- data[grep("0", data$Cancer_Type),] 16 | scores1 <- join_scores(data2$RF16_prob,data2$RF11_prob,data2$TMB) 17 | msmdat <- mmdata(scores1, data2$Response) 18 | msmdat2 <- mmdata(scores1, data2$Response, modnames = c("RF16","RF11","TMB")) 19 | mscurves <- evalmod(msmdat2) 20 | autoplot(mscurves) 21 | mscurves 22 | 23 | ## Generate ROC and PRC, NSCLC 24 | data2 <- data[grep("1", data$Cancer_Type),] 25 | scores1 <- join_scores(data2$RF16_prob,data2$RF11_prob,data2$TMB) 26 | msmdat <- mmdata(scores1, data2$Response) 27 | msmdat2 <- mmdata(scores1, data2$Response, modnames = c("RF16","RF11","TMB")) 28 | mscurves <- evalmod(msmdat2) 29 | autoplot(mscurves) 30 | mscurves 31 | 32 | ## Generate ROC and PRC, Others 33 | data2 <- data[grep("2", data$Cancer_Type),] 34 | scores1 <- join_scores(data2$RF16_prob,data2$RF11_prob,data2$TMB) 35 | msmdat <- mmdata(scores1, data2$Response) 36 | msmdat2 <- mmdata(scores1, data2$Response, modnames = c("RF16","RF11","TMB")) 37 | mscurves <- evalmod(msmdat2) 38 | autoplot(mscurves) 39 | mscurves 40 | 41 | -------------------------------------------------------------------------------- /Code/1.GridSearch.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.ensemble import RandomForestClassifier 3 | from sklearn.model_selection import GridSearchCV 4 | 5 | data = pd.read_excel('61971_1_data_set_523545_qkrh1s.xlsx', sheet_name='Training') 6 | 7 | params = { 'n_estimators' : list(range(100, 1100, 100)), 8 | 'max_depth' : list(range(2, 22, 2)), 9 | 'min_samples_leaf' : list(range(2, 22, 2)), 10 | 'min_samples_split' : list(range(2, 22, 2)) 11 | } 12 | y_train = pd.DataFrame(data, columns=["Response"]) 13 | 14 | ## GridSearchCV for RF16 15 | rf16=["Cancer_Type2", "Albumin", "HED", "TMB", "FCNA", "BMI", "NLR", "Platelets", "HGB", "Stage", "Age", "Drug", "Chemo_before_IO", "HLA_LOH", "MSI", "Sex"] 16 | 17 | x_train = pd.DataFrame(data, columns=rf16) 18 | rf_clf = RandomForestClassifier(random_state = 0, n_jobs = -1) 19 | grid_cv = GridSearchCV(rf_clf, param_grid = params, cv = 5, n_jobs = -1) 20 | grid_cv.fit(x_train, y_train.values.ravel()) 21 | 22 | print('Optimal Hyper Parameter, RF16: ', grid_cv.best_params_) 23 | print('Maximum Accuracy, RF16: {:.4f}'.format(grid_cv.best_score_)) 24 | 25 | ## GridSearchCV for RF11 26 | rf11=["HED", "TMB", "FCNA", "BMI", "NLR", "Stage", "Age", "Drug", "HLA_LOH", "MSI", "Sex"] 27 | 28 | x_train = pd.DataFrame(data, columns=rf11) 29 | rf_clf = RandomForestClassifier(random_state = 0, n_jobs = -1) 30 | grid_cv = GridSearchCV(rf_clf, param_grid = params, cv = 5, n_jobs = -1) 31 | grid_cv.fit(x_train, y_train.values.ravel()) 32 | 33 | print('Optimal Hyper Parameter, RF11: ', grid_cv.best_params_) 34 | print('Maximum Accuracy, RF11: {:.4f}'.format(grid_cv.best_score_)) 35 | -------------------------------------------------------------------------------- /Code/9.BrierScore_Test.R: -------------------------------------------------------------------------------- 1 | library("pec") 2 | data <- read.table("Test_RF_Prob.txt", header=T, sep='\t') 3 | 4 | ## Brier score for OS, Pan-cancer 5 | f1 <- coxph(Surv(OS_Months,OS_Event)~RF16_prob,data=data,x=TRUE,y=TRUE) 6 | f2 <- coxph(Surv(OS_Months,OS_Event)~RF11_prob,data=data,x=TRUE,y=TRUE) 7 | f3 <- coxph(Surv(OS_Months,OS_Event)~TMB,data=data,x=TRUE,y=TRUE) 8 | 9 | brier <- pec(list("RF16"=f1, "RF11"=f2, "TMB"=f3),data=data,formula=Surv(OS_Months,OS_Event)~1) 10 | print(brier) 11 | plot(brier,xlim=c(0,50)) 12 | 13 | ## Brier score for OS, Melanoma 14 | data2 <- data[grep("0", data$Cancer_Type),] 15 | f1 <- coxph(Surv(OS_Months,OS_Event)~RF16_prob,data=data2,x=TRUE,y=TRUE) 16 | f2 <- coxph(Surv(OS_Months,OS_Event)~RF11_prob,data=data2,x=TRUE,y=TRUE) 17 | f3 <- coxph(Surv(OS_Months,OS_Event)~TMB,data=data2,x=TRUE,y=TRUE) 18 | 19 | brier <- pec(list("RF16"=f1, "RF11"=f2, "TMB"=f3),data=data2,formula=Surv(OS_Months,OS_Event)~1) 20 | print(brier) 21 | plot(brier,xlim=c(0,50)) 22 | 23 | ## Brier score for OS, NSCLC 24 | data2 <- data[grep("1", data$Cancer_Type),] 25 | f1 <- coxph(Surv(OS_Months,OS_Event)~RF16_prob,data=data2,x=TRUE,y=TRUE) 26 | f2 <- coxph(Surv(OS_Months,OS_Event)~RF11_prob,data=data2,x=TRUE,y=TRUE) 27 | f3 <- coxph(Surv(OS_Months,OS_Event)~TMB,data=data2,x=TRUE,y=TRUE) 28 | 29 | brier <- pec(list("RF16"=f1, "RF11"=f2, "TMB"=f3),data=data2,formula=Surv(OS_Months,OS_Event)~1) 30 | print(brier) 31 | plot(brier,xlim=c(0,50)) 32 | 33 | ## Brier score for OS, Others 34 | data2 <- data[grep("2", data$Cancer_Type),] 35 | f1 <- coxph(Surv(OS_Months,OS_Event)~RF16_prob,data=data2,x=TRUE,y=TRUE) 36 | f2 <- coxph(Surv(OS_Months,OS_Event)~RF11_prob,data=data2,x=TRUE,y=TRUE) 37 | f3 <- coxph(Surv(OS_Months,OS_Event)~TMB,data=data2,x=TRUE,y=TRUE) 38 | 39 | brier <- pec(list("RF16"=f1, "RF11"=f2, "TMB"=f3),data=data2,formula=Surv(OS_Months,OS_Event)~1) 40 | print(brier) 41 | plot(brier,xlim=c(0,50)) 42 | -------------------------------------------------------------------------------- /Code/8.BrierScore_Training.R: -------------------------------------------------------------------------------- 1 | library("pec") 2 | data <- read.table("Training_RF_Prob.txt", header=T, sep='\t') 3 | 4 | ## Brier score for OS, Pan-cancer 5 | f1 <- coxph(Surv(OS_Months,OS_Event)~RF16_prob,data=data,x=TRUE,y=TRUE) 6 | f2 <- coxph(Surv(OS_Months,OS_Event)~RF11_prob,data=data,x=TRUE,y=TRUE) 7 | f3 <- coxph(Surv(OS_Months,OS_Event)~TMB,data=data,x=TRUE,y=TRUE) 8 | 9 | brier <- pec(list("RF16"=f1, "RF11"=f2, "TMB"=f3),data=data,formula=Surv(OS_Months,OS_Event)~1) 10 | print(brier) 11 | plot(brier,xlim=c(0,50)) 12 | 13 | ## Brier score for OS, Melanoma 14 | data2 <- data[grep("0", data$Cancer_Type),] 15 | f1 <- coxph(Surv(OS_Months,OS_Event)~RF16_prob,data=data2,x=TRUE,y=TRUE) 16 | f2 <- coxph(Surv(OS_Months,OS_Event)~RF11_prob,data=data2,x=TRUE,y=TRUE) 17 | f3 <- coxph(Surv(OS_Months,OS_Event)~TMB,data=data2,x=TRUE,y=TRUE) 18 | 19 | brier <- pec(list("RF16"=f1, "RF11"=f2, "TMB"=f3),data=data2,formula=Surv(OS_Months,OS_Event)~1) 20 | print(brier) 21 | plot(brier,xlim=c(0,50)) 22 | 23 | ## Brier score for OS, NSCLC 24 | data2 <- data[grep("1", data$Cancer_Type),] 25 | f1 <- coxph(Surv(OS_Months,OS_Event)~RF16_prob,data=data2,x=TRUE,y=TRUE) 26 | f2 <- coxph(Surv(OS_Months,OS_Event)~RF11_prob,data=data2,x=TRUE,y=TRUE) 27 | f3 <- coxph(Surv(OS_Months,OS_Event)~TMB,data=data2,x=TRUE,y=TRUE) 28 | 29 | brier <- pec(list("RF16"=f1, "RF11"=f2, "TMB"=f3),data=data2,formula=Surv(OS_Months,OS_Event)~1) 30 | print(brier) 31 | plot(brier,xlim=c(0,50)) 32 | 33 | ## Brier score for OS, Others 34 | data2 <- data[grep("2", data$Cancer_Type),] 35 | f1 <- coxph(Surv(OS_Months,OS_Event)~RF16_prob,data=data2,x=TRUE,y=TRUE) 36 | f2 <- coxph(Surv(OS_Months,OS_Event)~RF11_prob,data=data2,x=TRUE,y=TRUE) 37 | f3 <- coxph(Surv(OS_Months,OS_Event)~TMB,data=data2,x=TRUE,y=TRUE) 38 | 39 | brier <- pec(list("RF16"=f1, "RF11"=f2, "TMB"=f3),data=data2,formula=Surv(OS_Months,OS_Event)~1) 40 | print(brier) 41 | plot(brier,xlim=c(0,50)) 42 | -------------------------------------------------------------------------------- /Code/README: -------------------------------------------------------------------------------- 1 | An integrated clinical-genetic model for accurate prediction of immune checkpoint blockade efficacy across multiple cancer types 2 | 3 | Required packages or libraries 4 | Python3 (3.8.3): scikit-learn (0.23.1), pandas, csv 5 | R (4.0.2): ggplot2 (3.3.2), precrec (0.11.2), pROC (1.16.2), pec (2019.11.3), survcomp (1.38.0), survminer (0.4.8), survival (3.2.7) 6 | 7 | The scripts provided in this GitHub account should be executed in that particular order to generate the analysis presented in the manuscript. 8 | 9 | Please use the data ('61971_1_data_set_523545_qkrh1s.xlsx') which is provided as supplementary data in the manuscript as the input accordingly. 10 | 11 | Description 12 | 1.GridSearch.py: Generates hyperparameters for RF16 and RF11. 13 | 2.RandomForestClassifier.py: Generates response probability of RF16 and RF11. Outputs are required for the other scripts. 14 | 3.ROC_PRC_Training.R: Generates receiver operating characteristic and precision-recall curves. Also creates outputs with pan-cancer threshold and cancer-type-specific thresholds for RF16. 15 | 4.ROC_PRC_Test.R: Generates receiver operating characteristic and precision-recall curves. 16 | 5.Evaluate_Performance_Training_Pan.py: Generates confusion matrix and evaluation metrics (sensitivity, specificity, accuracy, PPV, NPV) of each cancer type based on pan-cancer threshold. 17 | 6.Evaluate_Performance_Training.py: Generates confusion matrix and evaluation metrics (sensitivity, specificity, accuracy, PPV, NPV) of each cancer type based on cancer-type-specific threshold. 18 | 7.Evaluate_Performance_Test.py: Generates confusion matrix and evaluation metrics (sensitivity, specificity, accuracy, PPV, NPV) of each cancer type based on cancer-type-specific threshold. 19 | 8.BrierScore_Training.R: Generates Brier score and plot of each cancer type. 20 | 9.BrierScore_Test.R: Generates Brier score and plot of each cancer type. 21 | 10.Cindex_Training.R: Generates concordance index and compares values of RF16, RF11, and TMB. 22 | 11.Cindex_Test.R: Generates concordance index and compares values of RF16, RF11, and TMB. 23 | 12.Survival_Training.R: Generates Kaplan-Meier plots and hazard ratio comparing responder and non-responder predicted by RF16 or TMB. 24 | 13.Survival_Test.R: Generates Kaplan-Meier plots and hazard ratio comparing responder and non-responder predicted by RF16 or TMB. 25 | -------------------------------------------------------------------------------- /Code/5.Evaluate_Performance_Training_Pan.py: -------------------------------------------------------------------------------- 1 | ## Evaluate model performance using pan-cancer threshold 2 | 3 | def isfloat(n): 4 | try: 5 | float(n) 6 | except: 7 | return False 8 | return True 9 | 10 | rf = open('Pan_Thresholds.txt', 'r') 11 | line = rf.readline() 12 | cutoff = [] 13 | while line != '': 14 | line = line.split() 15 | if isfloat(line[0]): 16 | cutoff.append(round(float(line[0]),3)) 17 | line = rf.readline() 18 | rf.close() 19 | 20 | rf = open('Training_RF_Prob.txt', 'r') 21 | wf = open('Training_RF_Prob_Pan_Predicted.txt', 'w') 22 | line = rf.readline() 23 | while line != '': 24 | line = line.strip().split('\t') 25 | if line[0] == 'Sample_ID': 26 | wf.write('\t'.join(line) + '\t' + 'RF16' + '\n') 27 | else: 28 | if float(line[-2]) >= cutoff[0]: 29 | wf.write('\t'.join(line) + '\t' + 'R' + '\n') 30 | else: 31 | wf.write('\t'.join(line) + '\t' + 'NR' + '\n') 32 | line = rf.readline() 33 | rf.close() 34 | wf.close() 35 | 36 | print('') 37 | order = ['Melanoma', 'NSCLC', 'Others'] 38 | tp_p=tn_p=fp_p=fn_p=0 39 | for i in range(3): 40 | rf = open('Training_RF_Prob_Pan_Predicted.txt', 'r') 41 | line = rf.readline() 42 | tp=tn=fp=fn=0 43 | while line != '': 44 | line = line.strip().split('\t') 45 | if line[0] != 'Sample_ID': 46 | if line[1] == str(i): 47 | if line[2] == '1': 48 | if line[-1] == 'R': 49 | tp += 1 50 | tp_p += 1 51 | else: 52 | fn += 1 53 | fn_p += 1 54 | else: 55 | if line[-1] == 'R': 56 | fp += 1 57 | fp_p += 1 58 | else: 59 | tn += 1 60 | tn_p += 1 61 | line = rf.readline() 62 | print(order[i]) 63 | print(str(tn) + '\t' + str(fp) + '\n' + str(fn) + '\t' + str(tp) + '\n') 64 | sensitivity = float(tp) / (float(tp + fn)) * 100 65 | specificity = float(tn) / (float(fp + tn)) * 100 66 | accuracy = float(tp + tn) / (float(tp + fp + fn + tn)) * 100 67 | ppv = float(tp) / (float(tp + fp)) * 100 68 | npv = float(tn) / (float(fn + tn)) * 100 69 | print(str(sensitivity) + '\t' + str(specificity) + '\t' + str(accuracy) + '\t' + str(ppv) + '\t' + str(npv) + '\n') 70 | rf.close() 71 | print('Pan-cancer') 72 | print(str(tn_p) + '\t' + str(fp_p) + '\n' + str(fn_p) + '\t' + str(tp_p) + '\n') 73 | sensitivity_p = float(tp_p) / (float(tp_p + fn_p)) * 100 74 | specificity_p = float(tn_p) / (float(fp_p + tn_p)) * 100 75 | accuracy_p = float(tp_p + tn_p) / (float(tp_p + fp_p + fn_p + tn_p)) * 100 76 | ppv_p = float(tp_p) / (float(tp_p + fp_p)) * 100 77 | npv_p = float(tn_p) / (float(fn_p + tn_p)) * 100 78 | print(str(sensitivity_p) + '\t' + str(specificity_p) + '\t' + str(accuracy_p) + '\t' + str(ppv_p) + '\t' + str(npv_p) + '\n') 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /Code/2.RandomForestClassifier.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | import eli5 5 | from sklearn.ensemble import RandomForestClassifier 6 | from eli5.sklearn import PermutationImportance 7 | 8 | data_train = pd.read_excel('61971_1_data_set_523545_qkrh1s.xlsx', sheet_name='Training') 9 | data_test = pd.read_excel('61971_1_data_set_523545_qkrh1s.xlsx', sheet_name='Test') 10 | 11 | y_train = pd.DataFrame(data_train, columns=['Response']) 12 | y_test = pd.DataFrame(data_test, columns=['Response']) 13 | 14 | ### RF16 15 | rf16=['Cancer_Type2', 'Albumin', 'HED', 'TMB', 'FCNA', 'BMI', 'NLR', 'Platelets', 'HGB','Stage', 'Age', 'Drug', 'Chemo_before_IO', 'HLA_LOH', 'MSI','Sex'] 16 | x_train16 = pd.DataFrame(data_train, columns=rf16) 17 | x_test16 = pd.DataFrame(data_test, columns=rf16) 18 | 19 | ## Run random forest classifier 20 | forest16 = RandomForestClassifier(min_samples_split=2, n_estimators=1000, max_depth=8, min_samples_leaf=20, random_state = 0, n_jobs = -1) 21 | forest16.fit(x_train16, y_train.values.ravel()) 22 | forest16_predict = forest16.predict(x_test16) 23 | 24 | ### RF11 25 | rf11=['Stage', 'Drug', 'HED', 'TMB', 'FCNA', 'BMI', 'NLR','HLA_LOH', 'MSI', 'Sex', 'Age'] 26 | x_train11 = pd.DataFrame(data_train, columns=rf11) 27 | x_test11 = pd.DataFrame(data_test, columns=rf11) 28 | 29 | ## Run random forest classifier 30 | forest11 = RandomForestClassifier(min_samples_split=2, n_estimators=300, max_depth=4, min_samples_leaf=12, random_state = 0, n_jobs = -1) 31 | forest11.fit(x_train11, y_train.values.ravel()) 32 | forest11_predict = forest11.predict(x_test11) 33 | 34 | ## Save response probability of each sample 35 | header=['Sample_ID', 'Cancer_Type', 'Response', 'OS_Event', 'OS_Months', 'PFS_Event', 'PFS_Months', 'TMB', 'RF16_prob', 'RF11_prob'] 36 | with open('Training_RF_Prob.txt', 'w', newline='') as wf: 37 | wf.write('\t'.join(header) + '\n') 38 | writer = csv.writer(wf, delimiter='\t') 39 | writer.writerows(zip(data_train['SAMPLE_ID'], data_train['Cancer_Type2'], data_train['Response'], data_train['OS_Event'], data_train['OS_Months'], data_train['PFS_Event'], data_train['PFS_Months'], data_train['TMB'], forest16.predict_proba(x_train16)[:,1], forest11.predict_proba(x_train11)[:,1])) 40 | with open('Test_RF_Prob.txt', 'w', newline='') as wf: 41 | wf.write('\t'.join(header) + '\n') 42 | writer = csv.writer(wf, delimiter='\t') 43 | writer.writerows(zip(data_test['SAMPLE_ID'], data_test['Cancer_Type2'], data_test['Response'], data_test['OS_Event'], data_test['OS_Months'], data_test['PFS_Event'], data_test['PFS_Months'], data_test['TMB'], forest16.predict_proba(x_test16)[:,1], forest11.predict_proba(x_test11)[:,1])) 44 | 45 | ## Feature importance of RF16 & RF11 46 | print('\n') 47 | perm = PermutationImportance(forest16, scoring = "roc_auc", cv='prefit', random_state = 42).fit(x_train16, y_train) 48 | eli5.show_weights(perm, top = 16, feature_names = x_train16.columns.tolist()) 49 | 50 | print('\n') 51 | perm = PermutationImportance(forest11, scoring = "roc_auc", cv='prefit', random_state = 42).fit(x_train11, y_train) 52 | eli5.show_weights(perm, top = 11, feature_names = x_train11.columns.tolist()) 53 | 54 | -------------------------------------------------------------------------------- /Code/11.Cindex_Test.R: -------------------------------------------------------------------------------- 1 | library("survcomp") 2 | data <- read.table("Test_RF_Prob.txt", header=T, sep='\t') 3 | 4 | #C-index for OS, Pan-cancer 5 | c_rf16 <- concordance.index(x=-data$RF16_prob, surv.time=data$OS_Months, surv.event=data$OS_Event, method="noether") 6 | c_rf11 <- concordance.index(x=-data$RF11_prob, surv.time=data$OS_Months, surv.event=data$OS_Event, method="noether") 7 | c_tmb <- concordance.index(x=-data$TMB, surv.time=data$OS_Months, surv.event=data$OS_Event, method="noether") 8 | cindex.comp(c_rf16, c_rf11) 9 | cindex.comp(c_rf16, c_tmb) 10 | cindex.comp(c_rf11, c_tmb) 11 | 12 | #C-index for PFS, Pan-cancer 13 | c_rf16 <- concordance.index(x=-data$RF16_prob, surv.time=data$PFS_Months, surv.event=data$PFS_Event, method="noether") 14 | c_rf11 <- concordance.index(x=-data$RF11_prob, surv.time=data$PFS_Months, surv.event=data$PFS_Event, method="noether") 15 | c_tmb <- concordance.index(x=-data$TMB, surv.time=data$PFS_Months, surv.event=data$PFS_Event, method="noether") 16 | cindex.comp(c_rf16, c_rf11) 17 | cindex.comp(c_rf16, c_tmb) 18 | cindex.comp(c_rf11, c_tmb) 19 | 20 | #C-index for OS, Melanoma 21 | data2 <- data[grep("0", data$Cancer_Type),] 22 | c_rf16 <- concordance.index(x=-data2$RF16_prob, surv.time=data2$OS_Months, surv.event=data2$OS_Event, method="noether") 23 | c_rf11 <- concordance.index(x=-data2$RF11_prob, surv.time=data2$OS_Months, surv.event=data2$OS_Event, method="noether") 24 | c_tmb <- concordance.index(x=-data2$TMB, surv.time=data2$OS_Months, surv.event=data2$OS_Event, method="noether") 25 | cindex.comp(c_rf16, c_rf11) 26 | cindex.comp(c_rf16, c_tmb) 27 | cindex.comp(c_rf11, c_tmb) 28 | 29 | #C-index for PFS, Melanoma 30 | c_rf16 <- concordance.index(x=-data2$RF16_prob, surv.time=data2$PFS_Months, surv.event=data2$PFS_Event, method="noether") 31 | c_rf11 <- concordance.index(x=-data2$RF11_prob, surv.time=data2$PFS_Months, surv.event=data2$PFS_Event, method="noether") 32 | c_tmb <- concordance.index(x=-data2$TMB, surv.time=data2$PFS_Months, surv.event=data2$PFS_Event, method="noether") 33 | cindex.comp(c_rf16, c_rf11) 34 | cindex.comp(c_rf16, c_tmb) 35 | cindex.comp(c_rf11, c_tmb) 36 | 37 | #C-index for OS, NSCLC 38 | data2 <- data[grep("1", data$Cancer_Type),] 39 | c_rf16 <- concordance.index(x=-data2$RF16_prob, surv.time=data2$OS_Months, surv.event=data2$OS_Event, method="noether") 40 | c_rf11 <- concordance.index(x=-data2$RF11_prob, surv.time=data2$OS_Months, surv.event=data2$OS_Event, method="noether") 41 | c_tmb <- concordance.index(x=-data2$TMB, surv.time=data2$OS_Months, surv.event=data2$OS_Event, method="noether") 42 | cindex.comp(c_rf16, c_rf11) 43 | cindex.comp(c_rf16, c_tmb) 44 | cindex.comp(c_rf11, c_tmb) 45 | 46 | #C-index for PFS, NSCLC 47 | c_rf16 <- concordance.index(x=-data2$RF16_prob, surv.time=data2$PFS_Months, surv.event=data2$PFS_Event, method="noether") 48 | c_rf11 <- concordance.index(x=-data2$RF11_prob, surv.time=data2$PFS_Months, surv.event=data2$PFS_Event, method="noether") 49 | c_tmb <- concordance.index(x=-data2$TMB, surv.time=data2$PFS_Months, surv.event=data2$PFS_Event, method="noether") 50 | cindex.comp(c_rf16, c_rf11) 51 | cindex.comp(c_rf16, c_tmb) 52 | cindex.comp(c_rf11, c_tmb) 53 | 54 | #C-index for OS, Others 55 | data2 <- data[grep("2", data$Cancer_Type),] 56 | c_rf16 <- concordance.index(x=-data2$RF16_prob, surv.time=data2$OS_Months, surv.event=data2$OS_Event, method="noether") 57 | c_rf11 <- concordance.index(x=-data2$RF11_prob, surv.time=data2$OS_Months, surv.event=data2$OS_Event, method="noether") 58 | c_tmb <- concordance.index(x=-data2$TMB, surv.time=data2$OS_Months, surv.event=data2$OS_Event, method="noether") 59 | cindex.comp(c_rf16, c_rf11) 60 | cindex.comp(c_rf16, c_tmb) 61 | cindex.comp(c_rf11, c_tmb) 62 | 63 | #C-index for PFS, Others 64 | c_rf16 <- concordance.index(x=-data2$RF16_prob, surv.time=data2$PFS_Months, surv.event=data2$PFS_Event, method="noether") 65 | c_rf11 <- concordance.index(x=-data2$RF11_prob, surv.time=data2$PFS_Months, surv.event=data2$PFS_Event, method="noether") 66 | c_tmb <- concordance.index(x=-data2$TMB, surv.time=data2$PFS_Months, surv.event=data2$PFS_Event, method="noether") 67 | cindex.comp(c_rf16, c_rf11) 68 | cindex.comp(c_rf16, c_tmb) 69 | cindex.comp(c_rf11, c_tmb) -------------------------------------------------------------------------------- /Code/10.Cindex_Training.R: -------------------------------------------------------------------------------- 1 | library("survcomp") 2 | data <- read.table("Training_RF_Prob.txt", header=T, sep='\t') 3 | 4 | #C-index for OS, Pan-cancer 5 | c_rf16 <- concordance.index(x=-data$RF16_prob, surv.time=data$OS_Months, surv.event=data$OS_Event, method="noether") 6 | c_rf11 <- concordance.index(x=-data$RF11_prob, surv.time=data$OS_Months, surv.event=data$OS_Event, method="noether") 7 | c_tmb <- concordance.index(x=-data$TMB, surv.time=data$OS_Months, surv.event=data$OS_Event, method="noether") 8 | cindex.comp(c_rf16, c_rf11) 9 | cindex.comp(c_rf16, c_tmb) 10 | cindex.comp(c_rf11, c_tmb) 11 | 12 | #C-index for PFS, Pan-cancer 13 | c_rf16 <- concordance.index(x=-data$RF16_prob, surv.time=data$PFS_Months, surv.event=data$PFS_Event, method="noether") 14 | c_rf11 <- concordance.index(x=-data$RF11_prob, surv.time=data$PFS_Months, surv.event=data$PFS_Event, method="noether") 15 | c_tmb <- concordance.index(x=-data$TMB, surv.time=data$PFS_Months, surv.event=data$PFS_Event, method="noether") 16 | cindex.comp(c_rf16, c_rf11) 17 | cindex.comp(c_rf16, c_tmb) 18 | cindex.comp(c_rf11, c_tmb) 19 | 20 | #C-index for OS, Melanoma 21 | data2 <- data[grep("0", data$Cancer_Type),] 22 | c_rf16 <- concordance.index(x=-data2$RF16_prob, surv.time=data2$OS_Months, surv.event=data2$OS_Event, method="noether") 23 | c_rf11 <- concordance.index(x=-data2$RF11_prob, surv.time=data2$OS_Months, surv.event=data2$OS_Event, method="noether") 24 | c_tmb <- concordance.index(x=-data2$TMB, surv.time=data2$OS_Months, surv.event=data2$OS_Event, method="noether") 25 | cindex.comp(c_rf16, c_rf11) 26 | cindex.comp(c_rf16, c_tmb) 27 | cindex.comp(c_rf11, c_tmb) 28 | 29 | #C-index for PFS, Melanoma 30 | c_rf16 <- concordance.index(x=-data2$RF16_prob, surv.time=data2$PFS_Months, surv.event=data2$PFS_Event, method="noether") 31 | c_rf11 <- concordance.index(x=-data2$RF11_prob, surv.time=data2$PFS_Months, surv.event=data2$PFS_Event, method="noether") 32 | c_tmb <- concordance.index(x=-data2$TMB, surv.time=data2$PFS_Months, surv.event=data2$PFS_Event, method="noether") 33 | cindex.comp(c_rf16, c_rf11) 34 | cindex.comp(c_rf16, c_tmb) 35 | cindex.comp(c_rf11, c_tmb) 36 | 37 | #C-index for OS, NSCLC 38 | data2 <- data[grep("1", data$Cancer_Type),] 39 | c_rf16 <- concordance.index(x=-data2$RF16_prob, surv.time=data2$OS_Months, surv.event=data2$OS_Event, method="noether") 40 | c_rf11 <- concordance.index(x=-data2$RF11_prob, surv.time=data2$OS_Months, surv.event=data2$OS_Event, method="noether") 41 | c_tmb <- concordance.index(x=-data2$TMB, surv.time=data2$OS_Months, surv.event=data2$OS_Event, method="noether") 42 | cindex.comp(c_rf16, c_rf11) 43 | cindex.comp(c_rf16, c_tmb) 44 | cindex.comp(c_rf11, c_tmb) 45 | 46 | #C-index for PFS, NSCLC 47 | c_rf16 <- concordance.index(x=-data2$RF16_prob, surv.time=data2$PFS_Months, surv.event=data2$PFS_Event, method="noether") 48 | c_rf11 <- concordance.index(x=-data2$RF11_prob, surv.time=data2$PFS_Months, surv.event=data2$PFS_Event, method="noether") 49 | c_tmb <- concordance.index(x=-data2$TMB, surv.time=data2$PFS_Months, surv.event=data2$PFS_Event, method="noether") 50 | cindex.comp(c_rf16, c_rf11) 51 | cindex.comp(c_rf16, c_tmb) 52 | cindex.comp(c_rf11, c_tmb) 53 | 54 | #C-index for OS, Others 55 | data2 <- data[grep("2", data$Cancer_Type),] 56 | c_rf16 <- concordance.index(x=-data2$RF16_prob, surv.time=data2$OS_Months, surv.event=data2$OS_Event, method="noether") 57 | c_rf11 <- concordance.index(x=-data2$RF11_prob, surv.time=data2$OS_Months, surv.event=data2$OS_Event, method="noether") 58 | c_tmb <- concordance.index(x=-data2$TMB, surv.time=data2$OS_Months, surv.event=data2$OS_Event, method="noether") 59 | cindex.comp(c_rf16, c_rf11) 60 | cindex.comp(c_rf16, c_tmb) 61 | cindex.comp(c_rf11, c_tmb) 62 | 63 | #C-index for PFS, Others 64 | c_rf16 <- concordance.index(x=-data2$RF16_prob, surv.time=data2$PFS_Months, surv.event=data2$PFS_Event, method="noether") 65 | c_rf11 <- concordance.index(x=-data2$RF11_prob, surv.time=data2$PFS_Months, surv.event=data2$PFS_Event, method="noether") 66 | c_tmb <- concordance.index(x=-data2$TMB, surv.time=data2$PFS_Months, surv.event=data2$PFS_Event, method="noether") 67 | cindex.comp(c_rf16, c_rf11) 68 | cindex.comp(c_rf16, c_tmb) 69 | cindex.comp(c_rf11, c_tmb) 70 | 71 | -------------------------------------------------------------------------------- /Code/3.ROC_PRC_Training.R: -------------------------------------------------------------------------------- 1 | library(ggplot2) 2 | library(precrec) 3 | library(pROC) 4 | 5 | data <- read.table('Training_RF_Prob.txt', header=T, sep='\t') 6 | ## Generate ROC and PRC, Pan-cancer 7 | scores1 <- join_scores(data$RF16_prob, data$RF11_prob, data$TMB) 8 | msmdat <- mmdata(scores1, data$Response) 9 | msmdat2 <- mmdata(scores1, data$Response, modnames = c("RF16", "RF11", "TMB")) 10 | mscurves <- evalmod(msmdat2) 11 | autoplot(mscurves) 12 | mscurves 13 | 14 | ## Detect the optimal threshold, Pan-cancer 15 | prob_roc <- roc(data$Response, data$RF16_prob) 16 | par(pty="s") 17 | plot.roc(prob_roc, col="red", print.auc=TRUE, print.auc.adj=c(2,-12), 18 | max.auc.polygon=TRUE, print.thres=TRUE, print.thres.pch=19, print.thres.col = "red", print.thres.adj=c(0.3,-1.2), 19 | auc.polygon=TRUE, auc.polygon.col="#D1F2EB",legacy.axes = TRUE) 20 | legend("bottomright", legend=c("RF16"), col=c("red"), lwd=2) 21 | pan_threshold <- ci.thresholds(prob_roc, thresholds="best") 22 | cat("Pan-cancer Threshold", file = "Pan_Thresholds.txt", append = TRUE) 23 | capture.output(pan_threshold, file = "Pan_Thresholds.txt", append = TRUE) 24 | 25 | ## Generate ROC and PRC, Melanoma 26 | data2 <- data[grep("0", data$Cancer_Type),] 27 | scores1 <- join_scores(data2$RF16_prob,data2$RF11_prob,data2$TMB) 28 | msmdat <- mmdata(scores1, data2$Response) 29 | msmdat2 <- mmdata(scores1, data2$Response, modnames = c("RF16","RF11","TMB")) 30 | mscurves <- evalmod(msmdat2) 31 | autoplot(mscurves) 32 | mscurves 33 | 34 | ## Detect the optimal threshold, Melanoma 35 | prob_roc <- roc(data2$Response, data2$RF16_prob) 36 | par(pty="s") 37 | plot.roc(prob_roc, col="red", print.auc=TRUE, print.auc.adj=c(2,-12), 38 | max.auc.polygon=TRUE, print.thres=TRUE, print.thres.pch=19, print.thres.col = "red", print.thres.adj=c(0.3,-1.2), 39 | auc.polygon=TRUE, auc.polygon.col="#D1F2EB",legacy.axes = TRUE) 40 | legend("bottomright", legend=c("RF16"), col=c("red"), lwd=2) 41 | 42 | mel_threshold <- ci.thresholds(prob_roc, thresholds="best") 43 | cat("Melanoma Threshold", file = "Thresholds.txt", append = TRUE) 44 | capture.output(mel_threshold, file = "Thresholds.txt", append = TRUE) 45 | 46 | ## Generate ROC and PRC, NSCLC 47 | data2 <- data[grep("1", data$Cancer_Type),] 48 | scores1 <- join_scores(data2$RF16_prob,data2$RF11_prob,data2$TMB) 49 | msmdat <- mmdata(scores1, data2$Response) 50 | msmdat2 <- mmdata(scores1, data2$Response, modnames = c("RF16","RF11","TMB")) 51 | mscurves <- evalmod(msmdat2) 52 | autoplot(mscurves) 53 | mscurves 54 | 55 | ## Detect the optimal threshold, NSCLC 56 | prob_roc <- roc(data2$Response, data2$RF16_prob) 57 | par(pty="s") 58 | plot.roc(prob_roc, col="red", print.auc=TRUE, print.auc.adj=c(2,-12), 59 | max.auc.polygon=TRUE, print.thres=TRUE, print.thres.pch=19, print.thres.col = "red", print.thres.adj=c(0.3,-1.2), 60 | auc.polygon=TRUE, auc.polygon.col="#D1F2EB",legacy.axes = TRUE) 61 | legend("bottomright", legend=c("RF16"), col=c("red"), lwd=2) 62 | 63 | nsclc_threshold <- ci.thresholds(prob_roc, thresholds="best") 64 | cat("NSCLC Threshold", file = "Thresholds.txt", append = TRUE) 65 | capture.output(nsclc_threshold, file = "Thresholds.txt", append = TRUE) 66 | 67 | ## Generate ROC and PRC, Others 68 | data2 <- data[grep("2", data$Cancer_Type),] 69 | scores1 <- join_scores(data2$RF16_prob,data2$RF11_prob,data2$TMB) 70 | msmdat <- mmdata(scores1, data2$Response) 71 | msmdat2 <- mmdata(scores1, data2$Response, modnames = c("RF16","RF11","TMB")) 72 | mscurves <- evalmod(msmdat2) 73 | autoplot(mscurves) 74 | mscurves 75 | 76 | ## Detect the optimal threshold, Others 77 | prob_roc <- roc(data2$Response, data2$RF16_prob) 78 | par(pty="s") 79 | plot.roc(prob_roc,col="red", print.auc=TRUE, print.auc.adj=c(2,-12), 80 | max.auc.polygon=TRUE, print.thres=TRUE, print.thres.pch=19, print.thres.col = "red", print.thres.adj=c(0.3,-1.2), 81 | auc.polygon=TRUE, auc.polygon.col="#D1F2EB",legacy.axes = TRUE) 82 | legend("bottomright", legend=c("RF16"), col=c("red"), lwd=2) 83 | 84 | others_threshold <- ci.thresholds(prob_roc, thresholds="best") 85 | cat("Others Threshold", file = "Thresholds.txt", append = TRUE) 86 | capture.output(others_threshold, file = "Thresholds.txt", append = TRUE) 87 | -------------------------------------------------------------------------------- /Code/7.Evaluate_Performance_Test.py: -------------------------------------------------------------------------------- 1 | ## Evaluate model performance using cancer-type-specific thresholds 2 | 3 | def isfloat(n): 4 | try: 5 | float(n) 6 | except: 7 | return False 8 | return True 9 | 10 | rf = open('Thresholds.txt', 'r') 11 | line = rf.readline() 12 | cutoff = [] 13 | while line != '': 14 | line = line.split() 15 | if isfloat(line[0]): 16 | cutoff.append(round(float(line[0]),3)) 17 | line = rf.readline() 18 | rf.close() 19 | 20 | rf = open('Test_RF_Prob.txt', 'r') 21 | wf = open('Test_RF_Prob_Predicted.txt', 'w') 22 | line = rf.readline() 23 | while line != '': 24 | line = line.strip().split('\t') 25 | if line[0] == 'Sample_ID': 26 | wf.write('\t'.join(line) + '\t' + 'RF16' + '\t' + 'TMB_10' + '\n') 27 | else: 28 | if int(line[1]) == 0: 29 | if float(line[-2]) >= cutoff[0]: 30 | if float(line[-3]) >= float(10): 31 | wf.write('\t'.join(line) + '\t' + 'R' + '\t' + 'R' + '\n') 32 | else: 33 | wf.write('\t'.join(line) + '\t' + 'R' + '\t' + 'NR' + '\n') 34 | else: 35 | if float(line[-3]) >= float(10): 36 | wf.write('\t'.join(line) + '\t' + 'NR' + '\t' + 'R' + '\n') 37 | else: 38 | wf.write('\t'.join(line) + '\t' + 'NR' + '\t' + 'NR' + '\n') 39 | elif int(line[1]) == 1: 40 | if float(line[-2]) >= cutoff[1]: 41 | if float(line[-3]) >= float(10): 42 | wf.write('\t'.join(line) + '\t' + 'R' + '\t' + 'R' + '\n') 43 | else: 44 | wf.write('\t'.join(line) + '\t' + 'R' + '\t' + 'NR' + '\n') 45 | else: 46 | if float(line[-3]) >= float(10): 47 | wf.write('\t'.join(line) + '\t' + 'NR' + '\t' + 'R' + '\n') 48 | else: 49 | wf.write('\t'.join(line) + '\t' + 'NR' + '\t' + 'NR' + '\n') 50 | elif int(line[1]) == 2: 51 | if float(line[-2]) >= cutoff[2]: 52 | if float(line[-3]) >= float(10): 53 | wf.write('\t'.join(line) + '\t' + 'R' + '\t' + 'R' + '\n') 54 | else: 55 | wf.write('\t'.join(line) + '\t' + 'R' + '\t' + 'NR' + '\n') 56 | else: 57 | if float(line[-3]) >= float(10): 58 | wf.write('\t'.join(line) + '\t' + 'NR' + '\t' + 'R' + '\n') 59 | else: 60 | wf.write('\t'.join(line) + '\t' + 'NR' + '\t' + 'NR' + '\n') 61 | line = rf.readline() 62 | rf.close() 63 | wf.close() 64 | 65 | 66 | def evaluation(input, target): 67 | order = ['Melanoma', 'NSCLC', 'Others'] 68 | tp_p=tn_p=fp_p=fn_p=0 69 | for i in range(3): 70 | rf = open(input, 'r') 71 | line = rf.readline() 72 | tp=tn=fp=fn=0 73 | while line != '': 74 | line = line.strip().split('\t') 75 | if line[0] != 'Sample_ID': 76 | if line[1] == str(i): 77 | if line[2] == '1': 78 | if line[target] == 'R': 79 | tp += 1 80 | tp_p += 1 81 | else: 82 | fn += 1 83 | fn_p += 1 84 | else: 85 | if line[target] == 'R': 86 | fp += 1 87 | fp_p += 1 88 | else: 89 | tn += 1 90 | tn_p += 1 91 | line = rf.readline() 92 | print(order[i]) 93 | print(str(tn) + '\t' + str(fp) + '\n' + str(fn) + '\t' + str(tp) + '\n') 94 | sensitivity = float(tp) / (float(tp + fn)) * 100 95 | specificity = float(tn) / (float(fp + tn)) * 100 96 | accuracy = float(tp + tn) / (float(tp + fp + fn + tn)) * 100 97 | ppv = float(tp) / (float(tp + fp)) * 100 98 | npv = float(tn) / (float(fn + tn)) * 100 99 | print(str(sensitivity) + '\t' + str(specificity) + '\t' + str(accuracy) + '\t' + str(ppv) + '\t' + str(npv) + '\n') 100 | rf.close() 101 | print('Pan-cancer') 102 | print(str(tn_p) + '\t' + str(fp_p) + '\n' + str(fn_p) + '\t' + str(tp_p) + '\n') 103 | sensitivity_p = float(tp_p) / (float(tp_p + fn_p)) * 100 104 | specificity_p = float(tn_p) / (float(fp_p + tn_p)) * 100 105 | accuracy_p = float(tp_p + tn_p) / (float(tp_p + fp_p + fn_p + tn_p)) * 100 106 | ppv_p = float(tp_p) / (float(tp_p + fp_p)) * 100 107 | npv_p = float(tn_p) / (float(fn_p + tn_p)) * 100 108 | print(str(sensitivity_p) + '\t' + str(specificity_p) + '\t' + str(accuracy_p) + '\t' + str(ppv_p) + '\t' + str(npv_p) + '\n') 109 | 110 | 111 | print('') 112 | evaluation('Test_RF_Prob_Predicted.txt', -2) 113 | 114 | print('') 115 | evaluation('Test_RF_Prob_Predicted.txt', -1) 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | -------------------------------------------------------------------------------- /Code/6.Evaluate_Performance_Training.py: -------------------------------------------------------------------------------- 1 | ## Evaluate model performance using cancer-type-specific thresholds 2 | 3 | def isfloat(n): 4 | try: 5 | float(n) 6 | except: 7 | return False 8 | return True 9 | 10 | rf = open('Thresholds.txt', 'r') 11 | line = rf.readline() 12 | cutoff = [] 13 | while line != '': 14 | line = line.split() 15 | if isfloat(line[0]): 16 | cutoff.append(round(float(line[0]),3)) 17 | line = rf.readline() 18 | rf.close() 19 | 20 | rf = open('Training_RF_Prob.txt', 'r') 21 | wf = open('Training_RF_Prob_Predicted.txt', 'w') 22 | line = rf.readline() 23 | while line != '': 24 | line = line.strip().split('\t') 25 | if line[0] == 'Sample_ID': 26 | wf.write('\t'.join(line) + '\t' + 'RF16' + '\t' + 'TMB_10' + '\n') 27 | else: 28 | if int(line[1]) == 0: 29 | if float(line[-2]) >= cutoff[0]: 30 | if float(line[-3]) >= float(10): 31 | wf.write('\t'.join(line) + '\t' + 'R' + '\t' + 'R' + '\n') 32 | else: 33 | wf.write('\t'.join(line) + '\t' + 'R' + '\t' + 'NR' + '\n') 34 | else: 35 | if float(line[-3]) >= float(10): 36 | wf.write('\t'.join(line) + '\t' + 'NR' + '\t' + 'R' + '\n') 37 | else: 38 | wf.write('\t'.join(line) + '\t' + 'NR' + '\t' + 'NR' + '\n') 39 | elif int(line[1]) == 1: 40 | if float(line[-2]) >= cutoff[1]: 41 | if float(line[-3]) >= float(10): 42 | wf.write('\t'.join(line) + '\t' + 'R' + '\t' + 'R' + '\n') 43 | else: 44 | wf.write('\t'.join(line) + '\t' + 'R' + '\t' + 'NR' + '\n') 45 | else: 46 | if float(line[-3]) >= float(10): 47 | wf.write('\t'.join(line) + '\t' + 'NR' + '\t' + 'R' + '\n') 48 | else: 49 | wf.write('\t'.join(line) + '\t' + 'NR' + '\t' + 'NR' + '\n') 50 | elif int(line[1]) == 2: 51 | if float(line[-2]) >= cutoff[2]: 52 | if float(line[-3]) >= float(10): 53 | wf.write('\t'.join(line) + '\t' + 'R' + '\t' + 'R' + '\n') 54 | else: 55 | wf.write('\t'.join(line) + '\t' + 'R' + '\t' + 'NR' + '\n') 56 | else: 57 | if float(line[-3]) >= float(10): 58 | wf.write('\t'.join(line) + '\t' + 'NR' + '\t' + 'R' + '\n') 59 | else: 60 | wf.write('\t'.join(line) + '\t' + 'NR' + '\t' + 'NR' + '\n') 61 | line = rf.readline() 62 | rf.close() 63 | wf.close() 64 | 65 | 66 | def evaluation(input, target): 67 | order = ['Melanoma', 'NSCLC', 'Others'] 68 | tp_p=tn_p=fp_p=fn_p=0 69 | for i in range(3): 70 | rf = open(input, 'r') 71 | line = rf.readline() 72 | tp=tn=fp=fn=0 73 | while line != '': 74 | line = line.strip().split('\t') 75 | if line[0] != 'Sample_ID': 76 | if line[1] == str(i): 77 | if line[2] == '1': 78 | if line[target] == 'R': 79 | tp += 1 80 | tp_p += 1 81 | else: 82 | fn += 1 83 | fn_p += 1 84 | else: 85 | if line[target] == 'R': 86 | fp += 1 87 | fp_p += 1 88 | else: 89 | tn += 1 90 | tn_p += 1 91 | line = rf.readline() 92 | print(order[i]) 93 | print(str(tn) + '\t' + str(fp) + '\n' + str(fn) + '\t' + str(tp) + '\n') 94 | sensitivity = float(tp) / (float(tp + fn)) * 100 95 | specificity = float(tn) / (float(fp + tn)) * 100 96 | accuracy = float(tp + tn) / (float(tp + fp + fn + tn)) * 100 97 | ppv = float(tp) / (float(tp + fp)) * 100 98 | npv = float(tn) / (float(fn + tn)) * 100 99 | print(str(sensitivity) + '\t' + str(specificity) + '\t' + str(accuracy) + '\t' + str(ppv) + '\t' + str(npv) + '\n') 100 | rf.close() 101 | print('Pan-cancer') 102 | print(str(tn_p) + '\t' + str(fp_p) + '\n' + str(fn_p) + '\t' + str(tp_p) + '\n') 103 | sensitivity_p = float(tp_p) / (float(tp_p + fn_p)) * 100 104 | specificity_p = float(tn_p) / (float(fp_p + tn_p)) * 100 105 | accuracy_p = float(tp_p + tn_p) / (float(tp_p + fp_p + fn_p + tn_p)) * 100 106 | ppv_p = float(tp_p) / (float(tp_p + fp_p)) * 100 107 | npv_p = float(tn_p) / (float(fn_p + tn_p)) * 100 108 | print(str(sensitivity_p) + '\t' + str(specificity_p) + '\t' + str(accuracy_p) + '\t' + str(ppv_p) + '\t' + str(npv_p) + '\n') 109 | 110 | 111 | print('') 112 | evaluation('Training_RF_Prob_Predicted.txt', -2) 113 | 114 | print('') 115 | evaluation('Training_RF_Prob_Predicted.txt', -1) 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | -------------------------------------------------------------------------------- /Code/13.Survival_Test.R: -------------------------------------------------------------------------------- 1 | library("ggplot2") 2 | library("survminer") 3 | library("survival") 4 | 5 | data <- read.table('Test_RF_Prob_Predicted.txt', header=T, sep='\t') 6 | 7 | #OS analysis for RF16, Pan-cancer 8 | fit <- survfit(Surv(OS_Months, OS_Event) ~ RF16, data = data) 9 | ggsurvplot(fit, data, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 10 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 11 | 12 | surv_object <- Surv(time = data$OS_Months, event = data$OS_Event) 13 | coxph_survfit <- coxph(surv_object ~ RF16, data = data) 14 | summary(coxph_survfit) 15 | 16 | #PFS analysis for RF16, Pan-cancer 17 | fit <- survfit(Surv(PFS_Months, PFS_Event) ~ RF16, data = data) 18 | ggsurvplot(fit, data, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 19 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 20 | 21 | surv_object <- Surv(time = data$PFS_Months, event = data$PFS_Event) 22 | coxph_survfit <- coxph(surv_object ~ RF16, data = data) 23 | summary(coxph_survfit) 24 | 25 | #OS analysis for TMB, Pan-cancer 26 | fit <- survfit(Surv(OS_Months, OS_Event) ~ TMB_10, data = data) 27 | ggsurvplot(fit, data, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 28 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 29 | 30 | surv_object <- Surv(time = data$OS_Months, event = data$OS_Event) 31 | coxph_survfit <- coxph(surv_object ~ TMB_10, data = data) 32 | summary(coxph_survfit) 33 | 34 | #PFS analysis for TMB, Pan-cancer 35 | fit <- survfit(Surv(PFS_Months, PFS_Event) ~ TMB_10, data = data) 36 | ggsurvplot(fit, data, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 37 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 38 | 39 | surv_object <- Surv(time = data$PFS_Months, event = data$PFS_Event) 40 | coxph_survfit <- coxph(surv_object ~ TMB_10, data = data) 41 | summary(coxph_survfit) 42 | 43 | #OS analysis for RF16, Melanoma 44 | data2 <- data[grep("0", data$Cancer_Type),] 45 | fit <- survfit(Surv(OS_Months, OS_Event) ~ RF16, data = data2) 46 | ggsurvplot(fit, data2, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 47 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 48 | 49 | surv_object <- Surv(time = data2$OS_Months, event = data2$OS_Event) 50 | coxph_survfit <- coxph(surv_object ~ RF16, data = data2) 51 | summary(coxph_survfit) 52 | 53 | #PFS analysis for RF16, Melanoma 54 | fit <- survfit(Surv(PFS_Months, PFS_Event) ~ RF16, data = data2) 55 | ggsurvplot(fit, data2, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 56 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 57 | 58 | surv_object <- Surv(time = data2$PFS_Months, event = data2$PFS_Event) 59 | coxph_survfit <- coxph(surv_object ~ RF16, data = data2) 60 | summary(coxph_survfit) 61 | 62 | #OS analysis for TMB, Melanoma 63 | fit <- survfit(Surv(OS_Months, OS_Event) ~ TMB_10, data = data2) 64 | ggsurvplot(fit, data2, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 65 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 66 | 67 | surv_object <- Surv(time = data2$OS_Months, event = data2$OS_Event) 68 | coxph_survfit <- coxph(surv_object ~ TMB_10, data = data2) 69 | summary(coxph_survfit) 70 | 71 | #PFS analysis for TMB, Melanoma 72 | fit <- survfit(Surv(PFS_Months, PFS_Event) ~ TMB_10, data = data2) 73 | ggsurvplot(fit, data2, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 74 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 75 | 76 | surv_object <- Surv(time = data2$PFS_Months, event = data2$PFS_Event) 77 | coxph_survfit <- coxph(surv_object ~ TMB_10, data = data2) 78 | summary(coxph_survfit) 79 | 80 | #OS analysis for RF16, NSCLC 81 | data2 <- data[grep("1", data$Cancer_Type),] 82 | fit <- survfit(Surv(OS_Months, OS_Event) ~ RF16, data = data2) 83 | ggsurvplot(fit, data2, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 84 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 85 | 86 | surv_object <- Surv(time = data2$OS_Months, event = data2$OS_Event) 87 | coxph_survfit <- coxph(surv_object ~ RF16, data = data2) 88 | summary(coxph_survfit) 89 | 90 | #PFS analysis for RF16, NSCLC 91 | fit <- survfit(Surv(PFS_Months, PFS_Event) ~ RF16, data = data2) 92 | ggsurvplot(fit, data2, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 93 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 94 | 95 | surv_object <- Surv(time = data2$PFS_Months, event = data2$PFS_Event) 96 | coxph_survfit <- coxph(surv_object ~ RF16, data = data2) 97 | summary(coxph_survfit) 98 | 99 | #OS analysis for TMB, NSCLC 100 | fit <- survfit(Surv(OS_Months, OS_Event) ~ TMB_10, data = data2) 101 | ggsurvplot(fit, data2, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 102 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 103 | 104 | surv_object <- Surv(time = data2$OS_Months, event = data2$OS_Event) 105 | coxph_survfit <- coxph(surv_object ~ TMB_10, data = data2) 106 | summary(coxph_survfit) 107 | 108 | #PFS analysis for TMB, NSCLC 109 | fit <- survfit(Surv(PFS_Months, PFS_Event) ~ TMB_10, data = data2) 110 | ggsurvplot(fit, data2, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 111 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 112 | 113 | surv_object <- Surv(time = data2$PFS_Months, event = data2$PFS_Event) 114 | coxph_survfit <- coxph(surv_object ~ TMB_10, data = data2) 115 | summary(coxph_survfit) 116 | 117 | #OS analysis for RF16, Others 118 | data2 <- data[grep("2", data$Cancer_Type),] 119 | fit <- survfit(Surv(OS_Months, OS_Event) ~ RF16, data = data2) 120 | ggsurvplot(fit, data2, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 121 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 122 | 123 | surv_object <- Surv(time = data2$OS_Months, event = data2$OS_Event) 124 | coxph_survfit <- coxph(surv_object ~ RF16, data = data2) 125 | summary(coxph_survfit) 126 | 127 | #PFS analysis for RF16, Others 128 | fit <- survfit(Surv(PFS_Months, PFS_Event) ~ RF16, data = data2) 129 | ggsurvplot(fit, data2, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 130 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 131 | 132 | surv_object <- Surv(time = data2$PFS_Months, event = data2$PFS_Event) 133 | coxph_survfit <- coxph(surv_object ~ RF16, data = data2) 134 | summary(coxph_survfit) 135 | 136 | #OS analysis for TMB, Others 137 | data2 <- data[grep("2", data$Cancer_Type),] 138 | fit <- survfit(Surv(OS_Months, OS_Event) ~ TMB_10, data = data2) 139 | ggsurvplot(fit, data2, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 140 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 141 | 142 | surv_object <- Surv(time = data2$OS_Months, event = data2$OS_Event) 143 | coxph_survfit <- coxph(surv_object ~ TMB_10, data = data2) 144 | summary(coxph_survfit) 145 | 146 | #PFS analysis for TMB, Others 147 | fit <- survfit(Surv(PFS_Months, PFS_Event) ~ TMB_10, data = data2) 148 | ggsurvplot(fit, data2, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 149 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 150 | 151 | surv_object <- Surv(time = data2$PFS_Months, event = data2$PFS_Event) 152 | coxph_survfit <- coxph(surv_object ~ TMB_10, data = data2) 153 | summary(coxph_survfit) 154 | 155 | -------------------------------------------------------------------------------- /Code/12.Survival_Training.R: -------------------------------------------------------------------------------- 1 | library("ggplot2") 2 | library("survminer") 3 | library("survival") 4 | 5 | data <- read.table('Training_RF_Prob_Predicted.txt', header=T, sep='\t') 6 | 7 | #OS analysis for RF16, Pan-cancer 8 | fit <- survfit(Surv(OS_Months, OS_Event) ~ RF16, data = data) 9 | ggsurvplot(fit, data, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 10 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 11 | 12 | surv_object <- Surv(time = data$OS_Months, event = data$OS_Event) 13 | coxph_survfit <- coxph(surv_object ~ RF16, data = data) 14 | summary(coxph_survfit) 15 | 16 | #PFS analysis for RF16, Pan-cancer 17 | fit <- survfit(Surv(PFS_Months, PFS_Event) ~ RF16, data = data) 18 | ggsurvplot(fit, data, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 19 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 20 | 21 | surv_object <- Surv(time = data$PFS_Months, event = data$PFS_Event) 22 | coxph_survfit <- coxph(surv_object ~ RF16, data = data) 23 | summary(coxph_survfit) 24 | 25 | #OS analysis for TMB, Pan-cancer 26 | fit <- survfit(Surv(OS_Months, OS_Event) ~ TMB_10, data = data) 27 | ggsurvplot(fit, data, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 28 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 29 | 30 | surv_object <- Surv(time = data$OS_Months, event = data$OS_Event) 31 | coxph_survfit <- coxph(surv_object ~ TMB_10, data = data) 32 | summary(coxph_survfit) 33 | 34 | #PFS analysis for TMB, Pan-cancer 35 | fit <- survfit(Surv(PFS_Months, PFS_Event) ~ TMB_10, data = data) 36 | ggsurvplot(fit, data, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 37 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 38 | 39 | surv_object <- Surv(time = data$PFS_Months, event = data$PFS_Event) 40 | coxph_survfit <- coxph(surv_object ~ TMB_10, data = data) 41 | summary(coxph_survfit) 42 | 43 | #OS analysis for RF16, Melanoma 44 | data2 <- data[grep("0", data$Cancer_Type),] 45 | fit <- survfit(Surv(OS_Months, OS_Event) ~ RF16, data = data2) 46 | ggsurvplot(fit, data2, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 47 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 48 | 49 | surv_object <- Surv(time = data2$OS_Months, event = data2$OS_Event) 50 | coxph_survfit <- coxph(surv_object ~ RF16, data = data2) 51 | summary(coxph_survfit) 52 | 53 | #PFS analysis for RF16, Melanoma 54 | fit <- survfit(Surv(PFS_Months, PFS_Event) ~ RF16, data = data2) 55 | ggsurvplot(fit, data2, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 56 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 57 | 58 | surv_object <- Surv(time = data2$PFS_Months, event = data2$PFS_Event) 59 | coxph_survfit <- coxph(surv_object ~ RF16, data = data2) 60 | summary(coxph_survfit) 61 | 62 | #OS analysis for TMB, Melanoma 63 | fit <- survfit(Surv(OS_Months, OS_Event) ~ TMB_10, data = data2) 64 | ggsurvplot(fit, data2, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 65 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 66 | 67 | surv_object <- Surv(time = data2$OS_Months, event = data2$OS_Event) 68 | coxph_survfit <- coxph(surv_object ~ TMB_10, data = data2) 69 | summary(coxph_survfit) 70 | 71 | #PFS analysis for TMB, Melanoma 72 | fit <- survfit(Surv(PFS_Months, PFS_Event) ~ TMB_10, data = data2) 73 | ggsurvplot(fit, data2, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 74 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 75 | 76 | surv_object <- Surv(time = data2$PFS_Months, event = data2$PFS_Event) 77 | coxph_survfit <- coxph(surv_object ~ TMB_10, data = data2) 78 | summary(coxph_survfit) 79 | 80 | #OS analysis for RF16, NSCLC 81 | data2 <- data[grep("1", data$Cancer_Type),] 82 | fit <- survfit(Surv(OS_Months, OS_Event) ~ RF16, data = data2) 83 | ggsurvplot(fit, data2, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 84 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 85 | 86 | surv_object <- Surv(time = data2$OS_Months, event = data2$OS_Event) 87 | coxph_survfit <- coxph(surv_object ~ RF16, data = data2) 88 | summary(coxph_survfit) 89 | 90 | #PFS analysis for RF16, NSCLC 91 | fit <- survfit(Surv(PFS_Months, PFS_Event) ~ RF16, data = data2) 92 | ggsurvplot(fit, data2, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 93 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 94 | 95 | surv_object <- Surv(time = data2$PFS_Months, event = data2$PFS_Event) 96 | coxph_survfit <- coxph(surv_object ~ RF16, data = data2) 97 | summary(coxph_survfit) 98 | 99 | #OS analysis for TMB, NSCLC 100 | fit <- survfit(Surv(OS_Months, OS_Event) ~ TMB_10, data = data2) 101 | ggsurvplot(fit, data2, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 102 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 103 | 104 | surv_object <- Surv(time = data2$OS_Months, event = data2$OS_Event) 105 | coxph_survfit <- coxph(surv_object ~ TMB_10, data = data2) 106 | summary(coxph_survfit) 107 | 108 | #PFS analysis for TMB, NSCLC 109 | fit <- survfit(Surv(PFS_Months, PFS_Event) ~ TMB_10, data = data2) 110 | ggsurvplot(fit, data2, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 111 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 112 | 113 | surv_object <- Surv(time = data2$PFS_Months, event = data2$PFS_Event) 114 | coxph_survfit <- coxph(surv_object ~ TMB_10, data = data2) 115 | summary(coxph_survfit) 116 | 117 | #OS analysis for RF16, Others 118 | data2 <- data[grep("2", data$Cancer_Type),] 119 | fit <- survfit(Surv(OS_Months, OS_Event) ~ RF16, data = data2) 120 | ggsurvplot(fit, data2, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 121 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 122 | 123 | surv_object <- Surv(time = data2$OS_Months, event = data2$OS_Event) 124 | coxph_survfit <- coxph(surv_object ~ RF16, data = data2) 125 | summary(coxph_survfit) 126 | 127 | #PFS analysis for RF16, Others 128 | fit <- survfit(Surv(PFS_Months, PFS_Event) ~ RF16, data = data2) 129 | ggsurvplot(fit, data2, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 130 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 131 | 132 | surv_object <- Surv(time = data2$PFS_Months, event = data2$PFS_Event) 133 | coxph_survfit <- coxph(surv_object ~ RF16, data = data2) 134 | summary(coxph_survfit) 135 | 136 | #OS analysis for TMB, Others 137 | data2 <- data[grep("2", data$Cancer_Type),] 138 | fit <- survfit(Surv(OS_Months, OS_Event) ~ TMB_10, data = data2) 139 | ggsurvplot(fit, data2, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 140 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 141 | 142 | surv_object <- Surv(time = data2$OS_Months, event = data2$OS_Event) 143 | coxph_survfit <- coxph(surv_object ~ TMB_10, data = data2) 144 | summary(coxph_survfit) 145 | 146 | #PFS analysis for TMB, Others 147 | fit <- survfit(Surv(PFS_Months, PFS_Event) ~ TMB_10, data = data2) 148 | ggsurvplot(fit, data2, pval = TRUE, conf.int = FALSE, palette = c("#2E9FDF", "#E7B800"), 149 | risk.table = TRUE, risk.table.y.text.col = TRUE, font.legend=13) 150 | 151 | surv_object <- Surv(time = data2$PFS_Months, event = data2$PFS_Event) 152 | coxph_survfit <- coxph(surv_object ~ TMB_10, data = data2) 153 | summary(coxph_survfit) 154 | 155 | --------------------------------------------------------------------------------