0) & (S < k*k))[0])\n", 208 | "\n", 209 | "\n", 210 | " Z = (Z < threshold)\n", 211 | "\n", 212 | "\n", 213 | " p = min(Z.shape)\n", 214 | "\n", 215 | " n = 2**np.floor(np.log(p)/np.log(2))\n", 216 | "\n", 217 | " n = int(np.log(n)/np.log(2))\n", 218 | "\n", 219 | " sizes = 2**np.arange(n, 1, -1)\n", 220 | "\n", 221 | " counts = []\n", 222 | " for size in sizes:\n", 223 | " counts.append(boxcount(Z, size))\n", 224 | "\n", 225 | " coeffs = np.polyfit(np.log(sizes), np.log(counts), 1)\n", 226 | " return -coeffs[0]\n", 227 | "\n", 228 | " FractalDim = fractal_dimension(img)\n", 229 | " Entropy = skimage.measure.shannon_entropy(img, base=2)\n", 230 | " parameters = []\n", 231 | " parameters.append(Centroid)\n", 232 | " parameters.append(MajorAxisLength)\n", 233 | " parameters.append(MinorAxisLength)\n", 234 | " parameters.append(DiagonalAxis)\n", 235 | " parameters.append(DiagonalPerp)\n", 236 | " parameters.append(Extent)\n", 237 | " parameters.append(Diameter)\n", 238 | " parameters.append(EigenValues)\n", 239 | " parameters.append(Solidity)\n", 240 | " parameters.append(FirstAxis)\n", 241 | " parameters.append(SecondAxis)\n", 242 | " parameters.append(ThirdAxis)\n", 243 | " parameters.append(FirstAxisLength)\n", 244 | " parameters.append(SecondAxisLength)\n", 245 | " parameters.append(ThirdAxisLength)\n", 246 | " parameters.append(kurt)\n", 247 | " parameters.append(histo)\n", 248 | " parameters.append(hemorrhage)\n", 249 | " parameters.append(FractalDim)\n", 250 | " parameters.append(Entropy)\n", 251 | " parameters = np.asarray(parameters)\n", 252 | " np.save(mri_file[-27:-21]+\"_seg.npy\",parameters)" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [ 261 | "#Format\n", 262 | "how2read = []\n", 263 | "how2read.append(\"Centroid,3\")\n", 264 | "how2read.append(\"MajorAxisLength,1\")\n", 265 | "how2read.append()" 266 | ] 267 | } 268 | ], 269 | "metadata": { 270 | "kernelspec": { 271 | "display_name": "Python 2", 272 | "language": "python", 273 | "name": "python2" 274 | }, 275 | "language_info": { 276 | "codemirror_mode": { 277 | "name": "ipython", 278 | "version": 2 279 | }, 280 | "file_extension": ".py", 281 | "mimetype": "text/x-python", 282 | "name": "python", 283 | "nbconvert_exporter": "python", 284 | "pygments_lexer": "ipython2", 285 | "version": "2.7.12" 286 | } 287 | }, 288 | "nbformat": 4, 289 | "nbformat_minor": 2 290 | } 291 | -------------------------------------------------------------------------------- /survival_prediction/python/Regression/GroundTruth.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mobarakol/3D_Attention_UNet/6c6ef922b12673d53a8a11e29ad14df36fbb92ed/survival_prediction/python/Regression/GroundTruth.xlsx -------------------------------------------------------------------------------- /survival_prediction/python/Regression/random forest regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "scrolled": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import os\n", 12 | "import numpy as np\n", 13 | "import keras.backend as K\n", 14 | "from keras.wrappers.scikit_learn import KerasRegressor\n", 15 | "from sklearn.model_selection import train_test_split\n", 16 | "from sklearn import metrics\n", 17 | "from sklearn.feature_selection import RFE\n", 18 | 
"from sklearn import preprocessing\n", 19 | "from sklearn.ensemble import RandomForestRegressor\n", 20 | "import matplotlib.pyplot as plt\n", 21 | "import torch\n", 22 | "import torch.nn as nn\n", 23 | "from torch.autograd import Variable\n", 24 | "from sklearn.preprocessing import StandardScaler,Normalizer\n", 25 | "import pandas as pd\n", 26 | "import sklearn.svm as svm\n", 27 | "from imblearn.datasets import make_imbalance\n", 28 | "from imblearn.over_sampling import RandomOverSampler\n", 29 | "from sklearn.metrics import confusion_matrix, r2_score\n", 30 | "from sklearn.metrics import mean_squared_error\n", 31 | "\n", 32 | "def categorize(array):\n", 33 | " #print(array)\n", 34 | " new_array=np.zeros_like(array)\n", 35 | " for i in range(0,array.shape[0]): \n", 36 | " k=array[i]\n", 37 | " if k>0.33:\n", 38 | " new_array[i,:]=1\n", 39 | " else: \n", 40 | " new_array[i,:]=0\n", 41 | " return new_array\n", 42 | "#df = pd.DataFrame(columns=['Parameter','fold1','fold2','fold3','fol4'])\n", 43 | "df = pd.DataFrame(columns=['Parameter','fold1','fold2','fold3','fol4'])\n", 44 | "#for i in range(1,80):\n", 45 | "for i in range(1,25):\n", 46 | "\n", 47 | " print(i)\n", 48 | " MSE = np.array('mse')\n", 49 | " Accuracy = np.array('Acc')\n", 50 | " r2_sc = np.array('r2_score')\n", 51 | " \n", 52 | " for fold in range(1,5):\n", 53 | " features = pd.read_csv('ICHFeatures.csv',header=0)\n", 54 | " OS_train = pd.read_excel(r'GroundTruth.xlsx', sheet_name=\"Fold\"+str(fold)+'_Seg',header = 0, dtype=str)\n", 55 | " OS_train[\"ID\"] = OS_train[\"ID\"].str.zfill(3)\n", 56 | " #OS_train.columns = ['ID','OS']\n", 57 | " OS_valid = pd.read_excel(r'GroundTruth.xlsx', sheet_name=\"Fold\"+str(fold)+'_Val',header = 0, dtype=str)\n", 58 | " OS_valid[\"ID\"] = OS_valid[\"ID\"].str.zfill(3)\n", 59 | " #OS_valid.columns = ['ID','OS']\n", 60 | " features['ID']=features['ID'].str.replace('ct1','')\n", 61 | " train = pd.merge(features, OS_train, how='right', on='ID')\n", 62 | " test = pd.merge(features, OS_valid, how='right', on='ID')\n", 63 | " norm_wihtout = [col for col in train.columns if col not in ['ID','Delta','Class']]\n", 64 | " #norm_valid = [col for col in test.columns if col not in ['ID','GCS','Onset','OS']]\n", 65 | " scaler = StandardScaler()\n", 66 | " train_ss = scaler.fit_transform(train[norm_wihtout])\n", 67 | " test_ss = scaler.transform(test[norm_wihtout])\n", 68 | " train[norm_wihtout] = train_ss\n", 69 | " test[norm_wihtout] = test_ss\n", 70 | " #train = train.assign(norm_train.values = train_ss)\n", 71 | " col_withoutID = [col for col in train.columns if col not in ['ID','Class']]\n", 72 | " ros = RandomOverSampler(random_state=42)\n", 73 | " X_res, y_res = ros.fit_resample(train[col_withoutID], train['Class'].values.astype(float))\n", 74 | " X_withDelta = pd.DataFrame(X_res,columns = col_withoutID)\n", 75 | " train_class = pd.DataFrame(y_res, columns = ['Class'])\n", 76 | " col_withoutDelta = [col for col in X_withDelta.columns if col not in ['Delta']]\n", 77 | " train_X = X_withDelta[col_withoutDelta]\n", 78 | " train_y = X_withDelta[\"Delta\"]\n", 79 | " num_features = i\n", 80 | " estimator = RandomForestRegressor(max_depth=2, random_state=0)\n", 81 | " #print(num_features)\n", 82 | " rfe=RFE(estimator, n_features_to_select=num_features,step=1)\n", 83 | " rfe.fit(train_X,train_y)\n", 84 | " ranking_RFE=rfe.ranking_\n", 85 | " indices=np.where(ranking_RFE==1)\n", 86 | " indices = list(indices[0])\n", 87 | " data_RFE=train_X.iloc[:,indices]\n", 88 | " valid_RFE = 
test[col_withoutID].iloc[:,indices]\n", 89 | " #print(data_RFE.columns)\n", 90 | " model = RandomForestRegressor(max_depth=2, random_state=0)\n", 91 | " model.fit(data_RFE, train_y)\n", 92 | "\n", 93 | " Y_pred=model.predict(valid_RFE).ravel()\n", 94 | " #acc=metrics.accuracy_score(test['Delta'].values,Y_pred)\n", 95 | " #print(\"accuracy score = \"+str(acc)) \n", 96 | " mse = mean_squared_error(test['Delta'].values, Y_pred)\n", 97 | " MSE = np.append(MSE,mse)\n", 98 | " r2_s = r2_score(test['Delta'].values, Y_pred)\n", 99 | " #print(mse)\n", 100 | " #print(r2_s)\n", 101 | " r2_sc = np.append(r2_sc,r2_s)\n", 102 | " # con_matrix = confusion_matrix(test['Delta'].values.tolist(),Y_pred.tolist())\n", 103 | " # TN,FP,FN,TP = con_matrix.ravel()\n", 104 | " # # Sensitivity, hit rate, recall, or true positive rate\n", 105 | " # TPR = TP/(TP+FN)\n", 106 | " # # Specificity or true negative rate\n", 107 | " # TNR = TN/(TN+FP) \n", 108 | " # #Precision\n", 109 | " # PPV = TP/(TP+FP)\n", 110 | " # Prec = np.append(Prec,PPV)\n", 111 | " # Sens = np.append(Sens,TPR)\n", 112 | " # Spec = np.append(Spec,TNR)\n", 113 | " predictions = categorize(Y_pred.reshape(20,1))\n", 114 | " #print(predictions)\n", 115 | " y_test_class = categorize(pd.to_numeric(test['Delta']).values.reshape(20,1))#.reshape(20,1)\n", 116 | " # evaluate predictions\n", 117 | " accuracy = metrics.accuracy_score(y_test_class, predictions)\n", 118 | " #print(\"Accuracy: %.2f%%\" % (accuracy * 100.0))\n", 119 | " Accuracy = np.append(Accuracy,accuracy)\n", 120 | " #best=metrics.mean_squared_error(y_test*1000, y_pred*1000) \n", 121 | " #print(best)\n", 122 | " MSE = pd.DataFrame(data = MSE.reshape(1,5),columns = df.columns)\n", 123 | " ACC = pd.DataFrame(data = Accuracy.reshape(1,5),columns = df.columns)\n", 124 | " R2_Score = pd.DataFrame(data = r2_sc.reshape(1,5),columns = df.columns)\n", 125 | " # Spec = pd.DataFrame(data = Spec.reshape(1,5),columns = df.columns)\n", 126 | " # df = df.append(Accuracy)\n", 127 | " df = df.append(MSE)\n", 128 | " df = df.append(ACC)\n", 129 | " df= df.append(R2_Score)\n", 130 | " #del Accuracy\n", 131 | " print(np.average(Accuracy[1:].astype(np.float)))" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 1, 137 | "metadata": {}, 138 | "outputs": [ 139 | { 140 | "name": "stderr", 141 | "output_type": "stream", 142 | "text": [ 143 | "Using TensorFlow backend.\n" 144 | ] 145 | } 146 | ], 147 | "source": [ 148 | "import os\n", 149 | "import numpy as np\n", 150 | "import keras.backend as K\n", 151 | "from keras.wrappers.scikit_learn import KerasRegressor\n", 152 | "from sklearn.model_selection import train_test_split\n", 153 | "from sklearn import metrics\n", 154 | "from sklearn.feature_selection import RFE\n", 155 | "from sklearn import preprocessing\n", 156 | "from sklearn.ensemble import RandomForestRegressor\n", 157 | "import matplotlib.pyplot as plt\n", 158 | "import torch\n", 159 | "import torch.nn as nn\n", 160 | "from torch.autograd import Variable\n", 161 | "from sklearn.preprocessing import StandardScaler,Normalizer\n", 162 | "import pandas as pd\n", 163 | "import sklearn.svm as svm\n", 164 | "from imblearn.datasets import make_imbalance\n", 165 | "from imblearn.over_sampling import RandomOverSampler\n", 166 | "from sklearn.metrics import confusion_matrix, r2_score\n", 167 | "from sklearn.metrics import mean_squared_error\n", 168 | "\n", 169 | "def categorize(array):\n", 170 | " #print(array)\n", 171 | " new_array=np.zeros_like(array)\n", 172 | " for i in 
range(0,array.shape[0]): \n", 173 | " k=array[i]\n", 174 | " if k>0.33:\n", 175 | " new_array[i,:]=1\n", 176 | " else: \n", 177 | " new_array[i,:]=0\n", 178 | " return new_array\n", 179 | "df = pd.DataFrame(columns=['Parameter','fold1','fold2','fold3','fol4'])\n", 180 | "# df = pd.DataFrame(columns=['Parameter','fold1','fold2','fold3','fol4'])\n", 181 | "# #for i in range(1,80):\n", 182 | "# for i in range(1,25):\n", 183 | "\n", 184 | "# print(i)\n", 185 | "MSE = np.array('mse')\n", 186 | "Accuracy = np.array('Acc')\n", 187 | "r2_sc = np.array('r2_score')\n", 188 | "Prec = np.array('Precision')\n", 189 | "Sens = np.array('Sensitivity')\n", 190 | "Spec =np.array('Specificity')\n", 191 | "for fold in range(3,4):\n", 192 | " features = pd.read_csv('ICHFeatures.csv',header=0)\n", 193 | " OS_train = pd.read_excel(r'GroundTruth.xlsx', sheet_name=\"Fold\"+str(fold)+'_Seg',header = 0, dtype=str)\n", 194 | " OS_train[\"ID\"] = OS_train[\"ID\"].str.zfill(3)\n", 195 | " #OS_train.columns = ['ID','OS']\n", 196 | " OS_valid = pd.read_excel(r'GroundTruth.xlsx', sheet_name=\"Fold\"+str(fold)+'_Val',header = 0, dtype=str)\n", 197 | " OS_valid[\"ID\"] = OS_valid[\"ID\"].str.zfill(3)\n", 198 | " #OS_valid.columns = ['ID','OS']\n", 199 | " features['ID']=features['ID'].str.replace('ct1','')\n", 200 | " train = pd.merge(features, OS_train, how='right', on='ID')\n", 201 | " test = pd.merge(features, OS_valid, how='right', on='ID')\n", 202 | " norm_wihtout = [col for col in train.columns if col not in ['ID','Delta','Class']]\n", 203 | " #norm_valid = [col for col in test.columns if col not in ['ID','GCS','Onset','OS']]\n", 204 | " scaler = StandardScaler()\n", 205 | " train_ss = scaler.fit_transform(train[norm_wihtout])\n", 206 | " test_ss = scaler.transform(test[norm_wihtout])\n", 207 | " train[norm_wihtout] = train_ss\n", 208 | " test[norm_wihtout] = test_ss\n", 209 | " #train = train.assign(norm_train.values = train_ss)\n", 210 | " col_withoutID = [col for col in train.columns if col not in ['ID','Class']]\n", 211 | " ros = RandomOverSampler(random_state=42)\n", 212 | " X_res, y_res = ros.fit_resample(train[col_withoutID], train['Class'].values.astype(float))\n", 213 | " X_withDelta = pd.DataFrame(X_res,columns = col_withoutID)\n", 214 | " train_class = pd.DataFrame(y_res, columns = ['Class'])\n", 215 | " col_withoutDelta = [col for col in X_withDelta.columns if col not in ['Delta']]\n", 216 | " train_X = X_withDelta[col_withoutDelta]\n", 217 | " train_y = X_withDelta[\"Delta\"]\n", 218 | " num_features = 6\n", 219 | " estimator = RandomForestRegressor(max_depth=2, random_state=0)\n", 220 | " #print(num_features)\n", 221 | " rfe=RFE(estimator, n_features_to_select=num_features,step=1)\n", 222 | " rfe.fit(train_X,train_y)\n", 223 | " ranking_RFE=rfe.ranking_\n", 224 | " indices=np.where(ranking_RFE==1)\n", 225 | " indices = list(indices[0])\n", 226 | " data_RFE=train_X.iloc[:,indices]\n", 227 | " valid_RFE = test[col_withoutID].iloc[:,indices]\n", 228 | " #print(data_RFE.columns)\n", 229 | " model = RandomForestRegressor(max_depth=2, random_state=0)\n", 230 | " model.fit(data_RFE, train_y)\n", 231 | "\n", 232 | " Y_pred=model.predict(valid_RFE).ravel()\n", 233 | " #acc=metrics.accuracy_score(test['Delta'].values,Y_pred)\n", 234 | " #print(\"accuracy score = \"+str(acc)) \n", 235 | " mse = mean_squared_error(test['Delta'].values, Y_pred)\n", 236 | " MSE = np.append(MSE,mse)\n", 237 | " r2_s = r2_score(test['Delta'].values, Y_pred)\n", 238 | " #print(mse)\n", 239 | " #print(r2_s)\n", 240 | " r2_sc = 
np.append(r2_sc,r2_s)\n", 241 | "\n", 242 | " predictions = categorize(Y_pred.reshape(20,1))\n", 243 | " #print(predictions)\n", 244 | " y_test_class = categorize(pd.to_numeric(test['Delta']).values.reshape(20,1))#.reshape(20,1)\n", 245 | " # evaluate predictions\n", 246 | " accuracy = metrics.accuracy_score(y_test_class, predictions)\n", 247 | " #print(\"Accuracy: %.2f%%\" % (accuracy * 100.0))\n", 248 | " Accuracy = np.append(Accuracy,accuracy)\n", 249 | " #best=metrics.mean_squared_error(y_test*1000, y_pred*1000) \n", 250 | " #print(best)\n", 251 | " con_matrix = confusion_matrix(y_test_class,predictions)\n", 252 | " TN,FP,FN,TP = con_matrix.ravel()\n", 253 | " # Sensitivity, hit rate, recall, or true positive rate\n", 254 | " TPR = TP/(TP+FN)\n", 255 | " # Specificity or true negative rate\n", 256 | " TNR = TN/(TN+FP) \n", 257 | " #Precision\n", 258 | " PPV = TP/(TP+FP)\n", 259 | " Prec = np.append(Prec,PPV)\n", 260 | " Sens = np.append(Sens,TPR)\n", 261 | " Spec = np.append(Spec,TNR)\n", 262 | "# MSE = pd.DataFrame(data = MSE.reshape(1,5),columns = df.columns)\n", 263 | "# ACC = pd.DataFrame(data = Accuracy.reshape(1,5),columns = df.columns)\n", 264 | "# R2_Score = pd.DataFrame(data = r2_sc.reshape(1,5),columns = df.columns)\n", 265 | "# Spec = pd.DataFrame(data = Spec.reshape(1,5),columns = df.columns)\n", 266 | "# Prec = pd.DataFrame(data = Prec.reshape(1,5),columns = df.columns)\n", 267 | "# Sens = pd.DataFrame(data = Sens.reshape(1,5),columns = df.columns)\n", 268 | "# # df = df.append(Accuracy)\n", 269 | "# df = df.append(MSE)\n", 270 | "# df = df.append(ACC)\n", 271 | "# df= df.append(R2_Score)\n", 272 | "# df = df.append(Spec)\n", 273 | "# df = df.append(Sens)\n", 274 | "# df = df.append(Prec)\n", 275 | "# #del Accuracy\n", 276 | "# print(np.average(Accuracy[1:].astype(np.float)))" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": null, 282 | "metadata": {}, 283 | "outputs": [], 284 | "source": [ 285 | "df.to_csv('RFR_results.csv')" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [ 294 | "df2 = pd.DataFrame(Y_pred)\n", 295 | "df2.to_csv('rfr_pred.csv')" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": 3, 301 | "metadata": {}, 302 | "outputs": [], 303 | "source": [ 304 | "df3 = pd.DataFrame(test['Delta'].values)\n", 305 | "df3.to_csv('gt.csv')" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": null, 311 | "metadata": {}, 312 | "outputs": [], 313 | "source": [ 314 | "\n", 315 | "\n", 316 | "\n", 317 | "\n", 318 | "\n", 319 | "\n", 320 | "\n", 321 | "\n", 322 | "\n", 323 | "\n", 324 | "\n", 325 | "\n", 326 | "\n", 327 | "\n", 328 | "\n", 329 | "\n", 330 | "\n", 331 | "\n", 332 | "\n", 333 | "\n", 334 | "\n" 335 | ] 336 | } 337 | ], 338 | "metadata": { 339 | "kernelspec": { 340 | "display_name": "py3", 341 | "language": "python", 342 | "name": "py3" 343 | }, 344 | "language_info": { 345 | "codemirror_mode": { 346 | "name": "ipython", 347 | "version": 3 348 | }, 349 | "file_extension": ".py", 350 | "mimetype": "text/x-python", 351 | "name": "python", 352 | "nbconvert_exporter": "python", 353 | "pygments_lexer": "ipython3", 354 | "version": "3.5.2" 355 | } 356 | }, 357 | "nbformat": 4, 358 | "nbformat_minor": 2 359 | } 360 | -------------------------------------------------------------------------------- /survival_prediction/python/base_nn.py: 
-------------------------------------------------------------------------------- 1 | #System 2 | import numpy as np 3 | import sys 4 | import os 5 | import random 6 | from glob import glob 7 | from skimage import io 8 | from PIL import Image 9 | import random 10 | import SimpleITK as sitk 11 | #Torch 12 | from torch.autograd import Variable 13 | from torch.utils.data import Dataset, DataLoader 14 | import torch.optim as optim 15 | import torch.nn.functional as F 16 | from torch.autograd import Function 17 | import torch 18 | import torch.nn as nn 19 | import torchvision.transforms as standard_transforms 20 | #from torchvision.models import resnet18 21 | import nibabel as nib 22 | from sklearn.metrics import classification_report, confusion_matrix, accuracy_score 23 | 24 | os.environ["CUDA_VISIBLE_DEVICES"] = "2" 25 | ckpt_path = 'ckpt' 26 | exp_name = 'lol' 27 | if not os.path.exists(ckpt_path): 28 | os.makedirs(ckpt_path) 29 | if not os.path.exists(os.path.join(ckpt_path, exp_name)): 30 | os.makedirs(os.path.join(ckpt_path, exp_name)) 31 | args = { 32 | 'num_class': 2, 33 | 'num_gpus': 1, 34 | 'start_epoch': 1, 35 | 'num_epoch': 100, 36 | 'batch_size': 8, 37 | 'lr': 0.001, 38 | 'lr_decay': 0.9, 39 | 'weight_decay': 1e-4, 40 | 'momentum': 0.9, 41 | 'snapshot': '', 42 | 'opt': 'adam', 43 | 'crop_size1': 138, 44 | 45 | } 46 | 47 | class HEMDataset(Dataset): 48 | def __init__(self, text_dir): 49 | file_pairs = open(text_dir,'r') 50 | self.img_anno_pairs = file_pairs.readlines() 51 | self.req_file, self.req_tar = [],[] 52 | for i in range(len(self.img_anno_pairs)): 53 | net = self.img_anno_pairs[i][:-1] 54 | self.req_file.append(net[:3]) 55 | self.req_tar.append(net[4]) 56 | 57 | 58 | def __len__(self): 59 | return len(self.req_tar) 60 | 61 | def __getitem__(self, index): 62 | _file_num = self.req_file[index] 63 | _gt = float(self.req_tar[index]) 64 | 65 | req_npy = './Features_Train/'+ str(_file_num) + 'ct1_seg.npy' 66 | _input_arr = np.load(req_npy, allow_pickle=True) 67 | _input = np.array([]) 68 | for i in range(len(_input_arr)): 69 | if i > 18: 70 | _input = np.concatenate((_input, _input_arr[i]), axis=None) 71 | _input = torch.from_numpy(np.array(_input)).float() 72 | _target = torch.from_numpy(np.array(_gt)).long() 73 | 74 | return _input, _target 75 | 76 | class HEMDataset_test(Dataset): 77 | def __init__(self, text_dir): 78 | file_pairs = open(text_dir,'r') 79 | self.img_anno_pairs = file_pairs.readlines() 80 | self.req_file, self.req_tar = [],[] 81 | for i in range(len(self.img_anno_pairs)): 82 | net = self.img_anno_pairs[i][:-1] 83 | self.req_file.append(net[:3]) 84 | self.req_tar.append(net[4]) 85 | 86 | 87 | def __len__(self): 88 | return len(self.req_tar) 89 | 90 | def __getitem__(self, index): 91 | _file_num = self.req_file[index] 92 | _gt = float(self.req_tar[index]) 93 | 94 | req_npy = './Features_Val/'+ str(_file_num) + 'ct1_seg.npy' 95 | _input_arr = np.load(req_npy, allow_pickle=True) 96 | _input = np.array([]) 97 | for i in range(len(_input_arr)): 98 | if i > 18: 99 | _input = np.concatenate((_input, _input_arr[i]), axis=None) 100 | _input = torch.from_numpy(np.array(_input)).float() 101 | _target = torch.from_numpy(np.array(_gt)).long() 102 | 103 | return _input, _target 104 | 105 | class Net(nn.Module): 106 | def __init__(self): 107 | super(Net, self).__init__() 108 | self.fc1 = nn.Linear(4, 2048) 109 | self.fc2 = nn.Linear(2048, 1024) 110 | self.fc3 = nn.Linear(1024, 2) 111 | 112 | def forward(self, x): 113 | x = F.relu(self.fc1(x)) 114 | x = F.relu(self.fc2(x)) 115 | x 
= self.fc3(x) 116 | return x 117 | 118 | if __name__ == '__main__': 119 | 120 | train_file = 'Train_dir.txt' 121 | test_file = 'Val_dir.txt' 122 | train_dataset = HEMDataset(text_dir=train_file) 123 | test_dataset = HEMDataset_test(text_dir=test_file) 124 | train_loader = DataLoader(dataset=train_dataset, batch_size=args['batch_size'], shuffle=True, num_workers=2,drop_last=True) 125 | test_loader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False, num_workers=2,drop_last=False) 126 | 127 | net = Net().cuda() 128 | criterion = nn.NLLLoss() 129 | optimizer = torch.optim.Adam(net.parameters(), lr=args['lr']) 130 | max_epoch = 50 131 | for epoch in range (max_epoch): 132 | net.train() 133 | for batch_idx, data in enumerate(train_loader): 134 | inputs, labels = data 135 | inputs = Variable(inputs).cuda() 136 | labels = Variable(labels).cuda() 137 | 138 | optimizer.zero_grad() 139 | outputs = net(inputs) 140 | loss = criterion(outputs, labels) 141 | loss.backward() 142 | optimizer.step() 143 | 144 | net.eval() 145 | correct, total = 0, 0 146 | class_pred, class_gt = [], [] 147 | with torch.no_grad(): 148 | for batch_idx, (inputs, targets) in enumerate(test_loader): 149 | inputs, targets = inputs.cuda(), targets.cuda() 150 | inputs, targets = Variable(inputs), Variable(targets) 151 | outputs = net(inputs) 152 | 153 | _, predicted = torch.max(outputs.data, 1) 154 | class_pred.append(predicted.item()) 155 | class_gt.append(targets.item()) 156 | total += targets.size(0) 157 | correct += (predicted == targets).sum().item() 158 | 159 | print('Epoch:', epoch)#, 'Accuracy: %f %%' % (100 * correct / total)) 160 | print(confusion_matrix(np.array(class_pred),np.array(class_gt))) 161 | print(classification_report(np.array(class_pred),np.array(class_gt))) 162 | print(accuracy_score(np.array(class_pred),np.array(class_gt))) 163 | print('') 164 | print('Finished Training') 165 | -------------------------------------------------------------------------------- /survival_prediction/python/check_snap.py: -------------------------------------------------------------------------------- 1 | import sklearn 2 | import shap 3 | from sklearn.model_selection import train_test_split 4 | 5 | # print the JS visualization code to the notebook 6 | shap.initjs() 7 | 8 | # train a SVM classifier 9 | X_train,X_test,Y_train,Y_test = train_test_split(*shap.datasets.iris(), test_size=0.2, random_state=0) 10 | svm = sklearn.svm.SVC(kernel='rbf', probability=True) 11 | svm.fit(X_train, Y_train) 12 | 13 | # use Kernel SHAP to explain test set predictions 14 | explainer = shap.KernelExplainer(svm.predict_proba, X_train, link="logit") 15 | shap_values = explainer.shap_values(X_test, nsamples=100) 16 | 17 | # plot the SHAP values for the Setosa output of the first instance 18 | shap.summary_plot(shap_values, X_train, plot_type="bar") 19 | -------------------------------------------------------------------------------- /survival_prediction/python/shap_box.py: -------------------------------------------------------------------------------- 1 | #System 2 | import numpy as np 3 | import sys 4 | import os 5 | import random 6 | from glob import glob 7 | from skimage import io 8 | from PIL import Image 9 | import random 10 | import SimpleITK as sitk 11 | #Torch 12 | from torch.autograd import Variable 13 | from torch.utils.data import Dataset, DataLoader 14 | import torch.optim as optim 15 | import torch.nn.functional as F 16 | from torch.autograd import Function 17 | import torch 18 | import torch.nn as nn 19 | import 
torchvision.transforms as standard_transforms 20 | #from torchvision.models import resnet18 21 | import nibabel as nib 22 | import matplotlib.pyplot as plt 23 | 24 | from sklearn.metrics import classification_report, confusion_matrix, accuracy_score 25 | from sklearn.feature_selection import RFE 26 | from sklearn.linear_model import LogisticRegression 27 | from sklearn.datasets import make_friedman1 28 | from sklearn.svm import LinearSVC 29 | from sklearn.svm import SVR 30 | 31 | import xgboost 32 | import shap 33 | import sklearn 34 | 35 | os.environ["CUDA_VISIBLE_DEVICES"] = "1" 36 | ckpt_path = 'ckpt' 37 | exp_name = 'lol' 38 | if not os.path.exists(ckpt_path): 39 | os.makedirs(ckpt_path) 40 | if not os.path.exists(os.path.join(ckpt_path, exp_name)): 41 | os.makedirs(os.path.join(ckpt_path, exp_name)) 42 | args = { 43 | 'num_class': 2, 44 | 'num_gpus': 1, 45 | 'start_epoch': 1, 46 | 'num_epoch': 100, 47 | 'batch_size': 1, 48 | 'lr': 3, 49 | 'lr_decay': 0.9, 50 | 'weight_decay': 1e-4, 51 | 'momentum': 0.9, 52 | 'snapshot': '', 53 | 'opt': 'adam', 54 | 'crop_size1': 138, 55 | 56 | } 57 | 58 | class HEMDataset(Dataset): 59 | def __init__(self, text_dir): 60 | file_pairs = open(text_dir,'r') 61 | self.img_anno_pairs = file_pairs.readlines() 62 | print(self.img_anno_pairs) 63 | self.req_file, self.req_tar = [],[] 64 | for i in range(len(self.img_anno_pairs)): 65 | net = self.img_anno_pairs[i][:-1] 66 | self.req_file.append(net[:3]) 67 | self.req_tar.append(net[4]) 68 | 69 | def __len__(self): 70 | return len(self.req_tar) 71 | 72 | def __getitem__(self, index): 73 | _file_num = self.req_file[index] 74 | _gt = float(self.req_tar[index]) 75 | 76 | req_npy = './Features_Train/'+ str(_file_num) + 'ct1_seg.npy' 77 | _input_arr = np.load(req_npy, allow_pickle=True) 78 | _input = np.array([]) 79 | for i in range(len(_input_arr)): 80 | _input = np.concatenate((_input, _input_arr[i]), axis=None) 81 | _input = torch.from_numpy(np.array(_input)).float() 82 | _target = torch.from_numpy(np.array(_gt)).long() 83 | 84 | return _input, _target 85 | 86 | class HEMDataset_test(Dataset): 87 | def __init__(self, text_dir): 88 | file_pairs = open(text_dir,'r') 89 | self.img_anno_pairs = file_pairs.readlines() 90 | print(self.img_anno_pairs) 91 | self.req_file, self.req_tar = [],[] 92 | for i in range(len(self.img_anno_pairs)): 93 | net = self.img_anno_pairs[i][:-1] 94 | self.req_file.append(net[:3]) 95 | self.req_tar.append(net[4]) 96 | 97 | def __len__(self): 98 | return len(self.req_tar) 99 | 100 | def __getitem__(self, index): 101 | _file_num = self.req_file[index] 102 | _gt = float(self.req_tar[index]) 103 | 104 | req_npy = './Features_Val/'+ str(_file_num) + 'ct1_seg.npy' 105 | _input_arr = np.load(req_npy, allow_pickle=True) 106 | _input = np.array([]) 107 | for i in range(len(_input_arr)): 108 | _input = np.concatenate((_input, _input_arr[i]), axis=None) 109 | _input = torch.from_numpy(np.array(_input)).float() 110 | _target = torch.from_numpy(np.array(_gt)).long() 111 | 112 | return _input, _target 113 | 114 | class Net(nn.Module): 115 | def __init__(self): 116 | super(Net, self).__init__() 117 | self.fc1 = nn.Linear(4, 2048) 118 | self.fc2 = nn.Linear(2048, 256) 119 | self.fc3 = nn.Linear(256, 2) 120 | self.out_act = nn.LogSoftmax(dim=1) 121 | 122 | def forward(self, x): 123 | x = F.relu(self.fc1(x)) 124 | x = F.relu(self.fc2(x)) 125 | x = self.fc3(x) 126 | x = self.out_act(x) 127 | return x 128 | 129 | 130 | 131 | if __name__ == '__main__': 132 | 133 | train_file = 'Train_dir.txt' 134 | test_file = 
'Val_dir.txt' 135 | train_dataset = HEMDataset(text_dir=train_file) 136 | test_dataset = HEMDataset_test(text_dir=test_file) 137 | train_loader = DataLoader(dataset=train_dataset, batch_size=args['batch_size'], shuffle=True, num_workers=2,drop_last=True) 138 | test_loader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False, num_workers=2,drop_last=False) 139 | 140 | max_epoch = 1 141 | X_train, Y_train = [], [] 142 | for epoch in range (max_epoch): 143 | for batch_idx, data in enumerate(train_loader): 144 | inputs, labels = data 145 | inputs, labels = inputs.cpu().numpy(), labels.cpu().numpy() 146 | X_train.append(inputs) 147 | Y_train.append(labels) 148 | print(batch_idx) 149 | print('okay') 150 | X_train, Y_train = np.squeeze(X_train, axis=1), np.squeeze(Y_train, axis=1) 151 | print(X_train.shape, Y_train.shape) 152 | 153 | X_test, Y_test = [], [] 154 | for epoch in range(max_epoch): 155 | for batch_idx, data in enumerate(test_loader): 156 | inputs, labels = data 157 | inputs, labels = inputs.cpu().numpy(), labels.cpu().numpy() 158 | X_test.append(inputs) 159 | Y_test.append(labels) 160 | print('okay') 161 | X_test, Y_test = np.squeeze(X_test, axis=1), np.squeeze(Y_test, axis=1) 162 | print(X_test.shape, Y_test.shape) 163 | 164 | X_train, X_test, Y_train, Y_test = np.array(X_train, dtype='f'), np.array(X_test, dtype='f'), np.array(Y_train, dtype='f'), np.array(Y_test, dtype='f') 165 | print(np.max(X_train),np.max(X_test),np.max(Y_train),np.max(Y_test)) 166 | print(np.where(np.isnan(X_test))) 167 | 168 | svm = sklearn.svm.SVC(kernel='rbf', probability=True) 169 | svm.fit(X_train, Y_train) 170 | 171 | # use Kernel SHAP to explain test set predictions 172 | explainer = shap.KernelExplainer(svm.predict_proba, X_train, link="logit") 173 | shap_values = explainer.shap_values(X_test, nsamples=100) 174 | 175 | # plot the SHAP values for the Setosa output of the first instance 176 | shap.summary_plot(shap_values, X_train, plot_type="bar") 177 | 178 | 179 | -------------------------------------------------------------------------------- /survival_prediction/python/svm_rfe.py: -------------------------------------------------------------------------------- 1 | #System 2 | import numpy as np 3 | import sys 4 | import os 5 | import random 6 | from glob import glob 7 | from skimage import io 8 | from PIL import Image 9 | import random 10 | import SimpleITK as sitk 11 | #Torch 12 | from torch.autograd import Variable 13 | from torch.utils.data import Dataset, DataLoader 14 | import torch.optim as optim 15 | import torch.nn.functional as F 16 | from torch.autograd import Function 17 | import torch 18 | import torch.nn as nn 19 | import torchvision.transforms as standard_transforms 20 | #from torchvision.models import resnet18 21 | import nibabel as nib 22 | from sklearn.metrics import classification_report, confusion_matrix, accuracy_score 23 | 24 | from sklearn.feature_selection import RFE 25 | from sklearn.linear_model import LogisticRegression 26 | from sklearn.datasets import make_friedman1 27 | from sklearn.svm import LinearSVC 28 | from sklearn.svm import SVR 29 | from sklearn.svm import SVC 30 | 31 | os.environ["CUDA_VISIBLE_DEVICES"] = "2" 32 | ckpt_path = 'ckpt' 33 | exp_name = 'lol' 34 | if not os.path.exists(ckpt_path): 35 | os.makedirs(ckpt_path) 36 | if not os.path.exists(os.path.join(ckpt_path, exp_name)): 37 | os.makedirs(os.path.join(ckpt_path, exp_name)) 38 | args = { 39 | 'num_class': 2, 40 | 'num_gpus': 1, 41 | 'start_epoch': 1, 42 | 'num_epoch': 100, 43 | 'batch_size': 1, 
44 | 'lr': 0.01, 45 | 'lr_decay': 0.9, 46 | 'weight_decay': 1e-4, 47 | 'momentum': 0.9, 48 | 'snapshot': '', 49 | 'opt': 'adam', 50 | 'crop_size1': 138, 51 | 52 | } 53 | 54 | class HEMDataset(Dataset): 55 | def __init__(self, text_dir): 56 | file_pairs = open(text_dir,'r') 57 | self.img_anno_pairs = file_pairs.readlines() 58 | self.req_file, self.req_tar = [],[] 59 | for i in range(len(self.img_anno_pairs)): 60 | net = self.img_anno_pairs[i][:-1] 61 | self.req_file.append(net[:3]) 62 | self.req_tar.append(net[4]) 63 | 64 | 65 | def __len__(self): 66 | return len(self.req_tar) 67 | 68 | def __getitem__(self, index): 69 | _file_num = self.req_file[index] 70 | _gt = float(self.req_tar[index]) 71 | 72 | req_npy = './Features_Train/'+ str(_file_num) + 'ct1_seg.npy' 73 | _input_arr = np.load(req_npy, allow_pickle=True) 74 | _input = np.array([]) 75 | for i in range(len(_input_arr)): 76 | _input = np.concatenate((_input, _input_arr[i]), axis=None) 77 | _input = torch.from_numpy(np.array(_input)).float() 78 | _target = torch.from_numpy(np.array(_gt)).long() 79 | 80 | return _input, _target 81 | 82 | class HEMDataset_test(Dataset): 83 | def __init__(self, text_dir): 84 | file_pairs = open(text_dir,'r') 85 | self.img_anno_pairs = file_pairs.readlines() 86 | self.req_file, self.req_tar = [],[] 87 | for i in range(len(self.img_anno_pairs)): 88 | net = self.img_anno_pairs[i][:-1] 89 | self.req_file.append(net[:3]) 90 | self.req_tar.append(net[4]) 91 | 92 | 93 | def __len__(self): 94 | return len(self.req_tar) 95 | 96 | def __getitem__(self, index): 97 | _file_num = self.req_file[index] 98 | _gt = float(self.req_tar[index]) 99 | 100 | req_npy = './Features_Val/'+ str(_file_num) + 'ct1_seg.npy' 101 | _input_arr = np.load(req_npy, allow_pickle=True) 102 | _input = np.array([]) 103 | for i in range(len(_input_arr)): 104 | _input = np.concatenate((_input, _input_arr[i]), axis=None) 105 | #print(_input) 106 | _input = torch.from_numpy(np.array(_input)).float() 107 | _target = torch.from_numpy(np.array(_gt)).long() 108 | 109 | return _input, _target 110 | 111 | class Net(nn.Module): 112 | def __init__(self): 113 | super(Net, self).__init__() 114 | self.fc1 = nn.Linear(10, 1024) 115 | self.fc2 = nn.Linear(1024, 128) 116 | self.fc3 = nn.Linear(128, 2) 117 | 118 | def forward(self, x): 119 | x = F.relu(self.fc1(x)) 120 | x = F.relu(self.fc2(x)) 121 | x = self.fc3(x) 122 | return x 123 | 124 | def important_features(fea, idx): 125 | batch = [] 126 | for j in range(len(fea)): 127 | req_inputs = [] 128 | for i in idx[0]: 129 | req_inputs.append(fea[0][i]) 130 | batch.append(req_inputs) 131 | return req_inputs 132 | 133 | if __name__ == '__main__': 134 | 135 | train_file = 'Train_dir.txt' 136 | test_file = 'Val_dir.txt' 137 | train_dataset = HEMDataset(text_dir=train_file) 138 | test_dataset = HEMDataset_test(text_dir=test_file) 139 | rfe_loader = DataLoader(dataset=train_dataset, batch_size=1, shuffle=True, num_workers=2, drop_last=True) 140 | train_loader = DataLoader(dataset=train_dataset, batch_size=args['batch_size'], shuffle=True, num_workers=2,drop_last=True) 141 | test_loader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False, num_workers=2,drop_last=False) 142 | 143 | max_epoch = 1 144 | X_rfe, Y_rfe = [], [] 145 | for epoch in range (max_epoch): 146 | for batch_idx, data in enumerate(rfe_loader): 147 | inputs, labels = data 148 | inputs, labels = inputs.cpu().numpy(), labels.cpu().numpy() 149 | X_rfe.append(inputs) 150 | Y_rfe.append(labels) 151 | 152 | X_rfe, Y_rfe = np.squeeze(X_rfe, axis=1), 
np.squeeze(Y_rfe, axis=1) 153 | rfe_model = SVR(kernel="linear") 154 | rfe = RFE(rfe_model, 5, step=1) 155 | fit = rfe.fit(X_rfe, Y_rfe) 156 | 157 | rank = fit.ranking_ 158 | req_idx = np.where(rank == 1) 159 | print(fit.ranking_) 160 | print('Finished RFE') 161 | 162 | X_train, Y_train = [], [] 163 | for batch_idx, data in enumerate(train_loader): 164 | inputs, labels = data 165 | req_inputs = important_features(inputs.cpu().numpy(), req_idx) 166 | X_train.append(req_inputs) 167 | Y_train.append(labels.cpu().numpy()) 168 | X_train, Y_train = np.array(X_train), np.squeeze(np.array(Y_train), axis=1) 169 | 170 | X_test, Y_test = [], [] 171 | for batch_idx, data in enumerate(test_loader): 172 | inputs, labels = data 173 | req_inputs = important_features(inputs.cpu().numpy(), req_idx) 174 | X_test.append(req_inputs) 175 | Y_test.append(labels.cpu().numpy()) 176 | X_test, Y_test = np.array(X_test), np.squeeze(np.array(Y_test), axis=1) 177 | 178 | score, count = [], [] 179 | #model = LogisticRegression() 180 | model = SVC(gamma='auto') 181 | model.fit(X_train, Y_train) 182 | score.append(sum(model.predict(X_test) == Y_test)) 183 | count.append(len(Y_test)) 184 | print(model.predict(X_test)) 185 | print(score) 186 | 187 | #print(X_train.shape, Y_train.shape, X_test.shape,Y_test.shape) --------------------------------------------------------------------------------
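A minimal illustrative sketch (not a file from the repository): svm_rfe.py above collects the saved feature vectors through PyTorch DataLoaders, ranks them with RFE wrapped around a linear SVR, keeps the rank-1 columns via important_features(), and finally fits an RBF SVC. The same SVM-RFE -> SVC flow can be written with scikit-learn's transform()/support_ API once the features are plain NumPy arrays; the array names, the function name, and the choice of 5 selected features below are assumptions mirroring the script, not part of it.

from sklearn.feature_selection import RFE
from sklearn.svm import SVR, SVC

def select_and_classify(X_train, y_train, X_test, n_features=5):
    # Rank features by recursively eliminating the weakest linear-SVR
    # coefficients, matching RFE(rfe_model, 5, step=1) in the script.
    selector = RFE(SVR(kernel="linear"), n_features_to_select=n_features, step=1)
    selector.fit(X_train, y_train)

    # Keep only the selected (rank-1) columns; this plays the role of the
    # manual np.where(rank == 1) / important_features() indexing above.
    X_train_sel = selector.transform(X_train)
    X_test_sel = selector.transform(X_test)

    # Fit an RBF SVC on the reduced feature set and predict the held-out cases.
    clf = SVC(gamma="auto")
    clf.fit(X_train_sel, y_train)
    return clf.predict(X_test_sel), selector.support_

selector.support_ is the boolean mask of kept columns (equivalent to ranking_ == 1), so the same selection is applied to the training and validation splits without re-deriving column indices.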