├── ClassificationModel ├── README ├── net_classify.py ├── params.yaml ├── radiomics_featureextraction.py ├── test_clf_model.py └── train_clf_model.py ├── DataGenerator ├── GGO_ROIgenerator.py ├── README ├── annotstructs.py ├── lidcxmlparser.py └── test_data_generator.py ├── README.md └── SegModel ├── README ├── net_seg.py └── train_seg_model.py /ClassificationModel/README: -------------------------------------------------------------------------------- 1 | # The code for building classification model 2 | -------------------------------------------------------------------------------- /ClassificationModel/net_classify.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Aug 5 11:07:39 2019 4 | 5 | @author: PC 6 | """ 7 | 8 | 9 | from torch.nn import Module, Sequential 10 | from torch.nn import Conv3d, ConvTranspose3d, BatchNorm3d, MaxPool3d, AvgPool1d 11 | from torch.nn import ReLU, Sigmoid 12 | from torch import nn 13 | import torch 14 | 15 | class Conv3D_Block(Module): 16 | 17 | def __init__(self, inp_feat, out_feat, kernel=3, stride=1, padding=1, residual=None): 18 | 19 | super(Conv3D_Block, self).__init__() 20 | 21 | self.conv1 = Sequential( 22 | Conv3d(inp_feat, out_feat, kernel_size=kernel, 23 | stride=stride, padding=padding, bias=True), 24 | BatchNorm3d(out_feat), 25 | ReLU()) 26 | 27 | self.conv2 = Sequential( 28 | Conv3d(out_feat, out_feat, kernel_size=kernel, 29 | stride=stride, padding=padding, bias=True), 30 | BatchNorm3d(out_feat), 31 | ReLU()) 32 | 33 | self.residual = residual 34 | 35 | if self.residual is not None: 36 | self.residual_upsampler = Conv3d(inp_feat, out_feat, kernel_size=1, bias=False) 37 | 38 | def forward(self, x): 39 | 40 | res = x 41 | 42 | if not self.residual: 43 | return self.conv2(self.conv1(x)) 44 | else: 45 | return self.conv2(self.conv1(x)) + self.residual_upsampler(res) 46 | 47 | def Maxpool3D_Block(): 48 | 49 | pool = MaxPool3d(kernel_size=2, stride=2, padding=0) 50 | 51 | return pool 52 | 53 | class Deconv3D_Block(Module): 54 | 55 | def __init__(self, inp_feat, out_feat, kernel=4, stride=2, padding=1): 56 | 57 | super(Deconv3D_Block, self).__init__() 58 | 59 | self.deconv = Sequential( 60 | ConvTranspose3d(inp_feat, out_feat, kernel_size=kernel, 61 | stride=stride, padding=padding, output_padding=0, bias=True), 62 | BatchNorm3d(out_feat), 63 | ReLU()) 64 | 65 | def forward(self, x): 66 | 67 | return self.deconv(x) 68 | 69 | 70 | class ClassifyNet(Module): 71 | def __init__(self, num_feat=[16,32,64,96,128], residual='conv'): 72 | super(ClassifyNet, self).__init__() 73 | 74 | #Encoder process 75 | self.conv1 = Conv3D_Block(1, num_feat[0], residual=residual) 76 | self.pool1 = Maxpool3D_Block() 77 | self.conv2 = Conv3D_Block(num_feat[0], num_feat[1], residual=residual) 78 | self.pool2 = Maxpool3D_Block() 79 | self.conv3 = Conv3D_Block(num_feat[1], num_feat[2], residual=residual) 80 | self.pool3 = Maxpool3D_Block() 81 | self.conv4 = Conv3D_Block(num_feat[2], num_feat[3], residual=residual) 82 | self.pool4 = Maxpool3D_Block() 83 | self.conv5 = Conv3D_Block(num_feat[3], num_feat[4], residual=residual) 84 | self.conv6 = Conv3D_Block(num_feat[4], num_feat[4], residual=residual) 85 | self.drop = nn.Dropout3d(p = 0.5, inplace = False) 86 | self.fc1 = nn.Linear(128*4*4*4,128) 87 | self.fc2 = nn.Linear(128,2) 88 | self.Relu = nn.ReLU() 89 | 90 | 91 | def forward(self, x): 92 | down_1 = self.conv1(x) 93 | pool_1 = self.pool1(down_1) 94 | down_2 = self.conv2(pool_1) 95 | pool_2 = self.pool2(down_2) 96 | down_3 = self.conv3(pool_2) 97 | pool_3 = self.pool3(down_3) 98 | down_4 = self.conv4(pool_3) 99 | pool_4 = self.pool4(down_4) 100 | down_5 = self.conv5(pool_4) 101 | conv_6 = self.conv6(down_5) 102 | conv_6 = self.drop(conv_6) 103 | view1 = down_5.view(conv_6.size(0),-1) 104 | fc1 = self.Relu(self.fc1(view1)) 105 | out = self.fc2(fc1) 106 | 107 | return out 108 | 109 | 110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /ClassificationModel/params.yaml: -------------------------------------------------------------------------------- 1 | # This is an example of settings that can be used as a starting point for analyzing CT data. This is only intended as a 2 | # starting point and is not likely to be the optimal settings for your dataset. Some points in determining better values 3 | # are added as comments where appropriate 4 | 5 | # When adapting and using these settings for an analysis, be sure to add the PyRadiomics version used to allow you to 6 | # easily recreate your extraction at a later timepoint: 7 | 8 | # ############################# Extracted using PyRadiomics version: ###################################### 9 | 10 | imageType: 11 | Original: {} 12 | LoG: 13 | sigma: [1.0, 2.0, 3.0, 4.0, 5.0] # If you include sigma values >5, remember to also increase the padDistance. 14 | Wavelet: {} 15 | 16 | featureClass: 17 | # redundant Compactness 1, Compactness 2 an Spherical Disproportion features are disabled by default, they can be 18 | # enabled by specifying individual feature names (as is done for glcm) and including them in the list. 19 | shape: 20 | firstorder: 21 | glcm: # Disable SumAverage by specifying all other GLCM features available 22 | - 'Autocorrelation' 23 | - 'JointAverage' 24 | - 'ClusterProminence' 25 | - 'ClusterShade' 26 | - 'ClusterTendency' 27 | - 'Contrast' 28 | - 'Correlation' 29 | - 'DifferenceAverage' 30 | - 'DifferenceEntropy' 31 | - 'DifferenceVariance' 32 | - 'JointEnergy' 33 | - 'JointEntropy' 34 | - 'Imc1' 35 | - 'Imc2' 36 | - 'Idm' 37 | - 'Idmn' 38 | - 'Id' 39 | - 'Idn' 40 | - 'InverseVariance' 41 | - 'MaximumProbability' 42 | - 'SumEntropy' 43 | - 'SumSquares' 44 | glrlm: 45 | glszm: 46 | gldm: 47 | 48 | setting: 49 | # Normalization: 50 | # most likely not needed, CT gray values reflect absolute world values (HU) and should be comparable between scanners. 51 | # If analyzing using different scanners / vendors, check if the extracted features are correlated to the scanner used. 52 | # If so, consider enabling normalization by uncommenting settings below: 53 | #normalize: true 54 | #normalizeScale: 500 # This allows you to use more or less the same bin width. 55 | 56 | # Resampling: 57 | # Usual spacing for CT is often close to 1 or 2 mm, if very large slice thickness is used, 58 | # increase the resampled spacing. 59 | # On a side note: increasing the resampled spacing forces PyRadiomics to look at more coarse textures, which may or 60 | # may not increase accuracy and stability of your extracted features. 61 | # interpolator: 'sitkBSpline' 62 | # resampledPixelSpacing: [1, 1, 1] 63 | padDistance: 10 # Extra padding for large sigma valued LoG filtered images 64 | 65 | # Mask validation: 66 | # correctMask and geometryTolerance are not needed, as both image and mask are resampled, if you expect very small 67 | # masks, consider to enable a size constraint by uncommenting settings below: 68 | #minimumROIDimensions: 2 69 | #minimumROISize: 50 70 | 71 | # Image discretization: 72 | # The ideal number of bins is somewhere in the order of 16-128 bins. A possible way to define a good binwidt is to 73 | # extract firstorder:Range from the dataset to analyze, and choose a binwidth so, that range/binwidth remains approximately 74 | # in this range of bins. 75 | binWidth: 25 76 | 77 | # first order specific settings: 78 | voxelArrayShift: 1000 # Minimum value in HU is -1000, shift +1000 to prevent negative values from being squared. 79 | 80 | # Misc: 81 | # default label value. Labels can also be defined in the call to featureextractor.execute, as a commandline argument, 82 | # or in a column "Label" in the input csv (batchprocessing) 83 | label: 1 -------------------------------------------------------------------------------- /ClassificationModel/radiomics_featureextraction.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Apr 12 13:44:09 2019 4 | 5 | @author: PC 6 | """ 7 | 8 | import SimpleITK as sitk 9 | import numpy as np 10 | from radiomics import featureextractor,imageoperations 11 | import os 12 | import pandas as pd 13 | from pandas import DataFrame as DF 14 | import warnings 15 | import time 16 | from time import sleep 17 | from tqdm import tqdm 18 | from skimage import measure 19 | 20 | 21 | def Img_Normalization(Image_Orig): 22 | Image_array = sitk.GetArrayFromImage(Image_Orig) 23 | min_ImgValue = Image_array.min() 24 | max_ImgValue = Image_array.max() 25 | ImgRange = max_ImgValue-min_ImgValue 26 | min_NewValue = 0 27 | max_NewValue = 1200 28 | NewRange = max_NewValue-min_NewValue 29 | Img_array = ((Image_array-min_ImgValue)/ImgRange)*NewRange+min_NewValue 30 | Img = sitk.GetImageFromArray(Img_array.astype(int)) 31 | Img.SetDirection(Image_Orig.GetDirection()) 32 | Img.SetOrigin(Image_Orig.GetOrigin()) 33 | Img.SetSpacing(Image_Orig.GetSpacing()) 34 | # Img.CopyInformation(Image_Orig) 35 | return Img 36 | 37 | def readDCM_Img(FilePath): 38 | reader = sitk.ImageSeriesReader() 39 | dcm_names = reader.GetGDCMSeriesFileNames(FilePath) 40 | reader.SetFileNames(dcm_names) 41 | image = reader.Execute() 42 | return image 43 | 44 | def Extract_Features(image,mask,params_path): 45 | paramsFile = os.path.abspath(params_path) 46 | extractor = featureextractor.RadiomicsFeaturesExtractor(paramsFile) 47 | result = extractor.execute(image, mask) 48 | general_info = {'diagnostics_Configuration_EnabledImageTypes','diagnostics_Configuration_Settings', 49 | 'diagnostics_Image-interpolated_Maximum','diagnostics_Image-interpolated_Mean', 50 | 'diagnostics_Image-interpolated_Minimum','diagnostics_Image-interpolated_Size', 51 | 'diagnostics_Image-interpolated_Spacing','diagnostics_Image-original_Hash', 52 | 'diagnostics_Image-original_Maximum','diagnostics_Image-original_Mean', 53 | 'diagnostics_Image-original_Minimum','diagnostics_Image-original_Size', 54 | 'diagnostics_Image-original_Spacing','diagnostics_Mask-interpolated_BoundingBox', 55 | 'diagnostics_Mask-interpolated_CenterOfMass','diagnostics_Mask-interpolated_CenterOfMassIndex', 56 | 'diagnostics_Mask-interpolated_Maximum','diagnostics_Mask-interpolated_Mean', 57 | 'diagnostics_Mask-interpolated_Minimum','diagnostics_Mask-interpolated_Size', 58 | 'diagnostics_Mask-interpolated_Spacing','diagnostics_Mask-interpolated_VolumeNum', 59 | 'diagnostics_Mask-interpolated_VoxelNum','diagnostics_Mask-original_BoundingBox', 60 | 'diagnostics_Mask-original_CenterOfMass','diagnostics_Mask-original_CenterOfMassIndex', 61 | 'diagnostics_Mask-original_Hash','diagnostics_Mask-original_Size', 62 | 'diagnostics_Mask-original_Spacing','diagnostics_Mask-original_VolumeNum', 63 | 'diagnostics_Mask-original_VoxelNum','diagnostics_Versions_Numpy', 64 | 'diagnostics_Versions_PyRadiomics','diagnostics_Versions_PyWavelet', 65 | 'diagnostics_Versions_Python','diagnostics_Versions_SimpleITK', 66 | 'diagnostics_Image-original_Dimensionality'} 67 | features = dict((key, value) for key, value in result.items() if key not in general_info) 68 | feature_info = dict((key, value) for key, value in result.items() if key in general_info) 69 | return features,feature_info 70 | 71 | if __name__ == '__main__': 72 | start = time.clock() 73 | warnings.simplefilter('ignore') 74 | 75 | img_path = r'.\GGO_DataSet\test_data\seg_result' 76 | list_path = r'.\GGO_DataSet\test_data\test.csv' 77 | 78 | f = open(list_path) 79 | GGO_list = pd.read_csv(f) 80 | List_Num = np.array(GGO_list['Num'].tolist()) 81 | Type = GGO_list['Type'].tolist() 82 | Class = np.array(GGO_list['Class'].tolist()) 83 | List_Num = List_Num[[i for i,x in enumerate(Type) if x!='Solid']] 84 | Class= Class[[i for i,x in enumerate(Type) if x!='Solid']] 85 | List_Num = List_Num[[i for i,x in enumerate(Class) if x!=0]] 86 | Histopathology = np.array(GGO_list['Histopathology'].tolist()) 87 | Histopathology = Histopathology[[i for i,x in enumerate(Type) if x!='Solid']] 88 | Histopathology = Histopathology[[i for i,x in enumerate(Class) if x!=0]] 89 | Label = 0 90 | 91 | Feature = [] 92 | for i in tqdm(range(len(List_Num))): 93 | sleep(0.01) 94 | dcm_File = List_Num[i] 95 | roi_path = img_path+'/'+'ROI_'+str(dcm_File)+'.nii' 96 | ROI = sitk.ReadImage(roi_path) 97 | 98 | mask_path = img_path+'/'+'Mask_'+str(dcm_File)+'.nii' 99 | Mask = sitk.ReadImage(mask_path) 100 | 101 | features, feature_info = Extract_Features(ROI, Mask, 'params.yaml') 102 | if Histopathology[i] == 'AIS' or Histopathology[i] == 'MIA': 103 | Label = 0 104 | elif Histopathology[i] == 'IAC': 105 | Label = 1 106 | 107 | features['Name'] = List_Num[i] 108 | features['Histopathology'] = Histopathology[i] 109 | features['Class'] = Label 110 | 111 | Feature.append(features) 112 | 113 | df = DF(Feature).fillna('0') 114 | df.to_csv('./testing_Radiomics_Feature.csv',index=False,sep=',') 115 | 116 | end = time.clock() 117 | print(end-start) -------------------------------------------------------------------------------- /ClassificationModel/test_clf_model.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Aug 23 12:38:04 2019 4 | 5 | @author: PC 6 | """ 7 | 8 | import os 9 | import time 10 | import numpy as np 11 | from net_classify_test import * 12 | import torch 13 | from torch.backends import cudnn 14 | from torch.utils.data import Dataset 15 | from torch.utils.data import DataLoader 16 | from torch.autograd import Variable 17 | import torch.nn.functional as F 18 | from scipy.ndimage.interpolation import rotate 19 | import glob 20 | import pandas as pd 21 | import SimpleITK as sitk 22 | import scipy 23 | import scipy.ndimage 24 | import pandas as pd 25 | import matplotlib.pyplot as plt 26 | from skimage import measure 27 | from sklearn.metrics import accuracy_score,roc_curve,recall_score,roc_auc_score,auc,confusion_matrix,cohen_kappa_score, f1_score, precision_score,matthews_corrcoef 28 | from tqdm import tqdm 29 | from sklearn.svm import SVC,LinearSVC 30 | from sklearn.model_selection import StratifiedKFold 31 | from sklearn.feature_selection import SelectPercentile, f_classif, chi2, SelectFromModel, SelectKBest 32 | from sklearn.feature_selection import RFE 33 | from sklearn.preprocessing import MinMaxScaler 34 | import scipy.stats as stats 35 | 36 | 37 | def permutation_test_between_clfs(y_test, pred_proba_1, pred_proba_2, nsamples=1000): 38 | auc_differences = [] 39 | auc1 = roc_auc_score(y_test.ravel(), pred_proba_1.ravel()) 40 | auc2 = roc_auc_score(y_test.ravel(), pred_proba_2.ravel()) 41 | observed_difference = auc1 - auc2 42 | for _ in range(nsamples): 43 | mask = np.random.randint(2, size=len(pred_proba_1.ravel())) 44 | p1 = np.where(mask, pred_proba_1.ravel(), pred_proba_2.ravel()) 45 | p2 = np.where(mask, pred_proba_2.ravel(), pred_proba_1.ravel()) 46 | auc1 = roc_auc_score(y_test.ravel(), p1) 47 | auc2 = roc_auc_score(y_test.ravel(), p2) 48 | auc_differences.append(auc1 - auc2) 49 | return observed_difference, np.mean(auc_differences >= observed_difference) 50 | 51 | def confindence_interval_compute(y_pred, y_true): 52 | n_bootstraps = 1000 53 | rng_seed = 42 # control reproducibility 54 | bootstrapped_scores = [] 55 | 56 | rng = np.random.RandomState(rng_seed) 57 | for i in range(n_bootstraps): 58 | # bootstrap by sampling with replacement on the prediction indices 59 | # indices = rng.random_integers(0, len(y_pred) - 1, len(y_pred)) 60 | indices = rng.randint(0, len(y_pred)-1, len(y_pred)) 61 | if len(np.unique(y_true[indices])) < 2: 62 | # We need at least one positive and one negative sample for ROC AUC 63 | # to be defined: reject the sample 64 | continue 65 | 66 | score = roc_auc_score(y_true[indices], y_pred[indices]) 67 | bootstrapped_scores.append(score) 68 | sorted_scores = np.array(bootstrapped_scores) 69 | sorted_scores.sort() 70 | confidence_std = sorted_scores.std() 71 | 72 | # Computing the lower and upper bound of the 90% confidence interval 73 | # You can change the bounds percentiles to 0.025 and 0.975 to get 74 | # a 95% confidence interval instead. 75 | confidence_lower = sorted_scores[int(0.05 * len(sorted_scores))] 76 | confidence_upper = sorted_scores[int(0.95 * len(sorted_scores))] 77 | return confidence_lower,confidence_upper,confidence_std 78 | 79 | if __name__ == "__main__": 80 | Pretrained_path = r'.\TaiZhouHospital\model\clf_IA_VS_nonIA' 81 | model = ClassifyNet()#.cuda() 82 | classify_path = os.path.join(Pretrained_path, '020.ckpt') 83 | modelCheckpoint = torch.load(classify_path) 84 | pretrained_dict = modelCheckpoint['state_dict'] 85 | model_dict = model.state_dict() 86 | pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}#filter out unnecessary keys 87 | model_dict.update(pretrained_dict) 88 | model.load_state_dict(model_dict) 89 | model.eval() 90 | img_path = r'.\GGO_DataSet\test_data\test_Img' 91 | list_path = r'.\GGO_DataSet\test_data\test.csv' 92 | 93 | f = open(list_path) 94 | GGO_list = pd.read_csv(f) 95 | List_Num = np.array(GGO_list['Num'].tolist()) 96 | Type = GGO_list['Type'].tolist() 97 | Class = np.array(GGO_list['Class'].tolist()) 98 | List_Num = List_Num[[i for i,x in enumerate(Type) if x!='Solid']] 99 | Class= Class[[i for i,x in enumerate(Type) if x!='Solid']] 100 | List_Num = List_Num[[i for i,x in enumerate(Class) if x!=0]] 101 | Histopathology = np.array(GGO_list['Histopathology'].tolist()) 102 | Histopathology = Histopathology[[i for i,x in enumerate(Type) if x!='Solid']] 103 | Histopathology = Histopathology[[i for i,x in enumerate(Class) if x!=0]] 104 | 105 | prob = [] 106 | prob_label = [] 107 | real_class = [] 108 | test_result=[] 109 | for i in tqdm(range(len(List_Num))): 110 | roi_path = os.path.join(img_path, List_Num[i]+'_roi.npy') 111 | label_path = os.path.join(img_path, List_Num[i]+'_label.npy') 112 | data = np.load(roi_path) 113 | data = data[np.newaxis,...] 114 | data = torch.from_numpy(data.astype(np.float32)) 115 | GGO_Class = np.load(label_path) 116 | with torch.no_grad(): 117 | input_data = Variable(data)#.cuda() 118 | predict = model(input_data) 119 | result = predict.data.cpu().numpy() 120 | prob.append(result[0][1]) 121 | real_class.append(GGO_Class) 122 | prob_label.append(np.argmax(result[0])) 123 | test = {} 124 | test['Num'] = List_Num[i] 125 | test['Class'] = GGO_Class 126 | test['Prob'] = result[0][1] 127 | test['Histopathology'] = Histopathology[i] 128 | test_result.append(test) 129 | df = pd.DataFrame(test_result).fillna('null') 130 | df.to_csv('./test_result.csv',index=False,sep=',') 131 | print('Our Model ACC:',accuracy_score(real_class,prob_label)*100) 132 | fpr,tpr,threshold = roc_curve(np.array(real_class),prob) 133 | auc = auc(fpr,tpr) 134 | auc_fl_cnn, auc_fh_cnn, auc_fstd_cnn = confindence_interval_compute(np.array(prob), np.array(real_class)) 135 | print('AUC:%.2f'%auc,'+/-%.2f'%auc_fstd_cnn,' 95% CI:[','%.2f,'%auc_fl_cnn,'%.2f'%auc_fh_cnn,']') 136 | F1 = f1_score(np.array(real_class),prob_label) 137 | print('F1:',F1) 138 | F1_w = f1_score(np.array(real_class),prob_label,average='weighted') 139 | print('F1_weight:',F1_w) 140 | MCC = matthews_corrcoef(np.array(real_class),prob_label) 141 | print('MCC:',MCC) 142 | 143 | 144 | training_csv = r'.\Radiomics_Feature.csv' 145 | testing_csv = r'.\testing_Radiomics_Feature.csv' 146 | f_training = open(training_csv) 147 | train_list = pd.read_csv(f_training) 148 | train_x = np.array(train_list.values[:,3:]) 149 | train_y = np.array(train_list['Class'].tolist()) 150 | 151 | f_testing = open(testing_csv) 152 | test_list = pd.read_csv(f_testing) 153 | 154 | test_x = np.array(test_list.values[:,3:]) 155 | test_y = np.array(test_list['Class'].tolist()) 156 | 157 | 158 | # Feature normalization 159 | min_max_scaler = MinMaxScaler() 160 | train_x = min_max_scaler.fit_transform(np.array(train_x,dtype=np.float64)) 161 | test_x = min_max_scaler.transform(test_x) 162 | 163 | selector = SelectKBest(f_classif, 20) 164 | train_x = selector.fit_transform(train_x, train_y) 165 | test_x = selector.transform(test_x) 166 | 167 | clf = SVC(kernel='rbf', probability=True, random_state=0, gamma='scale') 168 | clf.fit(train_x, train_y) 169 | test_prob = clf.predict_proba(test_x)[:,1] 170 | test_label = clf.predict(test_x) 171 | print('Radiomics:',accuracy_score(test_y,test_label)) 172 | fpr3,tpr3,threshold3 = roc_curve(np.array(test_y),test_prob) 173 | auc3 = roc_auc_score(np.array(test_y),test_prob) 174 | auc_fl_ra, auc_fh_ra, auc_fstd_ra = confindence_interval_compute(np.array(test_prob), np.array(test_y)) 175 | print('AUC:%.2f'%auc3,'+/-%.2f'%auc_fstd_ra,' 95% CI:[','%.2f,'%auc_fl_ra,'%.2f'%auc_fh_ra,']') 176 | F1_3 = f1_score(np.array(test_y),test_label) 177 | print('F1:', F1_3) 178 | F1_w3 = f1_score(np.array(test_y),test_label,average='weighted') 179 | print('F1_weight:',F1_w3) 180 | MCC3 = matthews_corrcoef(np.array(test_y),test_label) 181 | print('MCC:',MCC3) 182 | 183 | radiology1_path = r'.\GGO_DataSet\test_data\radiology_HW.csv' 184 | radiology1 = open(radiology1_path) 185 | radiology1_List = pd.read_csv(radiology1) 186 | radiology1_result = radiology1_List['Diagnosis'].tolist() 187 | print('HW:',accuracy_score(real_class,radiology1_result)) 188 | fpr1,tpr1,threshold1 = roc_curve(np.array(real_class),radiology1_result) 189 | F1_1 = f1_score(np.array(real_class),radiology1_result) 190 | print('F1:',F1_1) 191 | F1_w1 = f1_score(np.array(real_class),radiology1_result,average='weighted') 192 | print('F1_weight:',F1_w1) 193 | MCC1 = matthews_corrcoef(np.array(real_class),radiology1_result) 194 | print('MCC:',MCC1) 195 | # auc1 = auc(fpr1,tpr1) 196 | # print(auc1) 197 | TN1, FP1, FN1, TP1 = confusion_matrix(real_class,radiology1_result).ravel() 198 | 199 | ACC1 = (TP1+TN1)/(TP1+FP1+FN1+TN1) 200 | print('ACC:%0.4f'%ACC1) 201 | 202 | radiology2_path = r'.\GGO_DataSet\test_data\radiology_WSP.csv' 203 | radiology2 = open(radiology2_path) 204 | radiology2_List = pd.read_csv(radiology2) 205 | radiology2_result = radiology2_List['Diagnosis'].tolist() 206 | print('WSP:',accuracy_score(real_class,radiology2_result)) 207 | fpr2,tpr2,threshold2 = roc_curve(np.array(real_class),radiology2_result) 208 | F1_2 = f1_score(np.array(real_class),radiology2_result) 209 | print('F1:',F1_2) 210 | F1_w2 = f1_score(np.array(real_class),radiology2_result,average='weighted') 211 | print('F1_weight:',F1_w2) 212 | MCC2 = matthews_corrcoef(np.array(real_class),radiology2_result) 213 | print('MCC:',MCC2) 214 | TN2, FP2, FN2, TP2 = confusion_matrix(real_class,radiology2_result).ravel() 215 | 216 | ACC2 = (TP2+TN2)/(TP2+FP2+FN2+TN2) 217 | print('ACC:%0.4f'%ACC2) 218 | 219 | kappa = cohen_kappa_score(radiology1_result,radiology2_result) 220 | print('kappa:%0.4f'%kappa) 221 | 222 | kappa1 = cohen_kappa_score(radiology1_result,prob_label) 223 | print('kappa1:%0.4f'%kappa1) 224 | 225 | kappa2 = cohen_kappa_score(radiology2_result,prob_label) 226 | print('kappa1:%0.4f'%kappa2) 227 | 228 | ## Fusion 229 | scales = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9] 230 | 231 | for scale in scales: 232 | prob_fusion = scale*np.array(prob)+(1-scale)*np.array(test_prob) 233 | # fpr,tpr,threshold = roc_curve(np.array(real_class),prob_fusion) 234 | # auc_value = auc(fpr,tpr) 235 | auc_value = roc_auc_score(np.array(real_class),prob_fusion) 236 | auc_fl, auc_fh, auc_fstd = confindence_interval_compute(np.array(prob_fusion), np.array(real_class)) 237 | print('Fusion Scale',scale,'AUC:%.2f'%auc_value,'+/-%.2f'%auc_fstd, 238 | ' 95% CI:[','%.2f,'%auc_fl,'%.2f'%auc_fh,']') 239 | Fusion = np.zeros([len(prob),2]) 240 | Fusion[:,0] = np.array(prob) 241 | Fusion[:,1] = np.array(test_prob) 242 | Fusion_min = Fusion.min(1) 243 | Fusion_max = Fusion.max(1) 244 | 245 | auc_min = roc_auc_score(np.array(real_class),Fusion_min) 246 | auc_fl_min, auc_fh_min, auc_fstd_min = confindence_interval_compute(np.array(Fusion_min), np.array(real_class)) 247 | print('Min Fusion AUC:%.2f'%auc_min,'+/-%.2f'%auc_fstd_min,' 95% CI:[','%.2f,'%auc_fl_min,'%.2f'%auc_fh_min,']') 248 | # fpr,tpr,threshold = roc_curve(np.array(y),Fusion_max) 249 | 250 | auc_max = roc_auc_score(np.array(real_class),Fusion_max) 251 | auc_fl_max, auc_fh_max,auc_fstd_max = confindence_interval_compute(np.array(Fusion_max), np.array(real_class)) 252 | print('Max Fusion AUC:%.2f'%auc_max,'+/-%.2f'%auc_fstd_max,' 95% CI:[','%.2f,'%auc_fl_max,'%.2f'%auc_fh_max,']') 253 | 254 | 255 | Fusion_new = Fusion_max 256 | fpr4,tpr4,threshold4 = roc_curve(np.array(real_class),Fusion_new) 257 | F1_0 = f1_score(np.array(real_class),Fusion_new>=0.6) 258 | print('ACC:',accuracy_score(real_class,Fusion_new>=0.6)) 259 | print('F1:', F1_0) 260 | F1_w0 = f1_score(np.array(real_class),Fusion_new>=0.6,average='weighted') 261 | print('F1_weight:',F1_w0) 262 | MCC0 = matthews_corrcoef(np.array(real_class),Fusion_new>=0.6) 263 | print('MCC:',MCC0) 264 | 265 | stat_val3, p_val3 = stats.ttest_ind(prob, Fusion_new, equal_var=False) 266 | print('Seg_transfer and Fusion p:%.5f'%p_val3) 267 | stat_val4, p_val4 = stats.ttest_ind(test_prob, Fusion_new, equal_var=False) 268 | print('Radiomics and Fusion p:%.5f'%p_val4) 269 | stat_val5, p_val5 = stats.ttest_ind(test_prob, prob, equal_var=False) 270 | print('Seg_transfer and Radiomics p:%.5f'%p_val5) 271 | 272 | font = {'family' : 'Times New Roman', 273 | 'weight' : 'normal', 274 | 'size' : 10,} 275 | plt.rc('font', **font) 276 | 277 | lw = 1 278 | plt.figure(figsize=(5,5)) 279 | plt.plot(fpr4, tpr4, color='red', 280 | lw=lw, label='Fusion model AUC:%.2f'%roc_auc_score(np.array(real_class),Fusion_new)) 281 | plt.plot(fpr, tpr, color='blue', 282 | lw=lw, label='DL model AUC:%.2f'%roc_auc_score(np.array(real_class),prob)) 283 | 284 | plt.plot(fpr3, tpr3, color='g', 285 | lw=lw, label='Radiomics model AUC:%.2f'%roc_auc_score(np.array(test_y),test_prob)) 286 | 287 | plt.plot([0, 1], [0, 1], color='navy', lw=1, linestyle='--') 288 | plt.plot(fpr2[1], tpr2[1], color='orange',marker = '^', 289 | label='Senior Radiologist') # 290 | 291 | plt.plot(fpr1[1], tpr1[1], color='c',marker = 'o', 292 | label='Junior Radiologist') 293 | plt.xlim([0.0, 1.0]) 294 | plt.ylim([0.0, 1.0]) 295 | plt.xlabel('False Positive Rate') 296 | plt.ylabel('True Positive Rate') 297 | plt.legend(loc="lower right") 298 | plt.show() 299 | 300 | 301 | -------------------------------------------------------------------------------- /ClassificationModel/train_clf_model.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Aug 9 10:43:26 2019 4 | 5 | @author: PC 6 | """ 7 | 8 | # -*- coding: utf-8 -*- 9 | """ 10 | Created on Mon Aug 5 08:55:09 2019 11 | 12 | @author: PC 13 | 14 | """ 15 | 16 | import os 17 | import time 18 | import numpy as np 19 | from net_classify import * 20 | import torch 21 | from torch.backends import cudnn 22 | from torch.utils.data import Dataset 23 | from torch.utils.data import DataLoader 24 | from torch.autograd import Variable 25 | from scipy.ndimage.interpolation import rotate 26 | import glob 27 | import pandas as pd 28 | import SimpleITK as sitk 29 | 30 | #from CumulativeAverager import * 31 | 32 | 33 | def augment(roi, ifflip = True, ifrotate=True, ifswap = True): 34 | if ifrotate: 35 | angle1 = 0.5*180 36 | roi = rotate(roi,angle1,axes=(1,2),reshape=False) 37 | if ifswap: 38 | axisorder = np.random.permutation(3) 39 | roi = np.transpose(roi,np.concatenate([[0],axisorder+1])) 40 | if ifflip: 41 | flipid = np.array([1,np.random.randint(2),np.random.randint(2)])*2-1 42 | roi = np.ascontiguousarray(roi[:,::flipid[0],::flipid[1],::flipid[2]]) 43 | return roi 44 | 45 | class GGODataGenerator(Dataset): 46 | def __init__(self, img_path, csv_path, phase='train'): 47 | assert(phase == 'train' or phase == 'val' or phase == 'test') 48 | f = open(csv_path) 49 | GGO_list = pd.read_csv(f) 50 | List_Num = np.array(GGO_list['new_number'].tolist()) 51 | Label = GGO_list['Class'].tolist() 52 | self.List_Num = List_Num[[i for i,x in enumerate(Label) if x=='Malignant']] 53 | Histopathology = np.array(GGO_list['Histopathology'].tolist()) 54 | self.Histopathology = Histopathology[[i for i,x in enumerate(Label) if x=='Malignant']] 55 | self.img_path = img_path 56 | self.phase = phase 57 | 58 | def __getitem__(self,idx): 59 | if self.phase =='train': 60 | if idx>=len(self.List_Num): 61 | idx = idx%len(self.List_Num) 62 | ifflip = True 63 | ifrotate= False 64 | ifswap = False 65 | elif idx>=(len(self.List_Num)*2): 66 | idx = idx%(len(self.List_Num)*2) 67 | ifflip = False 68 | ifrotate= True 69 | ifswap = False 70 | elif idx>=(len(self.List_Num)*3): 71 | idx = idx%(len(self.List_Num)*3) 72 | ifflip = False 73 | ifrotate= False 74 | ifswap = True 75 | else: 76 | ifflip = False 77 | ifrotate= False 78 | ifswap = False 79 | 80 | if self.phase == 'train': 81 | dcm_File = self.List_Num[idx] 82 | roi_path = self.img_path+'/'+'ROI_'+str(dcm_File)+'.nii' 83 | ROI = sitk.ReadImage(roi_path) 84 | ROI = sitk.GetArrayFromImage(ROI).transpose(2,1,0) 85 | ROI = (ROI.astype(np.float32)-128)/128 86 | ROI = ROI[np.newaxis,...] 87 | ROI = augment(ROI, ifflip = ifflip, ifrotate=ifrotate, ifswap = ifswap) 88 | else: 89 | dcm_File = self.List_Num[idx] 90 | roi_path = self.img_path+'/'+'ROI_'+str(dcm_File)+'.nii' 91 | ROI = sitk.ReadImage(roi_path) 92 | ROI = sitk.GetArrayFromImage(ROI).transpose(2,1,0)[np.newaxis,...] 93 | ROI = (ROI.astype(np.float32)-128)/128 94 | ROI = ROI[np.newaxis,...] 95 | if self.Histopathology[idx] == 'AIS' or self.Histopathology[idx] == 'MIA': 96 | Label = 0 97 | elif self.Histopathology[idx] == 'IA': 98 | Label = 1 99 | return ROI, Label 100 | 101 | def __len__(self): 102 | if self.phase == 'train': 103 | return len(self.List_Num)*4 104 | else: 105 | return len(self.List_Num) 106 | 107 | def get_lr(epoch, lr): 108 | if epoch <= epochs * 0.5: 109 | lr = lr 110 | elif epoch <= epochs * 0.8: 111 | lr = 0.1 * lr 112 | else: 113 | lr = 0.01 * lr 114 | return lr 115 | 116 | def get_optimizer(parameters, st, lr, momentum=0.9): 117 | if st == 'sgd': 118 | return torch.optim.SGD(parameters, lr = lr, momentum=momentum, weight_decay=1e-3) 119 | elif st == 'adam': 120 | return torch.optim.Adam(parameters, lr = lr, weight_decay=1e-4) 121 | 122 | 123 | def train(dataloader, net, loss_fun,epoch, optimizer, get_lr, save_freq, save_dir): 124 | starttime = time.time() 125 | net.train() 126 | lr = get_lr(epoch, 0.001) 127 | for param_group in optimizer.param_groups: 128 | param_group['lr'] = lr 129 | metrics = [] 130 | acc = [] 131 | ok = 0 132 | 133 | for i, (data,Class) in enumerate(data_loader): 134 | optimizer.zero_grad() # clear gradients for next train 135 | data = Variable(data.cuda(non_blocking = True)) 136 | target = Variable(Class.cuda(non_blocking = True)) 137 | l1_regularization = torch.tensor(0.).cuda() 138 | # l2_regularization = torch.tensor(0.).cuda() 139 | target = target.long() 140 | output = net(data) 141 | loss_output = loss_fun(output, target) 142 | for param in [clf_model.fc1.weight, clf_model.fc1.bias, 143 | clf_model.fc2.weight, clf_model.fc2.bias 144 | ]: 145 | loss = loss_output 146 | loss.backward() # backpropagation, compute gradients 147 | optimizer.step() # apply gradients 148 | metrics.append(loss_output) 149 | _, predicted = torch.max(output.data,1) 150 | ok = ok+(predicted==target).sum() 151 | traind_total = (i + 1) * len(target) 152 | acc_output = 100. * ok / traind_total 153 | acc.append(acc_output) 154 | if epoch > 0 and epoch % save_freq == 0: 155 | state_dict = net.state_dict() 156 | for key in state_dict.keys(): 157 | state_dict[key] = state_dict[key].cpu() 158 | torch.save({ 159 | 'epoch': epoch, 160 | 'save_dir': save_dir, 161 | 'state_dict': state_dict}, 162 | os.path.join(save_dir, '%03d.ckpt' % epoch)) 163 | 164 | endtime = time.time() 165 | metrics = np.asarray(metrics, np.float32) 166 | acc = np.asarray(acc, np.float32) 167 | print('Epoch %03d (lr %.5f)' % (epoch, lr)) 168 | print('loss %2.4f' % (np.mean(metrics))) 169 | print('Accuracy %2.4f' % (np.mean(acc))) 170 | print('time:%3.2f'%(endtime-starttime)) 171 | 172 | if __name__ == '__main__': 173 | Pretrained_path = './model' 174 | clf_model = ClassifyNet().cuda() 175 | classify_path = os.path.join(Pretrained_path, '200.ckpt') 176 | modelCheckpoint = torch.load(classify_path) 177 | pretrained_dict = modelCheckpoint['state_dict'] 178 | model_dict = clf_model.state_dict() 179 | pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}#filter out unnecessary keys 180 | model_dict.update(pretrained_dict) 181 | clf_model.load_state_dict(model_dict) 182 | torch.cuda.set_device(0) 183 | for k,v in clf_model.named_parameters(): 184 | if k!='fc1.weight' and k!='fc1.bias' and k!='fc2.weight' and k!='fc2.bias': 185 | v.requires_grad = False 186 | 187 | optimizer = get_optimizer(parameters=[clf_model.fc1.weight, clf_model.fc1.bias, 188 | clf_model.fc2.weight, clf_model.fc2.bias 189 | ], st='adam', lr=0.001) 190 | 191 | loss = torch.nn.CrossEntropyLoss().cuda() 192 | cudnn.benchmark = True 193 | img_path = r'.\TaiZhouHospital\ROIs' 194 | csv_path = r'.\TaiZhouHospital\GGO_list.csv' 195 | 196 | dataset = GGODataGenerator(img_path,csv_path, phase='train') 197 | data_loader = DataLoader(dataset, batch_size = 1,shuffle = True, num_workers = 0, pin_memory=True) 198 | save_freq = 2 199 | epochs = 20 200 | lr = 0.001 201 | 202 | save_dir = r'.\TaiZhouHospital\model\clf_IA_VS_nonIA' 203 | for epoch in range(0, epochs + 1): 204 | train(data_loader, clf_model, loss, epoch, optimizer, get_lr, save_freq, save_dir) 205 | -------------------------------------------------------------------------------- /DataGenerator/GGO_ROIgenerator.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Thu Aug 1 14:02:14 2019 4 | 5 | @author: PC 6 | """ 7 | 8 | from lidcxmlparser import * 9 | import pydicom as dicom 10 | import numpy as np 11 | import os 12 | import glob 13 | from skimage import draw, measure 14 | import scipy 15 | 16 | # Load the scans in given folder path 17 | def load_scan(path): 18 | slices = [dicom.read_file(s) for s in glob.glob(path+'/*.dcm')] 19 | slices.sort(key = lambda x: float(x.ImagePositionPatient[2])) 20 | try: 21 | slice_thickness = np.abs(slices[0].ImagePositionPatient[2] - slices[1].ImagePositionPatient[2]) 22 | except: 23 | slice_thickness = np.abs(slices[0].SliceLocation - slices[1].SliceLocation) 24 | 25 | for s in slices: 26 | s.SliceThickness = slice_thickness 27 | 28 | return slices 29 | 30 | def resample(img, new_spacing=[1,1,1]): 31 | # Determine current pixel spacing 32 | image = img['array'] 33 | spacing = img['Spacing'] 34 | img_size = np.array(image.shape) 35 | 36 | resize_factor = spacing / new_spacing 37 | new_real_shape = image.shape * resize_factor 38 | new_shape = np.round(new_real_shape) 39 | real_resize_factor = new_shape / image.shape 40 | new_spacing = spacing / real_resize_factor 41 | 42 | image = scipy.ndimage.interpolation.zoom(image, real_resize_factor) 43 | 44 | return image, img_size, real_resize_factor 45 | 46 | def normalize_hu(image): 47 | MIN_BOUND = -1200.0 48 | MAX_BOUND = 600.0 49 | image = (image - MIN_BOUND) / (MAX_BOUND - MIN_BOUND) 50 | image[image > 1] = 1 51 | image[image < 0] = 0 52 | image = (image*255).astype('uint8') 53 | return image 54 | 55 | def crop_roi(resampled_img, img_size, seed_pos, crop_size, resize_factor): 56 | initial_seed = [seed_pos[0], seed_pos[1], seed_pos[2]] 57 | trans_seed = initial_seed*resize_factor 58 | start = [] 59 | end= [] 60 | for i in range(3): 61 | s = np.floor(trans_seed[i]-(crop_size[i]/2)) 62 | e = np.ceil(trans_seed[i]+(crop_size[i]/2)) 63 | if s<0: 64 | s = 0 65 | if e>resampled_img.shape[i]: 66 | e = resampled_img.shape[i] 67 | if e-s != crop_size[i]: 68 | pad = e-s-crop_size[i] 69 | if s==0: 70 | e = e-pad 71 | else: 72 | s = s+pad 73 | start.append(int(s)) 74 | end.append(int(e)) 75 | # print(start,end,pad) 76 | roi = resampled_img[start[0]:end[0], start[1]:end[1], start[2]:end[2]] 77 | 78 | return roi 79 | def get_nodule_center(xml_path, image, slice_loc): 80 | gt = LIDCXmlParser(xml_path) 81 | gt.parse() 82 | mask = np.zeros(image.shape) 83 | for indx, rad in enumerate(gt.rad_annotations): #has 4 radiologistes 84 | mask_1 = np.zeros(image.shape) 85 | for nod in rad.nodules: #nod is one NormalNodule 86 | # if nod.characterstics.texture <= 3: 87 | for nod_roi in nod.rois: 88 | z_index = np.where(slice_loc==nod_roi.z)[0][0] 89 | xy = np.array(nod_roi.roi_xy) 90 | xx, yy = draw.polygon(xy[:,1],xy[:,0]) 91 | mask_1[xx,yy,z_index] = 1 92 | mask = mask+mask_1 93 | mask = np.array(mask>1).astype(int) 94 | L_mask = measure.label(mask) 95 | L_props= measure.regionprops(L_mask) 96 | center_pos = [] 97 | for props in L_props: 98 | center = np.array(props.centroid).astype(int) 99 | center_pos.append(center) 100 | return center_pos,mask 101 | 102 | 103 | def search_xml(file_dir): 104 | xml_path=[] 105 | for root, dirs, files in os.walk(file_dir): 106 | for file in files: 107 | if os.path.splitext(file)[1] == '.xml': 108 | xml_path.append(os.path.join(root, file)) 109 | return xml_path 110 | 111 | if __name__ == '__main__': 112 | 113 | home_path = './LIDC' 114 | save_path = './Nodule_crop' 115 | data_list = os.listdir(home_path) 116 | Nodule_num = 0 117 | for patient_path in data_list: 118 | img_path = os.path.join(home_path,patient_path) 119 | xml_listpath = search_xml(img_path) 120 | for xml_path in xml_listpath: 121 | path, xml_f = os.path.split(xml_path) 122 | dcm = [s for s in glob.glob(path+'/*.dcm')] 123 | if len(dcm) > 10: 124 | dicom_slices = load_scan(path) 125 | image = [s.pixel_array*int(s.RescaleSlope)+int(s.RescaleIntercept) for s in dicom_slices ] 126 | image = np.array(image).transpose(1,2,0) 127 | slice_loc = np.array([s.ImagePositionPatient[2] for s in dicom_slices]).astype(float) 128 | spacing = np.array([dicom_slices[0].PixelSpacing[0], 129 | dicom_slices[0].PixelSpacing[1], 130 | dicom_slices[0].SliceThickness]).astype(float) 131 | center_pos,mask = get_nodule_center(xml_path, image, slice_loc) 132 | if len(center_pos) != 0: 133 | image_new = {} 134 | image_new['array'] = image 135 | image_new['Spacing'] = spacing 136 | img, img_size, resize_factor = resample(image_new) 137 | mask_new = {} 138 | mask_new['array'] = mask 139 | mask_new['Spacing'] = spacing 140 | label, label_size, resize_factor = resample(mask_new) 141 | img = normalize_hu(img) 142 | for pos in center_pos: 143 | Nodule_num = Nodule_num+1 144 | seed_pos = [pos[0], pos[1], pos[2]] 145 | ROI = crop_roi(img, img_size, seed_pos, [64,64,64] , resize_factor) 146 | ROI_label = crop_roi(label, label_size, seed_pos, [64, 64, 64], resize_factor) 147 | ROI = (ROI.astype(np.float32)-128)/128 148 | # 149 | np.save(os.path.join(save_path,str(Nodule_num)+'_roi.npy'), ROI) 150 | np.save(os.path.join(save_path,str(Nodule_num)+'_label.npy'), ROI_label) 151 | 152 | 153 | 154 | 155 | 156 | 157 | -------------------------------------------------------------------------------- /DataGenerator/README: -------------------------------------------------------------------------------- 1 | # The Preprocessing code 2 | -------------------------------------------------------------------------------- /DataGenerator/annotstructs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Mon Jun 10 14:19:22 2015 3 | 4 | @author: tizita nesibu 5 | """ 6 | 7 | 8 | class NoduleCharstics: 9 | def __init__(self): 10 | self.subtlety = 0 11 | self.internalstructure = 0 12 | self.calcification = 0 13 | self.sphericity = 0 14 | self.margin = 0 15 | self.lobulation = 0 16 | self.spiculation = 0 17 | self.texture = 0 18 | self.malignancy = 0 19 | return 20 | 21 | def __str__(self): 22 | strng = "subtlty (%d) intstruct (%d) calci (%d) sphere (%d) margin (%d) lob (%d) spicul (%d) txtur (%d) malig (%d)" % \ 23 | (self.subtlety, self.internalstructure, self.calcification, self.sphericity, self.margin, self.lobulation, \ 24 | self.spiculation, self.texture, self.malignancy) 25 | return strng 26 | 27 | def setValues(self, sub, inter, calc, spher, lob, spic, tex, malig): 28 | self.subtlety = sub 29 | self.internalstructure = inter 30 | self.calcification = calc 31 | self.sphericity = spher 32 | self.lobulation = lob 33 | self.spiculation = spic 34 | self.texture = tex 35 | self.malignancy = malig 36 | return 37 | 38 | class NoduleRoi: #is common for nodule and nonnodule 39 | def __init__(self, z_pos = 0., sop_uid = ''): 40 | self.z = z_pos 41 | self.sop_uid = sop_uid 42 | self.inclusion = True 43 | 44 | self.roi_xy = [] #to hold list of x,ycords in edgemap(edgmap pairs) 45 | self.roi_rect = [] #rectangle to hold the roi 46 | self.roi_centroid = [] #to hold centroid of the roi 47 | return 48 | 49 | def __str__(self): 50 | n_pts = len(self.roi_xy) 51 | strng = "Inclusion (%s) Z = %.2f SOP_UID (%s) \n ROI points [ %d ] :: "%(self.inclusion,self.z, self.sop_uid,n_pts) 52 | 53 | if (n_pts > 2): 54 | strng += "[[ %d,%d ]] :: "%(self.roi_centroid[0],self.roi_centroid[1]) 55 | strng += "(%d, %d), (%d,%d)..."%(self.roi_xy[0][0],self.roi_xy[0][1],self.roi_xy[1][0],self.roi_xy[1][1]) 56 | strng += "(%d, %d), (%d,%d)"%(self.roi_xy[-2][0],self.roi_xy[-2][1],self.roi_xy[-1][0],self.roi_xy[-1][1]) 57 | else: 58 | for i in range(n_pts): 59 | strng += "(%d, %d),"% (self.roi_xy[i][0], self.roi_xy[i][1]) 60 | return strng 61 | 62 | 63 | class Nodule: #is base class for all nodule types (NormalNodule, SmallNodule, NonNodule) 64 | def __init__(self): 65 | self.id = None 66 | self.rois = [] 67 | self.is_small = False 68 | 69 | def __str__(self): 70 | strng = "--- Nodule ID (%s) Small [%s] ---\n"%(self.id,str(self.is_small)) 71 | strng += self.tostring() + "\n" 72 | return strng 73 | 74 | def tostring(self): 75 | pass 76 | 77 | class NoduleAnnotationCluster(): # to be seen 78 | def __init__(self): 79 | self.id = [] 80 | self.z_pos = [] 81 | self.centroid = []#(x,y) of the centroid 82 | # convex hull description 83 | # p0 ---- p1 84 | # | | 85 | # p2-----p3 86 | self.convex_hull = [] # [()_0 ()_1 ()_2 ()_3] 87 | self.convex_hull_with_margin = [] 88 | self.no_annots = 0 89 | self.nodules_data = [] 90 | 91 | 92 | def compute_centroid(self): 93 | self.set_convex_hull() 94 | xc = 0.5*(self.convex_hull[0][0] + self.convex_hull[3][0]) # (x_min + x_max)/2 95 | yc = 0.5*(self.convex_hull[0][1] + self.convex_hull[3][1]) # (y_min + y_max)/2 96 | self.centroid = (xc,yc) 97 | return self.centroid 98 | 99 | def set_convex_hull(self): 100 | x_min, x_max = 640, 0 101 | y_min, y_max = 640, 0 102 | 103 | for nodule in self.nodules_data: 104 | for roi in nodule.rois: 105 | for dt_pt in roi.roi_xy: 106 | #roi.roi_xy -> [(x,y)] 107 | # TODO : finish this loop #????????????????????????????? 108 | x_min = dt_pt[0] if (x_min > dt_pt[0]) else x_min 109 | x_max = dt_pt[0] if (x_max < dt_pt[0]) else x_max 110 | y_min = dt_pt[1] if (y_min > dt_pt[1]) else y_min 111 | y_max = dt_pt[1] if (y_max < dt_pt[1]) else y_max 112 | self.convex_hull = [(x_min,y_min),(x_max,y_min), 113 | (x_min,y_max),(x_max,y_max)] 114 | w, h = (x_max-x_min), (y_max-y_min) 115 | x_min = int(x_min - 0.15*w) 116 | x_max = int(x_max + 0.15*w) 117 | y_min = int(y_min - 0.15*h) 118 | y_max = int(y_max + 0.15*h) 119 | self.convex_hull_with_margin = [(x_min,y_min),(x_max,y_min), 120 | (x_min,y_max),(x_max,y_max)] 121 | 122 | 123 | class NormalNodule(Nodule): 124 | 125 | def __init__(self): 126 | Nodule.__init__(self) 127 | self.characterstics = NoduleCharstics() 128 | self.is_small = False 129 | 130 | def tostring(self): 131 | strng = str(self.characterstics) 132 | strng += "\n" 133 | 134 | for roi in self.rois: 135 | strng += str(roi) + "\n" #str calls __str__ of NoduleRoi's class i.e.converting roi to 136 | return strng #string to prepare it for printing(it doesn't print it) 137 | 138 | class SmallNodule(Nodule): 139 | 140 | def __init__(self): 141 | Nodule.__init__(self) 142 | self.is_small = True 143 | 144 | def tostring(self): 145 | strng = '' 146 | for roi in self.rois: 147 | strng += str(roi) + "\n" 148 | return strng 149 | 150 | class NonNodule(Nodule): 151 | 152 | def __init__(self): 153 | Nodule.__init__(self) 154 | self.is_small = True 155 | 156 | def tostring(self): 157 | strng = '' 158 | for roi in self.rois: 159 | strng += str(roi) 160 | return strng 161 | 162 | class RadAnnotation: 163 | def __init__(self, init=True): 164 | self.version = None 165 | self.id = None 166 | 167 | self.nodules = [] #is normalNodule i.e in xml unblindedReadNodule with characteristics info 168 | self.small_nodules = [] #in xml unblindedReadNodule with no characteristics info 169 | self.non_nodules = [] #located inside readingSession 170 | self.initialized = init 171 | return 172 | 173 | def is_init(self): 174 | return self.initialized 175 | 176 | def set_init(self, init): 177 | self.initialized = init 178 | return 179 | 180 | def __str__(self): 181 | n_nodules = len(self.nodules) 182 | n_small_nodules = len(self.small_nodules) 183 | n_non_nodules = len(self.non_nodules) 184 | strng = "Annotation Version [%s] Radiologist ID [%s] \n"%(self.version, self.id) 185 | strng += "#Nodules [%d] #SmallNodules [%d] #NonNodules[%d] \n"%(n_nodules, n_small_nodules, n_non_nodules) 186 | 187 | if (n_nodules > 0): 188 | strng += "--- Nodules [%d]---\n"%n_nodules 189 | for i in range(n_nodules): 190 | strng += str(self.nodules[i]) 191 | 192 | if (n_small_nodules > 0): 193 | strng += "--- Small Nodules [%d] ---\n"%n_small_nodules 194 | for i in range(n_small_nodules): 195 | strng += str(self.small_nodules[i]) 196 | 197 | if (n_non_nodules > 0): 198 | strng += "--- Non Nodules [%d] ---\n"%n_non_nodules 199 | for i in range(n_non_nodules): 200 | strng += str(self.non_nodules[i]) 201 | 202 | strng += "-"*79 + "\n" 203 | return strng 204 | 205 | 206 | 207 | -------------------------------------------------------------------------------- /DataGenerator/lidcxmlparser.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Jun 10 14:19:22 2015 4 | 5 | @author: tizita nesibu 6 | """ 7 | import xml.etree.ElementTree as ET 8 | import os, sys 9 | 10 | from annotstructs import NoduleRoi, NormalNodule, SmallNodule, NonNodule, RadAnnotation 11 | 12 | #RadAnnotation holds -> readingSession(is the annotaion of one raiologist slice by slice) 13 | # unblindedReadNodule -> holds one slice's annotation info -> #if no characterstics -> SmallNodule 14 | #if with characterstics -> NormalNodule 15 | #NonNodule -> if it is not unblindedReadNodule 16 | #locus is like edgmap for -> #NonNodule 17 | 18 | class LIDCXmlHeader: 19 | 20 | def __init__(self): #4 elements are not included b/c they don't have data inside 21 | self.version = None 22 | self.messageid = None 23 | self.date_request = None 24 | self.time_request = None 25 | self.task_descr = None 26 | self.series_instance_uid = None 27 | self.date_service = None 28 | self.time_service = None 29 | self.study_instance_uid = None 30 | 31 | def __str__(self): 32 | strng = ("--- XML HEADER ---\n" 33 | "Version (%s) Message-Id (%s) Date-request (%s) Time-request (%s) \n" 34 | "Series-UID (%s)\n" 35 | "Time-service (%s) Task-descr (%s) Date-service (%s) Time-service (%s)\n" 36 | "Study-UID (%s)")%(self.version, self.messageid, self.date_request, self.time_request, 37 | self.series_instance_uid, self.time_service, self.task_descr, 38 | self.date_service, self.time_service, self.study_instance_uid) 39 | return strng 40 | 41 | 42 | class LIDCXmlParser: 43 | 44 | def __init__(self, fname=[]): 45 | 46 | #check if file exists or not 47 | self.initialized = False 48 | if (not (fname == [])): #if fname is not empity 49 | if not os.path.isfile(fname): 50 | print("Error: filename (%s) doesn't exist"%(fname)) 51 | self.initialized = False 52 | else: 53 | self.initialized = True 54 | 55 | self.xml_fname = fname 56 | self.rad_annotations = [] #to hold list of readingSession(xml element)->which holds each radiologists 57 | #annotation info i.e. len(rad_annotations) tells us nbr of radiologist 58 | self.xml_header = LIDCXmlHeader() 59 | self.namespace = {'nih': 'http://www.nih.gov'} #dict to store namespace's key and value b/c when this xml file 60 | #is parsed it containes this website infront of each tag that is parsed, to avoid including the 61 | # whole site, namespace could be used to shorten it(can be indicated by the key i.e.'nih'). 62 | 63 | if (self.initialized): 64 | print("LIDC Xml Parser Initialized!") 65 | return 66 | 67 | def set_xml_file(self, fname): 68 | #check if file exists or not 69 | if not os.path.isfile(fname): 70 | print("Error: filename (%s) doesn't exist"%(fname)) 71 | self.initialized = False 72 | else: 73 | self.xml_fname = fname 74 | self.initialized = True 75 | 76 | return self.initialized 77 | 78 | def parse(self): 79 | if (not self.initialized): # if file not exist(if self.initialized is false) 80 | print("Error: Parser not initiialized!") 81 | return 82 | ns = self.namespace 83 | 84 | tree = ET.parse(self.xml_fname) #ET is the library we use to parse xml data 85 | root = tree.getroot() 86 | 87 | #print root[0][0].tag, root[0][0].text 88 | #print root[0][1].tag, root[0][1].text 89 | #print root[0][2].tag, root[0][2].text 90 | 91 | #print root.attrib 92 | #parse the file 93 | 94 | #FIXME: Exception Handling 95 | resp_hdr = root.findall('nih:ResponseHeader', ns)[0] #ns is to show what nih is,and [0] is b/c only one ResponseHeader is available 96 | #4 elements are not included b/c they don't have data inside 97 | if resp_hdr.find('nih:Version', ns) is not None: 98 | self.xml_header.version = resp_hdr.find('nih:Version', ns).text 99 | if resp_hdr.find('nih:MessageId', ns) is not None: 100 | self.xml_header.messageid = resp_hdr.find('nih:MessageId', ns).text 101 | if resp_hdr.find('nih:DateRequest', ns) is not None: 102 | self.xml_header.date_request = resp_hdr.find('nih:DateRequest', ns).text 103 | if resp_hdr.find('nih:TimeRequest', ns) is not None: 104 | self.xml_header.time_request = resp_hdr.find('nih:TimeRequest', ns).text 105 | if resp_hdr.find('nih:TaskDescription', ns) is not None: 106 | self.xml_header.task_descr = resp_hdr.find('nih:TaskDescription', ns).text 107 | if resp_hdr.find('nih:SeriesInstanceUid', ns) is not None: 108 | self.xml_header.series_instance_uid = resp_hdr.find('nih:SeriesInstanceUid', ns).text 109 | if resp_hdr.find('nih:DateService', ns) is not None: 110 | self.xml_header.date_service = resp_hdr.find('nih:DateService', ns).text 111 | if resp_hdr.find('nih:TimeService', ns) is not None: 112 | self.xml_header.time_service = resp_hdr.find('nih:TimeService', ns).text 113 | if resp_hdr.find('nih:StudyInstanceUID', ns) is not None: 114 | self.xml_header.study_instance_uid = resp_hdr.find('nih:StudyInstanceUID', ns).text 115 | 116 | print(self.xml_header) # calles str of the class LIDCXmlHeader() 117 | 118 | 119 | 120 | for read_session in root.findall('nih:readingSession',ns): #readingSession-> holds radiologist's annotation info 121 | rad_annotation = RadAnnotation() #to hold each radiologists annotation i.e. readingSession in xml file 122 | rad_annotation.version = read_session.find('nih:annotationVersion', ns).text 123 | rad_annotation.id = read_session.find('nih:servicingRadiologistID', ns).text 124 | 125 | unblinded_nodule = read_session.findall('nih:unblindedReadNodule', ns) 126 | 127 | for node in unblinded_nodule: 128 | nodule = self.parse_nodule(node) 129 | 130 | # if (not nodule.is_small): 131 | # rad_annotation.nodules.append(nodule) 132 | # else: 133 | # rad_annotation.small_nodules.append(nodule) 134 | # 135 | if(nodule.is_small): 136 | rad_annotation.small_nodules.append(nodule) 137 | else: 138 | rad_annotation.nodules.append(nodule) # nodule is normalNodule 139 | 140 | 141 | non_nodule = read_session.findall('nih:nonNodule', ns) 142 | 143 | for node in non_nodule: 144 | nodule = self.parse_non_nodule(node) 145 | rad_annotation.non_nodules.append(nodule) 146 | 147 | self.rad_annotations.append(rad_annotation) 148 | 149 | return 150 | #for child in root: 151 | # print child.tag#, child.attrib 152 | 153 | def parse_nodule(self, xml_node): #xml_node is one unblindedReadNodule 154 | ns = self.namespace 155 | 156 | chartcs_node = xml_node.find('nih:characteristics', ns) 157 | is_small = (chartcs_node is None) # if no characteristics, it is smallnodule i.e. is_small=TRUE 158 | 159 | if (is_small) or (chartcs_node.find('nih:subtlety',ns) is None): 160 | nodule = SmallNodule() 161 | nodule.id = xml_node.find('nih:noduleID', ns).text 162 | else: 163 | nodule = NormalNodule() #if it has characteristics it is normalNodule 164 | nodule.id = xml_node.find('nih:noduleID', ns).text 165 | 166 | nodule.characterstics.subtlety = int(chartcs_node.find('nih:subtlety',ns).text) 167 | nodule.characterstics.internalstructure = int(chartcs_node.find('nih:internalStructure',ns).text) 168 | nodule.characterstics.calcification = int(chartcs_node.find('nih:calcification',ns).text) 169 | nodule.characterstics.sphericity = int(chartcs_node.find('nih:sphericity',ns).text) 170 | nodule.characterstics.margin = int(chartcs_node.find('nih:margin',ns).text) 171 | nodule.characterstics.lobulation = int(chartcs_node.find('nih:lobulation',ns).text) 172 | nodule.characterstics.spiculation = int(chartcs_node.find('nih:spiculation',ns).text) 173 | nodule.characterstics.texture = int(chartcs_node.find('nih:texture',ns).text) 174 | nodule.characterstics.malignancy = int(chartcs_node.find('nih:malignancy',ns).text) 175 | 176 | xml_rois = xml_node.findall('nih:roi', ns) 177 | 178 | for xml_roi in xml_rois: 179 | roi = NoduleRoi() 180 | roi.z = float(xml_roi.find('nih:imageZposition', ns).text) 181 | roi.sop_uid = xml_roi.find('nih:imageSOP_UID', ns).text 182 | roi.inclusion = (xml_roi.find('nih:inclusion', ns).text == "TRUE") # when inclusion = TRUE ->roi includes the whole nodule 183 | #when inclusion = FALSE ->roi is drown twice for one nodule 1.ouside the nodule 184 | #2.inside the nodule -> to indicate that the nodule has donut hole(the inside hole is 185 | #not part of the nodule) but by forcing inclusion to be TRUE, this situation is ignored 186 | 187 | edgemaps = xml_roi.findall('nih:edgeMap', ns) 188 | 189 | xmin, xmax, ymin, ymax = sys.maxsize,0,sys.maxsize,0 #??????????????????? 190 | 191 | for edgemap in edgemaps: 192 | x = int(edgemap.find('nih:xCoord', ns).text) 193 | y = int(edgemap.find('nih:yCoord', ns).text) 194 | 195 | if (x > xmax): # to define a rectangle arround the roi 196 | xmax = x #only the 1st point i.e.(xmin, ymin) and 197 | #the last point(xmax, ymax) is needed to drow a rectangle 198 | if (x < xmin): 199 | xmin = x 200 | 201 | if (y > ymax): 202 | ymax = y 203 | 204 | if (y < ymin): 205 | ymin = y 206 | 207 | 208 | roi.roi_xy.append((x,y)) 209 | 210 | if not is_small: #only for normalNodules 211 | roi.roi_rect = (xmin, ymin, xmax, ymax) 212 | roi.roi_centroid = ((xmax+xmin)/2., (ymin+ymax)/2.) #center point 213 | 214 | nodule.rois.append(roi) 215 | 216 | return nodule #is equivalent to unblindedReadNodule(xml element) 217 | 218 | def parse_non_nodule(self, xml_node): #xml_node is one nonNodule 219 | ns = self.namespace 220 | 221 | nodule = NonNodule() 222 | 223 | nodule.id = xml_node.find('nih:nonNoduleID', ns).text 224 | roi = NoduleRoi() 225 | roi.z = float(xml_node.find('nih:imageZposition', ns).text) 226 | roi.sop_uid = xml_node.find('nih:imageSOP_UID', ns).text 227 | 228 | loci = xml_node.findall('nih:locus', ns) 229 | 230 | for locus in loci: 231 | x = int(locus.find('nih:xCoord', ns).text) 232 | y = int(locus.find('nih:yCoord', ns).text) 233 | roi.roi_xy.append((x,y)) 234 | nodule.rois.append(roi) 235 | return nodule #is equivalent to nonNodule(xml element) 236 | 237 | 238 | def __str__(self): #to print the whole xml data of a patient(not important) 239 | strng = "*"*79 + "\n" 240 | strng += "XML FileName [%s] \n"%self.xml_fname 241 | strng += str(self.xml_header) #str calles LIDCXmlHeader's str b/c xml_header is object of LIDCXmlHeader class 242 | 243 | strng += "# of Rad. Annotations [%d] \n" % len(self.rad_annotations) 244 | 245 | for ant in self.rad_annotations: 246 | strng += str(ant) 247 | 248 | strng += "*"*79 + "\n" 249 | return strng 250 | 251 | 252 | 253 | #def main(): 254 | # dt = LIDCXmlParser(r'F:\ImageData\LIDC\DOI\LIDC-IDRI-0001\1.3.6.1.4.1.14519.5.2.1.6279.6001.298806137288633453246975630178\1.3.6.1.4.1.14519.5.2.1.6279.6001.179049373636438705059720603192\069.xml') 255 | # dt.parse() 256 | # print(dt) 257 | # return 258 | 259 | 260 | #if __name__ == '__main__': 261 | ## if __package__ is None: 262 | ## path_abs_name = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 263 | ## if (not (path_abs_name in set(os.sys.path))): 264 | ## os.sys.path.append(path_abs_name) 265 | ## 266 | ## from structs.annotstructs import NoduleRoi, NormalNodule, SmallNodule, NonNodule, RadAnnotation 267 | ## else: 268 | ## from ..structs.annotstructs import NoduleRoi, NormalNodule, SmallNodule, NonNodule, RadAnnotation 269 | # dt = LIDCXmlParser(r'F:\ImageData\LIDC\DOI\LIDC-IDRI-0001\1.3.6.1.4.1.14519.5.2.1.6279.6001.298806137288633453246975630178\1.3.6.1.4.1.14519.5.2.1.6279.6001.179049373636438705059720603192\069.xml') 270 | # dt.parse() 271 | # print(dt) 272 | 273 | 274 | 275 | -------------------------------------------------------------------------------- /DataGenerator/test_data_generator.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Aug 23 12:38:04 2019 4 | 5 | @author: PC 6 | """ 7 | 8 | import os 9 | import time 10 | import numpy as np 11 | import torch 12 | from torch.backends import cudnn 13 | from torch.utils.data import Dataset 14 | from torch.utils.data import DataLoader 15 | from torch.autograd import Variable 16 | from scipy.ndimage.interpolation import rotate 17 | import pandas as pd 18 | import SimpleITK as sitk 19 | import scipy 20 | import scipy.ndimage 21 | import pandas as pd 22 | import matplotlib.pyplot as plt 23 | from skimage import measure 24 | from tqdm import tqdm 25 | 26 | def readDCM_Img(FilePath): 27 | img = {} 28 | reader = sitk.ImageSeriesReader() 29 | dcm_names = reader.GetGDCMSeriesFileNames(FilePath) 30 | reader.SetFileNames(dcm_names) 31 | image = reader.Execute() 32 | img_array = sitk.GetArrayFromImage(image) # z,y,x 33 | Spacing = image.GetSpacing() 34 | # Origin = image.GetOrigin() 35 | img_array = img_array.transpose(2,1,0)#x,y,z 36 | img['array'] = img_array 37 | img['Spacing'] = np.array(Spacing).astype(float) 38 | # img['Origin'] = Origin 39 | return img 40 | 41 | def resample(img, new_spacing=[1,1,1]): 42 | # Determine current pixel spacing 43 | image = img['array'] 44 | spacing = img['Spacing'] 45 | img_size = np.array(image.shape) 46 | 47 | resize_factor = spacing / new_spacing 48 | new_real_shape = image.shape * resize_factor 49 | new_shape = np.round(new_real_shape) 50 | real_resize_factor = new_shape / image.shape 51 | new_spacing = spacing / real_resize_factor 52 | 53 | image = scipy.ndimage.interpolation.zoom(image, real_resize_factor) 54 | 55 | return image, img_size, real_resize_factor 56 | 57 | def normalize_hu(image): 58 | MIN_BOUND = -1200.0 59 | MAX_BOUND = 600.0 60 | image = (image - MIN_BOUND) / (MAX_BOUND - MIN_BOUND) 61 | image[image > 1] = 1 62 | image[image < 0] = 0 63 | image = (image*255).astype('uint8') 64 | return image 65 | 66 | def crop_roi(resampled_img, img_size, seed_pos, crop_size, resize_factor): 67 | initial_seed = [seed_pos[0], seed_pos[1], img_size[2]-seed_pos[2]] 68 | trans_seed = initial_seed*resize_factor 69 | start = [] 70 | end= [] 71 | for i in range(3): 72 | s = np.floor(trans_seed[i]-(crop_size[i]/2)) 73 | e = np.ceil(trans_seed[i]+(crop_size[i]/2)) 74 | if s<0: 75 | s = 0 76 | if e>resampled_img.shape[i]: 77 | e = resampled_img.shape[i] 78 | if e-s != crop_size[i]: 79 | pad = e-s-crop_size[i] 80 | if s==0: 81 | e = e-pad 82 | else: 83 | s = s+pad 84 | start.append(int(s)) 85 | end.append(int(e)) 86 | # print(start,end,pad) 87 | roi = resampled_img[start[0]:end[0], start[1]:end[1], start[2]:end[2]] 88 | 89 | return roi 90 | def save_img(image, outputImageFileName): 91 | writer = sitk.ImageFileWriter() 92 | writer.SetFileName(outputImageFileName) 93 | writer.Execute(image) 94 | 95 | if __name__ == "__main__": 96 | img_path = r'.\GGO_DataSet\test_data\DCM' 97 | list_path = r'.\GGO_DataSet\test_data\test.csv' 98 | save_dir = r'.\GGO_DataSet\test_data\test_Img' 99 | 100 | f = open(list_path) 101 | GGO_list = pd.read_csv(f) 102 | List_Num = np.array(GGO_list['Num'].tolist()) 103 | Type = GGO_list['Type'].tolist() 104 | Class = np.array(GGO_list['Class'].tolist()) 105 | List_Num = List_Num[[i for i,x in enumerate(Type) if x!='Solid']] 106 | Class= Class[[i for i,x in enumerate(Type) if x!='Solid']] 107 | List_Num = List_Num[[i for i,x in enumerate(Class) if x!=0]] 108 | Histopathology = np.array(GGO_list['Histopathology'].tolist()) 109 | Histopathology = Histopathology[[i for i,x in enumerate(Type) if x!='Solid']] 110 | Histopathology = Histopathology[[i for i,x in enumerate(Class) if x!=0]] 111 | x_list = np.array(GGO_list['X'].tolist()) 112 | x_list = x_list[[i for i,x in enumerate(Type) if x!='Solid']] 113 | x_list = x_list[[i for i,x in enumerate(Class) if x!=0]] 114 | y_list = np.array(GGO_list['Y'].tolist()) 115 | y_list = y_list[[i for i,x in enumerate(Type) if x!='Solid']] 116 | y_list = y_list[[i for i,x in enumerate(Class) if x!=0]] 117 | z_list = np.array(GGO_list['Z'].tolist()) 118 | z_list = z_list[[i for i,x in enumerate(Type) if x!='Solid']] 119 | z_list = z_list[[i for i,x in enumerate(Class) if x!=0]] 120 | prob = [] 121 | prob_label = [] 122 | real_class = [] 123 | test_result=[] 124 | for i in tqdm(range(len(List_Num))): 125 | name_split = List_Num[i].split('_') 126 | dcm_File = name_split[0]+'_'+name_split[1] 127 | img_fold = os.path.join(img_path, dcm_File) 128 | img = readDCM_Img(img_fold) 129 | image, img_size, resize_factor = resample(img) 130 | X = round(x_list[i]) 131 | Y = round(y_list[i]) 132 | Z = round(z_list[i]) 133 | image = normalize_hu(image) 134 | seed_pos = [X, Y, Z] 135 | ROI = crop_roi(image, img_size, seed_pos, [64,64,64] , resize_factor) 136 | new_spacing = [1,1,1] 137 | ROI_sitk = sitk.GetImageFromArray(ROI) 138 | ROI_sitk.SetSpacing(new_spacing) 139 | ROI = ROI[np.newaxis,...] 140 | ROI = (ROI.astype(np.float32)-128)/128 141 | 142 | if Histopathology[i] == 'IAC': 143 | GGO_Class = 1 144 | else: 145 | GGO_Class = 0 146 | np.save(os.path.join(save_dir,List_Num[i]+'_roi'),ROI) 147 | np.save(os.path.join(save_dir,List_Num[i]+'_label'),np.array(GGO_Class)) 148 | 149 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DL_Radiomics_Fusion 2 | Comparison and fusion of deep learning and radiomics features of ground-glass nodules to predict the invasiveness risk of stage-I lung adenocarcinomas in CT scan 3 | -------------------------------------------------------------------------------- /SegModel/README: -------------------------------------------------------------------------------- 1 | # The code for building segmentation model 2 | -------------------------------------------------------------------------------- /SegModel/net_seg.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Aug 5 11:07:39 2019 4 | 5 | @author: PC 6 | """ 7 | 8 | 9 | from torch.nn import Module, Sequential 10 | from torch.nn import Conv3d, ConvTranspose3d, BatchNorm3d, MaxPool3d, AvgPool1d 11 | from torch.nn import ReLU, Sigmoid 12 | import torch 13 | 14 | class Conv3D_Block(Module): 15 | 16 | def __init__(self, inp_feat, out_feat, kernel=3, stride=1, padding=1, residual=None): 17 | 18 | super(Conv3D_Block, self).__init__() 19 | 20 | self.conv1 = Sequential( 21 | Conv3d(inp_feat, out_feat, kernel_size=kernel, 22 | stride=stride, padding=padding, bias=True), 23 | BatchNorm3d(out_feat), 24 | ReLU()) 25 | 26 | self.conv2 = Sequential( 27 | Conv3d(out_feat, out_feat, kernel_size=kernel, 28 | stride=stride, padding=padding, bias=True), 29 | BatchNorm3d(out_feat), 30 | ReLU()) 31 | 32 | self.residual = residual 33 | 34 | if self.residual is not None: 35 | self.residual_upsampler = Conv3d(inp_feat, out_feat, kernel_size=1, bias=False) 36 | 37 | def forward(self, x): 38 | 39 | res = x 40 | 41 | if not self.residual: 42 | return self.conv2(self.conv1(x)) 43 | else: 44 | return self.conv2(self.conv1(x)) + self.residual_upsampler(res) 45 | 46 | def Maxpool3D_Block(): 47 | 48 | pool = MaxPool3d(kernel_size=2, stride=2, padding=0) 49 | 50 | return pool 51 | 52 | class Deconv3D_Block(Module): 53 | 54 | def __init__(self, inp_feat, out_feat, kernel=4, stride=2, padding=1): 55 | 56 | super(Deconv3D_Block, self).__init__() 57 | 58 | self.deconv = Sequential( 59 | ConvTranspose3d(inp_feat, out_feat, kernel_size=kernel, 60 | stride=stride, padding=padding, output_padding=0, bias=True), 61 | BatchNorm3d(out_feat), 62 | ReLU()) 63 | 64 | def forward(self, x): 65 | 66 | return self.deconv(x) 67 | 68 | 69 | class Unet3D(Module): 70 | def __init__(self, num_feat=[16,32,64,96,128], residual='conv'): 71 | super(Unet3D, self).__init__() 72 | 73 | #Encoder process 74 | self.conv1 = Conv3D_Block(1, num_feat[0], residual=residual) 75 | self.pool1 = Maxpool3D_Block() 76 | self.conv2 = Conv3D_Block(num_feat[0], num_feat[1], residual=residual) 77 | self.pool2 = Maxpool3D_Block() 78 | self.conv3 = Conv3D_Block(num_feat[1], num_feat[2], residual=residual) 79 | self.pool3 = Maxpool3D_Block() 80 | self.conv4 = Conv3D_Block(num_feat[2], num_feat[3], residual=residual) 81 | self.pool4 = Maxpool3D_Block() 82 | self.conv5 = Conv3D_Block(num_feat[3], num_feat[4], residual=residual) 83 | 84 | 85 | #Decoder process 86 | self.upconv4 = Deconv3D_Block(num_feat[4], num_feat[3]) 87 | self.deconv4 = Conv3D_Block(num_feat[3]*2, num_feat[3], residual=residual) 88 | self.upconv3 = Deconv3D_Block(num_feat[3], num_feat[2]) 89 | self.deconv3 = Conv3D_Block(num_feat[2]*2, num_feat[2], residual=residual) 90 | self.upconv2 = Deconv3D_Block(num_feat[2], num_feat[1]) 91 | self.deconv2 = Conv3D_Block(num_feat[1]*2, num_feat[1], residual=residual) 92 | self.upconv1 = Deconv3D_Block(num_feat[1], num_feat[0]) 93 | self.deconv1 = Conv3D_Block(num_feat[0]*2, num_feat[0], residual=residual) 94 | 95 | self.out_conv = Conv3d(num_feat[0], 1, kernel_size=1, stride=1, padding=0, bias=True) 96 | self.sigmoid = Sigmoid() 97 | 98 | def forward(self, x): 99 | down_1 = self.conv1(x) 100 | pool_1 = self.pool1(down_1) 101 | down_2 = self.conv2(pool_1) 102 | pool_2 = self.pool2(down_2) 103 | down_3 = self.conv3(pool_2) 104 | pool_3 = self.pool3(down_3) 105 | down_4 = self.conv4(pool_3) 106 | pool_4 = self.pool4(down_4) 107 | down_5 = self.conv5(pool_4) 108 | 109 | 110 | up_4 = torch.cat([self.upconv4(down_5), down_4], dim=1) 111 | deconv_4 = self.deconv4(up_4) 112 | up_3 = torch.cat([self.upconv3(deconv_4), down_3], dim=1) 113 | deconv_3 = self.deconv3(up_3) 114 | up_2 = torch.cat([self.upconv2(deconv_3), down_2], dim=1) 115 | deconv_2 = self.deconv2(up_2) 116 | up_1 = torch.cat([self.upconv1(deconv_2), down_1], dim=1) 117 | deconv_1 = self.deconv1(up_1) 118 | 119 | out = self.sigmoid(self.out_conv(deconv_1)) 120 | 121 | return out 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | -------------------------------------------------------------------------------- /SegModel/train_seg_model.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Jul 3 15:36:02 2019 4 | 5 | @author: PC 6 | """ 7 | 8 | import os 9 | import time 10 | import numpy as np 11 | from net_seg import * 12 | import torch 13 | from torch.backends import cudnn 14 | from torch.utils.data import Dataset 15 | from torch.utils.data import DataLoader 16 | from torch.autograd import Variable 17 | from scipy.ndimage.interpolation import rotate 18 | import glob 19 | #from CumulativeAverager import * 20 | 21 | def augment(roi, label, ifflip = True, ifrotate=True, ifswap = True): 22 | if ifrotate: 23 | angle1 = np.random.rand()*180 24 | roi = rotate(roi,angle1,axes=(2,3),reshape=False) 25 | label = rotate(label,angle1,axes=(2,3),reshape=False) 26 | if ifswap: 27 | axisorder = np.random.permutation(3) 28 | roi = np.transpose(roi,np.concatenate([[0],axisorder+1])) 29 | label = np.transpose(label,np.concatenate([[0],axisorder+1])) 30 | if ifflip: 31 | flipid = np.array([1,np.random.randint(2),np.random.randint(2)])*2-1 32 | roi = np.ascontiguousarray(roi[:,::flipid[0],::flipid[1],::flipid[2]]) 33 | label = np.ascontiguousarray(label[:,::flipid[0],::flipid[1],::flipid[2]]) 34 | return roi, label 35 | 36 | class GGODataGenerator(Dataset): 37 | def __init__(self, img_path, phase='train'): 38 | assert(phase == 'train' or phase == 'val' or phase == 'test') 39 | 40 | self.List_Num = np.array([os.path.split(s)[-1].split('_')[0] for s in glob.glob(img_path+'/*_roi.npy')]) 41 | 42 | self.img_path = img_path 43 | self.phase = phase 44 | 45 | def __getitem__(self,idx): 46 | if self.phase =='train': 47 | if idx>=len(self.List_Num): 48 | idx = idx%len(self.List_Num) 49 | ifflip = True 50 | ifrotate= False 51 | ifswap = False 52 | elif idx>=(len(self.List_Num)*2): 53 | idx = idx%(len(self.List_Num)*2) 54 | ifflip = False 55 | ifrotate= True 56 | ifswap = False 57 | elif idx>=(len(self.List_Num)*3): 58 | idx = idx%(len(self.List_Num)*3) 59 | ifflip = False 60 | ifrotate= False 61 | ifswap = True 62 | else: 63 | ifflip = False 64 | ifrotate= False 65 | ifswap = False 66 | 67 | if self.phase == 'train': 68 | dcm_File = self.List_Num[idx] 69 | roi_path = self.img_path+'/'+dcm_File+'_roi.npy' 70 | ROI = np.load(roi_path)[np.newaxis,...] 71 | class_path = self.img_path+'/'+dcm_File+'_label.npy' 72 | Label = np.load(class_path)[np.newaxis,...] 73 | ROI, Label = augment(ROI,Label, ifflip = ifflip, ifrotate=ifrotate, ifswap = ifswap) 74 | else: 75 | dcm_File = self.List_Num[idx] 76 | roi_path = self.img_path+'/'+dcm_File+'_roi.npy' 77 | ROI = np.load(roi_path) 78 | class_path = self.img_path+'/'+dcm_File+'_label.npy' 79 | Label = np.load(class_path) 80 | return ROI, Label 81 | 82 | def __len__(self): 83 | if self.phase == 'train': 84 | return len(self.List_Num)*4 85 | else: 86 | return len(self.List_Num) 87 | 88 | def get_lr(epoch, lr): 89 | if epoch <= epochs * 0.5: 90 | lr = lr 91 | elif epoch <= epochs * 0.8: 92 | lr = 0.1 * lr 93 | else: 94 | lr = 0.01 * lr 95 | return lr 96 | 97 | def get_optimizer(st, lr, momentum=0.9): 98 | if st == 'sgd': 99 | return torch.optim.SGD(net.parameters(), lr = lr, momentum=momentum) 100 | elif st == 'adam': 101 | return torch.optim.Adam(net.parameters(), lr = lr) 102 | 103 | def dice_loss(y, pred): 104 | smooth = 1. 105 | 106 | yflat = y.view(-1) 107 | predflat = pred.view(-1) 108 | intersection = (yflat * predflat).sum() 109 | 110 | return 1 - ((2. * intersection + smooth) / 111 | (yflat.sum() + predflat.sum() + smooth)) 112 | 113 | 114 | def train(dataloader, net, epoch, optimizer, lr, save_freq, save_dir): 115 | starttime = time.time() 116 | net.train() 117 | 118 | # lr = get_lr(epoch, 0.001) 119 | for param_group in optimizer.param_groups: 120 | param_group['lr'] = lr 121 | metrics = [] 122 | 123 | for i, (data,Class) in enumerate(data_loader): 124 | # print(i) 125 | data = Variable(data.cuda(async = True)) 126 | target = Variable(Class.cuda(async = True)) 127 | target = target.float() 128 | 129 | output = net(data) 130 | 131 | # avg_tool = CumulativeAverager() 132 | loss_output = dice_loss(output,target) 133 | # avg_tool.update(loss_output) 134 | optimizer.zero_grad() # clear gradients for next train 135 | loss_output.backward() # backpropagation, compute gradients 136 | optimizer.step() # apply gradients 137 | metrics.append(loss_output) 138 | 139 | if epoch % save_freq == 0: 140 | state_dict = net.state_dict() 141 | for key in state_dict.keys(): 142 | state_dict[key] = state_dict[key].cpu() 143 | 144 | torch.save({ 145 | 'epoch': epoch, 146 | 'save_dir': save_dir, 147 | 'state_dict': state_dict}, 148 | os.path.join(save_dir, '%03d.ckpt' % epoch)) 149 | 150 | 151 | endtime = time.time() 152 | metrics = np.asarray(metrics, np.float32) 153 | print('Epoch %03d (lr %.5f)' % (epoch, lr)) 154 | print('loss %2.4f' % (np.mean(metrics))) 155 | print('time:%3.2f'%(endtime-starttime)) 156 | 157 | if __name__ == '__main__': 158 | # torch.cuda.set_device(0) 159 | net = Unet3D().cuda() 160 | optimizer = get_optimizer('adam', lr=0.001) 161 | cudnn.benchmark = True 162 | img_path = './Nodule_crop' 163 | dataset = GGODataGenerator(img_path, phase='train') 164 | data_loader = DataLoader(dataset, batch_size = 12,shuffle = True, num_workers = 0, pin_memory=True) 165 | save_freq = 20 166 | epochs = 200 167 | lr = 0.001 168 | save_dir = './model' 169 | for epoch in range(0, epochs + 1): 170 | train(data_loader, net, epoch, optimizer, lr, save_freq, save_dir) 171 | --------------------------------------------------------------------------------