├── ClassificationModel
│   ├── README
│   ├── net_classify.py
│   ├── params.yaml
│   ├── radiomics_featureextraction.py
│   ├── test_clf_model.py
│   └── train_clf_model.py
├── DataGenerator
│   ├── GGO_ROIgenerator.py
│   ├── README
│   ├── annotstructs.py
│   ├── lidcxmlparser.py
│   └── test_data_generator.py
├── README.md
└── SegModel
    ├── README
    ├── net_seg.py
    └── train_seg_model.py

/ClassificationModel/README:
--------------------------------------------------------------------------------
1 | # The code for building the classification model
2 | 
--------------------------------------------------------------------------------
/ClassificationModel/net_classify.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Mon Aug 5 11:07:39 2019
4 | 
5 | @author: PC
6 | """
7 | 
8 | 
9 | from torch.nn import Module, Sequential
10 | from torch.nn import Conv3d, ConvTranspose3d, BatchNorm3d, MaxPool3d, AvgPool1d
11 | from torch.nn import ReLU, Sigmoid
12 | from torch import nn
13 | import torch
14 | 
15 | class Conv3D_Block(Module):
16 | 
17 |     def __init__(self, inp_feat, out_feat, kernel=3, stride=1, padding=1, residual=None):
18 | 
19 |         super(Conv3D_Block, self).__init__()
20 | 
21 |         self.conv1 = Sequential(
22 |                         Conv3d(inp_feat, out_feat, kernel_size=kernel,
23 |                                stride=stride, padding=padding, bias=True),
24 |                         BatchNorm3d(out_feat),
25 |                         ReLU())
26 | 
27 |         self.conv2 = Sequential(
28 |                         Conv3d(out_feat, out_feat, kernel_size=kernel,
29 |                                stride=stride, padding=padding, bias=True),
30 |                         BatchNorm3d(out_feat),
31 |                         ReLU())
32 | 
33 |         self.residual = residual
34 | 
35 |         if self.residual is not None:
36 |             self.residual_upsampler = Conv3d(inp_feat, out_feat, kernel_size=1, bias=False)
37 | 
38 |     def forward(self, x):
39 | 
40 |         res = x
41 | 
42 |         if not self.residual:
43 |             return self.conv2(self.conv1(x))
44 |         else:
45 |             return self.conv2(self.conv1(x)) + self.residual_upsampler(res)
46 | 
47 | def Maxpool3D_Block():
48 | 
49 |     pool = MaxPool3d(kernel_size=2, stride=2, padding=0)
50 | 
51 |     return pool
52 | 
53 | class Deconv3D_Block(Module):
54 | 
55 |     def __init__(self, inp_feat, out_feat, kernel=4, stride=2, padding=1):
56 | 
57 |         super(Deconv3D_Block, self).__init__()
58 | 
59 |         self.deconv = Sequential(
60 |                         ConvTranspose3d(inp_feat, out_feat, kernel_size=kernel,
61 |                                         stride=stride, padding=padding, output_padding=0, bias=True),
62 |                         BatchNorm3d(out_feat),
63 |                         ReLU())
64 | 
65 |     def forward(self, x):
66 | 
67 |         return self.deconv(x)
68 | 
69 | 
70 | class ClassifyNet(Module):
71 |     def __init__(self, num_feat=[16,32,64,96,128], residual='conv'):
72 |         super(ClassifyNet, self).__init__()
73 | 
74 |         # Encoder process
75 |         self.conv1 = Conv3D_Block(1, num_feat[0], residual=residual)
76 |         self.pool1 = Maxpool3D_Block()
77 |         self.conv2 = Conv3D_Block(num_feat[0], num_feat[1], residual=residual)
78 |         self.pool2 = Maxpool3D_Block()
79 |         self.conv3 = Conv3D_Block(num_feat[1], num_feat[2], residual=residual)
80 |         self.pool3 = Maxpool3D_Block()
81 |         self.conv4 = Conv3D_Block(num_feat[2], num_feat[3], residual=residual)
82 |         self.pool4 = Maxpool3D_Block()
83 |         self.conv5 = Conv3D_Block(num_feat[3], num_feat[4], residual=residual)
84 |         self.conv6 = Conv3D_Block(num_feat[4], num_feat[4], residual=residual)
85 |         self.drop = nn.Dropout3d(p = 0.5, inplace = False)
86 |         self.fc1 = nn.Linear(128*4*4*4, 128)  # assumes a 64^3 input, reduced to 4^3 by the four poolings
87 |         self.fc2 = nn.Linear(128, 2)
88 |         self.Relu = nn.ReLU()
89 | 
90 | 
91 |     def forward(self, x):
92 |         down_1 = self.conv1(x)
93 |         pool_1 = self.pool1(down_1)
94 |         down_2 = self.conv2(pool_1)
95 |         pool_2 = self.pool2(down_2)
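        # (editor's shape walk-through, assuming a 1x1x64x64x64 input)
        # conv1/pool1 -> 16x32^3, conv2/pool2 -> 32x16^3, conv3/pool3 -> 64x8^3,
        # conv4/pool4 -> 96x4^3, conv5 and conv6 -> 128x4^3, i.e. 128*4*4*4 features for fc1.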
96 |         down_3 = self.conv3(pool_2)
97 |         pool_3 = self.pool3(down_3)
98 |         down_4 = self.conv4(pool_3)
99 |         pool_4 = self.pool4(down_4)
100 |         down_5 = self.conv5(pool_4)
101 |         conv_6 = self.conv6(down_5)
102 |         conv_6 = self.drop(conv_6)
103 |         view1 = down_5.view(conv_6.size(0),-1)  # NOTE: the fc head reads down_5 here, so conv6 and the dropout above never reach the output
104 |         fc1 = self.Relu(self.fc1(view1))
105 |         out = self.fc2(fc1)
106 | 
107 |         return out
108 | 
109 | 
110 | 
111 | 
112 | 
113 | 
--------------------------------------------------------------------------------
/ClassificationModel/params.yaml:
--------------------------------------------------------------------------------
1 | # This is an example of settings that can be used as a starting point for analyzing CT data. This is only intended as a
2 | # starting point and is not likely to be the optimal settings for your dataset. Some points in determining better values
3 | # are added as comments where appropriate
4 | 
5 | # When adapting and using these settings for an analysis, be sure to add the PyRadiomics version used to allow you to
6 | # easily recreate your extraction at a later timepoint:
7 | 
8 | # ############################# Extracted using PyRadiomics version: ######################################
9 | 
10 | imageType:
11 |   Original: {}
12 |   LoG:
13 |     sigma: [1.0, 2.0, 3.0, 4.0, 5.0] # If you include sigma values >5, remember to also increase the padDistance.
14 |   Wavelet: {}
15 | 
16 | featureClass:
17 |   # redundant Compactness 1, Compactness 2 and Spherical Disproportion features are disabled by default, they can be
18 |   # enabled by specifying individual feature names (as is done for glcm) and including them in the list.
19 |   shape:
20 |   firstorder:
21 |   glcm:  # Disable SumAverage by specifying all other GLCM features available
22 |     - 'Autocorrelation'
23 |     - 'JointAverage'
24 |     - 'ClusterProminence'
25 |     - 'ClusterShade'
26 |     - 'ClusterTendency'
27 |     - 'Contrast'
28 |     - 'Correlation'
29 |     - 'DifferenceAverage'
30 |     - 'DifferenceEntropy'
31 |     - 'DifferenceVariance'
32 |     - 'JointEnergy'
33 |     - 'JointEntropy'
34 |     - 'Imc1'
35 |     - 'Imc2'
36 |     - 'Idm'
37 |     - 'Idmn'
38 |     - 'Id'
39 |     - 'Idn'
40 |     - 'InverseVariance'
41 |     - 'MaximumProbability'
42 |     - 'SumEntropy'
43 |     - 'SumSquares'
44 |   glrlm:
45 |   glszm:
46 |   gldm:
47 | 
48 | setting:
49 |   # Normalization:
50 |   # most likely not needed, CT gray values reflect absolute world values (HU) and should be comparable between scanners.
51 |   # If analyzing using different scanners / vendors, check if the extracted features are correlated to the scanner used.
52 |   # If so, consider enabling normalization by uncommenting settings below:
53 |   #normalize: true
54 |   #normalizeScale: 500  # This allows you to use more or less the same bin width.
55 | 
56 |   # Resampling:
57 |   # Usual spacing for CT is often close to 1 or 2 mm, if very large slice thickness is used,
58 |   # increase the resampled spacing.
59 |   # On a side note: increasing the resampled spacing forces PyRadiomics to look at more coarse textures, which may or
60 |   # may not increase accuracy and stability of your extracted features.
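  # For example, a scan with 0.7 x 0.7 x 5.0 mm voxels resampled to [1, 1, 1] would be interpolated
  # five-fold along z; for such thick-slice data a coarser target such as [1, 1, 2] may be a safer
  # starting point (editor's illustration, not part of the original PyRadiomics example file):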
61 | # interpolator: 'sitkBSpline' 62 | # resampledPixelSpacing: [1, 1, 1] 63 | padDistance: 10 # Extra padding for large sigma valued LoG filtered images 64 | 65 | # Mask validation: 66 | # correctMask and geometryTolerance are not needed, as both image and mask are resampled, if you expect very small 67 | # masks, consider to enable a size constraint by uncommenting settings below: 68 | #minimumROIDimensions: 2 69 | #minimumROISize: 50 70 | 71 | # Image discretization: 72 | # The ideal number of bins is somewhere in the order of 16-128 bins. A possible way to define a good binwidt is to 73 | # extract firstorder:Range from the dataset to analyze, and choose a binwidth so, that range/binwidth remains approximately 74 | # in this range of bins. 75 | binWidth: 25 76 | 77 | # first order specific settings: 78 | voxelArrayShift: 1000 # Minimum value in HU is -1000, shift +1000 to prevent negative values from being squared. 79 | 80 | # Misc: 81 | # default label value. Labels can also be defined in the call to featureextractor.execute, as a commandline argument, 82 | # or in a column "Label" in the input csv (batchprocessing) 83 | label: 1 -------------------------------------------------------------------------------- /ClassificationModel/radiomics_featureextraction.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Apr 12 13:44:09 2019 4 | 5 | @author: PC 6 | """ 7 | 8 | import SimpleITK as sitk 9 | import numpy as np 10 | from radiomics import featureextractor,imageoperations 11 | import os 12 | import pandas as pd 13 | from pandas import DataFrame as DF 14 | import warnings 15 | import time 16 | from time import sleep 17 | from tqdm import tqdm 18 | from skimage import measure 19 | 20 | 21 | def Img_Normalization(Image_Orig): 22 | Image_array = sitk.GetArrayFromImage(Image_Orig) 23 | min_ImgValue = Image_array.min() 24 | max_ImgValue = Image_array.max() 25 | ImgRange = max_ImgValue-min_ImgValue 26 | min_NewValue = 0 27 | max_NewValue = 1200 28 | NewRange = max_NewValue-min_NewValue 29 | Img_array = ((Image_array-min_ImgValue)/ImgRange)*NewRange+min_NewValue 30 | Img = sitk.GetImageFromArray(Img_array.astype(int)) 31 | Img.SetDirection(Image_Orig.GetDirection()) 32 | Img.SetOrigin(Image_Orig.GetOrigin()) 33 | Img.SetSpacing(Image_Orig.GetSpacing()) 34 | # Img.CopyInformation(Image_Orig) 35 | return Img 36 | 37 | def readDCM_Img(FilePath): 38 | reader = sitk.ImageSeriesReader() 39 | dcm_names = reader.GetGDCMSeriesFileNames(FilePath) 40 | reader.SetFileNames(dcm_names) 41 | image = reader.Execute() 42 | return image 43 | 44 | def Extract_Features(image,mask,params_path): 45 | paramsFile = os.path.abspath(params_path) 46 | extractor = featureextractor.RadiomicsFeaturesExtractor(paramsFile) 47 | result = extractor.execute(image, mask) 48 | general_info = {'diagnostics_Configuration_EnabledImageTypes','diagnostics_Configuration_Settings', 49 | 'diagnostics_Image-interpolated_Maximum','diagnostics_Image-interpolated_Mean', 50 | 'diagnostics_Image-interpolated_Minimum','diagnostics_Image-interpolated_Size', 51 | 'diagnostics_Image-interpolated_Spacing','diagnostics_Image-original_Hash', 52 | 'diagnostics_Image-original_Maximum','diagnostics_Image-original_Mean', 53 | 'diagnostics_Image-original_Minimum','diagnostics_Image-original_Size', 54 | 'diagnostics_Image-original_Spacing','diagnostics_Mask-interpolated_BoundingBox', 55 | 
'diagnostics_Mask-interpolated_CenterOfMass','diagnostics_Mask-interpolated_CenterOfMassIndex', 56 | 'diagnostics_Mask-interpolated_Maximum','diagnostics_Mask-interpolated_Mean', 57 | 'diagnostics_Mask-interpolated_Minimum','diagnostics_Mask-interpolated_Size', 58 | 'diagnostics_Mask-interpolated_Spacing','diagnostics_Mask-interpolated_VolumeNum', 59 | 'diagnostics_Mask-interpolated_VoxelNum','diagnostics_Mask-original_BoundingBox', 60 | 'diagnostics_Mask-original_CenterOfMass','diagnostics_Mask-original_CenterOfMassIndex', 61 | 'diagnostics_Mask-original_Hash','diagnostics_Mask-original_Size', 62 | 'diagnostics_Mask-original_Spacing','diagnostics_Mask-original_VolumeNum', 63 | 'diagnostics_Mask-original_VoxelNum','diagnostics_Versions_Numpy', 64 | 'diagnostics_Versions_PyRadiomics','diagnostics_Versions_PyWavelet', 65 | 'diagnostics_Versions_Python','diagnostics_Versions_SimpleITK', 66 | 'diagnostics_Image-original_Dimensionality'} 67 | features = dict((key, value) for key, value in result.items() if key not in general_info) 68 | feature_info = dict((key, value) for key, value in result.items() if key in general_info) 69 | return features,feature_info 70 | 71 | if __name__ == '__main__': 72 | start = time.clock() 73 | warnings.simplefilter('ignore') 74 | 75 | img_path = r'.\GGO_DataSet\test_data\seg_result' 76 | list_path = r'.\GGO_DataSet\test_data\test.csv' 77 | 78 | f = open(list_path) 79 | GGO_list = pd.read_csv(f) 80 | List_Num = np.array(GGO_list['Num'].tolist()) 81 | Type = GGO_list['Type'].tolist() 82 | Class = np.array(GGO_list['Class'].tolist()) 83 | List_Num = List_Num[[i for i,x in enumerate(Type) if x!='Solid']] 84 | Class= Class[[i for i,x in enumerate(Type) if x!='Solid']] 85 | List_Num = List_Num[[i for i,x in enumerate(Class) if x!=0]] 86 | Histopathology = np.array(GGO_list['Histopathology'].tolist()) 87 | Histopathology = Histopathology[[i for i,x in enumerate(Type) if x!='Solid']] 88 | Histopathology = Histopathology[[i for i,x in enumerate(Class) if x!=0]] 89 | Label = 0 90 | 91 | Feature = [] 92 | for i in tqdm(range(len(List_Num))): 93 | sleep(0.01) 94 | dcm_File = List_Num[i] 95 | roi_path = img_path+'/'+'ROI_'+str(dcm_File)+'.nii' 96 | ROI = sitk.ReadImage(roi_path) 97 | 98 | mask_path = img_path+'/'+'Mask_'+str(dcm_File)+'.nii' 99 | Mask = sitk.ReadImage(mask_path) 100 | 101 | features, feature_info = Extract_Features(ROI, Mask, 'params.yaml') 102 | if Histopathology[i] == 'AIS' or Histopathology[i] == 'MIA': 103 | Label = 0 104 | elif Histopathology[i] == 'IAC': 105 | Label = 1 106 | 107 | features['Name'] = List_Num[i] 108 | features['Histopathology'] = Histopathology[i] 109 | features['Class'] = Label 110 | 111 | Feature.append(features) 112 | 113 | df = DF(Feature).fillna('0') 114 | df.to_csv('./testing_Radiomics_Feature.csv',index=False,sep=',') 115 | 116 | end = time.clock() 117 | print(end-start) -------------------------------------------------------------------------------- /ClassificationModel/test_clf_model.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Aug 23 12:38:04 2019 4 | 5 | @author: PC 6 | """ 7 | 8 | import os 9 | import time 10 | import numpy as np 11 | from net_classify_test import * 12 | import torch 13 | from torch.backends import cudnn 14 | from torch.utils.data import Dataset 15 | from torch.utils.data import DataLoader 16 | from torch.autograd import Variable 17 | import torch.nn.functional as F 18 | from scipy.ndimage.interpolation import 
rotate
19 | import glob
20 | import pandas as pd
21 | import SimpleITK as sitk
22 | import scipy
23 | import scipy.ndimage
24 | import pandas as pd
25 | import matplotlib.pyplot as plt
26 | from skimage import measure
27 | from sklearn.metrics import accuracy_score,roc_curve,recall_score,roc_auc_score,auc,confusion_matrix,cohen_kappa_score, f1_score, precision_score,matthews_corrcoef
28 | from tqdm import tqdm
29 | from sklearn.svm import SVC,LinearSVC
30 | from sklearn.model_selection import StratifiedKFold
31 | from sklearn.feature_selection import SelectPercentile, f_classif, chi2, SelectFromModel, SelectKBest
32 | from sklearn.feature_selection import RFE
33 | from sklearn.preprocessing import MinMaxScaler
34 | import scipy.stats as stats
35 | 
36 | 
37 | def permutation_test_between_clfs(y_test, pred_proba_1, pred_proba_2, nsamples=1000):
38 |     auc_differences = []
39 |     auc1 = roc_auc_score(y_test.ravel(), pred_proba_1.ravel())
40 |     auc2 = roc_auc_score(y_test.ravel(), pred_proba_2.ravel())
41 |     observed_difference = auc1 - auc2
42 |     for _ in range(nsamples):
43 |         mask = np.random.randint(2, size=len(pred_proba_1.ravel()))
44 |         p1 = np.where(mask, pred_proba_1.ravel(), pred_proba_2.ravel())
45 |         p2 = np.where(mask, pred_proba_2.ravel(), pred_proba_1.ravel())
46 |         auc1 = roc_auc_score(y_test.ravel(), p1)
47 |         auc2 = roc_auc_score(y_test.ravel(), p2)
48 |         auc_differences.append(auc1 - auc2)
49 |     return observed_difference, np.mean(np.array(auc_differences) >= observed_difference)  # np.array: a plain list does not support '>=' against a float
50 | 
51 | def confindence_interval_compute(y_pred, y_true):
52 |     n_bootstraps = 1000
53 |     rng_seed = 42 # control reproducibility
54 |     bootstrapped_scores = []
55 | 
56 |     rng = np.random.RandomState(rng_seed)
57 |     for i in range(n_bootstraps):
58 |         # bootstrap by sampling with replacement on the prediction indices
59 |         # indices = rng.random_integers(0, len(y_pred) - 1, len(y_pred))
60 |         indices = rng.randint(0, len(y_pred), len(y_pred))  # randint's upper bound is exclusive
61 |         if len(np.unique(y_true[indices])) < 2:
62 |             # We need at least one positive and one negative sample for ROC AUC
63 |             # to be defined: reject the sample
64 |             continue
65 | 
66 |         score = roc_auc_score(y_true[indices], y_pred[indices])
67 |         bootstrapped_scores.append(score)
68 |     sorted_scores = np.array(bootstrapped_scores)
69 |     sorted_scores.sort()
70 |     confidence_std = sorted_scores.std()
71 | 
72 |     # Computing the lower and upper bound of the 90% confidence interval
73 |     # You can change the bounds percentiles to 0.025 and 0.975 to get
74 |     # a 95% confidence interval instead.
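    # (editor's note) with the default n_bootstraps = 1000 and no rejected
    # resamples, the bounds computed below are sorted_scores[50] and sorted_scores[950].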
75 | confidence_lower = sorted_scores[int(0.05 * len(sorted_scores))] 76 | confidence_upper = sorted_scores[int(0.95 * len(sorted_scores))] 77 | return confidence_lower,confidence_upper,confidence_std 78 | 79 | if __name__ == "__main__": 80 | Pretrained_path = r'.\TaiZhouHospital\model\clf_IA_VS_nonIA' 81 | model = ClassifyNet()#.cuda() 82 | classify_path = os.path.join(Pretrained_path, '020.ckpt') 83 | modelCheckpoint = torch.load(classify_path) 84 | pretrained_dict = modelCheckpoint['state_dict'] 85 | model_dict = model.state_dict() 86 | pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}#filter out unnecessary keys 87 | model_dict.update(pretrained_dict) 88 | model.load_state_dict(model_dict) 89 | model.eval() 90 | img_path = r'.\GGO_DataSet\test_data\test_Img' 91 | list_path = r'.\GGO_DataSet\test_data\test.csv' 92 | 93 | f = open(list_path) 94 | GGO_list = pd.read_csv(f) 95 | List_Num = np.array(GGO_list['Num'].tolist()) 96 | Type = GGO_list['Type'].tolist() 97 | Class = np.array(GGO_list['Class'].tolist()) 98 | List_Num = List_Num[[i for i,x in enumerate(Type) if x!='Solid']] 99 | Class= Class[[i for i,x in enumerate(Type) if x!='Solid']] 100 | List_Num = List_Num[[i for i,x in enumerate(Class) if x!=0]] 101 | Histopathology = np.array(GGO_list['Histopathology'].tolist()) 102 | Histopathology = Histopathology[[i for i,x in enumerate(Type) if x!='Solid']] 103 | Histopathology = Histopathology[[i for i,x in enumerate(Class) if x!=0]] 104 | 105 | prob = [] 106 | prob_label = [] 107 | real_class = [] 108 | test_result=[] 109 | for i in tqdm(range(len(List_Num))): 110 | roi_path = os.path.join(img_path, List_Num[i]+'_roi.npy') 111 | label_path = os.path.join(img_path, List_Num[i]+'_label.npy') 112 | data = np.load(roi_path) 113 | data = data[np.newaxis,...] 
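# (editor's note) the saved ROI already carries its channel axis, so this newaxis adds the batch dimension: (1, 1, 64, 64, 64)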
114 | data = torch.from_numpy(data.astype(np.float32)) 115 | GGO_Class = np.load(label_path) 116 | with torch.no_grad(): 117 | input_data = Variable(data)#.cuda() 118 | predict = model(input_data) 119 | result = predict.data.cpu().numpy() 120 | prob.append(result[0][1]) 121 | real_class.append(GGO_Class) 122 | prob_label.append(np.argmax(result[0])) 123 | test = {} 124 | test['Num'] = List_Num[i] 125 | test['Class'] = GGO_Class 126 | test['Prob'] = result[0][1] 127 | test['Histopathology'] = Histopathology[i] 128 | test_result.append(test) 129 | df = pd.DataFrame(test_result).fillna('null') 130 | df.to_csv('./test_result.csv',index=False,sep=',') 131 | print('Our Model ACC:',accuracy_score(real_class,prob_label)*100) 132 | fpr,tpr,threshold = roc_curve(np.array(real_class),prob) 133 | auc = auc(fpr,tpr) 134 | auc_fl_cnn, auc_fh_cnn, auc_fstd_cnn = confindence_interval_compute(np.array(prob), np.array(real_class)) 135 | print('AUC:%.2f'%auc,'+/-%.2f'%auc_fstd_cnn,' 95% CI:[','%.2f,'%auc_fl_cnn,'%.2f'%auc_fh_cnn,']') 136 | F1 = f1_score(np.array(real_class),prob_label) 137 | print('F1:',F1) 138 | F1_w = f1_score(np.array(real_class),prob_label,average='weighted') 139 | print('F1_weight:',F1_w) 140 | MCC = matthews_corrcoef(np.array(real_class),prob_label) 141 | print('MCC:',MCC) 142 | 143 | 144 | training_csv = r'.\Radiomics_Feature.csv' 145 | testing_csv = r'.\testing_Radiomics_Feature.csv' 146 | f_training = open(training_csv) 147 | train_list = pd.read_csv(f_training) 148 | train_x = np.array(train_list.values[:,3:]) 149 | train_y = np.array(train_list['Class'].tolist()) 150 | 151 | f_testing = open(testing_csv) 152 | test_list = pd.read_csv(f_testing) 153 | 154 | test_x = np.array(test_list.values[:,3:]) 155 | test_y = np.array(test_list['Class'].tolist()) 156 | 157 | 158 | # Feature normalization 159 | min_max_scaler = MinMaxScaler() 160 | train_x = min_max_scaler.fit_transform(np.array(train_x,dtype=np.float64)) 161 | test_x = min_max_scaler.transform(test_x) 162 | 163 | selector = SelectKBest(f_classif, 20) 164 | train_x = selector.fit_transform(train_x, train_y) 165 | test_x = selector.transform(test_x) 166 | 167 | clf = SVC(kernel='rbf', probability=True, random_state=0, gamma='scale') 168 | clf.fit(train_x, train_y) 169 | test_prob = clf.predict_proba(test_x)[:,1] 170 | test_label = clf.predict(test_x) 171 | print('Radiomics:',accuracy_score(test_y,test_label)) 172 | fpr3,tpr3,threshold3 = roc_curve(np.array(test_y),test_prob) 173 | auc3 = roc_auc_score(np.array(test_y),test_prob) 174 | auc_fl_ra, auc_fh_ra, auc_fstd_ra = confindence_interval_compute(np.array(test_prob), np.array(test_y)) 175 | print('AUC:%.2f'%auc3,'+/-%.2f'%auc_fstd_ra,' 95% CI:[','%.2f,'%auc_fl_ra,'%.2f'%auc_fh_ra,']') 176 | F1_3 = f1_score(np.array(test_y),test_label) 177 | print('F1:', F1_3) 178 | F1_w3 = f1_score(np.array(test_y),test_label,average='weighted') 179 | print('F1_weight:',F1_w3) 180 | MCC3 = matthews_corrcoef(np.array(test_y),test_label) 181 | print('MCC:',MCC3) 182 | 183 | radiology1_path = r'.\GGO_DataSet\test_data\radiology_HW.csv' 184 | radiology1 = open(radiology1_path) 185 | radiology1_List = pd.read_csv(radiology1) 186 | radiology1_result = radiology1_List['Diagnosis'].tolist() 187 | print('HW:',accuracy_score(real_class,radiology1_result)) 188 | fpr1,tpr1,threshold1 = roc_curve(np.array(real_class),radiology1_result) 189 | F1_1 = f1_score(np.array(real_class),radiology1_result) 190 | print('F1:',F1_1) 191 | F1_w1 = f1_score(np.array(real_class),radiology1_result,average='weighted') 192 | 
print('F1_weight:',F1_w1) 193 | MCC1 = matthews_corrcoef(np.array(real_class),radiology1_result) 194 | print('MCC:',MCC1) 195 | # auc1 = auc(fpr1,tpr1) 196 | # print(auc1) 197 | TN1, FP1, FN1, TP1 = confusion_matrix(real_class,radiology1_result).ravel() 198 | 199 | ACC1 = (TP1+TN1)/(TP1+FP1+FN1+TN1) 200 | print('ACC:%0.4f'%ACC1) 201 | 202 | radiology2_path = r'.\GGO_DataSet\test_data\radiology_WSP.csv' 203 | radiology2 = open(radiology2_path) 204 | radiology2_List = pd.read_csv(radiology2) 205 | radiology2_result = radiology2_List['Diagnosis'].tolist() 206 | print('WSP:',accuracy_score(real_class,radiology2_result)) 207 | fpr2,tpr2,threshold2 = roc_curve(np.array(real_class),radiology2_result) 208 | F1_2 = f1_score(np.array(real_class),radiology2_result) 209 | print('F1:',F1_2) 210 | F1_w2 = f1_score(np.array(real_class),radiology2_result,average='weighted') 211 | print('F1_weight:',F1_w2) 212 | MCC2 = matthews_corrcoef(np.array(real_class),radiology2_result) 213 | print('MCC:',MCC2) 214 | TN2, FP2, FN2, TP2 = confusion_matrix(real_class,radiology2_result).ravel() 215 | 216 | ACC2 = (TP2+TN2)/(TP2+FP2+FN2+TN2) 217 | print('ACC:%0.4f'%ACC2) 218 | 219 | kappa = cohen_kappa_score(radiology1_result,radiology2_result) 220 | print('kappa:%0.4f'%kappa) 221 | 222 | kappa1 = cohen_kappa_score(radiology1_result,prob_label) 223 | print('kappa1:%0.4f'%kappa1) 224 | 225 | kappa2 = cohen_kappa_score(radiology2_result,prob_label) 226 | print('kappa1:%0.4f'%kappa2) 227 | 228 | ## Fusion 229 | scales = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9] 230 | 231 | for scale in scales: 232 | prob_fusion = scale*np.array(prob)+(1-scale)*np.array(test_prob) 233 | # fpr,tpr,threshold = roc_curve(np.array(real_class),prob_fusion) 234 | # auc_value = auc(fpr,tpr) 235 | auc_value = roc_auc_score(np.array(real_class),prob_fusion) 236 | auc_fl, auc_fh, auc_fstd = confindence_interval_compute(np.array(prob_fusion), np.array(real_class)) 237 | print('Fusion Scale',scale,'AUC:%.2f'%auc_value,'+/-%.2f'%auc_fstd, 238 | ' 95% CI:[','%.2f,'%auc_fl,'%.2f'%auc_fh,']') 239 | Fusion = np.zeros([len(prob),2]) 240 | Fusion[:,0] = np.array(prob) 241 | Fusion[:,1] = np.array(test_prob) 242 | Fusion_min = Fusion.min(1) 243 | Fusion_max = Fusion.max(1) 244 | 245 | auc_min = roc_auc_score(np.array(real_class),Fusion_min) 246 | auc_fl_min, auc_fh_min, auc_fstd_min = confindence_interval_compute(np.array(Fusion_min), np.array(real_class)) 247 | print('Min Fusion AUC:%.2f'%auc_min,'+/-%.2f'%auc_fstd_min,' 95% CI:[','%.2f,'%auc_fl_min,'%.2f'%auc_fh_min,']') 248 | # fpr,tpr,threshold = roc_curve(np.array(y),Fusion_max) 249 | 250 | auc_max = roc_auc_score(np.array(real_class),Fusion_max) 251 | auc_fl_max, auc_fh_max,auc_fstd_max = confindence_interval_compute(np.array(Fusion_max), np.array(real_class)) 252 | print('Max Fusion AUC:%.2f'%auc_max,'+/-%.2f'%auc_fstd_max,' 95% CI:[','%.2f,'%auc_fl_max,'%.2f'%auc_fh_max,']') 253 | 254 | 255 | Fusion_new = Fusion_max 256 | fpr4,tpr4,threshold4 = roc_curve(np.array(real_class),Fusion_new) 257 | F1_0 = f1_score(np.array(real_class),Fusion_new>=0.6) 258 | print('ACC:',accuracy_score(real_class,Fusion_new>=0.6)) 259 | print('F1:', F1_0) 260 | F1_w0 = f1_score(np.array(real_class),Fusion_new>=0.6,average='weighted') 261 | print('F1_weight:',F1_w0) 262 | MCC0 = matthews_corrcoef(np.array(real_class),Fusion_new>=0.6) 263 | print('MCC:',MCC0) 264 | 265 | stat_val3, p_val3 = stats.ttest_ind(prob, Fusion_new, equal_var=False) 266 | print('Seg_transfer and Fusion p:%.5f'%p_val3) 267 | stat_val4, p_val4 = 
stats.ttest_ind(test_prob, Fusion_new, equal_var=False)
268 |     print('Radiomics and Fusion p:%.5f'%p_val4)
269 |     stat_val5, p_val5 = stats.ttest_ind(test_prob, prob, equal_var=False)
270 |     print('Seg_transfer and Radiomics p:%.5f'%p_val5)
271 | 
272 |     font = {'family' : 'Times New Roman',
273 |             'weight' : 'normal',
274 |             'size' : 10,}
275 |     plt.rc('font', **font)
276 | 
277 |     lw = 1
278 |     plt.figure(figsize=(5,5))
279 |     plt.plot(fpr4, tpr4, color='red',
280 |              lw=lw, label='Fusion model AUC:%.2f'%roc_auc_score(np.array(real_class),Fusion_new))
281 |     plt.plot(fpr, tpr, color='blue',
282 |              lw=lw, label='DL model AUC:%.2f'%roc_auc_score(np.array(real_class),prob))
283 | 
284 |     plt.plot(fpr3, tpr3, color='g',
285 |              lw=lw, label='Radiomics model AUC:%.2f'%roc_auc_score(np.array(test_y),test_prob))
286 | 
287 |     plt.plot([0, 1], [0, 1], color='navy', lw=1, linestyle='--')
288 |     plt.plot(fpr2[1], tpr2[1], color='orange',marker = '^',
289 |              label='Senior Radiologist') #
290 | 
291 |     plt.plot(fpr1[1], tpr1[1], color='c',marker = 'o',
292 |              label='Junior Radiologist')
293 |     plt.xlim([0.0, 1.0])
294 |     plt.ylim([0.0, 1.0])
295 |     plt.xlabel('False Positive Rate')
296 |     plt.ylabel('True Positive Rate')
297 |     plt.legend(loc="lower right")
298 |     plt.show()
299 | 
300 | 
301 | 
--------------------------------------------------------------------------------
/ClassificationModel/train_clf_model.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Fri Aug 9 10:43:26 2019
4 | 
5 | @author: PC
6 | """
7 | 
8 | # -*- coding: utf-8 -*-
9 | """
10 | Created on Mon Aug 5 08:55:09 2019
11 | 
12 | @author: PC
13 | 
14 | """
15 | 
16 | import os
17 | import time
18 | import numpy as np
19 | from net_classify import *
20 | import torch
21 | from torch.backends import cudnn
22 | from torch.utils.data import Dataset
23 | from torch.utils.data import DataLoader
24 | from torch.autograd import Variable
25 | from scipy.ndimage.interpolation import rotate
26 | import glob
27 | import pandas as pd
28 | import SimpleITK as sitk
29 | 
30 | #from CumulativeAverager import *
31 | 
32 | 
33 | def augment(roi, ifflip = True, ifrotate=True, ifswap = True):
34 |     if ifrotate:
35 |         angle1 = np.random.rand()*180  # random in-plane angle; a fixed angle would make this augmentation deterministic
36 |         roi = rotate(roi,angle1,axes=(1,2),reshape=False)
37 |     if ifswap:
38 |         axisorder = np.random.permutation(3)
39 |         roi = np.transpose(roi,np.concatenate([[0],axisorder+1]))
40 |     if ifflip:
41 |         flipid = np.array([1,np.random.randint(2),np.random.randint(2)])*2-1
42 |         roi = np.ascontiguousarray(roi[:,::flipid[0],::flipid[1],::flipid[2]])
43 |     return roi
44 | 
45 | class GGODataGenerator(Dataset):
46 |     def __init__(self, img_path, csv_path, phase='train'):
47 |         assert(phase == 'train' or phase == 'val' or phase == 'test')
48 |         f = open(csv_path)
49 |         GGO_list = pd.read_csv(f)
50 |         List_Num = np.array(GGO_list['new_number'].tolist())
51 |         Label = GGO_list['Class'].tolist()
52 |         self.List_Num = List_Num[[i for i,x in enumerate(Label) if x=='Malignant']]
53 |         Histopathology = np.array(GGO_list['Histopathology'].tolist())
54 |         self.Histopathology = Histopathology[[i for i,x in enumerate(Label) if x=='Malignant']]
55 |         self.img_path = img_path
56 |         self.phase = phase
57 | 
58 |     def __getitem__(self,idx):
59 |         if self.phase =='train':
60 |             if idx>=(len(self.List_Num)*3):  # test the widest range first; in ascending order the first branch would swallow every augmented index
61 |                 idx = idx%(len(self.List_Num)*3)
62 |                 ifflip = False
63 |                 ifrotate= False
64 |                 ifswap = True
65 |             elif idx>=(len(self.List_Num)*2):
66 |                 idx = idx%(len(self.List_Num)*2)
67 |                 ifflip = False
68 |                 ifrotate= True
69 |                 ifswap = False
70 |             elif idx>=len(self.List_Num):
71 |                 idx = idx%len(self.List_Num)
72 |                 ifflip = True
73 |                 ifrotate= False
74 |                 ifswap = False
75 |             else:
76 |                 ifflip = False
77 |                 ifrotate= False
78 |                 ifswap = False
79 | 
80 |         if self.phase == 'train':
81 |             dcm_File = self.List_Num[idx]
82 |             roi_path = self.img_path+'/'+'ROI_'+str(dcm_File)+'.nii'
83 |             ROI = sitk.ReadImage(roi_path)
84 |             ROI = sitk.GetArrayFromImage(ROI).transpose(2,1,0)
85 |             ROI = (ROI.astype(np.float32)-128)/128
86 |             ROI = ROI[np.newaxis,...]
87 |             ROI = augment(ROI, ifflip = ifflip, ifrotate=ifrotate, ifswap = ifswap)
88 |         else:
89 |             dcm_File = self.List_Num[idx]
90 |             roi_path = self.img_path+'/'+'ROI_'+str(dcm_File)+'.nii'
91 |             ROI = sitk.ReadImage(roi_path)
92 |             ROI = sitk.GetArrayFromImage(ROI).transpose(2,1,0)  # channel axis is added once, below
93 |             ROI = (ROI.astype(np.float32)-128)/128
94 |             ROI = ROI[np.newaxis,...]
95 |         if self.Histopathology[idx] == 'AIS' or self.Histopathology[idx] == 'MIA':
96 |             Label = 0
97 |         elif self.Histopathology[idx] == 'IA':  # NOTE: the test pipeline spells this label 'IAC'
98 |             Label = 1
99 |         return ROI, Label
100 | 
101 |     def __len__(self):
102 |         if self.phase == 'train':
103 |             return len(self.List_Num)*4
104 |         else:
105 |             return len(self.List_Num)
106 | 
107 | def get_lr(epoch, lr):
108 |     if epoch <= epochs * 0.5:
109 |         lr = lr
110 |     elif epoch <= epochs * 0.8:
111 |         lr = 0.1 * lr
112 |     else:
113 |         lr = 0.01 * lr
114 |     return lr
115 | 
116 | def get_optimizer(parameters, st, lr, momentum=0.9):
117 |     if st == 'sgd':
118 |         return torch.optim.SGD(parameters, lr = lr, momentum=momentum, weight_decay=1e-3)
119 |     elif st == 'adam':
120 |         return torch.optim.Adam(parameters, lr = lr, weight_decay=1e-4)
121 | 
122 | 
123 | def train(dataloader, net, loss_fun,epoch, optimizer, get_lr, save_freq, save_dir):
124 |     starttime = time.time()
125 |     net.train()
126 |     lr = get_lr(epoch, 0.001)
127 |     for param_group in optimizer.param_groups:
128 |         param_group['lr'] = lr
129 |     metrics = []
130 |     acc = []
131 |     ok = 0
132 | 
133 |     for i, (data,Class) in enumerate(dataloader):
134 |         optimizer.zero_grad() # clear gradients for next train
135 |         data = Variable(data.cuda(non_blocking = True))
136 |         target = Variable(Class.cuda(non_blocking = True))
137 |         l1_regularization = torch.tensor(0.).cuda()
138 |         # l2_regularization = torch.tensor(0.).cuda()
139 |         target = target.long()
140 |         output = net(data)
141 |         loss_output = loss_fun(output, target)
142 |         for param in [clf_model.fc1.weight, clf_model.fc1.bias,
143 |                       clf_model.fc2.weight, clf_model.fc2.bias]:
144 |             l1_regularization = l1_regularization + param.abs().sum()
145 |         loss = loss_output + 1e-4*l1_regularization  # L1 penalty on the trainable fc layers; the 1e-4 weight is an assumed value
146 |         loss.backward() # backpropagation, compute gradients
147 |         optimizer.step() # apply gradients
148 |         metrics.append(loss_output.item())
149 |         _, predicted = torch.max(output.data,1)
150 |         ok = ok+(predicted==target).sum().item()
151 |         traind_total = (i + 1) * len(target)
152 |         acc_output = 100.
* ok / traind_total 153 | acc.append(acc_output) 154 | if epoch > 0 and epoch % save_freq == 0: 155 | state_dict = net.state_dict() 156 | for key in state_dict.keys(): 157 | state_dict[key] = state_dict[key].cpu() 158 | torch.save({ 159 | 'epoch': epoch, 160 | 'save_dir': save_dir, 161 | 'state_dict': state_dict}, 162 | os.path.join(save_dir, '%03d.ckpt' % epoch)) 163 | 164 | endtime = time.time() 165 | metrics = np.asarray(metrics, np.float32) 166 | acc = np.asarray(acc, np.float32) 167 | print('Epoch %03d (lr %.5f)' % (epoch, lr)) 168 | print('loss %2.4f' % (np.mean(metrics))) 169 | print('Accuracy %2.4f' % (np.mean(acc))) 170 | print('time:%3.2f'%(endtime-starttime)) 171 | 172 | if __name__ == '__main__': 173 | Pretrained_path = './model' 174 | clf_model = ClassifyNet().cuda() 175 | classify_path = os.path.join(Pretrained_path, '200.ckpt') 176 | modelCheckpoint = torch.load(classify_path) 177 | pretrained_dict = modelCheckpoint['state_dict'] 178 | model_dict = clf_model.state_dict() 179 | pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}#filter out unnecessary keys 180 | model_dict.update(pretrained_dict) 181 | clf_model.load_state_dict(model_dict) 182 | torch.cuda.set_device(0) 183 | for k,v in clf_model.named_parameters(): 184 | if k!='fc1.weight' and k!='fc1.bias' and k!='fc2.weight' and k!='fc2.bias': 185 | v.requires_grad = False 186 | 187 | optimizer = get_optimizer(parameters=[clf_model.fc1.weight, clf_model.fc1.bias, 188 | clf_model.fc2.weight, clf_model.fc2.bias 189 | ], st='adam', lr=0.001) 190 | 191 | loss = torch.nn.CrossEntropyLoss().cuda() 192 | cudnn.benchmark = True 193 | img_path = r'.\TaiZhouHospital\ROIs' 194 | csv_path = r'.\TaiZhouHospital\GGO_list.csv' 195 | 196 | dataset = GGODataGenerator(img_path,csv_path, phase='train') 197 | data_loader = DataLoader(dataset, batch_size = 1,shuffle = True, num_workers = 0, pin_memory=True) 198 | save_freq = 2 199 | epochs = 20 200 | lr = 0.001 201 | 202 | save_dir = r'.\TaiZhouHospital\model\clf_IA_VS_nonIA' 203 | for epoch in range(0, epochs + 1): 204 | train(data_loader, clf_model, loss, epoch, optimizer, get_lr, save_freq, save_dir) 205 | -------------------------------------------------------------------------------- /DataGenerator/GGO_ROIgenerator.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Thu Aug 1 14:02:14 2019 4 | 5 | @author: PC 6 | """ 7 | 8 | from lidcxmlparser import * 9 | import pydicom as dicom 10 | import numpy as np 11 | import os 12 | import glob 13 | from skimage import draw, measure 14 | import scipy 15 | 16 | # Load the scans in given folder path 17 | def load_scan(path): 18 | slices = [dicom.read_file(s) for s in glob.glob(path+'/*.dcm')] 19 | slices.sort(key = lambda x: float(x.ImagePositionPatient[2])) 20 | try: 21 | slice_thickness = np.abs(slices[0].ImagePositionPatient[2] - slices[1].ImagePositionPatient[2]) 22 | except: 23 | slice_thickness = np.abs(slices[0].SliceLocation - slices[1].SliceLocation) 24 | 25 | for s in slices: 26 | s.SliceThickness = slice_thickness 27 | 28 | return slices 29 | 30 | def resample(img, new_spacing=[1,1,1]): 31 | # Determine current pixel spacing 32 | image = img['array'] 33 | spacing = img['Spacing'] 34 | img_size = np.array(image.shape) 35 | 36 | resize_factor = spacing / new_spacing 37 | new_real_shape = image.shape * resize_factor 38 | new_shape = np.round(new_real_shape) 39 | real_resize_factor = new_shape / image.shape 40 | new_spacing = spacing 
/ real_resize_factor 41 | 42 | image = scipy.ndimage.interpolation.zoom(image, real_resize_factor) 43 | 44 | return image, img_size, real_resize_factor 45 | 46 | def normalize_hu(image): 47 | MIN_BOUND = -1200.0 48 | MAX_BOUND = 600.0 49 | image = (image - MIN_BOUND) / (MAX_BOUND - MIN_BOUND) 50 | image[image > 1] = 1 51 | image[image < 0] = 0 52 | image = (image*255).astype('uint8') 53 | return image 54 | 55 | def crop_roi(resampled_img, img_size, seed_pos, crop_size, resize_factor): 56 | initial_seed = [seed_pos[0], seed_pos[1], seed_pos[2]] 57 | trans_seed = initial_seed*resize_factor 58 | start = [] 59 | end= [] 60 | for i in range(3): 61 | s = np.floor(trans_seed[i]-(crop_size[i]/2)) 62 | e = np.ceil(trans_seed[i]+(crop_size[i]/2)) 63 | if s<0: 64 | s = 0 65 | if e>resampled_img.shape[i]: 66 | e = resampled_img.shape[i] 67 | if e-s != crop_size[i]: 68 | pad = e-s-crop_size[i] 69 | if s==0: 70 | e = e-pad 71 | else: 72 | s = s+pad 73 | start.append(int(s)) 74 | end.append(int(e)) 75 | # print(start,end,pad) 76 | roi = resampled_img[start[0]:end[0], start[1]:end[1], start[2]:end[2]] 77 | 78 | return roi 79 | def get_nodule_center(xml_path, image, slice_loc): 80 | gt = LIDCXmlParser(xml_path) 81 | gt.parse() 82 | mask = np.zeros(image.shape) 83 | for indx, rad in enumerate(gt.rad_annotations): #has 4 radiologistes 84 | mask_1 = np.zeros(image.shape) 85 | for nod in rad.nodules: #nod is one NormalNodule 86 | # if nod.characterstics.texture <= 3: 87 | for nod_roi in nod.rois: 88 | z_index = np.where(slice_loc==nod_roi.z)[0][0] 89 | xy = np.array(nod_roi.roi_xy) 90 | xx, yy = draw.polygon(xy[:,1],xy[:,0]) 91 | mask_1[xx,yy,z_index] = 1 92 | mask = mask+mask_1 93 | mask = np.array(mask>1).astype(int) 94 | L_mask = measure.label(mask) 95 | L_props= measure.regionprops(L_mask) 96 | center_pos = [] 97 | for props in L_props: 98 | center = np.array(props.centroid).astype(int) 99 | center_pos.append(center) 100 | return center_pos,mask 101 | 102 | 103 | def search_xml(file_dir): 104 | xml_path=[] 105 | for root, dirs, files in os.walk(file_dir): 106 | for file in files: 107 | if os.path.splitext(file)[1] == '.xml': 108 | xml_path.append(os.path.join(root, file)) 109 | return xml_path 110 | 111 | if __name__ == '__main__': 112 | 113 | home_path = './LIDC' 114 | save_path = './Nodule_crop' 115 | data_list = os.listdir(home_path) 116 | Nodule_num = 0 117 | for patient_path in data_list: 118 | img_path = os.path.join(home_path,patient_path) 119 | xml_listpath = search_xml(img_path) 120 | for xml_path in xml_listpath: 121 | path, xml_f = os.path.split(xml_path) 122 | dcm = [s for s in glob.glob(path+'/*.dcm')] 123 | if len(dcm) > 10: 124 | dicom_slices = load_scan(path) 125 | image = [s.pixel_array*int(s.RescaleSlope)+int(s.RescaleIntercept) for s in dicom_slices ] 126 | image = np.array(image).transpose(1,2,0) 127 | slice_loc = np.array([s.ImagePositionPatient[2] for s in dicom_slices]).astype(float) 128 | spacing = np.array([dicom_slices[0].PixelSpacing[0], 129 | dicom_slices[0].PixelSpacing[1], 130 | dicom_slices[0].SliceThickness]).astype(float) 131 | center_pos,mask = get_nodule_center(xml_path, image, slice_loc) 132 | if len(center_pos) != 0: 133 | image_new = {} 134 | image_new['array'] = image 135 | image_new['Spacing'] = spacing 136 | img, img_size, resize_factor = resample(image_new) 137 | mask_new = {} 138 | mask_new['array'] = mask 139 | mask_new['Spacing'] = spacing 140 | label, label_size, resize_factor = resample(mask_new) 141 | img = normalize_hu(img) 142 | for pos in center_pos: 143 | 
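# (editor's note) each consensus centroid below is cropped to a fixed 64x64x64 ROI, together with the matching crop of the multi-reader agreement mask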
Nodule_num = Nodule_num+1 144 | seed_pos = [pos[0], pos[1], pos[2]] 145 | ROI = crop_roi(img, img_size, seed_pos, [64,64,64] , resize_factor) 146 | ROI_label = crop_roi(label, label_size, seed_pos, [64, 64, 64], resize_factor) 147 | ROI = (ROI.astype(np.float32)-128)/128 148 | # 149 | np.save(os.path.join(save_path,str(Nodule_num)+'_roi.npy'), ROI) 150 | np.save(os.path.join(save_path,str(Nodule_num)+'_label.npy'), ROI_label) 151 | 152 | 153 | 154 | 155 | 156 | 157 | -------------------------------------------------------------------------------- /DataGenerator/README: -------------------------------------------------------------------------------- 1 | # The Preprocessing code 2 | -------------------------------------------------------------------------------- /DataGenerator/annotstructs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Mon Jun 10 14:19:22 2015 3 | 4 | @author: tizita nesibu 5 | """ 6 | 7 | 8 | class NoduleCharstics: 9 | def __init__(self): 10 | self.subtlety = 0 11 | self.internalstructure = 0 12 | self.calcification = 0 13 | self.sphericity = 0 14 | self.margin = 0 15 | self.lobulation = 0 16 | self.spiculation = 0 17 | self.texture = 0 18 | self.malignancy = 0 19 | return 20 | 21 | def __str__(self): 22 | strng = "subtlty (%d) intstruct (%d) calci (%d) sphere (%d) margin (%d) lob (%d) spicul (%d) txtur (%d) malig (%d)" % \ 23 | (self.subtlety, self.internalstructure, self.calcification, self.sphericity, self.margin, self.lobulation, \ 24 | self.spiculation, self.texture, self.malignancy) 25 | return strng 26 | 27 | def setValues(self, sub, inter, calc, spher, lob, spic, tex, malig): 28 | self.subtlety = sub 29 | self.internalstructure = inter 30 | self.calcification = calc 31 | self.sphericity = spher 32 | self.lobulation = lob 33 | self.spiculation = spic 34 | self.texture = tex 35 | self.malignancy = malig 36 | return 37 | 38 | class NoduleRoi: #is common for nodule and nonnodule 39 | def __init__(self, z_pos = 0., sop_uid = ''): 40 | self.z = z_pos 41 | self.sop_uid = sop_uid 42 | self.inclusion = True 43 | 44 | self.roi_xy = [] #to hold list of x,ycords in edgemap(edgmap pairs) 45 | self.roi_rect = [] #rectangle to hold the roi 46 | self.roi_centroid = [] #to hold centroid of the roi 47 | return 48 | 49 | def __str__(self): 50 | n_pts = len(self.roi_xy) 51 | strng = "Inclusion (%s) Z = %.2f SOP_UID (%s) \n ROI points [ %d ] :: "%(self.inclusion,self.z, self.sop_uid,n_pts) 52 | 53 | if (n_pts > 2): 54 | strng += "[[ %d,%d ]] :: "%(self.roi_centroid[0],self.roi_centroid[1]) 55 | strng += "(%d, %d), (%d,%d)..."%(self.roi_xy[0][0],self.roi_xy[0][1],self.roi_xy[1][0],self.roi_xy[1][1]) 56 | strng += "(%d, %d), (%d,%d)"%(self.roi_xy[-2][0],self.roi_xy[-2][1],self.roi_xy[-1][0],self.roi_xy[-1][1]) 57 | else: 58 | for i in range(n_pts): 59 | strng += "(%d, %d),"% (self.roi_xy[i][0], self.roi_xy[i][1]) 60 | return strng 61 | 62 | 63 | class Nodule: #is base class for all nodule types (NormalNodule, SmallNodule, NonNodule) 64 | def __init__(self): 65 | self.id = None 66 | self.rois = [] 67 | self.is_small = False 68 | 69 | def __str__(self): 70 | strng = "--- Nodule ID (%s) Small [%s] ---\n"%(self.id,str(self.is_small)) 71 | strng += self.tostring() + "\n" 72 | return strng 73 | 74 | def tostring(self): 75 | pass 76 | 77 | class NoduleAnnotationCluster(): # to be seen 78 | def __init__(self): 79 | self.id = [] 80 | self.z_pos = [] 81 | self.centroid = []#(x,y) of the centroid 82 | # convex hull description 83 | # p0 
---- p1 84 | # | | 85 | # p2-----p3 86 | self.convex_hull = [] # [()_0 ()_1 ()_2 ()_3] 87 | self.convex_hull_with_margin = [] 88 | self.no_annots = 0 89 | self.nodules_data = [] 90 | 91 | 92 | def compute_centroid(self): 93 | self.set_convex_hull() 94 | xc = 0.5*(self.convex_hull[0][0] + self.convex_hull[3][0]) # (x_min + x_max)/2 95 | yc = 0.5*(self.convex_hull[0][1] + self.convex_hull[3][1]) # (y_min + y_max)/2 96 | self.centroid = (xc,yc) 97 | return self.centroid 98 | 99 | def set_convex_hull(self): 100 | x_min, x_max = 640, 0 101 | y_min, y_max = 640, 0 102 | 103 | for nodule in self.nodules_data: 104 | for roi in nodule.rois: 105 | for dt_pt in roi.roi_xy: 106 | #roi.roi_xy -> [(x,y)] 107 | # TODO : finish this loop #????????????????????????????? 108 | x_min = dt_pt[0] if (x_min > dt_pt[0]) else x_min 109 | x_max = dt_pt[0] if (x_max < dt_pt[0]) else x_max 110 | y_min = dt_pt[1] if (y_min > dt_pt[1]) else y_min 111 | y_max = dt_pt[1] if (y_max < dt_pt[1]) else y_max 112 | self.convex_hull = [(x_min,y_min),(x_max,y_min), 113 | (x_min,y_max),(x_max,y_max)] 114 | w, h = (x_max-x_min), (y_max-y_min) 115 | x_min = int(x_min - 0.15*w) 116 | x_max = int(x_max + 0.15*w) 117 | y_min = int(y_min - 0.15*h) 118 | y_max = int(y_max + 0.15*h) 119 | self.convex_hull_with_margin = [(x_min,y_min),(x_max,y_min), 120 | (x_min,y_max),(x_max,y_max)] 121 | 122 | 123 | class NormalNodule(Nodule): 124 | 125 | def __init__(self): 126 | Nodule.__init__(self) 127 | self.characterstics = NoduleCharstics() 128 | self.is_small = False 129 | 130 | def tostring(self): 131 | strng = str(self.characterstics) 132 | strng += "\n" 133 | 134 | for roi in self.rois: 135 | strng += str(roi) + "\n" #str calls __str__ of NoduleRoi's class i.e.converting roi to 136 | return strng #string to prepare it for printing(it doesn't print it) 137 | 138 | class SmallNodule(Nodule): 139 | 140 | def __init__(self): 141 | Nodule.__init__(self) 142 | self.is_small = True 143 | 144 | def tostring(self): 145 | strng = '' 146 | for roi in self.rois: 147 | strng += str(roi) + "\n" 148 | return strng 149 | 150 | class NonNodule(Nodule): 151 | 152 | def __init__(self): 153 | Nodule.__init__(self) 154 | self.is_small = True 155 | 156 | def tostring(self): 157 | strng = '' 158 | for roi in self.rois: 159 | strng += str(roi) 160 | return strng 161 | 162 | class RadAnnotation: 163 | def __init__(self, init=True): 164 | self.version = None 165 | self.id = None 166 | 167 | self.nodules = [] #is normalNodule i.e in xml unblindedReadNodule with characteristics info 168 | self.small_nodules = [] #in xml unblindedReadNodule with no characteristics info 169 | self.non_nodules = [] #located inside readingSession 170 | self.initialized = init 171 | return 172 | 173 | def is_init(self): 174 | return self.initialized 175 | 176 | def set_init(self, init): 177 | self.initialized = init 178 | return 179 | 180 | def __str__(self): 181 | n_nodules = len(self.nodules) 182 | n_small_nodules = len(self.small_nodules) 183 | n_non_nodules = len(self.non_nodules) 184 | strng = "Annotation Version [%s] Radiologist ID [%s] \n"%(self.version, self.id) 185 | strng += "#Nodules [%d] #SmallNodules [%d] #NonNodules[%d] \n"%(n_nodules, n_small_nodules, n_non_nodules) 186 | 187 | if (n_nodules > 0): 188 | strng += "--- Nodules [%d]---\n"%n_nodules 189 | for i in range(n_nodules): 190 | strng += str(self.nodules[i]) 191 | 192 | if (n_small_nodules > 0): 193 | strng += "--- Small Nodules [%d] ---\n"%n_small_nodules 194 | for i in range(n_small_nodules): 195 | strng += 
str(self.small_nodules[i]) 196 | 197 | if (n_non_nodules > 0): 198 | strng += "--- Non Nodules [%d] ---\n"%n_non_nodules 199 | for i in range(n_non_nodules): 200 | strng += str(self.non_nodules[i]) 201 | 202 | strng += "-"*79 + "\n" 203 | return strng 204 | 205 | 206 | 207 | -------------------------------------------------------------------------------- /DataGenerator/lidcxmlparser.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Jun 10 14:19:22 2015 4 | 5 | @author: tizita nesibu 6 | """ 7 | import xml.etree.ElementTree as ET 8 | import os, sys 9 | 10 | from annotstructs import NoduleRoi, NormalNodule, SmallNodule, NonNodule, RadAnnotation 11 | 12 | #RadAnnotation holds -> readingSession(is the annotaion of one raiologist slice by slice) 13 | # unblindedReadNodule -> holds one slice's annotation info -> #if no characterstics -> SmallNodule 14 | #if with characterstics -> NormalNodule 15 | #NonNodule -> if it is not unblindedReadNodule 16 | #locus is like edgmap for -> #NonNodule 17 | 18 | class LIDCXmlHeader: 19 | 20 | def __init__(self): #4 elements are not included b/c they don't have data inside 21 | self.version = None 22 | self.messageid = None 23 | self.date_request = None 24 | self.time_request = None 25 | self.task_descr = None 26 | self.series_instance_uid = None 27 | self.date_service = None 28 | self.time_service = None 29 | self.study_instance_uid = None 30 | 31 | def __str__(self): 32 | strng = ("--- XML HEADER ---\n" 33 | "Version (%s) Message-Id (%s) Date-request (%s) Time-request (%s) \n" 34 | "Series-UID (%s)\n" 35 | "Time-service (%s) Task-descr (%s) Date-service (%s) Time-service (%s)\n" 36 | "Study-UID (%s)")%(self.version, self.messageid, self.date_request, self.time_request, 37 | self.series_instance_uid, self.time_service, self.task_descr, 38 | self.date_service, self.time_service, self.study_instance_uid) 39 | return strng 40 | 41 | 42 | class LIDCXmlParser: 43 | 44 | def __init__(self, fname=[]): 45 | 46 | #check if file exists or not 47 | self.initialized = False 48 | if (not (fname == [])): #if fname is not empity 49 | if not os.path.isfile(fname): 50 | print("Error: filename (%s) doesn't exist"%(fname)) 51 | self.initialized = False 52 | else: 53 | self.initialized = True 54 | 55 | self.xml_fname = fname 56 | self.rad_annotations = [] #to hold list of readingSession(xml element)->which holds each radiologists 57 | #annotation info i.e. len(rad_annotations) tells us nbr of radiologist 58 | self.xml_header = LIDCXmlHeader() 59 | self.namespace = {'nih': 'http://www.nih.gov'} #dict to store namespace's key and value b/c when this xml file 60 | #is parsed it containes this website infront of each tag that is parsed, to avoid including the 61 | # whole site, namespace could be used to shorten it(can be indicated by the key i.e.'nih'). 
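# (editor's illustration) e.g. root.findall('nih:readingSession', self.namespace)
# matches tags of the form '{http://www.nih.gov}readingSession' during parsing.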
62 | 63 | if (self.initialized): 64 | print("LIDC Xml Parser Initialized!") 65 | return 66 | 67 | def set_xml_file(self, fname): 68 | #check if file exists or not 69 | if not os.path.isfile(fname): 70 | print("Error: filename (%s) doesn't exist"%(fname)) 71 | self.initialized = False 72 | else: 73 | self.xml_fname = fname 74 | self.initialized = True 75 | 76 | return self.initialized 77 | 78 | def parse(self): 79 | if (not self.initialized): # if file not exist(if self.initialized is false) 80 | print("Error: Parser not initiialized!") 81 | return 82 | ns = self.namespace 83 | 84 | tree = ET.parse(self.xml_fname) #ET is the library we use to parse xml data 85 | root = tree.getroot() 86 | 87 | #print root[0][0].tag, root[0][0].text 88 | #print root[0][1].tag, root[0][1].text 89 | #print root[0][2].tag, root[0][2].text 90 | 91 | #print root.attrib 92 | #parse the file 93 | 94 | #FIXME: Exception Handling 95 | resp_hdr = root.findall('nih:ResponseHeader', ns)[0] #ns is to show what nih is,and [0] is b/c only one ResponseHeader is available 96 | #4 elements are not included b/c they don't have data inside 97 | if resp_hdr.find('nih:Version', ns) is not None: 98 | self.xml_header.version = resp_hdr.find('nih:Version', ns).text 99 | if resp_hdr.find('nih:MessageId', ns) is not None: 100 | self.xml_header.messageid = resp_hdr.find('nih:MessageId', ns).text 101 | if resp_hdr.find('nih:DateRequest', ns) is not None: 102 | self.xml_header.date_request = resp_hdr.find('nih:DateRequest', ns).text 103 | if resp_hdr.find('nih:TimeRequest', ns) is not None: 104 | self.xml_header.time_request = resp_hdr.find('nih:TimeRequest', ns).text 105 | if resp_hdr.find('nih:TaskDescription', ns) is not None: 106 | self.xml_header.task_descr = resp_hdr.find('nih:TaskDescription', ns).text 107 | if resp_hdr.find('nih:SeriesInstanceUid', ns) is not None: 108 | self.xml_header.series_instance_uid = resp_hdr.find('nih:SeriesInstanceUid', ns).text 109 | if resp_hdr.find('nih:DateService', ns) is not None: 110 | self.xml_header.date_service = resp_hdr.find('nih:DateService', ns).text 111 | if resp_hdr.find('nih:TimeService', ns) is not None: 112 | self.xml_header.time_service = resp_hdr.find('nih:TimeService', ns).text 113 | if resp_hdr.find('nih:StudyInstanceUID', ns) is not None: 114 | self.xml_header.study_instance_uid = resp_hdr.find('nih:StudyInstanceUID', ns).text 115 | 116 | print(self.xml_header) # calles str of the class LIDCXmlHeader() 117 | 118 | 119 | 120 | for read_session in root.findall('nih:readingSession',ns): #readingSession-> holds radiologist's annotation info 121 | rad_annotation = RadAnnotation() #to hold each radiologists annotation i.e. 
readingSession in xml file 122 | rad_annotation.version = read_session.find('nih:annotationVersion', ns).text 123 | rad_annotation.id = read_session.find('nih:servicingRadiologistID', ns).text 124 | 125 | unblinded_nodule = read_session.findall('nih:unblindedReadNodule', ns) 126 | 127 | for node in unblinded_nodule: 128 | nodule = self.parse_nodule(node) 129 | 130 | # if (not nodule.is_small): 131 | # rad_annotation.nodules.append(nodule) 132 | # else: 133 | # rad_annotation.small_nodules.append(nodule) 134 | # 135 | if(nodule.is_small): 136 | rad_annotation.small_nodules.append(nodule) 137 | else: 138 | rad_annotation.nodules.append(nodule) # nodule is normalNodule 139 | 140 | 141 | non_nodule = read_session.findall('nih:nonNodule', ns) 142 | 143 | for node in non_nodule: 144 | nodule = self.parse_non_nodule(node) 145 | rad_annotation.non_nodules.append(nodule) 146 | 147 | self.rad_annotations.append(rad_annotation) 148 | 149 | return 150 | #for child in root: 151 | # print child.tag#, child.attrib 152 | 153 | def parse_nodule(self, xml_node): #xml_node is one unblindedReadNodule 154 | ns = self.namespace 155 | 156 | chartcs_node = xml_node.find('nih:characteristics', ns) 157 | is_small = (chartcs_node is None) # if no characteristics, it is smallnodule i.e. is_small=TRUE 158 | 159 | if (is_small) or (chartcs_node.find('nih:subtlety',ns) is None): 160 | nodule = SmallNodule() 161 | nodule.id = xml_node.find('nih:noduleID', ns).text 162 | else: 163 | nodule = NormalNodule() #if it has characteristics it is normalNodule 164 | nodule.id = xml_node.find('nih:noduleID', ns).text 165 | 166 | nodule.characterstics.subtlety = int(chartcs_node.find('nih:subtlety',ns).text) 167 | nodule.characterstics.internalstructure = int(chartcs_node.find('nih:internalStructure',ns).text) 168 | nodule.characterstics.calcification = int(chartcs_node.find('nih:calcification',ns).text) 169 | nodule.characterstics.sphericity = int(chartcs_node.find('nih:sphericity',ns).text) 170 | nodule.characterstics.margin = int(chartcs_node.find('nih:margin',ns).text) 171 | nodule.characterstics.lobulation = int(chartcs_node.find('nih:lobulation',ns).text) 172 | nodule.characterstics.spiculation = int(chartcs_node.find('nih:spiculation',ns).text) 173 | nodule.characterstics.texture = int(chartcs_node.find('nih:texture',ns).text) 174 | nodule.characterstics.malignancy = int(chartcs_node.find('nih:malignancy',ns).text) 175 | 176 | xml_rois = xml_node.findall('nih:roi', ns) 177 | 178 | for xml_roi in xml_rois: 179 | roi = NoduleRoi() 180 | roi.z = float(xml_roi.find('nih:imageZposition', ns).text) 181 | roi.sop_uid = xml_roi.find('nih:imageSOP_UID', ns).text 182 | roi.inclusion = (xml_roi.find('nih:inclusion', ns).text == "TRUE") # when inclusion = TRUE ->roi includes the whole nodule 183 | #when inclusion = FALSE ->roi is drown twice for one nodule 1.ouside the nodule 184 | #2.inside the nodule -> to indicate that the nodule has donut hole(the inside hole is 185 | #not part of the nodule) but by forcing inclusion to be TRUE, this situation is ignored 186 | 187 | edgemaps = xml_roi.findall('nih:edgeMap', ns) 188 | 189 | xmin, xmax, ymin, ymax = sys.maxsize,0,sys.maxsize,0 #??????????????????? 
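# (editor's note) the min trackers start at sys.maxsize and the max trackers at 0,
# so the first edge point initializes the running bounding box computed below.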
190 | 191 | for edgemap in edgemaps: 192 | x = int(edgemap.find('nih:xCoord', ns).text) 193 | y = int(edgemap.find('nih:yCoord', ns).text) 194 | 195 | if (x > xmax): # to define a rectangle arround the roi 196 | xmax = x #only the 1st point i.e.(xmin, ymin) and 197 | #the last point(xmax, ymax) is needed to drow a rectangle 198 | if (x < xmin): 199 | xmin = x 200 | 201 | if (y > ymax): 202 | ymax = y 203 | 204 | if (y < ymin): 205 | ymin = y 206 | 207 | 208 | roi.roi_xy.append((x,y)) 209 | 210 | if not is_small: #only for normalNodules 211 | roi.roi_rect = (xmin, ymin, xmax, ymax) 212 | roi.roi_centroid = ((xmax+xmin)/2., (ymin+ymax)/2.) #center point 213 | 214 | nodule.rois.append(roi) 215 | 216 | return nodule #is equivalent to unblindedReadNodule(xml element) 217 | 218 | def parse_non_nodule(self, xml_node): #xml_node is one nonNodule 219 | ns = self.namespace 220 | 221 | nodule = NonNodule() 222 | 223 | nodule.id = xml_node.find('nih:nonNoduleID', ns).text 224 | roi = NoduleRoi() 225 | roi.z = float(xml_node.find('nih:imageZposition', ns).text) 226 | roi.sop_uid = xml_node.find('nih:imageSOP_UID', ns).text 227 | 228 | loci = xml_node.findall('nih:locus', ns) 229 | 230 | for locus in loci: 231 | x = int(locus.find('nih:xCoord', ns).text) 232 | y = int(locus.find('nih:yCoord', ns).text) 233 | roi.roi_xy.append((x,y)) 234 | nodule.rois.append(roi) 235 | return nodule #is equivalent to nonNodule(xml element) 236 | 237 | 238 | def __str__(self): #to print the whole xml data of a patient(not important) 239 | strng = "*"*79 + "\n" 240 | strng += "XML FileName [%s] \n"%self.xml_fname 241 | strng += str(self.xml_header) #str calles LIDCXmlHeader's str b/c xml_header is object of LIDCXmlHeader class 242 | 243 | strng += "# of Rad. Annotations [%d] \n" % len(self.rad_annotations) 244 | 245 | for ant in self.rad_annotations: 246 | strng += str(ant) 247 | 248 | strng += "*"*79 + "\n" 249 | return strng 250 | 251 | 252 | 253 | #def main(): 254 | # dt = LIDCXmlParser(r'F:\ImageData\LIDC\DOI\LIDC-IDRI-0001\1.3.6.1.4.1.14519.5.2.1.6279.6001.298806137288633453246975630178\1.3.6.1.4.1.14519.5.2.1.6279.6001.179049373636438705059720603192\069.xml') 255 | # dt.parse() 256 | # print(dt) 257 | # return 258 | 259 | 260 | #if __name__ == '__main__': 261 | ## if __package__ is None: 262 | ## path_abs_name = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 263 | ## if (not (path_abs_name in set(os.sys.path))): 264 | ## os.sys.path.append(path_abs_name) 265 | ## 266 | ## from structs.annotstructs import NoduleRoi, NormalNodule, SmallNodule, NonNodule, RadAnnotation 267 | ## else: 268 | ## from ..structs.annotstructs import NoduleRoi, NormalNodule, SmallNodule, NonNodule, RadAnnotation 269 | # dt = LIDCXmlParser(r'F:\ImageData\LIDC\DOI\LIDC-IDRI-0001\1.3.6.1.4.1.14519.5.2.1.6279.6001.298806137288633453246975630178\1.3.6.1.4.1.14519.5.2.1.6279.6001.179049373636438705059720603192\069.xml') 270 | # dt.parse() 271 | # print(dt) 272 | 273 | 274 | 275 | -------------------------------------------------------------------------------- /DataGenerator/test_data_generator.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Aug 23 12:38:04 2019 4 | 5 | @author: PC 6 | """ 7 | 8 | import os 9 | import time 10 | import numpy as np 11 | import torch 12 | from torch.backends import cudnn 13 | from torch.utils.data import Dataset 14 | from torch.utils.data import DataLoader 15 | from torch.autograd import Variable 16 | from 
/DataGenerator/test_data_generator.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Fri Aug 23 12:38:04 2019
4 | 
5 | @author: PC
6 | """
7 | 
8 | import os
9 | import time
10 | import numpy as np
11 | import torch
12 | from torch.backends import cudnn
13 | from torch.utils.data import Dataset
14 | from torch.utils.data import DataLoader
15 | from torch.autograd import Variable
16 | from scipy.ndimage.interpolation import rotate
17 | import pandas as pd
18 | import SimpleITK as sitk
19 | import scipy
20 | import scipy.ndimage
21 | 
22 | import matplotlib.pyplot as plt
23 | from skimage import measure
24 | from tqdm import tqdm
25 | 
26 | def readDCM_Img(FilePath):
27 |     img = {}
28 |     reader = sitk.ImageSeriesReader()
29 |     dcm_names = reader.GetGDCMSeriesFileNames(FilePath)
30 |     reader.SetFileNames(dcm_names)
31 |     image = reader.Execute()
32 |     img_array = sitk.GetArrayFromImage(image)  # SimpleITK returns z, y, x
33 |     Spacing = image.GetSpacing()
34 |     # Origin = image.GetOrigin()
35 |     img_array = img_array.transpose(2, 1, 0)  # reorder to x, y, z
36 |     img['array'] = img_array
37 |     img['Spacing'] = np.array(Spacing).astype(float)
38 |     # img['Origin'] = Origin
39 |     return img
40 | 
41 | def resample(img, new_spacing=[1, 1, 1]):
42 |     # Determine the current pixel spacing and zoom to (roughly) isotropic voxels.
43 |     image = img['array']
44 |     spacing = img['Spacing']
45 |     img_size = np.array(image.shape)
46 | 
47 |     resize_factor = spacing / new_spacing
48 |     new_real_shape = image.shape * resize_factor
49 |     new_shape = np.round(new_real_shape)
50 |     real_resize_factor = new_shape / image.shape
51 |     new_spacing = spacing / real_resize_factor  # actual spacing after rounding (not returned)
52 | 
53 |     image = scipy.ndimage.interpolation.zoom(image, real_resize_factor)
54 | 
55 |     return image, img_size, real_resize_factor
56 | 
57 | def normalize_hu(image):  # window HU to [-1200, 600] and rescale to uint8
58 |     MIN_BOUND = -1200.0
59 |     MAX_BOUND = 600.0
60 |     image = (image - MIN_BOUND) / (MAX_BOUND - MIN_BOUND)
61 |     image[image > 1] = 1
62 |     image[image < 0] = 0
63 |     image = (image * 255).astype('uint8')
64 |     return image
65 | 
66 | def crop_roi(resampled_img, img_size, seed_pos, crop_size, resize_factor):
67 |     initial_seed = [seed_pos[0], seed_pos[1], img_size[2] - seed_pos[2]]  # the listed z counts from the last slice, so flip it
68 |     trans_seed = initial_seed * resize_factor
69 |     start = []
70 |     end = []
71 |     for i in range(3):
72 |         s = np.floor(trans_seed[i] - (crop_size[i] / 2))
73 |         e = np.ceil(trans_seed[i] + (crop_size[i] / 2))
74 |         if s < 0:
75 |             s = 0
76 |         if e > resampled_img.shape[i]:
77 |             e = resampled_img.shape[i]
78 |         if e - s != crop_size[i]:  # shift the window back inside the volume at a border
79 |             pad = e - s - crop_size[i]
80 |             if s == 0:
81 |                 e = e - pad
82 |             else:
83 |                 s = s + pad
84 |         start.append(int(s))
85 |         end.append(int(e))
86 |     # print(start, end)
87 |     roi = resampled_img[start[0]:end[0], start[1]:end[1], start[2]:end[2]]
88 | 
89 |     return roi
90 | def save_img(image, outputImageFileName):
91 |     writer = sitk.ImageFileWriter()
92 |     writer.SetFileName(outputImageFileName)
93 |     writer.Execute(image)
94 | 
95 | if __name__ == "__main__":
96 |     img_path = r'.\GGO_DataSet\test_data\DCM'
97 |     list_path = r'.\GGO_DataSet\test_data\test.csv'
98 |     save_dir = r'.\GGO_DataSet\test_data\test_Img'
99 |     os.makedirs(save_dir, exist_ok=True)  # make sure the output folder exists
100 | 
101 |     GGO_list = pd.read_csv(list_path)
102 | 
103 |     # Keep only non-solid (GGO) nodules that carry a non-zero class label.
104 |     keep = (GGO_list['Type'] != 'Solid') & (GGO_list['Class'] != 0)
105 |     GGO_list = GGO_list[keep]
106 | 
107 |     List_Num = np.array(GGO_list['Num'].tolist())
108 |     Histopathology = np.array(GGO_list['Histopathology'].tolist())
109 |     x_list = np.array(GGO_list['X'].tolist())
110 |     y_list = np.array(GGO_list['Y'].tolist())
111 |     z_list = np.array(GGO_list['Z'].tolist())
112 | 
113 |     for i in tqdm(range(len(List_Num))):
114 |         name_split = List_Num[i].split('_')
115 |         dcm_File = name_split[0] + '_' + name_split[1]
116 |         img_fold = os.path.join(img_path, dcm_File)
117 |         img = readDCM_Img(img_fold)
118 |         image, img_size, resize_factor = resample(img)
119 |         X = round(x_list[i])
120 |         Y = round(y_list[i])
121 |         Z = round(z_list[i])
122 |         image = normalize_hu(image)
123 |         seed_pos = [X, Y, Z]
124 |         ROI = crop_roi(image, img_size, seed_pos, [64, 64, 64], resize_factor)
125 |         new_spacing = [1, 1, 1]
126 |         ROI_sitk = sitk.GetImageFromArray(ROI)
127 |         ROI_sitk.SetSpacing(new_spacing)  # optional copy that can be written out with save_img
128 |         ROI = ROI[np.newaxis, ...]
129 |         ROI = (ROI.astype(np.float32) - 128) / 128  # rescale uint8 to roughly [-1, 1]
130 | 
131 |         if Histopathology[i] == 'IAC':  # invasive adenocarcinoma is the positive class
132 |             GGO_Class = 1
133 |         else:
134 |             GGO_Class = 0
135 |         np.save(os.path.join(save_dir, List_Num[i] + '_roi'), ROI)
136 |         np.save(os.path.join(save_dir, List_Num[i] + '_label'), np.array(GGO_Class))
137 | 
--------------------------------------------------------------------------------
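After this script runs, each nodule yields a `*_roi.npy` / `*_label.npy` pair. A small sketch of what a consumer can expect from those arrays, given the crop and rescale steps above (the file name is hypothetical):

import numpy as np

roi = np.load('LIDC_0001_1_roi.npy')    # hypothetical file name
label = np.load('LIDC_0001_1_label.npy')

assert roi.shape == (1, 64, 64, 64)             # channel axis added by np.newaxis
assert roi.min() >= -1.0 and roi.max() <= 1.0   # (uint8 - 128) / 128
assert label.item() in (0, 1)                   # 1 = invasive adenocarcinoma (IAC)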
/README.md:
--------------------------------------------------------------------------------
1 | # DL_Radiomics_Fusion
2 | Comparison and fusion of deep learning and radiomics features of ground-glass nodules to predict the invasiveness risk of stage-I lung adenocarcinomas on CT scans
--------------------------------------------------------------------------------
/SegModel/README:
--------------------------------------------------------------------------------
1 | # The code for building the segmentation model
2 | 
--------------------------------------------------------------------------------
/SegModel/net_seg.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Mon Aug 5 11:07:39 2019
4 | 
5 | @author: PC
6 | """
7 | 
8 | 
9 | from torch.nn import Module, Sequential
10 | from torch.nn import Conv3d, ConvTranspose3d, BatchNorm3d, MaxPool3d
11 | from torch.nn import ReLU, Sigmoid
12 | import torch
13 | 
14 | class Conv3D_Block(Module):
15 |     # Two 3x3x3 conv + BN + ReLU layers with an optional 1x1x1 residual projection.
16 | 
17 |     def __init__(self, inp_feat, out_feat, kernel=3, stride=1, padding=1, residual=None):
18 | 
19 |         super(Conv3D_Block, self).__init__()
20 | 
21 |         self.conv1 = Sequential(
22 |             Conv3d(inp_feat, out_feat, kernel_size=kernel,
23 |                    stride=stride, padding=padding, bias=True),
24 |             BatchNorm3d(out_feat),
25 |             ReLU())
26 | 
27 |         self.conv2 = Sequential(
28 |             Conv3d(out_feat, out_feat, kernel_size=kernel,
29 |                    stride=stride, padding=padding, bias=True),
30 |             BatchNorm3d(out_feat),
31 |             ReLU())
32 | 
33 |         self.residual = residual
34 | 
35 |         if self.residual is not None:
36 |             self.residual_upsampler = Conv3d(inp_feat, out_feat, kernel_size=1, bias=False)
37 | 
38 |     def forward(self, x):
39 | 
40 |         res = x
41 | 
42 |         if not self.residual:
43 |             return self.conv2(self.conv1(x))
44 |         else:
45 |             return self.conv2(self.conv1(x)) + self.residual_upsampler(res)
46 | 
47 | def Maxpool3D_Block():
48 | 
49 |     pool = MaxPool3d(kernel_size=2, stride=2, padding=0)
50 | 
51 |     return pool
52 | 
53 | class Deconv3D_Block(Module):
54 |     # Transposed convolution that doubles each spatial dimension.
55 | 
56 |     def __init__(self, inp_feat, out_feat, kernel=4, stride=2, padding=1):
57 | 
58 |         super(Deconv3D_Block, self).__init__()
59 | 
60 |         self.deconv = Sequential(
61 |             ConvTranspose3d(inp_feat, out_feat, kernel_size=kernel,
62 |                             stride=stride, padding=padding, output_padding=0, bias=True),
63 |             BatchNorm3d(out_feat),
64 |             ReLU())
65 | 
66 |     def forward(self, x):
67 | 
68 |         return self.deconv(x)
69 | 
70 | 
71 | class Unet3D(Module):
72 |     def __init__(self, num_feat=[16, 32, 64, 96, 128], residual='conv'):
73 |         super(Unet3D, self).__init__()
74 | 
75 |         # Encoder
76 |         self.conv1 = Conv3D_Block(1, num_feat[0], residual=residual)
77 |         self.pool1 = Maxpool3D_Block()
78 |         self.conv2 = Conv3D_Block(num_feat[0], num_feat[1], residual=residual)
79 |         self.pool2 = Maxpool3D_Block()
80 |         self.conv3 = Conv3D_Block(num_feat[1], num_feat[2], residual=residual)
81 |         self.pool3 = Maxpool3D_Block()
82 |         self.conv4 = Conv3D_Block(num_feat[2], num_feat[3], residual=residual)
83 |         self.pool4 = Maxpool3D_Block()
84 |         self.conv5 = Conv3D_Block(num_feat[3], num_feat[4], residual=residual)
85 | 
86 | 
87 |         # Decoder
88 |         self.upconv4 = Deconv3D_Block(num_feat[4], num_feat[3])
89 |         self.deconv4 = Conv3D_Block(num_feat[3] * 2, num_feat[3], residual=residual)
90 |         self.upconv3 = Deconv3D_Block(num_feat[3], num_feat[2])
91 |         self.deconv3 = Conv3D_Block(num_feat[2] * 2, num_feat[2], residual=residual)
92 |         self.upconv2 = Deconv3D_Block(num_feat[2], num_feat[1])
93 |         self.deconv2 = Conv3D_Block(num_feat[1] * 2, num_feat[1], residual=residual)
94 |         self.upconv1 = Deconv3D_Block(num_feat[1], num_feat[0])
95 |         self.deconv1 = Conv3D_Block(num_feat[0] * 2, num_feat[0], residual=residual)
96 | 
97 |         self.out_conv = Conv3d(num_feat[0], 1, kernel_size=1, stride=1, padding=0, bias=True)
98 |         self.sigmoid = Sigmoid()
99 | 
100 |     def forward(self, x):
101 |         down_1 = self.conv1(x)
102 |         pool_1 = self.pool1(down_1)
103 |         down_2 = self.conv2(pool_1)
104 |         pool_2 = self.pool2(down_2)
105 |         down_3 = self.conv3(pool_2)
106 |         pool_3 = self.pool3(down_3)
107 |         down_4 = self.conv4(pool_3)
108 |         pool_4 = self.pool4(down_4)
109 |         down_5 = self.conv5(pool_4)
110 | 
111 |         # Decoder path: upsample and concatenate the skip connection along the channel axis
112 |         up_4 = torch.cat([self.upconv4(down_5), down_4], dim=1)
113 |         deconv_4 = self.deconv4(up_4)
114 |         up_3 = torch.cat([self.upconv3(deconv_4), down_3], dim=1)
115 |         deconv_3 = self.deconv3(up_3)
116 |         up_2 = torch.cat([self.upconv2(deconv_3), down_2], dim=1)
117 |         deconv_2 = self.deconv2(up_2)
118 |         up_1 = torch.cat([self.upconv1(deconv_2), down_1], dim=1)
119 |         deconv_1 = self.deconv1(up_1)
120 | 
121 |         out = self.sigmoid(self.out_conv(deconv_1))
122 | 
123 |         return out
124 | 
--------------------------------------------------------------------------------
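Since the encoder halves each spatial dimension four times, input volumes must be divisible by 16; the 64-cube ROIs produced by the data generator satisfy this, and the decoder restores the input resolution. A quick shape check (a sketch; runs on CPU):

import torch
from net_seg import Unet3D

net = Unet3D()
x = torch.randn(1, 1, 64, 64, 64)  # one single-channel 64^3 ROI
with torch.no_grad():
    y = net(x)
print(y.shape)  # expected: torch.Size([1, 1, 64, 64, 64]), values in (0, 1)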
/SegModel/train_seg_model.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Wed Jul 3 15:36:02 2019
4 | 
5 | @author: PC
6 | """
7 | 
8 | import os
9 | import time
10 | import numpy as np
11 | from net_seg import *
12 | import torch
13 | from torch.backends import cudnn
14 | from torch.utils.data import Dataset
15 | from torch.utils.data import DataLoader
16 | from torch.autograd import Variable
17 | from scipy.ndimage.interpolation import rotate
18 | import glob
19 | 
20 | 
21 | def augment(roi, label, ifflip=True, ifrotate=True, ifswap=True):
22 |     if ifrotate:
23 |         angle1 = np.random.rand() * 180
24 |         roi = rotate(roi, angle1, axes=(2, 3), reshape=False)
25 |         label = rotate(label, angle1, axes=(2, 3), reshape=False, order=0)  # nearest neighbour keeps the mask binary
26 |     if ifswap:
27 |         axisorder = np.random.permutation(3)  # permute the spatial axes, keeping the channel axis first
28 |         roi = np.transpose(roi, np.concatenate([[0], axisorder + 1]))
29 |         label = np.transpose(label, np.concatenate([[0], axisorder + 1]))
30 |     if ifflip:
31 |         flipid = np.array([1, np.random.randint(2), np.random.randint(2)]) * 2 - 1
32 |         roi = np.ascontiguousarray(roi[:, ::flipid[0], ::flipid[1], ::flipid[2]])
33 |         label = np.ascontiguousarray(label[:, ::flipid[0], ::flipid[1], ::flipid[2]])
34 |     return roi, label
35 | 
36 | class GGODataGenerator(Dataset):
37 |     def __init__(self, img_path, phase='train'):
38 |         assert phase in ('train', 'val', 'test')
39 | 
40 |         self.List_Num = np.array([os.path.split(s)[-1].split('_')[0] for s in glob.glob(img_path + '/*_roi.npy')])
41 | 
42 |         self.img_path = img_path
43 |         self.phase = phase
44 | 
45 |     def __getitem__(self, idx):
46 |         if self.phase == 'train':
47 |             # __len__ reports four times the number of samples; each block of
48 |             # len(List_Num) indices selects one augmentation:
49 |             # 0 = none, 1 = flip, 2 = rotate, 3 = swap axes.
50 |             aug_id = idx // len(self.List_Num)
51 |             idx = idx % len(self.List_Num)
52 |             ifflip = (aug_id == 1)
53 |             ifrotate = (aug_id == 2)
54 |             ifswap = (aug_id == 3)
55 | 
56 |             dcm_File = self.List_Num[idx]
57 |             roi_path = self.img_path + '/' + dcm_File + '_roi.npy'
58 |             ROI = np.load(roi_path)[np.newaxis, ...]
59 |             class_path = self.img_path + '/' + dcm_File + '_label.npy'
60 |             Label = np.load(class_path)[np.newaxis, ...]
61 |             ROI, Label = augment(ROI, Label, ifflip=ifflip, ifrotate=ifrotate, ifswap=ifswap)
62 |         else:
63 |             dcm_File = self.List_Num[idx]
64 |             roi_path = self.img_path + '/' + dcm_File + '_roi.npy'
65 |             ROI = np.load(roi_path)
66 |             class_path = self.img_path + '/' + dcm_File + '_label.npy'
67 |             Label = np.load(class_path)
68 |         return ROI, Label
69 | 
70 |     def __len__(self):
71 |         if self.phase == 'train':
72 |             return len(self.List_Num) * 4
73 |         else:
74 |             return len(self.List_Num)
75 | 
76 | def get_lr(epoch, lr):
77 |     # Step schedule: full lr for the first half of training, 1/10 until 80%, then 1/100.
78 |     if epoch <= epochs * 0.5:
79 |         lr = lr
80 |     elif epoch <= epochs * 0.8:
81 |         lr = 0.1 * lr
82 |     else:
83 |         lr = 0.01 * lr
84 |     return lr
85 | 
86 | def get_optimizer(st, lr, momentum=0.9):
87 |     if st == 'sgd':
88 |         return torch.optim.SGD(net.parameters(), lr=lr, momentum=momentum)
89 |     elif st == 'adam':
90 |         return torch.optim.Adam(net.parameters(), lr=lr)
91 | 
92 | def dice_loss(y, pred):
93 |     smooth = 1.
94 | 
95 |     yflat = y.view(-1)
96 |     predflat = pred.view(-1)
97 |     intersection = (yflat * predflat).sum()
98 | 
99 |     return 1 - ((2. * intersection + smooth) /
100 |                 (yflat.sum() + predflat.sum() + smooth))
101 | 
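102 | # Illustrative sanity check (not in the original script): with a perfect
103 | # prediction, y = pred = torch.ones(2, 1, 8, 8, 8) gives intersection = 1024,
104 | # so dice_loss = 1 - (2*1024 + 1) / (1024 + 1024 + 1) = 0.0; an all-zero
105 | # prediction against the same y gives 1 - 1 / (1024 + 1) ~= 0.999.
106 | 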
107 | 
108 | def train(dataloader, net, epoch, optimizer, lr, save_freq, save_dir):
109 |     starttime = time.time()
110 |     net.train()
111 | 
112 |     lr = get_lr(epoch, lr)  # apply the step schedule defined above
113 |     for param_group in optimizer.param_groups:
114 |         param_group['lr'] = lr
115 |     metrics = []
116 | 
117 |     for i, (data, Class) in enumerate(dataloader):
118 |         data = Variable(data.cuda(non_blocking=True))
119 |         target = Variable(Class.cuda(non_blocking=True))
120 |         target = target.float()
121 | 
122 |         output = net(data)
123 | 
124 |         loss_output = dice_loss(output, target)
125 |         optimizer.zero_grad()   # clear gradients for the next step
126 |         loss_output.backward()  # backpropagation, compute gradients
127 |         optimizer.step()        # apply gradients
128 |         metrics.append(loss_output.item())  # detach to a plain float for logging
129 | 
130 |     if epoch % save_freq == 0:
131 |         state_dict = net.state_dict()
132 |         for key in state_dict.keys():
133 |             state_dict[key] = state_dict[key].cpu()
134 | 
135 |         torch.save({
136 |             'epoch': epoch,
137 |             'save_dir': save_dir,
138 |             'state_dict': state_dict},
139 |             os.path.join(save_dir, '%03d.ckpt' % epoch))
140 | 
141 |     endtime = time.time()
142 |     metrics = np.asarray(metrics, np.float32)
143 |     print('Epoch %03d (lr %.5f)' % (epoch, lr))
144 |     print('loss %2.4f' % (np.mean(metrics)))
145 |     print('time:%3.2f' % (endtime - starttime))
146 | 
147 | if __name__ == '__main__':
148 |     # torch.cuda.set_device(0)
149 |     net = Unet3D().cuda()
150 |     optimizer = get_optimizer('adam', lr=0.001)
151 |     cudnn.benchmark = True
152 |     img_path = './Nodule_crop'
153 |     dataset = GGODataGenerator(img_path, phase='train')
154 |     data_loader = DataLoader(dataset, batch_size=12, shuffle=True, num_workers=0, pin_memory=True)
155 |     save_freq = 20
156 |     epochs = 200
157 |     lr = 0.001
158 |     save_dir = './model'
159 |     os.makedirs(save_dir, exist_ok=True)  # make sure the checkpoint folder exists
160 |     for epoch in range(0, epochs + 1):
161 |         train(data_loader, net, epoch, optimizer, lr, save_freq, save_dir)
--------------------------------------------------------------------------------
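Checkpoints are written every save_freq epochs as dictionaries with 'epoch', 'save_dir' and 'state_dict' keys, so restoring the segmentation model for evaluation might look like this sketch (the checkpoint name assumes the default 200-epoch run):

import torch
from net_seg import Unet3D

net = Unet3D().cuda()
ckpt = torch.load('./model/200.ckpt')  # saved by train() above
net.load_state_dict(ckpt['state_dict'])
net.eval()
print('restored epoch %d' % ckpt['epoch'])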