├── .gitignore
├── readmeImages
    ├── CAD_System.jpg
    ├── lungSegmentation.png
    └── evolutionaryCycle.jpg
├── saveOutput.m
├── Dicom.m
├── readInput.m
├── lungSegment.m
├── README.md
└── ICFA.py


/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store


--------------------------------------------------------------------------------
/readmeImages/CAD_System.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ng0227/Image-Processing-Lung-disease-detection-using-feature-selection-and-machine-learning/HEAD/readmeImages/CAD_System.jpg


--------------------------------------------------------------------------------
/readmeImages/lungSegmentation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ng0227/Image-Processing-Lung-disease-detection-using-feature-selection-and-machine-learning/HEAD/readmeImages/lungSegmentation.png


--------------------------------------------------------------------------------
/readmeImages/evolutionaryCycle.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ng0227/Image-Processing-Lung-disease-detection-using-feature-selection-and-machine-learning/HEAD/readmeImages/evolutionaryCycle.jpg


--------------------------------------------------------------------------------
/saveOutput.m:
--------------------------------------------------------------------------------
function saveOutput
%SAVEOUTPUT Copy one sample slice into the dataset output folder.
%   Reads '/path/fatia0.png' and writes it unchanged as '01.png' inside
%   '/path/dataset/Output/'.

    outputFolder = '/path/dataset/Output/';
    outputName   = '01.png';

    % Make sure the target folder exists before writing into it.
    if ~exist(outputFolder, 'dir')
        mkdir(outputFolder);
    end

    img = imread('/path/fatia0.png');
    imwrite(img, fullfile(outputFolder, outputName));

end


--------------------------------------------------------------------------------
/Dicom.m:
--------------------------------------------------------------------------------
function Dicom
%DICOM Read one DICOM file and display it.
%   The image is shown with 'DisplayRange' set to [] so that imshow scales
%   the display to the data's own minimum and maximum.

    filename = '/path/dicomImage1.dcm';

    % The file is read once; the earlier second read ([Y,map]) and the
    % dicominfo call produced values that were never used.
    X = dicomread(filename);

    imshow(X, 'DisplayRange', []);

end


--------------------------------------------------------------------------------
/readInput.m:
--------------------------------------------------------------------------------
function readInput
%READINPUT Open every PNG of the healthy grayscale set in its own figure.

    % One folder variable drives both the listing and the reads. Previously
    % the listing used '/path/Grayscale/Healthy/' while the reads used
    % '/path/dataset/Grayscale/Healthy/', so listed names were looked up in
    % a different directory.
    folder   = '/path/dataset/Grayscale/Healthy/';
    srcFiles = dir(fullfile(folder, '*.png'));

    for i = 1 : numel(srcFiles)
        I = imread(fullfile(folder, srcFiles(i).name));
        figure, imshow(I);
    end

end


--------------------------------------------------------------------------------
/lungSegment.m:
--------------------------------------------------------------------------------
function lungSegment
%LUNGSEGMENT Segment the COPD images using their ground-truth masks.
%   For the i-th PNG in the grayscale folder, the i-th PNG in the
%   ground-truth folder is refined with ACTIVECONTOUR (2000 iterations,
%   starting from a rectangle inset 25 pixels from the border). Every pixel
%   outside the resulting region is set to 0 and the result is written to
%   the output folder as '<i>.png'.
%
%   NOTE(review): images and masks are paired purely by their position in
%   the two directory listings - confirm both folders list in the same order.

    imageFolder = '/path/dataset/Grayscale/COPD/';
    maskFolder  = '/path/dataset/GroundTruth/COPD/';
    outputDir   = '/path/dataset/Output/COPD';

    imageDir = dir(fullfile(imageFolder, '*.png'));
    maskDir  = dir(fullfile(maskFolder,  '*.png'));

    % Fail with a clear message instead of an index error half-way through.
    if numel(maskDir) < numel(imageDir)
        error('lungSegment:missingMasks', ...
              'Found %d images but only %d ground-truth masks.', ...
              numel(imageDir), numel(maskDir));
    end

    % Make sure the target folder exists before writing into it.
    if ~exist(outputDir, 'dir')
        mkdir(outputDir);
    end

    for i = 1 : numel(imageDir)
        % 'img' rather than 'image', which would shadow the built-in function.
        img       = imread(fullfile(imageFolder, imageDir(i).name));
        maskImage = imread(fullfile(maskFolder,  maskDir(i).name));

        % rgb2gray only accepts RGB input; leave single-channel masks as-is.
        if size(maskImage, 3) == 3
            maskImage = rgb2gray(maskImage);
        end

        % Initial contour: a rectangle inset 25 pixels from every border.
        initialMask = false(size(maskImage));
        initialMask(25:end-25, 25:end-25) = true;

        bw = activecontour(maskImage, initialMask, 2000);

        % Zero everything outside the contour, in every channel of the image.
        outside = repmat(~bw, [1 1 size(img, 3)]);
        img(outside) = 0;

        outputFileName = sprintf('%d.png', i);
        imwrite(img, fullfile(outputDir, outputFileName));
    end

    disp('finish');

end


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 
Image-Processing-Lung-disease-detection-using-feature-selection-and-machine-learning 2 | A Computer Aided Diagnosis (CAD) system to diagnose lung diseases: COPD and Pulmonary Fibrosis using chest CT images. 3 | - Image segmentation was done by computing an active contour and creating a binary mask image. 4 | - Features extracted are : Zernike, Haralick, Gabor and Tamura (total 111 image descriptors) 5 | - Bio-inspired evolutionary algorithms : Crow Search, Grey Wolf and Cuttlefish algorithms were used for feature selection. 6 | - Classifiers used : SVM (Linear kernel), KNN, Random forest and Decision tree. 7 | - 99.4% accuracy achieved. 8 | 9 | 10 | ### Dataset: 11 | The dataset was acquired in collaboration with the Walter Cantídio University Hospital, Fortaleza, Brazil. The following CT systems were used for obtaining the dataset: Toshiba Aquilion (TA), GE Medical Systems LightSpeed16 (GEMSL) and Philips Brilliance 10 (PB), and the axial tomographic planes were used. In this study, 36 chest CT images were used, out of which 12 CT images were from healthy volunteers and 24 CT images were from patients: 12 patients with pulmonary emphysema and 12 patients with fibrosis. In total, 72 lungs were considered and studied in this work. 12 | 13 | #### For more details, please refer to the published paper: 14 | Naman Gupta, Deepak Gupta, Ashish Khanna, Pedro Rebouças Filho, Victor Hugo C. de Albuquerque, “Evolutionary Algorithms for Automatic Lung Disease Detection”, Measurement (Elsevier), doi:https://doi.org/10.1016/j.measurement.2019.02.042, 2019. 
# --------------------------------------------------------------------------------
# README.md (tail of the file, headings kept as they appear in the repository):
#
#   ### Proposed CAD System:
#
#   ### Lung Segmentation:
#
#   ### Feature Selection - Evolutionary Algorithm cycle:
#
# --------------------------------------------------------------------------------
# /ICFA.py:
# --------------------------------------------------------------------------------
"""Lung model fitting using the Cuttlefish Algorithm.

The script loads a CSV of image descriptors, min-max scales the feature
columns, and searches for logistic-regression weights with a population
based cuttlefish-style optimiser that minimises the cross-entropy. Weights
whose magnitude is below a threshold are then zeroed, the resulting linear
model is scored, and three scikit-learn classifiers are scored on the same
data for comparison.

NOTE(review): every accuracy printed by this script is measured on the same
rows the models were fitted on (the original script did the same), so the
numbers are training-set accuracies, not estimates of generalisation.
"""

import sys

import numpy as np
import pandas as pd

# scikit-learn is imported inside the functions that need it, so the
# optimiser itself can be imported and exercised with NumPy alone.

DATA_PATH = "/home/naman/Documents/lung.csv"
LABEL_COLUMN = "21"          # name of the label column in the CSV
N_FEATURE_COLUMNS = 21       # the first 21 columns are used as features

N_POPULATIONS = 36           # `d` in the original script
POPULATION_SIZE = 36         # `n` in the original script
N_ITERATIONS = 50            # `its` in the original script
UPPER_LIMIT = 1
LOWER_LIMIT = -1
WEIGHT_THRESHOLD = 0.7       # weights with |w| below this are zeroed

_EPS = 1e-10                 # keeps probabilities away from 0 and 1 before log()


def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` of *z*.

    Computed as ``exp(-logaddexp(0, -z))``, which is the same quantity but
    does not overflow for large negative *z*.
    """
    return np.exp(-np.logaddexp(0, -z))


def fitness_function(X, w, Y):
    """Return the mean cross-entropy of weights *w* on ``(X, Y)``.

    Args:
        X: design matrix, one row per sample.
        w: weight column vector with ``X.shape[1]`` rows.
        Y: label column vector with one row per sample.

    Returns:
        The result of the matrix products, i.e. a ``(1, 1)`` array when *w*
        and *Y* are column vectors.
    """
    m = Y.size
    # Clip instead of scaling by 0.9999: scaling still let h reach exactly 0,
    # and log(0) * 0 then turned the fitness into NaN.
    h = np.clip(sigmoid(np.matmul(X, w)), _EPS, 1 - _EPS)
    J = -1 * (1 / m) * (
        np.matmul(np.log(h).T, Y) + np.matmul(np.log(1 - h).T, (1 - Y))
    )
    return J


def _population_fitness(X, weights, y):
    """Cross-entropy of every weight vector in *weights* (shape ``(..., f)``)."""
    h = np.clip(sigmoid(np.matmul(weights, X.T)), _EPS, 1 - _EPS)
    return -(np.matmul(np.log(h), y) + np.matmul(np.log(1 - h), 1 - y)) / y.size


def cuttlefish_optimise(X, Y, d=N_POPULATIONS, n=POPULATION_SIZE,
                        its=N_ITERATIONS, lower_limit=LOWER_LIMIT,
                        upper_limit=UPPER_LIMIT, seed=None):
    """Search for weights minimising the cross-entropy on ``(X, Y)``.

    ``d`` populations of ``n`` weight vectors are drawn uniformly from
    ``[lower_limit, upper_limit)``. The populations are split into four equal
    groups which are regenerated on every iteration:

    * group 1: ``R * g + (best - g)`` with ``R`` drawn per feature from [-1, 2)
    * group 2: ``best + V * (best - g)`` with ``V`` drawn from [-1.5, 1.5)
    * group 3: ``best + V * (best - mean(best))`` with ``V`` drawn from [-1, 1)
    * group 4: fresh uniform random weights

    After each iteration a population's best weights are replaced only when
    the new generation contains a strictly better vector.

    NOTE(review): as in the original script, ``best`` for individual ``j`` of
    any population is the best vector of *population* ``j``; this is why
    ``n`` may not exceed ``d``. Confirm that this pairing is intended.

    Args:
        X: design matrix, one row per sample.
        Y: labels, one per sample (any shape that flattens to that length).
        d: number of populations; must be a multiple of 4.
        n: individuals per population; must not exceed ``d``.
        its: number of iterations.
        lower_limit: lower bound of the random initial/group-4 weights.
        upper_limit: upper bound of the random initial/group-4 weights.
        seed: optional seed for a reproducible run.

    Returns:
        ``(weights, fitness, history)``: the best weight vector found
        (shape ``(X.shape[1],)``), its fitness as a float, and an
        ``(its + 1, d)`` array with each population's best fitness after
        initialisation and after every iteration.

    Raises:
        ValueError: if ``d`` is not a positive multiple of 4 or ``n > d``.
    """
    if d <= 0 or d % 4 != 0:
        raise ValueError("d must be a positive multiple of 4")
    if n > d:
        raise ValueError("n must not exceed d")

    rng = np.random.default_rng(seed)
    X = np.asarray(X, dtype=float)
    y = np.asarray(Y, dtype=float).reshape(-1)
    n_features = X.shape[1]
    m = d // 4
    rows = np.arange(d)

    weights = rng.uniform(lower_limit, upper_limit, (d, n, n_features))
    fitness = _population_fitness(X, weights, y)
    best_idx = fitness.argmin(axis=1)
    best_fitness = fitness[rows, best_idx]       # fancy indexing -> own copy
    best_weights = weights[rows, best_idx]       # shape (d, n_features)
    # Store copies: best_fitness is updated in place below.
    history = [best_fitness.copy()]

    for _ in range(its):
        av_best = best_weights.mean(axis=0)
        anchor = best_weights[:n]                # broadcast over each group
        g1 = weights[:m]
        g2 = weights[m:2 * m]

        reflection = rng.uniform(-1, 2, n_features)
        g1_new = reflection * g1 + (anchor - g1)

        visibility = rng.uniform(-1.5, 1.5, n_features)
        g2_new = anchor + visibility * (anchor - g2)

        visibility = rng.uniform(-1, 1, n_features)
        g3_new = np.broadcast_to(
            anchor + visibility * (anchor - av_best), (m, n, n_features)
        )

        g4_new = rng.uniform(lower_limit, upper_limit, (m, n, n_features))

        weights = np.concatenate([g1_new, g2_new, g3_new, g4_new])
        fitness = _population_fitness(X, weights, y)

        best_idx = fitness.argmin(axis=1)
        candidate = fitness[rows, best_idx]
        improved = candidate < best_fitness
        best_fitness[improved] = candidate[improved]
        best_weights[improved] = weights[improved, best_idx[improved]]
        history.append(best_fitness.copy())

    # Per-population bests never get worse, so the overall best is in the
    # final state.
    winner = int(best_fitness.argmin())
    return (
        best_weights[winner].copy(),
        float(best_fitness[winner]),
        np.asarray(history),
    )


def select_weights(weights, threshold=WEIGHT_THRESHOLD):
    """Return *weights* with every entry of magnitude below *threshold* zeroed."""
    weights = np.asarray(weights, dtype=float)
    return np.where(np.abs(weights) < threshold, 0.0, weights)


def predict(X, weights):
    """Return 0/1 predictions: 1 where ``sigmoid(X @ weights)`` exceeds 0.5."""
    return (sigmoid(np.matmul(X, weights)) > 0.5).astype(int)


def accuracy_percent(y_true, y_pred):
    """Return the percentage of positions where the two label arrays agree.

    Raises:
        ValueError: if the arrays are empty or differ in length.
    """
    y_true = np.asarray(y_true).reshape(-1)
    y_pred = np.asarray(y_pred).reshape(-1)
    if y_true.size == 0 or y_true.size != y_pred.size:
        raise ValueError("y_true and y_pred must be non-empty and equally long")
    # Divide by the real sample count (the original divided by a fixed 340).
    return float(np.mean(y_true == y_pred) * 100)


def load_dataset(csv_path=DATA_PATH, label_column=LABEL_COLUMN,
                 n_features=N_FEATURE_COLUMNS, seed=None):
    """Load the CSV and return ``(X, Y)`` ready for the optimiser.

    The rows are shuffled as a whole, so features and labels stay aligned
    (the original shuffled only the features). The first *n_features* columns
    are min-max scaled and a leading column of ones is added as a bias term.

    Returns:
        ``X`` of shape ``(rows, n_features + 1)`` and ``Y`` of shape
        ``(rows, 1)``.
    """
    from sklearn import preprocessing

    df = pd.read_csv(csv_path)
    df = df.sample(frac=1, random_state=seed).reset_index(drop=True)

    features = df[df.columns[0:n_features]].values
    scaled = preprocessing.MinMaxScaler().fit_transform(features)
    X = np.c_[np.ones((scaled.shape[0], 1)), scaled]

    # .to_numpy() first: a pandas Series has no .reshape().
    # NOTE(review): the logistic fitness assumes labels are 0/1 - confirm.
    Y = df[label_column].to_numpy(dtype=float).reshape(-1, 1)
    return X, Y


def evaluate_baselines(X, Y):
    """Fit and score the three comparison classifiers on ``(X, Y)``.

    Prints one accuracy (in percent) per classifier and returns them as a
    ``{name: accuracy}`` dict.
    """
    from sklearn import tree  # noqa: F401  (imported by the original script)
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import accuracy_score
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.tree import DecisionTreeClassifier

    y = np.asarray(Y).ravel()   # scikit-learn expects 1-D labels
    models = [
        ("Decision tree",
         DecisionTreeClassifier(criterion="entropy", max_depth=3,
                                max_leaf_nodes=2)),
        ("KNN",
         KNeighborsClassifier(n_neighbors=8, weights="uniform",
                              algorithm="auto")),
        ("Random forest",
         RandomForestClassifier(n_estimators=8, criterion="entropy",
                                random_state=0)),
    ]

    scores = {}
    for name, model in models:
        model.fit(X, y)
        scores[name] = accuracy_score(y, model.predict(X)) * 100
        print(f"{name} accuracy: {scores[name]}")
    return scores


def main(argv=None):
    """Run the whole experiment; an optional first argument is the CSV path."""
    argv = sys.argv[1:] if argv is None else argv
    csv_path = argv[0] if argv else DATA_PATH

    X, Y = load_dataset(csv_path)

    # The original also printed `80 + rand() * 10` here as if it were an
    # accuracy; that number was not derived from any model and was removed.
    weights, fitness, _ = cuttlefish_optimise(X, Y)
    print(f"Best fitness: {fitness}")

    best_weights = select_weights(weights)
    print(f"Selected weights: {best_weights}")

    accuracy = accuracy_percent(Y, predict(X, best_weights))
    print(f"Cuttlefish model accuracy: {accuracy}")

    evaluate_baselines(X, Y)


if __name__ == "__main__":
    main()

# --------------------------------------------------------------------------------