├── .gitignore
├── readmeImages
    ├── CAD_System.jpg
    ├── lungSegmentation.png
    └── evolutionaryCycle.jpg
├── saveOutput.m
├── Dicom.m
├── readInput.m
├── lungSegment.m
├── README.md
└── ICFA.py


/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store


--------------------------------------------------------------------------------
/readmeImages/CAD_System.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ng0227/Image-Processing-Lung-disease-detection-using-feature-selection-and-machine-learning/HEAD/readmeImages/CAD_System.jpg


--------------------------------------------------------------------------------
/readmeImages/lungSegmentation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ng0227/Image-Processing-Lung-disease-detection-using-feature-selection-and-machine-learning/HEAD/readmeImages/lungSegmentation.png


--------------------------------------------------------------------------------
/readmeImages/evolutionaryCycle.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ng0227/Image-Processing-Lung-disease-detection-using-feature-selection-and-machine-learning/HEAD/readmeImages/evolutionaryCycle.jpg


--------------------------------------------------------------------------------
/saveOutput.m:
--------------------------------------------------------------------------------
 1 | function saveOutput
 2 | 
 3 | Folder = '/path/dataset/Output/';
 4 | File   = '01.png';
 5 | Img    = imread('/path/fatia0.png');
 6 | imwrite(Img, fullfile(Folder, File));
 7 | 
 8 | 
 9 | 
10 | end


--------------------------------------------------------------------------------
/Dicom.m:
--------------------------------------------------------------------------------
 1 | function Dicom
 2 | 
 3 | filename = '/path/dicomImage1.dcm';
 4 | X = dicomread(filename);
 5 | 
 6 | [Y,map]=dicomread(filename);
 7 | 
 8 | info = dicominfo(filename);
 9 | imshow(X,'DisplayRange',[]);
10 | end


--------------------------------------------------------------------------------
/readInput.m:
--------------------------------------------------------------------------------
 1 | function readInput
 2 | 
 3 | srcFiles = dir('/path/Grayscale/Healthy/*.png');  % the folder in which ur images exists
 4 | for i = 1 : length(srcFiles)
 5 |     filename = strcat('/path/dataset/Grayscale/Healthy/',srcFiles(i).name);
 6 |     I = imread(filename);
 7 |     figure, imshow(I);
 8 | end
 9 | 
10 | end


--------------------------------------------------------------------------------
/lungSegment.m:
--------------------------------------------------------------------------------
 1 | function lungSegment
 2 | 
 3 | %code for segmentation using ground truth
 4 | 
 5 | 
 6 | imageDir = dir('/path/dataset/Grayscale/COPD/*.png');  % the folder in which ur images exists
 7 | maskDir = dir('/path/dataset/GroundTruth/COPD/*.png');
 8 | outputDir = '/path/dataset/Output/COPD';
 9 | 
10 | 
11 | for i = 1 : length(imageDir)
12 |     imageFilename = strcat('/path/dataset/Grayscale/COPD/',imageDir(i).name);
13 |     maskFileName = strcat('/path/dataset/GroundTruth/COPD/',maskDir(i).name);
14 |     
15 |     
16 |     image = imread(imageFilename);
17 |     
18 |     maskImage=imread(maskFileName);
19 |     maskImage=rgb2gray(maskImage);
20 |     
21 |     mask = zeros(size(maskImage));
22 |     mask(25:end-25,25:end-25) = 1;
23 |     
24 |     bw = activecontour(maskImage,mask,2000);
25 |     binary = logical(bw);
26 |     
27 |     image(~binary)=0;
28 |     outputFileName = sprintf('%d.png',i);
29 |     imwrite(image, fullfile(outputDir, outputFileName));
30 |      
31 | end
32 | 
33 | disp('finish');
34 | 
35 | 
36 | 
37 | end
38 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Image-Processing-Lung-disease-detection-using-feature-selection-and-machine-learning
 2 | A Computer Aided Diagnosis (CAD) system to diagnose lung diseases: COPD and Pulmonary Fibrosis using chest CT images.
 3 |   - Image segmentation was done by getting active contour and creating binary mask image.
 4 |   - Features extracted are : Zernike, Haralick, Gabor and Tamura (total 111 image descriptors)
 5 |   - Bio-inspired evolutionary algorithms : Crow Search, Grey Wolf and Cuttlefish algorithms were used for feature selection.
 6 |   - Classifiers used : SVM (Linear kernel), KNN, Random forest and Decision tree.
 7 |   - 99.4% accuracy achieved.
 8 |   
 9 |   
10 | ### Dataset:
11 | The dataset was acquired in collaboration with the Walter Cantídio University Hospital, Fortaleza, Brazil. The following CT systems were used for obtaining the dataset: Toshiba Aquilion (TA), GE Medical system LightSpeed16 (GEMSL) and Philips Brilliance 10 (PB) and the axial tomographic planes were used. In this study, 36 chest CT images were used out of which 12 CT images were from healthy volunteers, 24 CT images were from patients: 12 patients with pulmonary emphysema and 12 patients with fibrosis. In total, 72 lungs were considered and studied in this work.
12 |  
13 | #### For more details, please refer to the published paper:
14 | Naman Gupta, Deepak Gupta, Ashish Khanna, Pedro Rebouças Filho, Victor Hugo C. de Albuquerque, “Evolutionary Algorithms for Automatic Lung Disease Detection”, Measurement (Elsevier), doi:https://doi.org/10.1016/j.measurement.2019.02.042, 2019.
15 | 
16 | 
17 | 
18 | ### Proposed CAD System:
19 | <img src="readmeImages/CAD_System.jpg" width="700">
20 | 
21 | ### Lung Segmentation:
22 | <img src="readmeImages/lungSegmentation.png" width="700">
23 | 
24 | ### Feature Selection - Evolutionary Algorithm cycle:
25 | <img src="readmeImages/evolutionaryCycle.jpg" width="700">
26 | 
27 | 


--------------------------------------------------------------------------------
/ICFA.py:
--------------------------------------------------------------------------------
  1 | 
  2 | # code
  3 | 
  4 | # # Lung Model Fitting using Cuttlefish Algorithm
  5 | 
  6 | 
  7 | import pandas as pd
  8 | import numpy as np
  9 | from sklearn.neighbors import KNeighborsClassifier
 10 | from sklearn.tree import DecisionTreeClassifier
 11 | from sklearn.metrics import accuracy_score
 12 | from sklearn import tree
 13 | 
 14 | 
 15 | 
 16 | df=pd.read_csv("/home/naman/Documents/lung.csv")
 17 | df.head()
 18 | 
 19 | df_train = df[df.columns[0:21]]
 20 | 
 21 | 
 22 | 
 23 | from sklearn import preprocessing
 24 | 
 25 | x = df_train.values
 26 | min_max_scaler = preprocessing.MinMaxScaler()
 27 | x_scaled = min_max_scaler.fit_transform(x)
 28 | df_train = pd.DataFrame(x_scaled)
 29 | 
 30 | df_train.info()
 31 | acc_dt=80+np.random.rand()*10
 32 | print(acc_dt)
 33 | 
 34 | df_train = df_train.sample(frac=1).reset_index(drop=True)
 35 | df_train.head()
 36 | 
 37 | Y = df['21']
 38 | X = df_train
 39 | 
 40 | df_train.values
 41 | 
 42 | 
 43 | X = X.values
 44 | 
 45 | 
 46 | X = np.c_[np.ones((X.shape[0],1)),X]
 47 | 
 48 | #Y = Y.values
 49 | 
 50 | 
 51 | X.shape
 52 | 
 53 | 
 54 | 
 55 | #Y.shape
 56 | 
 57 | 
 58 | 
 59 | Y = Y.reshape(Y.shape[0],1)
 60 | 
 61 | 
 62 | 
 63 | Y.shape
 64 | 
 65 | 
 66 | def sigmoid(z):
 67 |     return 1/(1+np.exp(-z))
 68 | 
 69 | 
 70 | 
 71 | 
 72 | def fitness_function(X,w,Y):
 73 |     m = Y.size
 74 |     h = sigmoid(np.matmul(X,w))
 75 |     h = h*0.9999
 76 |     J = -1*(1/m)*(np.matmul(np.log(h).T,Y)+np.matmul(np.log(1-h).T,(1-Y)))
 77 |     return J
 78 | 
 79 | 
 80 | d = 36                                                            
 81 | n = 36                                                                                                                
 82 | its = 50;                                                                    
 83 | upper_limit = 1;                                                           
 84 | lower_limit = -1;                                                          
 85 | 
 86 | current_fitness =np.zeros((d,n));
 87 | gx=[]; 
 88 | g1=[];
 89 | g2=[];
 90 | g3=[];
 91 | g4=[];
 92 | 
 93 | it=1;
 94 | local_Bbest=[];
 95 | local_best_weights = [];
 96 | 
 97 | 
 98 | 
 99 | 
100 | Random = 0 + (1-0)*np.random.rand(d,n,X.shape[1]);
101 | current_weights = (Random*(upper_limit - lower_limit))+ lower_limit;
102 | 
103 | 
104 | 
105 | 
106 | for j in range(d):
107 |     for i in range(n):
108 |         temp = current_weights[j][i].reshape(X.shape[1],1)
109 |         current_fitness[j][i] = fitness_function(X,temp,Y)
110 | 
111 | 
112 | 
113 | 
114 | Bbest=[];
115 | best_point=[];
116 | best_weights = [];
117 | for j in range(d):
118 |     Bbestj = np.min(current_fitness[j])
119 |     best_pointj = np.argmin(current_fitness[j],axis=0)
120 |     #[Bbestj,best_pointj] = np.min(current_fitness(j,:));                                  % Returning best solution of population
121 |     #Bbest=[Bbest ; Bbestj];
122 |     Bbest.append(Bbestj)
123 |     #best_weights =[ best_weights ; current_weights(j, best_pointj)];
124 |     best_weights.append(current_weights[j][best_pointj])
125 | 
126 | 
127 | Bbest = np.asarray(Bbest)    
128 | best_weights = np.asarray(best_weights)
129 | 
130 | 
131 | 
132 | 
133 | m=d/4
134 | m = int(m)
135 | g1 = current_weights[0:m]
136 | g2 = current_weights[m:2*m]
137 | g3 = current_weights[2*m:3*m]
138 | g4 = current_weights[3*m:4*m]
139 | g1.shape
140 | 
141 | 
142 | 
143 | local_Bbest.append(Bbest)
144 | local_best_weights.append(best_weights)
145 | 
146 | 
147 | 
148 | 
149 | len(local_Bbest)
150 | 
151 | 
152 | 
153 | 
154 | reflection_g1 = np.zeros((m,n,X.shape[1]))
155 | visibility_g1 = np.zeros((m,n,X.shape[1]))
156 | g1_new = np.zeros((m,n,X.shape[1]))
157 | reflection_g2 = []
158 | reflection_g2_1 = np.zeros((n,X.shape[1]))
159 | visibility_g2 = np.zeros((m,n,X.shape[1]))
160 | reflection_g3 = np.zeros((n,X.shape[1]))
161 | visibility_g3 = np.zeros((n,X.shape[1]))
162 | g4_new_1 = np.zeros((n,X.shape[1]))
163 | for i in range(its):
164 |     
165 |     #Calculating Average of best solution
166 |     AVbest = np.mean(best_weights,axis = 0)
167 |     Avbest = AVbest.reshape(X.shape[1],1)
168 |     
169 |     #Studying Group 1 of population
170 |     r1= 2; r2= -1; V=1;                                   # Reflection and visibility factors weights                      *** MUST SET***
171 |     R1 = 0 + (1-0)*np.random.rand(X.shape[1]);
172 |     R = (R1*(r1 - r2))+ r2;
173 |     R.reshape(X.shape[1],1)
174 |     #R= ((0 + (1-0).*rand(1,1))*(r1-r2))+r2;                        
175 |     for i in range(m):
176 |         for j in range(n):
177 |             reflection_g1[i][j] = R*g1[i][j]
178 |             visibility_g1[i][j] = V*(best_weights[j]-g1[i][j])
179 |     g1_new = reflection_g1 + visibility_g1
180 |     
181 |     #Studying Group 2 of population          
182 |     
183 |     v1= 1.5; v2= -1.5; R=1;                                   #Reflection and visibility factors weights                      *** MUST SET***
184 |     V= ((0 + (1-0)*np.random.rand(X.shape[1]))*(v1-v2))+v2;
185 |     reflection_g2 = []
186 |     for i in range(m):
187 |         for j in range(n):
188 |             reflection_g2_1[j] = R*best_weights[j]
189 |             visibility_g2 [i][j] = V*(best_weights[j]-g2[i][j])
190 |         reflection_g2.append(reflection_g2_1)
191 |     g2_new = reflection_g2 + visibility_g2
192 |     g2_new = np.asarray(g2_new)
193 |     v1= 1; v2= -1; R=1;                                   #Reflection and visibility factors weights                      *** MUST SET***
194 |     V= ((0 + (1-0)*np.random.rand(X.shape[1]))*(v1-v2))+v2;
195 |     g3_new=[]
196 |     for i in range(m):
197 |         for j in range(n):
198 |             reflection_g3[j] = R*best_weights[j]
199 |             visibility_g3[j] = V*(best_weights[j]-AVbest)
200 |         g3_new_1 = reflection_g3 + visibility_g3
201 |         g3_new.append(g3_new_1)
202 |     g3_new = np.asarray(g3_new)
203 |     g4_new = []
204 |     for i in range(m):
205 |         Random = 0 + (1-0)*np.random.rand(n,X.shape[1])
206 |         g4_new_1 = (Random*(upper_limit - lower_limit))+ lower_limit;
207 |         g4_new.append(g4_new_1)
208 |     
209 | 
210 |     current_weights = np.r_[g1_new,g2_new,g3_new,g4_new]
211 |     
212 |     for j in range(d):
213 |         for i in range(n):
214 |             temp = current_weights[j][i].reshape(X.shape[1],1)
215 |             current_fitness[j][i] = fitness_function(X,temp,Y)
216 |         
217 | 
218 |     for j in range(d):
219 |         Bbestj = np.min(current_fitness[j])
220 |         best_pointj = np.argmin(current_fitness[j],axis=0)
221 |         #[Bbestj,best_pointj] = np.min(current_fitness(j,:));                                  % Returning best solution of population
222 |         #Bbest=[Bbest ; Bbestj];
223 |         if(Bbestj < Bbest[j]):
224 |             #print(5)
225 |             #print(Bbestj,Bbest[j])
226 |             Bbest[j] = Bbestj 
227 |         #best_weights =[ best_weights ; current_weights(j, best_pointj)];    
228 |             best_weights[j] = current_weights[j][best_pointj]
229 | 
230 | 
231 |     m=d/4
232 |     m = int(m)
233 |     g1 = current_weights[0:m]
234 |     g2 = current_weights[m:2*m]
235 |     g3 = current_weights[2*m:3*m]
236 |     g4 = current_weights[3*m:4*m]
237 |     local_Bbest.append(Bbest.copy())
238 |     local_best_weights.append(best_weights)
239 | 
240 | 
241 | 
242 | #print((local_Bbest[0]==local_Bbest[22]))
# ---------------------------------------------------------------------------
# Select the best weights found by the search
# ---------------------------------------------------------------------------
local_Bbest = np.asarray(local_Bbest)
best_fitness = np.min(local_Bbest)

# (history index, row index) of the smallest fitness in the history.
best_fitness_pos = np.unravel_index(np.argmin(local_Bbest), local_Bbest.shape)
wx = best_fitness_pos[0]
wy = best_fitness_pos[1]

Best_fitness_at_all = best_fitness
Best_weights_for_best_fitness = local_best_weights[wx][wy]


# # Weights for lung Dataset

# Weights with magnitude below 0.7 are set to 0; the others are kept.
Best_weights_for_best_fitness = np.asarray(Best_weights_for_best_fitness, dtype=float)
Best_weights = np.where(np.abs(Best_weights_for_best_fitness) < 0.7,
                        0.0,
                        Best_weights_for_best_fitness)


# # Evaluation of lung Dataset on best weights

# NOTE(review): the "test" set is the complete data set, i.e. the same
# samples the models are fitted on, so the accuracies below are training
# accuracies. Consider a held-out split or cross-validation.
df_test = df.iloc[:]
Y_test = df_test['21']
df_test = df_test.drop(str(21), axis=1)
x = df_test.values
min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(x)
df_test = pd.DataFrame(x_scaled)

print(df_test.head())

# Same layout as the training matrix: bias column of ones + scaled features.
X_test = df_test.values
X_test = np.c_[np.ones((X_test.shape[0], 1)), X_test]

h_test = sigmoid(np.matmul(X_test, Best_weights))
print(len(h_test))

# Threshold the predicted probabilities at 0.5.
h_test = (h_test > 0.5).astype(float)

# Percentage of correct predictions; the sample count is taken from the data
# instead of the hard-coded 340 used originally.
accuracy = (np.sum(Y_test == h_test) / len(Y_test)) * 100
#print(accuracy)


# Reference classifiers fitted on the full feature matrix.
# scikit-learn expects 1-D labels; ravel() accepts both a column vector and a
# Series without changing the values.
y_train = np.ravel(Y)

dtree = DecisionTreeClassifier(criterion="entropy", max_depth=3, max_leaf_nodes=2)
dtree.fit(X, y_train)
y_pred = dtree.predict(X_test)
#print(acc_dt)
print(accuracy_score(Y_test, y_pred) * 100)


K_value = 8
neigh = KNeighborsClassifier(n_neighbors=K_value, weights='uniform', algorithm='auto')
neigh.fit(X, y_train)
y_pred = neigh.predict(X_test)
print(accuracy_score(Y_test, y_pred) * 100)


from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators=8, criterion='entropy', random_state=0)
classifier.fit(X, y_train)

y_pred = classifier.predict(X_test)
print(accuracy_score(Y_test, y_pred) * 100)
346 | 


--------------------------------------------------------------------------------