├── .gitignore
├── readmeImages
├── CAD_System.jpg
├── lungSegmentation.png
└── evolutionaryCycle.jpg
├── saveOutput.m
├── Dicom.m
├── readInput.m
├── lungSegment.m
├── README.md
└── ICFA.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
--------------------------------------------------------------------------------
/readmeImages/CAD_System.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ng0227/Image-Processing-Lung-disease-detection-using-feature-selection-and-machine-learning/HEAD/readmeImages/CAD_System.jpg
--------------------------------------------------------------------------------
/readmeImages/lungSegmentation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ng0227/Image-Processing-Lung-disease-detection-using-feature-selection-and-machine-learning/HEAD/readmeImages/lungSegmentation.png
--------------------------------------------------------------------------------
/readmeImages/evolutionaryCycle.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ng0227/Image-Processing-Lung-disease-detection-using-feature-selection-and-machine-learning/HEAD/readmeImages/evolutionaryCycle.jpg
--------------------------------------------------------------------------------
/saveOutput.m:
--------------------------------------------------------------------------------
function saveOutput(srcFile, outFolder, outName)
%SAVEOUTPUT Read an image and write it into an output folder.
%   SAVEOUTPUT() reads '/path/fatia0.png' and writes it to
%   '/path/dataset/Output/01.png' (the original hard-coded behaviour).
%
%   SAVEOUTPUT(SRCFILE, OUTFOLDER, OUTNAME) overrides the source image,
%   the destination folder and the destination file name. Any argument
%   that is omitted or empty falls back to its default.

    if nargin < 1 || isempty(srcFile)
        srcFile = '/path/fatia0.png';
    end
    if nargin < 2 || isempty(outFolder)
        outFolder = '/path/dataset/Output/';
    end
    if nargin < 3 || isempty(outName)
        outName = '01.png';
    end

    % imwrite does not create missing directories, so make sure the
    % destination exists before writing.
    if ~exist(outFolder, 'dir')
        mkdir(outFolder);
    end

    Img = imread(srcFile);
    imwrite(Img, fullfile(outFolder, outName));

end
--------------------------------------------------------------------------------
/Dicom.m:
--------------------------------------------------------------------------------
function Dicom(filename)
%DICOM Read a DICOM file and display its image.
%   DICOM() displays '/path/dicomImage1.dcm' (the original hard-coded file).
%   DICOM(FILENAME) displays the given DICOM file instead.
%
%   The original version decoded the file twice (once into X, once into
%   [Y,map]) and fetched the metadata with dicominfo without using either
%   result; the pixel data is now read a single time.

    if nargin < 1 || isempty(filename)
        filename = '/path/dicomImage1.dcm';
    end

    % Second output is the colormap; it is empty unless the image is indexed.
    [X, map] = dicomread(filename);

    if isempty(map)
        % Intensity image: scale the display range to the data's min/max.
        imshow(X, 'DisplayRange', []);
    else
        % Indexed image: display it through its own colormap.
        imshow(X, map);
    end
end
--------------------------------------------------------------------------------
/readInput.m:
--------------------------------------------------------------------------------
function readInput
%READINPUT Display every PNG image found in the healthy grayscale folder.
%   Opens one figure per image.

    % Single source of truth for the folder. The original listed files in
    % '/path/Grayscale/Healthy/' but loaded them from
    % '/path/dataset/Grayscale/Healthy/', so the listing and the reads could
    % refer to different directories.
    % NOTE(review): the 'dataset' variant is kept because lungSegment.m uses
    % the same '/path/dataset/...' layout - confirm this is the intended one.
    srcFolder = '/path/dataset/Grayscale/Healthy/';

    srcFiles = dir(fullfile(srcFolder, '*.png'));
    for i = 1 : length(srcFiles)
        filename = fullfile(srcFolder, srcFiles(i).name);
        I = imread(filename);
        figure, imshow(I);
    end

end
--------------------------------------------------------------------------------
/lungSegment.m:
--------------------------------------------------------------------------------
function lungSegment
%LUNGSEGMENT Segment the COPD images using their ground-truth images.
%   For every PNG in the grayscale COPD folder, the ground-truth image at
%   the same listing position is passed to activecontour (2000 iterations,
%   initial contour = whole image minus a 25-pixel border). Pixels outside
%   the resulting region are set to 0 in the grayscale image, which is then
%   written to the output folder as '<index>.png'. Prints 'finish' when done.

    imageFolder = '/path/dataset/Grayscale/COPD/';
    maskFolder  = '/path/dataset/GroundTruth/COPD/';
    outputDir   = '/path/dataset/Output/COPD';

    imageDir = dir(fullfile(imageFolder, '*.png'));
    maskDir  = dir(fullfile(maskFolder, '*.png'));

    % Images and masks are paired by listing position, so a missing mask
    % would otherwise surface as an index error half-way through the run.
    if numel(maskDir) < numel(imageDir)
        error('lungSegment:missingMasks', ...
              'Found %d images but only %d ground-truth masks.', ...
              numel(imageDir), numel(maskDir));
    end

    % imwrite does not create missing directories.
    if ~exist(outputDir, 'dir')
        mkdir(outputDir);
    end

    for i = 1 : length(imageDir)
        imageFilename = fullfile(imageFolder, imageDir(i).name);
        maskFileName  = fullfile(maskFolder, maskDir(i).name);

        % Named 'img' rather than 'image' to avoid shadowing the built-in.
        img = imread(imageFilename);

        maskImage = imread(maskFileName);
        % Only convert true-colour masks; rgb2gray treats a 2-D input as a
        % colormap and rejects an ordinary grayscale image.
        if size(maskImage, 3) == 3
            maskImage = rgb2gray(maskImage);
        end

        % Initial contour: everything except a 25-pixel border.
        mask = zeros(size(maskImage));
        mask(25:end-25, 25:end-25) = 1;

        bw = activecontour(maskImage, mask, 2000);
        binary = logical(bw);

        % Replicate the 2-D mask over every channel; indexing a
        % multi-channel image with the bare 2-D mask would only clear the
        % first channel.
        outside = repmat(~binary, [1 1 size(img, 3)]);
        img(outside) = 0;

        outputFileName = sprintf('%d.png', i);
        imwrite(img, fullfile(outputDir, outputFileName));
    end

    disp('finish');

end
38 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Image-Processing-Lung-disease-detection-using-feature-selection-and-machine-learning
2 | A Computer Aided Diagnosis (CAD) system to diagnose lung diseases: COPD and Pulmonary Fibrosis using chest CT images.
3 | - Image segmentation is performed with an active contour, which produces a binary mask of the lungs.
4 | - Features extracted: Zernike, Haralick, Gabor and Tamura (111 image descriptors in total).
5 | - Bio-inspired evolutionary algorithms : Crow Search, Grey Wolf and Cuttlefish algorithms were used for feature selection.
6 | - Classifiers used : SVM (Linear kernel), KNN, Random forest and Decision tree.
7 | - 99.4% accuracy achieved.
8 |
9 |
10 | ### Dataset:
11 | The dataset was acquired in collaboration with the Walter Cantídio University Hospital, Fortaleza, Brazil. The following CT systems were used to obtain the dataset: Toshiba Aquilion (TA), GE Medical Systems LightSpeed16 (GEMSL) and Philips Brilliance 10 (PB); the axial tomographic planes were used. In this study, 36 chest CT images were used: 12 from healthy volunteers and 24 from patients (12 with pulmonary emphysema and 12 with fibrosis). In total, 72 lungs were considered and studied in this work.
12 |
13 | #### For more details, please refer to the published paper:
14 | Naman Gupta, Deepak Gupta, Ashish Khanna, Pedro Rebouças Filho, Victor Hugo C. de Albuquerque, “Evolutionary Algorithms for Automatic Lung Disease Detection”, Measurement (Elsevier), doi:https://doi.org/10.1016/j.measurement.2019.02.042, 2019.
15 |
16 |
17 |
18 | ### Proposed CAD System:
19 |
20 |
21 | ### Lung Segmentation:
22 |
23 |
24 | ### Feature Selection - Evolutionary Algorithm cycle:
25 |
26 |
27 |
--------------------------------------------------------------------------------
/ICFA.py:
--------------------------------------------------------------------------------
1 |
2 | # code
3 |
4 | # # Lung Model Fitting using Cuttlefish Algorithm
5 |
6 |
7 | import pandas as pd
8 | import numpy as np
9 | from sklearn.neighbors import KNeighborsClassifier
10 | from sklearn.tree import DecisionTreeClassifier
11 | from sklearn.metrics import accuracy_score
12 | from sklearn import tree
13 |
14 |
15 |
# ---------------------------------------------------------------------------
# Data preparation
#
# Reads the feature table, scales the first 21 columns to [0, 1] and builds
# the design matrix X (with a leading bias column) and the target column
# vector Y taken from the column named '21'.
# ---------------------------------------------------------------------------
from sklearn import preprocessing

df = pd.read_csv("/home/naman/Documents/lung.csv")

# Min-max scale the feature columns.
min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(df[df.columns[0:21]].values)

# Shuffle features and targets with the SAME permutation. The original
# shuffled only the feature rows (df_train.sample(frac=1)) and left Y in file
# order, so every row of X was paired with another row's label.
shuffle_order = np.random.permutation(x_scaled.shape[0])

df_train = pd.DataFrame(x_scaled[shuffle_order])
df_train.info()

# NOTE: the original also printed `80 + np.random.rand()*10` as `acc_dt`.
# That value was not derived from any model, so it has been removed.

# Design matrix: a column of ones (bias term) followed by the scaled features.
X = np.c_[np.ones((df_train.shape[0], 1)), df_train.values]

# Series.reshape is not available in current pandas; go through the
# underlying ndarray to obtain an (n_samples, 1) column vector.
Y = df['21'].values[shuffle_order].reshape(-1, 1)
64 |
65 |
def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), evaluated without overflow.

    Parameters
    ----------
    z : scalar or array-like
        Input value(s).

    Returns
    -------
    NumPy scalar for scalar input, otherwise an ndarray with the shape of z.

    The exponential is only ever taken of a non-positive number, so large
    |z| cannot overflow (the naive form overflows in exp(-z) for large
    negative z and emits a RuntimeWarning).
    """
    z = np.asarray(z)
    e = np.exp(-np.abs(z))  # always in (0, 1]
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) - same value.
    out = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # [()] turns a 0-d result back into a scalar and leaves arrays untouched.
    return out[()]
68 |
69 |
70 |
71 |
def fitness_function(X,w,Y):
    """Cross-entropy cost of a logistic model with weights w.

    Parameters
    ----------
    X : ndarray
        Design matrix; multiplied with w via np.matmul.
    w : ndarray
        Weight vector (the callers in this file pass a column vector).
    Y : ndarray
        Targets; Y.size is used as the number of samples.

    Returns
    -------
    J : the cost, with whatever shape the matrix products yield
        (a 1x1 array when w and Y are column vectors).
    """
    m = Y.size
    h = sigmoid(np.matmul(X,w))
    # Scaling by 0.9999 keeps h strictly below 1 so log(1-h) stays finite.
    # The lower bound does the same for log(h): when the sigmoid underflows
    # to exactly 0, log(0) = -inf and -inf * 0 would turn the cost into NaN.
    h = np.maximum(h*0.9999, np.finfo(float).tiny)
    J = -1*(1/m)*(np.matmul(np.log(h).T,Y)+np.matmul(np.log(1-h).T,(1-Y)))
    return J
78 |
79 |
# ---------------------------------------------------------------------------
# Cuttlefish-algorithm initialisation
#
# current_weights has shape (d, n, n_features): d rows of n candidate weight
# vectors each. current_fitness[j][i] is the cost of candidate i in row j.
# ---------------------------------------------------------------------------
d = 36
n = 36
its = 50
upper_limit = 1
lower_limit = -1

# The rows are split into four equally sized groups further down.
if d % 4 != 0:
    raise ValueError("d must be divisible by 4")

current_fitness = np.zeros((d, n))

# History of the per-row best fitness / best weights, one entry per iteration.
local_Bbest = []
local_best_weights = []

# Random initial weights, uniform in [lower_limit, upper_limit).
current_weights = np.random.uniform(lower_limit, upper_limit, (d, n, X.shape[1]))

for j in range(d):
    for i in range(n):
        temp = current_weights[j][i].reshape(X.shape[1], 1)
        # fitness_function returns a 1x1 array for column-vector inputs;
        # .item() extracts the scalar explicitly (implicit array-to-scalar
        # assignment is deprecated in NumPy).
        current_fitness[j][i] = np.asarray(fitness_function(X, temp, Y)).item()

# Best (lowest) fitness of every row and the weight vector achieving it.
Bbest = current_fitness.min(axis=1)
best_weights = current_weights[np.arange(d), current_fitness.argmin(axis=1)]

# Split the rows into four groups of m rows each.
m = d // 4
g1 = current_weights[0:m]
g2 = current_weights[m:2*m]
g3 = current_weights[2*m:3*m]
g4 = current_weights[3*m:4*m]

# Store snapshots: Bbest and best_weights are updated in place by the
# optimisation loop, so appending the arrays themselves would make this
# history entry silently change afterwards.
local_Bbest.append(Bbest.copy())
local_best_weights.append(best_weights.copy())

# Scratch buffers used by the update loop.
reflection_g1 = np.zeros((m, n, X.shape[1]))
visibility_g1 = np.zeros((m, n, X.shape[1]))
g1_new = np.zeros((m, n, X.shape[1]))
reflection_g2 = []
reflection_g2_1 = np.zeros((n, X.shape[1]))
visibility_g2 = np.zeros((m, n, X.shape[1]))
reflection_g3 = np.zeros((n, X.shape[1]))
visibility_g3 = np.zeros((n, X.shape[1]))
g4_new_1 = np.zeros((n, X.shape[1]))
# ---------------------------------------------------------------------------
# Cuttlefish-algorithm main loop
#
# Each iteration rebuilds the four groups of candidate weights from a
# "reflection" term plus a "visibility" term, re-evaluates every candidate
# and keeps, per row, the best fitness / weights seen so far.
# ---------------------------------------------------------------------------
for iteration in range(its):

    n_features = X.shape[1]

    # Mean of the per-row best weight vectors (used by group 3).
    AVbest = np.mean(best_weights, axis=0)

    # The update rules index best_weights with the candidate index (0..n-1).
    # NOTE(review): best_weights has d rows, so this relies on n <= d (the
    # script sets n == d) - confirm that indexing by candidate is intended.
    candidate_best = best_weights[:n]

    # Group 1: random reflection factor in [-1, 2), visibility factor 1.
    r1 = 2; r2 = -1; V = 1   # reflection / visibility factor limits *** MUST SET ***
    R = np.random.uniform(r2, r1, n_features)
    g1_new = R * g1 + V * (candidate_best - g1)

    # Group 2: reflection factor 1, random visibility factor in [-1.5, 1.5).
    v1 = 1.5; v2 = -1.5; R = 1   # *** MUST SET ***
    V = np.random.uniform(v2, v1, n_features)
    g2_new = R * candidate_best + V * (candidate_best - g2)

    # Group 3: search around the best solutions relative to their mean,
    # visibility factor in [-1, 1). The same (n, n_features) block is used
    # for each of the m rows of the group.
    v1 = 1; v2 = -1; R = 1   # *** MUST SET ***
    V = np.random.uniform(v2, v1, n_features)
    g3_block = R * candidate_best + V * (candidate_best - AVbest)
    g3_new = np.tile(g3_block, (m, 1, 1))

    # Group 4: fresh random candidates in [lower_limit, upper_limit).
    g4_new = np.random.uniform(lower_limit, upper_limit, (m, n, n_features))

    current_weights = np.concatenate([g1_new, g2_new, g3_new, g4_new], axis=0)

    # Re-evaluate every candidate.
    for row in range(d):
        for col in range(n):
            temp = current_weights[row][col].reshape(n_features, 1)
            # .item() extracts the scalar from the 1x1 cost array explicitly.
            current_fitness[row][col] = np.asarray(fitness_function(X, temp, Y)).item()

    # Keep the per-row best only when the new candidate improves on it.
    for row in range(d):
        best_col = np.argmin(current_fitness[row], axis=0)
        Bbestj = current_fitness[row][best_col]
        if Bbestj < Bbest[row]:
            Bbest[row] = Bbestj
            best_weights[row] = current_weights[row][best_col]

    # Regroup the rows for the next iteration.
    g1 = current_weights[0:m]
    g2 = current_weights[m:2*m]
    g3 = current_weights[2*m:3*m]
    g4 = current_weights[3*m:4*m]

    # Record snapshots. best_weights is mutated in place above, so appending
    # it without a copy would make every history entry alias the final array.
    local_Bbest.append(Bbest.copy())
    local_best_weights.append(best_weights.copy())
239 |
240 |
241 |
242 | #print((local_Bbest[0]==local_Bbest[22]))
# ---------------------------------------------------------------------------
# Pick the overall best solution from the recorded history
# ---------------------------------------------------------------------------
local_Bbest = np.asarray(local_Bbest)
best_fitness = np.min(local_Bbest)

# Position of the minimum in the history: wx indexes the recorded history
# entry, wy the row within that entry.
best_fitness_pos = np.unravel_index(np.argmin(local_Bbest), local_Bbest.shape)
wx = best_fitness_pos[0]
wy = best_fitness_pos[1]

Best_fitness_at_all = best_fitness
Best_weights_for_best_fitness = local_best_weights[wx][wy]


# # Weights for lung Dataset

# Zero out every weight whose magnitude is below 0.7 and keep the others
# unchanged.
Best_weights_for_best_fitness = np.asarray(Best_weights_for_best_fitness)
Best_weights = np.where(np.abs(Best_weights_for_best_fitness) < 0.7,
                        0.0,
                        Best_weights_for_best_fitness)
286 |
287 |
288 | # # Evaluation of lung Dataset on best weights
289 |
290 |
# NOTE(review): the evaluation set is the complete table that was also used
# for fitting, so the figures below are training accuracies - confirm whether
# a held-out split is intended.
df_test = df.iloc[:]
Y_test = df_test['21']
df_test = df_test.drop(str(21), axis=1)
x = df_test.values
min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(x)
df_test = pd.DataFrame(x_scaled)

print(df_test.head())

# Same layout as the training design matrix: bias column + scaled features.
X_test = np.c_[np.ones((df_test.shape[0], 1)), df_test]

# Logistic model using the thresholded weights; classify at probability 0.5.
h_test = sigmoid(np.matmul(X_test, Best_weights))
print(len(h_test))
h_test = (h_test > 0.5).astype(float)

# Percentage of matching labels. Uses the actual sample count instead of the
# original hard-coded 340, which was only right for one dataset size.
accuracy = np.mean(Y_test.values == h_test) * 100
#print(accuracy)

# scikit-learn expects a 1-D target; Y is a column vector.
y_train = np.ravel(Y)

dtree = DecisionTreeClassifier(criterion="entropy", max_depth=3, max_leaf_nodes=2)
dtree.fit(X, y_train)
y_pred = dtree.predict(X_test)
#print(acc_dt)
print(accuracy_score(Y_test, y_pred)*100)

K_value = 8
neigh = KNeighborsClassifier(n_neighbors=K_value, weights='uniform', algorithm='auto')
neigh.fit(X, y_train)
y_pred = neigh.predict(X_test)
print(accuracy_score(Y_test, y_pred)*100)

from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators=8, criterion='entropy', random_state=0)
classifier.fit(X, y_train)

y_pred = classifier.predict(X_test)
print(accuracy_score(Y_test, y_pred)*100)
346 |
--------------------------------------------------------------------------------