Loop inside the folder for train images

├── README.md
├── Random-Forest-Image-Classification-using-Python.ipynb
├── Random-Forest-Image-Classification-using-Python.py
└── _config.yml


/README.md:
--------------------------------------------------------------------------------
 1 | # Random-Forest-Image-Classification-using-Python
 2 | Random Forest Image Classification using Python
 3 | 
 4 | Please follow the folder structure below.
 5 | 
 6 | 
 7 | 
 8 | <ul>
 9 |   <li>image-classification (folder)</li>
10 |     <ul>
11 |       <li>dataset (folder)</li>
12 |       <ul>
13 |         <li>train (folder)</li>
14 |           <ul>
15 |             <li>Image Cat1 Folder</li>
16 |               <ul>
17 |                 <li>train_img.jpg</li>
18 |                 <li>train_img.jpg</li>
19 |                 <li>train_img.jpg</li>
20 |                 <li>.......</li>
21 |               </ul>
22 |             <li>Image Cat2 Folder</li>
23 |             <ul>
24 |                 <li>train_img.jpg</li>
25 |                 <li>train_img.jpg</li>
26 |                 <li>train_img.jpg</li>
27 |                 <li>.......</li>
28 |               </ul>
29 |           </ul>  
30 |         <li>test (folder)</li>
31 |           <ul>
32 |              <li>test_img.jpg</li>
33 |              <li>test_img.jpg</li>
34 |              <li>test_img.jpg</li>
35 |              <li>.......</li>
36 |            </ul>
37 |       </ul> 
38 |      <li>output (folder)</li> 
39 |       <ul>
40 |           <li>data.h5</li>
41 |           <li>labels.h5</li>
42 |        </ul> 
43 |     <li>random_fo_image.py</li>
44 |     </ul> 
45 | </ul>
46 | 
47 | 
48 | 


--------------------------------------------------------------------------------
/Random-Forest-Image-Classification-using-Python.py:
--------------------------------------------------------------------------------
  1 | 
  2 | # coding: utf-8
  3 | 
  4 | # In[2]:
  5 | 
  6 | 
  7 | from sklearn.preprocessing import LabelEncoder
  8 | from sklearn.preprocessing import MinMaxScaler
  9 | import numpy as np
 10 | import mahotas
 11 | import cv2
 12 | import os
 13 | import h5py
 14 | import glob
 15 | import matplotlib.pyplot as plt
 16 | from sklearn.model_selection import train_test_split, cross_val_score
 17 | from sklearn.ensemble import RandomForestClassifier
 18 | 
 19 | 
 20 | # In[3]:
 21 | 
 22 | 
 23 | # make a fix file size
 24 | fixed_size  = tuple((500,500))
 25 | 
 26 | #train path 
 27 | train_path = "dataset/train"
 28 | 
 29 | # no of trees for Random Forests
 30 | num_tree = 100
 31 | 
 32 | # bins for histograms 
 33 | bins = 8
 34 | 
 35 | # train_test_split size
 36 | test_size = 0.10
 37 | 
 38 | # seed for reproducing same result 
 39 | seed = 9 
 40 | 
 41 | 
 42 | # In[4]:
 43 | 
 44 | 
 45 | # features description -1:  Hu Moments
 46 | 
 47 | def fd_hu_moments(image):
 48 |     image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
 49 |     feature = cv2.HuMoments(cv2.moments(image)).flatten()
 50 |     return feature
 51 | 
 52 | 
 53 | # In[5]:
 54 | 
 55 | 
 56 | # feature-descriptor -2 Haralick Texture 
 57 | 
 58 | def fd_haralick(image):
 59 |     # conver the image to grayscale
 60 |     gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
 61 |     # Ccompute the haralick texture fetature ve tor 
 62 |     haralic = mahotas.features.haralick(gray).mean(axis=0)
 63 |     return haralic
 64 | 
 65 | 
 66 | # In[6]:
 67 | 
 68 | 
 69 | # feature-description -3 Color Histogram
 70 | 
 71 | def fd_histogram(image, mask=None):
 72 |     # conver the image to HSV colors-space
 73 |     image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
 74 |     #COPUTE THE COLOR HISTPGRAM
 75 |     hist  = cv2.calcHist([image],[0,1,2],None,[bins,bins,bins], [0, 256, 0, 256, 0, 256])
 76 |     # normalize the histogram
 77 |     cv2.normalize(hist,hist)
 78 |     # return the histog....
 79 |     return hist.flatten()
 80 | 
 81 | 
 82 | # In[7]:
 83 | 
 84 | 
 85 | # get the training data labels 
 86 | train_labels = os.listdir(train_path)
 87 | 
 88 | # sort the training labesl 
 89 | train_labels.sort()
 90 | print(train_labels)
 91 | 
 92 | # empty list to hold feature vectors and labels 
 93 | global_features = []
 94 | labels = []
 95 | 
 96 | i, j = 0, 0 
 97 | k = 0
 98 | 
 99 | # num of images per class 
100 | images_per_class = 80
101 | 
102 | 
103 | # <h1>Loop inside the folder for train images</h1>
104 | 
105 | # In[84]:
106 | 
107 | 
108 | # ittirate the folder to get the image label name
109 | 
110 | get_ipython().run_line_magic('time', '')
111 | # lop over the training data sub folder 
112 | 
113 | for training_name in train_labels:
114 |     # join the training data path and each species training folder
115 |     dir = os.path.join(train_path, training_name)
116 | 
117 |     # get the current training label
118 |     current_label = training_name
119 | 
120 |     k = 1
121 |     # loop over the images in each sub-folder
122 |         
123 |     for file in os.listdir(dir):
124 | 
125 |         file = dir + "/" + os.fsdecode(file)
126 |        
127 |         # read the image and resize it to a fixed-size
128 |         image = cv2.imread(file) 
129 |         
130 |         if image is not None:
131 |             image = cv2.resize(image,fixed_size)
132 |             fv_hu_moments = fd_hu_moments(image)
133 |             fv_haralick   = fd_haralick(image)
134 |             fv_histogram  = fd_histogram(image)
135 |         #else:
136 |             #print("image not loaded")
137 |                 
138 |         #image = cv2.imread(file)        
139 |         #image = cv2.resize(image,fixed_size)
140 | 
141 |         # Concatenate global features
142 |         global_feature = np.hstack([fv_histogram, fv_haralick, fv_hu_moments])
143 | 
144 |         # update the list of labels and feature vectors
145 |         labels.append(current_label)
146 |         global_features.append(global_feature)
147 | 
148 |         i += 1
149 |         k += 1
150 |     print("[STATUS] processed folder: {}".format(current_label))
151 |     j += 1
152 | 
153 | print("[STATUS] completed Global Feature Extraction...")
154 | 
155 | 
156 | # In[30]:
157 | 
158 | 
159 | get_ipython().run_line_magic('time', '')
160 | # get the overall feature vector size
161 | print("[STATUS] feature vector size {}".format(np.array(global_features).shape))
162 | 
163 | # get the overall training label size
164 | print("[STATUS] training Labels {}".format(np.array(labels).shape))
165 | 
166 | # encode the target labels
167 | targetNames = np.unique(labels)
168 | le = LabelEncoder()
169 | target = le.fit_transform(labels)
170 | print("[STATUS] training labels encoded...{}")
171 | # normalize the feature vector in the range (0-1)
172 | scaler = MinMaxScaler(feature_range=(0, 1))
173 | rescaled_features = scaler.fit_transform(global_features)
174 | print("[STATUS] feature vector normalized...")
175 | 
176 | print("[STATUS] target labels: {}".format(target))
177 | print("[STATUS] target labels shape: {}".format(target.shape))
178 | 
179 | # save the feature vector using HDF5
180 | h5f_data = h5py.File('output/data.h5', 'w')
181 | h5f_data.create_dataset('dataset_1', data=np.array(rescaled_features))
182 | 
183 | h5f_label = h5py.File('output/labels.h5', 'w')
184 | h5f_label.create_dataset('dataset_1', data=np.array(target))
185 | 
186 | h5f_data.close()
187 | h5f_label.close()
188 | 
189 | print("[STATUS] end of training..")
190 | 
191 | 
192 | # In[10]:
193 | 
194 | 
195 | # import the feature vector and trained labels
196 | 
197 | h5f_data = h5py.File('output/data.h5', 'r')
198 | h5f_label = h5py.File('output/labels.h5', 'r')
199 | 
200 | global_features_string = h5f_data['dataset_1']
201 | global_labels_string = h5f_label['dataset_1']
202 | 
203 | global_features = np.array(global_features_string)
204 | global_labels = np.array(global_labels_string)
205 | 
206 | 
207 | # In[11]:
208 | 
209 | 
210 | 
211 | # split the training and testing data
212 | (trainDataGlobal, testDataGlobal, trainLabelsGlobal, testLabelsGlobal) = train_test_split(np.array(global_features),
213 |                                                                                           np.array(global_labels),
214 |                                                                                           test_size=test_size,
215 |                                                                                           random_state=seed)
216 | 
217 | 
218 | # <h3>RandomForest</h3>
219 | 
220 | # In[128]:
221 | 
222 | 
223 | # create the model - Random Forests
224 | clf  = RandomForestClassifier(n_estimators=100)
225 | 
226 | # fit the training data to the model
227 | clf.fit(trainDataGlobal, trainLabelsGlobal)
228 | 
229 | #print(clf.fit(trainDataGlobal, trainLabelsGlobal))
230 | 
231 | clf_pred = clf.predict(trainDataGlobal)
232 | #clf_pred = clf.predict(global_feature.reshape(1,-1))[0]
233 | print(classification_report(trainLabelsGlobal,clf_pred))
234 | #print(confusion_matrix(trainLabelsGlobal,clf_pred))
235 | 
236 | #print(clf.predict(trainDataGlobal))
237 | 
238 | #print(clf.predict(global_feature.reshape(1,-1))[0])
239 | 
240 | 
241 | # In[129]:
242 | 
243 | 
244 | 
245 | # path to test data
246 | test_path = "dataset/test"
247 | 
248 | # loop through the test images
249 | #for file in glob.glob(test_path + "/*.jpg"):
250 | for file in os.listdir(test_path):    
251 | 
252 |     file = test_path + "/" + file
253 |     #print(file)
254 |     
255 |     # read the image
256 |     image = cv2.imread(file)
257 | 
258 |     # resize the image
259 |     image = cv2.resize(image, fixed_size)
260 | 
261 |     # Global Feature extraction
262 |     fv_hu_moments = fd_hu_moments(image)
263 |     fv_haralick   = fd_haralick(image)
264 |     fv_histogram  = fd_histogram(image)
265 | 
266 |     # Concatenate global features
267 | 
268 |     global_feature = np.hstack([fv_histogram, fv_haralick, fv_hu_moments])
269 | 
270 |     # predict label of test image
271 |     prediction = clf.predict(global_feature.reshape(1,-1))[0]
272 | 
273 |     # show predicted label on image
274 |     cv2.putText(image, train_labels[prediction], (20,30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0,255,255), 3)
275 | 
276 |     # display the output image
277 |     plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
278 |     plt.show()
279 | 
280 | 
281 | # In[ ]:
282 | 
283 | 
284 | 
285 | 
286 | 
287 | # In[109]:
288 | 
289 | 
290 | #rfc_pred = rfc.predict(trainDataGlobal)
291 | 
292 | 
293 | # In[108]:
294 | 
295 | 
296 | #print(confusion_matrix(trainLabelsGlobal,rfc_pred))
297 | 
298 | 
299 | # In[107]:
300 | 
301 | 
302 | #print(classification_report(trainLabelsGlobal,rfc_pred))
303 | 
304 | 


--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-cayman


--------------------------------------------------------------------------------