├── README.md
├── Random-Forest-Image-Classification-using-Python.ipynb
├── Random-Forest-Image-Classification-using-Python.py
└── _config.yml
/README.md:
--------------------------------------------------------------------------------
1 | # Random-Forest-Image-Classification-using-Python
2 | Random Forest Image Classification using Python
3 |
4 | Please follow the folder structure below.
5 |
6 |
7 |
8 |
9 | - image-classification (folder)
10 |
11 | - dataset (folder)
12 |
13 | - train (folder)
14 |
15 | - Image Cat1 Folder
16 |
17 | - train_img.jpg
18 | - train_img.jpg
19 | - train_img.jpg
20 | - .......
21 |
22 | - Image Cat2 Folder
23 |
24 | - train_img.jpg
25 | - train_img.jpg
26 | - train_img.jpg
27 | - .......
28 |
29 |
30 | - test (folder)
31 |
32 | - test_img.jpg
33 | - test_img.jpg
34 | - test_img.jpg
35 | - .......
36 |
37 |
38 | - output (folder)
39 |
40 | - data.h5
41 | - labels.h5
42 |
43 | - random_fo_image.py
44 |
45 |
46 |
47 |
48 |
--------------------------------------------------------------------------------
/Random-Forest-Image-Classification-using-Python.py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | # In[2]:
5 |
6 |
7 | from sklearn.preprocessing import LabelEncoder
8 | from sklearn.preprocessing import MinMaxScaler
9 | import numpy as np
10 | import mahotas
11 | import cv2
12 | import os
13 | import h5py
14 | import glob
15 | import matplotlib.pyplot as plt
16 | from sklearn.model_selection import train_test_split, cross_val_score
17 | from sklearn.ensemble import RandomForestClassifier
18 |
19 |
20 | # In[3]:
21 |
22 |
# --- run configuration ---

# every image is resized to this (width, height) before feature extraction
fixed_size = (500, 500)

# root folder containing one sub-folder per class
train_path = "dataset/train"

# number of trees in the Random Forest
num_tree = 100

# histogram bins per colour channel
bins = 8

# fraction of the samples held out by train_test_split
test_size = 0.1

# random seed, so results can be reproduced
seed = 9
40 |
41 |
42 | # In[4]:
43 |
44 |
45 | # feature descriptor 1: Hu Moments
46 |
def fd_hu_moments(image):
    """Return the Hu moment shape descriptor of a BGR image.

    The image is converted to grayscale, its image moments are computed,
    and the Hu invariants derived from them are returned as a flat 1-D
    array.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    raw_moments = cv2.moments(grayscale)
    hu_invariants = cv2.HuMoments(raw_moments)
    return hu_invariants.flatten()
51 |
52 |
53 | # In[5]:
54 |
55 |
56 | # feature descriptor 2: Haralick Texture
57 |
def fd_haralick(image):
    """Return the Haralick texture descriptor of a BGR image.

    The features are computed by mahotas on the grayscale version of the
    image and then averaged along axis 0, giving one 1-D vector per image.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    texture_rows = mahotas.features.haralick(grayscale)
    return texture_rows.mean(axis=0)
64 |
65 |
66 | # In[6]:
67 |
68 |
69 | # feature descriptor 3: Color Histogram
70 |
def fd_histogram(image, mask=None):
    """Return a flattened, normalized 3-D HSV colour histogram.

    Args:
        image: BGR image as loaded by OpenCV.
        mask: Optional mask forwarded to ``cv2.calcHist``; when given,
            only the pixels selected by the mask are counted. ``None``
            (the default) counts every pixel.

    Returns:
        1-D array holding the ``bins x bins x bins`` histogram, where
        ``bins`` is the module-level setting.
    """
    # convert the image to the HSV colour space
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # Forward the caller's mask. Previously None was always passed here,
    # so the ``mask`` argument was silently ignored.
    hist = cv2.calcHist([hsv], [0, 1, 2], mask, [bins, bins, bins],
                        [0, 256, 0, 256, 0, 256])
    # normalize the histogram in place
    cv2.normalize(hist, hist)
    return hist.flatten()
80 |
81 |
82 | # In[7]:
83 |
84 |
# The class labels are the names of the sub-folders of the training
# directory. Stray regular files (e.g. .DS_Store) are skipped: they are not
# classes, and listing them as image folders later would fail.
train_labels = sorted(
    name for name in os.listdir(train_path)
    if os.path.isdir(os.path.join(train_path, name))
)
print(train_labels)

# accumulators for the per-image feature vectors and their labels
global_features = []
labels = []

# loop counters used by the feature-extraction loop
i, j = 0, 0
k = 0

# number of images per class
images_per_class = 80
101 |
102 |
103 | # loop inside the folder for train images
104 |
105 | # In[84]:
106 |
107 |
# Iterate over the class folders and extract one global feature vector for
# every readable training image.

# ``%time`` is an IPython magic; get_ipython() only exists inside
# IPython/Jupyter, so skip it when this file runs as a plain script.
try:
    get_ipython().run_line_magic('time', '')
except NameError:
    pass

for training_name in train_labels:
    # folder holding the images of the current class
    class_dir = os.path.join(train_path, training_name)

    # the folder name doubles as the class label
    current_label = training_name

    k = 1
    # Sorted so that the sample order (and therefore the seeded train/test
    # split) does not depend on the file system's listing order.
    for file_name in sorted(os.listdir(class_dir)):
        image_path = os.path.join(class_dir, file_name)

        # read the image; cv2.imread returns None when it cannot decode it
        image = cv2.imread(image_path)
        if image is None:
            # Skip unreadable files so that no stale feature vector from a
            # previous image gets stored under this label.
            print("[WARNING] could not read image: {}".format(image_path))
            continue

        # resize to the fixed size and compute the three descriptors
        image = cv2.resize(image, fixed_size)
        fv_hu_moments = fd_hu_moments(image)
        fv_haralick = fd_haralick(image)
        fv_histogram = fd_histogram(image)

        # concatenate the descriptors into one global feature vector
        global_feature = np.hstack([fv_histogram, fv_haralick, fv_hu_moments])

        # update the list of labels and feature vectors
        labels.append(current_label)
        global_features.append(global_feature)

        i += 1
        k += 1
    print("[STATUS] processed folder: {}".format(current_label))
    j += 1

print("[STATUS] completed Global Feature Extraction...")
154 |
155 |
156 | # In[30]:
157 |
158 |
# ``%time`` is an IPython magic; get_ipython() only exists inside
# IPython/Jupyter, so skip it when this file runs as a plain script.
try:
    get_ipython().run_line_magic('time', '')
except NameError:
    pass

# report the overall feature matrix and label vector sizes
print("[STATUS] feature vector size {}".format(np.array(global_features).shape))
print("[STATUS] training Labels {}".format(np.array(labels).shape))

# encode the string labels as integers
targetNames = np.unique(labels)
le = LabelEncoder()
target = le.fit_transform(labels)
# The message used to end in a stray "{}" placeholder that was printed
# literally because .format() was never called.
print("[STATUS] training labels encoded...")

# rescale every feature column to the range (0-1)
scaler = MinMaxScaler(feature_range=(0, 1))
rescaled_features = scaler.fit_transform(global_features)
print("[STATUS] feature vector normalized...")

print("[STATUS] target labels: {}".format(target))
print("[STATUS] target labels shape: {}".format(target.shape))

# Save features and labels as HDF5. Create the output folder if needed and
# use context managers so the files are closed even if a write fails.
os.makedirs('output', exist_ok=True)
with h5py.File('output/data.h5', 'w') as h5f_data:
    h5f_data.create_dataset('dataset_1', data=np.array(rescaled_features))

with h5py.File('output/labels.h5', 'w') as h5f_label:
    h5f_label.create_dataset('dataset_1', data=np.array(target))

print("[STATUS] end of training..")
190 |
191 |
192 | # In[10]:
193 |
194 |
# Load the saved feature matrix and encoded labels back from HDF5.
# np.array() copies the datasets into memory, so the files can be closed
# right away; the previous code left both handles open, which also blocks
# reopening them for writing when the extraction step is re-run.
with h5py.File('output/data.h5', 'r') as h5f_data:
    global_features = np.array(h5f_data['dataset_1'])

with h5py.File('output/labels.h5', 'r') as h5f_label:
    global_labels = np.array(h5f_label['dataset_1'])
205 |
206 |
207 | # In[11]:
208 |
209 |
210 |
# Hold out ``test_size`` of the samples for testing; ``seed`` fixes the
# shuffle used for the split.
trainDataGlobal, testDataGlobal, trainLabelsGlobal, testLabelsGlobal = (
    train_test_split(
        np.array(global_features),
        np.array(global_labels),
        random_state=seed,
        test_size=test_size,
    )
)
216 |
217 |
218 | # RandomForest
219 |
220 | # In[128]:
221 |
222 |
# classification_report was called below without ever being imported,
# which raised a NameError.
from sklearn.metrics import classification_report

# Create the model - Random Forests. Use the configured tree count and
# seed instead of a hard-coded 100 and an unseeded (non-reproducible) forest.
clf = RandomForestClassifier(n_estimators=num_tree, random_state=seed)

# fit the training data to the model
clf.fit(trainDataGlobal, trainLabelsGlobal)

# report on the training data (optimistic: the model has seen these samples)
clf_pred = clf.predict(trainDataGlobal)
print(classification_report(trainLabelsGlobal, clf_pred))

# also report on the held-out split, which was previously never evaluated
print("[STATUS] held-out test split:")
print(classification_report(testLabelsGlobal, clf.predict(testDataGlobal)))
234 | #print(confusion_matrix(trainLabelsGlobal,clf_pred))
235 |
236 | #print(clf.predict(trainDataGlobal))
237 |
238 | #print(clf.predict(global_feature.reshape(1,-1))[0])
239 |
240 |
241 | # In[129]:
242 |
243 |
244 |
# path to test data
test_path = "dataset/test"

# Predict and display a label for every readable image in the test folder.
for file_name in os.listdir(test_path):
    image_path = os.path.join(test_path, file_name)

    # read the image; cv2.imread returns None when it cannot decode the file
    image = cv2.imread(image_path)
    if image is None:
        # skip it instead of crashing inside cv2.resize
        print("[WARNING] could not read image: {}".format(image_path))
        continue

    # resize the image
    image = cv2.resize(image, fixed_size)

    # global feature extraction, same descriptors and order as in training
    fv_hu_moments = fd_hu_moments(image)
    fv_haralick = fd_haralick(image)
    fv_histogram = fd_histogram(image)
    global_feature = np.hstack([fv_histogram, fv_haralick, fv_hu_moments])

    # The classifier was trained on MinMax-rescaled features, so the same
    # fitted scaler must be applied before predicting; raw features were
    # previously passed straight to the model.
    scaled_feature = scaler.transform(global_feature.reshape(1, -1))
    prediction = clf.predict(scaled_feature)[0]

    # Decode the integer class with the encoder that produced the training
    # targets, rather than indexing the folder listing.
    predicted_name = str(le.inverse_transform([prediction])[0])

    # show predicted label on image
    cv2.putText(image, predicted_name, (20, 30), cv2.FONT_HERSHEY_SIMPLEX,
                1.0, (0, 255, 255), 3)

    # display the output image
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.show()
279 |
280 |
281 | # In[ ]:
282 |
283 |
284 |
285 |
286 |
287 | # In[109]:
288 |
289 |
290 | #rfc_pred = rfc.predict(trainDataGlobal)
291 |
292 |
293 | # In[108]:
294 |
295 |
296 | #print(confusion_matrix(trainLabelsGlobal,rfc_pred))
297 |
298 |
299 | # In[107]:
300 |
301 |
302 | #print(classification_report(trainLabelsGlobal,rfc_pred))
303 |
304 |
--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-cayman
--------------------------------------------------------------------------------