├── README.md
├── classifier.py
├── constants.py
├── dataset.py
├── descriptors.py
├── filenames.py
├── log.py
├── main.py
├── scripts.py
├── unit_tests.py
└── utils.py

/README.md:
--------------------------------------------------------------------------------
1 | Image classification performed on the Caltech-256 dataset: http://www.vision.caltech.edu/Image_Datasets/Caltech256/
2 | 30,607 images
3 | 
4 | Techniques used:
5 | 1) Scale-Invariant Feature Transform (SIFT)
6 | 2) Bag of Visual Words
7 | 3) K-Means clustering
8 | 4) SVM classification
9 | 
10 | Usage
11 | Run `python main.py` to start the main program.
12 | 
13 | Dependencies
14 | Tested with OpenCV 3 and Python 3.5. Required Python libraries: numpy, scipy, matplotlib and scikit-learn.
15 | 
16 | 
17 | 
--------------------------------------------------------------------------------
/classifier.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import time
4 | from sklearn.cluster import MiniBatchKMeans
5 | 
6 | # Local dependencies
7 | import constants
8 | import descriptors
9 | import filenames
10 | import utils
11 | 
12 | 
13 | class Classifier:
14 |     """
15 |     Trains and tests an SVM image classifier built on Bag of Visual Words histograms of local descriptors.
16 |     """
17 |     def __init__(self, dataset, log):
18 |         """
19 |         Initialize the classifier object.
20 | 
21 |         Args:
22 |             dataset (Dataset): The object that stores the information about the dataset.
23 |             log (Log): The object that stores the information about the times and the results of the process.
24 | 
25 |         Returns:
26 |             void
27 |         """
28 |         self.dataset = dataset
29 |         self.log = log
30 | 
31 |     def train(self, svm_kernel, k, des_name, des_option=constants.ORB_FEAT_OPTION, is_interactive=True):
32 |         """
33 |         Gets the local descriptors for the training set and then trains an SVM on their global descriptors.
34 | 
35 |         Args:
36 |             svm_kernel (constant): The kernel of the SVM that will be created.
37 |             k (integer): The number of cluster centers for the visual codebook.
38 |             des_name (string): The name of the local descriptor; recomputed below from des_option.
39 |             des_option (integer): The option of the feature that is going to be used as local descriptor.
40 |             is_interactive (boolean): If True, the user can choose whether to load precomputed files or generate them.
41 | 
42 |         Returns:
43 |             cv2.ml_SVM: The Support Vector Machine obtained in the training phase.
44 |             cluster model: The clustering model fitted on the training descriptors.
45 |         """
46 |         isTrain = True
47 |         des_name = constants.ORB_FEAT_NAME if des_option == constants.ORB_FEAT_OPTION else constants.SIFT_FEAT_NAME
48 |         x_filename = filenames.vlads_train(k, des_name)
49 |         print("Getting global descriptors for the training set.")
50 |         start = time.time()
51 |         x, y, cluster_model = self.get_data_and_labels(self.dataset.get_train_set(), None, k, des_name, isTrain, des_option)
52 |         utils.save(x_filename, x)
53 |         end = time.time()
54 |         svm_filename = filenames.svm(k, des_name, svm_kernel)
55 |         print("Calculating the Support Vector Machine for the training set...")
56 |         svm = cv2.ml.SVM_create()
57 |         svm.setType(cv2.ml.SVM_C_SVC)
58 |         svm.setKernel(svm_kernel)
59 |         svm.setTermCriteria((cv2.TERM_CRITERIA_MAX_ITER, 100, 1e-6))
60 |         svm.train(x, cv2.ml.ROW_SAMPLE, y)
61 |         return svm, cluster_model
62 | 
63 |     def test(self, svm, cluster_model, k, des_option=constants.ORB_FEAT_OPTION, is_interactive=True):
64 |         """
65 |         Gets the descriptors for the testing set and uses the given SVM to predict a class for every element.
66 | 
67 |         Args:
68 |             svm (cv2.ml_SVM): The Support Vector Machine obtained in the training phase.
69 |             cluster_model: The clustering model (e.g. MiniBatchKMeans) fitted on the training descriptors.
70 |             des_option (integer): The option of the feature that is going to be used as local descriptor.
71 |             is_interactive (boolean): If True, the user can choose whether to load precomputed files or generate them.
72 | 
73 |         Returns:
74 |             NumPy float array: The result of the predictions made.
75 |             NumPy float array: The real labels for the testing set.
76 |         """
77 |         isTrain = False
78 |         des_name = constants.ORB_FEAT_NAME if des_option == constants.ORB_FEAT_OPTION else constants.SIFT_FEAT_NAME
79 |         print("Getting global descriptors for the testing set...")
80 |         start = time.time()
81 |         x, y, cluster_model = self.get_data_and_labels(self.dataset.get_test_set(), cluster_model, k, des_name, isTrain, des_option)
82 |         end = time.time()
83 |         self.log.test_vlad_time(end - start)
84 |         start = time.time()
85 |         _, result = svm.predict(x)
86 |         end = time.time()
87 |         self.log.predict_time(end - start)
88 |         mask = result == y
89 |         correct = np.count_nonzero(mask)
90 |         accuracy = (correct * 100.0 / result.size)
91 |         self.log.accuracy(accuracy)
92 |         return result, y
93 | 
94 |     def get_data_and_labels(self, img_set, cluster_model, k, des_name, isTrain, des_option=constants.ORB_FEAT_OPTION):
95 |         """
96 |         Calculates all the local descriptors for an image set and then uses a clustering model to build the
97 |         Bag of Visual Words histogram (the global descriptor) of each image, storing the label of its class.
98 |         Args:
99 |             img_set (dictionary of lists): One list of image paths per class.
100 |             cluster_model: A fitted clustering model used to quantize the descriptors; only used when isTrain is False.
101 |             isTrain (boolean): If True, a new MiniBatchKMeans model with k clusters is fitted on the descriptors of this set.
102 | 
103 |         Returns:
104 |             NumPy float matrix: Each row is the global descriptor of an image and each column is a dimension.
105 |             NumPy float array: Each element is the number of the class for the corresponding image.
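
        Illustrative sketch of the Bag of Visual Words flow this method follows (standalone; the
        image paths and variable names below are hypothetical):

            descs, labels = [], []
            for path in ["img_a.jpg", "img_b.jpg"]:                         # two images of class 0
                descs, labels = descriptors.sift(cv2.imread(path), descs, labels, 0)
            # Training: fit k centers on all local descriptors and build one histogram per image.
            hists, model = descriptors.cluster_features(descs, cluster_model=MiniBatchKMeans(n_clusters=64))
            # Testing: reuse the fitted model to turn the descriptors of new images into histograms.
            test_hists = descriptors.img_to_vect(descs, model)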
106 |         """
107 |         y = []
108 |         img_descs = []
109 | 
110 |         for class_number in range(len(img_set)):
111 |             img_paths = img_set[class_number]
112 |             step = round(constants.STEP_PERCENTAGE * len(img_paths) / 100)
113 |             for i in range(len(img_paths)):
114 |                 if (step > 0) and (i % step == 0):
115 |                     print("Class {0}: processing image {1} of {2} ({3:.0f}%)".format(class_number, i, len(img_paths), 100.0 * i / len(img_paths)))
116 |                 img = cv2.imread(img_paths[i])
117 |                 img_descs, y = descriptors.sift(img, img_descs, y, class_number)
118 | 
119 |         # Cluster the local descriptors only once, after every image in the set has been processed.
120 |         if int(isTrain) == 1:
121 |             X, cluster_model = descriptors.cluster_features(img_descs, cluster_model=MiniBatchKMeans(n_clusters=int(k)))
122 |         else:
123 |             X = descriptors.img_to_vect(img_descs, cluster_model)
124 |         print('Global descriptors shape:', X.shape)
125 |         y = np.int32(y)[:, np.newaxis]
126 |         x = np.float32(X)
127 |         return x, y, cluster_model
--------------------------------------------------------------------------------
/constants.py:
--------------------------------------------------------------------------------
1 | NUMPY_DELIMITER = ","
2 | PICKLE_PROTOCOL = 2
3 | DATASET_PATH = "dataset"
4 | ORB_FEAT_OPTION = 1
5 | SIFT_FEAT_OPTION = 2
6 | TESTING_IMG_PATH = "dataset/cassava/train/n12926689_5139.JPEG"
7 | ORB_FEAT_NAME = "ORB"
8 | SIFT_FEAT_NAME = "SIFT"
9 | GENERATE_OPTION = 1
10 | FILES_DIR_NAME = "files"
11 | TRAIN_TXT_FILE = "train.txt"
12 | TEST_TXT_FILE = "test.txt"
13 | STEP_PERCENTAGE = 20
--------------------------------------------------------------------------------
/dataset.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import numpy as np
3 | from sklearn.model_selection import train_test_split
4 | 
5 | # Local dependencies
6 | import constants
7 | 
8 | 
9 | class Dataset:
10 |     """
11 |     This class manages the information for the dataset.
12 |     """
13 | 
14 |     def __init__(self, path):
15 |         """
16 |         Initialize the Dataset object.
17 | 
18 |         Args:
19 |             path: The path where the dataset of images is stored.
20 | 
21 |         Returns:
22 |             void
23 |         """
24 |         self.path = path
25 |         self.train_set = {}
26 |         self.test_set = {}
27 |         self.classes = []
28 |         self.classes_counts = []
29 |         self.myarray = []
30 |         self.imageList = []
31 | 
32 |     def generate_sets(self):
33 |         """
34 |         Reads the information of the training and testing sets and stores it into attributes of the object.
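        The dataset path is expected to contain one sub-folder per class, with the images of that
        class stored as .jpg files directly inside it, for example (illustrative folder and file names):

            dataset/
                butterfly/
                    image_0001.jpg
                    image_0002.jpg
                dolphin/
                    image_0001.jpg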
35 | 
36 |         Returns:
37 |             void
38 |         """
39 |         dataset_classes = glob.glob(self.path + "/*")
40 |         i = 0
41 |         for folder in dataset_classes:
42 | 
43 |             path = folder.replace("\\", "/")
44 |             # print(path)
45 |             if "/" in folder:
46 |                 class_name = folder.split("/")[-1]
47 |             else:
48 |                 class_name = folder.split("\\")[-1]
49 |             self.classes.append(class_name)
50 |             # print(class_name)
51 |             # train = glob.glob(path + "/train/*")
52 |             # print(train)
53 |             # test = glob.glob(path + "/test/*")
54 |             anotherList = []
55 |             rasterList = glob.glob(path + '/*.jpg')
56 |             for image in rasterList:
57 |                 imgList = image.replace("\\", "/")
58 |                 self.imageList.append(imgList)
59 |                 anotherList.append(imgList)
60 | 
61 | 
62 |             # self.imageList = np.random.rand(100, 5)
63 |             # np.random.shuffle(self.imageList)
64 |             myarray = np.asarray(self.imageList)
65 | 
66 |             # Hold out the first 15 images of each class for testing; train on the remaining images so the two sets do not overlap.
67 |             self.train_set[i] = anotherList[15:]
68 |             self.test_set[i] = anotherList[:15]
69 |             print('Class {0}: {1} training / {2} testing images'.format(class_name, len(self.train_set[i]), len(self.test_set[i])))
70 |             # print('self.test_set', self.test_set[i], i)
71 |             i = i + 1
72 |             # mydict['train' + str(class_name)] = 'someval'
73 |             # "{0}_train_set".format(class_name), "{0}_test_set".format(class_name), b_train, b_test = train_test_split(self.imageList, self.classes, test_size=0.33, random_state=42)
74 | 
75 |             # rasterList = rasterList.replace("\\", "/")
76 | 
77 |         # print(self.imageList)
78 |         myarray = np.asarray(self.imageList)
79 |         self.classes = np.asarray(self.classes)
80 | 
81 | 
82 | 
83 |         print('Number of classes in the training and testing sets:')
84 |         print(len(self.train_set))
85 |         print(len(self.test_set))
86 |         # self.train_set, self.test_set, b_train, b_test = train_test_split(myarray, self.classes, test_size=0.33, random_state=42)
87 | 
88 |         # print(self.train_set[20])
89 |         # self.train_set.append(train)
90 |         # self.test_set.append(test)
91 |         self.classes_counts.append(0)
92 | 
93 |     def get_train_set(self):
94 |         """
95 |         Get the paths of the objects in the training set.
96 | 
97 |         Returns:
98 |             list of strings: Paths for objects in the training set.
99 |         """
100 |         if len(self.train_set) == 0:
101 |             self.generate_sets()
102 |         return self.train_set
103 | 
104 |     def get_test_set(self):
105 |         """
106 |         Get the paths of the objects in the testing set.
107 | 
108 |         Returns:
109 |             list of strings: Paths for objects in the testing set.
110 |         """
111 |         if len(self.test_set) == 0:
112 |             self.generate_sets()
113 |         return self.test_set
114 | 
115 |     def get_classes(self):
116 |         """
117 |         Get the names of the classes that are in the dataset.
118 | 
119 |         Returns:
120 |             list of strings: List with the names of the classes.
121 |         """
122 |         if len(self.classes) == 0:
123 |             self.generate_sets()
124 |         return self.classes
125 | 
126 |     def get_classes_counts(self):
127 |         """
128 |         Get a list with the count of total local descriptors for each class.
129 | 
130 |         Returns:
131 |             list of integers: List with the count of all the local descriptors in each class.
132 |         """
133 |         return self.classes_counts
134 | 
135 |     def get_y(self, my_set):
136 |         """
137 |         Get the labels for a given set.
138 | 
139 |         Args:
140 |             my_set (matrix of strings): Each row has the paths for the objects in that class.
141 | 
142 |         Returns:
143 |             NumPy float array: The labels for a given set.
144 |         """
145 |         y = []
146 |         if len(my_set) == 0:
147 |             self.generate_sets()
148 |         for class_ID in range(len(my_set)):
149 |             y += [class_ID] * len(my_set[class_ID])
150 |         # Transform the list into a column vector
151 |         y = np.float32(y)[:, np.newaxis]
152 |         return y
153 | 
154 |     def get_train_y(self):
155 |         """
156 |         Get the labels for the training set.
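        The labels follow the order in which get_train_set() lists the images. Illustrative example
        (hypothetical file names): with train_set = {0: ["a.jpg", "b.jpg"], 1: ["c.jpg"]} the result
        is np.float32([[0.], [0.], [1.]]).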
157 | 158 | Returns: 159 | NumPy float array: The labels for the training set. 160 | """ 161 | return self.get_y(self.train_set) 162 | 163 | def get_test_y(self): 164 | """ 165 | Get the labels for the testing set. 166 | 167 | Returns: 168 | NumPy float array: The labels for the testing set. 169 | """ 170 | return self.get_y(self.test_set) 171 | 172 | def store_listfile(self): 173 | """ 174 | Used for creating files in the format filelist used in Caffe for 175 | converting an image set. (caffe/tools/convert_imageset.cpp) 176 | 177 | Returns: 178 | void 179 | """ 180 | train_file = open(constants.TRAIN_TXT_FILE, "w") 181 | test_file = open(constants.TEST_TXT_FILE, "w") 182 | self.get_train_set() 183 | self.get_test_set() 184 | for class_id in range(len(self.classes)): 185 | current_train = self.train_set[class_id] 186 | for filename in current_train: 187 | # Changing path in Windows 188 | path = filename.replace("\\", "/") 189 | idx = path.index("/") 190 | path = path[(idx + 1):] 191 | train_file.write("{0} {1}\n".format(path, class_id)) 192 | current_test = self.test_set[class_id] 193 | for filename in current_test: 194 | # Changing path in Windows 195 | path = filename.replace("\\", "/") 196 | idx = path.index("/") 197 | path = path[(idx + 1):] 198 | test_file.write("{0} {1}\n".format(path, class_id)) 199 | train_file.close() 200 | test_file.close() 201 | 202 | def set_class_count(self, class_number, class_count): 203 | print(class_number, class_count) 204 | #class_count = 1200 205 | """ 206 | Set the count of local descriptors in one class. 207 | 208 | Args: 209 | class_number: ID for the class. 210 | class_count: Number of local descriptors that were found in the class. 211 | 212 | Returns: 213 | void 214 | """ 215 | if(class_number == 0): 216 | self.classes_counts.pop(0) 217 | self.classes_counts.append(class_count) 218 | 219 | 220 | -------------------------------------------------------------------------------- /descriptors.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | # Local dependencies 5 | import utils 6 | import constants 7 | 8 | def orb(img): 9 | """ 10 | Calculate the ORB descriptors for an image and resizes the image 11 | having the larger dimension set to 640 and keeping the size relation. 12 | 13 | Args: 14 | img (BGR matrix): The image that will be used. 15 | 16 | Returns: 17 | list of floats array: The descriptors found in the image. 18 | """ 19 | orb = cv2.ORB() 20 | kp, des = orb.detectAndCompute(img, None) 21 | return des 22 | 23 | def sift(img,img_descs,y,class_number): 24 | """ 25 | Gets a list of 128 - dimensional descriptors using SIFT and DoG 26 | for keypoints and resizes the image having the larger dimension set to 640 27 | and keeping the size relation. 28 | 29 | Args: 30 | img (BGR matrix): The grayscale image that will be used. 31 | 32 | Returns: 33 | list of floats array: The descriptors found in the image. 34 | """ 35 | 36 | sift = cv2.xfeatures2d.SIFT_create() 37 | kp, des = sift.detectAndCompute(img, None) 38 | if des is not None: 39 | img_descs.append(des) 40 | y.append(class_number) 41 | else: 42 | print('Found you!!!!!!!') 43 | return img_descs,y 44 | 45 | def descriptors_from_class(dataset, class_img_paths, class_number, option = constants.ORB_FEAT_OPTION): 46 | """ 47 | Gets all the local descriptors for a class. 
If an image has a side with more than 640 pixels it will be resized 48 | leaving the biggest side at 640 pixels and conserving the aspect ratio for the other side. 49 | 50 | Args: 51 | dataset (Dataset object): An object that stores information about the dataset. 52 | class_img_paths (array of strings): The paths for each image in certain class. 53 | class_number (integer): The number of the class. 54 | option (integer): If this is 49 (The key '1') uses ORB features, else use SIFT. 55 | 56 | Returns: 57 | numpy float matrix: Each row are the descriptors found in an image of the class 58 | """ 59 | des = None 60 | step = (constants.STEP_PERCENTAGE * len(class_img_paths)) / 100 61 | for i in range(len(class_img_paths)): 62 | img_path = class_img_paths[i] 63 | img = cv2.imread(img_path) 64 | resize_to = 640 65 | h, w, channels = img.shape 66 | if h > resize_to or w > resize_to: 67 | img = utils.resize(img, resize_to, h, w) 68 | if option == constants.ORB_FEAT_OPTION: 69 | des_name = "ORB" 70 | new_des = orb(img) 71 | else: 72 | des_name = "SIFT" 73 | new_des = sift(img) 74 | if new_des is not None: 75 | if des is None: 76 | des = np.array(new_des, dtype=np.float32) 77 | else: 78 | des = np.vstack((des, np.array(new_des))) 79 | # Print a message to show the status of the function 80 | if i % step == 0: 81 | percentage = (100 * i) / len(class_img_paths) 82 | message = "Calculated {0} descriptors for image {1} of {2}({3}%) of class number {4} ...".format( 83 | des_name, i, len(class_img_paths), percentage, class_number 84 | ) 85 | print(message) 86 | message = "* Finished getting the descriptors for the class number {0}*".format(class_number) 87 | print(message) 88 | print("Number of descriptors in class: {0}".format(len(des))) 89 | print(class_number, len(des)) 90 | dataset.set_class_count(class_number, len(des)) 91 | return des 92 | 93 | def all_descriptors(dataset, class_list, option = constants.ORB_FEAT_OPTION): 94 | """ 95 | Gets every local descriptor of a set with different classes (This is useful for getting a codebook). 96 | 97 | Args: 98 | class_list (list of arrays of strings): The list has information for a specific class in each element and each 99 | element is an array of strings which are the paths for the image of that class. 100 | option (integer): It's 49 (the key '1') if ORB features are going to be used, else use SIFT features. 101 | 102 | Returns: 103 | numpy float matrix: Each row are the descriptors found in an image of the set 104 | """ 105 | des = None 106 | for i in range(len(class_list)): 107 | message = "*** Getting descriptors for class number {0} of {1} ***".format(i, len(class_list)) 108 | print(message) 109 | class_img_paths = class_list[i] 110 | new_des = descriptors_from_class(dataset, class_img_paths, i, option) 111 | if des is None: 112 | des = new_des 113 | else: 114 | des = np.vstack((des, new_des)) 115 | message = "*****************************\n"\ 116 | "Finished getting all the descriptors\n" 117 | print(message) 118 | print("Total number of descriptors: {0}".format(len(des))) 119 | if len(des) > 0: 120 | print("Dimension of descriptors: {0}".format(len(des[0]))) 121 | print("First descriptor:\n{0}".format(des[0])) 122 | return des 123 | 124 | def gen_codebook(dataset, descriptors, k = 64): 125 | """ 126 | Generate a k codebook for the dataset. 127 | 128 | Args: 129 | dataset (Dataset object): An object that stores information about the dataset. 130 | descriptors (list of integer arrays): The descriptors for every class. 
131 | k (integer): The number of clusters that are going to be calculated. 132 | 133 | Returns: 134 | list of integer arrays: The k codewords for the dataset. 135 | """ 136 | k=int(k) 137 | print(type(dataset),type(descriptors),type(k)) 138 | 139 | iterations = 10 140 | epsilon = 1.0 141 | criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, iterations, epsilon) 142 | compactness, labels, centers = cv2.kmeans(descriptors, k , None,criteria, iterations, cv2.KMEANS_RANDOM_CENTERS) 143 | return centers 144 | 145 | def vlad(descriptors, centers): 146 | """ 147 | Calculate the Vector of Locally Aggregated Descriptors (VLAD) which is a global descriptor from a group of 148 | descriptors and centers that are codewords of a codebook, obtained for example with K-Means. 149 | 150 | Args: 151 | descriptors (numpy float matrix): The local descriptors. 152 | centers (numpy float matrix): The centers are points representatives of the classes. 153 | 154 | Returns: 155 | numpy float array: The VLAD vector. 156 | """ 157 | dimensions = len(descriptors[0]) 158 | vlad_vector = np.zeros((len(centers), dimensions), dtype=np.float32) 159 | for descriptor in descriptors: 160 | nearest_center, center_idx = utils.find_nn(descriptor, centers) 161 | for i in range(dimensions): 162 | vlad_vector[center_idx][i] += (descriptor[i] - nearest_center[i]) 163 | # L2 Normalization 164 | vlad_vector = cv2.normalize(vlad_vector,vlad_vector) 165 | vlad_vector = vlad_vector.flatten() 166 | return vlad_vector 167 | 168 | def img_to_vect(img_descs, cluster_model): 169 | """ 170 | Given an image path and a trained clustering model (eg KMeans), 171 | generates a feature vector representing that image. 172 | Useful for processing new images for a classifier prediction. 173 | """ 174 | 175 | 176 | clustered_descs = [cluster_model.predict(raw_words) for raw_words in img_descs] 177 | img_bow_hist = np.array([np.bincount(clustered_desc, minlength=cluster_model.n_clusters) for clustered_desc in clustered_descs ]) 178 | return img_bow_hist 179 | 180 | 181 | 182 | 183 | def cluster_features(img_descs, cluster_model): 184 | """ 185 | Cluster the training features using the cluster_model 186 | and convert each set of descriptors in img_descs 187 | to a Visual Bag of Words histogram. 188 | Parameters: 189 | ----------- 190 | X : list of lists of SIFT descriptors (img_descs) 191 | training_idxs : array/list of integers 192 | Indicies for the training rows in img_descs 193 | cluster_model : clustering model (eg KMeans from scikit-learn) 194 | The model used to cluster the SIFT features 195 | Returns: 196 | -------- 197 | X, cluster_model : 198 | X has K feature columns, each column corresponding to a visual word 199 | cluster_model has been fit to the training set 200 | """ 201 | n_clusters = cluster_model.n_clusters 202 | # Concatenate all descriptors in the training set together 203 | training_descs = img_descs 204 | all_train_descriptors = [desc for desc_list in training_descs for desc in desc_list] 205 | all_train_descriptors = np.array(all_train_descriptors) 206 | 207 | if all_train_descriptors.shape[1] != 128: 208 | raise ValueError('Expected SIFT descriptors to have 128 features, got', all_train_descriptors.shape[1]) 209 | 210 | # train kmeans or other cluster model on those descriptors selected above 211 | cluster_model.fit(all_train_descriptors) 212 | print('done clustering. 
Using clustering model to generate BoW histograms for each image.') 213 | 214 | # compute set of cluster-reduced words for each image 215 | img_clustered_words = [cluster_model.predict(raw_words) for raw_words in img_descs] 216 | 217 | # finally make a histogram of clustered word counts for each image. These are the final features. 218 | img_bow_hist = np.array( 219 | [np.bincount(clustered_words, minlength=n_clusters) for clustered_words in img_clustered_words]) 220 | 221 | X = img_bow_hist 222 | print('done generating BoW histograms.') 223 | 224 | return X, cluster_model 225 | 226 | -------------------------------------------------------------------------------- /filenames.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # Local dependencies 4 | import constants 5 | 6 | 7 | def codebook(k, des_name): 8 | print('files.dirname') 9 | print(constants.FILES_DIR_NAME) 10 | return "{0}/codebook_{1}.mat".format(constants.FILES_DIR_NAME, signature(k, des_name)) 11 | 12 | 13 | def result(k, des_name, svm_kernel): 14 | return "{0}/result_{1}.csv".format(constants.FILES_DIR_NAME, signature(k, des_name, kernel_name(svm_kernel))) 15 | 16 | 17 | def vlads_train(k, des_name): 18 | return "{0}/VLADS_train_{1}.mat".format(constants.FILES_DIR_NAME, signature(k, des_name)) 19 | 20 | 21 | def vlads_test(k, des_name): 22 | return "{0}/VLADS_test_{1}.mat".format(constants.FILES_DIR_NAME, signature(k, des_name)) 23 | 24 | 25 | def svm(k, des_name, svm_kernel): 26 | return "{0}/svm_data_{1}.dat".format(constants.FILES_DIR_NAME, signature(k, des_name, kernel_name(svm_kernel))) 27 | 28 | 29 | def log(k, des_name, svm_kernel): 30 | return "{0}/log_{1}.txt".format(constants.FILES_DIR_NAME, signature(k, des_name, kernel_name(svm_kernel))) 31 | 32 | 33 | def signature(k, des_name, svm_kernel=None): 34 | if svm_kernel is None: 35 | return "{0}_{1}".format(k, des_name) 36 | else: 37 | return "{0}_{1}_{2}".format(k, des_name, svm_kernel) 38 | 39 | def kernel_name(svm_kernel): 40 | if svm_kernel == cv2.ml.SVM_LINEAR: 41 | kernel_name = "LINEAR" 42 | elif svm_kernel == cv2.ml.SVM_POLY: 43 | kernel_name = "POLY" 44 | elif svm_kernel == cv2.ml.SVM_RBF: 45 | kernel_name = "RBF" 46 | else: 47 | kernel_name = "SIGMOID" 48 | return kernel_name 49 | -------------------------------------------------------------------------------- /log.py: -------------------------------------------------------------------------------- 1 | 2 | # Local dependencies 3 | import utils 4 | import filenames 5 | 6 | 7 | class Log: 8 | def __init__(self, k, des_name, svm_kernel): 9 | self.text = "" 10 | self.k = k 11 | self.des_name = des_name 12 | self.svm_kernel = svm_kernel 13 | 14 | def save(self): 15 | file = open(filenames.log(self.k, self.des_name, self.svm_kernel), "w") 16 | file.write(self.text) 17 | file.close() 18 | 19 | def train_des_time(self, time): 20 | str = "Time for getting all the local descriptors of the training images was {0}.\n" 21 | elapsed_time = utils.humanize_time(time) 22 | self.text += str.format(elapsed_time) 23 | 24 | def codebook_time(self, time): 25 | str = "Time for generating the codebook with k-means was {0}.\n" 26 | elapsed_time = utils.humanize_time(time) 27 | self.text += str.format(elapsed_time) 28 | 29 | def train_vlad_time(self, time): 30 | self.vlad_time(time, "training") 31 | 32 | def svm_time(self, time): 33 | str = "Time for calculating the SVM was {0}.\n" 34 | elapsed_time = utils.humanize_time(time) 35 | self.text += str.format(elapsed_time) 36 | 37 | 
def test_vlad_time(self, time): 38 | self.vlad_time(time, "testing") 39 | 40 | def predict_time(self, time): 41 | elapsed_time = utils.humanize_time(time) 42 | self.text += "Elapsed time predicting the testing set is {0}\n".format(elapsed_time) 43 | 44 | def accuracy(self, accuracy): 45 | self.text += "Accuracy = {0}.\n".format(accuracy) 46 | 47 | def classes(self, classes): 48 | self.text += "Classes = {0}\n".format(classes) 49 | 50 | def classes_counts(self, classes_counts): 51 | self.text += "Classes Local Descriptors Counts = {0}\n".format(classes_counts) 52 | 53 | def confusion_matrix(self, conf_mat): 54 | self.text += "Confusion Matrix =\n{0}".format(conf_mat) 55 | 56 | def vlad_time(self, time, set): 57 | str = "Time for getting VLAD global descriptors of the {0} images was {1}.\n" 58 | elapsed_time = utils.humanize_time(time) 59 | self.text += str.format(set, elapsed_time) -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import joblib 3 | import numpy as np 4 | import time 5 | import os 6 | 7 | # Local dependencies 8 | from classifier import Classifier 9 | from dataset import Dataset 10 | import descriptors 11 | import constants 12 | import utils 13 | import filenames 14 | from log import Log 15 | 16 | 17 | def main(is_interactive=True, k=64, des_option=constants.ORB_FEAT_OPTION, svm_kernel=cv2.ml.SVM_LINEAR): 18 | if not is_interactive: 19 | experiment_start = time.time() 20 | # Check for the dataset of images 21 | if not os.path.exists(constants.DATASET_PATH): 22 | print("Dataset not found, please copy one.") 23 | return 24 | dataset = Dataset(constants.DATASET_PATH) 25 | dataset.generate_sets() 26 | 27 | # Check for the directory where stores generated files 28 | if not os.path.exists(constants.FILES_DIR_NAME): 29 | os.makedirs(constants.FILES_DIR_NAME) 30 | 31 | if is_interactive: 32 | des_option = int(input("Enter [1] for using ORB features or [2] to use SIFT features.\n")) 33 | k = input("Enter the number of cluster centers you want for the codebook.\n") 34 | svm_option = int(input("Enter [1] for using SVM kernel Linear or [2] to use RBF.\n")) 35 | svm_kernel = cv2.ml.SVM_LINEAR if svm_option == 1 else cv2.ml.SVM_RBF 36 | 37 | des_name = constants.ORB_FEAT_NAME if des_option == constants.ORB_FEAT_OPTION else constants.SIFT_FEAT_NAME 38 | print(des_name) 39 | log = Log(k, des_name, svm_kernel) 40 | 41 | 42 | codebook_filename = filenames.codebook(k, des_name) 43 | print('codebook_filename') 44 | print(codebook_filename) 45 | start = time.time() 46 | end = time.time() 47 | log.train_des_time(end - start) 48 | start = time.time() 49 | end = time.time() 50 | log.codebook_time(end - start) 51 | # Train and test the dataset 52 | classifier = Classifier(dataset, log) 53 | svm, cluster_model = classifier.train(svm_kernel, k, des_name, des_option=des_option, is_interactive=is_interactive) 54 | svm.save("svm_result.dat") 55 | joblib.dump(cluster_model, 'cluster_model.plk') 56 | print("Training ready. 
Now beginning with testing") 57 | result, labels = classifier.test(svm, cluster_model, k, des_option=des_option, is_interactive=is_interactive) 58 | print('test result') 59 | print(result,labels) 60 | # Store the results from the test 61 | classes = dataset.get_classes() 62 | log.classes(classes) 63 | log.classes_counts(dataset.get_classes_counts()) 64 | result_filename = filenames.result(k, des_name, svm_kernel) 65 | test_count = len(dataset.get_test_set()[0]) 66 | result_matrix = np.reshape(result, (len(classes), test_count)) 67 | utils.save_csv(result_filename, result_matrix) 68 | 69 | # Create a confusion matrix 70 | confusion_matrix = np.zeros((len(classes), len(classes)), dtype=np.uint32) 71 | for i in range(len(result)): 72 | predicted_id = int(result[i]) 73 | real_id = int(labels[i]) 74 | confusion_matrix[real_id][predicted_id] += 1 75 | 76 | print("Confusion Matrix =\n{0}".format(confusion_matrix)) 77 | log.confusion_matrix(confusion_matrix) 78 | log.save() 79 | print("Log saved on {0}.".format(filenames.log(k, des_name, svm_kernel))) 80 | if not is_interactive: 81 | experiment_end = time.time() 82 | elapsed_time = utils.humanize_time(experiment_end - experiment_start) 83 | print("Total time during the experiment was {0}".format(elapsed_time)) 84 | else: 85 | # Show a plot of the confusion matrix on interactive mode 86 | utils.show_conf_mat(confusion_matrix) 87 | #raw_input("Press [Enter] to exit ...") 88 | 89 | if __name__ == '__main__': 90 | main() -------------------------------------------------------------------------------- /scripts.py: -------------------------------------------------------------------------------- 1 | import constants 2 | import cv2 3 | import os 4 | import time 5 | 6 | # Local dependencies 7 | import filenames 8 | import utils 9 | import main 10 | 11 | 12 | def codebook_to_csv(k=128, des_name=constants.ORB_FEAT_NAME): 13 | if not os.path.exists(constants.FILES_DIR_NAME): 14 | os.makedirs(constants.FILES_DIR_NAME) 15 | codebook = utils.load(filenames.codebook(k, des_name)) 16 | filename = "{0}/codebook_{1}_{2}.csv".format(constants.FILES_DIR_NAME, k, des_name) 17 | utils.save_csv(filename, codebook) 18 | print("Copied codebook into the file with name {0}. 
Press any key to exit...".format(filename)) 19 | cv2.waitKey() 20 | 21 | def run_all(): 22 | main.main(is_interactive=False, k_opt=32, des_opt=constants.ORB_FEAT_OPTION, svm_kernel=cv2.SVM_RBF) 23 | 24 | if __name__ == '__main__': 25 | run_all() 26 | -------------------------------------------------------------------------------- /unit_tests.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import time 3 | import pickle 4 | import numpy as np 5 | from matplotlib import pyplot as plt 6 | 7 | #---------------------------------- 8 | # Local dependencies 9 | import descriptors 10 | import utils 11 | from dataset import Dataset 12 | import constants 13 | import filenames 14 | 15 | def test_dataset(): 16 | dataset = Dataset(constants.DATASET_PATH) 17 | pickle.dump(dataset, open(constants.DATASET_OBJ_FILENAME, "wb"), protocol=constants.PICKLE_PROTOCOL) 18 | classes = dataset.get_classes() 19 | print("Dataset generated with {0} classes.".format(len(classes))) 20 | print(classes) 21 | train = dataset.get_train_set() 22 | test = dataset.get_test_set() 23 | for i in range(len(classes)): 24 | print( 25 | "There are {0} training files and {1} testing files for class number {2} ({3})".format( 26 | len(train[i]), len(test[i]), i, classes[i] 27 | ) 28 | ) 29 | 30 | def test_des_type(): 31 | img = cv2.imread(constants.TESTING_IMG_PATH) 32 | kp, des = descriptors.orb(img) 33 | return des 34 | 35 | def test_descriptors(): 36 | img = cv2.imread(constants.TESTING_IMG_PATH) 37 | cv2.imshow("Normal Image", img) 38 | print("Normal Image") 39 | option = input("Enter [1] for using ORB features and other number to use SIFT.\n") 40 | start = time.time() 41 | if option == 1: 42 | orb = cv2.ORB() 43 | kp, des = orb.detectAndCompute(img, None) 44 | else: 45 | sift = cv2.SIFT() 46 | kp, des = sift.detectAndCompute(img, None) 47 | end = time.time() 48 | elapsed_time = utils.humanize_time(end - start) 49 | des_name = constants.ORB_FEAT_NAME if option == ord(constants.ORB_FEAT_OPTION_KEY) else constants.SIFT_FEAT_NAME 50 | print("Elapsed time getting descriptors {0}".format(elapsed_time)) 51 | print("Number of descriptors found {0}".format(len(des))) 52 | if des is not None and len(des) > 0: 53 | print("Dimension of descriptors {0}".format(len(des[0]))) 54 | print("Name of descriptors used is {0}".format(des_name)) 55 | img2 = cv2.drawKeypoints(img, kp) 56 | # plt.imshow(img2), plt.show() 57 | cv2.imshow("{0} descriptors".format(des_name), img2) 58 | print("Press any key to exit ...") 59 | cv2.waitKey() 60 | 61 | def test_codebook(): 62 | dataset = pickle.load(open(constants.DATASET_OBJ_FILENAME, "rb")) 63 | option = input("Enter [1] for using ORB features or [2] to use SIFT features.\n") 64 | start = time.time() 65 | des = descriptors.all_descriptors(dataset, dataset.get_train_set(), option) 66 | end = time.time() 67 | elapsed_time = utils.humanize_time(end - start) 68 | print("Elapsed time getting all the descriptors is {0}".format(elapsed_time)) 69 | k = 64 70 | des_name = constants.ORB_FEAT_NAME if option == constants.ORB_FEAT_OPTION else constants.SIFT_FEAT_NAME 71 | codebook_filename = "codebook_{0}_{1}.csv".format(k, des_name) 72 | start = time.time() 73 | codebook = descriptors.gen_codebook(dataset, des, k) 74 | end = time.time() 75 | elapsed_time = utils.humanize_time(end - start) 76 | print("Elapsed time calculating the k means for the codebook is {0}".format(elapsed_time)) 77 | np.savetxt(codebook_filename, codebook, delimiter=constants.NUMPY_DELIMITER) 78 | 
print("Codebook loaded in {0}, press any key to exit ...".format(constants.CODEBOOK_FILE_NAME)) 79 | cv2.waitKey() 80 | 81 | def test_vlad(): 82 | img = cv2.imread(constants.TESTING_IMG_PATH) 83 | option = input("Enter [1] for using ORB features or [2] to use SIFT features.\n") 84 | if option == 1: 85 | des = descriptors.orb(img) 86 | else: 87 | des = descriptors.sift(img) 88 | des_name = constants.ORB_FEAT_NAME if option == constants.ORB_FEAT_OPTION else constants.SIFT_FEAT_NAME 89 | k = 128 90 | codebook_filename = "codebook_{0}_{1}.csv".format(k, des_name) 91 | centers = np.loadtxt(codebook_filename, delimiter=constants.NUMPY_DELIMITER) 92 | vlad_vector = descriptors.vlad(des, centers) 93 | print(vlad_vector) 94 | return vlad_vector 95 | 96 | def test_one_img_classification(): 97 | img = cv2.imread("test.jpg") 98 | resize_to = 640 99 | h, w, channels = img.shape 100 | img = utils.resize(img, resize_to, h, w) 101 | des = descriptors.sift(img) 102 | k = 128 103 | des_name = "SIFT" 104 | codebook_filename = filenames.codebook(k, des_name) 105 | codebook = utils.load(codebook_filename) 106 | img_vlad = descriptors.vlad(des, codebook) 107 | svm_filename = filenames.svm(k, des_name) 108 | svm = cv2.SVM() 109 | svm.load(svm_filename) 110 | result = svm.predict(img_vlad) 111 | print("result is {0}".format(result)) 112 | 113 | if __name__ == '__main__': 114 | test_descriptors() -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import numpy.random as nprnd 2 | import numpy as np 3 | import cv2 4 | import scipy.io as sio 5 | import matplotlib.pyplot as plt 6 | import h5py 7 | 8 | 9 | def random_split(l, sample_size): 10 | """ 11 | Randomly splits a list in two parts. A sample and a rest (other part). 12 | 13 | Args: 14 | l (list): A list that is going to be splitted. 15 | sample_size (integer): The size of the sample that is going to be taken. 16 | 17 | Returns: 18 | list: One random group from the list. 19 | list: Another group from the list with size equal to sample_size. 20 | """ 21 | sample_indices = nprnd.choice(len(l), size=sample_size, replace=False) 22 | # print (len(sample_indices)) 23 | sample_indices.sort() 24 | # print("sample_indices = {0}".format(sample_indices)) 25 | other_part = [] 26 | sample_part = [] 27 | indices_counter = 0 28 | for index in range(len(l)): 29 | current_elem = l[index] 30 | if indices_counter == sample_size: 31 | other_part = other_part + l[index:] 32 | break 33 | if index == sample_indices[indices_counter]: 34 | sample_part.append(current_elem) 35 | indices_counter += 1 36 | else: 37 | other_part.append(current_elem) 38 | return other_part, sample_part 39 | 40 | def humanize_time(secs): 41 | """ 42 | Extracted from http://testingreflections.com/node/6534 43 | """ 44 | mins, secs = divmod(secs, 60) 45 | hours, mins = divmod(mins, 60) 46 | return '%02d:%02d:%02f' % (hours, mins, secs) 47 | 48 | def resize(img, new_size, h, w): 49 | """ 50 | Changes the largest side of an image to the new size and changes the other to maintain the aspect ratio. 51 | 52 | Args: 53 | img (BGR Matrix): The image that is going to be resized. 54 | new_size (integer): The value wanted for the biggest side of the image. 55 | 56 | Returns: 57 | BGR Matrix: The image resized to the new value keeping the aspect ratio. 
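
    Illustrative example (assumed behavior): a 1280 x 960 (w x h) image resized with new_size = 640
    keeps the aspect ratio and becomes 640 x 480. Note that cv2.resize expects integer dimensions,
    so the computed side should be cast, e.g. cv2.resize(img, (int(new_w), int(new_h))).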
58 | """ 59 | if h > w: 60 | new_h = 640 61 | new_w = (640 * w) / h 62 | else: 63 | new_h = (640 * h) / w 64 | new_w = 640 65 | img = cv2.resize(img, (new_w, new_h)) 66 | return img 67 | 68 | def find_nn(point, neighborhood): 69 | """ 70 | Finds the nearest neighborhood of a vector. 71 | 72 | Args: 73 | point (float array): The initial point. 74 | neighborhood (numpy float matrix): The points that are around the initial point. 75 | 76 | Returns: 77 | float array: The point that is the nearest neighbor of the initial point. 78 | integer: Index of the nearest neighbor inside the neighborhood list 79 | """ 80 | min_dist = float('inf') 81 | nn = neighborhood[0] 82 | nn_idx = 0 83 | for i in range(len(neighborhood)): 84 | neighbor = neighborhood[i] 85 | dist = cv2.norm(point - neighbor) 86 | if dist < min_dist: 87 | min_dist = dist 88 | nn = neighbor 89 | nn_idx = i 90 | 91 | return nn, nn_idx 92 | 93 | def save(filename, arr): 94 | """ 95 | Stores a numpy array in a file. 96 | 97 | Args: 98 | filename (string): The name for the file. 99 | arr (numpy array): 100 | 101 | Returns: 102 | void 103 | """ 104 | data = {"stored": arr} 105 | sio.savemat(filename, data) 106 | 107 | def load(filename): 108 | """ 109 | Load a numpy array from a file. 110 | 111 | Args: 112 | filename (string): The name of the file. 113 | 114 | Returns: 115 | numpy array: The array stored in the file. 116 | """ 117 | #data = sio.loadmat(filename) 118 | 119 | 120 | f = h5py.File(filename,'r') 121 | data = f.get('data/variable1') 122 | data = np.array(data) # For converting to numpy array 123 | return data["stored"] 124 | 125 | def save_csv(filename, arr): 126 | """ 127 | Save an array into a file with the filename. 128 | 129 | Args: 130 | filename (string): The name for the file. 131 | arr (numpy array): The array that is going to be saved. 132 | 133 | Returns: 134 | void 135 | """ 136 | file = open(filename, "w") 137 | for row in arr: 138 | for i in range(len(row) - 1): 139 | file.write("{0} ".format(row[i])) 140 | file.write("{0}\n".format(row[len(row) - 1])) 141 | 142 | def show_conf_mat(confusion_matrix): 143 | """ 144 | Show a windows with a color image for a confusion matrix 145 | 146 | Args: 147 | confusion_matrix (NumPy Array): The matrix to be shown. 148 | 149 | Returns: 150 | void 151 | """ 152 | plt.matshow(confusion_matrix) 153 | plt.title('Confusion Matrix') 154 | plt.colorbar() 155 | plt.show() --------------------------------------------------------------------------------
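
Note on using the trained artifacts: main.py saves the SVM to "svm_result.dat" and the fitted
clustering model to "cluster_model.plk". Below is a minimal sketch for classifying a single new
image with those artifacts (assuming an OpenCV 3.x build that provides cv2.ml.SVM_load and the
repository modules on the path; the image path is hypothetical):

    import cv2
    import joblib
    import numpy as np
    import descriptors

    svm = cv2.ml.SVM_load("svm_result.dat")            # SVM trained and saved by main.py
    cluster_model = joblib.load("cluster_model.plk")   # clustering model fitted during training

    img = cv2.imread("some_new_image.jpg")             # hypothetical path
    img_descs, labels = descriptors.sift(img, [], [], 0)       # class number 0 is only a dummy label here
    hist = descriptors.img_to_vect(img_descs, cluster_model)   # Bag of Words histogram for the image
    _, prediction = svm.predict(np.float32(hist))
    print("Predicted class id:", int(prediction[0][0]))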