├── README.md
├── classifier.py
├── constants.py
├── dataset.py
├── descriptors.py
├── filenames.py
├── log.py
├── main.py
├── scripts.py
├── unit_tests.py
└── utils.py

/README.md:
--------------------------------------------------------------------------------
1 | Image classification performed on the Caltech-256 dataset: http://www.vision.caltech.edu/Image_Datasets/Caltech256/
2 | 30,607 images
3 | 
4 | Techniques used:
5 | 1) Scale-Invariant Feature Transform (SIFT)
6 | 2) Bag of Visual Words
7 | 3) K-Means clustering
8 | 4) SVM classification
9 | 
10 | Usage
11 | Run `python main.py` to start the main program.
12 | 
13 | Dependencies
14 | Tested with OpenCV 3 and Python 3.5. Required Python libraries: numpy, scipy, matplotlib and scikit-learn.
15 | 
16 | 
17 | 
--------------------------------------------------------------------------------
/classifier.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import time
4 | from sklearn.cluster import MiniBatchKMeans
5 | 
6 | # Local dependencies
7 | import constants
8 | import descriptors
9 | import filenames
10 | import utils
11 | 
12 | 
13 | class Classifier:
14 |     """
15 |     Trains and tests an SVM image classifier built on Bag of Visual Words histograms of local descriptors.
16 |     """
17 |     def __init__(self, dataset, log):
18 |         """
19 |         Initialize the classifier object.
20 | 
21 |         Args:
22 |             dataset (Dataset): The object that stores the information about the dataset.
23 |             log (Log): The object that stores the information about the times and the results of the process.
24 | 
25 |         Returns:
26 |             void
27 |         """
28 |         self.dataset = dataset
29 |         self.log = log
30 | 
31 |     def train(self, svm_kernel, k, des_name, des_option=constants.ORB_FEAT_OPTION, is_interactive=True):
32 |         """
33 |         Gets the local descriptors for the training set and then trains an SVM on their global descriptors.
34 | 
35 |         Args:
36 |             svm_kernel (constant): The kernel of the SVM that will be created.
37 |             k (integer): The number of cluster centers for the visual codebook.
38 |             des_name (string): The name of the local descriptor; recomputed below from des_option.
39 |             des_option (integer): The option of the feature that is going to be used as local descriptor.
40 |             is_interactive (boolean): If True, the user can choose whether to load precomputed files or generate them.
41 | 
42 |         Returns:
43 |             cv2.ml_SVM: The Support Vector Machine obtained in the training phase.
44 |             cluster model: The clustering model fitted on the training descriptors.
45 |         """
46 |         isTrain = True
47 |         des_name = constants.ORB_FEAT_NAME if des_option == constants.ORB_FEAT_OPTION else constants.SIFT_FEAT_NAME
48 |         x_filename = filenames.vlads_train(k, des_name)
49 |         print("Getting global descriptors for the training set.")
50 |         start = time.time()
51 |         x, y, cluster_model = self.get_data_and_labels(self.dataset.get_train_set(), None, k, des_name, isTrain, des_option)
52 |         utils.save(x_filename, x)
53 |         end = time.time()
54 |         svm_filename = filenames.svm(k, des_name, svm_kernel)
55 |         print("Calculating the Support Vector Machine for the training set...")
56 |         svm = cv2.ml.SVM_create()
57 |         svm.setType(cv2.ml.SVM_C_SVC)
58 |         svm.setKernel(svm_kernel)
59 |         svm.setTermCriteria((cv2.TERM_CRITERIA_MAX_ITER, 100, 1e-6))
60 |         svm.train(x, cv2.ml.ROW_SAMPLE, y)
61 |         return svm, cluster_model
62 | 
63 |     def test(self, svm, cluster_model, k, des_option=constants.ORB_FEAT_OPTION, is_interactive=True):
64 |         """
65 |         Gets the descriptors for the testing set and uses the given SVM to predict a class for every element.
66 | 
67 |         Args:
68 |             svm (cv2.ml_SVM): The Support Vector Machine obtained in the training phase.
69 |             cluster_model: The clustering model (e.g. MiniBatchKMeans) fitted on the training descriptors.
70 |             des_option (integer): The option of the feature that is going to be used as local descriptor.
71 |             is_interactive (boolean): If True, the user can choose whether to load precomputed files or generate them.
72 | 
73 |         Returns:
74 |             NumPy float array: The result of the predictions made.
75 |             NumPy float array: The real labels for the testing set.
76 |         """
77 |         isTrain = False
78 |         des_name = constants.ORB_FEAT_NAME if des_option == constants.ORB_FEAT_OPTION else constants.SIFT_FEAT_NAME
79 |         print("Getting global descriptors for the testing set...")
80 |         start = time.time()
81 |         x, y, cluster_model = self.get_data_and_labels(self.dataset.get_test_set(), cluster_model, k, des_name, isTrain, des_option)
82 |         end = time.time()
83 |         self.log.test_vlad_time(end - start)
84 |         start = time.time()
85 |         _, result = svm.predict(x)
86 |         end = time.time()
87 |         self.log.predict_time(end - start)
88 |         mask = result == y
89 |         correct = np.count_nonzero(mask)
90 |         accuracy = (correct * 100.0 / result.size)
91 |         self.log.accuracy(accuracy)
92 |         return result, y
93 | 
94 |     def get_data_and_labels(self, img_set, cluster_model, k, des_name, isTrain, des_option=constants.ORB_FEAT_OPTION):
95 |         """
96 |         Calculates all the local descriptors for an image set and then uses a clustering model to build the
97 |         Bag of Visual Words histogram (the global descriptor) of each image, storing the label of its class.
98 |         Args:
99 |             img_set (dictionary of lists): One list of image paths per class.
100 |             cluster_model: A fitted clustering model used to quantize the descriptors; only used when isTrain is False.
101 |             isTrain (boolean): If True, a new MiniBatchKMeans model with k clusters is fitted on the descriptors of this set.
102 | 
103 |         Returns:
104 |             NumPy float matrix: Each row is the global descriptor of an image and each column is a dimension.
105 |             NumPy float array: Each element is the number of the class for the corresponding image.
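
        Illustrative sketch of the Bag of Visual Words flow this method follows (standalone; the
        image paths and variable names below are hypothetical):

            descs, labels = [], []
            for path in ["img_a.jpg", "img_b.jpg"]:                         # two images of class 0
                descs, labels = descriptors.sift(cv2.imread(path), descs, labels, 0)
            # Training: fit k centers on all local descriptors and build one histogram per image.
            hists, model = descriptors.cluster_features(descs, cluster_model=MiniBatchKMeans(n_clusters=64))
            # Testing: reuse the fitted model to turn the descriptors of new images into histograms.
            test_hists = descriptors.img_to_vect(descs, model)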
106 |         """
107 |         y = []
108 |         img_descs = []
109 | 
110 |         for class_number in range(len(img_set)):
111 |             img_paths = img_set[class_number]
112 |             step = round(constants.STEP_PERCENTAGE * len(img_paths) / 100)
113 |             for i in range(len(img_paths)):
114 |                 if (step > 0) and (i % step == 0):
115 |                     print("Class {0}: processing image {1} of {2} ({3:.0f}%)".format(class_number, i, len(img_paths), 100.0 * i / len(img_paths)))
116 |                 img = cv2.imread(img_paths[i])
117 |                 img_descs, y = descriptors.sift(img, img_descs, y, class_number)
118 | 
119 |         # Cluster the local descriptors only once, after every image in the set has been processed.
120 |         if int(isTrain) == 1:
121 |             X, cluster_model = descriptors.cluster_features(img_descs, cluster_model=MiniBatchKMeans(n_clusters=int(k)))
122 |         else:
123 |             X = descriptors.img_to_vect(img_descs, cluster_model)
124 |         print('Global descriptors shape:', X.shape)
125 |         y = np.int32(y)[:, np.newaxis]
126 |         x = np.float32(X)
127 |         return x, y, cluster_model
--------------------------------------------------------------------------------
/constants.py:
--------------------------------------------------------------------------------
1 | NUMPY_DELIMITER = ","
2 | PICKLE_PROTOCOL = 2
3 | DATASET_PATH = "dataset"
4 | ORB_FEAT_OPTION = 1
5 | SIFT_FEAT_OPTION = 2
6 | TESTING_IMG_PATH = "dataset/cassava/train/n12926689_5139.JPEG"
7 | ORB_FEAT_NAME = "ORB"
8 | SIFT_FEAT_NAME = "SIFT"
9 | GENERATE_OPTION = 1
10 | FILES_DIR_NAME = "files"
11 | TRAIN_TXT_FILE = "train.txt"
12 | TEST_TXT_FILE = "test.txt"
13 | STEP_PERCENTAGE = 20
--------------------------------------------------------------------------------
/dataset.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import numpy as np
3 | from sklearn.model_selection import train_test_split
4 | 
5 | # Local dependencies
6 | import constants
7 | 
8 | 
9 | class Dataset:
10 |     """
11 |     This class manages the information for the dataset.
12 |     """
13 | 
14 |     def __init__(self, path):
15 |         """
16 |         Initialize the Dataset object.
17 | 
18 |         Args:
19 |             path: The path where the dataset of images is stored.
20 | 
21 |         Returns:
22 |             void
23 |         """
24 |         self.path = path
25 |         self.train_set = {}
26 |         self.test_set = {}
27 |         self.classes = []
28 |         self.classes_counts = []
29 |         self.myarray = []
30 |         self.imageList = []
31 | 
32 |     def generate_sets(self):
33 |         """
34 |         Reads the information of the training and testing sets and stores it into attributes of the object.
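        The dataset path is expected to contain one sub-folder per class, with the images of that
        class stored as .jpg files directly inside it, for example (illustrative folder and file names):

            dataset/
                butterfly/
                    image_0001.jpg
                    image_0002.jpg
                dolphin/
                    image_0001.jpg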
35 | 
36 |         Returns:
37 |             void
38 |         """
39 |         dataset_classes = glob.glob(self.path + "/*")
40 |         i = 0
41 |         for folder in dataset_classes:
42 | 
43 |             path = folder.replace("\\", "/")
44 |             # print(path)
45 |             if "/" in folder:
46 |                 class_name = folder.split("/")[-1]
47 |             else:
48 |                 class_name = folder.split("\\")[-1]
49 |             self.classes.append(class_name)
50 |             # print(class_name)
51 |             # train = glob.glob(path + "/train/*")
52 |             # print(train)
53 |             # test = glob.glob(path + "/test/*")
54 |             anotherList = []
55 |             rasterList = glob.glob(path + '/*.jpg')
56 |             for image in rasterList:
57 |                 imgList = image.replace("\\", "/")
58 |                 self.imageList.append(imgList)
59 |                 anotherList.append(imgList)
60 | 
61 | 
62 |             # self.imageList = np.random.rand(100, 5)
63 |             # np.random.shuffle(self.imageList)
64 |             myarray = np.asarray(self.imageList)
65 | 
66 |             # Hold out the first 15 images of each class for testing; train on the remaining images so the two sets do not overlap.
67 |             self.train_set[i] = anotherList[15:]
68 |             self.test_set[i] = anotherList[:15]
69 |             print('Class {0}: {1} training / {2} testing images'.format(class_name, len(self.train_set[i]), len(self.test_set[i])))
70 |             # print('self.test_set', self.test_set[i], i)
71 |             i = i + 1
72 |             # mydict['train' + str(class_name)] = 'someval'
73 |             # "{0}_train_set".format(class_name), "{0}_test_set".format(class_name), b_train, b_test = train_test_split(self.imageList, self.classes, test_size=0.33, random_state=42)
74 | 
75 |             # rasterList = rasterList.replace("\\", "/")
76 | 
77 |         # print(self.imageList)
78 |         myarray = np.asarray(self.imageList)
79 |         self.classes = np.asarray(self.classes)
80 | 
81 | 
82 | 
83 |         print('Number of classes in the training and testing sets:')
84 |         print(len(self.train_set))
85 |         print(len(self.test_set))
86 |         # self.train_set, self.test_set, b_train, b_test = train_test_split(myarray, self.classes, test_size=0.33, random_state=42)
87 | 
88 |         # print(self.train_set[20])
89 |         # self.train_set.append(train)
90 |         # self.test_set.append(test)
91 |         self.classes_counts.append(0)
92 | 
93 |     def get_train_set(self):
94 |         """
95 |         Get the paths of the objects in the training set.
96 | 
97 |         Returns:
98 |             list of strings: Paths for objects in the training set.
99 |         """
100 |         if len(self.train_set) == 0:
101 |             self.generate_sets()
102 |         return self.train_set
103 | 
104 |     def get_test_set(self):
105 |         """
106 |         Get the paths of the objects in the testing set.
107 | 
108 |         Returns:
109 |             list of strings: Paths for objects in the testing set.
110 |         """
111 |         if len(self.test_set) == 0:
112 |             self.generate_sets()
113 |         return self.test_set
114 | 
115 |     def get_classes(self):
116 |         """
117 |         Get the names of the classes that are in the dataset.
118 | 
119 |         Returns:
120 |             list of strings: List with the names of the classes.
121 |         """
122 |         if len(self.classes) == 0:
123 |             self.generate_sets()
124 |         return self.classes
125 | 
126 |     def get_classes_counts(self):
127 |         """
128 |         Get a list with the count of total local descriptors for each class.
129 | 
130 |         Returns:
131 |             list of integers: List with the count of all the local descriptors in each class.
132 |         """
133 |         return self.classes_counts
134 | 
135 |     def get_y(self, my_set):
136 |         """
137 |         Get the labels for a given set.
138 | 
139 |         Args:
140 |             my_set (matrix of strings): Each row has the paths for the objects in that class.
141 | 
142 |         Returns:
143 |             NumPy float array: The labels for a given set.
144 |         """
145 |         y = []
146 |         if len(my_set) == 0:
147 |             self.generate_sets()
148 |         for class_ID in range(len(my_set)):
149 |             y += [class_ID] * len(my_set[class_ID])
150 |         # Transform the list into a column vector
151 |         y = np.float32(y)[:, np.newaxis]
152 |         return y
153 | 
154 |     def get_train_y(self):
155 |         """
156 |         Get the labels for the training set.
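        The labels follow the order in which get_train_set() lists the images. Illustrative example
        (hypothetical file names): with train_set = {0: ["a.jpg", "b.jpg"], 1: ["c.jpg"]} the result
        is np.float32([[0.], [0.], [1.]]).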
157 | 158 | Returns: 159 | NumPy float array: The labels for the training set. 160 | """ 161 | return self.get_y(self.train_set) 162 | 163 | def get_test_y(self): 164 | """ 165 | Get the labels for the testing set. 166 | 167 | Returns: 168 | NumPy float array: The labels for the testing set. 169 | """ 170 | return self.get_y(self.test_set) 171 | 172 | def store_listfile(self): 173 | """ 174 | Used for creating files in the format filelist used in Caffe for 175 | converting an image set. (caffe/tools/convert_imageset.cpp) 176 | 177 | Returns: 178 | void 179 | """ 180 | train_file = open(constants.TRAIN_TXT_FILE, "w") 181 | test_file = open(constants.TEST_TXT_FILE, "w") 182 | self.get_train_set() 183 | self.get_test_set() 184 | for class_id in range(len(self.classes)): 185 | current_train = self.train_set[class_id] 186 | for filename in current_train: 187 | # Changing path in Windows 188 | path = filename.replace("\\", "/") 189 | idx = path.index("/") 190 | path = path[(idx + 1):] 191 | train_file.write("{0} {1}\n".format(path, class_id)) 192 | current_test = self.test_set[class_id] 193 | for filename in current_test: 194 | # Changing path in Windows 195 | path = filename.replace("\\", "/") 196 | idx = path.index("/") 197 | path = path[(idx + 1):] 198 | test_file.write("{0} {1}\n".format(path, class_id)) 199 | train_file.close() 200 | test_file.close() 201 | 202 | def set_class_count(self, class_number, class_count): 203 | print(class_number, class_count) 204 | #class_count = 1200 205 | """ 206 | Set the count of local descriptors in one class. 207 | 208 | Args: 209 | class_number: ID for the class. 210 | class_count: Number of local descriptors that were found in the class. 211 | 212 | Returns: 213 | void 214 | """ 215 | if(class_number == 0): 216 | self.classes_counts.pop(0) 217 | self.classes_counts.append(class_count) 218 | 219 | 220 | -------------------------------------------------------------------------------- /descriptors.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | # Local dependencies 5 | import utils 6 | import constants 7 | 8 | def orb(img): 9 | """ 10 | Calculate the ORB descriptors for an image and resizes the image 11 | having the larger dimension set to 640 and keeping the size relation. 12 | 13 | Args: 14 | img (BGR matrix): The image that will be used. 15 | 16 | Returns: 17 | list of floats array: The descriptors found in the image. 18 | """ 19 | orb = cv2.ORB() 20 | kp, des = orb.detectAndCompute(img, None) 21 | return des 22 | 23 | def sift(img,img_descs,y,class_number): 24 | """ 25 | Gets a list of 128 - dimensional descriptors using SIFT and DoG 26 | for keypoints and resizes the image having the larger dimension set to 640 27 | and keeping the size relation. 28 | 29 | Args: 30 | img (BGR matrix): The grayscale image that will be used. 31 | 32 | Returns: 33 | list of floats array: The descriptors found in the image. 34 | """ 35 | 36 | sift = cv2.xfeatures2d.SIFT_create() 37 | kp, des = sift.detectAndCompute(img, None) 38 | if des is not None: 39 | img_descs.append(des) 40 | y.append(class_number) 41 | else: 42 | print('Found you!!!!!!!') 43 | return img_descs,y 44 | 45 | def descriptors_from_class(dataset, class_img_paths, class_number, option = constants.ORB_FEAT_OPTION): 46 | """ 47 | Gets all the local descriptors for a class. 
If an image has a side with more than 640 pixels it will be resized 48 | leaving the biggest side at 640 pixels and conserving the aspect ratio for the other side. 49 | 50 | Args: 51 | dataset (Dataset object): An object that stores information about the dataset. 52 | class_img_paths (array of strings): The paths for each image in certain class. 53 | class_number (integer): The number of the class. 54 | option (integer): If this is 49 (The key '1') uses ORB features, else use SIFT. 55 | 56 | Returns: 57 | numpy float matrix: Each row are the descriptors found in an image of the class 58 | """ 59 | des = None 60 | step = (constants.STEP_PERCENTAGE * len(class_img_paths)) / 100 61 | for i in range(len(class_img_paths)): 62 | img_path = class_img_paths[i] 63 | img = cv2.imread(img_path) 64 | resize_to = 640 65 | h, w, channels = img.shape 66 | if h > resize_to or w > resize_to: 67 | img = utils.resize(img, resize_to, h, w) 68 | if option == constants.ORB_FEAT_OPTION: 69 | des_name = "ORB" 70 | new_des = orb(img) 71 | else: 72 | des_name = "SIFT" 73 | new_des = sift(img) 74 | if new_des is not None: 75 | if des is None: 76 | des = np.array(new_des, dtype=np.float32) 77 | else: 78 | des = np.vstack((des, np.array(new_des))) 79 | # Print a message to show the status of the function 80 | if i % step == 0: 81 | percentage = (100 * i) / len(class_img_paths) 82 | message = "Calculated {0} descriptors for image {1} of {2}({3}%) of class number {4} ...".format( 83 | des_name, i, len(class_img_paths), percentage, class_number 84 | ) 85 | print(message) 86 | message = "* Finished getting the descriptors for the class number {0}*".format(class_number) 87 | print(message) 88 | print("Number of descriptors in class: {0}".format(len(des))) 89 | print(class_number, len(des)) 90 | dataset.set_class_count(class_number, len(des)) 91 | return des 92 | 93 | def all_descriptors(dataset, class_list, option = constants.ORB_FEAT_OPTION): 94 | """ 95 | Gets every local descriptor of a set with different classes (This is useful for getting a codebook). 96 | 97 | Args: 98 | class_list (list of arrays of strings): The list has information for a specific class in each element and each 99 | element is an array of strings which are the paths for the image of that class. 100 | option (integer): It's 49 (the key '1') if ORB features are going to be used, else use SIFT features. 101 | 102 | Returns: 103 | numpy float matrix: Each row are the descriptors found in an image of the set 104 | """ 105 | des = None 106 | for i in range(len(class_list)): 107 | message = "*** Getting descriptors for class number {0} of {1} ***".format(i, len(class_list)) 108 | print(message) 109 | class_img_paths = class_list[i] 110 | new_des = descriptors_from_class(dataset, class_img_paths, i, option) 111 | if des is None: 112 | des = new_des 113 | else: 114 | des = np.vstack((des, new_des)) 115 | message = "*****************************\n"\ 116 | "Finished getting all the descriptors\n" 117 | print(message) 118 | print("Total number of descriptors: {0}".format(len(des))) 119 | if len(des) > 0: 120 | print("Dimension of descriptors: {0}".format(len(des[0]))) 121 | print("First descriptor:\n{0}".format(des[0])) 122 | return des 123 | 124 | def gen_codebook(dataset, descriptors, k = 64): 125 | """ 126 | Generate a k codebook for the dataset. 127 | 128 | Args: 129 | dataset (Dataset object): An object that stores information about the dataset. 130 | descriptors (list of integer arrays): The descriptors for every class. 
131 | k (integer): The number of clusters that are going to be calculated. 132 | 133 | Returns: 134 | list of integer arrays: The k codewords for the dataset. 135 | """ 136 | k=int(k) 137 | print(type(dataset),type(descriptors),type(k)) 138 | 139 | iterations = 10 140 | epsilon = 1.0 141 | criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, iterations, epsilon) 142 | compactness, labels, centers = cv2.kmeans(descriptors, k , None,criteria, iterations, cv2.KMEANS_RANDOM_CENTERS) 143 | return centers 144 | 145 | def vlad(descriptors, centers): 146 | """ 147 | Calculate the Vector of Locally Aggregated Descriptors (VLAD) which is a global descriptor from a group of 148 | descriptors and centers that are codewords of a codebook, obtained for example with K-Means. 149 | 150 | Args: 151 | descriptors (numpy float matrix): The local descriptors. 152 | centers (numpy float matrix): The centers are points representatives of the classes. 153 | 154 | Returns: 155 | numpy float array: The VLAD vector. 156 | """ 157 | dimensions = len(descriptors[0]) 158 | vlad_vector = np.zeros((len(centers), dimensions), dtype=np.float32) 159 | for descriptor in descriptors: 160 | nearest_center, center_idx = utils.find_nn(descriptor, centers) 161 | for i in range(dimensions): 162 | vlad_vector[center_idx][i] += (descriptor[i] - nearest_center[i]) 163 | # L2 Normalization 164 | vlad_vector = cv2.normalize(vlad_vector,vlad_vector) 165 | vlad_vector = vlad_vector.flatten() 166 | return vlad_vector 167 | 168 | def img_to_vect(img_descs, cluster_model): 169 | """ 170 | Given an image path and a trained clustering model (eg KMeans), 171 | generates a feature vector representing that image. 172 | Useful for processing new images for a classifier prediction. 173 | """ 174 | 175 | 176 | clustered_descs = [cluster_model.predict(raw_words) for raw_words in img_descs] 177 | img_bow_hist = np.array([np.bincount(clustered_desc, minlength=cluster_model.n_clusters) for clustered_desc in clustered_descs ]) 178 | return img_bow_hist 179 | 180 | 181 | 182 | 183 | def cluster_features(img_descs, cluster_model): 184 | """ 185 | Cluster the training features using the cluster_model 186 | and convert each set of descriptors in img_descs 187 | to a Visual Bag of Words histogram. 188 | Parameters: 189 | ----------- 190 | X : list of lists of SIFT descriptors (img_descs) 191 | training_idxs : array/list of integers 192 | Indicies for the training rows in img_descs 193 | cluster_model : clustering model (eg KMeans from scikit-learn) 194 | The model used to cluster the SIFT features 195 | Returns: 196 | -------- 197 | X, cluster_model : 198 | X has K feature columns, each column corresponding to a visual word 199 | cluster_model has been fit to the training set 200 | """ 201 | n_clusters = cluster_model.n_clusters 202 | # Concatenate all descriptors in the training set together 203 | training_descs = img_descs 204 | all_train_descriptors = [desc for desc_list in training_descs for desc in desc_list] 205 | all_train_descriptors = np.array(all_train_descriptors) 206 | 207 | if all_train_descriptors.shape[1] != 128: 208 | raise ValueError('Expected SIFT descriptors to have 128 features, got', all_train_descriptors.shape[1]) 209 | 210 | # train kmeans or other cluster model on those descriptors selected above 211 | cluster_model.fit(all_train_descriptors) 212 | print('done clustering. 
Using clustering model to generate BoW histograms for each image.') 213 | 214 | # compute set of cluster-reduced words for each image 215 | img_clustered_words = [cluster_model.predict(raw_words) for raw_words in img_descs] 216 | 217 | # finally make a histogram of clustered word counts for each image. These are the final features. 218 | img_bow_hist = np.array( 219 | [np.bincount(clustered_words, minlength=n_clusters) for clustered_words in img_clustered_words]) 220 | 221 | X = img_bow_hist 222 | print('done generating BoW histograms.') 223 | 224 | return X, cluster_model 225 | 226 | -------------------------------------------------------------------------------- /filenames.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # Local dependencies 4 | import constants 5 | 6 | 7 | def codebook(k, des_name): 8 | print('files.dirname') 9 | print(constants.FILES_DIR_NAME) 10 | return "{0}/codebook_{1}.mat".format(constants.FILES_DIR_NAME, signature(k, des_name)) 11 | 12 | 13 | def result(k, des_name, svm_kernel): 14 | return "{0}/result_{1}.csv".format(constants.FILES_DIR_NAME, signature(k, des_name, kernel_name(svm_kernel))) 15 | 16 | 17 | def vlads_train(k, des_name): 18 | return "{0}/VLADS_train_{1}.mat".format(constants.FILES_DIR_NAME, signature(k, des_name)) 19 | 20 | 21 | def vlads_test(k, des_name): 22 | return "{0}/VLADS_test_{1}.mat".format(constants.FILES_DIR_NAME, signature(k, des_name)) 23 | 24 | 25 | def svm(k, des_name, svm_kernel): 26 | return "{0}/svm_data_{1}.dat".format(constants.FILES_DIR_NAME, signature(k, des_name, kernel_name(svm_kernel))) 27 | 28 | 29 | def log(k, des_name, svm_kernel): 30 | return "{0}/log_{1}.txt".format(constants.FILES_DIR_NAME, signature(k, des_name, kernel_name(svm_kernel))) 31 | 32 | 33 | def signature(k, des_name, svm_kernel=None): 34 | if svm_kernel is None: 35 | return "{0}_{1}".format(k, des_name) 36 | else: 37 | return "{0}_{1}_{2}".format(k, des_name, svm_kernel) 38 | 39 | def kernel_name(svm_kernel): 40 | if svm_kernel == cv2.ml.SVM_LINEAR: 41 | kernel_name = "LINEAR" 42 | elif svm_kernel == cv2.ml.SVM_POLY: 43 | kernel_name = "POLY" 44 | elif svm_kernel == cv2.ml.SVM_RBF: 45 | kernel_name = "RBF" 46 | else: 47 | kernel_name = "SIGMOID" 48 | return kernel_name 49 | -------------------------------------------------------------------------------- /log.py: -------------------------------------------------------------------------------- 1 | 2 | # Local dependencies 3 | import utils 4 | import filenames 5 | 6 | 7 | class Log: 8 | def __init__(self, k, des_name, svm_kernel): 9 | self.text = "" 10 | self.k = k 11 | self.des_name = des_name 12 | self.svm_kernel = svm_kernel 13 | 14 | def save(self): 15 | file = open(filenames.log(self.k, self.des_name, self.svm_kernel), "w") 16 | file.write(self.text) 17 | file.close() 18 | 19 | def train_des_time(self, time): 20 | str = "Time for getting all the local descriptors of the training images was {0}.\n" 21 | elapsed_time = utils.humanize_time(time) 22 | self.text += str.format(elapsed_time) 23 | 24 | def codebook_time(self, time): 25 | str = "Time for generating the codebook with k-means was {0}.\n" 26 | elapsed_time = utils.humanize_time(time) 27 | self.text += str.format(elapsed_time) 28 | 29 | def train_vlad_time(self, time): 30 | self.vlad_time(time, "training") 31 | 32 | def svm_time(self, time): 33 | str = "Time for calculating the SVM was {0}.\n" 34 | elapsed_time = utils.humanize_time(time) 35 | self.text += str.format(elapsed_time) 36 | 37 | 
def test_vlad_time(self, time): 38 | self.vlad_time(time, "testing") 39 | 40 | def predict_time(self, time): 41 | elapsed_time = utils.humanize_time(time) 42 | self.text += "Elapsed time predicting the testing set is {0}\n".format(elapsed_time) 43 | 44 | def accuracy(self, accuracy): 45 | self.text += "Accuracy = {0}.\n".format(accuracy) 46 | 47 | def classes(self, classes): 48 | self.text += "Classes = {0}\n".format(classes) 49 | 50 | def classes_counts(self, classes_counts): 51 | self.text += "Classes Local Descriptors Counts = {0}\n".format(classes_counts) 52 | 53 | def confusion_matrix(self, conf_mat): 54 | self.text += "Confusion Matrix =\n{0}".format(conf_mat) 55 | 56 | def vlad_time(self, time, set): 57 | str = "Time for getting VLAD global descriptors of the {0} images was {1}.\n" 58 | elapsed_time = utils.humanize_time(time) 59 | self.text += str.format(set, elapsed_time) -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import joblib 3 | import numpy as np 4 | import time 5 | import os 6 | 7 | # Local dependencies 8 | from classifier import Classifier 9 | from dataset import Dataset 10 | import descriptors 11 | import constants 12 | import utils 13 | import filenames 14 | from log import Log 15 | 16 | 17 | def main(is_interactive=True, k=64, des_option=constants.ORB_FEAT_OPTION, svm_kernel=cv2.ml.SVM_LINEAR): 18 | if not is_interactive: 19 | experiment_start = time.time() 20 | # Check for the dataset of images 21 | if not os.path.exists(constants.DATASET_PATH): 22 | print("Dataset not found, please copy one.") 23 | return 24 | dataset = Dataset(constants.DATASET_PATH) 25 | dataset.generate_sets() 26 | 27 | # Check for the directory where stores generated files 28 | if not os.path.exists(constants.FILES_DIR_NAME): 29 | os.makedirs(constants.FILES_DIR_NAME) 30 | 31 | if is_interactive: 32 | des_option = int(input("Enter [1] for using ORB features or [2] to use SIFT features.\n")) 33 | k = input("Enter the number of cluster centers you want for the codebook.\n") 34 | svm_option = int(input("Enter [1] for using SVM kernel Linear or [2] to use RBF.\n")) 35 | svm_kernel = cv2.ml.SVM_LINEAR if svm_option == 1 else cv2.ml.SVM_RBF 36 | 37 | des_name = constants.ORB_FEAT_NAME if des_option == constants.ORB_FEAT_OPTION else constants.SIFT_FEAT_NAME 38 | print(des_name) 39 | log = Log(k, des_name, svm_kernel) 40 | 41 | 42 | codebook_filename = filenames.codebook(k, des_name) 43 | print('codebook_filename') 44 | print(codebook_filename) 45 | start = time.time() 46 | end = time.time() 47 | log.train_des_time(end - start) 48 | start = time.time() 49 | end = time.time() 50 | log.codebook_time(end - start) 51 | # Train and test the dataset 52 | classifier = Classifier(dataset, log) 53 | svm, cluster_model = classifier.train(svm_kernel, k, des_name, des_option=des_option, is_interactive=is_interactive) 54 | svm.save("svm_result.dat") 55 | joblib.dump(cluster_model, 'cluster_model.plk') 56 | print("Training ready. 
Now beginning with testing") 57 | result, labels = classifier.test(svm, cluster_model, k, des_option=des_option, is_interactive=is_interactive) 58 | print('test result') 59 | print(result,labels) 60 | # Store the results from the test 61 | classes = dataset.get_classes() 62 | log.classes(classes) 63 | log.classes_counts(dataset.get_classes_counts()) 64 | result_filename = filenames.result(k, des_name, svm_kernel) 65 | test_count = len(dataset.get_test_set()[0]) 66 | result_matrix = np.reshape(result, (len(classes), test_count)) 67 | utils.save_csv(result_filename, result_matrix) 68 | 69 | # Create a confusion matrix 70 | confusion_matrix = np.zeros((len(classes), len(classes)), dtype=np.uint32) 71 | for i in range(len(result)): 72 | predicted_id = int(result[i]) 73 | real_id = int(labels[i]) 74 | confusion_matrix[real_id][predicted_id] += 1 75 | 76 | print("Confusion Matrix =\n{0}".format(confusion_matrix)) 77 | log.confusion_matrix(confusion_matrix) 78 | log.save() 79 | print("Log saved on {0}.".format(filenames.log(k, des_name, svm_kernel))) 80 | if not is_interactive: 81 | experiment_end = time.time() 82 | elapsed_time = utils.humanize_time(experiment_end - experiment_start) 83 | print("Total time during the experiment was {0}".format(elapsed_time)) 84 | else: 85 | # Show a plot of the confusion matrix on interactive mode 86 | utils.show_conf_mat(confusion_matrix) 87 | #raw_input("Press [Enter] to exit ...") 88 | 89 | if __name__ == '__main__': 90 | main() -------------------------------------------------------------------------------- /scripts.py: -------------------------------------------------------------------------------- 1 | import constants 2 | import cv2 3 | import os 4 | import time 5 | 6 | # Local dependencies 7 | import filenames 8 | import utils 9 | import main 10 | 11 | 12 | def codebook_to_csv(k=128, des_name=constants.ORB_FEAT_NAME): 13 | if not os.path.exists(constants.FILES_DIR_NAME): 14 | os.makedirs(constants.FILES_DIR_NAME) 15 | codebook = utils.load(filenames.codebook(k, des_name)) 16 | filename = "{0}/codebook_{1}_{2}.csv".format(constants.FILES_DIR_NAME, k, des_name) 17 | utils.save_csv(filename, codebook) 18 | print("Copied codebook into the file with name {0}. 
Press any key to exit...".format(filename)) 19 | cv2.waitKey() 20 | 21 | def run_all(): 22 | main.main(is_interactive=False, k_opt=32, des_opt=constants.ORB_FEAT_OPTION, svm_kernel=cv2.SVM_RBF) 23 | 24 | if __name__ == '__main__': 25 | run_all() 26 | -------------------------------------------------------------------------------- /unit_tests.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import time 3 | import pickle 4 | import numpy as np 5 | from matplotlib import pyplot as plt 6 | 7 | #---------------------------------- 8 | # Local dependencies 9 | import descriptors 10 | import utils 11 | from dataset import Dataset 12 | import constants 13 | import filenames 14 | 15 | def test_dataset(): 16 | dataset = Dataset(constants.DATASET_PATH) 17 | pickle.dump(dataset, open(constants.DATASET_OBJ_FILENAME, "wb"), protocol=constants.PICKLE_PROTOCOL) 18 | classes = dataset.get_classes() 19 | print("Dataset generated with {0} classes.".format(len(classes))) 20 | print(classes) 21 | train = dataset.get_train_set() 22 | test = dataset.get_test_set() 23 | for i in range(len(classes)): 24 | print( 25 | "There are {0} training files and {1} testing files for class number {2} ({3})".format( 26 | len(train[i]), len(test[i]), i, classes[i] 27 | ) 28 | ) 29 | 30 | def test_des_type(): 31 | img = cv2.imread(constants.TESTING_IMG_PATH) 32 | kp, des = descriptors.orb(img) 33 | return des 34 | 35 | def test_descriptors(): 36 | img = cv2.imread(constants.TESTING_IMG_PATH) 37 | cv2.imshow("Normal Image", img) 38 | print("Normal Image") 39 | option = input("Enter [1] for using ORB features and other number to use SIFT.\n") 40 | start = time.time() 41 | if option == 1: 42 | orb = cv2.ORB() 43 | kp, des = orb.detectAndCompute(img, None) 44 | else: 45 | sift = cv2.SIFT() 46 | kp, des = sift.detectAndCompute(img, None) 47 | end = time.time() 48 | elapsed_time = utils.humanize_time(end - start) 49 | des_name = constants.ORB_FEAT_NAME if option == ord(constants.ORB_FEAT_OPTION_KEY) else constants.SIFT_FEAT_NAME 50 | print("Elapsed time getting descriptors {0}".format(elapsed_time)) 51 | print("Number of descriptors found {0}".format(len(des))) 52 | if des is not None and len(des) > 0: 53 | print("Dimension of descriptors {0}".format(len(des[0]))) 54 | print("Name of descriptors used is {0}".format(des_name)) 55 | img2 = cv2.drawKeypoints(img, kp) 56 | # plt.imshow(img2), plt.show() 57 | cv2.imshow("{0} descriptors".format(des_name), img2) 58 | print("Press any key to exit ...") 59 | cv2.waitKey() 60 | 61 | def test_codebook(): 62 | dataset = pickle.load(open(constants.DATASET_OBJ_FILENAME, "rb")) 63 | option = input("Enter [1] for using ORB features or [2] to use SIFT features.\n") 64 | start = time.time() 65 | des = descriptors.all_descriptors(dataset, dataset.get_train_set(), option) 66 | end = time.time() 67 | elapsed_time = utils.humanize_time(end - start) 68 | print("Elapsed time getting all the descriptors is {0}".format(elapsed_time)) 69 | k = 64 70 | des_name = constants.ORB_FEAT_NAME if option == constants.ORB_FEAT_OPTION else constants.SIFT_FEAT_NAME 71 | codebook_filename = "codebook_{0}_{1}.csv".format(k, des_name) 72 | start = time.time() 73 | codebook = descriptors.gen_codebook(dataset, des, k) 74 | end = time.time() 75 | elapsed_time = utils.humanize_time(end - start) 76 | print("Elapsed time calculating the k means for the codebook is {0}".format(elapsed_time)) 77 | np.savetxt(codebook_filename, codebook, delimiter=constants.NUMPY_DELIMITER) 78 | 
print("Codebook loaded in {0}, press any key to exit ...".format(constants.CODEBOOK_FILE_NAME)) 79 | cv2.waitKey() 80 | 81 | def test_vlad(): 82 | img = cv2.imread(constants.TESTING_IMG_PATH) 83 | option = input("Enter [1] for using ORB features or [2] to use SIFT features.\n") 84 | if option == 1: 85 | des = descriptors.orb(img) 86 | else: 87 | des = descriptors.sift(img) 88 | des_name = constants.ORB_FEAT_NAME if option == constants.ORB_FEAT_OPTION else constants.SIFT_FEAT_NAME 89 | k = 128 90 | codebook_filename = "codebook_{0}_{1}.csv".format(k, des_name) 91 | centers = np.loadtxt(codebook_filename, delimiter=constants.NUMPY_DELIMITER) 92 | vlad_vector = descriptors.vlad(des, centers) 93 | print(vlad_vector) 94 | return vlad_vector 95 | 96 | def test_one_img_classification(): 97 | img = cv2.imread("test.jpg") 98 | resize_to = 640 99 | h, w, channels = img.shape 100 | img = utils.resize(img, resize_to, h, w) 101 | des = descriptors.sift(img) 102 | k = 128 103 | des_name = "SIFT" 104 | codebook_filename = filenames.codebook(k, des_name) 105 | codebook = utils.load(codebook_filename) 106 | img_vlad = descriptors.vlad(des, codebook) 107 | svm_filename = filenames.svm(k, des_name) 108 | svm = cv2.SVM() 109 | svm.load(svm_filename) 110 | result = svm.predict(img_vlad) 111 | print("result is {0}".format(result)) 112 | 113 | if __name__ == '__main__': 114 | test_descriptors() -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import numpy.random as nprnd 2 | import numpy as np 3 | import cv2 4 | import scipy.io as sio 5 | import matplotlib.pyplot as plt 6 | import h5py 7 | 8 | 9 | def random_split(l, sample_size): 10 | """ 11 | Randomly splits a list in two parts. A sample and a rest (other part). 12 | 13 | Args: 14 | l (list): A list that is going to be splitted. 15 | sample_size (integer): The size of the sample that is going to be taken. 16 | 17 | Returns: 18 | list: One random group from the list. 19 | list: Another group from the list with size equal to sample_size. 20 | """ 21 | sample_indices = nprnd.choice(len(l), size=sample_size, replace=False) 22 | # print (len(sample_indices)) 23 | sample_indices.sort() 24 | # print("sample_indices = {0}".format(sample_indices)) 25 | other_part = [] 26 | sample_part = [] 27 | indices_counter = 0 28 | for index in range(len(l)): 29 | current_elem = l[index] 30 | if indices_counter == sample_size: 31 | other_part = other_part + l[index:] 32 | break 33 | if index == sample_indices[indices_counter]: 34 | sample_part.append(current_elem) 35 | indices_counter += 1 36 | else: 37 | other_part.append(current_elem) 38 | return other_part, sample_part 39 | 40 | def humanize_time(secs): 41 | """ 42 | Extracted from http://testingreflections.com/node/6534 43 | """ 44 | mins, secs = divmod(secs, 60) 45 | hours, mins = divmod(mins, 60) 46 | return '%02d:%02d:%02f' % (hours, mins, secs) 47 | 48 | def resize(img, new_size, h, w): 49 | """ 50 | Changes the largest side of an image to the new size and changes the other to maintain the aspect ratio. 51 | 52 | Args: 53 | img (BGR Matrix): The image that is going to be resized. 54 | new_size (integer): The value wanted for the biggest side of the image. 55 | 56 | Returns: 57 | BGR Matrix: The image resized to the new value keeping the aspect ratio. 
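
    Illustrative example (assumed behavior): a 1280 x 960 (w x h) image resized with new_size = 640
    keeps the aspect ratio and becomes 640 x 480. Note that cv2.resize expects integer dimensions,
    so the computed side should be cast, e.g. cv2.resize(img, (int(new_w), int(new_h))).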
58 | """ 59 | if h > w: 60 | new_h = 640 61 | new_w = (640 * w) / h 62 | else: 63 | new_h = (640 * h) / w 64 | new_w = 640 65 | img = cv2.resize(img, (new_w, new_h)) 66 | return img 67 | 68 | def find_nn(point, neighborhood): 69 | """ 70 | Finds the nearest neighborhood of a vector. 71 | 72 | Args: 73 | point (float array): The initial point. 74 | neighborhood (numpy float matrix): The points that are around the initial point. 75 | 76 | Returns: 77 | float array: The point that is the nearest neighbor of the initial point. 78 | integer: Index of the nearest neighbor inside the neighborhood list 79 | """ 80 | min_dist = float('inf') 81 | nn = neighborhood[0] 82 | nn_idx = 0 83 | for i in range(len(neighborhood)): 84 | neighbor = neighborhood[i] 85 | dist = cv2.norm(point - neighbor) 86 | if dist < min_dist: 87 | min_dist = dist 88 | nn = neighbor 89 | nn_idx = i 90 | 91 | return nn, nn_idx 92 | 93 | def save(filename, arr): 94 | """ 95 | Stores a numpy array in a file. 96 | 97 | Args: 98 | filename (string): The name for the file. 99 | arr (numpy array): 100 | 101 | Returns: 102 | void 103 | """ 104 | data = {"stored": arr} 105 | sio.savemat(filename, data) 106 | 107 | def load(filename): 108 | """ 109 | Load a numpy array from a file. 110 | 111 | Args: 112 | filename (string): The name of the file. 113 | 114 | Returns: 115 | numpy array: The array stored in the file. 116 | """ 117 | #data = sio.loadmat(filename) 118 | 119 | 120 | f = h5py.File(filename,'r') 121 | data = f.get('data/variable1') 122 | data = np.array(data) # For converting to numpy array 123 | return data["stored"] 124 | 125 | def save_csv(filename, arr): 126 | """ 127 | Save an array into a file with the filename. 128 | 129 | Args: 130 | filename (string): The name for the file. 131 | arr (numpy array): The array that is going to be saved. 132 | 133 | Returns: 134 | void 135 | """ 136 | file = open(filename, "w") 137 | for row in arr: 138 | for i in range(len(row) - 1): 139 | file.write("{0} ".format(row[i])) 140 | file.write("{0}\n".format(row[len(row) - 1])) 141 | 142 | def show_conf_mat(confusion_matrix): 143 | """ 144 | Show a windows with a color image for a confusion matrix 145 | 146 | Args: 147 | confusion_matrix (NumPy Array): The matrix to be shown. 148 | 149 | Returns: 150 | void 151 | """ 152 | plt.matshow(confusion_matrix) 153 | plt.title('Confusion Matrix') 154 | plt.colorbar() 155 | plt.show() --------------------------------------------------------------------------------
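
Note on using the trained artifacts: main.py saves the SVM to "svm_result.dat" and the fitted
clustering model to "cluster_model.plk". Below is a minimal sketch for classifying a single new
image with those artifacts (assuming an OpenCV 3.x build that provides cv2.ml.SVM_load and the
repository modules on the path; the image path is hypothetical):

    import cv2
    import joblib
    import numpy as np
    import descriptors

    svm = cv2.ml.SVM_load("svm_result.dat")            # SVM trained and saved by main.py
    cluster_model = joblib.load("cluster_model.plk")   # clustering model fitted during training

    img = cv2.imread("some_new_image.jpg")             # hypothetical path
    img_descs, labels = descriptors.sift(img, [], [], 0)       # class number 0 is only a dummy label here
    hist = descriptors.img_to_vect(img_descs, cluster_model)   # Bag of Words histogram for the image
    _, prediction = svm.predict(np.float32(hist))
    print("Predicted class id:", int(prediction[0][0]))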