"""Real-time person detection on a webcam stream (OpenCV_HOG_Detector.py).

Modified from the OpenCV HOG person-detector sample (should work straight off
the bat). Every frame read from the camera is scanned with OpenCV's default
pretrained HOG + SVM people detector, the detections are drawn on the frame,
and the frame is shown in a window until 'q' is pressed.
"""

import numpy as np
import cv2
import sys
from glob import glob
import itertools as it


def inside(r, q):
    """Return True when rectangle ``r`` lies strictly inside rectangle ``q``.

    Both arguments are ``(x, y, width, height)`` sequences. Rectangles that
    share an edge are not considered nested because every comparison is strict.
    """
    rx, ry, rw, rh = r
    qx, qy, qw, qh = q
    return rx > qx and ry > qy and rx + rw < qx + qw and ry + rh < qy + qh


def draw_detections(img, rects, thickness=1):
    """Draw one red rectangle on ``img`` (in place) per ``(x, y, w, h)`` in ``rects``."""
    for x, y, w, h in rects:
        # Plain Python ints: the OpenCV drawing functions need integer points.
        x, y, w, h = int(x), int(y), int(w), int(h)
        # The HOG detector returns slightly larger rectangles than the real
        # objects, so we slightly shrink the rectangles to get a nicer output.
        pad_w, pad_h = int(0.15 * w), int(0.05 * h)
        cv2.rectangle(img, (x + pad_w, y + pad_h), (x + w - pad_w, y + h - pad_h),
                      (0, 0, 255), thickness)


hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
# The detector above is the pretrained people model that ships with OpenCV.
# Author's note: it is limited to the data it was trained on, so it does not
# cope well with viewing angles other than a roughly frontal one. To use
# HOG + SVM on anything else you have to train your own SVM with your own data.

# 0 selects the primary PC webcam, 1 an external camera.
cap = cv2.VideoCapture(0)

try:
    # Infinite loop so that detection runs in real time.
    while True:
        # `ret` is a success flag, `img` the captured frame.
        ret, img = cap.read()
        if not ret:
            # No frame was delivered (camera missing, busy or unplugged).
            # Passing the empty frame on to the detector would raise.
            print('Could not read a frame from the camera; stopping.', file=sys.stderr)
            break

        # Multi-scale sliding-window detection; the second return value holds
        # the per-detection weights, which are not used here.
        found, _weights = hog.detectMultiScale(img, winStride=(8, 8), padding=(32, 32), scale=1.05)

        # Keep only the boxes that are not completely contained in another box.
        found_filtered = [
            r for ri, r in enumerate(found)
            if not any(ri != qi and inside(r, q) for qi, q in enumerate(found))
        ]

        draw_detections(img, found)               # every raw detection, thin box
        draw_detections(img, found_filtered, 3)   # non-nested detections, thick box
        print('%d (%d) found' % (len(found_filtered), len(found)))
        cv2.imshow('img', img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break  # 'q' quits
finally:
    # Release the camera and close the window even if a frame raised.
    cap.release()
    cv2.destroyAllWindows()
Positive images: these images should contain only the object you are trying to detect 15 | 2. Negative images: these images can contain anything except for the object you are detecting 16 | 17 | Web link for the Inria dataset is shown below - the dataset is for pedestrian detection but this code can be adapted for other datasets too eg. car detection (dataset link: http://cogcomp.org/Data/Car/). Inria dataset link: http://pascal.inrialpes.fr/data/human/ 18 | 19 | The files are divided into the following: 20 | Training & Testing (this is where you evaluate your trained classifier) 21 | Visualise Hog: simply allows you to see what the gradients calculated look like on a given image (specified by the user). 22 | 23 | The results of the trained person detector on a test image are as follows: Normal (RGB image)> HOG descriptors 24 | ![test](https://user-images.githubusercontent.com/35964759/38281042-9523760a-37a0-11e8-914d-917308e3ac22.png) 25 | 26 | After classifying with a trained SVM model and applying NMS the following result is achieved: 27 | ![raw detections after nms](https://user-images.githubusercontent.com/35964759/38281024-75dcb50e-37a0-11e8-81fe-6df2dede1f78.png) 28 | 29 | Note The results are better when the background is not cluttered (shown below). 
"""Train a linear SVM on HOG features (Train_HOG_SVM.py).

Reads every file in a positive and a negative image directory, converts each
image to grayscale, extracts its HOG descriptor, trains a ``LinearSVC`` on an
80/20 train/test split, prints a classification report and saves the model.
"""

# Importing the necessary modules:

from skimage.feature import hog
from skimage.transform import pyramid_gaussian
from skimage.io import imread
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report
from skimage import color
from imutils.object_detection import non_max_suppression
import imutils
import numpy as np
import argparse
import cv2
import os
import glob
from PIL import Image  # This will be used to read/modify images (can be done via OpenCV too)

# `sklearn.externals.joblib` and `sklearn.cross_validation` were removed from
# recent scikit-learn releases; prefer the current locations and fall back to
# the old ones so the script keeps running on old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# define parameters of HOG feature extraction
orientations = 9
pixels_per_cell = (8, 8)
cells_per_block = (2, 2)
threshold = .3


# define path to images:

pos_im_path = r"Insert\path\for\positive_images\here"  # This is the path of our positive input dataset
# define the same for negatives
neg_im_path = r"Insert\path\for\negative_images\here"


def _hog_features(image_dir, file_names):
    """Return the HOG feature vector of every listed file in ``image_dir``.

    Each file is opened with PIL, converted to a single-channel ('L') image
    and described with the module-level HOG parameters.
    """
    features = []
    for file_name in file_names:
        # os.path.join keeps the script usable outside Windows; the context
        # manager closes the image file as soon as it has been converted.
        with Image.open(os.path.join(image_dir, file_name)) as img:
            # NOTE(review): the feature length depends on the image size, so
            # the training images presumably all share one size. If they do
            # not, resize here first, e.g. img = img.resize((64, 128)).
            gray = img.convert('L')  # RGB -> grayscale
        fd = hog(np.asarray(gray), orientations=orientations,
                 pixels_per_cell=pixels_per_cell, cells_per_block=cells_per_block,
                 block_norm='L2', feature_vector=True)  # fd = feature descriptor
        features.append(fd)
    return features


# read the image files:
pos_im_listing = os.listdir(pos_im_path)  # all the files in the positive image path
neg_im_listing = os.listdir(neg_im_path)
num_pos_samples = len(pos_im_listing)  # total no. of images
num_neg_samples = len(neg_im_listing)
print(num_pos_samples)  # prints the no. of samples in the positive dataset
print(num_neg_samples)

# compute HOG features and label them: positives first (label 1), then
# negatives (label 0), so `data` and `labels` stay aligned.
data = _hog_features(pos_im_path, pos_im_listing) + _hog_features(neg_im_path, neg_im_listing)
labels = [1] * num_pos_samples + [0] * num_neg_samples

# Encode the labels. They are already the integers 0/1, so this only
# normalises them into the array form returned by LabelEncoder.
le = LabelEncoder()
labels = le.fit_transform(labels)

#%%
# Partitioning the data into training and testing splits, using 80%
# of the data for training and the remaining 20% for testing
print(" Constructing training/testing split...")
(trainData, testData, trainLabels, testLabels) = train_test_split(
    np.array(data), labels, test_size=0.20, random_state=42)
#%% Train the linear SVM
print(" Training Linear SVM classifier...")
model = LinearSVC()
model.fit(trainData, trainLabels)
#%% Evaluate the classifier
print(" Evaluating classifier on test data ...")
predictions = model.predict(testData)
print(classification_report(testLabels, predictions))


#%% Save the Model
joblib.dump(model, 'model_name.npy')
"""Detect objects in one image with a trained HOG + linear-SVM model (testing_HOG_SVM.py).

A fixed-size window is slid over every layer of a Gaussian image pyramid. The
HOG descriptor of each window is classified by the saved SVM, confident hits
are mapped back to the coordinates of the (resized) input image, and
overlapping boxes are merged with non-maximum suppression (NMS).
"""

from skimage.feature import hog
from skimage.transform import pyramid_gaussian
from skimage import color
from imutils.object_detection import non_max_suppression
import imutils
import numpy as np
import cv2
import os
import glob

# `sklearn.externals.joblib` was removed from recent scikit-learn releases;
# prefer the standalone package and fall back for old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Define HOG Parameters
# change them if necessary to orientations = 8, pixels per cell = (16,16), cells per block to (1,1) for weaker HOG
orientations = 9
pixels_per_cell = (8, 8)
cells_per_block = (2, 2)
threshold = .3         # overlap threshold handed to non-maximum suppression
min_confidence = 0.6   # minimum SVM decision value for a window to count as a hit


# define the sliding window:
def sliding_window(image, stepSize, windowSize):
    """Yield ``(x, y, window)`` for a window slid over ``image``.

    ``stepSize`` is the number of pixels skipped between windows in both
    directions and ``windowSize`` is ``(width, height)``. Windows that touch
    the right or bottom border are returned clipped, so callers must check
    the window shape themselves.
    """
    for y in range(0, image.shape[0], stepSize):
        for x in range(0, image.shape[1], stepSize):
            # yield the current window
            yield (x, y, image[y: y + windowSize[1], x:x + windowSize[0]])


#%%
# Upload the saved svm model (raw string: backslashes are part of the path):
model = joblib.load(r'Inser\Path\of_the_trained\SVM-model\here')

# Test the trained classifier on an image below!
detections = []
# read the image you want to detect the object in:
image_path = r"Insert\Path\of_the_image\here"
img = cv2.imread(image_path)
if img is None:
    # cv2.imread signals an unreadable or missing file by returning None.
    raise FileNotFoundError("could not read the test image: " + image_path)

# Try it with the image resized if the image is too big
img = cv2.resize(img, (300, 200))  # comment this out to keep the original size, or choose another size

# cv2.imread returns the channels in BGR order, so the conversion to grayscale
# is done by OpenCV instead of skimage's rgb2gray (which assumes RGB). Doing
# it once up front also gives the pyramid a plain 2-D image to work on.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# defining the size of the sliding window (has to be the same as the size of the images in the training data)
(winW, winH) = (64, 128)
windowSize = (winW, winH)
downscale = 1.5

# Apply sliding window: `scale` is the index of the pyramid layer, so
# downscale ** scale maps layer coordinates back to the resized input image.
for scale, resized in enumerate(pyramid_gaussian(gray, downscale=downscale)):
    if resized.shape[0] < winH or resized.shape[1] < winW:
        break  # no full window fits in this layer or in any smaller one
    factor = downscale ** scale
    # loop over the sliding window for each layer of the pyramid
    for (x, y, window) in sliding_window(resized, stepSize=10, windowSize=windowSize):
        # ignore clipped windows at the right/bottom border
        if window.shape[0] != winH or window.shape[1] != winW:
            continue
        fds = hog(window, orientations, pixels_per_cell, cells_per_block, block_norm='L2')
        fds = fds.reshape(1, -1)  # scikit-learn expects a 2-D array: one row per sample
        if model.predict(fds)[0] != 1:
            continue
        # Signed distance to the SVM hyperplane (a margin, not a probability);
        # computed once and reused for the threshold, the log and the record.
        score = float(model.decision_function(fds)[0])
        if score > min_confidence:
            print("Detection:: Location -> ({}, {})".format(x, y))
            print("Scale -> {} | Confidence Score {} \n".format(scale, score))
            # (x, y, score, width, height) in coordinates of the resized input image
            detections.append((int(x * factor), int(y * factor), score,
                               int(winW * factor), int(winH * factor)))

# Raw detections (before NMS) in red.
for (x_tl, y_tl, _, w, h) in detections:
    cv2.rectangle(img, (x_tl, y_tl), (x_tl + w, y_tl + h), (0, 0, 255), thickness=2)

# do nms on the detected bounding boxes
rects = np.array([[x, y, x + w, y + h] for (x, y, _, w, h) in detections])
sc = [score for (_, _, score, _, _) in detections]
print("detection confidence score: ", sc)
sc = np.array(sc)
pick = non_max_suppression(rects, probs=sc, overlapThresh=threshold)

# The red boxes above are the raw detections; the green boxes below are what
# is left after NMS. Move the cv2.imshow call if you want to see only one set.
for (xA, yA, xB, yB) in pick:
    cv2.rectangle(img, (int(xA), int(yA)), (int(xB), int(yB)), (0, 255, 0), 2)
cv2.imshow("Raw Detections after NMS", img)

#### Save the images below
k = cv2.waitKey(0) & 0xFF
if k == ord('s'):
    # Raw string: in a normal literal the "\t" of "\to_the..." would be a TAB.
    cv2.imwrite(r'Path\to_the_directory\of_saved_image.png', img)
# ESC, 's' or any other key closes the window.
cv2.destroyAllWindows()
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 23 20:53:18 2017

@author: Samyakh Tukra

Show an image next to a visualisation of its HOG descriptor
(visualise_HOGdescriptors.py).
"""
#%%
import matplotlib.pyplot as plt
from skimage import io
from skimage.feature import hog
from skimage import data, color, exposure
from PIL import Image
#%%
img = io.imread(r"Insert\Image\Path\Here.jpg")
#im= Image.open(r"Insert\Image\Path\Here.jpg")
image = color.rgb2gray(img)

# With visualize=True, hog returns the feature vector together with an image
# of the oriented-gradient histograms.
fd, hog_image = hog(image, orientations=8, pixels_per_cell=(16, 16),
                    cells_per_block=(1, 1), visualize=True)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 4), sharex=True, sharey=True)

# Left panel: the grayscale input.
ax1.axis('off')
ax1.imshow(image, cmap=plt.cm.gray)
ax1.set_title('Input image')
ax1.set_adjustable('box')

# Rescale histogram for better display
hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 0.02))

# Right panel: the HOG visualisation.
ax2.axis('off')
ax2.imshow(hog_image_rescaled, cmap=plt.cm.gray)
ax2.set_title('Histogram of Oriented Gradients')
ax2.set_adjustable('box')  # was applied to ax1 a second time (copy-paste slip)
plt.show()