├── 01_Video_Transformations
│   ├── Readme.txt
│   ├── inputfile.txt
│   ├── inputvideo.mp4
│   └── videoTransforms.py
├── 02_Background_Subtraction
│   ├── MOG.cpp
│   ├── Readme.md
│   ├── mog2.cpp
│   └── vibe.cpp
├── 03_Video_Read-Write
│   ├── Readme.md
│   ├── videoWriter.py
│   └── videotoframe.py
├── 04_Motion_Detection
│   ├── READMEfile.txt
│   ├── accuracy_measure.py
│   ├── groundtruth.zip
│   ├── main.py
│   └── movie_cars.ogv
├── 05_object_classification
│   ├── Readme.txt
│   ├── main.py
│   └── svmTraining.py
├── 06_Human_Activity_Recognition
│   ├── HOGFile.py
│   ├── HOGTest.py
│   ├── Readme.txt
│   └── VideoMatching_PCA_SVD.py
├── 07_PCA_Eigenfaces
│   ├── Readme.txt
│   └── eignfaces.cpp
├── 08_Logo_Identification
│   ├── Readme.txt
│   ├── Results.zip
│   └── edgeTemplateMatching_single.cpp
└── README.md

/01_Video_Transformations/Readme.txt:
--------------------------------------------------------------------------------
1 | Working with OpenCV:
2 | 
3 | Main code is "videoTransforms.py".
4 | The file contains the functions for the Similarity, Affine and Perspective transforms.
5 | 
6 | The code takes three arguments, in order: the input video, the text file of transform coordinates, and the output video.
7 | 
8 | 
9 | The input file takes floating point values.
10 | 
11 | The extension mp4 is required for the input as well as the output videos, as the video codec for MacOS is 'avc1'
12 | with the 'mp4' container.
13 | 
14 | The code generates two output videos:
15 | First video (Pathtooutputvideo.mp4) shows the output corresponding to the given transform coordinates in the input file.
16 | 
17 | Second video (changedOutput.mp4) shows the extra work on the video.
18 | This includes flipping the video and applying edge detection to it.
19 | 
20 | An example of the input given to the program:
21 | inputvideo.mp4 inputfile.txt outputvideo.mp4
22 | 
23 | 
24 | 
--------------------------------------------------------------------------------
/01_Video_Transformations/inputfile.txt:
--------------------------------------------------------------------------------
1 | 1.1 1.1 10.1 10.1
2 | 2.2 3.1 11.3 12.2
3 | 
--------------------------------------------------------------------------------
/01_Video_Transformations/inputvideo.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PratikRamdasi/Computer-Vision/529989cb467730e786209773bb9ed02c266c1fc7/01_Video_Transformations/inputvideo.mp4
--------------------------------------------------------------------------------
/01_Video_Transformations/videoTransforms.py:
--------------------------------------------------------------------------------
1 | '''
2 | Project: Applying similarity, affine and perspective transformations on a video.
3 | 
4 | Author: Pratik Ramdasi
5 | 
6 | Date: 02/ 10/ 2016
7 | 
8 | '''
9 | 
10 | # Import Libraries
11 | 
12 | import numpy as np
13 | import glob
14 | import cv2
15 | import cv2.cv as cv
16 | import os
17 | 
18 | # Define class structure Video including transform functions
19 | 
20 | class Video:
21 | def __init__(self,input_path,transformIndex,coordinates,output_path):
22 | self.fname=[]
23 | self.path=input_path
24 | self.output=output_path
25 | self.transformIndex=transformIndex
26 | self.coordinates=coordinates
27 | 
28 | # Directory is checked for the existence of the required folders.
29 | # Folder contains the extracted input video frames 30 | # call Processing Functions to get transforms 31 | newpath = r'Frames' 32 | if not os.path.exists(newpath): os.makedirs(newpath) 33 | self.framing(self.path) 34 | self.decideTransform() 35 | self.applyTransform() 36 | 37 | # Method for similarity transformation 38 | 39 | def similarityTransform(self): 40 | self.height,self.width=cv2.imread("Frames/1.jpg").shape[:2] 41 | A = abs(float(self.coordinates[1][0]) - float(self.coordinates[0][0])) 42 | B = abs(float(self.coordinates[1][1]) - float(self.coordinates[0][1])) 43 | C = abs(float(self.coordinates[1][2]) - float(self.coordinates[0][2])) 44 | D = abs(float(self.coordinates[1][3]) - float(self.coordinates[0][3])) 45 | 46 | a = np.float32(float(C*A*(A+B*D))/float(A**2 + B)) 47 | b = np.float32(float(C-D*A)/float(A**2 + B)) 48 | c = np.float32(abs(float(self.coordinates[0][2]) - a*float(self.coordinates[0][0]) - b*float(self.coordinates[0][1]))) 49 | d = np.float32(abs(float(self.coordinates[0][3]) - b*float(self.coordinates[0][0]) - a*float(self.coordinates[0][1]))) 50 | 51 | M=np.array([[a ,b ,c],[-b ,a ,d]]) 52 | #print "M is: ",M 53 | 54 | # sort the input frames 55 | folder=self.sort_files() 56 | 57 | # Process next frames 58 | for i in folder: 59 | pic="Frames/"+str(i)+".jpg" 60 | img = cv2.imread(pic) 61 | dst=cv2.warpAffine(img,M,(self.width,self.height)) 62 | cv2.imwrite("Frames/%d.jpg" % i, dst) 63 | 64 | 65 | # Method for affine transformation: OpenCV Module 66 | 67 | def affineTransform(self): 68 | folder=self.sort_files() 69 | P=self.get_points() 70 | self.height,self.width=cv2.imread("Frames/1.jpg").shape[:2] 71 | # Process frames 72 | for i in folder: 73 | pic="Frames/"+str(i)+".jpg" 74 | img = cv2.imread(pic) 75 | pts1 = np.float32([[P[0][0],P[0][1]],[P[1][0],P[1][1]],[P[2][0],P[2][1]]]) 76 | pts2 = np.float32([[P[0][2],P[0][3]],[P[1][2],P[1][3]],[P[2][2],P[2][3]]]) 77 | M = cv2.getAffineTransform(pts1,pts2) 78 | dst = cv2.warpAffine(img,M,(self.width,self.height)) 79 | cv2.imwrite("Frames/%d.jpg" % i, dst) 80 | 81 | 82 | # Method for Perspective transformation: OpenCV Module 83 | 84 | def perspectiveTransform(self): 85 | folder=self.sort_files() 86 | P=self.get_points() 87 | self.height,self.width=cv2.imread("Frames/1.jpg").shape[:2] 88 | # Process frames 89 | for i in folder: 90 | pic="Frames/"+str(i)+".jpg" 91 | img = cv2.imread(pic) 92 | pts1 = np.float32([[P[0][0],P[0][1]],[P[1][0],P[1][1]],[P[2][0],P[2][1]],[P[3][0],P[3][1]]]) 93 | pts2 = np.float32([[P[0][2],P[0][3]],[P[1][2],P[1][3]],[P[2][2],P[2][3]],[P[3][2],P[3][3]]]) 94 | M = cv2.getPerspectiveTransform(pts1,pts2) 95 | dst = cv2.warpPerspective(img,M,(self.width,self.height)) 96 | cv2.imwrite("Frames/%d.jpg" % i, dst) 97 | 98 | # Get x,y co-ordinates 99 | 100 | def get_points(self): 101 | P=np.array(self.coordinates) 102 | return P 103 | 104 | # Extract frames from the video 105 | 106 | def framing(self,path): 107 | cap = cv2.VideoCapture(path) 108 | success,frame=cap.read(cv.CV_IMWRITE_JPEG_QUALITY) #handle of the Video Capture is required for obtaining frame. 
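# note: VideoCapture.read() takes no quality flag; cv.CV_IMWRITE_JPEG_QUALITY is an imwrite() option, so the argument above appears to be ignored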
109 | 110 | count = 1 111 | while success: 112 | cv2.imwrite("Frames/%d.jpg" % count, frame) # save frame as JPEG file 113 | count += 1 114 | success,frame = cap.read(cv.CV_IMWRITE_JPEG_QUALITY) # to read the last frame 115 | 116 | cap.release() 117 | 118 | # select transformation 119 | 120 | def decideTransform(self): 121 | if self.transformIndex == 2: 122 | self.similarityTransform() 123 | elif self.transformIndex == 3: 124 | self.affineTransform() 125 | elif self.transformIndex == 4: 126 | self.perspectiveTransform() 127 | else: 128 | print ("Not correct number of pair of points") 129 | 130 | self.writeOutputFile(self.output) 131 | 132 | # Apply selected transformation 133 | 134 | def applyTransform(self): 135 | self.framing(self.path) 136 | self.height,self.width=cv2.imread("Frames/1.jpg").shape[:2] 137 | 138 | # write transformed video 139 | 140 | out = cv2.VideoWriter("changedOutput.mp4",cv.CV_FOURCC('a','v','c','1'), 30.0, (self.width, self.height)) 141 | folder=self.sort_files() 142 | 143 | # write Transformed video frames 144 | 145 | for i in folder: 146 | pic="Frames/"+str(i)+".jpg" 147 | Newpic=cv2.imread(pic,0) 148 | frame=cv2.Canny(Newpic,100,200) 149 | cv2.imwrite(pic,frame) 150 | Newpic=cv2.imread(pic) 151 | img=cv2.flip(Newpic,0) 152 | out.write(img) 153 | out.release() 154 | 155 | # Writing output video file 156 | 157 | def writeOutputFile(self,output): 158 | self.height,self.width=cv2.imread("Frames/1.jpg").shape[:2] 159 | out = cv2.VideoWriter(output,cv.CV_FOURCC('a','v','c','1'), 30.0, (self.width, self.height)) 160 | folder=self.sort_files() 161 | 162 | for i in folder: 163 | pic="Frames/"+str(i)+".jpg" 164 | img=cv2.imread(pic) 165 | out.write(img) 166 | out.release() 167 | 168 | # Method to sort the files (here, frames!) 169 | 170 | def sort_files(self): 171 | '''Files in python are not sorted normally, they are sorted in the order in which the numbers appear: 172 | 1. That means 1, 10, 100, 2, 20, 200...and so on... 173 | 2. so we obtain the ending part of the filenames and then sort that array and return it. 174 | ''' 175 | for file in sorted(glob.glob("Frames/*.*")): 176 | s=file.split('/') 177 | a=s[-1].split('\\') 178 | x=a[-1].split('.') 179 | self.fname.append(int(x[0])) 180 | return(sorted(self.fname)) 181 | 182 | # Main Function 183 | 184 | if __name__ == "__main__": 185 | flag=False 186 | while(flag!=True): 187 | # User input arguments in the format mentioned in Readme document. 
188 | string=raw_input("Enter the arguments: ") 189 | tokens=string.split(" ") 190 | if(len(tokens)==3): 191 | inputfile,textfile,outputfile = tokens[0], tokens[1], tokens[2] 192 | print "Input file is: ", inputfile 193 | print "Test file is: ", textfile 194 | print "Output file is: ",outputfile 195 | flag=True 196 | else: 197 | flag=False 198 | 199 | count = 0 200 | coordinates = [] 201 | with open(textfile) as txtfile: 202 | for line in txtfile: 203 | count += 1 204 | val = list(line.split()) 205 | coordinates.append(val) 206 | 207 | v=Video(inputfile,count,coordinates,outputfile) 208 | 209 | -------------------------------------------------------------------------------- /02_Background_Subtraction/MOG.cpp: -------------------------------------------------------------------------------- 1 | /* PROJECT : Person Detection 2 | Author: Pratik Ramdasi 3 | TITLE: MOG based background subtraction 4 | Date: 07/ 19/ 2016 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | using namespace cv; 18 | using namespace std; 19 | 20 | // Method to perform morphological filtering 21 | 22 | Mat filter_image(Mat& img) 23 | { 24 | int morph_size1 = 2; 25 | int morph_size2 = 2; 26 | Mat kernel = cv::getStructuringElement(MORPH_RECT, Size(2*morph_size1 +1, 2*morph_size2 +1)); 27 | Mat filtered; 28 | dilate(img, filtered, kernel, cv::Point(morph_size1, morph_size2), 2); 29 | 30 | return filtered; 31 | }; 32 | 33 | // Method to reduce bounding box height 34 | 35 | Rect compressROI(Mat frm, Rect boundingBox, int padding) { 36 | Rect returnRect = Rect(boundingBox.x, boundingBox.y, boundingBox.width, boundingBox.height - padding); 37 | if (returnRect.x < 0)returnRect.x = 0; 38 | if (returnRect.y < 0)returnRect.y = 0; 39 | if (returnRect.x+returnRect.width >= frm.cols)returnRect.width = frm.cols-returnRect.x; 40 | if (returnRect.y+returnRect.height >= frm.rows)returnRect.height = frm.rows-returnRect.y; 41 | return returnRect; 42 | }; 43 | 44 | 45 | int main() 46 | { 47 | /* read input video */ 48 | VideoCapture cap; 49 | cap.open(0); // input video path 50 | 51 | int frame_no = 0; 52 | 53 | /* define Bg model parameters */ 54 | const int nmixtures = 5; 55 | const bool bShadowDetection = false; 56 | const int history = 100; 57 | BackgroundSubtractorMOG bg(history, nmixtures, bShadowDetection); 58 | 59 | 60 | vector < vector < Point > >contours; 61 | vector < Point > points; 62 | vector hierarchy; 63 | 64 | Mat frame, fgmask, fgimg, backgroundImage; 65 | 66 | while(1) 67 | { 68 | bool bSuccess = cap.read(frame); // read a new frame from video 69 | if (!bSuccess) // if not success, break loop 70 | { 71 | cout << "Cannot read a frame from video file" << endl; 72 | break; 73 | } 74 | 75 | frame_no ++; 76 | // increase the frame count 77 | Size s = frame.size(); // size of the frame 78 | 79 | // Pre process the frame, denoising 80 | 81 | medianBlur(frame, frame, 5); 82 | Mat blur_out; 83 | GaussianBlur(frame, blur_out, Size(5,5),0,0); 84 | 85 | 86 | /* Strategy: apply learning rate > 0 for first frame to learn the background and for next successive frames, 87 | apply learning rate = 0. 
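Keeping the learning rate at 0 afterwards freezes the model, so every later frame is segmented against that initial background.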
*/
88 | 
89 | // Get initial frame
90 | 
91 | if (frame_no == 1)
92 | {
93 | const double learningRate = 0.8;
94 | bg.operator()(blur_out, fgimg, learningRate);
95 | }
96 | 
97 | // Process next frames
98 | else {
99 | const double learningRate = 0;
100 | bg.operator()(blur_out, fgimg, learningRate);
101 | }
102 | 
103 | bg.getBackgroundImage (backgroundImage);
104 | 
105 | // filter the segmented image using morphology
106 | 
107 | fgimg = filter_image(fgimg);
108 | 
109 | // define horizontal line parameters
110 | 
111 | Point mid_left, mid_right;
112 | mid_left.y = s.height/2;
113 | mid_left.x = 0;
114 | mid_right.x = s.width;
115 | mid_right.y = s.height/2;
116 | 
117 | // find the contours
118 | 
119 | findContours (fgimg, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_NONE);
120 | 
121 | vector<vector<Point> > contours_poly( contours.size() );
122 | vector<Rect> boundRect( contours.size() );
123 | 
124 | // Get the moments
125 | 
126 | vector<Moments> mu(contours.size() );
127 | for( size_t i = 0; i < contours.size(); i++ )
128 | {
129 | mu[i] = moments( contours[i], false );
130 | }
131 | 
132 | // Approx. contours to polygons and get bounding boxes
133 | 
134 | for( int i = 0; i < contours.size(); i++ )
135 | {
136 | approxPolyDP( Mat(contours[i]), contours_poly[i], 3, true );
137 | boundRect[i] = boundingRect( Mat(contours_poly[i]) );
138 | 
139 | };
140 | 
141 | // define threshold values - specific to application video
142 | 
143 | int min_area = 100; // area thresholding for contours, value can be changed
144 | int max_height = 100; // maximum height of bounding box
145 | int line_thresh = 70; // contours above this horizontal line are ignored
146 | 
147 | for( int i = 0; i< contours.size(); i++ )
148 | {
149 | if (contourArea(contours[i]) > min_area && boundRect[i].y < mid_left.y-line_thresh) {
150 | 
151 | if (boundRect[i].height >= max_height) {
152 | boundRect[i] = compressROI(frame, boundRect[i], boundRect[i].height*3/4);
153 | }
154 | 
155 | rectangle(frame, boundRect[i].tl(), boundRect[i].br(), Scalar(0,255,0), 2, 8, 0 );
156 | 
157 | }
158 | }
159 | 
160 | // show output frame
161 | 
162 | imshow ("Frame", frame);
163 | 
164 | char k = (char)waitKey(30);
165 | if( k == 27 ) break;
166 | 
167 | }
168 | 
169 | return 0;
170 | }
171 | 
172 | 
--------------------------------------------------------------------------------
/02_Background_Subtraction/Readme.md:
--------------------------------------------------------------------------------
1 | Folder contains Background Subtraction strategies for motion detection in video processing.
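The strategies implemented here are MOG (MOG.cpp), MOG2 (mog2.cpp) and ViBe (vibe.cpp). As a rough sketch of the same idea, using the Python binding of the OpenCV 2.4 API these files target (the video path is a placeholder):

import cv2

cap = cv2.VideoCapture("input.mp4")        # placeholder path
bg = cv2.BackgroundSubtractorMOG()         # Gaussian-mixture background model
while True:
    ok, frame = cap.read()
    if not ok:
        break
    fgmask = bg.apply(frame, learningRate=0.01)  # 255 marks foreground pixels
    cv2.imshow("foreground", fgmask)
    if cv2.waitKey(30) == 27:              # Esc quits
        break
cap.release()
cv2.destroyAllWindows()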
2 | -------------------------------------------------------------------------------- /02_Background_Subtraction/mog2.cpp: -------------------------------------------------------------------------------- 1 | 2 | /* PROJECT : Person detection 3 | Author: Pratik Ramdasi 4 | TITLE: MOG2 based background subtraction 5 | Date: 07/ 19/ 2016 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | using namespace std; 20 | using namespace cv; 21 | 22 | // Method to perform morphological filtering 23 | 24 | Mat filter_image(Mat& img) 25 | { 26 | int morph_size1 = 1; 27 | int morph_size2 = 1; 28 | Mat kernel = cv::getStructuringElement(MORPH_RECT, Size(2*morph_size1 +1, 2*morph_size2 +1)); 29 | Mat filtered; 30 | 31 | // perform opeing 32 | erode(img, filtered, kernel); 33 | dilate(filtered, filtered, kernel); 34 | 35 | return filtered; 36 | }; 37 | 38 | // Method to reduce bounding box height 39 | 40 | Rect compressROI(Mat frm, Rect boundingBox, int padding) { 41 | Rect returnRect = Rect(boundingBox.x, boundingBox.y, boundingBox.width, boundingBox.height - padding); 42 | if (returnRect.x < 0)returnRect.x = 0; 43 | if (returnRect.y < 0)returnRect.y = 0; 44 | if (returnRect.x+returnRect.width >= frm.cols)returnRect.width = frm.cols-returnRect.x; 45 | if (returnRect.y+returnRect.height >= frm.rows)returnRect.height = frm.rows-returnRect.y; 46 | return returnRect; 47 | }; 48 | 49 | int main() 50 | { 51 | VideoCapture cap; 52 | cap.open(0); // input video path 53 | if ( !cap.isOpened() ) 54 | return -1; 55 | 56 | // define Bg model parameters 57 | 58 | const int nmixtures = 4; 59 | const bool bShadowDetection = false; 60 | const int history = 150; 61 | BackgroundSubtractorMOG2 bg(history, nmixtures, bShadowDetection); 62 | 63 | vector < vector < Point > >contours; 64 | vector < Point > points; 65 | vector hierarchy; 66 | 67 | Mat frame, fgmask, fgimg, backgroundImage; 68 | 69 | while(1) 70 | { 71 | 72 | bool bSuccess = cap.read(frame); // read a new frame from video 73 | if (!bSuccess) // if not success, break loop 74 | { 75 | cout << "Cannot read a frame from video file" << endl; 76 | break; 77 | } 78 | 79 | // get image size 80 | 81 | Size s = frame.size(); 82 | 83 | // remove noise 84 | 85 | medianBlur(frame, frame, 5); 86 | Mat blur_out; 87 | GaussianBlur(frame, blur_out, Size(5,5),0,0); 88 | 89 | // motion detection 90 | 91 | const double learningRate = -1; 92 | bg.operator()(frame, fgimg, learningRate); 93 | 94 | // smoothen the mask 95 | 96 | medianBlur(fgimg, fgimg, 5); 97 | GaussianBlur(fgimg, fgimg, Size(5,5),0,0); 98 | 99 | //Morphology operaitons 100 | 101 | fgimg = filter_image(fgimg); 102 | 103 | // define horizontal line parameters 104 | 105 | Point mid_left, mid_right; 106 | mid_left.y = s.height/2; 107 | mid_left.x = 0; 108 | mid_right.x = s.width; 109 | mid_right.y = s.height/2; 110 | 111 | 112 | // find contours 113 | findContours (fgimg, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_NONE); 114 | 115 | vector > contours_poly( contours.size() ); 116 | vector boundRect( contours.size() ); 117 | 118 | // Approx. 
contours to polygons and get bounding boxes 119 | 120 | for( int i = 0; i < contours.size(); i++ ) 121 | { 122 | approxPolyDP( Mat(contours[i]), contours_poly[i], 3, true ); 123 | boundRect[i] = boundingRect( Mat(contours_poly[i]) ); 124 | 125 | }; 126 | 127 | // define threshold values - specific to application video 128 | 129 | int min_area = 400; // area thresholding for contours, value can be changed 130 | 131 | int line_thresh = 70; // contours above this line are considered 132 | 133 | int max_height = 100; // if contour height is more than this, it is ignored 134 | 135 | for( int i = 0; i< contours.size(); i++ ) 136 | { 137 | //cout << "area is: " << contourArea(contours[i]) << endl; 138 | 139 | if (contourArea(contours[i]) > min_area && boundRect[i].y < mid_left.y-line_thresh) 140 | { 141 | 142 | if (boundRect[i].height >= max_height) { 143 | 144 | boundRect[i] = compressROI(frame, boundRect[i], boundRect[i].height*3/4); 145 | } 146 | 147 | rectangle(frame, boundRect[i].tl(), boundRect[i].br(), Scalar(0,255,0), 2, 8, 0 ); 148 | } 149 | 150 | } 151 | 152 | imshow("frame", frame); 153 | 154 | char k = (char)waitKey(30); 155 | if( k == 27 ) break; 156 | 157 | } 158 | 159 | return 0; 160 | } 161 | 162 | 163 | 164 | 165 | 166 | -------------------------------------------------------------------------------- /02_Background_Subtraction/vibe.cpp: -------------------------------------------------------------------------------- 1 | /* PROJECT : Person Detection 2 | Author: Pratik Ramdasi 3 | TITLE: VIBE background subtraction. 4 | Date: 07/ 14/ 2016 5 | */ 6 | 7 | #include "opencv2/core/core.hpp" 8 | #include 9 | #include 10 | #include "opencv2/core/types_c.h" 11 | #include "opencv2/highgui/highgui.hpp" 12 | #include "opencv2/imgproc/imgproc.hpp" 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | using namespace cv; 20 | using namespace std; 21 | 22 | // define structure vibe 23 | 24 | struct Vibe 25 | { 26 | int width; // width of the image 27 | int height; // height of the image 28 | int nbSamples; // number of samples per pixel 29 | int reqMatches; // #_min matches 30 | int radius; // R^2 31 | int bogo_radius; // adaptive radius when resizing /initializing the samples ( my addition ;] ) 32 | int subsamplingFactor; // amount of random subsampling 33 | 34 | vector< Mat_ > samples; // the 'model' 35 | Mat_ segmentation; // 0:bg , 255:fg 36 | 37 | RNG rng; 38 | 39 | // select the parameter values 40 | 41 | Vibe (int w, int h, int nbSamples=20, int reqMatches=2, int radius=400, int subsamplingFactor=8) 42 | : width(w) 43 | , height(h) 44 | , nbSamples(nbSamples) 45 | , reqMatches(reqMatches) 46 | , radius(radius) // R^2 47 | , bogo_radius(200000) 48 | , subsamplingFactor(subsamplingFactor) 49 | , rng(getTickCount()) 50 | , segmentation(height,width) 51 | { 52 | clear(); 53 | }; 54 | 55 | void clear() 56 | { 57 | samples.clear(); 58 | for ( int i=0; i(height,width,128) ); 60 | bogo_radius= 200000; 61 | }; 62 | 63 | // VIBE segmentation 64 | 65 | void segment(const Mat & img, Mat & segmentationMap) 66 | { 67 | if ( nbSamples != samples.size() ) 68 | clear(); 69 | 70 | bogo_radius = bogo_radius > radius 71 | ? 
bogo_radius *= 0.8 72 | : radius; 73 | 74 | Mat_ image(img); 75 | for (int x=1; x= reqMatches) // the pixel belongs to the background 90 | { 91 | // store 'bg' in the segmentation map 92 | segmentation(y,x) = 0; 93 | // gets a random number between 0 and subsamplingFactor-1 94 | int randomNumber = rng.uniform(0, subsamplingFactor); 95 | // update of the current pixel model 96 | if (randomNumber == 0) // random subsampling 97 | { 98 | // other random values are ignored 99 | randomNumber = rng.uniform(0, nbSamples); 100 | samples[randomNumber](y,x) = pixel; 101 | } 102 | // update of a neighboring pixel model 103 | randomNumber = rng.uniform(0, subsamplingFactor); 104 | if (randomNumber == 0) // random subsampling 105 | { 106 | // chooses a neighboring pixel randomly 107 | const static int nb[8][2] = {-1,0, -1,1, 0,1, 1,1, 1,0, 1,-1, 0,-1, -1,-1}; 108 | int n = rng.uniform(0,8); 109 | int neighborX = x + nb[n][1], neighborY = y + nb[n][0]; 110 | // chooses the value to be replaced randomly 111 | randomNumber = rng.uniform(0, nbSamples); 112 | samples[randomNumber](neighborY,neighborX) = pixel; 113 | } 114 | } 115 | else // the pixel belongs to the foreground 116 | { // store 'fg' in the segmentation map 117 | segmentation(y,x) = 255; 118 | } 119 | } 120 | } 121 | segmentationMap = segmentation; 122 | } 123 | }; 124 | 125 | Rect compressROI(Mat frm, Rect boundingBox, int padding) { 126 | Rect returnRect = Rect(boundingBox.x, boundingBox.y, boundingBox.width, boundingBox.height - padding); 127 | if (returnRect.x < 0)returnRect.x = 0; 128 | if (returnRect.y < 0)returnRect.y = 0; 129 | if (returnRect.x+returnRect.width >= frm.cols)returnRect.width = frm.cols-returnRect.x; 130 | if (returnRect.y+returnRect.height >= frm.rows)returnRect.height = frm.rows-returnRect.y; 131 | return returnRect; 132 | }; 133 | 134 | int main() 135 | { 136 | // get the input video 137 | VideoCapture cap; 138 | cap.open(0); // input video path 139 | if ( !cap.isOpened() ) 140 | return -1; 141 | 142 | // input parameters 143 | int w = (int)cap.get(CV_CAP_PROP_FRAME_WIDTH); 144 | int h = (int)cap.get(CV_CAP_PROP_FRAME_HEIGHT); 145 | int ct = 0; 146 | 147 | stringstream ss; 148 | string folderName = "cropped"; 149 | string folderCreateCommand = "mkdir " + folderName; 150 | system(folderCreateCommand.c_str()); 151 | 152 | vector < vector < Point > >contours; 153 | vector < Point > points; 154 | vector hierarchy; 155 | 156 | Vibe vibe(w,h); 157 | 158 | while(1) 159 | { 160 | // read the input frame 161 | Mat frame; 162 | if ( !cap.read(frame) ) continue; 163 | 164 | // convert to Gray image for segmentation 165 | Mat gray; 166 | cvtColor(frame, gray, COLOR_BGR2GRAY); 167 | 168 | // size of the frame 169 | Size s = gray.size(); 170 | 171 | // get segmented image 172 | 173 | Mat seg; 174 | vibe.segment(gray,seg); 175 | 176 | // removal of noise by median filtering 177 | 178 | medianBlur(seg, seg, 5); 179 | 180 | // morphology 181 | 182 | dilate(seg, seg, Mat(10,5,CV_8U)); 183 | 184 | // define horizontal line parameters 185 | 186 | Point mid_left, mid_right; 187 | mid_left.y = s.height/2; 188 | mid_left.x = 0; 189 | mid_right.x = s.width; 190 | mid_right.y = s.height/2; 191 | 192 | // find the contours 193 | 194 | findContours (seg, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_NONE); 195 | 196 | vector > contours_poly( contours.size() ); 197 | vector boundRect( contours.size() ); 198 | 199 | // get the moments 200 | 201 | vector mu(contours.size() ); 202 | for( size_t i = 0; i < contours.size(); i++ ) 203 | { 204 | 
mu[i] = moments( contours[i], false ); 205 | } 206 | 207 | // define bounding rectangle object 208 | 209 | for( int i = 0; i < contours.size(); i++ ) 210 | { 211 | approxPolyDP( Mat(contours[i]), contours_poly[i], 3, true ); 212 | boundRect[i] = boundingRect( Mat(contours_poly[i]) ); 213 | } 214 | 215 | 216 | // define threshold values - specific to application video 217 | 218 | int min_area = 100; // area thresholding for contours, value can be changed 219 | int max_height = 100; // maximum height of the contour 220 | int line_thresh = 10; // contours above this line are ignored 221 | 222 | for( int i = 0; i< contours.size(); i++ ) 223 | { 224 | if (contourArea(contours[i]) > min_area && boundRect[i].y < mid_left.y-line_thresh) { 225 | 226 | if (boundRect[i].height >= max_height) { 227 | 228 | boundRect[i] = compressROI(frame, boundRect[i], boundRect[i].height*3/4); 229 | } 230 | 231 | // ROI 232 | Rect R = boundRect[i]; 233 | Mat ROI = frame(R); 234 | 235 | ss << folderName <<"/"<< "cropped_" << (ct + 1) << ".jpg"; 236 | string fullPath = ss.str(); 237 | ss.str(""); 238 | imwrite(fullPath, ROI); 239 | ct += 1; 240 | 241 | rectangle(frame, boundRect[i].tl(), boundRect[i].br(), Scalar(0,255,0), 2, 8, 0 ); 242 | 243 | } 244 | } 245 | 246 | 247 | 248 | // show output frame 249 | 250 | imshow("vibe",frame); 251 | 252 | int k = waitKey(10); 253 | if ( k == ' ' ) vibe.clear(); 254 | if ( k == 27 ) break; 255 | } 256 | 257 | return 0; 258 | } 259 | -------------------------------------------------------------------------------- /03_Video_Read-Write/Readme.md: -------------------------------------------------------------------------------- 1 | Contains video read/write scripts. 2 | 1. videotoframe.py: Separating frames from the video. 3 | 2. videoWriter.py: combining frames to write into the video. 4 | -------------------------------------------------------------------------------- /03_Video_Read-Write/videoWriter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import glob 3 | import cv2 4 | import cv2.cv as cv 5 | from PIL import Image 6 | import os, sys 7 | 8 | class Video: 9 | def __init__(self): 10 | global FilePath, count 11 | if not os.path.exists("Images"): os.makedirs("Images") 12 | FilePath = "Training/" 13 | count = 1 14 | #for path in os.listdir(FilePath): 15 | # self.framing(path) 16 | self.writeOutputFile() 17 | 18 | def framing(self,path): 19 | global FilePath, count 20 | Newpath = FilePath + path 21 | cap = cv2.VideoCapture(Newpath) 22 | success,frame=cap.read(cv.CV_IMWRITE_JPEG_QUALITY) #handle of the Video Capture is required for obtaining frame. 
23 | 
24 | while success:
25 | cv2.imwrite("Images/%d.jpg" % count, frame) # save frame as JPEG file
26 | count += 1
27 | success,frame = cap.read() # read the next frame
28 | 
29 | cap.release()
30 | 
31 | def writeOutputFile(self):
32 | self.height,self.width=cv2.imread("/Users/pratikramdasi/Desktop/frames/0 (1).jpg").shape[:2]
33 | out = cv2.VideoWriter("/Users/pratikramdasi/Desktop/vtest.mp4",cv.CV_FOURCC('a','v','c','1'), 30.0, (self.width, self.height))
34 | folder=self.sort_files()
35 | 
36 | for i in folder:
37 | pic="/Users/pratikramdasi/Desktop/frames/0 ("+str(i)+").jpg"
38 | img=cv2.imread(pic)
39 | out.write(img)
40 | out.release()
41 | 
42 | 
43 | def sort_files(self):
44 | self.fname=[]
45 | 
46 | for file in sorted(glob.glob("/Users/pratikramdasi/Desktop/frames/*.*")):
47 | s=file.split('/')
48 | a=s[-1].split('.')
49 | temp=a[0].split(' ')
50 | x=temp[-1].strip('()')
51 | self.fname.append(int(x))
52 | return(sorted(self.fname))
53 | 
54 | if __name__ == "__main__":
55 | v=Video()
56 | 
--------------------------------------------------------------------------------
/03_Video_Read-Write/videotoframe.py:
--------------------------------------------------------------------------------
1 | 
2 | import cv2
3 | #import cv2.cv as cv
4 | 
5 | cap = cv2.VideoCapture("v1.mp4")
6 | success,frame=cap.read() # read the first frame from the capture handle
7 | count = 1
8 | while success:
9 | cv2.imwrite("/Users/pratikramdasi/Desktop/frames/%d.jpg" % count, frame) # save frame as JPEG file
10 | count += 1
11 | success,frame = cap.read() # read the next frame
12 | 
13 | cap.release()
14 | 
--------------------------------------------------------------------------------
/04_Motion_Detection/READMEfile.txt:
--------------------------------------------------------------------------------
1 | Object Detection in videos:
2 | 3/2/2016
3 | --------------------------------------------
4 | 
5 | NOTE:
6 | 1. The OpenCV version required for this program is 2.4.x. The department machines have different versions; the machine where the code was developed and tested has OpenCV 2.4.12.
7 | 2. The video format compatible with the Linux system is .ogv with the codec 'theo'.
8 | 
9 | Methods applied and improvements:
10 | ----------------------------------------------------------------------------------------------------
11 | 1. Basic foreground-background segmentation and detection of contours using OpenCV functions.
12 | 2. Modification over basic foreground-background segmentation: previous samples are used to estimate each new pixel value (taking the mean of the previous samples with a manually chosen threshold). Results were satisfactory for the specific type of video whose background is stationary for some initial frames; ghosts were the biggest problem.
13 | 3. ViBe implementation: the ViBe algorithm is implemented and its output compared with the previous methods. The most useful advantage, as can be seen in the 'changedOutput.ogv' video, is the elimination of ghosts. The accuracy obtained is also satisfactory.
14 | 
15 | Attached code shows the implementation of ViBe.
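In outline, ViBe keeps N past samples per pixel and classifies a pixel as background when it lies within a distance threshold of at least a required number of those samples; background pixels then randomly refresh their own and a neighbor's samples. A simplified sketch of the classification step (parameter values follow main.py; the random update and neighbor propagation are omitted):

def classify_pixel(value, samples, distance_threshold=20, required_matches=2):
    # background if 'value' is close to at least 'required_matches' stored samples
    matches = 0
    for s in samples:                        # the N=20 stored intensities for this pixel
        if abs(int(value) - int(s)) < distance_threshold:
            matches += 1
            if matches >= required_matches:  # early exit, as in main.py
                return True                  # background
    return False                             # foreground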
16 | 17 | Instructions for running the attached codes: 18 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 19 | To run the algorithm type:python main.py (You might have to change the the path of the video, ). 20 | After running the code, two folders will be generated - 'Frames' (contains output frames after object detection) and 'NewFrames' (contains corresponding binary output frames) in the working directory.Also, output video containing object detection and tracking will be generated named: 'changedOutput.ogv'. 21 | 22 | For accuracy you will have to run another file named accuracy_measure.py. 23 | For running it: 24 | python accuracy_measure.py 25 | 26 | Ground truth frames for "movie_cars.ogv" are included in "groundtruth" folder. 27 | This folder is from change detection dataset but images are renamed in the form "0 (i).png". So, kindly use the attached (renamed) groundtruth folder for verification. 28 | 29 | Computed accuracy for number of correctly detected objects (Comparing number of contours from ground truth and the obtained results). 30 | Accuracy = No of objects detected correctly / (no of objects detected correctly + no of objects not detected correctly). 31 | Obtained accuracy is: 88% for this specific video. 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /04_Motion_Detection/accuracy_measure.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import glob 3 | 4 | def sort_files(path): 5 | fname=[] 6 | path= path + "/*.png" 7 | for file in sorted(glob.glob(path)): 8 | s=file.split('/') 9 | a=s[-1].split('\\') 10 | x=a[-1].split('.') 11 | literalOne = '(' 12 | literalTwo = ')' 13 | s= x[0].split(literalOne)[-1].split(literalTwo)[0] 14 | fname.append(int(s)) 15 | return(sorted(fname)) 16 | 17 | def sort_files_fr(path1): 18 | fname=[] 19 | path1 = path1 + "/*.jpg" 20 | for file in sorted(glob.glob(path1)): 21 | s=file.split('/') 22 | a=s[-1].split('\\') 23 | x=a[-1].split('.') 24 | literalOne = '(' 25 | literalTwo = ')' 26 | s= x[0].split(literalOne)[-1].split(literalTwo)[0] 27 | fname.append(int(s)) 28 | return(sorted(fname)) 29 | 30 | def processContoursinGt(path): 31 | folder = sort_files(path) 32 | length_cont_gt = [] 33 | for i in range(20,len(folder)): 34 | newPath = path + "/0 (" + str(i) + ")" + ".png" 35 | img = cv2.imread(newPath,cv2.CV_LOAD_IMAGE_COLOR) 36 | #print "Image in processContour: ",img 37 | gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 38 | gray = cv2.GaussianBlur(gray, (31, 31), 0) 39 | thresh = cv2.threshold(gray, 25, 255, cv2.THRESH_BINARY)[1] 40 | thresh = cv2.dilate(thresh, None, iterations=2) 41 | (cnts, _) = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE) 42 | length_cont_gt.append(len(cnts)) 43 | return length_cont_gt 44 | 45 | 46 | def processContoursinFr(path1): 47 | folder1 = sort_files_fr(path1) 48 | length_cont_fr = [] 49 | for i in range(1,len(folder1)): 50 | newPath = path1 + "/" + str(i)+ ".jpg" 51 | img = cv2.imread(newPath,cv2.CV_LOAD_IMAGE_COLOR) 52 | #print "Image in processContourinFr: ",img 53 | gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 54 | gray = cv2.GaussianBlur(gray, (31, 31), 0) 55 | thresh = cv2.threshold(gray, 25, 255, cv2.THRESH_BINARY)[1] 56 | thresh = cv2.dilate(thresh, None, iterations=2) 57 | (cnts, _) = 
cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE) 58 | length_cont_fr.append(len(cnts)) 59 | return length_cont_fr 60 | 61 | def findPercentAccuracy(gt, fr): 62 | true_count = 0 63 | false_count = 0 64 | for i in range(1,len(gt)): 65 | if (gt[i] == fr[i]): 66 | true_count += 1 67 | else: 68 | false_count += 1 69 | return true_count, false_count 70 | 71 | 72 | if __name__ == "__main__": 73 | path = 'groundtruth' 74 | path1 = 'Frames' 75 | gt = processContoursinGt(path) 76 | fr = processContoursinFr(path1) 77 | true, false = findPercentAccuracy(gt,fr) 78 | print "Percentage accuracy of objects detected is: {}" .format(float(true)* 100/float(true + false)) 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /04_Motion_Detection/groundtruth.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PratikRamdasi/Computer-Vision/529989cb467730e786209773bb9ed02c266c1fc7/04_Motion_Detection/groundtruth.zip -------------------------------------------------------------------------------- /04_Motion_Detection/main.py: -------------------------------------------------------------------------------- 1 | # import the necessary packages 2 | import numpy as np 3 | import os 4 | import glob 5 | import cv2.cv as cv 6 | import cv2 7 | import random 8 | 9 | class video: 10 | def __init__(self,path): 11 | global newpath 12 | self.numberOfSamples = 20 13 | self.requiredMatches = 2 14 | self.distanceThreshold = 20 15 | self.subsamplingFactor = 16 16 | self.fname=[] 17 | self.path=path 18 | newpath = r'Frames' 19 | if not os.path.exists(newpath): os.makedirs(newpath) 20 | newpath = r'NewFrames' 21 | if not os.path.exists(newpath): os.makedirs(newpath) 22 | bigSampleArray = self.initialFraming(self.path) 23 | self.processVideo(bigSampleArray) 24 | 25 | def sort_files(self): 26 | for file in sorted(glob.glob("Frames/*.*")): 27 | s=file.split ('/') 28 | a=s[-1].split('\\') 29 | x=a[-1].split('.') 30 | self.fname.append(int(x[0])) 31 | return(sorted(self.fname)) 32 | 33 | def initialFraming(self,path): 34 | global cap 35 | global success 36 | global frame 37 | 38 | sampleIndex=0 39 | cap = cv2.VideoCapture(path) 40 | success,frame=cap.read(cv.CV_IMWRITE_JPEG_QUALITY) 41 | gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 42 | gray = cv2.GaussianBlur(gray, (21, 21), 0) 43 | height,width = gray.shape[:2] 44 | print "Dimension of the image is: ",height, width, (height*width) 45 | 46 | samples = np.array([[0 for x in range(0,self.numberOfSamples)] for x in range(0,(height*width))]) 47 | 48 | tempArray = np.reshape(gray,(height*width)).T 49 | 50 | samples[:,sampleIndex]= np.copy(tempArray) 51 | sampleIndex+=1 52 | 53 | while (success and sampleIndex!=(self.numberOfSamples)): 54 | success,frame = cap.read(cv.CV_IMWRITE_JPEG_QUALITY) 55 | gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 56 | gray = cv2.GaussianBlur(gray, (21, 21), 0) 57 | tempArray = (np.reshape(gray,(height*width))).T 58 | samples[:,sampleIndex]= np.copy(tempArray) 59 | sampleIndex+=1 60 | 61 | return samples 62 | 63 | def writeVideo(self): 64 | height,width=cv2.imread("Frames/1.jpg").shape[:2] 65 | out = cv2.VideoWriter("changedOutput.ogv",cv.CV_FOURCC('t','h','e','0'), 25.0, (width,height)) 66 | folder=self.sort_files() 67 | 68 | for i in folder: 69 | pic="Frames/"+str(i)+".jpg" 70 | img=cv2.imread(pic) 71 | out.write(img) 72 | out.release() 73 | 74 | def getNeighbours(self,arrayX,arrayY, height, width): 75 | neighbourX = 
[(arrayX-1),arrayX,(arrayX+1),(arrayX-1),(arrayX+1),(arrayX-1),arrayX,(arrayX+1)] 76 | neighbourY = [(arrayY-1),(arrayY-1),(arrayY-1),arrayY,arrayY,(arrayY+1),(arrayY+1),(arrayY+1)] 77 | ## print "neighbourX , neighburY is: ",neighbourX, neighbourY 78 | finalX = [] 79 | finalY = [] 80 | for i in range(0,len(neighbourX)): 81 | if(neighbourX[i]>=height or neighbourY[i]>=width or neighbourX[i]<0 or neighbourY[i]<0): 82 | temp = 0 83 | else: 84 | finalX.append(neighbourX[i]) 85 | finalY.append(neighbourY[i]) 86 | 87 | return np.array(finalX),np.array(finalY) 88 | 89 | 90 | def findValues(self,neighbourX, neighbourY, width): 91 | valueArray = np.zeros(len(neighbourX)) 92 | for i in range(0,len(neighbourX)): 93 | valueArray[i] = (width* neighbourX[i]) + neighbourY[i] 94 | 95 | return valueArray 96 | 97 | def getPixelLocation(self,p, h, w): 98 | arrayX=p/w 99 | arrayY=p%w 100 | nX, nY = self.getNeighbours(arrayX, arrayY, h, w) 101 | values = self.findValues(nX, nY, w) 102 | ## print "values are: ",values 103 | randomPixel = int(values[random.randint(0,len(values)-1)]) 104 | return randomPixel 105 | 106 | def processVideo(self,bigSampleArray): 107 | global success 108 | global frame 109 | global cap 110 | 111 | Finalcount=1 112 | samples= bigSampleArray 113 | 114 | i=0 115 | while success: 116 | ## success,frame = cap.read(cv.CV_IMWRITE_JPEG_QUALITY) 117 | gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 118 | gray = cv2.GaussianBlur(gray, (21, 21), 0) 119 | height,width = gray.shape[:2] 120 | tempArray = np.reshape(gray,(height*width)).T 121 | segmentationMap = np.copy(tempArray)*0 122 | for p in range(0,len(bigSampleArray)): 123 | ## print "Value of p is: ",p 124 | count = index = distance = 0 125 | 126 | while((count < self.requiredMatches) and (index < self.numberOfSamples)): 127 | distance = np.linalg.norm(tempArray[p]-samples[p][index]) 128 | ## print "Euclidean distance is: ",distance 129 | if (distance < self.distanceThreshold): 130 | count += 1 131 | ## print "count reached" ,count 132 | index += 1 133 | 134 | if(count=height or neighbourY[i]>=width or neighbourX[i]<0 or neighbourY[i]<0): 101 | temp = 0 102 | else: 103 | finalX.append(neighbourX[i]) 104 | finalY.append(neighbourY[i]) 105 | 106 | return np.array(finalX),np.array(finalY) 107 | 108 | 109 | def findValues(self,neighbourX, neighbourY, width): 110 | valueArray = np.zeros(len(neighbourX)) 111 | for i in range(0,len(neighbourX)): 112 | valueArray[i] = (width* neighbourX[i]) + neighbourY[i] 113 | 114 | return valueArray 115 | 116 | def getPixelLocation(self,p, h, w): 117 | arrayX=p/w 118 | arrayY=p%w 119 | nX, nY = self.getNeighbours(arrayX, arrayY, h, w) 120 | values = self.findValues(nX, nY, w) 121 | ## print "values are: ",values 122 | randomPixel = int(values[random.randint(0,len(values)-1)]) 123 | return randomPixel 124 | 125 | def processVideo(self,bigSampleArray): 126 | Test_Histogram=[] 127 | global success 128 | global frame 129 | global cap 130 | 131 | Finalcount=1 132 | samples= bigSampleArray 133 | 134 | i=0 135 | TemplateCount = 1 136 | while success: 137 | ## success,frame = cap.read(cv.CV_IMWRITE_JPEG_QUALITY) 138 | gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 139 | #gray = cv2.GaussianBlur(gray, (21, 21), 0) 140 | height,width = gray.shape[:2] 141 | tempArray = np.reshape(gray,(height*width)).T 142 | segmentationMap = np.copy(tempArray)*0 143 | for p in range(0,len(bigSampleArray)): 144 | ## print "Value of p is: ",p 145 | count = index = distance = 0 146 | 147 | while((count < self.requiredMatches) and (index < 
self.numberOfSamples)): 148 | distance = np.linalg.norm(tempArray[p]-samples[p][index]) 149 | ## print "Euclidean distance is: ",distance 150 | if (distance < self.distanceThreshold): 151 | count += 1 152 | ## print "count reached" ,count 153 | index += 1 154 | 155 | if(count 1000: 186 | #mask = np.zeros_like(gray) # Create mask where white is what we want, black otherwise 187 | #cv2.drawContours(mask, [c], 0, 255) # Draw filled contour in mask 188 | #out = np.zeros_like(gray) # Extract out the object and place into output image 189 | #out[mask == 255] = gray[mask == 255] 190 | 191 | # Show the output image 192 | #cv2.imshow('Output', out) 193 | (x, y, w, h) = cv2.boundingRect(c) 194 | cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2) 195 | crop_img = gray[y: y + h, x: x + w] 196 | #im = cv2.imread(new_path) 197 | kpts = fea_det.detect(crop_img) 198 | kpts, des = des_ext.compute(im, kpts) 199 | label_Test=h_cluster.predict(des) 200 | for i in range(0,len(label_Test)): 201 | LabelHistogram[label_Test[i]-1]+=1 202 | Test_Histogram=np.append(Test_Histogram,LabelHistogram) 203 | Result=clf.predict(LabelHistogram) 204 | print "Object class is: ", Result 205 | font = cv2.FONT_HERSHEY_SIMPLEX 206 | cv2.putText(frame,"Person",(y/4,x/2), font, 1,(0,255,0),2) 207 | #cv2.imwrite(NewPath,crop_img) 208 | TemplateCount += 1 209 | cv2.imwrite("Frames/%d.jpg" % Finalcount, frame) # save frame as JPEG file 210 | Finalcount += 1 211 | 212 | #cv2.imwrite("Frames/%d.jpg" % Finalcount, frame) # save frame as JPEG file 213 | Finalcount += 1 214 | success,frame = cap.read(cv.CV_IMWRITE_JPEG_QUALITY) 215 | i+=1 216 | 217 | cv2.destroyAllWindows() 218 | self.writeVideo() 219 | 220 | 221 | if __name__ == "__main__": 222 | path_file='video.avi' 223 | v = video(path_file) 224 | 225 | -------------------------------------------------------------------------------- /05_object_classification/svmTraining.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import os 4 | from sklearn import cluster 5 | from sklearn.naive_bayes import MultinomialNB 6 | from sklearn.preprocessing import normalize 7 | from scipy.cluster.vq import * 8 | from sklearn.preprocessing import StandardScaler 9 | from sklearn import neighbors 10 | from sklearn import svm 11 | 12 | class_names=['Person'] 13 | path="D:/Dataset/" 14 | clusters=30 15 | entries=os.listdir(path) 16 | des_list = [] 17 | Histogram=np.array([]) 18 | class_count=[] 19 | count=0 20 | fea_det = cv2.FeatureDetector_create("SIFT") 21 | des_ext = cv2.DescriptorExtractor_create("SIFT") 22 | for item in entries: 23 | count+=1 24 | img_path=path+str(item)+"/" 25 | image_array=os.listdir(img_path) 26 | for pic in image_array: 27 | new_path=img_path+str(pic) 28 | #print new_path 29 | im = cv2.imread(new_path) 30 | kpts = fea_det.detect(im) 31 | kpts, des = des_ext.compute(im, kpts) 32 | des_list.append((new_path, des)) 33 | class_count.append(count) 34 | 35 | #print "Class_count is: ",len(class_count) 36 | descriptors = des_list[0][1] 37 | for image_path, descriptor in des_list[1:]: 38 | descriptors = np.vstack((descriptors, descriptor)) 39 | 40 | 41 | #print "Implementing Kmeans: " 42 | h_cluster=cluster.KMeans(n_clusters=clusters) 43 | h_cluster.fit(descriptors) 44 | labels=h_cluster.labels_ 45 | #print "Labels is: ",labels 46 | 47 | 48 | #print "Handling every image now: " 49 | #training each image again 50 | for item in entries: 51 | count+=1 52 | img_path=path+str(item)+"/" 53 | 
image_array=os.listdir(img_path) 54 | for pic in image_array: 55 | LabelHistogram=np.zeros(clusters) 56 | new_path=img_path+str(pic) 57 | im = cv2.imread(new_path) 58 | kpts = fea_det.detect(im) 59 | kpts, des = des_ext.compute(im, kpts) 60 | LabelOfEveryDescriptor=h_cluster.predict(des) 61 | for i in range(0,len(LabelOfEveryDescriptor)): 62 | LabelHistogram[LabelOfEveryDescriptor[i]-1]+=1 63 | Histogram=np.append(Histogram,LabelHistogram) 64 | 65 | Histogram=np.reshape(Histogram,(len(class_count),clusters)) 66 | #print "Histogram shape:",Histogram.shape 67 | 68 | #print "Implementing SVM: " 69 | clf = svm.SVC() 70 | clf.fit(Histogram,class_count) 71 | 72 | ''' 73 | y=np.array([[1],[3],[2]]) 74 | print "Testing phase is: " 75 | #testing of the classifier 76 | test_path="D:/Test/" 77 | Test_Histogram=[] 78 | entries=os.listdir(test_path) 79 | for pic in entries: 80 | new_path=test_path+str(pic) 81 | print new_path 82 | im = cv2.imread(new_path) 83 | kpts = fea_det.detect(im) 84 | kpts, des = des_ext.compute(im, kpts) 85 | label_Test=h_cluster.predict(des) 86 | for i in range(0,len(label_Test)): 87 | LabelHistogram[label_Test[i]-1]+=1 88 | Test_Histogram=np.append(Test_Histogram,LabelHistogram) 89 | Result=clf.predict(LabelHistogram) 90 | 91 | Test=np.reshape(Test_Histogram,(len(Test_Histogram)/clusters,clusters)) 92 | print clf.score(Test,y) 93 | ''' 94 | ## print class_names[Result-1] 95 | -------------------------------------------------------------------------------- /06_Human_Activity_Recognition/HOGFile.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import glob 4 | import cv2.cv as cv 5 | import cv2 6 | import random 7 | import cPickle 8 | from sklearn.preprocessing import normalize 9 | from sklearn.decomposition import PCA 10 | from sklearn import svm 11 | import time 12 | from PIL import Image 13 | from resizeimage import resizeimage 14 | 15 | class HOGCompute: 16 | def __init__(self): 17 | 18 | FilePath = 'TrainingNew/' 19 | Folders = os.listdir(FilePath) 20 | LabelCount = 1 21 | FolderCheck = False 22 | Labels = np.array([]) 23 | 24 | for FileName in Folders: 25 | pathToFolder = FilePath + FileName + "/" 26 | newEntry = os.listdir(pathToFolder) 27 | print "Label is: ", LabelCount 28 | 29 | for VideoEntry in newEntry: 30 | pathToVideoFile = pathToFolder + VideoEntry 31 | images = self.sort_files(pathToVideoFile) 32 | 33 | 34 | winSize = (128,64) 35 | blockSize = (16,16) 36 | blockStride = (8,8) 37 | cellSize = (8,8) 38 | nbins = 9 39 | derivAperture = 0 40 | winSigma = -1 41 | histogramNormType = 0 42 | L2HysThreshold = 2.0000000000000001e-01 43 | gammaCorrection = 0 44 | nlevels = 64 45 | hog = cv2.HOGDescriptor(winSize,blockSize,blockStride,cellSize,nbins,derivAperture,winSigma, 46 | histogramNormType,L2HysThreshold,gammaCorrection,nlevels) 47 | winStride = (8,8) 48 | padding = (8,8) 49 | locations = ((10,20),) 50 | 51 | nInterval = 10 52 | BigCount = 1 53 | 54 | index = 1 55 | 56 | FirstEntryFlag = False 57 | 58 | while(index < len(images)): 59 | hogCount = 0 60 | for i in range(index,(index + nInterval)): 61 | imgPath = pathToVideoFile + "/0 (" + str(i) + ").jpg" 62 | 63 | #READ THE IMAGE HERE 64 | img = cv2.imread(imgPath) 65 | img = cv2.resize(img, (160, 120)) 66 | h1 = hog.compute(img,winStride,padding,locations).T 67 | #print "Shape of HOG features is: ", h1.shape 68 | temp = np.copy(h1) 69 | #print "Shape of temp is: ", temp.shape 70 | 71 | if(hogCount == 0): 72 | hogTemp = np.zeros((nInterval, 
len(temp[0]))) 73 | #print "Shape of hogTemp is: ", hogTemp.shape 74 | hogTemp[hogCount]= temp[0] 75 | if (FirstEntryFlag == False): 76 | FirstHOGEntry = np.copy(temp) 77 | FirstEntryFlag = True 78 | else: 79 | hogTemp[hogCount]= temp 80 | 81 | hogCount += 1 82 | 83 | 84 | #HOGPH = self.computePCA(hogTemp) 85 | HOGPH = self.computeHOGPH(hogTemp, FirstHOGEntry) 86 | Labels = np.append(Labels, LabelCount) 87 | #HOGPH = normalize(HOGPH) 88 | 89 | #print "Shape of HOGPH is: ", HOGPH.shape 90 | 91 | if (BigCount == 1): 92 | bigArray = np.copy(HOGPH) 93 | else: 94 | bigArray = np.vstack((bigArray, HOGPH)) 95 | BigCount += 1 96 | #print "Shape of bigArray is: ", bigArray.shape 97 | 98 | 99 | 100 | index += nInterval 101 | #print "Index value is: ", index 102 | 103 | if (FolderCheck == False): 104 | TrainingData = np.copy(bigArray) 105 | FolderCheck = True 106 | else: 107 | TrainingData = np.vstack((TrainingData, bigArray)) 108 | 109 | LabelCount += 1 110 | 111 | 112 | 113 | print "TrainingData Size is: ", TrainingData.shape 114 | Labels = Labels.T 115 | print "Labels shape is: ",Labels.shape 116 | 117 | 118 | clf = svm.SVC() 119 | clf.fit(TrainingData,Labels) 120 | 121 | with open('my_SVM_file.pkl', 'wb') as fid: 122 | cPickle.dump(clf, fid) 123 | 124 | 125 | 126 | def computePCA(self,array): 127 | pca = PCA() 128 | newData = pca.fit_transform(array) 129 | MeanArray = np.mean(newData, axis =0) 130 | #print "Size of Mean array: ", MeanArray.shape 131 | return MeanArray 132 | 133 | def computeHOGPH(self,array, firstEntry): 134 | hogph = firstEntry 135 | #hogph = np.copy(array[0]) 136 | for j in range(1,len(array)): 137 | hogph += array[j-1] - array[j] 138 | 139 | return hogph 140 | 141 | 142 | def sort_files(self, index): 143 | self.fname=[] 144 | path = str(index) + "/*.*" 145 | for file in sorted(glob.glob(path)): 146 | s=file.split ('/') 147 | a=s[-1].split('\\') 148 | x=a[-1].split('.') 149 | o= x[0].split('(')[1] 150 | o = o.split(')')[0] 151 | self.fname.append(int(o)) 152 | return(sorted(self.fname)) 153 | 154 | if __name__=='__main__': 155 | start_time = time.time() 156 | h= HOGCompute() 157 | print("--- %s seconds ---" % (time.time() - start_time)) 158 | -------------------------------------------------------------------------------- /06_Human_Activity_Recognition/HOGTest.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import glob 4 | import cv2.cv as cv 5 | import cv2 6 | import random 7 | import cPickle 8 | from sklearn.preprocessing import normalize 9 | from sklearn.decomposition import PCA 10 | from sklearn import svm 11 | import time 12 | from collections import Counter 13 | 14 | class HOGCompute: 15 | def __init__(self): 16 | FilePath = 'Test/' 17 | Folders = os.listdir(FilePath) 18 | 19 | with open('my_SVM_file.pkl', 'rb') as fid: 20 | clf = cPickle.load(fid) 21 | 22 | for FileName in Folders: 23 | images = self.sort_files(FilePath + FileName) 24 | 25 | winSize = (128,64) 26 | blockSize = (16,16) 27 | blockStride = (8,8) 28 | cellSize = (8,8) 29 | nbins = 9 30 | derivAperture = 0 31 | winSigma = -1 32 | histogramNormType = 0 33 | L2HysThreshold = 2.0000000000000001e-01 34 | gammaCorrection = 0 35 | nlevels = 64 36 | hog = cv2.HOGDescriptor(winSize,blockSize,blockStride,cellSize,nbins,derivAperture,winSigma, 37 | histogramNormType,L2HysThreshold,gammaCorrection,nlevels) 38 | winStride = (8,8) 39 | padding = (8,8) 40 | locations = ((10,20),) 41 | 42 | nInterval = 10 43 | BigCount = 1 44 | 45 | index = 1 46 | 
FirstEntryFlag = False 47 | 48 | while(index < len(images)): 49 | hogCount = 0 50 | for i in range(index,(index + nInterval)): 51 | imgPath = FilePath + str(FileName) + "/0 (" + str(i) + ").jpg" 52 | img = cv2.imread(imgPath,0) 53 | #img = cv2.resize(img, (160, 120)) 54 | h1 = hog.compute(img,winStride,padding,locations).T 55 | temp = np.copy(h1) 56 | 57 | 58 | if(hogCount == 0): 59 | hogTemp = np.zeros((nInterval, len(temp[0]))) 60 | #print "Shape of hogTemp is: ", hogTemp.shape 61 | hogTemp[hogCount]= temp[0] 62 | if (FirstEntryFlag == False): 63 | FirstHOGEntry = np.copy(temp) 64 | FirstEntryFlag = True 65 | else: 66 | hogTemp[hogCount]= temp 67 | 68 | #print "Shape of hogTemp is: ", hogTemp.shape 69 | hogCount += 1 70 | 71 | HOGPH = self.computeHOGPH(hogTemp, FirstHOGEntry) 72 | #HOGPH = normalize(HOGPH) 73 | 74 | if (BigCount == 1): 75 | bigArray = np.copy(HOGPH) 76 | else: 77 | bigArray = np.vstack((bigArray, HOGPH)) 78 | BigCount += 1 79 | 80 | index += nInterval 81 | 82 | print "Shape of Big array is: ", bigArray.shape 83 | print (clf.predict(bigArray)) 84 | most_common,num_most_common = Counter(clf.predict(bigArray)).most_common(1)[0] 85 | print "Action is: ",self.DisplayAction(most_common) 86 | #self.WriteAction(self.DisplayAction(most_common)) 87 | 88 | 89 | def computePCA(self,array): 90 | pca = PCA() 91 | newData = pca.fit_transform(array) 92 | MeanArray = np.mean(newData, axis =0) 93 | print "Size of Mean array: ", MeanArray.shape 94 | return MeanArray 95 | 96 | def computeHOGPH(self,array, firstEntry): 97 | hogph = firstEntry 98 | for j in range(1,len(array)): 99 | hogph += array[j-1] - array[j] 100 | 101 | return hogph 102 | 103 | def DisplayAction(self,actionIndex): 104 | if(actionIndex == 1): 105 | Action = "Handwaving" 106 | elif(actionIndex== 2): 107 | Action = "Running" 108 | elif(actionIndex == 3): 109 | Action = "Walking" 110 | return Action 111 | 112 | def WriteAction(self, string): 113 | FramePath = "FramesFinalFull/" 114 | #entries=os.listdir(FramePath) 115 | entries = self.sort_files(FramePath) 116 | for frame in entries: 117 | pic = FramePath + "0 (" + str(frame) + ").jpg" 118 | img = cv2.imread(pic) 119 | font = cv2.FONT_HERSHEY_SIMPLEX 120 | cv2.putText(img,string,(10,20), font, 1,(0,0,255),1) 121 | cv2.imwrite(pic, img) 122 | 123 | def sort_files(self, index): 124 | self.fname=[] 125 | path = str(index) + "/*.*" 126 | for file in sorted(glob.glob(path)): 127 | s=file.split ('/') 128 | a=s[-1].split('\\') 129 | x=a[-1].split('.') 130 | o= x[0].split('(')[1] 131 | o = o.split(')')[0] 132 | self.fname.append(int(o)) 133 | return(sorted(self.fname)) 134 | 135 | if __name__=='__main__': 136 | start_time = time.time() 137 | h= HOGCompute() 138 | print("--- %s seconds ---" % (time.time() - start_time)) 139 | -------------------------------------------------------------------------------- /06_Human_Activity_Recognition/Readme.txt: -------------------------------------------------------------------------------- 1 | Human Activity Recognition in Videos 2 | ------------------------------------- 3 | 4 | Objective: 5 | ---------- 6 | Identify types of human activity in a video by classification based on appearance. 7 | 8 | Datasets used for training and testing: 9 | --------------------------------------- 10 | 1. KTH human activity - Boxing, Hand clapping, Running, Walking 11 | 2. 
Weizmann - Bending, One hand waving 12 | 13 | Methodology: 14 | ------------ 15 | Method-1 : 16 | HOG feature vecots from n consecutive video frames are analyzed to generate HOGPH (history of HOG features over past frames). HOGPH feature vectors are used to train the multi-class SVM classifier model for all activities. 17 | For testing, HOGPH vector is generated for each sample video and SVM used for prediction of the class. 18 | 19 | Method-2 : Video Matching using PCA and SVD 20 | 21 | Results: 22 | -------- 23 | Method 1 does not account for motion information and not suitable for large dataset. 24 | Method 2, on the other hand, is easy to implement but time consuming. Method 2 accuracy found out to be around 63%. 25 | 26 | 27 | -------------------------------------------------------------------------------- /06_Human_Activity_Recognition/VideoMatching_PCA_SVD.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import glob 4 | import math 5 | import cv2.cv as cv 6 | import cv2 7 | import random 8 | import cPickle 9 | from sklearn.preprocessing import normalize 10 | from sklearn.decomposition import PCA 11 | from sklearn import svm 12 | import time 13 | from PIL import Image 14 | from scipy import linalg as LA 15 | 16 | class HOGCompute: 17 | def __init__(self): 18 | 19 | FilePath = 'Training/' 20 | Folders = os.listdir(FilePath) 21 | LabelCount = 1 22 | FolderCheck = False 23 | Labels = np.array([]) 24 | 25 | output = self.GetTestData() 26 | 27 | #FPA = np.zeros((48,2)) 28 | #FPACount = 0 29 | FPA = np.array([]) 30 | 31 | 32 | 33 | for FileName in Folders: 34 | pathToFolder = FilePath + FileName + "/" 35 | newEntry = os.listdir(pathToFolder) 36 | print "Label is: ", LabelCount 37 | 38 | for VideoEntry in newEntry: 39 | pathToVideoFile = pathToFolder + VideoEntry 40 | images = self.sort_files(pathToVideoFile) 41 | 42 | Number_of_Frames = len(images) 43 | height = 64 44 | width = 64 45 | 46 | samples = np.array([[0 for x in range(0,Number_of_Frames)] for x in range(0,(height*width))]) 47 | BigCount = 1 48 | sampleIndex = 0 49 | 50 | for i in range(1,(Number_of_Frames+1)): 51 | imgPath = pathToVideoFile + "/0 (" + str(i) + ").jpg" 52 | 53 | #Read the image 54 | img = cv2.imread(imgPath,0) 55 | img = cv2.resize(img, (height, width)) 56 | 57 | #Vectorize the image 58 | tempArray = np.reshape(img,(height*width)).T 59 | 60 | #add to the big array 61 | samples[:,sampleIndex]= np.copy(tempArray) 62 | sampleIndex+=1 63 | 64 | Labels = np.append(Labels, LabelCount) 65 | data, eigenValues, eigenVectors = self.PCA(samples) 66 | 67 | input = eigenVectors 68 | print input.shape 69 | 70 | mat = np.dot(input.T, output) 71 | 72 | U, s, Vh = LA.svd(mat, full_matrices= False) 73 | 74 | 75 | angles = np.array([np.arccos(e) for e in s]) 76 | #print "Principal angles: ", angles 77 | 78 | FPA = np.append(FPA, angles) 79 | #FPA[FPACount,:] = angles 80 | #FPACount += 1 81 | 82 | #PA = self.angle_between(v1,v2) 83 | #dotProduct = sum((a*b) for a, b in zip(v1, v2)) 84 | #print FPA.shape 85 | LabelCount += 1 86 | 87 | #print FPA 88 | 89 | indx = np.argmin(FPA) 90 | 91 | ''' 92 | print min(FPA[:,0]), min(FPA[:,1]) 93 | indx1 = np.argmin(FPA[:,0]) 94 | indx2 = np.argmin(FPA[:,1]) 95 | print self.DisplayAction(Labels[indx1]),self.DisplayAction(Labels[indx2]) 96 | ''' 97 | print self.DisplayAction(Labels[indx]) 98 | 99 | def unit_vector(self, vector): 100 | return vector / np.linalg.norm(vector) 101 | 102 | def angle_between(self,v1, v2): 103 | 
--------------------------------------------------------------------------------
/06_Human_Activity_Recognition/VideoMatching_PCA_SVD.py:
--------------------------------------------------------------------------------
import numpy as np
import os
import glob
import math
import cv2.cv as cv
import cv2
import random
import cPickle
from sklearn.preprocessing import normalize
from sklearn.decomposition import PCA
from sklearn import svm
import time
from PIL import Image
from scipy import linalg as LA

class HOGCompute:
    def __init__(self):

        FilePath = 'Training/'
        Folders = os.listdir(FilePath)
        LabelCount = 1
        Labels = np.array([])

        # orthonormal basis (leading eigenvectors) of the test video
        testBasis = self.GetTestData()

        FPA = np.array([])

        for FileName in Folders:
            pathToFolder = FilePath + FileName + "/"
            newEntry = os.listdir(pathToFolder)
            print "Label is: ", LabelCount

            for VideoEntry in newEntry:
                pathToVideoFile = pathToFolder + VideoEntry
                images = self.sort_files(pathToVideoFile)

                Number_of_Frames = len(images)
                height = 64
                width = 64

                samples = np.array([[0 for x in range(0, Number_of_Frames)] for x in range(0, (height * width))])
                sampleIndex = 0

                for i in range(1, (Number_of_Frames + 1)):
                    imgPath = pathToVideoFile + "/0 (" + str(i) + ").jpg"

                    # read the image as greyscale
                    img = cv2.imread(imgPath, 0)
                    img = cv2.resize(img, (height, width))

                    # vectorize the image
                    tempArray = np.reshape(img, (height * width)).T

                    # add to the big array
                    samples[:, sampleIndex] = np.copy(tempArray)
                    sampleIndex += 1

                Labels = np.append(Labels, LabelCount)
                data, eigenValues, eigenVectors = self.PCA(samples)

                trainBasis = eigenVectors
                print trainBasis.shape

                mat = np.dot(trainBasis.T, testBasis)

                U, s, Vh = LA.svd(mat, full_matrices=False)

                # singular values are the cosines of the principal angles;
                # clip before arccos to guard against numerical overshoot
                angles = np.arccos(np.clip(s, -1.0, 1.0))

                # keep only the smallest principal angle per training video,
                # so that FPA stays aligned one-to-one with Labels
                FPA = np.append(FPA, np.min(angles))

            LabelCount += 1

        # the training video whose subspace is closest to the test video
        indx = np.argmin(FPA)
        print self.DisplayAction(Labels[indx])

    def unit_vector(self, vector):
        return vector / np.linalg.norm(vector)

    def angle_between(self, v1, v2):
        v1_u = self.unit_vector(v1)
        v2_u = self.unit_vector(v2)
        return np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0))

    def GetTestData(self):
        FilePath = 'Test/'
        Folders = os.listdir(FilePath)

        for FileName in Folders:
            images = self.sort_files(FilePath + FileName)

            Number_of_Frames = len(images)
            height = 64
            width = 64

            Testsamples = np.array([[0 for x in range(0, Number_of_Frames)] for x in range(0, (height * width))])
            sampleIndex = 0

            for i in range(1, (Number_of_Frames + 1)):
                TestimgPath = FilePath + str(FileName) + "/0 (" + str(i) + ").jpg"

                # read the image as greyscale
                img = cv2.imread(TestimgPath, 0)
                img = cv2.resize(img, (height, width))

                # vectorize the image
                tempArray = np.reshape(img, (height * width)).T

                # add to the big array
                Testsamples[:, sampleIndex] = np.copy(tempArray)
                sampleIndex += 1

            data, eigenValues, eigenVectors = self.PCA(Testsamples)

            # only the first test video is used
            return eigenVectors

    def PCA(self, data):
        """
        pass in: data as a 2D NumPy array
        returns: data projected onto the first dims_rescaled_data
                 principal components, plus the eigenvalues and eigenvectors
        """
        dims_rescaled_data = 2
        m, n = data.shape

        # centre the data; the copy also casts the integer pixel
        # matrix to float, so the subtraction is well defined
        data = data - data.mean(axis=0)

        # calculate the covariance matrix
        R = np.cov(data, rowvar=False)
        # use 'eigh' rather than 'eig' since R is symmetric:
        # it is faster and returns real eigenvalues/eigenvectors
        evals, evecs = LA.eigh(R)
        # sort eigenvalues in decreasing order
        idx = np.argsort(evals)[::-1]
        evecs = evecs[:, idx]
        # sort eigenvectors according to the same index
        evals = evals[idx]
        # select the first dims_rescaled_data eigenvectors
        evecs = evecs[:, :dims_rescaled_data]
        # project the data onto the selected eigenvectors and return
        # the projected data, eigenvalues, and eigenvectors
        return np.dot(evecs.T, data.T).T, evals, evecs

    def DisplayAction(self, actionIndex):
        if (actionIndex == 1):
            Action = "Boxing"
        elif (actionIndex == 2):
            Action = "Handclapping"
        elif (actionIndex == 3):
            Action = "Handwaving"
        elif (actionIndex == 4):
            Action = "Running"
        elif (actionIndex == 5):
            Action = "Walking"
        else:
            Action = "Unknown"
        return Action

    def sort_files(self, index):
        # frame files are named "0 (N).jpg"; collect and sort the Ns
        self.fname = []
        path = str(index) + "/*.*"
        for file in sorted(glob.glob(path)):
            s = file.split('/')
            a = s[-1].split('\\')
            x = a[-1].split('.')
            o = x[0].split('(')[1]
            o = o.split(')')[0]
            self.fname.append(int(o))
        return sorted(self.fname)

if __name__ == '__main__':
    start_time = time.time()
    h = HOGCompute()
    print("--- %s seconds ---" % (time.time() - start_time))
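If a newer SciPy is available (1.0 or later; the script above predates it), the principal-angle computation can be cross-checked against scipy.linalg.subspace_angles. A tiny sketch with made-up bases:

import numpy as np
from scipy.linalg import subspace_angles  # SciPy >= 1.0

rng = np.random.RandomState(2)
A = np.linalg.qr(rng.rand(100, 2))[0]   # orthonormal basis of a 2-D subspace
B = np.linalg.qr(rng.rand(100, 2))[0]

print(subspace_angles(A, B))            # principal angles in radians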
--------------------------------------------------------------------------------
/07_PCA_Eigenfaces/Readme.txt:
--------------------------------------------------------------------------------
Readme File

Submission folder contains the C++ script for generating Eigenfaces
---------------------------------------------------------------------
'eignfaces.cpp' - Generates the first 10 eigenfaces for the given BioID face recognition dataset.

System config and external libraries used:
-------------------------------------------
1. Linux Ubuntu 14.04 - 4GB RAM
2. OpenCV 2.4.13
3. Geany IDE

To run the script follow:
---------------------------
From the folder where the script is located (using a terminal):
1. g++ eignfaces.cpp `pkg-config --cflags --libs opencv` -o output
2. ./output
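For orientation, a rough Python sketch of the eye-based alignment geometry that the script below implements. This uses the common direct form (scale = reference eye width / measured eye distance); the file name and eye coordinates are made up, and the C++ code reaches its crop through a slightly different scaled-ROI route:

import math
import cv2

img = cv2.imread("face.pgm", 0)              # hypothetical input image
if img is None:
    raise SystemExit("face.pgm not found")
eye_left, eye_right = (62, 94), (109, 92)    # made-up eye positions

dx = eye_right[0] - eye_left[0]
dy = eye_right[1] - eye_left[1]
angle = math.degrees(math.atan2(dy, dx))     # rotation that levels the eyes

offset = 0.2 * 100                           # 20% margin in a 100x100 crop
ref_eye_width = 100 - 2 * offset
scale = ref_eye_width / math.hypot(dx, dy)   # normalize the inter-eye distance

M = cv2.getRotationMatrix2D(eye_left, angle, scale)
aligned = cv2.warpAffine(img, M, (img.shape[1], img.shape[0]))

Rotating about the left eye keeps that eye fixed, so the crop offsets can be expressed relative to it.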
--------------------------------------------------------------------------------
/07_PCA_Eigenfaces/eignfaces.cpp:
--------------------------------------------------------------------------------
/* PROJECT: Generate the first 10 principal components of faces.
 * Dataset : BioID face dataset - https://www.bioid.com/About/BioID-Face-Database
 * Author: Pratik Mohan Ramdasi
 * Date: 12/14/2016
 *
 * Contents:
 * ==========
 * 1. Principal Component Analysis (PCA) to detect the eigenfaces for the given face dataset.
 * 2. Alignment of the input face images for improved results. The first 101 images in the dataset are considered.
 */

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/contrib/contrib.hpp>
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <cmath>

using namespace cv;
using namespace std;

/* Function to compute the distance between two points
 */
double Distance(Point p1, Point p2)
{
    double dx = p2.x - p1.x;
    double dy = p2.y - p1.y;
    return sqrt( dx * dx + dy * dy );
}

/* Function to rotate an image about a given center by a rotation angle
 * Input: original image
 * Output: rotated image
 */
Mat rotate(Mat &image, Point2f center, double angle, double scale)
{
    // get 2x3 rotation matrix
    Mat rot_matrix = getRotationMatrix2D( center, angle, scale );
    // warpAffine allocates the destination image
    Mat rotated_img;
    // Perform affine transform
    warpAffine( image, rotated_img, rot_matrix, image.size());
    return rotated_img;
}

/* Function to align the face images based on eye locations
 * Input: original face image with eye positions
 * Output: cropped aligned face image
 */
Mat cropFaces(Mat &img, Point e_left = Point(0, 0), Point e_right = Point(0, 0))
{
    // calculate offsets in the original image; the offset percentage is
    // selected to be 0.2 both horizontally and vertically, and the
    // destination image size is selected to be (100, 100)
    int offset_h = floor(float(0.2 * 100));
    int offset_v = floor(float(0.2 * 100));

    // get the direction
    Point eye_direction;
    eye_direction.x = (e_right.x - e_left.x);
    eye_direction.y = (e_right.y - e_left.y);

    // calculate the rotation angle in degrees (getRotationMatrix2D expects degrees)
    double rotation = atan2(float(eye_direction.y), float(eye_direction.x)) * 180.0 / CV_PI;

    // distance between the eyes
    double dist = Distance(e_left, e_right);

    // calculate the reference eye width
    double ref = 100 - 2.0 * offset_h;

    // scale factor
    double scale = float(dist)/float(ref);

    // rotate the image around the left eye
    Mat rotated_img;
    rotated_img = rotate(img, e_left, rotation, scale);

    // crop the rotated image (not the original), clamped to the image boundary
    Rect crop;
    crop.x = e_right.x - scale * offset_h;
    crop.y = e_right.y - scale * offset_v;
    crop.width = 100 * scale;
    crop.height = 100 * scale;
    crop &= Rect(0, 0, rotated_img.cols, rotated_img.rows);
    Mat cropped;
    cropped = rotated_img(crop);

    // resize the image
    resize(cropped, cropped, Size(100, 100));

    return cropped;
}

/* Function to normalize the image between 0-255
 * Input: original image
 * Output: normalized image
 */
Mat norm_0_255(const Mat &src)
{
    Mat dst;
    switch(src.channels()) {
    case 1:
        normalize(src, dst, 0, 255, NORM_MINMAX, CV_8UC1);
        break;
    case 3:
        normalize(src, dst, 0, 255, NORM_MINMAX, CV_8UC3);
        break;
    default:
        src.copyTo(dst);
        break;
    }

    return dst;
}

/* Function to read an eye file
 * Input: '.eye' filename
 * Output: extracted left and right eye positions
 */
vector<int> readEyeCoordinates(const string& filename)
{
    vector<int> co_ordinates;
    ifstream file(filename.c_str());
    string line;
    while(getline(file, line)){
        istringstream ss(line);
        int value;
        while (ss >> value){
            co_ordinates.push_back(value);
        }
    }

    return co_ordinates;
}

/* Function to read eye files from the directory and store them
 * Input: path to folder containing '.eye' files
 * Output: vector storing all the '.eye' file names
 */
vector<String> readEyeFiles(const string& folder)
{
    vector<String> files;
    glob(folder, files);

    return files;
}

/* Function to read input images from the directory and store them
 * Input: path to folder containing '.pgm' image files
 * Output: vector storing all the image files
 */
vector<Mat> readIpImages(const string& folder)
{
    vector<String> files;
    glob(folder, files);
    // Store all the images into a vector of images
    vector<Mat> images;
    for (size_t i = 0; i < files.size(); i++){
        // read the image as greyscale
        Mat image = imread(files[i], 0);
        if(image.empty()){
            cerr << "Could not load image!";
        }
        // store it into the vector of images
        images.push_back(image);
    }
    return images;
}
/* Function to get all aligned images for PCA processing
 * Input: vectors of all the input images and '.eye' files
 * Output: vector storing all the aligned images
 */
vector<Mat> alignIpImages(vector<Mat> ipImages, vector<String> eyeFiles)
{
    vector<Mat> aligned_images;
    for(size_t i = 0; i < ipImages.size(); i++)
    {
        // read the eye file
        String filename = eyeFiles[i];
        // get left and right eye locations
        vector<int> locs;
        locs = readEyeCoordinates(filename);
        Point eye_left = Point(locs[0], locs[1]);
        Point eye_right = Point(locs[2], locs[3]);
        // get the aligned image
        Mat aligned;
        aligned = cropFaces(ipImages[i], eye_left, eye_right);

        aligned_images.push_back(aligned);
    }
    return aligned_images;
}


/* Function applying PCA to get eigenvalues and eigenvectors (eigenfaces)
 * Input: aligned images, number of principal components
 * Output: eigenvectors for the given number of principal components
 */
Mat pcaProcessing(vector<Mat> alignedImages, int num_comps)
{
    // reshape the images to generate the dataset for PCA: one image per row
    Mat dst(static_cast<int>(alignedImages.size()), alignedImages[0].rows * alignedImages[0].cols, CV_32F);
    for (unsigned int i = 0; i < alignedImages.size(); i++){
        Mat image_row = alignedImages[i].clone().reshape(1,1);
        Mat row_i = dst.row(i);
        image_row.convertTo(row_i, CV_32F);
    }

    cout << "Size of training set: " << dst.cols << "," << dst.rows << endl;

    // copy the dataset
    Mat data;
    dst.copyTo(data);

    // perform PCA
    PCA pca( data, Mat(), CV_PCA_DATA_AS_ROW, num_comps);

    // compute and copy the PCA results
    Mat mean = pca.mean.clone();
    Mat evals = pca.eigenvalues.clone();
    Mat evecs = pca.eigenvectors.clone();

    return evecs;
}

int main()
{

    clog << "Reading input images and eye files ... " << endl;

    // read the input images in the dataset
    vector<Mat> ipImages;
    string img_folder = "/home/pratikramdasi/comp_inter/Koh-young/dataset/*.pgm";
    ipImages = readIpImages(img_folder);

    // read the eye location files
    vector<String> eyeFiles;
    string eye_folder = "/home/pratikramdasi/comp_inter/Koh-young/dataset/*.eye";
    eyeFiles = readEyeFiles(eye_folder);

    clog << "Aligning input images ... " << endl;

    // align the input face images
    vector<Mat> alignedIp;
    alignedIp = alignIpImages(ipImages, eyeFiles);

    // number of principal components
    int num_comps = 10;

    clog << "Getting PCA results ... " << endl;

    // PCA processing - first 10 eigenfaces
    Mat eigenVectors;
    eigenVectors = pcaProcessing(alignedIp, num_comps);

    clog << "Displaying PCA results ... " << endl;

    // display the eigenfaces in a single window
    Mat win_mat(Size(1000, 100), CV_8UC3);
    for(int i = 0; i < num_comps; i++){
        // get the ith eigenvector
        Mat ev = eigenVectors.row(i);
        // reshape it to image size and normalize it to 0-255
        Mat out = norm_0_255(ev.reshape(1, alignedIp[i].rows));
        // apply colormap - jet (cmapped avoids shadowing std::cout)
        Mat cmapped;
        applyColorMap(out, cmapped, COLORMAP_JET);
        cmapped.copyTo(win_mat(Rect(100 * i, 0, 100, 100)));
    }

    imshow("Eigenfaces", win_mat);
    imwrite("Eigenfaces.jpg", win_mat);
    waitKey(0); // press any key to continue...

    return 0;
}
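The same eigenface computation can be sketched in a few lines of NumPy: the rows of the data matrix are flattened aligned faces, and the top right-singular vectors of the mean-centred matrix are the eigenfaces. A synthetic sketch (the sizes below are made up to mirror the 101 aligned 100x100 faces, not loaded from the dataset):

import numpy as np

rng = np.random.RandomState(3)
data = rng.rand(101, 100 * 100)          # 101 flattened 100x100 stand-in faces

mean_face = data.mean(axis=0)
centred = data - mean_face

# thin SVD: the right singular vectors are the eigenvectors of the covariance
_, _, Vt = np.linalg.svd(centred, full_matrices=False)
eigenfaces = Vt[:10].reshape(10, 100, 100)
print(eigenfaces.shape)                  # (10, 100, 100)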
--------------------------------------------------------------------------------
/08_Logo_Identification/Readme.txt:
--------------------------------------------------------------------------------
Objective:
----------
For each probe image in probe.txt, write a script that can:
(1) identify the brand logo in the probe image (if any),
(2) draw the bounding box around the recognized logo (if any).
Finally, the script should tally the number of correct vs incorrect matches and summarize the accuracy.

Method Implemented:
============================================

Edge and Scale Based Template Matching
---------------------------------------
Since the most distinguishing feature of a logo is its shape, edge based template matching proves more useful than other methods. A minimal Python sketch of the matching loop is given after this readme.

Steps:
------
1. Convert both the logo template and the image containing that logo into edge images using the Sobel operator.
2. Since the image may contain the logo at a different scale, vary the scale of the logo template and match each scaled template against the image to get a correlation coefficient value.
3. Return the scaled template for which the correlation coefficient value is maximum.
4. Draw the bounding box around the maximum match location.

Scripts:
------------------------------
"edgeTemplateMatching_single.cpp"

-> Script for a single test image with a certain logo (decided visually) vs the same logo template.
-> Out of 133 test images with logos, 80 were identified with the correct logo and a bounding box around it.
-> I renamed the test images as "(logoname).png". There are on average 4 images for each logo.

-> Accuracy: (80 / 133) * 100 = 60.15%

Advantages:
-----------
-> Template scaling proves to be a better matching strategy than using a single sized template.
-> Easy to implement.
-> Works well for a single test image containing a specific logo matched against the same logo template.

Disadvantages:
--------------
-> Threshold values for the correlation coefficient need to be adjusted by trial and error.
-> Very unreliable for matching a test image having a certain logo against a different logo template.
-> Very difficult to check whether the test image has any logo or none.
-> Not suitable for millions of images containing logos due to time constraints; computation time grows with the number of test images or logo images.
-> It does not handle multiple occurrences of the same logo in a test image.
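A minimal Python sketch of the steps above, assuming OpenCV's Python bindings: Sobel edge images for the scene and the logo, then normalised cross-correlation of the template over a range of scales. The image paths are placeholders, and the 0.10 threshold mirrors the C++ script that follows:

import cv2
import numpy as np

def edges(gray):
    gx = cv2.Sobel(gray, cv2.CV_16S, 1, 0, ksize=3)
    gy = cv2.Sobel(gray, cv2.CV_16S, 0, 1, ksize=3)
    return cv2.addWeighted(cv2.convertScaleAbs(gx), 0.5,
                           cv2.convertScaleAbs(gy), 0.5, 0)

# placeholder paths; imread returns None if they do not exist
scene = edges(cv2.imread("test/porsche_4.jpg", 0))
logo = edges(cv2.imread("logos/porsche.png", 0))

best = (0.0, None, 1.0)                      # (score, location, scale)
for scale in np.arange(1.4, 0.5, -0.1):
    w, h = int(logo.shape[1] * scale), int(logo.shape[0] * scale)
    if w > scene.shape[1] or h > scene.shape[0] or w < 8 or h < 8:
        continue
    resized = cv2.resize(logo, (w, h))
    res = cv2.matchTemplate(scene, resized, cv2.TM_CCOEFF_NORMED)
    _, max_val, _, max_loc = cv2.minMaxLoc(res)
    if max_val > best[0]:
        best = (max_val, max_loc, scale)

score, loc, scale = best
if loc is not None and score > 0.10:         # threshold set by trial and error
    print("LOGO detected at %s, scale %.1f, score %.2f" % (loc, scale, score))
else:
    print("LOGO is not present!")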
--------------------------------------------------------------------------------
/08_Logo_Identification/Results.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PratikRamdasi/Computer-Vision/529989cb467730e786209773bb9ed02c266c1fc7/08_Logo_Identification/Results.zip
--------------------------------------------------------------------------------
/08_Logo_Identification/edgeTemplateMatching_single.cpp:
--------------------------------------------------------------------------------

/* Title : Logo identification in a given image
 * Author: Pratik Mohan Ramdasi
 * Date: 1/17/2017
 * Methodology: Edge and Scale based Template Matching
 */

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <cmath>
#include <cstdio>
#include <cstdlib>

using namespace cv;
using namespace std;

/* Function to preprocess the image: smoothing and conversion to greyscale
 * Input: original test image or template (logo) image
 * Output: processed image
 */
Mat preProcessImage(Mat& img)
{
    Mat gray;
    // Gaussian smoothing
    GaussianBlur( img, img, Size(3, 3), 0, 0);
    // convert the image to greyscale
    cvtColor(img, gray, CV_BGR2GRAY);

    return gray;
}


/* Function to get the gradient (edge) image using the Sobel operator
 * Input: original test image or template (logo) image
 * Output: edge image
 */
Mat getGradientImage(Mat& img)
{
    /// Sobel edge operator parameters
    int ddepth = CV_16S;
    int scale = 1;
    int delta = 0;
    Mat abs_grad_x, abs_grad_y, grad_x, grad_y;
    Mat edged;

    // Edge detection using the Sobel operator: gradient x
    Sobel(img, grad_x, ddepth, 1, 0, 3, scale, delta, BORDER_DEFAULT);
    convertScaleAbs(grad_x, abs_grad_x);
    // gradient y
    Sobel(img, grad_y, ddepth, 0, 1, 3, scale, delta, BORDER_DEFAULT);
    convertScaleAbs(grad_y, abs_grad_y);
    // total gradient
    addWeighted(abs_grad_x, 0.5, abs_grad_y, 0.5, 0, edged);

    return edged;
}
/* Function for Scaled Template Matching
 * Inputs: original test image, gradients of the test image and template
 * Output: bounding box around the logo, if found.
 */
void scaledTemplateMatching(Mat& img, Mat& grad_img, Mat& grad_template)
{
    /// loop over different template image scales
    Mat resized;                     // for resizing the template to each scale
    double found_maxVal = 0.0;       // to store the max correlation coefficient value obtained
    Point found_maxLoc;              // location of the max correlation point
    double found_ratio = 1.0;        // useful to scale the template back to original size
    double ccoeffThreshold = 0.10;   // correlation coefficient threshold
    double initScale;                // starting template scale
    double bestVal = 0.0;
    double ratio;

    // resize a very small image to template size and adjust the initial scale for the template
    if (grad_img.cols < grad_template.cols or grad_img.rows < grad_template.rows)
    {
        resize(grad_img, grad_img, Size(grad_template.cols, grad_template.rows ));
        initScale = grad_img.cols / (float)grad_template.cols;
    }
    else
    {
        initScale = 1.4; // decided by trial and error
    }

    // loop through different template scales
    for (double scale = initScale; scale >= 0.6; scale -= 0.1)
    {
        // scale both dimensions so the template keeps its aspect ratio
        resize( grad_template, resized, Size(grad_template.cols * scale, grad_template.rows * scale) );
        ratio = resized.cols / (float) grad_template.cols;

        // if the resized template is larger than the image, skip this scale
        if ( resized.rows > grad_img.rows or resized.cols > grad_img.cols )
            continue;

        /// template matching
        // create the result matrix
        Mat result;
        int result_cols = grad_img.cols - resized.cols + 1;
        int result_rows = grad_img.rows - resized.rows + 1;

        result.create( result_rows, result_cols, CV_32FC1);

        int match_method = CV_TM_CCOEFF_NORMED;

        // do the matching
        matchTemplate( grad_img, resized, result, match_method );

        // localize the best match
        double minVal; double maxVal; Point minLoc; Point maxLoc; Point matchLoc;
        minMaxLoc( result, &minVal, &maxVal, &minLoc, &maxLoc, Mat() );

        // best match location (minimum for SQDIFF methods, maximum otherwise)
        if (match_method == CV_TM_SQDIFF || match_method == CV_TM_SQDIFF_NORMED )
        { matchLoc = minLoc; }
        else
        { matchLoc = maxLoc; }

        // keep the maximum correlation value over all scales
        if ( maxVal > bestVal) {
            bestVal = maxVal ;
            // store them
            found_maxVal = bestVal;
            found_maxLoc = maxLoc;
            found_ratio = ratio;
        }

    }

    /// If the maximum value obtained is greater than the threshold, the logo is identified.
    if (found_maxVal > ccoeffThreshold)
    {

        // find the bounding box coordinates based on the ratio
        Point start, end;
        start.x = ((int) found_maxLoc.x );
        start.y = ((int) found_maxLoc.y );

        end.x = ((int) (found_maxLoc.x + ( grad_template.cols * found_ratio ) ));
        end.y = ((int) (found_maxLoc.y + ( grad_template.rows * found_ratio ) ));

        // display outputs
        cout << "LOGO detected!" << endl;
        rectangle( img, start, end, Scalar(0,255,0), 2, 8, 0 );

        // display the input template name on the image
        //putText(img, "Template: Apple", Point(start.x , start.y - 40), FONT_HERSHEY_SIMPLEX, 0.5, CV_RGB(20,150,20), 2);

        imshow("output", img);
        //imwrite("incorrectResult_3.jpg", img);
    }
    else
    {
        cout << "LOGO is not present!" << endl;
    }

}
int main()
{
    /// get the input image and preprocess it
    Mat img = imread("/home/pratikramdasi/comp_inter/trademarkVision/logo_spotting_problem/test/porsche_4.jpg", 1);
    Mat imgGray;
    imgGray = preProcessImage(img);

    /// read the template image and preprocess it
    Mat logo;
    logo = imread("/home/pratikramdasi/comp_inter/trademarkVision/logo_spotting_problem/logos/porsche.png", 1);
    Mat logoGray;
    logoGray = preProcessImage(logo);

    // resize a very small image to template size
    if (img.cols < logo.cols or img.rows < logo.rows) {
        resize(img, img, Size(logo.cols, logo.rows ));
    }

    /// perform edge detection on the original and template images for matching
    Mat grad_img, grad_template;
    grad_img = getGradientImage(imgGray);
    grad_template = getGradientImage(logoGray);

    imshow("image scene", grad_img);
    imshow("Logo template", grad_template);

    /// perform scaled template matching
    scaledTemplateMatching(img, grad_img, grad_template);

    waitKey(0);

    return 0;
}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
Contains Computer Vision projects, including datasets, proposals, and scripts in Python/C++.
--------------------------------------------------------------------------------