├── src
│   ├── functs
│   │   ├── __init__.py
│   │   ├── getResults.py
│   │   ├── videoReconstruction.py
│   │   ├── drawMatches.py
│   │   ├── stabFuncts.py
│   │   └── frameTransformation.py
│   ├── .gitignore
│   └── videoStab.py
├── Videos
│   ├── .gitignore
│   └── patio.mp4
└── README.md
--------------------------------------------------------------------------------
/src/functs/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/src/.gitignore:
--------------------------------------------------------------------------------
*.pyc
times.txt
--------------------------------------------------------------------------------
/Videos/.gitignore:
--------------------------------------------------------------------------------
*

!.gitignore
!patio.mp4
--------------------------------------------------------------------------------
/Videos/patio.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/francocurotto/Video-Stabilization/HEAD/Videos/patio.mp4
--------------------------------------------------------------------------------
/src/functs/getResults.py:
--------------------------------------------------------------------------------
import numpy as np
from os import listdir
from os.path import isfile, join
from stabFuncts import getITF

# list the result videos and pad their names to a common width
resPath = "Videos/"
onlyfiles = [f for f in listdir(resPath) if isfile(join(resPath, f))]
onlyfiles.sort()
maxlength = max(len(s) for s in onlyfiles)
onlyfilesext = [f.ljust(maxlength, ' ') for f in onlyfiles]

names = np.array(onlyfilesext)

# compute the ITF of every video
itf = []
for vid in onlyfiles:
    itf.append(str(getITF(resPath + vid)))
itf = np.array(itf)

# save a "name ITF" table
res = np.column_stack((names, itf))
np.savetxt("res.txt", res, delimiter=" ", fmt="%s")
--------------------------------------------------------------------------------
/src/functs/videoReconstruction.py:
--------------------------------------------------------------------------------
import cv2

def reconVideo (videoInPath, videoOutPath, trans, BORDER_CUT):
    # input video info
    videoIn = cv2.VideoCapture(videoInPath)
    N_FRAMES = int(videoIn.get(cv2.CAP_PROP_FRAME_COUNT))
    FPS = videoIn.get(cv2.CAP_PROP_FPS)
    FOURCC = videoIn.get(cv2.CAP_PROP_FOURCC)
    VID_WIDTH = videoIn.get(cv2.CAP_PROP_FRAME_WIDTH)
    VID_HEIGHT = videoIn.get(cv2.CAP_PROP_FRAME_HEIGHT)

    # output video creation (smaller than the input by BORDER_CUT on each side)
    videoInSize = (int(VID_WIDTH), int(VID_HEIGHT))
    videoOutSize = (int(VID_WIDTH) - 2*BORDER_CUT, int(VID_HEIGHT) - 2*BORDER_CUT)
    videoOut = cv2.VideoWriter(videoOutPath, int(FOURCC), FPS, videoOutSize)

    # warp each frame by its smoothing transformation and crop the border
    for i in range(N_FRAMES):
        ret, frame = videoIn.read()
        if not ret: # guard against the frame count metadata overcounting
            break
        frameOut = cv2.warpPerspective(frame, trans[i,:,:], videoInSize, flags=cv2.INTER_NEAREST)
        frameOut = frameOut[BORDER_CUT:-BORDER_CUT, BORDER_CUT:-BORDER_CUT]
        videoOut.write(frameOut)

    videoIn.release()
    videoOut.release()
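
A minimal usage sketch for reconVideo (not part of the repository; the output filename is hypothetical): feeding a stack of identity homographies exercises the no-op case, where the output is just the input video cropped by the 10-pixel border.

    import cv2
    import numpy as np
    from videoReconstruction import reconVideo

    videoPath = "../Videos/patio.mp4"
    cap = cv2.VideoCapture(videoPath)
    nFrames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    cap.release()

    # one 3x3 identity homography per frame: no warping, only the border crop
    trans = np.tile(np.identity(3), (nFrames, 1, 1))
    reconVideo(videoPath, "../Videos/patio_identity.mp4", trans, 10)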
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
### Introduction

This is an implementation of a video stabilization system based on the ORB feature descriptor. The input of the program is a video, and it writes the stabilized video to the same folder.

### Requirements

The program is written in Python 2. The libraries used are:

* OpenCV 3.1 (installation instructions: http://docs.opencv.org/2.4/doc/tutorials/introduction/linux_install/linux_install.html#linux-installation)
* NumPy 1.8.2
* SciPy 0.13.3
* Matplotlib 1.3.1

### To run

To run the program, simply use:

    python videoStab.py [video]

Parameters must be changed manually in videoStab.py:

* videoInPath: default path of the input (unstabilized) video, used when the optional [video] argument is omitted
* MATCH_THRES: match distance threshold
* RANSAC_THRES: RANSAC reprojection threshold
* BORDER_CUT: number of pixels to crop from each border of the output video
* FILT: smoothing filter type ("square" or "gauss")
* FILT_WIDTH: filter width, in frames
* FILT_SIGMA: filter standard deviation (Gaussian filter only)
* FAST: if True, use the fast version of the algorithm

### References

[1] Xu, Jie, et al. "Fast feature-based video stabilization without accumulative global motion estimation." IEEE Transactions on Consumer Electronics 58.3 (2012). https://ieeexplore.ieee.org/document/6311347/?arnumber=6311347
--------------------------------------------------------------------------------
/src/videoStab.py:
--------------------------------------------------------------------------------
import cv2
import time
import os
import sys
import numpy as np

sys.path.append('functs/')
from stabFuncts import *
from frameTransformation import getTrans, getMotion
from videoReconstruction import reconVideo

start_time = time.time()

# video path: default, or the first command-line argument
videoInPath = "../Videos/patio.mp4"
if len(sys.argv) > 1:
    videoInPath = sys.argv[1]

videoInName, videoExt = os.path.splitext(videoInPath)
videoBaseName = os.path.basename(videoInName)

# detector and matcher
detector = cv2.ORB_create()
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

# parameters
MATCH_THRES = float('Inf')
RANSAC_THRES = 0.2
BORDER_CUT = 10
#FILT = "square"
FILT = "gauss"
FILT_WIDTH = 7
FILT_SIGMA = 0.2
FAST = True

# smoothing filter: a normalized moving-average or Gaussian window
if FILT == "square":
    filt = (1.0/FILT_WIDTH) * np.ones(FILT_WIDTH)
    suffix = "_MT_" + str(MATCH_THRES) + "_RT_" + str(RANSAC_THRES) + "_FILT_" + FILT + "_FW_" + str(FILT_WIDTH) + "_FAST_" + str(FAST)
elif FILT == "gauss":
    filtx = np.linspace(-3*FILT_SIGMA, 3*FILT_SIGMA, FILT_WIDTH)
    filt = np.exp(-np.square(filtx) / (2*FILT_SIGMA**2)) # FILT_SIGMA is the standard deviation
    filt = filt / np.sum(filt)
    suffix = "_MT_" + str(MATCH_THRES) + "_RT_" + str(RANSAC_THRES) + "_FILT_" + FILT + "_FW_" + str(FILT_WIDTH) + "_SG_" + str(FILT_SIGMA) + "_FAST_" + str(FAST)
videoOutPath = videoInName + "_res" + suffix + videoExt

# get video array
videoArr = getVideoArray(videoInPath)

# get transformation
trans = getTrans(videoArr, detector, bf, MATCH_THRES, RANSAC_THRES, filt, FAST)
#plotTrans(trans, None, videoBaseName, suffix, show=False)

# video reconstruction
reconVideo(videoInPath, videoOutPath, trans, BORDER_CUT)

# ITF of the stabilized video
print "ITF: " + str(getITF(videoOutPath))

# compute elapsed time
elapsed_time = time.time() - start_time
print "Total time tests: " + str(elapsed_time) + " [s]"
f = open('times.txt', 'a')
f.write(videoOutPath + ": " + str(elapsed_time) + "\n")
f.close()
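
As a standalone sanity check (not part of the repository), this sketch rebuilds the Gaussian window from videoStab.py with the same parameter values and verifies that it is symmetric and normalized:

    import numpy as np

    FILT_WIDTH = 7
    FILT_SIGMA = 0.2  # standard deviation of the window

    filtx = np.linspace(-3*FILT_SIGMA, 3*FILT_SIGMA, FILT_WIDTH)
    filt = np.exp(-np.square(filtx) / (2*FILT_SIGMA**2))
    filt = filt / np.sum(filt)

    assert np.isclose(np.sum(filt), 1.0)  # weights sum to one
    assert np.allclose(filt, filt[::-1])  # window is symmetric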
--------------------------------------------------------------------------------
/src/functs/drawMatches.py:
--------------------------------------------------------------------------------
import numpy as np
import cv2

def drawMatches(img1, kp1, img2, kp2, matches):
    """
    Custom implementation of cv2.drawMatches, which is not available
    in OpenCV 2.4.9 (it was introduced in OpenCV 3.0.0).

    This function takes two images with their associated keypoints,
    as well as a list of DMatch structures (matches) recording which
    keypoints matched between the images.

    It produces a montage with the first image on the left and the
    second image beside it on the right. Keypoints are marked with
    circles, and lines connect matching keypoints.

    img1, img2 - grayscale images
    kp1, kp2   - lists of keypoints detected by any OpenCV keypoint
                 detection algorithm
    matches    - a list of matches of corresponding keypoints from any
                 OpenCV keypoint matching algorithm
    """

    # Create a new output image that concatenates the two images
    # together (a.k.a. a montage)
    rows1 = img1.shape[0]
    cols1 = img1.shape[1]
    rows2 = img2.shape[0]
    cols2 = img2.shape[1]

    out = np.zeros((max([rows1, rows2]), cols1+cols2, 3), dtype='uint8')

    # Place the first image on the left
    out[:rows1, :cols1] = np.dstack([img1, img1, img1])

    # Place the second image to the right of it
    out[:rows2, cols1:] = np.dstack([img2, img2, img2])

    # For each pair of matched points, draw circles at both
    # keypoints, then connect them with a line
    for mat in matches:

        # Get the matching keypoint indices for each image
        img1_idx = mat.queryIdx
        img2_idx = mat.trainIdx

        # x - columns
        # y - rows
        (x1, y1) = kp1[img1_idx].pt
        (x2, y2) = kp2[img2_idx].pt

        # Draw a small circle at both coordinates
        # (radius 4, colour blue, thickness 1)
        cv2.circle(out, (int(x1), int(y1)), 4, (255, 0, 0), 1)
        cv2.circle(out, (int(x2)+cols1, int(y2)), 4, (255, 0, 0), 1)

        # Draw a line between the two points
        # (colour blue, thickness 1)
        cv2.line(out, (int(x1), int(y1)), (int(x2)+cols1, int(y2)), (255, 0, 0), 1)

    # Show the image
    #cv2.imshow('Matched Features', out)
    #cv2.waitKey(0)
    #cv2.destroyWindow('Matched Features')

    # Return the montage image
    return out
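
A minimal usage sketch for drawMatches (not part of the repository; the frame filenames are hypothetical), wired to the same ORB detector and brute-force Hamming matcher the stabilizer uses:

    import cv2
    from drawMatches import drawMatches

    # any two grayscale views of the same scene work here
    img1 = cv2.imread("frame0.png", cv2.IMREAD_GRAYSCALE)
    img2 = cv2.imread("frame1.png", cv2.IMREAD_GRAYSCALE)

    orb = cv2.ORB_create()
    kp1, des1 = orb.detectAndCompute(img1, None)
    kp2, des2 = orb.detectAndCompute(img2, None)

    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    matches = sorted(bf.match(des1, des2), key=lambda m: m.distance)

    # draw the 30 best matches and save the montage
    out = drawMatches(img1, kp1, img2, kp2, matches[:30])
    cv2.imwrite("matches.png", out)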
--------------------------------------------------------------------------------
/src/functs/stabFuncts.py:
--------------------------------------------------------------------------------
import numpy as np
import cv2
from matplotlib import pyplot as plt
from drawMatches import drawMatches

def getVideoArray (videoPath):
    # input video info
    video = cv2.VideoCapture(videoPath)
    N_FRAMES = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    FPS = video.get(cv2.CAP_PROP_FPS)
    VID_WIDTH = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    VID_HEIGHT = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    print "N_FRAMES: " + str(N_FRAMES)
    print "FPS: " + str(FPS)

    # numpy array holding the whole video as grayscale frames
    videoArr = np.zeros((N_FRAMES, VID_HEIGHT, VID_WIDTH), dtype=np.uint8)
    # fill array
    for i in range(N_FRAMES):
        ret, frameGray = readVideoGray(video)
        if not ret: # guard against the frame count metadata overcounting
            break
        videoArr[i,:,:] = frameGray
    video.release()
    return videoArr

def readVideoGray (video):
    ret, frame = video.read()
    if ret:
        frameGray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    else:
        frameGray = None
    return ret, frameGray

def filterMatches (matches, MATCH_THRES):
    # keep only matches whose descriptor distance is below the threshold
    goodMatches = []
    for m in matches:
        if m.distance < MATCH_THRES:
            goodMatches.append(m)
    return goodMatches

def maskMatches (matches, mask):
    # keep only matches flagged as inliers in the RANSAC mask
    goodMatches = []
    for i in range(len(matches)):
        if mask[i] == 1:
            goodMatches.append(matches[i])
    return goodMatches

def plotMatches(frame1, kp1, frame2, kp2, matches, nFrame):
    imMatches = drawMatches(frame1, kp1, frame2, kp2, matches)
    imName = "Matches between frame " + str(nFrame) + " and frame " + str(nFrame+1)
    cv2.namedWindow(imName, cv2.WINDOW_NORMAL)
    cv2.resizeWindow(imName, 1280, 480)
    cv2.imshow(imName, imMatches)
    cv2.waitKey(0)
    cv2.destroyWindow(imName)
    cv2.imwrite("frame matching.png", imMatches)

def plotTrans(Macc, Macc2, videoName, suffix, show):
    # plot each entry of the 3x3 transformation matrices over time
    title = [["$m_{11}$", "$m_{12}$", "$t_x$"], ["$m_{21}$", "$m_{22}$", "$t_y$"], ["$m_{31}$", "$m_{32}$", "1"]]
    plt.figure(1)
    for i in range(3):
        for j in range(3):
            ax = plt.subplot(331+3*i+j)
            plt.plot(Macc[i,j,:], '-')
            if Macc2 is not None:
                plt.plot(Macc2[i,j,:], 'g-')
            plt.title(title[i][j])
            ax.autoscale_view(True, True, True)
            plt.grid(True)
            plt.xticks(np.arange(0, len(Macc[i,j,:]), 100))
    plt.tight_layout()
    plt.savefig(videoName + suffix + ".pdf")
    if show:
        plt.show()
    plt.clf()

# Peak Signal-to-Noise Ratio between two frames
def getPSNR (frame1, frame2):
    # cast to float first: differencing uint8 frames would wrap around
    MSE = ((frame2.astype(np.float64) - frame1.astype(np.float64))**2).mean()
    I_MAX_SQR = 255.0**2
    return 10*np.log10(I_MAX_SQR/MSE)

# interframe fidelity (ITF): mean PSNR over consecutive frame pairs
def getITF (videoPath):
    video = cv2.VideoCapture(videoPath)
    N_FRAMES = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    ITF = 0

    _, currFrame = readVideoGray(video)
    for i in range(N_FRAMES-1):
        _, nextFrame = readVideoGray(video)
        ITF += getPSNR(currFrame, nextFrame)
        currFrame = nextFrame

    ITF = 1.0/(N_FRAMES-1) * ITF
    video.release()
    return ITF
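
For reference, getPSNR and getITF above implement the usual definitions: with f_k the k-th grayscale frame of an N-frame video and I_max = 255,

    \mathrm{PSNR}(f_k, f_{k+1}) = 10 \log_{10} \frac{I_{\max}^2}{\mathrm{MSE}(f_k, f_{k+1})}

    \mathrm{ITF} = \frac{1}{N-1} \sum_{k=0}^{N-2} \mathrm{PSNR}(f_k, f_{k+1})

The more similar consecutive frames are, the higher the ITF, so a higher ITF indicates a smoother (better stabilized) video.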
--------------------------------------------------------------------------------
/src/functs/frameTransformation.py:
--------------------------------------------------------------------------------
import numpy as np
import cv2
from stabFuncts import *

def getTrans (videoArr, detector, bf, MATCH_THRES, RANSAC_THRES, filt, fast=True):
    N_FRAMES = videoArr.shape[0]
    trans = np.zeros((N_FRAMES, 3, 3))

    if fast:
        localMotion = getLocalMotionFast(videoArr, filt, detector, bf, MATCH_THRES, RANSAC_THRES)
    else:
        localMotion = getLocalMotion(videoArr, filt, detector, bf, MATCH_THRES, RANSAC_THRES)

    # smooth the motion: each entry of the transformation is the
    # filter-weighted average of the corresponding local-motion entries
    for i in range(N_FRAMES):
        for x in range(3):
            for y in range(3):
                trans[i,x,y] = np.dot(filt, localMotion[i,:,x,y])

    return trans

def getLocalMotion (videoArr, filt, detector, bf, MATCH_THRES, RANSAC_THRES):
    N_FRAMES = videoArr.shape[0]
    FILT_WIDTH = filt.size
    halfFilt = FILT_WIDTH // 2
    localMotion = np.zeros((N_FRAMES, FILT_WIDTH, 3, 3))
    for i in range(N_FRAMES):
        print "frame " + str(i)
        for j in range(FILT_WIDTH):
            if j < halfFilt and i+j-halfFilt >= 0:
                # backward motion: invert the forward motion already
                # computed for an earlier frame
                localMotion[i,j,:,:] = np.linalg.inv(localMotion[i+j-halfFilt,FILT_WIDTH-j-1,:,:])
            elif j > halfFilt and i+j-halfFilt <= N_FRAMES-1:
                # forward motion: estimate it directly from the frames
                localMotion[i,j,:,:] = \
                    estMotion(videoArr[i,:,:], videoArr[i+j-halfFilt,:,:], detector, bf, MATCH_THRES, RANSAC_THRES, show=False)
            else: # j == halfFilt or out of bounds
                localMotion[i,j,:,:] = np.identity(3)
    return localMotion

def getLocalMotionFast (videoArr, filt, detector, bf, MATCH_THRES, RANSAC_THRES):
    N_FRAMES = videoArr.shape[0]
    FILT_WIDTH = filt.size
    halfFilt = FILT_WIDTH // 2
    localMotion = np.zeros((N_FRAMES, FILT_WIDTH, 3, 3))

    # get next-frame motion with ORB (and same-frame motion as identity)
    for i in range(N_FRAMES):
        print "frame " + str(i)
        localMotion[i,halfFilt,:,:] = np.identity(3)
        try:
            localMotion[i,halfFilt+1,:,:] = \
                estMotion(videoArr[i,:,:], videoArr[i+1,:,:], detector, bf, MATCH_THRES, RANSAC_THRES, show=False)
        except IndexError:
            localMotion[i,halfFilt+1,:,:] = np.identity(3)

    # get n-step frame motion by composing next-step motions
    for j in range(halfFilt+2, FILT_WIDTH):
        for i in range(N_FRAMES):
            try:
                localMotion[i,j,:,:] = np.dot(localMotion[i+1,j-1,:,:], localMotion[i,j-1,:,:])
            except IndexError:
                localMotion[i,j,:,:] = np.identity(3)

    # get past n-step motion (by inversion of the forward motion); an explicit
    # bound check is needed here, since a negative numpy index would silently
    # wrap around instead of raising IndexError
    for j in range(halfFilt):
        for i in range(N_FRAMES):
            if i+j-halfFilt >= 0:
                localMotion[i,j,:,:] = np.linalg.inv(localMotion[i+j-halfFilt,FILT_WIDTH-j-1,:,:])
            else:
                localMotion[i,j,:,:] = np.identity(3)

    return localMotion

def estMotion (frame1, frame2, detector, bf, MATCH_THRES, RANSAC_THRES, show=False):
    try:
        # get keypoints and descriptors
        kp1, des1 = detector.detectAndCompute(frame1, None)
        kp2, des2 = detector.detectAndCompute(frame2, None)

        # get matches below the distance threshold
        matches = bf.match(des1, des2)
        matches = filterMatches(matches, MATCH_THRES)

        # estimate the perspective transform from the matched points
        src_pts = np.float32([kp1[m.queryIdx].pt for m in matches]).reshape(-1,1,2)
        dst_pts = np.float32([kp2[m.trainIdx].pt for m in matches]).reshape(-1,1,2)

        M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, RANSAC_THRES)

        # keep only the RANSAC inliers
        matches = maskMatches(matches, mask)

        if show:
            plotMatches(frame1, kp1, frame2, kp2, matches, 0)
    except: # if anything fails (e.g. too few matches), fall back to identity
        M = np.identity(3)

    return M

def getMotion (videoPath, detector, bf, MATCH_THRES, RANSAC_THRES):
    video = cv2.VideoCapture(videoPath)
    N_FRAMES = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    Macc = np.zeros((3, 3, N_FRAMES-1))

    # note: prevFrame is not updated inside the loop, so each entry is the
    # motion relative to the first frame (accumulated global motion)
    _, prevFrame = readVideoGray(video)
    for i in range(N_FRAMES-1):
        _, currFrame = readVideoGray(video)
        Macc[:,:,i] = estMotion(prevFrame, currFrame, detector, bf, MATCH_THRES, RANSAC_THRES, False)

    video.release()
    return Macc
--------------------------------------------------------------------------------
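
A small standalone sketch (not part of the repository) of the homography chaining that getLocalMotionFast relies on, using pure translations so the expected results can be checked by hand:

    import numpy as np

    def translation(tx, ty):
        # pure-translation homography, used here only to illustrate composition
        H = np.identity(3)
        H[0, 2], H[1, 2] = tx, ty
        return H

    # one-step motions: frame i -> i+1 and frame i+1 -> i+2
    H_i = translation(2.0, -1.0)
    H_i1 = translation(3.0, 0.5)

    # getLocalMotionFast chains them the same way:
    #     localMotion[i,j] = np.dot(localMotion[i+1,j-1], localMotion[i,j-1])
    H_2step = np.dot(H_i1, H_i)
    assert np.allclose(H_2step, translation(5.0, -0.5))

    # backward motions are recovered by inversion, as in the j < halfFilt loop
    assert np.allclose(np.linalg.inv(H_i), translation(-2.0, 1.0))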