├── .gitignore
├── README.md
├── data
│   └── pan.avi
├── images
│   ├── raw.gif
│   ├── smoothed.gif
│   └── smoothing.png
├── prototype
│   ├── requirements.txt
│   └── vstab.py
└── src
    ├── CMakeLists.txt
    ├── bucket_sampler.cpp
    ├── bucket_sampler.h
    ├── crop.cpp
    ├── crop.h
    ├── fit.cpp
    ├── fit.h
    ├── registration.cpp
    ├── registration.h
    ├── stabilize.cpp
    ├── stabilize.h
    ├── video.cpp
    ├── video.h
    └── vstab.cpp

/.gitignore:
--------------------------------------------------------------------------------
build/
data/
debug/
src/CMakeLists.txt.user
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# vstab
Video stabilizer that smooths camera motion.

Raw:

![Raw](https://github.com/oberger4711/vstab/blob/master/images/raw.gif)

Stabilized:

![Stabilized](https://github.com/oberger4711/vstab/blob/master/images/smoothed.gif)

## Setup
Built on Linux but may work on Windows (somehow).

Requirements:
* OpenCV built with the contrib modules (this requires building it from source)
* Ceres
* Boost

Build it with CMake:
```
cd src
mkdir build
cd build
cmake ..
make
```
Run the following to find out how to use it:
```
./vstab --help
```

## Algorithm
This is the pipeline:
1. Detect keypoints and compute descriptors with SIFT in each frame.
2. Estimate the transformation between consecutive frames from the keypoint correspondences using RANSAC.
   Undoing this transformation ideally yields a video with no camera motion at all.
3. Smooth the camera motion by regressing a translation for each frame using non-linear least squares.
   The following two costs are minimized in the process (see the sketch below):
   1. Centered: the difference between the regressed translation and the frame position estimated from the keypoint correspondences.
   2. Smoothed: the difference between the step from the previous frame's translation and the step to the next frame's translation.
4. Apply the transformations from steps 2 and 3.
5. Crop the frames to the largest rectangle with the original aspect ratio that always contains content.
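
Written out, step 3 solves one small least-squares problem over all frames. The following is a sketch of the objective matching the residuals in `src/fit.h`, where `t_i` are the regressed translations, `c_i` the positions estimated from the correspondences, and `λ` the smoothing factor (Ceres squares the residuals, hence `λ` appears squared):

```
\min_{t_1, \dots, t_n} \sum_{i=1}^{n} \lVert t_i - c_i \rVert^2
    + \lambda^2 \sum_{i=2}^{n-1} \lVert (t_{i+1} - t_i) - (t_i - t_{i-1}) \rVert^2
```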

## Smoothing Parameter
The only parameter of the algorithm is a smoothing factor that amplifies the costs of 3.ii.
It is effectively a trade-off between a smooth camera motion and a low loss of pixels in the following cropping step.
The user can tune it to achieve the desired strength of smoothing for their application.
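
For example, assuming you built in `src/build` as described above, a run on the sample video could look like this (the smoothing factor `4.0` is only an illustration, not a tuned default):

```
./vstab ../../data/pan.avi 4.0
```

The file and the smoothing factor are the two positional arguments; see `./vstab --help` for the optional `--crop`, `--debug` and `--export-frames` flags.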

## Example
This is shown in the following screenshot:
* Blue: Correspondences between keypoints of the current and the following frame (RANSAC is robust against outliers)
* Green: Original camera motion trajectory estimated from the keypoint correspondences
* Red: Smoothed camera motion trajectory minimizing the cost functions (smoothness controlled by the smoothing factor)

![Screenshot of smoothing](https://github.com/oberger4711/vstab/blob/master/images/smoothing.png)
--------------------------------------------------------------------------------
/data/pan.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oberger4711/vstab/8fe6f3f23d51429f96d9e578091e648ae8e7f691/data/pan.avi
--------------------------------------------------------------------------------
/images/raw.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oberger4711/vstab/8fe6f3f23d51429f96d9e578091e648ae8e7f691/images/raw.gif
--------------------------------------------------------------------------------
/images/smoothed.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oberger4711/vstab/8fe6f3f23d51429f96d9e578091e648ae8e7f691/images/smoothed.gif
--------------------------------------------------------------------------------
/images/smoothing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oberger4711/vstab/8fe6f3f23d51429f96d9e578091e648ae8e7f691/images/smoothing.png
--------------------------------------------------------------------------------
/prototype/requirements.txt:
--------------------------------------------------------------------------------
numpy==1.14.5
opencv-python==3.4.1.15
opencv-contrib-python==3.4.1.15
--------------------------------------------------------------------------------
/prototype/vstab.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3

import os
import argparse
import numpy as np
import cv2

DATA_FOLDER = os.path.join('..', 'data')
DEFAULT_VIDEO = 'pan.avi'
DETECTOR = cv2.xfeatures2d.SIFT_create()

def parse_args():
    parser = argparse.ArgumentParser(description='Stabilize a given video.')
    parser.add_argument('file', nargs='?', type=str, help='Video file inside the data folder that shall be processed.')
    parser.add_argument('--debug', action='store_true')

    args = parser.parse_args()
    if args.file is None:
        args.file = DEFAULT_VIDEO
    args.file = os.path.join(DATA_FOLDER, args.file)
    return args

def read_video(file):
    video = cv2.VideoCapture(file)
    if not video.isOpened():
        print("Error: Could not open video '{}'.".format(file))
        return None
    frames = []
    while True:
        okay, frame = video.read()
        if not okay:
            break
        frames.append(frame)
    video.release()
    return frames

def to_int_tuple(pt):
    # pt has shape (1, 2); unpack the inner list into an (x, y) tuple.
    lst = np.int32(pt).tolist()
    return tuple(*lst)

def extract_cropped_rectangle(frame, tf):
    height, width, channels = frame.shape
    corners = np.array([[0, height], [width, height], [width, 0], [0, 0]], dtype=np.float32)
    corners = np.expand_dims(corners, 0)
    corners_tfed = cv2.perspectiveTransform(corners, tf)
    corners_tfed = np.squeeze(corners_tfed, axis=0)
    # Innermost edges of the transformed frame (y grows downwards).
    top = np.max(corners_tfed[[2, 3], 1], axis=0)
    bottom = np.min(corners_tfed[[0, 1], 1], axis=0)
    right = np.min(corners_tfed[[1, 2], 0], axis=0)
    left = np.max(corners_tfed[[3, 0], 0], axis=0)
    corners_cropped = np.array([[left, bottom], [right, bottom], [right, top], [left, top]])
    return corners_cropped

def main():
    args = parse_args()

    # Input
    print("Reading video...")
    frames = read_video(args.file)
    if frames is None:
        exit(1)

    # Process
    print("Detecting features...")
    transformations = [np.identity(3) for _ in range(len(frames))]
    for i in range(len(frames) - 1):
        frame_current = frames[i]
        frame_next = frames[i + 1]
        keypoints_current, descriptors_current = DETECTOR.detectAndCompute(frame_current, None)
        keypoints_next, descriptors_next = DETECTOR.detectAndCompute(frame_next, None)

        matcher = cv2.BFMatcher()
        all_matches = matcher.knnMatch(descriptors_current, descriptors_next, k=2)

        good_matches = []
        for m, n in all_matches:
            # Only use a match if it is significantly better than the next best match.
            if m.distance < 0.75 * n.distance:
                good_matches.append(m)

        pts_current = np.float32([keypoints_current[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2)
        pts_next = np.float32([keypoints_next[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2)

        # Debug viz
        if args.debug:
            for pt_current, pt_next in zip(pts_current, pts_next):
                pt_src, pt_dest = to_int_tuple(pt_current), to_int_tuple(pt_next)
                cv2.arrowedLine(frame_current, pt_src, pt_dest, (255, 120, 120))

        # Estimate homography
        homography, mask = cv2.findHomography(pts_next, pts_current, cv2.RANSAC)
        #homography[0, 2] = 0 # Stabilize only the y axis.
        transformations[i + 1] = np.matmul(transformations[i], homography)

    centers = []
    for frame, tf in zip(frames, transformations):
        corners_cropped = extract_cropped_rectangle(frame, tf)
        pt_min = np.min(corners_cropped, axis=0)
        pt_max = np.max(corners_cropped, axis=0)
        center = (pt_min + pt_max) / 2
        centers.append(center)

    for i, (frame, tf) in enumerate(zip(frames, transformations)):
        height, width, channels = frame.shape
        frames[i] = cv2.warpPerspective(frame, tf, (width, height))

    # Debug viz homography centers.
    if args.debug:
        for i, center in enumerate(centers):
            center_int = center.astype(np.int32)
            # Draw the center into this and all following frames to leave a trace.
            for j in range(i, len(frames)):
                cv2.circle(frames[j], (center_int[0], center_int[1]), 2, (120, 255, 120))

    # Display frames; browse with j/k, quit with Esc.
    cv2.namedWindow('frame', cv2.WINDOW_NORMAL)
    i = 0
    while True:
        f = frames[i]
        cv2.imshow('frame', f)
        key = cv2.waitKey()
        if key == 27:
            # Esc
            break
        if key == 0x6a:
            # j
            i = (i + 1) % len(frames)
        if key == 0x6b:
            # k
            i = (i - 1) % len(frames)

    # Tear down
    cv2.destroyAllWindows()

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
# CMAKE_CXX_STANDARD requires CMake >= 3.1.
CMAKE_MINIMUM_REQUIRED (VERSION 3.1)
PROJECT (vstab)
SET(CMAKE_CXX_STANDARD 14)


FIND_PACKAGE(Boost 1.40 COMPONENTS program_options REQUIRED)
FIND_PACKAGE(OpenCV REQUIRED COMPONENTS core imgproc highgui features2d xfeatures2d)
FIND_PACKAGE(Ceres REQUIRED)

INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIR} ${OpenCV_INCLUDE_DIRS} ${CERES_INCLUDE_DIRS})

ADD_EXECUTABLE(vstab
    ${PROJECT_SOURCE_DIR}/vstab.cpp
    ${PROJECT_SOURCE_DIR}/video.cpp
    ${PROJECT_SOURCE_DIR}/stabilize.cpp
    ${PROJECT_SOURCE_DIR}/registration.cpp
    ${PROJECT_SOURCE_DIR}/crop.cpp
    ${PROJECT_SOURCE_DIR}/fit.cpp
)
TARGET_LINK_LIBRARIES(vstab LINK_PUBLIC ${Boost_LIBRARIES} ${OpenCV_LIBS} ${CERES_LIBRARIES})
--------------------------------------------------------------------------------
/src/bucket_sampler.cpp:
--------------------------------------------------------------------------------
#include "bucket_sampler.h"

#include <algorithm>

// NOTE: This file is work in progress and not part of the CMake target yet.
// The constructor was left unfinished in the repository; the body below is a
// hedged sketch of the intended bucketing, reconstructed from the fragments.
BucketSampler::BucketSampler(const cv::Mat& m1, const cv::Mat& m2, const int num_buckets_x, const int num_buckets_y) :
        m1(m1), m2(m2), rd(), gen(rd()) {
    const cv::Rect2f bounds = cv::boundingRect(m1);
    const float bucket_width = bounds.width / num_buckets_x;
    const float bucket_height = bounds.height / num_buckets_y;
    buckets.resize(num_buckets_x * num_buckets_y);
    // Sort points into buckets.
    for (int i = 0; i < m1.checkVector(2); i++) {
        const auto pt = m1.at<cv::Point2f>(i);
        const int bucket_x = std::min(num_buckets_x - 1, static_cast<int>((pt.x - bounds.x) / bucket_width));
        const int bucket_y = std::min(num_buckets_y - 1, static_cast<int>((pt.y - bounds.y) / bucket_height));
        buckets[(bucket_y * num_buckets_x) + bucket_x].push_back(i);
    }
}
--------------------------------------------------------------------------------
/src/bucket_sampler.h:
--------------------------------------------------------------------------------
#pragma once

#include <vector>
#include <random>

// OpenCV
#include <opencv2/core.hpp>

class BucketSampler {
public:
    // A bucket holds the indices of the points that fall into it.
    using Bucket = std::vector<int>;

    BucketSampler(const cv::Mat& m1, const cv::Mat& m2, const int num_buckets_x=8, const int num_buckets_y=5);
    bool getSubset(cv::Mat& out_ms1, cv::Mat& out_ms2, const int model_points, const int max_attempts);

private:
    cv::Mat m1;
    cv::Mat m2;
    std::vector<Bucket> buckets;
    std::random_device rd;
    std::mt19937 gen;
    std::discrete_distribution<> dist;
};
--------------------------------------------------------------------------------
/src/crop.cpp:
--------------------------------------------------------------------------------
#include "crop.h"

std::vector<cv::Rect> extract_max_cropped_rect(const Video& frames, const std::vector<cv::Mat>& transforms) {
    std::vector<cv::Rect> rects_cropped(frames.size());
    for (size_t i = 0; i < frames.size(); i++) {
        const auto& frame = frames[i];
        const auto& tf = transforms[i];
        const double width = static_cast<double>(frame.cols);
        const double height = static_cast<double>(frame.rows);
        const double aspect_ratio = width / height;

        // The first point is also inserted as the last point to make colRange work easily.
        double data[10] = {
            0, height,
            width, height,
            width, 0,
            0, 0,
            0, height};
        cv::Mat corners(1, 5, CV_64FC2, &data[0]);
        cv::Mat corners_tfed(1, 5, CV_64FC2);
        cv::perspectiveTransform(corners, corners_tfed, tf);
        cv::Mat corners_tfed_ch[2];
        cv::split(corners_tfed, corners_tfed_ch);

        // Determine the cropped rectangle from the innermost transformed edges.
        double min, max;
        // Top
        cv::minMaxLoc(corners_tfed_ch[1].colRange(2, 4), &min, &max);
        const double top_content = max;
        // Bottom
        cv::minMaxLoc(corners_tfed_ch[1].colRange(0, 2), &min, &max);
        const double bottom_content = min;
        // Right
        cv::minMaxLoc(corners_tfed_ch[0].colRange(1, 3), &min, &max);
        const double right_content = min;
        // Left
        cv::minMaxLoc(corners_tfed_ch[0].colRange(3, 5), &min, &max);
        const double left_content = max;

        // Respect the aspect ratio.
        const double width_content = right_content - left_content;
        const double height_content = bottom_content - top_content;
        const double center_x = left_content + (width_content / 2.0);
        const double center_y = top_content + (height_content / 2.0);
        double width_cropped, height_cropped;
        if (height_content * aspect_ratio > width_content) {
            // Width is limiting.
            width_cropped = width_content;
            height_cropped = width_content / aspect_ratio;
        }
        else {
            // Height is limiting.
            height_cropped = height_content;
            width_cropped = height_content * aspect_ratio;
        }
        rects_cropped[i] = cv::Rect(center_x - (width_cropped / 2.0), center_y - (height_cropped / 2.0), width_cropped, height_cropped);
    }
    return rects_cropped;
}

std::vector<cv::Point2f> extract_centers(const Video& frames, const std::vector<cv::Mat>& transforms) {
    std::vector<cv::Point2f> centers(frames.size());
    std::vector<cv::Rect> rects_cropped = extract_max_cropped_rect(frames, transforms);
    for (size_t i = 0; i < rects_cropped.size(); i++) {
        const auto& rect = rects_cropped[i];
        // The top-left corner serves as the position of the camera trajectory.
        centers[i] = rect.tl();
    }
    return centers;
}

void crop_to_percentage(Video& frames, const float percentage) {
    const auto& frame = frames[0];
    const int width = frame.cols;
    const int height = frame.rows;
    const int center_x = width / 2;
    const int center_y = height / 2;
    const int width_new = static_cast<int>(width * percentage);
    const int height_new = static_cast<int>(height * percentage);
    cv::Rect roi(center_x - width_new / 2, center_y - height_new / 2, width_new, height_new);

    for (auto& frame : frames) {
        frame = frame(roi);
    }
}

void crop_and_resize(Video& frames, const cv::Rect& rect_common) {
    for (auto& frame : frames) {
        frame = frame(rect_common);
    }
}
--------------------------------------------------------------------------------
/src/crop.h:
--------------------------------------------------------------------------------
#pragma once

#include <vector>

#include "video.h"

// OpenCV
#include "opencv2/calib3d.hpp"

/**
 * Extracts an axis-aligned rectangle of maximal size inside the transformed frame that contains no black border pixels.
 * @param frames The video frames.
 * @param transforms The transformation matrices for each frame.
 * @return The maximal cropped rectangle for each frame.
 */
std::vector<cv::Rect> extract_max_cropped_rect(const Video& frames, const std::vector<cv::Mat>& transforms);
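
// Usage sketch (mirroring what vstab.cpp does with this function): intersect the
// per-frame rectangles to get one rectangle that contains content in every frame,
// then crop all frames to it:
//
//   std::vector<cv::Rect> rects = extract_max_cropped_rect(frames, transforms);
//   cv::Rect rect_common = std::accumulate(rects.begin(), rects.end(), rects.front(),
//           [](const auto& a, const auto& b) { return a & b; });
//   crop_and_resize(frames, rect_common);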

/**
 * Extracts the position (top-left corner) of the maximal axis-aligned rectangle inside the transformed frame that contains no black border pixels.
 * @param frames The video frames.
 * @param transforms The transformation matrices for each frame.
 * @return The position for each frame.
 */
std::vector<cv::Point2f> extract_centers(const Video& frames, const std::vector<cv::Mat>& transforms);

void crop_to_percentage(Video& frames, const float percentage);

void crop_and_resize(Video& frames, const cv::Rect& rect_common);
--------------------------------------------------------------------------------
/src/fit.cpp:
--------------------------------------------------------------------------------
#include "fit.h"

std::vector<cv::Point2f> smooth_motion_parameterless(const std::vector<cv::Point2f>& centers, const float smoothness) {
    // Initialize the parameters with the center points.
    std::vector<double> params(2 * centers.size());
    for (size_t i = 0; i < centers.size(); i++) {
        cv::Point2f center = centers[i];
        params[2 * i] = center.x;
        params[(2 * i) + 1] = center.y;
    }

    google::InitGoogleLogging("vstab");
    ceres::Problem problem;

    // Centered costs
    for (size_t i = 0; i < centers.size(); i++) {
        ceres::CostFunction* cost_centered = new ceres::AutoDiffCostFunction<CenteredCostFunctor, 2, 2>(new CenteredCostFunctor(1.0, centers[i]));
        problem.AddResidualBlock(cost_centered, nullptr, &params[2 * i]);
    }

    // Smoothed costs
    for (size_t i = 1; i < centers.size() - 1; i++) {
        ceres::CostFunction* cost_smoothed = new ceres::AutoDiffCostFunction<SmoothedCostFunctor, 2, 2, 2, 2>(new SmoothedCostFunctor(smoothness));
        problem.AddResidualBlock(cost_smoothed, nullptr, &params[2 * (i - 1)], &params[2 * i], &params[2 * (i + 1)]);
    }

    // Optimize.
    ceres::Solver::Options options;
    options.minimizer_progress_to_stdout = true;
    ceres::Solver::Summary summary;
    ceres::Solve(options, &problem, &summary);
    std::cout << summary.BriefReport() << std::endl;

    // Transform back for returning.
    std::vector<cv::Point2f> centers_smoothed(centers.size());
    for (size_t i = 0; i < centers.size(); i++) {
        centers_smoothed[i] = cv::Point2f(params[2 * i], params[(2 * i) + 1]);
    }
    return centers_smoothed;
}
--------------------------------------------------------------------------------
/src/fit.h:
--------------------------------------------------------------------------------
#pragma once

#include <vector>
#include <iostream>

// OpenCV
#include <opencv2/core.hpp>

// Ceres
#include <ceres/ceres.h>
#include <glog/logging.h>

std::vector<cv::Point2f> smooth_motion_parameterless(const std::vector<cv::Point2f>& centers, const float smoothness);

/**
 * Defines the difference between the smoothed and the actual center as costs.
 */
struct CenteredCostFunctor {
    CenteredCostFunctor(const float weight, const cv::Point2f center) : weight(weight), center_x(center.x), center_y(center.y) {
    }

    template <typename T> bool operator()(const T* const params, T* residual) const {
        T t_x(params[0]), t_y(params[1]);
        residual[0] = T(weight) * (t_x - T(center_x));
        residual[1] = T(weight) * (t_y - T(center_y));
        return true;
    }

private:
    float weight;
    double center_x;
    double center_y;
};

/**
 * Defines the difference between two consecutive steps of the smoothed centers as costs.
 */
struct SmoothedCostFunctor {
    SmoothedCostFunctor(const float weight) : weight(weight) {
    }

    template <typename T> bool operator()(const T* const params_prev, const T* const params, const T* const params_next, T* residual) const {
        T t_prev_x(params_prev[0]), t_prev_y(params_prev[1]);
        T t_cur_x(params[0]), t_cur_y(params[1]);
        T t_next_x(params_next[0]), t_next_y(params_next[1]);

        // Steps from the previous to the current and from the current to the next translation.
        T s_prev_x = t_cur_x - t_prev_x;
        T s_prev_y = t_cur_y - t_prev_y;
        T s_next_x = t_next_x - t_cur_x;
        T s_next_y = t_next_y - t_cur_y;

        residual[0] = T(weight) * (s_next_x - s_prev_x);
        residual[1] = T(weight) * (s_next_y - s_prev_y);
        return true;
    }

private:
    float weight;
};
--------------------------------------------------------------------------------
/src/registration.cpp:
--------------------------------------------------------------------------------
#include "registration.h"

#include <cassert>
#include <cfloat>
#include <cmath>
#include <iostream>

// The following is based on lkpyramid.cpp from OpenCV.

static void
getRTMatrix(const std::vector<cv::Point2f>& a, const std::vector<cv::Point2f>& b,
            int count, cv::Mat& M, bool fullAffine)
{
    CV_Assert(M.isContinuous());

    if(fullAffine)
    {
        double sa[6][6]={{0.}}, sb[6]={0.};
        cv::Mat A(6, 6, CV_64F, &sa[0][0]), B(6, 1, CV_64F, sb);
        cv::Mat MM = M.reshape(1, 6);

        for (int i = 0; i < count; i++)
        {
            sa[0][0] += a[i].x*a[i].x;
            sa[0][1] += a[i].y*a[i].x;
            sa[0][2] += a[i].x;

            sa[1][1] += a[i].y*a[i].y;
            sa[1][2] += a[i].y;

            sb[0] += a[i].x*b[i].x;
            sb[1] += a[i].y*b[i].x;
            sb[2] += b[i].x;
            sb[3] += a[i].x*b[i].y;
            sb[4] += a[i].y*b[i].y;
            sb[5] += b[i].y;
        }

        sa[3][4] = sa[4][3] = sa[1][0] = sa[0][1];
        sa[3][5] = sa[5][3] = sa[2][0] = sa[0][2];
        sa[4][5] = sa[5][4] = sa[2][1] = sa[1][2];

        sa[3][3] = sa[0][0];
        sa[4][4] = sa[1][1];
        sa[5][5] = sa[2][2] = count;

        cv::solve(A, B, MM, cv::DECOMP_EIG);
    }
    else
    {
        double sa[4][4]={{0.}}, sb[4]={0.}, m[4] = {0};
        cv::Mat A(4, 4, CV_64F, sa), B(4, 1, CV_64F, sb);
        cv::Mat MM(4, 1, CV_64F, m);

        for (int i = 0; i < count; i++)
        {
            sa[0][0] += a[i].x*a[i].x + a[i].y*a[i].y;
            sa[0][2] += a[i].x;
            sa[0][3] += a[i].y;

            sb[0] += a[i].x*b[i].x + a[i].y*b[i].y;
            sb[1] += a[i].x*b[i].y - a[i].y*b[i].x;
            sb[2] += b[i].x;
            sb[3] += b[i].y;
        }

        sa[1][1] = sa[0][0];
        sa[2][1] = sa[1][2] = -sa[0][3];
        sa[3][1] = sa[1][3] = sa[2][0] = sa[0][2];
        sa[2][2] = sa[3][3] = count;
        sa[3][0] = sa[0][3];

        cv::solve(A, B, MM, cv::DECOMP_EIG);

        double* om = M.ptr<double>();
        om[0] = om[4] = m[0];
        om[1] = -m[1];
        om[3] = m[1];
        om[2] = m[2];
        om[5] = m[3];
    }
}

cv::Mat estimateRigidTransform_extended(cv::InputArray src1, cv::InputArray src2, bool fullAffine, const SamplingMethod method, std::vector<uchar>& inlier_mask)
{
    return estimateRigidTransform_extended(src1, src2, fullAffine, method, inlier_mask, 500, 0.5, 0.04);
}

cv::Mat estimateRigidTransform_extended(cv::InputArray src1, cv::InputArray src2, const bool fullAffine,
                                        const SamplingMethod method, std::vector<uchar>& inlier_mask,
                                        int ransacMaxIters, double ransacGoodRatio, const float inlier_threshold)
{
    // TODO: The method parameter is not evaluated yet; BUCKET_RANSAC (see BucketSampler) is work in progress.
    cv::Mat M(2, 3, CV_64F), A = src1.getMat(), B = src2.getMat();

    const int ransacSize0 = 3;

    std::vector<cv::Point2f> pA, pB;
    std::vector<int> good_idx;

    double scale = 1.;
    int i, j, k, k1;

    cv::RNG rng((uint64)-1);
    int good_count = 0;

    assert((ransacGoodRatio <= 1 && ransacGoodRatio >= 0) && "ransacGoodRatio should have value between 0 and 1");
    assert((A.size() == B.size()) && "Both input images must have the same size");
    assert((A.type() == B.type()) && "Both input images must have the same data type");

    int count = A.checkVector(2);

    assert(count > 0);
    A.reshape(2, count).convertTo(pA, CV_32F);
    B.reshape(2, count).convertTo(pB, CV_32F);

    good_idx.resize(count);

    inlier_mask.clear();
    inlier_mask.resize(count, 0);

    if(count < ransacSize0)
        return cv::Mat();

    cv::Rect brect = cv::boundingRect(pB);

    std::vector<cv::Point2f> a(ransacSize0);
    std::vector<cv::Point2f> b(ransacSize0);

    // RANSAC stuff:
    // 1. find the consensus
    for (k = 0; k < ransacMaxIters; k++)
    {
        std::vector<int> idx(ransacSize0);
        // choose 3 random non-collinear points from A & B
        for (i = 0; i < ransacSize0; i++)
        {
            for (k1 = 0; k1 < ransacMaxIters; k1++)
            {
                idx[i] = rng.uniform(0, count);

                for (j = 0; j < i; j++)
                {
                    if(idx[j] == idx[i])
                        break;
                    // check that the points are not very close to each other
                    if(fabs(pA[idx[i]].x - pA[idx[j]].x) +
                       fabs(pA[idx[i]].y - pA[idx[j]].y) < FLT_EPSILON)
                        break;
                    if(fabs(pB[idx[i]].x - pB[idx[j]].x) +
                       fabs(pB[idx[i]].y - pB[idx[j]].y) < FLT_EPSILON)
                        break;
                }

                if(j < i)
                    continue;

                if(i+1 == ransacSize0)
                {
                    // additional check for collinear vectors
                    a[0] = pA[idx[0]];
                    a[1] = pA[idx[1]];
                    a[2] = pA[idx[2]];

                    b[0] = pB[idx[0]];
                    b[1] = pB[idx[1]];
                    b[2] = pB[idx[2]];

                    double dax1 = a[1].x - a[0].x, day1 = a[1].y - a[0].y;
                    double dax2 = a[2].x - a[0].x, day2 = a[2].y - a[0].y;
                    double dbx1 = b[1].x - b[0].x, dby1 = b[1].y - b[0].y;
                    double dbx2 = b[2].x - b[0].x, dby2 = b[2].y - b[0].y;
                    const double eps = 0.01;

                    if(fabs(dax1*day2 - day1*dax2) < eps*std::sqrt(dax1*dax1+day1*day1)*std::sqrt(dax2*dax2+day2*day2) ||
                       fabs(dbx1*dby2 - dby1*dbx2) < eps*std::sqrt(dbx1*dbx1+dby1*dby1)*std::sqrt(dbx2*dbx2+dby2*dby2))
                        continue;
                }
                break;
            }

            if(k1 >= ransacMaxIters)
                break;
        }

        if(i < ransacSize0)
            continue;

        // estimate the transformation using 3 points
        getRTMatrix(a, b, ransacSize0, M, fullAffine);

        const double* m = M.ptr<double>();
        for (i = 0, good_count = 0; i < count; i++)
        {
            if(std::abs(m[0]*pA[i].x + m[1]*pA[i].y + m[2] - pB[i].x) +
               std::abs(m[3]*pA[i].x + m[4]*pA[i].y + m[5] - pB[i].y) < std::max(brect.width,brect.height) * inlier_threshold)
                good_idx[good_count++] = i;
        }

        if(good_count >= count*ransacGoodRatio)
            break;
    }

    if (k >= ransacMaxIters) {
        std::cout << "Warning: Reached RANSAC max iterations." << std::endl;
        return cv::Mat();
    }

    if(good_count < count)
    {
        for (i = 0; i < good_count; i++)
        {
            j = good_idx[i];
            pA[i] = pA[j];
            pB[i] = pB[j];
        }
    }

    getRTMatrix(pA, pB, good_count, M, fullAffine);
    M.at<double>(0, 2) /= scale;
    M.at<double>(1, 2) /= scale;

    // Set the inlier mask.
    for (int l = 0; l < good_count; l++) {
        const auto i_good = good_idx[l];
        inlier_mask[i_good] = 1;
    }

    return M;
}
--------------------------------------------------------------------------------
/src/registration.h:
--------------------------------------------------------------------------------
#pragma once

#include <vector>

// OpenCV
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>

enum class SamplingMethod {
    ALL,
    RANSAC,
    BUCKET_RANSAC
};

cv::Mat estimateRigidTransform_extended(cv::InputArray src1, cv::InputArray src2, bool fullAffine,
                                        const SamplingMethod method, std::vector<uchar>& inlier_mask);

cv::Mat estimateRigidTransform_extended(cv::InputArray src1, cv::InputArray src2, const bool fullAffine,
                                        const SamplingMethod method, std::vector<uchar>& inlier_mask,
                                        int ransacMaxIters, double ransacGoodRatio, const float inlier_threshold);
--------------------------------------------------------------------------------
/src/stabilize.cpp:
--------------------------------------------------------------------------------
#include "stabilize.h"

void add_motion(std::vector<cv::Mat>& in_out_transforms, const std::vector<cv::Point2f>& centers) {
    for (size_t i = 0; i < in_out_transforms.size(); i++) {
        auto& tf = in_out_transforms[i];
        const auto t = centers[i];
        tf.at<double>(0, 2) -= t.x;
        tf.at<double>(1, 2) -= t.y;
    }
}

Video transform_video(Video& frames, const std::vector<cv::Mat>& transforms) {
    Video frames_tfed(frames.size());
    for (size_t i = 0; i < frames_tfed.size(); i++) {
        const auto& frame = frames[i];
        auto& frame_tfed = frames_tfed[i];
        const auto& tf = transforms[i];
        // Use the input frame's size; frame_tfed is still empty at this point.
        cv::warpPerspective(frame, frame_tfed, tf, frame.size());
    }
    return frames_tfed;
}
--------------------------------------------------------------------------------
/src/stabilize.h:
--------------------------------------------------------------------------------
#pragma once

#include <array>
#include <cassert>
#include <iostream>
#include <vector>

// OpenCV
#include "opencv2/video.hpp"
#include "opencv2/calib3d.hpp"
#include "opencv2/features2d.hpp"
#include "opencv2/xfeatures2d.hpp"

#include "video.h"
#include "registration.h"

static inline void printProgress(const size_t frame, const size_t num_frames) {
    const auto percent = static_cast<int>(100.0 * frame / num_frames);
    std::cout << "\033[1A" << percent << " % " << std::endl;
}

template <typename T>
std::vector<cv::Mat> stabilize(Video& frames, const bool debug) {
    std::vector<cv::Mat> tfs(frames.size());
    for (auto& m : tfs) {
        m = cv::Mat::eye(3, 3, CV_64FC1);
    }
    cv::Ptr<T> detector = T::create();
    std::vector<cv::KeyPoint> keypoints_current, keypoints_next;
    cv::Mat descriptors_current, descriptors_next;
    detector->detectAndCompute(frames[0], cv::Mat(), keypoints_next, descriptors_next);

    std::cout << "0 %" << std::endl;
    for (size_t i = 0; i < frames.size() - 1; i++) {
        printProgress(i, frames.size());

        auto& frame_current = frames[i];
        auto& frame_next = frames[i + 1];

        // Apply the detector. Reuse the keypoints and descriptors of the previous iteration.
        keypoints_current.clear();
        keypoints_current.swap(keypoints_next);
        descriptors_current = descriptors_next.clone();
        detector->detectAndCompute(frame_next, cv::Mat(), keypoints_next, descriptors_next);

        auto& tf_next = tfs[i + 1];
        if (!descriptors_current.empty() && !descriptors_next.empty()) {
            // Find keypoint matches.
            cv::FlannBasedMatcher matcher;
            std::vector<std::vector<cv::DMatch>> matches_all;
            matcher.knnMatch(descriptors_current, descriptors_next, matches_all, 2);

            // Filter matches.
            // Keep only good matches, i.e. matches significantly better than the second best match.
            std::vector<cv::DMatch> matches_good;
            for (const auto& neighbours : matches_all) {
                if (neighbours.size() >= 2 && neighbours[0].distance < 0.75 * neighbours[1].distance) {
                    matches_good.push_back(neighbours[0]);
                }
            }
            if (!matches_good.empty()) {
                // Keep only close matches, i.e. closer than the median distance.
                /*
                std::vector<float> distances(matches_good.size());
                for (size_t i = 0; i < matches_good.size(); i++) {
                    const auto& pt_current = keypoints_current[matches_good[i].queryIdx].pt;
                    const auto& pt_next = keypoints_next[matches_good[i].trainIdx].pt;
                    distances[i] = cv::norm(pt_next - pt_current);
                }
                std::sort(distances.begin(), distances.end());
                const float median = distances[distances.size() / 2];
                for (auto it = matches_good.begin(); it != matches_good.end();) {
                    const auto& pt_current = keypoints_current[it->queryIdx].pt;
                    const auto& pt_next = keypoints_next[it->trainIdx].pt;
                    if (cv::norm(pt_next - pt_current) > median) {
                        it = matches_good.erase(it);
                    }
                    else {
                        ++it;
                    }
                }
                */

                // Extract corresponding keypoints.
                std::vector<cv::Point2f> pts_current, pts_next;
                for (const auto& match : matches_good) {
                    pts_current.push_back(keypoints_current[match.queryIdx].pt);
                    pts_next.push_back(keypoints_next[match.trainIdx].pt);
                }

                // Estimate the transform.
                std::vector<uchar> inlier_mask;
                //tf_next = cv::findHomography(pts_next, pts_current, cv::RANSAC, 3.0, inlier_mask);
                estimateRigidTransform_extended(pts_next, pts_current, false, SamplingMethod::RANSAC, inlier_mask)
                        .copyTo(tf_next(cv::Range(0, 2), cv::Range::all()));

                // Debug visualization of the correspondences.
                if (debug) {
                    assert(inlier_mask.size() == pts_current.size());
                    std::array<cv::Scalar, 2> colors = {cv::Scalar(100, 100, 255), cv::Scalar(255, 120, 120)};
                    for (size_t j = 0; j < pts_current.size(); j++) {
                        auto& color = colors[inlier_mask[j]];
                        cv::arrowedLine(frame_current,
                                        static_cast<cv::Point2i>(pts_current[j]),
                                        static_cast<cv::Point2i>(pts_next[j]), color);
                    }
                }
            }
        }
        else {
            // No keypoints available for estimation.
            std::cerr << "Warning: No keypoints in the current or the next frame." << std::endl;
            std::cout << std::endl;
        }

        if (tf_next.empty()) {
            std::cerr << "Warning: Empty homography for frame " << i << "." << std::endl;
            std::cout << std::endl;
        }
        tf_next = tfs[i] * tf_next; // Accumulate the transforms.
    }
    printProgress(frames.size(), frames.size());
    return tfs;
}

void add_motion(std::vector<cv::Mat>& in_out_transforms, const std::vector<cv::Point2f>& centers);

/**
 * Applies the transformations to the frames.
 * @param frames The video frames.
 * @param transforms The transformation matrices for each frame.
 * @return Transformed copy of the frames.
 */
Video transform_video(Video& frames, const std::vector<cv::Mat>& transforms);
--------------------------------------------------------------------------------
/src/video.cpp:
--------------------------------------------------------------------------------
#include "video.h"

Video read_video(const std::string& file_name) {
    Video frames;
    cv::VideoCapture cap(file_name);
    if (!cap.isOpened()) {
        std::cerr << "Error: Could not open video '" << file_name << "'." << std::endl;
        return frames;
    }
    while (cap.grab()) {
        frames.emplace_back();
        auto& frame = frames.back();
        cap.retrieve(frame);
    }
    cap.release();
    return frames;
}
--------------------------------------------------------------------------------
/src/video.h:
--------------------------------------------------------------------------------
#pragma once

#include <string>
#include <vector>
#include <iostream>

// OpenCV
#include <opencv2/core.hpp>
#include <opencv2/videoio.hpp>

using Video = std::vector<cv::Mat>;

Video read_video(const std::string& file_name);
--------------------------------------------------------------------------------
/src/vstab.cpp:
--------------------------------------------------------------------------------
#include <iostream>
#include <iomanip>
#include <sstream>
#include <numeric>

// Boost
#include <boost/program_options.hpp>

// OpenCV
#include <opencv2/highgui.hpp>
#include <opencv2/imgcodecs.hpp>

#include "video.h"
#include "stabilize.h"
#include "crop.h"
#include "fit.h"

boost::program_options::variables_map parse_args(int argc, char *argv[], std::string& out_file, float& out_smoothing) {
    namespace po = boost::program_options;
    po::options_description desc("Allowed options");
    desc.add_options()
        ("help", "Produces help message")
        ("debug", "Enables debug output")
        ("file", po::value<std::string>(&out_file)->required(), "The file to process")
        ("smoothing", po::value<float>(&out_smoothing)->required(), "Smoothing factor")
        ("crop", po::value<float>(), "Crops the input down to this percentage")
        ("export-frames", "Exports each stabilized frame as an image file")
        ;
    po::positional_options_description pos;
    pos.add("file", 1);
    pos.add("smoothing", 1);

    po::variables_map vm;
    try {
        po::store(po::command_line_parser(argc, argv).options(desc).positional(pos).run(), vm);
        if (vm.count("help")) {
            std::cout << "Usage: vstab <file> <smoothing> [OPTIONS]" << std::endl;
            std::cout << desc << std::endl;
            return vm;
        }
        po::notify(vm);
    }
    catch (boost::program_options::required_option& e) {
        std::cerr << "Error: " << e.what() << std::endl;
    }
    return vm;
}

void display(const Video& frames) {
    cv::namedWindow("vstab", cv::WINDOW_NORMAL);
    int i = 0;
    int key;
    do {
        const auto& frame = frames[i];
        cv::imshow("vstab", frame);
        key = cv::waitKey();
        if (key == 0x6a) {
            // j: next frame
            i = (i + 1) % static_cast<int>(frames.size());
        }
        if (key == 0x6b) {
            // k: previous frame
            i--;
            if (i < 0) {
                i = frames.size() - 1;
            }
        }
    } while (key != 27); // Esc quits.
    cv::destroyAllWindows();
}

int main(int argc, char *argv[]) {
    std::string file;
    float smoothing;
    auto options = parse_args(argc, argv, file, smoothing);
    if (options.count("help")) {
        return 1;
    }
    if (!options.count("file")) {
        return 1;
    }
    if (!options.count("smoothing")) {
        return 1;
    }
    const bool debug = static_cast<bool>(options.count("debug"));

    std::cout << "Reading video..." << std::endl;
video..." << std::endl; 86 | Video frames = read_video(file); 87 | 88 | if (options.count("crop")) { 89 | std::cout << "Cropping video..." << std::endl; 90 | const float crop_percentage = options["crop"].as(); 91 | crop_to_percentage(frames, crop_percentage); 92 | } 93 | 94 | std::cout << "Estimating transformations..." << std::endl; 95 | std::vector transforms = stabilize(frames, debug); 96 | 97 | std::cout << "Extracting motion..." << std::endl; 98 | std::vector centers = extract_centers(frames, transforms); 99 | 100 | // The following may go in a loop allowing the change of smoothing parameters. 101 | 102 | std::cout << "Smoothing motion..." << std::endl; 103 | std::vector centers_smoothed = smooth_motion_parameterless(centers, smoothing); 104 | add_motion(transforms, centers_smoothed); 105 | 106 | std::cout << "Transforming frames..." << std::endl; 107 | Video frames_tfed = transform_video(frames, transforms); 108 | 109 | std::cout << "Cropping to common rectangle..." << std::endl; 110 | std::vector rects_cropped = extract_max_cropped_rect(frames, transforms); 111 | cv::Rect rect_common = std::accumulate(rects_cropped.begin(), rects_cropped.end(), rects_cropped.front(), [](const auto& a, const auto& b) { 112 | return a & b; 113 | }); 114 | if (!debug) { 115 | crop_and_resize(frames_tfed, rect_common); 116 | } 117 | 118 | // Draw frame centers. 119 | if (debug) { 120 | cv::Point2i center_frame = cv::Point2i(frames[0].cols / 2, frames[0].rows / 2); 121 | for (size_t i = 0; i < frames_tfed.size(); i++) { 122 | cv::Point2i center_int = static_cast(centers[i]); 123 | cv::Point2i center_smoothed_int = static_cast(centers_smoothed[i]); 124 | // Draw trace. 125 | for (size_t j = i; j < frames_tfed.size(); j++) { 126 | cv::circle(frames_tfed[j], center_frame + center_int, 2, cv::Scalar(120, 255, 120)); 127 | cv::circle(frames_tfed[j], center_frame + center_smoothed_int, 2, cv::Scalar(120, 120, 255)); 128 | } 129 | } 130 | } 131 | 132 | if (options.count("export-frames")) { 133 | for (size_t i = 0; i < frames_tfed.size(); i++) { 134 | const auto& frame = frames_tfed[i]; 135 | std::stringstream str; 136 | str << "frame_" << std::setw(4) << std::setfill('0') << i << ".png"; 137 | cv::imwrite(str.str(), frame); 138 | } 139 | } 140 | 141 | std::cout << "Display..." << std::endl; 142 | display(frames_tfed); 143 | } 144 | --------------------------------------------------------------------------------