├── .gitignore ├── CMakeLists.txt ├── include ├── bundle_adjust.hpp ├── camera_pose.hpp └── sfm.hpp ├── readme.md └── src ├── main.cpp └── sfm.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/ 2 | debug/ 3 | main 4 | main.cpp 5 | datasets/ 6 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | project(sfm VERSION 0.0.1) 3 | 4 | set(CMAKE_CXX_STANDARD 14) 5 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -Wpedantic -Wextra") 6 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") 7 | 8 | set(default_build_type "Debug") 9 | if(NOT CMAKE_BUILD_TYPE) 10 | message(STATUS "Setting default build type to: " ${default_build_type}) 11 | set(CMAKE_BUILD_TYPE ${default_build_type}) 12 | endif() 13 | 14 | 15 | # add the executable 16 | add_executable(${CMAKE_PROJECT_NAME} "${PROJECT_SOURCE_DIR}/src/main.cpp" "${PROJECT_SOURCE_DIR}/src/sfm.cpp") 17 | 18 | # include directories 19 | target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE "${PROJECT_SOURCE_DIR}/include") 20 | 21 | # Boost 22 | set(BOOST_ROOT "/usr/local") 23 | # set(BOOST_INCLUDEDIR "${BOOST_ROOT}/include/boost") 24 | # set(BOOST_LIBRARYDIR "${BOOST_ROOT}/lib") 25 | set(Boost_NO_SYSTEM_PATHS TRUE) 26 | set(Boost_USE_MULTITHREADED OFF) 27 | # set(Boost_DEBUG TRUE) 28 | set(Boost_DETAILED_FAILURE_MSG TRUE) 29 | 30 | find_package(Boost 1.69.0 REQUIRED COMPONENTS system filesystem) 31 | target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE ${Boost_INCLUDE_DIRS}) 32 | target_link_libraries(${CMAKE_PROJECT_NAME} ${Boost_LIBRARIES}) 33 | 34 | # OpenCV 35 | find_package(OpenCV 3.3 REQUIRED core highgui imgproc features2d calib3d imgcodecs) 36 | message(STATUS "OpenCV_INCLUDE_DIRS: " ${OpenCV_INCLUDE_DIRS}) 37 | message(STATUS "Opencv_LIBS: " ${OpenCV_LIBS}) 38 | target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS}) 39 | target_link_libraries(${CMAKE_PROJECT_NAME} ${OpenCV_LIBS}) 40 | 41 | # Eigen 42 | find_package(Eigen3 3.3 REQUIRED NO_MODULES) 43 | target_link_libraries(${CMAKE_PROJECT_NAME} Eigen3::Eigen) 44 | 45 | # Ceres 46 | find_package(Ceres 1.14 REQUIRED) 47 | target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE ${CERES_INCLUDE_DIRS}) 48 | target_link_libraries(${CMAKE_PROJECT_NAME} ${CERES_LIBRARIES}) 49 | 50 | # PCL 51 | find_package(PCL 1.7 REQUIRED COMPONENTS common io visualization) 52 | target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE ${PCL_INCLUDE_DIRS}) 53 | add_definitions(${PCL_DEFINITIONS}) 54 | target_link_libraries(${CMAKE_PROJECT_NAME} ${PCL_LIBRARIES}) 55 | 56 | 57 | -------------------------------------------------------------------------------- /include/bundle_adjust.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | struct SnavelyReprojectionError 7 | { 8 | SnavelyReprojectionError(const double observed_x, 9 | const double observed_y, 10 | const double focal, 11 | const double cx, 12 | const double cy) 13 | : observed_x(observed_x), observed_y(observed_y), focal(focal), cx(cx), cy(cy) {} 14 | 15 | template 16 | bool operator()(const T *const _T, 17 | const T *const landmark_x, 18 | const T *const landmark_y, 19 | const T *const landmark_z, 20 | T *residuals) const 21 | { 22 | // R_inverse = R.transpose 23 | T R_inv[9] = {_T[0], _T[4], _T[8], _T[1], _T[5], _T[9], _T[2], _T[6], _T[10]}; 24 | 25 | // [X', Y', Z'] = [X-t_x, Y-t_y, Z-t_z] 26 | T pt3d[3] = {landmark_x[0] - _T[3], landmark_y[0] - _T[7], landmark_z[0] - _T[11]}; 27 | // [X'', Y'', Z''] = R_inverse.[X', Y', Z'] 28 | T p[3] = {R_inv[0] * pt3d[0] + R_inv[1] * pt3d[1] + R_inv[2] * pt3d[2], 29 | R_inv[3] * pt3d[0] + R_inv[4] * pt3d[1] + R_inv[5] * pt3d[2], 30 | R_inv[6] * pt3d[0] + R_inv[7] * pt3d[1] + R_inv[8] * pt3d[2]}; 31 | 32 | // Normalize homogeneous coordinates 33 | T xp = p[0] / p[2]; 34 | T yp = p[1] / p[2]; 35 | 36 | // Compute final projected point position. 37 | T predicted_x = focal * xp + cx; 38 | T predicted_y = focal * yp + cy; 39 | 40 | // The error is the difference between the predicted and observed position. 41 | residuals[0] = predicted_x - observed_x; 42 | residuals[1] = predicted_y - observed_y; 43 | 44 | return true; 45 | } 46 | 47 | // Factory to hide the construction of the CostFunction object from 48 | // the client code. 49 | static ceres::CostFunction *Create(const double observed_x, 50 | const double observed_y, 51 | const double focal, 52 | const double cx, 53 | const double cy) 54 | { 55 | return (new ceres::AutoDiffCostFunction( 56 | new SnavelyReprojectionError(observed_x, observed_y, focal, cx, cy))); 57 | } 58 | 59 | double observed_x; 60 | double observed_y; 61 | double focal, cx, cy; 62 | }; -------------------------------------------------------------------------------- /include/camera_pose.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | struct CameraPose 10 | { 11 | 12 | cv::Mat T; // transform [R|t] 13 | cv::Mat P; // projection K*T 14 | cv::Mat image; // downsampled image 15 | std::vector keypoints; // list of keypoints 16 | cv::Mat descriptors; // matrix of keypoint descriptors 17 | 18 | std::map> keypoint_match; // maps current camera's keypoint id to other cameras' keypoint id 19 | std::map keypoint_landmark; // maps a keypoint id in current camera to a landmark id 20 | 21 | bool is_keypoint_exists(size_t keypoint_id, size_t camera_index) 22 | { 23 | return keypoint_match[keypoint_id].count(camera_index) > 0; 24 | } 25 | 26 | const size_t &get_keypoint_match(size_t keypoint_id, size_t camera_index) 27 | { 28 | return keypoint_match[keypoint_id][camera_index]; 29 | } 30 | 31 | bool is_keypoint_landmark(size_t keypoint_id) 32 | { 33 | return keypoint_landmark.count(keypoint_id) > 0; 34 | } 35 | 36 | size_t &get_keypoint_landmark(size_t keypoint_id) 37 | { 38 | return keypoint_landmark[keypoint_id]; 39 | } 40 | }; 41 | 42 | struct Landmark 43 | { 44 | cv::Point3d pt3d; 45 | cv::Vec3b color; 46 | size_t id; 47 | uint visible = 0; 48 | }; -------------------------------------------------------------------------------- /include/sfm.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | #include "camera_pose.hpp" 15 | 16 | class SFM 17 | { 18 | public: 19 | cv::Mat K; // camera intrinsics 20 | std::vector poses; // camera poses 21 | std::vector landmarks; // 3d points 22 | 23 | private: 24 | const int IMAGE_DOWNSAMPLE; // downsample the image to speed up processing 25 | const double FOCAL_LENGTH_X; // focal length in pixels, after downsampling, guess from jpeg EXIF data 26 | const double FOCAL_LENGTH_Y; // focal length in pixels, after downsampling, guess from jpeg EXIF data 27 | const uint MIN_LANDMARK_SEEN; // minimum number of camera views a 3d point (landmark) has to be seen in to be used 28 | const std::string IMAGE_DIR; // Dataset directory 29 | 30 | private: 31 | void initialise_intrinsics(); 32 | 33 | public: 34 | SFM(uint downsample, 35 | double fx, double fy, 36 | uint landmark_visbility, 37 | std::string image_directory) 38 | : IMAGE_DOWNSAMPLE(downsample), 39 | FOCAL_LENGTH_X(fx), 40 | FOCAL_LENGTH_Y(fy), 41 | MIN_LANDMARK_SEEN(landmark_visbility), 42 | IMAGE_DIR(image_directory) {} 43 | 44 | void find_keypoints(); 45 | 46 | void match_keypoints(); 47 | 48 | void triangulate_points(); 49 | 50 | void visualise_pointcloud(std::string name); 51 | 52 | void bundle_adjust(); 53 | 54 | void generate_for_pmvs2(); 55 | }; 56 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | ## Structure from Motion 2 | 3 | Given a sequence of images, estimates the 3D structure of the scene (sparse point cloud) and the relative motion between each image. 4 | 5 | ### Compile 6 | 7 | ``` 8 | $ mkdir build && cd build 9 | ``` 10 | 11 | ``` 12 | $ cmake .. 13 | ``` 14 | 15 | ``` 16 | $ make -j 17 | ``` 18 | 19 | Use ```ccmake``` to further tweak options. 20 | 21 | ### Usage 22 | ``` 23 | $ usage: GLOG_logtostderr=1 ./sfm downsample fx fy dataset 24 | 25 | downsample: (int) scaling factor for images (increases performance for values greater than 1) 26 | fx : (double) focal length in 'px' 27 | -> calculated as image_width(px)*focal_length(mm)/sensor_width(mm) 28 | fy : (double) focal length in 'px' 29 | -> calculated as image_height(px)*focal_length(mm)/sensor_height(mm) 30 | dataset : (string) path to dataset directory 31 | 32 | ``` 33 | 34 | ### Dependency 35 | - OpenCV 3.3 36 | - Eigen 3.3 37 | - Boost 1.69 38 | - Ceres 1.14 39 | - PCL 1.7 40 | -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | #include "sfm.hpp" 7 | 8 | int main(int argc, char *argv[]) 9 | { 10 | google::InitGoogleLogging(argv[0]); 11 | LOG(INFO) << "Starting..." << std::endl; 12 | 13 | if (argc < 5) 14 | { 15 | LOG(ERROR) << R"( 16 | usage: GLOG_logtostderr=1 ./sfm downsample fx fy dataset 17 | 18 | downsample: (int) scaling factor for images (increases performance for values greater than 1) 19 | fx : (double) focal length in 'px' calculated as image_width(px)*focal_length(mm)/sensor_width(mm) 20 | fy : (double) focal length in 'px' calculated as image_height(px)*focal_length(mm)/sensor_height(mm) 21 | dataset : (string) path to dataset directory)"; 22 | 23 | return 1; 24 | } 25 | 26 | int downsample = std::stoi(argv[1]); 27 | double fx = std::stod(argv[2]); 28 | double fy = std::stod(argv[3]); 29 | std::string dataset = argv[4]; 30 | 31 | // int downsample = 4; 32 | // double fx, fy; 33 | // fx = (4368 * 24 / 36); 34 | // fy = (2912 * 24 / 24); 35 | // std::string dataset = "/media/ankurrc/new_volume/sfm/datasets/building"; 36 | // fx = (5472 * 10.4 / 13.20) / downsample; 37 | // fy = (3648 * 10.4 / 8.80) / downsample; 38 | // std::string dataset = "/media/ankurrc/new_volume/sfm/datasets/desk"; 39 | 40 | SFM sfm(downsample, fx, fy, 3, dataset); 41 | sfm.find_keypoints(); 42 | sfm.match_keypoints(); 43 | sfm.triangulate_points(); 44 | sfm.visualise_pointcloud("Triangulated Points: Before bundle adjustment"); 45 | sfm.bundle_adjust(); 46 | sfm.visualise_pointcloud("Triangulated Points: After bundle adjustment"); 47 | 48 | LOG(INFO) << "...done." << std::endl; 49 | return 0; 50 | } -------------------------------------------------------------------------------- /src/sfm.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | #include 14 | 15 | #include 16 | 17 | #include "camera_pose.hpp" 18 | #include "sfm.hpp" 19 | #include "bundle_adjust.hpp" 20 | 21 | void SFM::initialise_intrinsics() 22 | { 23 | double cx = poses[0].image.size().width / 2; 24 | double cy = poses[0].image.size().height / 2; 25 | 26 | K = cv::Mat::eye(3, 3, CV_64F); 27 | K.at(0, 0) = FOCAL_LENGTH_X / IMAGE_DOWNSAMPLE; 28 | K.at(1, 1) = FOCAL_LENGTH_Y / IMAGE_DOWNSAMPLE; 29 | K.at(0, 2) = cx; 30 | K.at(1, 2) = cy; 31 | 32 | LOG(INFO) << "Camera intrinsics are: " << std::endl 33 | << K << std::endl; 34 | } 35 | 36 | void SFM::visualise_pointcloud(std::string name = "Point Cloud") 37 | { 38 | pcl::PointCloud::Ptr cloud(new pcl::PointCloud); 39 | 40 | for (const auto &landmark : landmarks) 41 | { 42 | if (landmark.visible >= MIN_LANDMARK_SEEN) 43 | { 44 | pcl::PointXYZRGB pt; 45 | pt.x = landmark.pt3d.x; 46 | pt.y = landmark.pt3d.y; 47 | pt.z = landmark.pt3d.z; 48 | pt.b = landmark.color[0]; 49 | pt.g = landmark.color[1]; 50 | pt.r = landmark.color[2]; 51 | cloud->push_back(pt); 52 | } 53 | } 54 | 55 | pcl::visualization::CloudViewer viewer(name); 56 | viewer.showCloud(cloud); 57 | while (!viewer.wasStopped()) 58 | { 59 | } 60 | } 61 | 62 | void SFM::find_keypoints() 63 | { 64 | 65 | using namespace boost::filesystem; 66 | using namespace cv; 67 | using namespace std; 68 | 69 | path image_directory = path(IMAGE_DIR); 70 | if (!is_directory(image_directory)) 71 | { 72 | LOG(FATAL) << "Please specify a path to a directory!" << endl; 73 | } 74 | 75 | Ptr feature_extractor = cv::AKAZE::create(); 76 | 77 | for (const auto &file : directory_iterator(image_directory)) 78 | { 79 | if (!is_regular_file(file)) 80 | continue; 81 | 82 | Mat img = imread(file.path().string()); 83 | assert(!img.empty()); 84 | 85 | resize(img, img, img.size() / IMAGE_DOWNSAMPLE); 86 | 87 | CameraPose pose; 88 | pose.image = img; 89 | 90 | cvtColor(img, img, COLOR_BGR2GRAY); 91 | 92 | vector kps; 93 | Mat descriptors; 94 | feature_extractor->detectAndCompute(img, noArray(), kps, descriptors); 95 | 96 | pose.keypoints = move(kps); 97 | pose.descriptors = move(descriptors); 98 | 99 | poses.emplace_back(pose); 100 | } 101 | 102 | LOG(INFO) << "Found " << poses.size() << " cameras." << endl; 103 | } 104 | 105 | void SFM::match_keypoints() 106 | { 107 | using namespace cv; 108 | using namespace std; 109 | 110 | Ptr matcher = DescriptorMatcher::create("BruteForce-Hamming"); 111 | 112 | const char *window_name = "Keypoint matches"; 113 | namedWindow(window_name, WINDOW_NORMAL); 114 | 115 | for (size_t i = 0; i < poses.size(); i++) 116 | { 117 | CameraPose &src_pose = poses[i]; 118 | for (size_t j = i + 1; j < poses.size(); j++) 119 | { 120 | CameraPose &dst_pose = poses[j]; 121 | 122 | vector src_2d, dst_2d; // temporary holder for 2d image points; will be refined gradually 123 | vector> matches; // holder for keypoint match pairs 124 | vector src_kps, dst_kps; // holder for keypoints; will be refined 125 | 126 | matcher->knnMatch(src_pose.descriptors, dst_pose.descriptors, matches, 2); 127 | for (const auto &match : matches) 128 | { 129 | // check goodness; if criteria met add to map of keypoint matches 130 | if (match[0].distance < 0.7 * match[1].distance) 131 | { 132 | // store the corresponding 2d points 133 | src_2d.emplace_back(src_pose.keypoints[match[0].queryIdx].pt); 134 | dst_2d.emplace_back(dst_pose.keypoints[match[0].trainIdx].pt); 135 | 136 | // store the corresponding keypoints 137 | src_kps.emplace_back(match[0].queryIdx); 138 | dst_kps.emplace_back(match[0].trainIdx); 139 | } 140 | } 141 | 142 | vector outlier_mask; // mask bad indices 143 | // refine further; check if points meet the fundamental matrix constraint 144 | findFundamentalMat(src_2d, dst_2d, CV_FM_RANSAC, 3.0, 0.9, outlier_mask); 145 | 146 | Mat canvas = src_pose.image.clone(); 147 | canvas.push_back(dst_pose.image.clone()); 148 | 149 | for (size_t k = 0; k < outlier_mask.size(); k++) 150 | { 151 | if (outlier_mask[k]) 152 | { 153 | src_pose.keypoint_match[src_kps[k]][j] = dst_kps[k]; 154 | dst_pose.keypoint_match[dst_kps[k]][i] = src_kps[k]; 155 | 156 | line(canvas, src_2d[k], dst_2d[k] + Point2f(0, src_pose.image.rows), Scalar(0, 0, 255)); 157 | } 158 | } 159 | 160 | LOG(INFO) << "Matches between Camera " << i << " --> Camera " << j << " := " 161 | << sum(outlier_mask)[0] << "/" << src_pose.keypoints.size() << endl; 162 | 163 | imshow(window_name, canvas); 164 | waitKey(1); 165 | } 166 | } 167 | 168 | destroyAllWindows(); 169 | } 170 | 171 | void SFM::triangulate_points() 172 | { 173 | using namespace cv; 174 | using namespace std; 175 | 176 | initialise_intrinsics(); 177 | 178 | // fix the first camera as the origin 179 | poses[0].T = Mat::eye(4, 4, CV_64F); 180 | poses[0].P = K * Mat::eye(3, 4, CV_64F); 181 | 182 | const char *window_name = "Keypoint matches after triangulation"; 183 | namedWindow(window_name, WINDOW_NORMAL); 184 | 185 | // random number seeding 186 | // random_device rd; // obtain a random number from hardware 187 | // mt19937 rnd_eng(rd()); // seed the generator 188 | 189 | for (size_t i = 0; i < poses.size() - 1; i++) 190 | { 191 | CameraPose &prev = poses[i]; 192 | CameraPose &curr = poses[i + 1]; 193 | 194 | // accumulate matching keypoints 195 | vector src_2d, dst_2d; 196 | vector kp_used; 197 | 198 | for (size_t j = 0; j < prev.keypoints.size(); j++) 199 | { 200 | if (prev.is_keypoint_exists(j, i + 1)) 201 | { 202 | size_t curr_kp_idx = prev.get_keypoint_match(j, i + 1); 203 | src_2d.emplace_back(prev.keypoints[j].pt); 204 | dst_2d.emplace_back(curr.keypoints[curr_kp_idx].pt); 205 | 206 | kp_used.emplace_back(j); 207 | } 208 | } 209 | 210 | if (kp_used.size() < 7) 211 | { 212 | LOG(FATAL) << "Image " << i << " and image " << i + 1 << " have only " << kp_used.size() 213 | << " matching points. Need atleast 7 to compute the pose between them. Exiting..." << endl; 214 | } 215 | 216 | // NOTE: pose from dst to src: C(n-1)->C(n)... (transformation bw the previous and the current camera) 217 | Mat outlier_mask; 218 | Mat E, local_R, local_t; 219 | E = findEssentialMat(dst_2d, src_2d, K, RANSAC, 0.999, 1.0, outlier_mask); 220 | 221 | recoverPose(E, dst_2d, src_2d, K, local_R, local_t, outlier_mask); 222 | 223 | // camera center transformation 224 | Mat local_T = Mat::eye(4, 4, CV_64F); 225 | local_R.copyTo(local_T(Range(0, 3), Range(0, 3))); 226 | local_t.copyTo(local_T(Range(0, 3), Range(3, 4))); 227 | 228 | // total transformation: C0->C1->C2... (transformation bw the first and the current camera) 229 | curr.T = prev.T * local_T; 230 | 231 | // projection matrix 232 | Mat R = curr.T(Range(0, 3), Range(0, 3)); 233 | Mat t = curr.T(Range(0, 3), Range(3, 4)); 234 | Mat P(3, 4, CV_64F); 235 | 236 | P(Range(0, 3), Range(0, 3)) = R.t(); 237 | P(Range(0, 3), Range(3, 4)) = -R.t() * t; 238 | P = K * P; // projection matrix 239 | 240 | curr.P = P; 241 | 242 | // trinagulate points 243 | Mat landmarks_4d; 244 | triangulatePoints(prev.P, curr.P, src_2d, dst_2d, landmarks_4d); 245 | 246 | Mat canvas = prev.image.clone(); 247 | canvas.push_back(curr.image.clone()); 248 | 249 | // attempt to rescale transformation if current landmark is visible in previous frames 250 | if (i > 0) 251 | { 252 | double scale = 0; 253 | int count = 0; 254 | 255 | Point3d prev_camera; 256 | 257 | prev_camera.x = prev.T.at(0, 3); 258 | prev_camera.y = prev.T.at(1, 3); 259 | prev_camera.z = prev.T.at(2, 3); 260 | 261 | vector new_pts; 262 | vector existing_pts; 263 | 264 | for (size_t j = 0; j < kp_used.size(); j++) 265 | { 266 | size_t k = kp_used[j]; 267 | if (outlier_mask.at(j) && prev.is_keypoint_exists(k, i + 1) && prev.is_keypoint_landmark(k)) 268 | { 269 | Point3d landmark_3d; 270 | 271 | landmark_3d.x = landmarks_4d.at(0, j) / landmarks_4d.at(3, j); 272 | landmark_3d.y = landmarks_4d.at(1, j) / landmarks_4d.at(3, j); 273 | landmark_3d.z = landmarks_4d.at(2, j) / landmarks_4d.at(3, j); 274 | 275 | size_t idx = prev.get_keypoint_landmark(k); 276 | Point3d avg_landmark; 277 | avg_landmark.x = landmarks[idx].pt3d.x / static_cast(landmarks[idx].visible - 1); 278 | avg_landmark.y = landmarks[idx].pt3d.y / static_cast(landmarks[idx].visible - 1); 279 | avg_landmark.z = landmarks[idx].pt3d.z / static_cast(landmarks[idx].visible - 1); 280 | 281 | new_pts.push_back(landmark_3d); 282 | existing_pts.push_back(avg_landmark); 283 | } 284 | } 285 | 286 | // ratio of distance for all possible point pairing 287 | size_t samples = new_pts.size() - 1; 288 | if (samples > 0) 289 | { 290 | // uniform_int_distribution<> dist_j(0, samples); 291 | // uniform_int_distribution<> dist_k(0, samples); 292 | 293 | for (size_t j = 0; j < samples; j++) 294 | { 295 | // int j_idx = dist_j(rnd_eng); 296 | for (size_t k = j + 1; k < samples; k++) 297 | { 298 | // int k_idx = dist_k(rnd_eng); 299 | double s = norm(existing_pts[j] - existing_pts[k]) / norm(new_pts[j] - new_pts[k]); 300 | 301 | scale += s; 302 | count++; 303 | } 304 | } 305 | } 306 | 307 | // DLOG(INFO) << "samples: " << samples << "\tcount: " << count << "\tscale: " << scale << endl; 308 | 309 | if (count > 0) 310 | { 311 | scale /= count; 312 | 313 | LOG(INFO) << "image " << (i + 1) << " ==> " << i << " scale=" << scale << " count=" << count << endl; 314 | 315 | // apply scale and re-calculate T and P matrix 316 | local_t *= scale; 317 | 318 | // local tansform 319 | Mat T = Mat::eye(4, 4, CV_64F); 320 | local_R.copyTo(T(Range(0, 3), Range(0, 3))); 321 | local_t.copyTo(T(Range(0, 3), Range(3, 4))); 322 | 323 | // accumulate transform 324 | curr.T = prev.T * T; 325 | 326 | // make projection ,matrix 327 | R = curr.T(Range(0, 3), Range(0, 3)); 328 | t = curr.T(Range(0, 3), Range(3, 4)); 329 | 330 | Mat P(3, 4, CV_64F); 331 | P(Range(0, 3), Range(0, 3)) = R.t(); 332 | P(Range(0, 3), Range(3, 4)) = -R.t() * t; 333 | P = K * P; 334 | 335 | curr.P = P; 336 | triangulatePoints(prev.P, curr.P, src_2d, dst_2d, landmarks_4d); 337 | } 338 | } 339 | 340 | // Find good triangulated points 341 | for (size_t j = 0; j < kp_used.size(); j++) 342 | { 343 | if (outlier_mask.at(j)) 344 | { 345 | size_t k = kp_used[j]; 346 | size_t kp_match = prev.get_keypoint_match(k, i + 1); 347 | 348 | Point3d landmark_3d; 349 | 350 | landmark_3d.x = landmarks_4d.at(0, j) / landmarks_4d.at(3, j); 351 | landmark_3d.y = landmarks_4d.at(1, j) / landmarks_4d.at(3, j); 352 | landmark_3d.z = landmarks_4d.at(2, j) / landmarks_4d.at(3, j); 353 | 354 | if (prev.is_keypoint_landmark(k)) 355 | { 356 | // Found a match with an existing landmark 357 | curr.get_keypoint_landmark(kp_match) = prev.get_keypoint_landmark(k); 358 | 359 | landmarks[prev.get_keypoint_landmark(k)].pt3d += landmark_3d; 360 | landmarks[prev.get_keypoint_landmark(k)].color = curr.image.at(curr.keypoints[kp_match].pt); 361 | landmarks[curr.get_keypoint_landmark(kp_match)].visible++; // same as "landmarks[prev.get_keypoint_landmark(k)].seen++;" 362 | } 363 | else 364 | { 365 | // Add new 3d point 366 | Landmark landmark; 367 | 368 | landmark.pt3d = landmark_3d; 369 | landmark.color = curr.image.at(curr.keypoints[kp_match].pt); 370 | // landmark.color += prev.image.at(prev.keypoints[k].pt); 371 | landmark.visible = 2; 372 | 373 | landmarks.emplace_back(landmark); 374 | 375 | prev.get_keypoint_landmark(k) = landmarks.size() - 1; 376 | curr.get_keypoint_landmark(kp_match) = landmarks.size() - 1; 377 | } 378 | 379 | line(canvas, src_2d[j], dst_2d[j] + Point2f(0, prev.image.rows), Scalar(0, 0, 255)); 380 | // imshow(window_name, canvas); 381 | // waitKey(1); 382 | } 383 | } 384 | } 385 | 386 | destroyAllWindows(); 387 | 388 | // Avg out the values for the landmarks 389 | for (auto &landmark : landmarks) 390 | { 391 | if (landmark.visible > 2) 392 | { 393 | landmark.pt3d.x /= (landmark.visible - 1); 394 | landmark.pt3d.y /= (landmark.visible - 1); 395 | landmark.pt3d.z /= (landmark.visible - 1); 396 | } 397 | } 398 | } 399 | 400 | void SFM::bundle_adjust() 401 | { 402 | using namespace ceres; 403 | 404 | Problem problem; 405 | 406 | for (auto &pose : poses) 407 | { 408 | auto &T = pose.T; 409 | assert(T.isContinuous()); 410 | 411 | for (const auto &map : pose.keypoint_landmark) 412 | { 413 | const auto kp_id = map.first; 414 | const auto landmark_id = map.second; 415 | 416 | auto &landmark = landmarks[landmark_id]; 417 | if (landmark.visible < MIN_LANDMARK_SEEN) 418 | continue; 419 | 420 | const auto kp = pose.keypoints[kp_id]; 421 | 422 | // prepare datapoints 423 | const auto obs_x = kp.pt.x; 424 | const auto obs_y = kp.pt.y; 425 | const auto focal = K.at(0, 0); 426 | const auto cx = K.at(0, 2); 427 | const auto cy = K.at(1, 2); 428 | 429 | CostFunction *cost_function = 430 | SnavelyReprojectionError::Create(obs_x, obs_y, focal, cx, cy); 431 | problem.AddResidualBlock(cost_function, 432 | NULL /* squared loss */, 433 | T.ptr(), 434 | &(landmark.pt3d.x), 435 | &(landmark.pt3d.y), 436 | &(landmark.pt3d.z)); 437 | } 438 | } 439 | Solver::Options options; 440 | options.linear_solver_type = DENSE_SCHUR; 441 | options.max_num_iterations = 50; 442 | options.use_explicit_schur_complement = true; 443 | options.minimizer_progress_to_stdout = true; 444 | 445 | Solver::Summary summary; 446 | Solve(options, &problem, &summary); 447 | LOG(INFO) << summary.FullReport() << "\n"; 448 | } 449 | 450 | void SFM::generate_for_pmvs2() 451 | { 452 | /* WIP*/ 453 | auto K_ = K.clone(); 454 | K_.at(0, 0) *= IMAGE_DOWNSAMPLE; 455 | K_.at(1, 1) *= IMAGE_DOWNSAMPLE; 456 | K_.at(0, 2) *= IMAGE_DOWNSAMPLE; 457 | K_.at(1, 2) *= IMAGE_DOWNSAMPLE; 458 | } --------------------------------------------------------------------------------