├── .gitignore ├── LICENSE ├── README.md ├── baseline.py ├── images ├── 0010.jpg ├── example02.png └── example102.png ├── main.py ├── match.py ├── sfm.py ├── utils.py └── view.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | env/ 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *,cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # IPython Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # dotenv 81 | .env 82 | 83 | # virtualenv 84 | venv/ 85 | ENV/ 86 | 87 | # Spyder project settings 88 | .spyderproject 89 | 90 | # Rope project settings 91 | .ropeproject 92 | ### VirtualEnv template 93 | # Virtualenv 94 | # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ 95 | .Python 96 | [Bb]in 97 | [Ii]nclude 98 | [Ll]ib 99 | [Ll]ib64 100 | [Ll]ocal 
101 | [Ss]cripts 102 | pyvenv.cfg 103 | .venv 104 | pip-selfcheck.json 105 | ### JetBrains template 106 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 107 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 108 | 109 | # User-specific stuff: 110 | .idea/workspace.xml 111 | .idea/tasks.xml 112 | .idea/dictionaries 113 | .idea/vcs.xml 114 | .idea/jsLibraryMappings.xml 115 | 116 | # Sensitive or high-churn files: 117 | .idea/dataSources.ids 118 | .idea/dataSources.xml 119 | .idea/dataSources.local.xml 120 | .idea/sqlDataSources.xml 121 | .idea/dynamic.xml 122 | .idea/uiDesigner.xml 123 | 124 | # Gradle: 125 | .idea/gradle.xml 126 | .idea/libraries 127 | 128 | # Mongo Explorer plugin: 129 | .idea/mongoSettings.xml 130 | 131 | .idea/ 132 | 133 | ## File-based project format: 134 | *.iws 135 | 136 | ## Plugin-specific files: 137 | 138 | # IntelliJ 139 | /out/ 140 | 141 | # mpeltonen/sbt-idea plugin 142 | .idea_modules/ 143 | 144 | # JIRA plugin 145 | atlassian-ide-plugin.xml 146 | 147 | # Crashlytics plugin (for Android Studio and IntelliJ) 148 | com_crashlytics_export_strings.xml 149 | crashlytics.properties 150 | crashlytics-build.properties 151 | fabric.properties -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Harish Venkataraman 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright 
notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 3D Reconstruction using Structure from Motion 2 | 3 | This repository contains code to reconstruct a scene using [structure from motion](https://en.wikipedia.org/wiki/Structure_from_Motion) (SfM). SfM is a technique to recover 3D structure of a scene by making use of a sequence of 2D images. In the process, the technique also recovers the relative pose of a particular view (an image taken by a camera) with respect to the first view in the sequence. This is a **personal project**. 4 | 5 | The process consists of a series of steps: 6 | 7 | 1. Extract keypoints and feature descriptors from images 8 | 2. Match features between images 9 | 3. Find a suitable baseline (initial two views) to kickstart the reconstruction 10 | 4. Recover pose of the baseline 11 | 5. Reconstruct 3D points 12 | 6. Recover pose of the next view using [Perspective-n-Point](https://en.wikipedia.org/wiki/Perspective-n-Point) 13 | 7. Reconstruct the next set of points 14 | 8. Perform [bundle adjustment](https://en.wikipedia.org/wiki/Bundle_adjustment) 15 | 9. 
Plot points 16 | 17 | ### Requirements 18 | 19 | - Python 3 20 | - Numpy 21 | - Opencv-contrib-python == 3.4.2 22 | - Open3D == 0.8.0.0 23 | 24 | ### Images 25 | 26 | The code has been run on openMVG's [benchmark images](https://github.com/openMVG/SfM_quality_evaluation). The images come with the intrinsic matrix of the camera as a txt file in the images folder. This code has not been tested on images taken on my camera. Users choosing to do so need to calibrate their camera to obtain the intrinsic matrix required for this code. Follow [this](https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_calib3d/py_calibration/py_calibration.html) tutorial to calibrate your camera. 27 | 28 | ### Running the code 29 | 30 | The entry point of the module is the ```main.py``` file. This file requires three arguments - 31 | 32 | - ```--root_dir``` : specifies the path of the folder containing the image folder 33 | - ```--feat_type``` : specifies the kind of features to be extracted from the images (sift/surf/orb), default is sift 34 | - ```--image_format``` : specifies the extension of the images present inside the ```images/``` folder of the ```root_dir```, default is jpg 35 | 36 | For example, if you have downloaded the [benchmark images](https://github.com/openMVG/SfM_quality_evaluation) inside the repository root, then the command would be - 37 | 38 | ```python main.py --root_dir ./SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/entry-P10 --feat_type surf``` 39 | 40 | Or: 41 | 42 | ```python main.py --root_dir ./SfM_quality_evaluation/Benchmarking_Camera_Calibration_2008/fountain-P11 --feat_type sift``` 43 | 44 | Each of the above mentioned folders have an ```images/``` folder that contains the images, along with a txt file called ```K.txt``` which contains the intrinsic matrix. If you are experimenting on a new dataset, use the same folder structure. 
The code stores the computed features and matches in respective pkl files inside the root directory to speed up repeated computations. The results are stored as ply files inside a ```points/``` folder inside the root directory. For visualizing the point clouds inside the ply files, use suitable software such as [MeshLab](https://www.meshlab.net/#description). 45 | 46 | ### Results 47 | 48 | From the ```fountain-P11``` image-set: 49 | 50 | 1. Original image 51 | 52 | ![](./images/0010.jpg) 53 | 54 | 2. Reconstruction after 3 images 55 | 56 | ![](./images/example02.png) 57 | 58 | 3. Reconstruction after 6 images 59 | 60 | ![](./images/example102.png) 61 | 62 | ### Limitations 63 | 64 | - This code is a simplified version of incremental SfM. Incremental SfM adds images sequentially to the reconstruction whereas global SfM considers all camera poses at once. In incremental SfM, the next view is chosen for reconstruction based on how many triangulated points it contains. The process examines the available views to choose the best one. In this code, the images are assumed to be taken in sequential order. For example, ```0003.jpg``` is taken before ```0004.jpg```, and the baseline views are ```0000.jpg``` and ```0001.jpg```. The images are sorted based on their names before reconstruction. View selection and filtering are currently not implemented here. 65 | - Bundle adjustment is not implemented here, so some reconstructions may have high reprojection error and, consequently, erroneous results. 66 | 67 | ### References 68 | 69 | 1. [Hartley and Zisserman's Multiple View Geometry](https://www.amazon.com/Multiple-View-Geometry-Computer-Vision/dp/0521540518) 70 | 2. [Mastering OpenCV with Practical Computer Vision Projects](https://www.packtpub.com/application-development/mastering-opencv-practical-computer-vision-projects) 71 | 3. [Colmap](https://github.com/colmap/colmap) 72 | 4. [OpenSfM](https://www.opensfm.org/) 73 | 5. 
class Baseline:
    """Estimates the pose of the second of the two views that seed a reconstruction, relative to the first"""

    def __init__(self, view1, view2, match_object):

        self.match_object = match_object  # matches linking the two baseline views
        self.view2 = view2  # view whose pose is estimated
        self.view1 = view1  # reference view
        self.view1.R = np.eye(3, 3)  # the reference view is assigned the identity rotation

    def get_pose(self, K):
        """Returns the (R, t) pair selected for the second view, given the intrinsic matrix K"""

        # fitting F also refreshes match_object.inliers1 / inliers2, which triangulate() reads
        fundamental = remove_outliers_using_F(self.view1, self.view2, self.match_object)
        essential = K.T @ fundamental @ K  # E = K^T F K
        logging.info("Computed essential matrix")
        logging.info("Choosing correct pose out of 4 solutions")

        return self.check_pose(essential, K)

    def check_pose(self, E, K):
        """Decomposes E into its 4 candidate (R, t) pairs and returns the first one that triangulates acceptably.

        Candidates are tried in the order (R1, t1), (R1, t2), (R2, t1); (R2, t2) is returned without being
        tested when the first three are all rejected"""

        R1, R2, t1, t2 = get_camera_from_E(E)
        if not check_determinant(R1):
            # retry with the sign of E flipped when R1 fails the determinant test
            R1, R2, t1, t2 = get_camera_from_E(-E)

        for rotation, translation in ((R1, t1), (R1, t2), (R2, t1)):
            mean_error, cloud = self.triangulate(K, rotation, translation)
            # a candidate is rejected when its mean reprojection error is too large or when too few
            # triangulated points pass check_triangulation for this camera
            rejected = mean_error > 100.0 or not check_triangulation(cloud, np.hstack((rotation, translation)))
            if not rejected:
                return rotation, translation

        return R2, t2

    def triangulate(self, K, R, t):
        """Triangulates the inlier matches of the baseline views for the candidate pose (R, t).

        Returns the mean reprojection error measured in the second view and the n x 3 array of points"""

        intrinsics_inverse = np.linalg.inv(K)
        camera1 = np.hstack((self.view1.R, self.view1.t))
        camera2 = np.hstack((R, t))

        # only the matches kept by the fundamental-matrix filter are triangulated
        pixels1, pixels2 = get_keypoints_from_indices(keypoints1=self.view1.keypoints,
                                                      keypoints2=self.view2.keypoints,
                                                      index_list1=self.match_object.inliers1,
                                                      index_list2=self.match_object.inliers2)

        # lift the 2D pixel coordinates to homogeneous coordinates
        pixels1 = cv2.convertPointsToHomogeneous(pixels1)[:, 0, :]
        pixels2 = cv2.convertPointsToHomogeneous(pixels2)[:, 0, :]

        errors = []
        rows = [np.zeros((0, 3))]  # seed keeps the (0, 3) shape when there are no matches

        for u1, u2 in zip(pixels1, pixels2):
            # triangulate from the points expressed in normalized (K^-1) coordinates
            point_3D = get_3D_point(intrinsics_inverse.dot(u1), camera1, intrinsics_inverse.dot(u2), camera2)

            # error of the point reprojected into the second view
            errors.append(calculate_reprojection_error(point_3D, u2[0:2], K, R, t))
            rows.append(point_3D.T)

        return np.mean(errors), np.concatenate(rows, axis=0)
def run(args):
    """Run the full pipeline: build views, match them, load K and reconstruct.

    Args:
        args: parsed command-line namespace providing ``root_dir`` and
            ``image_format`` (see ``set_args``).

    NOTE(review): ``args.feat_type`` is parsed but never forwarded here;
    ``create_views`` is called with the root directory and image format only,
    so the command-line feature type currently has no effect on extraction.
    """

    logging.basicConfig(level=logging.INFO)
    views = create_views(args.root_dir, args.image_format)
    matches = create_matches(views)
    # the intrinsic matrix is read from <root_dir>/images/K.txt
    K = np.loadtxt(os.path.join(args.root_dir, 'images', 'K.txt'))
    sfm = SFM(views, matches, K)
    sfm.reconstruct()


def set_args(parser):
    """Register the command-line options of the program on ``parser``.

    Args:
        parser: an ``argparse.ArgumentParser`` to add the options to.

    Options added:
        --root_dir      directory containing the ``images/`` folder (required).
        --feat_type     one of sift / surf / orb, matched case-insensitively;
                        defaults to sift.
        --image_format  extension of the images; defaults to jpg.
    """

    # required: without it run() would fail later with a TypeError on a None path
    parser.add_argument('--root_dir', action='store', type=str, dest='root_dir', required=True,
                        help='root directory containing the images/ folder')
    # str.lower runs before the choices check, so "SIFT" and "sift" are both accepted
    parser.add_argument('--feat_type', action='store', type=str.lower, dest='feat_type', default='sift',
                        choices=('sift', 'surf', 'orb'),
                        help='type of features to be extracted [sift | surf | orb]')
    parser.add_argument('--image_format', action='store', type=str, dest='image_format', default='jpg',
                        help='extension of the images in the images/ folder')


if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    set_args(parser)
    args = parser.parse_args()
    run(args)
self.indices1.append(matches[i].queryIdx) 42 | self.indices2.append(matches[i].trainIdx) 43 | self.distances.append(matches[i].distance) 44 | 45 | logging.info("Computed matches between view %s and view %s", self.image_name1, self.image_name2) 46 | 47 | self.write_matches() 48 | 49 | def write_matches(self): 50 | """Writes a match to a pkl file in the root_path/matches directory""" 51 | 52 | if not os.path.exists(os.path.join(self.root_path, 'matches')): 53 | os.makedirs(os.path.join(self.root_path, 'matches')) 54 | 55 | temp_array = [] 56 | for i in range(len(self.indices1)): 57 | temp = (self.distances[i], self.indices1[i], self.indices2[i]) 58 | temp_array.append(temp) 59 | 60 | matches_file = open(os.path.join(self.root_path, 'matches', self.image_name1 + '_' + self.image_name2 + '.pkl'), 'wb') 61 | pickle.dump(temp_array, matches_file) 62 | matches_file.close() 63 | 64 | def read_matches(self): 65 | """Reads matches from file""" 66 | 67 | try: 68 | matches = pickle.load( 69 | open( 70 | os.path.join(self.root_path, 'matches', self.image_name1 + '_' + self.image_name2 + '.pkl'), 71 | "rb" 72 | ) 73 | ) 74 | logging.info("Read matches from file for view pair pair %s %s", self.image_name1, self.image_name2) 75 | 76 | for point in matches: 77 | self.distances.append(point[0]) 78 | self.indices1.append(point[1]) 79 | self.indices2.append(point[2]) 80 | 81 | except FileNotFoundError: 82 | logging.error("Pkl file not found for match %s_%s. 
class SFM:
    """Represents the main reconstruction loop"""

    def __init__(self, views, matches, K):
        """Stores the inputs and creates the root_path/points output directory.

        views is the list of views in reconstruction order, matches the dictionary of match objects
        keyed by (name of first view, name of second view) and K the intrinsic matrix"""

        self.views = views  # list of views
        self.matches = matches  # dictionary of matches
        self.names = [view.name for view in views]  # image names, same order as views
        self.done = []  # list of views that have been reconstructed
        self.K = K  # intrinsic matrix
        self.points_3D = np.zeros((0, 3))  # reconstructed 3D points
        self.point_counter = 0  # keeps track of the reconstructed points
        self.point_map = {}  # maps (view index, keypoint index) to the row of the 3D point it contributed to
        self.errors = []  # list of mean reprojection errors taken at the end of every new view being added

        # store results in root_path/points; exist_ok replaces the separate existence check
        self.results_path = os.path.join(self.views[0].root_path, 'points')
        os.makedirs(self.results_path, exist_ok=True)

    def get_index_of_view(self, view):
        """Extracts the position of a view in the list of views"""

        return self.names.index(view.name)

    def remove_mapped_points(self, match_object, image_idx):
        """Drops from match_object the inlier pairs whose first-view keypoint is already in point_map.

        image_idx is the index of the first view as used in the point_map keys. The inlier
        attributes of match_object are replaced by plain lists"""

        kept = [(idx1, idx2)
                for idx1, idx2 in zip(match_object.inliers1, match_object.inliers2)
                if (image_idx, idx1) not in self.point_map]

        match_object.inliers1 = [idx1 for idx1, _ in kept]
        match_object.inliers2 = [idx2 for _, idx2 in kept]

    def compute_pose(self, view1, view2=None, is_baseline=False):
        """Computes the pose of the new view and triangulates new points.

        With is_baseline set and view2 given, the pose of view2 is recovered from the baseline pair;
        otherwise the pose of view1 is recovered with PnP against the points reconstructed so far.
        Mean reprojection errors are appended to self.errors"""

        # procedure for baseline pose estimation
        if is_baseline and view2 is not None:

            match_object = self.matches[(view1.name, view2.name)]
            baseline_pose = Baseline(view1, view2, match_object)
            view2.R, view2.t = baseline_pose.get_pose(self.K)

            rpe1, rpe2 = self.triangulate(view1, view2)
            self.errors.append(np.mean(rpe1))
            self.errors.append(np.mean(rpe2))

            self.done.append(view1)
            self.done.append(view2)

        # procedure for estimating the pose of all other views
        else:

            view1.R, view1.t = self.compute_pose_PNP(view1)
            errors = []

            # reconstruct unreconstructed points from all of the previous views
            for old_view in self.done:

                match_object = self.matches[(old_view.name, view1.name)]
                _ = remove_outliers_using_F(old_view, view1, match_object)
                # use the same view index as triangulate() does for the point_map keys, instead of
                # relying on the position in self.done coinciding with it
                self.remove_mapped_points(match_object, self.get_index_of_view(old_view))
                _, rpe = self.triangulate(old_view, view1)
                errors += rpe

            self.done.append(view1)
            self.errors.append(np.mean(errors))

    def triangulate(self, view1, view2):
        """Triangulates 3D points from two views whose poses have been recovered.

        Appends the points to self.points_3D, records both contributing keypoints in point_map and
        returns the two lists of per-point reprojection errors (first view, second view)"""

        K_inv = np.linalg.inv(self.K)
        P1 = np.hstack((view1.R, view1.t))
        P2 = np.hstack((view2.R, view2.t))

        match_object = self.matches[(view1.name, view2.name)]
        pixel_points1, pixel_points2 = get_keypoints_from_indices(keypoints1=view1.keypoints,
                                                                  keypoints2=view2.keypoints,
                                                                  index_list1=match_object.inliers1,
                                                                  index_list2=match_object.inliers2)
        pixel_points1 = cv2.convertPointsToHomogeneous(pixel_points1)[:, 0, :]
        pixel_points2 = cv2.convertPointsToHomogeneous(pixel_points2)[:, 0, :]
        reprojection_error1 = []
        reprojection_error2 = []

        # loop invariants: the view indices do not change while triangulating one pair
        view_idx1 = self.get_index_of_view(view1)
        view_idx2 = self.get_index_of_view(view2)
        new_points = []  # appended to self.points_3D once, after the loop

        for i, (u1, u2) in enumerate(zip(pixel_points1, pixel_points2)):

            u1_normalized = K_inv.dot(u1)
            u2_normalized = K_inv.dot(u2)

            point_3D = get_3D_point(u1_normalized, P1, u2_normalized, P2)
            new_points.append(point_3D.T)

            error1 = calculate_reprojection_error(point_3D, u1[0:2], self.K, view1.R, view1.t)
            reprojection_error1.append(error1)
            error2 = calculate_reprojection_error(point_3D, u2[0:2], self.K, view2.R, view2.t)
            reprojection_error2.append(error2)

            # updates point_map with the key (index of view, index of point in the view) and value point_counter
            # multiple keys can have the same value because a 3D point is reconstructed using 2 points
            self.point_map[(view_idx1, match_object.inliers1[i])] = self.point_counter
            self.point_map[(view_idx2, match_object.inliers2[i])] = self.point_counter
            self.point_counter += 1

        if new_points:
            # single concatenation instead of re-copying the whole point array for every new point
            self.points_3D = np.concatenate([self.points_3D] + new_points, axis=0)

        return reprojection_error1, reprojection_error2

    def compute_pose_PNP(self, view):
        """Computes pose of new view using perspective n-point.

        Descriptors of the new view are matched against those of all reconstructed views; matches
        whose old keypoint already has a 3D point give the 2D-3D pairs passed to solvePnPRansac.
        Returns the rotation matrix and the translation vector"""

        if view.feature_type in ['sift', 'surf']:
            matcher = cv2.BFMatcher(cv2.NORM_L2, crossCheck=False)
        else:
            matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=False)

        # collects all the descriptors of the reconstructed views
        old_descriptors = [old_view.descriptors for old_view in self.done]

        # match old descriptors against the descriptors in the new view
        matcher.add(old_descriptors)
        matcher.train()
        matches = matcher.match(queryDescriptors=view.descriptors)

        # build corresponding lists of 2D points and 3D points
        matched_2D, matched_3D = [], []
        for match in matches:
            # NOTE(review): imgIdx is a position in self.done, while point_map is keyed by the
            # position in self.views; the two agree only while views are added in list order
            point_index = self.point_map.get((match.imgIdx, match.trainIdx))
            if point_index is None:
                continue

            matched_2D.append(view.keypoints[match.queryIdx].pt)
            matched_3D.append(self.points_3D[point_index])

        # reshape keeps the (0, 2) / (0, 3) shapes when nothing matched
        points_2D = np.array(matched_2D, dtype=float).reshape((-1, 2))
        points_3D = np.array(matched_3D, dtype=float).reshape((-1, 3))

        # compute new pose using solvePnPRansac
        success, R, t, _ = cv2.solvePnPRansac(points_3D[:, np.newaxis], points_2D[:, np.newaxis], self.K, None,
                                              confidence=0.99, reprojectionError=8.0, flags=cv2.SOLVEPNP_DLS)
        if not success:
            logging.warning("solvePnPRansac did not report success for view %s (%d correspondences)",
                            view.name, len(matched_2D))
        R, _ = cv2.Rodrigues(R)
        return R, t

    def plot_points(self):
        """Saves the reconstructed 3D points to a ply file named after the number of completed views"""

        number = len(self.done)
        filename = os.path.join(self.results_path, str(number) + '_images.ply')
        pcd = o3d.geometry.PointCloud()
        pcd.points = o3d.utility.Vector3dVector(self.points_3D)
        o3d.io.write_point_cloud(filename, pcd)

    def reconstruct(self):
        """Starts the main reconstruction loop for a given set of views and matches.

        The first two views form the baseline; every later view is added in list order and a ply
        file is written after each step"""

        # compute baseline pose
        baseline_view1, baseline_view2 = self.views[0], self.views[1]
        logging.info("Computing baseline pose and reconstructing points")
        self.compute_pose(view1=baseline_view1, view2=baseline_view2, is_baseline=True)
        logging.info("Mean reprojection error for 1 image is %f", self.errors[0])
        logging.info("Mean reprojection error for 2 images is %f", self.errors[1])
        self.plot_points()
        logging.info("Points plotted for %d views", len(self.done))

        for i in range(2, len(self.views)):

            logging.info("Computing pose and reconstructing points for view %d", i+1)
            self.compute_pose(view1=self.views[i])
            logging.info("Mean reprojection error for %d images is %f", i+1, self.errors[i])
            self.plot_points()
            logging.info("Points plotted for %d views", i+1)
def get_camera_from_E(E):
    """Derives the two candidate rotations and two candidate translations from an essential matrix.

    With E = U S V^T, the rotations are U W V^T and U W^T V^T and the translations are plus and minus
    the last column of U, returned as 3 x 1 vectors. The result is the tuple (R1, R2, t1, t2)"""

    left, _singular_values, right_t = np.linalg.svd(E)

    quarter_turn = np.array([[0, -1, 0],
                             [1, 0, 0],
                             [0, 0, 1]])

    rotation_a = left @ quarter_turn @ right_t
    rotation_b = left @ quarter_turn.T @ right_t
    translation = left[:, -1].reshape((3, 1))

    return rotation_a, rotation_b, translation, -translation
class View:
    """Represents an image used in the reconstruction"""

    def __init__(self, image_path, root_path, feature_path, feature_type='sift'):
        """Loads the image and obtains its features.

        image_path is the path of the image file, root_path the directory containing the image
        folder, feature_path a flag telling whether cached feature files should be read, and
        feature_type the extraction method (sift, surf or orb)"""

        self.name = self._name_from_path(image_path)  # image name without extension
        self.image = cv2.imread(image_path)  # numpy array of the image
        self.keypoints = []  # list of keypoints obtained from feature extraction
        self.descriptors = []  # list of descriptors obtained from feature extraction
        self.feature_type = feature_type  # feature extraction method
        self.root_path = root_path  # root directory containing the image folder
        self.R = np.zeros((3, 3), dtype=float)  # rotation matrix for the view
        self.t = np.zeros((3, 1), dtype=float)  # translation vector for the view

        if not feature_path:
            self.extract_features()
        else:
            self.read_features()

    @staticmethod
    def _name_from_path(image_path):
        """Returns the file name of image_path without directory and extension"""

        # basename/splitext handle the platform path separator and extensions of any length,
        # unlike slicing on '/' and dropping a fixed four characters
        return os.path.splitext(os.path.basename(image_path))[0]

    def extract_features(self):
        """Extracts keypoints and descriptors from the image and caches them with write_features.

        Exits the program with a non-zero status when feature_type is not sift, surf or orb"""

        if self.feature_type == 'sift':
            detector = cv2.xfeatures2d.SIFT_create()
        elif self.feature_type == 'surf':
            detector = cv2.xfeatures2d.SURF_create()
        elif self.feature_type == 'orb':
            detector = cv2.ORB_create(nfeatures=1500)
        else:
            logging.error("Admitted feature types are SIFT, SURF or ORB")
            # an invalid feature type is a failure, so do not report success to the shell
            sys.exit(1)

        self.keypoints, self.descriptors = detector.detectAndCompute(self.image, None)
        logging.info("Computed features for image %s", self.name)

        self.write_features()

    def read_features(self):
        """Reads features stored in files. Feature files have filenames corresponding to image names without extensions.

        Falls back to extract_features when the pkl file of this image does not exist"""

        features_file_path = os.path.join(self.root_path, 'features', self.name + '.pkl')

        # logic to compute features for images that don't have pkl files
        try:
            # NOTE(review): pickle must only be used on cache files this program wrote itself;
            # loading a pkl from an untrusted source can execute arbitrary code
            with open(features_file_path, "rb") as features_file:
                features = pickle.load(features_file)
        except FileNotFoundError:
            logging.error("Pkl file not found for image %s. Computing from scratch", self.name)
            self.extract_features()
            return

        logging.info("Read features from file for image %s", self.name)

        # NOTE(review): the cache does not record which feature type produced it, so features
        # cached with one type are reused as-is when another type is requested
        keypoints = []
        descriptors = []

        for point in features:
            keypoint = cv2.KeyPoint(x=point[0][0], y=point[0][1], _size=point[1], _angle=point[2],
                                    _response=point[3], _octave=point[4], _class_id=point[5])
            keypoints.append(keypoint)
            descriptors.append(point[6])

        self.keypoints = keypoints
        self.descriptors = np.array(descriptors)  # one row per keypoint

    def write_features(self):
        """Stores computed features to pkl files. The files are written inside a features directory inside the root directory.

        Each keypoint is stored as the tuple (pt, size, angle, response, octave, class_id, descriptor)"""

        features_dir = os.path.join(self.root_path, 'features')
        os.makedirs(features_dir, exist_ok=True)

        # descriptors are indexed per keypoint, so an image without keypoints never touches them
        records = [(point.pt, point.size, point.angle, point.response, point.octave, point.class_id,
                    self.descriptors[idx])
                   for idx, point in enumerate(self.keypoints)]

        # the context manager closes the file even when pickling raises
        with open(os.path.join(features_dir, self.name + '.pkl'), 'wb') as features_file:
            pickle.dump(records, features_file)


def create_views(root_path, image_format='jpg', feature_type='sift'):
    """Loops through the images and creates an array of views.

    root_path is the directory containing the images/ folder, image_format the extension of the
    images to load and feature_type the extraction method handed to every View. Images are
    processed in sorted file-name order. Returns the list of views"""

    # if features directory exists, the feature files are read from there
    feature_path = os.path.exists(os.path.join(root_path, 'features'))
    if feature_path:
        logging.info("Found features directory, reading cached features")

    image_names = sorted(glob.glob(os.path.join(root_path, 'images', '*.' + image_format)))

    logging.info("Computing features")
    return [View(image_name, root_path, feature_path=feature_path, feature_type=feature_type)
            for image_name in image_names]