├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── LICENSE
├── README.md
├── download_meshloc_data.sh
├── localize.py
├── localize_12scenes.py
├── setup.py
└── src
    ├── CMakeLists.txt
    ├── absolute_pose_estimator.cc
    ├── absolute_pose_estimator.h
    ├── main.cc
    ├── multi_absolute_pose_estimator.cc
    ├── multi_absolute_pose_estimator.h
    ├── pose_estimation.cc
    ├── spatial_ransac.h
    ├── types.h
    ├── utils.cc
    └── utils.h
/.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | meshloc.cpython-37m-x86_64-linux-gnu.so 3 | meshloc.egg-info/ -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "PoseLib"] 2 | path = PoseLib 3 | url = https://github.com/vlarsson/PoseLib.git 4 | [submodule "RansacLib"] 5 | path = RansacLib 6 | url = https://github.com/tsattler/RansacLib.git 7 | [submodule "pybind11"] 8 | path = pybind11 9 | url = https://github.com/pybind/pybind11.git 10 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.0) 2 | project(meshloc) 3 | 4 | # Same compiler options as PoseLib 5 | if(NOT CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE STREQUAL "") 6 | set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE) 7 | endif() 8 | 9 | # Enable PyBind11 error messages 10 | add_definitions(-DPYBIND11_DETAILED_ERROR_MESSAGES) 11 | 12 | add_subdirectory (pybind11) 13 | add_subdirectory (src) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2022, Vojtech Panek and Zuzana Kukelova and Torsten Sattler 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MeshLoc 2 | This repository contains the code for our publication [MeshLoc: Mesh-Based Visual Localization](https://arxiv.org/abs/2207.10762), presented at ECCV 2022. 3 | 4 | ## License 5 | This repository is licensed under the 3-Clause BSD License. See the [LICENSE](https://github.com/tsattler/meshloc_release/blob/main/LICENSE) file for the full text. 6 | 7 | ## Citation 8 | If you use the code in this repository, please cite the following paper: 9 | ``` 10 | @inproceedings{Panek2022ECCV, 11 | author = {Panek, Vojtech and Kukelova, Zuzana and Sattler, Torsten}, 12 | title = {{MeshLoc: Mesh-Based Visual Localization}}, 13 | booktitle={European Conference on Computer Vision (ECCV)}, 14 | year = {2022}, 15 | } 16 | ``` 17 | 18 | ## Installation 19 | Make sure you have [COLMAP](https://colmap.github.io/index.html) installed on your system. If not, please follow the instructions on the [COLMAP website](https://colmap.github.io/install.html). The current version of MeshLoc was tested with COLMAP 3.11, but should be compatible with any version >= 3.9. 20 | 21 | This version of MeshLoc uses the [Image Matching Toolbox](https://github.com/GrumpyZhou/image-matching-toolbox) for feature matching. The immatch package unfortunately cannot be installed directly from GitHub because of the way it pulls in the individual matching methods as submodules. It therefore has to be installed from source. The full installation process is as follows: 22 | 23 | ``` 24 | git clone https://github.com/GrumpyZhou/image-matching-toolbox.git 25 | cd image-matching-toolbox 26 | git submodule update --init 27 | conda create --name immatch python=3.7 28 | conda activate immatch 29 | pip install Cython loguru jupyter scipy matplotlib opencv-python torch==1.13.1 torchvision pytorch-lightning faiss-gpu h5py==3.6.0 30 | pip install -e . 31 | cd pretrained 32 | bash download.sh 33 | cd ../.. 34 | git clone --recurse-submodules https://github.com/tsattler/meshloc_release.git 35 | cd meshloc_release 36 | pip install -e . 37 | ``` 38 | 39 | In case the Image Matching Toolbox fails to download the pretrained models, you might need to switch to a more recent branch of the submodules. Example for Patch2Pix: 40 | 41 | ``` 42 | cd image-matching-toolbox/third_party/patch2pix 43 | git checkout main 44 | ``` 45 | 46 | If the CMake configuration fails with `Couldn't find CUDA library root.`, set `CUDA_HOME` (for example `export CUDA_HOME=/usr/local/cuda`) and try again. 47 | 48 | 49 | ## Usage 50 | The current implementation contains localization scripts for two datasets - [Aachen v1.1](https://data.ciirc.cvut.cz/public/projects/2020VisualLocalization/Aachen-Day-Night/) and [12 Scenes](http://graphics.stanford.edu/projects/reloc/). The scripts should be launched from the `image-matching-toolbox` directory so that they have direct access to the feature matching configuration files. Also activate the prepared `immatch` conda environment, which contains all necessary packages.
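
Launching from the toolbox directory matters because `localize.py` resolves both the matcher configuration and the pretrained checkpoints relative to the current working directory. The snippet below is a condensed excerpt of the matcher-initialization code in `localize.py` (the method and configuration names are simply the values from the Aachen example that follows):

```
import os
import yaml
import immatch

method_name, method_config = 'patch2pix', 'aachen_v1.1'
# Resolved relative to the current working directory,
# i.e., the image-matching-toolbox checkout.
config_file = f'configs/{method_name}.yml'
with open(config_file, 'r') as f:
    imm_args = yaml.load(f, Loader=yaml.FullLoader)[method_config]
if 'ckpt' in imm_args:
    # The pretrained checkpoint path is relative to the toolbox directory, too.
    imm_args['ckpt'] = os.path.join('.', imm_args['ckpt'])
model = immatch.__dict__[imm_args['class']](imm_args)
matcher = lambda im1, im2: model.match_pairs(im1, im2)
```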
51 | 52 | Example on the Aachen v1.1 dataset (the angle-bracketed paths are placeholders that need to be adapted to your setup): 53 | ``` 54 | cd <image-matching-toolbox_dir> 55 | 56 | conda activate immatch 57 | 58 | python3 <meshloc_release_dir>/localize.py \ 59 | --db_image_dir <meshloc_data_dir>/aachen_day_night_v11/images/images_db_undist_800 \ 60 | --db_depth_image_dir <meshloc_data_dir>/aachen_day_night_v11/db_renderings/AC14_depth_800_undist \ 61 | --colmap_model_dir <meshloc_data_dir>/aachen_day_night_v11/db_colmap_models/800_undist_underscores \ 62 | --query_dir <meshloc_data_dir>/aachen_day_night_v11/images/images_q_night_800 \ 63 | --query_list <meshloc_data_dir>/aachen_day_night_v11/night_time_queries_with_intrinsics_800_basenames.txt \ 64 | --out_prefix <output_path_prefix> \ 65 | --match_prefix <match_path_prefix> \ 66 | --method_name patch2pix \ 67 | --method_config aachen_v1.1 \ 68 | --method_string patch2pix_aachen_v1_1_ \ 69 | --retrieval_pairs <meshloc_data_dir>/aachen_day_night_v11/retrieval_pairs/NetVLAD_top50_underscores.txt \ 70 | --top_k 50 \ 71 | --max_side_length -1 \ 72 | --ransac_type POSELIB+REF \ 73 | --min_ransac_iterations 10000 \ 74 | --max_ransac_iterations 100000 \ 75 | --reproj_error 20.0 \ 76 | --use_orig_db_images \ 77 | --cluster_keypoints 78 | ``` 79 | 80 | Example on the 12 Scenes dataset: 81 | ``` 82 | cd <image-matching-toolbox_dir> 83 | 84 | conda activate immatch 85 | 86 | python3 <meshloc_release_dir>/localize_12scenes.py \ 87 | --db_image_dir <12_scenes_dataset_path>/apt1/kitchen/data \ 88 | --db_depth_image_dir <meshloc_data_dir>/12_scenes/db_renderings/12_scenes_apt1_kitchen_depth \ 89 | --colmap_model_dir <meshloc_data_dir>/12_scenes/db_colmap_models/apt1_kitchen \ 90 | --query_dir <12_scenes_dataset_path>/apt1/kitchen/data \ 91 | --query_list <meshloc_data_dir>/12_scenes/queries_with_intrinsics/apt1_kitchen_queries_with_intrinsics.txt \ 92 | --out_prefix <output_path_prefix> \ 93 | --match_prefix <match_path_prefix> \ 94 | --method_name loftr \ 95 | --method_config default \ 96 | --method_string loftr_default_ \ 97 | --retrieval_pairs <meshloc_data_dir>/12_scenes/retrieval_pairs/apt1_kitchen_DVLAD_top20.txt \ 98 | --top_k 20 \ 99 | --max_side_length -1 \ 100 | --ransac_type POSELIB+REF \ 101 | --min_ransac_iterations 10000 \ 102 | --max_ransac_iterations 100000 \ 103 | --reproj_error 20.0 \ 104 | --use_orig_db_images 105 | ``` 106 | 107 | List of localization script arguments: 108 | - **db_image_dir** = directory with the database images 109 | - **db_depth_image_dir** = directory with the database depth maps (a `_depth.npz` postfix is assumed) 110 | - **rendering_postfix** = postfix of the rendered database images, if used (our IBMR rendering pipeline produces either `_rendered_color.png` or `_rendered_no_color.png`, depending on the shader preset) 111 | - **colmap_model_dir** = directory of a COLMAP model defining the database cameras - its `images` file contains the image paths relative to `db_image_dir` 112 | - **query_dir** = directory with the query images 113 | - **query_list** = list of query images with intrinsics - contains the image paths relative to `query_dir` 114 | - **out_prefix** = prefix (including path) of the output file (with the estimated query poses) in the format accepted by [The Visual Localization Benchmark](https://www.visuallocalization.net/) 115 | - **match_prefix** = prefix (including path) for the files containing the local matches between query and database images 116 | - **method_name** = matching method name - see the file names in [image-matching-toolbox/configs](https://github.com/GrumpyZhou/image-matching-toolbox/tree/main/configs) - e.g. `patch2pix` 117 | - **method_config** = one of the method configurations - see the configurations inside the individual YAML files (e.g. [image-matching-toolbox/configs/patch2pix.yml](https://github.com/GrumpyZhou/image-matching-toolbox/blob/main/configs/patch2pix.yml)) - e.g. `aachen_v1.1` 118 | - **method_string** = string used to identify the matching method in the output files - e.g. `patch2pix_aachen_v1_1_` 119 | - **retrieval_pairs** = file from a retrieval step containing similar database images for each query image (text file, each line contains `query_image_file database_image_file`) 120 | - **top_k** = number of top-retrieved database images that will be used for localization 121 | - **max_side_length** = length of the longer side of the database images - used to scale the query intrinsics (-1 for full resolution) 122 | - **ransac_type** = RANSAC type (possible values: `POSELIB`, `POSELIB+REF`) 123 | - **min_ransac_iterations** = minimum number of RANSAC iterations 124 | - **max_ransac_iterations** = maximum number of RANSAC iterations 125 | - **reproj_error** = RANSAC reprojection error threshold 126 | - **use_orig_db_images** = set if `db_image_dir` contains the original images (not renderings) 127 | - **triangulate** = set to use triangulation instead of 3D points from depth maps 128 | - **merge_3D_points** = set to select a single one of the multiple 3D points available per query feature 129 | - **cluster_keypoints** = set to cluster keypoints (currently only applicable to patch2pix) 130 | - **covisibility_filtering** = set to use covisibility filtering 131 | - **all_matches_ransac** = set to use all possible 2D-3D matches in RANSAC 132 | - **refinement_range** = range for the +REF refinement 133 | - **refinement_step** = step size for the +REF refinement 134 | - **bias_x** = bias term in the x-direction for the feature detections 135 | - **bias_y** = bias term in the y-direction for the feature detections 136 | 137 | The pose estimates from the localization pipeline on Aachen v1.1 can be evaluated by the benchmark at [https://www.visuallocalization.net/](https://www.visuallocalization.net/). The pose estimates on 12 Scenes can be evaluated with the [https://github.com/tsattler/visloc_pseudo_gt_limitations](https://github.com/tsattler/visloc_pseudo_gt_limitations) repository, which also contains links to the 12 Scenes COLMAP models. 138 | 139 | ## Data 140 | We used the [Aachen v1.1](https://data.ciirc.cvut.cz/public/projects/2020VisualLocalization/Aachen-Day-Night/) and [12 Scenes](http://graphics.stanford.edu/projects/reloc/#data) datasets for the evaluation in our paper. 141 | 142 | The data derived from the above-mentioned datasets (such as meshes, renderings, or COLMAP models) can be found in our data repository at [https://data.ciirc.cvut.cz/public/projects/2022MeshLoc](https://data.ciirc.cvut.cz/public/projects/2022MeshLoc). 143 | 144 | You can use [the download script](https://github.com/tsattler/meshloc_release/blob/main/download_meshloc_data.sh) to easily get the meshes and renderings. The repository also contains resized and undistorted images for the Aachen v1.1 dataset and the corresponding COLMAP models.
145 | 146 | Script parameters: 147 | - if no parameters are passed, the whole data repository will be downloaded to the current directory 148 | - **-n < all \| aachen \| 12_scenes >** - specifies which dataset to download 149 | - **-p \< string >** - specifies the directory where the data will be downloaded 150 | - **-z** - unzips everything and removes the zip files 151 | 152 | Note that all the images and renderings for Aachen v1.1 in our repository use a flattened data structure - all database images are in a single directory, with their original relative paths encoded in their names (`db/1883.jpg` --> `db_1883.jpg`), and query images use only their basenames (`query/night/nexus5x_additional_night/IMG_20170702_005927.jpg` --> `IMG_20170702_005927.jpg`). We provide database COLMAP models and retrieval files for both the original data structure (`800_undist`, `NetVLAD_top50.txt`) and the flattened data structure (`800_undist_underscores`, `NetVLAD_top50_underscores.txt`). All the provided images and COLMAP models are undistorted, and some are resized to 800px on the longer side. 153 | 154 | ## Acknowledgements 155 | This repository heavily relies on [PoseLib](https://github.com/vlarsson/PoseLib), [RansacLib](https://github.com/tsattler/RansacLib) and the [Image Matching Toolbox](https://github.com/GrumpyZhou/image-matching-toolbox/). We would like to thank all the contributors of these repositories. 156 | 157 | 158 | -------------------------------------------------------------------------------- /download_meshloc_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright (c) 2022, Vojtech Panek and Zuzana Kukelova and Torsten Sattler 4 | # All rights reserved. 5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions are met: 8 | # 9 | # 1. Redistributions of source code must retain the above copyright notice, this 10 | # list of conditions and the following disclaimer. 11 | # 12 | # 2. Redistributions in binary form must reproduce the above copyright notice, 13 | # this list of conditions and the following disclaimer in the documentation 14 | # and/or other materials provided with the distribution. 15 | # 16 | # 3. Neither the name of the copyright holder nor the names of its 17 | # contributors may be used to endorse or promote products derived from 18 | # this software without specific prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
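# Example invocations (using only the options documented below; note that the
# directory passed to -p must already exist):
#   bash download_meshloc_data.sh                          # everything, into the current directory
#   bash download_meshloc_data.sh -n aachen -p ./data -z   # Aachen data only, into ./data, unzipped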
30 | 31 | 32 | 33 | # Script for downloading the data from the MeshLoc data repository 34 | # - usage: 35 | # - if no parameters are passed, the whole data repository will be downloaded 36 | # to the current directory 37 | # -n <all|aachen|12_scenes> - specifies which dataset to download 38 | # -p <path> - specifies the directory where the data will be downloaded 39 | # -z - unzips everything and removes the zip files 40 | # 41 | # - individual files can be manually downloaded from: 42 | # https://data.ciirc.cvut.cz/public/projects/2022MeshLoc 43 | 44 | base_url="https://data.ciirc.cvut.cz/public/projects/2022MeshLoc" 45 | 46 | dataset_name="all" 47 | target_path=$(pwd) 48 | unzip=false 49 | 50 | help() { echo "Usage: $0 [-n <all|aachen|12_scenes>] [-p <download_path>] [-z]" 1>&2; exit 1; } 51 | 52 | while getopts ":n:p:z" o; do 53 | case "${o}" in 54 | n) 55 | n=${OPTARG} 56 | [[ "${n}" == "all" || "${n}" == "aachen" || "${n}" == "12_scenes" ]] && dataset_name=${n} || help 57 | ;; 58 | p) 59 | p=${OPTARG} 60 | [ -d ${p} ] && target_path=${p} || { echo "The given path is not a directory: ${p}" >&2; exit 1; } 61 | ;; 62 | z) 63 | unzip=true 64 | ;; 65 | :) 66 | echo "Missing option argument for -$OPTARG" >&2; exit 1 67 | ;; 68 | *) 69 | help 70 | ;; 71 | esac 72 | done 73 | shift $((OPTIND-1)) 74 | 75 | case "${dataset_name}" in 76 | all) 77 | download_url=${base_url} 78 | ;; 79 | aachen) 80 | download_url="${base_url}/aachen_day_night_v11" 81 | ;; 82 | 12_scenes) 83 | download_url="${base_url}/12_scenes" 84 | ;; 85 | esac 86 | 87 | echo "Downloading the data..." 88 | wget -r -nH --no-parent --cut-dirs=3 --reject="index*" --directory-prefix=${target_path} ${download_url} 89 | 90 | if [[ "${unzip}" == true ]] 91 | then 92 | echo "Unzipping the data..." 93 | case "${dataset_name}" in 94 | all) 95 | unzip "${target_path}/12_scenes/db_renderings/*.zip" -d "${target_path}/12_scenes/db_renderings" 96 | unzip "${target_path}/aachen_day_night_v11/db_renderings/*.zip" -d "${target_path}/aachen_day_night_v11/db_renderings" 97 | unzip "${target_path}/aachen_day_night_v11/meshes/*.zip" -d "${target_path}/aachen_day_night_v11/meshes" 98 | 99 | rm ${target_path}/12_scenes/db_renderings/*.zip 100 | rm ${target_path}/aachen_day_night_v11/db_renderings/*.zip 101 | rm ${target_path}/aachen_day_night_v11/meshes/*.zip 102 | ;; 103 | aachen) 104 | unzip "${target_path}/aachen_day_night_v11/db_renderings/*.zip" -d "${target_path}/aachen_day_night_v11/db_renderings" 105 | unzip "${target_path}/aachen_day_night_v11/meshes/*.zip" -d "${target_path}/aachen_day_night_v11/meshes" 106 | 107 | rm ${target_path}/aachen_day_night_v11/db_renderings/*.zip 108 | rm ${target_path}/aachen_day_night_v11/meshes/*.zip 109 | ;; 110 | 12_scenes) 111 | unzip "${target_path}/12_scenes/db_renderings/*.zip" -d "${target_path}/12_scenes/db_renderings" 112 | 113 | rm ${target_path}/12_scenes/db_renderings/*.zip 114 | ;; 115 | esac 116 | fi -------------------------------------------------------------------------------- /localize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright (c) 2022, Vojtech Panek and Zuzana Kukelova and Torsten Sattler 4 | # All rights reserved. 5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions are met: 8 | # 9 | # 1. Redistributions of source code must retain the above copyright notice, this 10 | # list of conditions and the following disclaimer. 11 | # 12 | # 2.
Redistributions in binary form must reproduce the above copyright notice, 13 | # this list of conditions and the following disclaimer in the documentation 14 | # and/or other materials provided with the distribution. 15 | # 16 | # 3. Neither the name of the copyright holder nor the names of its 17 | # contributors may be used to endorse or promote products derived from 18 | # this software without specific prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | 31 | import os 32 | import yaml 33 | import immatch 34 | # import pycolmap 35 | 36 | import numpy as np 37 | from immatch.utils import plot_matches 38 | 39 | import collections 40 | import os 41 | import struct 42 | 43 | from tqdm import tqdm 44 | 45 | import scipy.sparse 46 | import scipy.spatial.distance 47 | 48 | import meshloc 49 | 50 | import argparse 51 | 52 | # Defines helper functions 53 | 54 | #### Code taken from COLMAP: 55 | # from https://github.com/colmap/colmap/blob/dev/scripts/python/read_write_model.py 56 | CameraModel = collections.namedtuple( 57 | "CameraModel", ["model_id", "model_name", "num_params"]) 58 | Camera = collections.namedtuple( 59 | "Camera", ["id", "model", "width", "height", "params"]) 60 | BaseImage = collections.namedtuple( 61 | "Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"]) 62 | Point3D = collections.namedtuple( 63 | "Point3D", ["id", "xyz", "rgb", "error", "image_ids", "point2D_idxs"]) 64 | 65 | 66 | class Image(BaseImage): 67 | def qvec2rotmat(self): 68 | return qvec2rotmat(self.qvec) 69 | 70 | 71 | CAMERA_MODELS = { 72 | CameraModel(model_id=0, model_name="SIMPLE_PINHOLE", num_params=3), 73 | CameraModel(model_id=1, model_name="PINHOLE", num_params=4), 74 | CameraModel(model_id=2, model_name="SIMPLE_RADIAL", num_params=4), 75 | CameraModel(model_id=3, model_name="RADIAL", num_params=5), 76 | CameraModel(model_id=4, model_name="OPENCV", num_params=8), 77 | CameraModel(model_id=5, model_name="OPENCV_FISHEYE", num_params=8), 78 | CameraModel(model_id=6, model_name="FULL_OPENCV", num_params=12), 79 | CameraModel(model_id=7, model_name="FOV", num_params=5), 80 | CameraModel(model_id=8, model_name="SIMPLE_RADIAL_FISHEYE", num_params=4), 81 | CameraModel(model_id=9, model_name="RADIAL_FISHEYE", num_params=5), 82 | CameraModel(model_id=10, model_name="THIN_PRISM_FISHEYE", num_params=12) 83 | } 84 | CAMERA_MODEL_IDS = dict([(camera_model.model_id, camera_model) 85 | for camera_model in CAMERA_MODELS]) 86 | CAMERA_MODEL_NAMES = dict([(camera_model.model_name, camera_model) 87 | for camera_model in CAMERA_MODELS]) 88 | 89 | 90 | def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"): 91 | """Read and unpack the next bytes from a binary file. 
92 | :param fid: 93 | :param num_bytes: Sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc. 94 | :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}. 95 | :param endian_character: Any of {@, =, <, >, !} 96 | :return: Tuple of read and unpacked values. 97 | """ 98 | data = fid.read(num_bytes) 99 | return struct.unpack(endian_character + format_char_sequence, data) 100 | 101 | def qvec2rotmat(qvec): 102 | return np.array([ 103 | [1 - 2 * qvec[2]**2 - 2 * qvec[3]**2, 104 | 2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3], 105 | 2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2]], 106 | [2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3], 107 | 1 - 2 * qvec[1]**2 - 2 * qvec[3]**2, 108 | 2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1]], 109 | [2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2], 110 | 2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1], 111 | 1 - 2 * qvec[1]**2 - 2 * qvec[2]**2]]) 112 | 113 | def read_cameras_text(path): 114 | """ 115 | see: src/base/reconstruction.cc 116 | void Reconstruction::WriteCamerasText(const std::string& path) 117 | void Reconstruction::ReadCamerasText(const std::string& path) 118 | """ 119 | cameras = {} 120 | with open(path, "r") as fid: 121 | while True: 122 | line = fid.readline() 123 | if not line: 124 | break 125 | line = line.strip() 126 | if len(line) > 0 and line[0] != "#": 127 | elems = line.split() 128 | camera_id = int(elems[0]) 129 | model = elems[1] 130 | width = int(elems[2]) 131 | height = int(elems[3]) 132 | params = np.array(tuple(map(float, elems[4:]))) 133 | cameras[camera_id] = Camera(id=camera_id, model=model, 134 | width=width, height=height, 135 | params=params) 136 | return cameras 137 | 138 | 139 | def read_cameras_binary(path_to_model_file): 140 | """ 141 | see: src/base/reconstruction.cc 142 | void Reconstruction::WriteCamerasBinary(const std::string& path) 143 | void Reconstruction::ReadCamerasBinary(const std::string& path) 144 | """ 145 | cameras = {} 146 | with open(path_to_model_file, "rb") as fid: 147 | num_cameras = read_next_bytes(fid, 8, "Q")[0] 148 | for _ in range(num_cameras): 149 | camera_properties = read_next_bytes( 150 | fid, num_bytes=24, format_char_sequence="iiQQ") 151 | camera_id = camera_properties[0] 152 | model_id = camera_properties[1] 153 | model_name = CAMERA_MODEL_IDS[camera_properties[1]].model_name 154 | width = camera_properties[2] 155 | height = camera_properties[3] 156 | num_params = CAMERA_MODEL_IDS[model_id].num_params 157 | params = read_next_bytes(fid, num_bytes=8*num_params, 158 | format_char_sequence="d"*num_params) 159 | cameras[camera_id] = Camera(id=camera_id, 160 | model=model_name, 161 | width=width, 162 | height=height, 163 | params=np.array(params)) 164 | assert len(cameras) == num_cameras 165 | return cameras 166 | 167 | 168 | 169 | def read_images_text(path): 170 | """ 171 | see: src/base/reconstruction.cc 172 | void Reconstruction::ReadImagesText(const std::string& path) 173 | void Reconstruction::WriteImagesText(const std::string& path) 174 | """ 175 | images = {} 176 | with open(path, "r") as fid: 177 | while True: 178 | line = fid.readline() 179 | if not line: 180 | break 181 | line = line.strip() 182 | if len(line) > 0 and line[0] != "#": 183 | elems = line.split() 184 | image_id = int(elems[0]) 185 | qvec = np.array(tuple(map(float, elems[1:5]))) 186 | tvec = np.array(tuple(map(float, elems[5:8]))) 187 | camera_id = int(elems[8]) 188 | image_name = elems[9] 189 | elems = fid.readline().split() 190 | # xys = np.column_stack([tuple(map(float, elems[0::3])), 191 | # 
tuple(map(float, elems[1::3]))]) 192 | # point3D_ids = np.array(tuple(map(int, elems[2::3]))) 193 | # images[image_id] = Image( 194 | # id=image_id, qvec=qvec, tvec=tvec, 195 | # camera_id=camera_id, name=image_name, 196 | # xys=xys, point3D_ids=point3D_ids) 197 | images[image_id] = Image( 198 | id=image_id, qvec=qvec, tvec=tvec, 199 | camera_id=camera_id, name=image_name, 200 | xys={}, point3D_ids={}) 201 | return images 202 | 203 | 204 | def read_images_binary(path_to_model_file): 205 | """ 206 | see: src/base/reconstruction.cc 207 | void Reconstruction::ReadImagesBinary(const std::string& path) 208 | void Reconstruction::WriteImagesBinary(const std::string& path) 209 | """ 210 | images = {} 211 | with open(path_to_model_file, "rb") as fid: 212 | num_reg_images = read_next_bytes(fid, 8, "Q")[0] 213 | for _ in range(num_reg_images): 214 | binary_image_properties = read_next_bytes( 215 | fid, num_bytes=64, format_char_sequence="idddddddi") 216 | image_id = binary_image_properties[0] 217 | qvec = np.array(binary_image_properties[1:5]) 218 | tvec = np.array(binary_image_properties[5:8]) 219 | camera_id = binary_image_properties[8] 220 | image_name = "" 221 | current_char = read_next_bytes(fid, 1, "c")[0] 222 | while current_char != b"\x00": # look for the ASCII 0 entry 223 | image_name += current_char.decode("utf-8") 224 | current_char = read_next_bytes(fid, 1, "c")[0] 225 | num_points2D = read_next_bytes(fid, num_bytes=8, 226 | format_char_sequence="Q")[0] 227 | x_y_id_s = read_next_bytes(fid, num_bytes=24*num_points2D, 228 | format_char_sequence="ddq"*num_points2D) 229 | xys = np.column_stack([tuple(map(float, x_y_id_s[0::3])), 230 | tuple(map(float, x_y_id_s[1::3]))]) 231 | point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3]))) 232 | # images[image_id] = Image( 233 | # id=image_id, qvec=qvec, tvec=tvec, 234 | # camera_id=camera_id, name=image_name, 235 | # xys=xys, point3D_ids=point3D_ids) 236 | images[image_id] = Image( 237 | id=image_id, qvec=qvec, tvec=tvec, 238 | camera_id=camera_id, name=image_name, 239 | xys={}, point3D_ids={}) 240 | return images 241 | 242 | def detect_model_format(path, ext): 243 | if os.path.isfile(os.path.join(path, "cameras" + ext)) and \ 244 | os.path.isfile(os.path.join(path, "images" + ext)): 245 | print("Detected model format: '" + ext + "'") 246 | return True 247 | 248 | return False 249 | 250 | 251 | def read_model(path, ext=""): 252 | # try to detect the extension automatically 253 | if ext == "": 254 | if detect_model_format(path, ".bin"): 255 | ext = ".bin" 256 | elif detect_model_format(path, ".txt"): 257 | ext = ".txt" 258 | else: 259 | print("Provide model format: '.bin' or '.txt'") 260 | return 261 | 262 | if ext == ".txt": 263 | cameras = read_cameras_text(os.path.join(path, "cameras" + ext)) 264 | images = read_images_text(os.path.join(path, "images" + ext)) 265 | else: 266 | cameras = read_cameras_binary(os.path.join(path, "cameras" + ext)) 267 | images = read_images_binary(os.path.join(path, "images" + ext)) 268 | return cameras, images 269 | 270 | 271 | #### 272 | # Main function 273 | 274 | 275 | def main(): 276 | parser = argparse.ArgumentParser(description="Localization against 3D model") 277 | parser.add_argument("--db_image_dir", type=str, help="Directory with database images") 278 | parser.add_argument("--colmap_model_dir", type=str, help="Directory with colmap model") 279 | parser.add_argument("--db_depth_image_dir", type=str, help="Directory with database image depth maps") 280 | parser.add_argument("--method_name", type=str, 
help="Method name") 281 | parser.add_argument("--method_config", type=str, help="Method config name") 282 | parser.add_argument("--method_string", type=str, help="String name for method used for output") 283 | parser.add_argument("--out_prefix", type=str, help="Prefix of output file (including path)") 284 | parser.add_argument("--query_list", type=str, help="Text file containing the query names and intrinsics") 285 | parser.add_argument("--query_dir", type=str, help="Directory with query images") 286 | parser.add_argument("--retrieval_pairs", type=str, help="Text file with results of retrieval") 287 | parser.add_argument("--top_k", type=int, help="Number of top-ranked images to use") 288 | parser.add_argument("--reproj_error", type=float, default=12.0, help="Reprojection error threshold for RANSAC") 289 | parser.add_argument("--use_orig_db_images", action="store_true", help="Use real or rendered database images.") 290 | parser.add_argument("--triangulate", action="store_true", help="Use triangulation instead of 3D points from depth maps.") 291 | parser.add_argument("--merge_3D_points", action="store_true", help="If multiple 3D points are available per query feature, whether to select one or not.") 292 | parser.add_argument("--cluster_keypoints", action="store_true", help="Whether to cluster keypoints. Only applicable for patch2pix at the moment.") 293 | parser.add_argument("--covisibility_filtering", action="store_true", help="Use covisibility filtering or not.") 294 | parser.add_argument("--all_matches_ransac", action="store_true", help="Use all possible 2D-3D matches in RANSAC.") 295 | parser.add_argument("--min_ransac_iterations", type=int, default=1000, required=False, help="Minimum number of RANSAC iterations.") 296 | parser.add_argument("--max_ransac_iterations", type=int, default=100000, required=False, help="Maximum number of RANSAC iterations.") 297 | parser.add_argument("--max_side_length", type=int, default=800, required=False, help="Maximum side length to use for queries, -1 for original resolution") 298 | parser.add_argument("--ransac_type", type=str, default="MSAC", required=False, help="RANSAC type: MSAC, EFFSAC, or PYCOLMAP") 299 | parser.add_argument("--match_prefix", type=str, required=True, help="Contains the directory name and a prefix for the filenames that will be used to write out matches") 300 | parser.add_argument("--rendering_postfix", type=str, required=False, help="Ending for the images") 301 | parser.add_argument("--refinement_range", type=float, default=1.0, help="Range for the +REF refinement") 302 | parser.add_argument("--refinement_step", type=float, default=0.25, help="Step size for the +REF refinement") 303 | parser.add_argument("--bias_x", type=float, default=0.0, help="Bias term for x-direction for feature detections") 304 | parser.add_argument("--bias_y", type=float, default=0.0, help="Bias term for y-direction for feature detections") 305 | args = parser.parse_args() 306 | print(args) 307 | 308 | assert os.path.exists(args.db_image_dir), "Given db_image_dir does not exist: {}".format(args.db_image_dir) 309 | assert os.path.exists(args.colmap_model_dir), "Given colmap_model_dir does not exist: {}".format(args.colmap_model_dir) 310 | assert os.path.exists(args.db_depth_image_dir), "Given db_depth_image_dir does not exist: {}".format(args.db_depth_image_dir) 311 | assert os.path.exists(args.query_list), "Given query_list file does not exist: {}".format(args.query_list) 312 | assert os.path.exists(args.query_dir), "Given query_dir does not exist: 
{}".format(args.query_dir) 313 | assert os.path.exists(args.retrieval_pairs), "Given retrieval_pairs file does not exist: {}".format(args.retrieval_pairs) 314 | 315 | # get the query information (names, intrinsics calibrations) 316 | query_infos = open(args.query_list, "r").readlines() 317 | print(len(query_infos)) 318 | 319 | # load the retrieval pairs 320 | retrieval_pairs = open(args.retrieval_pairs, "r").readlines() 321 | retrieval_results = {} 322 | for i in range(0, len(retrieval_pairs)): 323 | if ',' in retrieval_pairs[i]: 324 | # AP-GeM retrieval pairs format 325 | q_name, db_name, score = retrieval_pairs[i].split(', ') 326 | else: 327 | # NetVLAD retrieval pairs format 328 | q_name, db_name = retrieval_pairs[i].split(' ') 329 | 330 | if q_name not in retrieval_results: 331 | retrieval_results[q_name] = [] 332 | retrieval_results[q_name].append(db_name) 333 | 334 | # maximum side length of the query images 335 | max_side_length = np.float32(args.max_side_length) 336 | 337 | # load the reference camera poses 338 | print('Loading the reference poses') 339 | cameras, images = read_model(args.colmap_model_dir) 340 | print('Found %d images and %d cameras' % (len(images), len(cameras))) 341 | 342 | # maps from image names to image ids 343 | map_db_name_to_id = {} 344 | for img in images: 345 | map_db_name_to_id[images[img].name] = images[img].id 346 | 347 | reproj_error = args.reproj_error 348 | 349 | db_image_dir = args.db_image_dir 350 | use_orig_images = args.use_orig_db_images 351 | rendering_postfix = args.rendering_postfix #'_rendered_no_color.png' 352 | db_depth_dir = args.db_depth_image_dir 353 | 354 | # main code: matching and pose estimation 355 | poses = {} 356 | best_inliers = {} 357 | num_top_ranked = args.top_k 358 | print('Using the %d top-ranked images' % num_top_ranked) 359 | 360 | # initialize matcher 361 | if "#" not in args.method_name: 362 | config_file = f'configs/{args.method_name}.yml' 363 | print(config_file) 364 | print(args.method_config) 365 | with open(config_file, 'r') as f: 366 | imm_args = yaml.load(f, Loader=yaml.FullLoader)[args.method_config] 367 | if 'ckpt' in imm_args: 368 | imm_args['ckpt'] = os.path.join('.', imm_args['ckpt']) 369 | class_name = imm_args['class'] 370 | # Init model 371 | model = immatch.__dict__[class_name](imm_args) 372 | matcher = lambda im1, im2: model.match_pairs(im1, im2) 373 | method = args.method_string 374 | 375 | for i in tqdm(range(0, len(query_infos))): 376 | print(' ') 377 | q_name = query_infos[i].split(' ')[0] 378 | q_data = query_infos[i].split(' ')[1:] 379 | print(' Trying to localize query image ' + q_name) 380 | if q_name not in retrieval_results: 381 | print(' Could not find retrieval results, skipping') 382 | continue 383 | width = np.float32(q_data[1]) 384 | height = np.float32(q_data[2]) 385 | if max_side_length > 0: 386 | scaling_factor = max_side_length / max(width, height) 387 | else: 388 | scaling_factor = 1.0 389 | # We are currently assuming the SIMPLE_RADIAL camera model 390 | camera_dict = {'model': q_data[0], 'width' : int(width * scaling_factor), 391 | 'height' : int(height * scaling_factor), 392 | 'params' : [np.float32(q_data[3]) * scaling_factor, 393 | np.float32(q_data[4]) * scaling_factor, 394 | np.float32(q_data[5]) * scaling_factor, 395 | np.float32(q_data[6])]} 396 | 397 | retrieved_db = retrieval_results[q_name] 398 | 399 | best_inliers[q_name] = 0 400 | 401 | top_ranked_cameras = [] 402 | 403 | matches_per_feat = {} 404 | 405 | 406 | for j in range(0, num_top_ranked): 407 | q_name_base = 
q_name.split('/')[-1] 408 | db_name_underscore = retrieved_db[j].replace('/', '_') 409 | 410 | img1_name = os.path.join(args.query_dir, q_name) 411 | if not os.path.exists(img1_name): 412 | img1_name = os.path.join(args.query_dir, q_name_base) 413 | 414 | assert os.path.exists(img1_name), "Query image does not exist at: {} or at: {}".format( 415 | os.path.join(args.query_dir, q_name), 416 | os.path.join(args.query_dir, q_name_base)) 417 | 418 | if use_orig_images: 419 | img2_name = os.path.join(db_image_dir, retrieved_db[j]) 420 | if not os.path.exists(img2_name): 421 | img2_name = os.path.join(db_image_dir, db_name_underscore) 422 | 423 | assert os.path.exists(img2_name), "Database image does not exist at: {} or at: {}".format( 424 | os.path.join(db_image_dir, retrieved_db[j]), 425 | os.path.join(db_image_dir, db_name_underscore)) 426 | 427 | else: 428 | img2_name = os.path.join(db_image_dir, retrieved_db[j].split('.')[0] + rendering_postfix) 429 | if not os.path.exists(img2_name): 430 | img2_name = os.path.join(db_image_dir, db_name_underscore.split('.')[0] + rendering_postfix) 431 | 432 | assert os.path.exists(img2_name), "Database image does not exist at: {} or at: {}".format( 433 | os.path.join(db_image_dir, retrieved_db[j].split('.')[0] + rendering_postfix), 434 | os.path.join(db_image_dir, db_name_underscore.split('.')[0] + rendering_postfix)) 435 | 436 | print(' Matching against ' + img2_name) 437 | 438 | # Loads the depth map 439 | img2_depth = os.path.join(db_depth_dir, retrieved_db[j].split('.')[0] + '_depth.npz') 440 | if not os.path.exists(img2_depth): 441 | img2_depth = os.path.join(db_depth_dir, db_name_underscore.split('.')[0] + '_depth.npz') 442 | 443 | assert os.path.exists(img2_depth), "Database depth image does not exist at: {} or at: {}".format( 444 | os.path.join(db_depth_dir, retrieved_db[j].split('.')[0] + '_depth.npz'), 445 | os.path.join(db_depth_dir, db_name_underscore.split('.')[0] + '_depth.npz')) 446 | 447 | depth_map = np.load(img2_depth)['depth'].astype(np.float32) 448 | 449 | # Get the transformation from reference camera to world coordinates 450 | img2_id = map_db_name_to_id[retrieved_db[j].strip()] 451 | T = np.identity(4) 452 | R = np.asmatrix(qvec2rotmat(images[img2_id].qvec)).transpose() 453 | T[0:3,0:3] = R 454 | T[0:3,3] = -R.dot(images[img2_id].tvec) 455 | P = np.zeros((3,4)) 456 | P[0:3,0:3] = R.transpose() 457 | P[0:3,3] = images[img2_id].tvec 458 | 459 | colmap_cam = cameras[images[img2_id].camera_id] 460 | 461 | top_ranked_cameras.append({'model': colmap_cam.model, 462 | 'width' : colmap_cam.width, 463 | 'height' : colmap_cam.height, 464 | 'params' : colmap_cam.params, 465 | 'q' : images[img2_id].qvec, 466 | 't' : images[img2_id].tvec}) 467 | 468 | if "#" not in args.method_name: 469 | # Tries to load the 2D-2D matches from disk to save time. 
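# The cache files are named as constructed below, one .npy file per
# query/database image pair:
#   <match_prefix><query_basename>_-_<db_name_underscore>_-_<method_name>_<method_config>_-_<max_side_length>.npy
# Each row of the cached array is one 2D-2D correspondence
# [x_query, y_query, x_db, y_db] in image coordinates.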
470 | match_file_name = args.match_prefix + str(q_name_base.split('.')[0]) \ 471 | + '_-_' + str(db_name_underscore.split('.')[0]) + '_-_'\ 472 | + args.method_name + '_' + args.method_config\ 473 | + '_-_' + str(max_side_length) + '.npy' 474 | match_file_exists = os.path.exists(match_file_name) 475 | 476 | if match_file_exists: 477 | matches = np.load(match_file_name) 478 | else: 479 | # computes the matches 480 | matches, _, _, _ = matcher(img1_name, img2_name) 481 | np.save(match_file_name, matches) 482 | else: 483 | method_names = args.method_name.split("#") 484 | method_configs = args.method_config.split("#") 485 | match_dirs = args.match_prefix.split("#") 486 | matches = np.empty((0, 4)) 487 | for method_idx in range(0, len(method_names)): 488 | # Tries to load the 2D-2D matches from disk to save time. 489 | match_file_name = match_dirs[method_idx] + str(q_name_base.split('.')[0]) \ 490 | + '_-_' + str(db_name_underscore.split('.')[0]) + '_-_'\ 491 | + method_names[method_idx] + '_' + method_configs[method_idx]\ 492 | + '_-_' + str(max_side_length) + '.npy' 493 | match_file_exists = os.path.exists(match_file_name) 494 | 495 | if match_file_exists: 496 | matches_method_idx = np.load(match_file_name) 497 | matches = np.append(matches, matches_method_idx, axis=0) 498 | 499 | 500 | 501 | print(' Number of matches with %s: %d.' % (method, matches.shape[0])) 502 | if matches.shape[0] < 3: 503 | continue 504 | 505 | matches[:, 2:] += np.array([args.bias_x, args.bias_y]) 506 | 507 | # kpts1 = matches[:, :2] 508 | kpts2 = matches[:, 2:] 509 | 510 | # Get the corresponding depth values. 511 | kpts2_int = np.rint(kpts2).astype(np.int64) 512 | kpts2_int[:,0] = np.clip(kpts2_int[:,0], 0, colmap_cam.width - 1) 513 | kpts2_int[:,1] = np.clip(kpts2_int[:,1], 0, colmap_cam.height - 1) 514 | depth_values = depth_map[kpts2_int[:,1], kpts2_int[:,0]] 515 | 516 | # Assumes that the images use the PINHOLE camera model 517 | fx = colmap_cam.params[0] 518 | fy = colmap_cam.params[1] 519 | cx = colmap_cam.params[2] 520 | cy = colmap_cam.params[3] 521 | P[0,:] *= fx 522 | P[1,:] *= fy 523 | 524 | rays = kpts2 - np.array([cx, cy]) 525 | rays = np.append(rays, np.ones(depth_values.reshape((-1,1)).shape), axis=1) 526 | rays[:,0] /= fx 527 | rays[:,1] /= fy 528 | points3D = rays * depth_values.reshape((-1,1)) 529 | num_points = points3D.shape[0] 530 | points3D_world = np.matmul(T, np.append(points3D, np.ones([num_points, 1]), axis=1).transpose()).transpose()[:, :3] 531 | 532 | for m in range(0, matches.shape[0]): 533 | m_key = tuple([matches[m, 0], matches[m, 1]]) 534 | xr = np.arange(max(0, np.floor(matches[m, 2])), min(colmap_cam.width - 1, np.floor(matches[m, 2]) + 2)).astype(int) 535 | yr = np.arange(max(0, np.floor(matches[m, 3])), min(colmap_cam.height - 1, np.floor(matches[m, 3]) + 2)).astype(int) 536 | xx, yy = np.meshgrid(xr, yr) 537 | D = depth_map[yy, xx] 538 | delta_x = matches[m, 2] - np.floor(matches[m, 2]) 539 | delta_y = matches[m, 3] - np.floor(matches[m, 3]) 540 | if len(xr) == 2 and len(yr) == 2: 541 | depth_m = (D[0, 0] * (1.0 - delta_x) + D[0, 1] * delta_x) * (1.0 - delta_y) + (D[1, 0] * (1.0 - delta_x) + D[1, 1] * delta_x) * delta_y 542 | elif len(xr) == 2 and len(yr) == 1: 543 | depth_m = D[0, 0] * (1.0 - delta_x) + D[0, 1] * delta_x 544 | elif len(xr) == 1 and len(yr) == 2: 545 | depth_m = D[0, 0] * (1.0 - delta_y) + D[1, 0] * delta_y 546 | else: 547 | depth_m = 0.0 548 | 549 | rays_ = np.array([(matches[m, 2] - cx) / fx, (matches[m, 3] - cy) / fy, 1.0]).transpose() 550 | points_3D_m = 
rays_ * depth_m 551 | points_3D_world_m = np.matmul(T, np.array([points_3D_m[0], points_3D_m[1], points_3D_m[2], 1.0]).transpose()).transpose()[:3] 552 | 553 | m_key = tuple([matches[m, 0], matches[m, 1]]) 554 | pt = points3D_world[m, :] 555 | pt = points_3D_world_m 556 | 557 | if m_key not in matches_per_feat: 558 | matches_per_feat[m_key] = {'keypoint' : matches[m,:2], 559 | 'points' : np.empty((0,3)), 560 | 'observations' : np.empty((0,2)), 561 | 'db_indices' : []} 562 | matches_per_feat[m_key]['observations'] = np.append(matches_per_feat[m_key]['observations'], (matches[m, 2:] - np.array([cx, cy])).reshape(1,2), axis=0) 563 | matches_per_feat[m_key]['points'] = np.append(matches_per_feat[m_key]['points'], pt.reshape(1,3), axis=0) 564 | matches_per_feat[m_key]['db_indices'].append(j) 565 | 566 | matches = [] 567 | 568 | for m_key in matches_per_feat.keys(): 569 | matches.append(matches_per_feat[m_key]) 570 | 571 | pose_options = {'triangulate' : args.triangulate, 572 | 'merge_3D_points' : args.merge_3D_points, 573 | 'cluster_keypoints' : args.cluster_keypoints, 574 | 'covisibility_filtering' : args.covisibility_filtering, 575 | 'use_all_matches' : args.all_matches_ransac, 576 | 'inlier_threshold' : reproj_error, 577 | 'num_LO_iters' : 10, 578 | 'min_ransac_iterations' : args.min_ransac_iterations, 579 | 'max_ransac_iterations' : args.max_ransac_iterations, 580 | 'ransac_type' : args.ransac_type, 581 | 'refinement_range' : args.refinement_range, 582 | 'refinement_step' : args.refinement_step} 583 | 584 | estimate = meshloc.pose_estimation(camera_dict, top_ranked_cameras, 585 | matches, pose_options) 586 | 587 | if estimate['success']: 588 | if best_inliers[q_name] < estimate['num_inliers']: 589 | poses[q_name] = (estimate['qvec'], estimate['tvec']) 590 | best_inliers[q_name] = estimate['num_inliers'] 591 | 592 | print(estimate['qvec']) 593 | print(estimate['tvec']) 594 | 595 | # Writes out the poses. 
Code taken from 596 | # https://github.com/cvg/Hierarchical-Localization/blob/master/hloc/localize_sfm.py#L192 597 | pose_file = args.out_prefix + str(num_top_ranked) + "_" + method + "_" + str(args.reproj_error) 598 | if args.triangulate: 599 | pose_file = pose_file + "_triangulated" 600 | if args.merge_3D_points: 601 | pose_file = pose_file + "_merged_3D_points" 602 | if args.cluster_keypoints: 603 | pose_file = pose_file + "_keypoint_clusters" 604 | if args.covisibility_filtering: 605 | pose_file = pose_file + "_covis_filtering" 606 | if args.all_matches_ransac: 607 | pose_file = pose_file + "_all_matches_ransac" 608 | pose_file = pose_file + "_" + args.ransac_type 609 | pose_file = pose_file + "_min_" + str(args.min_ransac_iterations) + "_max_" + str(args.max_ransac_iterations) 610 | pose_file = pose_file + "_ref_" + str(args.refinement_range) + "_" + str(args.refinement_step) 611 | pose_file = pose_file + "_bias_" + str(args.bias_x) + "_" + str(args.bias_y) 612 | pose_file = pose_file + ".txt" 613 | 614 | print(pose_file) 615 | with open(pose_file, 'w') as f: 616 | for q in poses: 617 | qvec, tvec = poses[q] 618 | qvec = ' '.join(map(str, qvec)) 619 | tvec = ' '.join(map(str, tvec)) 620 | name = q.split('/')[-1] 621 | f.write(f'{name} {qvec} {tvec}\n') 622 | 623 | 624 | if __name__ == "__main__": 625 | main() 626 | -------------------------------------------------------------------------------- /localize_12scenes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright (c) 2022, Vojtech Panek and Zuzana Kukelova and Torsten Sattler 4 | # All rights reserved. 5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions are met: 8 | # 9 | # 1. Redistributions of source code must retain the above copyright notice, this 10 | # list of conditions and the following disclaimer. 11 | # 12 | # 2. Redistributions in binary form must reproduce the above copyright notice, 13 | # this list of conditions and the following disclaimer in the documentation 14 | # and/or other materials provided with the distribution. 15 | # 16 | # 3. Neither the name of the copyright holder nor the names of its 17 | # contributors may be used to endorse or promote products derived from 18 | # this software without specific prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
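# This script follows the same structure as localize.py. The 12 Scenes specific
# differences: query and database image names are flattened with
# replace('/', '_'), the fourth intrinsic parameter is scaled together with the
# other intrinsics (localize.py leaves it unscaled, treating it as a
# SIMPLE_RADIAL distortion parameter), and the +REF refinement defaults are
# tighter (range 0.25 and step 0.05 instead of 1.0 and 0.25).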
30 | 31 | import os 32 | import yaml 33 | import immatch 34 | # import pycolmap 35 | 36 | import numpy as np 37 | from immatch.utils import plot_matches 38 | 39 | import collections 40 | import os 41 | import struct 42 | 43 | from tqdm import tqdm 44 | 45 | import scipy.sparse 46 | import scipy.spatial.distance 47 | 48 | import meshloc 49 | 50 | import argparse 51 | 52 | # Defines helper functions 53 | 54 | #### Code taken from COLMAP: 55 | # from https://github.com/colmap/colmap/blob/dev/scripts/python/read_write_model.py 56 | CameraModel = collections.namedtuple( 57 | "CameraModel", ["model_id", "model_name", "num_params"]) 58 | Camera = collections.namedtuple( 59 | "Camera", ["id", "model", "width", "height", "params"]) 60 | BaseImage = collections.namedtuple( 61 | "Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"]) 62 | Point3D = collections.namedtuple( 63 | "Point3D", ["id", "xyz", "rgb", "error", "image_ids", "point2D_idxs"]) 64 | 65 | 66 | class Image(BaseImage): 67 | def qvec2rotmat(self): 68 | return qvec2rotmat(self.qvec) 69 | 70 | 71 | CAMERA_MODELS = { 72 | CameraModel(model_id=0, model_name="SIMPLE_PINHOLE", num_params=3), 73 | CameraModel(model_id=1, model_name="PINHOLE", num_params=4), 74 | CameraModel(model_id=2, model_name="SIMPLE_RADIAL", num_params=4), 75 | CameraModel(model_id=3, model_name="RADIAL", num_params=5), 76 | CameraModel(model_id=4, model_name="OPENCV", num_params=8), 77 | CameraModel(model_id=5, model_name="OPENCV_FISHEYE", num_params=8), 78 | CameraModel(model_id=6, model_name="FULL_OPENCV", num_params=12), 79 | CameraModel(model_id=7, model_name="FOV", num_params=5), 80 | CameraModel(model_id=8, model_name="SIMPLE_RADIAL_FISHEYE", num_params=4), 81 | CameraModel(model_id=9, model_name="RADIAL_FISHEYE", num_params=5), 82 | CameraModel(model_id=10, model_name="THIN_PRISM_FISHEYE", num_params=12) 83 | } 84 | CAMERA_MODEL_IDS = dict([(camera_model.model_id, camera_model) 85 | for camera_model in CAMERA_MODELS]) 86 | CAMERA_MODEL_NAMES = dict([(camera_model.model_name, camera_model) 87 | for camera_model in CAMERA_MODELS]) 88 | 89 | 90 | def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"): 91 | """Read and unpack the next bytes from a binary file. 92 | :param fid: 93 | :param num_bytes: Sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc. 94 | :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}. 95 | :param endian_character: Any of {@, =, <, >, !} 96 | :return: Tuple of read and unpacked values. 
97 | """ 98 | data = fid.read(num_bytes) 99 | return struct.unpack(endian_character + format_char_sequence, data) 100 | 101 | def qvec2rotmat(qvec): 102 | return np.array([ 103 | [1 - 2 * qvec[2]**2 - 2 * qvec[3]**2, 104 | 2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3], 105 | 2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2]], 106 | [2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3], 107 | 1 - 2 * qvec[1]**2 - 2 * qvec[3]**2, 108 | 2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1]], 109 | [2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2], 110 | 2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1], 111 | 1 - 2 * qvec[1]**2 - 2 * qvec[2]**2]]) 112 | 113 | def read_cameras_text(path): 114 | """ 115 | see: src/base/reconstruction.cc 116 | void Reconstruction::WriteCamerasText(const std::string& path) 117 | void Reconstruction::ReadCamerasText(const std::string& path) 118 | """ 119 | cameras = {} 120 | with open(path, "r") as fid: 121 | while True: 122 | line = fid.readline() 123 | if not line: 124 | break 125 | line = line.strip() 126 | if len(line) > 0 and line[0] != "#": 127 | elems = line.split() 128 | camera_id = int(elems[0]) 129 | model = elems[1] 130 | width = int(elems[2]) 131 | height = int(elems[3]) 132 | params = np.array(tuple(map(float, elems[4:]))) 133 | cameras[camera_id] = Camera(id=camera_id, model=model, 134 | width=width, height=height, 135 | params=params) 136 | return cameras 137 | 138 | 139 | def read_cameras_binary(path_to_model_file): 140 | """ 141 | see: src/base/reconstruction.cc 142 | void Reconstruction::WriteCamerasBinary(const std::string& path) 143 | void Reconstruction::ReadCamerasBinary(const std::string& path) 144 | """ 145 | cameras = {} 146 | with open(path_to_model_file, "rb") as fid: 147 | num_cameras = read_next_bytes(fid, 8, "Q")[0] 148 | for _ in range(num_cameras): 149 | camera_properties = read_next_bytes( 150 | fid, num_bytes=24, format_char_sequence="iiQQ") 151 | camera_id = camera_properties[0] 152 | model_id = camera_properties[1] 153 | model_name = CAMERA_MODEL_IDS[camera_properties[1]].model_name 154 | width = camera_properties[2] 155 | height = camera_properties[3] 156 | num_params = CAMERA_MODEL_IDS[model_id].num_params 157 | params = read_next_bytes(fid, num_bytes=8*num_params, 158 | format_char_sequence="d"*num_params) 159 | cameras[camera_id] = Camera(id=camera_id, 160 | model=model_name, 161 | width=width, 162 | height=height, 163 | params=np.array(params)) 164 | assert len(cameras) == num_cameras 165 | return cameras 166 | 167 | 168 | 169 | def read_images_text(path): 170 | """ 171 | see: src/base/reconstruction.cc 172 | void Reconstruction::ReadImagesText(const std::string& path) 173 | void Reconstruction::WriteImagesText(const std::string& path) 174 | """ 175 | images = {} 176 | with open(path, "r") as fid: 177 | while True: 178 | line = fid.readline() 179 | if not line: 180 | break 181 | line = line.strip() 182 | if len(line) > 0 and line[0] != "#": 183 | elems = line.split() 184 | image_id = int(elems[0]) 185 | qvec = np.array(tuple(map(float, elems[1:5]))) 186 | tvec = np.array(tuple(map(float, elems[5:8]))) 187 | camera_id = int(elems[8]) 188 | image_name = elems[9] 189 | elems = fid.readline().split() 190 | # xys = np.column_stack([tuple(map(float, elems[0::3])), 191 | # tuple(map(float, elems[1::3]))]) 192 | # point3D_ids = np.array(tuple(map(int, elems[2::3]))) 193 | # images[image_id] = Image( 194 | # id=image_id, qvec=qvec, tvec=tvec, 195 | # camera_id=camera_id, name=image_name, 196 | # xys=xys, point3D_ids=point3D_ids) 197 | images[image_id] = 
Image( 198 | id=image_id, qvec=qvec, tvec=tvec, 199 | camera_id=camera_id, name=image_name, 200 | xys={}, point3D_ids={}) 201 | return images 202 | 203 | 204 | def read_images_binary(path_to_model_file): 205 | """ 206 | see: src/base/reconstruction.cc 207 | void Reconstruction::ReadImagesBinary(const std::string& path) 208 | void Reconstruction::WriteImagesBinary(const std::string& path) 209 | """ 210 | images = {} 211 | with open(path_to_model_file, "rb") as fid: 212 | num_reg_images = read_next_bytes(fid, 8, "Q")[0] 213 | for _ in range(num_reg_images): 214 | binary_image_properties = read_next_bytes( 215 | fid, num_bytes=64, format_char_sequence="idddddddi") 216 | image_id = binary_image_properties[0] 217 | qvec = np.array(binary_image_properties[1:5]) 218 | tvec = np.array(binary_image_properties[5:8]) 219 | camera_id = binary_image_properties[8] 220 | image_name = "" 221 | current_char = read_next_bytes(fid, 1, "c")[0] 222 | while current_char != b"\x00": # look for the ASCII 0 entry 223 | image_name += current_char.decode("utf-8") 224 | current_char = read_next_bytes(fid, 1, "c")[0] 225 | num_points2D = read_next_bytes(fid, num_bytes=8, 226 | format_char_sequence="Q")[0] 227 | x_y_id_s = read_next_bytes(fid, num_bytes=24*num_points2D, 228 | format_char_sequence="ddq"*num_points2D) 229 | xys = np.column_stack([tuple(map(float, x_y_id_s[0::3])), 230 | tuple(map(float, x_y_id_s[1::3]))]) 231 | point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3]))) 232 | # images[image_id] = Image( 233 | # id=image_id, qvec=qvec, tvec=tvec, 234 | # camera_id=camera_id, name=image_name, 235 | # xys=xys, point3D_ids=point3D_ids) 236 | images[image_id] = Image( 237 | id=image_id, qvec=qvec, tvec=tvec, 238 | camera_id=camera_id, name=image_name, 239 | xys={}, point3D_ids={}) 240 | return images 241 | 242 | def detect_model_format(path, ext): 243 | if os.path.isfile(os.path.join(path, "cameras" + ext)) and \ 244 | os.path.isfile(os.path.join(path, "images" + ext)): 245 | print("Detected model format: '" + ext + "'") 246 | return True 247 | 248 | return False 249 | 250 | 251 | def read_model(path, ext=""): 252 | # try to detect the extension automatically 253 | if ext == "": 254 | if detect_model_format(path, ".bin"): 255 | ext = ".bin" 256 | elif detect_model_format(path, ".txt"): 257 | ext = ".txt" 258 | else: 259 | print("Provide model format: '.bin' or '.txt'") 260 | return 261 | 262 | if ext == ".txt": 263 | cameras = read_cameras_text(os.path.join(path, "cameras" + ext)) 264 | images = read_images_text(os.path.join(path, "images" + ext)) 265 | else: 266 | cameras = read_cameras_binary(os.path.join(path, "cameras" + ext)) 267 | images = read_images_binary(os.path.join(path, "images" + ext)) 268 | return cameras, images 269 | 270 | 271 | #### 272 | # Main function 273 | 274 | 275 | def main(): 276 | parser = argparse.ArgumentParser(description="Localization against 3D model") 277 | parser.add_argument("--db_image_dir", type=str, help="Directory with database images") 278 | parser.add_argument("--colmap_model_dir", type=str, help="Directory with colmap model") 279 | parser.add_argument("--db_depth_image_dir", type=str, help="Directory with database image depth maps") 280 | parser.add_argument("--method_name", type=str, help="Method name") 281 | parser.add_argument("--method_config", type=str, help="Method config name") 282 | parser.add_argument("--method_string", type=str, help="String name for method used for output") 283 | parser.add_argument("--out_prefix", type=str, help="Prefix of output file 
(including path)") 284 | parser.add_argument("--query_list", type=str, help="Text file containing the query names and intrinsics") 285 | parser.add_argument("--query_dir", type=str, help="Directory with query images") 286 | parser.add_argument("--retrieval_pairs", type=str, help="Text file with results of retrieval") 287 | parser.add_argument("--top_k", type=int, help="Number of top-ranked images to use") 288 | parser.add_argument("--reproj_error", type=float, help="Reprojection error threshold for RANSAC") 289 | parser.add_argument("--use_orig_db_images", action="store_true", help="Use real or rendered database images.") 290 | parser.add_argument("--triangulate", action="store_true", help="Use triangulation instead of 3D points from depth maps.") 291 | parser.add_argument("--merge_3D_points", action="store_true", help="If multiple 3D points are available per query feature, whether to select one or not.") 292 | parser.add_argument("--cluster_keypoints", action="store_true", help="Whether to cluster keypoints. Only applicable for patch2pix at the moment.") 293 | parser.add_argument("--covisibility_filtering", action="store_true", help="Use covisibility filtering or not.") 294 | parser.add_argument("--all_matches_ransac", action="store_true", help="Use all possible 2D-3D matches in RANSAC.") 295 | parser.add_argument("--min_ransac_iterations", type=int, default=1000, required=False, help="Minimum number of RANSAC iterations.") 296 | parser.add_argument("--max_ransac_iterations", type=int, default=100000, required=False, help="Maximum number of RANSAC iterations.") 297 | parser.add_argument("--max_side_length", type=int, default=800, required=False, help="Maximum side length to use for queries, -1 for original resolution") 298 | parser.add_argument("--ransac_type", type=str, default="MSAC", required=False, help="RANSAC type: MSAC, EFFSAC, or PYCOLMAP") 299 | parser.add_argument("--match_prefix", type=str, required=True, help="Contains the directory name and a prefix for the filenames that will be used to write out matches") 300 | parser.add_argument("--rendering_postfix", type=str, required=False, help="Ending for the images") 301 | parser.add_argument("--refinement_range", type=float, default=0.25, help="Range for the +REF refinement") 302 | parser.add_argument("--refinement_step", type=float, default=0.05, help="Step size for the +REF refinement") 303 | parser.add_argument("--bias_x", type=float, default=0.0, help="Bias term for x-direction for feature detections") 304 | parser.add_argument("--bias_y", type=float, default=0.0, help="Bias term for y-direction for feature detections") 305 | args = parser.parse_args() 306 | print(args) 307 | 308 | assert os.path.exists(args.db_image_dir), "Given db_image_dir does not exist: {}".format(args.db_image_dir) 309 | assert os.path.exists(args.colmap_model_dir), "Given colmap_model_dir does not exist: {}".format(args.colmap_model_dir) 310 | assert os.path.exists(args.db_depth_image_dir), "Given db_depth_image_dir does not exist: {}".format(args.db_depth_image_dir) 311 | assert os.path.exists(args.query_list), "Given query_list file does not exist: {}".format(args.query_list) 312 | assert os.path.exists(args.query_dir), "Given query_dir does not exist: {}".format(args.query_dir) 313 | assert os.path.exists(args.retrieval_pairs), "Given retrieval_pairs file does not exist: {}".format(args.retrieval_pairs) 314 | 315 | # get the query information (names, intrinsics calibrations) 316 | query_infos = open(args.query_list, "r").readlines() 317 | 
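    # Each line of query_list is expected to follow the usual localization
    # benchmark query format: image name, camera model, width, height, and the
    # camera parameters (the code below assumes SIMPLE_RADIAL, i.e., f, cx,
    # cy, k). An illustrative line (made-up values):
    #   query/day/IMG_0001.jpg SIMPLE_RADIAL 1600 1200 1420.0 800.0 600.0 -0.034
    # A minimal sketch of the per-line parsing done in the main loop below
    # (_parse_query_line is a hypothetical helper, defined here only for
    # illustration and not used by the script):
    def _parse_query_line(line):
        elems = line.strip().split(' ')
        name, model = elems[0], elems[1]
        width, height = float(elems[2]), float(elems[3])
        params = [float(p) for p in elems[4:]]
        return name, {'model': model, 'width': width,
                      'height': height, 'params': params}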
print(len(query_infos)) 318 | 319 | # load the retrieval pairs 320 | retrieval_pairs = open(args.retrieval_pairs, "r").readlines() 321 | retrieval_results = {} 322 | for i in range(0, len(retrieval_pairs)): 323 | if ',' in retrieval_pairs[i]: 324 | # AP-GeM retrieval pairs format 325 | q_name, db_name, score = retrieval_pairs[i].split(', ') 326 | else: 327 | # NetVLAD retrieval pairs format 328 | q_name, db_name = retrieval_pairs[i].split(' ') 329 | 330 | q_name = q_name.replace('/', '_') 331 | db_name = db_name.replace('/', '_') 332 | if q_name not in retrieval_results: 333 | retrieval_results[q_name] = [] 334 | retrieval_results[q_name].append(db_name) 335 | 336 | # maximum side length of the query images 337 | max_side_length = np.float32(args.max_side_length) 338 | 339 | # load the reference camera poses 340 | print('Loading the reference poses') 341 | cameras, images = read_model(args.colmap_model_dir) 342 | print('Found %d images and %d cameras' % (len(images), len(cameras))) 343 | 344 | # maps from image names to image ids 345 | map_db_name_to_id = {} 346 | for img in images: 347 | map_db_name_to_id[images[img].name.replace('/', '_')] = images[img].id 348 | 349 | reproj_error = args.reproj_error 350 | 351 | db_image_dir = args.db_image_dir 352 | use_orig_images = args.use_orig_db_images 353 | rendering_postfix = args.rendering_postfix #'_rendered_no_color.png' 354 | db_depth_dir = args.db_depth_image_dir 355 | 356 | # main code: matching and pose estimation 357 | poses = {} 358 | best_inliers = {} 359 | num_top_ranked = args.top_k 360 | print('Using the %d top-ranked images' % num_top_ranked) 361 | 362 | # initialize matcher 363 | if "#" not in args.method_name: 364 | config_file = f'configs/{args.method_name}.yml' 365 | print(config_file) 366 | print(args.method_config) 367 | with open(config_file, 'r') as f: 368 | imm_args = yaml.load(f, Loader=yaml.FullLoader)[args.method_config] 369 | if 'ckpt' in imm_args: 370 | imm_args['ckpt'] = os.path.join('.', imm_args['ckpt']) 371 | class_name = imm_args['class'] 372 | # Init model 373 | model = immatch.__dict__[class_name](imm_args) 374 | matcher = lambda im1, im2: model.match_pairs(im1, im2) 375 | method = args.method_string 376 | 377 | for i in tqdm(range(0, len(query_infos))): 378 | print(' ') 379 | q_name = query_infos[i].split(' ')[0].replace('/', '_') 380 | q_data = query_infos[i].split(' ')[1:] 381 | print(' Trying to localize query image ' + q_name) 382 | if q_name not in retrieval_results: 383 | print(' Could not find retrieval results, skipping') 384 | continue 385 | width = np.float32(q_data[1]) 386 | height = np.float32(q_data[2]) 387 | if max_side_length > 0: 388 | scaling_factor = max_side_length / max(width, height) 389 | else: 390 | scaling_factor = 1.0 391 | # We are currently assuming the SIMPLE_RADIAL camera model 392 | camera_dict = {'model': q_data[0], 'width' : int(width * scaling_factor), 393 | 'height' : int(height * scaling_factor), 394 | 'params' : [np.float32(q_data[3]) * scaling_factor, 395 | np.float32(q_data[4]) * scaling_factor, 396 | np.float32(q_data[5]) * scaling_factor, 397 | np.float32(q_data[6]) * scaling_factor]} 398 | 399 | retrieved_db = retrieval_results[q_name] 400 | 401 | best_inliers[q_name] = 0 402 | 403 | top_ranked_cameras = [] 404 | 405 | matches_per_feat = {} 406 | 407 | for j in range(0, min(num_top_ranked, len(retrieved_db))): #len(retrieved_db)): 408 | img1_name = os.path.join(args.query_dir, q_name) 409 | if use_orig_images: 410 | img2_name = os.path.join(db_image_dir, retrieved_db[j]) 
411 | else: 412 | img2_name = os.path.join(db_image_dir, retrieved_db[j].split('.')[0] + rendering_postfix) 413 | print(' Matching against ' + img2_name) 414 | 415 | assert os.path.exists(img1_name), "Query image does not exist: {}".format(img1_name) 416 | assert os.path.exists(img2_name), "Database image does not exist: {}".format(img2_name) 417 | 418 | # Loads the depth map 419 | img2_depth = os.path.join(db_depth_dir, retrieved_db[j].split('.')[0] + '_depth.npz') 420 | assert os.path.exists(img2_depth), "Database depth image does not exist: {}".format(img2_depth) 421 | depth_map = np.load(img2_depth)['depth'].astype(np.float32) 422 | 423 | # Get the transformation from reference camera to world coordinates 424 | img2_id = map_db_name_to_id[retrieved_db[j]] 425 | T = np.identity(4) 426 | R = np.asmatrix(qvec2rotmat(images[img2_id].qvec)).transpose() 427 | T[0:3,0:3] = R 428 | T[0:3,3] = -R.dot(images[img2_id].tvec) 429 | P = np.zeros((3,4)) 430 | P[0:3,0:3] = R.transpose() 431 | P[0:3,3] = images[img2_id].tvec 432 | 433 | colmap_cam = cameras[images[img2_id].camera_id] 434 | 435 | top_ranked_cameras.append({'model': colmap_cam.model, 436 | 'width' : colmap_cam.width, 437 | 'height' : colmap_cam.height, 438 | 'params' : colmap_cam.params, 439 | 'q' : images[img2_id].qvec, 440 | 't' : images[img2_id].tvec}) 441 | 442 | if "#" not in args.method_name: 443 | # Tries to load the 2D-2D matches from disk to save time. 444 | match_file_name = args.match_prefix + str(q_name.split('.')[0]) \ 445 | + '_-_' + str(retrieved_db[j].split('.')[0]) + '_-_'\ 446 | + args.method_name + '_' + args.method_config\ 447 | + '_-_' + str(max_side_length) + '.npy' 448 | match_file_exists = os.path.exists(match_file_name) 449 | 450 | if match_file_exists: 451 | matches = np.load(match_file_name) 452 | else: 453 | # computes the matches 454 | matches, _, _, _ = matcher(img1_name, img2_name) 455 | np.save(match_file_name, matches) 456 | else: 457 | method_names = args.method_name.split("#") 458 | method_configs = args.method_config.split("#") 459 | match_dirs = args.match_prefix.split("#") 460 | matches = np.empty((0, 4)) 461 | for method_idx in range(0, len(method_names)): 462 | # Tries to load the 2D-2D matches from disk to save time. 463 | match_file_name = match_dirs[method_idx] + str(q_name.split('.')[0]) \ 464 | + '_-_' + str(retrieved_db[j].split('.')[0]) + '_-_'\ 465 | + method_names[method_idx] + '_' + method_configs[method_idx]\ 466 | + '_-_' + str(max_side_length) + '.npy' 467 | match_file_exists = os.path.exists(match_file_name) 468 | 469 | if match_file_exists: 470 | matches_method_idx = np.load(match_file_name) 471 | matches = np.append(matches, matches_method_idx, axis=0) 472 | 473 | 474 | 475 | print(' Number of matches with %s: %d.' % (method, matches.shape[0])) 476 | if matches.shape[0] < 3: 477 | continue 478 | 479 | matches[:, 2:] += np.array([args.bias_x, args.bias_y]) 480 | 481 | kpts1 = matches[:, :2] 482 | kpts2 = matches[:, 2:] 483 | 484 | # Get the corresponding depth values. 
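            # The block below back-projects the matched database keypoints to
            # 3D. A depth d is looked up in the rendered depth map, the pixel
            # is unprojected with the PINHOLE intrinsics,
            #   X_cam = d * [(u - cx) / fx, (v - cy) / fy, 1]^T,
            # and X_cam is mapped to world coordinates via the camera-to-world
            # transformation T assembled above. The vectorized version uses
            # nearest-neighbor depths; the per-match loop that follows it
            # recomputes the points with bilinearly interpolated depths, and
            # those interpolated points are the ones that are kept.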
485 |             kpts2_int = np.rint(kpts2).astype(np.int64)
486 |             kpts2_int[:,0] = np.clip(kpts2_int[:,0], 0, colmap_cam.width - 1)
487 |             kpts2_int[:,1] = np.clip(kpts2_int[:,1], 0, colmap_cam.height - 1)
488 |             depth_values = depth_map[kpts2_int[:,1], kpts2_int[:,0]]
489 | 
490 |             # Assumes that the images use the PINHOLE camera model
491 |             # print(colmap_cam.params)
492 |             fx = colmap_cam.params[0]
493 |             fy = colmap_cam.params[1]
494 |             cx = colmap_cam.params[2]
495 |             cy = colmap_cam.params[3]
496 |             P[0,:] *= fx
497 |             P[1,:] *= fy
498 | 
499 |             rays = kpts2 - np.array([cx, cy])
500 |             rays = np.append(rays, np.ones(depth_values.reshape((-1,1)).shape), axis=1)
501 |             rays[:,0] /= fx
502 |             rays[:,1] /= fy
503 |             points3D = rays * depth_values.reshape((-1,1))
504 |             num_points = points3D.shape[0]
505 |             points3D_world = np.matmul(T, np.append(points3D, np.ones([num_points, 1]), axis=1).transpose()).transpose()[:, :3]
506 | 
507 |             for m in range(0, matches.shape[0]):
508 |                 m_key = tuple([matches[m, 0], matches[m, 1]])
509 |                 xr = np.arange(max(0, np.floor(matches[m, 2])), min(colmap_cam.width - 1, np.floor(matches[m, 2]) + 2)).astype(int)
510 |                 yr = np.arange(max(0, np.floor(matches[m, 3])), min(colmap_cam.height - 1, np.floor(matches[m, 3]) + 2)).astype(int)
511 |                 xx, yy = np.meshgrid(xr, yr)
512 |                 D = depth_map[yy, xx]
513 |                 delta_x = matches[m, 2] - np.floor(matches[m, 2])
514 |                 delta_y = matches[m, 3] - np.floor(matches[m, 3])
515 |                 if len(xr) == 2 and len(yr) == 2:
516 |                     depth_m = (D[0, 0] * (1.0 - delta_x) + D[0, 1] * delta_x) * (1.0 - delta_y) + (D[1, 0] * (1.0 - delta_x) + D[1, 1] * delta_x) * delta_y
517 |                 elif len(xr) == 2 and len(yr) == 1:
518 |                     depth_m = D[0, 0] * (1.0 - delta_x) + D[0, 1] * delta_x
519 |                 elif len(xr) == 1 and len(yr) == 2:
520 |                     depth_m = D[0, 0] * (1.0 - delta_y) + D[1, 0] * delta_y
521 |                 else:  # degenerate 1x1 neighborhood: the zero depth places the point at the database camera center, so it is filtered out during pose estimation
522 |                     depth_m = 0.0
523 | 
524 |                 rays_ = np.array([(matches[m, 2] - cx) / fx, (matches[m, 3] - cy) / fy, 1.0]).transpose()
525 |                 points_3D_m = rays_ * depth_m
526 |                 points_3D_world_m = np.matmul(T, np.array([points_3D_m[0], points_3D_m[1], points_3D_m[2], 1.0]).transpose()).transpose()[:3]
527 | 
530 |                 pt = points_3D_world_m  # the interpolated point supersedes the nearest-neighbor estimate in points3D_world
531 | 
532 |                 if m_key not in matches_per_feat:
533 |                     matches_per_feat[m_key] = {'keypoint' : matches[m,:2],
534 |                                                'points' : np.empty((0,3)),
535 |                                                'observations' : np.empty((0,2)),
536 |                                                'db_indices' : []}
537 |                 matches_per_feat[m_key]['observations'] = np.append(matches_per_feat[m_key]['observations'], (matches[m, 2:] - np.array([cx, cy])).reshape(1,2), axis=0)
538 |                 matches_per_feat[m_key]['points'] = np.append(matches_per_feat[m_key]['points'], pt.reshape(1,3), axis=0)
539 |                 matches_per_feat[m_key]['db_indices'].append(j)
540 | 
541 |         matches = []
542 | 
543 |         for m_key in matches_per_feat.keys():
544 |             matches.append(matches_per_feat[m_key])
545 | 
546 |         pose_options = {'triangulate' : args.triangulate,
547 |                         'merge_3D_points' : args.merge_3D_points,
548 |                         'cluster_keypoints' : args.cluster_keypoints,
549 |                         'covisibility_filtering' : args.covisibility_filtering,
550 |                         'use_all_matches' : args.all_matches_ransac,
551 |                         'inlier_threshold' : reproj_error,
552 |                         'num_LO_iters' : 10,
553 |                         'min_ransac_iterations' : args.min_ransac_iterations,
554 |                         'max_ransac_iterations' : args.max_ransac_iterations,
555 |                         'ransac_type' : args.ransac_type,
556 |                         'refinement_range' : args.refinement_range,
557 |                         'refinement_step' : args.refinement_step}
558 | 
559 |         estimate = meshloc.pose_estimation(camera_dict, 
top_ranked_cameras, 560 | matches, pose_options) 561 | 562 | if estimate['success']: 563 | if best_inliers[q_name] < estimate['num_inliers']: 564 | poses[q_name] = (estimate['qvec'], estimate['tvec']) 565 | best_inliers[q_name] = estimate['num_inliers'] 566 | 567 | print(estimate['qvec']) 568 | print(estimate['tvec']) 569 | 570 | 571 | # Writes out the poses. Code taken from 572 | # https://github.com/cvg/Hierarchical-Localization/blob/master/hloc/localize_sfm.py#L192 573 | pose_file = args.out_prefix + str(num_top_ranked) + "_" + method + "_" + str(args.reproj_error) 574 | if args.triangulate: 575 | pose_file = pose_file + "_triangulated" 576 | if args.merge_3D_points: 577 | pose_file = pose_file + "_merged_3D_points" 578 | if args.cluster_keypoints: 579 | pose_file = pose_file + "_keypoint_clusters" 580 | if args.covisibility_filtering: 581 | pose_file = pose_file + "_covis_filtering" 582 | if args.all_matches_ransac: 583 | pose_file = pose_file + "_all_matches_ransac" 584 | pose_file = pose_file + "_" + args.ransac_type 585 | pose_file = pose_file + "_min_" + str(args.min_ransac_iterations) + "_max_" + str(args.max_ransac_iterations) 586 | pose_file = pose_file + "_ref_" + str(args.refinement_range) + "_" + str(args.refinement_step) 587 | pose_file = pose_file + "_bias_" + str(args.bias_x) + "_" + str(args.bias_y) 588 | pose_file = pose_file + ".txt" 589 | 590 | print(pose_file) 591 | with open(pose_file, 'w') as f: 592 | for q in poses: 593 | qvec, tvec = poses[q] 594 | qvec = ' '.join(map(str, qvec)) 595 | tvec = ' '.join(map(str, tvec)) 596 | name = q.split('/')[-1] 597 | f.write(f'{name} {qvec} {tvec}\n') 598 | 599 | 600 | if __name__ == "__main__": 601 | main() 602 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | Based on the setup.py script from 3 | https://github.com/mihaidusmanu/pycolmap/blob/master/setup.py 4 | BSD-3-Clause License 5 | 6 | Note that we never tried compiling it on Windows. 
7 | """ 8 | import os 9 | import re 10 | import sys 11 | import platform 12 | import subprocess 13 | 14 | from setuptools import setup, Extension 15 | from setuptools.command.build_ext import build_ext 16 | from distutils.version import LooseVersion 17 | 18 | import multiprocessing 19 | 20 | class CMakeExtension(Extension): 21 | def __init__(self, name, sourcedir=''): 22 | Extension.__init__(self, name, sources=[]) 23 | self.sourcedir = os.path.abspath(sourcedir) 24 | 25 | 26 | class CMakeBuild(build_ext): 27 | def run(self): 28 | try: 29 | out = subprocess.check_output(['cmake', '--version']) 30 | except OSError: 31 | raise RuntimeError("CMake must be installed to build the following extensions: " + 32 | ", ".join(e.name for e in self.extensions)) 33 | 34 | if platform.system() == "Windows": 35 | cmake_version = LooseVersion(re.search(r'version\s*([\d.]+)', out.decode()).group(1)) 36 | if cmake_version < '3.1.0': 37 | raise RuntimeError("CMake >= 3.1.0 is required on Windows") 38 | 39 | for ext in self.extensions: 40 | self.build_extension(ext) 41 | 42 | def build_extension(self, ext): 43 | extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name))) 44 | cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir, 45 | '-DPYTHON_EXECUTABLE=' + sys.executable] 46 | 47 | if "CUDA_HOME" in os.environ: 48 | cmake_args += ['-DCMAKE_PREFIX_PATH=' + os.environ["CUDA_HOME"]] 49 | cmake_args += ['-DCMAKE_CUDA_COMPILER=' + os.path.join(os.environ["CUDA_HOME"], 'bin', 'nvcc')] 50 | 51 | cfg = 'Debug' if self.debug else 'Release' 52 | build_args = ['--config', cfg] 53 | 54 | if platform.system() == "Windows": 55 | if os.environ.get('CMAKE_TOOLCHAIN_FILE') is not None: 56 | cmake_toolchain_file = os.environ.get('CMAKE_TOOLCHAIN_FILE') 57 | print(f'-DCMAKE_TOOLCHAIN_FILE={cmake_toolchain_file}') 58 | cmake_args += [f'-DCMAKE_TOOLCHAIN_FILE={cmake_toolchain_file}'] 59 | if os.environ.get('CMAKE_PREFIX_PATH') is not None: 60 | cmake_prefix_path = os.environ.get('CMAKE_PREFIX_PATH') 61 | print(f'-DCMAKE_PREFIX_PATH={cmake_prefix_path}') 62 | cmake_args += [f'-DCMAKE_PREFIX_PATH={cmake_prefix_path}'] 63 | cmake_args += ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}'.format(cfg.upper(), extdir)] 64 | if sys.maxsize > 2**32: 65 | cmake_args += ['-DVCPKG_TARGET_TRIPLET=x64-windows'] 66 | cmake_args += ['-A', 'x64'] 67 | build_args += ['--', '/m'] 68 | else: 69 | cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg] 70 | build_args += ['--', '-j{}'.format(multiprocessing.cpu_count() - 1)] 71 | 72 | if platform.system() == "Darwin": 73 | cmake_args += ['-DOpenMP_CXX_FLAGS="-Xclang -fopenmp"'] 74 | cmake_args += ['-DOpenMP_CXX_LIB_NAMES=libomp'] 75 | cmake_args += ['-DOpenMP_C_FLAGS="-Xclang -fopenmp"'] 76 | cmake_args += ['-DOpenMP_C_LIB_NAMES=libomp'] 77 | 78 | env = os.environ.copy() 79 | env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format( 80 | env.get('CXXFLAGS', ''), 81 | self.distribution.get_version() 82 | ) 83 | if not os.path.exists(self.build_temp): 84 | os.makedirs(self.build_temp) 85 | print(['cmake', ext.sourcedir] + cmake_args) 86 | subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env) 87 | subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp) 88 | 89 | setup( 90 | name='meshloc', 91 | version='0.1.0', 92 | author='Torsten Sattler', 93 | author_email='torsten.sattler@cvut.cz', 94 | description='Python bindings for localization helpers', 95 | ext_modules=[CMakeExtension('src')], 96 | cmdclass=dict(build_ext=CMakeBuild), 97 | 
zip_safe=False, 98 | install_requires=[ 99 | "pyyaml", 100 | "numpy < 2.0.0", 101 | ], 102 | ) -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.6) 2 | 3 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17") 4 | set(PYBIND11_CPP_STANDARD -std=c++17) 5 | 6 | find_package (Eigen3 REQUIRED) 7 | 8 | find_package (Ceres REQUIRED) 9 | 10 | find_package (colmap REQUIRED) 11 | 12 | find_package( TIFF REQUIRED ) 13 | if ( TIFF_FOUND ) 14 | include_directories( ${TIFF_INCLUDE_DIRS} ) 15 | list(APPEND COLMAP_EXTERNAL_LIBRARIES ${TIFF_LIBRARIES}) 16 | endif( TIFF_FOUND ) 17 | 18 | include_directories ( 19 | ${CMAKE_SOURCE_DIR} 20 | ${EIGEN3_INCLUDE_DIRS} 21 | ${CERES_INCLUDE_DIRS} 22 | ${CMAKE_SOURCE_DIR}/RansacLib 23 | ${CMAKE_SOURCE_DIR}/PoseLib 24 | ${CMAKE_SOURCE_DIR}/src 25 | ${COLMAP_INCLUDE_DIRS} 26 | ) 27 | 28 | link_directories(${COLMAP_LINK_DIRS}) 29 | 30 | file(GLOB POSELIB_SRC 31 | "${CMAKE_SOURCE_DIR}/PoseLib/PoseLib/misc/*.h" 32 | "${CMAKE_SOURCE_DIR}/PoseLib/PoseLib/misc/*.cc" 33 | "${CMAKE_SOURCE_DIR}/PoseLib/PoseLib/*.h" 34 | "${CMAKE_SOURCE_DIR}/PoseLib/PoseLib/*.cc" 35 | "${CMAKE_SOURCE_DIR}/PoseLib/PoseLib/solvers/*.h" 36 | "${CMAKE_SOURCE_DIR}/PoseLib/PoseLib/solvers/*.cc" 37 | "${CMAKE_SOURCE_DIR}/PoseLib/PoseLib/robust/*.h" 38 | "${CMAKE_SOURCE_DIR}/PoseLib/PoseLib/robust/*.cc" 39 | "${CMAKE_SOURCE_DIR}/PoseLib/PoseLib/robust/estimators/*.h" 40 | "${CMAKE_SOURCE_DIR}/PoseLib/PoseLib/robust/estimators/*.cc" 41 | ) 42 | 43 | pybind11_add_module(meshloc main.cc absolute_pose_estimator.h absolute_pose_estimator.cc multi_absolute_pose_estimator.h multi_absolute_pose_estimator.cc utils.h utils.cc ${POSELIB_SRC}) 44 | 45 | target_link_libraries(meshloc PRIVATE ${CERES_LIBRARIES} colmap::colmap Eigen3::Eigen) 46 | -------------------------------------------------------------------------------- /src/absolute_pose_estimator.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019, Torsten Sattler 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // 10 | // * Redistributions in binary form must reproduce the above copyright 11 | // notice, this list of conditions and the following disclaimer in the 12 | // documentation and/or other materials provided with the distribution. 13 | // 14 | // * Neither the name of the copyright holder nor the 15 | // names of its contributors may be used to endorse or promote products 16 | // derived from this software without specific prior written permission. 17 | // 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 | // ARE DISCLAIMED. 
IN NO EVENT SHALL BE LIABLE FOR ANY 22 | // DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | // (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 25 | // ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | // 29 | // author: Torsten Sattler, torsten.sattler.de@googlemail.com 30 | 31 | #include 32 | 33 | #include 34 | #include 35 | #include 36 | #include 37 | 38 | #include "absolute_pose_estimator.h" 39 | 40 | namespace mesh_loc { 41 | 42 | AbsolutePoseEstimator::AbsolutePoseEstimator( 43 | const double f_x, const double f_y, const double squared_inlier_threshold, 44 | const int width, const int height, const double bin_size, 45 | const Points2D& points2D, const ViewingRays& rays, const Points3D& points3D) 46 | : focal_x_(f_x), 47 | focal_y_(f_y), 48 | squared_inlier_threshold_(squared_inlier_threshold), 49 | width_(width), height_(height), bin_size_(bin_size), 50 | points2D_(points2D), 51 | points3D_(points3D), 52 | rays_(rays) { 53 | num_data_ = static_cast(points2D_.size()); 54 | num_x_bins_ = static_cast(static_cast(width_) / bin_size_ + 0.5); 55 | } 56 | 57 | int AbsolutePoseEstimator::MinimalSolver( 58 | const std::vector& sample, CameraPoses* poses) const { 59 | poses->clear(); 60 | std::vector x(3), X(3); 61 | for (int i = 0; i < 3; ++i) { 62 | x[i] = rays_[sample[i]]; 63 | X[i] = points3D_[sample[i]]; 64 | } 65 | 66 | std::vector poselib_poses; 67 | int num_sols = poselib::p3p(x, X, &poselib_poses); 68 | // std::cout << num_sols << std::endl; 69 | if (num_sols == 0) return 0; 70 | 71 | for (const poselib::CameraPose& pose : poselib_poses) { 72 | CameraPose P; 73 | P.topLeftCorner<3, 3>() = pose.R(); 74 | P.col(3) = -P.topLeftCorner<3, 3>().transpose() * pose.t; 75 | 76 | poses->push_back(P); 77 | 78 | // const double kError = EvaluateModelOnPoint(P, sample[3]); 79 | // if (kError < squared_inlier_threshold_) { 80 | // poses->push_back(P); 81 | // break; 82 | // } 83 | } 84 | // std::cout << poses->size() << std::endl; 85 | 86 | return static_cast(poses->size()); 87 | } 88 | 89 | // Returns 0 if no model could be estimated and 1 otherwise. 90 | // Implemented via non-linear optimization in Ceres. 
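// (In the current code the Ceres step is skipped via the early return in
// LeastSquares() further down; the non-minimal solver effectively runs p3p on
// a fixed set of 3-point subsets of the 6-point sample and returns the
// candidate pose with the lowest truncated reprojection score over the whole
// sample.)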
91 | int AbsolutePoseEstimator::NonMinimalSolver(
92 |     const std::vector<int>& sample, CameraPose* pose) const {
93 |   CameraPoses poses;
94 |   std::vector<std::vector<int>> subsamples =
95 |       {{sample[0], sample[1], sample[2]},
96 |        {sample[1], sample[2], sample[3]},
97 |        {sample[2], sample[3], sample[4]},
98 |        {sample[3], sample[4], sample[5]},
99 |        {sample[0], sample[3], sample[4]},
100 |        {sample[0], sample[3], sample[5]},
101 |        {sample[0], sample[4], sample[5]},
102 |        {sample[1], sample[3], sample[4]},
103 |        {sample[1], sample[3], sample[5]},
104 |        {sample[1], sample[4], sample[5]},
105 |        {sample[2], sample[3], sample[5]},
106 |        {sample[2], sample[4], sample[5]}};
107 |   for (int k = 0; k < 12; ++k) {
108 |     CameraPoses sub_poses;
109 |     if (MinimalSolver(subsamples[k], &sub_poses) > 0) {
110 |       for (const CameraPose& P : sub_poses) {
111 |         poses.emplace_back(P);
112 |       }
113 |     }
114 |   }
115 | 
116 |   // p3p can fail on all subsamples, e.g., for (near-)degenerate samples.
117 |   if (poses.empty()) return 0;
118 | 
119 |   const int kSampleSize = static_cast<int>(sample.size());
120 |   *pose = poses[0];
121 | 
122 |   double best_score = std::numeric_limits<double>::max();
123 |   for (const CameraPose& P : poses) {
124 |     double score = 0.0;
125 |     for (int i = 0; i < kSampleSize; ++i) {
126 |       score += std::min(EvaluateModelOnPoint(P, sample[i]),
127 |                         squared_inlier_threshold_);
128 |     }
129 |     if (score < best_score) {
130 |       best_score = score;
131 |       *pose = P;
132 |     }
133 |   }
134 | 
135 |   LeastSquares(sample, pose);
136 |   return 1;
140 | }
141 | 
142 | // Evaluates the pose on the i-th data point.
143 | double AbsolutePoseEstimator::EvaluateModelOnPoint(
144 |     const CameraPose& pose, int i) const {
145 |   Eigen::Vector3d p_c =
146 |       pose.topLeftCorner<3, 3>() * (points3D_[i] - pose.col(3));
147 | 
148 |   // Check whether point projects behind the camera.
149 |   if (p_c[2] < 0.0) return std::numeric_limits<double>::max();
150 | 
151 |   Eigen::Vector2d p_2d = p_c.head<2>() / p_c[2];
152 |   p_2d[0] *= focal_x_;
153 |   p_2d[1] *= focal_y_;
154 | 
155 |   return (p_2d - points2D_[i]).squaredNorm();
156 | }
157 | 
158 | double AbsolutePoseEstimator::EvaluateModelOnPoint(
159 |     const CameraPose& pose, int i, int* hash) const {
160 |   int hash_x = std::max(0,
161 |       std::min(static_cast<int>(points2D_[i][0] / bin_size_ + 0.5), width_ - 1));
162 |   int hash_y = std::max(0,
163 |       std::min(static_cast<int>(points2D_[i][1] / bin_size_ + 0.5), height_ - 1));
164 |   *hash = hash_x + hash_y * 100000;
165 | 
166 |   return EvaluateModelOnPoint(pose, i);
167 | }
168 | 
169 | // Reference implementation using Ceres for refinement. 
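// NOTE: The unconditional return at the top of this function currently
// disables the refinement; the final least-squares step of RANSAC therefore
// has no effect for this estimator.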
170 | void AbsolutePoseEstimator::LeastSquares( 171 | const std::vector& sample, CameraPose* pose) const { 172 | return; 173 | 174 | Eigen::AngleAxisd aax(pose->topLeftCorner<3, 3>()); 175 | Eigen::Vector3d aax_vec = aax.axis() * aax.angle(); 176 | double* camera = new double[6]; 177 | camera[0] = aax_vec[0]; 178 | camera[1] = aax_vec[1]; 179 | camera[2] = aax_vec[2]; 180 | Eigen::Vector3d t = -pose->topLeftCorner<3, 3>() * pose->col(3); 181 | camera[3] = t[0]; 182 | camera[4] = t[1]; 183 | camera[5] = t[2]; 184 | 185 | ceres::Problem refinement_problem; 186 | 187 | // ceres::LossFunction* loss_fcnt = new ceres::CauchyLoss(1.0); 188 | 189 | const int kSampleSize = static_cast(sample.size()); 190 | for (int i = 0; i < kSampleSize; ++i) { 191 | const int kIdx = sample[i]; 192 | const Eigen::Vector2d& p_img = points2D_[kIdx]; 193 | const Eigen::Vector3d& p_3D = points3D_[kIdx]; 194 | ceres::CostFunction* cost_function = 195 | ReprojectionError::CreateCost(p_img[0], p_img[1], p_3D[0], p_3D[1], 196 | p_3D[2], focal_x_, focal_y_); 197 | 198 | refinement_problem.AddResidualBlock(cost_function, nullptr, camera); 199 | } 200 | 201 | ceres::Solver::Options options; 202 | options.max_num_iterations = 100; 203 | options.linear_solver_type = ceres::DENSE_QR; 204 | options.minimizer_progress_to_stdout = false; 205 | ceres::Solver::Summary summary; 206 | ceres::Solve(options, &refinement_problem, &summary); 207 | 208 | if (summary.IsSolutionUsable()) { 209 | Eigen::Vector3d axis(camera[0], camera[1], camera[2]); 210 | double angle = axis.norm(); 211 | axis.normalize(); 212 | aax.axis() = axis; 213 | aax.angle() = angle; 214 | 215 | pose->topLeftCorner<3, 3>() = aax.toRotationMatrix(); 216 | t = Eigen::Vector3d(camera[3], camera[4], camera[5]); 217 | pose->col(3) = -pose->topLeftCorner<3, 3>().transpose() * t; 218 | } 219 | 220 | delete [] camera; 221 | camera = nullptr; 222 | } 223 | 224 | void AbsolutePoseEstimator::PixelsToViewingRays( 225 | const double focal_x, const double focal_y, const Points2D& points2D, 226 | ViewingRays* rays) { 227 | const int kNumData = static_cast(points2D.size()); 228 | 229 | // Creates the bearing vectors and points for the OpenGV adapter. 230 | rays->resize(kNumData); 231 | for (int i = 0; i < kNumData; ++i) { 232 | (*rays)[i] = points2D[i].homogeneous(); 233 | (*rays)[i][0] /= focal_x; 234 | (*rays)[i][1] /= focal_y; 235 | (*rays)[i].normalize(); 236 | } 237 | } 238 | 239 | } // namespace visloc_help 240 | -------------------------------------------------------------------------------- /src/absolute_pose_estimator.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019, Torsten Sattler 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // 10 | // * Redistributions in binary form must reproduce the above copyright 11 | // notice, this list of conditions and the following disclaimer in the 12 | // documentation and/or other materials provided with the distribution. 13 | // 14 | // * Neither the name of the copyright holder nor the 15 | // names of its contributors may be used to endorse or promote products 16 | // derived from this software without specific prior written permission. 
17 | // 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 | // ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY 22 | // DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | // (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 25 | // ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | // 29 | // author: Torsten Sattler, torsten.sattler.de@googlemail.com 30 | 31 | // Based on the calibrated absolute pose estimator example provided by 32 | // RansacLib. 33 | 34 | #ifndef ABSOLUTE_POSE_ESTIMATOR_H_ 35 | #define ABSOLUTE_POSE_ESTIMATOR_H_ 36 | 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | 46 | #include 47 | #include 48 | 49 | #include 50 | #include 51 | 52 | #include "utils.h" 53 | #include "types.h" 54 | 55 | namespace mesh_loc { 56 | 57 | // using Eigen::Vector3d; 58 | // // An absolute pose is a Eigen 3x4 double matrix storing the rotation and 59 | // // translation of the camera. 60 | // typedef Eigen::Matrix CameraPose; 61 | // typedef std::vector> 62 | // CameraPoses; 63 | 64 | // typedef std::vector Points2D; 65 | // typedef std::vector> Points3D; 66 | // typedef std::vector> ViewingRays; 67 | 68 | class AbsolutePoseEstimator { 69 | public: 70 | EIGEN_MAKE_ALIGNED_OPERATOR_NEW 71 | 72 | AbsolutePoseEstimator(const double f_x, const double f_y, 73 | const double squared_inlier_threshold, 74 | const int width, const int height, 75 | const double bin_size, 76 | const Points2D& points2D, 77 | const ViewingRays& rays, 78 | const Points3D& points3D); 79 | 80 | inline int min_sample_size() const { return 3; } 81 | 82 | inline int non_minimal_sample_size() const { return 6; } 83 | 84 | inline int num_data() const { return num_data_; } 85 | 86 | int MinimalSolver(const std::vector& sample, CameraPoses* poses) const; 87 | 88 | int NonMinimalSolver(const std::vector& sample, CameraPose* pose) const; 89 | 90 | double EvaluateModelOnPoint(const CameraPose& pose, int i) const; 91 | 92 | double EvaluateModelOnPoint(const CameraPose& pose, int i, int* hash) const; 93 | 94 | void LeastSquares(const std::vector& sample, CameraPose* pose) const; 95 | 96 | static void PixelsToViewingRays(const double focal_x, const double focal_y, 97 | const Points2D& points2D, ViewingRays* rays); 98 | 99 | protected: 100 | // Focal lengths in x- and y-directions. 101 | double focal_x_; 102 | double focal_y_; 103 | double squared_inlier_threshold_; 104 | // Size of the image. 105 | int width_; 106 | int height_; 107 | double bin_size_; 108 | int num_x_bins_; 109 | // Stores the 2D point positions. 110 | Points2D points2D_; 111 | // Stores the corresponding 3D point positions. 112 | Points3D points3D_; 113 | // Stores the viewing ray for each 2D point position. 
114 | ViewingRays rays_; 115 | int num_data_; 116 | }; 117 | 118 | } // namespace visloc_help 119 | 120 | #endif // ABSOLUTE_POSE_ESTIMATOR_H_ 121 | -------------------------------------------------------------------------------- /src/main.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022, Vojtech Panek and Zuzana Kukelova and Torsten Sattler 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are met: 6 | // 7 | // 1. Redistributions of source code must retain the above copyright notice, 8 | // this list of conditions and the following disclaimer. 9 | // 10 | // 2. Redistributions in binary form must reproduce the above copyright notice, 11 | // this list of conditions and the following disclaimer in the documentation 12 | // and/or other materials provided with the distribution. 13 | // 14 | // 3. Neither the name of the copyright holder nor the names of its 15 | // contributors may be used to endorse or promote products derived from 16 | // this software without specific prior written permission. 17 | // 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 | // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 22 | // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 | // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 | // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 | // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 | // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 | // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 28 | // THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | #include 31 | 32 | #include "pose_estimation.cc" 33 | 34 | namespace py = pybind11; 35 | 36 | PYBIND11_MODULE(meshloc, m) { 37 | m.doc() = "Python bindings for localization helpers"; 38 | m.def("pose_estimation", &pose_estimation, 39 | py::arg("query_camera"), 40 | py::arg("db_cams"), 41 | py::arg("matches"), 42 | py::arg("pose_options"), 43 | "Absolute pose estimation with pre-processing, followed by non-linear refinement."); 44 | } -------------------------------------------------------------------------------- /src/multi_absolute_pose_estimator.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019, Torsten Sattler 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // 10 | // * Redistributions in binary form must reproduce the above copyright 11 | // notice, this list of conditions and the following disclaimer in the 12 | // documentation and/or other materials provided with the distribution. 13 | // 14 | // * Neither the name of the copyright holder nor the 15 | // names of its contributors may be used to endorse or promote products 16 | // derived from this software without specific prior written permission. 
17 | //
18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 | // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
22 | // DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 | // (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 | // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25 | // ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 | // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | //
29 | // author: Torsten Sattler, torsten.sattler.de@googlemail.com
30 | 
31 | #include 
32 | 
33 | #include 
34 | #include 
35 | #include 
36 | #include 
37 | 
38 | #include "multi_absolute_pose_estimator.h"
39 | 
40 | namespace mesh_loc {
41 | 
42 | MultiAbsolutePoseEstimator::MultiAbsolutePoseEstimator(
43 |     const double f_x, const double f_y, const double squared_inlier_threshold,
44 |     const int width, const int height, const double bin_size,
45 |     const Points2D& points2D, const ViewingRays& rays,
46 |     const Points3DVec& points3D, const int num_trials)
47 |     : focal_x_(f_x),
48 |       focal_y_(f_y),
49 |       squared_inlier_threshold_(squared_inlier_threshold),
50 |       width_(width), height_(height), bin_size_(bin_size),
51 |       points2D_(points2D),
52 |       points3D_(points3D),
53 |       rays_(rays),
54 |       num_trials_(num_trials) {
55 |   num_data_ = static_cast<int>(points2D_.size());
56 |   num_x_bins_ = static_cast<int>(static_cast<double>(width_) / bin_size_ + 0.5);
57 | }
58 | 
59 | int MultiAbsolutePoseEstimator::MinimalSolver(
60 |     const std::vector<int>& sample, CameraPoses* poses) const {
61 |   poses->clear();
62 |   // std::uniform_int_distribution samples from a closed interval, so the
63 |   // upper bound has to be size() - 1 to avoid out-of-range indices.
64 |   std::uniform_int_distribution<int> dist1(0,
65 |       static_cast<int>(points3D_[sample[0]].size()) - 1);
66 |   std::uniform_int_distribution<int> dist2(0,
67 |       static_cast<int>(points3D_[sample[1]].size()) - 1);
68 |   std::uniform_int_distribution<int> dist3(0,
69 |       static_cast<int>(points3D_[sample[2]].size()) - 1);
70 | 
71 |   std::vector<Eigen::Vector3d> x = {rays_[sample[0]], rays_[sample[1]],
72 |                                     rays_[sample[2]]};
73 |   std::vector<Eigen::Vector3d> X(3);
74 |   for (int t = 0; t < num_trials_; ++t) {
75 |     X[0] = points3D_[sample[0]][dist1(rng_)];
76 |     X[1] = points3D_[sample[1]][dist2(rng_)];
77 |     X[2] = points3D_[sample[2]][dist3(rng_)];
78 | 
79 |     std::vector<poselib::CameraPose> poselib_poses;
80 |     int num_sols = poselib::p3p(x, X, &poselib_poses);
81 |     if (num_sols == 0) continue;
82 | 
83 |     for (const poselib::CameraPose& pose : poselib_poses) {
84 |       CameraPose P;
85 |       P.topLeftCorner<3, 3>() = pose.R();
86 |       P.col(3) = -P.topLeftCorner<3, 3>().transpose() * pose.t;
87 | 
88 |       // The 4th sampled keypoint verifies the candidate pose.
89 |       const double kError = EvaluateModelOnPoint(P, sample[3]);
90 |       if (kError < squared_inlier_threshold_) {
91 |         poses->push_back(P);
92 |         break;
93 |       }
94 |     }
95 |   }
96 | 
97 |   // if (num_trials_ > 1 && poses->size() > 1u) {
98 |   //   // Rotation and position averaging for the remaining poses.
99 |   //   // See page 20 in Hartley et al., Rotation Averaging, IJCV 2013
100 |   //   Eigen::Vector3d mean_pos(0.0, 0.0, 0.0);
101 |   //   Eigen::Matrix3d C_e(Eigen::Matrix3d::Zero());
102 |   // }
103 | 
104 |   return static_cast<int>(poses->size());
105 | }
106 | 
107 | // Returns 0 if no model could be estimated and 1 otherwise; the best-scoring candidate pose is refined via non-linear optimization in Ceres. 
108 | int MultiAbsolutePoseEstimator::NonMinimalSolver(
109 |     const std::vector<int>& sample, CameraPose* pose) const {
110 |   CameraPoses poses;
111 |   // Without at least one verified candidate pose there is nothing to score.
112 |   if (MinimalSolver(sample, &poses) == 0) return 0;
113 | 
114 |   const int kSampleSize = static_cast<int>(sample.size());
115 | 
116 |   double best_score = std::numeric_limits<double>::max();
117 |   for (const CameraPose& P : poses) {
118 |     double score = 0.0;
119 |     for (int i = 0; i < kSampleSize; ++i) {
120 |       score += std::min(EvaluateModelOnPoint(P, sample[i]),
121 |                         squared_inlier_threshold_);
122 |     }
123 |     if (score < best_score) {
124 |       best_score = score;
125 |       *pose = P;
126 |     }
127 |   }
128 | 
129 |   LeastSquares(sample, pose);
130 |   return 1;
131 | }
132 | 
133 | 
134 | // Evaluates the pose on the i-th data point.
135 | double MultiAbsolutePoseEstimator::EvaluateModelOnPoint(
136 |     const CameraPose& pose, int i) const {
137 |   double min_squared_error = std::numeric_limits<double>::max();
138 |   for (const Eigen::Vector3d& p : points3D_[i]) {
139 |     Eigen::Vector3d p_c = pose.topLeftCorner<3, 3>() * (p - pose.col(3));
140 | 
141 |     // Check whether point projects behind the camera.
142 |     if (p_c[2] < 0.0) continue;
143 | 
144 |     Eigen::Vector2d p_2d = p_c.head<2>() / p_c[2];
145 |     p_2d[0] *= focal_x_;
146 |     p_2d[1] *= focal_y_;
147 |     const double kSquaredError = (p_2d - points2D_[i]).squaredNorm();
148 |     min_squared_error = std::min(min_squared_error, kSquaredError);
149 |   }
150 | 
151 |   return min_squared_error;
152 | }
153 | 
154 | double MultiAbsolutePoseEstimator::EvaluateModelOnPoint(
155 |     const CameraPose& pose, int i, int* hash) const {
156 |   int hash_x = std::max(0,
157 |       std::min(static_cast<int>(points2D_[i][0] / bin_size_ + 0.5), width_ - 1));
158 |   int hash_y = std::max(0,
159 |       std::min(static_cast<int>(points2D_[i][1] / bin_size_ + 0.5), height_ - 1));
160 |   *hash = hash_x + hash_y * 100000;
161 | 
162 |   return EvaluateModelOnPoint(pose, i);
163 | }
164 | 
165 | // Reference implementation using Ceres for refinement. 
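// Unlike in AbsolutePoseEstimator, this refinement is not short-circuited:
// for every sampled keypoint, a MultiReprojectionError cost built from the
// keypoint's full set of candidate 3D points is added to the problem.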
166 | void MultiAbsolutePoseEstimator::LeastSquares( 167 | const std::vector& sample, CameraPose* pose) const { 168 | Eigen::AngleAxisd aax(pose->topLeftCorner<3, 3>()); 169 | Eigen::Vector3d aax_vec = aax.axis() * aax.angle(); 170 | double camera[6]; 171 | camera[0] = aax_vec[0]; 172 | camera[1] = aax_vec[1]; 173 | camera[2] = aax_vec[2]; 174 | camera[3] = pose->col(3)[0]; 175 | camera[4] = pose->col(3)[1]; 176 | camera[5] = pose->col(3)[2]; 177 | 178 | ceres::Problem refinement_problem; 179 | 180 | const int kSampleSize = static_cast(sample.size()); 181 | for (int i = 0; i < kSampleSize; ++i) { 182 | const int kIdx = sample[i]; 183 | const Eigen::Vector2d& p_img = points2D_[kIdx]; 184 | const Points3D& p_3D = points3D_[kIdx]; 185 | ceres::CostFunction* cost_function = 186 | MultiReprojectionError::CreateCost(p_img[0], p_img[1], p_3D, focal_x_, 187 | focal_y_); 188 | 189 | refinement_problem.AddResidualBlock(cost_function, nullptr, camera); 190 | } 191 | 192 | ceres::Solver::Options options; 193 | options.linear_solver_type = ceres::DENSE_QR; 194 | options.minimizer_progress_to_stdout = false; 195 | ceres::Solver::Summary summary; 196 | ceres::Solve(options, &refinement_problem, &summary); 197 | 198 | if (summary.IsSolutionUsable()) { 199 | Eigen::Vector3d axis(camera[0], camera[1], camera[2]); 200 | double angle = axis.norm(); 201 | axis.normalize(); 202 | aax.axis() = axis; 203 | aax.angle() = angle; 204 | 205 | pose->topLeftCorner<3, 3>() = aax.toRotationMatrix(); 206 | pose->col(3) = Eigen::Vector3d(camera[3], camera[4], camera[5]); 207 | } 208 | } 209 | 210 | void MultiAbsolutePoseEstimator::PixelsToViewingRays( 211 | const double focal_x, const double focal_y, const Points2D& points2D, 212 | ViewingRays* rays) { 213 | const int kNumData = static_cast(points2D.size()); 214 | 215 | // Creates the bearing vectors and points for the OpenGV adapter. 216 | rays->resize(kNumData); 217 | for (int i = 0; i < kNumData; ++i) { 218 | (*rays)[i] = points2D[i].homogeneous(); 219 | (*rays)[i][0] /= focal_x; 220 | (*rays)[i][1] /= focal_y; 221 | (*rays)[i].normalize(); 222 | } 223 | } 224 | 225 | } // namespace visloc_help 226 | -------------------------------------------------------------------------------- /src/multi_absolute_pose_estimator.h: -------------------------------------------------------------------------------- 1 | // Based on the calibrated absolute pose estimator example provided by 2 | // RansacLib. 
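// Variant of AbsolutePoseEstimator for the case where each query keypoint
// comes with a whole set of candidate 3D points (Points3DVec). The minimal
// sample size is 4: p3p consumes the first three sampled keypoints (drawing
// one random 3D candidate per keypoint and trial), and the fourth keypoint is
// used to verify the resulting candidate poses.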
3 | 4 | #ifndef MULTI_ABSOLUTE_POSE_ESTIMATOR_H_ 5 | #define MULTI_ABSOLUTE_POSE_ESTIMATOR_H_ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | 19 | #include 20 | #include 21 | 22 | #include "utils.h" 23 | #include "types.h" 24 | 25 | namespace mesh_loc { 26 | 27 | class MultiAbsolutePoseEstimator { 28 | public: 29 | EIGEN_MAKE_ALIGNED_OPERATOR_NEW 30 | 31 | MultiAbsolutePoseEstimator(const double f_x, const double f_y, 32 | const double squared_inlier_threshold, 33 | const int width, const int height, 34 | const double bin_size, 35 | const Points2D& points2D, 36 | const ViewingRays& rays, 37 | const Points3DVec& points3D, 38 | const int num_trials); 39 | 40 | inline int min_sample_size() const { return 4; } 41 | 42 | inline int non_minimal_sample_size() const { return 6; } 43 | 44 | inline int num_data() const { return num_data_; } 45 | 46 | int MinimalSolver(const std::vector& sample, CameraPoses* poses) const; 47 | 48 | int NonMinimalSolver(const std::vector& sample, CameraPose* pose) const; 49 | 50 | double EvaluateModelOnPoint(const CameraPose& pose, int i) const; 51 | 52 | double EvaluateModelOnPoint(const CameraPose& pose, int i, int* hash) const; 53 | 54 | void LeastSquares(const std::vector& sample, CameraPose* pose) const; 55 | 56 | static void PixelsToViewingRays(const double focal_x, const double focal_y, 57 | const Points2D& points2D, ViewingRays* rays); 58 | 59 | protected: 60 | // Focal lengths in x- and y-directions. 61 | double focal_x_; 62 | double focal_y_; 63 | double squared_inlier_threshold_; 64 | // Size of the image. 65 | int width_; 66 | int height_; 67 | double bin_size_; 68 | int num_x_bins_; 69 | // Stores the 2D point positions. 70 | Points2D points2D_; 71 | // Stores the corresponding 3D point positions. 72 | Points3DVec points3D_; 73 | // Stores the viewing ray for each 2D point position. 74 | ViewingRays rays_; 75 | int num_data_; 76 | 77 | int num_trials_; 78 | 79 | mutable std::minstd_rand rng_; 80 | }; 81 | 82 | } // namespace visloc_help 83 | 84 | #endif // MULTI_ABSOLUTE_POSE_ESTIMATOR_H_ 85 | -------------------------------------------------------------------------------- /src/pose_estimation.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019, Torsten Sattler 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // 10 | // * Redistributions in binary form must reproduce the above copyright 11 | // notice, this list of conditions and the following disclaimer in the 12 | // documentation and/or other materials provided with the distribution. 13 | // 14 | // * Neither the name of the copyright holder nor the 15 | // names of its contributors may be used to endorse or promote products 16 | // derived from this software without specific prior written permission. 17 | // 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 | // ARE DISCLAIMED. 
IN NO EVENT SHALL BE LIABLE FOR ANY 22 | // DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | // (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 25 | // ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | // 29 | // author: Torsten Sattler, torsten.sattler.de@googlemail.com 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | 44 | #include 45 | #include 46 | #include 47 | 48 | #include 49 | 50 | #include 51 | #include 52 | #include 53 | #include 54 | 55 | #include 56 | #include 57 | #include 58 | #include 59 | #include 60 | #include 61 | 62 | 63 | #include 64 | #include 65 | #include 66 | 67 | #include "absolute_pose_estimator.h" 68 | #include "multi_absolute_pose_estimator.h" 69 | #include "utils.h" 70 | #include "spatial_ransac.h" 71 | 72 | namespace py = pybind11; 73 | 74 | namespace undistortion { 75 | 76 | // Distortion function, implementing various camera models. 77 | void Distortion(const mesh_loc::Camera& camera, const double u, 78 | const double v, double* du, double* dv) { 79 | if (camera.camera_type.compare("SIMPLE_RADIAL") == 0) { 80 | const double kR2 = u * u + v * v; 81 | const double kRadial = camera.radial[0] * kR2; 82 | *du = u * kRadial; 83 | *dv = v * kRadial; 84 | } else if (camera.camera_type.compare("RADIAL") == 0) { 85 | const double kR2 = u * u + v * v; 86 | const double kRadial = camera.radial[0] * kR2 + camera.radial[1] * kR2 * kR2; 87 | *du = u * kRadial; 88 | *dv = v * kRadial; 89 | } else if (camera.camera_type.compare("BROWN_3_PARAMS") == 0) { 90 | const double kR2 = u * u + v * v; 91 | const double kRadial = camera.radial[0] * kR2 + camera.radial[1] * kR2 * kR2 92 | + camera.radial[2] * kR2 * kR2 * kR2; 93 | *du = u * kRadial; 94 | *dv = v * kRadial; 95 | } else if (camera.camera_type.compare("PINHOLE") == 0) { 96 | *du = 0; 97 | *dv = 0; 98 | } else { 99 | std::cerr << " ERROR: Distortion function for camera model " 100 | << camera.camera_type << " not yet implemented" << std::endl; 101 | } 102 | } 103 | 104 | // The following code is taken from 105 | // https://github.com/colmap/colmap/blob/master/src/base/camera_models.h 106 | // Please see the license file for details. 107 | // Assumes that principal point has been subtracted and that the coordinates 108 | // have been divided by the focal length. 109 | void IterativeUndistortion(const mesh_loc::Camera& camera, double* u, 110 | double* v) { 111 | // Parameters for Newton iteration using numerical differentiation with 112 | // central differences, 100 iterations should be enough even for complex 113 | // camera models with higher order terms. 
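  // The iteration solves x + D(x) = x0 for the undistorted point x, where x0
  // is the observed (distorted) point and D is the Distortion() function
  // above. Each step forms the Jacobian J = I + dD/dx via central differences
  // and updates x -= J^{-1} * (x + D(x) - x0).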
114 | const size_t kNumIterations = 100; 115 | const double kMaxStepNorm = 1e-10; 116 | const double kRelStepSize = 1e-6; 117 | 118 | Eigen::Matrix2d J; 119 | const Eigen::Vector2d x0(*u, *v); 120 | Eigen::Vector2d x(*u, *v); 121 | Eigen::Vector2d dx; 122 | Eigen::Vector2d dx_0b; 123 | Eigen::Vector2d dx_0f; 124 | Eigen::Vector2d dx_1b; 125 | Eigen::Vector2d dx_1f; 126 | 127 | for (size_t i = 0; i < kNumIterations; ++i) { 128 | const double step0 = std::max(std::numeric_limits::epsilon(), 129 | std::abs(kRelStepSize * x(0))); 130 | const double step1 = std::max(std::numeric_limits::epsilon(), 131 | std::abs(kRelStepSize * x(1))); 132 | Distortion(camera, x(0), x(1), &dx(0), &dx(1)); 133 | Distortion(camera, x(0) - step0, x(1), &dx_0b(0), &dx_0b(1)); 134 | Distortion(camera, x(0) + step0, x(1), &dx_0f(0), &dx_0f(1)); 135 | Distortion(camera, x(0), x(1) - step1, &dx_1b(0), &dx_1b(1)); 136 | Distortion(camera, x(0), x(1) + step1, &dx_1f(0), &dx_1f(1)); 137 | J(0, 0) = 1 + (dx_0f(0) - dx_0b(0)) / (2 * step0); 138 | J(0, 1) = (dx_1f(0) - dx_1b(0)) / (2 * step1); 139 | J(1, 0) = (dx_0f(1) - dx_0b(1)) / (2 * step0); 140 | J(1, 1) = 1 + (dx_1f(1) - dx_1b(1)) / (2 * step1); 141 | const Eigen::Vector2d step_x = J.inverse() * (x + dx - x0); 142 | x -= step_x; 143 | if (step_x.squaredNorm() < kMaxStepNorm) { 144 | break; 145 | } 146 | } 147 | 148 | *u = x(0); 149 | *v = x(1); 150 | } 151 | 152 | } // namespace undistortion 153 | 154 | py::dict pose_estimation( 155 | const py::dict query_camera, 156 | const std::vector db_cams, 157 | const std::vector matches, 158 | const py::dict pose_options) { 159 | // Imports definitions. 160 | using ransac_lib::LocallyOptimizedMSAC; 161 | using mesh_loc::AbsolutePoseEstimator; 162 | using mesh_loc::MultiAbsolutePoseEstimator; 163 | using mesh_loc::CameraPose; 164 | using mesh_loc::CameraPoses; 165 | using mesh_loc::Points2D; 166 | using mesh_loc::Points3D; 167 | using mesh_loc::Points3DVec; 168 | using mesh_loc::ViewingRays; 169 | using mesh_loc::Camera; 170 | using mesh_loc::CreateCamera; 171 | using mesh_loc::triangulate_observations; 172 | using mesh_loc::select_and_refine_point; 173 | using mesh_loc::compute_covisibility_clusters; 174 | using mesh_loc::cluster_keypoints; 175 | using mesh_loc::get_camera_position; 176 | using undistortion::IterativeUndistortion; 177 | using ransac_lib::LocallyOptimizedEffectiveRANSAC; 178 | 179 | std::cout << matches.size() << " " 180 | << pose_options["triangulate"].cast() << " " 181 | << pose_options["merge_3D_points"].cast() << " " 182 | << pose_options["cluster_keypoints"].cast() << " " 183 | << pose_options["covisibility_filtering"].cast() 184 | << " " << pose_options["use_all_matches"].cast() << " " 185 | << " " << pose_options["num_LO_iters"].cast() << " " 186 | << pose_options["inlier_threshold"].cast() << " " 187 | << pose_options["min_ransac_iterations"].cast() << " " 188 | << pose_options["max_ransac_iterations"].cast() 189 | << std::endl; 190 | 191 | //// 192 | // Creates the query and database cameras. 
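  // Each entry of db_cams is expected to be a dict with the keys "model"
  // (COLMAP camera model name), "params", "q" (rotation as a w-x-y-z
  // quaternion), and "t"; query_camera additionally carries "width" and
  // "height". A matching Python-side literal could look like this
  // (illustrative values only):
  //   {'model': 'PINHOLE', 'width': 800, 'height': 600,
  //    'params': [700.0, 700.0, 400.0, 300.0],
  //    'q': np.array([1.0, 0.0, 0.0, 0.0]), 't': np.zeros(3)}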
193 | const int kNumDb = static_cast(db_cams.size()); 194 | std::vector db_cameras(kNumDb); 195 | for (int i = 0; i < kNumDb; ++i) { 196 | CreateCamera(db_cams[i]["model"].cast(), 197 | db_cams[i]["params"].cast>(), 198 | db_cams[i]["q"].cast(), 199 | db_cams[i]["t"].cast(), 200 | &(db_cameras[i])); 201 | } 202 | Camera cam; 203 | CreateCamera(query_camera["model"].cast(), 204 | query_camera["params"].cast>(), 205 | Eigen::Vector4d(1.0, 0.0, 0.0, 0.0), 206 | Eigen::Vector3d::Zero(), &cam); 207 | const int kWidth = query_camera["width"].cast(); 208 | const int kHeight = query_camera["height"].cast(); 209 | 210 | double inlier_threshold = pose_options["inlier_threshold"].cast(); 211 | uint32_t min_num_iterations = 212 | pose_options["min_ransac_iterations"].cast(); 213 | uint32_t max_num_iterations = 214 | pose_options["max_ransac_iterations"].cast(); 215 | int num_LO_iters = pose_options["num_LO_iters"].cast(); 216 | 217 | //// 218 | // Extracts all necessary information from the matches. 219 | const int kNumInitialMatches = static_cast(matches.size()); 220 | std::vector query_keypoints(kNumInitialMatches); 221 | std::vector> points(kNumInitialMatches); 222 | std::vector> db_observations(kNumInitialMatches); 223 | std::vector> camera_indices(kNumInitialMatches); 224 | std::cout << " Initial number of matches: " << kNumInitialMatches << std::endl; 225 | 226 | 227 | //// 228 | // Data preparation. 229 | for (int i = 0; i < kNumInitialMatches; ++i) { 230 | query_keypoints[i] = matches[i]["keypoint"].cast(); 231 | points[i] = matches[i]["points"].cast>(); 232 | db_observations[i] = matches[i]["observations"].cast>(); 233 | camera_indices[i] = matches[i]["db_indices"].cast>(); 234 | } 235 | 236 | //// 237 | // Clusters keypoints that are closeby (within 4 pixels). This should only be 238 | // run in case the features do not have repeatable keypoints, e.g., patch2pix. 239 | // if (cluster_keypoints) { 240 | if (pose_options["cluster_keypoints"].cast()) { 241 | std::vector new_keypoints; 242 | std::vector> new_observations; 243 | std::vector> new_camera_indices; 244 | std::vector> new_points; 245 | 246 | cluster_keypoints(db_cameras, query_keypoints, db_observations, 247 | camera_indices, points, 4.0, false, inlier_threshold, 248 | kWidth, kHeight, &new_keypoints, &new_observations, 249 | &new_camera_indices, &new_points); 250 | query_keypoints = new_keypoints; 251 | db_observations = new_observations; 252 | camera_indices = new_camera_indices; 253 | points = new_points; 254 | std::cout << " Found " << query_keypoints.size() << " keypoint clusters" 255 | << std::endl; 256 | } 257 | 258 | int num_keypoints = static_cast(query_keypoints.size()); 259 | 260 | //// 261 | // If desired, triangulates 3D points from the database observations. 262 | // NOTE: At the moment, we assume that there is no distortion in the database 263 | // images (or that the distortion is negligible). 264 | if (pose_options["triangulate"].cast() 265 | && pose_options["merge_3D_points"].cast()) { 266 | std::cerr << " ERROR: Can either triangulate or merge 3D points, not both." 
267 |               << " Defaulting to merging them." << std::endl;
268 |   }
269 | 
270 |   // if (merge_3D_points) {
271 |   int count_merged = 0;
272 |   if (pose_options["merge_3D_points"].cast<bool>()) {
273 |     bool using_multiple_points = false;
274 |     for (int i = 0; i < num_keypoints; ++i) {
275 |       Eigen::Vector3d X;
276 |       std::vector<int> inlier_indices;
277 |       int num_consistent = select_and_refine_point(db_cameras,
278 |           db_observations[i], camera_indices[i],
279 |           points[i], inlier_threshold, &X, &inlier_indices);
280 |       if (num_consistent < 1) {
281 |         points[i].clear();
282 |         db_observations[i].clear();
283 |         camera_indices[i].clear();
284 |       } else if (num_consistent == 1) {
285 |         using_multiple_points = true;
286 |         ++count_merged;
287 |       } else {
288 |         points[i] = {X};
289 |         std::vector<Eigen::Vector2d> selected_obs;
290 |         std::vector<int> selected_ids;
291 |         for (int id : inlier_indices) {
292 |           selected_obs.push_back(db_observations[i][id]);
293 |           selected_ids.push_back(camera_indices[i][id]);
294 |         }
295 |         db_observations[i] = selected_obs;
296 |         camera_indices[i] = selected_ids;
297 |       }
298 |     }
299 | 
300 |     if (using_multiple_points) {
301 |       std::cout << " NOTICE: " << count_merged << " query keypoints for which "
302 |                 << "merging the 3D points was not possible. Using all 3D "
303 |                 << "points for these keypoints." << std::endl;
304 |     }
305 |   } else if (pose_options["triangulate"].cast<bool>()) {
306 |     for (int i = 0; i < num_keypoints; ++i) {
307 |       Eigen::Vector3d X;
308 |       std::vector<int> inlier_indices;
309 |       int num_consistent = triangulate_observations(db_cameras,
310 |           db_observations[i], camera_indices[i],
311 |           inlier_threshold, 1.0, &X, &inlier_indices);
312 |       if (num_consistent <= 1) {
313 |         db_observations[i].clear();
314 |         camera_indices[i].clear();
315 |         points[i].clear();
316 |       } else {
317 |         points[i] = {X};
318 |         std::vector<Eigen::Vector2d> selected_obs;
319 |         std::vector<int> selected_ids;
320 |         for (int id : inlier_indices) {
321 |           selected_obs.push_back(db_observations[i][id]);
322 |           selected_ids.push_back(camera_indices[i][id]);
323 |         }
324 |         db_observations[i] = selected_obs;
325 |         camera_indices[i] = selected_ids;
326 |       }
327 |     }
328 |   }
329 | 
330 |   ////
331 |   // Performs covisibility filtering.
332 |   std::vector<int> covis_cluster_ids(kNumDb, 0);
333 |   int num_covisility_clusters = 1;
334 |   if (pose_options["covisibility_filtering"].cast<bool>()) {
335 |     // if (covisibility_filtering) {
336 |     num_covisility_clusters = compute_covisibility_clusters(
337 |         db_cameras, db_observations, camera_indices, points, inlier_threshold,
338 |         &covis_cluster_ids);
339 |   }
340 |   std::cout << " Found " << num_covisility_clusters << " covisibility clusters"
341 |             << std::endl;
342 | 
343 |   ////
344 |   // Pose estimation per covisibility cluster.
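// The dictionary filled below holds the best pose over all covisibility
// clusters. Keys set in this function: "success" (bool), "qvec" (rotation as
// (w, x, y, z)), "tvec" (translation), "num_inliers", and "inliers". A
// hypothetical C++-side read-back of the result would look like:
//
//   if (result_dict["success"].cast<bool>()) {
//     Eigen::Vector4d qvec = result_dict["qvec"].cast<Eigen::Vector4d>();
//     Eigen::Vector3d tvec = result_dict["tvec"].cast<Eigen::Vector3d>();
//   }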
345 | py::dict result_dict; 346 | result_dict["success"] = false; 347 | result_dict["num_inliers"] = 0; 348 | 349 | std::vector camera_positions(kNumDb); 350 | for (int k = 0; k < kNumDb; ++k) { 351 | camera_positions[k] = get_camera_position(db_cameras[k]); 352 | } 353 | 354 | 355 | for (int c = 0; c < num_covisility_clusters; ++c) { 356 | Points2D points2D_; 357 | Points3D points3D_; 358 | Points2D points2D_aggreg_; 359 | Points3DVec points3D_aggreg_; 360 | 361 | int num_filtered = 0; 362 | for (int i = 0; i < num_keypoints; ++i) { 363 | const int kNumObs = static_cast(db_observations[i].size()); 364 | const bool kSinglePoint = points[i].size() == 1u; 365 | 366 | Points3D pts_; 367 | 368 | for (int j = 0; j < kNumObs; ++j) { 369 | if (covis_cluster_ids[camera_indices[i][j]] == c) { 370 | // There can be 3D points without a valid depth which survive until 371 | // this stage, e.g., because we never merge 3D points. 372 | // Filters these points out as they correspond to 3D points with 373 | // positions nearly identical to the database image they come from. 374 | if ((points[i][j] - 375 | camera_positions[camera_indices[i][j]]).squaredNorm() <= 0.000001) { 376 | ++num_filtered; 377 | continue; 378 | } 379 | points2D_.push_back(query_keypoints[i]); 380 | points3D_.push_back(points[i][j]); 381 | pts_.push_back(points[i][j]); 382 | if (kSinglePoint) break; 383 | } 384 | } 385 | 386 | if (!pts_.empty()) { 387 | points2D_aggreg_.push_back(query_keypoints[i]); 388 | points3D_aggreg_.push_back(pts_); 389 | } 390 | } 391 | std::cout << " Num filtered: " << num_filtered << std::endl; 392 | 393 | const int kNumMatches = static_cast(points2D_.size()); 394 | if (kNumMatches <= 3) continue; 395 | 396 | // Undistorts the keypoints. 397 | for (int i = 0; i < kNumMatches; ++i) { 398 | double x = (points2D_[i][0] - cam.c_x) / cam.focal_x; 399 | double y = (points2D_[i][1] - cam.c_y) / cam.focal_y; 400 | IterativeUndistortion(cam, &x, &y); 401 | points2D_[i] << x * cam.focal_x, y * cam.focal_y; 402 | } 403 | 404 | ViewingRays rays, rays_aggreg; 405 | AbsolutePoseEstimator::PixelsToViewingRays( 406 | cam.focal_x, cam.focal_y, points2D_, &rays); 407 | 408 | const int kNumMatchesAggreg = static_cast(points2D_aggreg_.size()); 409 | for (int i = 0; i < kNumMatchesAggreg; ++i) { 410 | double x = (points2D_aggreg_[i][0] - cam.c_x) / cam.focal_x; 411 | double y = (points2D_aggreg_[i][1] - cam.c_y) / cam.focal_y; 412 | IterativeUndistortion(cam, &x, &y); 413 | points2D_aggreg_[i] << x * cam.focal_x, y * cam.focal_y; 414 | } 415 | AbsolutePoseEstimator::PixelsToViewingRays( 416 | cam.focal_x, cam.focal_y, points2D_aggreg_, &rays_aggreg); 417 | 418 | ransac_lib::LORansacOptions options; 419 | options.min_num_iterations_ = min_num_iterations; 420 | options.max_num_iterations_ = max_num_iterations; 421 | options.min_sample_multiplicator_ = 7; 422 | options.num_lsq_iterations_ = 4; 423 | options.num_lo_steps_ = num_LO_iters; 424 | options.lo_starting_iterations_ = 100u; 425 | options.final_least_squares_ = true; 426 | 427 | std::random_device rand_dev; 428 | options.random_seed_ = rand_dev(); 429 | 430 | const double kInThreshPX = static_cast(inlier_threshold); 431 | options.squared_inlier_threshold_ = kInThreshPX * kInThreshPX; 432 | 433 | AbsolutePoseEstimator solver(cam.focal_x, cam.focal_y, 434 | kInThreshPX * kInThreshPX, 435 | kWidth, kHeight, 16, 436 | points2D_, rays, points3D_); 437 | 438 | MultiAbsolutePoseEstimator solver_multi(cam.focal_x, cam.focal_y, 439 | kInThreshPX * kInThreshPX, 440 | kWidth, kHeight, 
16,
441 |                                           points2D_aggreg_, rays_aggreg,
442 |                                           points3D_aggreg_, 10);
443 | 
444 |     LocallyOptimizedMSAC<CameraPose, CameraPoses,
445 |                          AbsolutePoseEstimator> lomsac;
446 |     LocallyOptimizedEffectiveRANSAC<CameraPose, CameraPoses,
447 |                                     AbsolutePoseEstimator> loeffsac;
448 |     LocallyOptimizedMSAC<CameraPose, CameraPoses,
449 |                          MultiAbsolutePoseEstimator> multi_lomsac;
450 | 
451 |     ransac_lib::RansacStatistics ransac_stats;
452 |     CameraPose best_model;
453 |     colmap::Camera colmap_camera = colmap::Camera::CreateFromModelName(
454 |         1, "PINHOLE", cam.focal_x, kWidth, kHeight);
455 |     colmap_camera.SetFocalLengthX(cam.focal_x);
456 |     colmap_camera.SetFocalLengthY(cam.focal_y);
457 |     colmap_camera.SetPrincipalPointX(cam.c_x);
458 |     colmap_camera.SetPrincipalPointY(cam.c_y);
459 | 
460 |     colmap::AbsolutePoseEstimationOptions colmap_abs_pose_options;
461 |     colmap_abs_pose_options.estimate_focal_length = false;
462 |     colmap_abs_pose_options.ransac_options.max_error = kInThreshPX;
463 |     colmap_abs_pose_options.ransac_options.min_inlier_ratio = 0.01;
464 |     colmap_abs_pose_options.ransac_options.min_num_trials = min_num_iterations;
465 |     colmap_abs_pose_options.ransac_options.max_num_trials = max_num_iterations;
466 |     colmap_abs_pose_options.ransac_options.confidence = 0.9999;
467 | 
468 |     std::vector<char> colmap_inlier_mask;
469 | 
470 |     colmap::AbsolutePoseRefinementOptions colmap_refinement_options;
471 |     colmap_refinement_options.refine_focal_length = false;
472 |     colmap_refinement_options.refine_extra_params = false;
473 |     colmap_refinement_options.print_summary = false;
474 | 
475 |     std::string ransac_type = pose_options["ransac_type"].cast<std::string>();
476 |     if (ransac_type.compare("MULTIMSAC") != 0) {
477 |       std::cout << " running " << ransac_type << " on " << kNumMatches
478 |                 << " matches " << std::endl;
479 |     } else {
480 |       std::cout << " running " << ransac_type << " on "
481 |                 << kNumMatchesAggreg << " matches " << std::endl;
482 |     }
483 |     auto ransac_start = std::chrono::system_clock::now();
484 | 
485 |     int num_ransac_inliers = 0;
486 |     if (ransac_type.compare("MSAC") == 0 ||
487 |         ransac_type.compare("MSAC+REF") == 0) {
488 |       num_ransac_inliers =
489 |           lomsac.EstimateModel(options, solver, &best_model, &ransac_stats);
490 |     } else if (ransac_type.compare("EFFSAC") == 0) {
491 |       num_ransac_inliers =
492 |           loeffsac.EstimateModel(options, solver, &best_model, &ransac_stats);
493 |     } else if (ransac_type.compare("PYCOLMAP") == 0) {
494 | 
495 |       colmap::Rigid3d est_pose;
496 |       size_t num_colmap_inliers;
497 |       for (int i = 0; i < kNumMatches; ++i) {
498 |         points2D_[i] += Eigen::Vector2d(cam.c_x, cam.c_y);
499 |       }
500 | 
501 |       if (!colmap::EstimateAbsolutePose(colmap_abs_pose_options,
502 |                                         points2D_, points3D_,
503 |                                         &est_pose, &colmap_camera,
504 |                                         &num_colmap_inliers,
505 |                                         &colmap_inlier_mask)) {
506 |         num_ransac_inliers = 0;
507 |       } else {
508 |         // Refines the estimated pose.
509 |         if (!colmap::RefineAbsolutePose(colmap_refinement_options,
510 |                                         colmap_inlier_mask,
511 |                                         points2D_, points3D_,
512 |                                         &est_pose, &colmap_camera)) {
513 |           num_ransac_inliers = 0;
514 |         } else {
515 |           auto ransac_end = std::chrono::system_clock::now();
516 |           std::chrono::duration<double> elapsed_seconds = ransac_end - ransac_start;
517 |           std::cout << " ... " << ransac_type << " found "
518 |                     << num_colmap_inliers << " inliers" << std::endl;
519 |           std::cout << " ... 
" << ransac_type << " took " 520 | << elapsed_seconds.count() << " s" << std::endl; 521 | result_dict["success"] = true; 522 | Eigen::Vector4d q_vec_wxyz; 523 | q_vec_wxyz << est_pose.rotation.w(), est_pose.rotation.x(), est_pose.rotation.y(), est_pose.rotation.z(); 524 | result_dict["qvec"] = q_vec_wxyz; 525 | result_dict["tvec"] = est_pose.translation; 526 | result_dict["num_inliers"] = num_colmap_inliers; 527 | std::vector colmap_inliers; 528 | for (auto it : colmap_inlier_mask) { 529 | bool inlier = false; 530 | if (it) inlier = true; 531 | colmap_inliers.push_back(inlier); 532 | } 533 | result_dict["inliers"] = colmap_inliers; 534 | return result_dict; 535 | } 536 | } 537 | } else if (ransac_type.compare("MULTIMSAC") == 0) { 538 | num_ransac_inliers = multi_lomsac.EstimateModel(options, solver_multi, 539 | &best_model, 540 | &ransac_stats); 541 | } else if (ransac_type.compare("POSELIB") == 0 || 542 | ransac_type.compare("POSELIB+REF") == 0) { 543 | poselib::BundleOptions bundle_opts; 544 | bundle_opts.loss_type = poselib::BundleOptions::LossType::CAUCHY; 545 | bundle_opts.loss_scale = 1.0; //inlier_threshold * 0.5; 546 | 547 | poselib::RansacOptions ransac_opts; 548 | ransac_opts.max_iterations = max_num_iterations; 549 | ransac_opts.min_iterations = min_num_iterations; 550 | ransac_opts.max_reproj_error = inlier_threshold; 551 | 552 | // Assumes that keypoints are already centered and undistorted. 553 | std::vector cam_params = {cam.focal_x, cam.focal_y, 0.0, 0.0}; 554 | poselib::Camera poselib_cam("PINHOLE", cam_params, kWidth, kHeight); 555 | std::vector inliers_poselib; 556 | 557 | poselib::CameraPose poselib_pose; 558 | 559 | poselib::RansacStats r_stats = poselib::estimate_absolute_pose( 560 | points2D_, points3D_, poselib_cam, ransac_opts, bundle_opts, 561 | &poselib_pose, &inliers_poselib); 562 | 563 | num_ransac_inliers = r_stats.num_inliers; 564 | ransac_stats.num_iterations = r_stats.iterations; 565 | ransac_stats.inlier_ratio = r_stats.inlier_ratio; 566 | 567 | best_model.topLeftCorner<3, 3>() = poselib_pose.R(); 568 | best_model.col(3) = -best_model.topLeftCorner<3, 3>().transpose() * poselib_pose.t; 569 | 570 | ransac_stats.inlier_indices.clear(); 571 | for (int in = 0; in < static_cast(points2D_.size()); ++in) { 572 | if (inliers_poselib[in] == true) { 573 | ransac_stats.inlier_indices.push_back(in); 574 | } 575 | } 576 | 577 | ransac_stats.number_lo_iterations = r_stats.refinements; 578 | } 579 | 580 | if (num_ransac_inliers > 0 && (ransac_type.compare("MSAC") == 0 || 581 | ransac_type.compare("MSAC+REF") == 0 || 582 | ransac_type.compare("EFFSAC") == 0)) { 583 | for (int i = 0; i < kNumMatches; ++i) { 584 | points2D_[i] += Eigen::Vector2d(cam.c_x, cam.c_y); 585 | } 586 | 587 | Eigen::Matrix3d R = best_model.topLeftCorner<3, 3>(); 588 | Eigen::Vector3d t = -R * best_model.col(3); 589 | Eigen::Quaterniond q(R); 590 | q.normalize(); 591 | colmap::Rigid3d colmap_pose(q, t); 592 | colmap_inlier_mask.resize(kNumMatches, false); 593 | for (int inl : ransac_stats.inlier_indices) { 594 | colmap_inlier_mask[inl] = true; 595 | } 596 | 597 | if (colmap::RefineAbsolutePose(colmap_refinement_options, 598 | colmap_inlier_mask, 599 | points2D_, points3D_, 600 | &colmap_pose, &colmap_camera)) { 601 | Eigen::Quaterniond qq = colmap_pose.rotation; 602 | Eigen::Vector3d t_vec = colmap_pose.translation; 603 | best_model.topLeftCorner<3, 3>() = qq.toRotationMatrix(); 604 | best_model.col(3) = -best_model.topLeftCorner<3, 3>().transpose() * t_vec; 605 | } 606 | 607 | for (int i = 0; i < 
kNumMatches; ++i) { 608 | points2D_[i] -= Eigen::Vector2d(cam.c_x, cam.c_y); 609 | } 610 | } 611 | 612 | if (num_ransac_inliers < 4) { 613 | // Default to the pose of the top retrieved image. 614 | best_model.topLeftCorner<3, 3>() = db_cameras[0].proj_matrix_normalized.topLeftCorner<3, 3>(); 615 | best_model.col(3) = camera_positions[0]; 616 | } 617 | 618 | if (ransac_type.compare("MSAC+REF") == 0 || 619 | ransac_type.compare("POSELIB+REF") == 0) { 620 | int total_num_inliers_xyz = 0; 621 | Eigen::Vector3d c_weighted(0.0, 0.0, 0.0); 622 | const double kRange = pose_options["refinement_range"].cast(); 623 | const double kStepSize = pose_options["refinement_step"].cast(); 624 | for (double x = -kRange; x <= kRange; x += kStepSize) { 625 | for (double y = -kRange; y <= kRange; y += kStepSize) { 626 | for (double z = -kRange; z <= kRange; z += kStepSize) { 627 | Eigen::Vector3d c_xyz = best_model.col(3) + Eigen::Vector3d(x, y, z); 628 | Eigen::Matrix P; 629 | P.topLeftCorner<3, 3>() = best_model.topLeftCorner<3, 3>(); 630 | P.col(3) = -best_model.topLeftCorner<3, 3>() * c_xyz; 631 | P.row(0) *= cam.focal_x; 632 | P.row(1) *= cam.focal_y; 633 | int num_inliers_xyz = 0; 634 | for (int i = 0; i < kNumMatches; ++i) { 635 | Eigen::Vector3d p = P * points3D_[i].homogeneous(); 636 | if (p[2] < 0.0) continue; 637 | double error = (points2D_[i] - p.hnormalized()).squaredNorm(); 638 | if (error < options.squared_inlier_threshold_) ++num_inliers_xyz; 639 | } 640 | 641 | total_num_inliers_xyz += num_inliers_xyz; 642 | c_weighted += c_xyz * static_cast(num_inliers_xyz); 643 | } 644 | } 645 | } 646 | c_weighted /= static_cast(total_num_inliers_xyz); 647 | std::cout << " distance between BA estimate and weighted position: " 648 | << (c_weighted - best_model.col(3)).norm() << std::endl; 649 | if (total_num_inliers_xyz > 0) best_model.col(3) = c_weighted; 650 | } 651 | 652 | auto ransac_end = std::chrono::system_clock::now(); 653 | std::chrono::duration elapsed_seconds = ransac_end - ransac_start; 654 | std::cout << " ... " << ransac_type << " found " << num_ransac_inliers 655 | << " inliers in " << ransac_stats.num_iterations 656 | << " iterations with an inlier ratio of " 657 | << ransac_stats.inlier_ratio << std::endl; 658 | std::cout << " ... " << ransac_type << " took " 659 | << elapsed_seconds.count() << " s"<< std::endl; 660 | std::cout << " ... " << ransac_type << " executed " 661 | << ransac_stats.number_lo_iterations 662 | << " local optimization stages" << std::endl; 663 | 664 | Eigen::Matrix3d R = best_model.topLeftCorner<3, 3>(); 665 | Eigen::Vector3d t = -R * best_model.col(3); 666 | Eigen::Quaterniond q(R); 667 | q.normalize(); 668 | 669 | if (num_ransac_inliers > result_dict["num_inliers"].cast()) { 670 | result_dict["success"] = true; 671 | result_dict["qvec"] = Eigen::Vector4d(q.w(), q.x(), q.y(), q.z()); 672 | result_dict["tvec"] = t; 673 | result_dict["num_inliers"] = num_ransac_inliers; 674 | result_dict["inliers"] = ransac_stats.inlier_indices; 675 | } 676 | } 677 | 678 | return result_dict; 679 | } 680 | -------------------------------------------------------------------------------- /src/spatial_ransac.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019, Torsten Sattler 2 | // All rights reserved. 
3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // 10 | // * Redistributions in binary form must reproduce the above copyright 11 | // notice, this list of conditions and the following disclaimer in the 12 | // documentation and/or other materials provided with the distribution. 13 | // 14 | // * Neither the name of the copyright holder nor the 15 | // names of its contributors may be used to endorse or promote products 16 | // derived from this software without specific prior written permission. 17 | // 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 | // ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY 22 | // DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | // (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 25 | // ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | // 29 | // author: Torsten Sattler, torsten.sattler.de@googlemail.com 30 | 31 | // Adapts the LO-RANSAC implementation from RansacLib to use a form of effective 32 | // inlier count: rather than counting each 2D-3D match as a separate inlier, 33 | // the query image is tiled into bins and at most one inlier per bin is 34 | // counted. 35 | 36 | #ifndef SRC_SPATIAL_RANSAC_H_ 37 | #define SRC_SPATIAL_RANSAC_H_ 38 | 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include 46 | #include 47 | 48 | #include 49 | #include 50 | #include 51 | 52 | namespace ransac_lib { 53 | 54 | // Note that the bin size is adjusted in the solver class. 55 | // Note that RANSAC's termination criterion is not necessarily valid anymore. 
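// As a standalone illustration of the scoring used below (a hypothetical
// helper, not part of this file; in the class, the residuals and bin hashes
// come from the solver's EvaluateModelOnPoint):
//
//   inline double EffectiveInlierCount(
//       const std::vector<double>& squared_errors,
//       const std::vector<int>& bin_hashes, const double squared_threshold) {
//     std::unordered_set<int> occupied_bins;  // at most one inlier per bin
//     for (size_t i = 0; i < squared_errors.size(); ++i) {
//       if (squared_errors[i] < squared_threshold) {
//         occupied_bins.emplace(bin_hashes[i]);
//       }
//     }
//     return static_cast<double>(occupied_bins.size());
//   }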
56 | template <class Model, class ModelVector, class Solver,
57 |           class Sampler = UniformSampling<Solver> >
58 | class LocallyOptimizedEffectiveRANSAC :
59 |     public LocallyOptimizedMSAC<Model, ModelVector, Solver, Sampler> {
60 |  protected:
61 |   void GetBestEstimatedModelId(const Solver& solver, const ModelVector& models,
62 |                                const int num_models,
63 |                                const double squared_inlier_threshold,
64 |                                double* best_score, int* best_model_id) const {
65 |     // The effective inlier count is maximized, so the best score must start
66 |     // at the lowest value (starting at the maximum would never be exceeded).
67 |     *best_score = std::numeric_limits<double>::lowest();
68 |     *best_model_id = 0;
69 |     for (int m = 0; m < num_models; ++m) {
70 |       double score = 0;
71 |       ScoreModel(solver, models[m], squared_inlier_threshold, &score);
72 |       if (score > *best_score) {
73 |         *best_score = score;
74 |         *best_model_id = m;
75 |       }
76 |     }
77 |   }
78 | 
79 |   void ScoreModel(const Solver& solver, const Model& model,
80 |                   const double squared_inlier_threshold, double* score) const {
81 |     const int kNumData = solver.num_data();
82 |     *score = 0.0;
83 |     std::unordered_set<int> effective_inliers;
84 |     for (int i = 0; i < kNumData; ++i) {
85 |       int hash;
86 |       double squared_error = solver.EvaluateModelOnPoint(model, i, &hash);
87 |       if (squared_error < squared_inlier_threshold) {
88 |         effective_inliers.emplace(hash);
89 |       }
90 |     }
91 |     *score = static_cast<double>(effective_inliers.size());
92 |   }
93 | 
94 |   // Standard inlier scoring function.
95 |   inline double ComputeScore(const double squared_error,
96 |                              const double squared_error_threshold) const {
97 |     return (squared_error < squared_error_threshold) ? 1.0 : 0.0;
98 |   }
99 | 
100 |   inline void UpdateBestModel(const double score_curr, const Model& m_curr,
101 |                               double* score_best, Model* m_best) const {
102 |     if (score_curr > *score_best) {
103 |       *score_best = score_curr;
104 |       *m_best = m_curr;
105 |     }
106 |   }
107 | };
108 | 
109 | }  // namespace ransac_lib
110 | 
111 | #endif  // SRC_SPATIAL_RANSAC_H_
--------------------------------------------------------------------------------
/src/types.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2022, Vojtech Panek and Zuzana Kukelova and Torsten Sattler
2 | // All rights reserved.
3 | //
4 | // Redistribution and use in source and binary forms, with or without
5 | // modification, are permitted provided that the following conditions are met:
6 | //
7 | // 1. Redistributions of source code must retain the above copyright notice,
8 | //    this list of conditions and the following disclaimer.
9 | //
10 | // 2. Redistributions in binary form must reproduce the above copyright notice,
11 | //    this list of conditions and the following disclaimer in the documentation
12 | //    and/or other materials provided with the distribution.
13 | //
14 | // 3. Neither the name of the copyright holder nor the names of its
15 | //    contributors may be used to endorse or promote products derived from
16 | //    this software without specific prior written permission.
17 | //
18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 | // ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 22 | // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 | // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 | // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 | // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 | // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 | // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 28 | // THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | #ifndef MESH_LOC_TYPES_H_ 31 | #define MESH_LOC_TYPES_H_ 32 | 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include 46 | #include 47 | 48 | #include 49 | #include 50 | #include 51 | 52 | #include 53 | #include 54 | 55 | namespace mesh_loc { 56 | 57 | // An absolute pose is a Eigen 3x4 double matrix storing the rotation and 58 | // translation of the camera. 59 | typedef Eigen::Matrix CameraPose; 60 | typedef std::vector CameraPoses; 61 | 62 | typedef std::vector Points2D; 63 | typedef std::vector Points3D; 64 | typedef std::vector Points3DVec; 65 | typedef std::vector ViewingRays; 66 | 67 | struct Camera { 68 | EIGEN_MAKE_ALIGNED_OPERATOR_NEW 69 | double c_x; 70 | double c_y; 71 | 72 | double focal_x; 73 | double focal_y; 74 | 75 | std::string camera_type; 76 | 77 | std::vector radial; 78 | 79 | Eigen::Quaterniond q; 80 | Eigen::Vector3d t; 81 | 82 | Eigen::Matrix proj_matrix; 83 | Eigen::Matrix proj_matrix_normalized; 84 | }; 85 | 86 | // Code for optimizing a camera pose by optimizing reprojection errors. 87 | struct ReprojectionError { 88 | ReprojectionError(double x, double y, double X, double Y, double Z, 89 | double fx, double fy) 90 | : point2D_x(x / fx), 91 | point2D_y(y / fy), 92 | point3D_X(X), 93 | point3D_Y(Y), 94 | point3D_Z(Z), 95 | f_x(fx), 96 | f_y(fy) {} 97 | 98 | template 99 | bool operator()(const T* const camera, T* residuals) const { 100 | // The last three entries are the camera translation. 101 | T p[3]; 102 | p[0] = static_cast(point3D_X); // - camera[3]; 103 | p[1] = static_cast(point3D_Y); // - camera[4]; 104 | p[2] = static_cast(point3D_Z); // - camera[5]; 105 | 106 | // The first three entries correspond to the rotation matrix stored in an 107 | // angle-axis representation. 108 | T p_rot[3]; 109 | ceres::AngleAxisRotatePoint(camera, p, p_rot); 110 | p_rot[0] += camera[3]; 111 | p_rot[1] += camera[4]; 112 | p_rot[2] += camera[5]; 113 | 114 | // T x_proj = static_cast(f_x) * p_rot[0] / p_rot[2]; 115 | // T y_proj = static_cast(f_y) * p_rot[1] / p_rot[2]; 116 | T x_proj = p_rot[0] / p_rot[2]; 117 | T y_proj = p_rot[1] / p_rot[2]; 118 | 119 | residuals[0] = static_cast(point2D_x) - x_proj; 120 | residuals[1] = static_cast(point2D_y) - y_proj; 121 | 122 | return true; 123 | } 124 | 125 | // Factory function 126 | static ceres::CostFunction* CreateCost(const double x, const double y, 127 | const double X, const double Y, 128 | const double Z, const double fx, 129 | const double fy) { 130 | return (new ceres::AutoDiffCostFunction( 131 | new ReprojectionError(x, y, X, Y, Z, fx, fy))); 132 | } 133 | 134 | // Assumes that the measurement is centered around the principal point. 135 | // This camera model does not take any radial distortion into account. If 136 | // radial distortion is present, one should undistort the measurements first. 
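// A minimal usage sketch (hypothetical values; the camera block is the
// angle-axis rotation followed by the translation, i.e., 6 parameters):
//
//   double camera[6] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
//   ceres::Problem problem;
//   problem.AddResidualBlock(
//       ReprojectionError::CreateCost(u, v, X, Y, Z, fx, fy),
//       nullptr, camera);
//   ceres::Solver::Options options;
//   ceres::Solver::Summary summary;
//   ceres::Solve(options, &problem, &summary);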
137 | double point2D_x; 138 | double point2D_y; 139 | // The 3D point position is fixed as we are only interested in refining the 140 | // camera parameters. 141 | double point3D_X; 142 | double point3D_Y; 143 | double point3D_Z; 144 | double f_x; 145 | double f_y; 146 | }; 147 | 148 | // Code for optimizing a camera pose by optimizing reprojection errors. 149 | // Each 2D keypoint can be represented by one or more 3D point and we compute 150 | // the average residual. 151 | struct MultiReprojectionError { 152 | MultiReprojectionError(double x, double y, 153 | const std::vector& points, 154 | double fx, double fy) 155 | : point2D_x(x), 156 | point2D_y(y), 157 | points_(points), 158 | f_x(fx), 159 | f_y(fy) { 160 | num_points_ = static_cast(points.size()); 161 | } 162 | 163 | template 164 | bool operator()(const T* const camera, T* residuals) const { 165 | residuals[0] = static_cast(0.0); 166 | residuals[1] = static_cast(0.0); 167 | 168 | T inv_num_points_ = static_cast(1.0 / static_cast(num_points_)); 169 | 170 | for (int i = 0; i < num_points_; ++i) { 171 | // The last three entries are the camera position. 172 | T p[3]; 173 | p[0] = points_[i][0] - camera[3]; 174 | p[1] = points_[i][1] - camera[4]; 175 | p[2] = points_[i][2] - camera[5]; 176 | 177 | // The first three entries correspond to the rotation matrix stored in an 178 | // angle-axis representation. 179 | T p_rot[3]; 180 | ceres::AngleAxisRotatePoint(camera, p, p_rot); 181 | 182 | T x_proj = static_cast(f_x) * p_rot[0] / p_rot[2]; 183 | T y_proj = static_cast(f_y) * p_rot[1] / p_rot[2]; 184 | 185 | residuals[0] += (static_cast(point2D_x) - x_proj) * inv_num_points_; 186 | residuals[1] += (static_cast(point2D_y) - y_proj) * inv_num_points_; 187 | } 188 | 189 | return true; 190 | } 191 | 192 | // Factory function 193 | static ceres::CostFunction* CreateCost( 194 | const double x, const double y, 195 | const std::vector& points, const double fx, 196 | const double fy) { 197 | return (new ceres::AutoDiffCostFunction( 198 | new MultiReprojectionError(x, y, points, fx, fy))); 199 | } 200 | 201 | // Assumes that the measurement is centered around the principal point. 202 | // This camera model does not take any radial distortion into account. If 203 | // radial distortion is present, one should undistort the measurements first. 204 | double point2D_x; 205 | double point2D_y; 206 | std::vector points_; 207 | int num_points_; 208 | double f_x; 209 | double f_y; 210 | }; 211 | 212 | // Non-linear refinement of a 3D point based on reprojection errors. 213 | struct ReprojectionErrorTriangulation { 214 | ReprojectionErrorTriangulation(const Camera& cam, double u, double v) 215 | : point2D_x(u), 216 | point2D_y(v) { 217 | P.topLeftCorner<3, 3>() = cam.q.toRotationMatrix(); 218 | P.col(3) = cam.t; 219 | P.row(0) *= cam.focal_x; 220 | P.row(1) *= cam.focal_y; 221 | } 222 | 223 | template 224 | bool operator()(const T* const point, T* residuals) const { 225 | Eigen::Map > X(point); 226 | 227 | const Eigen::Matrix p = (P * X.homogeneous()).hnormalized(); 228 | 229 | residuals[0] = static_cast(point2D_x) - p[0]; 230 | residuals[1] = static_cast(point2D_y) - p[1]; 231 | 232 | return true; 233 | } 234 | 235 | // Factory function 236 | static ceres::CostFunction* CreateCost(const Camera& cam, double u, double v) { 237 | return (new ceres::AutoDiffCostFunction( 238 | new ReprojectionErrorTriangulation(cam, u, v))); 239 | } 240 | 241 | // Assumes that the measurement is centered around the principal point. 
242 | // This camera model does not take any radial distortion into account. If 243 | // radial distortion is present, one should undistort the measurements first. 244 | double point2D_x; 245 | double point2D_y; 246 | // The camera pose is fixed as we only want to optimize the 3D point position. 247 | Eigen::Matrix P; 248 | }; 249 | 250 | } // namespace visloc_help 251 | 252 | 253 | #endif // MESH_LOC_TYPES_H_ 254 | 255 | -------------------------------------------------------------------------------- /src/utils.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022, Vojtech Panek and Zuzana Kukelova and Torsten Sattler 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are met: 6 | // 7 | // 1. Redistributions of source code must retain the above copyright notice, 8 | // this list of conditions and the following disclaimer. 9 | // 10 | // 2. Redistributions in binary form must reproduce the above copyright notice, 11 | // this list of conditions and the following disclaimer in the documentation 12 | // and/or other materials provided with the distribution. 13 | // 14 | // 3. Neither the name of the copyright holder nor the names of its 15 | // contributors may be used to endorse or promote products derived from 16 | // this software without specific prior written permission. 17 | // 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 | // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 22 | // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 | // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 | // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 | // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 | // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 | // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 28 | // THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | #ifndef MESH_LOC_UTILS_H_ 31 | #define MESH_LOC_UTILS_H_ 32 | 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include 46 | #include 47 | 48 | #include 49 | #include 50 | #include 51 | 52 | #include 53 | #include 54 | 55 | #include "types.h" 56 | #include "utils.h" 57 | 58 | namespace mesh_loc { 59 | 60 | // Creates a Camera object from the specifications of a Colmap camera. 61 | // Note that not all camera models are currently supported. 
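// Example call (hypothetical parameters; for PINHOLE the parameter order is
// fx, fy, cx, cy and the orientation is a (w, x, y, z) quaternion):
//
//   Camera cam;
//   CreateCamera("PINHOLE", {1200.0, 1180.0, 640.0, 360.0},
//                Eigen::Vector4d(1.0, 0.0, 0.0, 0.0),
//                Eigen::Vector3d::Zero(), &cam);
//
// Afterwards, cam.proj_matrix maps world points to focal-scaled image
// coordinates relative to the principal point (see project() below).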
62 | void CreateCamera(const std::string& camera_model, 63 | const std::vector& params, 64 | const Eigen::Vector4d& q, 65 | const Eigen::Vector3d& t, 66 | Camera* cam) { 67 | if (camera_model.compare("SIMPLE_RADIAL") == 0 || 68 | camera_model.compare("SIMPLE_RADIAL_FISHEYE") == 0) { 69 | cam->focal_x = params[0]; 70 | cam->focal_y = params[0]; 71 | cam->c_x = params[1]; 72 | cam->c_y = params[2]; 73 | cam->radial = {params[3]}; 74 | cam->camera_type = camera_model; 75 | } else if (camera_model.compare("SIMPLE_PINHOLE") == 0) { 76 | cam->focal_x = params[0]; 77 | cam->focal_y = params[0]; 78 | cam->c_x = params[1]; 79 | cam->c_y = params[2]; 80 | cam->radial.clear(); 81 | cam->camera_type = camera_model; 82 | } else if (camera_model.compare("PINHOLE") == 0) { 83 | cam->focal_x = params[0]; 84 | cam->focal_y = params[1]; 85 | cam->c_x = params[2]; 86 | cam->c_y = params[3]; 87 | cam->radial.clear(); 88 | cam->camera_type = camera_model; 89 | } else if (camera_model.compare("RADIAL") == 0 || 90 | camera_model.compare("RADIAL_FISHEYE") == 0) { 91 | cam->focal_x = params[0]; 92 | cam->focal_y = params[0]; 93 | cam->c_x = params[1]; 94 | cam->c_y = params[2]; 95 | cam->radial = {params[3], params[4]}; 96 | cam->camera_type = camera_model; 97 | } else { 98 | std::cerr << " ERROR: Camera model " << camera_model << " is currently " 99 | << "not supported" << std::endl; 100 | return; 101 | } 102 | cam->q.w() = q[0]; 103 | cam->q.x() = q[1]; 104 | cam->q.y() = q[2]; 105 | cam->q.z() = q[3]; 106 | cam->t = t; 107 | cam->proj_matrix.topLeftCorner<3, 3>() = cam->q.toRotationMatrix(); 108 | // std::cout << cam->proj_matrix.topLeftCorner<3, 3>() << std::endl << std::endl; 109 | cam->proj_matrix.col(3) = t; 110 | cam->proj_matrix_normalized = cam->proj_matrix; 111 | cam->proj_matrix.row(0) *= cam->focal_x; 112 | cam->proj_matrix.row(1) *= cam->focal_y; 113 | } 114 | 115 | Eigen::Vector2d project(const Camera& cam, const Eigen::Vector3d& X) { 116 | Eigen::Vector3d p = cam.proj_matrix * X.homogeneous(); 117 | if (p[2] <= 0.0) return Eigen::Vector2d(1000000.0, 1000000.0); 118 | return p.hnormalized(); 119 | // return (cam.proj_matrix * X.homogeneous()).hnormalized(); 120 | } 121 | 122 | Eigen::Vector2d project(const Camera& cam, const Eigen::Vector4d& X) { 123 | Eigen::Vector3d p = cam.proj_matrix * X; 124 | if (p[2] <= 0.0) return Eigen::Vector2d(1000000.0, 1000000.0); 125 | return p.hnormalized(); 126 | // return (cam.proj_matrix * X).hnormalized(); 127 | } 128 | 129 | Eigen::Vector3d get_camera_position(const Camera& cam) { 130 | return -cam.proj_matrix_normalized.topLeftCorner<3, 3>().transpose() 131 | * cam.proj_matrix_normalized.col(3); 132 | } 133 | 134 | 135 | 136 | // Code for triangulating a 3D point from two observations. See Hartley & 137 | // Zisserman, 2nd edition, Chapter 12.2 (page 312). 
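// Each of the two observations x = (x0, x1, x2)^T with projection matrix P
// contributes two rows to the homogeneous 4x4 system A * X_h = 0:
//   x0 * P.row(2) - x2 * P.row(0)   and   x1 * P.row(2) - x2 * P.row(1).
// The solution is the right singular vector of A for the smallest singular
// value. Usage sketch (hypothetical rays in normalized camera coordinates):
//
//   Eigen::Vector3d X;
//   triangulate(cam1.proj_matrix_normalized, cam2.proj_matrix_normalized,
//               Eigen::Vector3d(0.01, 0.02, 1.0),
//               Eigen::Vector3d(-0.03, 0.02, 1.0), &X);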
138 | void triangulate(const Eigen::Matrix& P1, 139 | const Eigen::Matrix& P2, 140 | const Eigen::Vector3d& x1, const Eigen::Vector3d& x2, 141 | Eigen::Vector3d* X) { 142 | Eigen::Matrix4d A; 143 | A.row(0) = x1[0] * P1.row(2) - x1[2] * P1.row(0); 144 | A.row(1) = x1[1] * P1.row(2) - x1[2] * P1.row(1); 145 | A.row(2) = x2[0] * P2.row(2) - x2[2] * P2.row(0); 146 | A.row(3) = x2[1] * P2.row(2) - x2[2] * P2.row(1); 147 | 148 | Eigen::JacobiSVD> svd(A, Eigen::ComputeFullV); 149 | Eigen::Vector4d v = svd.matrixV().col(3); 150 | (*X) = v.hnormalized(); 151 | } 152 | 153 | int refine_point(const std::vector& cameras, 154 | const std::vector& observations, 155 | const std::vector& camera_indices, 156 | const double reprojection_error, 157 | Eigen::Vector3d* X, std::vector* inlier_indices) { 158 | const int kNumObs = static_cast(observations.size()); 159 | const double kSquaredThreshold = reprojection_error * reprojection_error; 160 | 161 | inlier_indices->clear(); 162 | ceres::Problem refinement_problem; 163 | double* point = new double[3]; 164 | for (int i = 0; i < 3; ++i) point[i] = (*X)[i]; 165 | 166 | int num_inliers = 0; 167 | for (int k = 0; k < kNumObs; ++k) { 168 | const int kCamId = camera_indices[k]; 169 | Eigen::Vector2d p = project(cameras[kCamId], *X); 170 | double error = (p - observations[k]).squaredNorm(); 171 | 172 | if (error < kSquaredThreshold) { 173 | ++num_inliers; 174 | inlier_indices->push_back(k); 175 | ceres::CostFunction* cost_function = 176 | ReprojectionErrorTriangulation::CreateCost(cameras[kCamId], 177 | observations[k][0], observations[k][1]); 178 | refinement_problem.AddResidualBlock(cost_function, nullptr, point); 179 | } 180 | } 181 | 182 | if (inlier_indices->size() < 2u) { 183 | delete [] point; 184 | point = nullptr; 185 | return static_cast(inlier_indices->size()); 186 | } 187 | // std::cout << std::endl << X->transpose() << std::endl; 188 | 189 | ceres::Solver::Options options; 190 | options.linear_solver_type = ceres::DENSE_QR; 191 | options.minimizer_progress_to_stdout = false; 192 | options.max_num_iterations = 100; 193 | ceres::Solver::Summary summary; 194 | ceres::Solve(options, &refinement_problem, &summary); 195 | 196 | if (summary.IsSolutionUsable()) { 197 | Eigen::Vector3d Y; 198 | Y << point[0], point[1], point[2]; 199 | 200 | int new_num_inliers = 0; 201 | std::vector new_inliers; 202 | for (int k = 0; k < kNumObs; ++k) { 203 | const int kCamId = camera_indices[k]; 204 | Eigen::Vector2d p = project(cameras[kCamId], Y); 205 | double error = (p - observations[k]).squaredNorm(); 206 | 207 | if (error < kSquaredThreshold) { 208 | ++new_num_inliers; 209 | new_inliers.push_back(k); 210 | } 211 | } 212 | 213 | if (new_num_inliers >= num_inliers) { 214 | *X = Y; 215 | *inlier_indices = new_inliers; 216 | } 217 | } 218 | delete [] point; 219 | point = nullptr; 220 | // std::cout << X->transpose() << std::endl; 221 | 222 | return static_cast(inlier_indices->size()); 223 | } 224 | 225 | // Given a set of observations in images, computes the corresponding 3D point. 226 | // Returns the number of inliers for the generated point. If the return value is 227 | // smaller than 2, then the resulting 3D point is not well-defined. 
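// A pair of observations is only triangulated if the viewing rays are
// sufficiently far from parallel: with min_angle in degrees, the pair (i, j)
// is skipped whenever |dot(ray_i, ray_j)| >= cos(min_angle * pi / 180),
// since small triangulation angles make the depth ill-conditioned.
// Usage sketch (hypothetical thresholds, mirroring the call in
// pose_estimation):
//
//   Eigen::Vector3d X;
//   std::vector<int> inlier_indices;
//   int num_consistent = triangulate_observations(
//       db_cameras, observations, camera_indices,
//       /*reprojection_error=*/12.0, /*min_angle=*/1.0, &X, &inlier_indices);
//   // num_consistent < 2 means the point is not well-defined.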
228 | int triangulate_observations(const std::vector& cameras, 229 | const std::vector& observations, 230 | const std::vector& camera_indices, 231 | const double reprojection_error, 232 | const double min_angle, 233 | Eigen::Vector3d* X, 234 | std::vector* inlier_indices) { 235 | if (observations.size() < 2u) return 0; 236 | 237 | const int kNumObs = static_cast(observations.size()); 238 | std::vector> projection_matrices(kNumObs); 239 | std::vector rays(kNumObs); 240 | std::vector global_rays(kNumObs); 241 | for (int i = 0; i < kNumObs; ++i) { 242 | projection_matrices[i] = cameras[camera_indices[i]].proj_matrix_normalized; 243 | 244 | rays[i] << observations[i][0] / cameras[camera_indices[i]].focal_x, 245 | observations[i][1] / cameras[camera_indices[i]].focal_y, 1.0; 246 | global_rays[i] = 247 | projection_matrices[i].topLeftCorner<3, 3>().transpose() * rays[i]; 248 | global_rays[i].normalize(); 249 | } 250 | 251 | // Exhaustively tries all possible combinations. 252 | const double kAngleThresh = std::cos(min_angle * M_PI / 180.0); 253 | const double kSquaredThreshold = reprojection_error * reprojection_error; 254 | double best_score = std::numeric_limits::max(); 255 | for (int i = 0; i < kNumObs; ++i) { 256 | for (int j = i + 1; j < kNumObs; ++j) { 257 | double cos_angle = std::fabs(global_rays[i].dot(global_rays[j])); 258 | if (cos_angle >= kAngleThresh) continue; 259 | Eigen::Vector3d x; 260 | triangulate(projection_matrices[i], projection_matrices[j], 261 | rays[i], rays[j], &x); 262 | 263 | double score = 0.0; 264 | Eigen::Vector4d x_ = x.homogeneous(); 265 | for (int k = 0; k < kNumObs; ++k) { 266 | Eigen::Vector2d p = project(cameras[camera_indices[k]], x_); 267 | 268 | score += std::min(kSquaredThreshold, 269 | (p - observations[k]).squaredNorm()); 270 | } 271 | 272 | if (score < best_score) { 273 | best_score = score; 274 | *X = x; 275 | } 276 | } 277 | } 278 | 279 | // Non-linear refinement. 280 | return refine_point(cameras, observations, camera_indices, reprojection_error, 281 | X, inlier_indices); 282 | } 283 | 284 | // Given a set of 2D observations selects a single 3D point that is consistent 285 | // with most other observations. Refines the selected point by optimizing the 286 | // reprojection error. Returns the number of observations it is consistent with. 287 | int select_and_refine_point(const std::vector& cameras, 288 | const std::vector& observations, 289 | const std::vector& camera_indices, 290 | const std::vector& points, 291 | const double reprojection_error, 292 | Eigen::Vector3d* X, 293 | std::vector* inlier_indices) { 294 | const int kNumObs = static_cast(observations.size()); 295 | 296 | double best_score = std::numeric_limits::max(); 297 | const double kSquaredThreshold = reprojection_error * reprojection_error; 298 | 299 | for (int i = 0; i < kNumObs; ++i) { 300 | double score = 0.0; 301 | for (int k = 0; k < kNumObs; ++k) { 302 | Eigen::Vector2d p = project(cameras[camera_indices[k]], points[i]); 303 | 304 | score += std::min(kSquaredThreshold, (p - observations[k]).squaredNorm()); 305 | } 306 | 307 | if (score < best_score) { 308 | *X = points[i]; 309 | best_score = score; 310 | } 311 | } 312 | 313 | // Non-linear refinement. 
314 |   return refine_point(cameras, observations, camera_indices, reprojection_error,
315 |                       X, inlier_indices);
316 | }
317 | 
318 | 
319 | // Given a set of 2D-3D matches (represented by the indices of the database
320 | // images in which the 3D points were observed), computes the corresponding
321 | // co-visibility graph and detects connected components in the graph.
322 | // Returns the number of clusters.
323 | int compute_covisibility_clusters(
324 |     const std::vector<Camera>& cameras,
325 |     const std::vector<std::vector<Eigen::Vector2d>>& observations,
326 |     const std::vector<std::vector<int>>& camera_indices,
327 |     const std::vector<std::vector<Eigen::Vector3d>>& points,
328 |     const double reprojection_error,
329 |     std::vector<int>* cluster_ids_per_cam) {
330 | 
331 |   int num_cams = static_cast<int>(cameras.size());
332 | 
333 |   Eigen::MatrixXi A(num_cams, num_cams);
334 |   A.setZero();
335 | 
336 |   const int kNumMatches = static_cast<int>(observations.size());
337 |   const double kSquaredReprojError = reprojection_error * reprojection_error;
338 |   for (int i = 0; i < kNumMatches; ++i) {
339 |     const int kNumObs = static_cast<int>(observations[i].size());
340 | 
341 |     for (int j = 0; j < kNumObs; ++j) {
342 |       for (int k = j + 1; k < kNumObs; ++k) {
343 |         if (A(camera_indices[i][j], camera_indices[i][k]) == 1) continue;
344 | 
345 |         Eigen::Vector2d p1 = project(cameras[camera_indices[i][j]],
346 |                                      points[i][k]);
347 |         Eigen::Vector2d p2 = project(cameras[camera_indices[i][k]],
348 |                                      points[i][j]);
349 |         double max_error = std::max(
350 |             (p1 - observations[i][j]).squaredNorm(),
351 |             (p2 - observations[i][k]).squaredNorm());
352 |         if (max_error > kSquaredReprojError) continue;
353 |         A(camera_indices[i][j], camera_indices[i][k]) = 1;
354 |         A(camera_indices[i][k], camera_indices[i][j]) = 1;
355 |       }
356 |     }
357 |   }
358 | 
359 |   std::vector<int>& c_ids = *cluster_ids_per_cam;
360 |   c_ids.resize(num_cams);
361 |   std::fill(c_ids.begin(), c_ids.end(), -1);
362 |   int current_cid = 0;
363 | 
364 |   for (int i = 0; i < num_cams; ++i) {
365 |     if (c_ids[i] > -1) continue;
366 | 
367 |     std::queue<int> queue;
368 |     queue.push(i);
369 | 
370 |     while (!queue.empty()) {
371 |       int current_cam = queue.front();
372 |       queue.pop();
373 | 
374 |       c_ids[current_cam] = current_cid;
375 | 
376 |       for (int j = 0; j < num_cams; ++j) {
377 |         if (A(current_cam, j) == 1 && c_ids[j] == -1) {
378 |           queue.push(j);
379 |         }
380 |       }
381 |     }
382 | 
383 |     ++current_cid;
384 |   }
385 | 
386 |   return current_cid;
387 | }
388 | 
389 | // Clusters keypoint detections in the image. This is useful for methods such as
390 | // patch2pix, which do not compute repeatable keypoint positions.
391 | // This approach is inspired by the clustering method from
392 | // [Zhou et al., Patch2Pix: Epipolar-Guided Pixel-Level Correspondences,
393 | // CVPR 2021].
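// The implementation below hashes the grid cell (cell_x, cell_y) of each
// keypoint into the single integer cell_y * 100000 + cell_x, so that all
// keypoints falling into one cell can be retrieved from a multimap. For
// example, with a cell size of 4 pixels, a keypoint at (10.5, 7.2) lands in
// cell (2, 1) and receives the hash 1 * 100000 + 2 = 100002; merge candidates
// are then gathered from the 3x3 cell neighborhood around (2, 1).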
394 | void cluster_keypoints(
395 |     const std::vector<Camera>& cameras,
396 |     const std::vector<Eigen::Vector2d>& keypoints,
397 |     const std::vector<std::vector<Eigen::Vector2d>>& observations,
398 |     const std::vector<std::vector<int>>& camera_indices,
399 |     const std::vector<std::vector<Eigen::Vector3d>>& points,
400 |     const double distance_thresh, const bool use_3D_points,
401 |     const double reprojection_error, const int width, const int height,
402 |     std::vector<Eigen::Vector2d>* new_keypoints,
403 |     std::vector<std::vector<Eigen::Vector2d>>* new_observations,
404 |     std::vector<std::vector<int>>* new_camera_indices,
405 |     std::vector<std::vector<Eigen::Vector3d>>* new_points) {
406 |   new_keypoints->clear();
407 |   new_observations->clear();
408 |   new_camera_indices->clear();
409 |   new_points->clear();
410 | 
411 |   const int kNumObs = static_cast<int>(observations.size());
412 | 
413 |   // To efficiently find potential candidates for merging for each keypoint,
414 |   // we use a regular grid (stored as a hash map) with a cell size corresponding
415 |   // to the radius at which points can be merged.
416 |   const double kGridSize = static_cast<double>(
417 |       static_cast<int>(distance_thresh + 0.5));
418 |   // const int kNumXGrids = static_cast<int>(static_cast<double>(width)
419 |   //     / static_cast<double>(kGridSize) + 0.5);
420 |   std::unordered_multimap<int, int> grid;
421 | 
422 |   const int kGridMult = 100000;
423 | 
424 |   // Fills the map.
425 |   for (int i = 0; i < kNumObs; ++i) {
426 |     int cell_x = static_cast<int>(keypoints[i][0] / kGridSize);
427 |     int cell_y = static_cast<int>(keypoints[i][1] / kGridSize);
428 |     grid.emplace(std::make_pair(cell_y * kGridMult + cell_x, i));
429 |   }
430 | 
431 |   // For each keypoint, uses the map to find relevant points and to compute a
432 |   // score (which is how many points it covers).
433 |   typedef std::pair<int, std::pair<int, double>> ScoreEntry;
434 |   std::vector<ScoreEntry> scores(kNumObs);
435 |   std::vector<std::vector<int>> covers(kNumObs);
436 |   const double kSquaredReprojError = reprojection_error * reprojection_error;
437 |   for (int i = 0; i < kNumObs; ++i) {
438 |     int cell_x = static_cast<int>(keypoints[i][0] / kGridSize);
439 |     int cell_y = static_cast<int>(keypoints[i][1] / kGridSize);
440 |     int score = 0;
441 |     double sum_dist = 0.0;
442 |     covers[i].clear();
443 |     scores[i].first = i;
444 |     for (int x = -1; x <= 1; ++x) {
445 |       for (int y = -1; y <= 1; ++y) {
446 |         int c_x = cell_x + x;
447 |         int c_y = cell_y + y;
448 |         auto indices = grid.equal_range(c_y * kGridMult + c_x);
449 |         for (auto idx = indices.first; idx != indices.second; ++idx) {
450 |           const int kKeyId = idx->second;
451 |           double dist = (keypoints[i] - keypoints[kKeyId]).norm();
452 |           if (dist < distance_thresh) {
453 |             if (use_3D_points) {
454 |               // This should be deprecated.
455 |               Eigen::Vector2d p1 = project(cameras[camera_indices[kKeyId][0]],
456 |                                            points[i][0]);
457 |               Eigen::Vector2d p2 = project(cameras[camera_indices[i][0]],
458 |                                            points[kKeyId][0]);
459 |               double max_error = std::max(
460 |                   (p1 - observations[kKeyId][0]).squaredNorm(),
461 |                   (p2 - observations[i][0]).squaredNorm());
462 |               if (max_error > kSquaredReprojError) continue;
463 |             }
464 |             ++score;
465 |             sum_dist += dist;
466 |             covers[i].emplace_back(idx->second);
467 |           }
468 |         }
469 |       }
470 |     }
471 |     scores[i].second.first = score;
472 |     scores[i].second.second = sum_dist;
473 |   }
474 | 
475 |   // Greedily selects keypoints that cover as many other keypoints as possible.
476 |   std::sort(scores.begin(), scores.end(),
477 |             [](const ScoreEntry& a, const ScoreEntry& b) {
478 |               if (a.second.first == b.second.first) {
479 |                 return a.second.second < b.second.second;
480 |               }
481 |               return a.second.first > b.second.first;
482 |             });
483 | 
484 |   std::vector<bool> covered(kNumObs, false);
485 |   // std::vector<Eigen::Vector2d> keys;
486 |   for (int i = 0; i < kNumObs; ++i) {
487 |     const int kKeyId = scores[i].first;
488 |     if (covered[kKeyId]) continue;
489 | 
490 |     Eigen::Vector2d key_pt(0.0, 0.0);
491 | 
492 |     std::vector<Eigen::Vector2d> obs;
493 |     std::vector<int> cam_ids;
494 |     std::vector<Eigen::Vector3d> pts;
495 |     for (const int id : covers[kKeyId]) {
496 |       key_pt += keypoints[id];
497 |       // keys.push_back(keypoints[id]);
498 |       // obs.push_back(observations[id][0]);
499 |       obs.insert(obs.end(), observations[id].begin(),
500 |                  observations[id].end());
501 |       // cam_ids.push_back(camera_indices[id][0]);
502 |       cam_ids.insert(cam_ids.end(), camera_indices[id].begin(),
503 |                      camera_indices[id].end());
504 |       // pts.push_back(points[id][0]);
505 |       pts.insert(pts.end(), points[id].begin(), points[id].end());
506 |       covered[id] = true;
507 |     }
508 |     // Uses the average of the merged keypoint positions.
509 |     key_pt /= static_cast<double>(covers[kKeyId].size());
510 | 
511 |     // new_keypoints->push_back(keypoints[kKeyId]);
512 |     new_keypoints->push_back(key_pt);
513 |     new_observations->push_back(obs);
514 |     new_points->push_back(pts);
515 |     new_camera_indices->push_back(cam_ids);
516 |   }
517 | }
518 | 
519 | }  // namespace mesh_loc
520 | 
521 | 
522 | #endif  // MESH_LOC_UTILS_H_
523 | 
524 | 
--------------------------------------------------------------------------------
/src/utils.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2022, Vojtech Panek and Zuzana Kukelova and Torsten Sattler
2 | // All rights reserved.
3 | //
4 | // Redistribution and use in source and binary forms, with or without
5 | // modification, are permitted provided that the following conditions are met:
6 | //
7 | // 1. Redistributions of source code must retain the above copyright notice,
8 | //    this list of conditions and the following disclaimer.
9 | //
10 | // 2. Redistributions in binary form must reproduce the above copyright notice,
11 | //    this list of conditions and the following disclaimer in the documentation
12 | //    and/or other materials provided with the distribution.
13 | //
14 | // 3. Neither the name of the copyright holder nor the names of its
15 | //    contributors may be used to endorse or promote products derived from
16 | //    this software without specific prior written permission.
17 | //
18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 | // ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 22 | // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 | // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 | // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 | // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 | // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 | // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 28 | // THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | #ifndef MESH_LOC_UTILS_H_ 31 | #define MESH_LOC_UTILS_H_ 32 | 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include 46 | #include 47 | 48 | #include 49 | #include 50 | #include 51 | 52 | #include 53 | #include 54 | 55 | #include "types.h" 56 | 57 | namespace mesh_loc { 58 | 59 | // Creates a Camera object from the specifications of a Colmap camera. 60 | // Note that not all camera models are currently supported. 61 | void CreateCamera(const std::string& camera_model, 62 | const std::vector& params, 63 | const Eigen::Vector4d& q, 64 | const Eigen::Vector3d& t, 65 | Camera* cam); 66 | 67 | Eigen::Vector2d project(const Camera& cam, const Eigen::Vector3d& X); 68 | 69 | Eigen::Vector2d project(const Camera& cam, const Eigen::Vector4d& X); 70 | 71 | Eigen::Vector3d get_camera_position(const Camera& cam); 72 | 73 | // Code for triangulating a 3D point from two observations. See Hartley & 74 | // Zisserman, 2nd edition, Chapter 12.2 (page 312). 75 | void triangulate(const Eigen::Matrix& P1, 76 | const Eigen::Matrix& P2, 77 | const Eigen::Vector3d& x1, const Eigen::Vector3d& x2, 78 | Eigen::Vector3d* X); 79 | 80 | int refine_point(const std::vector& cameras, 81 | const std::vector& observations, 82 | const std::vector& camera_indices, 83 | const double reprojection_error, 84 | Eigen::Vector3d* X, std::vector* inlier_indices); 85 | 86 | // Given a set of observations in images, computes the corresponding 3D point. 87 | // Returns the number of inliers for the generated point. If the return value is 88 | // smaller than 2, then the resulting 3D point is not well-defined. 89 | int triangulate_observations(const std::vector& cameras, 90 | const std::vector& observations, 91 | const std::vector& camera_indices, 92 | const double reprojection_error, 93 | const double min_angle, 94 | Eigen::Vector3d* X, 95 | std::vector* inlier_indices); 96 | 97 | // Given a set of 2D observations selects a single 3D point that is consistent 98 | // with most other observations. Refines the selected point by optimizing the 99 | // reprojection error. Returns the number of observations it is consistent with. 100 | int select_and_refine_point(const std::vector& cameras, 101 | const std::vector& observations, 102 | const std::vector& camera_indices, 103 | const std::vector& points, 104 | const double reprojection_error, 105 | Eigen::Vector3d* X, 106 | std::vector* inlier_indices); 107 | 108 | 109 | // Given a set of 2D-3D matches (represented by the indices of the database 110 | // images in which the 3D points were observed), computes the corresponding 111 | // co-visibility graph and detects connected components in the graph. 112 | // Returns the number of clusters. 
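// Usage sketch (mirroring the call in pose_estimation; the threshold value is
// hypothetical):
//
//   std::vector<int> covis_cluster_ids(db_cameras.size(), 0);
//   int num_clusters = compute_covisibility_clusters(
//       db_cameras, db_observations, camera_indices, points,
//       /*reprojection_error=*/12.0, &covis_cluster_ids);
//   // covis_cluster_ids[i] is the cluster id of database camera i.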
113 | int compute_covisibility_clusters(
114 |     const std::vector<Camera>& cameras,
115 |     const std::vector<std::vector<Eigen::Vector2d>>& observations,
116 |     const std::vector<std::vector<int>>& camera_indices,
117 |     const std::vector<std::vector<Eigen::Vector3d>>& points,
118 |     const double reprojection_error,
119 |     std::vector<int>* cluster_ids_per_cam);
120 | 
121 | // Clusters keypoint detections in the image. This is useful for methods such as
122 | // patch2pix, which do not compute repeatable keypoint positions.
123 | // This approach is inspired by the clustering method from
124 | // [Zhou et al., Patch2Pix: Epipolar-Guided Pixel-Level Correspondences,
125 | // CVPR 2021].
126 | // Assumes that there is exactly one database keypoint observation and one
127 | // corresponding 3D point per query image keypoint.
128 | void cluster_keypoints(
129 |     const std::vector<Camera>& cameras,
130 |     const std::vector<Eigen::Vector2d>& keypoints,
131 |     const std::vector<std::vector<Eigen::Vector2d>>& observations,
132 |     const std::vector<std::vector<int>>& camera_indices,
133 |     const std::vector<std::vector<Eigen::Vector3d>>& points,
134 |     const double distance_thresh, const bool use_3D_points,
135 |     const double reprojection_error, const int width, const int height,
136 |     std::vector<Eigen::Vector2d>* new_keypoints,
137 |     std::vector<std::vector<Eigen::Vector2d>>* new_observations,
138 |     std::vector<std::vector<int>>* new_camera_indices,
139 |     std::vector<std::vector<Eigen::Vector3d>>* new_points);
140 | 
141 | }  // namespace mesh_loc
142 | 
143 | 
144 | #endif  // MESH_LOC_UTILS_H_
145 | 
146 | 
--------------------------------------------------------------------------------
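A sketch of the pose_options dictionary consumed by pose_estimation (the keys
are those read in src/pose_estimation.cc; the concrete values below are
hypothetical examples, and "ransac_type" must be one of MSAC, MSAC+REF,
EFFSAC, PYCOLMAP, MULTIMSAC, POSELIB, or POSELIB+REF):

    py::dict pose_options;
    pose_options["triangulate"] = false;
    pose_options["merge_3D_points"] = true;
    pose_options["cluster_keypoints"] = false;
    pose_options["covisibility_filtering"] = false;
    pose_options["use_all_matches"] = false;
    pose_options["num_LO_iters"] = 10;
    pose_options["inlier_threshold"] = 12.0;
    pose_options["min_ransac_iterations"] = static_cast<uint32_t>(1000);
    pose_options["max_ransac_iterations"] = static_cast<uint32_t>(100000);
    pose_options["ransac_type"] = "POSELIB+REF";
    pose_options["refinement_range"] = 1.0;   // only used for the +REF variants
    pose_options["refinement_step"] = 0.25;   // only used for the +REF variants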