├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── benchmark ├── config.py ├── extended_datasets.md ├── mapfree.py ├── metrics.py ├── reprojection.py ├── scannet.py ├── sevenscenes.py ├── test_metrics.py └── utils.py ├── config ├── default.py ├── mapfree.yaml ├── mapfree_multi.yaml ├── matching │ ├── mapfree │ │ ├── loftr_emat_dptkitti.yaml │ │ ├── loftr_emat_dptnyu.yaml │ │ ├── loftr_pnp_dptkitti.yaml │ │ ├── loftr_pnp_dptnyu.yaml │ │ ├── sg_emat_dptkitti.yaml │ │ ├── sg_emat_dptnyu.yaml │ │ ├── sg_pnp_dptkitti.yaml │ │ ├── sg_pnp_dptnyu.yaml │ │ ├── sg_procrustes_dptkitti.yaml │ │ ├── sift_emat_dptkitti.yaml │ │ ├── sift_emat_dptnyu.yaml │ │ ├── sift_pnp_dptkitti.yaml │ │ └── sift_pnp_dptnyu.yaml │ ├── scannet │ │ ├── loftr_emat_dpt.yaml │ │ ├── loftr_emat_gt.yaml │ │ ├── loftr_emat_planercnn.yaml │ │ ├── loftr_pnp_dpt.yaml │ │ ├── loftr_pnp_gt.yaml │ │ ├── loftr_pnp_planercnn.yaml │ │ ├── loftr_procrustes_dpt.yaml │ │ ├── loftr_procrustes_dpt_icp.yaml │ │ ├── loftr_procrustes_gt.yaml │ │ ├── loftr_procrustes_gt_icp.yaml │ │ ├── loftr_procrustes_planercnn.yaml │ │ ├── loftr_procrustes_planercnn_icp.yaml │ │ ├── sg_emat_dpt.yaml │ │ ├── sg_emat_gt.yaml │ │ ├── sg_emat_planercnn.yaml │ │ ├── sg_pnp_dpt.yaml │ │ ├── sg_pnp_gt.yaml │ │ ├── sg_pnp_planercnn.yaml │ │ ├── sg_procrustes_dpt.yaml │ │ ├── sg_procrustes_gt.yaml │ │ ├── sg_procrustes_planercnn.yaml │ │ ├── sift_emat_dpt.yaml │ │ ├── sift_emat_gt.yaml │ │ ├── sift_emat_planercnn.yaml │ │ ├── sift_pnp_dpt.yaml │ │ ├── sift_pnp_gt.yaml │ │ ├── sift_pnp_planercnn.yaml │ │ ├── sift_procrustes_dpt.yaml │ │ ├── sift_procrustes_dpt_icp.yaml │ │ ├── sift_procrustes_gtdepth.yaml │ │ ├── sift_procrustes_gtdepth_icp.yaml │ │ ├── sift_procrustes_planercnn.yaml │ │ ├── sift_procrustes_planercnn_icp.yaml │ │ └── sift_procrustes_smdp.yaml │ └── sevenscenes │ │ ├── loftr_emat_planercnn.yaml │ │ ├── loftr_pnp_planercnn.yaml │ │ ├── sg_emat_planercnn.yaml │ │ ├── sg_pnp_planercnn.yaml │ │ ├── sift_emat_planercnn.yaml │ │ └── sift_pnp_planercnn.yaml ├── regression │ ├── mapfree │ │ ├── 3d3d.yaml │ │ ├── 3d3d_lowoverlap.yaml │ │ ├── 3d3d_no_posencoder.yaml │ │ ├── 3d3d_no_warping.yaml │ │ ├── 3d3d_weighted_loss.yaml │ │ ├── multiframe │ │ │ └── 3d3d_multi.yaml │ │ ├── rot6d_trans.yaml │ │ ├── rotbin_trans.yaml │ │ ├── rotbin_transdirectionbin_scale.yaml │ │ ├── rotbin_transdirectionbin_scale_lowoverlap.yaml │ │ ├── rotbin_transdirectionbin_scale_qkv.yaml │ │ ├── rotquat_trans.yaml │ │ └── rotquat_transdirection_scale.yaml │ └── scannet │ │ ├── 3d3d.yaml │ │ ├── 3d3d_dual_posenc.yaml │ │ ├── 3d3d_dual_posenc_upsampling.yaml │ │ ├── 3d3d_half_cv.yaml │ │ ├── 3d3d_lowoverlap.yaml │ │ ├── 3d3d_no_avgpool.yaml │ │ ├── 3d3d_qkv.yaml │ │ ├── 3d3d_with_dustbin.yaml │ │ └── rotbin_transdirectionbin_scale.yaml ├── scannet.yaml ├── sevenscenes.yaml └── utils.py ├── environment.yml ├── environment_eccv22.yml ├── etc ├── feature_matching_baselines │ ├── compute.py │ ├── matchers.py │ └── utils.py └── teaser.png ├── lib ├── datasets │ ├── datamodules.py │ ├── mapfree.py │ ├── sampler.py │ ├── scannet.py │ ├── sevenscenes.py │ └── utils.py ├── models │ ├── builder.py │ ├── matching │ │ ├── feature_matching.py │ │ ├── model.py │ │ └── pose_solver.py │ └── regression │ │ ├── aggregator.py │ │ ├── encoder │ │ ├── preact.py │ │ ├── resnet.py │ │ └── resunet.py │ │ ├── head.py │ │ └── model.py └── utils │ ├── data.py │ ├── localize.py │ ├── logger.py │ ├── loss.py │ ├── metrics.py │ ├── rotationutils.py │ ├── solver.py │ └── visualisation.py ├── 
pyrightconfig.json ├── submission.py ├── train.py └── visualisation ├── README.md ├── environment.yml ├── lazy_camera.py ├── render_estimates.py ├── render_scene.py └── render_util.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .DS_Store 3 | __pycache__/ 4 | .mypy_cache 5 | *.egg-info 6 | tmp/ 7 | 8 | data/ 9 | outputs/ 10 | results/ 11 | 12 | # scripts for running experiments 13 | .idea/ 14 | 15 | # testing 16 | .coverage 17 | .coverage.* 18 | *,cover 19 | .pytest_cache 20 | 21 | # Python related 22 | # Byte-compiled / optimized / DLL files 23 | __pycache__/ 24 | *.py[cod] 25 | *$py.class 26 | 27 | # C extensions 28 | *.so 29 | 30 | # Distribution / packaging 31 | .gradle 32 | .Python 33 | build/ 34 | develop-eggs/ 35 | dist/ 36 | downloads/ 37 | eggs/ 38 | .eggs/ 39 | lib64/ 40 | parts/ 41 | sdist/ 42 | var/ 43 | wheels/ 44 | pip-wheel-metadata/ 45 | share/python-wheels/ 46 | *.egg-info/ 47 | .installed.cfg 48 | *.egg 49 | MANIFEST 50 | 51 | # PyInstaller 52 | # Usually these files are written by a python script from a template 53 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 54 | *.manifest 55 | *.spec 56 | 57 | # Installer logs 58 | pip-log.txt 59 | pip-delete-this-directory.txt 60 | 61 | # Unit test / coverage reports 62 | htmlcov/ 63 | .tox/ 64 | .nox/ 65 | .coverage 66 | .coverage.* 67 | .cache 68 | nosetests.xml 69 | coverage.xml 70 | *.cover 71 | .hypothesis/ 72 | .pytest_cache/ 73 | 74 | # Translations 75 | *.mo 76 | *.pot 77 | 78 | # Django stuff: 79 | *.log 80 | local_settings.py 81 | db.sqlite3 82 | db.sqlite3-journal 83 | 84 | # Flask stuff: 85 | instance/ 86 | .webassets-cache 87 | 88 | # Scrapy stuff: 89 | .scrapy 90 | 91 | # Sphinx documentation 92 | docs/_build/ 93 | 94 | # PyBuilder 95 | target/ 96 | 97 | # Jupyter Notebook 98 | .ipynb_checkpoints 99 | 100 | # IPython 101 | profile_default/ 102 | ipython_config.py 103 | 104 | # pyenv 105 | .python-version 106 | 107 | # pipenv 108 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 109 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 110 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 111 | # install all needed dependencies. 
112 | #Pipfile.lock 113 | 114 | # celery beat schedule file 115 | celerybeat-schedule 116 | 117 | # SageMath parsed files 118 | *.sage.py 119 | 120 | # Environments 121 | .env 122 | .venv 123 | env/ 124 | venv/ 125 | ENV/ 126 | env.bak/ 127 | venv.bak/ 128 | 129 | # Spyder project settings 130 | .spyderproject 131 | .spyproject 132 | 133 | # Rope project settings 134 | .ropeproject 135 | 136 | # mkdocs documentation 137 | /site 138 | 139 | # mypy 140 | .mypy_cache/ 141 | .dmypy.json 142 | dmypy.json 143 | 144 | # Pyre type checker 145 | .pyre/ 146 | 147 | /visloc/keyframes/data/ 148 | 149 | # Blender backup files 150 | .blend1 -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "etc/feature_matching_baselines/LoFTR"] 2 | path = etc/feature_matching_baselines/LoFTR 3 | url = git@github.com:zju3dv/LoFTR.git 4 | [submodule "etc/feature_matching_baselines/SuperGlue"] 5 | path = etc/feature_matching_baselines/SuperGlue 6 | url = git@github.com:magicleap/SuperGluePretrainedNetwork.git 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright © Niantic, Inc. 2022. Patent Pending. 2 | 3 | All rights reserved. 4 | 5 | 6 | 7 | ======================================================================================= 8 | 9 | 10 | 11 | This Software is licensed under the terms of the following Map-free Relocalizaation 12 | license which allows for non-commercial use only. For any other use of the software 13 | not covered by the terms of this license, please contact partnerships@nianticlabs.com 14 | 15 | 16 | 17 | ======================================================================================= 18 | 19 | 20 | 21 | Map-free Visual Relocalization License 22 | 23 | 24 | This Agreement is made by and between the Licensor and the Licensee as 25 | defined and identified below. 26 | 27 | 28 | 1. Definitions. 29 | 30 | In this Agreement (“the Agreement”) the following words shall have the 31 | following meanings: 32 | 33 | "Authors" shall mean E. Arnold, J. Wynn, S. Vicente, G. Garcia-Hernando, 34 | A. Monszpart, V. Prisacariu, D. Turmukhambetov, E. Brachmann 35 | "Licensee" Shall mean the person or organization agreeing to use the 36 | Software in accordance with these terms and conditions. 37 | "Licensor" shall mean Niantic Inc., a company organized and existing under 38 | the laws of Delaware, whose principal place of business is at 1 Ferry Building, 39 | Suite 200, San Francisco, 94111. 40 | "Software" shall mean the Map-free Relocalization Software uploaded by 41 | Licensor to the GitHub repository at https://github.com/nianticlabs/map-free-reloc 42 | on October 10th 2022 in source code or object code form and any 43 | accompanying documentation as well as any modifications or additions uploaded 44 | to the same GitHub repository by Licensor. 45 | 46 | 47 | 2. License. 48 | 49 | 2.1 The Licensor has all necessary rights to grant a license under: (i) 50 | copyright and rights in the nature of copyright subsisting in the Software; and 51 | (ii) certain patent rights resulting from a patent application(s) filed by the 52 | Licensor in the United States and/or other jurisdictions in connection with the 53 | Software. 
The Licensor grants the Licensee for the duration of this Agreement, 54 | a free of charge, non-sublicenseable, non-exclusive, non-transferable copyright 55 | and patent license (in consequence of said patent application(s)) to use the 56 | Software for non-commercial purpose only, including teaching and research at 57 | educational institutions and research at not-for-profit research institutions 58 | in accordance with the provisions of this Agreement. Non-commercial use 59 | expressly excludes any profit-making or commercial activities, including without 60 | limitation sale, license, manufacture or development of commercial products, use in 61 | commercially-sponsored research, use at a laboratory or other facility owned or 62 | controlled (whether in whole or in part) by a commercial entity, provision of 63 | consulting service, use for or on behalf of any commercial entity, use in 64 | research where a commercial party obtains rights to research results or any 65 | other benefit, and use of the code in any models, model weights or code 66 | resulting from such procedure in any commercial product. Notwithstanding the 67 | foregoing restrictions, you can use this code for publishing comparison results 68 | for academic papers, including retraining on your own data. Any use of the 69 | Software for any purpose other than pursuant to the license grant set forth 70 | above shall automatically terminate this License. 71 | 72 | 73 | 2.2 The Licensee is permitted to make modifications to the Software 74 | provided that any distribution of such modifications is in accordance with 75 | Clause 3. 76 | 77 | 2.3 Except as expressly permitted by this Agreement and save to the 78 | extent and in the circumstances expressly required to be permitted by law, the 79 | Licensee is not permitted to rent, lease, sell, offer to sell, or loan the 80 | Software or its associated documentation. 81 | 82 | 83 | 3. Redistribution and modifications 84 | 85 | 3.1 The Licensee may reproduce and distribute copies of the Software, with 86 | or without modifications, in source format only and only to this same GitHub 87 | repository , and provided that any and every distribution is accompanied by an 88 | unmodified copy of this License and that the following copyright notice is 89 | always displayed in an obvious manner: Copyright © Niantic, Inc. 2018. All 90 | rights reserved. 91 | 92 | 93 | 3.2 In the case where the Software has been modified, any distribution must 94 | include prominent notices indicating which files have been changed. 95 | 96 | 3.3 The Licensee shall cause any work that it distributes or publishes, 97 | that in whole or in part contains or is derived from the Software or any part 98 | thereof (“Work based on the Software”), to be licensed as a whole at no charge 99 | to all third parties entitled to a license to the Software under the terms of 100 | this License and on the same terms provided in this License. 101 | 102 | 103 | 4. Duration. 104 | 105 | This Agreement is effective until the Licensee terminates it by destroying 106 | the Software, any Work based on the Software, and its documentation together 107 | with all copies. It will also terminate automatically if the Licensee fails to 108 | abide by its terms. Upon automatic termination the Licensee agrees to destroy 109 | all copies of the Software, Work based on the Software, and its documentation. 110 | 111 | 112 | 5. Disclaimer of Warranties. 113 | 114 | The Software is provided as is. 
To the maximum extent permitted by law, 115 | Licensor provides no warranties or conditions of any kind, either express or 116 | implied, including without limitation, any warranties or condition of title, 117 | non-infringement or fitness for a particular purpose. 118 | 119 | 120 | 6. LIMITATION OF LIABILITY. 121 | 122 | IN NO EVENT SHALL THE LICENSOR AND/OR AUTHORS BE LIABLE FOR ANY DIRECT, 123 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING 124 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 125 | DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 126 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 127 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 128 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 129 | 130 | 131 | 7. Indemnity. 132 | 133 | The Licensee shall indemnify the Licensor and/or Authors against all third 134 | party claims that may be asserted against or suffered by the Licensor and/or 135 | Authors and which relate to use of the Software by the Licensee. 136 | 137 | 138 | 8. Intellectual Property. 139 | 140 | 8.1 As between the Licensee and Licensor, copyright and all other 141 | intellectual property rights subsisting in or in connection with the Software 142 | and supporting information shall remain at all times the property of the 143 | Licensor. The Licensee shall acquire no rights in any such material except as 144 | expressly provided in this Agreement. 145 | 146 | 8.2 No permission is granted to use the trademarks or product names of the 147 | Licensor except as required for reasonable and customary use in describing the 148 | origin of the Software and for the purposes of abiding by the terms of Clause 149 | 3.1. 150 | 151 | 8.3 The Licensee shall promptly notify the Licensor of any improvement or 152 | new use of the Software (“Improvements”) in sufficient detail for Licensor to 153 | evaluate the Improvements. The Licensee hereby grants the Licensor and its 154 | affiliates a non-exclusive, fully paid-up, royalty-free, irrevocable and 155 | perpetual license to all Improvements for non-commercial academic research and 156 | teaching purposes upon creation of such improvements. 157 | 158 | 8.4 The Licensee grants an exclusive first option to the Licensor to be 159 | exercised by the Licensor within three (3) years of the date of notification of 160 | an Improvement under Clause 8.3 to use any the Improvement for commercial 161 | purposes on terms to be negotiated and agreed by Licensee and Licensor in good 162 | faith within a period of six (6) months from the date of exercise of the said 163 | option (including without limitation any royalty share in net income from such 164 | commercialization payable to the Licensee, as the case may be). 165 | 166 | 167 | 9. Acknowledgements. 168 | 169 | The Licensee shall acknowledge the Authors and use of the Software in the 170 | publication of any work that uses, or results that are achieved through, the 171 | use of the Software. The following citation shall be included in the 172 | acknowledgement: “Map-free Visual Relocalization: Metric Pose Relative to a 173 | Single Image", by E. Arnold, J. Wynn, S. Vicente, G. Garcia-Hernando, 174 | A. Monszpart, V. Prisacariu, D. Turmukhambetov, E. Brachmann, ECCV 2022. 175 | 176 | 177 | 10. Governing Law. 
178 | 179 | This Agreement shall be governed by, construed and interpreted in 180 | accordance with English law and the parties submit to the exclusive 181 | jurisdiction of the English courts. 182 | 183 | 184 | 11. Termination. 185 | 186 | Upon termination of this Agreement, the licenses granted hereunder will 187 | terminate and Sections 5, 6, 7, 8, 9, 10 and 11 shall survive any termination 188 | of this Agreement. -------------------------------------------------------------------------------- /benchmark/config.py: -------------------------------------------------------------------------------- 1 | # translation and rotation thresholds [meters, degrees] 2 | # used to compute Precision and AUC considering Pose Error 3 | t_threshold = 0.25 4 | R_threshold = 5 5 | 6 | # reprojection (VCRE) threshold [pixels] 7 | # used to compute Precision and AUC considering VCRE 8 | vcre_threshold = 90 9 | -------------------------------------------------------------------------------- /benchmark/extended_datasets.md: -------------------------------------------------------------------------------- 1 | # Dataset preparation 2 | 3 | ## Scannet 4 | - Download the Scannet dataset following the [official instructions](https://github.com/ScanNet/ScanNet#scannet-data). 5 | - Extract the dataset root folder to `data/scannet` 6 | - Download the [Scannet indices](https://storage.googleapis.com/niantic-lon-static/research/map-free-reloc/assets/scannet_indices.zip) used for train/val/test splits. 7 | - Download [estimated depth maps and correspondences](https://storage.googleapis.com/niantic-lon-static/research/map-free-reloc/assets/scannet_baselines_aux.zip). 8 | - Extract both zip files contents to `data/` 9 |
10 | Note on Scannet indices 11 | 12 | - The test pairs are the same as SuperGlue/LoFTR (sequences `0707_00 - 0806_00`); 13 | - Training uses SG/LoFTR pairs from sequences `0000_00 - 0699_00`; 14 | - The validation uses the SG/LoFTR pairs from sequences `0700_00 - 0706_00`; 15 | - This split is used to prevent overlapping train/val sequences. 16 |
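For reference, the split implied by these sequence ranges can be expressed as a tiny helper. This is only an illustrative sketch (the function below is not part of the repository) and assumes the usual `sceneXXXX_YY` Scannet sequence naming:

```python
def scannet_split(scene_id: str) -> str:
    """Illustrative only: map e.g. 'scene0750_00' to the split implied by the ranges above."""
    seq = int(scene_id[5:9])  # 'scene0750_00' -> 750
    if seq <= 699:
        return 'train'
    if seq <= 706:
        return 'val'
    return 'test'  # sequences 0707_00 - 0806_00
```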
17 |
18 | ## 7Scenes
19 | - Download the [7Scenes dataset](https://www.microsoft.com/en-us/research/project/rgb-d-dataset-7-scenes/).
20 | - Download [7Scenes pairs indices](https://storage.googleapis.com/niantic-lon-static/research/map-free-reloc/assets/sevenscenes_pairs.zip).
21 | - Download [7Scenes feature-matching correspondences](https://storage.googleapis.com/niantic-lon-static/research/map-free-reloc/assets/sevenscenes_correspondences.tar.gz).
22 | - Download [7Scenes PlaneRCNN estimated depth maps](https://storage.googleapis.com/niantic-lon-static/research/map-free-reloc/assets/sevenscenes_prcnn_depth.zip).
23 | - Extract the contents of all zip/tar archives to `data/sevenscenes`
24 |
25 | ## Pre-computed correspondences and depth maps
26 | The pre-computed correspondences (SIFT, SuperGlue+SuperPoint and LoFTR) can be found at the following paths:
27 | - Scannet: `data/scannet_misc/correspondences_{feature_method}_scannet_test.npz`
28 | - 7Scenes: `data/sevenscenes/{scene}/correspondences_{feature_method}_test_pairs_{pair_variant}.npz`
29 |
30 | The pre-computed depth maps can be found at the following paths:
31 | - Scannet (PlaneRCNN monodepth): `data/scannet_misc/scannet_test_depthmaps_planercnn.npz`
32 | - Scannet (DPT NYU monodepth): `data/scannet_misc/scannet_test_depthmaps_dpt.npz`
33 | - 7Scenes (PlaneRCNN monodepth): `data/sevenscenes/{scene}/frame_{framenum}.depth.planercnn.png`
34 |
35 | # 📈 Scannet Relative Pose Evaluation
36 | ```bash
37 | python -m benchmark.scannet [model config file] [--checkpoint path_to_checkpoint]
38 | ```
39 | Each time the script runs, a result file is created in `results/scannet/` with the same name as the config file.
40 | This result file contains the rotation and translation errors of each sample in the Scannet test set.
41 | A log text file with the config file name is also created in `results/scannet/`.
42 |
43 | For example, feature-matching methods (more options in [config/matching/scannet](config/matching/scannet)) can be evaluated using:
44 | ```bash
45 | # for E-mat based R,t, with GT depth maps to get metric pose
46 | python -m benchmark.scannet config/matching/scannet/sift_emat_gt.yaml
47 |
48 | # for E-mat based R,t, with DPT monodepth to get metric pose
49 | python -m benchmark.scannet config/matching/scannet/sift_emat_dpt.yaml
50 |
51 | # for PnP based R,t, with PlaneRCNN monodepth to get metric pose
52 | python -m benchmark.scannet config/matching/scannet/sift_pnp_planercnn.yaml
53 |
54 | # for Procrustes based R,t, with DPT monodepth to backproject correspondences to 3D
55 | python -m benchmark.scannet config/matching/scannet/sift_procrustes_dpt.yaml
56 | ```
57 |
58 | # 📈 7Scenes Visual Localisation Evaluation
59 | ```bash
60 | python -m benchmark.sevenscenes [model config file] \
61 | [dataset config file] \
62 | [--checkpoint path_to_checkpoint] \
63 | [--test_pair_txt pair_file_name]
64 | ```
65 |
66 | - Use `config/sevenscenes.yaml` as the dataset config.
67 | - `--test_pair_txt` specifies the pairs of training/query images used in the evaluation. It overrides the value set in `config/sevenscenes.yaml`; the default is `test_pairs.5nn.5cm10m.vlad.minmax.txt` (full EssNetPairs).
68 | - `--one_nn` keeps only the single nearest-neighbour training image with the highest DVLAD similarity to each query image.
69 | - `--triang` uses triangulation (discards translation vector norm) to estimate the absolute pose of the query image 70 | - `--triang_ransac_thres` is the angular inlier threshold for the triangulation RANSAC loop 71 | 72 | Note that if neither `--triang` or `--one_nn` is specified, the absolute pose of a query image is computed using all its nearest neighbours. 73 | The absolute pose predictions from each neighbour are aggregated using geometric median of the translation vectors, and the chordal L2 mean of rotation matrices. 74 | 75 | Once completed, this evaluation saves the result log as `test_results.txt`. 76 | Additionally, the predicted absolute pose for each query image in a SCENE is saved in a file `pose_7scenes_SCENE.txt`. 77 | Each line in this file follows the format: `image_path qw qx qy qz tx ty tz`, where the quaternion `q` and translation vector `t` encode the predicted absolute pose from world to camera coordinates. 78 | 79 | The evaluation code supports feature-matching baselines (SIFT/SuperGlue/LoFTR) for non-metric relative pose (absolute pose obtained via triangulation); and feature-matching & predicted depth, where the metric pose can be obtained using scale from depth. 80 | For example, the baseline SuperGlue + PlaneRCNN depth considering a database of only 10 images per scan, and considering only the closest (DVLAD similarity) database image can be executed with: 81 | ```bash 82 | python -m benchmark.sevenscenes \ 83 | config/baseline/sevenscenes/baseline_sg_emat_metric_planercnn_depth.yaml \ 84 | --test_pair_txt test_pairs_ours_km10.txt \ 85 | --one_nn 86 | ``` 87 | Other baselines, including SIFT/LoFTR are available in `config/matching/sevenscenes/`. 88 | We also provide different test pairs, considering different numbers of database images, namely, `test_pairs_ours_{km1/km2/km5/km10}.txt`. 89 | For each one of these pairs, the database images are selected based on the K-Means clustering of their D-VLAD features. 90 | The pairs file formatting follows the pattern from [EssNet](https://vision.in.tum.de/webshare/u/zhouq/visloc-datasets/README.md). 91 | 92 | The other evaluation flags also apply for baselines, for example, one can compute results for SuperGlue + triangulation: 93 | ```bash 94 | python -m benchmark.sevenscenes \ 95 | config/baseline/sevenscenes/baseline_sg_emat_metric_planercnn_depth.yaml \ 96 | --test_pair_txt test_pairs_ours_km10.txt \ 97 | --triang 98 | ``` 99 | 100 | Note that the correspondences from feature-matching baselines have been pre-computed for each test pair, and saved in a file for each scene of the 7Scenes dataset. 
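As described above, when a query image has more than one nearest neighbour, the per-neighbour absolute pose estimates are fused using the geometric median of the translation vectors and the chordal L2 mean of the rotation matrices (the benchmark itself uses the routines imported from `lib/utils/localize.py` for this). The snippet below is only a minimal NumPy sketch of these two standard operations, for reference; the function names are ours, not the repository's API:

```python
import numpy as np

def geometric_median(points: np.ndarray, iters: int = 100, eps: float = 1e-9) -> np.ndarray:
    """Geometric median of [N, 3] translation vectors via Weiszfeld iterations (sketch)."""
    y = points.mean(axis=0)
    for _ in range(iters):
        dists = np.maximum(np.linalg.norm(points - y, axis=1), eps)
        weights = 1.0 / dists
        y_next = (weights[:, None] * points).sum(axis=0) / weights.sum()
        if np.linalg.norm(y_next - y) < eps:
            return y_next
        y = y_next
    return y

def chordal_l2_mean(rotations: np.ndarray) -> np.ndarray:
    """Chordal L2 mean of [N, 3, 3] rotation matrices: project their average onto SO(3) via SVD."""
    u, _, vt = np.linalg.svd(rotations.mean(axis=0))
    return u @ np.diag([1.0, 1.0, np.linalg.det(u @ vt)]) @ vt
```

The chordal L2 mean is the rotation closest (in the Frobenius norm) to the arithmetic mean of the input rotation matrices, which the SVD projection above computes in closed form.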
-------------------------------------------------------------------------------- /benchmark/mapfree.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from collections import defaultdict 3 | from pathlib import Path 4 | from zipfile import ZipFile 5 | from io import TextIOWrapper 6 | import json 7 | import logging 8 | 9 | import numpy as np 10 | 11 | from benchmark.utils import load_poses, subsample_poses, load_K, precision_recall 12 | from benchmark.metrics import MetricManager, Inputs 13 | import benchmark.config as config 14 | from config.default import cfg 15 | 16 | 17 | def compute_scene_metrics(dataset_path: Path, submission_zip: ZipFile, scene: str): 18 | metric_manager = MetricManager() 19 | 20 | # load intrinsics and poses 21 | try: 22 | K, W, H = load_K(dataset_path / scene / 'intrinsics.txt') 23 | with (dataset_path / scene / 'poses.txt').open('r', encoding='utf-8') as gt_poses_file: 24 | gt_poses = load_poses(gt_poses_file, load_confidence=False) 25 | except FileNotFoundError as e: 26 | logging.error(f'Could not find ground-truth dataset files: {e}') 27 | raise 28 | else: 29 | logging.info( 30 | f'Loaded ground-truth intrinsics and poses for scene {scene}') 31 | 32 | # try to load estimated poses from submission 33 | try: 34 | with submission_zip.open(f'pose_{scene}.txt') as estimated_poses_file: 35 | estimated_poses_file_wrapper = TextIOWrapper( 36 | estimated_poses_file, encoding='utf-8') 37 | estimated_poses = load_poses( 38 | estimated_poses_file_wrapper, load_confidence=True) 39 | except KeyError as e: 40 | logging.warning( 41 | f'Submission does not have estimates for scene {scene}.') 42 | return dict(), len(gt_poses) 43 | except UnicodeDecodeError as e: 44 | logging.error('Unsupported file encoding: please use UTF-8') 45 | raise 46 | else: 47 | logging.info(f'Loaded estimated poses for scene {scene}') 48 | 49 | # The val/test set is subsampled by a factor of 5 50 | gt_poses = subsample_poses(gt_poses, subsample=5) 51 | 52 | # failures encode how many frames did not have an estimate 53 | # e.g. user/method did not provide an estimate for that frame 54 | # it's different from when an estimate is provided with low confidence! 55 | failures = 0 56 | 57 | # Results encoded as dict 58 | # key: metric name; value: list of values (one per frame). 59 | # e.g. results['t_err'] = [1.2, 0.3, 0.5, ...] 
60 | results = defaultdict(list) 61 | 62 | # compute metrics per frame 63 | for frame_num, (q_gt, t_gt, _) in gt_poses.items(): 64 | if frame_num not in estimated_poses: 65 | failures += 1 66 | continue 67 | 68 | q_est, t_est, confidence = estimated_poses[frame_num] 69 | inputs = Inputs(q_gt=q_gt, t_gt=t_gt, q_est=q_est, t_est=t_est, 70 | confidence=confidence, K=K[frame_num], W=W, H=H) 71 | metric_manager(inputs, results) 72 | 73 | return results, failures 74 | 75 | 76 | def aggregate_results(all_results, all_failures): 77 | # aggregate metrics 78 | median_metrics = defaultdict(list) 79 | all_metrics = defaultdict(list) 80 | for scene_results in all_results.values(): 81 | for metric, values in scene_results.items(): 82 | median_metrics[metric].append(np.median(values)) 83 | all_metrics[metric].extend(values) 84 | all_metrics = {k: np.array(v) for k, v in all_metrics.items()} 85 | assert all([v.ndim == 1 for v in all_metrics.values()] 86 | ), 'invalid metrics shape' 87 | 88 | # compute avg median metrics 89 | avg_median_metrics = {metric: np.mean( 90 | values) for metric, values in median_metrics.items()} 91 | 92 | # compute precision/AUC for pose error and reprojection errors 93 | accepted_poses = (all_metrics['trans_err'] < config.t_threshold) * \ 94 | (all_metrics['rot_err'] < config.R_threshold) 95 | accepted_vcre = all_metrics['reproj_err'] < config.vcre_threshold 96 | total_samples = len(next(iter(all_metrics.values()))) + all_failures 97 | 98 | prec_pose = np.sum(accepted_poses) / total_samples 99 | prec_vcre = np.sum(accepted_vcre) / total_samples 100 | 101 | # compute AUC for pose and VCRE 102 | _, _, auc_pose = precision_recall( 103 | inliers=all_metrics['confidence'], tp=accepted_poses, failures=all_failures) 104 | _, _, auc_vcre = precision_recall( 105 | inliers=all_metrics['confidence'], tp=accepted_vcre, failures=all_failures) 106 | 107 | # output metrics 108 | output_metrics = dict() 109 | output_metrics['Average Median Translation Error'] = avg_median_metrics['trans_err'] 110 | output_metrics['Average Median Rotation Error'] = avg_median_metrics['rot_err'] 111 | output_metrics['Average Median Reprojection Error'] = avg_median_metrics['reproj_err'] 112 | output_metrics[f'Precision @ Pose Error < ({config.t_threshold*100}cm, {config.R_threshold}deg)'] = prec_pose 113 | output_metrics[f'AUC @ Pose Error < ({config.t_threshold*100}cm, {config.R_threshold}deg)'] = auc_pose 114 | output_metrics[f'Precision @ VCRE < {config.vcre_threshold}px'] = prec_vcre 115 | output_metrics[f'AUC @ VCRE < {config.vcre_threshold}px'] = auc_vcre 116 | output_metrics[f'Estimates for % of frames'] = len(all_metrics['trans_err']) / total_samples 117 | return output_metrics 118 | 119 | 120 | def count_unexpected_scenes(scenes: tuple, submission_zip: ZipFile): 121 | submission_scenes = [fname[5:-4] 122 | for fname in submission_zip.namelist() if fname.startswith("pose_")] 123 | return len(set(submission_scenes) - set(scenes)) 124 | 125 | 126 | def main(args): 127 | dataset_path = args.dataset_path / args.split 128 | scenes = tuple(f.name for f in dataset_path.iterdir() if f.is_dir()) 129 | 130 | try: 131 | submission_zip = ZipFile(args.submission_path, 'r') 132 | except FileNotFoundError as e: 133 | logging.error(f'Could not find ZIP file in path {args.submission_path}') 134 | return 135 | 136 | all_results = dict() 137 | all_failures = 0 138 | for scene in scenes: 139 | metrics, failures = compute_scene_metrics( 140 | dataset_path, submission_zip, scene) 141 | all_results[scene] = metrics 142 | 
all_failures += failures 143 | 144 | if all_failures > 0: 145 | logging.warning( 146 | f'Submission is missing pose estimates for {all_failures} frames') 147 | 148 | unexpected_scene_count = count_unexpected_scenes(scenes, submission_zip) 149 | if unexpected_scene_count > 0: 150 | logging.warning( 151 | f'Submission contains estimates for {unexpected_scene_count} scenes outside the {args.split} set') 152 | 153 | if all((len(metrics) == 0 for metrics in all_results.values())): 154 | logging.error( 155 | f'Submission does not have any valid pose estimates') 156 | return 157 | 158 | output_metrics = aggregate_results(all_results, all_failures) 159 | output_json = json.dumps(output_metrics, indent=2) 160 | print(output_json) 161 | 162 | 163 | if __name__ == '__main__': 164 | parser = argparse.ArgumentParser( 165 | 'eval', description='Evaluate submissions for the MapFree dataset benchmark') 166 | parser.add_argument('submission_path', type=Path, 167 | help='Path to the submission ZIP file') 168 | parser.add_argument('--split', choices=('val', 'test'), default='test', 169 | help='Dataset split to use for evaluation. Default: test') 170 | parser.add_argument('--log', choices=('warning', 'info', 'error'), 171 | default='warning', help='Logging level. Default: warning') 172 | parser.add_argument('--dataset_path', type=Path, default=None, 173 | help='Path to the dataset folder') 174 | 175 | args = parser.parse_args() 176 | 177 | if args.dataset_path is None: 178 | cfg.merge_from_file('config/mapfree.yaml') 179 | args.dataset_path = Path(cfg.DATASET.DATA_ROOT) 180 | 181 | logging.basicConfig(level=args.log.upper()) 182 | try: 183 | main(args) 184 | except Exception: 185 | logging.error("Unexpected behaviour. Exiting.") 186 | -------------------------------------------------------------------------------- /benchmark/metrics.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Callable 3 | 4 | import numpy as np 5 | 6 | from benchmark.reprojection import reprojection_error 7 | from benchmark.utils import VARIANTS_ANGLE_SIN, quat_angle_error 8 | 9 | 10 | @dataclass 11 | class Inputs: 12 | q_gt: np.array 13 | t_gt: np.array 14 | q_est: np.array 15 | t_est: np.array 16 | confidence: float 17 | K: np.array 18 | W: int 19 | H: int 20 | 21 | def __post_init__(self): 22 | assert self.q_gt.shape == (4,), 'invalid gt quaternion shape' 23 | assert self.t_gt.shape == (3,), 'invalid gt translation shape' 24 | assert self.q_est.shape == (4,), 'invalid estimated quaternion shape' 25 | assert self.t_est.shape == (3,), 'invalid estimated translation shape' 26 | assert self.confidence >= 0, 'confidence must be non negative' 27 | assert self.K.shape == (3, 3), 'invalid K shape' 28 | assert self.W > 0, 'invalid image width' 29 | assert self.H > 0, 'invalid image height' 30 | 31 | 32 | class MyDict(dict): 33 | def register(self, fn) -> Callable: 34 | """Registers a function within dict(fn_name -> fn_ref). 
35 | This is used to evaluate all registered metrics in MetricManager.__call__()""" 36 | self[fn.__name__] = fn 37 | return fn 38 | 39 | 40 | class MetricManager: 41 | _metrics = MyDict() 42 | 43 | def __call__(self, inputs: Inputs, results: dict) -> None: 44 | for metric, metric_fn in self._metrics.items(): 45 | results[metric].append(metric_fn(inputs)) 46 | 47 | @staticmethod 48 | @_metrics.register 49 | def trans_err(inputs: Inputs) -> np.float64: 50 | return np.linalg.norm(inputs.t_est - inputs.t_gt) 51 | 52 | @staticmethod 53 | @_metrics.register 54 | def rot_err(inputs: Inputs, variant: str = VARIANTS_ANGLE_SIN) -> np.float64: 55 | return quat_angle_error(label=inputs.q_est, pred=inputs.q_gt, variant=variant)[0, 0] 56 | 57 | @staticmethod 58 | @_metrics.register 59 | def reproj_err(inputs: Inputs) -> float: 60 | return reprojection_error( 61 | q_est=inputs.q_est, t_est=inputs.t_est, q_gt=inputs.q_gt, t_gt=inputs.t_gt, K=inputs.K, 62 | W=inputs.W, H=inputs.H) 63 | 64 | @staticmethod 65 | @_metrics.register 66 | def confidence(inputs: Inputs) -> float: 67 | return inputs.confidence 68 | -------------------------------------------------------------------------------- /benchmark/reprojection.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | 3 | import numpy as np 4 | from transforms3d.quaternions import quat2mat 5 | 6 | 7 | def project(pts: np.ndarray, K: np.ndarray, img_size: List[int] or Tuple[int] = None) -> np.ndarray: 8 | """Projects 3D points to image plane. 9 | 10 | Args: 11 | - pts [N, 3/4]: points in camera coordinates (homogeneous or non-homogeneous) 12 | - K [3, 3]: intrinsic matrix 13 | - img_size (width, height): optional, clamp projection to image borders 14 | Outputs: 15 | - uv [N, 2]: coordinates of projected points 16 | """ 17 | 18 | assert len(pts.shape) == 2, 'incorrect number of dimensions' 19 | assert pts.shape[1] in [3, 4], 'invalid dimension size' 20 | assert K.shape == (3, 3), 'incorrect intrinsic shape' 21 | 22 | uv_h = (K @ pts[:, :3].T).T 23 | uv = uv_h[:, :2] / uv_h[:, -1:] 24 | 25 | if img_size is not None: 26 | uv[:, 0] = np.clip(uv[:, 0], 0, img_size[0]) 27 | uv[:, 1] = np.clip(uv[:, 1], 0, img_size[1]) 28 | 29 | return uv 30 | 31 | 32 | def get_grid_multipleheight() -> np.ndarray: 33 | # create grid of points 34 | ar_grid_step = 0.3 35 | ar_grid_num_x = 7 36 | ar_grid_num_y = 4 37 | ar_grid_num_z = 7 38 | ar_grid_z_offset = 1.8 39 | ar_grid_y_offset = 0 40 | 41 | ar_grid_x_pos = np.arange(0, ar_grid_num_x)-(ar_grid_num_x-1)/2 42 | ar_grid_x_pos *= ar_grid_step 43 | 44 | ar_grid_y_pos = np.arange(0, ar_grid_num_y)-(ar_grid_num_y-1)/2 45 | ar_grid_y_pos *= ar_grid_step 46 | ar_grid_y_pos += ar_grid_y_offset 47 | 48 | ar_grid_z_pos = np.arange(0, ar_grid_num_z).astype(float) 49 | ar_grid_z_pos *= ar_grid_step 50 | ar_grid_z_pos += ar_grid_z_offset 51 | 52 | xx, yy, zz = np.meshgrid(ar_grid_x_pos, ar_grid_y_pos, ar_grid_z_pos) 53 | ones = np.ones(xx.shape[0]*xx.shape[1]*xx.shape[2]) 54 | eye_coords = np.concatenate([c.reshape(-1, 1) 55 | for c in (xx, yy, zz, ones)], axis=-1) 56 | return eye_coords 57 | 58 | 59 | # global variable, avoids creating it again 60 | eye_coords_glob = get_grid_multipleheight() 61 | 62 | 63 | def reprojection_error( 64 | q_est: np.ndarray, t_est: np.ndarray, q_gt: np.ndarray, t_gt: np.ndarray, K: np.ndarray, 65 | W: int, H: int) -> float: 66 | eye_coords = eye_coords_glob 67 | 68 | # obtain ground-truth position of projected points 69 | uv_gt = 
project(eye_coords, K, (W, H)) 70 | 71 | # residual transformation 72 | cam2w_est = np.eye(4) 73 | cam2w_est[:3, :3] = quat2mat(q_est) 74 | cam2w_est[:3, -1] = t_est 75 | cam2w_gt = np.eye(4) 76 | cam2w_gt[:3, :3] = quat2mat(q_gt) 77 | cam2w_gt[:3, -1] = t_gt 78 | 79 | # residual reprojection 80 | eyes_residual = (np.linalg.inv(cam2w_est) @ cam2w_gt @ eye_coords.T).T 81 | uv_pred = project(eyes_residual, K, (W, H)) 82 | 83 | # get reprojection error 84 | repr_err = np.linalg.norm(uv_gt - uv_pred, ord=2, axis=1) 85 | mean_repr_err = float(repr_err.mean().item()) 86 | return mean_repr_err 87 | -------------------------------------------------------------------------------- /benchmark/scannet.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import numpy as np 4 | import torch 5 | from tqdm import tqdm 6 | 7 | from config.default import cfg 8 | from lib.utils.logger import set_log 9 | from lib.datasets.datamodules import DataModule 10 | from lib.models.builder import build_model 11 | from lib.utils.data import data_to_model_device 12 | from lib.utils.metrics import MetricsAccumulator, print_auc_table, pose_error_torch, A_metrics, precision 13 | 14 | 15 | def main(args): 16 | cfg.merge_from_file('config/scannet.yaml') 17 | cfg.merge_from_file(args.config) 18 | 19 | # Set-up dataloader and model 20 | datamodule = DataModule(cfg) 21 | dataset_loader = datamodule.test_dataloader() 22 | model = build_model(cfg, args.checkpoint) 23 | 24 | # Create logger and save to file 25 | config_name = args.config.split('/')[-1][:-5] 26 | set_log(f'results/scannet/{config_name}.txt') 27 | 28 | macc = MetricsAccumulator() 29 | 30 | for data in tqdm(dataset_loader): 31 | data = data_to_model_device(data, model) 32 | with torch.no_grad(): 33 | R, t = model(data) 34 | metrics = pose_error_torch(R, t, data['T_0to1']) 35 | macc.accumulate(metrics) 36 | 37 | agg_metrics = macc.aggregate() 38 | print(f"Median Rotation error [deg]: {np.nanmedian(agg_metrics['R_err']):.2f}") 39 | print(f"Median Translation angular error [deg]: {np.nanmedian(agg_metrics['t_err_ang']):.2f}") 40 | print(f"Median Translation Euclidean error [m]: {np.nanmedian(agg_metrics['t_err_euc']):.2f}") 41 | print_auc_table(agg_metrics) 42 | 43 | # compute precision 44 | thresholds = ((0.1, 5), (0.25, 5), (0.5, 10), (1, 20)) 45 | print("Recall @ "+"/".join([f"({t[0]:.1f}m,{t[1]:.0f}deg)" for t in thresholds])+': '+"/".join( 46 | ['{:.2f}'.format(precision(agg_metrics, t[1], t[0])) for t in thresholds])) 47 | 48 | # compute A1/A2/A3 metric for translation scale 49 | a1, a2, a3 = A_metrics(agg_metrics['t_err_scale_sym']) 50 | print(f"t_scale_error A1/A2/A3 [%]: {a1*100:.1f}/{a2*100:.1f}/{a3*100:.1f}") 51 | 52 | # compute ratio of failures (baselines) 53 | ratio_failures = np.isnan(agg_metrics['R_err']).mean() 54 | print(f'failures (not enough corr.) 
[%]: {ratio_failures*100:.1f}') 55 | 56 | # Save results to `results/' with the name of the config 57 | np.savez(f'results/scannet/{config_name}', **agg_metrics) 58 | 59 | 60 | if __name__ == '__main__': 61 | parser = argparse.ArgumentParser() 62 | parser.add_argument('config', help='path to config file') 63 | parser.add_argument('--checkpoint', help='path to checkpoint', default='') 64 | args = parser.parse_args() 65 | 66 | main(args) 67 | -------------------------------------------------------------------------------- /benchmark/sevenscenes.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | from pathlib import Path 4 | 5 | import torch 6 | from tqdm import tqdm 7 | 8 | from config.default import cfg 9 | from lib.utils.logger import set_log 10 | from lib.utils.visualisation import save_video 11 | from lib.datasets.datamodules import DataModule 12 | from lib.models.builder import build_model 13 | from lib.utils.data import data_to_model_device 14 | from lib.utils.localize import * 15 | 16 | 17 | def predict(loader, model): 18 | results_dict = {} 19 | 20 | for data in tqdm(loader): 21 | # run inference 22 | data = data_to_model_device(data, model) 23 | with torch.no_grad(): 24 | R, t = model(data) 25 | 26 | # populate results_dict 27 | train, test = data['pair_names'][0][0], data['pair_names'][1][0] 28 | scene = data['scene_id'][0] 29 | if scene not in results_dict: 30 | results_dict[scene] = {} 31 | results_dict[scene]['pair_data'] = {} 32 | results_dict[scene]['no_pt_pairs'] = [] 33 | 34 | if test not in results_dict[scene]['pair_data']: 35 | results_dict[scene]['pair_data'][test] = {} 36 | results_dict[scene]['pair_data'][test]['test_pairs'] = [] 37 | 38 | # Wrap pose label with RelaPose, AbsPose objects 39 | train_c, train_q = data['abs_c_0'][0].cpu().numpy( 40 | ).copy(), data['abs_q_0'][0].cpu().numpy().copy() 41 | train_abs_pose = AbsPose(train_q, train_c) 42 | 43 | test_c, test_q = data['abs_c_1'][0].cpu().numpy( 44 | ).copy(), data['abs_q_1'][0].cpu().numpy().copy() 45 | test_abs_pose = AbsPose(test_q, test_c) 46 | results_dict[scene]['pair_data'][test]['test_abs_pose'] = test_abs_pose 47 | 48 | rel_t_gt = data['T_0to1'][:, :3, -1].reshape(-1).cpu().numpy().copy() 49 | rel_q_gt = mat2quat(data['T_0to1'][:, :3, :3].cpu().numpy()).reshape(-1) 50 | rela_pose_lbl = RelaPose(rel_q_gt, rel_t_gt) 51 | 52 | # check for NaN's in output, meaning failure due to lack of correspondences (for correspondence based methods) 53 | R = R.detach().cpu().numpy() 54 | t = t.reshape(-1).detach().cpu().numpy() 55 | if np.isnan(R).any() or np.isnan(t).any() or np.isinf(t).any(): 56 | results_dict[scene]['no_pt_pairs'].append(data['pair_names']) 57 | else: 58 | rel_t_pred = t 59 | rel_q_pred = mat2quat(R).reshape(-1) 60 | rela_pose_pred = RelaPose(rel_q_pred, rel_t_pred) 61 | test_pair = RelaPosePair(test, train_abs_pose, rela_pose_lbl, 62 | rela_pose_pred, data['sim'].item()) 63 | test_pair.inliers = data['inliers'] if 'inliers' in data.keys() else 0 64 | results_dict[scene]['pair_data'][test]['test_pairs'].append(test_pair) 65 | 66 | return results_dict 67 | 68 | 69 | def eval(args): 70 | # Load configs 71 | cfg.merge_from_file(args.dataset_config) 72 | cfg.merge_from_file(args.config) 73 | 74 | # update test pair txt from arguments (can be set at dataset config) 75 | if args.test_pair_txt: 76 | cfg.DATASET.PAIRS_TXT.TEST = args.test_pair_txt 77 | if args.one_nn: 78 | cfg.DATASET.PAIRS_TXT.ONE_NN = True 79 | 80 | # Set log object 81 | 
args.output_root.mkdir(parents=True, exist_ok=True) 82 | set_log(args.output_root / 'test_results.txt') 83 | 84 | # Create dataloader 85 | dataloader = DataModule(cfg).test_dataloader() 86 | 87 | # Create model 88 | model = build_model(cfg, args.checkpoint) 89 | 90 | # Get predictions from model 91 | results_dict = predict(dataloader, model) 92 | np.save(args.output_root / 'rawpred.npy', results_dict) # save, just in case 93 | 94 | # Evaluate 95 | err_thres = ((0.1, 5), (0.25, 5), (0.5, 10), (1, 20)) # (meters, deg) 96 | save_res_path = args.output_root / 'results.npy' 97 | if args.triang: 98 | # Using triangulation + RANSAC 99 | eval_pipeline_with_ransac(results_dict, None, ransac_thres=args.triang_ransac_thres, 100 | ransac_iter=10, ransac_miu=1.414, pair_type='relapose', 101 | err_thres=err_thres, save_res_path=save_res_path) 102 | else: 103 | # Directly using metric relative pose estimate to obtain absolute query pose 104 | # NOTE: if there are more than 1NN for a query, the absolute pose is obtained by 105 | # the geometric median of absolute translation vectors of each NN, and 106 | # L2 chordal mean rotation of abs. rotation matrices of each NN (see more details in cal_abs_pose_err_metric) 107 | eval_pipeline_without_ransac(results_dict, err_thres=err_thres, save_res_path=save_res_path) 108 | 109 | # Create txt file per scene showing predicted pose of each query 110 | save_results_visualisation(save_res_path) 111 | 112 | # Create precision/recall plots 113 | generate_precision_recall_plots(save_res_path, err_thres[1]) 114 | 115 | if args.save_video: 116 | save_video(save_res_path, dataloader, args.output_root) 117 | 118 | 119 | if __name__ == '__main__': 120 | parser = argparse.ArgumentParser() 121 | parser.add_argument('config', help='path to config file') 122 | parser.add_argument('dataset_config', help='path to dataset config file') 123 | parser.add_argument('--checkpoint', help='path to model checkpoint', default='') 124 | parser.add_argument('--test_pair_txt', '-pair', type=str, default=None) 125 | parser.add_argument('--output_root', '-odir', type=str, default='results/') 126 | parser.add_argument( 127 | '--one_nn', action='store_true', 128 | help='keep only one nearest neighbour, the one with highest VLAD similarity. Applicable for 7Scenes, which has more than one NN. No effect on MapFree dataset, which by definition only contains 1 keyframe per scene.') 129 | parser.add_argument( 130 | '--triang', action='store_true', 131 | help='uses triangulation to compute absolute pose of query image. 
Only applicable for 7Scenes.') 132 | parser.add_argument( 133 | '--triang_ransac_thres', '-rthres', metavar='%d', type=int, nargs='+', default=[15], 134 | help='the set of triangulation ransac inlier thresolds(angle error)(default: %(default)s)') 135 | parser.add_argument( 136 | '--save_video', action='store_true', 137 | help='create a video per sequence showing results per frame (valid only for 1NN cases)') 138 | 139 | args = parser.parse_args() 140 | args.output_root = Path(args.output_root) 141 | assert (args.one_nn and args.triang) != True, 'triangulation needs more than one nearest neighbour' 142 | if args.save_video: 143 | assert args.one_nn, 'video option only available when using a single keyframe (1 nearest neighbour)' 144 | 145 | eval(args) 146 | -------------------------------------------------------------------------------- /benchmark/test_metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from transforms3d.euler import euler2quat 4 | from transforms3d.quaternions import axangle2quat, qmult, quat2mat, rotate_vector 5 | 6 | from benchmark.metrics import Inputs, MetricManager 7 | from benchmark.reprojection import project 8 | from benchmark.utils import VARIANTS_ANGLE_COS, VARIANTS_ANGLE_SIN 9 | 10 | 11 | def createInput(q_gt=None, t_gt=None, q_est=None, t_est=None, confidence=None, K=None, W=None, H=None): 12 | q_gt = np.zeros(4) if q_gt is None else q_gt 13 | t_gt = np.zeros(3) if t_gt is None else t_gt 14 | q_est = np.zeros(4) if q_est is None else q_est 15 | t_est = np.zeros(3) if t_est is None else t_est 16 | confidence = 0. if confidence is None else confidence 17 | K = np.eye(3) if K is None else K 18 | H = 1 if H is None else H 19 | W = 1 if W is None else W 20 | return Inputs(q_gt=q_gt, t_gt=t_gt, q_est=q_est, t_est=t_est, confidence=confidence, K=K, W=W, H=H) 21 | 22 | 23 | def randomQuat(): 24 | angles = np.random.uniform(0, 2*np.pi, 3) 25 | q = euler2quat(*angles) 26 | return q 27 | 28 | 29 | class TestMetrics: 30 | @pytest.mark.parametrize('run_number', range(50)) 31 | def test_t_err_tinvariance(self, run_number: int) -> None: 32 | """Computes the translation error given an initial translation and displacement of this 33 | translation. The translation error must be equal to the norm of the displacement.""" 34 | mean, var = 5, 10 35 | t0 = np.random.normal(mean, var, (3,)) 36 | displacement = np.random.normal(mean, var, (3,)) 37 | 38 | i = createInput(t_gt=t0, t_est=t0+displacement) 39 | trans_err = MetricManager.trans_err(i) 40 | assert np.isclose(trans_err, np.linalg.norm(displacement)) 41 | 42 | @pytest.mark.parametrize('run_number', range(50)) 43 | def test_trans_err_rinvariance(self, run_number: int) -> None: 44 | """Computes the translation error given estimated and gt vectors. 
45 | The translation error must be the same for a rotated version of those vectors 46 | (same random rotation)""" 47 | mean, var = 5, 10 48 | t0 = np.random.normal(mean, var, (3,)) 49 | t1 = np.random.normal(mean, var, (3,)) 50 | q = randomQuat() 51 | 52 | i = createInput(t_gt=t0, t_est=t1) 53 | trans_err = MetricManager.trans_err(i) 54 | 55 | ir = createInput(t_gt=rotate_vector(t0, q), t_est=rotate_vector(t1, q)) 56 | trans_err_r = MetricManager.trans_err(ir) 57 | 58 | assert np.isclose(trans_err, trans_err_r) 59 | 60 | @pytest.mark.parametrize('run_number', range(50)) 61 | @pytest.mark.parametrize('dtype', (np.float64, np.float32)) 62 | def test_rot_err_raxis(self, run_number: int, dtype: type) -> None: 63 | """Test rotation error for rotations around a random axis. 64 | 65 | Note: We create GT as high precision, and only downcast when calling rot_err. 66 | """ 67 | q = randomQuat().astype(np.float64) 68 | 69 | axis = np.random.uniform(low=-1, high=1, size=3).astype(np.float64) 70 | angle = np.float64(np.random.uniform(low=-np.pi, high=np.pi)) 71 | qres = axangle2quat(vector=axis, theta=angle, is_normalized=False).astype(np.float64) 72 | 73 | i = createInput(q_gt=q.astype(dtype), q_est=qmult(q, qres).astype(dtype)) 74 | rot_err = MetricManager.rot_err(i) 75 | assert isinstance(rot_err, np.float64) 76 | rot_err_expected = np.abs(np.degrees(angle)) 77 | # if we add up errors, we want them to be positive 78 | assert 0. <= rot_err 79 | rtol = 1.e-5 # numpy default 80 | atol = 1.e-8 # numpy default 81 | if isinstance(dtype, np.float32): 82 | atol = 1.e-7 # 1/50 test might fail at 1.e-8 83 | assert np.isclose(rot_err, rot_err_expected, rtol=rtol, atol=atol) 84 | 85 | @pytest.mark.parametrize('run_number', range(50)) 86 | def test_r_err_mat(self, run_number: int) -> None: 87 | q0 = randomQuat() 88 | q1 = randomQuat() 89 | 90 | i = createInput(q_gt=q0, q_est=q1) 91 | rot_err = MetricManager.rot_err(i) 92 | 93 | R0 = quat2mat(q0) 94 | R1 = quat2mat(q1) 95 | Rres = R1 @ R0.T 96 | theta = (np.trace(Rres) - 1)/2 97 | theta = np.clip(theta, -1, 1) 98 | angle = np.degrees(np.arccos(theta)) 99 | 100 | assert np.isclose(angle, rot_err) 101 | 102 | def test_reproj_error_identity(self): 103 | """Test that reprojection error is zero if poses match""" 104 | q = randomQuat() 105 | t = np.random.normal(0, 10, (3,)) 106 | i = createInput(q_gt=q, t_gt=t, q_est=q, t_est=t) 107 | 108 | reproj_err = MetricManager.reproj_err(i) 109 | assert np.isclose(reproj_err, 0) 110 | 111 | @pytest.mark.parametrize('run_number', range(10)) 112 | @pytest.mark.parametrize('variant', (VARIANTS_ANGLE_SIN,)) 113 | @pytest.mark.parametrize('dtype', (np.float64,)) 114 | def test_r_err_small(self, run_number: int, variant: str, dtype: type) -> None: 115 | """Test rotation error for small angle differences. 116 | 117 | Note: We create GT as high precision, and only downcast when calling rot_err. 
118 | """ 119 | scales_failed = [] 120 | for scale in np.logspace(start=-1, stop=-9, num=9, base=10, dtype=dtype): 121 | q = randomQuat().astype(np.float64) 122 | angle = np.float64(np.random.uniform(low=-np.pi, high=np.pi)) * scale 123 | assert isinstance(angle, np.float64) 124 | axis = np.random.uniform(low=-1., high=1., size=3).astype(np.float64) 125 | assert axis.dtype == np.float64 126 | qres = axangle2quat(vector=axis, theta=angle, is_normalized=False).astype(np.float64) 127 | assert qres.dtype == np.float64 128 | 129 | i = createInput(q_gt=q.astype(dtype), q_est=qmult(q, qres).astype(dtype)) 130 | 131 | # We expect the error to always be np.float64 for highest acc. 132 | rot_err = MetricManager.rot_err(i, variant=variant) 133 | assert isinstance(rot_err, np.float64) 134 | rot_err_expected = np.abs(np.degrees(angle)) 135 | assert isinstance(rot_err_expected, type(rot_err)) 136 | 137 | # if we add up errors, we want them to be positive 138 | assert 0. <= rot_err 139 | 140 | # check accuracy for one magnitude higher tolerance than the angle 141 | tol = 0.1 * scale 142 | # need to be more permissive for lower precision 143 | if dtype == np.float32: 144 | tol = 1.e3 * scale 145 | 146 | # cast to dtype for checking 147 | rot_err = rot_err.astype(dtype) 148 | rot_err_expected = rot_err_expected.astype(dtype) 149 | 150 | if variant == VARIANTS_ANGLE_SIN: 151 | assert np.isclose(rot_err, rot_err_expected, rtol=tol, atol=tol) 152 | elif variant == VARIANTS_ANGLE_COS: 153 | if not np.isclose(rot_err, rot_err_expected, rtol=tol, atol=tol): 154 | print(f"[variant '{variant}'] raises an error for\n" 155 | f"\trot_err: {rot_err}" 156 | f"\trot_err_expected: {rot_err_expected}" 157 | f"\trtol: {tol}" 158 | f"\tatol: {tol}") 159 | scales_failed.append(scale) 160 | if len(scales_failed): 161 | pytest.fail(f"Variant {variant} failed at scales {scales_failed}") 162 | 163 | 164 | def test_projection() -> None: 165 | xyz = np.array(((10, 20, 30), (10, 30, 50), (-20, -15, 5), 166 | (-20, -50, 10)), dtype=np.float32) 167 | K = np.eye(3) 168 | 169 | uv = np.array(((1/3, 2/3), (1/5, 3/5), (-4, -3), 170 | (-2, -5)), dtype=np.float32) 171 | assert np.allclose(uv, project(xyz, K)) 172 | 173 | uv = np.array(((1/3, 2/3), (1/5, 3/5), (0, 0), (0, 0)), dtype=np.float32) 174 | assert np.allclose(uv, project(xyz, K, img_size=(5, 5))) 175 | -------------------------------------------------------------------------------- /benchmark/utils.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import typing 3 | import logging 4 | 5 | import numpy as np 6 | from transforms3d.quaternions import qinverse, rotate_vector, qmult 7 | 8 | VARIANTS_ANGLE_SIN = 'sin' 9 | VARIANTS_ANGLE_COS = 'cos' 10 | 11 | 12 | def convert_world2cam_to_cam2world(q, t): 13 | qinv = qinverse(q) 14 | tinv = -rotate_vector(t, qinv) 15 | return qinv, tinv 16 | 17 | 18 | def load_poses(file: typing.IO, load_confidence: bool = False): 19 | """Load poses from text file and converts them to cam2world convention (t is the camera center in world coordinates) 20 | 21 | The text file encodes world2cam poses with the format: 22 | imgpath qw qx qy qz tx ty tz [confidence] 23 | where qw qx qy qz is the quaternion encoding rotation, 24 | and tx ty tz is the translation vector, 25 | and confidence is a float encoding confidence, for estimated poses 26 | """ 27 | 28 | expected_parts = 9 if load_confidence else 8 29 | 30 | poses = dict() 31 | for line_number, line in enumerate(file.readlines()): 32 | 
parts = tuple(line.strip().split(' ')) 33 | 34 | if len(parts) != expected_parts: 35 | logging.warning( 36 | f'Invalid number of fields in file {file.name} line {line_number}.' 37 | f' Expected {expected_parts}, received {len(parts)}. Ignoring line.') 38 | continue 39 | 40 | try: 41 | name = parts[0] 42 | if '#' in name: 43 | logging.info(f'Ignoring comment line in {file.name} line {line_number}') 44 | continue 45 | frame_num = int(name[-9:-4]) 46 | except ValueError: 47 | logging.warning( 48 | f'Invalid frame number in file {file.name} line {line_number}.' 49 | f' Expected formatting "seq1/frame_00000.jpg". Ignoring line.') 50 | continue 51 | 52 | try: 53 | parts_float = tuple(map(float, parts[1:])) 54 | if any(np.isnan(v) or np.isinf(v) for v in parts_float): 55 | raise ValueError() 56 | qw, qx, qy, qz, tx, ty, tz = parts_float[:7] 57 | confidence = parts_float[7] if load_confidence else None 58 | except ValueError: 59 | logging.warning( 60 | f'Error parsing pose in file {file.name} line {line_number}. Ignoring line.') 61 | continue 62 | 63 | q = np.array((qw, qx, qy, qz), dtype=np.float64) 64 | t = np.array((tx, ty, tz), dtype=np.float64) 65 | 66 | if np.isclose(np.linalg.norm(q), 0): 67 | logging.warning( 68 | f'Error parsing pose in file {file.name} line {line_number}. ' 69 | 'Quaternion must have non-zero norm. Ignoring line.') 70 | continue 71 | 72 | q, t = convert_world2cam_to_cam2world(q, t) 73 | poses[frame_num] = (q, t, confidence) 74 | return poses 75 | 76 | 77 | def subsample_poses(poses: dict, subsample: int = 1): 78 | return {k: v for i, (k, v) in enumerate(poses.items()) if i % subsample == 0} 79 | 80 | 81 | def load_K(file_path: Path): 82 | K = dict() 83 | with file_path.open('r', encoding='utf-8') as f: 84 | for line in f.readlines(): 85 | if '#' in line: 86 | continue 87 | line = line.strip().split(' ') 88 | 89 | frame_num = int(line[0][-9:-4]) 90 | fx, fy, cx, cy, W, H = map(float, line[1:]) 91 | K[frame_num] = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]], dtype=np.float32) 92 | return K, W, H 93 | 94 | 95 | def quat_angle_error(label, pred, variant=VARIANTS_ANGLE_SIN) -> np.ndarray: 96 | assert label.shape == (4,) 97 | assert pred.shape == (4,) 98 | assert variant in (VARIANTS_ANGLE_SIN, VARIANTS_ANGLE_COS), \ 99 | f"Need variant to be in ({VARIANTS_ANGLE_SIN}, {VARIANTS_ANGLE_COS})" 100 | 101 | if len(label.shape) == 1: 102 | label = np.expand_dims(label, axis=0) 103 | if len(label.shape) != 2 or label.shape[0] != 1 or label.shape[1] != 4: 104 | raise RuntimeError(f"Unexpected shape of label: {label.shape}, expected: (1, 4)") 105 | 106 | if len(pred.shape) == 1: 107 | pred = np.expand_dims(pred, axis=0) 108 | if len(pred.shape) != 2 or pred.shape[0] != 1 or pred.shape[1] != 4: 109 | raise RuntimeError(f"Unexpected shape of pred: {pred.shape}, expected: (1, 4)") 110 | 111 | label = label.astype(np.float64) 112 | pred = pred.astype(np.float64) 113 | 114 | q1 = pred / np.linalg.norm(pred, axis=1, keepdims=True) 115 | q2 = label / np.linalg.norm(label, axis=1, keepdims=True) 116 | if variant == VARIANTS_ANGLE_COS: 117 | d = np.abs(np.sum(np.multiply(q1, q2), axis=1, keepdims=True)) 118 | d = np.clip(d, a_min=-1, a_max=1) 119 | angle = 2. 
* np.degrees(np.arccos(d)) 120 | elif variant == VARIANTS_ANGLE_SIN: 121 | if q1.shape[0] != 1 or q2.shape[0] != 1: 122 | raise NotImplementedError("Computing the angle for multiple quaternion pairs is not implemented yet") 123 | # https://www.researchgate.net/post/How_do_I_calculate_the_smallest_angle_between_two_quaternions/5d6ed4a84f3a3e1ed3656616/citation/download 124 | sine = qmult(q1[0], qinverse(q2[0])) # note: takes the first element of each 2D array 125 | # 114.59155902616465 = 2. * 180. / pi 126 | angle = np.arcsin(np.linalg.norm(sine[1:], keepdims=True)) * 114.59155902616465 127 | angle = np.expand_dims(angle, axis=0) 128 | 129 | return angle.astype(np.float64) 130 | 131 | 132 | def precision_recall(inliers, tp, failures): 133 | """ 134 | Computes a Precision/Recall curve for a set of estimated poses, given a per-image confidence 135 | (inliers) and whether each estimated pose error is within a threshold (tp). 136 | Each point of the curve is obtained by choosing a confidence threshold (inlier_thr). 137 | Recall is the fraction of all images (including failures) with inliers >= inlier_thr. 138 | Precision is the fraction of images with inliers >= inlier_thr whose 139 | estimated pose error is within the pose threshold (counted via tp), 140 | where the pose threshold is (trans_thr[m], rot_thr[deg]). 141 | 142 | Inputs: 143 | - inliers [N]: confidence of each estimated pose (e.g. number of inliers) 144 | - tp [N]: bool, True if the estimated pose error is within the pose threshold 145 | - failures (int): number of images for which no pose was estimated 146 | 147 | Output: 148 | - precision [M] 149 | - recall [M] 150 | - average_precision (scalar) 151 | 152 | """ 153 | 154 | assert len(inliers) == len(tp), 'unequal shapes' 155 | 156 | # sort by inliers (descending order) 157 | inliers = np.array(inliers) 158 | sort_idx = np.argsort(inliers)[::-1] 159 | inliers = inliers[sort_idx] 160 | tp = np.array(tp).reshape(-1)[sort_idx] 161 | 162 | # get idxs where inliers change (avoid tied values) 163 | distinct_value_indices = np.where(np.diff(inliers))[0] 164 | threshold_idxs = np.r_[distinct_value_indices, inliers.size - 1] 165 | 166 | # compute prec/recall 167 | N = inliers.shape[0] 168 | rec = np.arange(N, dtype=np.float32) + 1 169 | cum_tp = np.cumsum(tp) 170 | prec = cum_tp[threshold_idxs] / rec[threshold_idxs] 171 | rec = rec[threshold_idxs] / (float(N) + float(failures)) 172 | 173 | # invert order and ensure the (prec=1, rec=0) point 174 | last_ind = rec.searchsorted(rec[-1]) 175 | sl = slice(last_ind, None, -1) 176 | prec = np.r_[prec[sl], 1] 177 | rec = np.r_[rec[sl], 0] 178 | 179 | # compute average precision (AUC) as the weighted average of precisions 180 | average_precision = np.abs(np.sum(np.diff(rec) * np.array(prec)[:-1])) 181 | 182 | return prec, rec, average_precision 183 | -------------------------------------------------------------------------------- /config/default.py: -------------------------------------------------------------------------------- 1 | from yacs.config import CfgNode as CN 2 | 3 | _CN = CN() 4 | 5 | ############## Model ############## 6 | _CN.MODEL = None # options: ['Regression', 'FeatureMatching'] 7 | _CN.DEBUG = False 8 | 9 | # Regression model options 10 | _CN.ENCODER = CN() 11 | _CN.ENCODER.TYPE = None # options: ['ResNet', 'ResUNet'] 12 | _CN.ENCODER.NUM_BLOCKS = None # number of blocks per layer, separated by dashes. e.g.
3-3-3 13 | _CN.ENCODER.BLOCK_TYPE = None # 0:PreactBlock, 1:PreactBlockBottleneck 14 | _CN.ENCODER.NOT_CONCAT = None # ResUNet option 15 | _CN.ENCODER.NUM_OUT_LAYERS = None # ResUNet option 16 | 17 | _CN.AGGREGATOR = CN() 18 | _CN.AGGREGATOR.TYPE = None # options: ['CorrelationVolumeWarping', 'CorrelationVolumeWarpingQKV'] 19 | _CN.AGGREGATOR.POSITION_ENCODER = None # True/False. If True adds two channel with average u,v coordinates of warp 20 | _CN.AGGREGATOR.POSITION_ENCODER_IM1 = None # True/False. If True adds two channel with uniform u,v coordinates of im1 21 | _CN.AGGREGATOR.MAX_SCORE_CHANNEL = None # True/False. If True adds a channel with max score to global features 22 | _CN.AGGREGATOR.NORMALISE_DOT = False # True/False. If True normalise features before dot product 23 | _CN.AGGREGATOR.RESIDUAL_ATT = False # True/False. If True Q,K,V are residuals from features 24 | _CN.AGGREGATOR.CV_OUTLAYERS = 0 # If >0, compresses CorrelationVolume into OutLayers and channel-wise append to Global Volume 25 | _CN.AGGREGATOR.CV_HALF_CHANNELS = False # If True, computes correlation volume using only half the images feature channels, giving more freedom for the rest 26 | _CN.AGGREGATOR.UPSAMPLE_POS_ENC = 0 # If >0, upsamples positional encoder with number of channels 27 | _CN.AGGREGATOR.DUSTBIN = False # If True, creates dustbins to assign 'unmatched' features. Also learns a 'dustbin feature' to be used when warping feature maps 28 | 29 | _CN.HEAD = CN() 30 | _CN.HEAD.TYPE = None # options: ['ProcrustesResBlockMLP', 'DirectResBlockMLP'] 31 | _CN.BACKPROJECT_ANCHORS = None # whether to backproject anchors to 3D or assume that HEAD already gives 3D points 32 | _CN.HEAD.ADD_BASIS = False # if true, add orthonormal basis to MLP anchors, only valid if NUM_PTS=3 or 6 33 | _CN.HEAD.NUM_PTS = 6 # number of points to estimate. 3, 6 or more. (3: predict correspondences to fixed orthonormal-basis, 6: predict full 3D-3D correspondences, even, more than 6: predict overcomplete set) 34 | _CN.HEAD.AVG_POOL = False # if true, reduce last feature volume to vector using Global Avg. Pool. Otherwise, use ravel() 35 | _CN.HEAD.BATCH_NORM = True # enable/disable batch-norm for head res-blocks 36 | _CN.HEAD.SEPARATE_SCALE = True # For QuatDeepResblock: if True, regress scale separately (unitary translation vector (3D) + 1D scale); else, regress scaled translation vector (3D) 37 | # For AngularBinsResblock: if True, regress scale separately (bins for trans. 
angle + 1D scale); else, regress scaled translation vector 38 | 39 | # Feature Matching Options 40 | _CN.FEATURE_MATCHING = None # options: ['SIFT', 'Precomputed'] 41 | _CN.POSE_SOLVER = None # options: ['EssentialMatrix', 'EssentialMatrixMetric', 'Procrustes', 'PNP'] 42 | 43 | # SIFT options 44 | _CN.SIFT = CN() 45 | _CN.SIFT.NUM_FEATURES = None 46 | _CN.SIFT.RATIO_THRESHOLD = None 47 | 48 | # Pre-computed feature matching options 49 | _CN.MATCHES_FILE_PATH = None # path to NPY storing the correspondences pre-computed using the learned algorithm 50 | 51 | # EMAT RANSAC options 52 | _CN.EMAT_RANSAC = CN() 53 | _CN.EMAT_RANSAC.PIX_THRESHOLD = None 54 | _CN.EMAT_RANSAC.SCALE_THRESHOLD = None 55 | _CN.EMAT_RANSAC.CONFIDENCE = None 56 | 57 | # Procrustes RANSAC options 58 | _CN.PROCRUSTES = CN() 59 | _CN.PROCRUSTES.MAX_CORR_DIST = None 60 | _CN.PROCRUSTES.REFINE = False #refine pose with ICP 61 | 62 | # PNP RANSAC options 63 | _CN.PNP = CN() 64 | _CN.PNP.RANSAC_ITER = None 65 | _CN.PNP.REPROJECTION_INLIER_THRESHOLD = None # pixels 66 | _CN.PNP.CONFIDENCE = None 67 | 68 | ############## Dataset ############## 69 | _CN.DATASET = CN() 70 | # 1. data config 71 | _CN.DATASET.DATA_SOURCE = None # options: ['ScanNet', '7Scenes', 'MapFree'] 72 | _CN.DATASET.SCENES = None # scenes to use (for 7Scenes/MapFree); should be a list []; If none, use all scenes. 73 | _CN.DATASET.DATA_ROOT = None # path to dataset folder 74 | _CN.DATASET.NPZ_ROOT = None # path to npz files containing pairs of frame indices per sample 75 | _CN.DATASET.MIN_OVERLAP_SCORE = None # discard data with overlap_score < min_overlap_score 76 | _CN.DATASET.MAX_OVERLAP_SCORE = None # discard data with overlap_score > max_overlap_score 77 | _CN.DATASET.AUGMENTATION_TYPE = None # options: [None, 'colorjitter'] 78 | _CN.DATASET.BLACK_WHITE = False # if true, transform images to black & white 79 | _CN.DATASET.PAIRS_TXT = CN() # Path to text file defining the train/val/test pairs (7Scenes) 80 | _CN.DATASET.PAIRS_TXT.TRAIN = None 81 | _CN.DATASET.PAIRS_TXT.VAL = None 82 | _CN.DATASET.PAIRS_TXT.TEST = None 83 | _CN.DATASET.PAIRS_TXT.ONE_NN = False # If true, keeps only reference image w/ highest similarity to each query 84 | _CN.DATASET.HEIGHT = None 85 | _CN.DATASET.WIDTH = None 86 | _CN.DATASET.ESTIMATED_DEPTH = None # Use 'estimated' predictions of depth map, if None uses GT depth map 87 | # For Scannet: path to NPZ storing the depth maps (for a given method); if None use GT depth 88 | # For 7Scenes: suffix to add to depthpath when loading depth maps; if None use GT depth 89 | # For Mapfree: suffix to add to depthpath when loading depth maps; if None, no depth 90 | _CN.DATASET.QUERY_FRAME_COUNT = 1 # number of query frames to infer from. 
1 or 9 91 | 92 | ############# TRAINING ############# 93 | _CN.TRAINING = CN() 94 | # Data Loader settings 95 | _CN.TRAINING.BATCH_SIZE = None 96 | _CN.TRAINING.NUM_WORKERS = None 97 | _CN.TRAINING.SAMPLER = None # options: ['random', 'scene_balance'] 98 | _CN.TRAINING.N_SAMPLES_SCENE = None # if 'scene_balance' sampler, the number of samples to get per scene 99 | _CN.TRAINING.SAMPLE_WITH_REPLACEMENT = None # if 'scene_balance' sampler, whether to sample with replacement 100 | # Training settings 101 | _CN.TRAINING.LR = None 102 | _CN.TRAINING.LR_STEP_INTERVAL = None 103 | _CN.TRAINING.LR_STEP_GAMMA = None # multiplicative factor of LR every LR_STEP_ITERATIONS 104 | _CN.TRAINING.VAL_INTERVAL = None 105 | _CN.TRAINING.VAL_BATCHES = None 106 | _CN.TRAINING.LOG_INTERVAL = None 107 | _CN.TRAINING.EPOCHS = None 108 | _CN.TRAINING.GRAD_CLIP = 0. # Indicates the L2 norm at which to clip the gradient. Disabled if 0 109 | # Loss settings 110 | _CN.TRAINING.ROT_LOSS = 'rot_frobenius_loss' # options: ['rot_frobenius_loss', 'rot_l1_loss', 'rot_angle_loss'] 111 | _CN.TRAINING.TRANS_LOSS = 'trans_l2_loss' # options: ['trans_l2_loss', 'trans_ang_loss'] 112 | _CN.TRAINING.LAMBDA = 1.0 # scaling term for the translation loss term. If 0.0, learns optimal weighting. 113 | 114 | 115 | 116 | cfg = _CN -------------------------------------------------------------------------------- /config/mapfree.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | DATA_SOURCE: 'MapFree' 3 | DATA_ROOT: 'data/mapfree/' 4 | SCENES: None # should be a list [] or None. If none, use all scenes. 5 | ESTIMATED_DEPTH: None # To load estimated depth map, provide the suffix to the depth files, e.g. 'dptnyu', 'dptkitti' 6 | AUGMENTATION_TYPE: None 7 | HEIGHT: 720 8 | WIDTH: 540 9 | MIN_OVERLAP_SCORE: 0.2 # [train only] discard data with overlap_score < min_overlap_score 10 | MAX_OVERLAP_SCORE: 0.7 # [train only] discard data with overlap_score < min_overlap_score 11 | QUERY_FRAME_COUNT: 1 # 1 (single frame task) or 9 (multi-frame task) only! 12 | -------------------------------------------------------------------------------- /config/mapfree_multi.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | QUERY_FRAME_COUNT: 9 # 1 (single frame task) or 9 (multi-frame task) only! 
-------------------------------------------------------------------------------- /config/matching/mapfree/loftr_emat_dptkitti.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'dptkitti' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_LoFTR.npz' 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 2.0 9 | SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/mapfree/loftr_emat_dptnyu.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'dptnyu' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_LoFTR.npz' 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 2.0 9 | SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/mapfree/loftr_pnp_dptkitti.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'dptkitti' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_LoFTR.npz' 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 0.9999 -------------------------------------------------------------------------------- /config/matching/mapfree/loftr_pnp_dptnyu.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'dptnyu' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_LoFTR.npz' 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 0.9999 -------------------------------------------------------------------------------- /config/matching/mapfree/sg_emat_dptkitti.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'dptkitti' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_SG.npz' 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 2.0 9 | SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/mapfree/sg_emat_dptnyu.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'dptnyu' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_SG.npz' 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 2.0 9 | SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/mapfree/sg_pnp_dptkitti.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'dptkitti' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_SG.npz' 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 
0.9999 11 | -------------------------------------------------------------------------------- /config/matching/mapfree/sg_pnp_dptnyu.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'dptnyu' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_SG.npz' 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/mapfree/sg_procrustes_dptkitti.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'dptkitti' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_SG.npz' 7 | PROCRUSTES: 8 | MAX_CORR_DIST: 0.05 # meters 9 | -------------------------------------------------------------------------------- /config/matching/mapfree/sift_emat_dptkitti.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'dptkitti' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_SIFT.npz' 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 3.0 9 | SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/mapfree/sift_emat_dptnyu.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'dptnyu' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_SIFT.npz' 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 3.0 9 | SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/mapfree/sift_pnp_dptkitti.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'dptkitti' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_SIFT.npz' 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/mapfree/sift_pnp_dptnyu.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'dptnyu' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_SIFT.npz' 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/scannet/loftr_emat_dpt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_LoFTR_scannet_test.npz' 5 | DATASET: 6 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_dpt.npz' 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 2.0 9 | 
SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/scannet/loftr_emat_gt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_LoFTR_scannet_test.npz' 5 | DATASET: 6 | ESTIMATED_DEPTH: None 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 2.0 9 | SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/scannet/loftr_emat_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_LoFTR_scannet_test.npz' 5 | DATASET: 6 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_planercnn.npz' 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 2.0 9 | SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/scannet/loftr_pnp_dpt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_LoFTR_scannet_test.npz' 5 | DATASET: 6 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_dpt.npz' 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 0.9999 -------------------------------------------------------------------------------- /config/matching/scannet/loftr_pnp_gt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_LoFTR_scannet_test.npz' 5 | DATASET: 6 | ESTIMATED_DEPTH: None 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 0.9999 -------------------------------------------------------------------------------- /config/matching/scannet/loftr_pnp_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_LoFTR_scannet_test.npz' 5 | DATASET: 6 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_planercnn.npz' 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 0.9999 -------------------------------------------------------------------------------- /config/matching/scannet/loftr_procrustes_dpt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_dpt.npz' 6 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_LoFTR_scannet_test.npz' 7 | PROCRUSTES: 8 | MAX_CORR_DIST: 0.05 # meters 9 | -------------------------------------------------------------------------------- /config/matching/scannet/loftr_procrustes_dpt_icp.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 
'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_dpt.npz' 6 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_LoFTR_scannet_test.npz' 7 | PROCRUSTES: 8 | MAX_CORR_DIST: 0.05 # meters 9 | REFINE: True 10 | -------------------------------------------------------------------------------- /config/matching/scannet/loftr_procrustes_gt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: None 6 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_LoFTR_scannet_test.npz' 7 | PROCRUSTES: 8 | MAX_CORR_DIST: 0.05 # meters 9 | -------------------------------------------------------------------------------- /config/matching/scannet/loftr_procrustes_gt_icp.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: None 6 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_LoFTR_scannet_test.npz' 7 | PROCRUSTES: 8 | MAX_CORR_DIST: 0.05 # meters 9 | REFINE: True 10 | -------------------------------------------------------------------------------- /config/matching/scannet/loftr_procrustes_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_planercnn.npz' 6 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_LoFTR_scannet_test.npz' 7 | PROCRUSTES: 8 | MAX_CORR_DIST: 0.05 # meters 9 | -------------------------------------------------------------------------------- /config/matching/scannet/loftr_procrustes_planercnn_icp.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_planercnn.npz' 6 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_LoFTR_scannet_test.npz' 7 | PROCRUSTES: 8 | MAX_CORR_DIST: 0.05 # meters 9 | REFINE: True 10 | -------------------------------------------------------------------------------- /config/matching/scannet/sg_emat_dpt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_dpt.npz' 6 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_SG_scannet_test.npz' 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 2.0 9 | SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/scannet/sg_emat_gt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: None 6 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_SG_scannet_test.npz' 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 2.0 9 | SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | 
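
The 'EssentialMatrixMetric' solver selected in these ScanNet configs recovers a rotation and a unit-norm translation from an essential matrix fitted to the precomputed correspondences, then resolves the metric scale of the translation from depth (ground-truth depth when ESTIMATED_DEPTH is None, otherwise the DPT or PlaneRCNN depth maps named above). The sketch below is only an illustration of that idea with OpenCV, not the repo's implementation: it assumes a single shared intrinsic matrix K, Nx2 float keypoint arrays, and per-keypoint depths depth0 sampled beforehand; PIX_THRESHOLD and CONFIDENCE map onto the RANSAC threshold/prob arguments, while SCALE_THRESHOLD hints at a more robust scale vote than the plain median used here.

    import cv2
    import numpy as np

    def emat_metric_pose(pts0, pts1, K, depth0, pix_thr=2.0, conf=0.9999):
        # Essential matrix with RANSAC; the threshold is an epipolar distance in pixels
        E, inl = cv2.findEssentialMat(pts0, pts1, K, method=cv2.RANSAC,
                                      prob=conf, threshold=pix_thr)
        # Rotation and unit-norm translation (cheirality check keeps points in front of both cameras)
        _, R, t, inl = cv2.recoverPose(E, pts0, pts1, K, mask=inl)
        keep = inl.ravel().astype(bool)

        # Triangulate the inliers in the scale-free two-view frame [I|0], [R|t]
        P0 = K @ np.hstack([np.eye(3), np.zeros((3, 1))])
        P1 = K @ np.hstack([R, t])
        X = cv2.triangulatePoints(P0, P1, pts0[keep].T, pts1[keep].T)
        z_rel = X[2] / X[3]                  # depths in camera 0, up to a global scale

        # Metric scale from the GT/estimated depth sampled at the same keypoints
        z_abs = depth0[keep]
        valid = (z_rel > 0) & (z_abs > 0)
        scale = np.median(z_abs[valid] / z_rel[valid])
        return R, scale * t

The configs in this group differ only in where that depth comes from; the solver settings (PIX_THRESHOLD, SCALE_THRESHOLD, CONFIDENCE) are shared.
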
-------------------------------------------------------------------------------- /config/matching/scannet/sg_emat_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_planercnn.npz' 6 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_SG_scannet_test.npz' 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 2.0 9 | SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/scannet/sg_pnp_dpt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_SG_scannet_test.npz' 5 | DATASET: 6 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_dpt.npz' 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/scannet/sg_pnp_gt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_SG_scannet_test.npz' 5 | DATASET: 6 | ESTIMATED_DEPTH: None 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/scannet/sg_pnp_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_SG_scannet_test.npz' 5 | DATASET: 6 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_planercnn.npz' 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/scannet/sg_procrustes_dpt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_dpt.npz' 6 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_SG_scannet_test.npz' 7 | PROCRUSTES: 8 | MAX_CORR_DIST: 0.05 # meters 9 | -------------------------------------------------------------------------------- /config/matching/scannet/sg_procrustes_gt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: None 6 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_SG_scannet_test.npz' 7 | PROCRUSTES: 8 | MAX_CORR_DIST: 0.05 # meters 9 | -------------------------------------------------------------------------------- /config/matching/scannet/sg_procrustes_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: 
'data/scannet_misc/scannet_test_depthmaps_planercnn.npz' 6 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_SG_scannet_test.npz' 7 | PROCRUSTES: 8 | MAX_CORR_DIST: 0.05 # meters 9 | -------------------------------------------------------------------------------- /config/matching/scannet/sift_emat_dpt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'SIFT' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_dpt.npz' 6 | SIFT: 7 | NUM_FEATURES: 2048 8 | RATIO_THRESHOLD: 0.8 9 | EMAT_RANSAC: 10 | PIX_THRESHOLD: 3.0 11 | SCALE_THRESHOLD: 0.1 12 | CONFIDENCE: 0.9999 13 | -------------------------------------------------------------------------------- /config/matching/scannet/sift_emat_gt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'SIFT' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: None 6 | SIFT: 7 | NUM_FEATURES: 2048 8 | RATIO_THRESHOLD: 0.8 9 | EMAT_RANSAC: 10 | PIX_THRESHOLD: 3.0 11 | SCALE_THRESHOLD: 0.1 12 | CONFIDENCE: 0.9999 13 | -------------------------------------------------------------------------------- /config/matching/scannet/sift_emat_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'SIFT' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_planercnn.npz' 6 | SIFT: 7 | NUM_FEATURES: 2048 8 | RATIO_THRESHOLD: 0.8 9 | EMAT_RANSAC: 10 | PIX_THRESHOLD: 3.0 11 | SCALE_THRESHOLD: 0.1 12 | CONFIDENCE: 0.9999 13 | -------------------------------------------------------------------------------- /config/matching/scannet/sift_pnp_dpt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'SIFT' 3 | POSE_SOLVER: 'PNP' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_dpt.npz' 6 | PNP: 7 | RANSAC_ITER: 1000 8 | REPROJECTION_INLIER_THRESHOLD: 3 9 | CONFIDENCE: 0.9999 10 | SIFT: 11 | NUM_FEATURES: 2048 12 | RATIO_THRESHOLD: 0.8 -------------------------------------------------------------------------------- /config/matching/scannet/sift_pnp_gt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'SIFT' 3 | POSE_SOLVER: 'PNP' 4 | DATASET: 5 | ESTIMATED_DEPTH: None 6 | PNP: 7 | RANSAC_ITER: 1000 8 | REPROJECTION_INLIER_THRESHOLD: 3 9 | CONFIDENCE: 0.9999 10 | SIFT: 11 | NUM_FEATURES: 2048 12 | RATIO_THRESHOLD: 0.8 -------------------------------------------------------------------------------- /config/matching/scannet/sift_pnp_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'SIFT' 3 | POSE_SOLVER: 'PNP' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_planercnn.npz' 6 | PNP: 7 | RANSAC_ITER: 1000 8 | REPROJECTION_INLIER_THRESHOLD: 3 9 | CONFIDENCE: 0.9999 10 | SIFT: 11 | NUM_FEATURES: 2048 12 | RATIO_THRESHOLD: 0.8 -------------------------------------------------------------------------------- /config/matching/scannet/sift_procrustes_dpt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 
'FeatureMatching' 2 | FEATURE_MATCHING: 'SIFT' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_dpt.npz' 6 | SIFT: 7 | NUM_FEATURES: 2048 8 | RATIO_THRESHOLD: 0.8 9 | PROCRUSTES: 10 | MAX_CORR_DIST: 0.05 # meters 11 | -------------------------------------------------------------------------------- /config/matching/scannet/sift_procrustes_dpt_icp.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'SIFT' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_dpt.npz' 6 | SIFT: 7 | NUM_FEATURES: 2048 8 | RATIO_THRESHOLD: 0.8 9 | PROCRUSTES: 10 | MAX_CORR_DIST: 0.05 # meters 11 | REFINE: True 12 | -------------------------------------------------------------------------------- /config/matching/scannet/sift_procrustes_gtdepth.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'SIFT' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: None 6 | SIFT: 7 | NUM_FEATURES: 2048 8 | RATIO_THRESHOLD: 0.8 9 | PROCRUSTES: 10 | MAX_CORR_DIST: 0.05 # meters 11 | -------------------------------------------------------------------------------- /config/matching/scannet/sift_procrustes_gtdepth_icp.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'SIFT' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: None 6 | SIFT: 7 | NUM_FEATURES: 2048 8 | RATIO_THRESHOLD: 0.8 9 | PROCRUSTES: 10 | MAX_CORR_DIST: 0.05 # meters 11 | REFINE: True 12 | -------------------------------------------------------------------------------- /config/matching/scannet/sift_procrustes_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'SIFT' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_planercnn.npz' 6 | SIFT: 7 | NUM_FEATURES: 2048 8 | RATIO_THRESHOLD: 0.8 9 | PROCRUSTES: 10 | MAX_CORR_DIST: 0.05 # meters 11 | -------------------------------------------------------------------------------- /config/matching/scannet/sift_procrustes_planercnn_icp.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'SIFT' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_planercnn.npz' 6 | SIFT: 7 | NUM_FEATURES: 2048 8 | RATIO_THRESHOLD: 0.8 9 | PROCRUSTES: 10 | MAX_CORR_DIST: 0.05 # meters 11 | REFINE: True 12 | -------------------------------------------------------------------------------- /config/matching/scannet/sift_procrustes_smdp.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'SIFT' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_smdp_bilinear.npz' 6 | SIFT: 7 | NUM_FEATURES: 2048 8 | RATIO_THRESHOLD: 0.8 9 | PROCRUSTES: 10 | MAX_CORR_DIST: 0.05 # meters 11 | -------------------------------------------------------------------------------- /config/matching/sevenscenes/loftr_emat_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 
'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'prcnn' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_LoFTR_{pairs_txt}.npz' 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 2.0 9 | SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/sevenscenes/loftr_pnp_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'prcnn' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_LoFTR_{pairs_txt}.npz' 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 0.9999 -------------------------------------------------------------------------------- /config/matching/sevenscenes/sg_emat_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'prcnn' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_SG_{pairs_txt}.npz' 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 2.0 9 | SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/sevenscenes/sg_pnp_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'prcnn' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_SG_{pairs_txt}.npz' 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/sevenscenes/sift_emat_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'prcnn' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_SIFT_{pairs_txt}.npz' 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 3.0 9 | SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/sevenscenes/sift_pnp_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'prcnn' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_SIFT_{pairs_txt}.npz' 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/regression/mapfree/3d3d.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'ProcrustesDeepResBlock' 14 | ADD_BASIS: True 15 | AVG_POOL: True 16 | TRAINING: 17 | BATCH_SIZE: 10 18 | NUM_WORKERS: 10 19 | SAMPLER: 'scene_balance' 20 | N_SAMPLES_SCENE: 700 21 | 
SAMPLE_WITH_REPLACEMENT: True 22 | LR: 1e-4 23 | LOG_INTERVAL: 50 24 | VAL_INTERVAL: 0.25 25 | VAL_BATCHES: 500 26 | EPOCHS: 50 27 | ROT_LOSS: 'rot_angle_loss' 28 | TRANS_LOSS: 'trans_l1_loss' 29 | LAMBDA: 1. 30 | BACKPROJECT_ANCHORS: False 31 | DATASET: 32 | HEIGHT: 360 33 | WIDTH: 270 34 | MIN_OVERLAP_SCORE: 0.4 # [train only] discard data with overlap_score < min_overlap_score 35 | MAX_OVERLAP_SCORE: 0.8 # [train only] discard data with overlap_score < min_overlap_score -------------------------------------------------------------------------------- /config/regression/mapfree/3d3d_lowoverlap.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'ProcrustesDeepResBlock' 14 | ADD_BASIS: True 15 | AVG_POOL: True 16 | TRAINING: 17 | BATCH_SIZE: 10 18 | NUM_WORKERS: 10 19 | SAMPLER: 'scene_balance' 20 | N_SAMPLES_SCENE: 700 21 | SAMPLE_WITH_REPLACEMENT: True 22 | LR: 1e-4 23 | LOG_INTERVAL: 50 24 | VAL_INTERVAL: 0.25 25 | VAL_BATCHES: 500 26 | EPOCHS: 50 27 | ROT_LOSS: 'rot_angle_loss' 28 | TRANS_LOSS: 'trans_l1_loss' 29 | LAMBDA: 1. 30 | BACKPROJECT_ANCHORS: False 31 | DATASET: 32 | HEIGHT: 360 33 | WIDTH: 270 34 | MIN_OVERLAP_SCORE: 0.2 # [train only] discard data with overlap_score < min_overlap_score 35 | MAX_OVERLAP_SCORE: 0.8 # [train only] discard data with overlap_score < min_overlap_score -------------------------------------------------------------------------------- /config/regression/mapfree/3d3d_no_posencoder.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: False 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'ProcrustesDeepResBlock' 14 | ADD_BASIS: True 15 | AVG_POOL: True 16 | TRAINING: 17 | BATCH_SIZE: 10 18 | NUM_WORKERS: 10 19 | SAMPLER: 'scene_balance' 20 | N_SAMPLES_SCENE: 700 21 | SAMPLE_WITH_REPLACEMENT: True 22 | LR: 1e-4 23 | LOG_INTERVAL: 50 24 | VAL_INTERVAL: 0.25 25 | VAL_BATCHES: 500 26 | EPOCHS: 50 27 | ROT_LOSS: 'rot_angle_loss' 28 | TRANS_LOSS: 'trans_l1_loss' 29 | LAMBDA: 1. 
30 | BACKPROJECT_ANCHORS: False 31 | DATASET: 32 | HEIGHT: 360 33 | WIDTH: 270 34 | MIN_OVERLAP_SCORE: 0.4 # [train only] discard data with overlap_score < min_overlap_score 35 | MAX_OVERLAP_SCORE: 0.8 # [train only] discard data with overlap_score < min_overlap_score -------------------------------------------------------------------------------- /config/regression/mapfree/3d3d_no_warping.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'Concat' 10 | HEAD: 11 | TYPE: 'ProcrustesDeepResBlock' 12 | ADD_BASIS: True 13 | AVG_POOL: True 14 | TRAINING: 15 | BATCH_SIZE: 10 16 | NUM_WORKERS: 10 17 | SAMPLER: 'scene_balance' 18 | N_SAMPLES_SCENE: 700 19 | SAMPLE_WITH_REPLACEMENT: True 20 | LR: 1e-4 21 | LOG_INTERVAL: 50 22 | VAL_INTERVAL: 0.25 23 | VAL_BATCHES: 500 24 | EPOCHS: 50 25 | ROT_LOSS: 'rot_angle_loss' 26 | TRANS_LOSS: 'trans_l1_loss' 27 | LAMBDA: 1. 28 | BACKPROJECT_ANCHORS: False 29 | DATASET: 30 | HEIGHT: 360 31 | WIDTH: 270 32 | MIN_OVERLAP_SCORE: 0.4 # [train only] discard data with overlap_score < min_overlap_score 33 | MAX_OVERLAP_SCORE: 0.8 # [train only] discard data with overlap_score < min_overlap_score -------------------------------------------------------------------------------- /config/regression/mapfree/3d3d_weighted_loss.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'ProcrustesDeepResBlock' 14 | ADD_BASIS: True 15 | AVG_POOL: True 16 | TRAINING: 17 | BATCH_SIZE: 10 18 | NUM_WORKERS: 10 19 | SAMPLER: 'scene_balance' 20 | N_SAMPLES_SCENE: 700 21 | SAMPLE_WITH_REPLACEMENT: True 22 | LR: 1e-4 23 | LOG_INTERVAL: 50 24 | VAL_INTERVAL: 0.25 25 | VAL_BATCHES: 500 26 | EPOCHS: 50 27 | ROT_LOSS: 'rot_angle_loss' 28 | TRANS_LOSS: 'trans_l1_loss' 29 | LAMBDA: 0. 30 | BACKPROJECT_ANCHORS: False 31 | DATASET: 32 | HEIGHT: 360 33 | WIDTH: 270 34 | MIN_OVERLAP_SCORE: 0.4 # [train only] discard data with overlap_score < min_overlap_score 35 | MAX_OVERLAP_SCORE: 0.8 # [train only] discard data with overlap_score < min_overlap_score -------------------------------------------------------------------------------- /config/regression/mapfree/multiframe/3d3d_multi.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'RegressionMultiFrame' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'ProcrustesDeepResBlock' 14 | ADD_BASIS: True 15 | AVG_POOL: True 16 | TRAINING: 17 | BATCH_SIZE: 10 18 | NUM_WORKERS: 10 19 | SAMPLER: 'scene_balance' 20 | N_SAMPLES_SCENE: 700 21 | SAMPLE_WITH_REPLACEMENT: True 22 | LR: 1e-4 23 | LOG_INTERVAL: 50 24 | VAL_INTERVAL: 0.25 25 | VAL_BATCHES: 500 26 | EPOCHS: 50 27 | ROT_LOSS: 'rot_angle_loss' 28 | TRANS_LOSS: 'trans_l1_loss' 29 | LAMBDA: 1. 
30 | BACKPROJECT_ANCHORS: False 31 | DATASET: 32 | HEIGHT: 360 33 | WIDTH: 270 34 | MIN_OVERLAP_SCORE: 0.4 # [train only] discard data with overlap_score < min_overlap_score 35 | MAX_OVERLAP_SCORE: 0.8 # [train only] discard data with overlap_score < min_overlap_score -------------------------------------------------------------------------------- /config/regression/mapfree/rot6d_trans.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'DirectDeepResBlockMLP' 14 | ADD_BASIS: True 15 | AVG_POOL: True 16 | TRAINING: 17 | BATCH_SIZE: 10 18 | NUM_WORKERS: 10 19 | SAMPLER: 'scene_balance' 20 | N_SAMPLES_SCENE: 700 21 | SAMPLE_WITH_REPLACEMENT: True 22 | LR: 1e-4 23 | LOG_INTERVAL: 50 24 | VAL_INTERVAL: 0.25 25 | VAL_BATCHES: 500 26 | EPOCHS: 50 27 | ROT_LOSS: 'rot_angle_loss' 28 | TRANS_LOSS: 'trans_l1_loss' 29 | LAMBDA: 1. 30 | BACKPROJECT_ANCHORS: False 31 | DATASET: 32 | HEIGHT: 360 33 | WIDTH: 270 34 | MIN_OVERLAP_SCORE: 0.4 # [train only] discard data with overlap_score < min_overlap_score 35 | MAX_OVERLAP_SCORE: 0.8 # [train only] discard data with overlap_score < min_overlap_score -------------------------------------------------------------------------------- /config/regression/mapfree/rotbin_trans.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'AngularBinsDeepResBlockMLP' 14 | SEPARATE_SCALE: False 15 | AVG_POOL: True 16 | TRAINING: 17 | BATCH_SIZE: 10 18 | NUM_WORKERS: 10 19 | SAMPLER: 'scene_balance' 20 | N_SAMPLES_SCENE: 700 21 | SAMPLE_WITH_REPLACEMENT: True 22 | LR: 1e-4 23 | LOG_INTERVAL: 50 24 | VAL_INTERVAL: 0.25 25 | VAL_BATCHES: 500 26 | EPOCHS: 50 27 | ROT_LOSS: 'rot_bin_loss' 28 | TRANS_LOSS: 'trans_l1_loss' 29 | LAMBDA: 1. 30 | BACKPROJECT_ANCHORS: False 31 | DATASET: 32 | HEIGHT: 360 33 | WIDTH: 270 34 | MIN_OVERLAP_SCORE: 0.4 # [train only] discard data with overlap_score < min_overlap_score 35 | MAX_OVERLAP_SCORE: 0.8 # [train only] discard data with overlap_score < min_overlap_score -------------------------------------------------------------------------------- /config/regression/mapfree/rotbin_transdirectionbin_scale.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'AngularBinsDeepResBlockMLP' 14 | SEPARATE_SCALE: True 15 | AVG_POOL: True 16 | TRAINING: 17 | BATCH_SIZE: 10 18 | NUM_WORKERS: 10 19 | SAMPLER: 'scene_balance' 20 | N_SAMPLES_SCENE: 700 21 | SAMPLE_WITH_REPLACEMENT: True 22 | LR: 1e-4 23 | LOG_INTERVAL: 50 24 | VAL_INTERVAL: 0.25 25 | VAL_BATCHES: 500 26 | EPOCHS: 50 27 | ROT_LOSS: 'rot_bin_loss' 28 | TRANS_LOSS: 'trans_sphbin_loss' 29 | LAMBDA: 1. 
30 | BACKPROJECT_ANCHORS: False 31 | DATASET: 32 | HEIGHT: 360 33 | WIDTH: 270 34 | MIN_OVERLAP_SCORE: 0.4 # [train only] discard data with overlap_score < min_overlap_score 35 | MAX_OVERLAP_SCORE: 0.8 # [train only] discard data with overlap_score < min_overlap_score -------------------------------------------------------------------------------- /config/regression/mapfree/rotbin_transdirectionbin_scale_lowoverlap.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'AngularBinsDeepResBlockMLP' 14 | SEPARATE_SCALE: True 15 | AVG_POOL: True 16 | TRAINING: 17 | BATCH_SIZE: 10 18 | NUM_WORKERS: 10 19 | SAMPLER: 'scene_balance' 20 | N_SAMPLES_SCENE: 700 21 | SAMPLE_WITH_REPLACEMENT: True 22 | LR: 1e-4 23 | LOG_INTERVAL: 50 24 | VAL_INTERVAL: 0.25 25 | VAL_BATCHES: 500 26 | EPOCHS: 50 27 | ROT_LOSS: 'rot_bin_loss' 28 | TRANS_LOSS: 'trans_sphbin_loss' 29 | LAMBDA: 1. 30 | BACKPROJECT_ANCHORS: False 31 | DATASET: 32 | HEIGHT: 360 33 | WIDTH: 270 34 | MIN_OVERLAP_SCORE: 0.2 # [train only] discard data with overlap_score < min_overlap_score 35 | MAX_OVERLAP_SCORE: 0.8 # [train only] discard data with overlap_score < min_overlap_score -------------------------------------------------------------------------------- /config/regression/mapfree/rotbin_transdirectionbin_scale_qkv.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarpingQKV' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | NORMALISE_DOT: False 13 | RESIDUAL_ATT: True 14 | HEAD: 15 | TYPE: 'AngularBinsDeepResBlockMLP' 16 | SEPARATE_SCALE: True 17 | AVG_POOL: True 18 | TRAINING: 19 | BATCH_SIZE: 10 20 | NUM_WORKERS: 10 21 | SAMPLER: 'scene_balance' 22 | N_SAMPLES_SCENE: 700 23 | SAMPLE_WITH_REPLACEMENT: True 24 | LR: 1e-4 25 | LOG_INTERVAL: 50 26 | VAL_INTERVAL: 0.25 27 | VAL_BATCHES: 500 28 | EPOCHS: 50 29 | ROT_LOSS: 'rot_bin_loss' 30 | TRANS_LOSS: 'trans_sphbin_loss' 31 | LAMBDA: 1. 32 | BACKPROJECT_ANCHORS: False 33 | DATASET: 34 | HEIGHT: 360 35 | WIDTH: 270 36 | MIN_OVERLAP_SCORE: 0.4 # [train only] discard data with overlap_score < min_overlap_score 37 | MAX_OVERLAP_SCORE: 0.8 # [train only] discard data with overlap_score < min_overlap_score -------------------------------------------------------------------------------- /config/regression/mapfree/rotquat_trans.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'QuatDeepResBlock' 14 | SEPARATE_SCALE: False # trans. 
vector is regressed WITH scale, not using independent 1D scale 15 | TRAINING: 16 | BATCH_SIZE: 10 17 | NUM_WORKERS: 10 18 | SAMPLER: 'scene_balance' 19 | N_SAMPLES_SCENE: 700 20 | SAMPLE_WITH_REPLACEMENT: True 21 | LR: 1e-4 22 | LOG_INTERVAL: 50 23 | VAL_INTERVAL: 0.25 24 | VAL_BATCHES: 500 25 | EPOCHS: 50 26 | ROT_LOSS: 'quat_l1_loss' 27 | TRANS_LOSS: 'trans_l1_loss' 28 | LAMBDA: 0. 29 | BACKPROJECT_ANCHORS: False 30 | DATASET: 31 | HEIGHT: 360 32 | WIDTH: 270 33 | MIN_OVERLAP_SCORE: 0.4 # [train only] discard data with overlap_score < min_overlap_score 34 | MAX_OVERLAP_SCORE: 0.8 # [train only] discard data with overlap_score < min_overlap_score -------------------------------------------------------------------------------- /config/regression/mapfree/rotquat_transdirection_scale.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'QuatDeepResBlock' 14 | SEPARATE_SCALE: True # trans. vector is regressed as 3D unitary direction + 1D scale 15 | TRAINING: 16 | BATCH_SIZE: 10 17 | NUM_WORKERS: 10 18 | SAMPLER: 'scene_balance' 19 | N_SAMPLES_SCENE: 700 20 | SAMPLE_WITH_REPLACEMENT: True 21 | LR: 1e-4 22 | LOG_INTERVAL: 50 23 | VAL_INTERVAL: 0.25 24 | VAL_BATCHES: 500 25 | EPOCHS: 50 26 | ROT_LOSS: 'quat_l1_loss' 27 | TRANS_LOSS: 'trans_scale_direction_loss' 28 | LAMBDA: 1. 29 | BACKPROJECT_ANCHORS: False 30 | DATASET: 31 | HEIGHT: 360 32 | WIDTH: 270 33 | MIN_OVERLAP_SCORE: 0.4 # [train only] discard data with overlap_score < min_overlap_score 34 | MAX_OVERLAP_SCORE: 0.8 # [train only] discard data with overlap_score < min_overlap_score -------------------------------------------------------------------------------- /config/regression/scannet/3d3d.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'ProcrustesDeepResBlock' 14 | ADD_BASIS: True 15 | AVG_POOL: True 16 | TRAINING: 17 | BATCH_SIZE: 12 18 | NUM_WORKERS: 12 19 | SAMPLER: 'scene_balance' 20 | N_SAMPLES_SCENE: 200 21 | SAMPLE_WITH_REPLACEMENT: True 22 | LR: 1e-4 23 | LOG_INTERVAL: 50 24 | VAL_INTERVAL: 0.25 25 | VAL_BATCHES: 200 26 | EPOCHS: 50 27 | ROT_LOSS: 'rot_angle_loss' 28 | TRANS_LOSS: 'trans_l1_loss' 29 | LAMBDA: 1. 
30 | BACKPROJECT_ANCHORS: False 31 | DATASET: 32 | HEIGHT: 240 33 | WIDTH: 320 34 | 35 | -------------------------------------------------------------------------------- /config/regression/scannet/3d3d_dual_posenc.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | POSITION_ENCODER_IM1: True 12 | MAX_SCORE_CHANNEL: True 13 | HEAD: 14 | TYPE: 'ProcrustesDeepResBlock' 15 | ADD_BASIS: True 16 | AVG_POOL: True 17 | TRAINING: 18 | BATCH_SIZE: 12 19 | NUM_WORKERS: 12 20 | SAMPLER: 'scene_balance' 21 | N_SAMPLES_SCENE: 200 22 | SAMPLE_WITH_REPLACEMENT: True 23 | LR: 1e-4 24 | LOG_INTERVAL: 50 25 | VAL_INTERVAL: 0.25 26 | VAL_BATCHES: 200 27 | EPOCHS: 50 28 | ROT_LOSS: 'rot_angle_loss' 29 | TRANS_LOSS: 'trans_l1_loss' 30 | LAMBDA: 1. 31 | BACKPROJECT_ANCHORS: False 32 | DATASET: 33 | HEIGHT: 240 34 | WIDTH: 320 35 | 36 | -------------------------------------------------------------------------------- /config/regression/scannet/3d3d_dual_posenc_upsampling.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | POSITION_ENCODER_IM1: True 12 | MAX_SCORE_CHANNEL: True 13 | UPSAMPLE_POS_ENC: 8 14 | HEAD: 15 | TYPE: 'ProcrustesDeepResBlock' 16 | ADD_BASIS: True 17 | AVG_POOL: True 18 | TRAINING: 19 | BATCH_SIZE: 12 20 | NUM_WORKERS: 12 21 | SAMPLER: 'scene_balance' 22 | N_SAMPLES_SCENE: 200 23 | SAMPLE_WITH_REPLACEMENT: True 24 | LR: 1e-4 25 | LOG_INTERVAL: 50 26 | VAL_INTERVAL: 0.25 27 | VAL_BATCHES: 200 28 | EPOCHS: 50 29 | ROT_LOSS: 'rot_angle_loss' 30 | TRANS_LOSS: 'trans_l1_loss' 31 | LAMBDA: 1. 32 | BACKPROJECT_ANCHORS: False 33 | DATASET: 34 | HEIGHT: 240 35 | WIDTH: 320 36 | 37 | -------------------------------------------------------------------------------- /config/regression/scannet/3d3d_half_cv.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | POSITION_ENCODER_IM1: True 12 | MAX_SCORE_CHANNEL: True 13 | CV_HALF_CHANNELS: True 14 | HEAD: 15 | TYPE: 'ProcrustesDeepResBlock' 16 | ADD_BASIS: True 17 | AVG_POOL: True 18 | TRAINING: 19 | BATCH_SIZE: 12 20 | NUM_WORKERS: 12 21 | SAMPLER: 'scene_balance' 22 | N_SAMPLES_SCENE: 200 23 | SAMPLE_WITH_REPLACEMENT: True 24 | LR: 1e-4 25 | LOG_INTERVAL: 50 26 | VAL_INTERVAL: 0.25 27 | VAL_BATCHES: 200 28 | EPOCHS: 50 29 | ROT_LOSS: 'rot_angle_loss' 30 | TRANS_LOSS: 'trans_l1_loss' 31 | LAMBDA: 1. 
32 | BACKPROJECT_ANCHORS: False 33 | DATASET: 34 | HEIGHT: 240 35 | WIDTH: 320 36 | 37 | -------------------------------------------------------------------------------- /config/regression/scannet/3d3d_lowoverlap.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'ProcrustesDeepResBlock' 14 | ADD_BASIS: True 15 | AVG_POOL: True 16 | TRAINING: 17 | BATCH_SIZE: 12 18 | NUM_WORKERS: 12 19 | SAMPLER: 'scene_balance' 20 | N_SAMPLES_SCENE: 200 21 | SAMPLE_WITH_REPLACEMENT: True 22 | LR: 1e-4 23 | LOG_INTERVAL: 50 24 | VAL_INTERVAL: 0.25 25 | VAL_BATCHES: 200 26 | EPOCHS: 50 27 | ROT_LOSS: 'rot_angle_loss' 28 | TRANS_LOSS: 'trans_l1_loss' 29 | LAMBDA: 1. 30 | BACKPROJECT_ANCHORS: False 31 | DATASET: 32 | HEIGHT: 240 33 | WIDTH: 320 34 | MIN_OVERLAP_SCORE: 0.0 35 | 36 | -------------------------------------------------------------------------------- /config/regression/scannet/3d3d_no_avgpool.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'ProcrustesDeepResBlock' 14 | ADD_BASIS: True 15 | AVG_POOL: False 16 | TRAINING: 17 | BATCH_SIZE: 12 18 | NUM_WORKERS: 12 19 | SAMPLER: 'scene_balance' 20 | N_SAMPLES_SCENE: 200 21 | SAMPLE_WITH_REPLACEMENT: True 22 | LR: 1e-4 23 | LOG_INTERVAL: 50 24 | VAL_INTERVAL: 0.25 25 | VAL_BATCHES: 200 26 | EPOCHS: 20 27 | ROT_LOSS: 'rot_angle_loss' 28 | TRANS_LOSS: 'trans_l1_loss' 29 | LAMBDA: 1. 30 | BACKPROJECT_ANCHORS: False 31 | DATASET: 32 | HEIGHT: 240 33 | WIDTH: 320 34 | 35 | -------------------------------------------------------------------------------- /config/regression/scannet/3d3d_qkv.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarpingQKV' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | RESIDUAL_ATT: True 13 | HEAD: 14 | TYPE: 'ProcrustesDeepResBlock' 15 | ADD_BASIS: True 16 | AVG_POOL: True 17 | TRAINING: 18 | BATCH_SIZE: 12 19 | NUM_WORKERS: 12 20 | SAMPLER: 'scene_balance' 21 | N_SAMPLES_SCENE: 200 22 | SAMPLE_WITH_REPLACEMENT: True 23 | LR: 1e-4 24 | LOG_INTERVAL: 50 25 | VAL_INTERVAL: 0.25 26 | VAL_BATCHES: 200 27 | EPOCHS: 50 28 | ROT_LOSS: 'rot_angle_loss' 29 | TRANS_LOSS: 'trans_l1_loss' 30 | LAMBDA: 1. 
31 | BACKPROJECT_ANCHORS: False 32 | DATASET: 33 | HEIGHT: 240 34 | WIDTH: 320 35 | 36 | -------------------------------------------------------------------------------- /config/regression/scannet/3d3d_with_dustbin.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | DUSTBIN: True 13 | HEAD: 14 | TYPE: 'ProcrustesDeepResBlock' 15 | ADD_BASIS: True 16 | AVG_POOL: True 17 | TRAINING: 18 | BATCH_SIZE: 12 19 | NUM_WORKERS: 12 20 | SAMPLER: 'scene_balance' 21 | N_SAMPLES_SCENE: 200 22 | SAMPLE_WITH_REPLACEMENT: True 23 | LR: 1e-4 24 | LOG_INTERVAL: 50 25 | VAL_INTERVAL: 0.25 26 | VAL_BATCHES: 200 27 | EPOCHS: 50 28 | ROT_LOSS: 'rot_angle_loss' 29 | TRANS_LOSS: 'trans_l1_loss' 30 | LAMBDA: 1. 31 | BACKPROJECT_ANCHORS: False 32 | DATASET: 33 | HEIGHT: 240 34 | WIDTH: 320 35 | 36 | -------------------------------------------------------------------------------- /config/regression/scannet/rotbin_transdirectionbin_scale.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'AngularBinsDeepResBlockMLP' 14 | SEPARATE_SCALE: True 15 | AVG_POOL: True 16 | TRAINING: 17 | BATCH_SIZE: 12 18 | NUM_WORKERS: 12 19 | SAMPLER: 'scene_balance' 20 | N_SAMPLES_SCENE: 200 21 | SAMPLE_WITH_REPLACEMENT: True 22 | LR: 1e-4 23 | LOG_INTERVAL: 50 24 | VAL_INTERVAL: 0.25 25 | VAL_BATCHES: 200 26 | EPOCHS: 50 27 | ROT_LOSS: 'rot_bin_loss' 28 | TRANS_LOSS: 'trans_sphbin_loss' 29 | LAMBDA: 1. 30 | BACKPROJECT_ANCHORS: False 31 | DATASET: 32 | HEIGHT: 240 33 | WIDTH: 320 -------------------------------------------------------------------------------- /config/scannet.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | DATA_SOURCE: 'ScanNet' 3 | DATA_ROOT: 'data/scannet/' 4 | NPZ_ROOT: 'data/scannet_indices/scene_data' 5 | # general options 6 | MIN_OVERLAP_SCORE: 0.4 # discard data with overlap_score < min_overlap_score 7 | AUGMENTATION_TYPE: None # options: [None, 'dark', 'mobile'] 8 | ESTIMATED_DEPTH: None # Loads GT depth maps. To load estimated depthmaps, provide the path to the NPZ file containing them 9 | HEIGHT: 480 10 | WIDTH: 640 -------------------------------------------------------------------------------- /config/sevenscenes.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | DATA_SOURCE: '7Scenes' 3 | DATA_ROOT: 'data/sevenscenes' 4 | SCENES: None # scenes to use (for 7Scenes); should be a list [] or None. If none, use all scenes. 5 | PAIRS_TXT: 6 | TRAIN: 'train_pairs.4nn.medium.txt' 7 | VAL: 'val_pairs.3nn.medium.txt' 8 | TEST: 'test_pairs.5nn.5cm10m.vlad.minmax.txt' 9 | # general options 10 | ESTIMATED_DEPTH: None # None loads GT depth maps. To load estimated depth map, provide the suffix to the depth files, e.g. 
'prcnn' loads PlaneRCNN depth estimates 11 | AUGMENTATION_TYPE: None 12 | HEIGHT: 480 13 | WIDTH: 640 -------------------------------------------------------------------------------- /config/utils.py: -------------------------------------------------------------------------------- 1 | def config_merge_from_file( 2 | cfg: "yacs.config.CfgNode", 3 | path_to_config: "Union[str, Path, list[str], list[Path], tuple[str, ...], tuple[Path, ...]]", 4 | ) -> "yacs.config.CfgNode": 5 | if isinstance(path_to_config, (list, tuple)): 6 | for path_to_config_ in path_to_config: 7 | cfg.merge_from_file(path_to_config_) 8 | else: 9 | cfg.merge_from_file(path_to_config) 10 | 11 | return cfg 12 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: mapfree 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - python=3.10 7 | - cudatoolkit=11.8 8 | - pip=23.2.1 9 | - pip: 10 | - einops==0.6.1 11 | - lazy-loader==0.3 12 | - lightning-utilities==0.9.0 13 | - matplotlib==3.7.2 14 | - numpy==1.24.4 15 | - omegaconf==2.3.0 16 | - open3d==0.17.0 17 | - opencv-python==4.8.0.74 18 | - protobuf==4.23.4 19 | - pytorch-lightning==2.0.6 20 | - tensorboard==2.13.0 21 | - tensorboard-data-server==0.7.1 22 | - timm==0.6.7 23 | - torch==2.0.1 24 | - torchmetrics==1.0.2 25 | - torchvision==0.15.2 26 | - tqdm==4.65.1 27 | - transforms3d==0.4.1 28 | - yacs==0.1.8 29 | - kornia==0.7.2 -------------------------------------------------------------------------------- /environment_eccv22.yml: -------------------------------------------------------------------------------- 1 | name: mapfree 2 | channels: 3 | - pytorch 4 | - conda-forge 5 | - defaults 6 | dependencies: 7 | - python=3.7 8 | - cudatoolkit=11.1 9 | - pytorch=1.8.0 10 | - torchvision=0.9.0 11 | - pip 12 | - pip: 13 | - opencv-python>=4.5.3 14 | - open3d==0.14.1 15 | - transforms3d 16 | - yacs>=0.1.8 17 | - h5py 18 | - pytorch-lightning==1.6.5 19 | - kornia==0.5.3 -------------------------------------------------------------------------------- /etc/feature_matching_baselines/compute.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | from pathlib import Path 4 | import numpy as np 5 | from tqdm import tqdm 6 | 7 | from utils import parse_7scenes_matching_pairs, parse_mapfree_query_frames, stack_pts, load_scannet_imgpaths 8 | from matchers import LoFTR_matcher, SuperGlue_matcher, SIFT_matcher 9 | 10 | MATCHERS = {'LoFTR': LoFTR_matcher, 'SG': SuperGlue_matcher, 'SIFT': SIFT_matcher} 11 | 12 | 13 | def get_parser(): 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--dataset', '-ds', type=str, default='7Scenes', 16 | choices=['Scannet', '7Scenes', 'Mapfree']) 17 | parser.add_argument('--matcher', '-m', type=str, default='SIFT', 18 | choices=MATCHERS.keys()) 19 | parser.add_argument('--scenes', '-sc', type=str, nargs='*', default=None) 20 | parser.add_argument('--pair_txt', type=str, 21 | default='test_pairs.5nn.5cm10m.vlad.minmax.txt') # 7Scenes 22 | parser.add_argument('--pair_npz', type=str, 23 | default='../../data/scannet_indices/scene_data/test/test.npz') # Scannet 24 | parser.add_argument('--outdoor', action='store_true', 25 | help='use outdoor SG/LoFTR model. 
If not specified, use indoor models') 26 | args = parser.parse_args() 27 | 28 | dataset = args.dataset 29 | if dataset == '7Scenes': 30 | args.data_root = '../../data/sevenscenes' 31 | scenes = ['chess', 'fire', 'heads', 'office', 'pumpkin', 'redkitchen', 'stairs'] 32 | args.scenes = scenes if not args.scenes else args.scenes 33 | resize = 640, 480 34 | elif dataset == 'Scannet': 35 | args.data_root = '../../data/scannet/scans_test' 36 | resize = 640, 480 37 | elif dataset == 'Mapfree': 38 | args.data_root = Path('../../data/mapfree/') 39 | test_scenes = [folder for folder in (args.data_root / 'test').iterdir() if folder.is_dir()] 40 | val_scenes = [folder for folder in (args.data_root / 'val').iterdir() if folder.is_dir()] 41 | args.scenes = test_scenes + val_scenes 42 | resize = 540, 720 43 | 44 | return args, MATCHERS[args.matcher](resize, args.outdoor) 45 | 46 | 47 | if __name__ == '__main__': 48 | args, matcher = get_parser() 49 | 50 | if args.dataset == '7Scenes': 51 | for scene in args.scenes: 52 | scene_dir = Path(args.data_root) / scene 53 | im_pairs = parse_7scenes_matching_pairs( 54 | str(scene_dir / args.pair_txt)) # {(im1, im2) : (q, t, ess_mat)} 55 | pair_names = list(im_pairs.keys()) 56 | im_pairs_path = [(str(scene_dir / train_im), 57 | str(scene_dir / test_im)) for (train_im, test_im) in pair_names] 58 | 59 | pts_stack = list() 60 | print(f'Started {scene}') 61 | for pair in tqdm(im_pairs_path): 62 | pts = matcher.match(pair) 63 | pts_stack.append(pts) 64 | pts_stack = stack_pts(pts_stack) 65 | results = {'correspondences': pts_stack} 66 | np.savez_compressed(os.path.join( 67 | scene_dir, 68 | f'correspondences_{args.matcher}_{args.pair_txt}.npz'), 69 | **results) 70 | print(f'Finished {scene}') 71 | 72 | elif args.dataset == 'Mapfree': 73 | for scene_dir in args.scenes: 74 | query_frames_paths = parse_mapfree_query_frames(scene_dir / 'poses.txt') 75 | im_pairs_path = [(str(scene_dir / 'seq0' / 'frame_00000.jpg'), 76 | str(scene_dir / qpath)) for qpath in query_frames_paths] 77 | 78 | pts_stack = list() 79 | print(f'Started {scene_dir.name}') 80 | for pair in tqdm(im_pairs_path): 81 | pts = matcher.match(pair) 82 | pts_stack.append(pts) 83 | pts_stack = stack_pts(pts_stack) 84 | results = {'correspondences': pts_stack} 85 | np.savez_compressed(scene_dir / f'correspondences_{args.matcher}.npz', **results) 86 | print(f'Finished {scene_dir.name}') 87 | 88 | elif args.dataset == 'Scannet': 89 | im_pairs_path = load_scannet_imgpaths(args.pair_npz, args.data_root) 90 | pts_stack = list() 91 | print(f'Started Scannet') 92 | for pair in tqdm(im_pairs_path): 93 | pts = matcher.match(pair) 94 | pts_stack.append(pts) 95 | pts_stack = stack_pts(pts_stack) 96 | results = {'correspondences': pts_stack} 97 | np.savez_compressed( 98 | f'../../data/scannet_misc/correspondences_{args.matcher}_scannet_test.npz', 99 | **results) 100 | print(f'Finished Scannet') 101 | else: 102 | raise NotImplementedError('Invalid dataset') 103 | -------------------------------------------------------------------------------- /etc/feature_matching_baselines/matchers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import cv2 4 | 5 | from LoFTR.src.loftr import LoFTR, default_cfg 6 | from SuperGlue.models.utils import read_image 7 | from SuperGlue.models.matching import Matching 8 | 9 | torch.set_grad_enabled(False) 10 | 11 | 12 | class LoFTR_matcher: 13 | def __init__(self, resize, outdoor=False): 14 | # Initialize LoFTR 15 | 
print("started loading model") 16 | matcher = LoFTR(config=default_cfg) 17 | weights_path = "LoFTR/weights/outdoor_ot.ckpt" if outdoor else "LoFTR/weights/indoor_ot.ckpt" 18 | matcher.load_state_dict(torch.load(weights_path)['state_dict'], strict=False) 19 | matcher = matcher.eval().cuda() 20 | self.matcher = matcher 21 | print("model loaded") 22 | self.resize = resize 23 | 24 | def match(self, pair_path): 25 | '''retrurn correspondences between images (w/ path pair_path)''' 26 | 27 | input_path0, input_path1 = pair_path 28 | resize = self.resize 29 | resize_float = True 30 | rot0, rot1 = 0, 0 31 | device = 'cuda' 32 | 33 | # using resolution [640, 480] (default for 7Scenes, re-scale Scannet) 34 | image0, inp0, scales0 = read_image( 35 | input_path0, device, resize, rot0, resize_float) 36 | 37 | image1, inp1, scales1 = read_image( 38 | input_path1, device, resize, rot1, resize_float) 39 | 40 | # LoFTR needs resolution multiple of 8. If that is not the case, we pad 0's to get to a multiple of 8 41 | if inp0.size(2) % 8 != 0 or inp0.size(1) % 8 != 0: 42 | pad_bottom = inp0.size(2) % 8 43 | pad_right = inp0.size(3) % 8 44 | pad_fn = torch.nn.ConstantPad2d((0, pad_right, 0, pad_bottom), 0) 45 | inp0 = pad_fn(inp0) 46 | inp1 = pad_fn(inp1) 47 | 48 | with torch.no_grad(): 49 | batch = {'image0': inp0, 'image1': inp1} 50 | self.matcher(batch) 51 | mkpts0 = batch['mkpts0_f'].cpu().numpy() 52 | mkpts1 = batch['mkpts1_f'].cpu().numpy() 53 | 54 | if mkpts0.shape[0] > 0: 55 | pts = np.concatenate([mkpts0, mkpts1], axis=1) 56 | return pts 57 | else: 58 | print("no correspondences") 59 | return np.full((1, 4), np.nan) 60 | 61 | 62 | class SuperGlue_matcher: 63 | def __init__(self, resize, outdoor=False): 64 | # copied default values 65 | nms_radius = 4 66 | keypoint_threshold = 0.005 67 | max_keypoints = 1024 68 | 69 | superglue_weights = 'outdoor' if outdoor else 'indoor' # indoor trained on scannet 70 | sinkhorn_iterations = 20 71 | match_threshold = 0.2 72 | 73 | # Load the SuperPoint and SuperGlue models. 74 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 75 | print('Running inference on device \"{}\"'.format(device)) 76 | config = { 77 | 'superpoint': { 78 | 'nms_radius': nms_radius, 79 | 'keypoint_threshold': keypoint_threshold, 80 | 'max_keypoints': max_keypoints 81 | }, 82 | 'superglue': { 83 | 'weights': superglue_weights, 84 | 'sinkhorn_iterations': sinkhorn_iterations, 85 | 'match_threshold': match_threshold, 86 | } 87 | } 88 | self.matching = Matching(config).eval().to(device) 89 | self.device = device 90 | print('SuperGlue model loaded') 91 | self.resize = resize 92 | 93 | def match(self, pair_path): 94 | '''retrurn correspondences between images (w/ path pair_path)''' 95 | 96 | input_path0, input_path1 = pair_path 97 | resize = self.resize 98 | resize_float = True 99 | rot0, rot1 = 0, 0 100 | 101 | image0, inp0, scales0 = read_image( 102 | input_path0, self.device, resize, rot0, resize_float) 103 | image1, inp1, scales1 = read_image( 104 | input_path1, self.device, resize, rot1, resize_float) 105 | pred = self.matching({'image0': inp0, 'image1': inp1}) 106 | pred = {k: v[0].cpu().numpy() for k, v in pred.items()} 107 | kpts0, kpts1 = pred['keypoints0'], pred['keypoints1'] 108 | matches, conf = pred['matches0'], pred['matching_scores0'] 109 | 110 | # Keep the matching keypoints. 
111 | valid = matches > -1 112 | mkpts0 = kpts0[valid] 113 | mkpts1 = kpts1[matches[valid]] 114 | 115 | if mkpts0.shape[0] > 0: 116 | pts = np.concatenate([mkpts0, mkpts1], axis=1) 117 | return pts 118 | else: 119 | print("no correspondences") 120 | return np.full((1, 4), np.nan) 121 | 122 | 123 | class SIFT_matcher: 124 | def __init__(self, resize, outdoor=False): 125 | self.resize = resize 126 | 127 | def root_sift(self, descs): 128 | '''Apply the Hellinger kernel by first L1-normalizing, taking the square-root, and then L2-normalizing''' 129 | 130 | eps = 1e-7 131 | descs /= (descs.sum(axis=1, keepdims=True) + eps) 132 | descs = np.sqrt(descs) 133 | return descs 134 | 135 | def match(self, pair_path): 136 | ''' 137 | Given path to im1, im2, extract correspondences using OpenCV SIFT. 138 | Returns: pts (N x 4) array containing (x1, y1, x2, y2) correspondences; returns nan array if no correspondences. 139 | ''' 140 | 141 | im1_path, im2_path = pair_path 142 | 143 | # hyper-parameters 144 | ratio_test_threshold = 0.8 145 | n_features = 2048 146 | sift = cv2.SIFT_create(n_features) 147 | 148 | # Read images in grayscale 149 | img0 = cv2.imread(im1_path, 0) 150 | img1 = cv2.imread(im2_path, 0) 151 | 152 | # Resize 153 | img0 = cv2.resize(img0, self.resize) 154 | img1 = cv2.resize(img1, self.resize) 155 | 156 | # get SIFT key points and descriptors 157 | kp0, des0 = sift.detectAndCompute(img0, None) 158 | kp1, des1 = sift.detectAndCompute(img1, None) 159 | 160 | # Apply normalisation (rootSIFT) 161 | des0, des1 = self.root_sift(des0), self.root_sift(des1) 162 | 163 | # Get matches using FLANN 164 | FLANN_INDEX_KDTREE = 1 165 | index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5) 166 | search_params = dict(checks=50) 167 | flann = cv2.FlannBasedMatcher(index_params, search_params) 168 | matches = flann.knnMatch(des0, des1, k=2) 169 | 170 | pts1 = [] 171 | pts2 = [] 172 | good_matches = [] 173 | # ratio test as per Lowe's paper 174 | for i, (m, n) in enumerate(matches): 175 | if m.distance < ratio_test_threshold * n.distance: 176 | pts2.append(kp1[m.trainIdx].pt) 177 | pts1.append(kp0[m.queryIdx].pt) 178 | good_matches.append(m) 179 | 180 | pts1 = np.float32(pts1).reshape(-1, 2) 181 | pts2 = np.float32(pts2).reshape(-1, 2) 182 | 183 | if pts1.shape[0] > 0: 184 | pts = np.concatenate([pts1, pts2], axis=1) 185 | return pts 186 | else: 187 | print("no correspondences") 188 | return np.full((1, 4), np.nan) 189 | -------------------------------------------------------------------------------- /etc/feature_matching_baselines/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | 5 | def load_scannet_imgpaths(npz_path, root_dir): 6 | data_names = np.load(npz_path)['name'] 7 | pair_paths = [] 8 | 9 | for scene_name, scene_sub_name, stem_name_0, stem_name_1 in data_names: 10 | scene_name = f'scene{scene_name:04d}_{scene_sub_name:02d}' 11 | img_name0 = os.path.join(root_dir, scene_name, 'sensor_data', 12 | f'frame-{stem_name_0:06}.color.jpg') 13 | img_name1 = os.path.join(root_dir, scene_name, 'sensor_data', 14 | f'frame-{stem_name_1:06}.color.jpg') 15 | pair_paths.append((img_name0, img_name1)) 16 | 17 | return pair_paths 18 | 19 | 20 | def parse_7scenes_matching_pairs(pair_txt): 21 | """Get list of image pairs for matching 22 | Arg: 23 | pair_txt: file contains image pairs and essential 24 | matrix with line format 25 | image1 image2 sim w p q r x y z ess_vec 26 | Return: 27 | list of 3d-tuple contains (q=[wpqr], 
t=[xyz], essential matrix) 28 | """ 29 | im_pairs = {} 30 | f = open(pair_txt) 31 | for line in f: 32 | cur = line.split() 33 | im1, im2 = cur[0], cur[1] 34 | q = np.array([float(i) for i in cur[3:7]], dtype=np.float32) 35 | t = np.array([float(i) for i in cur[7:10]], dtype=np.float32) 36 | ess_mat = np.array([float(i) for i in cur[10:19]], dtype=np.float32).reshape(3, 3) 37 | im_pairs[(im1, im2)] = (q, t, ess_mat) 38 | f.close() 39 | return im_pairs 40 | 41 | 42 | def parse_mapfree_query_frames(pose_path): 43 | """ 44 | Get list of query frames given a pose path 45 | :param pose_path: 46 | :return: 47 | """ 48 | query_paths = [] 49 | with pose_path.open('r') as f: 50 | for l in f.readlines(): 51 | # skip if comment(#) or keyframe (seq0) 52 | if '#' in l or 'seq0' in l: 53 | continue 54 | qpath = l.strip().split(' ')[0] 55 | query_paths.append(qpath) 56 | return query_paths 57 | 58 | 59 | def stack_pts(pts_list): 60 | '''Given a pts list with N arrays, each shaped (Npts, D), where Npts varies, creates a common array shaped (N, max(Npts), D) filled with NaNs when Npts < Max(Npts)''' 61 | assert len(pts_list) > 0, 'list must not be empty' 62 | 63 | N = len(pts_list) 64 | max_npts = max([pts.shape[0] for pts in pts_list]) 65 | D = pts_list[0].shape[1] 66 | pts_stack = np.full((N, max_npts, D), np.nan) 67 | for i, pts in enumerate(pts_list): 68 | pts_stack[i, :pts.shape[0]] = pts 69 | return pts_stack 70 | -------------------------------------------------------------------------------- /etc/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nianticlabs/map-free-reloc/b5182dc7b32231edb3b0f01f8cc97f289ec8a241/etc/teaser.png -------------------------------------------------------------------------------- /lib/datasets/datamodules.py: -------------------------------------------------------------------------------- 1 | import torch.utils as utils 2 | from torchvision.transforms import ColorJitter, Grayscale 3 | import pytorch_lightning as pl 4 | 5 | from lib.datasets.sampler import RandomConcatSampler 6 | from lib.datasets.scannet import ScanNetDataset 7 | from lib.datasets.sevenscenes import SevenScenesDataset 8 | from lib.datasets.mapfree import MapFreeDataset 9 | 10 | 11 | class DataModule(pl.LightningDataModule): 12 | def __init__(self, cfg): 13 | super().__init__() 14 | self.cfg = cfg 15 | 16 | datasets = {'ScanNet': ScanNetDataset, 17 | '7Scenes': SevenScenesDataset, 18 | 'MapFree': MapFreeDataset} 19 | 20 | assert cfg.DATASET.DATA_SOURCE in datasets.keys(), 'invalid DATA_SOURCE, this dataset is not implemented' 21 | self.dataset_type = datasets[cfg.DATASET.DATA_SOURCE] 22 | 23 | def get_sampler(self, dataset, reset_epoch=False): 24 | if self.cfg.TRAINING.SAMPLER == 'scene_balance': 25 | sampler = RandomConcatSampler(dataset, 26 | self.cfg.TRAINING.N_SAMPLES_SCENE, 27 | self.cfg.TRAINING.SAMPLE_WITH_REPLACEMENT, 28 | shuffle=True, 29 | reset_on_iter=reset_epoch 30 | ) 31 | else: 32 | sampler = None 33 | return sampler 34 | 35 | def train_dataloader(self): 36 | transforms = ColorJitter() if self.cfg.DATASET.AUGMENTATION_TYPE == 'colorjitter' else None 37 | transforms = Grayscale( 38 | num_output_channels=3) if self.cfg.DATASET.BLACK_WHITE else transforms 39 | 40 | dataset = self.dataset_type(self.cfg, 'train', transforms=transforms) 41 | sampler = self.get_sampler(dataset) 42 | dataloader = utils.data.DataLoader(dataset, 43 | batch_size=self.cfg.TRAINING.BATCH_SIZE, 44 | num_workers=self.cfg.TRAINING.NUM_WORKERS, 45 | 
sampler=sampler 46 | ) 47 | return dataloader 48 | 49 | def val_dataloader(self): 50 | dataset = self.dataset_type(self.cfg, 'val') 51 | # Scannet uses scene_balance for validation (resets at each epoch, so val samples are always the same) 52 | if isinstance(dataset, ScanNetDataset): 53 | sampler = self.get_sampler(dataset, reset_epoch=True) 54 | else: 55 | sampler = None 56 | dataloader = utils.data.DataLoader(dataset, 57 | batch_size=self.cfg.TRAINING.BATCH_SIZE, 58 | num_workers=self.cfg.TRAINING.NUM_WORKERS, 59 | sampler=sampler, 60 | drop_last=True 61 | ) 62 | return dataloader 63 | 64 | def test_dataloader(self): 65 | dataset = self.dataset_type(self.cfg, 'test') 66 | dataloader = utils.data.DataLoader(dataset, 67 | batch_size=1, 68 | num_workers=1, 69 | shuffle=False) 70 | return dataloader 71 | -------------------------------------------------------------------------------- /lib/datasets/sampler.py: -------------------------------------------------------------------------------- 1 | # From https://github.com/zju3dv/LoFTR/blob/261baf641cb9ada07dd9746e420ada7fe8a03152/src/datasets/sampler.py 2 | import torch 3 | from torch.utils.data import Sampler, ConcatDataset 4 | 5 | 6 | class RandomConcatSampler(Sampler): 7 | """ Random sampler for ConcatDataset. At each epoch, `n_samples_per_subset` samples will be draw from each subset 8 | in the ConcatDataset. If `subset_replacement` is ``True``, sampling within each subset will be done with replacement. 9 | However, it is impossible to sample data without replacement between epochs, unless bulding a stateful sampler lived along the entire training phase. 10 | 11 | For current implementation, the randomness of sampling is ensured no matter the sampler is recreated across epochs or not and call `torch.manual_seed()` or not. 12 | Args: 13 | shuffle (bool): shuffle the random sampled indices across all sub-datsets. 14 | repeat (int): repeatedly use the sampled indices multiple times for training. 15 | [arXiv:1902.05509, arXiv:1901.09335] 16 | NOTE: Don't re-initialize the sampler between epochs (will lead to repeated samples) 17 | NOTE: This sampler behaves differently with DistributedSampler. 18 | It assume the dataset is splitted across ranks instead of replicated. 19 | TODO: Add a `set_epoch()` method to fullfill sampling without replacement across epochs. 
20 | ref: https://github.com/PyTorchLightning/pytorch-lightning/blob/e9846dd758cfb1500eb9dba2d86f6912eb487587/pytorch_lightning/trainer/training_loop.py#L373 21 | """ 22 | 23 | def __init__(self, 24 | data_source: ConcatDataset, 25 | n_samples_per_subset: int, 26 | subset_replacement: bool = True, 27 | shuffle: bool = True, 28 | repeat: int = 1, 29 | seed: int = 66, 30 | reset_on_iter: bool = False): 31 | if not isinstance(data_source, ConcatDataset): 32 | raise TypeError("data_source should be torch.utils.data.ConcatDataset") 33 | 34 | self.data_source = data_source 35 | self.n_subset = len(self.data_source.datasets) 36 | self.n_samples_per_subset = n_samples_per_subset 37 | self.n_samples = self.n_subset * self.n_samples_per_subset * repeat 38 | self.subset_replacement = subset_replacement 39 | self.repeat = repeat 40 | self.shuffle = shuffle 41 | self.seed = seed 42 | self.reset_on_iter = reset_on_iter # If true, recreate random seed to that samples are the same every epoch 43 | self.generator = torch.manual_seed(self.seed) 44 | assert self.repeat >= 1 45 | 46 | def __len__(self): 47 | return self.n_samples 48 | 49 | def __iter__(self): 50 | if self.reset_on_iter: 51 | self.generator = torch.manual_seed(self.seed) 52 | 53 | indices = [] 54 | # sample from each sub-dataset 55 | for d_idx in range(self.n_subset): 56 | low = 0 if d_idx == 0 else self.data_source.cumulative_sizes[d_idx - 1] 57 | high = self.data_source.cumulative_sizes[d_idx] 58 | if self.subset_replacement: 59 | rand_tensor = torch.randint(low, high, (self.n_samples_per_subset,), 60 | generator=self.generator, dtype=torch.int64) 61 | else: # sample without replacement 62 | len_subset = len(self.data_source.datasets[d_idx]) 63 | rand_tensor = torch.randperm(len_subset, generator=self.generator) + low 64 | if len_subset >= self.n_samples_per_subset: 65 | rand_tensor = rand_tensor[:self.n_samples_per_subset] 66 | else: # padding with replacement 67 | rand_tensor_replacement = torch.randint( 68 | low, high, (self.n_samples_per_subset - len_subset,), 69 | generator=self.generator, dtype=torch.int64) 70 | rand_tensor = torch.cat([rand_tensor, rand_tensor_replacement]) 71 | indices.append(rand_tensor) 72 | indices = torch.cat(indices) 73 | if self.shuffle: # shuffle the sampled dataset (from multiple subsets) 74 | rand_tensor = torch.randperm(len(indices), generator=self.generator) 75 | indices = indices[rand_tensor] 76 | 77 | # repeat the sampled indices (can be used for RepeatAugmentation or pure RepeatSampling) 78 | if self.repeat > 1: 79 | repeat_indices = [indices.clone() for _ in range(self.repeat - 1)] 80 | if self.shuffle: 81 | def _choice(x): return x[torch.randperm(len(x), generator=self.generator)] 82 | repeat_indices = map(_choice, repeat_indices) 83 | indices = torch.cat([indices, *repeat_indices], 0) 84 | 85 | assert indices.shape[0] == self.n_samples 86 | return iter(indices.tolist()) 87 | -------------------------------------------------------------------------------- /lib/datasets/scannet.py: -------------------------------------------------------------------------------- 1 | # Based on https://github.com/zju3dv/LoFTR/blob/master/src/datasets/scannet.py 2 | from os import path as osp 3 | from os import listdir 4 | 5 | import numpy as np 6 | import torch 7 | import torch.utils as utils 8 | from numpy.linalg import inv 9 | 10 | from lib.datasets.utils import ( 11 | read_color_image, 12 | read_depth_image, 13 | read_scannet_pose, 14 | read_scannet_intrinsic, 15 | correct_intrinsic_scale 16 | ) 17 | 18 | 19 | class 
ScanNetScene(utils.data.Dataset):
20 | def __init__(self,
21 | root_dir,
22 | npz_path,
23 | mode='train',
24 | min_overlap_score=0.4,
25 | augment_fn=None,
26 | resize=(640, 480),
27 | estimated_depth=None,
28 | **kwargs):
29 | """Manage one scene of ScanNet Dataset.
30 | Args:
31 | root_dir (str): ScanNet root directory that contains scene folders.
32 | npz_path (str): {scene_id}.npz path. This contains image pair information of a scene.
33 | mode (str): options are ['train', 'val', 'test'].
34 | min_overlap_score (float): [train only] discard pairs with overlap_score < min_overlap_score.
35 | augment_fn (callable, optional): augments images with pre-defined visual effects.
36 | resize (tuple): (width, height) to which images are resized.
37 | estimated_depth (str, optional): path to an NPZ file with pre-computed depth maps; if None, GT depth is loaded.
38 | """
39 | super().__init__()
40 | self.root_dir = root_dir
41 | self.mode = mode
42 | self.resize = resize
43 |
44 | # prepare data_names, intrinsics and extrinsics(T)
45 | with np.load(npz_path) as data:
46 | self.data_names = data['name']
47 | if 'score' in data.keys() and mode not in ['val', 'test']: # overlap filtering is train-only
48 | kept_mask = data['score'] > min_overlap_score
49 | self.data_names = self.data_names[kept_mask]
50 |
51 | # for training
52 | self.augment_fn = augment_fn if mode == 'train' else None
53 |
54 | # load pre-computed estimated depth, if exists
55 | self.depthmaps = np.load(estimated_depth) if estimated_depth is not None else None
56 |
57 | def __len__(self):
58 | return len(self.data_names)
59 |
60 | def _read_abs_pose(self, scene_name, name):
61 | pth = osp.join(self.root_dir,
62 | scene_name,
63 | 'sensor_data', f'frame-{name:06}.pose.txt')
64 | return read_scannet_pose(pth)
65 |
66 | def _compute_rel_pose(self, scene_name, name0, name1):
67 | pose0 = self._read_abs_pose(scene_name, name0)
68 | pose1 = self._read_abs_pose(scene_name, name1)
69 |
70 | return np.matmul(pose1, inv(pose0)) # (4, 4)
71 |
72 | def __getitem__(self, idx):
73 | scene_name, scene_sub_name, stem_name_0, stem_name_1 = self.data_names[idx]
74 | scene_name = f'scene{scene_name:04d}_{scene_sub_name:02d}'
75 |
76 | # loads image and rescales.
apply augmentation if available 77 | img_name0 = osp.join(self.root_dir, scene_name, 'sensor_data', 78 | f'frame-{stem_name_0:06}.color.jpg') 79 | img_name1 = osp.join(self.root_dir, scene_name, 'sensor_data', 80 | f'frame-{stem_name_1:06}.color.jpg') 81 | image0 = read_color_image(img_name0, resize=self.resize, augment_fn=self.augment_fn) 82 | image1 = read_color_image(img_name1, resize=self.resize, augment_fn=self.augment_fn) 83 | 84 | # read the depthmap which is stored as (480, 640) 85 | if self.mode in ['test']: 86 | if self.depthmaps is None: 87 | # Load GT depth 88 | dimg_name0 = osp.join(self.root_dir, scene_name, 'sensor_data', 89 | f'frame-{stem_name_0:06}.depth.pgm') 90 | dimg_name1 = osp.join(self.root_dir, scene_name, 'sensor_data', 91 | f'frame-{stem_name_1:06}.depth.pgm') 92 | depth0 = read_depth_image(dimg_name0) 93 | depth1 = read_depth_image(dimg_name1) 94 | else: 95 | # Load pre-computed depth (using arbitrary methods) from npz file 96 | def key(frame_idx): return f'{scene_name[5:]}_frame_{frame_idx:06}' 97 | depth0 = torch.from_numpy(self.depthmaps[key(stem_name_0)].astype(np.float32)) 98 | depth1 = torch.from_numpy(self.depthmaps[key(stem_name_1)].astype(np.float32)) 99 | else: 100 | depth0 = depth1 = torch.tensor([]) 101 | 102 | # get intrinsics 103 | intrinsics_path = osp.join(self.root_dir, scene_name, 'sensor_data', '_info.txt') 104 | K_color = read_scannet_intrinsic(intrinsics_path, color=True) 105 | K_color = correct_intrinsic_scale( 106 | K_color, scale_x=self.resize[0] / 1296, scale_y=self.resize[1] / 968) 107 | K_color = torch.from_numpy(K_color) 108 | K_depth = torch.from_numpy(read_scannet_intrinsic(intrinsics_path, color=False)) 109 | 110 | # read and compute relative poses 111 | T_0to1 = torch.tensor(self._compute_rel_pose(scene_name, stem_name_0, stem_name_1), 112 | dtype=torch.float32) 113 | T_1to0 = T_0to1.inverse() 114 | 115 | data = { 116 | 'image0': image0, # (3, h, w) 117 | 'depth0': depth0, # (h, w) 118 | 'image1': image1, 119 | 'depth1': depth1, 120 | 'T_0to1': T_0to1, # (4, 4) 121 | 'T_1to0': T_1to0, 122 | 'K_color0': K_color, # (3, 3) 123 | 'K_color1': K_color, # (3, 3) 124 | 'K_depth': K_depth, # (3, 3) 125 | 'dataset_name': 'ScanNet', 126 | 'scene_id': scene_name, 127 | 'pair_id': idx, 128 | 'pair_names': (osp.join(scene_name, 'color', f'{stem_name_0}.jpg'), 129 | osp.join(scene_name, 'color', f'{stem_name_1}.jpg')) 130 | } 131 | 132 | return data 133 | 134 | 135 | class ScanNetDataset(utils.data.ConcatDataset): 136 | def __init__(self, 137 | cfg, 138 | mode: str, 139 | transforms=None): 140 | assert mode in ('train', 'val', 'test'), 'Invalid dataset mode' 141 | 142 | root_dir = cfg.DATASET.DATA_ROOT 143 | index_npz_dir = cfg.DATASET.NPZ_ROOT 144 | min_overlap_score = cfg.DATASET.MIN_OVERLAP_SCORE 145 | resize = (cfg.DATASET.WIDTH, cfg.DATASET.HEIGHT) 146 | estimated_depth = cfg.DATASET.ESTIMATED_DEPTH 147 | 148 | # create a dataset for each npz file 149 | # usually each npz file contains the information for a single scene (training and val) 150 | # however, for testing all pairs are concatenated into a single npz file (test.npz) 151 | root_dir = osp.join(root_dir, 'scans_test' if mode == 'test' else 'scans') 152 | npz_path = osp.join(index_npz_dir, mode) 153 | npz_list = [osp.join(npz_path, fname) for fname in listdir(npz_path) if fname[-3:] == 'npz'] 154 | 155 | dataset_list = [ScanNetScene(root_dir=root_dir, 156 | npz_path=npz_fname, 157 | mode=mode, 158 | min_overlap_score=min_overlap_score, 159 | augment_fn=transforms, 160 | 
resize=resize, 161 | estimated_depth=estimated_depth) for npz_fname in npz_list] 162 | 163 | super().__init__(dataset_list) 164 | -------------------------------------------------------------------------------- /lib/datasets/sevenscenes.py: -------------------------------------------------------------------------------- 1 | # Based on https://github.com/GrumpyZhou/visloc-relapose/blob/master/utils/datasets/relapose.py 2 | 3 | import os 4 | import glob 5 | 6 | import torch 7 | import torch.utils.data as data 8 | import numpy as np 9 | from scipy.spatial.transform import Rotation 10 | 11 | from lib.datasets.utils import read_color_image, read_depth_image, correct_intrinsic_scale 12 | 13 | 14 | class SceneDataset(data.Dataset): 15 | def __init__(self, scene_root, pair_txt, resize, transforms=None, one_nn=False, 16 | estimated_depth=None): 17 | ''' scene_root: path to scene folder 18 | pair_txt: path to file specifying the (reference,query) pairs 19 | resize: shape to resize images 20 | transforms: function to apply to images 21 | one_nn: if True, keep only the reference image with highest DVLAD similarity to each query 22 | ''' 23 | self.scene_root = scene_root 24 | self.transforms = transforms 25 | self.resize = resize 26 | self.estimated_depth = estimated_depth 27 | 28 | # load relative poses for given pairs 29 | self.im_pairs, self.relv_poses, _, self.sim = self.parse_relv_pose_txt(os.path.join( 30 | scene_root, 31 | pair_txt)) 32 | self.original_idxs = list(range(len(self.im_pairs))) 33 | if one_nn: 34 | self.filter_one_nn() 35 | self.num = len(self.im_pairs) 36 | 37 | # load absolute poses for each sample 38 | self.abs_poses = self.parse_abs_pose_txt(os.path.join(scene_root, 'dataset_test.txt')) 39 | self.abs_poses.update(self.parse_abs_pose_txt( 40 | os.path.join(scene_root, 'dataset_train.txt'))) 41 | 42 | # static intrinsic matrix 43 | ox, oy = 320, 240 44 | f = 525 45 | self.K = np.array([[f, 0, ox], [0, f, oy], [0, 0, 1]], dtype=np.float32) 46 | self.K = correct_intrinsic_scale(self.K, resize[0] / 640, resize[1] / 480) 47 | 48 | def parse_relv_pose_txt(self, fpath, with_ess=False): 49 | '''Relative pose pair format:image1 image2 sim w p q r x y z ess_vec''' 50 | im_pairs = [] 51 | ess_vecs = [] if with_ess else None 52 | relv_poses = [] 53 | sim = [] 54 | with open(fpath) as f: 55 | for line in f: 56 | cur = line.split() 57 | im_pairs.append((cur[0], cur[1])) 58 | sim.append(float(cur[2])) 59 | q = np.array([float(i) for i in cur[3:7]], dtype=np.float32) 60 | t = np.array([float(i) for i in cur[7:10]], dtype=np.float32) 61 | 62 | # change q convention to [x, y, z, w] 63 | q = q[[1, 2, 3, 0]] 64 | R = Rotation.from_quat(q).as_matrix() 65 | 66 | # Convert to rotation matrix and 4x4 pose matrix 67 | T = np.eye(4) 68 | T[:3, :3] = R 69 | T[:3, -1] = t.ravel() 70 | relv_poses.append(T) 71 | 72 | if with_ess: 73 | ess_vecs.append(np.array([float(i) for i in cur[10:19]], dtype=np.float32)) 74 | return im_pairs, relv_poses, ess_vecs, sim 75 | 76 | def parse_abs_pose_txt(self, fpath): 77 | """Absolute pose label format: 78 | 3 header lines 79 | list of samples with format: 80 | image x y z w p q r 81 | """ 82 | 83 | pose_dict = {} 84 | with open(fpath) as f: 85 | for line in f.readlines()[3::]: # Skip 3 header lines 86 | cur = line.split(' ') 87 | c = np.array([float(v) for v in cur[1:4]], dtype=np.float32) 88 | q = np.array([float(v) for v in cur[4:8]], dtype=np.float32) 89 | im = cur[0] 90 | pose_dict[im] = (c, q) 91 | return pose_dict 92 | 93 | def filter_one_nn(self): 94 | """Filters 
pairs such that for each query image, only the reference image with highest similarity is kept""" 95 | 96 | kept_queries_idx = {} # dict (query image, kept_idx) 97 | kept_queries_sim = {} # dict (query image, kept_similarity) 98 | 99 | for i, ((ref, query), sim) in enumerate(zip(self.im_pairs, self.sim)): 100 | if query in kept_queries_sim: 101 | if sim < kept_queries_sim[query]: 102 | continue 103 | 104 | kept_queries_idx[query] = i 105 | kept_queries_sim[query] = sim 106 | 107 | # update internal arrays 108 | keep_idxs = list(kept_queries_idx.values()) 109 | self.im_pairs = [self.im_pairs[idx] for idx in keep_idxs] 110 | self.relv_poses = [self.relv_poses[idx] for idx in keep_idxs] 111 | self.sim = [self.sim[idx] for idx in keep_idxs] 112 | self.original_idxs = keep_idxs 113 | 114 | def __getitem__(self, index): 115 | # load color images 116 | im1_path, im2_path = [os.path.join(self.scene_root, im_ref) 117 | for im_ref in self.im_pairs[index]] 118 | image1 = read_color_image(im1_path, self.resize, augment_fn=self.transforms) 119 | image2 = read_color_image(im2_path, self.resize, augment_fn=self.transforms) 120 | 121 | # load depth maps 122 | depth_path_suffix = '.depth.' if self.estimated_depth is None else f'.depth.{self.estimated_depth}.' 123 | dim1_path = im1_path.replace('.color.', depth_path_suffix) 124 | dim2_path = im2_path.replace('.color.', depth_path_suffix) 125 | depth1 = read_depth_image(dim1_path) 126 | depth2 = read_depth_image(dim2_path) 127 | 128 | # get relative pose transformation 129 | T_0to1 = torch.tensor(self.relv_poses[index], dtype=torch.float32) 130 | 131 | # get absolute pose of im0 and im1 132 | im1ref, im2ref = self.im_pairs[index] 133 | # center of camera 1 in world coordinates, quaternion transf. from camera to world 134 | c1, q1 = self.abs_poses[im1ref] 135 | # center of camera 2 in world coordinates, quaternion transf. from camera to world 136 | c2, q2 = self.abs_poses[im2ref] 137 | 138 | data = { 139 | 'image0': image1, # (3, h, w) 140 | 'depth0': depth1, # (h, w) 141 | 'image1': image2, 142 | 'depth1': depth2, 143 | 'T_0to1': T_0to1, # (4, 4) # relative pose 144 | 'abs_q_0': q1, 145 | 'abs_c_0': c1, 146 | 'abs_q_1': q2, 147 | 'abs_c_1': c2, 148 | 'sim': self.sim[index], # DVLAD similarity 149 | 'K_color0': self.K.copy(), # (3, 3) 150 | 'K_color1': self.K.copy(), # (3, 3) 151 | 'K_depth': self.K.copy(), # (3, 3) 152 | 'dataset_name': '7Scenes', 153 | 'scene_id': self.scene_root.split('/')[-1], 154 | 'scene_root': str(self.scene_root), 155 | 'pair_id': self.original_idxs[index], 156 | 'pair_names': self.im_pairs[index] 157 | } 158 | 159 | return data 160 | 161 | def __len__(self): 162 | return self.num 163 | 164 | 165 | class SevenScenesDataset(data.ConcatDataset): 166 | def __init__(self, cfg, mode, transforms=None): 167 | 168 | scenes = cfg.DATASET.SCENES 169 | data_root = cfg.DATASET.DATA_ROOT 170 | resize = (cfg.DATASET.WIDTH, cfg.DATASET.HEIGHT) 171 | # If None, loads GT depth. 
Otherwise, loads depth map with name `pairs.depth.suffix.png` where suffix is estimated_depth 172 | estimated_depth = cfg.DATASET.ESTIMATED_DEPTH 173 | 174 | assert mode in ['train', 'val', 'test'], 'Invalid dataset mode' 175 | pair_txt = {'train': cfg.DATASET.PAIRS_TXT.TRAIN, 176 | 'val': cfg.DATASET.PAIRS_TXT.VAL, 177 | 'test': cfg.DATASET.PAIRS_TXT.TEST}[mode] 178 | one_nn = cfg.DATASET.PAIRS_TXT.ONE_NN 179 | 180 | if scenes is None: 181 | # Locate all scenes of the current dataset 182 | scenes = self.glob_scenes(data_root, pair_txt) 183 | 184 | # Init dataset objects for each scene 185 | data_srcs = [ 186 | SceneDataset( 187 | os.path.join(data_root, scene), 188 | pair_txt, resize, transforms, one_nn, estimated_depth) for scene in scenes] 189 | super().__init__(data_srcs) 190 | 191 | def glob_scenes(self, data_root, pair_txt): 192 | scenes = [] 193 | for sdir in glob.iglob('{}/*/{}'.format(data_root, pair_txt)): 194 | sdir = sdir.split('/')[-2] 195 | scenes.append(sdir) 196 | return sorted(scenes) 197 | -------------------------------------------------------------------------------- /lib/datasets/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import torch 4 | from numpy.linalg import inv 5 | 6 | 7 | def imread(path, augment_fn=None): 8 | cv_type = cv2.IMREAD_COLOR 9 | image = cv2.imread(str(path), cv_type) 10 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 11 | 12 | if augment_fn is not None: 13 | image = cv2.imread(str(path), cv2.IMREAD_COLOR) 14 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 15 | image = augment_fn(image) 16 | image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) 17 | return image # (h, w, 3) 18 | 19 | 20 | def get_resized_wh(w, h, resize=None): 21 | if resize is not None: # resize the longer edge 22 | scale = resize / max(h, w) 23 | w_new, h_new = int(round(w * scale)), int(round(h * scale)) 24 | else: 25 | w_new, h_new = w, h 26 | return w_new, h_new 27 | 28 | 29 | def get_divisible_wh(w, h, df=None): 30 | if df is not None: 31 | w_new, h_new = map(lambda x: int(x // df * df), [w, h]) 32 | else: 33 | w_new, h_new = w, h 34 | return w_new, h_new 35 | 36 | 37 | def pad_bottom_right(inp, pad_size, ret_mask=False): 38 | assert isinstance(pad_size, int) and pad_size >= max( 39 | inp.shape[-2:]), f"{pad_size} < {max(inp.shape[-2:])}" 40 | mask = None 41 | if inp.ndim == 2: 42 | padded = np.zeros((pad_size, pad_size), dtype=inp.dtype) 43 | padded[:inp.shape[0], :inp.shape[1]] = inp 44 | if ret_mask: 45 | mask = np.zeros((pad_size, pad_size), dtype=bool) 46 | mask[:inp.shape[0], :inp.shape[1]] = True 47 | elif inp.ndim == 3: 48 | padded = np.zeros((inp.shape[0], pad_size, pad_size), dtype=inp.dtype) 49 | padded[:, :inp.shape[1], :inp.shape[2]] = inp 50 | if ret_mask: 51 | mask = np.zeros((inp.shape[0], pad_size, pad_size), dtype=bool) 52 | mask[:, :inp.shape[1], :inp.shape[2]] = True 53 | else: 54 | raise NotImplementedError() 55 | return padded, mask 56 | 57 | 58 | def read_color_image(path, resize=(640, 480), augment_fn=None): 59 | """ 60 | Args: 61 | resize (tuple): align image to depthmap, in (w, h). 
62 | augment_fn (callable, optional): augments images with pre-defined visual effects 63 | Returns: 64 | image (torch.tensor): (3, h, w) 65 | """ 66 | # read and resize image 67 | image = imread(path, None) 68 | image = cv2.resize(image, resize) 69 | 70 | # (h, w, 3) -> (3, h, w) and normalized 71 | image = torch.from_numpy(image).float().permute(2, 0, 1) / 255 72 | if augment_fn: 73 | image = augment_fn(image) 74 | return image 75 | 76 | 77 | def read_depth_image(path): 78 | depth = cv2.imread(str(path), cv2.IMREAD_UNCHANGED) 79 | depth = depth / 1000 80 | depth = torch.from_numpy(depth).float() # (h, w) 81 | return depth 82 | 83 | 84 | def read_scannet_pose(path): 85 | """ Read ScanNet's Camera2World pose and transform it to World2Camera. 86 | 87 | Returns: 88 | pose_w2c (np.ndarray): (4, 4) 89 | """ 90 | cam2world = np.loadtxt(path, delimiter=' ') 91 | world2cam = inv(cam2world) 92 | return world2cam 93 | 94 | 95 | def read_scannet_intrinsic(path, color=True): 96 | """ 97 | Read ScanNet's intrinsic matrix and returns 3x3 matrix. If color is True, returns color camera intrinsics. 98 | Otherwise returns depth camera intrinsics. 99 | The file containing the intrinsics is located in {scannet_root}/scans/scene{id}/sensor_data/_info.txt 100 | This file has the intrinsics of the depth camera and color camera under the keys 'm_calibrationColorIntrinsic' 101 | and 'm_calibrationDepthIntrinsic'. 102 | """ 103 | 104 | key = 'm_calibrationColorIntrinsic' if color else 'm_calibrationDepthIntrinsic' 105 | 106 | with open(path, 'r') as f: 107 | for line in f.readlines(): 108 | if key in line: 109 | mat = line.split(' = ')[1] 110 | mat = mat.lstrip().rstrip().split(' ') 111 | mat = [float(m) for m in mat] 112 | return np.array(mat).reshape(4, 4)[:-1, :-1] 113 | 114 | raise Exception(f'Invalid key {key}') 115 | 116 | 117 | def correct_intrinsic_scale(K, scale_x, scale_y): 118 | '''Given an intrinsic matrix (3x3) and two scale factors, returns the new intrinsic matrix corresponding to 119 | the new coordinates x' = scale_x * x; y' = scale_y * y 120 | Source: https://dsp.stackexchange.com/questions/6055/how-does-resizing-an-image-affect-the-intrinsic-camera-matrix 121 | ''' 122 | 123 | transform = np.eye(3) 124 | transform[0, 0] = scale_x 125 | transform[0, 2] = scale_x / 2 - 0.5 126 | transform[1, 1] = scale_y 127 | transform[1, 2] = scale_y / 2 - 0.5 128 | Kprime = transform @ K 129 | 130 | return Kprime 131 | -------------------------------------------------------------------------------- /lib/models/builder.py: -------------------------------------------------------------------------------- 1 | import torch.cuda 2 | 3 | from lib.models.regression.model import RegressionModel 4 | from lib.models.regression.model import RegressionMultiFrameModel 5 | from lib.models.matching.model import FeatureMatchingModel 6 | 7 | 8 | def build_model(cfg, checkpoint=''): 9 | if cfg.MODEL == 'FeatureMatching': 10 | return FeatureMatchingModel(cfg) 11 | elif cfg.MODEL == 'Regression': 12 | model = RegressionModel.load_from_checkpoint(checkpoint, cfg=cfg) if \ 13 | checkpoint is not '' else RegressionModel(cfg) 14 | if torch.cuda.is_available(): 15 | model = model.cuda() 16 | model.eval() 17 | return model 18 | elif cfg.MODEL == 'RegressionMultiFrame': 19 | model = RegressionMultiFrameModel.load_from_checkpoint(checkpoint, cfg=cfg) if \ 20 | checkpoint is not '' else RegressionMultiFrameModel(cfg) 21 | if torch.cuda.is_available(): 22 | model = model.cuda() 23 | model.eval() 24 | return model 25 | else: 26 | raise 
NotImplementedError() 27 | -------------------------------------------------------------------------------- /lib/models/matching/feature_matching.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 as cv 3 | 4 | 5 | class PrecomputedMatching: 6 | '''Get correspondences from pre-computed file''' 7 | 8 | def __init__(self, cfg): 9 | # Scannet correspondences are stored in a single file, pointed by MATCHES_FILE_PATH 10 | # 7Scenes correspondences are split in a file per scene and dependent on the pairs. 11 | # The 7Scenes file pattern (including {scene_id} and {test_pairs} tags) is stored in MATCHES_FILE_PATH 12 | 13 | self.correspondences = None 14 | self.debug = cfg.DEBUG 15 | 16 | # If there is a pattern, save that string pattern, and will load correspondences once the scene_id is defined 17 | if '{' in cfg.MATCHES_FILE_PATH: 18 | self.matches_file_path = cfg.MATCHES_FILE_PATH 19 | self.scene_id = None 20 | self.pairs_txt = cfg.DATASET.PAIRS_TXT.TEST 21 | else: 22 | self.load_correspondences(cfg.MATCHES_FILE_PATH) 23 | 24 | def load_correspondences(self, file_path): 25 | data = np.load(file_path, allow_pickle=True) 26 | self.correspondences = data['correspondences'].astype(np.float32) 27 | 28 | def get_correspondences(self, data): 29 | # Check if loaded scene_id is still valid (in the case where correspondences are stored over multiple files) 30 | # If not, load the correct scene_id correspondences 31 | if hasattr(self, 'scene_id'): 32 | if self.scene_id != data['scene_id'][0]: 33 | self.scene_id = data['scene_id'][0] 34 | scene_root = data['scene_root'][0] 35 | matches_fpath = self.matches_file_path.format( 36 | scene_root=scene_root, pairs_txt=self.pairs_txt) 37 | self.load_correspondences(matches_fpath) 38 | 39 | # get correspondences for the given pair 40 | pair_id = data['pair_id'].item() 41 | corr = self.correspondences[pair_id] 42 | 43 | # remove nan's (filler) 44 | corr = corr[~np.isnan(corr)].reshape(-1, 4) 45 | if len(corr) > 0: 46 | pts1, pts2 = corr[:, :2], corr[:, 2:] 47 | else: 48 | pts1 = pts2 = np.array([]) 49 | 50 | return pts1, pts2 51 | 52 | 53 | class SIFTMatching: 54 | def __init__(self, cfg): 55 | 56 | # SIFT parameters 57 | self.ratio_threshold = cfg.SIFT.RATIO_THRESHOLD 58 | self.sift = cv.SIFT_create(cfg.SIFT.NUM_FEATURES) 59 | self.debug = cfg.DEBUG 60 | 61 | def transform_grayscale(self, img): 62 | img = img.permute(1, 2, 0).numpy() 63 | img = (255 * img).astype(np.uint8) 64 | img_gray = cv.cvtColor(img, cv.COLOR_RGB2GRAY) 65 | return img_gray 66 | 67 | def root_sift(self, descs): 68 | '''Apply the Hellinger kernel by first L1-normalizing, taking the square-root, and then L2-normalizing''' 69 | 70 | eps = 1e-7 71 | descs /= (descs.sum(axis=1, keepdims=True) + eps) 72 | descs = np.sqrt(descs) 73 | return descs 74 | 75 | def get_correspondences(self, data): 76 | # get grayscale images 77 | img0 = self.transform_grayscale(data['image0'].squeeze(0)) 78 | img1 = self.transform_grayscale(data['image1'].squeeze(0)) 79 | 80 | # get SIFT key points and descriptors 81 | kp0, des0 = self.sift.detectAndCompute(img0, None) 82 | kp1, des1 = self.sift.detectAndCompute(img1, None) 83 | 84 | # Apply normalisation (rootSIFT) 85 | des0, des1 = self.root_sift(des0), self.root_sift(des1) 86 | 87 | # Get matches using FLANN 88 | FLANN_INDEX_KDTREE = 1 89 | index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5) 90 | search_params = dict(checks=50) 91 | flann = cv.FlannBasedMatcher(index_params, search_params) 92 
| matches = flann.knnMatch(des0, des1, k=2) 93 | 94 | pts1 = [] 95 | pts2 = [] 96 | good_matches = [] 97 | # ratio test as per Lowe's paper 98 | for i, (m, n) in enumerate(matches): 99 | if m.distance < self.ratio_threshold * n.distance: 100 | pts2.append(kp1[m.trainIdx].pt) 101 | pts1.append(kp0[m.queryIdx].pt) 102 | good_matches.append(m) 103 | 104 | pts1 = np.float32(pts1).reshape(-1, 2) 105 | pts2 = np.float32(pts2).reshape(-1, 2) 106 | 107 | # plot results (DEBUG) 108 | if self.debug: 109 | img_matches = np.empty( 110 | (max(img0.shape[0], 111 | img1.shape[0]), 112 | img1.shape[1] + img1.shape[1], 113 | 3), 114 | dtype=np.uint8) 115 | cv.drawMatches(img0, kp0, img1, kp1, good_matches, img_matches, 116 | flags=cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS) 117 | data['debug_img_matches'] = img_matches 118 | return pts1, pts2 119 | -------------------------------------------------------------------------------- /lib/models/matching/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from lib.models.matching.feature_matching import * 4 | from lib.models.matching.pose_solver import * 5 | 6 | 7 | class FeatureMatchingModel(torch.nn.Module): 8 | def __init__(self, cfg): 9 | super().__init__() 10 | 11 | if cfg.FEATURE_MATCHING == 'SIFT': 12 | self.feature_matching = SIFTMatching(cfg) 13 | elif cfg.FEATURE_MATCHING == 'Precomputed': 14 | self.feature_matching = PrecomputedMatching(cfg) 15 | else: 16 | raise NotImplementedError('Invalid feature matching') 17 | 18 | if cfg.POSE_SOLVER == 'EssentialMatrix': 19 | self.pose_solver = EssentialMatrixSolver(cfg) 20 | elif cfg.POSE_SOLVER == 'EssentialMatrixMetric': 21 | self.pose_solver = EssentialMatrixMetricSolver(cfg) 22 | elif cfg.POSE_SOLVER == 'Procrustes': 23 | self.pose_solver = ProcrustesSolver(cfg) 24 | elif cfg.POSE_SOLVER == 'PNP': 25 | self.pose_solver = PnPSolver(cfg) 26 | else: 27 | raise NotImplementedError('Invalid pose solver') 28 | 29 | def forward(self, data): 30 | assert data['depth0'].shape[0] == 1, 'Baseline models require batch size of 1' 31 | 32 | # get 2D-2D correspondences 33 | pts1, pts2 = self.feature_matching.get_correspondences(data) 34 | 35 | # get relative pose 36 | R, t, inliers = self.pose_solver.estimate_pose(pts1, pts2, data) 37 | data['inliers'] = inliers 38 | R = torch.from_numpy(R.copy()).unsqueeze(0).float() 39 | t = torch.from_numpy(t.copy()).view(1, 3).unsqueeze(0).float() 40 | return R, t 41 | -------------------------------------------------------------------------------- /lib/models/regression/encoder/preact.py: -------------------------------------------------------------------------------- 1 | '''Pre-activation ResNet in PyTorch. 2 | https://github.com/kuangliu/pytorch-cifar/blob/master/models/preact_resnet.py 3 | Reference: 4 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 5 | Identity Mappings in Deep Residual Networks. 
arXiv:1603.05027 6 | ''' 7 | 8 | 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | 12 | 13 | class PreActBlock(nn.Module): 14 | '''Pre-activation version of the BasicBlock.''' 15 | expansion = 1 16 | 17 | def __init__(self, in_planes, planes, stride=1, bn=True): 18 | super(PreActBlock, self).__init__() 19 | self.bn1 = nn.BatchNorm2d(in_planes) if bn else nn.Identity() 20 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, 21 | stride=stride, padding=1, bias=False) 22 | self.bn2 = nn.BatchNorm2d(planes) if bn else nn.Identity() 23 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 24 | 25 | if stride != 1 or in_planes != self.expansion*planes: 26 | self.shortcut = nn.Sequential(nn.Conv2d( 27 | in_planes, self.expansion * planes, kernel_size=1, 28 | stride=stride, bias=False)) 29 | 30 | def forward(self, x): 31 | out = F.relu(self.bn1(x)) 32 | shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x 33 | out = self.conv1(out) 34 | out = self.conv2(F.relu(self.bn2(out))) 35 | out += shortcut 36 | return out 37 | 38 | 39 | class PreActBottleneck(nn.Module): 40 | '''Pre-activation version of the original Bottleneck module.''' 41 | expansion = 4 42 | 43 | def __init__(self, in_planes, planes, stride=1): 44 | super(PreActBottleneck, self).__init__() 45 | self.bn1 = nn.BatchNorm2d(in_planes) 46 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 47 | self.bn2 = nn.BatchNorm2d(planes) 48 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 49 | self.bn3 = nn.BatchNorm2d(planes) 50 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) 51 | 52 | if stride != 1 or in_planes != self.expansion*planes: 53 | self.shortcut = nn.Sequential(nn.Conv2d( 54 | in_planes, self.expansion * planes, kernel_size=1, 55 | stride=stride, bias=False)) 56 | 57 | def forward(self, x): 58 | out = F.relu(self.bn1(x)) 59 | shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x 60 | out = self.conv1(out) 61 | out = self.conv2(F.relu(self.bn2(out))) 62 | out = self.conv3(F.relu(self.bn3(out))) 63 | out += shortcut 64 | return out 65 | 66 | 67 | class PreActBottleneck_depthwise(nn.Module): 68 | '''Pre-activation version of the original Bottleneck module.''' 69 | expansion = 4 70 | 71 | def __init__(self, in_planes, planes, stride=1): 72 | super(PreActBottleneck_depthwise, self).__init__() 73 | self.bn1 = nn.BatchNorm2d(in_planes) 74 | self.group_num = in_planes if in_planes < planes else planes 75 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False, groups=self.group_num) 76 | self.bn2 = nn.BatchNorm2d(planes) 77 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 78 | padding=1, bias=False, groups=self.group_num) 79 | self.bn3 = nn.BatchNorm2d(planes) 80 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, 81 | bias=False, groups=self.group_num) 82 | 83 | if stride != 1 or in_planes != self.expansion*planes: 84 | self.shortcut = nn.Sequential( 85 | nn.Conv2d( 86 | in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False, 87 | groups=self.group_num)) 88 | 89 | def forward(self, x): 90 | out = F.relu(self.bn1(x)) 91 | shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x 92 | out = self.conv1(out) 93 | out = self.conv2(F.relu(self.bn2(out))) 94 | out = self.conv3(F.relu(self.bn3(out))) 95 | out += shortcut 96 | return out 97 | 
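A quick, self-contained sketch (not a file from the repository) of how these pre-activation blocks behave: with `stride=1` a `PreActBlock` preserves the spatial resolution, with `stride=2` it halves it, and `PreActBottleneck` additionally expands the channel count by its `expansion` factor of 4.

import torch
from lib.models.regression.encoder.preact import PreActBlock, PreActBottleneck

x = torch.randn(1, 64, 60, 80)                       # (batch, channels, H, W)
print(PreActBlock(64, 64, stride=1)(x).shape)        # torch.Size([1, 64, 60, 80])
print(PreActBlock(64, 128, stride=2)(x).shape)       # torch.Size([1, 128, 30, 40])
print(PreActBottleneck(64, 64, stride=1)(x).shape)   # torch.Size([1, 256, 60, 80])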
-------------------------------------------------------------------------------- /lib/models/regression/encoder/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from lib.models.regression.encoder.preact import PreActBlock, PreActBottleneck 5 | 6 | 7 | class ResNet(nn.Module): 8 | def __init__(self, cfg): 9 | super().__init__() 10 | block_type = [PreActBlock, PreActBottleneck] 11 | block = block_type[cfg.BLOCK_TYPE] 12 | num_blocks = [int(x) for x in cfg.NUM_BLOCKS.strip().split("-")] 13 | self.in_planes = 64 14 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=1, bias=False) 15 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) 16 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 17 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) 18 | self.num_out_layers = 256 * block.expansion 19 | 20 | def _make_layer(self, block, planes, num_blocks, stride): 21 | strides = [stride] + [1] * (num_blocks - 1) 22 | layers = [] 23 | for stride in strides: 24 | layers.append(block(self.in_planes, planes, stride)) 25 | self.in_planes = planes * block.expansion 26 | return nn.Sequential(*layers) 27 | 28 | def forward(self, x): 29 | # todo recheck 30 | out = self.conv1(x) 31 | out = self.layer1(out) 32 | out = F.avg_pool2d(out, 2) 33 | out = self.layer2(out) 34 | out = F.avg_pool2d(out, 2) 35 | out = self.layer3(out) 36 | out = F.avg_pool2d(out, 2) 37 | return out 38 | -------------------------------------------------------------------------------- /lib/models/regression/encoder/resunet.py: -------------------------------------------------------------------------------- 1 | '''ResUNet in PyTorch. 2 | https://github.com/qianqianwang68/caps/blob/master/CAPS/network.py 3 | Reference: 4 | [1] Zhengxin Zhang, Qingjie Liu 5 | Road Extraction by Deep Residual U-Net. 
arXiv:1711.10684 6 | ''' 7 | 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | 12 | from lib.models.regression.encoder.preact import PreActBlock, PreActBottleneck 13 | 14 | 15 | class conv(nn.Module): 16 | def __init__(self, num_in_layers, num_out_layers, kernel_size, stride): 17 | super(conv, self).__init__() 18 | self.kernel_size = kernel_size 19 | self.conv = nn.Conv2d(num_in_layers, num_out_layers, kernel_size=kernel_size, stride=stride, 20 | padding=(self.kernel_size - 1) // 2) 21 | self.normalize = nn.BatchNorm2d(num_out_layers) 22 | 23 | def forward(self, x): 24 | x = self.conv(x) 25 | x = self.normalize(x) 26 | return F.elu(x, inplace=True) 27 | 28 | 29 | class upconv(nn.Module): 30 | def __init__(self, num_in_layers, num_out_layers, kernel_size, scale): 31 | super(upconv, self).__init__() 32 | self.scale = scale 33 | self.conv1 = conv(num_in_layers, num_out_layers, kernel_size, 1) 34 | 35 | def forward(self, x): 36 | x = nn.functional.interpolate(x, scale_factor=self.scale, 37 | mode='bilinear', align_corners=True) 38 | return self.conv1(x) 39 | 40 | 41 | class ResUNet(nn.Module): 42 | def __init__(self, cfgmodel, num_in_layers=3): 43 | super().__init__() 44 | filters = [256, 512, 1024, 2048] 45 | self.in_planes = 64 46 | if num_in_layers != 3: # Number of input channels 47 | self.firstconv = nn.Conv2d( 48 | num_in_layers, 64, kernel_size=(7, 7), 49 | stride=(2, 2), 50 | padding=(3, 3), 51 | bias=False) 52 | else: 53 | self.firstconv = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) # H/2 54 | self.firstbn = nn.BatchNorm2d(64) 55 | self.firstrelu = nn.ReLU(inplace=True) 56 | self.firstmaxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) # H/4 57 | # encoder 58 | block_type = [PreActBlock, PreActBottleneck] 59 | block = block_type[cfgmodel.BLOCK_TYPE] 60 | num_blocks = [int(x) for x in cfgmodel.NUM_BLOCKS.strip().split("-")] 61 | self.encoder1 = self._make_layer(block, 64, num_blocks[0], stride=1) # H/4 62 | self.encoder2 = self._make_layer(block, 128, num_blocks[1], stride=2) # H/8 63 | self.encoder3 = self._make_layer(block, 256, num_blocks[2], stride=2) # H/16 64 | 65 | # decoder 66 | self.not_concat = getattr(cfgmodel, "NOT_CONCAT", False) 67 | self.upconv4 = upconv(filters[2], 512, 3, 2) 68 | if not self.not_concat: 69 | self.iconv4 = conv(filters[1] + 512, 512, 3, 1) 70 | else: 71 | self.iconv4 = conv(512, 512, 3, 1) 72 | 73 | self.upconv3 = upconv(512, 256, 3, 2) 74 | if not self.not_concat: 75 | self.iconv3 = conv(filters[0] + 256, 256, 3, 1) 76 | else: 77 | self.iconv3 = conv(256, 256, 3, 1) 78 | 79 | num_out_layers = getattr(cfgmodel, "NUM_OUT_LAYERS", 128) 80 | self.num_out_layers = num_out_layers 81 | self.outconv = conv(256, num_out_layers, 1, 1) 82 | 83 | def _make_layer(self, block, planes, num_blocks, stride): 84 | strides = [stride] + [1] * (num_blocks - 1) 85 | layers = [] 86 | for stride in strides: 87 | layers.append(block(self.in_planes, planes, stride)) 88 | self.in_planes = planes * block.expansion 89 | return nn.Sequential(*layers) 90 | 91 | def skipconnect(self, x1, x2): 92 | diffY = x2.size()[2] - x1.size()[2] 93 | diffX = x2.size()[3] - x1.size()[3] 94 | 95 | x1 = F.pad(x1, (diffX // 2, diffX - diffX // 2, 96 | diffY // 2, diffY - diffY // 2)) 97 | 98 | # for padding issues, see 99 | # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a 100 | # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd 
101 | 102 | x = torch.cat([x2, x1], dim=1) 103 | return x 104 | 105 | def forward(self, x): 106 | # encoding 107 | x1 = self.firstconv(x) 108 | x1 = self.firstbn(x1) 109 | x1 = self.firstrelu(x1) 110 | x1 = self.firstmaxpool(x1) 111 | 112 | x2 = self.encoder1(x1) 113 | x3 = self.encoder2(x2) 114 | x4 = self.encoder3(x3) 115 | 116 | # decoding 117 | x = self.upconv4(x4) 118 | if not self.not_concat: 119 | x = self.skipconnect(x3, x) 120 | x = self.iconv4(x) 121 | 122 | x = self.upconv3(x) 123 | if not self.not_concat: 124 | x = self.skipconnect(x2, x) 125 | x = self.iconv3(x) 126 | 127 | x = self.outconv(x) 128 | return x 129 | -------------------------------------------------------------------------------- /lib/utils/data.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def data_to_model_device(data, model): 5 | '''Move all tensors in data dictionary to the same device as model''' 6 | 7 | try: 8 | device = next(model.parameters()).device 9 | except: 10 | # in case the model has no parameters (baseline models) 11 | device = 'cpu' 12 | 13 | for k, v in data.items(): 14 | if torch.is_tensor(v): 15 | data[k] = v.to(device) 16 | 17 | return data 18 | -------------------------------------------------------------------------------- /lib/utils/logger.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | 4 | class Logger(object): 5 | def __init__(self, filepath): 6 | self.terminal = sys.stdout 7 | self.log = open(filepath, "w") 8 | 9 | def write(self, message): 10 | self.terminal.write(message) 11 | self.log.write(message) 12 | 13 | def flush(self): 14 | self.terminal.flush() 15 | self.log.flush() 16 | 17 | 18 | def set_log(filepath): 19 | sys.stdout = Logger(filepath) 20 | return Logger 21 | -------------------------------------------------------------------------------- /lib/utils/metrics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from collections import defaultdict 4 | 5 | 6 | def pose_error_torch(R, t, Tgt, reduce=None): 7 | """Compute angular, scale and euclidean error of translation vector (metric). Compute angular rotation error.""" 8 | 9 | Rgt = Tgt[:, :3, :3] # [B, 3, 3] 10 | tgt = Tgt[:, :3, 3:].transpose(1, 2) # [B, 1, 3] 11 | 12 | scale_t = torch.linalg.norm(t, dim=-1) 13 | scale_tgt = torch.linalg.norm(tgt, dim=-1) 14 | 15 | cosine = (t @ tgt.transpose(1, 2)).squeeze(-1) / (scale_t * scale_tgt + 1e-9) 16 | cosine = torch.clip(cosine, -1.0, 1.0) # handle numerical errors 17 | t_ang_err = torch.rad2deg(torch.acos(cosine)) 18 | t_ang_err = torch.minimum(t_ang_err, 180 - t_ang_err) 19 | 20 | t_scale_err = scale_t / scale_tgt 21 | t_scale_err_sym = torch.maximum(scale_t / scale_tgt, scale_tgt / scale_t) 22 | t_euclidean_err = torch.linalg.norm(t - tgt, dim=-1) 23 | 24 | residual = R.transpose(1, 2) @ Rgt 25 | trace = torch.diagonal(residual, dim1=-2, dim2=-1).sum(-1) 26 | cosine = (trace - 1) / 2 27 | cosine = torch.clip(cosine, -1., 1.) 
# handle numerical errors 28 | R_err = torch.rad2deg(torch.acos(cosine)) 29 | 30 | if reduce is None: 31 | def fn(x): return x 32 | elif reduce == 'mean': 33 | fn = torch.mean 34 | elif reduce == 'median': 35 | fn = torch.median 36 | 37 | t_ang_err = fn(t_ang_err) 38 | t_scale_err = fn(t_scale_err) 39 | t_euclidean_err = fn(t_euclidean_err) 40 | R_err = fn(R_err) 41 | 42 | errors = {'t_err_ang': t_ang_err, 43 | 't_err_scale': t_scale_err, 44 | 't_err_scale_sym': t_scale_err_sym, 45 | 't_err_euc': t_euclidean_err, 46 | 'R_err': R_err} 47 | return errors 48 | 49 | 50 | def error_auc(errors, thresholds): 51 | """ 52 | Args: 53 | errors (list): [N,] 54 | thresholds (list) 55 | """ 56 | errors = np.nan_to_num(errors, nan=float('inf')) # convert nans to inf 57 | errors = [0] + sorted(list(errors)) 58 | recall = list(np.linspace(0, 1, len(errors))) 59 | 60 | aucs = [] 61 | for thr in thresholds: 62 | last_index = np.searchsorted(errors, thr) 63 | y = recall[:last_index] + [recall[last_index-1]] 64 | x = errors[:last_index] + [thr] 65 | aucs.append(np.trapz(y, x) / thr) 66 | 67 | return {f'auc@{t}': auc for t, auc in zip(thresholds, aucs)} 68 | 69 | 70 | def ecdf(x): 71 | """Get Empirical Cumulative Distribution Function (ECDF) given samples x [N,]""" 72 | cd = np.linspace(0, 1, x.shape[0]) 73 | v = np.sort(x) 74 | return v, cd 75 | 76 | 77 | def print_auc_table(agg_metrics): 78 | pose_error = np.maximum(agg_metrics['R_err'], agg_metrics['t_err_ang']) 79 | auc_pose = error_auc(pose_error, (5, 10, 20)) 80 | print('Pose error AUC @ 5/10/20deg: {0:.3f}/{1:.3f}/{2:.3f}'.format(*auc_pose.values())) 81 | 82 | auc_rotation = error_auc(agg_metrics['R_err'], (5, 10, 20)) 83 | print('Rotation error AUC @ 5/10/20deg: {0:.3f}/{1:.3f}/{2:.3f}'.format(*auc_rotation.values())) 84 | 85 | auc_translation_ang = error_auc(agg_metrics['t_err_ang'], (5, 10, 20)) 86 | print( 87 | 'Translation angular error AUC @ 5/10/20deg: {0:.3f}/{1:.3f}/{2:.3f}'.format(*auc_translation_ang.values())) 88 | 89 | auc_translation_euc = error_auc(agg_metrics['t_err_euc'], (0.1, 0.5, 1)) 90 | print( 91 | 'Translation Euclidean error AUC @ 0.1/0.5/1m: {0:.3f}/{1:.3f}/{2:.3f}'.format(*auc_translation_euc.values())) 92 | 93 | 94 | def precision(agg_metrics, rot_threshold, trans_threshold): 95 | '''Provides ratio of samples with rotation error < rot_threshold AND translation error < trans_threshold''' 96 | mask_rot = agg_metrics['R_err'] <= rot_threshold 97 | mask_trans = agg_metrics['t_err_euc'] <= trans_threshold 98 | recall = (mask_rot * mask_trans).mean() 99 | return recall 100 | 101 | 102 | def A_metrics(t_scale_err_sym): 103 | """Returns A1/A2/A3 metrics of translation vector norm given the "symmetric" scale error 104 | where 105 | t_scale_err_sym = torch.maximum((t_norm_gt / t_norm_pred), (t_norm_pred / t_norm_gt)) 106 | """ 107 | 108 | if not torch.is_tensor(t_scale_err_sym): 109 | t_scale_err_sym = torch.from_numpy(t_scale_err_sym) 110 | 111 | thresh = t_scale_err_sym 112 | a1 = (thresh < 1.25).float().mean() 113 | a2 = (thresh < 1.25 ** 2).float().mean() 114 | a3 = (thresh < 1.25 ** 3).float().mean() 115 | return a1, a2, a3 116 | 117 | 118 | class MetricsAccumulator: 119 | """Accumulates metrics and aggregates them when requested""" 120 | 121 | def __init__(self): 122 | self.data = defaultdict(list) 123 | 124 | def accumulate(self, data): 125 | for key, value in data.items(): 126 | self.data[key].append(value) 127 | 128 | def aggregate(self): 129 | res = dict() 130 | for key in self.data.keys(): 131 | res[key] = 
torch.cat(self.data[key]).view(-1).cpu().numpy() 132 | return res 133 | -------------------------------------------------------------------------------- /lib/utils/rotationutils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from transforms3d.quaternions import qmult, qinverse, rotate_vector 3 | 4 | 5 | # Based on the paper : On the Continuity of Rotation Representations in Neural Networks 6 | # code from https://github.com/papagina/RotationContinuity/blob/master/Inverse_Kinematics/code/tools.py 7 | 8 | # batch*n 9 | 10 | 11 | def normalize_vector(v): 12 | batch = v.shape[0] 13 | v_mag = torch.sqrt(v.pow(2).sum(1)) # batch 14 | v_mag = torch.max(v_mag, torch.autograd.Variable(torch.FloatTensor([1e-8]).cuda())) 15 | v_mag = v_mag.view(batch, 1).expand(batch, v.shape[1]) 16 | v = v / v_mag 17 | return v 18 | 19 | 20 | # u, v batch*n 21 | def cross_product(u, v): 22 | batch = u.shape[0] 23 | # print (u.shape) 24 | # print (v.shape) 25 | i = u[:, 1] * v[:, 2] - u[:, 2] * v[:, 1] 26 | j = u[:, 2] * v[:, 0] - u[:, 0] * v[:, 2] 27 | k = u[:, 0] * v[:, 1] - u[:, 1] * v[:, 0] 28 | 29 | out = torch.cat((i.view(batch, 1), j.view(batch, 1), k.view(batch, 1)), 1) # batch*3 30 | 31 | return out 32 | 33 | 34 | def rotation_matrix_from_ortho6d(poses): 35 | """ 36 | Computes rotation matrix from 6D continuous space according to the parametrisation proposed in 37 | On the Continuity of Rotation Representations in Neural Networks 38 | https://arxiv.org/pdf/1812.07035.pdf 39 | :param poses: [B, 6] 40 | :return: R: [B, 3, 3] 41 | """ 42 | 43 | x_raw = poses[:, 0:3] # batch*3 44 | y_raw = poses[:, 3:6] # batch*3 45 | 46 | x = normalize_vector(x_raw) # batch*3 47 | z = cross_product(x, y_raw) # batch*3 48 | z = normalize_vector(z) # batch*3 49 | y = cross_product(z, x) # batch*3 50 | 51 | x = x.view(-1, 3, 1) 52 | y = y.view(-1, 3, 1) 53 | z = z.view(-1, 3, 1) 54 | matrix = torch.cat((x, y, z), 2) # batch*3*3 55 | return matrix 56 | 57 | 58 | def relative_pose_wxyz(q1_wxyz, t1, q2_wxyz, t2): 59 | q12_wxyz = qmult(q2_wxyz, qinverse(q1_wxyz)) 60 | t12 = t2 - rotate_vector(t1, q12_wxyz) 61 | return q12_wxyz, t12 62 | -------------------------------------------------------------------------------- /lib/utils/solver.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def procrustes(A, B): 5 | """ 6 | See: https://en.wikipedia.org/wiki/Kabsch_algorithm 7 | 2-D or 3-D registration with known correspondences. 8 | Registration occurs in the zero centered coordinate system, and then 9 | must be transported back. 
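In short: with centred point sets A_c = A - mean(A) and B_c = B - mean(B), the covariance is H = A_c^T B_c,
its SVD gives U, S, V, the reflection is corrected via Z = diag(1, 1, sign(det(U V^T))), and the result is
R = V Z U^T with t = mean(B) - R mean(A).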
10 | Args: 11 | - A: Torch tensor of shape (B, N, 3) -- Point Cloud to Align (source) 12 | - B: Torch tensor of shape (B, N, 3) -- Reference Point Cloud (target) 13 | Returns: 14 | - R: optimal rotation (B, 3, 3) 15 | - t: optimal translation (B, 3, 1) 16 | Based on: https://gist.github.com/bougui505/e392a371f5bab095a3673ea6f4976cc8 17 | """ 18 | assert len(A.shape) == len(B.shape) == 3, 'three dimensions are required' 19 | assert A.shape[0] == B.shape[0], 'batch size must match' 20 | assert A.shape[1] == B.shape[1], 'number of correspondences must match' 21 | assert A.shape[2] == B.shape[2], 'number of spatial dimensions must be 3' 22 | 23 | a_mean = A.mean(axis=1, keepdim=True) 24 | b_mean = B.mean(axis=1, keepdim=True) 25 | A_c = A - a_mean 26 | B_c = B - b_mean 27 | # Covariance matrix 28 | H = A_c.transpose(1, 2) @ B_c 29 | U, S, V = torch.svd(H) 30 | # Fixes orientation such that Det(R) = + 1 31 | Z = torch.eye(3).unsqueeze(0).repeat(A.shape[0], 1, 1).to(A.device) 32 | Z[:, -1, -1] = torch.sign(torch.linalg.det(U @ V.transpose(1, 2))) 33 | # Rotation matrix 34 | R = V @ Z @ U.transpose(1, 2) 35 | # Translation vector 36 | t = b_mean - a_mean @ R.transpose(1, 2) 37 | return R, t 38 | -------------------------------------------------------------------------------- /lib/utils/visualisation.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import cv2 4 | import numpy as np 5 | from lib.utils.localize import cal_vec_angle_error, cal_quat_angle_error 6 | 7 | 8 | def save_video(save_res_path, dataloader, odir): 9 | """Generate a video per sequence with per frame metrics.""" 10 | 11 | from vidgear.gears import WriteGear 12 | 13 | def save_video_gear(odir, old_scene, frames): 14 | video_writer = WriteGear( 15 | output_filename=f'{odir / old_scene}.mp4', custom_ffmpeg=os.getenv('FFMPEG_PATH')) 16 | if not video_writer._WriteGear__ffmpeg: 17 | print('Could not find ffmpeg path in the system. If available, set ffmpeg path in env. var. 
FFMPEG_PATH') 18 | 19 | # sort frames by filename and write to disk 20 | for k, frame in sorted(frames.items(), key=lambda item: item[0]): 21 | video_writer.write(frame) 22 | video_writer.close() 23 | return 24 | 25 | results_dict = np.load(save_res_path, allow_pickle=True).item() 26 | old_scene = None 27 | scenes = [] 28 | write_frames = {} 29 | 30 | for data in dataloader: 31 | scene = data['scene_id'][0] 32 | train_img_path, test_img_path = data['pair_names'][0][0], data['pair_names'][1][0] 33 | 34 | if scene not in scenes and old_scene is not None: 35 | save_video_gear(odir, old_scene, write_frames) 36 | write_frames = {} 37 | scenes.append(scene) 38 | 39 | # get performance metrics 40 | try: 41 | abs_pose_lbl = results_dict[scene][test_img_path]['abs_pose_lbl'] 42 | abs_pose_pred = results_dict[scene][test_img_path]['abs_pose_pred'] 43 | r_err = cal_quat_angle_error(abs_pose_lbl.q, abs_pose_pred.q).item() 44 | t_ang_err = cal_vec_angle_error(abs_pose_lbl.t, abs_pose_pred.t).item() 45 | t_err = np.linalg.norm(abs_pose_lbl.c - abs_pose_pred.c).item() 46 | except: 47 | r_err = float('inf') 48 | t_err = float('inf') 49 | t_ang_err = float('inf') 50 | 51 | # convert frames (pytorch -> OCV) 52 | c0 = (data['image0'].squeeze(0).permute(1, 2, 0) 53 | * 255).detach().cpu().numpy().astype(np.uint8) 54 | c0 = c0[:, :, ::-1] 55 | c1 = (data['image1'].squeeze(0).permute(1, 2, 0) 56 | * 255).detach().cpu().numpy().astype(np.uint8) 57 | c1 = c1[:, :, ::-1] 58 | frame = np.concatenate((c0, c1), axis=1).copy() 59 | 60 | # write metrics 61 | text = f'R_err: {r_err:.1f}deg. t_ang_err: {t_ang_err:.1f}deg. t_err: {t_err:.2f}m' 62 | font_size = 1 if c0.shape[0] > 500 else 0.5 63 | tx = 100 if c0.shape[0] > 500 else 10 64 | ty = c0.shape[0] - 30 65 | color = (0, 255, 0) if r_err <= 5 and t_err <= 0.25 else (0, 0, 255) 66 | cv2.putText(frame, text, (tx, ty), cv2.FONT_HERSHEY_DUPLEX, 67 | font_size, (0, 0, 0), 4, cv2.LINE_AA) 68 | cv2.putText(frame, text, (tx, ty), cv2.FONT_HERSHEY_DUPLEX, 69 | font_size, color, 1, cv2.LINE_AA) 70 | 71 | # resize 72 | frame = cv2.resize(frame, None, fx=0.5, fy=0.5) 73 | 74 | # save in memory (need to order before saving) 75 | write_frames[test_img_path] = frame 76 | old_scene = scene 77 | 78 | # last sequence 79 | save_video_gear(odir, old_scene, write_frames) 80 | return 81 | -------------------------------------------------------------------------------- /pyrightconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "exclude": ["data"] 3 | } 4 | -------------------------------------------------------------------------------- /submission.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | from collections import defaultdict 4 | from dataclasses import dataclass 5 | from zipfile import ZipFile 6 | 7 | import torch 8 | import numpy as np 9 | from tqdm import tqdm 10 | 11 | from config.default import cfg 12 | from lib.datasets.datamodules import DataModule 13 | from lib.models.builder import build_model 14 | from lib.utils.data import data_to_model_device 15 | from transforms3d.quaternions import mat2quat 16 | 17 | 18 | @dataclass 19 | class Pose: 20 | image_name: str 21 | q: np.ndarray 22 | t: np.ndarray 23 | inliers: float 24 | 25 | def __str__(self) -> str: 26 | formatter = {'float': lambda v: f'{v:.6f}'} 27 | max_line_width = 1000 28 | q_str = np.array2string(self.q, formatter=formatter, max_line_width=max_line_width)[1:-1] 29 | t_str = 
np.array2string(self.t, formatter=formatter, max_line_width=max_line_width)[1:-1] 30 | return f'{self.image_name} {q_str} {t_str} {self.inliers}' 31 | 32 | 33 | def predict(loader, model): 34 | results_dict = defaultdict(list) 35 | 36 | for data in tqdm(loader): 37 | # run inference 38 | data = data_to_model_device(data, model) 39 | with torch.no_grad(): 40 | R, t = model(data) 41 | R = R.detach().cpu().numpy() 42 | t = t.reshape(-1).detach().cpu().numpy() 43 | inliers = data['inliers'] 44 | scene = data['scene_id'][0] 45 | query_img = data['pair_names'][1][0] 46 | 47 | # ignore frames without poses (e.g. not enough feature matches) 48 | if np.isnan(R).any() or np.isnan(t).any() or np.isinf(t).any(): 49 | continue 50 | 51 | # populate results_dict 52 | estimated_pose = Pose(image_name=query_img, 53 | q=mat2quat(R).reshape(-1), 54 | t=t.reshape(-1), 55 | inliers=inliers) 56 | results_dict[scene].append(estimated_pose) 57 | 58 | return results_dict 59 | 60 | 61 | def save_submission(results_dict: dict, output_path: Path): 62 | with ZipFile(output_path, 'w') as zip: 63 | for scene, poses in results_dict.items(): 64 | poses_str = '\n'.join((str(pose) for pose in poses)) 65 | zip.writestr(f'pose_{scene}.txt', poses_str.encode('utf-8')) 66 | 67 | 68 | def eval(args): 69 | # Load configs 70 | cfg.merge_from_file('config/mapfree.yaml') 71 | cfg.merge_from_file(args.config) 72 | 73 | # Create dataloader 74 | if args.split == 'test': 75 | dataloader = DataModule(cfg).test_dataloader() 76 | elif args.split == 'val': 77 | cfg.TRAINING.BATCH_SIZE = 1 78 | cfg.TRAINING.NUM_WORKERS = 1 79 | dataloader = DataModule(cfg).val_dataloader() 80 | else: 81 | raise NotImplementedError(f'Invalid split: {args.split}') 82 | 83 | # Create model 84 | model = build_model(cfg, args.checkpoint) 85 | 86 | # Get predictions from model 87 | results_dict = predict(dataloader, model) 88 | 89 | # Save predictions to txt per scene within zip 90 | args.output_root.mkdir(parents=True, exist_ok=True) 91 | save_submission(results_dict, args.output_root / 'submission.zip') 92 | 93 | 94 | if __name__ == '__main__': 95 | parser = argparse.ArgumentParser() 96 | parser.add_argument('config', help='path to config file') 97 | parser.add_argument( 98 | '--checkpoint', help='path to model checkpoint (models with learned parameters)', 99 | default='') 100 | parser.add_argument('--output_root', '-o', type=Path, default=Path('results/')) 101 | parser.add_argument( 102 | '--split', choices=('val', 'test'), 103 | default='test', 104 | help='Dataset split to use for evaluation. Choose from test or val. Default: test') 105 | 106 | args = parser.parse_args() 107 | eval(args) 108 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from pathlib import Path 4 | 5 | # do this before importing numpy! (doing it right up here in case numpy is dependency of e.g.
json) 6 | os.environ["MKL_NUM_THREADS"] = "1" # noqa: E402 7 | os.environ["NUMEXPR_NUM_THREADS"] = "1" # noqa: E402 8 | os.environ["OMP_NUM_THREADS"] = "1" # noqa: E402 9 | os.environ["OPENBLAS_NUM_THREADS"] = "1" # noqa: E402 10 | 11 | import pytorch_lightning as pl 12 | from pytorch_lightning.loggers import TensorBoardLogger 13 | 14 | from config.default import cfg 15 | from config.utils import config_merge_from_file 16 | from lib.datasets.datamodules import DataModule 17 | from lib.models.regression.model import RegressionModel, RegressionMultiFrameModel 18 | 19 | 20 | def main(args): 21 | global cfg 22 | cfg = config_merge_from_file(cfg=cfg, path_to_config=args.dataset_config) 23 | cfg = config_merge_from_file(cfg=cfg, path_to_config=args.config) 24 | 25 | pl.seed_everything(0) 26 | 27 | datamodule = DataModule(cfg) 28 | if cfg.MODEL == 'RegressionMultiFrame': 29 | model = RegressionMultiFrameModel(cfg) 30 | elif cfg.MODEL == 'Regression': 31 | model = RegressionModel(cfg) 32 | else: 33 | raise NotImplementedError(f'Invalid model {cfg.MODEL}') 34 | 35 | logger = TensorBoardLogger(save_dir='weights', name=args.experiment) 36 | 37 | checkpoint_callback = pl.callbacks.ModelCheckpoint( 38 | save_last=True, 39 | save_top_k=5, 40 | verbose=True, 41 | monitor='val_loss/loss', 42 | mode='min' 43 | ) 44 | 45 | epochend_callback = pl.callbacks.ModelCheckpoint( 46 | filename='e{epoch}-last', 47 | save_top_k=-1, 48 | every_n_epochs=1, 49 | save_on_train_epoch_end=True 50 | ) 51 | 52 | lr_monitoring_callback = pl.callbacks.LearningRateMonitor(logging_interval='step') 53 | trainer = pl.Trainer(devices=1, 54 | log_every_n_steps=cfg.TRAINING.LOG_INTERVAL, 55 | val_check_interval=cfg.TRAINING.VAL_INTERVAL, 56 | limit_val_batches=cfg.TRAINING.VAL_BATCHES, 57 | max_epochs=cfg.TRAINING.EPOCHS, 58 | logger=logger, 59 | callbacks=[checkpoint_callback, lr_monitoring_callback, epochend_callback], 60 | num_sanity_val_steps=1, 61 | gradient_clip_val=cfg.TRAINING.GRAD_CLIP) 62 | # track_grad_norm=-1) # TODO: put back the equivalent! 63 | 64 | trainer.fit(model, datamodule, ckpt_path=args.resume) 65 | 66 | 67 | if __name__ == '__main__': 68 | """ 69 | Single-frame query: 70 | ```shell 71 | python3 train.py \ 72 | config/regression/mapfree/3d3d.yaml config/mapfree.yaml 73 | ``` 74 | 75 | Multi-frame query: 76 | ```shell 77 | python3 train.py \ 78 | config/regression/mapfree/multiframe/3d3d_multi.yaml \ 79 | config/mapfree.yaml \ 80 | config/mapfree_multi.yaml 81 | ``` 82 | """ 83 | parser = argparse.ArgumentParser() 84 | parser.add_argument('config', action='append', help='path to config file') 85 | parser.add_argument('dataset_config', nargs='+', action='store', 86 | help='path to dataset config file') 87 | # Allow for further config files to be added. Later values overwrite earlier ones. 88 | parser.add_argument('--config', action='append', dest='config', 89 | help='one more path to a config file') 90 | # Allow for further dataset-config files to be added. Later values overwrite earlier ones. 
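# Example (hypothetical file names) of stacking extra override configs on top of the positional ones;
# later values overwrite earlier ones:
#   python3 train.py config/regression/mapfree/3d3d.yaml config/mapfree.yaml \
#       --config my_overrides.yaml --dataset-config my_dataset_overrides.yaml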
91 | parser.add_argument('--dataset-config', '--dataset_config', action='append', 92 | dest='dataset_config', help='one more path to a dataset config file') 93 | parser.add_argument('--experiment', help='experiment name', default='default') 94 | parser.add_argument('--resume', help='resume from checkpoint path', default=None) 95 | args = parser.parse_args() 96 | 97 | assert isinstance(args.config, (list, tuple, str, Path)) 98 | # make sure we don't have nested lists by accident 99 | if isinstance(args.config, (list, tuple)): 100 | for args_config in args.config: 101 | assert isinstance(args_config, (str, Path)) 102 | 103 | assert isinstance(args.dataset_config, (list, tuple, str, Path)) 104 | # make sure we don't have nested lists by accident 105 | if isinstance(args.dataset_config, (list, tuple)): 106 | for args_dataset_config in args.dataset_config: 107 | assert isinstance(args_dataset_config, (str, Path)) 108 | 109 | main(args) 110 | -------------------------------------------------------------------------------- /visualisation/README.md: -------------------------------------------------------------------------------- 1 | ### Map-Free Relocalization Visualisation Script 2 | 3 | The code in this folder can be used to render a video that shows map-free relocalisation estimates. 4 | If ground truth is available (e.g. for the validation set), both the ground truth and estimated poses will be visualised. 5 | The estimates will be color-coded according to their metric positional error with respect to the ground truth. 6 | In particular, estimates will be green to yellow for up to 1 meter positional error, and red for more than 1 meter error. 7 | If no ground truth is available (e.g. for the test set), only the estimated poses will be visualised. 8 | 9 | These videos will look best if ground truth is available and estimated poses are given for all frames. 10 | 11 | The visualisation uses the `pyrender` library, and in particular its [off-screen rendering capabilities](https://pyrender.readthedocs.io/en/latest/examples/offscreen.html). 12 | The code uses the EGL platform of PyOpenGL. 13 | 14 | We provide an environment file `environment.yml` that can be used to create a conda environment with all necessary dependencies. 15 | To create the environment, run: 16 | 17 | ```bash 18 | conda env create -f environment.yml 19 | ``` 20 | 21 | Activate the environment via: 22 | 23 | ```bash 24 | conda activate mapfreevis 25 | ``` 26 | 27 | Call the visualisation script via: 28 | 29 | ```bash 30 | python render_estimates.py --estimates_path /path/to/estimates --data_path /path/to/data 31 | ``` 32 | 33 | `/path/to/estimates` should point to the folder that contains the map-free pose files, e.g. `pose_s00460.txt`. 34 | `/path/to/data` should point to the map-free dataset, e.g. the `test` or `val` folder with scene subfolders such as `s00460`. 35 | 36 | The script will iterate through all pose files and create a video for each one. 37 | All videos will be saved in the folder `renderings`, which can be changed via the `--output_path` argument. 38 | 39 | If you want to render a video for a subset of scenes, you can specify them using `--render_subset` followed by a list of scene names, separated by commas, e.g. `--render_subset s00460,s00461`.
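For example, to render only the scenes `s00460` and `s00461` and write the videos to a custom folder (the paths and the output folder name below are placeholders):

```bash
python render_estimates.py --estimates_path /path/to/estimates --data_path /path/to/data \
    --render_subset s00460,s00461 --output_path my_renderings
```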
-------------------------------------------------------------------------------- /visualisation/environment.yml: -------------------------------------------------------------------------------- 1 | name: mapfreevis 2 | channels: 3 | - conda-forge 4 | - anaconda 5 | - defaults 6 | dependencies: 7 | - _libgcc_mutex=0.1=main 8 | - _openmp_mutex=5.1=1_gnu 9 | - aom=3.6.0=h6a678d5_0 10 | - blas=1.0=mkl 11 | - blosc=1.21.3=h6a678d5_0 12 | - brotli=1.0.9=h5eee18b_7 13 | - brotli-bin=1.0.9=h5eee18b_7 14 | - brotli-python=1.0.9=py38h6a678d5_7 15 | - brunsli=0.1=h2531618_0 16 | - bzip2=1.0.8=h7b6447c_0 17 | - c-ares=1.19.1=h5eee18b_0 18 | - ca-certificates=2024.7.2=h06a4308_0 19 | - certifi=2024.7.4=py38h06a4308_0 20 | - cffi=1.16.0=py38h5eee18b_0 21 | - cfitsio=3.470=h5893167_7 22 | - charls=2.2.0=h2531618_0 23 | - charset-normalizer=2.0.4=pyhd3eb1b0_0 24 | - click=8.1.7=py38h06a4308_0 25 | - cloudpickle=2.2.1=py38h06a4308_0 26 | - colorama=0.4.6=pyhd8ed1ab_0 27 | - contourpy=1.0.5=py38hdb19cb5_0 28 | - cryptography=41.0.3=py38hdda0065_0 29 | - cycler=0.11.0=pyhd3eb1b0_0 30 | - cyrus-sasl=2.1.28=h52b45da_1 31 | - cytoolz=0.12.0=py38h5eee18b_0 32 | - dask-core=2023.4.1=py38h06a4308_0 33 | - dataclasses=0.8=pyh6d0b6a4_7 34 | - dav1d=1.2.1=h5eee18b_0 35 | - dbus=1.13.18=hb2f20db_0 36 | - expat=2.5.0=h6a678d5_0 37 | - ffmpeg=4.3.2=hca11adc_0 38 | - fontconfig=2.14.1=h4c34cd2_2 39 | - fonttools=4.25.0=pyhd3eb1b0_0 40 | - freetype=2.12.1=h4a9f257_0 41 | - freetype-py=2.2.0=pyhd3eb1b0_0 42 | - fsspec=2023.9.2=py38h06a4308_0 43 | - future=0.18.3=py38h06a4308_0 44 | - giflib=5.2.1=h5eee18b_3 45 | - glib=2.69.1=he621ea3_2 46 | - gmp=6.2.1=h295c915_3 47 | - gnutls=3.6.15=he1e5248_0 48 | - gst-plugins-base=1.14.1=h6a678d5_1 49 | - gstreamer=1.14.1=h5eee18b_1 50 | - icu=73.1=h6a678d5_0 51 | - idna=3.4=py38h06a4308_0 52 | - imagecodecs=2023.1.23=py38hc4b7b5f_0 53 | - imageio=2.31.4=py38h06a4308_0 54 | - importlib-metadata=6.0.0=py38h06a4308_0 55 | - importlib_resources=6.1.0=py38h06a4308_0 56 | - intel-openmp=2021.4.0=h06a4308_3561 57 | - jpeg=9e=h5eee18b_1 58 | - jxrlib=1.1=h7b6447c_2 59 | - kiwisolver=1.4.4=py38h6a678d5_0 60 | - krb5=1.20.1=h143b758_1 61 | - lame=3.100=h7b6447c_0 62 | - lcms2=2.12=h3be6417_0 63 | - ld_impl_linux-64=2.38=h1181459_1 64 | - lerc=3.0=h295c915_0 65 | - libaec=1.0.4=he6710b0_1 66 | - libavif=0.11.1=h5eee18b_0 67 | - libbrotlicommon=1.0.9=h5eee18b_7 68 | - libbrotlidec=1.0.9=h5eee18b_7 69 | - libbrotlienc=1.0.9=h5eee18b_7 70 | - libclang=14.0.6=default_hc6dbbc7_1 71 | - libclang13=14.0.6=default_he11475f_1 72 | - libcups=2.4.2=h2d74bed_1 73 | - libcurl=7.88.1=h251f7ec_2 74 | - libdeflate=1.17=h5eee18b_1 75 | - libedit=3.1.20221030=h5eee18b_0 76 | - libev=4.33=h7f8727e_1 77 | - libffi=3.4.4=h6a678d5_0 78 | - libgcc-ng=11.2.0=h1234567_1 79 | - libgfortran-ng=11.2.0=h00389a5_1 80 | - libgfortran5=11.2.0=h1234567_1 81 | - libglu=9.0.0=hf484d3e_1 82 | - libgomp=11.2.0=h1234567_1 83 | - libidn2=2.3.4=h5eee18b_0 84 | - libllvm14=14.0.6=hdb19cb5_3 85 | - libnghttp2=1.57.0=h2d74bed_0 86 | - libpng=1.6.39=h5eee18b_0 87 | - libpq=12.15=hdbd6064_1 88 | - libssh2=1.10.0=hdbd6064_2 89 | - libstdcxx-ng=11.2.0=h1234567_1 90 | - libtasn1=4.19.0=h5eee18b_0 91 | - libtiff=4.5.1=h6a678d5_0 92 | - libunistring=0.9.10=h27cfd23_0 93 | - libuuid=1.41.5=h5eee18b_0 94 | - libwebp=1.3.2=h11a3e52_0 95 | - libwebp-base=1.3.2=h5eee18b_0 96 | - libxcb=1.15=h7f8727e_0 97 | - libxkbcommon=1.0.1=h5eee18b_1 98 | - libxml2=2.10.4=hf1b16e4_1 99 | - libzopfli=1.0.3=he6710b0_0 100 | - locket=1.0.0=py38h06a4308_0 101 | - 
lz4-c=1.9.4=h6a678d5_0 102 | - matplotlib=3.7.1=py38h578d9bd_0 103 | - matplotlib-base=3.7.1=py38h417a72b_1 104 | - mkl=2021.4.0=h06a4308_640 105 | - mkl-service=2.4.0=py38h7f8727e_0 106 | - mkl_fft=1.3.1=py38hd3c417c_0 107 | - mkl_random=1.2.2=py38h51133e4_0 108 | - munkres=1.1.4=py_0 109 | - mysql=5.7.24=h721c034_2 110 | - ncurses=6.4=h6a678d5_0 111 | - nettle=3.7.3=hbbd107a_1 112 | - networkx=3.1=py38h06a4308_0 113 | - numpy=1.23.5=py38h14f4228_0 114 | - numpy-base=1.23.5=py38h31eccc5_0 115 | - openh264=2.1.1=h4ff587b_0 116 | - openjpeg=2.4.0=h3ad879b_0 117 | - openssl=3.0.14=h5eee18b_0 118 | - packaging=23.1=py38h06a4308_0 119 | - partd=1.4.1=py38h06a4308_0 120 | - pcre=8.45=h295c915_0 121 | - pillow=10.0.1=py38ha6cbd5a_0 122 | - pip=23.3=py38h06a4308_0 123 | - platformdirs=3.10.0=py38h06a4308_0 124 | - ply=3.11=py38_0 125 | - pooch=1.7.0=py38h06a4308_0 126 | - pycparser=2.21=pyhd3eb1b0_0 127 | - pyglet=1.5.27=py38h578d9bd_4 128 | - pyopengl=3.1.1a1=py38h06a4308_0 129 | - pyopenssl=23.2.0=py38h06a4308_0 130 | - pyparsing=3.0.9=py38h06a4308_0 131 | - pyqt=5.15.10=py38h6a678d5_0 132 | - pyqt5-sip=12.13.0=py38h5eee18b_0 133 | - pyrender=0.1.45=pyh8a188c0_3 134 | - pysocks=1.7.1=py38h06a4308_0 135 | - python=3.8.18=h955ad1f_0 136 | - python-dateutil=2.8.2=pyhd3eb1b0_0 137 | - python_abi=3.8=2_cp38 138 | - pywavelets=1.4.1=py38h5eee18b_0 139 | - pyyaml=6.0.1=py38h5eee18b_0 140 | - qt-main=5.15.2=h53bd1ea_10 141 | - readline=8.2=h5eee18b_0 142 | - requests=2.31.0=py38h06a4308_0 143 | - scikit-image=0.19.3=py38h6a678d5_1 144 | - scipy=1.10.0=py38h14f4228_1 145 | - setuptools=68.0.0=py38h06a4308_0 146 | - sip=6.7.12=py38h6a678d5_0 147 | - six=1.16.0=pyhd3eb1b0_1 148 | - snappy=1.1.9=h295c915_0 149 | - sqlite=3.41.2=h5eee18b_0 150 | - tifffile=2023.4.12=py38h06a4308_0 151 | - tk=8.6.12=h1ccaba5_0 152 | - tomli=2.0.1=py38h06a4308_0 153 | - toolz=0.12.0=py38h06a4308_0 154 | - tornado=6.3.3=py38h5eee18b_0 155 | - tqdm=4.66.4=pyhd8ed1ab_0 156 | - trimesh=3.21.2=pyhd8ed1ab_0 157 | - urllib3=1.26.18=py38h06a4308_0 158 | - wheel=0.41.2=py38h06a4308_0 159 | - x264=1!161.3030=h7f98852_1 160 | - xz=5.4.2=h5eee18b_0 161 | - yaml=0.2.5=h7b6447c_0 162 | - zfp=1.0.0=h6a678d5_0 163 | - zipp=3.11.0=py38h06a4308_0 164 | - zlib=1.2.13=h5eee18b_0 165 | - zstd=1.5.5=hc292b87_0 166 | -------------------------------------------------------------------------------- /visualisation/lazy_camera.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.linalg import svd 3 | 4 | class LazyCamera: 5 | """Smooth and slightly delayed scene camera. 6 | 7 | Implements a rolling average of last few camera positions. 8 | Also zooms out to display the whole scene. 9 | """ 10 | 11 | # buffer holding last m camera positions 12 | m_camera_buffer = None 13 | 14 | m_camera_buffer_size = None 15 | m_backwards_offset = None 16 | 17 | def __init__(self, 18 | camera_buffer_size=20, 19 | backwards_offset=4): 20 | """Constructor. 
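The observing camera is pushed backwards_offset meters behind the current view and
smoothed by averaging the last camera_buffer_size views.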
21 | 22 | Parameters: 23 | camera_buffer_size: Number of last few cameras to consider 24 | backwards_offset: Move observing camera backwards from current view, in meters 25 | """ 26 | 27 | self.m_camera_buffer = [] 28 | self.m_camera_buffer_size = camera_buffer_size 29 | self.m_backwards_offset = backwards_offset 30 | 31 | @staticmethod 32 | def _orthonormalize_rotation(T): 33 | """Takes a 4x4 matrix and orthonormalizes the upper left 3x3 using SVD 34 | 35 | Returns: 36 | T with orthonormalized upper 3x3 37 | """ 38 | 39 | R = T[:3, :3] 40 | 41 | # see https://arxiv.org/pdf/2006.14616.pdf Eq.2 42 | U, S, Vt = svd(R) 43 | Z = np.eye(3) 44 | Z[-1, -1] = np.sign(np.linalg.det(U @ Vt)) 45 | R = U @ Z @ Vt 46 | 47 | T[:3, :3] = R 48 | 49 | return T 50 | 51 | def update_camera(self, view): 52 | """Update lazy camera with new view. 53 | 54 | Parameters: 55 | view: New camera view, 4x4 matrix 56 | """ 57 | 58 | observing_camera = view.copy() 59 | 60 | # push observing camera back in z-direction in camera space 61 | z_vec = np.zeros((3,)) 62 | z_vec[2] = 1 63 | offset_vector = view[:3, :3] @ z_vec 64 | observing_camera[:3, 3] += offset_vector * self.m_backwards_offset 65 | 66 | # use moving avage of last X cameras (so that observing camera is smooth and follows with slight delay) 67 | self.m_camera_buffer.append(observing_camera) 68 | 69 | if len(self.m_camera_buffer) > self.m_camera_buffer_size: 70 | self.m_camera_buffer = self.m_camera_buffer[1:] 71 | 72 | def get_current_view(self): 73 | """Get current lazy camera view for rendering. 74 | 75 | Returns: 76 | 4x4 matrix 77 | """ 78 | 79 | if self.m_camera_buffer_size == 1: 80 | return self.m_camera_buffer[0] 81 | 82 | # naive average of camera pose matrices 83 | smooth_camera_pose = np.zeros((4, 4)) 84 | for camera_pose in self.m_camera_buffer: 85 | smooth_camera_pose += camera_pose 86 | smooth_camera_pose /= len(self.m_camera_buffer) 87 | 88 | return self._orthonormalize_rotation(smooth_camera_pose) -------------------------------------------------------------------------------- /visualisation/render_estimates.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright © Niantic, Inc. 2024. 3 | 4 | import os 5 | 6 | os.environ['PYOPENGL_PLATFORM'] = 'egl' 7 | 8 | import logging 9 | import argparse 10 | from pathlib import Path 11 | from render_scene import render_scene 12 | 13 | _logger = logging.getLogger(__name__) 14 | 15 | if __name__ == '__main__': 16 | # Setup logging levels. 17 | logging.basicConfig(level=logging.INFO) 18 | 19 | parser = argparse.ArgumentParser( 20 | description='Rendering map-free relocalisation estimates.', 21 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 22 | 23 | parser.add_argument('--estimates_path', type=Path, required=True, 24 | help="Path to the folder that contains file with estimated poses per scene." 25 | "That is the folder that contains pose_s00XXX.txt files.") 26 | 27 | parser.add_argument('--data_path', type=Path, required=True, 28 | help="Path to the dataset folder, i.e. the s00XXX folders with images.") 29 | 30 | parser.add_argument('--render_subset', type=str, 31 | help="Subset of scenes to render, comma separated, e.g. 
's00460,s00461'.") 32 | 33 | parser.add_argument('--output_path', type=Path, default=Path('renderings'), 34 | help="Path to the folder where the renderings will be saved.") 35 | 36 | parser.add_argument('--confidence_threshold', type=float, default=-1, 37 | help="Filter estimates below this confidence threshold.") 38 | 39 | options = parser.parse_args() 40 | 41 | # Get list of all files with estimated poses 42 | estimates_files = list(options.estimates_path.glob('pose_s*.txt')) 43 | 44 | if len(estimates_files) == 0: 45 | _logger.error(f"No pose files found in {options.estimates_path}.") 46 | exit(1) 47 | 48 | # Filter list according to string provided by user 49 | if options.render_subset: 50 | # get list of scenes to render 51 | render_subset = options.render_subset.split(',') 52 | # only keep files that contain the requested scene ID 53 | estimates_files = [f for f in estimates_files if f.stem[5:] in render_subset] 54 | 55 | if len(estimates_files) == 0: 56 | _logger.error(f"No pose files match the requested scene subset: {options.render_subset}.") 57 | exit(1) 58 | 59 | _logger.info(f"Found {len(estimates_files)} pose files in {options.estimates_path}") 60 | 61 | # do the actual rendering 62 | for estimates_file in estimates_files: 63 | 64 | # check whether the scene folder exists 65 | scene_folder = options.data_path / estimates_file.stem[5:] 66 | 67 | if not scene_folder.exists(): 68 | _logger.error(f"Scene folder {scene_folder} does not exist. Skipping.") 69 | continue 70 | 71 | _logger.info(f"Rendering scene {scene_folder} using estimates from {estimates_file}") 72 | render_scene(estimates_file, scene_folder, options.output_path, options.confidence_threshold) 73 | -------------------------------------------------------------------------------- /visualisation/render_util.py: -------------------------------------------------------------------------------- 1 | import trimesh 2 | import numpy as np 3 | import logging 4 | from PIL import Image 5 | from PIL import ImageOps 6 | 7 | # Setup logging levels. 8 | logging.basicConfig(level=logging.WARNING) 9 | 10 | THICKNESS = 0.01 # controls how thick the frustum's 'bars' are 11 | 12 | origin_frustum_verts = np.array([ 13 | (0., 0., 0.), 14 | (0.375, -0.5, -0.5), 15 | (0.375, 0.5, -0.5), 16 | (-0.375, 0.5, -0.5), 17 | (-0.375, -0.5, -0.5), 18 | ]) 19 | 20 | frustum_edges = np.array([ 21 | (1, 2), 22 | (1, 3), 23 | (1, 4), 24 | (1, 5), 25 | (2, 3), 26 | (3, 4), 27 | (4, 5), 28 | (5, 2), 29 | ]) - 1 30 | 31 | 32 | def get_image_box( 33 | image_path, 34 | frustum_pose, 35 | aspect_ratio=4.0 / 3.0, 36 | cam_marker_size=1.0, 37 | flip=False 38 | ): 39 | """ Gets a textured mesh of an image. 
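The image is flipped to match scene space and textured onto a quad placed at depth -cam_marker_size / 2
in the camera frame, which is then transformed by frustum_pose.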
""" 40 | 41 | pil_image = Image.open(image_path) 42 | pil_image = ImageOps.flip(pil_image) # flip top/bottom to align with scene space 43 | 44 | width = 0.75 45 | height = width * aspect_ratio 46 | width *= cam_marker_size 47 | height *= cam_marker_size 48 | 49 | if flip: 50 | pil_image = ImageOps.mirror(pil_image) # flips left/right 51 | width = -width 52 | 53 | vertices = np.zeros((4, 3)) 54 | vertices[0, :] = [width / 2, height / 2, -cam_marker_size / 2] 55 | vertices[1, :] = [width / 2, -height / 2, -cam_marker_size / 2] 56 | vertices[2, :] = [-width / 2, -height / 2, -cam_marker_size / 2] 57 | vertices[3, :] = [-width / 2, height / 2, -cam_marker_size / 2] 58 | 59 | faces = np.zeros((2, 3)) 60 | faces[0, :] = [0, 1, 2] 61 | faces[1, :] = [2, 3, 0] 62 | 63 | uvs = np.zeros((4, 2)) 64 | 65 | uvs[0, :] = [1.0, 0] 66 | uvs[1, :] = [1.0, 1.0] 67 | uvs[2, :] = [0, 1.0] 68 | uvs[3, :] = [0, 0] 69 | 70 | face_normals = np.zeros((2, 3)) 71 | face_normals[0, :] = [0.0, 0.0, 1.0] 72 | face_normals[1, :] = [0.0, 0.0, 1.0] 73 | 74 | material = trimesh.visual.texture.SimpleMaterial( 75 | image=pil_image, 76 | ambient=(1.0, 1.0, 1.0, 1.0), 77 | diffuse=(1.0, 1.0, 1.0, 1.0), 78 | ) 79 | texture = trimesh.visual.TextureVisuals( 80 | uv=uvs, 81 | image=pil_image, 82 | material=material, 83 | ) 84 | 85 | mesh = trimesh.Trimesh( 86 | vertices=vertices, 87 | faces=faces, 88 | face_normals=face_normals, 89 | visual=texture, 90 | validate=True, 91 | process=False 92 | ) 93 | 94 | def transform_trimesh(mesh, transform): 95 | """ Applies a transform to a trimesh. """ 96 | np_vertices = np.array(mesh.vertices) 97 | np_vertices = (transform @ np.concatenate([np_vertices, np.ones((np_vertices.shape[0], 1))], 1).T).T 98 | np_vertices = np_vertices / np_vertices[:, 3][:, None] 99 | mesh.vertices[:, 0] = np_vertices[:, 0] 100 | mesh.vertices[:, 1] = np_vertices[:, 1] 101 | mesh.vertices[:, 2] = np_vertices[:, 2] 102 | 103 | return mesh 104 | 105 | return transform_trimesh(mesh, frustum_pose) 106 | 107 | 108 | def normalise_vector(vect): 109 | length = np.sqrt((vect ** 2).sum()) 110 | return vect / length 111 | 112 | 113 | def cuboid_from_line(line_start, line_end, color=(255, 0, 255)): 114 | """Approximates a line with a long cuboid 115 | color is a 3-element RGB tuple, with each element a uint8 value 116 | """ 117 | # create two vectors which are both (a) perpendicular to the direction of the line and 118 | # (b) perpendicular to each other. 119 | direction = normalise_vector(line_end - line_start) 120 | random_dir = normalise_vector(np.random.rand(3)) 121 | perpendicular_x = normalise_vector(np.cross(direction, random_dir)) 122 | perpendicular_y = normalise_vector(np.cross(direction, perpendicular_x)) 123 | 124 | vertices = [] 125 | for node in (line_start, line_end): 126 | for x_offset in (-1, 1): 127 | for y_offset in (-1, 1): 128 | vert = node + THICKNESS * (perpendicular_y * y_offset + perpendicular_x * x_offset) 129 | vertices.append(vert) 130 | 131 | faces = [ 132 | (4, 5, 1, 0), 133 | (5, 7, 3, 1), 134 | (7, 6, 2, 3), 135 | (6, 4, 0, 2), 136 | (0, 1, 3, 2), # end of tube 137 | (6, 7, 5, 4), # other end of tube 138 | ] 139 | 140 | mesh = trimesh.Trimesh(vertices=np.array(vertices), faces=np.array(faces)) 141 | 142 | for c in (0, 1, 2): 143 | mesh.visual.vertex_colors[:, c] = color[c] 144 | 145 | return mesh 146 | 147 | 148 | def get_position_marker(marker_pose, marker_color, marker_extent=0.03): 149 | """ 150 | Generates a cube to signify a singular camera position. 
151 | 152 | @param marker_pose: 4x4 camera pose, OpenGL convention 153 | @param marker_color: RGB color of the marker 154 | @param marker_extent: size of the marker, marker is a cube of this side length 155 | """ 156 | current_pos_marker = trimesh.primitives.Box( 157 | extents=(marker_extent, marker_extent, marker_extent), 158 | transform=marker_pose) 159 | for c in (0, 1, 2): 160 | current_pos_marker.visual.vertex_colors[:, c] = marker_color[c] 161 | 162 | return current_pos_marker 163 | 164 | 165 | def generate_grid(frame_idx, cmap): 166 | """ 167 | Generates a grid of lines that fade in over time. 168 | 169 | @param frame_idx: Controls the fade-in of the grid. 170 | @param cmap: Color map for the grid. 171 | @return: trimesh object of the grid. 172 | """ 173 | 174 | y_offset = -2.5 175 | z_offset = -2 176 | line_count = 100 177 | width = line_count // 2 178 | 179 | grid_edges_1 = [np.array([-width, y_offset, i + z_offset, width, y_offset, i + z_offset]) for i in 180 | range(1, line_count // 2 + 1)] 181 | grid_edges_2 = [np.array([-width, y_offset, i + z_offset, width, y_offset, i + z_offset]) for i in 182 | range(-line_count // 2, 0)] 183 | grid_edges_2.reverse() 184 | grid_edges = [val for pair in zip(grid_edges_1, grid_edges_2) for val in pair] 185 | grid_edges = [np.array([-width, y_offset, z_offset, width, y_offset, z_offset])] + grid_edges 186 | 187 | cuboids = [] 188 | for edge_idx, edge in enumerate(grid_edges): 189 | 190 | opacity = max(0, min(1, (frame_idx - edge_idx) / 10) * 255) 191 | opacity = max(0, min(opacity, 245 - edge_idx * 5)) 192 | if opacity == 0: 193 | continue 194 | 195 | color = cmap[int(opacity)] * 255 196 | 197 | line_cuboid = cuboid_from_line(line_start=edge[:3], 198 | line_end=edge[3:], 199 | color=color) 200 | cuboids.append(line_cuboid) 201 | 202 | grid_edges_1 = [np.array([i, y_offset, -width + z_offset, i, y_offset, width + z_offset]) for i in 203 | range(1, line_count // 2 + 1)] 204 | grid_edges_2 = [np.array([i, y_offset, -width + z_offset, i, y_offset, width + z_offset]) for i in 205 | range(-line_count // 2, 0)] 206 | grid_edges_2.reverse() 207 | grid_edges = [val for pair in zip(grid_edges_1, grid_edges_2) for val in pair] 208 | grid_edges = [np.array([0, y_offset, -width + z_offset, 0, y_offset, width + z_offset])] + grid_edges 209 | 210 | for edge_idx, edge in enumerate(grid_edges): 211 | 212 | opacity = max(0, min(1, (frame_idx - edge_idx) / 10) * 255) 213 | opacity = max(0, min(opacity, 245 - edge_idx * 5)) 214 | if opacity == 0: 215 | continue 216 | 217 | color = cmap[int(opacity)] * 255 218 | 219 | line_cuboid = cuboid_from_line(line_start=edge[:3], 220 | line_end=edge[3:], 221 | color=color) 222 | cuboids.append(line_cuboid) 223 | 224 | if len(cuboids) == 0: 225 | return None 226 | else: 227 | return trimesh.util.concatenate(cuboids) 228 | 229 | 230 | def generate_frustum_at_position(rotation, translation, color, size, border_only=False): 231 | """Generates a frustum mesh at a specified (rotation, translation), with given color and size 232 | : rotation is a 3x3 numpy array 233 | : translation is a 3-long numpy vector 234 | : color is a 3-long numpy vector or tuple or list; each element is a uint8 RGB value 235 | : size is a float 236 | : border_only is a boolean that controls whether to only draw the border of the image 237 | """ 238 | 239 | transformed_frustum_verts = \ 240 | size * rotation.dot(origin_frustum_verts.T).T + translation[None, :] 241 | 242 | cuboids = [] 243 | for edge in frustum_edges: 244 | line_cuboid = 
cuboid_from_line(line_start=transformed_frustum_verts[edge[0]], 245 | line_end=transformed_frustum_verts[edge[1]], 246 | color=color) 247 | cuboids.append(line_cuboid) 248 | 249 | if border_only: 250 | cuboids = cuboids[4:] 251 | 252 | return trimesh.util.concatenate(cuboids) --------------------------------------------------------------------------------