├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── benchmark ├── config.py ├── extended_datasets.md ├── mapfree.py ├── metrics.py ├── reprojection.py ├── scannet.py ├── sevenscenes.py ├── test_metrics.py └── utils.py ├── config ├── default.py ├── mapfree.yaml ├── mapfree_multi.yaml ├── matching │ ├── mapfree │ │ ├── loftr_emat_dptkitti.yaml │ │ ├── loftr_emat_dptnyu.yaml │ │ ├── loftr_pnp_dptkitti.yaml │ │ ├── loftr_pnp_dptnyu.yaml │ │ ├── sg_emat_dptkitti.yaml │ │ ├── sg_emat_dptnyu.yaml │ │ ├── sg_pnp_dptkitti.yaml │ │ ├── sg_pnp_dptnyu.yaml │ │ ├── sg_procrustes_dptkitti.yaml │ │ ├── sift_emat_dptkitti.yaml │ │ ├── sift_emat_dptnyu.yaml │ │ ├── sift_pnp_dptkitti.yaml │ │ └── sift_pnp_dptnyu.yaml │ ├── scannet │ │ ├── loftr_emat_dpt.yaml │ │ ├── loftr_emat_gt.yaml │ │ ├── loftr_emat_planercnn.yaml │ │ ├── loftr_pnp_dpt.yaml │ │ ├── loftr_pnp_gt.yaml │ │ ├── loftr_pnp_planercnn.yaml │ │ ├── loftr_procrustes_dpt.yaml │ │ ├── loftr_procrustes_dpt_icp.yaml │ │ ├── loftr_procrustes_gt.yaml │ │ ├── loftr_procrustes_gt_icp.yaml │ │ ├── loftr_procrustes_planercnn.yaml │ │ ├── loftr_procrustes_planercnn_icp.yaml │ │ ├── sg_emat_dpt.yaml │ │ ├── sg_emat_gt.yaml │ │ ├── sg_emat_planercnn.yaml │ │ ├── sg_pnp_dpt.yaml │ │ ├── sg_pnp_gt.yaml │ │ ├── sg_pnp_planercnn.yaml │ │ ├── sg_procrustes_dpt.yaml │ │ ├── sg_procrustes_gt.yaml │ │ ├── sg_procrustes_planercnn.yaml │ │ ├── sift_emat_dpt.yaml │ │ ├── sift_emat_gt.yaml │ │ ├── sift_emat_planercnn.yaml │ │ ├── sift_pnp_dpt.yaml │ │ ├── sift_pnp_gt.yaml │ │ ├── sift_pnp_planercnn.yaml │ │ ├── sift_procrustes_dpt.yaml │ │ ├── sift_procrustes_dpt_icp.yaml │ │ ├── sift_procrustes_gtdepth.yaml │ │ ├── sift_procrustes_gtdepth_icp.yaml │ │ ├── sift_procrustes_planercnn.yaml │ │ ├── sift_procrustes_planercnn_icp.yaml │ │ └── sift_procrustes_smdp.yaml │ └── sevenscenes │ │ ├── loftr_emat_planercnn.yaml │ │ ├── loftr_pnp_planercnn.yaml │ │ ├── sg_emat_planercnn.yaml │ │ ├── sg_pnp_planercnn.yaml │ │ ├── sift_emat_planercnn.yaml │ │ └── sift_pnp_planercnn.yaml ├── regression │ ├── mapfree │ │ ├── 3d3d.yaml │ │ ├── 3d3d_lowoverlap.yaml │ │ ├── 3d3d_no_posencoder.yaml │ │ ├── 3d3d_no_warping.yaml │ │ ├── 3d3d_weighted_loss.yaml │ │ ├── multiframe │ │ │ └── 3d3d_multi.yaml │ │ ├── rot6d_trans.yaml │ │ ├── rotbin_trans.yaml │ │ ├── rotbin_transdirectionbin_scale.yaml │ │ ├── rotbin_transdirectionbin_scale_lowoverlap.yaml │ │ ├── rotbin_transdirectionbin_scale_qkv.yaml │ │ ├── rotquat_trans.yaml │ │ └── rotquat_transdirection_scale.yaml │ └── scannet │ │ ├── 3d3d.yaml │ │ ├── 3d3d_dual_posenc.yaml │ │ ├── 3d3d_dual_posenc_upsampling.yaml │ │ ├── 3d3d_half_cv.yaml │ │ ├── 3d3d_lowoverlap.yaml │ │ ├── 3d3d_no_avgpool.yaml │ │ ├── 3d3d_qkv.yaml │ │ ├── 3d3d_with_dustbin.yaml │ │ └── rotbin_transdirectionbin_scale.yaml ├── scannet.yaml ├── sevenscenes.yaml └── utils.py ├── environment.yml ├── environment_eccv22.yml ├── etc ├── feature_matching_baselines │ ├── compute.py │ ├── matchers.py │ └── utils.py └── teaser.png ├── lib ├── datasets │ ├── datamodules.py │ ├── mapfree.py │ ├── sampler.py │ ├── scannet.py │ ├── sevenscenes.py │ └── utils.py ├── models │ ├── builder.py │ ├── matching │ │ ├── feature_matching.py │ │ ├── model.py │ │ └── pose_solver.py │ └── regression │ │ ├── aggregator.py │ │ ├── encoder │ │ ├── preact.py │ │ ├── resnet.py │ │ └── resunet.py │ │ ├── head.py │ │ └── model.py └── utils │ ├── data.py │ ├── localize.py │ ├── logger.py │ ├── loss.py │ ├── metrics.py │ ├── rotationutils.py │ ├── solver.py │ └── visualisation.py ├── 
pyrightconfig.json ├── submission.py ├── train.py └── visualisation ├── README.md ├── environment.yml ├── lazy_camera.py ├── render_estimates.py ├── render_scene.py └── render_util.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .DS_Store 3 | __pycache__/ 4 | .mypy_cache 5 | *.egg-info 6 | tmp/ 7 | 8 | data/ 9 | outputs/ 10 | results/ 11 | 12 | # scripts for running experiments 13 | .idea/ 14 | 15 | # testing 16 | .coverage 17 | .coverage.* 18 | *,cover 19 | .pytest_cache 20 | 21 | # Python related 22 | # Byte-compiled / optimized / DLL files 23 | __pycache__/ 24 | *.py[cod] 25 | *$py.class 26 | 27 | # C extensions 28 | *.so 29 | 30 | # Distribution / packaging 31 | .gradle 32 | .Python 33 | build/ 34 | develop-eggs/ 35 | dist/ 36 | downloads/ 37 | eggs/ 38 | .eggs/ 39 | lib64/ 40 | parts/ 41 | sdist/ 42 | var/ 43 | wheels/ 44 | pip-wheel-metadata/ 45 | share/python-wheels/ 46 | *.egg-info/ 47 | .installed.cfg 48 | *.egg 49 | MANIFEST 50 | 51 | # PyInstaller 52 | # Usually these files are written by a python script from a template 53 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 54 | *.manifest 55 | *.spec 56 | 57 | # Installer logs 58 | pip-log.txt 59 | pip-delete-this-directory.txt 60 | 61 | # Unit test / coverage reports 62 | htmlcov/ 63 | .tox/ 64 | .nox/ 65 | .coverage 66 | .coverage.* 67 | .cache 68 | nosetests.xml 69 | coverage.xml 70 | *.cover 71 | .hypothesis/ 72 | .pytest_cache/ 73 | 74 | # Translations 75 | *.mo 76 | *.pot 77 | 78 | # Django stuff: 79 | *.log 80 | local_settings.py 81 | db.sqlite3 82 | db.sqlite3-journal 83 | 84 | # Flask stuff: 85 | instance/ 86 | .webassets-cache 87 | 88 | # Scrapy stuff: 89 | .scrapy 90 | 91 | # Sphinx documentation 92 | docs/_build/ 93 | 94 | # PyBuilder 95 | target/ 96 | 97 | # Jupyter Notebook 98 | .ipynb_checkpoints 99 | 100 | # IPython 101 | profile_default/ 102 | ipython_config.py 103 | 104 | # pyenv 105 | .python-version 106 | 107 | # pipenv 108 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 109 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 110 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 111 | # install all needed dependencies. 
112 | #Pipfile.lock 113 | 114 | # celery beat schedule file 115 | celerybeat-schedule 116 | 117 | # SageMath parsed files 118 | *.sage.py 119 | 120 | # Environments 121 | .env 122 | .venv 123 | env/ 124 | venv/ 125 | ENV/ 126 | env.bak/ 127 | venv.bak/ 128 | 129 | # Spyder project settings 130 | .spyderproject 131 | .spyproject 132 | 133 | # Rope project settings 134 | .ropeproject 135 | 136 | # mkdocs documentation 137 | /site 138 | 139 | # mypy 140 | .mypy_cache/ 141 | .dmypy.json 142 | dmypy.json 143 | 144 | # Pyre type checker 145 | .pyre/ 146 | 147 | /visloc/keyframes/data/ 148 | 149 | # Blender backup files 150 | .blend1 -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "etc/feature_matching_baselines/LoFTR"] 2 | path = etc/feature_matching_baselines/LoFTR 3 | url = git@github.com:zju3dv/LoFTR.git 4 | [submodule "etc/feature_matching_baselines/SuperGlue"] 5 | path = etc/feature_matching_baselines/SuperGlue 6 | url = git@github.com:magicleap/SuperGluePretrainedNetwork.git 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright © Niantic, Inc. 2022. Patent Pending. 2 | 3 | All rights reserved. 4 | 5 | 6 | 7 | ======================================================================================= 8 | 9 | 10 | 11 | This Software is licensed under the terms of the following Map-free Relocalizaation 12 | license which allows for non-commercial use only. For any other use of the software 13 | not covered by the terms of this license, please contact partnerships@nianticlabs.com 14 | 15 | 16 | 17 | ======================================================================================= 18 | 19 | 20 | 21 | Map-free Visual Relocalization License 22 | 23 | 24 | This Agreement is made by and between the Licensor and the Licensee as 25 | defined and identified below. 26 | 27 | 28 | 1. Definitions. 29 | 30 | In this Agreement (“the Agreement”) the following words shall have the 31 | following meanings: 32 | 33 | "Authors" shall mean E. Arnold, J. Wynn, S. Vicente, G. Garcia-Hernando, 34 | A. Monszpart, V. Prisacariu, D. Turmukhambetov, E. Brachmann 35 | "Licensee" Shall mean the person or organization agreeing to use the 36 | Software in accordance with these terms and conditions. 37 | "Licensor" shall mean Niantic Inc., a company organized and existing under 38 | the laws of Delaware, whose principal place of business is at 1 Ferry Building, 39 | Suite 200, San Francisco, 94111. 40 | "Software" shall mean the Map-free Relocalization Software uploaded by 41 | Licensor to the GitHub repository at https://github.com/nianticlabs/map-free-reloc 42 | on October 10th 2022 in source code or object code form and any 43 | accompanying documentation as well as any modifications or additions uploaded 44 | to the same GitHub repository by Licensor. 45 | 46 | 47 | 2. License. 48 | 49 | 2.1 The Licensor has all necessary rights to grant a license under: (i) 50 | copyright and rights in the nature of copyright subsisting in the Software; and 51 | (ii) certain patent rights resulting from a patent application(s) filed by the 52 | Licensor in the United States and/or other jurisdictions in connection with the 53 | Software. 
The Licensor grants the Licensee for the duration of this Agreement, 54 | a free of charge, non-sublicenseable, non-exclusive, non-transferable copyright 55 | and patent license (in consequence of said patent application(s)) to use the 56 | Software for non-commercial purpose only, including teaching and research at 57 | educational institutions and research at not-for-profit research institutions 58 | in accordance with the provisions of this Agreement. Non-commercial use 59 | expressly excludes any profit-making or commercial activities, including without 60 | limitation sale, license, manufacture or development of commercial products, use in 61 | commercially-sponsored research, use at a laboratory or other facility owned or 62 | controlled (whether in whole or in part) by a commercial entity, provision of 63 | consulting service, use for or on behalf of any commercial entity, use in 64 | research where a commercial party obtains rights to research results or any 65 | other benefit, and use of the code in any models, model weights or code 66 | resulting from such procedure in any commercial product. Notwithstanding the 67 | foregoing restrictions, you can use this code for publishing comparison results 68 | for academic papers, including retraining on your own data. Any use of the 69 | Software for any purpose other than pursuant to the license grant set forth 70 | above shall automatically terminate this License. 71 | 72 | 73 | 2.2 The Licensee is permitted to make modifications to the Software 74 | provided that any distribution of such modifications is in accordance with 75 | Clause 3. 76 | 77 | 2.3 Except as expressly permitted by this Agreement and save to the 78 | extent and in the circumstances expressly required to be permitted by law, the 79 | Licensee is not permitted to rent, lease, sell, offer to sell, or loan the 80 | Software or its associated documentation. 81 | 82 | 83 | 3. Redistribution and modifications 84 | 85 | 3.1 The Licensee may reproduce and distribute copies of the Software, with 86 | or without modifications, in source format only and only to this same GitHub 87 | repository , and provided that any and every distribution is accompanied by an 88 | unmodified copy of this License and that the following copyright notice is 89 | always displayed in an obvious manner: Copyright © Niantic, Inc. 2018. All 90 | rights reserved. 91 | 92 | 93 | 3.2 In the case where the Software has been modified, any distribution must 94 | include prominent notices indicating which files have been changed. 95 | 96 | 3.3 The Licensee shall cause any work that it distributes or publishes, 97 | that in whole or in part contains or is derived from the Software or any part 98 | thereof (“Work based on the Software”), to be licensed as a whole at no charge 99 | to all third parties entitled to a license to the Software under the terms of 100 | this License and on the same terms provided in this License. 101 | 102 | 103 | 4. Duration. 104 | 105 | This Agreement is effective until the Licensee terminates it by destroying 106 | the Software, any Work based on the Software, and its documentation together 107 | with all copies. It will also terminate automatically if the Licensee fails to 108 | abide by its terms. Upon automatic termination the Licensee agrees to destroy 109 | all copies of the Software, Work based on the Software, and its documentation. 110 | 111 | 112 | 5. Disclaimer of Warranties. 113 | 114 | The Software is provided as is. 
To the maximum extent permitted by law, 115 | Licensor provides no warranties or conditions of any kind, either express or 116 | implied, including without limitation, any warranties or condition of title, 117 | non-infringement or fitness for a particular purpose. 118 | 119 | 120 | 6. LIMITATION OF LIABILITY. 121 | 122 | IN NO EVENT SHALL THE LICENSOR AND/OR AUTHORS BE LIABLE FOR ANY DIRECT, 123 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING 124 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 125 | DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 126 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 127 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 128 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 129 | 130 | 131 | 7. Indemnity. 132 | 133 | The Licensee shall indemnify the Licensor and/or Authors against all third 134 | party claims that may be asserted against or suffered by the Licensor and/or 135 | Authors and which relate to use of the Software by the Licensee. 136 | 137 | 138 | 8. Intellectual Property. 139 | 140 | 8.1 As between the Licensee and Licensor, copyright and all other 141 | intellectual property rights subsisting in or in connection with the Software 142 | and supporting information shall remain at all times the property of the 143 | Licensor. The Licensee shall acquire no rights in any such material except as 144 | expressly provided in this Agreement. 145 | 146 | 8.2 No permission is granted to use the trademarks or product names of the 147 | Licensor except as required for reasonable and customary use in describing the 148 | origin of the Software and for the purposes of abiding by the terms of Clause 149 | 3.1. 150 | 151 | 8.3 The Licensee shall promptly notify the Licensor of any improvement or 152 | new use of the Software (“Improvements”) in sufficient detail for Licensor to 153 | evaluate the Improvements. The Licensee hereby grants the Licensor and its 154 | affiliates a non-exclusive, fully paid-up, royalty-free, irrevocable and 155 | perpetual license to all Improvements for non-commercial academic research and 156 | teaching purposes upon creation of such improvements. 157 | 158 | 8.4 The Licensee grants an exclusive first option to the Licensor to be 159 | exercised by the Licensor within three (3) years of the date of notification of 160 | an Improvement under Clause 8.3 to use any the Improvement for commercial 161 | purposes on terms to be negotiated and agreed by Licensee and Licensor in good 162 | faith within a period of six (6) months from the date of exercise of the said 163 | option (including without limitation any royalty share in net income from such 164 | commercialization payable to the Licensee, as the case may be). 165 | 166 | 167 | 9. Acknowledgements. 168 | 169 | The Licensee shall acknowledge the Authors and use of the Software in the 170 | publication of any work that uses, or results that are achieved through, the 171 | use of the Software. The following citation shall be included in the 172 | acknowledgement: “Map-free Visual Relocalization: Metric Pose Relative to a 173 | Single Image", by E. Arnold, J. Wynn, S. Vicente, G. Garcia-Hernando, 174 | A. Monszpart, V. Prisacariu, D. Turmukhambetov, E. Brachmann, ECCV 2022. 175 | 176 | 177 | 10. Governing Law. 
178 | 179 | This Agreement shall be governed by, construed and interpreted in 180 | accordance with English law and the parties submit to the exclusive 181 | jurisdiction of the English courts. 182 | 183 | 184 | 11. Termination. 185 | 186 | Upon termination of this Agreement, the licenses granted hereunder will 187 | terminate and Sections 5, 6, 7, 8, 9, 10 and 11 shall survive any termination 188 | of this Agreement. -------------------------------------------------------------------------------- /benchmark/config.py: -------------------------------------------------------------------------------- 1 | # translation and rotation thresholds [meters, degrees] 2 | # used to compute Precision and AUC considering Pose Error 3 | t_threshold = 0.25 4 | R_threshold = 5 5 | 6 | # reprojection (VCRE) threshold [pixels] 7 | # used to compute Precision and AUC considering VCRE 8 | vcre_threshold = 90 9 | -------------------------------------------------------------------------------- /benchmark/extended_datasets.md: -------------------------------------------------------------------------------- 1 | # Dataset preparation 2 | 3 | ## Scannet 4 | - Download the Scannet dataset following the [official instructions](https://github.com/ScanNet/ScanNet#scannet-data). 5 | - Extract the dataset root folder to `data/scannet` 6 | - Download the [Scannet indices](https://storage.googleapis.com/niantic-lon-static/research/map-free-reloc/assets/scannet_indices.zip) used for train/val/test splits. 7 | - Download [estimated depth maps and correspondences](https://storage.googleapis.com/niantic-lon-static/research/map-free-reloc/assets/scannet_baselines_aux.zip). 8 | - Extract both zip files contents to `data/` 9 |
10 | Note on Scannet indices 11 | 12 | - The test pairs are the same as SuperGlue/LoFTR (sequences `0707_00 - 0806_00`); 13 | - Training uses SG/LoFTR pairs from sequences `0000_00 - 0699_00`; 14 | - The validation uses the SG/LoFTR pairs from sequences `0700_00 - 0706_00`; 15 | - This split is used to prevent overlapping train/val sequences. 16 |
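For reference, the split implied by these sequence ranges can be expressed as a tiny helper. This is only an illustrative sketch (the function below is not part of the repository) and assumes the usual `sceneXXXX_YY` Scannet sequence naming:

```python
def scannet_split(scene_id: str) -> str:
    """Illustrative only: map e.g. 'scene0750_00' to the split implied by the ranges above."""
    seq = int(scene_id[5:9])  # 'scene0750_00' -> 750
    if seq <= 699:
        return 'train'
    if seq <= 706:
        return 'val'
    return 'test'  # sequences 0707_00 - 0806_00
```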
17 |
18 | ## 7Scenes
19 | - Download the [7Scenes dataset](https://www.microsoft.com/en-us/research/project/rgb-d-dataset-7-scenes/).
20 | - Download [7Scenes pairs indices](https://storage.googleapis.com/niantic-lon-static/research/map-free-reloc/assets/sevenscenes_pairs.zip).
21 | - Download [7Scenes feature-matching correspondences](https://storage.googleapis.com/niantic-lon-static/research/map-free-reloc/assets/sevenscenes_correspondences.tar.gz).
22 | - Download [7Scenes PlaneRCNN estimated depth maps](https://storage.googleapis.com/niantic-lon-static/research/map-free-reloc/assets/sevenscenes_prcnn_depth.zip).
23 | - Extract the contents of all zip/tar archives to `data/sevenscenes`
24 |
25 | ## Pre-computed correspondences and depth maps
26 | The pre-computed correspondences (SIFT, SuperGlue+SuperPoint and LoFTR) can be found at the following paths:
27 | - Scannet: `data/scannet_misc/correspondences_{feature_method}_scannet_test.npz`
28 | - 7Scenes: `data/sevenscenes/{scene}/correspondences_{feature_method}_test_pairs_{pair_variant}.npz`
29 |
30 | The pre-computed depth maps can be found at the following paths:
31 | - Scannet (PlaneRCNN monodepth): `data/scannet_misc/scannet_test_depthmaps_planercnn.npz`
32 | - Scannet (DPT NYU monodepth): `data/scannet_misc/scannet_test_depthmaps_dpt.npz`
33 | - 7Scenes (PlaneRCNN monodepth): `data/sevenscenes/{scene}/frame_{framenum}.depth.planercnn.png`
34 |
35 | # 📈 Scannet Relative Pose Evaluation
36 | ```bash
37 | python -m benchmark.scannet [model config file] [--checkpoint path_to_checkpoint]
38 | ```
39 | Each time the script runs, a result file is created in `results/scannet/` with the same name as the config file.
40 | This result file contains the rotation and translation errors of each sample in the Scannet test set.
41 | A log text file with the config file name is also created in `results/scannet/`.
42 |
43 | For example, feature-matching methods (more options in [config/matching/scannet](config/matching/scannet)) can be evaluated using:
44 | ```bash
45 | # for E-mat based R,t, with GT depth maps to get metric pose
46 | python -m benchmark.scannet config/matching/scannet/sift_emat_gt.yaml
47 |
48 | # for E-mat based R,t, with DPT monodepth to get metric pose
49 | python -m benchmark.scannet config/matching/scannet/sift_emat_dpt.yaml
50 |
51 | # for PnP based R,t, with PlaneRCNN monodepth to get metric pose
52 | python -m benchmark.scannet config/matching/scannet/sift_pnp_planercnn.yaml
53 |
54 | # for Procrustes based R,t, with DPT monodepth to backproject correspondences to 3D
55 | python -m benchmark.scannet config/matching/scannet/sift_procrustes_dpt.yaml
56 | ```
57 |
58 | # 📈 7Scenes Visual Localisation Evaluation
59 | ```bash
60 | python -m benchmark.sevenscenes [model config file] \
61 | [dataset config file] \
62 | [--checkpoint path_to_checkpoint] \
63 | [--test_pair_txt pair_file_name]
64 | ```
65 |
66 | - Use `config/sevenscenes.yaml` as the dataset config.
67 | - `--test_pair_txt` specifies the pairs of training/query images used in the evaluation. It overrides the value set in `config/sevenscenes.yaml`; the default is `test_pairs.5nn.5cm10m.vlad.minmax.txt` (full EssNetPairs).
68 | - `--one_nn` keeps only the single nearest-neighbour training image with the highest DVLAD similarity to each query image.
69 | - `--triang` uses triangulation (discards translation vector norm) to estimate the absolute pose of the query image 70 | - `--triang_ransac_thres` is the angular inlier threshold for the triangulation RANSAC loop 71 | 72 | Note that if neither `--triang` or `--one_nn` is specified, the absolute pose of a query image is computed using all its nearest neighbours. 73 | The absolute pose predictions from each neighbour are aggregated using geometric median of the translation vectors, and the chordal L2 mean of rotation matrices. 74 | 75 | Once completed, this evaluation saves the result log as `test_results.txt`. 76 | Additionally, the predicted absolute pose for each query image in a SCENE is saved in a file `pose_7scenes_SCENE.txt`. 77 | Each line in this file follows the format: `image_path qw qx qy qz tx ty tz`, where the quaternion `q` and translation vector `t` encode the predicted absolute pose from world to camera coordinates. 78 | 79 | The evaluation code supports feature-matching baselines (SIFT/SuperGlue/LoFTR) for non-metric relative pose (absolute pose obtained via triangulation); and feature-matching & predicted depth, where the metric pose can be obtained using scale from depth. 80 | For example, the baseline SuperGlue + PlaneRCNN depth considering a database of only 10 images per scan, and considering only the closest (DVLAD similarity) database image can be executed with: 81 | ```bash 82 | python -m benchmark.sevenscenes \ 83 | config/baseline/sevenscenes/baseline_sg_emat_metric_planercnn_depth.yaml \ 84 | --test_pair_txt test_pairs_ours_km10.txt \ 85 | --one_nn 86 | ``` 87 | Other baselines, including SIFT/LoFTR are available in `config/matching/sevenscenes/`. 88 | We also provide different test pairs, considering different numbers of database images, namely, `test_pairs_ours_{km1/km2/km5/km10}.txt`. 89 | For each one of these pairs, the database images are selected based on the K-Means clustering of their D-VLAD features. 90 | The pairs file formatting follows the pattern from [EssNet](https://vision.in.tum.de/webshare/u/zhouq/visloc-datasets/README.md). 91 | 92 | The other evaluation flags also apply for baselines, for example, one can compute results for SuperGlue + triangulation: 93 | ```bash 94 | python -m benchmark.sevenscenes \ 95 | config/baseline/sevenscenes/baseline_sg_emat_metric_planercnn_depth.yaml \ 96 | --test_pair_txt test_pairs_ours_km10.txt \ 97 | --triang 98 | ``` 99 | 100 | Note that the correspondences from feature-matching baselines have been pre-computed for each test pair, and saved in a file for each scene of the 7Scenes dataset. 
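As described above, when a query image has more than one nearest neighbour, the per-neighbour absolute pose estimates are fused using the geometric median of the translation vectors and the chordal L2 mean of the rotation matrices (the benchmark itself uses the routines imported from `lib/utils/localize.py` for this). The snippet below is only a minimal NumPy sketch of these two standard operations, for reference; the function names are ours, not the repository's API:

```python
import numpy as np

def geometric_median(points: np.ndarray, iters: int = 100, eps: float = 1e-9) -> np.ndarray:
    """Geometric median of [N, 3] translation vectors via Weiszfeld iterations (sketch)."""
    y = points.mean(axis=0)
    for _ in range(iters):
        dists = np.maximum(np.linalg.norm(points - y, axis=1), eps)
        weights = 1.0 / dists
        y_next = (weights[:, None] * points).sum(axis=0) / weights.sum()
        if np.linalg.norm(y_next - y) < eps:
            return y_next
        y = y_next
    return y

def chordal_l2_mean(rotations: np.ndarray) -> np.ndarray:
    """Chordal L2 mean of [N, 3, 3] rotation matrices: project their average onto SO(3) via SVD."""
    u, _, vt = np.linalg.svd(rotations.mean(axis=0))
    return u @ np.diag([1.0, 1.0, np.linalg.det(u @ vt)]) @ vt
```

The chordal L2 mean is the rotation closest (in the Frobenius norm) to the arithmetic mean of the input rotation matrices, which the SVD projection above computes in closed form.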
-------------------------------------------------------------------------------- /benchmark/mapfree.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from collections import defaultdict 3 | from pathlib import Path 4 | from zipfile import ZipFile 5 | from io import TextIOWrapper 6 | import json 7 | import logging 8 | 9 | import numpy as np 10 | 11 | from benchmark.utils import load_poses, subsample_poses, load_K, precision_recall 12 | from benchmark.metrics import MetricManager, Inputs 13 | import benchmark.config as config 14 | from config.default import cfg 15 | 16 | 17 | def compute_scene_metrics(dataset_path: Path, submission_zip: ZipFile, scene: str): 18 | metric_manager = MetricManager() 19 | 20 | # load intrinsics and poses 21 | try: 22 | K, W, H = load_K(dataset_path / scene / 'intrinsics.txt') 23 | with (dataset_path / scene / 'poses.txt').open('r', encoding='utf-8') as gt_poses_file: 24 | gt_poses = load_poses(gt_poses_file, load_confidence=False) 25 | except FileNotFoundError as e: 26 | logging.error(f'Could not find ground-truth dataset files: {e}') 27 | raise 28 | else: 29 | logging.info( 30 | f'Loaded ground-truth intrinsics and poses for scene {scene}') 31 | 32 | # try to load estimated poses from submission 33 | try: 34 | with submission_zip.open(f'pose_{scene}.txt') as estimated_poses_file: 35 | estimated_poses_file_wrapper = TextIOWrapper( 36 | estimated_poses_file, encoding='utf-8') 37 | estimated_poses = load_poses( 38 | estimated_poses_file_wrapper, load_confidence=True) 39 | except KeyError as e: 40 | logging.warning( 41 | f'Submission does not have estimates for scene {scene}.') 42 | return dict(), len(gt_poses) 43 | except UnicodeDecodeError as e: 44 | logging.error('Unsupported file encoding: please use UTF-8') 45 | raise 46 | else: 47 | logging.info(f'Loaded estimated poses for scene {scene}') 48 | 49 | # The val/test set is subsampled by a factor of 5 50 | gt_poses = subsample_poses(gt_poses, subsample=5) 51 | 52 | # failures encode how many frames did not have an estimate 53 | # e.g. user/method did not provide an estimate for that frame 54 | # it's different from when an estimate is provided with low confidence! 55 | failures = 0 56 | 57 | # Results encoded as dict 58 | # key: metric name; value: list of values (one per frame). 59 | # e.g. results['t_err'] = [1.2, 0.3, 0.5, ...] 
60 | results = defaultdict(list) 61 | 62 | # compute metrics per frame 63 | for frame_num, (q_gt, t_gt, _) in gt_poses.items(): 64 | if frame_num not in estimated_poses: 65 | failures += 1 66 | continue 67 | 68 | q_est, t_est, confidence = estimated_poses[frame_num] 69 | inputs = Inputs(q_gt=q_gt, t_gt=t_gt, q_est=q_est, t_est=t_est, 70 | confidence=confidence, K=K[frame_num], W=W, H=H) 71 | metric_manager(inputs, results) 72 | 73 | return results, failures 74 | 75 | 76 | def aggregate_results(all_results, all_failures): 77 | # aggregate metrics 78 | median_metrics = defaultdict(list) 79 | all_metrics = defaultdict(list) 80 | for scene_results in all_results.values(): 81 | for metric, values in scene_results.items(): 82 | median_metrics[metric].append(np.median(values)) 83 | all_metrics[metric].extend(values) 84 | all_metrics = {k: np.array(v) for k, v in all_metrics.items()} 85 | assert all([v.ndim == 1 for v in all_metrics.values()] 86 | ), 'invalid metrics shape' 87 | 88 | # compute avg median metrics 89 | avg_median_metrics = {metric: np.mean( 90 | values) for metric, values in median_metrics.items()} 91 | 92 | # compute precision/AUC for pose error and reprojection errors 93 | accepted_poses = (all_metrics['trans_err'] < config.t_threshold) * \ 94 | (all_metrics['rot_err'] < config.R_threshold) 95 | accepted_vcre = all_metrics['reproj_err'] < config.vcre_threshold 96 | total_samples = len(next(iter(all_metrics.values()))) + all_failures 97 | 98 | prec_pose = np.sum(accepted_poses) / total_samples 99 | prec_vcre = np.sum(accepted_vcre) / total_samples 100 | 101 | # compute AUC for pose and VCRE 102 | _, _, auc_pose = precision_recall( 103 | inliers=all_metrics['confidence'], tp=accepted_poses, failures=all_failures) 104 | _, _, auc_vcre = precision_recall( 105 | inliers=all_metrics['confidence'], tp=accepted_vcre, failures=all_failures) 106 | 107 | # output metrics 108 | output_metrics = dict() 109 | output_metrics['Average Median Translation Error'] = avg_median_metrics['trans_err'] 110 | output_metrics['Average Median Rotation Error'] = avg_median_metrics['rot_err'] 111 | output_metrics['Average Median Reprojection Error'] = avg_median_metrics['reproj_err'] 112 | output_metrics[f'Precision @ Pose Error < ({config.t_threshold*100}cm, {config.R_threshold}deg)'] = prec_pose 113 | output_metrics[f'AUC @ Pose Error < ({config.t_threshold*100}cm, {config.R_threshold}deg)'] = auc_pose 114 | output_metrics[f'Precision @ VCRE < {config.vcre_threshold}px'] = prec_vcre 115 | output_metrics[f'AUC @ VCRE < {config.vcre_threshold}px'] = auc_vcre 116 | output_metrics[f'Estimates for % of frames'] = len(all_metrics['trans_err']) / total_samples 117 | return output_metrics 118 | 119 | 120 | def count_unexpected_scenes(scenes: tuple, submission_zip: ZipFile): 121 | submission_scenes = [fname[5:-4] 122 | for fname in submission_zip.namelist() if fname.startswith("pose_")] 123 | return len(set(submission_scenes) - set(scenes)) 124 | 125 | 126 | def main(args): 127 | dataset_path = args.dataset_path / args.split 128 | scenes = tuple(f.name for f in dataset_path.iterdir() if f.is_dir()) 129 | 130 | try: 131 | submission_zip = ZipFile(args.submission_path, 'r') 132 | except FileNotFoundError as e: 133 | logging.error(f'Could not find ZIP file in path {args.submission_path}') 134 | return 135 | 136 | all_results = dict() 137 | all_failures = 0 138 | for scene in scenes: 139 | metrics, failures = compute_scene_metrics( 140 | dataset_path, submission_zip, scene) 141 | all_results[scene] = metrics 142 | 
all_failures += failures 143 | 144 | if all_failures > 0: 145 | logging.warning( 146 | f'Submission is missing pose estimates for {all_failures} frames') 147 | 148 | unexpected_scene_count = count_unexpected_scenes(scenes, submission_zip) 149 | if unexpected_scene_count > 0: 150 | logging.warning( 151 | f'Submission contains estimates for {unexpected_scene_count} scenes outside the {args.split} set') 152 | 153 | if all((len(metrics) == 0 for metrics in all_results.values())): 154 | logging.error( 155 | f'Submission does not have any valid pose estimates') 156 | return 157 | 158 | output_metrics = aggregate_results(all_results, all_failures) 159 | output_json = json.dumps(output_metrics, indent=2) 160 | print(output_json) 161 | 162 | 163 | if __name__ == '__main__': 164 | parser = argparse.ArgumentParser( 165 | 'eval', description='Evaluate submissions for the MapFree dataset benchmark') 166 | parser.add_argument('submission_path', type=Path, 167 | help='Path to the submission ZIP file') 168 | parser.add_argument('--split', choices=('val', 'test'), default='test', 169 | help='Dataset split to use for evaluation. Default: test') 170 | parser.add_argument('--log', choices=('warning', 'info', 'error'), 171 | default='warning', help='Logging level. Default: warning') 172 | parser.add_argument('--dataset_path', type=Path, default=None, 173 | help='Path to the dataset folder') 174 | 175 | args = parser.parse_args() 176 | 177 | if args.dataset_path is None: 178 | cfg.merge_from_file('config/mapfree.yaml') 179 | args.dataset_path = Path(cfg.DATASET.DATA_ROOT) 180 | 181 | logging.basicConfig(level=args.log.upper()) 182 | try: 183 | main(args) 184 | except Exception: 185 | logging.error("Unexpected behaviour. Exiting.") 186 | -------------------------------------------------------------------------------- /benchmark/metrics.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Callable 3 | 4 | import numpy as np 5 | 6 | from benchmark.reprojection import reprojection_error 7 | from benchmark.utils import VARIANTS_ANGLE_SIN, quat_angle_error 8 | 9 | 10 | @dataclass 11 | class Inputs: 12 | q_gt: np.array 13 | t_gt: np.array 14 | q_est: np.array 15 | t_est: np.array 16 | confidence: float 17 | K: np.array 18 | W: int 19 | H: int 20 | 21 | def __post_init__(self): 22 | assert self.q_gt.shape == (4,), 'invalid gt quaternion shape' 23 | assert self.t_gt.shape == (3,), 'invalid gt translation shape' 24 | assert self.q_est.shape == (4,), 'invalid estimated quaternion shape' 25 | assert self.t_est.shape == (3,), 'invalid estimated translation shape' 26 | assert self.confidence >= 0, 'confidence must be non negative' 27 | assert self.K.shape == (3, 3), 'invalid K shape' 28 | assert self.W > 0, 'invalid image width' 29 | assert self.H > 0, 'invalid image height' 30 | 31 | 32 | class MyDict(dict): 33 | def register(self, fn) -> Callable: 34 | """Registers a function within dict(fn_name -> fn_ref). 
35 | This is used to evaluate all registered metrics in MetricManager.__call__()""" 36 | self[fn.__name__] = fn 37 | return fn 38 | 39 | 40 | class MetricManager: 41 | _metrics = MyDict() 42 | 43 | def __call__(self, inputs: Inputs, results: dict) -> None: 44 | for metric, metric_fn in self._metrics.items(): 45 | results[metric].append(metric_fn(inputs)) 46 | 47 | @staticmethod 48 | @_metrics.register 49 | def trans_err(inputs: Inputs) -> np.float64: 50 | return np.linalg.norm(inputs.t_est - inputs.t_gt) 51 | 52 | @staticmethod 53 | @_metrics.register 54 | def rot_err(inputs: Inputs, variant: str = VARIANTS_ANGLE_SIN) -> np.float64: 55 | return quat_angle_error(label=inputs.q_est, pred=inputs.q_gt, variant=variant)[0, 0] 56 | 57 | @staticmethod 58 | @_metrics.register 59 | def reproj_err(inputs: Inputs) -> float: 60 | return reprojection_error( 61 | q_est=inputs.q_est, t_est=inputs.t_est, q_gt=inputs.q_gt, t_gt=inputs.t_gt, K=inputs.K, 62 | W=inputs.W, H=inputs.H) 63 | 64 | @staticmethod 65 | @_metrics.register 66 | def confidence(inputs: Inputs) -> float: 67 | return inputs.confidence 68 | -------------------------------------------------------------------------------- /benchmark/reprojection.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | 3 | import numpy as np 4 | from transforms3d.quaternions import quat2mat 5 | 6 | 7 | def project(pts: np.ndarray, K: np.ndarray, img_size: List[int] or Tuple[int] = None) -> np.ndarray: 8 | """Projects 3D points to image plane. 9 | 10 | Args: 11 | - pts [N, 3/4]: points in camera coordinates (homogeneous or non-homogeneous) 12 | - K [3, 3]: intrinsic matrix 13 | - img_size (width, height): optional, clamp projection to image borders 14 | Outputs: 15 | - uv [N, 2]: coordinates of projected points 16 | """ 17 | 18 | assert len(pts.shape) == 2, 'incorrect number of dimensions' 19 | assert pts.shape[1] in [3, 4], 'invalid dimension size' 20 | assert K.shape == (3, 3), 'incorrect intrinsic shape' 21 | 22 | uv_h = (K @ pts[:, :3].T).T 23 | uv = uv_h[:, :2] / uv_h[:, -1:] 24 | 25 | if img_size is not None: 26 | uv[:, 0] = np.clip(uv[:, 0], 0, img_size[0]) 27 | uv[:, 1] = np.clip(uv[:, 1], 0, img_size[1]) 28 | 29 | return uv 30 | 31 | 32 | def get_grid_multipleheight() -> np.ndarray: 33 | # create grid of points 34 | ar_grid_step = 0.3 35 | ar_grid_num_x = 7 36 | ar_grid_num_y = 4 37 | ar_grid_num_z = 7 38 | ar_grid_z_offset = 1.8 39 | ar_grid_y_offset = 0 40 | 41 | ar_grid_x_pos = np.arange(0, ar_grid_num_x)-(ar_grid_num_x-1)/2 42 | ar_grid_x_pos *= ar_grid_step 43 | 44 | ar_grid_y_pos = np.arange(0, ar_grid_num_y)-(ar_grid_num_y-1)/2 45 | ar_grid_y_pos *= ar_grid_step 46 | ar_grid_y_pos += ar_grid_y_offset 47 | 48 | ar_grid_z_pos = np.arange(0, ar_grid_num_z).astype(float) 49 | ar_grid_z_pos *= ar_grid_step 50 | ar_grid_z_pos += ar_grid_z_offset 51 | 52 | xx, yy, zz = np.meshgrid(ar_grid_x_pos, ar_grid_y_pos, ar_grid_z_pos) 53 | ones = np.ones(xx.shape[0]*xx.shape[1]*xx.shape[2]) 54 | eye_coords = np.concatenate([c.reshape(-1, 1) 55 | for c in (xx, yy, zz, ones)], axis=-1) 56 | return eye_coords 57 | 58 | 59 | # global variable, avoids creating it again 60 | eye_coords_glob = get_grid_multipleheight() 61 | 62 | 63 | def reprojection_error( 64 | q_est: np.ndarray, t_est: np.ndarray, q_gt: np.ndarray, t_gt: np.ndarray, K: np.ndarray, 65 | W: int, H: int) -> float: 66 | eye_coords = eye_coords_glob 67 | 68 | # obtain ground-truth position of projected points 69 | uv_gt = 
project(eye_coords, K, (W, H)) 70 | 71 | # residual transformation 72 | cam2w_est = np.eye(4) 73 | cam2w_est[:3, :3] = quat2mat(q_est) 74 | cam2w_est[:3, -1] = t_est 75 | cam2w_gt = np.eye(4) 76 | cam2w_gt[:3, :3] = quat2mat(q_gt) 77 | cam2w_gt[:3, -1] = t_gt 78 | 79 | # residual reprojection 80 | eyes_residual = (np.linalg.inv(cam2w_est) @ cam2w_gt @ eye_coords.T).T 81 | uv_pred = project(eyes_residual, K, (W, H)) 82 | 83 | # get reprojection error 84 | repr_err = np.linalg.norm(uv_gt - uv_pred, ord=2, axis=1) 85 | mean_repr_err = float(repr_err.mean().item()) 86 | return mean_repr_err 87 | -------------------------------------------------------------------------------- /benchmark/scannet.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import numpy as np 4 | import torch 5 | from tqdm import tqdm 6 | 7 | from config.default import cfg 8 | from lib.utils.logger import set_log 9 | from lib.datasets.datamodules import DataModule 10 | from lib.models.builder import build_model 11 | from lib.utils.data import data_to_model_device 12 | from lib.utils.metrics import MetricsAccumulator, print_auc_table, pose_error_torch, A_metrics, precision 13 | 14 | 15 | def main(args): 16 | cfg.merge_from_file('config/scannet.yaml') 17 | cfg.merge_from_file(args.config) 18 | 19 | # Set-up dataloader and model 20 | datamodule = DataModule(cfg) 21 | dataset_loader = datamodule.test_dataloader() 22 | model = build_model(cfg, args.checkpoint) 23 | 24 | # Create logger and save to file 25 | config_name = args.config.split('/')[-1][:-5] 26 | set_log(f'results/scannet/{config_name}.txt') 27 | 28 | macc = MetricsAccumulator() 29 | 30 | for data in tqdm(dataset_loader): 31 | data = data_to_model_device(data, model) 32 | with torch.no_grad(): 33 | R, t = model(data) 34 | metrics = pose_error_torch(R, t, data['T_0to1']) 35 | macc.accumulate(metrics) 36 | 37 | agg_metrics = macc.aggregate() 38 | print(f"Median Rotation error [deg]: {np.nanmedian(agg_metrics['R_err']):.2f}") 39 | print(f"Median Translation angular error [deg]: {np.nanmedian(agg_metrics['t_err_ang']):.2f}") 40 | print(f"Median Translation Euclidean error [m]: {np.nanmedian(agg_metrics['t_err_euc']):.2f}") 41 | print_auc_table(agg_metrics) 42 | 43 | # compute precision 44 | thresholds = ((0.1, 5), (0.25, 5), (0.5, 10), (1, 20)) 45 | print("Recall @ "+"/".join([f"({t[0]:.1f}m,{t[1]:.0f}deg)" for t in thresholds])+': '+"/".join( 46 | ['{:.2f}'.format(precision(agg_metrics, t[1], t[0])) for t in thresholds])) 47 | 48 | # compute A1/A2/A3 metric for translation scale 49 | a1, a2, a3 = A_metrics(agg_metrics['t_err_scale_sym']) 50 | print(f"t_scale_error A1/A2/A3 [%]: {a1*100:.1f}/{a2*100:.1f}/{a3*100:.1f}") 51 | 52 | # compute ratio of failures (baselines) 53 | ratio_failures = np.isnan(agg_metrics['R_err']).mean() 54 | print(f'failures (not enough corr.) 
[%]: {ratio_failures*100:.1f}') 55 | 56 | # Save results to `results/' with the name of the config 57 | np.savez(f'results/scannet/{config_name}', **agg_metrics) 58 | 59 | 60 | if __name__ == '__main__': 61 | parser = argparse.ArgumentParser() 62 | parser.add_argument('config', help='path to config file') 63 | parser.add_argument('--checkpoint', help='path to checkpoint', default='') 64 | args = parser.parse_args() 65 | 66 | main(args) 67 | -------------------------------------------------------------------------------- /benchmark/sevenscenes.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | from pathlib import Path 4 | 5 | import torch 6 | from tqdm import tqdm 7 | 8 | from config.default import cfg 9 | from lib.utils.logger import set_log 10 | from lib.utils.visualisation import save_video 11 | from lib.datasets.datamodules import DataModule 12 | from lib.models.builder import build_model 13 | from lib.utils.data import data_to_model_device 14 | from lib.utils.localize import * 15 | 16 | 17 | def predict(loader, model): 18 | results_dict = {} 19 | 20 | for data in tqdm(loader): 21 | # run inference 22 | data = data_to_model_device(data, model) 23 | with torch.no_grad(): 24 | R, t = model(data) 25 | 26 | # populate results_dict 27 | train, test = data['pair_names'][0][0], data['pair_names'][1][0] 28 | scene = data['scene_id'][0] 29 | if scene not in results_dict: 30 | results_dict[scene] = {} 31 | results_dict[scene]['pair_data'] = {} 32 | results_dict[scene]['no_pt_pairs'] = [] 33 | 34 | if test not in results_dict[scene]['pair_data']: 35 | results_dict[scene]['pair_data'][test] = {} 36 | results_dict[scene]['pair_data'][test]['test_pairs'] = [] 37 | 38 | # Wrap pose label with RelaPose, AbsPose objects 39 | train_c, train_q = data['abs_c_0'][0].cpu().numpy( 40 | ).copy(), data['abs_q_0'][0].cpu().numpy().copy() 41 | train_abs_pose = AbsPose(train_q, train_c) 42 | 43 | test_c, test_q = data['abs_c_1'][0].cpu().numpy( 44 | ).copy(), data['abs_q_1'][0].cpu().numpy().copy() 45 | test_abs_pose = AbsPose(test_q, test_c) 46 | results_dict[scene]['pair_data'][test]['test_abs_pose'] = test_abs_pose 47 | 48 | rel_t_gt = data['T_0to1'][:, :3, -1].reshape(-1).cpu().numpy().copy() 49 | rel_q_gt = mat2quat(data['T_0to1'][:, :3, :3].cpu().numpy()).reshape(-1) 50 | rela_pose_lbl = RelaPose(rel_q_gt, rel_t_gt) 51 | 52 | # check for NaN's in output, meaning failure due to lack of correspondences (for correspondence based methods) 53 | R = R.detach().cpu().numpy() 54 | t = t.reshape(-1).detach().cpu().numpy() 55 | if np.isnan(R).any() or np.isnan(t).any() or np.isinf(t).any(): 56 | results_dict[scene]['no_pt_pairs'].append(data['pair_names']) 57 | else: 58 | rel_t_pred = t 59 | rel_q_pred = mat2quat(R).reshape(-1) 60 | rela_pose_pred = RelaPose(rel_q_pred, rel_t_pred) 61 | test_pair = RelaPosePair(test, train_abs_pose, rela_pose_lbl, 62 | rela_pose_pred, data['sim'].item()) 63 | test_pair.inliers = data['inliers'] if 'inliers' in data.keys() else 0 64 | results_dict[scene]['pair_data'][test]['test_pairs'].append(test_pair) 65 | 66 | return results_dict 67 | 68 | 69 | def eval(args): 70 | # Load configs 71 | cfg.merge_from_file(args.dataset_config) 72 | cfg.merge_from_file(args.config) 73 | 74 | # update test pair txt from arguments (can be set at dataset config) 75 | if args.test_pair_txt: 76 | cfg.DATASET.PAIRS_TXT.TEST = args.test_pair_txt 77 | if args.one_nn: 78 | cfg.DATASET.PAIRS_TXT.ONE_NN = True 79 | 80 | # Set log object 81 | 
args.output_root.mkdir(parents=True, exist_ok=True) 82 | set_log(args.output_root / 'test_results.txt') 83 | 84 | # Create dataloader 85 | dataloader = DataModule(cfg).test_dataloader() 86 | 87 | # Create model 88 | model = build_model(cfg, args.checkpoint) 89 | 90 | # Get predictions from model 91 | results_dict = predict(dataloader, model) 92 | np.save(args.output_root / 'rawpred.npy', results_dict) # save, just in case 93 | 94 | # Evaluate 95 | err_thres = ((0.1, 5), (0.25, 5), (0.5, 10), (1, 20)) # (meters, deg) 96 | save_res_path = args.output_root / 'results.npy' 97 | if args.triang: 98 | # Using triangulation + RANSAC 99 | eval_pipeline_with_ransac(results_dict, None, ransac_thres=args.triang_ransac_thres, 100 | ransac_iter=10, ransac_miu=1.414, pair_type='relapose', 101 | err_thres=err_thres, save_res_path=save_res_path) 102 | else: 103 | # Directly using metric relative pose estimate to obtain absolute query pose 104 | # NOTE: if there are more than 1NN for a query, the absolute pose is obtained by 105 | # the geometric median of absolute translation vectors of each NN, and 106 | # L2 chordal mean rotation of abs. rotation matrices of each NN (see more details in cal_abs_pose_err_metric) 107 | eval_pipeline_without_ransac(results_dict, err_thres=err_thres, save_res_path=save_res_path) 108 | 109 | # Create txt file per scene showing predicted pose of each query 110 | save_results_visualisation(save_res_path) 111 | 112 | # Create precision/recall plots 113 | generate_precision_recall_plots(save_res_path, err_thres[1]) 114 | 115 | if args.save_video: 116 | save_video(save_res_path, dataloader, args.output_root) 117 | 118 | 119 | if __name__ == '__main__': 120 | parser = argparse.ArgumentParser() 121 | parser.add_argument('config', help='path to config file') 122 | parser.add_argument('dataset_config', help='path to dataset config file') 123 | parser.add_argument('--checkpoint', help='path to model checkpoint', default='') 124 | parser.add_argument('--test_pair_txt', '-pair', type=str, default=None) 125 | parser.add_argument('--output_root', '-odir', type=str, default='results/') 126 | parser.add_argument( 127 | '--one_nn', action='store_true', 128 | help='keep only one nearest neighbour, the one with highest VLAD similarity. Applicable for 7Scenes, which has more than one NN. No effect on MapFree dataset, which by definition only contains 1 keyframe per scene.') 129 | parser.add_argument( 130 | '--triang', action='store_true', 131 | help='uses triangulation to compute absolute pose of query image. 
Only applicable for 7Scenes.') 132 | parser.add_argument( 133 | '--triang_ransac_thres', '-rthres', metavar='%d', type=int, nargs='+', default=[15], 134 | help='the set of triangulation ransac inlier thresolds(angle error)(default: %(default)s)') 135 | parser.add_argument( 136 | '--save_video', action='store_true', 137 | help='create a video per sequence showing results per frame (valid only for 1NN cases)') 138 | 139 | args = parser.parse_args() 140 | args.output_root = Path(args.output_root) 141 | assert (args.one_nn and args.triang) != True, 'triangulation needs more than one nearest neighbour' 142 | if args.save_video: 143 | assert args.one_nn, 'video option only available when using a single keyframe (1 nearest neighbour)' 144 | 145 | eval(args) 146 | -------------------------------------------------------------------------------- /benchmark/test_metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from transforms3d.euler import euler2quat 4 | from transforms3d.quaternions import axangle2quat, qmult, quat2mat, rotate_vector 5 | 6 | from benchmark.metrics import Inputs, MetricManager 7 | from benchmark.reprojection import project 8 | from benchmark.utils import VARIANTS_ANGLE_COS, VARIANTS_ANGLE_SIN 9 | 10 | 11 | def createInput(q_gt=None, t_gt=None, q_est=None, t_est=None, confidence=None, K=None, W=None, H=None): 12 | q_gt = np.zeros(4) if q_gt is None else q_gt 13 | t_gt = np.zeros(3) if t_gt is None else t_gt 14 | q_est = np.zeros(4) if q_est is None else q_est 15 | t_est = np.zeros(3) if t_est is None else t_est 16 | confidence = 0. if confidence is None else confidence 17 | K = np.eye(3) if K is None else K 18 | H = 1 if H is None else H 19 | W = 1 if W is None else W 20 | return Inputs(q_gt=q_gt, t_gt=t_gt, q_est=q_est, t_est=t_est, confidence=confidence, K=K, W=W, H=H) 21 | 22 | 23 | def randomQuat(): 24 | angles = np.random.uniform(0, 2*np.pi, 3) 25 | q = euler2quat(*angles) 26 | return q 27 | 28 | 29 | class TestMetrics: 30 | @pytest.mark.parametrize('run_number', range(50)) 31 | def test_t_err_tinvariance(self, run_number: int) -> None: 32 | """Computes the translation error given an initial translation and displacement of this 33 | translation. The translation error must be equal to the norm of the displacement.""" 34 | mean, var = 5, 10 35 | t0 = np.random.normal(mean, var, (3,)) 36 | displacement = np.random.normal(mean, var, (3,)) 37 | 38 | i = createInput(t_gt=t0, t_est=t0+displacement) 39 | trans_err = MetricManager.trans_err(i) 40 | assert np.isclose(trans_err, np.linalg.norm(displacement)) 41 | 42 | @pytest.mark.parametrize('run_number', range(50)) 43 | def test_trans_err_rinvariance(self, run_number: int) -> None: 44 | """Computes the translation error given estimated and gt vectors. 
45 | The translation error must be the same for a rotated version of those vectors 46 | (same random rotation)""" 47 | mean, var = 5, 10 48 | t0 = np.random.normal(mean, var, (3,)) 49 | t1 = np.random.normal(mean, var, (3,)) 50 | q = randomQuat() 51 | 52 | i = createInput(t_gt=t0, t_est=t1) 53 | trans_err = MetricManager.trans_err(i) 54 | 55 | ir = createInput(t_gt=rotate_vector(t0, q), t_est=rotate_vector(t1, q)) 56 | trans_err_r = MetricManager.trans_err(ir) 57 | 58 | assert np.isclose(trans_err, trans_err_r) 59 | 60 | @pytest.mark.parametrize('run_number', range(50)) 61 | @pytest.mark.parametrize('dtype', (np.float64, np.float32)) 62 | def test_rot_err_raxis(self, run_number: int, dtype: type) -> None: 63 | """Test rotation error for rotations around a random axis. 64 | 65 | Note: We create GT as high precision, and only downcast when calling rot_err. 66 | """ 67 | q = randomQuat().astype(np.float64) 68 | 69 | axis = np.random.uniform(low=-1, high=1, size=3).astype(np.float64) 70 | angle = np.float64(np.random.uniform(low=-np.pi, high=np.pi)) 71 | qres = axangle2quat(vector=axis, theta=angle, is_normalized=False).astype(np.float64) 72 | 73 | i = createInput(q_gt=q.astype(dtype), q_est=qmult(q, qres).astype(dtype)) 74 | rot_err = MetricManager.rot_err(i) 75 | assert isinstance(rot_err, np.float64) 76 | rot_err_expected = np.abs(np.degrees(angle)) 77 | # if we add up errors, we want them to be positive 78 | assert 0. <= rot_err 79 | rtol = 1.e-5 # numpy default 80 | atol = 1.e-8 # numpy default 81 | if isinstance(dtype, np.float32): 82 | atol = 1.e-7 # 1/50 test might fail at 1.e-8 83 | assert np.isclose(rot_err, rot_err_expected, rtol=rtol, atol=atol) 84 | 85 | @pytest.mark.parametrize('run_number', range(50)) 86 | def test_r_err_mat(self, run_number: int) -> None: 87 | q0 = randomQuat() 88 | q1 = randomQuat() 89 | 90 | i = createInput(q_gt=q0, q_est=q1) 91 | rot_err = MetricManager.rot_err(i) 92 | 93 | R0 = quat2mat(q0) 94 | R1 = quat2mat(q1) 95 | Rres = R1 @ R0.T 96 | theta = (np.trace(Rres) - 1)/2 97 | theta = np.clip(theta, -1, 1) 98 | angle = np.degrees(np.arccos(theta)) 99 | 100 | assert np.isclose(angle, rot_err) 101 | 102 | def test_reproj_error_identity(self): 103 | """Test that reprojection error is zero if poses match""" 104 | q = randomQuat() 105 | t = np.random.normal(0, 10, (3,)) 106 | i = createInput(q_gt=q, t_gt=t, q_est=q, t_est=t) 107 | 108 | reproj_err = MetricManager.reproj_err(i) 109 | assert np.isclose(reproj_err, 0) 110 | 111 | @pytest.mark.parametrize('run_number', range(10)) 112 | @pytest.mark.parametrize('variant', (VARIANTS_ANGLE_SIN,)) 113 | @pytest.mark.parametrize('dtype', (np.float64,)) 114 | def test_r_err_small(self, run_number: int, variant: str, dtype: type) -> None: 115 | """Test rotation error for small angle differences. 116 | 117 | Note: We create GT as high precision, and only downcast when calling rot_err. 
118 | """ 119 | scales_failed = [] 120 | for scale in np.logspace(start=-1, stop=-9, num=9, base=10, dtype=dtype): 121 | q = randomQuat().astype(np.float64) 122 | angle = np.float64(np.random.uniform(low=-np.pi, high=np.pi)) * scale 123 | assert isinstance(angle, np.float64) 124 | axis = np.random.uniform(low=-1., high=1., size=3).astype(np.float64) 125 | assert axis.dtype == np.float64 126 | qres = axangle2quat(vector=axis, theta=angle, is_normalized=False).astype(np.float64) 127 | assert qres.dtype == np.float64 128 | 129 | i = createInput(q_gt=q.astype(dtype), q_est=qmult(q, qres).astype(dtype)) 130 | 131 | # We expect the error to always be np.float64 for highest acc. 132 | rot_err = MetricManager.rot_err(i, variant=variant) 133 | assert isinstance(rot_err, np.float64) 134 | rot_err_expected = np.abs(np.degrees(angle)) 135 | assert isinstance(rot_err_expected, type(rot_err)) 136 | 137 | # if we add up errors, we want them to be positive 138 | assert 0. <= rot_err 139 | 140 | # check accuracy for one magnitude higher tolerance than the angle 141 | tol = 0.1 * scale 142 | # need to be more permissive for lower precision 143 | if dtype == np.float32: 144 | tol = 1.e3 * scale 145 | 146 | # cast to dtype for checking 147 | rot_err = rot_err.astype(dtype) 148 | rot_err_expected = rot_err_expected.astype(dtype) 149 | 150 | if variant == VARIANTS_ANGLE_SIN: 151 | assert np.isclose(rot_err, rot_err_expected, rtol=tol, atol=tol) 152 | elif variant == VARIANTS_ANGLE_COS: 153 | if not np.isclose(rot_err, rot_err_expected, rtol=tol, atol=tol): 154 | print(f"[variant '{variant}'] raises an error for\n" 155 | f"\trot_err: {rot_err}" 156 | f"\trot_err_expected: {rot_err_expected}" 157 | f"\trtol: {tol}" 158 | f"\tatol: {tol}") 159 | scales_failed.append(scale) 160 | if len(scales_failed): 161 | pytest.fail(f"Variant {variant} failed at scales {scales_failed}") 162 | 163 | 164 | def test_projection() -> None: 165 | xyz = np.array(((10, 20, 30), (10, 30, 50), (-20, -15, 5), 166 | (-20, -50, 10)), dtype=np.float32) 167 | K = np.eye(3) 168 | 169 | uv = np.array(((1/3, 2/3), (1/5, 3/5), (-4, -3), 170 | (-2, -5)), dtype=np.float32) 171 | assert np.allclose(uv, project(xyz, K)) 172 | 173 | uv = np.array(((1/3, 2/3), (1/5, 3/5), (0, 0), (0, 0)), dtype=np.float32) 174 | assert np.allclose(uv, project(xyz, K, img_size=(5, 5))) 175 | -------------------------------------------------------------------------------- /benchmark/utils.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import typing 3 | import logging 4 | 5 | import numpy as np 6 | from transforms3d.quaternions import qinverse, rotate_vector, qmult 7 | 8 | VARIANTS_ANGLE_SIN = 'sin' 9 | VARIANTS_ANGLE_COS = 'cos' 10 | 11 | 12 | def convert_world2cam_to_cam2world(q, t): 13 | qinv = qinverse(q) 14 | tinv = -rotate_vector(t, qinv) 15 | return qinv, tinv 16 | 17 | 18 | def load_poses(file: typing.IO, load_confidence: bool = False): 19 | """Load poses from text file and converts them to cam2world convention (t is the camera center in world coordinates) 20 | 21 | The text file encodes world2cam poses with the format: 22 | imgpath qw qx qy qz tx ty tz [confidence] 23 | where qw qx qy qz is the quaternion encoding rotation, 24 | and tx ty tz is the translation vector, 25 | and confidence is a float encoding confidence, for estimated poses 26 | """ 27 | 28 | expected_parts = 9 if load_confidence else 8 29 | 30 | poses = dict() 31 | for line_number, line in enumerate(file.readlines()): 32 | 
parts = tuple(line.strip().split(' ')) 33 | 34 | if len(parts) != expected_parts: 35 | logging.warning( 36 | f'Invalid number of fields in file {file.name} line {line_number}.' 37 | f' Expected {expected_parts}, received {len(parts)}. Ignoring line.') 38 | continue 39 | 40 | try: 41 | name = parts[0] 42 | if '#' in name: 43 | logging.info(f'Ignoring comment line in {file.name} line {line_number}') 44 | continue 45 | frame_num = int(name[-9:-4]) 46 | except ValueError: 47 | logging.warning( 48 | f'Invalid frame number in file {file.name} line {line_number}.' 49 | f' Expected formatting "seq1/frame_00000.jpg". Ignoring line.') 50 | continue 51 | 52 | try: 53 | parts_float = tuple(map(float, parts[1:])) 54 | if any(np.isnan(v) or np.isinf(v) for v in parts_float): 55 | raise ValueError() 56 | qw, qx, qy, qz, tx, ty, tz = parts_float[:7] 57 | confidence = parts_float[7] if load_confidence else None 58 | except ValueError: 59 | logging.warning( 60 | f'Error parsing pose in file {file.name} line {line_number}. Ignoring line.') 61 | continue 62 | 63 | q = np.array((qw, qx, qy, qz), dtype=np.float64) 64 | t = np.array((tx, ty, tz), dtype=np.float64) 65 | 66 | if np.isclose(np.linalg.norm(q), 0): 67 | logging.warning( 68 | f'Error parsing pose in file {file.name} line {line_number}. ' 69 | 'Quaternion must have non-zero norm. Ignoring line.') 70 | continue 71 | 72 | q, t = convert_world2cam_to_cam2world(q, t) 73 | poses[frame_num] = (q, t, confidence) 74 | return poses 75 | 76 | 77 | def subsample_poses(poses: dict, subsample: int = 1): 78 | return {k: v for i, (k, v) in enumerate(poses.items()) if i % subsample == 0} 79 | 80 | 81 | def load_K(file_path: Path): 82 | K = dict() 83 | with file_path.open('r', encoding='utf-8') as f: 84 | for line in f.readlines(): 85 | if '#' in line: 86 | continue 87 | line = line.strip().split(' ') 88 | 89 | frame_num = int(line[0][-9:-4]) 90 | fx, fy, cx, cy, W, H = map(float, line[1:]) 91 | K[frame_num] = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]], dtype=np.float32) 92 | return K, W, H 93 | 94 | 95 | def quat_angle_error(label, pred, variant=VARIANTS_ANGLE_SIN) -> np.ndarray: 96 | assert label.shape == (4,) 97 | assert pred.shape == (4,) 98 | assert variant in (VARIANTS_ANGLE_SIN, VARIANTS_ANGLE_COS), \ 99 | f"Need variant to be in ({VARIANTS_ANGLE_SIN}, {VARIANTS_ANGLE_COS})" 100 | 101 | if len(label.shape) == 1: 102 | label = np.expand_dims(label, axis=0) 103 | if len(label.shape) != 2 or label.shape[0] != 1 or label.shape[1] != 4: 104 | raise RuntimeError(f"Unexpected shape of label: {label.shape}, expected: (1, 4)") 105 | 106 | if len(pred.shape) == 1: 107 | pred = np.expand_dims(pred, axis=0) 108 | if len(pred.shape) != 2 or pred.shape[0] != 1 or pred.shape[1] != 4: 109 | raise RuntimeError(f"Unexpected shape of pred: {pred.shape}, expected: (1, 4)") 110 | 111 | label = label.astype(np.float64) 112 | pred = pred.astype(np.float64) 113 | 114 | q1 = pred / np.linalg.norm(pred, axis=1, keepdims=True) 115 | q2 = label / np.linalg.norm(label, axis=1, keepdims=True) 116 | if variant == VARIANTS_ANGLE_COS: 117 | d = np.abs(np.sum(np.multiply(q1, q2), axis=1, keepdims=True)) 118 | d = np.clip(d, a_min=-1, a_max=1) 119 | angle = 2. 
* np.degrees(np.arccos(d)) 120 | elif variant == VARIANTS_ANGLE_SIN: 121 | if q1.shape[0] != 1 or q2.shape[0] != 1: 122 | raise NotImplementedError("Computing the angle for multiple quaternion pairs is not implemented yet") 123 | # https://www.researchgate.net/post/How_do_I_calculate_the_smallest_angle_between_two_quaternions/5d6ed4a84f3a3e1ed3656616/citation/download 124 | sine = qmult(q1[0], qinverse(q2[0])) # note: takes the first element of each 2D array 125 | # 114.59155902616465 = 2. * 180. / pi 126 | angle = np.arcsin(np.linalg.norm(sine[1:], keepdims=True)) * 114.59155902616465 127 | angle = np.expand_dims(angle, axis=0) 128 | 129 | return angle.astype(np.float64) 130 | 131 | 132 | def precision_recall(inliers, tp, failures): 133 | """ 134 | Computes a Precision/Recall curve for a set of estimated poses, given a per-image confidence 135 | (inliers) and whether each estimated pose error is within a threshold (tp). 136 | Each point of the curve is obtained by choosing a confidence threshold (inlier_thr). 137 | Recall is the fraction of all images (including failures) with inliers >= inlier_thr. 138 | Precision is the fraction of images with inliers >= inlier_thr whose 139 | estimated pose error is within the pose threshold (counted via tp), 140 | where the pose threshold is (trans_thr[m], rot_thr[deg]). 141 | 142 | Inputs: 143 | - inliers [N]: confidence of each estimated pose (e.g. number of inliers) 144 | - tp [N]: bool, True if the estimated pose error is within the pose threshold 145 | - failures (int): number of images for which no pose was estimated 146 | 147 | Output: 148 | - precision [M] 149 | - recall [M] 150 | - average_precision (scalar) 151 | 152 | """ 153 | 154 | assert len(inliers) == len(tp), 'unequal shapes' 155 | 156 | # sort by inliers (descending order) 157 | inliers = np.array(inliers) 158 | sort_idx = np.argsort(inliers)[::-1] 159 | inliers = inliers[sort_idx] 160 | tp = np.array(tp).reshape(-1)[sort_idx] 161 | 162 | # get idxs where inliers change (avoid tied values) 163 | distinct_value_indices = np.where(np.diff(inliers))[0] 164 | threshold_idxs = np.r_[distinct_value_indices, inliers.size - 1] 165 | 166 | # compute prec/recall 167 | N = inliers.shape[0] 168 | rec = np.arange(N, dtype=np.float32) + 1 169 | cum_tp = np.cumsum(tp) 170 | prec = cum_tp[threshold_idxs] / rec[threshold_idxs] 171 | rec = rec[threshold_idxs] / (float(N) + float(failures)) 172 | 173 | # invert order and ensure the (prec=1, rec=0) point 174 | last_ind = rec.searchsorted(rec[-1]) 175 | sl = slice(last_ind, None, -1) 176 | prec = np.r_[prec[sl], 1] 177 | rec = np.r_[rec[sl], 0] 178 | 179 | # compute average precision (AUC) as the weighted average of precisions 180 | average_precision = np.abs(np.sum(np.diff(rec) * np.array(prec)[:-1])) 181 | 182 | return prec, rec, average_precision 183 | -------------------------------------------------------------------------------- /config/default.py: -------------------------------------------------------------------------------- 1 | from yacs.config import CfgNode as CN 2 | 3 | _CN = CN() 4 | 5 | ############## Model ############## 6 | _CN.MODEL = None # options: ['Regression', 'FeatureMatching'] 7 | _CN.DEBUG = False 8 | 9 | # Regression model options 10 | _CN.ENCODER = CN() 11 | _CN.ENCODER.TYPE = None # options: ['ResNet', 'ResUNet'] 12 | _CN.ENCODER.NUM_BLOCKS = None # number of blocks per layer, separated by dashes. e.g.
3-3-3 13 | _CN.ENCODER.BLOCK_TYPE = None # 0:PreactBlock, 1:PreactBlockBottleneck 14 | _CN.ENCODER.NOT_CONCAT = None # ResUNet option 15 | _CN.ENCODER.NUM_OUT_LAYERS = None # ResUNet option 16 | 17 | _CN.AGGREGATOR = CN() 18 | _CN.AGGREGATOR.TYPE = None # options: ['CorrelationVolumeWarping', 'CorrelationVolumeWarpingQKV'] 19 | _CN.AGGREGATOR.POSITION_ENCODER = None # True/False. If True adds two channel with average u,v coordinates of warp 20 | _CN.AGGREGATOR.POSITION_ENCODER_IM1 = None # True/False. If True adds two channel with uniform u,v coordinates of im1 21 | _CN.AGGREGATOR.MAX_SCORE_CHANNEL = None # True/False. If True adds a channel with max score to global features 22 | _CN.AGGREGATOR.NORMALISE_DOT = False # True/False. If True normalise features before dot product 23 | _CN.AGGREGATOR.RESIDUAL_ATT = False # True/False. If True Q,K,V are residuals from features 24 | _CN.AGGREGATOR.CV_OUTLAYERS = 0 # If >0, compresses CorrelationVolume into OutLayers and channel-wise append to Global Volume 25 | _CN.AGGREGATOR.CV_HALF_CHANNELS = False # If True, computes correlation volume using only half the images feature channels, giving more freedom for the rest 26 | _CN.AGGREGATOR.UPSAMPLE_POS_ENC = 0 # If >0, upsamples positional encoder with number of channels 27 | _CN.AGGREGATOR.DUSTBIN = False # If True, creates dustbins to assign 'unmatched' features. Also learns a 'dustbin feature' to be used when warping feature maps 28 | 29 | _CN.HEAD = CN() 30 | _CN.HEAD.TYPE = None # options: ['ProcrustesResBlockMLP', 'DirectResBlockMLP'] 31 | _CN.BACKPROJECT_ANCHORS = None # whether to backproject anchors to 3D or assume that HEAD already gives 3D points 32 | _CN.HEAD.ADD_BASIS = False # if true, add orthonormal basis to MLP anchors, only valid if NUM_PTS=3 or 6 33 | _CN.HEAD.NUM_PTS = 6 # number of points to estimate. 3, 6 or more. (3: predict correspondences to fixed orthonormal-basis, 6: predict full 3D-3D correspondences, even, more than 6: predict overcomplete set) 34 | _CN.HEAD.AVG_POOL = False # if true, reduce last feature volume to vector using Global Avg. Pool. Otherwise, use ravel() 35 | _CN.HEAD.BATCH_NORM = True # enable/disable batch-norm for head res-blocks 36 | _CN.HEAD.SEPARATE_SCALE = True # For QuatDeepResblock: if True, regress scale separately (unitary translation vector (3D) + 1D scale); else, regress scaled translation vector (3D) 37 | # For AngularBinsResblock: if True, regress scale separately (bins for trans. 
angle + 1D scale); else, regress scaled translation vector 38 | 39 | # Feature Matching Options 40 | _CN.FEATURE_MATCHING = None # options: ['SIFT', 'Precomputed'] 41 | _CN.POSE_SOLVER = None # options: ['EssentialMatrix', 'EssentialMatrixMetric', 'Procrustes', 'PNP'] 42 | 43 | # SIFT options 44 | _CN.SIFT = CN() 45 | _CN.SIFT.NUM_FEATURES = None 46 | _CN.SIFT.RATIO_THRESHOLD = None 47 | 48 | # Pre-computed feature matching options 49 | _CN.MATCHES_FILE_PATH = None # path to NPY storing the correspondences pre-computed using the learned algorithm 50 | 51 | # EMAT RANSAC options 52 | _CN.EMAT_RANSAC = CN() 53 | _CN.EMAT_RANSAC.PIX_THRESHOLD = None 54 | _CN.EMAT_RANSAC.SCALE_THRESHOLD = None 55 | _CN.EMAT_RANSAC.CONFIDENCE = None 56 | 57 | # Procrustes RANSAC options 58 | _CN.PROCRUSTES = CN() 59 | _CN.PROCRUSTES.MAX_CORR_DIST = None 60 | _CN.PROCRUSTES.REFINE = False #refine pose with ICP 61 | 62 | # PNP RANSAC options 63 | _CN.PNP = CN() 64 | _CN.PNP.RANSAC_ITER = None 65 | _CN.PNP.REPROJECTION_INLIER_THRESHOLD = None # pixels 66 | _CN.PNP.CONFIDENCE = None 67 | 68 | ############## Dataset ############## 69 | _CN.DATASET = CN() 70 | # 1. data config 71 | _CN.DATASET.DATA_SOURCE = None # options: ['ScanNet', '7Scenes', 'MapFree'] 72 | _CN.DATASET.SCENES = None # scenes to use (for 7Scenes/MapFree); should be a list []; If none, use all scenes. 73 | _CN.DATASET.DATA_ROOT = None # path to dataset folder 74 | _CN.DATASET.NPZ_ROOT = None # path to npz files containing pairs of frame indices per sample 75 | _CN.DATASET.MIN_OVERLAP_SCORE = None # discard data with overlap_score < min_overlap_score 76 | _CN.DATASET.MAX_OVERLAP_SCORE = None # discard data with overlap_score > max_overlap_score 77 | _CN.DATASET.AUGMENTATION_TYPE = None # options: [None, 'colorjitter'] 78 | _CN.DATASET.BLACK_WHITE = False # if true, transform images to black & white 79 | _CN.DATASET.PAIRS_TXT = CN() # Path to text file defining the train/val/test pairs (7Scenes) 80 | _CN.DATASET.PAIRS_TXT.TRAIN = None 81 | _CN.DATASET.PAIRS_TXT.VAL = None 82 | _CN.DATASET.PAIRS_TXT.TEST = None 83 | _CN.DATASET.PAIRS_TXT.ONE_NN = False # If true, keeps only reference image w/ highest similarity to each query 84 | _CN.DATASET.HEIGHT = None 85 | _CN.DATASET.WIDTH = None 86 | _CN.DATASET.ESTIMATED_DEPTH = None # Use 'estimated' predictions of depth map, if None uses GT depth map 87 | # For Scannet: path to NPZ storing the depth maps (for a given method); if None use GT depth 88 | # For 7Scenes: suffix to add to depthpath when loading depth maps; if None use GT depth 89 | # For Mapfree: suffix to add to depthpath when loading depth maps; if None, no depth 90 | _CN.DATASET.QUERY_FRAME_COUNT = 1 # number of query frames to infer from. 
1 or 9 91 | 92 | ############# TRAINING ############# 93 | _CN.TRAINING = CN() 94 | # Data Loader settings 95 | _CN.TRAINING.BATCH_SIZE = None 96 | _CN.TRAINING.NUM_WORKERS = None 97 | _CN.TRAINING.SAMPLER = None # options: ['random', 'scene_balance'] 98 | _CN.TRAINING.N_SAMPLES_SCENE = None # if 'scene_balance' sampler, the number of samples to get per scene 99 | _CN.TRAINING.SAMPLE_WITH_REPLACEMENT = None # if 'scene_balance' sampler, whether to sample with replacement 100 | # Training settings 101 | _CN.TRAINING.LR = None 102 | _CN.TRAINING.LR_STEP_INTERVAL = None 103 | _CN.TRAINING.LR_STEP_GAMMA = None # multiplicative factor of LR every LR_STEP_ITERATIONS 104 | _CN.TRAINING.VAL_INTERVAL = None 105 | _CN.TRAINING.VAL_BATCHES = None 106 | _CN.TRAINING.LOG_INTERVAL = None 107 | _CN.TRAINING.EPOCHS = None 108 | _CN.TRAINING.GRAD_CLIP = 0. # Indicates the L2 norm at which to clip the gradient. Disabled if 0 109 | # Loss settings 110 | _CN.TRAINING.ROT_LOSS = 'rot_frobenius_loss' # options: ['rot_frobenius_loss', 'rot_l1_loss', 'rot_angle_loss'] 111 | _CN.TRAINING.TRANS_LOSS = 'trans_l2_loss' # options: ['trans_l2_loss', 'trans_ang_loss'] 112 | _CN.TRAINING.LAMBDA = 1.0 # scaling term for the translation loss term. If 0.0, learns optimal weighting. 113 | 114 | 115 | 116 | cfg = _CN -------------------------------------------------------------------------------- /config/mapfree.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | DATA_SOURCE: 'MapFree' 3 | DATA_ROOT: 'data/mapfree/' 4 | SCENES: None # should be a list [] or None. If none, use all scenes. 5 | ESTIMATED_DEPTH: None # To load estimated depth map, provide the suffix to the depth files, e.g. 'dptnyu', 'dptkitti' 6 | AUGMENTATION_TYPE: None 7 | HEIGHT: 720 8 | WIDTH: 540 9 | MIN_OVERLAP_SCORE: 0.2 # [train only] discard data with overlap_score < min_overlap_score 10 | MAX_OVERLAP_SCORE: 0.7 # [train only] discard data with overlap_score < min_overlap_score 11 | QUERY_FRAME_COUNT: 1 # 1 (single frame task) or 9 (multi-frame task) only! 12 | -------------------------------------------------------------------------------- /config/mapfree_multi.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | QUERY_FRAME_COUNT: 9 # 1 (single frame task) or 9 (multi-frame task) only! 
-------------------------------------------------------------------------------- /config/matching/mapfree/loftr_emat_dptkitti.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'dptkitti' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_LoFTR.npz' 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 2.0 9 | SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/mapfree/loftr_emat_dptnyu.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'dptnyu' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_LoFTR.npz' 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 2.0 9 | SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/mapfree/loftr_pnp_dptkitti.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'dptkitti' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_LoFTR.npz' 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 0.9999 -------------------------------------------------------------------------------- /config/matching/mapfree/loftr_pnp_dptnyu.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'dptnyu' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_LoFTR.npz' 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 0.9999 -------------------------------------------------------------------------------- /config/matching/mapfree/sg_emat_dptkitti.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'dptkitti' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_SG.npz' 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 2.0 9 | SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/mapfree/sg_emat_dptnyu.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'dptnyu' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_SG.npz' 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 2.0 9 | SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/mapfree/sg_pnp_dptkitti.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'dptkitti' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_SG.npz' 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 
0.9999 11 | -------------------------------------------------------------------------------- /config/matching/mapfree/sg_pnp_dptnyu.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'dptnyu' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_SG.npz' 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/mapfree/sg_procrustes_dptkitti.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'dptkitti' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_SG.npz' 7 | PROCRUSTES: 8 | MAX_CORR_DIST: 0.05 # meters 9 | -------------------------------------------------------------------------------- /config/matching/mapfree/sift_emat_dptkitti.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'dptkitti' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_SIFT.npz' 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 3.0 9 | SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/mapfree/sift_emat_dptnyu.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'dptnyu' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_SIFT.npz' 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 3.0 9 | SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/mapfree/sift_pnp_dptkitti.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'dptkitti' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_SIFT.npz' 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/mapfree/sift_pnp_dptnyu.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'dptnyu' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_SIFT.npz' 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/scannet/loftr_emat_dpt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_LoFTR_scannet_test.npz' 5 | DATASET: 6 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_dpt.npz' 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 2.0 9 | 
SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/scannet/loftr_emat_gt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_LoFTR_scannet_test.npz' 5 | DATASET: 6 | ESTIMATED_DEPTH: None 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 2.0 9 | SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/scannet/loftr_emat_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_LoFTR_scannet_test.npz' 5 | DATASET: 6 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_planercnn.npz' 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 2.0 9 | SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/scannet/loftr_pnp_dpt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_LoFTR_scannet_test.npz' 5 | DATASET: 6 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_dpt.npz' 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 0.9999 -------------------------------------------------------------------------------- /config/matching/scannet/loftr_pnp_gt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_LoFTR_scannet_test.npz' 5 | DATASET: 6 | ESTIMATED_DEPTH: None 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 0.9999 -------------------------------------------------------------------------------- /config/matching/scannet/loftr_pnp_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_LoFTR_scannet_test.npz' 5 | DATASET: 6 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_planercnn.npz' 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 0.9999 -------------------------------------------------------------------------------- /config/matching/scannet/loftr_procrustes_dpt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_dpt.npz' 6 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_LoFTR_scannet_test.npz' 7 | PROCRUSTES: 8 | MAX_CORR_DIST: 0.05 # meters 9 | -------------------------------------------------------------------------------- /config/matching/scannet/loftr_procrustes_dpt_icp.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 
'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_dpt.npz' 6 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_LoFTR_scannet_test.npz' 7 | PROCRUSTES: 8 | MAX_CORR_DIST: 0.05 # meters 9 | REFINE: True 10 | -------------------------------------------------------------------------------- /config/matching/scannet/loftr_procrustes_gt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: None 6 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_LoFTR_scannet_test.npz' 7 | PROCRUSTES: 8 | MAX_CORR_DIST: 0.05 # meters 9 | -------------------------------------------------------------------------------- /config/matching/scannet/loftr_procrustes_gt_icp.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: None 6 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_LoFTR_scannet_test.npz' 7 | PROCRUSTES: 8 | MAX_CORR_DIST: 0.05 # meters 9 | REFINE: True 10 | -------------------------------------------------------------------------------- /config/matching/scannet/loftr_procrustes_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_planercnn.npz' 6 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_LoFTR_scannet_test.npz' 7 | PROCRUSTES: 8 | MAX_CORR_DIST: 0.05 # meters 9 | -------------------------------------------------------------------------------- /config/matching/scannet/loftr_procrustes_planercnn_icp.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_planercnn.npz' 6 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_LoFTR_scannet_test.npz' 7 | PROCRUSTES: 8 | MAX_CORR_DIST: 0.05 # meters 9 | REFINE: True 10 | -------------------------------------------------------------------------------- /config/matching/scannet/sg_emat_dpt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_dpt.npz' 6 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_SG_scannet_test.npz' 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 2.0 9 | SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/scannet/sg_emat_gt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: None 6 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_SG_scannet_test.npz' 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 2.0 9 | SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | 
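
The 'EssentialMatrixMetric' solver selected in these ScanNet configs recovers a rotation and a unit-norm translation from an essential matrix fitted to the precomputed correspondences, then resolves the metric scale of the translation from depth (ground-truth depth when ESTIMATED_DEPTH is None, otherwise the DPT or PlaneRCNN depth maps named above). The sketch below is only an illustration of that idea with OpenCV, not the repo's implementation: it assumes a single shared intrinsic matrix K, Nx2 float keypoint arrays, and per-keypoint depths depth0 sampled beforehand; PIX_THRESHOLD and CONFIDENCE map onto the RANSAC threshold/prob arguments, while SCALE_THRESHOLD hints at a more robust scale vote than the plain median used here.

    import cv2
    import numpy as np

    def emat_metric_pose(pts0, pts1, K, depth0, pix_thr=2.0, conf=0.9999):
        # Essential matrix with RANSAC; the threshold is an epipolar distance in pixels
        E, inl = cv2.findEssentialMat(pts0, pts1, K, method=cv2.RANSAC,
                                      prob=conf, threshold=pix_thr)
        # Rotation and unit-norm translation (cheirality check keeps points in front of both cameras)
        _, R, t, inl = cv2.recoverPose(E, pts0, pts1, K, mask=inl)
        keep = inl.ravel().astype(bool)

        # Triangulate the inliers in the scale-free two-view frame [I|0], [R|t]
        P0 = K @ np.hstack([np.eye(3), np.zeros((3, 1))])
        P1 = K @ np.hstack([R, t])
        X = cv2.triangulatePoints(P0, P1, pts0[keep].T, pts1[keep].T)
        z_rel = X[2] / X[3]                  # depths in camera 0, up to a global scale

        # Metric scale from the GT/estimated depth sampled at the same keypoints
        z_abs = depth0[keep]
        valid = (z_rel > 0) & (z_abs > 0)
        scale = np.median(z_abs[valid] / z_rel[valid])
        return R, scale * t

The configs in this group differ only in where that depth comes from; the solver settings (PIX_THRESHOLD, SCALE_THRESHOLD, CONFIDENCE) are shared.
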
-------------------------------------------------------------------------------- /config/matching/scannet/sg_emat_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_planercnn.npz' 6 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_SG_scannet_test.npz' 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 2.0 9 | SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/scannet/sg_pnp_dpt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_SG_scannet_test.npz' 5 | DATASET: 6 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_dpt.npz' 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/scannet/sg_pnp_gt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_SG_scannet_test.npz' 5 | DATASET: 6 | ESTIMATED_DEPTH: None 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/scannet/sg_pnp_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_SG_scannet_test.npz' 5 | DATASET: 6 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_planercnn.npz' 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/scannet/sg_procrustes_dpt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_dpt.npz' 6 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_SG_scannet_test.npz' 7 | PROCRUSTES: 8 | MAX_CORR_DIST: 0.05 # meters 9 | -------------------------------------------------------------------------------- /config/matching/scannet/sg_procrustes_gt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: None 6 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_SG_scannet_test.npz' 7 | PROCRUSTES: 8 | MAX_CORR_DIST: 0.05 # meters 9 | -------------------------------------------------------------------------------- /config/matching/scannet/sg_procrustes_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: 
'data/scannet_misc/scannet_test_depthmaps_planercnn.npz' 6 | MATCHES_FILE_PATH: 'data/scannet_misc/correspondences_SG_scannet_test.npz' 7 | PROCRUSTES: 8 | MAX_CORR_DIST: 0.05 # meters 9 | -------------------------------------------------------------------------------- /config/matching/scannet/sift_emat_dpt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'SIFT' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_dpt.npz' 6 | SIFT: 7 | NUM_FEATURES: 2048 8 | RATIO_THRESHOLD: 0.8 9 | EMAT_RANSAC: 10 | PIX_THRESHOLD: 3.0 11 | SCALE_THRESHOLD: 0.1 12 | CONFIDENCE: 0.9999 13 | -------------------------------------------------------------------------------- /config/matching/scannet/sift_emat_gt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'SIFT' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: None 6 | SIFT: 7 | NUM_FEATURES: 2048 8 | RATIO_THRESHOLD: 0.8 9 | EMAT_RANSAC: 10 | PIX_THRESHOLD: 3.0 11 | SCALE_THRESHOLD: 0.1 12 | CONFIDENCE: 0.9999 13 | -------------------------------------------------------------------------------- /config/matching/scannet/sift_emat_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'SIFT' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_planercnn.npz' 6 | SIFT: 7 | NUM_FEATURES: 2048 8 | RATIO_THRESHOLD: 0.8 9 | EMAT_RANSAC: 10 | PIX_THRESHOLD: 3.0 11 | SCALE_THRESHOLD: 0.1 12 | CONFIDENCE: 0.9999 13 | -------------------------------------------------------------------------------- /config/matching/scannet/sift_pnp_dpt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'SIFT' 3 | POSE_SOLVER: 'PNP' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_dpt.npz' 6 | PNP: 7 | RANSAC_ITER: 1000 8 | REPROJECTION_INLIER_THRESHOLD: 3 9 | CONFIDENCE: 0.9999 10 | SIFT: 11 | NUM_FEATURES: 2048 12 | RATIO_THRESHOLD: 0.8 -------------------------------------------------------------------------------- /config/matching/scannet/sift_pnp_gt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'SIFT' 3 | POSE_SOLVER: 'PNP' 4 | DATASET: 5 | ESTIMATED_DEPTH: None 6 | PNP: 7 | RANSAC_ITER: 1000 8 | REPROJECTION_INLIER_THRESHOLD: 3 9 | CONFIDENCE: 0.9999 10 | SIFT: 11 | NUM_FEATURES: 2048 12 | RATIO_THRESHOLD: 0.8 -------------------------------------------------------------------------------- /config/matching/scannet/sift_pnp_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'SIFT' 3 | POSE_SOLVER: 'PNP' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_planercnn.npz' 6 | PNP: 7 | RANSAC_ITER: 1000 8 | REPROJECTION_INLIER_THRESHOLD: 3 9 | CONFIDENCE: 0.9999 10 | SIFT: 11 | NUM_FEATURES: 2048 12 | RATIO_THRESHOLD: 0.8 -------------------------------------------------------------------------------- /config/matching/scannet/sift_procrustes_dpt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 
'FeatureMatching' 2 | FEATURE_MATCHING: 'SIFT' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_dpt.npz' 6 | SIFT: 7 | NUM_FEATURES: 2048 8 | RATIO_THRESHOLD: 0.8 9 | PROCRUSTES: 10 | MAX_CORR_DIST: 0.05 # meters 11 | -------------------------------------------------------------------------------- /config/matching/scannet/sift_procrustes_dpt_icp.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'SIFT' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_dpt.npz' 6 | SIFT: 7 | NUM_FEATURES: 2048 8 | RATIO_THRESHOLD: 0.8 9 | PROCRUSTES: 10 | MAX_CORR_DIST: 0.05 # meters 11 | REFINE: True 12 | -------------------------------------------------------------------------------- /config/matching/scannet/sift_procrustes_gtdepth.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'SIFT' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: None 6 | SIFT: 7 | NUM_FEATURES: 2048 8 | RATIO_THRESHOLD: 0.8 9 | PROCRUSTES: 10 | MAX_CORR_DIST: 0.05 # meters 11 | -------------------------------------------------------------------------------- /config/matching/scannet/sift_procrustes_gtdepth_icp.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'SIFT' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: None 6 | SIFT: 7 | NUM_FEATURES: 2048 8 | RATIO_THRESHOLD: 0.8 9 | PROCRUSTES: 10 | MAX_CORR_DIST: 0.05 # meters 11 | REFINE: True 12 | -------------------------------------------------------------------------------- /config/matching/scannet/sift_procrustes_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'SIFT' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_planercnn.npz' 6 | SIFT: 7 | NUM_FEATURES: 2048 8 | RATIO_THRESHOLD: 0.8 9 | PROCRUSTES: 10 | MAX_CORR_DIST: 0.05 # meters 11 | -------------------------------------------------------------------------------- /config/matching/scannet/sift_procrustes_planercnn_icp.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'SIFT' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_planercnn.npz' 6 | SIFT: 7 | NUM_FEATURES: 2048 8 | RATIO_THRESHOLD: 0.8 9 | PROCRUSTES: 10 | MAX_CORR_DIST: 0.05 # meters 11 | REFINE: True 12 | -------------------------------------------------------------------------------- /config/matching/scannet/sift_procrustes_smdp.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'SIFT' 3 | POSE_SOLVER: 'Procrustes' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'data/scannet_misc/scannet_test_depthmaps_smdp_bilinear.npz' 6 | SIFT: 7 | NUM_FEATURES: 2048 8 | RATIO_THRESHOLD: 0.8 9 | PROCRUSTES: 10 | MAX_CORR_DIST: 0.05 # meters 11 | -------------------------------------------------------------------------------- /config/matching/sevenscenes/loftr_emat_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 
'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'prcnn' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_LoFTR_{pairs_txt}.npz' 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 2.0 9 | SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/sevenscenes/loftr_pnp_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'prcnn' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_LoFTR_{pairs_txt}.npz' 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 0.9999 -------------------------------------------------------------------------------- /config/matching/sevenscenes/sg_emat_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'prcnn' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_SG_{pairs_txt}.npz' 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 2.0 9 | SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/sevenscenes/sg_pnp_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'prcnn' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_SG_{pairs_txt}.npz' 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/sevenscenes/sift_emat_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'EssentialMatrixMetric' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'prcnn' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_SIFT_{pairs_txt}.npz' 7 | EMAT_RANSAC: 8 | PIX_THRESHOLD: 3.0 9 | SCALE_THRESHOLD: 0.1 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/matching/sevenscenes/sift_pnp_planercnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'FeatureMatching' 2 | FEATURE_MATCHING: 'Precomputed' 3 | POSE_SOLVER: 'PNP' 4 | DATASET: 5 | ESTIMATED_DEPTH: 'prcnn' 6 | MATCHES_FILE_PATH: '{scene_root}/correspondences_SIFT_{pairs_txt}.npz' 7 | PNP: 8 | RANSAC_ITER: 1000 9 | REPROJECTION_INLIER_THRESHOLD: 3 10 | CONFIDENCE: 0.9999 11 | -------------------------------------------------------------------------------- /config/regression/mapfree/3d3d.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'ProcrustesDeepResBlock' 14 | ADD_BASIS: True 15 | AVG_POOL: True 16 | TRAINING: 17 | BATCH_SIZE: 10 18 | NUM_WORKERS: 10 19 | SAMPLER: 'scene_balance' 20 | N_SAMPLES_SCENE: 700 21 | 
SAMPLE_WITH_REPLACEMENT: True 22 | LR: 1e-4 23 | LOG_INTERVAL: 50 24 | VAL_INTERVAL: 0.25 25 | VAL_BATCHES: 500 26 | EPOCHS: 50 27 | ROT_LOSS: 'rot_angle_loss' 28 | TRANS_LOSS: 'trans_l1_loss' 29 | LAMBDA: 1. 30 | BACKPROJECT_ANCHORS: False 31 | DATASET: 32 | HEIGHT: 360 33 | WIDTH: 270 34 | MIN_OVERLAP_SCORE: 0.4 # [train only] discard data with overlap_score < min_overlap_score 35 | MAX_OVERLAP_SCORE: 0.8 # [train only] discard data with overlap_score < min_overlap_score -------------------------------------------------------------------------------- /config/regression/mapfree/3d3d_lowoverlap.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'ProcrustesDeepResBlock' 14 | ADD_BASIS: True 15 | AVG_POOL: True 16 | TRAINING: 17 | BATCH_SIZE: 10 18 | NUM_WORKERS: 10 19 | SAMPLER: 'scene_balance' 20 | N_SAMPLES_SCENE: 700 21 | SAMPLE_WITH_REPLACEMENT: True 22 | LR: 1e-4 23 | LOG_INTERVAL: 50 24 | VAL_INTERVAL: 0.25 25 | VAL_BATCHES: 500 26 | EPOCHS: 50 27 | ROT_LOSS: 'rot_angle_loss' 28 | TRANS_LOSS: 'trans_l1_loss' 29 | LAMBDA: 1. 30 | BACKPROJECT_ANCHORS: False 31 | DATASET: 32 | HEIGHT: 360 33 | WIDTH: 270 34 | MIN_OVERLAP_SCORE: 0.2 # [train only] discard data with overlap_score < min_overlap_score 35 | MAX_OVERLAP_SCORE: 0.8 # [train only] discard data with overlap_score < min_overlap_score -------------------------------------------------------------------------------- /config/regression/mapfree/3d3d_no_posencoder.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: False 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'ProcrustesDeepResBlock' 14 | ADD_BASIS: True 15 | AVG_POOL: True 16 | TRAINING: 17 | BATCH_SIZE: 10 18 | NUM_WORKERS: 10 19 | SAMPLER: 'scene_balance' 20 | N_SAMPLES_SCENE: 700 21 | SAMPLE_WITH_REPLACEMENT: True 22 | LR: 1e-4 23 | LOG_INTERVAL: 50 24 | VAL_INTERVAL: 0.25 25 | VAL_BATCHES: 500 26 | EPOCHS: 50 27 | ROT_LOSS: 'rot_angle_loss' 28 | TRANS_LOSS: 'trans_l1_loss' 29 | LAMBDA: 1. 
30 | BACKPROJECT_ANCHORS: False 31 | DATASET: 32 | HEIGHT: 360 33 | WIDTH: 270 34 | MIN_OVERLAP_SCORE: 0.4 # [train only] discard data with overlap_score < min_overlap_score 35 | MAX_OVERLAP_SCORE: 0.8 # [train only] discard data with overlap_score < min_overlap_score -------------------------------------------------------------------------------- /config/regression/mapfree/3d3d_no_warping.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'Concat' 10 | HEAD: 11 | TYPE: 'ProcrustesDeepResBlock' 12 | ADD_BASIS: True 13 | AVG_POOL: True 14 | TRAINING: 15 | BATCH_SIZE: 10 16 | NUM_WORKERS: 10 17 | SAMPLER: 'scene_balance' 18 | N_SAMPLES_SCENE: 700 19 | SAMPLE_WITH_REPLACEMENT: True 20 | LR: 1e-4 21 | LOG_INTERVAL: 50 22 | VAL_INTERVAL: 0.25 23 | VAL_BATCHES: 500 24 | EPOCHS: 50 25 | ROT_LOSS: 'rot_angle_loss' 26 | TRANS_LOSS: 'trans_l1_loss' 27 | LAMBDA: 1. 28 | BACKPROJECT_ANCHORS: False 29 | DATASET: 30 | HEIGHT: 360 31 | WIDTH: 270 32 | MIN_OVERLAP_SCORE: 0.4 # [train only] discard data with overlap_score < min_overlap_score 33 | MAX_OVERLAP_SCORE: 0.8 # [train only] discard data with overlap_score < min_overlap_score -------------------------------------------------------------------------------- /config/regression/mapfree/3d3d_weighted_loss.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'ProcrustesDeepResBlock' 14 | ADD_BASIS: True 15 | AVG_POOL: True 16 | TRAINING: 17 | BATCH_SIZE: 10 18 | NUM_WORKERS: 10 19 | SAMPLER: 'scene_balance' 20 | N_SAMPLES_SCENE: 700 21 | SAMPLE_WITH_REPLACEMENT: True 22 | LR: 1e-4 23 | LOG_INTERVAL: 50 24 | VAL_INTERVAL: 0.25 25 | VAL_BATCHES: 500 26 | EPOCHS: 50 27 | ROT_LOSS: 'rot_angle_loss' 28 | TRANS_LOSS: 'trans_l1_loss' 29 | LAMBDA: 0. 30 | BACKPROJECT_ANCHORS: False 31 | DATASET: 32 | HEIGHT: 360 33 | WIDTH: 270 34 | MIN_OVERLAP_SCORE: 0.4 # [train only] discard data with overlap_score < min_overlap_score 35 | MAX_OVERLAP_SCORE: 0.8 # [train only] discard data with overlap_score < min_overlap_score -------------------------------------------------------------------------------- /config/regression/mapfree/multiframe/3d3d_multi.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'RegressionMultiFrame' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'ProcrustesDeepResBlock' 14 | ADD_BASIS: True 15 | AVG_POOL: True 16 | TRAINING: 17 | BATCH_SIZE: 10 18 | NUM_WORKERS: 10 19 | SAMPLER: 'scene_balance' 20 | N_SAMPLES_SCENE: 700 21 | SAMPLE_WITH_REPLACEMENT: True 22 | LR: 1e-4 23 | LOG_INTERVAL: 50 24 | VAL_INTERVAL: 0.25 25 | VAL_BATCHES: 500 26 | EPOCHS: 50 27 | ROT_LOSS: 'rot_angle_loss' 28 | TRANS_LOSS: 'trans_l1_loss' 29 | LAMBDA: 1. 
30 | BACKPROJECT_ANCHORS: False 31 | DATASET: 32 | HEIGHT: 360 33 | WIDTH: 270 34 | MIN_OVERLAP_SCORE: 0.4 # [train only] discard data with overlap_score < min_overlap_score 35 | MAX_OVERLAP_SCORE: 0.8 # [train only] discard data with overlap_score < min_overlap_score -------------------------------------------------------------------------------- /config/regression/mapfree/rot6d_trans.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'DirectDeepResBlockMLP' 14 | ADD_BASIS: True 15 | AVG_POOL: True 16 | TRAINING: 17 | BATCH_SIZE: 10 18 | NUM_WORKERS: 10 19 | SAMPLER: 'scene_balance' 20 | N_SAMPLES_SCENE: 700 21 | SAMPLE_WITH_REPLACEMENT: True 22 | LR: 1e-4 23 | LOG_INTERVAL: 50 24 | VAL_INTERVAL: 0.25 25 | VAL_BATCHES: 500 26 | EPOCHS: 50 27 | ROT_LOSS: 'rot_angle_loss' 28 | TRANS_LOSS: 'trans_l1_loss' 29 | LAMBDA: 1. 30 | BACKPROJECT_ANCHORS: False 31 | DATASET: 32 | HEIGHT: 360 33 | WIDTH: 270 34 | MIN_OVERLAP_SCORE: 0.4 # [train only] discard data with overlap_score < min_overlap_score 35 | MAX_OVERLAP_SCORE: 0.8 # [train only] discard data with overlap_score < min_overlap_score -------------------------------------------------------------------------------- /config/regression/mapfree/rotbin_trans.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'AngularBinsDeepResBlockMLP' 14 | SEPARATE_SCALE: False 15 | AVG_POOL: True 16 | TRAINING: 17 | BATCH_SIZE: 10 18 | NUM_WORKERS: 10 19 | SAMPLER: 'scene_balance' 20 | N_SAMPLES_SCENE: 700 21 | SAMPLE_WITH_REPLACEMENT: True 22 | LR: 1e-4 23 | LOG_INTERVAL: 50 24 | VAL_INTERVAL: 0.25 25 | VAL_BATCHES: 500 26 | EPOCHS: 50 27 | ROT_LOSS: 'rot_bin_loss' 28 | TRANS_LOSS: 'trans_l1_loss' 29 | LAMBDA: 1. 30 | BACKPROJECT_ANCHORS: False 31 | DATASET: 32 | HEIGHT: 360 33 | WIDTH: 270 34 | MIN_OVERLAP_SCORE: 0.4 # [train only] discard data with overlap_score < min_overlap_score 35 | MAX_OVERLAP_SCORE: 0.8 # [train only] discard data with overlap_score < min_overlap_score -------------------------------------------------------------------------------- /config/regression/mapfree/rotbin_transdirectionbin_scale.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'AngularBinsDeepResBlockMLP' 14 | SEPARATE_SCALE: True 15 | AVG_POOL: True 16 | TRAINING: 17 | BATCH_SIZE: 10 18 | NUM_WORKERS: 10 19 | SAMPLER: 'scene_balance' 20 | N_SAMPLES_SCENE: 700 21 | SAMPLE_WITH_REPLACEMENT: True 22 | LR: 1e-4 23 | LOG_INTERVAL: 50 24 | VAL_INTERVAL: 0.25 25 | VAL_BATCHES: 500 26 | EPOCHS: 50 27 | ROT_LOSS: 'rot_bin_loss' 28 | TRANS_LOSS: 'trans_sphbin_loss' 29 | LAMBDA: 1. 
30 | BACKPROJECT_ANCHORS: False 31 | DATASET: 32 | HEIGHT: 360 33 | WIDTH: 270 34 | MIN_OVERLAP_SCORE: 0.4 # [train only] discard data with overlap_score < min_overlap_score 35 | MAX_OVERLAP_SCORE: 0.8 # [train only] discard data with overlap_score < min_overlap_score -------------------------------------------------------------------------------- /config/regression/mapfree/rotbin_transdirectionbin_scale_lowoverlap.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'AngularBinsDeepResBlockMLP' 14 | SEPARATE_SCALE: True 15 | AVG_POOL: True 16 | TRAINING: 17 | BATCH_SIZE: 10 18 | NUM_WORKERS: 10 19 | SAMPLER: 'scene_balance' 20 | N_SAMPLES_SCENE: 700 21 | SAMPLE_WITH_REPLACEMENT: True 22 | LR: 1e-4 23 | LOG_INTERVAL: 50 24 | VAL_INTERVAL: 0.25 25 | VAL_BATCHES: 500 26 | EPOCHS: 50 27 | ROT_LOSS: 'rot_bin_loss' 28 | TRANS_LOSS: 'trans_sphbin_loss' 29 | LAMBDA: 1. 30 | BACKPROJECT_ANCHORS: False 31 | DATASET: 32 | HEIGHT: 360 33 | WIDTH: 270 34 | MIN_OVERLAP_SCORE: 0.2 # [train only] discard data with overlap_score < min_overlap_score 35 | MAX_OVERLAP_SCORE: 0.8 # [train only] discard data with overlap_score < min_overlap_score -------------------------------------------------------------------------------- /config/regression/mapfree/rotbin_transdirectionbin_scale_qkv.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarpingQKV' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | NORMALISE_DOT: False 13 | RESIDUAL_ATT: True 14 | HEAD: 15 | TYPE: 'AngularBinsDeepResBlockMLP' 16 | SEPARATE_SCALE: True 17 | AVG_POOL: True 18 | TRAINING: 19 | BATCH_SIZE: 10 20 | NUM_WORKERS: 10 21 | SAMPLER: 'scene_balance' 22 | N_SAMPLES_SCENE: 700 23 | SAMPLE_WITH_REPLACEMENT: True 24 | LR: 1e-4 25 | LOG_INTERVAL: 50 26 | VAL_INTERVAL: 0.25 27 | VAL_BATCHES: 500 28 | EPOCHS: 50 29 | ROT_LOSS: 'rot_bin_loss' 30 | TRANS_LOSS: 'trans_sphbin_loss' 31 | LAMBDA: 1. 32 | BACKPROJECT_ANCHORS: False 33 | DATASET: 34 | HEIGHT: 360 35 | WIDTH: 270 36 | MIN_OVERLAP_SCORE: 0.4 # [train only] discard data with overlap_score < min_overlap_score 37 | MAX_OVERLAP_SCORE: 0.8 # [train only] discard data with overlap_score < min_overlap_score -------------------------------------------------------------------------------- /config/regression/mapfree/rotquat_trans.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'QuatDeepResBlock' 14 | SEPARATE_SCALE: False # trans. 
vector is regressed WITH scale, not using independent 1D scale 15 | TRAINING: 16 | BATCH_SIZE: 10 17 | NUM_WORKERS: 10 18 | SAMPLER: 'scene_balance' 19 | N_SAMPLES_SCENE: 700 20 | SAMPLE_WITH_REPLACEMENT: True 21 | LR: 1e-4 22 | LOG_INTERVAL: 50 23 | VAL_INTERVAL: 0.25 24 | VAL_BATCHES: 500 25 | EPOCHS: 50 26 | ROT_LOSS: 'quat_l1_loss' 27 | TRANS_LOSS: 'trans_l1_loss' 28 | LAMBDA: 0. 29 | BACKPROJECT_ANCHORS: False 30 | DATASET: 31 | HEIGHT: 360 32 | WIDTH: 270 33 | MIN_OVERLAP_SCORE: 0.4 # [train only] discard data with overlap_score < min_overlap_score 34 | MAX_OVERLAP_SCORE: 0.8 # [train only] discard data with overlap_score < min_overlap_score -------------------------------------------------------------------------------- /config/regression/mapfree/rotquat_transdirection_scale.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'QuatDeepResBlock' 14 | SEPARATE_SCALE: True # trans. vector is regressed as 3D unitary direction + 1D scale 15 | TRAINING: 16 | BATCH_SIZE: 10 17 | NUM_WORKERS: 10 18 | SAMPLER: 'scene_balance' 19 | N_SAMPLES_SCENE: 700 20 | SAMPLE_WITH_REPLACEMENT: True 21 | LR: 1e-4 22 | LOG_INTERVAL: 50 23 | VAL_INTERVAL: 0.25 24 | VAL_BATCHES: 500 25 | EPOCHS: 50 26 | ROT_LOSS: 'quat_l1_loss' 27 | TRANS_LOSS: 'trans_scale_direction_loss' 28 | LAMBDA: 1. 29 | BACKPROJECT_ANCHORS: False 30 | DATASET: 31 | HEIGHT: 360 32 | WIDTH: 270 33 | MIN_OVERLAP_SCORE: 0.4 # [train only] discard data with overlap_score < min_overlap_score 34 | MAX_OVERLAP_SCORE: 0.8 # [train only] discard data with overlap_score < min_overlap_score -------------------------------------------------------------------------------- /config/regression/scannet/3d3d.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'ProcrustesDeepResBlock' 14 | ADD_BASIS: True 15 | AVG_POOL: True 16 | TRAINING: 17 | BATCH_SIZE: 12 18 | NUM_WORKERS: 12 19 | SAMPLER: 'scene_balance' 20 | N_SAMPLES_SCENE: 200 21 | SAMPLE_WITH_REPLACEMENT: True 22 | LR: 1e-4 23 | LOG_INTERVAL: 50 24 | VAL_INTERVAL: 0.25 25 | VAL_BATCHES: 200 26 | EPOCHS: 50 27 | ROT_LOSS: 'rot_angle_loss' 28 | TRANS_LOSS: 'trans_l1_loss' 29 | LAMBDA: 1. 
30 | BACKPROJECT_ANCHORS: False 31 | DATASET: 32 | HEIGHT: 240 33 | WIDTH: 320 34 | 35 | -------------------------------------------------------------------------------- /config/regression/scannet/3d3d_dual_posenc.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | POSITION_ENCODER_IM1: True 12 | MAX_SCORE_CHANNEL: True 13 | HEAD: 14 | TYPE: 'ProcrustesDeepResBlock' 15 | ADD_BASIS: True 16 | AVG_POOL: True 17 | TRAINING: 18 | BATCH_SIZE: 12 19 | NUM_WORKERS: 12 20 | SAMPLER: 'scene_balance' 21 | N_SAMPLES_SCENE: 200 22 | SAMPLE_WITH_REPLACEMENT: True 23 | LR: 1e-4 24 | LOG_INTERVAL: 50 25 | VAL_INTERVAL: 0.25 26 | VAL_BATCHES: 200 27 | EPOCHS: 50 28 | ROT_LOSS: 'rot_angle_loss' 29 | TRANS_LOSS: 'trans_l1_loss' 30 | LAMBDA: 1. 31 | BACKPROJECT_ANCHORS: False 32 | DATASET: 33 | HEIGHT: 240 34 | WIDTH: 320 35 | 36 | -------------------------------------------------------------------------------- /config/regression/scannet/3d3d_dual_posenc_upsampling.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | POSITION_ENCODER_IM1: True 12 | MAX_SCORE_CHANNEL: True 13 | UPSAMPLE_POS_ENC: 8 14 | HEAD: 15 | TYPE: 'ProcrustesDeepResBlock' 16 | ADD_BASIS: True 17 | AVG_POOL: True 18 | TRAINING: 19 | BATCH_SIZE: 12 20 | NUM_WORKERS: 12 21 | SAMPLER: 'scene_balance' 22 | N_SAMPLES_SCENE: 200 23 | SAMPLE_WITH_REPLACEMENT: True 24 | LR: 1e-4 25 | LOG_INTERVAL: 50 26 | VAL_INTERVAL: 0.25 27 | VAL_BATCHES: 200 28 | EPOCHS: 50 29 | ROT_LOSS: 'rot_angle_loss' 30 | TRANS_LOSS: 'trans_l1_loss' 31 | LAMBDA: 1. 32 | BACKPROJECT_ANCHORS: False 33 | DATASET: 34 | HEIGHT: 240 35 | WIDTH: 320 36 | 37 | -------------------------------------------------------------------------------- /config/regression/scannet/3d3d_half_cv.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | POSITION_ENCODER_IM1: True 12 | MAX_SCORE_CHANNEL: True 13 | CV_HALF_CHANNELS: True 14 | HEAD: 15 | TYPE: 'ProcrustesDeepResBlock' 16 | ADD_BASIS: True 17 | AVG_POOL: True 18 | TRAINING: 19 | BATCH_SIZE: 12 20 | NUM_WORKERS: 12 21 | SAMPLER: 'scene_balance' 22 | N_SAMPLES_SCENE: 200 23 | SAMPLE_WITH_REPLACEMENT: True 24 | LR: 1e-4 25 | LOG_INTERVAL: 50 26 | VAL_INTERVAL: 0.25 27 | VAL_BATCHES: 200 28 | EPOCHS: 50 29 | ROT_LOSS: 'rot_angle_loss' 30 | TRANS_LOSS: 'trans_l1_loss' 31 | LAMBDA: 1. 
32 | BACKPROJECT_ANCHORS: False 33 | DATASET: 34 | HEIGHT: 240 35 | WIDTH: 320 36 | 37 | -------------------------------------------------------------------------------- /config/regression/scannet/3d3d_lowoverlap.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'ProcrustesDeepResBlock' 14 | ADD_BASIS: True 15 | AVG_POOL: True 16 | TRAINING: 17 | BATCH_SIZE: 12 18 | NUM_WORKERS: 12 19 | SAMPLER: 'scene_balance' 20 | N_SAMPLES_SCENE: 200 21 | SAMPLE_WITH_REPLACEMENT: True 22 | LR: 1e-4 23 | LOG_INTERVAL: 50 24 | VAL_INTERVAL: 0.25 25 | VAL_BATCHES: 200 26 | EPOCHS: 50 27 | ROT_LOSS: 'rot_angle_loss' 28 | TRANS_LOSS: 'trans_l1_loss' 29 | LAMBDA: 1. 30 | BACKPROJECT_ANCHORS: False 31 | DATASET: 32 | HEIGHT: 240 33 | WIDTH: 320 34 | MIN_OVERLAP_SCORE: 0.0 35 | 36 | -------------------------------------------------------------------------------- /config/regression/scannet/3d3d_no_avgpool.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'ProcrustesDeepResBlock' 14 | ADD_BASIS: True 15 | AVG_POOL: False 16 | TRAINING: 17 | BATCH_SIZE: 12 18 | NUM_WORKERS: 12 19 | SAMPLER: 'scene_balance' 20 | N_SAMPLES_SCENE: 200 21 | SAMPLE_WITH_REPLACEMENT: True 22 | LR: 1e-4 23 | LOG_INTERVAL: 50 24 | VAL_INTERVAL: 0.25 25 | VAL_BATCHES: 200 26 | EPOCHS: 20 27 | ROT_LOSS: 'rot_angle_loss' 28 | TRANS_LOSS: 'trans_l1_loss' 29 | LAMBDA: 1. 30 | BACKPROJECT_ANCHORS: False 31 | DATASET: 32 | HEIGHT: 240 33 | WIDTH: 320 34 | 35 | -------------------------------------------------------------------------------- /config/regression/scannet/3d3d_qkv.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarpingQKV' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | RESIDUAL_ATT: True 13 | HEAD: 14 | TYPE: 'ProcrustesDeepResBlock' 15 | ADD_BASIS: True 16 | AVG_POOL: True 17 | TRAINING: 18 | BATCH_SIZE: 12 19 | NUM_WORKERS: 12 20 | SAMPLER: 'scene_balance' 21 | N_SAMPLES_SCENE: 200 22 | SAMPLE_WITH_REPLACEMENT: True 23 | LR: 1e-4 24 | LOG_INTERVAL: 50 25 | VAL_INTERVAL: 0.25 26 | VAL_BATCHES: 200 27 | EPOCHS: 50 28 | ROT_LOSS: 'rot_angle_loss' 29 | TRANS_LOSS: 'trans_l1_loss' 30 | LAMBDA: 1. 
31 | BACKPROJECT_ANCHORS: False 32 | DATASET: 33 | HEIGHT: 240 34 | WIDTH: 320 35 | 36 | -------------------------------------------------------------------------------- /config/regression/scannet/3d3d_with_dustbin.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | DUSTBIN: True 13 | HEAD: 14 | TYPE: 'ProcrustesDeepResBlock' 15 | ADD_BASIS: True 16 | AVG_POOL: True 17 | TRAINING: 18 | BATCH_SIZE: 12 19 | NUM_WORKERS: 12 20 | SAMPLER: 'scene_balance' 21 | N_SAMPLES_SCENE: 200 22 | SAMPLE_WITH_REPLACEMENT: True 23 | LR: 1e-4 24 | LOG_INTERVAL: 50 25 | VAL_INTERVAL: 0.25 26 | VAL_BATCHES: 200 27 | EPOCHS: 50 28 | ROT_LOSS: 'rot_angle_loss' 29 | TRANS_LOSS: 'trans_l1_loss' 30 | LAMBDA: 1. 31 | BACKPROJECT_ANCHORS: False 32 | DATASET: 33 | HEIGHT: 240 34 | WIDTH: 320 35 | 36 | -------------------------------------------------------------------------------- /config/regression/scannet/rotbin_transdirectionbin_scale.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 'Regression' 2 | ENCODER: 3 | TYPE: 'ResUNet' 4 | BLOCK_TYPE: 1 5 | NUM_BLOCKS: '3-3-3' 6 | NOT_CONCAT: False 7 | NUM_OUT_LAYERS: 32 8 | AGGREGATOR: 9 | TYPE: 'CorrelationVolumeWarping' 10 | POSITION_ENCODER: True 11 | MAX_SCORE_CHANNEL: True 12 | HEAD: 13 | TYPE: 'AngularBinsDeepResBlockMLP' 14 | SEPARATE_SCALE: True 15 | AVG_POOL: True 16 | TRAINING: 17 | BATCH_SIZE: 12 18 | NUM_WORKERS: 12 19 | SAMPLER: 'scene_balance' 20 | N_SAMPLES_SCENE: 200 21 | SAMPLE_WITH_REPLACEMENT: True 22 | LR: 1e-4 23 | LOG_INTERVAL: 50 24 | VAL_INTERVAL: 0.25 25 | VAL_BATCHES: 200 26 | EPOCHS: 50 27 | ROT_LOSS: 'rot_bin_loss' 28 | TRANS_LOSS: 'trans_sphbin_loss' 29 | LAMBDA: 1. 30 | BACKPROJECT_ANCHORS: False 31 | DATASET: 32 | HEIGHT: 240 33 | WIDTH: 320 -------------------------------------------------------------------------------- /config/scannet.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | DATA_SOURCE: 'ScanNet' 3 | DATA_ROOT: 'data/scannet/' 4 | NPZ_ROOT: 'data/scannet_indices/scene_data' 5 | # general options 6 | MIN_OVERLAP_SCORE: 0.4 # discard data with overlap_score < min_overlap_score 7 | AUGMENTATION_TYPE: None # options: [None, 'dark', 'mobile'] 8 | ESTIMATED_DEPTH: None # Loads GT depth maps. To load estimated depthmaps, provide the path to the NPZ file containing them 9 | HEIGHT: 480 10 | WIDTH: 640 -------------------------------------------------------------------------------- /config/sevenscenes.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | DATA_SOURCE: '7Scenes' 3 | DATA_ROOT: 'data/sevenscenes' 4 | SCENES: None # scenes to use (for 7Scenes); should be a list [] or None. If none, use all scenes. 5 | PAIRS_TXT: 6 | TRAIN: 'train_pairs.4nn.medium.txt' 7 | VAL: 'val_pairs.3nn.medium.txt' 8 | TEST: 'test_pairs.5nn.5cm10m.vlad.minmax.txt' 9 | # general options 10 | ESTIMATED_DEPTH: None # None loads GT depth maps. To load estimated depth map, provide the suffix to the depth files, e.g. 
'prcnn' loads PlaneRCNN depth estimates 11 | AUGMENTATION_TYPE: None 12 | HEIGHT: 480 13 | WIDTH: 640 -------------------------------------------------------------------------------- /config/utils.py: -------------------------------------------------------------------------------- 1 | def config_merge_from_file( 2 | cfg: "yacs.config.CfgNode", 3 | path_to_config: "Union[str, Path, list[str], list[Path], tuple[str, ...], tuple[Path, ...]]", 4 | ) -> "yacs.config.CfgNode": 5 | if isinstance(path_to_config, (list, tuple)): 6 | for path_to_config_ in path_to_config: 7 | cfg.merge_from_file(path_to_config_) 8 | else: 9 | cfg.merge_from_file(path_to_config) 10 | 11 | return cfg 12 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: mapfree 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - python=3.10 7 | - cudatoolkit=11.8 8 | - pip=23.2.1 9 | - pip: 10 | - einops==0.6.1 11 | - lazy-loader==0.3 12 | - lightning-utilities==0.9.0 13 | - matplotlib==3.7.2 14 | - numpy==1.24.4 15 | - omegaconf==2.3.0 16 | - open3d==0.17.0 17 | - opencv-python==4.8.0.74 18 | - protobuf==4.23.4 19 | - pytorch-lightning==2.0.6 20 | - tensorboard==2.13.0 21 | - tensorboard-data-server==0.7.1 22 | - timm==0.6.7 23 | - torch==2.0.1 24 | - torchmetrics==1.0.2 25 | - torchvision==0.15.2 26 | - tqdm==4.65.1 27 | - transforms3d==0.4.1 28 | - yacs==0.1.8 29 | - kornia==0.7.2 -------------------------------------------------------------------------------- /environment_eccv22.yml: -------------------------------------------------------------------------------- 1 | name: mapfree 2 | channels: 3 | - pytorch 4 | - conda-forge 5 | - defaults 6 | dependencies: 7 | - python=3.7 8 | - cudatoolkit=11.1 9 | - pytorch=1.8.0 10 | - torchvision=0.9.0 11 | - pip 12 | - pip: 13 | - opencv-python>=4.5.3 14 | - open3d==0.14.1 15 | - transforms3d 16 | - yacs>=0.1.8 17 | - h5py 18 | - pytorch-lightning==1.6.5 19 | - kornia==0.5.3 -------------------------------------------------------------------------------- /etc/feature_matching_baselines/compute.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | from pathlib import Path 4 | import numpy as np 5 | from tqdm import tqdm 6 | 7 | from utils import parse_7scenes_matching_pairs, parse_mapfree_query_frames, stack_pts, load_scannet_imgpaths 8 | from matchers import LoFTR_matcher, SuperGlue_matcher, SIFT_matcher 9 | 10 | MATCHERS = {'LoFTR': LoFTR_matcher, 'SG': SuperGlue_matcher, 'SIFT': SIFT_matcher} 11 | 12 | 13 | def get_parser(): 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--dataset', '-ds', type=str, default='7Scenes', 16 | choices=['Scannet', '7Scenes', 'Mapfree']) 17 | parser.add_argument('--matcher', '-m', type=str, default='SIFT', 18 | choices=MATCHERS.keys()) 19 | parser.add_argument('--scenes', '-sc', type=str, nargs='*', default=None) 20 | parser.add_argument('--pair_txt', type=str, 21 | default='test_pairs.5nn.5cm10m.vlad.minmax.txt') # 7Scenes 22 | parser.add_argument('--pair_npz', type=str, 23 | default='../../data/scannet_indices/scene_data/test/test.npz') # Scannet 24 | parser.add_argument('--outdoor', action='store_true', 25 | help='use outdoor SG/LoFTR model. 
If not specified, use indoor models') 26 | args = parser.parse_args() 27 | 28 | dataset = args.dataset 29 | if dataset == '7Scenes': 30 | args.data_root = '../../data/sevenscenes' 31 | scenes = ['chess', 'fire', 'heads', 'office', 'pumpkin', 'redkitchen', 'stairs'] 32 | args.scenes = scenes if not args.scenes else args.scenes 33 | resize = 640, 480 34 | elif dataset == 'Scannet': 35 | args.data_root = '../../data/scannet/scans_test' 36 | resize = 640, 480 37 | elif dataset == 'Mapfree': 38 | args.data_root = Path('../../data/mapfree/') 39 | test_scenes = [folder for folder in (args.data_root / 'test').iterdir() if folder.is_dir()] 40 | val_scenes = [folder for folder in (args.data_root / 'val').iterdir() if folder.is_dir()] 41 | args.scenes = test_scenes + val_scenes 42 | resize = 540, 720 43 | 44 | return args, MATCHERS[args.matcher](resize, args.outdoor) 45 | 46 | 47 | if __name__ == '__main__': 48 | args, matcher = get_parser() 49 | 50 | if args.dataset == '7Scenes': 51 | for scene in args.scenes: 52 | scene_dir = Path(args.data_root) / scene 53 | im_pairs = parse_7scenes_matching_pairs( 54 | str(scene_dir / args.pair_txt)) # {(im1, im2) : (q, t, ess_mat)} 55 | pair_names = list(im_pairs.keys()) 56 | im_pairs_path = [(str(scene_dir / train_im), 57 | str(scene_dir / test_im)) for (train_im, test_im) in pair_names] 58 | 59 | pts_stack = list() 60 | print(f'Started {scene}') 61 | for pair in tqdm(im_pairs_path): 62 | pts = matcher.match(pair) 63 | pts_stack.append(pts) 64 | pts_stack = stack_pts(pts_stack) 65 | results = {'correspondences': pts_stack} 66 | np.savez_compressed(os.path.join( 67 | scene_dir, 68 | f'correspondences_{args.matcher}_{args.pair_txt}.npz'), 69 | **results) 70 | print(f'Finished {scene}') 71 | 72 | elif args.dataset == 'Mapfree': 73 | for scene_dir in args.scenes: 74 | query_frames_paths = parse_mapfree_query_frames(scene_dir / 'poses.txt') 75 | im_pairs_path = [(str(scene_dir / 'seq0' / 'frame_00000.jpg'), 76 | str(scene_dir / qpath)) for qpath in query_frames_paths] 77 | 78 | pts_stack = list() 79 | print(f'Started {scene_dir.name}') 80 | for pair in tqdm(im_pairs_path): 81 | pts = matcher.match(pair) 82 | pts_stack.append(pts) 83 | pts_stack = stack_pts(pts_stack) 84 | results = {'correspondences': pts_stack} 85 | np.savez_compressed(scene_dir / f'correspondences_{args.matcher}.npz', **results) 86 | print(f'Finished {scene_dir.name}') 87 | 88 | elif args.dataset == 'Scannet': 89 | im_pairs_path = load_scannet_imgpaths(args.pair_npz, args.data_root) 90 | pts_stack = list() 91 | print(f'Started Scannet') 92 | for pair in tqdm(im_pairs_path): 93 | pts = matcher.match(pair) 94 | pts_stack.append(pts) 95 | pts_stack = stack_pts(pts_stack) 96 | results = {'correspondences': pts_stack} 97 | np.savez_compressed( 98 | f'../../data/scannet_misc/correspondences_{args.matcher}_scannet_test.npz', 99 | **results) 100 | print(f'Finished Scannet') 101 | else: 102 | raise NotImplementedError('Invalid dataset') 103 | -------------------------------------------------------------------------------- /etc/feature_matching_baselines/matchers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import cv2 4 | 5 | from LoFTR.src.loftr import LoFTR, default_cfg 6 | from SuperGlue.models.utils import read_image 7 | from SuperGlue.models.matching import Matching 8 | 9 | torch.set_grad_enabled(False) 10 | 11 | 12 | class LoFTR_matcher: 13 | def __init__(self, resize, outdoor=False): 14 | # Initialize LoFTR 15 | 
print("started loading model") 16 | matcher = LoFTR(config=default_cfg) 17 | weights_path = "LoFTR/weights/outdoor_ot.ckpt" if outdoor else "LoFTR/weights/indoor_ot.ckpt" 18 | matcher.load_state_dict(torch.load(weights_path)['state_dict'], strict=False) 19 | matcher = matcher.eval().cuda() 20 | self.matcher = matcher 21 | print("model loaded") 22 | self.resize = resize 23 | 24 | def match(self, pair_path): 25 | '''retrurn correspondences between images (w/ path pair_path)''' 26 | 27 | input_path0, input_path1 = pair_path 28 | resize = self.resize 29 | resize_float = True 30 | rot0, rot1 = 0, 0 31 | device = 'cuda' 32 | 33 | # using resolution [640, 480] (default for 7Scenes, re-scale Scannet) 34 | image0, inp0, scales0 = read_image( 35 | input_path0, device, resize, rot0, resize_float) 36 | 37 | image1, inp1, scales1 = read_image( 38 | input_path1, device, resize, rot1, resize_float) 39 | 40 | # LoFTR needs resolution multiple of 8. If that is not the case, we pad 0's to get to a multiple of 8 41 | if inp0.size(2) % 8 != 0 or inp0.size(1) % 8 != 0: 42 | pad_bottom = inp0.size(2) % 8 43 | pad_right = inp0.size(3) % 8 44 | pad_fn = torch.nn.ConstantPad2d((0, pad_right, 0, pad_bottom), 0) 45 | inp0 = pad_fn(inp0) 46 | inp1 = pad_fn(inp1) 47 | 48 | with torch.no_grad(): 49 | batch = {'image0': inp0, 'image1': inp1} 50 | self.matcher(batch) 51 | mkpts0 = batch['mkpts0_f'].cpu().numpy() 52 | mkpts1 = batch['mkpts1_f'].cpu().numpy() 53 | 54 | if mkpts0.shape[0] > 0: 55 | pts = np.concatenate([mkpts0, mkpts1], axis=1) 56 | return pts 57 | else: 58 | print("no correspondences") 59 | return np.full((1, 4), np.nan) 60 | 61 | 62 | class SuperGlue_matcher: 63 | def __init__(self, resize, outdoor=False): 64 | # copied default values 65 | nms_radius = 4 66 | keypoint_threshold = 0.005 67 | max_keypoints = 1024 68 | 69 | superglue_weights = 'outdoor' if outdoor else 'indoor' # indoor trained on scannet 70 | sinkhorn_iterations = 20 71 | match_threshold = 0.2 72 | 73 | # Load the SuperPoint and SuperGlue models. 74 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 75 | print('Running inference on device \"{}\"'.format(device)) 76 | config = { 77 | 'superpoint': { 78 | 'nms_radius': nms_radius, 79 | 'keypoint_threshold': keypoint_threshold, 80 | 'max_keypoints': max_keypoints 81 | }, 82 | 'superglue': { 83 | 'weights': superglue_weights, 84 | 'sinkhorn_iterations': sinkhorn_iterations, 85 | 'match_threshold': match_threshold, 86 | } 87 | } 88 | self.matching = Matching(config).eval().to(device) 89 | self.device = device 90 | print('SuperGlue model loaded') 91 | self.resize = resize 92 | 93 | def match(self, pair_path): 94 | '''retrurn correspondences between images (w/ path pair_path)''' 95 | 96 | input_path0, input_path1 = pair_path 97 | resize = self.resize 98 | resize_float = True 99 | rot0, rot1 = 0, 0 100 | 101 | image0, inp0, scales0 = read_image( 102 | input_path0, self.device, resize, rot0, resize_float) 103 | image1, inp1, scales1 = read_image( 104 | input_path1, self.device, resize, rot1, resize_float) 105 | pred = self.matching({'image0': inp0, 'image1': inp1}) 106 | pred = {k: v[0].cpu().numpy() for k, v in pred.items()} 107 | kpts0, kpts1 = pred['keypoints0'], pred['keypoints1'] 108 | matches, conf = pred['matches0'], pred['matching_scores0'] 109 | 110 | # Keep the matching keypoints. 
111 | valid = matches > -1 112 | mkpts0 = kpts0[valid] 113 | mkpts1 = kpts1[matches[valid]] 114 | 115 | if mkpts0.shape[0] > 0: 116 | pts = np.concatenate([mkpts0, mkpts1], axis=1) 117 | return pts 118 | else: 119 | print("no correspondences") 120 | return np.full((1, 4), np.nan) 121 | 122 | 123 | class SIFT_matcher: 124 | def __init__(self, resize, outdoor=False): 125 | self.resize = resize 126 | 127 | def root_sift(self, descs): 128 | '''Apply the Hellinger kernel by first L1-normalizing, taking the square-root, and then L2-normalizing''' 129 | 130 | eps = 1e-7 131 | descs /= (descs.sum(axis=1, keepdims=True) + eps) 132 | descs = np.sqrt(descs) 133 | return descs 134 | 135 | def match(self, pair_path): 136 | ''' 137 | Given path to im1, im2, extract correspondences using OpenCV SIFT. 138 | Returns: pts (N x 4) array containing (x1, y1, x2, y2) correspondences; returns nan array if no correspondences. 139 | ''' 140 | 141 | im1_path, im2_path = pair_path 142 | 143 | # hyper-parameters 144 | ratio_test_threshold = 0.8 145 | n_features = 2048 146 | sift = cv2.SIFT_create(n_features) 147 | 148 | # Read images in grayscale 149 | img0 = cv2.imread(im1_path, 0) 150 | img1 = cv2.imread(im2_path, 0) 151 | 152 | # Resize 153 | img0 = cv2.resize(img0, self.resize) 154 | img1 = cv2.resize(img1, self.resize) 155 | 156 | # get SIFT key points and descriptors 157 | kp0, des0 = sift.detectAndCompute(img0, None) 158 | kp1, des1 = sift.detectAndCompute(img1, None) 159 | 160 | # Apply normalisation (rootSIFT) 161 | des0, des1 = self.root_sift(des0), self.root_sift(des1) 162 | 163 | # Get matches using FLANN 164 | FLANN_INDEX_KDTREE = 1 165 | index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5) 166 | search_params = dict(checks=50) 167 | flann = cv2.FlannBasedMatcher(index_params, search_params) 168 | matches = flann.knnMatch(des0, des1, k=2) 169 | 170 | pts1 = [] 171 | pts2 = [] 172 | good_matches = [] 173 | # ratio test as per Lowe's paper 174 | for i, (m, n) in enumerate(matches): 175 | if m.distance < ratio_test_threshold * n.distance: 176 | pts2.append(kp1[m.trainIdx].pt) 177 | pts1.append(kp0[m.queryIdx].pt) 178 | good_matches.append(m) 179 | 180 | pts1 = np.float32(pts1).reshape(-1, 2) 181 | pts2 = np.float32(pts2).reshape(-1, 2) 182 | 183 | if pts1.shape[0] > 0: 184 | pts = np.concatenate([pts1, pts2], axis=1) 185 | return pts 186 | else: 187 | print("no correspondences") 188 | return np.full((1, 4), np.nan) 189 | -------------------------------------------------------------------------------- /etc/feature_matching_baselines/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | 5 | def load_scannet_imgpaths(npz_path, root_dir): 6 | data_names = np.load(npz_path)['name'] 7 | pair_paths = [] 8 | 9 | for scene_name, scene_sub_name, stem_name_0, stem_name_1 in data_names: 10 | scene_name = f'scene{scene_name:04d}_{scene_sub_name:02d}' 11 | img_name0 = os.path.join(root_dir, scene_name, 'sensor_data', 12 | f'frame-{stem_name_0:06}.color.jpg') 13 | img_name1 = os.path.join(root_dir, scene_name, 'sensor_data', 14 | f'frame-{stem_name_1:06}.color.jpg') 15 | pair_paths.append((img_name0, img_name1)) 16 | 17 | return pair_paths 18 | 19 | 20 | def parse_7scenes_matching_pairs(pair_txt): 21 | """Get list of image pairs for matching 22 | Arg: 23 | pair_txt: file contains image pairs and essential 24 | matrix with line format 25 | image1 image2 sim w p q r x y z ess_vec 26 | Return: 27 | list of 3d-tuple contains (q=[wpqr], 
t=[xyz], essential matrix) 28 | """ 29 | im_pairs = {} 30 | f = open(pair_txt) 31 | for line in f: 32 | cur = line.split() 33 | im1, im2 = cur[0], cur[1] 34 | q = np.array([float(i) for i in cur[3:7]], dtype=np.float32) 35 | t = np.array([float(i) for i in cur[7:10]], dtype=np.float32) 36 | ess_mat = np.array([float(i) for i in cur[10:19]], dtype=np.float32).reshape(3, 3) 37 | im_pairs[(im1, im2)] = (q, t, ess_mat) 38 | f.close() 39 | return im_pairs 40 | 41 | 42 | def parse_mapfree_query_frames(pose_path): 43 | """ 44 | Get list of query frames given a pose path 45 | :param pose_path: 46 | :return: 47 | """ 48 | query_paths = [] 49 | with pose_path.open('r') as f: 50 | for l in f.readlines(): 51 | # skip if comment(#) or keyframe (seq0) 52 | if '#' in l or 'seq0' in l: 53 | continue 54 | qpath = l.strip().split(' ')[0] 55 | query_paths.append(qpath) 56 | return query_paths 57 | 58 | 59 | def stack_pts(pts_list): 60 | '''Given a pts list with N arrays, each shaped (Npts, D), where Npts varies, creates a common array shaped (N, max(Npts), D) filled with NaNs when Npts < Max(Npts)''' 61 | assert len(pts_list) > 0, 'list must not be empty' 62 | 63 | N = len(pts_list) 64 | max_npts = max([pts.shape[0] for pts in pts_list]) 65 | D = pts_list[0].shape[1] 66 | pts_stack = np.full((N, max_npts, D), np.nan) 67 | for i, pts in enumerate(pts_list): 68 | pts_stack[i, :pts.shape[0]] = pts 69 | return pts_stack 70 | -------------------------------------------------------------------------------- /etc/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nianticlabs/map-free-reloc/b5182dc7b32231edb3b0f01f8cc97f289ec8a241/etc/teaser.png -------------------------------------------------------------------------------- /lib/datasets/datamodules.py: -------------------------------------------------------------------------------- 1 | import torch.utils as utils 2 | from torchvision.transforms import ColorJitter, Grayscale 3 | import pytorch_lightning as pl 4 | 5 | from lib.datasets.sampler import RandomConcatSampler 6 | from lib.datasets.scannet import ScanNetDataset 7 | from lib.datasets.sevenscenes import SevenScenesDataset 8 | from lib.datasets.mapfree import MapFreeDataset 9 | 10 | 11 | class DataModule(pl.LightningDataModule): 12 | def __init__(self, cfg): 13 | super().__init__() 14 | self.cfg = cfg 15 | 16 | datasets = {'ScanNet': ScanNetDataset, 17 | '7Scenes': SevenScenesDataset, 18 | 'MapFree': MapFreeDataset} 19 | 20 | assert cfg.DATASET.DATA_SOURCE in datasets.keys(), 'invalid DATA_SOURCE, this dataset is not implemented' 21 | self.dataset_type = datasets[cfg.DATASET.DATA_SOURCE] 22 | 23 | def get_sampler(self, dataset, reset_epoch=False): 24 | if self.cfg.TRAINING.SAMPLER == 'scene_balance': 25 | sampler = RandomConcatSampler(dataset, 26 | self.cfg.TRAINING.N_SAMPLES_SCENE, 27 | self.cfg.TRAINING.SAMPLE_WITH_REPLACEMENT, 28 | shuffle=True, 29 | reset_on_iter=reset_epoch 30 | ) 31 | else: 32 | sampler = None 33 | return sampler 34 | 35 | def train_dataloader(self): 36 | transforms = ColorJitter() if self.cfg.DATASET.AUGMENTATION_TYPE == 'colorjitter' else None 37 | transforms = Grayscale( 38 | num_output_channels=3) if self.cfg.DATASET.BLACK_WHITE else transforms 39 | 40 | dataset = self.dataset_type(self.cfg, 'train', transforms=transforms) 41 | sampler = self.get_sampler(dataset) 42 | dataloader = utils.data.DataLoader(dataset, 43 | batch_size=self.cfg.TRAINING.BATCH_SIZE, 44 | num_workers=self.cfg.TRAINING.NUM_WORKERS, 45 | 
sampler=sampler 46 | ) 47 | return dataloader 48 | 49 | def val_dataloader(self): 50 | dataset = self.dataset_type(self.cfg, 'val') 51 | # Scannet uses scene_balance for validation (resets at each epoch, so val samples are always the same) 52 | if isinstance(dataset, ScanNetDataset): 53 | sampler = self.get_sampler(dataset, reset_epoch=True) 54 | else: 55 | sampler = None 56 | dataloader = utils.data.DataLoader(dataset, 57 | batch_size=self.cfg.TRAINING.BATCH_SIZE, 58 | num_workers=self.cfg.TRAINING.NUM_WORKERS, 59 | sampler=sampler, 60 | drop_last=True 61 | ) 62 | return dataloader 63 | 64 | def test_dataloader(self): 65 | dataset = self.dataset_type(self.cfg, 'test') 66 | dataloader = utils.data.DataLoader(dataset, 67 | batch_size=1, 68 | num_workers=1, 69 | shuffle=False) 70 | return dataloader 71 | -------------------------------------------------------------------------------- /lib/datasets/sampler.py: -------------------------------------------------------------------------------- 1 | # From https://github.com/zju3dv/LoFTR/blob/261baf641cb9ada07dd9746e420ada7fe8a03152/src/datasets/sampler.py 2 | import torch 3 | from torch.utils.data import Sampler, ConcatDataset 4 | 5 | 6 | class RandomConcatSampler(Sampler): 7 | """ Random sampler for ConcatDataset. At each epoch, `n_samples_per_subset` samples will be draw from each subset 8 | in the ConcatDataset. If `subset_replacement` is ``True``, sampling within each subset will be done with replacement. 9 | However, it is impossible to sample data without replacement between epochs, unless bulding a stateful sampler lived along the entire training phase. 10 | 11 | For current implementation, the randomness of sampling is ensured no matter the sampler is recreated across epochs or not and call `torch.manual_seed()` or not. 12 | Args: 13 | shuffle (bool): shuffle the random sampled indices across all sub-datsets. 14 | repeat (int): repeatedly use the sampled indices multiple times for training. 15 | [arXiv:1902.05509, arXiv:1901.09335] 16 | NOTE: Don't re-initialize the sampler between epochs (will lead to repeated samples) 17 | NOTE: This sampler behaves differently with DistributedSampler. 18 | It assume the dataset is splitted across ranks instead of replicated. 19 | TODO: Add a `set_epoch()` method to fullfill sampling without replacement across epochs. 
20 | ref: https://github.com/PyTorchLightning/pytorch-lightning/blob/e9846dd758cfb1500eb9dba2d86f6912eb487587/pytorch_lightning/trainer/training_loop.py#L373 21 | """ 22 | 23 | def __init__(self, 24 | data_source: ConcatDataset, 25 | n_samples_per_subset: int, 26 | subset_replacement: bool = True, 27 | shuffle: bool = True, 28 | repeat: int = 1, 29 | seed: int = 66, 30 | reset_on_iter: bool = False): 31 | if not isinstance(data_source, ConcatDataset): 32 | raise TypeError("data_source should be torch.utils.data.ConcatDataset") 33 | 34 | self.data_source = data_source 35 | self.n_subset = len(self.data_source.datasets) 36 | self.n_samples_per_subset = n_samples_per_subset 37 | self.n_samples = self.n_subset * self.n_samples_per_subset * repeat 38 | self.subset_replacement = subset_replacement 39 | self.repeat = repeat 40 | self.shuffle = shuffle 41 | self.seed = seed 42 | self.reset_on_iter = reset_on_iter # If true, recreate random seed to that samples are the same every epoch 43 | self.generator = torch.manual_seed(self.seed) 44 | assert self.repeat >= 1 45 | 46 | def __len__(self): 47 | return self.n_samples 48 | 49 | def __iter__(self): 50 | if self.reset_on_iter: 51 | self.generator = torch.manual_seed(self.seed) 52 | 53 | indices = [] 54 | # sample from each sub-dataset 55 | for d_idx in range(self.n_subset): 56 | low = 0 if d_idx == 0 else self.data_source.cumulative_sizes[d_idx - 1] 57 | high = self.data_source.cumulative_sizes[d_idx] 58 | if self.subset_replacement: 59 | rand_tensor = torch.randint(low, high, (self.n_samples_per_subset,), 60 | generator=self.generator, dtype=torch.int64) 61 | else: # sample without replacement 62 | len_subset = len(self.data_source.datasets[d_idx]) 63 | rand_tensor = torch.randperm(len_subset, generator=self.generator) + low 64 | if len_subset >= self.n_samples_per_subset: 65 | rand_tensor = rand_tensor[:self.n_samples_per_subset] 66 | else: # padding with replacement 67 | rand_tensor_replacement = torch.randint( 68 | low, high, (self.n_samples_per_subset - len_subset,), 69 | generator=self.generator, dtype=torch.int64) 70 | rand_tensor = torch.cat([rand_tensor, rand_tensor_replacement]) 71 | indices.append(rand_tensor) 72 | indices = torch.cat(indices) 73 | if self.shuffle: # shuffle the sampled dataset (from multiple subsets) 74 | rand_tensor = torch.randperm(len(indices), generator=self.generator) 75 | indices = indices[rand_tensor] 76 | 77 | # repeat the sampled indices (can be used for RepeatAugmentation or pure RepeatSampling) 78 | if self.repeat > 1: 79 | repeat_indices = [indices.clone() for _ in range(self.repeat - 1)] 80 | if self.shuffle: 81 | def _choice(x): return x[torch.randperm(len(x), generator=self.generator)] 82 | repeat_indices = map(_choice, repeat_indices) 83 | indices = torch.cat([indices, *repeat_indices], 0) 84 | 85 | assert indices.shape[0] == self.n_samples 86 | return iter(indices.tolist()) 87 | -------------------------------------------------------------------------------- /lib/datasets/scannet.py: -------------------------------------------------------------------------------- 1 | # Based on https://github.com/zju3dv/LoFTR/blob/master/src/datasets/scannet.py 2 | from os import path as osp 3 | from os import listdir 4 | 5 | import numpy as np 6 | import torch 7 | import torch.utils as utils 8 | from numpy.linalg import inv 9 | 10 | from lib.datasets.utils import ( 11 | read_color_image, 12 | read_depth_image, 13 | read_scannet_pose, 14 | read_scannet_intrinsic, 15 | correct_intrinsic_scale 16 | ) 17 | 18 | 19 | class 
ScanNetScene(utils.data.Dataset):
20 | def __init__(self,
21 | root_dir,
22 | npz_path,
23 | mode='train',
24 | min_overlap_score=0.4,
25 | augment_fn=None,
26 | resize=(640, 480),
27 | estimated_depth=None,
28 | **kwargs):
29 | """Manage one scene of ScanNet Dataset.
30 | Args:
31 | root_dir (str): ScanNet root directory that contains scene folders.
32 | npz_path (str): {scene_id}.npz path. This contains image pair information of a scene.
33 | mode (str): options are ['train', 'val', 'test'].
34 | min_overlap_score (float): [train only] discard pairs with overlap_score < min_overlap_score.
35 | augment_fn (callable, optional): augments images with pre-defined visual effects.
36 | resize (tuple): (width, height) to which images are resized.
37 | estimated_depth (str, optional): path to an NPZ file with pre-computed depth maps; if None, GT depth is loaded.
38 | """
39 | super().__init__()
40 | self.root_dir = root_dir
41 | self.mode = mode
42 | self.resize = resize
43 |
44 | # prepare data_names, intrinsics and extrinsics(T)
45 | with np.load(npz_path) as data:
46 | self.data_names = data['name']
47 | if 'score' in data.keys() and mode not in ['val', 'test']: # overlap filtering is train-only
48 | kept_mask = data['score'] > min_overlap_score
49 | self.data_names = self.data_names[kept_mask]
50 |
51 | # for training
52 | self.augment_fn = augment_fn if mode == 'train' else None
53 |
54 | # load pre-computed estimated depth, if exists
55 | self.depthmaps = np.load(estimated_depth) if estimated_depth is not None else None
56 |
57 | def __len__(self):
58 | return len(self.data_names)
59 |
60 | def _read_abs_pose(self, scene_name, name):
61 | pth = osp.join(self.root_dir,
62 | scene_name,
63 | 'sensor_data', f'frame-{name:06}.pose.txt')
64 | return read_scannet_pose(pth)
65 |
66 | def _compute_rel_pose(self, scene_name, name0, name1):
67 | pose0 = self._read_abs_pose(scene_name, name0)
68 | pose1 = self._read_abs_pose(scene_name, name1)
69 |
70 | return np.matmul(pose1, inv(pose0)) # (4, 4)
71 |
72 | def __getitem__(self, idx):
73 | scene_name, scene_sub_name, stem_name_0, stem_name_1 = self.data_names[idx]
74 | scene_name = f'scene{scene_name:04d}_{scene_sub_name:02d}'
75 |
76 | # loads image and rescales.
apply augmentation if available 77 | img_name0 = osp.join(self.root_dir, scene_name, 'sensor_data', 78 | f'frame-{stem_name_0:06}.color.jpg') 79 | img_name1 = osp.join(self.root_dir, scene_name, 'sensor_data', 80 | f'frame-{stem_name_1:06}.color.jpg') 81 | image0 = read_color_image(img_name0, resize=self.resize, augment_fn=self.augment_fn) 82 | image1 = read_color_image(img_name1, resize=self.resize, augment_fn=self.augment_fn) 83 | 84 | # read the depthmap which is stored as (480, 640) 85 | if self.mode in ['test']: 86 | if self.depthmaps is None: 87 | # Load GT depth 88 | dimg_name0 = osp.join(self.root_dir, scene_name, 'sensor_data', 89 | f'frame-{stem_name_0:06}.depth.pgm') 90 | dimg_name1 = osp.join(self.root_dir, scene_name, 'sensor_data', 91 | f'frame-{stem_name_1:06}.depth.pgm') 92 | depth0 = read_depth_image(dimg_name0) 93 | depth1 = read_depth_image(dimg_name1) 94 | else: 95 | # Load pre-computed depth (using arbitrary methods) from npz file 96 | def key(frame_idx): return f'{scene_name[5:]}_frame_{frame_idx:06}' 97 | depth0 = torch.from_numpy(self.depthmaps[key(stem_name_0)].astype(np.float32)) 98 | depth1 = torch.from_numpy(self.depthmaps[key(stem_name_1)].astype(np.float32)) 99 | else: 100 | depth0 = depth1 = torch.tensor([]) 101 | 102 | # get intrinsics 103 | intrinsics_path = osp.join(self.root_dir, scene_name, 'sensor_data', '_info.txt') 104 | K_color = read_scannet_intrinsic(intrinsics_path, color=True) 105 | K_color = correct_intrinsic_scale( 106 | K_color, scale_x=self.resize[0] / 1296, scale_y=self.resize[1] / 968) 107 | K_color = torch.from_numpy(K_color) 108 | K_depth = torch.from_numpy(read_scannet_intrinsic(intrinsics_path, color=False)) 109 | 110 | # read and compute relative poses 111 | T_0to1 = torch.tensor(self._compute_rel_pose(scene_name, stem_name_0, stem_name_1), 112 | dtype=torch.float32) 113 | T_1to0 = T_0to1.inverse() 114 | 115 | data = { 116 | 'image0': image0, # (3, h, w) 117 | 'depth0': depth0, # (h, w) 118 | 'image1': image1, 119 | 'depth1': depth1, 120 | 'T_0to1': T_0to1, # (4, 4) 121 | 'T_1to0': T_1to0, 122 | 'K_color0': K_color, # (3, 3) 123 | 'K_color1': K_color, # (3, 3) 124 | 'K_depth': K_depth, # (3, 3) 125 | 'dataset_name': 'ScanNet', 126 | 'scene_id': scene_name, 127 | 'pair_id': idx, 128 | 'pair_names': (osp.join(scene_name, 'color', f'{stem_name_0}.jpg'), 129 | osp.join(scene_name, 'color', f'{stem_name_1}.jpg')) 130 | } 131 | 132 | return data 133 | 134 | 135 | class ScanNetDataset(utils.data.ConcatDataset): 136 | def __init__(self, 137 | cfg, 138 | mode: str, 139 | transforms=None): 140 | assert mode in ('train', 'val', 'test'), 'Invalid dataset mode' 141 | 142 | root_dir = cfg.DATASET.DATA_ROOT 143 | index_npz_dir = cfg.DATASET.NPZ_ROOT 144 | min_overlap_score = cfg.DATASET.MIN_OVERLAP_SCORE 145 | resize = (cfg.DATASET.WIDTH, cfg.DATASET.HEIGHT) 146 | estimated_depth = cfg.DATASET.ESTIMATED_DEPTH 147 | 148 | # create a dataset for each npz file 149 | # usually each npz file contains the information for a single scene (training and val) 150 | # however, for testing all pairs are concatenated into a single npz file (test.npz) 151 | root_dir = osp.join(root_dir, 'scans_test' if mode == 'test' else 'scans') 152 | npz_path = osp.join(index_npz_dir, mode) 153 | npz_list = [osp.join(npz_path, fname) for fname in listdir(npz_path) if fname[-3:] == 'npz'] 154 | 155 | dataset_list = [ScanNetScene(root_dir=root_dir, 156 | npz_path=npz_fname, 157 | mode=mode, 158 | min_overlap_score=min_overlap_score, 159 | augment_fn=transforms, 160 | 
resize=resize, 161 | estimated_depth=estimated_depth) for npz_fname in npz_list] 162 | 163 | super().__init__(dataset_list) 164 | -------------------------------------------------------------------------------- /lib/datasets/sevenscenes.py: -------------------------------------------------------------------------------- 1 | # Based on https://github.com/GrumpyZhou/visloc-relapose/blob/master/utils/datasets/relapose.py 2 | 3 | import os 4 | import glob 5 | 6 | import torch 7 | import torch.utils.data as data 8 | import numpy as np 9 | from scipy.spatial.transform import Rotation 10 | 11 | from lib.datasets.utils import read_color_image, read_depth_image, correct_intrinsic_scale 12 | 13 | 14 | class SceneDataset(data.Dataset): 15 | def __init__(self, scene_root, pair_txt, resize, transforms=None, one_nn=False, 16 | estimated_depth=None): 17 | ''' scene_root: path to scene folder 18 | pair_txt: path to file specifying the (reference,query) pairs 19 | resize: shape to resize images 20 | transforms: function to apply to images 21 | one_nn: if True, keep only the reference image with highest DVLAD similarity to each query 22 | ''' 23 | self.scene_root = scene_root 24 | self.transforms = transforms 25 | self.resize = resize 26 | self.estimated_depth = estimated_depth 27 | 28 | # load relative poses for given pairs 29 | self.im_pairs, self.relv_poses, _, self.sim = self.parse_relv_pose_txt(os.path.join( 30 | scene_root, 31 | pair_txt)) 32 | self.original_idxs = list(range(len(self.im_pairs))) 33 | if one_nn: 34 | self.filter_one_nn() 35 | self.num = len(self.im_pairs) 36 | 37 | # load absolute poses for each sample 38 | self.abs_poses = self.parse_abs_pose_txt(os.path.join(scene_root, 'dataset_test.txt')) 39 | self.abs_poses.update(self.parse_abs_pose_txt( 40 | os.path.join(scene_root, 'dataset_train.txt'))) 41 | 42 | # static intrinsic matrix 43 | ox, oy = 320, 240 44 | f = 525 45 | self.K = np.array([[f, 0, ox], [0, f, oy], [0, 0, 1]], dtype=np.float32) 46 | self.K = correct_intrinsic_scale(self.K, resize[0] / 640, resize[1] / 480) 47 | 48 | def parse_relv_pose_txt(self, fpath, with_ess=False): 49 | '''Relative pose pair format:image1 image2 sim w p q r x y z ess_vec''' 50 | im_pairs = [] 51 | ess_vecs = [] if with_ess else None 52 | relv_poses = [] 53 | sim = [] 54 | with open(fpath) as f: 55 | for line in f: 56 | cur = line.split() 57 | im_pairs.append((cur[0], cur[1])) 58 | sim.append(float(cur[2])) 59 | q = np.array([float(i) for i in cur[3:7]], dtype=np.float32) 60 | t = np.array([float(i) for i in cur[7:10]], dtype=np.float32) 61 | 62 | # change q convention to [x, y, z, w] 63 | q = q[[1, 2, 3, 0]] 64 | R = Rotation.from_quat(q).as_matrix() 65 | 66 | # Convert to rotation matrix and 4x4 pose matrix 67 | T = np.eye(4) 68 | T[:3, :3] = R 69 | T[:3, -1] = t.ravel() 70 | relv_poses.append(T) 71 | 72 | if with_ess: 73 | ess_vecs.append(np.array([float(i) for i in cur[10:19]], dtype=np.float32)) 74 | return im_pairs, relv_poses, ess_vecs, sim 75 | 76 | def parse_abs_pose_txt(self, fpath): 77 | """Absolute pose label format: 78 | 3 header lines 79 | list of samples with format: 80 | image x y z w p q r 81 | """ 82 | 83 | pose_dict = {} 84 | with open(fpath) as f: 85 | for line in f.readlines()[3::]: # Skip 3 header lines 86 | cur = line.split(' ') 87 | c = np.array([float(v) for v in cur[1:4]], dtype=np.float32) 88 | q = np.array([float(v) for v in cur[4:8]], dtype=np.float32) 89 | im = cur[0] 90 | pose_dict[im] = (c, q) 91 | return pose_dict 92 | 93 | def filter_one_nn(self): 94 | """Filters 
pairs such that for each query image, only the reference image with highest similarity is kept""" 95 | 96 | kept_queries_idx = {} # dict (query image, kept_idx) 97 | kept_queries_sim = {} # dict (query image, kept_similarity) 98 | 99 | for i, ((ref, query), sim) in enumerate(zip(self.im_pairs, self.sim)): 100 | if query in kept_queries_sim: 101 | if sim < kept_queries_sim[query]: 102 | continue 103 | 104 | kept_queries_idx[query] = i 105 | kept_queries_sim[query] = sim 106 | 107 | # update internal arrays 108 | keep_idxs = list(kept_queries_idx.values()) 109 | self.im_pairs = [self.im_pairs[idx] for idx in keep_idxs] 110 | self.relv_poses = [self.relv_poses[idx] for idx in keep_idxs] 111 | self.sim = [self.sim[idx] for idx in keep_idxs] 112 | self.original_idxs = keep_idxs 113 | 114 | def __getitem__(self, index): 115 | # load color images 116 | im1_path, im2_path = [os.path.join(self.scene_root, im_ref) 117 | for im_ref in self.im_pairs[index]] 118 | image1 = read_color_image(im1_path, self.resize, augment_fn=self.transforms) 119 | image2 = read_color_image(im2_path, self.resize, augment_fn=self.transforms) 120 | 121 | # load depth maps 122 | depth_path_suffix = '.depth.' if self.estimated_depth is None else f'.depth.{self.estimated_depth}.' 123 | dim1_path = im1_path.replace('.color.', depth_path_suffix) 124 | dim2_path = im2_path.replace('.color.', depth_path_suffix) 125 | depth1 = read_depth_image(dim1_path) 126 | depth2 = read_depth_image(dim2_path) 127 | 128 | # get relative pose transformation 129 | T_0to1 = torch.tensor(self.relv_poses[index], dtype=torch.float32) 130 | 131 | # get absolute pose of im0 and im1 132 | im1ref, im2ref = self.im_pairs[index] 133 | # center of camera 1 in world coordinates, quaternion transf. from camera to world 134 | c1, q1 = self.abs_poses[im1ref] 135 | # center of camera 2 in world coordinates, quaternion transf. from camera to world 136 | c2, q2 = self.abs_poses[im2ref] 137 | 138 | data = { 139 | 'image0': image1, # (3, h, w) 140 | 'depth0': depth1, # (h, w) 141 | 'image1': image2, 142 | 'depth1': depth2, 143 | 'T_0to1': T_0to1, # (4, 4) # relative pose 144 | 'abs_q_0': q1, 145 | 'abs_c_0': c1, 146 | 'abs_q_1': q2, 147 | 'abs_c_1': c2, 148 | 'sim': self.sim[index], # DVLAD similarity 149 | 'K_color0': self.K.copy(), # (3, 3) 150 | 'K_color1': self.K.copy(), # (3, 3) 151 | 'K_depth': self.K.copy(), # (3, 3) 152 | 'dataset_name': '7Scenes', 153 | 'scene_id': self.scene_root.split('/')[-1], 154 | 'scene_root': str(self.scene_root), 155 | 'pair_id': self.original_idxs[index], 156 | 'pair_names': self.im_pairs[index] 157 | } 158 | 159 | return data 160 | 161 | def __len__(self): 162 | return self.num 163 | 164 | 165 | class SevenScenesDataset(data.ConcatDataset): 166 | def __init__(self, cfg, mode, transforms=None): 167 | 168 | scenes = cfg.DATASET.SCENES 169 | data_root = cfg.DATASET.DATA_ROOT 170 | resize = (cfg.DATASET.WIDTH, cfg.DATASET.HEIGHT) 171 | # If None, loads GT depth. 
Otherwise, loads depth map with name `pairs.depth.suffix.png` where suffix is estimated_depth 172 | estimated_depth = cfg.DATASET.ESTIMATED_DEPTH 173 | 174 | assert mode in ['train', 'val', 'test'], 'Invalid dataset mode' 175 | pair_txt = {'train': cfg.DATASET.PAIRS_TXT.TRAIN, 176 | 'val': cfg.DATASET.PAIRS_TXT.VAL, 177 | 'test': cfg.DATASET.PAIRS_TXT.TEST}[mode] 178 | one_nn = cfg.DATASET.PAIRS_TXT.ONE_NN 179 | 180 | if scenes is None: 181 | # Locate all scenes of the current dataset 182 | scenes = self.glob_scenes(data_root, pair_txt) 183 | 184 | # Init dataset objects for each scene 185 | data_srcs = [ 186 | SceneDataset( 187 | os.path.join(data_root, scene), 188 | pair_txt, resize, transforms, one_nn, estimated_depth) for scene in scenes] 189 | super().__init__(data_srcs) 190 | 191 | def glob_scenes(self, data_root, pair_txt): 192 | scenes = [] 193 | for sdir in glob.iglob('{}/*/{}'.format(data_root, pair_txt)): 194 | sdir = sdir.split('/')[-2] 195 | scenes.append(sdir) 196 | return sorted(scenes) 197 | -------------------------------------------------------------------------------- /lib/datasets/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import torch 4 | from numpy.linalg import inv 5 | 6 | 7 | def imread(path, augment_fn=None): 8 | cv_type = cv2.IMREAD_COLOR 9 | image = cv2.imread(str(path), cv_type) 10 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 11 | 12 | if augment_fn is not None: 13 | image = cv2.imread(str(path), cv2.IMREAD_COLOR) 14 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 15 | image = augment_fn(image) 16 | image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) 17 | return image # (h, w, 3) 18 | 19 | 20 | def get_resized_wh(w, h, resize=None): 21 | if resize is not None: # resize the longer edge 22 | scale = resize / max(h, w) 23 | w_new, h_new = int(round(w * scale)), int(round(h * scale)) 24 | else: 25 | w_new, h_new = w, h 26 | return w_new, h_new 27 | 28 | 29 | def get_divisible_wh(w, h, df=None): 30 | if df is not None: 31 | w_new, h_new = map(lambda x: int(x // df * df), [w, h]) 32 | else: 33 | w_new, h_new = w, h 34 | return w_new, h_new 35 | 36 | 37 | def pad_bottom_right(inp, pad_size, ret_mask=False): 38 | assert isinstance(pad_size, int) and pad_size >= max( 39 | inp.shape[-2:]), f"{pad_size} < {max(inp.shape[-2:])}" 40 | mask = None 41 | if inp.ndim == 2: 42 | padded = np.zeros((pad_size, pad_size), dtype=inp.dtype) 43 | padded[:inp.shape[0], :inp.shape[1]] = inp 44 | if ret_mask: 45 | mask = np.zeros((pad_size, pad_size), dtype=bool) 46 | mask[:inp.shape[0], :inp.shape[1]] = True 47 | elif inp.ndim == 3: 48 | padded = np.zeros((inp.shape[0], pad_size, pad_size), dtype=inp.dtype) 49 | padded[:, :inp.shape[1], :inp.shape[2]] = inp 50 | if ret_mask: 51 | mask = np.zeros((inp.shape[0], pad_size, pad_size), dtype=bool) 52 | mask[:, :inp.shape[1], :inp.shape[2]] = True 53 | else: 54 | raise NotImplementedError() 55 | return padded, mask 56 | 57 | 58 | def read_color_image(path, resize=(640, 480), augment_fn=None): 59 | """ 60 | Args: 61 | resize (tuple): align image to depthmap, in (w, h). 
62 | augment_fn (callable, optional): augments images with pre-defined visual effects 63 | Returns: 64 | image (torch.tensor): (3, h, w) 65 | """ 66 | # read and resize image 67 | image = imread(path, None) 68 | image = cv2.resize(image, resize) 69 | 70 | # (h, w, 3) -> (3, h, w) and normalized 71 | image = torch.from_numpy(image).float().permute(2, 0, 1) / 255 72 | if augment_fn: 73 | image = augment_fn(image) 74 | return image 75 | 76 | 77 | def read_depth_image(path): 78 | depth = cv2.imread(str(path), cv2.IMREAD_UNCHANGED) 79 | depth = depth / 1000 80 | depth = torch.from_numpy(depth).float() # (h, w) 81 | return depth 82 | 83 | 84 | def read_scannet_pose(path): 85 | """ Read ScanNet's Camera2World pose and transform it to World2Camera. 86 | 87 | Returns: 88 | pose_w2c (np.ndarray): (4, 4) 89 | """ 90 | cam2world = np.loadtxt(path, delimiter=' ') 91 | world2cam = inv(cam2world) 92 | return world2cam 93 | 94 | 95 | def read_scannet_intrinsic(path, color=True): 96 | """ 97 | Read ScanNet's intrinsic matrix and returns 3x3 matrix. If color is True, returns color camera intrinsics. 98 | Otherwise returns depth camera intrinsics. 99 | The file containing the intrinsics is located in {scannet_root}/scans/scene{id}/sensor_data/_info.txt 100 | This file has the intrinsics of the depth camera and color camera under the keys 'm_calibrationColorIntrinsic' 101 | and 'm_calibrationDepthIntrinsic'. 102 | """ 103 | 104 | key = 'm_calibrationColorIntrinsic' if color else 'm_calibrationDepthIntrinsic' 105 | 106 | with open(path, 'r') as f: 107 | for line in f.readlines(): 108 | if key in line: 109 | mat = line.split(' = ')[1] 110 | mat = mat.lstrip().rstrip().split(' ') 111 | mat = [float(m) for m in mat] 112 | return np.array(mat).reshape(4, 4)[:-1, :-1] 113 | 114 | raise Exception(f'Invalid key {key}') 115 | 116 | 117 | def correct_intrinsic_scale(K, scale_x, scale_y): 118 | '''Given an intrinsic matrix (3x3) and two scale factors, returns the new intrinsic matrix corresponding to 119 | the new coordinates x' = scale_x * x; y' = scale_y * y 120 | Source: https://dsp.stackexchange.com/questions/6055/how-does-resizing-an-image-affect-the-intrinsic-camera-matrix 121 | ''' 122 | 123 | transform = np.eye(3) 124 | transform[0, 0] = scale_x 125 | transform[0, 2] = scale_x / 2 - 0.5 126 | transform[1, 1] = scale_y 127 | transform[1, 2] = scale_y / 2 - 0.5 128 | Kprime = transform @ K 129 | 130 | return Kprime 131 | -------------------------------------------------------------------------------- /lib/models/builder.py: -------------------------------------------------------------------------------- 1 | import torch.cuda 2 | 3 | from lib.models.regression.model import RegressionModel 4 | from lib.models.regression.model import RegressionMultiFrameModel 5 | from lib.models.matching.model import FeatureMatchingModel 6 | 7 | 8 | def build_model(cfg, checkpoint=''): 9 | if cfg.MODEL == 'FeatureMatching': 10 | return FeatureMatchingModel(cfg) 11 | elif cfg.MODEL == 'Regression': 12 | model = RegressionModel.load_from_checkpoint(checkpoint, cfg=cfg) if \ 13 | checkpoint is not '' else RegressionModel(cfg) 14 | if torch.cuda.is_available(): 15 | model = model.cuda() 16 | model.eval() 17 | return model 18 | elif cfg.MODEL == 'RegressionMultiFrame': 19 | model = RegressionMultiFrameModel.load_from_checkpoint(checkpoint, cfg=cfg) if \ 20 | checkpoint is not '' else RegressionMultiFrameModel(cfg) 21 | if torch.cuda.is_available(): 22 | model = model.cuda() 23 | model.eval() 24 | return model 25 | else: 26 | raise 
NotImplementedError() 27 | -------------------------------------------------------------------------------- /lib/models/matching/feature_matching.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 as cv 3 | 4 | 5 | class PrecomputedMatching: 6 | '''Get correspondences from pre-computed file''' 7 | 8 | def __init__(self, cfg): 9 | # Scannet correspondences are stored in a single file, pointed by MATCHES_FILE_PATH 10 | # 7Scenes correspondences are split in a file per scene and dependent on the pairs. 11 | # The 7Scenes file pattern (including {scene_id} and {test_pairs} tags) is stored in MATCHES_FILE_PATH 12 | 13 | self.correspondences = None 14 | self.debug = cfg.DEBUG 15 | 16 | # If there is a pattern, save that string pattern, and will load correspondences once the scene_id is defined 17 | if '{' in cfg.MATCHES_FILE_PATH: 18 | self.matches_file_path = cfg.MATCHES_FILE_PATH 19 | self.scene_id = None 20 | self.pairs_txt = cfg.DATASET.PAIRS_TXT.TEST 21 | else: 22 | self.load_correspondences(cfg.MATCHES_FILE_PATH) 23 | 24 | def load_correspondences(self, file_path): 25 | data = np.load(file_path, allow_pickle=True) 26 | self.correspondences = data['correspondences'].astype(np.float32) 27 | 28 | def get_correspondences(self, data): 29 | # Check if loaded scene_id is still valid (in the case where correspondences are stored over multiple files) 30 | # If not, load the correct scene_id correspondences 31 | if hasattr(self, 'scene_id'): 32 | if self.scene_id != data['scene_id'][0]: 33 | self.scene_id = data['scene_id'][0] 34 | scene_root = data['scene_root'][0] 35 | matches_fpath = self.matches_file_path.format( 36 | scene_root=scene_root, pairs_txt=self.pairs_txt) 37 | self.load_correspondences(matches_fpath) 38 | 39 | # get correspondences for the given pair 40 | pair_id = data['pair_id'].item() 41 | corr = self.correspondences[pair_id] 42 | 43 | # remove nan's (filler) 44 | corr = corr[~np.isnan(corr)].reshape(-1, 4) 45 | if len(corr) > 0: 46 | pts1, pts2 = corr[:, :2], corr[:, 2:] 47 | else: 48 | pts1 = pts2 = np.array([]) 49 | 50 | return pts1, pts2 51 | 52 | 53 | class SIFTMatching: 54 | def __init__(self, cfg): 55 | 56 | # SIFT parameters 57 | self.ratio_threshold = cfg.SIFT.RATIO_THRESHOLD 58 | self.sift = cv.SIFT_create(cfg.SIFT.NUM_FEATURES) 59 | self.debug = cfg.DEBUG 60 | 61 | def transform_grayscale(self, img): 62 | img = img.permute(1, 2, 0).numpy() 63 | img = (255 * img).astype(np.uint8) 64 | img_gray = cv.cvtColor(img, cv.COLOR_RGB2GRAY) 65 | return img_gray 66 | 67 | def root_sift(self, descs): 68 | '''Apply the Hellinger kernel by first L1-normalizing, taking the square-root, and then L2-normalizing''' 69 | 70 | eps = 1e-7 71 | descs /= (descs.sum(axis=1, keepdims=True) + eps) 72 | descs = np.sqrt(descs) 73 | return descs 74 | 75 | def get_correspondences(self, data): 76 | # get grayscale images 77 | img0 = self.transform_grayscale(data['image0'].squeeze(0)) 78 | img1 = self.transform_grayscale(data['image1'].squeeze(0)) 79 | 80 | # get SIFT key points and descriptors 81 | kp0, des0 = self.sift.detectAndCompute(img0, None) 82 | kp1, des1 = self.sift.detectAndCompute(img1, None) 83 | 84 | # Apply normalisation (rootSIFT) 85 | des0, des1 = self.root_sift(des0), self.root_sift(des1) 86 | 87 | # Get matches using FLANN 88 | FLANN_INDEX_KDTREE = 1 89 | index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5) 90 | search_params = dict(checks=50) 91 | flann = cv.FlannBasedMatcher(index_params, search_params) 92 
| matches = flann.knnMatch(des0, des1, k=2) 93 | 94 | pts1 = [] 95 | pts2 = [] 96 | good_matches = [] 97 | # ratio test as per Lowe's paper 98 | for i, (m, n) in enumerate(matches): 99 | if m.distance < self.ratio_threshold * n.distance: 100 | pts2.append(kp1[m.trainIdx].pt) 101 | pts1.append(kp0[m.queryIdx].pt) 102 | good_matches.append(m) 103 | 104 | pts1 = np.float32(pts1).reshape(-1, 2) 105 | pts2 = np.float32(pts2).reshape(-1, 2) 106 | 107 | # plot results (DEBUG) 108 | if self.debug: 109 | img_matches = np.empty( 110 | (max(img0.shape[0], 111 | img1.shape[0]), 112 | img1.shape[1] + img1.shape[1], 113 | 3), 114 | dtype=np.uint8) 115 | cv.drawMatches(img0, kp0, img1, kp1, good_matches, img_matches, 116 | flags=cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS) 117 | data['debug_img_matches'] = img_matches 118 | return pts1, pts2 119 | -------------------------------------------------------------------------------- /lib/models/matching/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from lib.models.matching.feature_matching import * 4 | from lib.models.matching.pose_solver import * 5 | 6 | 7 | class FeatureMatchingModel(torch.nn.Module): 8 | def __init__(self, cfg): 9 | super().__init__() 10 | 11 | if cfg.FEATURE_MATCHING == 'SIFT': 12 | self.feature_matching = SIFTMatching(cfg) 13 | elif cfg.FEATURE_MATCHING == 'Precomputed': 14 | self.feature_matching = PrecomputedMatching(cfg) 15 | else: 16 | raise NotImplementedError('Invalid feature matching') 17 | 18 | if cfg.POSE_SOLVER == 'EssentialMatrix': 19 | self.pose_solver = EssentialMatrixSolver(cfg) 20 | elif cfg.POSE_SOLVER == 'EssentialMatrixMetric': 21 | self.pose_solver = EssentialMatrixMetricSolver(cfg) 22 | elif cfg.POSE_SOLVER == 'Procrustes': 23 | self.pose_solver = ProcrustesSolver(cfg) 24 | elif cfg.POSE_SOLVER == 'PNP': 25 | self.pose_solver = PnPSolver(cfg) 26 | else: 27 | raise NotImplementedError('Invalid pose solver') 28 | 29 | def forward(self, data): 30 | assert data['depth0'].shape[0] == 1, 'Baseline models require batch size of 1' 31 | 32 | # get 2D-2D correspondences 33 | pts1, pts2 = self.feature_matching.get_correspondences(data) 34 | 35 | # get relative pose 36 | R, t, inliers = self.pose_solver.estimate_pose(pts1, pts2, data) 37 | data['inliers'] = inliers 38 | R = torch.from_numpy(R.copy()).unsqueeze(0).float() 39 | t = torch.from_numpy(t.copy()).view(1, 3).unsqueeze(0).float() 40 | return R, t 41 | -------------------------------------------------------------------------------- /lib/models/regression/encoder/preact.py: -------------------------------------------------------------------------------- 1 | '''Pre-activation ResNet in PyTorch. 2 | https://github.com/kuangliu/pytorch-cifar/blob/master/models/preact_resnet.py 3 | Reference: 4 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 5 | Identity Mappings in Deep Residual Networks. 
arXiv:1603.05027 6 | ''' 7 | 8 | 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | 12 | 13 | class PreActBlock(nn.Module): 14 | '''Pre-activation version of the BasicBlock.''' 15 | expansion = 1 16 | 17 | def __init__(self, in_planes, planes, stride=1, bn=True): 18 | super(PreActBlock, self).__init__() 19 | self.bn1 = nn.BatchNorm2d(in_planes) if bn else nn.Identity() 20 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, 21 | stride=stride, padding=1, bias=False) 22 | self.bn2 = nn.BatchNorm2d(planes) if bn else nn.Identity() 23 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 24 | 25 | if stride != 1 or in_planes != self.expansion*planes: 26 | self.shortcut = nn.Sequential(nn.Conv2d( 27 | in_planes, self.expansion * planes, kernel_size=1, 28 | stride=stride, bias=False)) 29 | 30 | def forward(self, x): 31 | out = F.relu(self.bn1(x)) 32 | shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x 33 | out = self.conv1(out) 34 | out = self.conv2(F.relu(self.bn2(out))) 35 | out += shortcut 36 | return out 37 | 38 | 39 | class PreActBottleneck(nn.Module): 40 | '''Pre-activation version of the original Bottleneck module.''' 41 | expansion = 4 42 | 43 | def __init__(self, in_planes, planes, stride=1): 44 | super(PreActBottleneck, self).__init__() 45 | self.bn1 = nn.BatchNorm2d(in_planes) 46 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 47 | self.bn2 = nn.BatchNorm2d(planes) 48 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 49 | self.bn3 = nn.BatchNorm2d(planes) 50 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) 51 | 52 | if stride != 1 or in_planes != self.expansion*planes: 53 | self.shortcut = nn.Sequential(nn.Conv2d( 54 | in_planes, self.expansion * planes, kernel_size=1, 55 | stride=stride, bias=False)) 56 | 57 | def forward(self, x): 58 | out = F.relu(self.bn1(x)) 59 | shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x 60 | out = self.conv1(out) 61 | out = self.conv2(F.relu(self.bn2(out))) 62 | out = self.conv3(F.relu(self.bn3(out))) 63 | out += shortcut 64 | return out 65 | 66 | 67 | class PreActBottleneck_depthwise(nn.Module): 68 | '''Pre-activation version of the original Bottleneck module.''' 69 | expansion = 4 70 | 71 | def __init__(self, in_planes, planes, stride=1): 72 | super(PreActBottleneck_depthwise, self).__init__() 73 | self.bn1 = nn.BatchNorm2d(in_planes) 74 | self.group_num = in_planes if in_planes < planes else planes 75 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False, groups=self.group_num) 76 | self.bn2 = nn.BatchNorm2d(planes) 77 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 78 | padding=1, bias=False, groups=self.group_num) 79 | self.bn3 = nn.BatchNorm2d(planes) 80 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, 81 | bias=False, groups=self.group_num) 82 | 83 | if stride != 1 or in_planes != self.expansion*planes: 84 | self.shortcut = nn.Sequential( 85 | nn.Conv2d( 86 | in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False, 87 | groups=self.group_num)) 88 | 89 | def forward(self, x): 90 | out = F.relu(self.bn1(x)) 91 | shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x 92 | out = self.conv1(out) 93 | out = self.conv2(F.relu(self.bn2(out))) 94 | out = self.conv3(F.relu(self.bn3(out))) 95 | out += shortcut 96 | return out 97 | 
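A quick, self-contained sketch (not a file from the repository) of how these pre-activation blocks behave: with `stride=1` a `PreActBlock` preserves the spatial resolution, with `stride=2` it halves it, and `PreActBottleneck` additionally expands the channel count by its `expansion` factor of 4.

import torch
from lib.models.regression.encoder.preact import PreActBlock, PreActBottleneck

x = torch.randn(1, 64, 60, 80)                       # (batch, channels, H, W)
print(PreActBlock(64, 64, stride=1)(x).shape)        # torch.Size([1, 64, 60, 80])
print(PreActBlock(64, 128, stride=2)(x).shape)       # torch.Size([1, 128, 30, 40])
print(PreActBottleneck(64, 64, stride=1)(x).shape)   # torch.Size([1, 256, 60, 80])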
-------------------------------------------------------------------------------- /lib/models/regression/encoder/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from lib.models.regression.encoder.preact import PreActBlock, PreActBottleneck 5 | 6 | 7 | class ResNet(nn.Module): 8 | def __init__(self, cfg): 9 | super().__init__() 10 | block_type = [PreActBlock, PreActBottleneck] 11 | block = block_type[cfg.BLOCK_TYPE] 12 | num_blocks = [int(x) for x in cfg.NUM_BLOCKS.strip().split("-")] 13 | self.in_planes = 64 14 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=1, bias=False) 15 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) 16 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 17 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) 18 | self.num_out_layers = 256 * block.expansion 19 | 20 | def _make_layer(self, block, planes, num_blocks, stride): 21 | strides = [stride] + [1] * (num_blocks - 1) 22 | layers = [] 23 | for stride in strides: 24 | layers.append(block(self.in_planes, planes, stride)) 25 | self.in_planes = planes * block.expansion 26 | return nn.Sequential(*layers) 27 | 28 | def forward(self, x): 29 | # todo recheck 30 | out = self.conv1(x) 31 | out = self.layer1(out) 32 | out = F.avg_pool2d(out, 2) 33 | out = self.layer2(out) 34 | out = F.avg_pool2d(out, 2) 35 | out = self.layer3(out) 36 | out = F.avg_pool2d(out, 2) 37 | return out 38 | -------------------------------------------------------------------------------- /lib/models/regression/encoder/resunet.py: -------------------------------------------------------------------------------- 1 | '''ResUNet in PyTorch. 2 | https://github.com/qianqianwang68/caps/blob/master/CAPS/network.py 3 | Reference: 4 | [1] Zhengxin Zhang, Qingjie Liu 5 | Road Extraction by Deep Residual U-Net. 
arXiv:1711.10684 6 | ''' 7 | 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | 12 | from lib.models.regression.encoder.preact import PreActBlock, PreActBottleneck 13 | 14 | 15 | class conv(nn.Module): 16 | def __init__(self, num_in_layers, num_out_layers, kernel_size, stride): 17 | super(conv, self).__init__() 18 | self.kernel_size = kernel_size 19 | self.conv = nn.Conv2d(num_in_layers, num_out_layers, kernel_size=kernel_size, stride=stride, 20 | padding=(self.kernel_size - 1) // 2) 21 | self.normalize = nn.BatchNorm2d(num_out_layers) 22 | 23 | def forward(self, x): 24 | x = self.conv(x) 25 | x = self.normalize(x) 26 | return F.elu(x, inplace=True) 27 | 28 | 29 | class upconv(nn.Module): 30 | def __init__(self, num_in_layers, num_out_layers, kernel_size, scale): 31 | super(upconv, self).__init__() 32 | self.scale = scale 33 | self.conv1 = conv(num_in_layers, num_out_layers, kernel_size, 1) 34 | 35 | def forward(self, x): 36 | x = nn.functional.interpolate(x, scale_factor=self.scale, 37 | mode='bilinear', align_corners=True) 38 | return self.conv1(x) 39 | 40 | 41 | class ResUNet(nn.Module): 42 | def __init__(self, cfgmodel, num_in_layers=3): 43 | super().__init__() 44 | filters = [256, 512, 1024, 2048] 45 | self.in_planes = 64 46 | if num_in_layers != 3: # Number of input channels 47 | self.firstconv = nn.Conv2d( 48 | num_in_layers, 64, kernel_size=(7, 7), 49 | stride=(2, 2), 50 | padding=(3, 3), 51 | bias=False) 52 | else: 53 | self.firstconv = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) # H/2 54 | self.firstbn = nn.BatchNorm2d(64) 55 | self.firstrelu = nn.ReLU(inplace=True) 56 | self.firstmaxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) # H/4 57 | # encoder 58 | block_type = [PreActBlock, PreActBottleneck] 59 | block = block_type[cfgmodel.BLOCK_TYPE] 60 | num_blocks = [int(x) for x in cfgmodel.NUM_BLOCKS.strip().split("-")] 61 | self.encoder1 = self._make_layer(block, 64, num_blocks[0], stride=1) # H/4 62 | self.encoder2 = self._make_layer(block, 128, num_blocks[1], stride=2) # H/8 63 | self.encoder3 = self._make_layer(block, 256, num_blocks[2], stride=2) # H/16 64 | 65 | # decoder 66 | self.not_concat = getattr(cfgmodel, "NOT_CONCAT", False) 67 | self.upconv4 = upconv(filters[2], 512, 3, 2) 68 | if not self.not_concat: 69 | self.iconv4 = conv(filters[1] + 512, 512, 3, 1) 70 | else: 71 | self.iconv4 = conv(512, 512, 3, 1) 72 | 73 | self.upconv3 = upconv(512, 256, 3, 2) 74 | if not self.not_concat: 75 | self.iconv3 = conv(filters[0] + 256, 256, 3, 1) 76 | else: 77 | self.iconv3 = conv(256, 256, 3, 1) 78 | 79 | num_out_layers = getattr(cfgmodel, "NUM_OUT_LAYERS", 128) 80 | self.num_out_layers = num_out_layers 81 | self.outconv = conv(256, num_out_layers, 1, 1) 82 | 83 | def _make_layer(self, block, planes, num_blocks, stride): 84 | strides = [stride] + [1] * (num_blocks - 1) 85 | layers = [] 86 | for stride in strides: 87 | layers.append(block(self.in_planes, planes, stride)) 88 | self.in_planes = planes * block.expansion 89 | return nn.Sequential(*layers) 90 | 91 | def skipconnect(self, x1, x2): 92 | diffY = x2.size()[2] - x1.size()[2] 93 | diffX = x2.size()[3] - x1.size()[3] 94 | 95 | x1 = F.pad(x1, (diffX // 2, diffX - diffX // 2, 96 | diffY // 2, diffY - diffY // 2)) 97 | 98 | # for padding issues, see 99 | # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a 100 | # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd 
101 | 102 | x = torch.cat([x2, x1], dim=1) 103 | return x 104 | 105 | def forward(self, x): 106 | # encoding 107 | x1 = self.firstconv(x) 108 | x1 = self.firstbn(x1) 109 | x1 = self.firstrelu(x1) 110 | x1 = self.firstmaxpool(x1) 111 | 112 | x2 = self.encoder1(x1) 113 | x3 = self.encoder2(x2) 114 | x4 = self.encoder3(x3) 115 | 116 | # decoding 117 | x = self.upconv4(x4) 118 | if not self.not_concat: 119 | x = self.skipconnect(x3, x) 120 | x = self.iconv4(x) 121 | 122 | x = self.upconv3(x) 123 | if not self.not_concat: 124 | x = self.skipconnect(x2, x) 125 | x = self.iconv3(x) 126 | 127 | x = self.outconv(x) 128 | return x 129 | -------------------------------------------------------------------------------- /lib/utils/data.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def data_to_model_device(data, model): 5 | '''Move all tensors in data dictionary to the same device as model''' 6 | 7 | try: 8 | device = next(model.parameters()).device 9 | except: 10 | # in case the model has no parameters (baseline models) 11 | device = 'cpu' 12 | 13 | for k, v in data.items(): 14 | if torch.is_tensor(v): 15 | data[k] = v.to(device) 16 | 17 | return data 18 | -------------------------------------------------------------------------------- /lib/utils/logger.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | 4 | class Logger(object): 5 | def __init__(self, filepath): 6 | self.terminal = sys.stdout 7 | self.log = open(filepath, "w") 8 | 9 | def write(self, message): 10 | self.terminal.write(message) 11 | self.log.write(message) 12 | 13 | def flush(self): 14 | self.terminal.flush() 15 | self.log.flush() 16 | 17 | 18 | def set_log(filepath): 19 | sys.stdout = Logger(filepath) 20 | return Logger 21 | -------------------------------------------------------------------------------- /lib/utils/metrics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from collections import defaultdict 4 | 5 | 6 | def pose_error_torch(R, t, Tgt, reduce=None): 7 | """Compute angular, scale and euclidean error of translation vector (metric). Compute angular rotation error.""" 8 | 9 | Rgt = Tgt[:, :3, :3] # [B, 3, 3] 10 | tgt = Tgt[:, :3, 3:].transpose(1, 2) # [B, 1, 3] 11 | 12 | scale_t = torch.linalg.norm(t, dim=-1) 13 | scale_tgt = torch.linalg.norm(tgt, dim=-1) 14 | 15 | cosine = (t @ tgt.transpose(1, 2)).squeeze(-1) / (scale_t * scale_tgt + 1e-9) 16 | cosine = torch.clip(cosine, -1.0, 1.0) # handle numerical errors 17 | t_ang_err = torch.rad2deg(torch.acos(cosine)) 18 | t_ang_err = torch.minimum(t_ang_err, 180 - t_ang_err) 19 | 20 | t_scale_err = scale_t / scale_tgt 21 | t_scale_err_sym = torch.maximum(scale_t / scale_tgt, scale_tgt / scale_t) 22 | t_euclidean_err = torch.linalg.norm(t - tgt, dim=-1) 23 | 24 | residual = R.transpose(1, 2) @ Rgt 25 | trace = torch.diagonal(residual, dim1=-2, dim2=-1).sum(-1) 26 | cosine = (trace - 1) / 2 27 | cosine = torch.clip(cosine, -1., 1.) 
# handle numerical errors 28 | R_err = torch.rad2deg(torch.acos(cosine)) 29 | 30 | if reduce is None: 31 | def fn(x): return x 32 | elif reduce == 'mean': 33 | fn = torch.mean 34 | elif reduce == 'median': 35 | fn = torch.median 36 | 37 | t_ang_err = fn(t_ang_err) 38 | t_scale_err = fn(t_scale_err) 39 | t_euclidean_err = fn(t_euclidean_err) 40 | R_err = fn(R_err) 41 | 42 | errors = {'t_err_ang': t_ang_err, 43 | 't_err_scale': t_scale_err, 44 | 't_err_scale_sym': t_scale_err_sym, 45 | 't_err_euc': t_euclidean_err, 46 | 'R_err': R_err} 47 | return errors 48 | 49 | 50 | def error_auc(errors, thresholds): 51 | """ 52 | Args: 53 | errors (list): [N,] 54 | thresholds (list) 55 | """ 56 | errors = np.nan_to_num(errors, nan=float('inf')) # convert nans to inf 57 | errors = [0] + sorted(list(errors)) 58 | recall = list(np.linspace(0, 1, len(errors))) 59 | 60 | aucs = [] 61 | for thr in thresholds: 62 | last_index = np.searchsorted(errors, thr) 63 | y = recall[:last_index] + [recall[last_index-1]] 64 | x = errors[:last_index] + [thr] 65 | aucs.append(np.trapz(y, x) / thr) 66 | 67 | return {f'auc@{t}': auc for t, auc in zip(thresholds, aucs)} 68 | 69 | 70 | def ecdf(x): 71 | """Get Empirical Cumulative Distribution Function (ECDF) given samples x [N,]""" 72 | cd = np.linspace(0, 1, x.shape[0]) 73 | v = np.sort(x) 74 | return v, cd 75 | 76 | 77 | def print_auc_table(agg_metrics): 78 | pose_error = np.maximum(agg_metrics['R_err'], agg_metrics['t_err_ang']) 79 | auc_pose = error_auc(pose_error, (5, 10, 20)) 80 | print('Pose error AUC @ 5/10/20deg: {0:.3f}/{1:.3f}/{2:.3f}'.format(*auc_pose.values())) 81 | 82 | auc_rotation = error_auc(agg_metrics['R_err'], (5, 10, 20)) 83 | print('Rotation error AUC @ 5/10/20deg: {0:.3f}/{1:.3f}/{2:.3f}'.format(*auc_rotation.values())) 84 | 85 | auc_translation_ang = error_auc(agg_metrics['t_err_ang'], (5, 10, 20)) 86 | print( 87 | 'Translation angular error AUC @ 5/10/20deg: {0:.3f}/{1:.3f}/{2:.3f}'.format(*auc_translation_ang.values())) 88 | 89 | auc_translation_euc = error_auc(agg_metrics['t_err_euc'], (0.1, 0.5, 1)) 90 | print( 91 | 'Translation Euclidean error AUC @ 0.1/0.5/1m: {0:.3f}/{1:.3f}/{2:.3f}'.format(*auc_translation_euc.values())) 92 | 93 | 94 | def precision(agg_metrics, rot_threshold, trans_threshold): 95 | '''Provides ratio of samples with rotation error < rot_threshold AND translation error < trans_threshold''' 96 | mask_rot = agg_metrics['R_err'] <= rot_threshold 97 | mask_trans = agg_metrics['t_err_euc'] <= trans_threshold 98 | recall = (mask_rot * mask_trans).mean() 99 | return recall 100 | 101 | 102 | def A_metrics(t_scale_err_sym): 103 | """Returns A1/A2/A3 metrics of translation vector norm given the "symmetric" scale error 104 | where 105 | t_scale_err_sym = torch.maximum((t_norm_gt / t_norm_pred), (t_norm_pred / t_norm_gt)) 106 | """ 107 | 108 | if not torch.is_tensor(t_scale_err_sym): 109 | t_scale_err_sym = torch.from_numpy(t_scale_err_sym) 110 | 111 | thresh = t_scale_err_sym 112 | a1 = (thresh < 1.25).float().mean() 113 | a2 = (thresh < 1.25 ** 2).float().mean() 114 | a3 = (thresh < 1.25 ** 3).float().mean() 115 | return a1, a2, a3 116 | 117 | 118 | class MetricsAccumulator: 119 | """Accumulates metrics and aggregates them when requested""" 120 | 121 | def __init__(self): 122 | self.data = defaultdict(list) 123 | 124 | def accumulate(self, data): 125 | for key, value in data.items(): 126 | self.data[key].append(value) 127 | 128 | def aggregate(self): 129 | res = dict() 130 | for key in self.data.keys(): 131 | res[key] = 
torch.cat(self.data[key]).view(-1).cpu().numpy() 132 | return res 133 | -------------------------------------------------------------------------------- /lib/utils/rotationutils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from transforms3d.quaternions import qmult, qinverse, rotate_vector 3 | 4 | 5 | # Based on the paper : On the Continuity of Rotation Representations in Neural Networks 6 | # code from https://github.com/papagina/RotationContinuity/blob/master/Inverse_Kinematics/code/tools.py 7 | 8 | # batch*n 9 | 10 | 11 | def normalize_vector(v): 12 | batch = v.shape[0] 13 | v_mag = torch.sqrt(v.pow(2).sum(1)) # batch 14 | v_mag = torch.max(v_mag, torch.autograd.Variable(torch.FloatTensor([1e-8]).cuda())) 15 | v_mag = v_mag.view(batch, 1).expand(batch, v.shape[1]) 16 | v = v / v_mag 17 | return v 18 | 19 | 20 | # u, v batch*n 21 | def cross_product(u, v): 22 | batch = u.shape[0] 23 | # print (u.shape) 24 | # print (v.shape) 25 | i = u[:, 1] * v[:, 2] - u[:, 2] * v[:, 1] 26 | j = u[:, 2] * v[:, 0] - u[:, 0] * v[:, 2] 27 | k = u[:, 0] * v[:, 1] - u[:, 1] * v[:, 0] 28 | 29 | out = torch.cat((i.view(batch, 1), j.view(batch, 1), k.view(batch, 1)), 1) # batch*3 30 | 31 | return out 32 | 33 | 34 | def rotation_matrix_from_ortho6d(poses): 35 | """ 36 | Computes rotation matrix from 6D continuous space according to the parametrisation proposed in 37 | On the Continuity of Rotation Representations in Neural Networks 38 | https://arxiv.org/pdf/1812.07035.pdf 39 | :param poses: [B, 6] 40 | :return: R: [B, 3, 3] 41 | """ 42 | 43 | x_raw = poses[:, 0:3] # batch*3 44 | y_raw = poses[:, 3:6] # batch*3 45 | 46 | x = normalize_vector(x_raw) # batch*3 47 | z = cross_product(x, y_raw) # batch*3 48 | z = normalize_vector(z) # batch*3 49 | y = cross_product(z, x) # batch*3 50 | 51 | x = x.view(-1, 3, 1) 52 | y = y.view(-1, 3, 1) 53 | z = z.view(-1, 3, 1) 54 | matrix = torch.cat((x, y, z), 2) # batch*3*3 55 | return matrix 56 | 57 | 58 | def relative_pose_wxyz(q1_wxyz, t1, q2_wxyz, t2): 59 | q12_wxyz = qmult(q2_wxyz, qinverse(q1_wxyz)) 60 | t12 = t2 - rotate_vector(t1, q12_wxyz) 61 | return q12_wxyz, t12 62 | -------------------------------------------------------------------------------- /lib/utils/solver.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def procrustes(A, B): 5 | """ 6 | See: https://en.wikipedia.org/wiki/Kabsch_algorithm 7 | 2-D or 3-D registration with known correspondences. 8 | Registration occurs in the zero centered coordinate system, and then 9 | must be transported back. 
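In short: with centred point sets A_c = A - mean(A) and B_c = B - mean(B), the covariance is H = A_c^T B_c,
its SVD gives U, S, V, the reflection is corrected via Z = diag(1, 1, sign(det(U V^T))), and the result is
R = V Z U^T with t = mean(B) - R mean(A).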
10 | Args: 11 | - A: Torch tensor of shape (B, N, 3) -- Point Cloud to Align (source) 12 | - B: Torch tensor of shape (B, N, 3) -- Reference Point Cloud (target) 13 | Returns: 14 | - R: optimal rotation (B, 3, 3) 15 | - t: optimal translation (B, 3, 1) 16 | Based on: https://gist.github.com/bougui505/e392a371f5bab095a3673ea6f4976cc8 17 | """ 18 | assert len(A.shape) == len(B.shape) == 3, 'three dimensions are required' 19 | assert A.shape[0] == B.shape[0], 'batch size must match' 20 | assert A.shape[1] == B.shape[1], 'number of correspondences must match' 21 | assert A.shape[2] == B.shape[2], 'number of spatial dimensions must be 3' 22 | 23 | a_mean = A.mean(axis=1, keepdim=True) 24 | b_mean = B.mean(axis=1, keepdim=True) 25 | A_c = A - a_mean 26 | B_c = B - b_mean 27 | # Covariance matrix 28 | H = A_c.transpose(1, 2) @ B_c 29 | U, S, V = torch.svd(H) 30 | # Fixes orientation such that Det(R) = + 1 31 | Z = torch.eye(3).unsqueeze(0).repeat(A.shape[0], 1, 1).to(A.device) 32 | Z[:, -1, -1] = torch.sign(torch.linalg.det(U @ V.transpose(1, 2))) 33 | # Rotation matrix 34 | R = V @ Z @ U.transpose(1, 2) 35 | # Translation vector 36 | t = b_mean - a_mean @ R.transpose(1, 2) 37 | return R, t 38 | -------------------------------------------------------------------------------- /lib/utils/visualisation.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import cv2 4 | import numpy as np 5 | from lib.utils.localize import cal_vec_angle_error, cal_quat_angle_error 6 | 7 | 8 | def save_video(save_res_path, dataloader, odir): 9 | """Generate a video per sequence with per frame metrics.""" 10 | 11 | from vidgear.gears import WriteGear 12 | 13 | def save_video_gear(odir, old_scene, frames): 14 | video_writer = WriteGear( 15 | output_filename=f'{odir / old_scene}.mp4', custom_ffmpeg=os.getenv('FFMPEG_PATH')) 16 | if not video_writer._WriteGear__ffmpeg: 17 | print('Could not find ffmpeg path in the system. If available, set ffmpeg path in env. var. 
FFMPEG_PATH') 18 | 19 | # sort frames by filename and write to disk 20 | for k, frame in sorted(frames.items(), key=lambda item: item[0]): 21 | video_writer.write(frame) 22 | video_writer.close() 23 | return 24 | 25 | results_dict = np.load(save_res_path, allow_pickle=True).item() 26 | old_scene = None 27 | scenes = [] 28 | write_frames = {} 29 | 30 | for data in dataloader: 31 | scene = data['scene_id'][0] 32 | train_img_path, test_img_path = data['pair_names'][0][0], data['pair_names'][1][0] 33 | 34 | if scene not in scenes and old_scene is not None: 35 | save_video_gear(odir, old_scene, write_frames) 36 | write_frames = {} 37 | scenes.append(scene) 38 | 39 | # get performance metrics 40 | try: 41 | abs_pose_lbl = results_dict[scene][test_img_path]['abs_pose_lbl'] 42 | abs_pose_pred = results_dict[scene][test_img_path]['abs_pose_pred'] 43 | r_err = cal_quat_angle_error(abs_pose_lbl.q, abs_pose_pred.q).item() 44 | t_ang_err = cal_vec_angle_error(abs_pose_lbl.t, abs_pose_pred.t).item() 45 | t_err = np.linalg.norm(abs_pose_lbl.c - abs_pose_pred.c).item() 46 | except: 47 | r_err = float('inf') 48 | t_err = float('inf') 49 | t_ang_err = float('inf') 50 | 51 | # convert frames (pytorch -> OCV) 52 | c0 = (data['image0'].squeeze(0).permute(1, 2, 0) 53 | * 255).detach().cpu().numpy().astype(np.uint8) 54 | c0 = c0[:, :, ::-1] 55 | c1 = (data['image1'].squeeze(0).permute(1, 2, 0) 56 | * 255).detach().cpu().numpy().astype(np.uint8) 57 | c1 = c1[:, :, ::-1] 58 | frame = np.concatenate((c0, c1), axis=1).copy() 59 | 60 | # write metrics 61 | text = f'R_err: {r_err:.1f}deg. t_ang_err: {t_ang_err:.1f}deg. t_err: {t_err:.2f}m' 62 | font_size = 1 if c0.shape[0] > 500 else 0.5 63 | tx = 100 if c0.shape[0] > 500 else 10 64 | ty = c0.shape[0] - 30 65 | color = (0, 255, 0) if r_err <= 5 and t_err <= 0.25 else (0, 0, 255) 66 | cv2.putText(frame, text, (tx, ty), cv2.FONT_HERSHEY_DUPLEX, 67 | font_size, (0, 0, 0), 4, cv2.LINE_AA) 68 | cv2.putText(frame, text, (tx, ty), cv2.FONT_HERSHEY_DUPLEX, 69 | font_size, color, 1, cv2.LINE_AA) 70 | 71 | # resize 72 | frame = cv2.resize(frame, None, fx=0.5, fy=0.5) 73 | 74 | # save in memory (need to order before saving) 75 | write_frames[test_img_path] = frame 76 | old_scene = scene 77 | 78 | # last sequence 79 | save_video_gear(odir, old_scene, write_frames) 80 | return 81 | -------------------------------------------------------------------------------- /pyrightconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "exclude": ["data"] 3 | } 4 | -------------------------------------------------------------------------------- /submission.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | from collections import defaultdict 4 | from dataclasses import dataclass 5 | from zipfile import ZipFile 6 | 7 | import torch 8 | import numpy as np 9 | from tqdm import tqdm 10 | 11 | from config.default import cfg 12 | from lib.datasets.datamodules import DataModule 13 | from lib.models.builder import build_model 14 | from lib.utils.data import data_to_model_device 15 | from transforms3d.quaternions import mat2quat 16 | 17 | 18 | @dataclass 19 | class Pose: 20 | image_name: str 21 | q: np.ndarray 22 | t: np.ndarray 23 | inliers: float 24 | 25 | def __str__(self) -> str: 26 | formatter = {'float': lambda v: f'{v:.6f}'} 27 | max_line_width = 1000 28 | q_str = np.array2string(self.q, formatter=formatter, max_line_width=max_line_width)[1:-1] 29 | t_str = 
np.array2string(self.t, formatter=formatter, max_line_width=max_line_width)[1:-1] 30 | return f'{self.image_name} {q_str} {t_str} {self.inliers}' 31 | 32 | 33 | def predict(loader, model): 34 | results_dict = defaultdict(list) 35 | 36 | for data in tqdm(loader): 37 | # run inference 38 | data = data_to_model_device(data, model) 39 | with torch.no_grad(): 40 | R, t = model(data) 41 | R = R.detach().cpu().numpy() 42 | t = t.reshape(-1).detach().cpu().numpy() 43 | inliers = data['inliers'] 44 | scene = data['scene_id'][0] 45 | query_img = data['pair_names'][1][0] 46 | 47 | # ignore frames without poses (e.g. not enough feature matches) 48 | if np.isnan(R).any() or np.isnan(t).any() or np.isinf(t).any(): 49 | continue 50 | 51 | # populate results_dict 52 | estimated_pose = Pose(image_name=query_img, 53 | q=mat2quat(R).reshape(-1), 54 | t=t.reshape(-1), 55 | inliers=inliers) 56 | results_dict[scene].append(estimated_pose) 57 | 58 | return results_dict 59 | 60 | 61 | def save_submission(results_dict: dict, output_path: Path): 62 | with ZipFile(output_path, 'w') as zip: 63 | for scene, poses in results_dict.items(): 64 | poses_str = '\n'.join((str(pose) for pose in poses)) 65 | zip.writestr(f'pose_{scene}.txt', poses_str.encode('utf-8')) 66 | 67 | 68 | def eval(args): 69 | # Load configs 70 | cfg.merge_from_file('config/mapfree.yaml') 71 | cfg.merge_from_file(args.config) 72 | 73 | # Create dataloader 74 | if args.split == 'test': 75 | dataloader = DataModule(cfg).test_dataloader() 76 | elif args.split == 'val': 77 | cfg.TRAINING.BATCH_SIZE = 1 78 | cfg.TRAINING.NUM_WORKERS = 1 79 | dataloader = DataModule(cfg).val_dataloader() 80 | else: 81 | raise NotImplementedError(f'Invalid split: {args.split}') 82 | 83 | # Create model 84 | model = build_model(cfg, args.checkpoint) 85 | 86 | # Get predictions from model 87 | results_dict = predict(dataloader, model) 88 | 89 | # Save predictions to txt per scene within zip 90 | args.output_root.mkdir(parents=True, exist_ok=True) 91 | save_submission(results_dict, args.output_root / 'submission.zip') 92 | 93 | 94 | if __name__ == '__main__': 95 | parser = argparse.ArgumentParser() 96 | parser.add_argument('config', help='path to config file') 97 | parser.add_argument( 98 | '--checkpoint', help='path to model checkpoint (models with learned parameters)', 99 | default='') 100 | parser.add_argument('--output_root', '-o', type=Path, default=Path('results/')) 101 | parser.add_argument( 102 | '--split', choices=('val', 'test'), 103 | default='test', 104 | help='Dataset split to use for evaluation. Choose from test or val. Default: test') 105 | 106 | args = parser.parse_args() 107 | eval(args) 108 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from pathlib import Path 4 | 5 | # do this before importing numpy! (doing it right up here in case numpy is dependency of e.g.
json) 6 | os.environ["MKL_NUM_THREADS"] = "1" # noqa: E402 7 | os.environ["NUMEXPR_NUM_THREADS"] = "1" # noqa: E402 8 | os.environ["OMP_NUM_THREADS"] = "1" # noqa: E402 9 | os.environ["OPENBLAS_NUM_THREADS"] = "1" # noqa: E402 10 | 11 | import pytorch_lightning as pl 12 | from pytorch_lightning.loggers import TensorBoardLogger 13 | 14 | from config.default import cfg 15 | from config.utils import config_merge_from_file 16 | from lib.datasets.datamodules import DataModule 17 | from lib.models.regression.model import RegressionModel, RegressionMultiFrameModel 18 | 19 | 20 | def main(args): 21 | global cfg 22 | cfg = config_merge_from_file(cfg=cfg, path_to_config=args.dataset_config) 23 | cfg = config_merge_from_file(cfg=cfg, path_to_config=args.config) 24 | 25 | pl.seed_everything(0) 26 | 27 | datamodule = DataModule(cfg) 28 | if cfg.MODEL == 'RegressionMultiFrame': 29 | model = RegressionMultiFrameModel(cfg) 30 | elif cfg.MODEL == 'Regression': 31 | model = RegressionModel(cfg) 32 | else: 33 | raise NotImplementedError(f'Invalid model {cfg.MODEL}') 34 | 35 | logger = TensorBoardLogger(save_dir='weights', name=args.experiment) 36 | 37 | checkpoint_callback = pl.callbacks.ModelCheckpoint( 38 | save_last=True, 39 | save_top_k=5, 40 | verbose=True, 41 | monitor='val_loss/loss', 42 | mode='min' 43 | ) 44 | 45 | epochend_callback = pl.callbacks.ModelCheckpoint( 46 | filename='e{epoch}-last', 47 | save_top_k=-1, 48 | every_n_epochs=1, 49 | save_on_train_epoch_end=True 50 | ) 51 | 52 | lr_monitoring_callback = pl.callbacks.LearningRateMonitor(logging_interval='step') 53 | trainer = pl.Trainer(devices=1, 54 | log_every_n_steps=cfg.TRAINING.LOG_INTERVAL, 55 | val_check_interval=cfg.TRAINING.VAL_INTERVAL, 56 | limit_val_batches=cfg.TRAINING.VAL_BATCHES, 57 | max_epochs=cfg.TRAINING.EPOCHS, 58 | logger=logger, 59 | callbacks=[checkpoint_callback, lr_monitoring_callback, epochend_callback], 60 | num_sanity_val_steps=1, 61 | gradient_clip_val=cfg.TRAINING.GRAD_CLIP) 62 | # track_grad_norm=-1) # TODO: put back the equivalent! 63 | 64 | trainer.fit(model, datamodule, ckpt_path=args.resume) 65 | 66 | 67 | if __name__ == '__main__': 68 | """ 69 | Single-frame query: 70 | ```shell 71 | python3 train.py \ 72 | config/regression/mapfree/3d3d.yaml config/mapfree.yaml 73 | ``` 74 | 75 | Multi-frame query: 76 | ```shell 77 | python3 train.py \ 78 | config/regression/mapfree/multiframe/3d3d_multi.yaml \ 79 | config/mapfree.yaml \ 80 | config/mapfree_multi.yaml 81 | ``` 82 | """ 83 | parser = argparse.ArgumentParser() 84 | parser.add_argument('config', action='append', help='path to config file') 85 | parser.add_argument('dataset_config', nargs='+', action='store', 86 | help='path to dataset config file') 87 | # Allow for further config files to be added. Later values overwrite earlier ones. 88 | parser.add_argument('--config', action='append', dest='config', 89 | help='one more path to a config file') 90 | # Allow for further dataset-config files to be added. Later values overwrite earlier ones. 
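# Example (hypothetical file names) of stacking extra override configs on top of the positional ones;
# later values overwrite earlier ones:
#   python3 train.py config/regression/mapfree/3d3d.yaml config/mapfree.yaml \
#       --config my_overrides.yaml --dataset-config my_dataset_overrides.yaml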
91 | parser.add_argument('--dataset-config', '--dataset_config', action='append', 92 | dest='dataset_config', help='one more path to a dataset config file') 93 | parser.add_argument('--experiment', help='experiment name', default='default') 94 | parser.add_argument('--resume', help='resume from checkpoint path', default=None) 95 | args = parser.parse_args() 96 | 97 | assert isinstance(args.config, (list, tuple, str, Path)) 98 | # make sure we don't have nested lists by accident 99 | if isinstance(args.config, (list, tuple)): 100 | for args_config in args.config: 101 | assert isinstance(args_config, (str, Path)) 102 | 103 | assert isinstance(args.dataset_config, (list, tuple, str, Path)) 104 | # make sure we don't have nested lists by accident 105 | if isinstance(args.dataset_config, (list, tuple)): 106 | for args_dataset_config in args.dataset_config: 107 | assert isinstance(args_dataset_config, (str, Path)) 108 | 109 | main(args) 110 | -------------------------------------------------------------------------------- /visualisation/README.md: -------------------------------------------------------------------------------- 1 | ### Map-Free Relocalization Visualisation Script 2 | 3 | The code in this folder can be used to render a video that shows map-free relocalisation estimates. 4 | If ground truth is available (e.g. for the validation set), both the ground truth and estimated poses will be visualised. 5 | The estimates will be color-coded according to their metric positional error with respect to the ground truth. 6 | In particular, estimates will be green to yellow for up to 1 meter positional error, and red for more than 1 meter error. 7 | If no ground truth is available (e.g. for the test set), only the estimated poses will be visualised. 8 | 9 | These videos will look best if ground truth is available and estimated poses are given for all frames. 10 | 11 | The visualisation uses the `pyrender` library, and in particular its [off-screen rendering capabilities](https://pyrender.readthedocs.io/en/latest/examples/offscreen.html). 12 | The code uses the EGL platform of PyOpenGL. 13 | 14 | We provide an environment file `environment.yml` that can be used to create a conda environment with all necessary dependencies. 15 | To create the environment, run: 16 | 17 | ```bash 18 | conda env create -f environment.yml 19 | ``` 20 | 21 | Activate the environment via: 22 | 23 | ```bash 24 | conda activate mapfreevis 25 | ``` 26 | 27 | Call the visualisation script via: 28 | 29 | ```bash 30 | python render_estimates.py --estimates_path /path/to/estimates --data_path /path/to/data 31 | ``` 32 | 33 | `/path/to/estimates` should point to the folder that contains the map-free pose files, e.g. `pose_s00460.txt`. 34 | `/path/to/data` should point to the map-free dataset, e.g. the `test` or `val` folder with scene subfolders such as `s00460`. 35 | 36 | The script will iterate through all pose files and create a video for each one. 37 | All videos will be saved in the folder `renderings`, which can be changed via the `--output_path` argument. 38 | 39 | If you want to render a video for a subset of scenes, you can specify them using `--render_subset` followed by a list of scene names, separated by commas, e.g. `--render_subset s00460,s00461`.
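For example, to render only the scenes `s00460` and `s00461` and write the videos to a custom folder (the paths and the output folder name below are placeholders):

```bash
python render_estimates.py --estimates_path /path/to/estimates --data_path /path/to/data \
    --render_subset s00460,s00461 --output_path my_renderings
```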
-------------------------------------------------------------------------------- /visualisation/environment.yml: -------------------------------------------------------------------------------- 1 | name: mapfreevis 2 | channels: 3 | - conda-forge 4 | - anaconda 5 | - defaults 6 | dependencies: 7 | - _libgcc_mutex=0.1=main 8 | - _openmp_mutex=5.1=1_gnu 9 | - aom=3.6.0=h6a678d5_0 10 | - blas=1.0=mkl 11 | - blosc=1.21.3=h6a678d5_0 12 | - brotli=1.0.9=h5eee18b_7 13 | - brotli-bin=1.0.9=h5eee18b_7 14 | - brotli-python=1.0.9=py38h6a678d5_7 15 | - brunsli=0.1=h2531618_0 16 | - bzip2=1.0.8=h7b6447c_0 17 | - c-ares=1.19.1=h5eee18b_0 18 | - ca-certificates=2024.7.2=h06a4308_0 19 | - certifi=2024.7.4=py38h06a4308_0 20 | - cffi=1.16.0=py38h5eee18b_0 21 | - cfitsio=3.470=h5893167_7 22 | - charls=2.2.0=h2531618_0 23 | - charset-normalizer=2.0.4=pyhd3eb1b0_0 24 | - click=8.1.7=py38h06a4308_0 25 | - cloudpickle=2.2.1=py38h06a4308_0 26 | - colorama=0.4.6=pyhd8ed1ab_0 27 | - contourpy=1.0.5=py38hdb19cb5_0 28 | - cryptography=41.0.3=py38hdda0065_0 29 | - cycler=0.11.0=pyhd3eb1b0_0 30 | - cyrus-sasl=2.1.28=h52b45da_1 31 | - cytoolz=0.12.0=py38h5eee18b_0 32 | - dask-core=2023.4.1=py38h06a4308_0 33 | - dataclasses=0.8=pyh6d0b6a4_7 34 | - dav1d=1.2.1=h5eee18b_0 35 | - dbus=1.13.18=hb2f20db_0 36 | - expat=2.5.0=h6a678d5_0 37 | - ffmpeg=4.3.2=hca11adc_0 38 | - fontconfig=2.14.1=h4c34cd2_2 39 | - fonttools=4.25.0=pyhd3eb1b0_0 40 | - freetype=2.12.1=h4a9f257_0 41 | - freetype-py=2.2.0=pyhd3eb1b0_0 42 | - fsspec=2023.9.2=py38h06a4308_0 43 | - future=0.18.3=py38h06a4308_0 44 | - giflib=5.2.1=h5eee18b_3 45 | - glib=2.69.1=he621ea3_2 46 | - gmp=6.2.1=h295c915_3 47 | - gnutls=3.6.15=he1e5248_0 48 | - gst-plugins-base=1.14.1=h6a678d5_1 49 | - gstreamer=1.14.1=h5eee18b_1 50 | - icu=73.1=h6a678d5_0 51 | - idna=3.4=py38h06a4308_0 52 | - imagecodecs=2023.1.23=py38hc4b7b5f_0 53 | - imageio=2.31.4=py38h06a4308_0 54 | - importlib-metadata=6.0.0=py38h06a4308_0 55 | - importlib_resources=6.1.0=py38h06a4308_0 56 | - intel-openmp=2021.4.0=h06a4308_3561 57 | - jpeg=9e=h5eee18b_1 58 | - jxrlib=1.1=h7b6447c_2 59 | - kiwisolver=1.4.4=py38h6a678d5_0 60 | - krb5=1.20.1=h143b758_1 61 | - lame=3.100=h7b6447c_0 62 | - lcms2=2.12=h3be6417_0 63 | - ld_impl_linux-64=2.38=h1181459_1 64 | - lerc=3.0=h295c915_0 65 | - libaec=1.0.4=he6710b0_1 66 | - libavif=0.11.1=h5eee18b_0 67 | - libbrotlicommon=1.0.9=h5eee18b_7 68 | - libbrotlidec=1.0.9=h5eee18b_7 69 | - libbrotlienc=1.0.9=h5eee18b_7 70 | - libclang=14.0.6=default_hc6dbbc7_1 71 | - libclang13=14.0.6=default_he11475f_1 72 | - libcups=2.4.2=h2d74bed_1 73 | - libcurl=7.88.1=h251f7ec_2 74 | - libdeflate=1.17=h5eee18b_1 75 | - libedit=3.1.20221030=h5eee18b_0 76 | - libev=4.33=h7f8727e_1 77 | - libffi=3.4.4=h6a678d5_0 78 | - libgcc-ng=11.2.0=h1234567_1 79 | - libgfortran-ng=11.2.0=h00389a5_1 80 | - libgfortran5=11.2.0=h1234567_1 81 | - libglu=9.0.0=hf484d3e_1 82 | - libgomp=11.2.0=h1234567_1 83 | - libidn2=2.3.4=h5eee18b_0 84 | - libllvm14=14.0.6=hdb19cb5_3 85 | - libnghttp2=1.57.0=h2d74bed_0 86 | - libpng=1.6.39=h5eee18b_0 87 | - libpq=12.15=hdbd6064_1 88 | - libssh2=1.10.0=hdbd6064_2 89 | - libstdcxx-ng=11.2.0=h1234567_1 90 | - libtasn1=4.19.0=h5eee18b_0 91 | - libtiff=4.5.1=h6a678d5_0 92 | - libunistring=0.9.10=h27cfd23_0 93 | - libuuid=1.41.5=h5eee18b_0 94 | - libwebp=1.3.2=h11a3e52_0 95 | - libwebp-base=1.3.2=h5eee18b_0 96 | - libxcb=1.15=h7f8727e_0 97 | - libxkbcommon=1.0.1=h5eee18b_1 98 | - libxml2=2.10.4=hf1b16e4_1 99 | - libzopfli=1.0.3=he6710b0_0 100 | - locket=1.0.0=py38h06a4308_0 101 | - 
lz4-c=1.9.4=h6a678d5_0 102 | - matplotlib=3.7.1=py38h578d9bd_0 103 | - matplotlib-base=3.7.1=py38h417a72b_1 104 | - mkl=2021.4.0=h06a4308_640 105 | - mkl-service=2.4.0=py38h7f8727e_0 106 | - mkl_fft=1.3.1=py38hd3c417c_0 107 | - mkl_random=1.2.2=py38h51133e4_0 108 | - munkres=1.1.4=py_0 109 | - mysql=5.7.24=h721c034_2 110 | - ncurses=6.4=h6a678d5_0 111 | - nettle=3.7.3=hbbd107a_1 112 | - networkx=3.1=py38h06a4308_0 113 | - numpy=1.23.5=py38h14f4228_0 114 | - numpy-base=1.23.5=py38h31eccc5_0 115 | - openh264=2.1.1=h4ff587b_0 116 | - openjpeg=2.4.0=h3ad879b_0 117 | - openssl=3.0.14=h5eee18b_0 118 | - packaging=23.1=py38h06a4308_0 119 | - partd=1.4.1=py38h06a4308_0 120 | - pcre=8.45=h295c915_0 121 | - pillow=10.0.1=py38ha6cbd5a_0 122 | - pip=23.3=py38h06a4308_0 123 | - platformdirs=3.10.0=py38h06a4308_0 124 | - ply=3.11=py38_0 125 | - pooch=1.7.0=py38h06a4308_0 126 | - pycparser=2.21=pyhd3eb1b0_0 127 | - pyglet=1.5.27=py38h578d9bd_4 128 | - pyopengl=3.1.1a1=py38h06a4308_0 129 | - pyopenssl=23.2.0=py38h06a4308_0 130 | - pyparsing=3.0.9=py38h06a4308_0 131 | - pyqt=5.15.10=py38h6a678d5_0 132 | - pyqt5-sip=12.13.0=py38h5eee18b_0 133 | - pyrender=0.1.45=pyh8a188c0_3 134 | - pysocks=1.7.1=py38h06a4308_0 135 | - python=3.8.18=h955ad1f_0 136 | - python-dateutil=2.8.2=pyhd3eb1b0_0 137 | - python_abi=3.8=2_cp38 138 | - pywavelets=1.4.1=py38h5eee18b_0 139 | - pyyaml=6.0.1=py38h5eee18b_0 140 | - qt-main=5.15.2=h53bd1ea_10 141 | - readline=8.2=h5eee18b_0 142 | - requests=2.31.0=py38h06a4308_0 143 | - scikit-image=0.19.3=py38h6a678d5_1 144 | - scipy=1.10.0=py38h14f4228_1 145 | - setuptools=68.0.0=py38h06a4308_0 146 | - sip=6.7.12=py38h6a678d5_0 147 | - six=1.16.0=pyhd3eb1b0_1 148 | - snappy=1.1.9=h295c915_0 149 | - sqlite=3.41.2=h5eee18b_0 150 | - tifffile=2023.4.12=py38h06a4308_0 151 | - tk=8.6.12=h1ccaba5_0 152 | - tomli=2.0.1=py38h06a4308_0 153 | - toolz=0.12.0=py38h06a4308_0 154 | - tornado=6.3.3=py38h5eee18b_0 155 | - tqdm=4.66.4=pyhd8ed1ab_0 156 | - trimesh=3.21.2=pyhd8ed1ab_0 157 | - urllib3=1.26.18=py38h06a4308_0 158 | - wheel=0.41.2=py38h06a4308_0 159 | - x264=1!161.3030=h7f98852_1 160 | - xz=5.4.2=h5eee18b_0 161 | - yaml=0.2.5=h7b6447c_0 162 | - zfp=1.0.0=h6a678d5_0 163 | - zipp=3.11.0=py38h06a4308_0 164 | - zlib=1.2.13=h5eee18b_0 165 | - zstd=1.5.5=hc292b87_0 166 | -------------------------------------------------------------------------------- /visualisation/lazy_camera.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.linalg import svd 3 | 4 | class LazyCamera: 5 | """Smooth and slightly delayed scene camera. 6 | 7 | Implements a rolling average of last few camera positions. 8 | Also zooms out to display the whole scene. 9 | """ 10 | 11 | # buffer holding last m camera positions 12 | m_camera_buffer = None 13 | 14 | m_camera_buffer_size = None 15 | m_backwards_offset = None 16 | 17 | def __init__(self, 18 | camera_buffer_size=20, 19 | backwards_offset=4): 20 | """Constructor. 
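The observing camera is pushed backwards_offset meters behind the current view and
smoothed by averaging the last camera_buffer_size views.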
21 | 22 | Parameters: 23 | camera_buffer_size: Number of last few cameras to consider 24 | backwards_offset: Move observing camera backwards from current view, in meters 25 | """ 26 | 27 | self.m_camera_buffer = [] 28 | self.m_camera_buffer_size = camera_buffer_size 29 | self.m_backwards_offset = backwards_offset 30 | 31 | @staticmethod 32 | def _orthonormalize_rotation(T): 33 | """Takes a 4x4 matrix and orthonormalizes the upper left 3x3 using SVD 34 | 35 | Returns: 36 | T with orthonormalized upper 3x3 37 | """ 38 | 39 | R = T[:3, :3] 40 | 41 | # see https://arxiv.org/pdf/2006.14616.pdf Eq.2 42 | U, S, Vt = svd(R) 43 | Z = np.eye(3) 44 | Z[-1, -1] = np.sign(np.linalg.det(U @ Vt)) 45 | R = U @ Z @ Vt 46 | 47 | T[:3, :3] = R 48 | 49 | return T 50 | 51 | def update_camera(self, view): 52 | """Update lazy camera with new view. 53 | 54 | Parameters: 55 | view: New camera view, 4x4 matrix 56 | """ 57 | 58 | observing_camera = view.copy() 59 | 60 | # push observing camera back in z-direction in camera space 61 | z_vec = np.zeros((3,)) 62 | z_vec[2] = 1 63 | offset_vector = view[:3, :3] @ z_vec 64 | observing_camera[:3, 3] += offset_vector * self.m_backwards_offset 65 | 66 | # use moving avage of last X cameras (so that observing camera is smooth and follows with slight delay) 67 | self.m_camera_buffer.append(observing_camera) 68 | 69 | if len(self.m_camera_buffer) > self.m_camera_buffer_size: 70 | self.m_camera_buffer = self.m_camera_buffer[1:] 71 | 72 | def get_current_view(self): 73 | """Get current lazy camera view for rendering. 74 | 75 | Returns: 76 | 4x4 matrix 77 | """ 78 | 79 | if self.m_camera_buffer_size == 1: 80 | return self.m_camera_buffer[0] 81 | 82 | # naive average of camera pose matrices 83 | smooth_camera_pose = np.zeros((4, 4)) 84 | for camera_pose in self.m_camera_buffer: 85 | smooth_camera_pose += camera_pose 86 | smooth_camera_pose /= len(self.m_camera_buffer) 87 | 88 | return self._orthonormalize_rotation(smooth_camera_pose) -------------------------------------------------------------------------------- /visualisation/render_estimates.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright © Niantic, Inc. 2024. 3 | 4 | import os 5 | 6 | os.environ['PYOPENGL_PLATFORM'] = 'egl' 7 | 8 | import logging 9 | import argparse 10 | from pathlib import Path 11 | from render_scene import render_scene 12 | 13 | _logger = logging.getLogger(__name__) 14 | 15 | if __name__ == '__main__': 16 | # Setup logging levels. 17 | logging.basicConfig(level=logging.INFO) 18 | 19 | parser = argparse.ArgumentParser( 20 | description='Rendering map-free relocalisation estimates.', 21 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 22 | 23 | parser.add_argument('--estimates_path', type=Path, required=True, 24 | help="Path to the folder that contains file with estimated poses per scene." 25 | "That is the folder that contains pose_s00XXX.txt files.") 26 | 27 | parser.add_argument('--data_path', type=Path, required=True, 28 | help="Path to the dataset folder, i.e. the s00XXX folders with images.") 29 | 30 | parser.add_argument('--render_subset', type=str, 31 | help="Subset of scenes to render, comma separated, e.g. 
's00460,s00461'.") 32 | 33 | parser.add_argument('--output_path', type=Path, default=Path('renderings'), 34 | help="Path to the folder where the renderings will be saved.") 35 | 36 | parser.add_argument('--confidence_threshold', type=float, default=-1, 37 | help="Filter estimates below this confidence threshold.") 38 | 39 | options = parser.parse_args() 40 | 41 | # Get list of all files with estimated poses 42 | estimates_files = list(options.estimates_path.glob('pose_s*.txt')) 43 | 44 | if len(estimates_files) == 0: 45 | _logger.error(f"No pose files found in {options.estimates_path}.") 46 | exit(1) 47 | 48 | # Filter list according to string provided by user 49 | if options.render_subset: 50 | # get list of scenes to render 51 | render_subset = options.render_subset.split(',') 52 | # only keep files that contain the requested scene ID 53 | estimates_files = [f for f in estimates_files if f.stem[5:] in render_subset] 54 | 55 | if len(estimates_files) == 0: 56 | _logger.error(f"No pose files match the requested scene subset: {options.render_subset}.") 57 | exit(1) 58 | 59 | _logger.info(f"Found {len(estimates_files)} pose files in {options.estimates_path}") 60 | 61 | # do the actual rendering 62 | for estimates_file in estimates_files: 63 | 64 | # check whether the scene folder exists 65 | scene_folder = options.data_path / estimates_file.stem[5:] 66 | 67 | if not scene_folder.exists(): 68 | _logger.error(f"Scene folder {scene_folder} does not exist. Skipping.") 69 | continue 70 | 71 | _logger.info(f"Rendering scene {scene_folder} using estimates from {estimates_file}") 72 | render_scene(estimates_file, scene_folder, options.output_path, options.confidence_threshold) 73 | -------------------------------------------------------------------------------- /visualisation/render_util.py: -------------------------------------------------------------------------------- 1 | import trimesh 2 | import numpy as np 3 | import logging 4 | from PIL import Image 5 | from PIL import ImageOps 6 | 7 | # Setup logging levels. 8 | logging.basicConfig(level=logging.WARNING) 9 | 10 | THICKNESS = 0.01 # controls how thick the frustum's 'bars' are 11 | 12 | origin_frustum_verts = np.array([ 13 | (0., 0., 0.), 14 | (0.375, -0.5, -0.5), 15 | (0.375, 0.5, -0.5), 16 | (-0.375, 0.5, -0.5), 17 | (-0.375, -0.5, -0.5), 18 | ]) 19 | 20 | frustum_edges = np.array([ 21 | (1, 2), 22 | (1, 3), 23 | (1, 4), 24 | (1, 5), 25 | (2, 3), 26 | (3, 4), 27 | (4, 5), 28 | (5, 2), 29 | ]) - 1 30 | 31 | 32 | def get_image_box( 33 | image_path, 34 | frustum_pose, 35 | aspect_ratio=4.0 / 3.0, 36 | cam_marker_size=1.0, 37 | flip=False 38 | ): 39 | """ Gets a textured mesh of an image. 
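The image is flipped to match scene space and textured onto a quad placed at depth -cam_marker_size / 2
in the camera frame, which is then transformed by frustum_pose.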
""" 40 | 41 | pil_image = Image.open(image_path) 42 | pil_image = ImageOps.flip(pil_image) # flip top/bottom to align with scene space 43 | 44 | width = 0.75 45 | height = width * aspect_ratio 46 | width *= cam_marker_size 47 | height *= cam_marker_size 48 | 49 | if flip: 50 | pil_image = ImageOps.mirror(pil_image) # flips left/right 51 | width = -width 52 | 53 | vertices = np.zeros((4, 3)) 54 | vertices[0, :] = [width / 2, height / 2, -cam_marker_size / 2] 55 | vertices[1, :] = [width / 2, -height / 2, -cam_marker_size / 2] 56 | vertices[2, :] = [-width / 2, -height / 2, -cam_marker_size / 2] 57 | vertices[3, :] = [-width / 2, height / 2, -cam_marker_size / 2] 58 | 59 | faces = np.zeros((2, 3)) 60 | faces[0, :] = [0, 1, 2] 61 | faces[1, :] = [2, 3, 0] 62 | 63 | uvs = np.zeros((4, 2)) 64 | 65 | uvs[0, :] = [1.0, 0] 66 | uvs[1, :] = [1.0, 1.0] 67 | uvs[2, :] = [0, 1.0] 68 | uvs[3, :] = [0, 0] 69 | 70 | face_normals = np.zeros((2, 3)) 71 | face_normals[0, :] = [0.0, 0.0, 1.0] 72 | face_normals[1, :] = [0.0, 0.0, 1.0] 73 | 74 | material = trimesh.visual.texture.SimpleMaterial( 75 | image=pil_image, 76 | ambient=(1.0, 1.0, 1.0, 1.0), 77 | diffuse=(1.0, 1.0, 1.0, 1.0), 78 | ) 79 | texture = trimesh.visual.TextureVisuals( 80 | uv=uvs, 81 | image=pil_image, 82 | material=material, 83 | ) 84 | 85 | mesh = trimesh.Trimesh( 86 | vertices=vertices, 87 | faces=faces, 88 | face_normals=face_normals, 89 | visual=texture, 90 | validate=True, 91 | process=False 92 | ) 93 | 94 | def transform_trimesh(mesh, transform): 95 | """ Applies a transform to a trimesh. """ 96 | np_vertices = np.array(mesh.vertices) 97 | np_vertices = (transform @ np.concatenate([np_vertices, np.ones((np_vertices.shape[0], 1))], 1).T).T 98 | np_vertices = np_vertices / np_vertices[:, 3][:, None] 99 | mesh.vertices[:, 0] = np_vertices[:, 0] 100 | mesh.vertices[:, 1] = np_vertices[:, 1] 101 | mesh.vertices[:, 2] = np_vertices[:, 2] 102 | 103 | return mesh 104 | 105 | return transform_trimesh(mesh, frustum_pose) 106 | 107 | 108 | def normalise_vector(vect): 109 | length = np.sqrt((vect ** 2).sum()) 110 | return vect / length 111 | 112 | 113 | def cuboid_from_line(line_start, line_end, color=(255, 0, 255)): 114 | """Approximates a line with a long cuboid 115 | color is a 3-element RGB tuple, with each element a uint8 value 116 | """ 117 | # create two vectors which are both (a) perpendicular to the direction of the line and 118 | # (b) perpendicular to each other. 119 | direction = normalise_vector(line_end - line_start) 120 | random_dir = normalise_vector(np.random.rand(3)) 121 | perpendicular_x = normalise_vector(np.cross(direction, random_dir)) 122 | perpendicular_y = normalise_vector(np.cross(direction, perpendicular_x)) 123 | 124 | vertices = [] 125 | for node in (line_start, line_end): 126 | for x_offset in (-1, 1): 127 | for y_offset in (-1, 1): 128 | vert = node + THICKNESS * (perpendicular_y * y_offset + perpendicular_x * x_offset) 129 | vertices.append(vert) 130 | 131 | faces = [ 132 | (4, 5, 1, 0), 133 | (5, 7, 3, 1), 134 | (7, 6, 2, 3), 135 | (6, 4, 0, 2), 136 | (0, 1, 3, 2), # end of tube 137 | (6, 7, 5, 4), # other end of tube 138 | ] 139 | 140 | mesh = trimesh.Trimesh(vertices=np.array(vertices), faces=np.array(faces)) 141 | 142 | for c in (0, 1, 2): 143 | mesh.visual.vertex_colors[:, c] = color[c] 144 | 145 | return mesh 146 | 147 | 148 | def get_position_marker(marker_pose, marker_color, marker_extent=0.03): 149 | """ 150 | Generates a cube to signify a singular camera position. 
151 | 152 | @param marker_pose: 4x4 camera pose, OpenGL convention 153 | @param marker_color: RGB color of the marker 154 | @param marker_extent: size of the marker, marker is a cube of this side length 155 | """ 156 | current_pos_marker = trimesh.primitives.Box( 157 | extents=(marker_extent, marker_extent, marker_extent), 158 | transform=marker_pose) 159 | for c in (0, 1, 2): 160 | current_pos_marker.visual.vertex_colors[:, c] = marker_color[c] 161 | 162 | return current_pos_marker 163 | 164 | 165 | def generate_grid(frame_idx, cmap): 166 | """ 167 | Generates a grid of lines that fade in over time. 168 | 169 | @param frame_idx: Controls the fade-in of the grid. 170 | @param cmap: Color map for the grid. 171 | @return: trimesh object of the grid. 172 | """ 173 | 174 | y_offset = -2.5 175 | z_offset = -2 176 | line_count = 100 177 | width = line_count // 2 178 | 179 | grid_edges_1 = [np.array([-width, y_offset, i + z_offset, width, y_offset, i + z_offset]) for i in 180 | range(1, line_count // 2 + 1)] 181 | grid_edges_2 = [np.array([-width, y_offset, i + z_offset, width, y_offset, i + z_offset]) for i in 182 | range(-line_count // 2, 0)] 183 | grid_edges_2.reverse() 184 | grid_edges = [val for pair in zip(grid_edges_1, grid_edges_2) for val in pair] 185 | grid_edges = [np.array([-width, y_offset, z_offset, width, y_offset, z_offset])] + grid_edges 186 | 187 | cuboids = [] 188 | for edge_idx, edge in enumerate(grid_edges): 189 | 190 | opacity = max(0, min(1, (frame_idx - edge_idx) / 10) * 255) 191 | opacity = max(0, min(opacity, 245 - edge_idx * 5)) 192 | if opacity == 0: 193 | continue 194 | 195 | color = cmap[int(opacity)] * 255 196 | 197 | line_cuboid = cuboid_from_line(line_start=edge[:3], 198 | line_end=edge[3:], 199 | color=color) 200 | cuboids.append(line_cuboid) 201 | 202 | grid_edges_1 = [np.array([i, y_offset, -width + z_offset, i, y_offset, width + z_offset]) for i in 203 | range(1, line_count // 2 + 1)] 204 | grid_edges_2 = [np.array([i, y_offset, -width + z_offset, i, y_offset, width + z_offset]) for i in 205 | range(-line_count // 2, 0)] 206 | grid_edges_2.reverse() 207 | grid_edges = [val for pair in zip(grid_edges_1, grid_edges_2) for val in pair] 208 | grid_edges = [np.array([0, y_offset, -width + z_offset, 0, y_offset, width + z_offset])] + grid_edges 209 | 210 | for edge_idx, edge in enumerate(grid_edges): 211 | 212 | opacity = max(0, min(1, (frame_idx - edge_idx) / 10) * 255) 213 | opacity = max(0, min(opacity, 245 - edge_idx * 5)) 214 | if opacity == 0: 215 | continue 216 | 217 | color = cmap[int(opacity)] * 255 218 | 219 | line_cuboid = cuboid_from_line(line_start=edge[:3], 220 | line_end=edge[3:], 221 | color=color) 222 | cuboids.append(line_cuboid) 223 | 224 | if len(cuboids) == 0: 225 | return None 226 | else: 227 | return trimesh.util.concatenate(cuboids) 228 | 229 | 230 | def generate_frustum_at_position(rotation, translation, color, size, border_only=False): 231 | """Generates a frustum mesh at a specified (rotation, translation), with given color and size 232 | : rotation is a 3x3 numpy array 233 | : translation is a 3-long numpy vector 234 | : color is a 3-long numpy vector or tuple or list; each element is a uint8 RGB value 235 | : size is a float 236 | : border_only is a boolean that controls whether to only draw the border of the image 237 | """ 238 | 239 | transformed_frustum_verts = \ 240 | size * rotation.dot(origin_frustum_verts.T).T + translation[None, :] 241 | 242 | cuboids = [] 243 | for edge in frustum_edges: 244 | line_cuboid = 
cuboid_from_line(line_start=transformed_frustum_verts[edge[0]], 245 | line_end=transformed_frustum_verts[edge[1]], 246 | color=color) 247 | cuboids.append(line_cuboid) 248 | 249 | if border_only: 250 | cuboids = cuboids[4:] 251 | 252 | return trimesh.util.concatenate(cuboids) --------------------------------------------------------------------------------