├── .flake8
├── .gitattributes
├── .github
    └── workflows
    │   └── code-quality.yml
├── .gitignore
├── LICENSE
├── README.md
├── assets
    ├── DSC_0410.JPG
    ├── DSC_0411.JPG
    ├── architecture.svg
    ├── benchmark.png
    ├── benchmark_cpu.png
    ├── easy_hard.jpg
    ├── sacre_coeur1.jpg
    ├── sacre_coeur2.jpg
    └── teaser.svg
├── benchmark.py
├── demo.ipynb
├── lightglue
    ├── __init__.py
    ├── aliked.py
    ├── disk.py
    ├── dog_hardnet.py
    ├── lightglue.py
    ├── sift.py
    ├── superpoint.py
    ├── utils.py
    └── viz2d.py
├── pyproject.toml
└── requirements.txt


/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length = 88
3 | extend-ignore = E203
4 | exclude = .git,__pycache__,build,.venv/
5 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.ipynb linguist-documentation


--------------------------------------------------------------------------------
/.github/workflows/code-quality.yml:
--------------------------------------------------------------------------------
 1 | name: Format and Lint Checks
 2 | on:
 3 |   push:
 4 |     branches:
 5 |       - main
 6 |     paths:
 7 |       - '*.py'
 8 |   pull_request:
 9 |     types: [ assigned, opened, synchronize, reopened ]
10 | jobs:
11 |   check:
12 |     name: Format and Lint Checks
13 |     runs-on: ubuntu-latest
14 |     steps:
15 |       - uses: actions/checkout@v3
16 |       - uses: actions/setup-python@v4
17 |         with:
18 |           python-version: '3.10'
19 |           cache: 'pip'
20 |       - run: python -m pip install --upgrade pip
21 |       - run: python -m pip install .[dev]
22 |       - run: python -m flake8 .
23 |       - run: python -m isort . --check-only --diff
24 |       - run: python -m black . --check --diff
25 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | /data/
  2 | /outputs/
  3 | /lightglue/weights/
  4 | *-checkpoint.ipynb
  5 | *.pth
  6 | 
  7 | # Byte-compiled / optimized / DLL files
  8 | __pycache__/
  9 | *.py[cod]
 10 | *$py.class
 11 | 
 12 | # C extensions
 13 | *.so
 14 | 
 15 | # Distribution / packaging
 16 | .Python
 17 | build/
 18 | develop-eggs/
 19 | dist/
 20 | downloads/
 21 | eggs/
 22 | .eggs/
 23 | lib/
 24 | lib64/
 25 | parts/
 26 | sdist/
 27 | var/
 28 | wheels/
 29 | share/python-wheels/
 30 | *.egg-info/
 31 | .installed.cfg
 32 | *.egg
 33 | MANIFEST
 34 | 
 35 | # PyInstaller
 36 | #  Usually these files are written by a python script from a template
 37 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 38 | *.manifest
 39 | *.spec
 40 | 
 41 | # Installer logs
 42 | pip-log.txt
 43 | pip-delete-this-directory.txt
 44 | 
 45 | # Unit test / coverage reports
 46 | htmlcov/
 47 | .tox/
 48 | .nox/
 49 | .coverage
 50 | .coverage.*
 51 | .cache
 52 | nosetests.xml
 53 | coverage.xml
 54 | *.cover
 55 | *.py,cover
 56 | .hypothesis/
 57 | .pytest_cache/
 58 | cover/
 59 | 
 60 | # Translations
 61 | *.mo
 62 | *.pot
 63 | 
 64 | # Django stuff:
 65 | *.log
 66 | local_settings.py
 67 | db.sqlite3
 68 | db.sqlite3-journal
 69 | 
 70 | # Flask stuff:
 71 | instance/
 72 | .webassets-cache
 73 | 
 74 | # Scrapy stuff:
 75 | .scrapy
 76 | 
 77 | # Sphinx documentation
 78 | docs/_build/
 79 | 
 80 | # PyBuilder
 81 | .pybuilder/
 82 | target/
 83 | 
 84 | # Jupyter Notebook
 85 | .ipynb_checkpoints
 86 | 
 87 | # IPython
 88 | profile_default/
 89 | ipython_config.py
 90 | 
 91 | # pyenv
 92 | #   For a library or package, you might want to ignore these files since the code is
 93 | #   intended to run in multiple environments; otherwise, check them in:
 94 | # .python-version
 95 | 
 96 | # pipenv
 97 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 98 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 99 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
100 | #   install all needed dependencies.
101 | #Pipfile.lock
102 | 
103 | # poetry
104 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
106 | #   commonly ignored for libraries.
107 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108 | #poetry.lock
109 | 
110 | # pdm
111 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112 | #pdm.lock
113 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114 | #   in version control.
115 | #   https://pdm.fming.dev/#use-with-ide
116 | .pdm.toml
117 | 
118 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
119 | __pypackages__/
120 | 
121 | # Celery stuff
122 | celerybeat-schedule
123 | celerybeat.pid
124 | 
125 | # SageMath parsed files
126 | *.sage.py
127 | 
128 | # Environments
129 | .env
130 | .venv
131 | env/
132 | venv/
133 | ENV/
134 | env.bak/
135 | venv.bak/
136 | 
137 | # Spyder project settings
138 | .spyderproject
139 | .spyproject
140 | 
141 | # Rope project settings
142 | .ropeproject
143 | 
144 | # mkdocs documentation
145 | /site
146 | 
147 | # mypy
148 | .mypy_cache/
149 | .dmypy.json
150 | dmypy.json
151 | 
152 | # Pyre type checker
153 | .pyre/
154 | 
155 | # pytype static type analyzer
156 | .pytype/
157 | 
158 | # Cython debug symbols
159 | cython_debug/
160 | 
161 | # PyCharm
162 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
163 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
164 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
165 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
166 | .idea/
167 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright 2023 ETH Zurich
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <p align="center">
  2 |   <h1 align="center"><ins>LightGlue</ins> ⚡️<br>Local Feature Matching at Light Speed</h1>
  3 |   <p align="center">
  4 |     <a href="https://www.linkedin.com/in/philipplindenberger/">Philipp Lindenberger</a>
  5 |     ·
  6 |     <a href="https://psarlin.com/">Paul-Edouard&nbsp;Sarlin</a>
  7 |     ·
  8 |     <a href="https://www.microsoft.com/en-us/research/people/mapoll/">Marc&nbsp;Pollefeys</a>
  9 |   </p>
 10 |   <h2 align="center">
 11 |     <p>ICCV 2023</p>
 12 |     <a href="https://arxiv.org/pdf/2306.13643.pdf" align="center">Paper</a> | 
 13 |     <a href="https://colab.research.google.com/github/cvg/LightGlue/blob/main/demo.ipynb" align="center">Colab</a> | 
 14 |     <a href="https://psarlin.com/assets/LightGlue_ICCV2023_poster_compressed.pdf" align="center">Poster</a> | 
 15 |     <a href="https://github.com/cvg/glue-factory" align="center">Train your own!</a>
 16 |   </h2>
 17 |   
 18 | </p>
 19 | <p align="center">
 20 |     <a href="https://arxiv.org/abs/2306.13643"><img src="assets/easy_hard.jpg" alt="example" width=80%></a>
 21 |     <br>
 22 |     <em>LightGlue is a deep neural network that matches sparse local features across image pairs.<br>An adaptive mechanism makes it fast for easy pairs (top) and reduces the computational complexity for difficult ones (bottom).</em>
 23 | </p>
 24 | 
 25 | ##
 26 | 
 27 | This repository hosts the inference code of LightGlue, a lightweight feature matcher with high accuracy and blazing fast inference. It takes as input a set of keypoints and descriptors for each image and returns the indices of corresponding points. The architecture is based on adaptive pruning techniques, in both network width and depth - [check out the paper for more details](https://arxiv.org/pdf/2306.13643.pdf).
 28 | 
 29 | We release pretrained weights of LightGlue with [SuperPoint](https://arxiv.org/abs/1712.07629), [DISK](https://arxiv.org/abs/2006.13566), [ALIKED](https://arxiv.org/abs/2304.03608) and [SIFT](https://www.cs.ubc.ca/~lowe/papers/ijcv04.pdf) local features.
 30 | The training and evaluation code can be found in our library [glue-factory](https://github.com/cvg/glue-factory/).
 31 | 
 32 | ## Installation and demo [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/cvg/LightGlue/blob/main/demo.ipynb)
 33 | 
 34 | Install this repo using pip:
 35 | 
 36 | ```bash
 37 | git clone https://github.com/cvg/LightGlue.git && cd LightGlue
 38 | python -m pip install -e .
 39 | ```
 40 | 
 41 | We provide a [demo notebook](demo.ipynb) which shows how to perform feature extraction and matching on an image pair.
 42 | 
 43 | Here is a minimal script to match two images:
 44 | 
 45 | ```python
 46 | from lightglue import LightGlue, SuperPoint, DISK, SIFT, ALIKED, DoGHardNet
 47 | from lightglue.utils import load_image, rbd
 48 | 
 49 | # SuperPoint+LightGlue
 50 | extractor = SuperPoint(max_num_keypoints=2048).eval().cuda()  # load the extractor
 51 | matcher = LightGlue(features='superpoint').eval().cuda()  # load the matcher
 52 | 
 53 | # or DISK+LightGlue, ALIKED+LightGlue or SIFT+LightGlue
 54 | extractor = DISK(max_num_keypoints=2048).eval().cuda()  # load the extractor
 55 | matcher = LightGlue(features='disk').eval().cuda()  # load the matcher
 56 | 
 57 | # load each image as a torch.Tensor on GPU with shape (3,H,W), normalized in [0,1]
 58 | image0 = load_image('path/to/image_0.jpg').cuda()
 59 | image1 = load_image('path/to/image_1.jpg').cuda()
 60 | 
 61 | # extract local features
 62 | feats0 = extractor.extract(image0)  # auto-resize the image, disable with resize=None
 63 | feats1 = extractor.extract(image1)
 64 | 
 65 | # match the features
 66 | matches01 = matcher({'image0': feats0, 'image1': feats1})
 67 | feats0, feats1, matches01 = [rbd(x) for x in [feats0, feats1, matches01]]  # remove batch dimension
 68 | matches = matches01['matches']  # indices with shape (K,2)
 69 | points0 = feats0['keypoints'][matches[..., 0]]  # coordinates in image #0, shape (K,2)
 70 | points1 = feats1['keypoints'][matches[..., 1]]  # coordinates in image #1, shape (K,2)
 71 | ```
 72 | 
 73 | We also provide a convenience method to match a pair of images:
 74 | 
 75 | ```python
 76 | from lightglue import match_pair
 77 | feats0, feats1, matches01 = match_pair(extractor, matcher, image0, image1)
 78 | ```
 79 | 
 80 | ##
 81 | 
 82 | <p align="center">
 83 |   <a href="https://arxiv.org/abs/2306.13643"><img src="assets/teaser.svg" alt="Logo" width=50%></a>
 84 |   <br>
 85 |   <em>LightGlue can adjust its depth (number of layers) and width (number of keypoints) per image pair, with a marginal impact on accuracy.</em>
 86 | </p>
 87 | 
 88 | ## Advanced configuration
 89 | 
 90 | <details>
 91 | <summary>[Detail of all parameters - click to expand]</summary>
 92 | 
 93 | - ```n_layers```: Number of stacked self+cross attention layers. Reduce this value for faster inference at the cost of accuracy (continuous red line in the plot above). Default: 9 (all layers).
 94 | - ```flash```: Enable FlashAttention. Significantly increases the speed and reduces the memory consumption without any impact on accuracy. Default: True (LightGlue automatically detects if FlashAttention is available).
 95 | - ```mp```: Enable mixed precision inference. Default: False (off)
 96 | - ```depth_confidence```: Controls the early stopping. A lower value stops more often at earlier layers. Default: 0.95, disable with -1.
 97 | - ```width_confidence```: Controls the iterative point pruning. A lower value prunes more points earlier. Default: 0.99, disable with -1.
 98 | - ```filter_threshold```: Match confidence. Increase this value to obtain fewer, but stronger matches. Default: 0.1
 99 | 
100 | </details>
101 | 
102 | The default values give a good trade-off between speed and accuracy. To maximize the accuracy, use all keypoints and disable the adaptive mechanisms:
103 | ```python
104 | extractor = SuperPoint(max_num_keypoints=None)
105 | matcher = LightGlue(features='superpoint', depth_confidence=-1, width_confidence=-1)
106 | ```
107 | 
108 | To increase the speed with a small drop in accuracy, decrease the number of keypoints and lower the adaptive thresholds:
109 | ```python
110 | extractor = SuperPoint(max_num_keypoints=1024)
111 | matcher = LightGlue(features='superpoint', depth_confidence=0.9, width_confidence=0.95)
112 | ```
113 | 
114 | The maximum speed is obtained with a combination of:
115 | - [FlashAttention](https://arxiv.org/abs/2205.14135): automatically used when ```torch >= 2.0``` or if [installed from source](https://github.com/HazyResearch/flash-attention#installation-and-features).
116 | - PyTorch compilation, available when ```torch >= 2.0```:
117 | ```python
118 | matcher = matcher.eval().cuda()
119 | matcher.compile(mode='reduce-overhead')
120 | ```
121 | For inputs with fewer than 1536 keypoints (determined experimentally), this compiles LightGlue but disables point pruning (large overhead). For larger input sizes, it automatically falls back to eager mode with point pruning. Adaptive depth is supported for any input size.
122 | 
123 | ## Benchmark
124 | 
125 | 
126 | <p align="center">
127 |   <a><img src="assets/benchmark.png" alt="Logo" width=80%></a>
128 |   <br>
129 |   <em>Benchmark results on GPU (RTX 3080). With compilation and adaptivity, LightGlue runs at 150 FPS @ 1024 keypoints and 50 FPS @ 4096 keypoints per image. This is a 4-10x speedup over SuperGlue. </em>
130 | </p>
131 | 
132 | <p align="center">
133 |   <a><img src="assets/benchmark_cpu.png" alt="Logo" width=80%></a>
134 |   <br>
135 |   <em>Benchmark results on CPU (Intel i7 10700K). LightGlue runs at 20 FPS @ 512 keypoints. </em>
136 | </p>
137 | 
138 | Obtain the same plots for your setup using our [benchmark script](benchmark.py):
139 | ```
140 | python benchmark.py [--device cuda] [--add_superglue] [--num_keypoints 512 1024 2048 4096] [--compile]
141 | ```
142 | 
143 | <details>
144 | <summary>[Performance tip - click to expand]</summary>
145 | 
146 | Note: **Point pruning** introduces an overhead that sometimes outweighs its benefits.
147 | Point pruning is thus enabled only when there are more than N keypoints in an image, where N is hardware-dependent.
148 | We provide defaults optimized for current hardware (RTX 30xx GPUs).
149 | We suggest running the benchmark script and adjusting the thresholds for your hardware by updating `LightGlue.pruning_keypoint_thresholds['cuda']`.
150 | 
151 | </details>
152 | 
153 | ## Training and evaluation
154 | 
155 | With [Glue Factory](https://github.com/cvg/glue-factory), you can train LightGlue with your own local features, on your own dataset!
156 | You can also evaluate it and other baselines on standard benchmarks like HPatches and MegaDepth.
157 | 
158 | ## Other links
159 | - [hloc - the visual localization toolbox](https://github.com/cvg/Hierarchical-Localization/): run LightGlue for Structure-from-Motion and visual localization.
160 | - [LightGlue-ONNX](https://github.com/fabio-sim/LightGlue-ONNX): export LightGlue to the Open Neural Network Exchange (ONNX) format with support for TensorRT and OpenVINO.
161 | - [Image Matching WebUI](https://github.com/Vincentqyw/image-matching-webui): a web GUI to easily compare different matchers, including LightGlue.
162 | - [kornia](https://kornia.readthedocs.io) now exposes LightGlue via the interfaces [`LightGlue`](https://kornia.readthedocs.io/en/latest/feature.html#kornia.feature.LightGlue) and [`LightGlueMatcher`](https://kornia.readthedocs.io/en/latest/feature.html#kornia.feature.LightGlueMatcher).
163 | 
164 | ## BibTeX citation
165 | If you use any ideas from the paper or code from this repo, please consider citing:
166 | 
167 | ```txt
168 | @inproceedings{lindenberger2023lightglue,
169 |   author    = {Philipp Lindenberger and
170 |                Paul-Edouard Sarlin and
171 |                Marc Pollefeys},
172 |   title     = {{LightGlue: Local Feature Matching at Light Speed}},
173 |   booktitle = {ICCV},
174 |   year      = {2023}
175 | }
176 | ```
177 | 
178 | 
179 | ## License
180 | The pre-trained weights of LightGlue and the code provided in this repository are released under the [Apache-2.0 license](./LICENSE). [DISK](https://github.com/cvlab-epfl/disk) follows this license as well but SuperPoint follows [a different, restrictive license](https://github.com/magicleap/SuperPointPretrainedNetwork/blob/master/LICENSE) (this includes its pre-trained weights and its [inference file](./lightglue/superpoint.py)). [ALIKED](https://github.com/Shiaoming/ALIKED) was published under a BSD-3-Clause license. 
181 | 


--------------------------------------------------------------------------------
/assets/DSC_0410.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cvg/LightGlue/edb2b838efb2ecfe3f88097c5fad9887d95aedad/assets/DSC_0410.JPG


--------------------------------------------------------------------------------
/assets/DSC_0411.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cvg/LightGlue/edb2b838efb2ecfe3f88097c5fad9887d95aedad/assets/DSC_0411.JPG


--------------------------------------------------------------------------------
/assets/benchmark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cvg/LightGlue/edb2b838efb2ecfe3f88097c5fad9887d95aedad/assets/benchmark.png


--------------------------------------------------------------------------------
/assets/benchmark_cpu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cvg/LightGlue/edb2b838efb2ecfe3f88097c5fad9887d95aedad/assets/benchmark_cpu.png


--------------------------------------------------------------------------------
/assets/easy_hard.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cvg/LightGlue/edb2b838efb2ecfe3f88097c5fad9887d95aedad/assets/easy_hard.jpg


--------------------------------------------------------------------------------
/assets/sacre_coeur1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cvg/LightGlue/edb2b838efb2ecfe3f88097c5fad9887d95aedad/assets/sacre_coeur1.jpg


--------------------------------------------------------------------------------
/assets/sacre_coeur2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cvg/LightGlue/edb2b838efb2ecfe3f88097c5fad9887d95aedad/assets/sacre_coeur2.jpg


--------------------------------------------------------------------------------
/assets/teaser.svg:
--------------------------------------------------------------------------------
   1 | <?xml version="1.0" encoding="utf-8" standalone="no"?>
   2 | <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
   3 |   "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
   4 | <svg xmlns:xlink="http://www.w3.org/1999/xlink" width="351.50156pt" height="237.315312pt" viewBox="0 0 351.50156 237.315312" xmlns="http://www.w3.org/2000/svg" version="1.1">
   5 |  <metadata>
   6 |   <rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
   7 |    <cc:Work>
   8 |     <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/>
   9 |     <dc:date>2023-06-25T11:23:59.261938</dc:date>
  10 |     <dc:format>image/svg+xml</dc:format>
  11 |     <dc:creator>
  12 |      <cc:Agent>
  13 |       <dc:title>Matplotlib v3.7.1, https://matplotlib.org/</dc:title>
  14 |      </cc:Agent>
  15 |     </dc:creator>
  16 |    </cc:Work>
  17 |   </rdf:RDF>
  18 |  </metadata>
  19 |  <defs>
  20 |   <style type="text/css">*{stroke-linejoin: round; stroke-linecap: butt}</style>
  21 |  </defs>
  22 |  <g id="figure_1">
  23 |   <g id="patch_1">
  24 |    <path d="M 0 237.315312 
  25 | L 351.50156 237.315312 
  26 | L 351.50156 0 
  27 | L 0 0 
  28 | z
  29 | " style="fill: #ffffff"/>
  30 |   </g>
  31 |   <g id="axes_1">
  32 |    <g id="patch_2">
  33 |     <path d="M 38.242188 202.12 
  34 | L 351.50156 202.12 
  35 | L 351.50156 0 
  36 | L 38.242188 0 
  37 | z
  38 | " style="fill: #f2f2f2"/>
  39 |    </g>
  40 |    <g id="PathCollection_1">
  41 |     <defs>
  42 |      <path id="md5bda44a6b" d="M 0 2.738613 
  43 | C 0.726289 2.738613 1.422928 2.450055 1.936492 1.936492 
  44 | C 2.450055 1.422928 2.738613 0.726289 2.738613 0 
  45 | C 2.738613 -0.726289 2.450055 -1.422928 1.936492 -1.936492 
  46 | C 1.422928 -2.450055 0.726289 -2.738613 0 -2.738613 
  47 | C -0.726289 -2.738613 -1.422928 -2.450055 -1.936492 -1.936492 
  48 | C -2.450055 -1.422928 -2.738613 -0.726289 -2.738613 0 
  49 | C -2.738613 0.726289 -2.450055 1.422928 -1.936492 1.936492 
  50 | C -1.422928 2.450055 -0.726289 2.738613 0 2.738613 
  51 | z
  52 | "/>
  53 |     </defs>
  54 |     <g clip-path="url(#pb46ed2897c)">
  55 |      <use xlink:href="#md5bda44a6b" x="117.273002" y="77.281176" style="fill: #0000ff"/>
  56 |     </g>
  57 |    </g>
  58 |    <g id="PathCollection_2">
  59 |     <defs>
  60 |      <path id="m3541600ca9" d="M -0 3.872983 
  61 | L 3.872983 -3.872983 
  62 | L -3.872983 -3.872983 
  63 | z
  64 | "/>
  65 |     </defs>
  66 |     <g clip-path="url(#pb46ed2897c)">
  67 |      <use xlink:href="#m3541600ca9" x="113.203664" y="196.175294" style="fill: #008000"/>
  68 |     </g>
  69 |    </g>
  70 |    <g id="PathCollection_3">
  71 |     <defs>
  72 |      <path id="mee49ddcd29" d="M 0 2.738613 
  73 | C 0.726289 2.738613 1.422928 2.450055 1.936492 1.936492 
  74 | C 2.450055 1.422928 2.738613 0.726289 2.738613 0 
  75 | C 2.738613 -0.726289 2.450055 -1.422928 1.936492 -1.936492 
  76 | C 1.422928 -2.450055 0.726289 -2.738613 0 -2.738613 
  77 | C -0.726289 -2.738613 -1.422928 -2.450055 -1.936492 -1.936492 
  78 | C -2.450055 -1.422928 -2.738613 -0.726289 -2.738613 0 
  79 | C -2.738613 0.726289 -2.450055 1.422928 -1.936492 1.936492 
  80 | C -1.422928 2.450055 -0.726289 2.738613 0 2.738613 
  81 | z
  82 | "/>
  83 |     </defs>
  84 |     <g clip-path="url(#pb46ed2897c)">
  85 |      <use xlink:href="#mee49ddcd29" x="68.806591" y="41.612941"/>
  86 |     </g>
  87 |    </g>
  88 |    <g id="PathCollection_4">
  89 |     <defs>
  90 |      <path id="m3986887d56" d="M 0 2.738613 
  91 | C 0.726289 2.738613 1.422928 2.450055 1.936492 1.936492 
  92 | C 2.450055 1.422928 2.738613 0.726289 2.738613 0 
  93 | C 2.738613 -0.726289 2.450055 -1.422928 1.936492 -1.936492 
  94 | C 1.422928 -2.450055 0.726289 -2.738613 0 -2.738613 
  95 | C -0.726289 -2.738613 -1.422928 -2.450055 -1.936492 -1.936492 
  96 | C -2.450055 -1.422928 -2.738613 -0.726289 -2.738613 0 
  97 | C -2.738613 0.726289 -2.450055 1.422928 -1.936492 1.936492 
  98 | C -1.422928 2.450055 -0.726289 2.738613 0 2.738613 
  99 | z
 100 | "/>
 101 |     </defs>
 102 |     <g clip-path="url(#pb46ed2897c)">
 103 |      <use xlink:href="#m3986887d56" x="52.800495" y="34.479294" style="fill: #800080"/>
 104 |     </g>
 105 |    </g>
 106 |    <g id="PathCollection_5">
 107 |     <defs>
 108 |      <path id="m73cb4f1908" d="M 0 -5.91608 
 109 | L -1.328243 -1.828169 
 110 | L -5.626526 -1.828169 
 111 | L -2.149142 0.698298 
 112 | L -3.477384 4.786209 
 113 | L -0 2.259741 
 114 | L 3.477384 4.786209 
 115 | L 2.149142 0.698298 
 116 | L 5.626526 -1.828169 
 117 | L 1.328243 -1.828169 
 118 | z
 119 | "/>
 120 |     </defs>
 121 |     <g clip-path="url(#pb46ed2897c)">
 122 |      <use xlink:href="#m73cb4f1908" x="289.703869" y="47.557647" style="fill: #ff0000"/>
 123 |     </g>
 124 |    </g>
 125 |    <g id="matplotlib.axis_1">
 126 |     <g id="xtick_1">
 127 |      <g id="line2d_1">
 128 |       <defs>
 129 |        <path id="m69d2a2ec97" d="M 0 0 
 130 | L 0 3.5 
 131 | " style="stroke: #000000; stroke-width: 0.8"/>
 132 |       </defs>
 133 |       <g>
 134 |        <use xlink:href="#m69d2a2ec97" x="38.242188" y="202.12" style="stroke: #000000; stroke-width: 0.8"/>
 135 |       </g>
 136 |      </g>
 137 |      <g id="text_1">
 138 |       <!-- 0 -->
 139 |       <g transform="translate(35.060938 216.718437) scale(0.1 -0.1)">
 140 |        <defs>
 141 |         <path id="DejaVuSans-30" d="M 2034 4250 
 142 | Q 1547 4250 1301 3770 
 143 | Q 1056 3291 1056 2328 
 144 | Q 1056 1369 1301 889 
 145 | Q 1547 409 2034 409 
 146 | Q 2525 409 2770 889 
 147 | Q 3016 1369 3016 2328 
 148 | Q 3016 3291 2770 3770 
 149 | Q 2525 4250 2034 4250 
 150 | z
 151 | M 2034 4750 
 152 | Q 2819 4750 3233 4129 
 153 | Q 3647 3509 3647 2328 
 154 | Q 3647 1150 3233 529 
 155 | Q 2819 -91 2034 -91 
 156 | Q 1250 -91 836 529 
 157 | Q 422 1150 422 2328 
 158 | Q 422 3509 836 4129 
 159 | Q 1250 4750 2034 4750 
 160 | z
 161 | " transform="scale(0.015625)"/>
 162 |        </defs>
 163 |        <use xlink:href="#DejaVuSans-30"/>
 164 |       </g>
 165 |      </g>
 166 |     </g>
 167 |     <g id="xtick_2">
 168 |      <g id="line2d_2">
 169 |       <g>
 170 |        <use xlink:href="#m69d2a2ec97" x="93.563757" y="202.12" style="stroke: #000000; stroke-width: 0.8"/>
 171 |       </g>
 172 |      </g>
 173 |      <g id="text_2">
 174 |       <!-- 10 -->
 175 |       <g transform="translate(87.201257 216.718437) scale(0.1 -0.1)">
 176 |        <defs>
 177 |         <path id="DejaVuSans-31" d="M 794 531 
 178 | L 1825 531 
 179 | L 1825 4091 
 180 | L 703 3866 
 181 | L 703 4441 
 182 | L 1819 4666 
 183 | L 2450 4666 
 184 | L 2450 531 
 185 | L 3481 531 
 186 | L 3481 0 
 187 | L 794 0 
 188 | L 794 531 
 189 | z
 190 | " transform="scale(0.015625)"/>
 191 |        </defs>
 192 |        <use xlink:href="#DejaVuSans-31"/>
 193 |        <use xlink:href="#DejaVuSans-30" x="63.623047"/>
 194 |       </g>
 195 |      </g>
 196 |     </g>
 197 |     <g id="xtick_3">
 198 |      <g id="line2d_3">
 199 |       <g>
 200 |        <use xlink:href="#m69d2a2ec97" x="148.885327" y="202.12" style="stroke: #000000; stroke-width: 0.8"/>
 201 |       </g>
 202 |      </g>
 203 |      <g id="text_3">
 204 |       <!-- 20 -->
 205 |       <g transform="translate(142.522827 216.718437) scale(0.1 -0.1)">
 206 |        <defs>
 207 |         <path id="DejaVuSans-32" d="M 1228 531 
 208 | L 3431 531 
 209 | L 3431 0 
 210 | L 469 0 
 211 | L 469 531 
 212 | Q 828 903 1448 1529 
 213 | Q 2069 2156 2228 2338 
 214 | Q 2531 2678 2651 2914 
 215 | Q 2772 3150 2772 3378 
 216 | Q 2772 3750 2511 3984 
 217 | Q 2250 4219 1831 4219 
 218 | Q 1534 4219 1204 4116 
 219 | Q 875 4013 500 3803 
 220 | L 500 4441 
 221 | Q 881 4594 1212 4672 
 222 | Q 1544 4750 1819 4750 
 223 | Q 2544 4750 2975 4387 
 224 | Q 3406 4025 3406 3419 
 225 | Q 3406 3131 3298 2873 
 226 | Q 3191 2616 2906 2266 
 227 | Q 2828 2175 2409 1742 
 228 | Q 1991 1309 1228 531 
 229 | z
 230 | " transform="scale(0.015625)"/>
 231 |        </defs>
 232 |        <use xlink:href="#DejaVuSans-32"/>
 233 |        <use xlink:href="#DejaVuSans-30" x="63.623047"/>
 234 |       </g>
 235 |      </g>
 236 |     </g>
 237 |     <g id="xtick_4">
 238 |      <g id="line2d_4">
 239 |       <g>
 240 |        <use xlink:href="#m69d2a2ec97" x="204.206897" y="202.12" style="stroke: #000000; stroke-width: 0.8"/>
 241 |       </g>
 242 |      </g>
 243 |      <g id="text_4">
 244 |       <!-- 30 -->
 245 |       <g transform="translate(197.844397 216.718437) scale(0.1 -0.1)">
 246 |        <defs>
 247 |         <path id="DejaVuSans-33" d="M 2597 2516 
 248 | Q 3050 2419 3304 2112 
 249 | Q 3559 1806 3559 1356 
 250 | Q 3559 666 3084 287 
 251 | Q 2609 -91 1734 -91 
 252 | Q 1441 -91 1130 -33 
 253 | Q 819 25 488 141 
 254 | L 488 750 
 255 | Q 750 597 1062 519 
 256 | Q 1375 441 1716 441 
 257 | Q 2309 441 2620 675 
 258 | Q 2931 909 2931 1356 
 259 | Q 2931 1769 2642 2001 
 260 | Q 2353 2234 1838 2234 
 261 | L 1294 2234 
 262 | L 1294 2753 
 263 | L 1863 2753 
 264 | Q 2328 2753 2575 2939 
 265 | Q 2822 3125 2822 3475 
 266 | Q 2822 3834 2567 4026 
 267 | Q 2313 4219 1838 4219 
 268 | Q 1578 4219 1281 4162 
 269 | Q 984 4106 628 3988 
 270 | L 628 4550 
 271 | Q 988 4650 1302 4700 
 272 | Q 1616 4750 1894 4750 
 273 | Q 2613 4750 3031 4423 
 274 | Q 3450 4097 3450 3541 
 275 | Q 3450 3153 3228 2886 
 276 | Q 3006 2619 2597 2516 
 277 | z
 278 | " transform="scale(0.015625)"/>
 279 |        </defs>
 280 |        <use xlink:href="#DejaVuSans-33"/>
 281 |        <use xlink:href="#DejaVuSans-30" x="63.623047"/>
 282 |       </g>
 283 |      </g>
 284 |     </g>
 285 |     <g id="xtick_5">
 286 |      <g id="line2d_5">
 287 |       <g>
 288 |        <use xlink:href="#m69d2a2ec97" x="259.528467" y="202.12" style="stroke: #000000; stroke-width: 0.8"/>
 289 |       </g>
 290 |      </g>
 291 |      <g id="text_5">
 292 |       <!-- 40 -->
 293 |       <g transform="translate(253.165967 216.718437) scale(0.1 -0.1)">
 294 |        <defs>
 295 |         <path id="DejaVuSans-34" d="M 2419 4116 
 296 | L 825 1625 
 297 | L 2419 1625 
 298 | L 2419 4116 
 299 | z
 300 | M 2253 4666 
 301 | L 3047 4666 
 302 | L 3047 1625 
 303 | L 3713 1625 
 304 | L 3713 1100 
 305 | L 3047 1100 
 306 | L 3047 0 
 307 | L 2419 0 
 308 | L 2419 1100 
 309 | L 313 1100 
 310 | L 313 1709 
 311 | L 2253 4666 
 312 | z
 313 | " transform="scale(0.015625)"/>
 314 |        </defs>
 315 |        <use xlink:href="#DejaVuSans-34"/>
 316 |        <use xlink:href="#DejaVuSans-30" x="63.623047"/>
 317 |       </g>
 318 |      </g>
 319 |     </g>
 320 |     <g id="xtick_6">
 321 |      <g id="line2d_6">
 322 |       <g>
 323 |        <use xlink:href="#m69d2a2ec97" x="314.850037" y="202.12" style="stroke: #000000; stroke-width: 0.8"/>
 324 |       </g>
 325 |      </g>
 326 |      <g id="text_6">
 327 |       <!-- 50 -->
 328 |       <g transform="translate(308.487537 216.718437) scale(0.1 -0.1)">
 329 |        <defs>
 330 |         <path id="DejaVuSans-35" d="M 691 4666 
 331 | L 3169 4666 
 332 | L 3169 4134 
 333 | L 1269 4134 
 334 | L 1269 2991 
 335 | Q 1406 3038 1543 3061 
 336 | Q 1681 3084 1819 3084 
 337 | Q 2600 3084 3056 2656 
 338 | Q 3513 2228 3513 1497 
 339 | Q 3513 744 3044 326 
 340 | Q 2575 -91 1722 -91 
 341 | Q 1428 -91 1123 -41 
 342 | Q 819 9 494 109 
 343 | L 494 744 
 344 | Q 775 591 1075 516 
 345 | Q 1375 441 1709 441 
 346 | Q 2250 441 2565 725 
 347 | Q 2881 1009 2881 1497 
 348 | Q 2881 1984 2565 2268 
 349 | Q 2250 2553 1709 2553 
 350 | Q 1456 2553 1204 2497 
 351 | Q 953 2441 691 2322 
 352 | L 691 4666 
 353 | z
 354 | " transform="scale(0.015625)"/>
 355 |        </defs>
 356 |        <use xlink:href="#DejaVuSans-35"/>
 357 |        <use xlink:href="#DejaVuSans-30" x="63.623047"/>
 358 |       </g>
 359 |      </g>
 360 |     </g>
 361 |     <g id="text_7">
 362 |      <!-- Image Pairs Per Second -->
 363 |      <g transform="translate(106.824218 234.195781) scale(0.15 -0.15)">
 364 |       <defs>
 365 |        <path id="DejaVuSans-49" d="M 628 4666 
 366 | L 1259 4666 
 367 | L 1259 0 
 368 | L 628 0 
 369 | L 628 4666 
 370 | z
 371 | " transform="scale(0.015625)"/>
 372 |        <path id="DejaVuSans-6d" d="M 3328 2828 
 373 | Q 3544 3216 3844 3400 
 374 | Q 4144 3584 4550 3584 
 375 | Q 5097 3584 5394 3201 
 376 | Q 5691 2819 5691 2113 
 377 | L 5691 0 
 378 | L 5113 0 
 379 | L 5113 2094 
 380 | Q 5113 2597 4934 2840 
 381 | Q 4756 3084 4391 3084 
 382 | Q 3944 3084 3684 2787 
 383 | Q 3425 2491 3425 1978 
 384 | L 3425 0 
 385 | L 2847 0 
 386 | L 2847 2094 
 387 | Q 2847 2600 2669 2842 
 388 | Q 2491 3084 2119 3084 
 389 | Q 1678 3084 1418 2786 
 390 | Q 1159 2488 1159 1978 
 391 | L 1159 0 
 392 | L 581 0 
 393 | L 581 3500 
 394 | L 1159 3500 
 395 | L 1159 2956 
 396 | Q 1356 3278 1631 3431 
 397 | Q 1906 3584 2284 3584 
 398 | Q 2666 3584 2933 3390 
 399 | Q 3200 3197 3328 2828 
 400 | z
 401 | " transform="scale(0.015625)"/>
 402 |        <path id="DejaVuSans-61" d="M 2194 1759 
 403 | Q 1497 1759 1228 1600 
 404 | Q 959 1441 959 1056 
 405 | Q 959 750 1161 570 
 406 | Q 1363 391 1709 391 
 407 | Q 2188 391 2477 730 
 408 | Q 2766 1069 2766 1631 
 409 | L 2766 1759 
 410 | L 2194 1759 
 411 | z
 412 | M 3341 1997 
 413 | L 3341 0 
 414 | L 2766 0 
 415 | L 2766 531 
 416 | Q 2569 213 2275 61 
 417 | Q 1981 -91 1556 -91 
 418 | Q 1019 -91 701 211 
 419 | Q 384 513 384 1019 
 420 | Q 384 1609 779 1909 
 421 | Q 1175 2209 1959 2209 
 422 | L 2766 2209 
 423 | L 2766 2266 
 424 | Q 2766 2663 2505 2880 
 425 | Q 2244 3097 1772 3097 
 426 | Q 1472 3097 1187 3025 
 427 | Q 903 2953 641 2809 
 428 | L 641 3341 
 429 | Q 956 3463 1253 3523 
 430 | Q 1550 3584 1831 3584 
 431 | Q 2591 3584 2966 3190 
 432 | Q 3341 2797 3341 1997 
 433 | z
 434 | " transform="scale(0.015625)"/>
 435 |        <path id="DejaVuSans-67" d="M 2906 1791 
 436 | Q 2906 2416 2648 2759 
 437 | Q 2391 3103 1925 3103 
 438 | Q 1463 3103 1205 2759 
 439 | Q 947 2416 947 1791 
 440 | Q 947 1169 1205 825 
 441 | Q 1463 481 1925 481 
 442 | Q 2391 481 2648 825 
 443 | Q 2906 1169 2906 1791 
 444 | z
 445 | M 3481 434 
 446 | Q 3481 -459 3084 -895 
 447 | Q 2688 -1331 1869 -1331 
 448 | Q 1566 -1331 1297 -1286 
 449 | Q 1028 -1241 775 -1147 
 450 | L 775 -588 
 451 | Q 1028 -725 1275 -790 
 452 | Q 1522 -856 1778 -856 
 453 | Q 2344 -856 2625 -561 
 454 | Q 2906 -266 2906 331 
 455 | L 2906 616 
 456 | Q 2728 306 2450 153 
 457 | Q 2172 0 1784 0 
 458 | Q 1141 0 747 490 
 459 | Q 353 981 353 1791 
 460 | Q 353 2603 747 3093 
 461 | Q 1141 3584 1784 3584 
 462 | Q 2172 3584 2450 3431 
 463 | Q 2728 3278 2906 2969 
 464 | L 2906 3500 
 465 | L 3481 3500 
 466 | L 3481 434 
 467 | z
 468 | " transform="scale(0.015625)"/>
 469 |        <path id="DejaVuSans-65" d="M 3597 1894 
 470 | L 3597 1613 
 471 | L 953 1613 
 472 | Q 991 1019 1311 708 
 473 | Q 1631 397 2203 397 
 474 | Q 2534 397 2845 478 
 475 | Q 3156 559 3463 722 
 476 | L 3463 178 
 477 | Q 3153 47 2828 -22 
 478 | Q 2503 -91 2169 -91 
 479 | Q 1331 -91 842 396 
 480 | Q 353 884 353 1716 
 481 | Q 353 2575 817 3079 
 482 | Q 1281 3584 2069 3584 
 483 | Q 2775 3584 3186 3129 
 484 | Q 3597 2675 3597 1894 
 485 | z
 486 | M 3022 2063 
 487 | Q 3016 2534 2758 2815 
 488 | Q 2500 3097 2075 3097 
 489 | Q 1594 3097 1305 2825 
 490 | Q 1016 2553 972 2059 
 491 | L 3022 2063 
 492 | z
 493 | " transform="scale(0.015625)"/>
 494 |        <path id="DejaVuSans-20" transform="scale(0.015625)"/>
 495 |        <path id="DejaVuSans-50" d="M 1259 4147 
 496 | L 1259 2394 
 497 | L 2053 2394 
 498 | Q 2494 2394 2734 2622 
 499 | Q 2975 2850 2975 3272 
 500 | Q 2975 3691 2734 3919 
 501 | Q 2494 4147 2053 4147 
 502 | L 1259 4147 
 503 | z
 504 | M 628 4666 
 505 | L 2053 4666 
 506 | Q 2838 4666 3239 4311 
 507 | Q 3641 3956 3641 3272 
 508 | Q 3641 2581 3239 2228 
 509 | Q 2838 1875 2053 1875 
 510 | L 1259 1875 
 511 | L 1259 0 
 512 | L 628 0 
 513 | L 628 4666 
 514 | z
 515 | " transform="scale(0.015625)"/>
 516 |        <path id="DejaVuSans-69" d="M 603 3500 
 517 | L 1178 3500 
 518 | L 1178 0 
 519 | L 603 0 
 520 | L 603 3500 
 521 | z
 522 | M 603 4863 
 523 | L 1178 4863 
 524 | L 1178 4134 
 525 | L 603 4134 
 526 | L 603 4863 
 527 | z
 528 | " transform="scale(0.015625)"/>
 529 |        <path id="DejaVuSans-72" d="M 2631 2963 
 530 | Q 2534 3019 2420 3045 
 531 | Q 2306 3072 2169 3072 
 532 | Q 1681 3072 1420 2755 
 533 | Q 1159 2438 1159 1844 
 534 | L 1159 0 
 535 | L 581 0 
 536 | L 581 3500 
 537 | L 1159 3500 
 538 | L 1159 2956 
 539 | Q 1341 3275 1631 3429 
 540 | Q 1922 3584 2338 3584 
 541 | Q 2397 3584 2469 3576 
 542 | Q 2541 3569 2628 3553 
 543 | L 2631 2963 
 544 | z
 545 | " transform="scale(0.015625)"/>
 546 |        <path id="DejaVuSans-73" d="M 2834 3397 
 547 | L 2834 2853 
 548 | Q 2591 2978 2328 3040 
 549 | Q 2066 3103 1784 3103 
 550 | Q 1356 3103 1142 2972 
 551 | Q 928 2841 928 2578 
 552 | Q 928 2378 1081 2264 
 553 | Q 1234 2150 1697 2047 
 554 | L 1894 2003 
 555 | Q 2506 1872 2764 1633 
 556 | Q 3022 1394 3022 966 
 557 | Q 3022 478 2636 193 
 558 | Q 2250 -91 1575 -91 
 559 | Q 1294 -91 989 -36 
 560 | Q 684 19 347 128 
 561 | L 347 722 
 562 | Q 666 556 975 473 
 563 | Q 1284 391 1588 391 
 564 | Q 1994 391 2212 530 
 565 | Q 2431 669 2431 922 
 566 | Q 2431 1156 2273 1281 
 567 | Q 2116 1406 1581 1522 
 568 | L 1381 1569 
 569 | Q 847 1681 609 1914 
 570 | Q 372 2147 372 2553 
 571 | Q 372 3047 722 3315 
 572 | Q 1072 3584 1716 3584 
 573 | Q 2034 3584 2315 3537 
 574 | Q 2597 3491 2834 3397 
 575 | z
 576 | " transform="scale(0.015625)"/>
 577 |        <path id="DejaVuSans-53" d="M 3425 4513 
 578 | L 3425 3897 
 579 | Q 3066 4069 2747 4153 
 580 | Q 2428 4238 2131 4238 
 581 | Q 1616 4238 1336 4038 
 582 | Q 1056 3838 1056 3469 
 583 | Q 1056 3159 1242 3001 
 584 | Q 1428 2844 1947 2747 
 585 | L 2328 2669 
 586 | Q 3034 2534 3370 2195 
 587 | Q 3706 1856 3706 1288 
 588 | Q 3706 609 3251 259 
 589 | Q 2797 -91 1919 -91 
 590 | Q 1588 -91 1214 -16 
 591 | Q 841 59 441 206 
 592 | L 441 856 
 593 | Q 825 641 1194 531 
 594 | Q 1563 422 1919 422 
 595 | Q 2459 422 2753 634 
 596 | Q 3047 847 3047 1241 
 597 | Q 3047 1584 2836 1778 
 598 | Q 2625 1972 2144 2069 
 599 | L 1759 2144 
 600 | Q 1053 2284 737 2584 
 601 | Q 422 2884 422 3419 
 602 | Q 422 4038 858 4394 
 603 | Q 1294 4750 2059 4750 
 604 | Q 2388 4750 2728 4690 
 605 | Q 3069 4631 3425 4513 
 606 | z
 607 | " transform="scale(0.015625)"/>
 608 |        <path id="DejaVuSans-63" d="M 3122 3366 
 609 | L 3122 2828 
 610 | Q 2878 2963 2633 3030 
 611 | Q 2388 3097 2138 3097 
 612 | Q 1578 3097 1268 2742 
 613 | Q 959 2388 959 1747 
 614 | Q 959 1106 1268 751 
 615 | Q 1578 397 2138 397 
 616 | Q 2388 397 2633 464 
 617 | Q 2878 531 3122 666 
 618 | L 3122 134 
 619 | Q 2881 22 2623 -34 
 620 | Q 2366 -91 2075 -91 
 621 | Q 1284 -91 818 406 
 622 | Q 353 903 353 1747 
 623 | Q 353 2603 823 3093 
 624 | Q 1294 3584 2113 3584 
 625 | Q 2378 3584 2631 3529 
 626 | Q 2884 3475 3122 3366 
 627 | z
 628 | " transform="scale(0.015625)"/>
 629 |        <path id="DejaVuSans-6f" d="M 1959 3097 
 630 | Q 1497 3097 1228 2736 
 631 | Q 959 2375 959 1747 
 632 | Q 959 1119 1226 758 
 633 | Q 1494 397 1959 397 
 634 | Q 2419 397 2687 759 
 635 | Q 2956 1122 2956 1747 
 636 | Q 2956 2369 2687 2733 
 637 | Q 2419 3097 1959 3097 
 638 | z
 639 | M 1959 3584 
 640 | Q 2709 3584 3137 3096 
 641 | Q 3566 2609 3566 1747 
 642 | Q 3566 888 3137 398 
 643 | Q 2709 -91 1959 -91 
 644 | Q 1206 -91 779 398 
 645 | Q 353 888 353 1747 
 646 | Q 353 2609 779 3096 
 647 | Q 1206 3584 1959 3584 
 648 | z
 649 | " transform="scale(0.015625)"/>
 650 |        <path id="DejaVuSans-6e" d="M 3513 2113 
 651 | L 3513 0 
 652 | L 2938 0 
 653 | L 2938 2094 
 654 | Q 2938 2591 2744 2837 
 655 | Q 2550 3084 2163 3084 
 656 | Q 1697 3084 1428 2787 
 657 | Q 1159 2491 1159 1978 
 658 | L 1159 0 
 659 | L 581 0 
 660 | L 581 3500 
 661 | L 1159 3500 
 662 | L 1159 2956 
 663 | Q 1366 3272 1645 3428 
 664 | Q 1925 3584 2291 3584 
 665 | Q 2894 3584 3203 3211 
 666 | Q 3513 2838 3513 2113 
 667 | z
 668 | " transform="scale(0.015625)"/>
 669 |        <path id="DejaVuSans-64" d="M 2906 2969 
 670 | L 2906 4863 
 671 | L 3481 4863 
 672 | L 3481 0 
 673 | L 2906 0 
 674 | L 2906 525 
 675 | Q 2725 213 2448 61 
 676 | Q 2172 -91 1784 -91 
 677 | Q 1150 -91 751 415 
 678 | Q 353 922 353 1747 
 679 | Q 353 2572 751 3078 
 680 | Q 1150 3584 1784 3584 
 681 | Q 2172 3584 2448 3432 
 682 | Q 2725 3281 2906 2969 
 683 | z
 684 | M 947 1747 
 685 | Q 947 1113 1208 752 
 686 | Q 1469 391 1925 391 
 687 | Q 2381 391 2643 752 
 688 | Q 2906 1113 2906 1747 
 689 | Q 2906 2381 2643 2742 
 690 | Q 2381 3103 1925 3103 
 691 | Q 1469 3103 1208 2742 
 692 | Q 947 2381 947 1747 
 693 | z
 694 | " transform="scale(0.015625)"/>
 695 |       </defs>
 696 |       <use xlink:href="#DejaVuSans-49"/>
 697 |       <use xlink:href="#DejaVuSans-6d" x="29.492188"/>
 698 |       <use xlink:href="#DejaVuSans-61" x="126.904297"/>
 699 |       <use xlink:href="#DejaVuSans-67" x="188.183594"/>
 700 |       <use xlink:href="#DejaVuSans-65" x="251.660156"/>
 701 |       <use xlink:href="#DejaVuSans-20" x="313.183594"/>
 702 |       <use xlink:href="#DejaVuSans-50" x="344.970703"/>
 703 |       <use xlink:href="#DejaVuSans-61" x="400.773438"/>
 704 |       <use xlink:href="#DejaVuSans-69" x="462.052734"/>
 705 |       <use xlink:href="#DejaVuSans-72" x="489.835938"/>
 706 |       <use xlink:href="#DejaVuSans-73" x="530.949219"/>
 707 |       <use xlink:href="#DejaVuSans-20" x="583.048828"/>
 708 |       <use xlink:href="#DejaVuSans-50" x="614.835938"/>
 709 |       <use xlink:href="#DejaVuSans-65" x="671.513672"/>
 710 |       <use xlink:href="#DejaVuSans-72" x="733.037109"/>
 711 |       <use xlink:href="#DejaVuSans-20" x="774.150391"/>
 712 |       <use xlink:href="#DejaVuSans-53" x="805.9375"/>
 713 |       <use xlink:href="#DejaVuSans-65" x="869.414062"/>
 714 |       <use xlink:href="#DejaVuSans-63" x="930.9375"/>
 715 |       <use xlink:href="#DejaVuSans-6f" x="985.917969"/>
 716 |       <use xlink:href="#DejaVuSans-6e" x="1047.099609"/>
 717 |       <use xlink:href="#DejaVuSans-64" x="1110.478516"/>
 718 |      </g>
 719 |     </g>
 720 |    </g>
 721 |    <g id="matplotlib.axis_2">
 722 |     <g id="ytick_1">
 723 |      <g id="line2d_7">
 724 |       <defs>
 725 |        <path id="m433b6a5b4b" d="M 0 0 
 726 | L -3.5 0 
 727 | " style="stroke: #000000; stroke-width: 0.8"/>
 728 |       </defs>
 729 |       <g>
 730 |        <use xlink:href="#m433b6a5b4b" x="38.242188" y="184.285882" style="stroke: #000000; stroke-width: 0.8"/>
 731 |       </g>
 732 |      </g>
 733 |      <g id="text_8">
 734 |       <!-- 64 -->
 735 |       <g transform="translate(18.517188 188.085101) scale(0.1 -0.1)">
 736 |        <defs>
 737 |         <path id="DejaVuSans-36" d="M 2113 2584 
 738 | Q 1688 2584 1439 2293 
 739 | Q 1191 2003 1191 1497 
 740 | Q 1191 994 1439 701 
 741 | Q 1688 409 2113 409 
 742 | Q 2538 409 2786 701 
 743 | Q 3034 994 3034 1497 
 744 | Q 3034 2003 2786 2293 
 745 | Q 2538 2584 2113 2584 
 746 | z
 747 | M 3366 4563 
 748 | L 3366 3988 
 749 | Q 3128 4100 2886 4159 
 750 | Q 2644 4219 2406 4219 
 751 | Q 1781 4219 1451 3797 
 752 | Q 1122 3375 1075 2522 
 753 | Q 1259 2794 1537 2939 
 754 | Q 1816 3084 2150 3084 
 755 | Q 2853 3084 3261 2657 
 756 | Q 3669 2231 3669 1497 
 757 | Q 3669 778 3244 343 
 758 | Q 2819 -91 2113 -91 
 759 | Q 1303 -91 875 529 
 760 | Q 447 1150 447 2328 
 761 | Q 447 3434 972 4092 
 762 | Q 1497 4750 2381 4750 
 763 | Q 2619 4750 2861 4703 
 764 | Q 3103 4656 3366 4563 
 765 | z
 766 | " transform="scale(0.015625)"/>
 767 |        </defs>
 768 |        <use xlink:href="#DejaVuSans-36"/>
 769 |        <use xlink:href="#DejaVuSans-34" x="63.623047"/>
 770 |       </g>
 771 |      </g>
 772 |     </g>
 773 |     <g id="ytick_2">
 774 |      <g id="line2d_8">
 775 |       <g>
 776 |        <use xlink:href="#m433b6a5b4b" x="38.242188" y="124.838824" style="stroke: #000000; stroke-width: 0.8"/>
 777 |       </g>
 778 |      </g>
 779 |      <g id="text_9">
 780 |       <!-- 65 -->
 781 |       <g transform="translate(18.517188 128.638042) scale(0.1 -0.1)">
 782 |        <use xlink:href="#DejaVuSans-36"/>
 783 |        <use xlink:href="#DejaVuSans-35" x="63.623047"/>
 784 |       </g>
 785 |      </g>
 786 |     </g>
 787 |     <g id="ytick_3">
 788 |      <g id="line2d_9">
 789 |       <g>
 790 |        <use xlink:href="#m433b6a5b4b" x="38.242188" y="65.391765" style="stroke: #000000; stroke-width: 0.8"/>
 791 |       </g>
 792 |      </g>
 793 |      <g id="text_10">
 794 |       <!-- 66 -->
 795 |       <g transform="translate(18.517188 69.190983) scale(0.1 -0.1)">
 796 |        <use xlink:href="#DejaVuSans-36"/>
 797 |        <use xlink:href="#DejaVuSans-36" x="63.623047"/>
 798 |       </g>
 799 |      </g>
 800 |     </g>
 801 |     <g id="ytick_4">
 802 |      <g id="line2d_10">
 803 |       <g>
 804 |        <use xlink:href="#m433b6a5b4b" x="38.242188" y="5.944706" style="stroke: #000000; stroke-width: 0.8"/>
 805 |       </g>
 806 |      </g>
 807 |      <g id="text_11">
 808 |       <!-- 67 -->
 809 |       <g transform="translate(18.517188 9.743925) scale(0.1 -0.1)">
 810 |        <defs>
 811 |         <path id="DejaVuSans-37" d="M 525 4666 
 812 | L 3525 4666 
 813 | L 3525 4397 
 814 | L 1831 0 
 815 | L 1172 0 
 816 | L 2766 4134 
 817 | L 525 4134 
 818 | L 525 4666 
 819 | z
 820 | " transform="scale(0.015625)"/>
 821 |        </defs>
 822 |        <use xlink:href="#DejaVuSans-36"/>
 823 |        <use xlink:href="#DejaVuSans-37" x="63.623047"/>
 824 |       </g>
 825 |      </g>
 826 |     </g>
 827 |     <g id="text_12">
 828 |      <!-- Relative Pose Accuracy [%] -->
 829 |      <g transform="translate(11.397656 203.038906) rotate(-90) scale(0.15 -0.15)">
 830 |       <defs>
 831 |        <path id="DejaVuSans-52" d="M 2841 2188 
 832 | Q 3044 2119 3236 1894 
 833 | Q 3428 1669 3622 1275 
 834 | L 4263 0 
 835 | L 3584 0 
 836 | L 2988 1197 
 837 | Q 2756 1666 2539 1819 
 838 | Q 2322 1972 1947 1972 
 839 | L 1259 1972 
 840 | L 1259 0 
 841 | L 628 0 
 842 | L 628 4666 
 843 | L 2053 4666 
 844 | Q 2853 4666 3247 4331 
 845 | Q 3641 3997 3641 3322 
 846 | Q 3641 2881 3436 2590 
 847 | Q 3231 2300 2841 2188 
 848 | z
 849 | M 1259 4147 
 850 | L 1259 2491 
 851 | L 2053 2491 
 852 | Q 2509 2491 2742 2702 
 853 | Q 2975 2913 2975 3322 
 854 | Q 2975 3731 2742 3939 
 855 | Q 2509 4147 2053 4147 
 856 | L 1259 4147 
 857 | z
 858 | " transform="scale(0.015625)"/>
 859 |        <path id="DejaVuSans-6c" d="M 603 4863 
 860 | L 1178 4863 
 861 | L 1178 0 
 862 | L 603 0 
 863 | L 603 4863 
 864 | z
 865 | " transform="scale(0.015625)"/>
 866 |        <path id="DejaVuSans-74" d="M 1172 4494 
 867 | L 1172 3500 
 868 | L 2356 3500 
 869 | L 2356 3053 
 870 | L 1172 3053 
 871 | L 1172 1153 
 872 | Q 1172 725 1289 603 
 873 | Q 1406 481 1766 481 
 874 | L 2356 481 
 875 | L 2356 0 
 876 | L 1766 0 
 877 | Q 1100 0 847 248 
 878 | Q 594 497 594 1153 
 879 | L 594 3053 
 880 | L 172 3053 
 881 | L 172 3500 
 882 | L 594 3500 
 883 | L 594 4494 
 884 | L 1172 4494 
 885 | z
 886 | " transform="scale(0.015625)"/>
 887 |        <path id="DejaVuSans-76" d="M 191 3500 
 888 | L 800 3500 
 889 | L 1894 563 
 890 | L 2988 3500 
 891 | L 3597 3500 
 892 | L 2284 0 
 893 | L 1503 0 
 894 | L 191 3500 
 895 | z
 896 | " transform="scale(0.015625)"/>
 897 |        <path id="DejaVuSans-41" d="M 2188 4044 
 898 | L 1331 1722 
 899 | L 3047 1722 
 900 | L 2188 4044 
 901 | z
 902 | M 1831 4666 
 903 | L 2547 4666 
 904 | L 4325 0 
 905 | L 3669 0 
 906 | L 3244 1197 
 907 | L 1141 1197 
 908 | L 716 0 
 909 | L 50 0 
 910 | L 1831 4666 
 911 | z
 912 | " transform="scale(0.015625)"/>
 913 |        <path id="DejaVuSans-75" d="M 544 1381 
 914 | L 544 3500 
 915 | L 1119 3500 
 916 | L 1119 1403 
 917 | Q 1119 906 1312 657 
 918 | Q 1506 409 1894 409 
 919 | Q 2359 409 2629 706 
 920 | Q 2900 1003 2900 1516 
 921 | L 2900 3500 
 922 | L 3475 3500 
 923 | L 3475 0 
 924 | L 2900 0 
 925 | L 2900 538 
 926 | Q 2691 219 2414 64 
 927 | Q 2138 -91 1772 -91 
 928 | Q 1169 -91 856 284 
 929 | Q 544 659 544 1381 
 930 | z
 931 | M 1991 3584 
 932 | L 1991 3584 
 933 | z
 934 | " transform="scale(0.015625)"/>
 935 |        <path id="DejaVuSans-79" d="M 2059 -325 
 936 | Q 1816 -950 1584 -1140 
 937 | Q 1353 -1331 966 -1331 
 938 | L 506 -1331 
 939 | L 506 -850 
 940 | L 844 -850 
 941 | Q 1081 -850 1212 -737 
 942 | Q 1344 -625 1503 -206 
 943 | L 1606 56 
 944 | L 191 3500 
 945 | L 800 3500 
 946 | L 1894 763 
 947 | L 2988 3500 
 948 | L 3597 3500 
 949 | L 2059 -325 
 950 | z
 951 | " transform="scale(0.015625)"/>
 952 |        <path id="DejaVuSans-5b" d="M 550 4863 
 953 | L 1875 4863 
 954 | L 1875 4416 
 955 | L 1125 4416 
 956 | L 1125 -397 
 957 | L 1875 -397 
 958 | L 1875 -844 
 959 | L 550 -844 
 960 | L 550 4863 
 961 | z
 962 | " transform="scale(0.015625)"/>
 963 |        <path id="DejaVuSans-25" d="M 4653 2053 
 964 | Q 4381 2053 4226 1822 
 965 | Q 4072 1591 4072 1178 
 966 | Q 4072 772 4226 539 
 967 | Q 4381 306 4653 306 
 968 | Q 4919 306 5073 539 
 969 | Q 5228 772 5228 1178 
 970 | Q 5228 1588 5073 1820 
 971 | Q 4919 2053 4653 2053 
 972 | z
 973 | M 4653 2450 
 974 | Q 5147 2450 5437 2106 
 975 | Q 5728 1763 5728 1178 
 976 | Q 5728 594 5436 251 
 977 | Q 5144 -91 4653 -91 
 978 | Q 4153 -91 3862 251 
 979 | Q 3572 594 3572 1178 
 980 | Q 3572 1766 3864 2108 
 981 | Q 4156 2450 4653 2450 
 982 | z
 983 | M 1428 4353 
 984 | Q 1159 4353 1004 4120 
 985 | Q 850 3888 850 3481 
 986 | Q 850 3069 1003 2837 
 987 | Q 1156 2606 1428 2606 
 988 | Q 1700 2606 1854 2837 
 989 | Q 2009 3069 2009 3481 
 990 | Q 2009 3884 1853 4118 
 991 | Q 1697 4353 1428 4353 
 992 | z
 993 | M 4250 4750 
 994 | L 4750 4750 
 995 | L 1831 -91 
 996 | L 1331 -91 
 997 | L 4250 4750 
 998 | z
 999 | M 1428 4750 
1000 | Q 1922 4750 2215 4408 
1001 | Q 2509 4066 2509 3481 
1002 | Q 2509 2891 2217 2550 
1003 | Q 1925 2209 1428 2209 
1004 | Q 931 2209 642 2551 
1005 | Q 353 2894 353 3481 
1006 | Q 353 4063 643 4406 
1007 | Q 934 4750 1428 4750 
1008 | z
1009 | " transform="scale(0.015625)"/>
1010 |        <path id="DejaVuSans-5d" d="M 1947 4863 
1011 | L 1947 -844 
1012 | L 622 -844 
1013 | L 622 -397 
1014 | L 1369 -397 
1015 | L 1369 4416 
1016 | L 622 4416 
1017 | L 622 4863 
1018 | L 1947 4863 
1019 | z
1020 | " transform="scale(0.015625)"/>
1021 |       </defs>
1022 |       <use xlink:href="#DejaVuSans-52"/>
1023 |       <use xlink:href="#DejaVuSans-65" x="64.982422"/>
1024 |       <use xlink:href="#DejaVuSans-6c" x="126.505859"/>
1025 |       <use xlink:href="#DejaVuSans-61" x="154.289062"/>
1026 |       <use xlink:href="#DejaVuSans-74" x="215.568359"/>
1027 |       <use xlink:href="#DejaVuSans-69" x="254.777344"/>
1028 |       <use xlink:href="#DejaVuSans-76" x="282.560547"/>
1029 |       <use xlink:href="#DejaVuSans-65" x="341.740234"/>
1030 |       <use xlink:href="#DejaVuSans-20" x="403.263672"/>
1031 |       <use xlink:href="#DejaVuSans-50" x="435.050781"/>
1032 |       <use xlink:href="#DejaVuSans-6f" x="491.728516"/>
1033 |       <use xlink:href="#DejaVuSans-73" x="552.910156"/>
1034 |       <use xlink:href="#DejaVuSans-65" x="605.009766"/>
1035 |       <use xlink:href="#DejaVuSans-20" x="666.533203"/>
1036 |       <use xlink:href="#DejaVuSans-41" x="698.320312"/>
1037 |       <use xlink:href="#DejaVuSans-63" x="764.978516"/>
1038 |       <use xlink:href="#DejaVuSans-63" x="819.958984"/>
1039 |       <use xlink:href="#DejaVuSans-75" x="874.939453"/>
1040 |       <use xlink:href="#DejaVuSans-72" x="938.318359"/>
1041 |       <use xlink:href="#DejaVuSans-61" x="979.431641"/>
1042 |       <use xlink:href="#DejaVuSans-63" x="1040.710938"/>
1043 |       <use xlink:href="#DejaVuSans-79" x="1095.691406"/>
1044 |       <use xlink:href="#DejaVuSans-20" x="1154.871094"/>
1045 |       <use xlink:href="#DejaVuSans-5b" x="1186.658203"/>
1046 |       <use xlink:href="#DejaVuSans-25" x="1225.671875"/>
1047 |       <use xlink:href="#DejaVuSans-5d" x="1320.691406"/>
1048 |      </g>
1049 |     </g>
1050 |    </g>
1051 |    <g id="patch_3">
1052 |     <path d="M 38.242188 202.12 
1053 | L 38.242188 0 
1054 | " style="fill: none; stroke: #000000; stroke-width: 0.8; stroke-linejoin: miter; stroke-linecap: square"/>
1055 |    </g>
1056 |    <g id="patch_4">
1057 |     <path d="M 351.50156 202.12 
1058 | L 351.50156 0 
1059 | " style="fill: none; stroke: #000000; stroke-width: 0.8; stroke-linejoin: miter; stroke-linecap: square"/>
1060 |    </g>
1061 |    <g id="patch_5">
1062 |     <path d="M 38.242188 202.12 
1063 | L 351.50156 202.12 
1064 | " style="fill: none; stroke: #000000; stroke-width: 0.8; stroke-linejoin: miter; stroke-linecap: square"/>
1065 |    </g>
1066 |    <g id="patch_6">
1067 |     <path d="M 38.242188 0 
1068 | L 351.50156 0 
1069 | " style="fill: none; stroke: #000000; stroke-width: 0.8; stroke-linejoin: miter; stroke-linecap: square"/>
1070 |    </g>
1071 |    <g id="text_13">
1072 |     <!-- SuperGlue -->
1073 |     <g style="fill: #0000ff" transform="translate(73.036283 100.678833) scale(0.15 -0.15)">
1074 |      <defs>
1075 |       <path id="DejaVuSans-70" d="M 1159 525 
1076 | L 1159 -1331 
1077 | L 581 -1331 
1078 | L 581 3500 
1079 | L 1159 3500 
1080 | L 1159 2969 
1081 | Q 1341 3281 1617 3432 
1082 | Q 1894 3584 2278 3584 
1083 | Q 2916 3584 3314 3078 
1084 | Q 3713 2572 3713 1747 
1085 | Q 3713 922 3314 415 
1086 | Q 2916 -91 2278 -91 
1087 | Q 1894 -91 1617 61 
1088 | Q 1341 213 1159 525 
1089 | z
1090 | M 3116 1747 
1091 | Q 3116 2381 2855 2742 
1092 | Q 2594 3103 2138 3103 
1093 | Q 1681 3103 1420 2742 
1094 | Q 1159 2381 1159 1747 
1095 | Q 1159 1113 1420 752 
1096 | Q 1681 391 2138 391 
1097 | Q 2594 391 2855 752 
1098 | Q 3116 1113 3116 1747 
1099 | z
1100 | " transform="scale(0.015625)"/>
1101 |       <path id="DejaVuSans-47" d="M 3809 666 
1102 | L 3809 1919 
1103 | L 2778 1919 
1104 | L 2778 2438 
1105 | L 4434 2438 
1106 | L 4434 434 
1107 | Q 4069 175 3628 42 
1108 | Q 3188 -91 2688 -91 
1109 | Q 1594 -91 976 548 
1110 | Q 359 1188 359 2328 
1111 | Q 359 3472 976 4111 
1112 | Q 1594 4750 2688 4750 
1113 | Q 3144 4750 3555 4637 
1114 | Q 3966 4525 4313 4306 
1115 | L 4313 3634 
1116 | Q 3963 3931 3569 4081 
1117 | Q 3175 4231 2741 4231 
1118 | Q 1884 4231 1454 3753 
1119 | Q 1025 3275 1025 2328 
1120 | Q 1025 1384 1454 906 
1121 | Q 1884 428 2741 428 
1122 | Q 3075 428 3337 486 
1123 | Q 3600 544 3809 666 
1124 | z
1125 | " transform="scale(0.015625)"/>
1126 |      </defs>
1127 |      <use xlink:href="#DejaVuSans-53"/>
1128 |      <use xlink:href="#DejaVuSans-75" x="63.476562"/>
1129 |      <use xlink:href="#DejaVuSans-70" x="126.855469"/>
1130 |      <use xlink:href="#DejaVuSans-65" x="190.332031"/>
1131 |      <use xlink:href="#DejaVuSans-72" x="251.855469"/>
1132 |      <use xlink:href="#DejaVuSans-47" x="292.96875"/>
1133 |      <use xlink:href="#DejaVuSans-6c" x="370.458984"/>
1134 |      <use xlink:href="#DejaVuSans-75" x="398.242188"/>
1135 |      <use xlink:href="#DejaVuSans-65" x="461.621094"/>
1136 |     </g>
1137 |    </g>
1138 |    <g id="text_14">
1139 |     <!-- SGMNet -->
1140 |     <g style="fill: #008000" transform="translate(87.993899 188.055763) scale(0.15 -0.15)">
1141 |      <defs>
1142 |       <path id="DejaVuSans-4d" d="M 628 4666 
1143 | L 1569 4666 
1144 | L 2759 1491 
1145 | L 3956 4666 
1146 | L 4897 4666 
1147 | L 4897 0 
1148 | L 4281 0 
1149 | L 4281 4097 
1150 | L 3078 897 
1151 | L 2444 897 
1152 | L 1241 4097 
1153 | L 1241 0 
1154 | L 628 0 
1155 | L 628 4666 
1156 | z
1157 | " transform="scale(0.015625)"/>
1158 |       <path id="DejaVuSans-4e" d="M 628 4666 
1159 | L 1478 4666 
1160 | L 3547 763 
1161 | L 3547 4666 
1162 | L 4159 4666 
1163 | L 4159 0 
1164 | L 3309 0 
1165 | L 1241 3903 
1166 | L 1241 0 
1167 | L 628 0 
1168 | L 628 4666 
1169 | z
1170 | " transform="scale(0.015625)"/>
1171 |      </defs>
1172 |      <use xlink:href="#DejaVuSans-53"/>
1173 |      <use xlink:href="#DejaVuSans-47" x="63.476562"/>
1174 |      <use xlink:href="#DejaVuSans-4d" x="140.966797"/>
1175 |      <use xlink:href="#DejaVuSans-4e" x="227.246094"/>
1176 |      <use xlink:href="#DejaVuSans-65" x="302.050781"/>
1177 |      <use xlink:href="#DejaVuSans-74" x="363.574219"/>
1178 |     </g>
1179 |    </g>
1180 |    <g id="text_15">
1181 |     <!-- LoFTR -->
1182 |     <g transform="translate(46.195263 63.010597) scale(0.15 -0.15)">
1183 |      <defs>
1184 |       <path id="DejaVuSans-4c" d="M 628 4666 
1185 | L 1259 4666 
1186 | L 1259 531 
1187 | L 3531 531 
1188 | L 3531 0 
1189 | L 628 0 
1190 | L 628 4666 
1191 | z
1192 | " transform="scale(0.015625)"/>
1193 |       <path id="DejaVuSans-46" d="M 628 4666 
1194 | L 3309 4666 
1195 | L 3309 4134 
1196 | L 1259 4134 
1197 | L 1259 2759 
1198 | L 3109 2759 
1199 | L 3109 2228 
1200 | L 1259 2228 
1201 | L 1259 0 
1202 | L 628 0 
1203 | L 628 4666 
1204 | z
1205 | " transform="scale(0.015625)"/>
1206 |       <path id="DejaVuSans-54" d="M -19 4666 
1207 | L 3928 4666 
1208 | L 3928 4134 
1209 | L 2272 4134 
1210 | L 2272 0 
1211 | L 1638 0 
1212 | L 1638 4134 
1213 | L -19 4134 
1214 | L -19 4666 
1215 | z
1216 | " transform="scale(0.015625)"/>
1217 |      </defs>
1218 |      <use xlink:href="#DejaVuSans-4c"/>
1219 |      <use xlink:href="#DejaVuSans-6f" x="53.962891"/>
1220 |      <use xlink:href="#DejaVuSans-46" x="115.144531"/>
1221 |      <use xlink:href="#DejaVuSans-54" x="170.914062"/>
1222 |      <use xlink:href="#DejaVuSans-52" x="231.998047"/>
1223 |     </g>
1224 |    </g>
1225 |    <g id="text_16">
1226 |     <!-- MatchFormer -->
1227 |     <g style="fill: #800080" transform="translate(42.800495 23.359763) scale(0.15 -0.15)">
1228 |      <defs>
1229 |       <path id="DejaVuSans-68" d="M 3513 2113 
1230 | L 3513 0 
1231 | L 2938 0 
1232 | L 2938 2094 
1233 | Q 2938 2591 2744 2837 
1234 | Q 2550 3084 2163 3084 
1235 | Q 1697 3084 1428 2787 
1236 | Q 1159 2491 1159 1978 
1237 | L 1159 0 
1238 | L 581 0 
1239 | L 581 4863 
1240 | L 1159 4863 
1241 | L 1159 2956 
1242 | Q 1366 3272 1645 3428 
1243 | Q 1925 3584 2291 3584 
1244 | Q 2894 3584 3203 3211 
1245 | Q 3513 2838 3513 2113 
1246 | z
1247 | " transform="scale(0.015625)"/>
1248 |      </defs>
1249 |      <use xlink:href="#DejaVuSans-4d"/>
1250 |      <use xlink:href="#DejaVuSans-61" x="86.279297"/>
1251 |      <use xlink:href="#DejaVuSans-74" x="147.558594"/>
1252 |      <use xlink:href="#DejaVuSans-63" x="186.767578"/>
1253 |      <use xlink:href="#DejaVuSans-68" x="241.748047"/>
1254 |      <use xlink:href="#DejaVuSans-46" x="305.126953"/>
1255 |      <use xlink:href="#DejaVuSans-6f" x="359.021484"/>
1256 |      <use xlink:href="#DejaVuSans-72" x="420.203125"/>
1257 |      <use xlink:href="#DejaVuSans-6d" x="459.566406"/>
1258 |      <use xlink:href="#DejaVuSans-65" x="556.978516"/>
1259 |      <use xlink:href="#DejaVuSans-72" x="618.501953"/>
1260 |     </g>
1261 |    </g>
1262 |    <g id="text_17">
1263 |     <!-- L=3 -->
1264 |     <g style="fill: #ff0000" transform="translate(318.963638 198.045257) scale(0.1 -0.1)">
1265 |      <defs>
1266 |       <path id="DejaVuSans-3d" d="M 678 2906 
1267 | L 4684 2906 
1268 | L 4684 2381 
1269 | L 678 2381 
1270 | L 678 2906 
1271 | z
1272 | M 678 1631 
1273 | L 4684 1631 
1274 | L 4684 1100 
1275 | L 678 1100 
1276 | L 678 1631 
1277 | z
1278 | " transform="scale(0.015625)"/>
1279 |      </defs>
1280 |      <use xlink:href="#DejaVuSans-4c"/>
1281 |      <use xlink:href="#DejaVuSans-3d" x="55.712891"/>
1282 |      <use xlink:href="#DejaVuSans-33" x="139.501953"/>
1283 |     </g>
1284 |    </g>
1285 |    <g id="text_18">
1286 |     <!-- L=5 -->
1287 |     <g style="fill: #ff0000" transform="translate(228.688766 138.598199) scale(0.1 -0.1)">
1288 |      <use xlink:href="#DejaVuSans-4c"/>
1289 |      <use xlink:href="#DejaVuSans-3d" x="55.712891"/>
1290 |      <use xlink:href="#DejaVuSans-35" x="139.501953"/>
1291 |     </g>
1292 |    </g>
1293 |    <g id="text_19">
1294 |     <!-- L=7 -->
1295 |     <g style="fill: #ff0000" transform="translate(171.91046 67.261728) scale(0.1 -0.1)">
1296 |      <use xlink:href="#DejaVuSans-4c"/>
1297 |      <use xlink:href="#DejaVuSans-3d" x="55.712891"/>
1298 |      <use xlink:href="#DejaVuSans-37" x="139.501953"/>
1299 |     </g>
1300 |    </g>
1301 |    <g id="text_20">
1302 |     <!-- L=9 -->
1303 |     <g style="fill: #ff0000" transform="translate(145.090048 37.538199) scale(0.1 -0.1)">
1304 |      <defs>
1305 |       <path id="DejaVuSans-39" d="M 703 97 
1306 | L 703 672 
1307 | Q 941 559 1184 500 
1308 | Q 1428 441 1663 441 
1309 | Q 2288 441 2617 861 
1310 | Q 2947 1281 2994 2138 
1311 | Q 2813 1869 2534 1725 
1312 | Q 2256 1581 1919 1581 
1313 | Q 1219 1581 811 2004 
1314 | Q 403 2428 403 3163 
1315 | Q 403 3881 828 4315 
1316 | Q 1253 4750 1959 4750 
1317 | Q 2769 4750 3195 4129 
1318 | Q 3622 3509 3622 2328 
1319 | Q 3622 1225 3098 567 
1320 | Q 2575 -91 1691 -91 
1321 | Q 1453 -91 1209 -44 
1322 | Q 966 3 703 97 
1323 | z
1324 | M 1959 2075 
1325 | Q 2384 2075 2632 2365 
1326 | Q 2881 2656 2881 3163 
1327 | Q 2881 3666 2632 3958 
1328 | Q 2384 4250 1959 4250 
1329 | Q 1534 4250 1286 3958 
1330 | Q 1038 3666 1038 3163 
1331 | Q 1038 2656 1286 2365 
1332 | Q 1534 2075 1959 2075 
1333 | z
1334 | " transform="scale(0.015625)"/>
1335 |      </defs>
1336 |      <use xlink:href="#DejaVuSans-4c"/>
1337 |      <use xlink:href="#DejaVuSans-3d" x="55.712891"/>
1338 |      <use xlink:href="#DejaVuSans-39" x="139.501953"/>
1339 |     </g>
1340 |    </g>
1341 |    <g id="text_21">
1342 |     <!-- fixed-depth -->
1343 |     <g style="fill: #ff0000" transform="translate(225.255342 166.790662) scale(0.12 -0.12)">
1344 |      <defs>
1345 |       <path id="DejaVuSans-66" d="M 2375 4863 
1346 | L 2375 4384 
1347 | L 1825 4384 
1348 | Q 1516 4384 1395 4259 
1349 | Q 1275 4134 1275 3809 
1350 | L 1275 3500 
1351 | L 2222 3500 
1352 | L 2222 3053 
1353 | L 1275 3053 
1354 | L 1275 0 
1355 | L 697 0 
1356 | L 697 3053 
1357 | L 147 3053 
1358 | L 147 3500 
1359 | L 697 3500 
1360 | L 697 3744 
1361 | Q 697 4328 969 4595 
1362 | Q 1241 4863 1831 4863 
1363 | L 2375 4863 
1364 | z
1365 | " transform="scale(0.015625)"/>
1366 |       <path id="DejaVuSans-78" d="M 3513 3500 
1367 | L 2247 1797 
1368 | L 3578 0 
1369 | L 2900 0 
1370 | L 1881 1375 
1371 | L 863 0 
1372 | L 184 0 
1373 | L 1544 1831 
1374 | L 300 3500 
1375 | L 978 3500 
1376 | L 1906 2253 
1377 | L 2834 3500 
1378 | L 3513 3500 
1379 | z
1380 | " transform="scale(0.015625)"/>
1381 |       <path id="DejaVuSans-2d" d="M 313 2009 
1382 | L 1997 2009 
1383 | L 1997 1497 
1384 | L 313 1497 
1385 | L 313 2009 
1386 | z
1387 | " transform="scale(0.015625)"/>
1388 |      </defs>
1389 |      <use xlink:href="#DejaVuSans-66"/>
1390 |      <use xlink:href="#DejaVuSans-69" x="35.205078"/>
1391 |      <use xlink:href="#DejaVuSans-78" x="62.988281"/>
1392 |      <use xlink:href="#DejaVuSans-65" x="119.042969"/>
1393 |      <use xlink:href="#DejaVuSans-64" x="180.566406"/>
1394 |      <use xlink:href="#DejaVuSans-2d" x="244.042969"/>
1395 |      <use xlink:href="#DejaVuSans-64" x="280.126953"/>
1396 |      <use xlink:href="#DejaVuSans-65" x="343.603516"/>
1397 |      <use xlink:href="#DejaVuSans-70" x="405.126953"/>
1398 |      <use xlink:href="#DejaVuSans-74" x="468.603516"/>
1399 |      <use xlink:href="#DejaVuSans-68" x="507.8125"/>
1400 |     </g>
1401 |    </g>
1402 |    <g id="text_22">
1403 |     <!-- adaptive -->
1404 |     <g style="fill: #ff0000" transform="translate(283.083817 125.177721) scale(0.12 -0.12)">
1405 |      <use xlink:href="#DejaVuSans-61"/>
1406 |      <use xlink:href="#DejaVuSans-64" x="61.279297"/>
1407 |      <use xlink:href="#DejaVuSans-61" x="124.755859"/>
1408 |      <use xlink:href="#DejaVuSans-70" x="186.035156"/>
1409 |      <use xlink:href="#DejaVuSans-74" x="249.511719"/>
1410 |      <use xlink:href="#DejaVuSans-69" x="288.720703"/>
1411 |      <use xlink:href="#DejaVuSans-76" x="316.503906"/>
1412 |      <use xlink:href="#DejaVuSans-65" x="375.683594"/>
1413 |     </g>
1414 |    </g>
1415 |    <g id="text_23">
1416 |     <!-- optimized -->
1417 |     <g style="fill: #ff0000" transform="translate(260.043244 64.675772) scale(0.12 -0.12)">
1418 |      <defs>
1419 |       <path id="DejaVuSans-7a" d="M 353 3500 
1420 | L 3084 3500 
1421 | L 3084 2975 
1422 | L 922 459 
1423 | L 3084 459 
1424 | L 3084 0 
1425 | L 275 0 
1426 | L 275 525 
1427 | L 2438 3041 
1428 | L 353 3041 
1429 | L 353 3500 
1430 | z
1431 | " transform="scale(0.015625)"/>
1432 |      </defs>
1433 |      <use xlink:href="#DejaVuSans-6f"/>
1434 |      <use xlink:href="#DejaVuSans-70" x="61.181641"/>
1435 |      <use xlink:href="#DejaVuSans-74" x="124.658203"/>
1436 |      <use xlink:href="#DejaVuSans-69" x="163.867188"/>
1437 |      <use xlink:href="#DejaVuSans-6d" x="191.650391"/>
1438 |      <use xlink:href="#DejaVuSans-69" x="289.0625"/>
1439 |      <use xlink:href="#DejaVuSans-7a" x="316.845703"/>
1440 |      <use xlink:href="#DejaVuSans-65" x="369.335938"/>
1441 |      <use xlink:href="#DejaVuSans-64" x="430.859375"/>
1442 |     </g>
1443 |    </g>
1444 |    <g id="text_24">
1445 |     <!-- LightGlue -->
1446 |     <g style="fill: #ff0000" transform="translate(253.72379 21.69671) scale(0.15 -0.15)">
1447 |      <use xlink:href="#DejaVuSans-4c"/>
1448 |      <use xlink:href="#DejaVuSans-69" x="55.712891"/>
1449 |      <use xlink:href="#DejaVuSans-67" x="83.496094"/>
1450 |      <use xlink:href="#DejaVuSans-68" x="146.972656"/>
1451 |      <use xlink:href="#DejaVuSans-74" x="210.351562"/>
1452 |      <use xlink:href="#DejaVuSans-47" x="249.560547"/>
1453 |      <use xlink:href="#DejaVuSans-6c" x="327.050781"/>
1454 |      <use xlink:href="#DejaVuSans-75" x="354.833984"/>
1455 |      <use xlink:href="#DejaVuSans-65" x="418.212891"/>
1456 |     </g>
1457 |    </g>
1458 |    <g id="line2d_11">
1459 |     <path d="M 337.2777 184.285882 
1460 | L 247.002828 124.838824 
1461 | L 190.224522 53.502353 
1462 | L 163.40411 23.778824 
1463 | " clip-path="url(#pb46ed2897c)" style="fill: none; stroke: #ff0000; stroke-width: 2; stroke-linecap: square"/>
1464 |     <defs>
1465 |      <path id="m8759e5a643" d="M 0 3 
1466 | C 0.795609 3 1.55874 2.683901 2.12132 2.12132 
1467 | C 2.683901 1.55874 3 0.795609 3 0 
1468 | C 3 -0.795609 2.683901 -1.55874 2.12132 -2.12132 
1469 | C 1.55874 -2.683901 0.795609 -3 0 -3 
1470 | C -0.795609 -3 -1.55874 -2.683901 -2.12132 -2.12132 
1471 | C -2.683901 -1.55874 -3 -0.795609 -3 0 
1472 | C -3 0.795609 -2.683901 1.55874 -2.12132 2.12132 
1473 | C -1.55874 2.683901 -0.795609 3 0 3 
1474 | z
1475 | " style="stroke: #ff0000"/>
1476 |     </defs>
1477 |     <g clip-path="url(#pb46ed2897c)">
1478 |      <use xlink:href="#m8759e5a643" x="337.2777" y="184.285882" style="fill: #ff0000; stroke: #ff0000"/>
1479 |      <use xlink:href="#m8759e5a643" x="247.002828" y="124.838824" style="fill: #ff0000; stroke: #ff0000"/>
1480 |      <use xlink:href="#m8759e5a643" x="190.224522" y="53.502353" style="fill: #ff0000; stroke: #ff0000"/>
1481 |      <use xlink:href="#m8759e5a643" x="163.40411" y="23.778824" style="fill: #ff0000; stroke: #ff0000"/>
1482 |     </g>
1483 |    </g>
1484 |    <g id="line2d_12">
1485 |     <path d="M 296.754196 112.949412 
1486 | L 241.630312 71.336471 
1487 | L 214.425531 47.557647 
1488 | L 194.077595 29.723529 
1489 | L 163.121578 23.778824 
1490 | " clip-path="url(#pb46ed2897c)" style="fill: none; stroke-dasharray: 7.4,3.2; stroke-dashoffset: 0; stroke: #ff0000; stroke-width: 2"/>
1491 |    </g>
1492 |   </g>
1493 |  </g>
1494 |  <defs>
1495 |   <clipPath id="pb46ed2897c">
1496 |    <rect x="38.242188" y="0" width="313.259373" height="202.12"/>
1497 |   </clipPath>
1498 |  </defs>
1499 | </svg>
1500 | 


--------------------------------------------------------------------------------
/benchmark.py:
--------------------------------------------------------------------------------
  1 | # Benchmark script for LightGlue on real images
  2 | import argparse
  3 | import time
  4 | from collections import defaultdict
  5 | from pathlib import Path
  6 | 
  7 | import matplotlib.pyplot as plt
  8 | import numpy as np
  9 | import torch
 10 | import torch._dynamo
 11 | 
 12 | from lightglue import LightGlue, SuperPoint
 13 | from lightglue.utils import load_image
 14 | 
 15 | torch.set_grad_enabled(False)
 16 | 
 17 | 
 18 | def measure(matcher, data, device="cuda", r=100):
 19 |     timings = np.zeros((r, 1))
 20 |     if device.type == "cuda":
 21 |         starter = torch.cuda.Event(enable_timing=True)
 22 |         ender = torch.cuda.Event(enable_timing=True)
 23 |     # warmup
 24 |     for _ in range(10):
 25 |         _ = matcher(data)
 26 |     # measurements
 27 |     with torch.no_grad():
 28 |         for rep in range(r):
 29 |             if device.type == "cuda":
 30 |                 starter.record()
 31 |                 _ = matcher(data)
 32 |                 ender.record()
 33 |                 # sync gpu
 34 |                 torch.cuda.synchronize()
 35 |                 curr_time = starter.elapsed_time(ender)
 36 |             else:
 37 |                 start = time.perf_counter()
 38 |                 _ = matcher(data)
 39 |                 curr_time = (time.perf_counter() - start) * 1e3
 40 |             timings[rep] = curr_time
 41 |     mean_syn = np.sum(timings) / r
 42 |     std_syn = np.std(timings)
 43 |     return {"mean": mean_syn, "std": std_syn}
 44 | 
 45 | 
 46 | def print_as_table(d, title, cnames):
 47 |     print()
 48 |     header = f"{title:30} " + " ".join([f"{x:>7}" for x in cnames])
 49 |     print(header)
 50 |     print("-" * len(header))
 51 |     for k, l in d.items():
 52 |         print(f"{k:30}", " ".join([f"{x:>7.1f}" for x in l]))
 53 | 
 54 | 
 55 | if __name__ == "__main__":
 56 |     parser = argparse.ArgumentParser(description="Benchmark script for LightGlue")
 57 |     parser.add_argument(
 58 |         "--device",
 59 |         choices=["auto", "cuda", "cpu", "mps"],
 60 |         default="auto",
 61 |         help="device to benchmark on",
 62 |     )
 63 |     parser.add_argument("--compile", action="store_true", help="Compile LightGlue runs")
 64 |     parser.add_argument(
 65 |         "--no_flash", action="store_true", help="disable FlashAttention"
 66 |     )
 67 |     parser.add_argument(
 68 |         "--no_prune_thresholds",
 69 |         action="store_true",
 70 |         help="disable pruning thresholds (i.e. always do pruning)",
 71 |     )
 72 |     parser.add_argument(
 73 |         "--add_superglue",
 74 |         action="store_true",
 75 |         help="add SuperGlue to the benchmark (requires hloc)",
 76 |     )
 77 |     parser.add_argument(
 78 |         "--measure", default="time", choices=["time", "log-time", "throughput"]
 79 |     )
 80 |     parser.add_argument(
 81 |         "--repeat", "--r", type=int, default=100, help="repetitions of measurements"
 82 |     )
 83 |     parser.add_argument(
 84 |         "--num_keypoints",
 85 |         nargs="+",
 86 |         type=int,
 87 |         default=[256, 512, 1024, 2048, 4096],
 88 |         help="number of keypoints (list separated by spaces)",
 89 |     )
 90 |     parser.add_argument(
 91 |         "--matmul_precision", default="highest", choices=["highest", "high", "medium"]
 92 |     )
 93 |     parser.add_argument(
 94 |         "--save", default=None, type=str, help="path where figure should be saved"
 95 |     )
 96 |     args = parser.parse_intermixed_args()
 97 | 
 98 |     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 99 |     if args.device != "auto":
100 |         device = torch.device(args.device)
101 | 
102 |     print("Running benchmark on device:", device)
103 | 
104 |     images = Path("assets")
105 |     inputs = {
106 |         "easy": (
107 |             load_image(images / "DSC_0411.JPG"),
108 |             load_image(images / "DSC_0410.JPG"),
109 |         ),
110 |         "difficult": (
111 |             load_image(images / "sacre_coeur1.jpg"),
112 |             load_image(images / "sacre_coeur2.jpg"),
113 |         ),
114 |     }
115 | 
116 |     configs = {
117 |         "LightGlue-full": {
118 |             "depth_confidence": -1,
119 |             "width_confidence": -1,
120 |         },
121 |         # 'LG-prune': {
122 |         #     'width_confidence': -1,
123 |         # },
124 |         # 'LG-depth': {
125 |         #     'depth_confidence': -1,
126 |         # },
127 |         "LightGlue-adaptive": {},
128 |     }
129 | 
130 |     if args.compile:
131 |         configs = {**configs, **{k + "-compile": v for k, v in configs.items()}}
132 | 
133 |     sg_configs = {
134 |         # 'SuperGlue': {},
135 |         "SuperGlue-fast": {"sinkhorn_iterations": 5}
136 |     }
137 | 
138 |     torch.set_float32_matmul_precision(args.matmul_precision)
139 | 
140 |     results = {k: defaultdict(list) for k, v in inputs.items()}
141 | 
142 |     extractor = SuperPoint(max_num_keypoints=None, detection_threshold=-1)
143 |     extractor = extractor.eval().to(device)
144 |     figsize = (len(inputs) * 4.5, 4.5)
145 |     fig, axes = plt.subplots(1, len(inputs), sharey=True, figsize=figsize)
146 |     axes = axes if len(inputs) > 1 else [axes]
147 |     fig.canvas.manager.set_window_title(f"LightGlue benchmark ({device.type})")
148 | 
149 |     for title, ax in zip(inputs.keys(), axes):
150 |         ax.set_xscale("log", base=2)
151 |         bases = [2**x for x in range(7, 16)]
152 |         ax.set_xticks(bases, bases)
153 |         ax.grid(which="major")
154 |         if args.measure == "log-time":
155 |             ax.set_yscale("log")
156 |             yticks = [10**x for x in range(6)]
157 |             ax.set_yticks(yticks, yticks)
158 |             mpos = [10**x * i for x in range(6) for i in range(2, 10)]
159 |             mlabel = [
160 |                 10**x * i if i in [2, 5] else None
161 |                 for x in range(6)
162 |                 for i in range(2, 10)
163 |             ]
164 |             ax.set_yticks(mpos, mlabel, minor=True)
165 |             ax.grid(which="minor", linewidth=0.2)
166 |         ax.set_title(title)
167 | 
168 |         ax.set_xlabel("# keypoints")
169 |         if args.measure == "throughput":
170 |             ax.set_ylabel("Throughput [pairs/s]")
171 |         else:
172 |             ax.set_ylabel("Latency [ms]")
173 | 
174 |     for name, conf in configs.items():
175 |         print("Run benchmark for:", name)
176 |         torch.cuda.empty_cache()
177 |         matcher = LightGlue(features="superpoint", flash=not args.no_flash, **conf)
178 |         if args.no_prune_thresholds:
179 |             matcher.pruning_keypoint_thresholds = {
180 |                 k: -1 for k in matcher.pruning_keypoint_thresholds
181 |             }
182 |         matcher = matcher.eval().to(device)
183 |         if name.endswith("compile"):
184 |             import torch._dynamo
185 | 
186 |             torch._dynamo.reset()  # avoid buffer overflow
187 |             matcher.compile()
188 |         for pair_name, ax in zip(inputs.keys(), axes):
189 |             image0, image1 = [x.to(device) for x in inputs[pair_name]]
190 |             runtimes = []
191 |             for num_kpts in args.num_keypoints:
192 |                 extractor.conf.max_num_keypoints = num_kpts
193 |                 feats0 = extractor.extract(image0)
194 |                 feats1 = extractor.extract(image1)
195 |                 runtime = measure(
196 |                     matcher,
197 |                     {"image0": feats0, "image1": feats1},
198 |                     device=device,
199 |                     r=args.repeat,
200 |                 )["mean"]
201 |                 results[pair_name][name].append(
202 |                     1000 / runtime if args.measure == "throughput" else runtime
203 |                 )
204 |             ax.plot(
205 |                 args.num_keypoints, results[pair_name][name], label=name, marker="o"
206 |             )
207 |         del matcher, feats0, feats1
208 | 
209 |     if args.add_superglue:
210 |         from hloc.matchers.superglue import SuperGlue
211 | 
212 |         for name, conf in sg_configs.items():
213 |             print("Run benchmark for:", name)
214 |             matcher = SuperGlue(conf)
215 |             matcher = matcher.eval().to(device)
216 |             for pair_name, ax in zip(inputs.keys(), axes):
217 |                 image0, image1 = [x.to(device) for x in inputs[pair_name]]
218 |                 runtimes = []
219 |                 for num_kpts in args.num_keypoints:
220 |                     extractor.conf.max_num_keypoints = num_kpts
221 |                     feats0 = extractor.extract(image0)
222 |                     feats1 = extractor.extract(image1)
223 |                     data = {
224 |                         "image0": image0[None],
225 |                         "image1": image1[None],
226 |                         **{k + "0": v for k, v in feats0.items()},
227 |                         **{k + "1": v for k, v in feats1.items()},
228 |                     }
229 |                     data["scores0"] = data["keypoint_scores0"]
230 |                     data["scores1"] = data["keypoint_scores1"]
231 |                     data["descriptors0"] = (
232 |                         data["descriptors0"].transpose(-1, -2).contiguous()
233 |                     )
234 |                     data["descriptors1"] = (
235 |                         data["descriptors1"].transpose(-1, -2).contiguous()
236 |                     )
237 |                     runtime = measure(matcher, data, device=device, r=args.repeat)[
238 |                         "mean"
239 |                     ]
240 |                     results[pair_name][name].append(
241 |                         1000 / runtime if args.measure == "throughput" else runtime
242 |                     )
243 |                 ax.plot(
244 |                     args.num_keypoints, results[pair_name][name], label=name, marker="o"
245 |                 )
246 |             del matcher, data, image0, image1, feats0, feats1
247 | 
248 |     for name, runtimes in results.items():
249 |         print_as_table(runtimes, name, args.num_keypoints)
250 | 
251 |     axes[0].legend()
252 |     fig.tight_layout()
253 |     if args.save:
254 |         plt.savefig(args.save, dpi=fig.dpi)
255 |     plt.show()
256 | 


--------------------------------------------------------------------------------
/lightglue/__init__.py:
--------------------------------------------------------------------------------
1 | from .aliked import ALIKED  # noqa
2 | from .disk import DISK  # noqa
3 | from .dog_hardnet import DoGHardNet  # noqa
4 | from .lightglue import LightGlue  # noqa
5 | from .sift import SIFT  # noqa
6 | from .superpoint import SuperPoint  # noqa
7 | from .utils import match_pair  # noqa
8 | 


--------------------------------------------------------------------------------
/lightglue/aliked.py:
--------------------------------------------------------------------------------
  1 | # BSD 3-Clause License
  2 | 
  3 | # Copyright (c) 2022, Zhao Xiaoming
  4 | # All rights reserved.
  5 | 
  6 | # Redistribution and use in source and binary forms, with or without
  7 | # modification, are permitted provided that the following conditions are met:
  8 | 
  9 | # 1. Redistributions of source code must retain the above copyright notice, this
 10 | #    list of conditions and the following disclaimer.
 11 | 
 12 | # 2. Redistributions in binary form must reproduce the above copyright notice,
 13 | #    this list of conditions and the following disclaimer in the documentation
 14 | #    and/or other materials provided with the distribution.
 15 | 
 16 | # 3. Neither the name of the copyright holder nor the names of its
 17 | #    contributors may be used to endorse or promote products derived from
 18 | #    this software without specific prior written permission.
 19 | 
 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 30 | 
 31 | # Authors:
 32 | # Xiaoming Zhao, Xingming Wu, Weihai Chen, Peter C.Y. Chen, Qingsong Xu, and Zhengguo Li
 33 | # Code from https://github.com/Shiaoming/ALIKED
 34 | 
 35 | from typing import Callable, Optional
 36 | 
 37 | import torch
 38 | import torch.nn.functional as F
 39 | import torchvision
 40 | from kornia.color import grayscale_to_rgb
 41 | from torch import nn
 42 | from torch.nn.modules.utils import _pair
 43 | from torchvision.models import resnet
 44 | 
 45 | from .utils import Extractor
 46 | 
 47 | 
 48 | def get_patches(
 49 |     tensor: torch.Tensor, required_corners: torch.Tensor, ps: int
 50 | ) -> torch.Tensor:
 51 |     c, h, w = tensor.shape
 52 |     corner = (required_corners - ps / 2 + 1).long()
 53 |     corner[:, 0] = corner[:, 0].clamp(min=0, max=w - 1 - ps)
 54 |     corner[:, 1] = corner[:, 1].clamp(min=0, max=h - 1 - ps)
 55 |     offset = torch.arange(0, ps)
 56 | 
 57 |     kw = {"indexing": "ij"} if torch.__version__ >= "1.10" else {}
 58 |     x, y = torch.meshgrid(offset, offset, **kw)
 59 |     patches = torch.stack((x, y)).permute(2, 1, 0).unsqueeze(2)
 60 |     patches = patches.to(corner) + corner[None, None]
 61 |     pts = patches.reshape(-1, 2)
 62 |     sampled = tensor.permute(1, 2, 0)[tuple(pts.T)[::-1]]
 63 |     sampled = sampled.reshape(ps, ps, -1, c)
 64 |     assert sampled.shape[:3] == patches.shape[:3]
 65 |     return sampled.permute(2, 3, 0, 1)
 66 | 
 67 | 
 68 | def simple_nms(scores: torch.Tensor, nms_radius: int):
 69 |     """Fast Non-maximum suppression to remove nearby points"""
 70 | 
 71 |     zeros = torch.zeros_like(scores)
 72 |     max_mask = scores == torch.nn.functional.max_pool2d(
 73 |         scores, kernel_size=nms_radius * 2 + 1, stride=1, padding=nms_radius
 74 |     )
 75 | 
 76 |     for _ in range(2):
 77 |         supp_mask = (
 78 |             torch.nn.functional.max_pool2d(
 79 |                 max_mask.float(),
 80 |                 kernel_size=nms_radius * 2 + 1,
 81 |                 stride=1,
 82 |                 padding=nms_radius,
 83 |             )
 84 |             > 0
 85 |         )
 86 |         supp_scores = torch.where(supp_mask, zeros, scores)
 87 |         new_max_mask = supp_scores == torch.nn.functional.max_pool2d(
 88 |             supp_scores, kernel_size=nms_radius * 2 + 1, stride=1, padding=nms_radius
 89 |         )
 90 |         max_mask = max_mask | (new_max_mask & (~supp_mask))
 91 |     return torch.where(max_mask, scores, zeros)
 92 | 
 93 | 
 94 | class DKD(nn.Module):
 95 |     def __init__(
 96 |         self,
 97 |         radius: int = 2,
 98 |         top_k: int = 0,
 99 |         scores_th: float = 0.2,
100 |         n_limit: int = 20000,
101 |     ):
102 |         """
103 |         Args:
104 |             radius: soft detection radius, kernel size is (2 * radius + 1)
105 |             top_k: top_k > 0: return top k keypoints
106 |             scores_th: top_k <= 0 threshold mode:
107 |                 scores_th > 0: return keypoints with scores>scores_th
108 |                 else: return keypoints with scores > scores.mean()
109 |             n_limit: max number of keypoint in threshold mode
110 |         """
111 |         super().__init__()
112 |         self.radius = radius
113 |         self.top_k = top_k
114 |         self.scores_th = scores_th
115 |         self.n_limit = n_limit
116 |         self.kernel_size = 2 * self.radius + 1
117 |         self.temperature = 0.1  # tuned temperature
118 |         self.unfold = nn.Unfold(kernel_size=self.kernel_size, padding=self.radius)
119 |         # local xy grid
120 |         x = torch.linspace(-self.radius, self.radius, self.kernel_size)
121 |         # (kernel_size*kernel_size) x 2 : (w,h)
122 |         kw = {"indexing": "ij"} if torch.__version__ >= "1.10" else {}
123 |         self.hw_grid = (
124 |             torch.stack(torch.meshgrid([x, x], **kw)).view(2, -1).t()[:, [1, 0]]
125 |         )
126 | 
127 |     def forward(
128 |         self,
129 |         scores_map: torch.Tensor,
130 |         sub_pixel: bool = True,
131 |         image_size: Optional[torch.Tensor] = None,
132 |     ):
133 |         """
134 |         :param scores_map: Bx1xHxW
135 |         :param descriptor_map: BxCxHxW
136 |         :param sub_pixel: whether to use sub-pixel keypoint detection
137 |         :return: kpts: list[Nx2,...]; kptscores: list[N,....] normalised position: -1~1
138 |         """
139 |         b, c, h, w = scores_map.shape
140 |         scores_nograd = scores_map.detach()
141 |         nms_scores = simple_nms(scores_nograd, self.radius)
142 | 
143 |         # remove border
144 |         nms_scores[:, :, : self.radius, :] = 0
145 |         nms_scores[:, :, :, : self.radius] = 0
146 |         if image_size is not None:
147 |             for i in range(scores_map.shape[0]):
148 |                 w, h = image_size[i].long()
149 |                 nms_scores[i, :, h.item() - self.radius :, :] = 0
150 |                 nms_scores[i, :, :, w.item() - self.radius :] = 0
151 |         else:
152 |             nms_scores[:, :, -self.radius :, :] = 0
153 |             nms_scores[:, :, :, -self.radius :] = 0
154 | 
155 |         # detect keypoints without grad
156 |         if self.top_k > 0:
157 |             topk = torch.topk(nms_scores.view(b, -1), self.top_k)
158 |             indices_keypoints = [topk.indices[i] for i in range(b)]  # B x top_k
159 |         else:
160 |             if self.scores_th > 0:
161 |                 masks = nms_scores > self.scores_th
162 |                 if masks.sum() == 0:
163 |                     th = scores_nograd.reshape(b, -1).mean(dim=1)  # th = self.scores_th
164 |                     masks = nms_scores > th.reshape(b, 1, 1, 1)
165 |             else:
166 |                 th = scores_nograd.reshape(b, -1).mean(dim=1)  # th = self.scores_th
167 |                 masks = nms_scores > th.reshape(b, 1, 1, 1)
168 |             masks = masks.reshape(b, -1)
169 | 
170 |             indices_keypoints = []  # list, B x (any size)
171 |             scores_view = scores_nograd.reshape(b, -1)
172 |             for mask, scores in zip(masks, scores_view):
173 |                 indices = mask.nonzero()[:, 0]
174 |                 if len(indices) > self.n_limit:
175 |                     kpts_sc = scores[indices]
176 |                     sort_idx = kpts_sc.sort(descending=True)[1]
177 |                     sel_idx = sort_idx[: self.n_limit]
178 |                     indices = indices[sel_idx]
179 |                 indices_keypoints.append(indices)
180 | 
181 |         wh = torch.tensor([w - 1, h - 1], device=scores_nograd.device)
182 | 
183 |         keypoints = []
184 |         scoredispersitys = []
185 |         kptscores = []
186 |         if sub_pixel:
187 |             # detect soft keypoints with grad backpropagation
188 |             patches = self.unfold(scores_map)  # B x (kernel**2) x (H*W)
189 |             self.hw_grid = self.hw_grid.to(scores_map)  # to device
190 |             for b_idx in range(b):
191 |                 patch = patches[b_idx].t()  # (H*W) x (kernel**2)
192 |                 indices_kpt = indices_keypoints[
193 |                     b_idx
194 |                 ]  # one dimension vector, say its size is M
195 |                 patch_scores = patch[indices_kpt]  # M x (kernel**2)
196 |                 keypoints_xy_nms = torch.stack(
197 |                     [indices_kpt % w, torch.div(indices_kpt, w, rounding_mode="trunc")],
198 |                     dim=1,
199 |                 )  # Mx2
200 | 
201 |                 # max is detached to prevent undesired backprop loops in the graph
202 |                 max_v = patch_scores.max(dim=1).values.detach()[:, None]
203 |                 x_exp = (
204 |                     (patch_scores - max_v) / self.temperature
205 |                 ).exp()  # M * (kernel**2), in [0, 1]
206 | 
207 |                 # \frac{ \sum{(i,j) \times \exp(x/T)} }{ \sum{\exp(x/T)} }
208 |                 xy_residual = (
209 |                     x_exp @ self.hw_grid / x_exp.sum(dim=1)[:, None]
210 |                 )  # Soft-argmax, Mx2
211 | 
212 |                 hw_grid_dist2 = (
213 |                     torch.norm(
214 |                         (self.hw_grid[None, :, :] - xy_residual[:, None, :])
215 |                         / self.radius,
216 |                         dim=-1,
217 |                     )
218 |                     ** 2
219 |                 )
220 |                 scoredispersity = (x_exp * hw_grid_dist2).sum(dim=1) / x_exp.sum(dim=1)
221 | 
222 |                 # compute result keypoints
223 |                 keypoints_xy = keypoints_xy_nms + xy_residual
224 |                 keypoints_xy = keypoints_xy / wh * 2 - 1  # (w,h) -> (-1~1,-1~1)
225 | 
226 |                 kptscore = torch.nn.functional.grid_sample(
227 |                     scores_map[b_idx].unsqueeze(0),
228 |                     keypoints_xy.view(1, 1, -1, 2),
229 |                     mode="bilinear",
230 |                     align_corners=True,
231 |                 )[
232 |                     0, 0, 0, :
233 |                 ]  # CxN
234 | 
235 |                 keypoints.append(keypoints_xy)
236 |                 scoredispersitys.append(scoredispersity)
237 |                 kptscores.append(kptscore)
238 |         else:
239 |             for b_idx in range(b):
240 |                 indices_kpt = indices_keypoints[
241 |                     b_idx
242 |                 ]  # one dimension vector, say its size is M
243 |                 # To avoid warning: UserWarning: __floordiv__ is deprecated
244 |                 keypoints_xy_nms = torch.stack(
245 |                     [indices_kpt % w, torch.div(indices_kpt, w, rounding_mode="trunc")],
246 |                     dim=1,
247 |                 )  # Mx2
248 |                 keypoints_xy = keypoints_xy_nms / wh * 2 - 1  # (w,h) -> (-1~1,-1~1)
249 |                 kptscore = torch.nn.functional.grid_sample(
250 |                     scores_map[b_idx].unsqueeze(0),
251 |                     keypoints_xy.view(1, 1, -1, 2),
252 |                     mode="bilinear",
253 |                     align_corners=True,
254 |                 )[
255 |                     0, 0, 0, :
256 |                 ]  # CxN
257 |                 keypoints.append(keypoints_xy)
258 |                 scoredispersitys.append(kptscore)  # for jit.script compatability
259 |                 kptscores.append(kptscore)
260 | 
261 |         return keypoints, scoredispersitys, kptscores
262 | 
263 | 
class InputPadder(object):
    """Pads images so both spatial sizes become multiples of ``divis_by``.

    The padding amounts are computed once from the height and width given at
    construction and are split between the two sides of each axis, the extra
    pixel (if any) going to the right / bottom.
    """

    def __init__(self, h: int, w: int, divis_by: int = 8):
        self.ht = h
        self.wd = w
        # Smallest non-negative amount that reaches the next multiple.
        extra_h = -self.ht % divis_by
        extra_w = -self.wd % divis_by
        left, top = extra_w // 2, extra_h // 2
        # Layout expected by F.pad for the last two dims:
        # [left, right, top, bottom].
        self._pad = [left, extra_w - left, top, extra_h - top]

    def pad(self, x: torch.Tensor):
        """Return the 4-D tensor ``x`` padded by edge replication."""
        assert x.ndim == 4
        return F.pad(x, self._pad, mode="replicate")

    def unpad(self, x: torch.Tensor):
        """Crop the stored padding amounts off the 4-D tensor ``x``."""
        assert x.ndim == 4
        left, right, top, bottom = self._pad
        rows, cols = x.shape[-2], x.shape[-1]
        return x[..., top : rows - bottom, left : cols - right]
289 | 
290 | 
class DeformableConv2d(nn.Module):
    """Deformable convolution whose sampling offsets are predicted from the input.

    A regular convolution (``offset_conv``) predicts, for every output
    location, two offset values per kernel tap plus, when ``mask`` is set, one
    modulation value per tap.  ``regular_conv`` only stores the weight and
    bias that are handed to ``torchvision.ops.deform_conv2d``.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        kernel_size: size of the square kernel.
        stride: stride shared by the offset predictor and the deformable
            convolution.
        padding: padding shared by both convolutions.
        bias: whether the deformable convolution has a bias term.
        mask: whether to predict and apply a modulation mask.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
        mask=False,
    ):
        super(DeformableConv2d, self).__init__()

        # Remembered so that forward() can hand it to deform_conv2d: the offset
        # map produced by offset_conv must match the output resolution.
        self.stride = stride
        self.padding = padding
        self.mask = mask

        self.channel_num = (
            3 * kernel_size * kernel_size if mask else 2 * kernel_size * kernel_size
        )
        self.offset_conv = nn.Conv2d(
            in_channels,
            self.channel_num,
            kernel_size=kernel_size,
            stride=stride,
            padding=self.padding,
            bias=True,
        )

        self.regular_conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=self.padding,
            bias=bias,
        )

    def forward(self, x):
        """Apply the deformable convolution to ``x`` (indexed as B x C x H x W)."""
        h, w = x.shape[2:]
        # Offsets are limited to a quarter of the larger spatial dimension.
        max_offset = max(h, w) / 4.0

        out = self.offset_conv(x)
        if self.mask:
            o1, o2, mask = torch.chunk(out, 3, dim=1)
            offset = torch.cat((o1, o2), dim=1)
            mask = torch.sigmoid(mask)
        else:
            offset = out
            mask = None
        offset = offset.clamp(-max_offset, max_offset)
        x = torchvision.ops.deform_conv2d(
            input=x,
            offset=offset,
            weight=self.regular_conv.weight,
            bias=self.regular_conv.bias,
            # Previously omitted, which silently used stride 1 and made any
            # other stride fail on the offset/output size mismatch.
            stride=self.stride,
            padding=self.padding,
            mask=mask,
        )
        return x
350 | 
351 | 
def get_conv(
    inplanes,
    planes,
    kernel_size=3,
    stride=1,
    padding=1,
    bias=False,
    conv_type="conv",
    mask=False,
):
    """Build the convolution layer selected by ``conv_type``.

    Args:
        inplanes: number of input channels.
        planes: number of output channels.
        kernel_size: kernel size of the convolution.
        stride: stride of the convolution.
        padding: padding of the convolution.
        bias: whether the layer has a bias term.
        conv_type: ``"conv"`` for ``nn.Conv2d`` or ``"dcn"`` for
            ``DeformableConv2d``.
        mask: forwarded to ``DeformableConv2d``; unused for ``"conv"``.

    Returns:
        The constructed layer.

    Raises:
        TypeError: if ``conv_type`` is neither ``"conv"`` nor ``"dcn"``.
    """
    if conv_type == "conv":
        return nn.Conv2d(
            inplanes,
            planes,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=bias,
        )
    if conv_type == "dcn":
        return DeformableConv2d(
            inplanes,
            planes,
            kernel_size=kernel_size,
            stride=stride,
            padding=_pair(padding),
            bias=bias,
            mask=mask,
        )
    # Same exception type as before, now saying what was wrong.
    raise TypeError(
        f"Unsupported conv_type {conv_type!r}: expected 'conv' or 'dcn'"
    )
384 | 
385 | 
class ConvBlock(nn.Module):
    """Two consecutive 3x3 convolution -> normalisation -> activation stages.

    Args:
        in_channels: channels of the input tensor.
        out_channels: channels produced by both stages.
        gate: activation module shared by the two stages; defaults to an
            in-place ``nn.ReLU``.
        norm_layer: callable building a normalisation layer from a channel
            count; defaults to ``nn.BatchNorm2d``.
        conv_type: convolution flavour forwarded to ``get_conv``.
        mask: forwarded to ``get_conv``.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        gate: Optional[Callable[..., nn.Module]] = None,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
        conv_type: str = "conv",
        mask: bool = False,
    ):
        super().__init__()
        self.gate = nn.ReLU(inplace=True) if gate is None else gate
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        conv_options = dict(kernel_size=3, conv_type=conv_type, mask=mask)

        # Layers are created in execution order.
        self.conv1 = get_conv(in_channels, out_channels, **conv_options)
        self.bn1 = make_norm(out_channels)
        self.conv2 = get_conv(out_channels, out_channels, **conv_options)
        self.bn2 = make_norm(out_channels)

    def forward(self, x):
        """Run both stages; each one outputs ``out_channels`` channels."""
        stages = ((self.conv1, self.bn1), (self.conv2, self.bn2))
        for conv, norm in stages:
            x = self.gate(norm(conv(x)))
        return x
416 | 
417 | 
418 | # modified based on torchvision\models\resnet.py#27->BasicBlock
class ResBlock(nn.Module):
    """Residual block: two 3x3 convolutions plus a skip connection.

    Args:
        inplanes: channels of the input tensor.
        planes: channels produced by the block.
        stride: stored on the instance as ``self.stride``; it is not passed to
            the convolutions built here.
        downsample: optional module applied to the input to form the skip
            branch; the raw input is used when it is ``None``.
        groups: only ``1`` is accepted.
        base_width: only ``64`` is accepted.
        dilation: values above ``1`` are rejected.
        gate: activation module; defaults to an in-place ``nn.ReLU``.
        norm_layer: callable building a normalisation layer from a channel
            count; defaults to ``nn.BatchNorm2d``.
        conv_type: convolution flavour forwarded to ``get_conv``.
        mask: forwarded to ``get_conv``.

    Raises:
        ValueError: if ``groups != 1`` or ``base_width != 64``.
        NotImplementedError: if ``dilation > 1``.
    """

    expansion: int = 1

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        downsample: Optional[nn.Module] = None,
        groups: int = 1,
        base_width: int = 64,
        dilation: int = 1,
        gate: Optional[Callable[..., nn.Module]] = None,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
        conv_type: str = "conv",
        mask: bool = False,
    ) -> None:
        super(ResBlock, self).__init__()
        self.gate = nn.ReLU(inplace=True) if gate is None else gate
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer

        unsupported_layout = not (groups == 1 and base_width == 64)
        if unsupported_layout:
            raise ValueError("ResBlock only supports groups=1 and base_width=64")
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in ResBlock")

        conv_options = dict(kernel_size=3, conv_type=conv_type, mask=mask)
        self.conv1 = get_conv(inplanes, planes, **conv_options)
        self.bn1 = make_norm(planes)
        self.conv2 = get_conv(planes, planes, **conv_options)
        self.bn2 = make_norm(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``gate(main_branch(x) + skip(x))``."""
        # Main branch: conv -> norm -> gate -> conv -> norm.
        out = self.gate(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Skip branch: the input itself unless a projection was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.gate(out)
477 | 
478 | 
class SDDH(nn.Module):
    """Descriptor head that samples features at learned offsets around keypoints.

    For every keypoint a small network (``offset_conv``) predicts ``n_pos``
    sampling offsets (plus one weight per position when ``mask`` is set) from
    the feature patch around the keypoint.  Features are bilinearly sampled
    at those positions, passed through ``sf_conv`` and aggregated into one
    L2-normalised descriptor per keypoint.

    Args:
        dims: number of feature channels, also the descriptor size.
        kernel_size: side of the patch fed to the offset network; ``1`` reads
            the single feature vector under the keypoint.
        n_pos: number of sampling positions per keypoint.
        gate: activation placed between the two offset convolutions.
        conv2D: aggregate with a 1x1 convolution (``convM``) instead of the
            ``agg_weights`` parameter.
        mask: additionally predict a per-position weight applied to the
            sampled features.
    """

    def __init__(
        self,
        dims: int,
        kernel_size: int = 3,
        n_pos: int = 8,
        gate=nn.ReLU(),
        conv2D=False,
        mask=False,
    ):
        super(SDDH, self).__init__()
        self.kernel_size = kernel_size
        self.n_pos = n_pos
        self.conv2D = conv2D
        self.mask = mask

        # patch extractor defined elsewhere in this file
        self.get_patches_func = get_patches

        # estimate offsets
        self.channel_num = 3 * n_pos if mask else 2 * n_pos
        self.offset_conv = nn.Sequential(
            nn.Conv2d(
                dims,
                self.channel_num,
                kernel_size=kernel_size,
                stride=1,
                padding=0,
                bias=True,
            ),
            gate,
            nn.Conv2d(
                self.channel_num,
                self.channel_num,
                kernel_size=1,
                stride=1,
                padding=0,
                bias=True,
            ),
        )

        # sampled feature conv
        self.sf_conv = nn.Conv2d(
            dims, dims, kernel_size=1, stride=1, padding=0, bias=False
        )

        # convM
        if not conv2D:
            # deformable desc weights
            agg_weights = torch.nn.Parameter(torch.rand(n_pos, dims, dims))
            self.register_parameter("agg_weights", agg_weights)
        else:
            self.convM = nn.Conv2d(
                dims * n_pos, dims, kernel_size=1, stride=1, padding=0, bias=False
            )

    def forward(self, x, keypoints):
        """Compute one descriptor per keypoint.

        Args:
            x: feature map, [B,C,H,W].
            keypoints: list with one [N_kpts,2] tensor per batch element,
                (x, y) normalised to [-1, 1] with respect to ``x``.

        Returns:
            ``(descriptors, offsets)``: two lists with one entry per batch
            element, descriptors [N_kpts, C] (L2-normalised) and the clamped
            sampling offsets [N_kpts, n_pos, 2].
        """
        b, c, h, w = x.shape
        wh = torch.tensor([[w - 1, h - 1]], device=x.device)
        max_offset = max(h, w) / 4.0

        offsets = []
        descriptors = []
        # get offsets for each keypoint
        for ib in range(b):
            xi, kptsi = x[ib], keypoints[ib]
            kptsi_wh = (kptsi / 2 + 0.5) * wh  # [-1, 1] -> pixel coordinates
            N_kpts = len(kptsi)

            if self.kernel_size > 1:
                patch = self.get_patches_func(
                    xi, kptsi_wh.long(), self.kernel_size
                )  # [N_kpts, C, K, K]
            else:
                kptsi_wh_long = kptsi_wh.long()
                patch = (
                    xi[:, kptsi_wh_long[:, 1], kptsi_wh_long[:, 0]]
                    .permute(1, 0)
                    .reshape(N_kpts, c, 1, 1)
                )

            raw = self.offset_conv(patch)[:, :, 0, 0]  # [N_kpts, channel_num]
            if self.mask:
                raw = raw.view(N_kpts, 3, self.n_pos).permute(
                    0, 2, 1
                )  # [N_kpts, n_pos, 3]
                # Read the mask channel BEFORE it is sliced away; taking
                # [..., -1] after the slice would return the y-offset instead.
                mask_weight = torch.sigmoid(raw[:, :, -1])  # [N_kpts, n_pos]
                offset = raw[:, :, :-1]  # [N_kpts, n_pos, 2]
            else:
                offset = raw.view(N_kpts, 2, self.n_pos).permute(
                    0, 2, 1
                )  # [N_kpts, n_pos, 2]
            # Only the offsets are range-limited (as in DeformableConv2d); the
            # mask logits go through the sigmoid unclamped.
            offset = offset.clamp(-max_offset, max_offset)
            offsets.append(offset)  # for visualization

            # get sample positions
            pos = kptsi_wh.unsqueeze(1) + offset  # [N_kpts, n_pos, 2]
            pos = 2.0 * pos / wh[None] - 1
            pos = pos.reshape(1, N_kpts * self.n_pos, 1, 2)

            # sample features
            features = F.grid_sample(
                xi.unsqueeze(0), pos, mode="bilinear", align_corners=True
            )  # [1,C,(N_kpts*n_pos),1]
            features = features.reshape(c, N_kpts, self.n_pos, 1).permute(
                1, 0, 2, 3
            )  # [N_kpts, C, n_pos, 1]
            if self.mask:
                features = torch.einsum("ncpo,np->ncpo", features, mask_weight)

            features = torch.selu_(self.sf_conv(features)).squeeze(
                -1
            )  # [N_kpts, C, n_pos]
            # convM
            if not self.conv2D:
                descs = torch.einsum(
                    "ncp,pcd->nd", features, self.agg_weights
                )  # [N_kpts, C]
            else:
                features = features.reshape(N_kpts, -1)[
                    :, :, None, None
                ]  # [N_kpts, C*n_pos, 1, 1]
                # flatten(1) keeps the keypoint axis even when N_kpts == 1,
                # unlike a bare squeeze().
                descs = self.convM(features).flatten(1)  # [N_kpts, C]

            # normalize
            descs = F.normalize(descs, p=2.0, dim=1)
            descriptors.append(descs)

        return descriptors, offsets
610 | 
611 | 
class ALIKED(Extractor):
    """ALIKED keypoint detector and descriptor exposed as an ``Extractor``.

    The network builds a four-level feature pyramid, fuses it into a dense
    feature map, predicts a dense score map from it, detects keypoints on the
    score map with ``DKD`` and describes them with ``SDDH``.  Pretrained
    weights are downloaded from ``checkpoint_url`` at construction time.
    """

    default_conf = {
        "model_name": "aliked-n16",
        "max_num_keypoints": -1,
        "detection_threshold": 0.2,
        "nms_radius": 2,
    }

    checkpoint_url = "https://github.com/Shiaoming/ALIKED/raw/main/models/{}.pth"

    # keypoint cap used in threshold mode when max_num_keypoints is not positive
    n_limit_max = 20000

    # c1, c2, c3, c4, dim, K, M
    cfgs = {
        "aliked-t16": [8, 16, 32, 64, 64, 3, 16],
        "aliked-n16": [16, 32, 64, 128, 128, 3, 16],
        "aliked-n16rot": [16, 32, 64, 128, 128, 3, 16],
        "aliked-n32": [16, 32, 64, 128, 128, 3, 32],
    }
    preprocess_conf = {
        "resize": 1024,
    }

    required_data_keys = ["image"]

    def __init__(self, **conf):
        super().__init__(**conf)  # Update with default configuration.
        conf = self.conf
        c1, c2, c3, c4, dim, K, M = self.cfgs[conf.model_name]
        conv_types = ["conv", "conv", "dcn", "dcn"]
        conv2D = False
        mask = False

        # build model
        self.pool2 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.pool4 = nn.AvgPool2d(kernel_size=4, stride=4)
        self.norm = nn.BatchNorm2d
        self.gate = nn.SELU(inplace=True)
        self.block1 = ConvBlock(3, c1, self.gate, self.norm, conv_type=conv_types[0])
        self.block2 = self.get_resblock(c1, c2, conv_types[1], mask)
        self.block3 = self.get_resblock(c2, c3, conv_types[2], mask)
        self.block4 = self.get_resblock(c3, c4, conv_types[3], mask)

        self.conv1 = resnet.conv1x1(c1, dim // 4)
        self.conv2 = resnet.conv1x1(c2, dim // 4)
        self.conv3 = resnet.conv1x1(c3, dim // 4)
        self.conv4 = resnet.conv1x1(dim, dim // 4)
        self.upsample2 = nn.Upsample(
            scale_factor=2, mode="bilinear", align_corners=True
        )
        self.upsample4 = nn.Upsample(
            scale_factor=4, mode="bilinear", align_corners=True
        )
        self.upsample8 = nn.Upsample(
            scale_factor=8, mode="bilinear", align_corners=True
        )
        self.upsample32 = nn.Upsample(
            scale_factor=32, mode="bilinear", align_corners=True
        )
        self.score_head = nn.Sequential(
            resnet.conv1x1(dim, 8),
            self.gate,
            resnet.conv3x3(8, 4),
            self.gate,
            resnet.conv3x3(4, 4),
            self.gate,
            resnet.conv3x3(4, 1),
        )
        self.desc_head = SDDH(dim, K, M, gate=self.gate, conv2D=conv2D, mask=mask)
        # A positive detection threshold selects threshold mode (top_k=-1) and
        # max_num_keypoints then only caps the count through n_limit.
        self.dkd = DKD(
            radius=conf.nms_radius,
            top_k=-1 if conf.detection_threshold > 0 else conf.max_num_keypoints,
            scores_th=conf.detection_threshold,
            n_limit=conf.max_num_keypoints
            if conf.max_num_keypoints > 0
            else self.n_limit_max,
        )

        state_dict = torch.hub.load_state_dict_from_url(
            self.checkpoint_url.format(conf.model_name), map_location="cpu"
        )
        self.load_state_dict(state_dict, strict=True)

    def get_resblock(self, c_in, c_out, conv_type, mask):
        """Build a stride-1 ``ResBlock`` whose skip branch is a 1x1 convolution."""
        return ResBlock(
            c_in,
            c_out,
            1,
            nn.Conv2d(c_in, c_out, 1),
            gate=self.gate,
            norm_layer=self.norm,
            conv_type=conv_type,
            mask=mask,
        )

    def extract_dense_map(self, image):
        """Return ``(feature_map, score_map)`` at the resolution of ``image``.

        ``feature_map`` is L2-normalised along the channel dimension and
        ``score_map`` has passed through a sigmoid.
        """
        # Pads images such that dimensions are divisible by 32
        div_by = 2**5
        padder = InputPadder(image.shape[-2], image.shape[-1], div_by)
        image = padder.pad(image)

        # ================================== feature encoder
        x1 = self.block1(image)  # B x c1 x H x W
        x2 = self.pool2(x1)
        x2 = self.block2(x2)  # B x c2 x H/2 x W/2
        x3 = self.pool4(x2)
        x3 = self.block3(x3)  # B x c3 x H/8 x W/8
        x4 = self.pool4(x3)
        x4 = self.block4(x4)  # B x dim x H/32 x W/32
        # ================================== feature aggregation
        x1 = self.gate(self.conv1(x1))  # B x dim//4 x H x W
        x2 = self.gate(self.conv2(x2))  # B x dim//4 x H//2 x W//2
        x3 = self.gate(self.conv3(x3))  # B x dim//4 x H//8 x W//8
        x4 = self.gate(self.conv4(x4))  # B x dim//4 x H//32 x W//32
        x2_up = self.upsample2(x2)  # B x dim//4 x H x W
        x3_up = self.upsample8(x3)  # B x dim//4 x H x W
        x4_up = self.upsample32(x4)  # B x dim//4 x H x W
        x1234 = torch.cat([x1, x2_up, x3_up, x4_up], dim=1)
        # ================================== score head
        score_map = torch.sigmoid(self.score_head(x1234))
        feature_map = torch.nn.functional.normalize(x1234, p=2, dim=1)

        # Unpads images
        feature_map = padder.unpad(feature_map)
        score_map = padder.unpad(score_map)

        return feature_map, score_map

    def forward(self, data: dict) -> dict:
        """Detect and describe keypoints.

        Args:
            data: dict with ``"image"`` (B x C x H x W; a single channel is
                expanded to RGB) and optionally ``"image_size"``, which is
                forwarded to the detector.

        Returns:
            dict with ``"keypoints"`` (B x N x 2, pixel coordinates),
            ``"descriptors"`` (B x N x D) and ``"keypoint_scores"`` (B x N).
        """
        image = data["image"]
        if image.shape[1] == 1:
            image = grayscale_to_rgb(image)
        feature_map, score_map = self.extract_dense_map(image)
        # DKD.forward returns (keypoints, scoredispersitys, kptscores); unpack
        # in that order so "keypoint_scores" really holds the sampled scores
        # and not the score dispersity.
        keypoints, scoredispersitys, kptscores = self.dkd(
            score_map, image_size=data.get("image_size")
        )
        descriptors, offsets = self.desc_head(feature_map, keypoints)

        _, _, h, w = image.shape
        wh = torch.tensor([w - 1, h - 1], device=image.device)
        # no padding required
        # we can set detection_threshold=-1 and conf.max_num_keypoints > 0
        return {
            "keypoints": wh * (torch.stack(keypoints) + 1) / 2.0,  # B x N x 2
            "descriptors": torch.stack(descriptors),  # B x N x D
            "keypoint_scores": torch.stack(kptscores),  # B x N
        }
759 | 


--------------------------------------------------------------------------------
/lightglue/disk.py:
--------------------------------------------------------------------------------
 1 | import kornia
 2 | import torch
 3 | 
 4 | from .utils import Extractor
 5 | 
 6 | 
 7 | class DISK(Extractor):
 8 |     default_conf = {
 9 |         "weights": "depth",
10 |         "max_num_keypoints": None,
11 |         "desc_dim": 128,
12 |         "nms_window_size": 5,
13 |         "detection_threshold": 0.0,
14 |         "pad_if_not_divisible": True,
15 |     }
16 | 
17 |     preprocess_conf = {
18 |         "resize": 1024,
19 |         "grayscale": False,
20 |     }
21 | 
22 |     required_data_keys = ["image"]
23 | 
24 |     def __init__(self, **conf) -> None:
25 |         super().__init__(**conf)  # Update with default configuration.
26 |         self.model = kornia.feature.DISK.from_pretrained(self.conf.weights)
27 | 
28 |     def forward(self, data: dict) -> dict:
29 |         """Compute keypoints, scores, descriptors for image"""
30 |         for key in self.required_data_keys:
31 |             assert key in data, f"Missing key {key} in data"
32 |         image = data["image"]
33 |         if image.shape[1] == 1:
34 |             image = kornia.color.grayscale_to_rgb(image)
35 |         features = self.model(
36 |             image,
37 |             n=self.conf.max_num_keypoints,
38 |             window_size=self.conf.nms_window_size,
39 |             score_threshold=self.conf.detection_threshold,
40 |             pad_if_not_divisible=self.conf.pad_if_not_divisible,
41 |         )
42 |         keypoints = [f.keypoints for f in features]
43 |         scores = [f.detection_scores for f in features]
44 |         descriptors = [f.descriptors for f in features]
45 |         del features
46 | 
47 |         keypoints = torch.stack(keypoints, 0)
48 |         scores = torch.stack(scores, 0)
49 |         descriptors = torch.stack(descriptors, 0)
50 | 
51 |         return {
52 |             "keypoints": keypoints.to(image).contiguous(),
53 |             "keypoint_scores": scores.to(image).contiguous(),
54 |             "descriptors": descriptors.to(image).contiguous(),
55 |         }
56 | 


--------------------------------------------------------------------------------
/lightglue/dog_hardnet.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from kornia.color import rgb_to_grayscale
 3 | from kornia.feature import HardNet, LAFDescriptor, laf_from_center_scale_ori
 4 | 
 5 | from .sift import SIFT
 6 | 
 7 | 
 8 | class DoGHardNet(SIFT):
 9 |     required_data_keys = ["image"]
10 | 
11 |     def __init__(self, **conf):
12 |         super().__init__(**conf)
13 |         self.laf_desc = LAFDescriptor(HardNet(True)).eval()
14 | 
15 |     def forward(self, data: dict) -> dict:
16 |         image = data["image"]
17 |         if image.shape[1] == 3:
18 |             image = rgb_to_grayscale(image)
19 |         device = image.device
20 |         self.laf_desc = self.laf_desc.to(device)
21 |         self.laf_desc.descriptor = self.laf_desc.descriptor.eval()
22 |         pred = []
23 |         if "image_size" in data.keys():
24 |             im_size = data.get("image_size").long()
25 |         else:
26 |             im_size = None
27 |         for k in range(len(image)):
28 |             img = image[k]
29 |             if im_size is not None:
30 |                 w, h = data["image_size"][k]
31 |                 img = img[:, : h.to(torch.int32), : w.to(torch.int32)]
32 |             p = self.extract_single_image(img)
33 |             lafs = laf_from_center_scale_ori(
34 |                 p["keypoints"].reshape(1, -1, 2),
35 |                 6.0 * p["scales"].reshape(1, -1, 1, 1),
36 |                 torch.rad2deg(p["oris"]).reshape(1, -1, 1),
37 |             ).to(device)
38 |             p["descriptors"] = self.laf_desc(img[None], lafs).reshape(-1, 128)
39 |             pred.append(p)
40 |         pred = {k: torch.stack([p[k] for p in pred], 0).to(device) for k in pred[0]}
41 |         return pred
42 | 


--------------------------------------------------------------------------------
/lightglue/lightglue.py:
--------------------------------------------------------------------------------
  1 | import warnings
  2 | from pathlib import Path
  3 | from types import SimpleNamespace
  4 | from typing import Callable, List, Optional, Tuple
  5 | 
  6 | import numpy as np
  7 | import torch
  8 | import torch.nn.functional as F
  9 | from torch import nn
 10 | 
 11 | try:
 12 |     from flash_attn.modules.mha import FlashCrossAttention
 13 | except ModuleNotFoundError:
 14 |     FlashCrossAttention = None
 15 | 
 16 | if FlashCrossAttention or hasattr(F, "scaled_dot_product_attention"):
 17 |     FLASH_AVAILABLE = True
 18 | else:
 19 |     FLASH_AVAILABLE = False
 20 | 
 21 | torch.backends.cudnn.deterministic = True
 22 | 
 23 | 
 24 | @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)
 25 | def normalize_keypoints(
 26 |     kpts: torch.Tensor, size: Optional[torch.Tensor] = None
 27 | ) -> torch.Tensor:
 28 |     if size is None:
 29 |         size = 1 + kpts.max(-2).values - kpts.min(-2).values
 30 |     elif not isinstance(size, torch.Tensor):
 31 |         size = torch.tensor(size, device=kpts.device, dtype=kpts.dtype)
 32 |     size = size.to(kpts)
 33 |     shift = size / 2
 34 |     scale = size.max(-1).values / 2
 35 |     kpts = (kpts - shift[..., None, :]) / scale[..., None, None]
 36 |     return kpts
 37 | 
 38 | 
 39 | def pad_to_length(x: torch.Tensor, length: int) -> Tuple[torch.Tensor]:
 40 |     if length <= x.shape[-2]:
 41 |         return x, torch.ones_like(x[..., :1], dtype=torch.bool)
 42 |     pad = torch.ones(
 43 |         *x.shape[:-2], length - x.shape[-2], x.shape[-1], device=x.device, dtype=x.dtype
 44 |     )
 45 |     y = torch.cat([x, pad], dim=-2)
 46 |     mask = torch.zeros(*y.shape[:-1], 1, dtype=torch.bool, device=x.device)
 47 |     mask[..., : x.shape[-2], :] = True
 48 |     return y, mask
 49 | 
 50 | 
 51 | def rotate_half(x: torch.Tensor) -> torch.Tensor:
 52 |     x = x.unflatten(-1, (-1, 2))
 53 |     x1, x2 = x.unbind(dim=-1)
 54 |     return torch.stack((-x2, x1), dim=-1).flatten(start_dim=-2)
 55 | 
 56 | 
 57 | def apply_cached_rotary_emb(freqs: torch.Tensor, t: torch.Tensor) -> torch.Tensor:
 58 |     return (t * freqs[0]) + (rotate_half(t) * freqs[1])
 59 | 
 60 | 
 61 | class LearnableFourierPositionalEncoding(nn.Module):
 62 |     def __init__(self, M: int, dim: int, F_dim: int = None, gamma: float = 1.0) -> None:
 63 |         super().__init__()
 64 |         F_dim = F_dim if F_dim is not None else dim
 65 |         self.gamma = gamma
 66 |         self.Wr = nn.Linear(M, F_dim // 2, bias=False)
 67 |         nn.init.normal_(self.Wr.weight.data, mean=0, std=self.gamma**-2)
 68 | 
 69 |     def forward(self, x: torch.Tensor) -> torch.Tensor:
 70 |         """encode position vector"""
 71 |         projected = self.Wr(x)
 72 |         cosines, sines = torch.cos(projected), torch.sin(projected)
 73 |         emb = torch.stack([cosines, sines], 0).unsqueeze(-3)
 74 |         return emb.repeat_interleave(2, dim=-1)
 75 | 
 76 | 
 77 | class TokenConfidence(nn.Module):
 78 |     def __init__(self, dim: int) -> None:
 79 |         super().__init__()
 80 |         self.token = nn.Sequential(nn.Linear(dim, 1), nn.Sigmoid())
 81 | 
 82 |     def forward(self, desc0: torch.Tensor, desc1: torch.Tensor):
 83 |         """get confidence tokens"""
 84 |         return (
 85 |             self.token(desc0.detach()).squeeze(-1),
 86 |             self.token(desc1.detach()).squeeze(-1),
 87 |         )
 88 | 
 89 | 
 90 | class Attention(nn.Module):
 91 |     def __init__(self, allow_flash: bool) -> None:
 92 |         super().__init__()
 93 |         if allow_flash and not FLASH_AVAILABLE:
 94 |             warnings.warn(
 95 |                 "FlashAttention is not available. For optimal speed, "
 96 |                 "consider installing torch >= 2.0 or flash-attn.",
 97 |                 stacklevel=2,
 98 |             )
 99 |         self.enable_flash = allow_flash and FLASH_AVAILABLE
100 |         self.has_sdp = hasattr(F, "scaled_dot_product_attention")
101 |         if allow_flash and FlashCrossAttention:
102 |             self.flash_ = FlashCrossAttention()
103 |         if self.has_sdp:
104 |             torch.backends.cuda.enable_flash_sdp(allow_flash)
105 | 
106 |     def forward(self, q, k, v, mask: Optional[torch.Tensor] = None) -> torch.Tensor:
107 |         if q.shape[-2] == 0 or k.shape[-2] == 0:
108 |             return q.new_zeros((*q.shape[:-1], v.shape[-1]))
109 |         if self.enable_flash and q.device.type == "cuda":
110 |             # use torch 2.0 scaled_dot_product_attention with flash
111 |             if self.has_sdp:
112 |                 args = [x.half().contiguous() for x in [q, k, v]]
113 |                 v = F.scaled_dot_product_attention(*args, attn_mask=mask).to(q.dtype)
114 |                 return v if mask is None else v.nan_to_num()
115 |             else:
116 |                 assert mask is None
117 |                 q, k, v = [x.transpose(-2, -3).contiguous() for x in [q, k, v]]
118 |                 m = self.flash_(q.half(), torch.stack([k, v], 2).half())
119 |                 return m.transpose(-2, -3).to(q.dtype).clone()
120 |         elif self.has_sdp:
121 |             args = [x.contiguous() for x in [q, k, v]]
122 |             v = F.scaled_dot_product_attention(*args, attn_mask=mask)
123 |             return v if mask is None else v.nan_to_num()
124 |         else:
125 |             s = q.shape[-1] ** -0.5
126 |             sim = torch.einsum("...id,...jd->...ij", q, k) * s
127 |             if mask is not None:
128 |                 sim.masked_fill(~mask, -float("inf"))
129 |             attn = F.softmax(sim, -1)
130 |             return torch.einsum("...ij,...jd->...id", attn, v)
131 | 
132 | 
class SelfBlock(nn.Module):
    """Self-attention block: rotary-encoded multi-head attention + residual MLP.

    Args:
        embed_dim: descriptor size; must be divisible by `num_heads`.
        num_heads: number of attention heads.
        flash: forwarded to `Attention` as `allow_flash`.
        bias: whether the qkv and output projections carry a bias.
    """

    def __init__(
        self, embed_dim: int, num_heads: int, flash: bool = False, bias: bool = True
    ) -> None:
        super().__init__()
        assert embed_dim % num_heads == 0
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads
        self.Wqkv = nn.Linear(embed_dim, 3 * embed_dim, bias=bias)
        self.inner_attn = Attention(flash)
        self.out_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
        hidden = 2 * embed_dim
        self.ffn = nn.Sequential(
            nn.Linear(hidden, hidden),
            nn.LayerNorm(hidden, elementwise_affine=True),
            nn.GELU(),
            nn.Linear(hidden, embed_dim),
        )

    def forward(
        self,
        x: torch.Tensor,
        encoding: torch.Tensor,
        mask: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        """Return `x` plus the MLP output computed from `[x, attention message]`.

        `encoding` is the cached rotary embedding applied to queries and keys
        (values are left untouched); `mask` is handed to the attention module.
        """
        # One projection yields q, k and v interleaved in the last dimension.
        qkv = self.Wqkv(x).unflatten(-1, (self.num_heads, -1, 3)).transpose(1, 2)
        q, k, v = qkv.unbind(dim=-1)
        q, k = (apply_cached_rotary_emb(encoding, t) for t in (q, k))
        context = self.inner_attn(q, k, v, mask=mask)
        # Put the sequence dimension back in front of the heads and merge heads.
        merged = context.transpose(1, 2).flatten(start_dim=-2)
        message = self.out_proj(merged)
        return x + self.ffn(torch.cat([x, message], -1))
166 | 
167 | 
class CrossBlock(nn.Module):
    """Bidirectional cross-attention block shared by both descriptor sets.

    A single projection (`to_qk`) produces the vectors used as queries and
    as keys, so one similarity matrix serves both directions.  Each set then
    receives a residual MLP update computed from `[x, message]`.

    Args:
        embed_dim: descriptor size.
        num_heads: number of attention heads.
        flash: use the fused `Attention` module on CUDA when available.
        bias: whether the linear projections carry a bias.
    """

    def __init__(
        self, embed_dim: int, num_heads: int, flash: bool = False, bias: bool = True
    ) -> None:
        super().__init__()
        self.heads = num_heads
        dim_head = embed_dim // num_heads
        inner_dim = dim_head * num_heads
        self.scale = dim_head**-0.5
        self.to_qk = nn.Linear(embed_dim, inner_dim, bias=bias)
        self.to_v = nn.Linear(embed_dim, inner_dim, bias=bias)
        self.to_out = nn.Linear(inner_dim, embed_dim, bias=bias)
        hidden = 2 * embed_dim
        self.ffn = nn.Sequential(
            nn.Linear(hidden, hidden),
            nn.LayerNorm(hidden, elementwise_affine=True),
            nn.GELU(),
            nn.Linear(hidden, embed_dim),
        )
        self.flash = Attention(True) if flash and FLASH_AVAILABLE else None

    def map_(self, func: Callable, x0: torch.Tensor, x1: torch.Tensor):
        """Apply `func` to both inputs and return the two results as a pair."""
        return func(x0), func(x1)

    def forward(
        self, x0: torch.Tensor, x1: torch.Tensor, mask: Optional[torch.Tensor] = None
    ) -> List[torch.Tensor]:
        """Exchange messages between `x0` and `x1` and return the updated pair.

        `mask`, when given, is a boolean tensor broadcastable to the
        `[b, heads, len(x0), len(x1)]` similarity; True marks allowed pairs.
        """

        def split_heads(t: torch.Tensor) -> torch.Tensor:
            return t.unflatten(-1, (self.heads, -1)).transpose(1, 2)

        def merge_heads(t: torch.Tensor) -> torch.Tensor:
            return t.transpose(1, 2).flatten(start_dim=-2)

        qk0, qk1 = self.map_(self.to_qk, x0, x1)
        v0, v1 = self.map_(self.to_v, x0, x1)
        qk0, qk1, v0, v1 = (split_heads(t) for t in (qk0, qk1, v0, v1))
        if self.flash is not None and qk0.device.type == "cuda":
            mask_t = None if mask is None else mask.transpose(-1, -2)
            m0 = self.flash(qk0, qk1, v1, mask)
            m1 = self.flash(qk1, qk0, v0, mask_t)
        else:
            # Each side takes sqrt(scale), so their product carries the full scale.
            root_scale = self.scale**0.5
            qk0, qk1 = qk0 * root_scale, qk1 * root_scale
            sim = torch.einsum("bhid, bhjd -> bhij", qk0, qk1)
            if mask is not None:
                sim = sim.masked_fill(~mask, -float("inf"))
            attn01 = F.softmax(sim, dim=-1)
            attn10 = F.softmax(sim.transpose(-2, -1).contiguous(), dim=-1)
            m0 = torch.einsum("bhij, bhjd -> bhid", attn01, v1)
            m1 = torch.einsum("bhji, bhjd -> bhid", attn10.transpose(-2, -1), v0)
            if mask is not None:
                # Rows without any allowed partner come out of softmax as NaN.
                m0, m1 = m0.nan_to_num(), m1.nan_to_num()
        m0, m1 = self.map_(merge_heads, m0, m1)
        m0, m1 = self.map_(self.to_out, m0, m1)
        x0 = x0 + self.ffn(torch.cat([x0, m0], -1))
        x1 = x1 + self.ffn(torch.cat([x1, m1], -1))
        return x0, x1
224 | 
225 | 
class TransformerLayer(nn.Module):
    """One matching layer: self-attention on each set, then cross-attention.

    All constructor arguments are forwarded unchanged to both `SelfBlock`
    and `CrossBlock`.
    """

    def __init__(self, *args, **kwargs):
        super().__init__()
        self.self_attn = SelfBlock(*args, **kwargs)
        self.cross_attn = CrossBlock(*args, **kwargs)

    def forward(
        self,
        desc0,
        desc1,
        encoding0,
        encoding1,
        mask0: Optional[torch.Tensor] = None,
        mask1: Optional[torch.Tensor] = None,
    ):
        """Update both descriptor sets.

        The masked path is taken only when both masks are provided; with
        either one missing the layer runs without any masking.
        """
        if mask0 is None or mask1 is None:
            desc0 = self.self_attn(desc0, encoding0)
            desc1 = self.self_attn(desc1, encoding1)
            return self.cross_attn(desc0, desc1)
        return self.masked_forward(desc0, desc1, encoding0, encoding1, mask0, mask1)

    # This part is compiled and allows padding inputs
    def masked_forward(self, desc0, desc1, encoding0, encoding1, mask0, mask1):
        """Same as the unmasked path, but with pairwise validity masks.

        The per-point masks are combined with their transposes (`&`) into one
        pairwise mask per attention call.
        """
        cross_mask = mask0 & mask1.transpose(-1, -2)
        self_mask0 = mask0 & mask0.transpose(-1, -2)
        self_mask1 = mask1 & mask1.transpose(-1, -2)
        desc0 = self.self_attn(desc0, encoding0, self_mask0)
        desc1 = self.self_attn(desc1, encoding1, self_mask1)
        return self.cross_attn(desc0, desc1, cross_mask)
256 | 
257 | 
def sigmoid_log_double_softmax(
    sim: torch.Tensor, z0: torch.Tensor, z1: torch.Tensor
) -> torch.Tensor:
    """Build the log assignment matrix from similarities and matchability logits.

    Args:
        sim: `[b, m, n]` similarity matrix.
        z0: `[b, m, 1]` matchability logits of the first set.
        z1: `[b, n, 1]` matchability logits of the second set.

    Returns:
        `[b, m + 1, n + 1]` tensor.  The inner `m x n` block is the sum of the
        row-wise and column-wise log-softmax of `sim` plus
        `logsigmoid(z0) + logsigmoid(z1)^T`.  The extra column / row hold
        `logsigmoid(-z0)` / `logsigmoid(-z1)`, and the corner entry stays 0.
    """
    b, m, n = sim.shape
    over_columns = F.log_softmax(sim, 2)
    over_rows = F.log_softmax(sim.transpose(-1, -2).contiguous(), 2).transpose(-1, -2)
    certainties = F.logsigmoid(z0) + F.logsigmoid(z1).transpose(1, 2)

    scores = sim.new_full((b, m + 1, n + 1), 0)
    scores[:, :m, :n] = over_columns + over_rows + certainties
    scores[:, :-1, -1] = F.logsigmoid(-z0.squeeze(-1))
    scores[:, -1, :-1] = F.logsigmoid(-z1.squeeze(-1))
    return scores
271 | 
272 | 
class MatchAssignment(nn.Module):
    """Assignment head: projected-descriptor similarity plus matchability logits."""

    def __init__(self, dim: int) -> None:
        super().__init__()
        self.dim = dim
        self.matchability = nn.Linear(dim, 1, bias=True)
        self.final_proj = nn.Linear(dim, dim, bias=True)

    def forward(self, desc0: torch.Tensor, desc1: torch.Tensor):
        """Build the assignment matrix from descriptors.

        Returns `(scores, sim)`: the `[b, m + 1, n + 1]` log assignment matrix
        and the raw `[b, m, n]` similarity it was computed from.
        """
        mdesc0, mdesc1 = self.final_proj(desc0), self.final_proj(desc1)
        _, _, d = mdesc0.shape
        # Each side is divided by d**0.25, so the dot product is scaled by d**-0.5.
        norm = d**0.25
        mdesc0, mdesc1 = mdesc0 / norm, mdesc1 / norm
        sim = torch.einsum("bmd,bnd->bmn", mdesc0, mdesc1)
        z0, z1 = self.matchability(desc0), self.matchability(desc1)
        return sigmoid_log_double_softmax(sim, z0, z1), sim

    def get_matchability(self, desc: torch.Tensor):
        """Return the sigmoid of the matchability logit of every descriptor."""
        logits = self.matchability(desc)
        return torch.sigmoid(logits).squeeze(-1)
293 | 
294 | 
def filter_matches(scores: torch.Tensor, th: float):
    """Extract mutual matches from a log assignment matrix `[B x M+1 x N+1]`.

    Only the inner `M x N` block is used.  A pair is kept when each point is
    the other's arg-max and the exponentiated score exceeds `th`.

    Returns:
        `(m0, m1, mscores0, mscores1)`: per-point match indices (-1 where
        unmatched) and per-point scores (0 where not mutual) for both sets.
    """
    inner = scores[:, :-1, :-1]
    best0, best1 = inner.max(2), inner.max(1)
    m0, m1 = best0.indices, best1.indices
    arange0 = torch.arange(m0.shape[1], device=m0.device)[None]
    arange1 = torch.arange(m1.shape[1], device=m1.device)[None]
    # Mutual check: following the best match and coming back lands on itself.
    mutual0 = arange0 == m1.gather(1, m0)
    mutual1 = arange1 == m0.gather(1, m1)
    best0_prob = best0.values.exp()
    zero = best0_prob.new_tensor(0)
    mscores0 = torch.where(mutual0, best0_prob, zero)
    mscores1 = torch.where(mutual1, mscores0.gather(1, m1), zero)
    valid0 = mutual0 & (mscores0 > th)
    valid1 = mutual1 & valid0.gather(1, m1)
    m0 = torch.where(valid0, m0, -1)
    m1 = torch.where(valid1, m1, -1)
    return m0, m1, mscores0, mscores1
312 | 
313 | 
class LightGlue(nn.Module):
    """Sparse feature matcher: a stack of `TransformerLayer`s with assignment heads.

    The constructor merges `default_conf`, the keyword overrides and, when
    `features` names a supported extractor, the matching entry of `features`
    (which wins).  `forward` takes keypoints and descriptors of two images and
    returns matches; it can stop before the last layer (`depth_confidence`)
    and drop points between layers (`width_confidence`).
    """

    default_conf = {
        "name": "lightglue",  # just for interfacing
        "input_dim": 256,  # input descriptor dimension (autoselected from weights)
        "descriptor_dim": 256,
        "add_scale_ori": False,
        "n_layers": 9,
        "num_heads": 4,
        "flash": True,  # enable FlashAttention if available.
        "mp": False,  # enable mixed precision
        "depth_confidence": 0.95,  # early stopping, disable with -1
        "width_confidence": 0.99,  # point pruning, disable with -1
        "filter_threshold": 0.1,  # match threshold
        "weights": None,
    }

    # Point pruning involves an overhead (gather).
    # Therefore, we only activate it if there are enough keypoints.
    pruning_keypoint_thresholds = {
        "cpu": -1,
        "mps": -1,
        "cuda": 1024,
        "flash": 1536,
    }

    required_data_keys = ["image0", "image1"]

    version = "v0.1_arxiv"
    url = "https://github.com/cvg/LightGlue/releases/download/{}/{}_lightglue.pth"

    features = {
        "superpoint": {
            "weights": "superpoint_lightglue",
            "input_dim": 256,
        },
        "disk": {
            "weights": "disk_lightglue",
            "input_dim": 128,
        },
        "aliked": {
            "weights": "aliked_lightglue",
            "input_dim": 128,
        },
        "sift": {
            "weights": "sift_lightglue",
            "input_dim": 128,
            "add_scale_ori": True,
        },
        "doghardnet": {
            "weights": "doghardnet_lightglue",
            "input_dim": 128,
            "add_scale_ori": True,
        },
    }

    def __init__(self, features="superpoint", **conf) -> None:
        """Build the network and load weights.

        Args:
            features: key of `LightGlue.features` selecting pretrained weights
                (downloaded through `torch.hub`), or None to skip the download.
                With None, `conf["weights"]` may name a checkpoint stored as
                `weights/<name>.pth` next to this file.
            **conf: overrides for `default_conf`.

        Raises:
            ValueError: if `features` is neither None nor a supported key.
        """
        super().__init__()
        self.conf = conf = SimpleNamespace(**{**self.default_conf, **conf})
        if features is not None:
            if features not in self.features:
                raise ValueError(
                    f"Unsupported features: {features} not in "
                    f"{{{','.join(self.features)}}}"
                )
            # The per-extractor settings take precedence over user overrides.
            for k, v in self.features[features].items():
                setattr(conf, k, v)

        if conf.input_dim != conf.descriptor_dim:
            self.input_proj = nn.Linear(conf.input_dim, conf.descriptor_dim, bias=True)
        else:
            self.input_proj = nn.Identity()

        head_dim = conf.descriptor_dim // conf.num_heads
        # Positions are (x, y), plus (scale, orientation) when add_scale_ori is set.
        self.posenc = LearnableFourierPositionalEncoding(
            2 + 2 * self.conf.add_scale_ori, head_dim, head_dim
        )

        h, n, d = conf.num_heads, conf.n_layers, conf.descriptor_dim

        self.transformers = nn.ModuleList(
            [TransformerLayer(d, h, conf.flash) for _ in range(n)]
        )

        self.log_assignment = nn.ModuleList([MatchAssignment(d) for _ in range(n)])
        # No confidence head after the last layer: nothing is left to skip there.
        self.token_confidence = nn.ModuleList(
            [TokenConfidence(d) for _ in range(n - 1)]
        )
        self.register_buffer(
            "confidence_thresholds",
            torch.Tensor(
                [self.confidence_threshold(i) for i in range(self.conf.n_layers)]
            ),
        )

        state_dict = None
        if features is not None:
            fname = f"{conf.weights}_{self.version.replace('.', '-')}.pth"
            state_dict = torch.hub.load_state_dict_from_url(
                self.url.format(self.version, features), file_name=fname
            )
        elif conf.weights is not None:
            path = Path(__file__).parent
            path = path / "weights/{}.pth".format(self.conf.weights)
            state_dict = torch.load(str(path), map_location="cpu")

        if state_dict:
            # rename old state dict entries
            for i in range(self.conf.n_layers):
                pattern = f"self_attn.{i}", f"transformers.{i}.self_attn"
                state_dict = {k.replace(*pattern): v for k, v in state_dict.items()}
                pattern = f"cross_attn.{i}", f"transformers.{i}.cross_attn"
                state_dict = {k.replace(*pattern): v for k, v in state_dict.items()}
            # Loaded once, after renaming (strict=False tolerates key mismatches).
            self.load_state_dict(state_dict, strict=False)

        # static lengths LightGlue is compiled for (only used with torch.compile)
        self.static_lengths = None

    def compile(
        self,
        mode="reduce-overhead",
        static_lengths=(256, 512, 768, 1024, 1280, 1536),
    ):
        """Compile the masked forward pass of every layer with `torch.compile`.

        Args:
            mode: forwarded to `torch.compile`.
            static_lengths: keypoint counts that inputs get padded to in
                `forward`.  The values are copied, so neither the default nor
                a caller-owned list is shared with (or mutated through) this
                instance.  None is stored as-is.
        """
        if self.conf.width_confidence != -1:
            warnings.warn(
                "Point pruning is partially disabled for compiled forward.",
                stacklevel=2,
            )

        torch._inductor.cudagraph_mark_step_begin()
        for i in range(self.conf.n_layers):
            self.transformers[i].masked_forward = torch.compile(
                self.transformers[i].masked_forward, mode=mode, fullgraph=True
            )

        self.static_lengths = (
            list(static_lengths) if static_lengths is not None else None
        )

    def forward(self, data: dict) -> dict:
        """
        Match keypoints and descriptors between two images

        Input (dict):
            image0: dict
                keypoints: [B x M x 2]
                descriptors: [B x M x D]
                image: [B x C x H x W] or image_size: [B x 2]
            image1: dict
                keypoints: [B x N x 2]
                descriptors: [B x N x D]
                image: [B x C x H x W] or image_size: [B x 2]
        Output (dict):
            matches0: [B x M]
            matching_scores0: [B x M]
            matches1: [B x N]
            matching_scores1: [B x N]
            matches: List[[Si x 2]]
            scores: List[[Si]]
            stop: int
            prune0: [B x M]
            prune1: [B x N]
        """
        with torch.autocast(enabled=self.conf.mp, device_type="cuda"):
            return self._forward(data)

    def _forward(self, data: dict) -> dict:
        """Implementation of `forward`; runs inside its autocast context."""
        for key in self.required_data_keys:
            assert key in data, f"Missing key {key} in data"
        data0, data1 = data["image0"], data["image1"]
        kpts0, kpts1 = data0["keypoints"], data1["keypoints"]
        b, m, _ = kpts0.shape
        b, n, _ = kpts1.shape
        device = kpts0.device
        size0, size1 = data0.get("image_size"), data1.get("image_size")
        kpts0 = normalize_keypoints(kpts0, size0).clone()
        kpts1 = normalize_keypoints(kpts1, size1).clone()

        if self.conf.add_scale_ori:
            kpts0 = torch.cat(
                [kpts0] + [data0[k].unsqueeze(-1) for k in ("scales", "oris")], -1
            )
            kpts1 = torch.cat(
                [kpts1] + [data1[k].unsqueeze(-1) for k in ("scales", "oris")], -1
            )
        desc0 = data0["descriptors"].detach().contiguous()
        desc1 = data1["descriptors"].detach().contiguous()

        assert desc0.shape[-1] == self.conf.input_dim
        assert desc1.shape[-1] == self.conf.input_dim

        if torch.is_autocast_enabled():
            desc0 = desc0.half()
            desc1 = desc1.half()

        mask0, mask1 = None, None
        c = max(m, n)
        do_compile = self.static_lengths and c <= max(self.static_lengths)
        if do_compile:
            # Pad both sets to the smallest compiled length that fits them.
            kn = min([k for k in self.static_lengths if k >= c])
            desc0, mask0 = pad_to_length(desc0, kn)
            desc1, mask1 = pad_to_length(desc1, kn)
            kpts0, _ = pad_to_length(kpts0, kn)
            kpts1, _ = pad_to_length(kpts1, kn)
        desc0 = self.input_proj(desc0)
        desc1 = self.input_proj(desc1)
        # cache positional embeddings
        encoding0 = self.posenc(kpts0)
        encoding1 = self.posenc(kpts1)

        # GNN + final_proj + assignment
        do_early_stop = self.conf.depth_confidence > 0
        do_point_pruning = self.conf.width_confidence > 0 and not do_compile
        pruning_th = self.pruning_min_kpts(device)
        if do_point_pruning:
            # ind0/ind1 map the surviving points back to their original index.
            ind0 = torch.arange(0, m, device=device)[None]
            ind1 = torch.arange(0, n, device=device)[None]
            # We store the index of the layer at which pruning is detected.
            prune0 = torch.ones_like(ind0)
            prune1 = torch.ones_like(ind1)
        token0, token1 = None, None
        for i in range(self.conf.n_layers):
            if desc0.shape[1] == 0 or desc1.shape[1] == 0:  # no keypoints
                break
            desc0, desc1 = self.transformers[i](
                desc0, desc1, encoding0, encoding1, mask0=mask0, mask1=mask1
            )
            if i == self.conf.n_layers - 1:
                continue  # no early stopping or adaptive width at last layer

            if do_early_stop:
                token0, token1 = self.token_confidence[i](desc0, desc1)
                # Padded entries (beyond m / n) are excluded from the decision.
                if self.check_if_stop(token0[..., :m], token1[..., :n], i, m + n):
                    break
            if do_point_pruning and desc0.shape[-2] > pruning_th:
                scores0 = self.log_assignment[i].get_matchability(desc0)
                prunemask0 = self.get_pruning_mask(token0, scores0, i)
                keep0 = torch.where(prunemask0)[1]
                ind0 = ind0.index_select(1, keep0)
                desc0 = desc0.index_select(1, keep0)
                encoding0 = encoding0.index_select(-2, keep0)
                prune0[:, ind0] += 1
            if do_point_pruning and desc1.shape[-2] > pruning_th:
                scores1 = self.log_assignment[i].get_matchability(desc1)
                prunemask1 = self.get_pruning_mask(token1, scores1, i)
                keep1 = torch.where(prunemask1)[1]
                ind1 = ind1.index_select(1, keep1)
                desc1 = desc1.index_select(1, keep1)
                encoding1 = encoding1.index_select(-2, keep1)
                prune1[:, ind1] += 1

        if desc0.shape[1] == 0 or desc1.shape[1] == 0:  # no keypoints
            m0 = desc0.new_full((b, m), -1, dtype=torch.long)
            m1 = desc1.new_full((b, n), -1, dtype=torch.long)
            mscores0 = desc0.new_zeros((b, m))
            mscores1 = desc1.new_zeros((b, n))
            matches = desc0.new_empty((b, 0, 2), dtype=torch.long)
            mscores = desc0.new_empty((b, 0))
            if not do_point_pruning:
                prune0 = torch.ones_like(mscores0) * self.conf.n_layers
                prune1 = torch.ones_like(mscores1) * self.conf.n_layers
            return {
                "matches0": m0,
                "matches1": m1,
                "matching_scores0": mscores0,
                "matching_scores1": mscores1,
                "stop": i + 1,
                "matches": matches,
                "scores": mscores,
                "prune0": prune0,
                "prune1": prune1,
            }

        desc0, desc1 = desc0[..., :m, :], desc1[..., :n, :]  # remove padding
        # `i` is the last layer that ran, so its assignment head is used.
        scores, _ = self.log_assignment[i](desc0, desc1)
        m0, m1, mscores0, mscores1 = filter_matches(scores, self.conf.filter_threshold)
        matches, mscores = [], []
        for k in range(b):
            valid = m0[k] > -1
            m_indices_0 = torch.where(valid)[0]
            m_indices_1 = m0[k][valid]
            if do_point_pruning:
                # Translate indices among surviving points to original indices.
                m_indices_0 = ind0[k, m_indices_0]
                m_indices_1 = ind1[k, m_indices_1]
            matches.append(torch.stack([m_indices_0, m_indices_1], -1))
            mscores.append(mscores0[k][valid])

        # TODO: Remove when hloc switches to the compact format.
        if do_point_pruning:
            # Scatter the results back to the full, unpruned point sets.
            m0_ = torch.full((b, m), -1, device=m0.device, dtype=m0.dtype)
            m1_ = torch.full((b, n), -1, device=m1.device, dtype=m1.dtype)
            m0_[:, ind0] = torch.where(m0 == -1, -1, ind1.gather(1, m0.clamp(min=0)))
            m1_[:, ind1] = torch.where(m1 == -1, -1, ind0.gather(1, m1.clamp(min=0)))
            mscores0_ = torch.zeros((b, m), device=mscores0.device)
            mscores1_ = torch.zeros((b, n), device=mscores1.device)
            mscores0_[:, ind0] = mscores0
            mscores1_[:, ind1] = mscores1
            m0, m1, mscores0, mscores1 = m0_, m1_, mscores0_, mscores1_
        else:
            prune0 = torch.ones_like(mscores0) * self.conf.n_layers
            prune1 = torch.ones_like(mscores1) * self.conf.n_layers

        return {
            "matches0": m0,
            "matches1": m1,
            "matching_scores0": mscores0,
            "matching_scores1": mscores1,
            "stop": i + 1,
            "matches": matches,
            "scores": mscores,
            "prune0": prune0,
            "prune1": prune1,
        }

    def confidence_threshold(self, layer_index: int) -> float:
        """Scaled confidence threshold.

        Returns `0.8 + 0.1 * exp(-4 * layer_index / n_layers)` clipped to
        [0, 1], i.e. 0.9 at the first layer and decreasing with depth.
        """
        threshold = 0.8 + 0.1 * np.exp(-4.0 * layer_index / self.conf.n_layers)
        return np.clip(threshold, 0, 1)

    def get_pruning_mask(
        self, confidences: torch.Tensor, scores: torch.Tensor, layer_index: int
    ) -> torch.Tensor:
        """Return a boolean mask that is True for the points to keep.

        A point is kept when its matchability `scores` exceeds
        `1 - width_confidence`, or, if `confidences` is given, when its
        confidence does not exceed the threshold of `layer_index`.
        """
        keep = scores > (1 - self.conf.width_confidence)
        if confidences is not None:  # Low-confidence points are never pruned.
            keep |= confidences <= self.confidence_thresholds[layer_index]
        return keep

    def check_if_stop(
        self,
        confidences0: torch.Tensor,
        confidences1: torch.Tensor,
        layer_index: int,
        num_points: int,
    ) -> torch.Tensor:
        """Evaluate the stopping condition.

        Returns a boolean scalar tensor: True when the fraction of points
        (out of `num_points`) whose confidence reaches the threshold of
        `layer_index` is greater than `depth_confidence`.
        """
        confidences = torch.cat([confidences0, confidences1], -1)
        threshold = self.confidence_thresholds[layer_index]
        ratio_confident = 1.0 - (confidences < threshold).float().sum() / num_points
        return ratio_confident > self.conf.depth_confidence

    def pruning_min_kpts(self, device: torch.device):
        """Return the keypoint count above which pruning is applied on `device`.

        Uses the "flash" entry on CUDA when flash attention is configured and
        available, and the entry keyed by `device.type` otherwise.
        """
        if self.conf.flash and FLASH_AVAILABLE and device.type == "cuda":
            return self.pruning_keypoint_thresholds["flash"]
        else:
            return self.pruning_keypoint_thresholds[device.type]
656 | 


--------------------------------------------------------------------------------
/lightglue/sift.py:
--------------------------------------------------------------------------------
  1 | import warnings
  2 | 
  3 | import cv2
  4 | import numpy as np
  5 | import torch
  6 | from kornia.color import rgb_to_grayscale
  7 | from packaging import version
  8 | 
  9 | try:
 10 |     import pycolmap
 11 | except ImportError:
 12 |     pycolmap = None
 13 | 
 14 | from .utils import Extractor
 15 | 
 16 | 
 17 | def filter_dog_point(points, scales, angles, image_shape, nms_radius, scores=None):
 18 |     h, w = image_shape
 19 |     ij = np.round(points - 0.5).astype(int).T[::-1]
 20 | 
 21 |     # Remove duplicate points (identical coordinates).
 22 |     # Pick highest scale or score
 23 |     s = scales if scores is None else scores
 24 |     buffer = np.zeros((h, w))
 25 |     np.maximum.at(buffer, tuple(ij), s)
 26 |     keep = np.where(buffer[tuple(ij)] == s)[0]
 27 | 
 28 |     # Pick lowest angle (arbitrary).
 29 |     ij = ij[:, keep]
 30 |     buffer[:] = np.inf
 31 |     o_abs = np.abs(angles[keep])
 32 |     np.minimum.at(buffer, tuple(ij), o_abs)
 33 |     mask = buffer[tuple(ij)] == o_abs
 34 |     ij = ij[:, mask]
 35 |     keep = keep[mask]
 36 | 
 37 |     if nms_radius > 0:
 38 |         # Apply NMS on the remaining points
 39 |         buffer[:] = 0
 40 |         buffer[tuple(ij)] = s[keep]  # scores or scale
 41 | 
 42 |         local_max = torch.nn.functional.max_pool2d(
 43 |             torch.from_numpy(buffer).unsqueeze(0),
 44 |             kernel_size=nms_radius * 2 + 1,
 45 |             stride=1,
 46 |             padding=nms_radius,
 47 |         ).squeeze(0)
 48 |         is_local_max = buffer == local_max.numpy()
 49 |         keep = keep[is_local_max[tuple(ij)]]
 50 |     return keep
 51 | 
 52 | 
 53 | def sift_to_rootsift(x: torch.Tensor, eps=1e-6) -> torch.Tensor:
 54 |     x = torch.nn.functional.normalize(x, p=1, dim=-1, eps=eps)
 55 |     x.clip_(min=eps).sqrt_()
 56 |     return torch.nn.functional.normalize(x, p=2, dim=-1, eps=eps)
 57 | 
 58 | 
 59 | def run_opencv_sift(features: cv2.Feature2D, image: np.ndarray) -> np.ndarray:
 60 |     """
 61 |     Detect keypoints using OpenCV Detector.
 62 |     Optionally, perform description.
 63 |     Args:
 64 |         features: OpenCV based keypoints detector and descriptor
 65 |         image: Grayscale image of uint8 data type
 66 |     Returns:
 67 |         keypoints: 1D array of detected cv2.KeyPoint
 68 |         scores: 1D array of responses
 69 |         descriptors: 1D array of descriptors
 70 |     """
 71 |     detections, descriptors = features.detectAndCompute(image, None)
 72 |     points = np.array([k.pt for k in detections], dtype=np.float32)
 73 |     scores = np.array([k.response for k in detections], dtype=np.float32)
 74 |     scales = np.array([k.size for k in detections], dtype=np.float32)
 75 |     angles = np.deg2rad(np.array([k.angle for k in detections], dtype=np.float32))
 76 |     return points, scores, scales, angles, descriptors
 77 | 
 78 | 
 79 | class SIFT(Extractor):
 80 |     default_conf = {
 81 |         "rootsift": True,
 82 |         "nms_radius": 0,  # None to disable filtering entirely.
 83 |         "max_num_keypoints": 4096,
 84 |         "backend": "opencv",  # in {opencv, pycolmap, pycolmap_cpu, pycolmap_cuda}
 85 |         "detection_threshold": 0.0066667,  # from COLMAP
 86 |         "edge_threshold": 10,
 87 |         "first_octave": -1,  # only used by pycolmap, the default of COLMAP
 88 |         "num_octaves": 4,
 89 |     }
 90 | 
 91 |     preprocess_conf = {
 92 |         "resize": 1024,
 93 |     }
 94 | 
 95 |     required_data_keys = ["image"]
 96 | 
 97 |     def __init__(self, **conf):
 98 |         super().__init__(**conf)  # Update with default configuration.
 99 |         backend = self.conf.backend
100 |         if backend.startswith("pycolmap"):
101 |             if pycolmap is None:
102 |                 raise ImportError(
103 |                     "Cannot find module pycolmap: install it with pip"
104 |                     "or use backend=opencv."
105 |                 )
106 |             options = {
107 |                 "peak_threshold": self.conf.detection_threshold,
108 |                 "edge_threshold": self.conf.edge_threshold,
109 |                 "first_octave": self.conf.first_octave,
110 |                 "num_octaves": self.conf.num_octaves,
111 |                 "normalization": pycolmap.Normalization.L2,  # L1_ROOT is buggy.
112 |             }
113 |             device = (
114 |                 "auto" if backend == "pycolmap" else backend.replace("pycolmap_", "")
115 |             )
116 |             if (
117 |                 backend == "pycolmap_cpu" or not pycolmap.has_cuda
118 |             ) and pycolmap.__version__ < "0.5.0":
119 |                 warnings.warn(
120 |                     "The pycolmap CPU SIFT is buggy in version < 0.5.0, "
121 |                     "consider upgrading pycolmap or use the CUDA version.",
122 |                     stacklevel=1,
123 |                 )
124 |             else:
125 |                 options["max_num_features"] = self.conf.max_num_keypoints
126 |             self.sift = pycolmap.Sift(options=options, device=device)
127 |         elif backend == "opencv":
128 |             self.sift = cv2.SIFT_create(
129 |                 contrastThreshold=self.conf.detection_threshold,
130 |                 nfeatures=self.conf.max_num_keypoints,
131 |                 edgeThreshold=self.conf.edge_threshold,
132 |                 nOctaveLayers=self.conf.num_octaves,
133 |             )
134 |         else:
135 |             backends = {"opencv", "pycolmap", "pycolmap_cpu", "pycolmap_cuda"}
136 |             raise ValueError(
137 |                 f"Unknown backend: {backend} not in " f"{{{','.join(backends)}}}."
138 |             )
139 | 
140 |     def extract_single_image(self, image: torch.Tensor):
141 |         image_np = image.cpu().numpy().squeeze(0)
142 | 
143 |         if self.conf.backend.startswith("pycolmap"):
144 |             if version.parse(pycolmap.__version__) >= version.parse("0.5.0"):
145 |                 detections, descriptors = self.sift.extract(image_np)
146 |                 scores = None  # Scores are not exposed by COLMAP anymore.
147 |             else:
148 |                 detections, scores, descriptors = self.sift.extract(image_np)
149 |             keypoints = detections[:, :2]  # Keep only (x, y).
150 |             scales, angles = detections[:, -2:].T
151 |             if scores is not None and (
152 |                 self.conf.backend == "pycolmap_cpu" or not pycolmap.has_cuda
153 |             ):
154 |                 # Set the scores as a combination of abs. response and scale.
155 |                 scores = np.abs(scores) * scales
156 |         elif self.conf.backend == "opencv":
157 |             # TODO: Check if opencv keypoints are already in corner convention
158 |             keypoints, scores, scales, angles, descriptors = run_opencv_sift(
159 |                 self.sift, (image_np * 255.0).astype(np.uint8)
160 |             )
161 |         pred = {
162 |             "keypoints": keypoints,
163 |             "scales": scales,
164 |             "oris": angles,
165 |             "descriptors": descriptors,
166 |         }
167 |         if scores is not None:
168 |             pred["keypoint_scores"] = scores
169 | 
170 |         # sometimes pycolmap returns points outside the image. We remove them
171 |         if self.conf.backend.startswith("pycolmap"):
172 |             is_inside = (
173 |                 pred["keypoints"] + 0.5 < np.array([image_np.shape[-2:][::-1]])
174 |             ).all(-1)
175 |             pred = {k: v[is_inside] for k, v in pred.items()}
176 | 
177 |         if self.conf.nms_radius is not None:
178 |             keep = filter_dog_point(
179 |                 pred["keypoints"],
180 |                 pred["scales"],
181 |                 pred["oris"],
182 |                 image_np.shape,
183 |                 self.conf.nms_radius,
184 |                 scores=pred.get("keypoint_scores"),
185 |             )
186 |             pred = {k: v[keep] for k, v in pred.items()}
187 | 
188 |         pred = {k: torch.from_numpy(v) for k, v in pred.items()}
189 |         if scores is not None:
190 |             # Keep the k keypoints with highest score
191 |             num_points = self.conf.max_num_keypoints
192 |             if num_points is not None and len(pred["keypoints"]) > num_points:
193 |                 indices = torch.topk(pred["keypoint_scores"], num_points).indices
194 |                 pred = {k: v[indices] for k, v in pred.items()}
195 | 
196 |         return pred
197 | 
198 |     def forward(self, data: dict) -> dict:
199 |         image = data["image"]
200 |         if image.shape[1] == 3:
201 |             image = rgb_to_grayscale(image)
202 |         device = image.device
203 |         image = image.cpu()
204 |         pred = []
205 |         for k in range(len(image)):
206 |             img = image[k]
207 |             if "image_size" in data.keys():
208 |                 # avoid extracting points in padded areas
209 |                 w, h = data["image_size"][k]
210 |                 img = img[:, :h, :w]
211 |             p = self.extract_single_image(img)
212 |             pred.append(p)
213 |         pred = {k: torch.stack([p[k] for p in pred], 0).to(device) for k in pred[0]}
214 |         if self.conf.rootsift:
215 |             pred["descriptors"] = sift_to_rootsift(pred["descriptors"])
216 |         return pred
217 | 


--------------------------------------------------------------------------------
/lightglue/superpoint.py:
--------------------------------------------------------------------------------
  1 | # %BANNER_BEGIN%
  2 | # ---------------------------------------------------------------------
  3 | # %COPYRIGHT_BEGIN%
  4 | #
  5 | #  Magic Leap, Inc. ("COMPANY") CONFIDENTIAL
  6 | #
  7 | #  Unpublished Copyright (c) 2020
  8 | #  Magic Leap, Inc., All Rights Reserved.
  9 | #
 10 | # NOTICE:  All information contained herein is, and remains the property
 11 | # of COMPANY. The intellectual and technical concepts contained herein
 12 | # are proprietary to COMPANY and may be covered by U.S. and Foreign
 13 | # Patents, patents in process, and are protected by trade secret or
 14 | # copyright law.  Dissemination of this information or reproduction of
 15 | # this material is strictly forbidden unless prior written permission is
 16 | # obtained from COMPANY.  Access to the source code contained herein is
 17 | # hereby forbidden to anyone except current COMPANY employees, managers
 18 | # or contractors who have executed Confidentiality and Non-disclosure
 19 | # agreements explicitly covering such access.
 20 | #
 21 | # The copyright notice above does not evidence any actual or intended
 22 | # publication or disclosure  of  this source code, which includes
 23 | # information that is confidential and/or proprietary, and is a trade
 24 | # secret, of  COMPANY.   ANY REPRODUCTION, MODIFICATION, DISTRIBUTION,
 25 | # PUBLIC  PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE  OF THIS
 26 | # SOURCE CODE  WITHOUT THE EXPRESS WRITTEN CONSENT OF COMPANY IS
 27 | # STRICTLY PROHIBITED, AND IN VIOLATION OF APPLICABLE LAWS AND
 28 | # INTERNATIONAL TREATIES.  THE RECEIPT OR POSSESSION OF  THIS SOURCE
 29 | # CODE AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS
 30 | # TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE,
 31 | # USE, OR SELL ANYTHING THAT IT  MAY DESCRIBE, IN WHOLE OR IN PART.
 32 | #
 33 | # %COPYRIGHT_END%
 34 | # ----------------------------------------------------------------------
 35 | # %AUTHORS_BEGIN%
 36 | #
 37 | #  Originating Authors: Paul-Edouard Sarlin
 38 | #
 39 | # %AUTHORS_END%
 40 | # --------------------------------------------------------------------*/
 41 | # %BANNER_END%
 42 | 
 43 | # Adapted by Remi Pautrat, Philipp Lindenberger
 44 | 
 45 | import torch
 46 | from kornia.color import rgb_to_grayscale
 47 | from torch import nn
 48 | 
 49 | from .utils import Extractor
 50 | 
 51 | 
 52 | def simple_nms(scores, nms_radius: int):
 53 |     """Fast Non-maximum suppression to remove nearby points"""
 54 |     assert nms_radius >= 0
 55 | 
 56 |     def max_pool(x):
 57 |         return torch.nn.functional.max_pool2d(
 58 |             x, kernel_size=nms_radius * 2 + 1, stride=1, padding=nms_radius
 59 |         )
 60 | 
 61 |     zeros = torch.zeros_like(scores)
 62 |     max_mask = scores == max_pool(scores)
 63 |     for _ in range(2):
 64 |         supp_mask = max_pool(max_mask.float()) > 0
 65 |         supp_scores = torch.where(supp_mask, zeros, scores)
 66 |         new_max_mask = supp_scores == max_pool(supp_scores)
 67 |         max_mask = max_mask | (new_max_mask & (~supp_mask))
 68 |     return torch.where(max_mask, scores, zeros)
 69 | 
 70 | 
 71 | def top_k_keypoints(keypoints, scores, k):
 72 |     if k >= len(keypoints):
 73 |         return keypoints, scores
 74 |     scores, indices = torch.topk(scores, k, dim=0, sorted=True)
 75 |     return keypoints[indices], scores
 76 | 
 77 | 
 78 | def sample_descriptors(keypoints, descriptors, s: int = 8):
 79 |     """Interpolate descriptors at keypoint locations"""
 80 |     b, c, h, w = descriptors.shape
 81 |     keypoints = keypoints - s / 2 + 0.5
 82 |     keypoints /= torch.tensor(
 83 |         [(w * s - s / 2 - 0.5), (h * s - s / 2 - 0.5)],
 84 |     ).to(
 85 |         keypoints
 86 |     )[None]
 87 |     keypoints = keypoints * 2 - 1  # normalize to (-1, 1)
 88 |     args = {"align_corners": True} if torch.__version__ >= "1.3" else {}
 89 |     descriptors = torch.nn.functional.grid_sample(
 90 |         descriptors, keypoints.view(b, 1, -1, 2), mode="bilinear", **args
 91 |     )
 92 |     descriptors = torch.nn.functional.normalize(
 93 |         descriptors.reshape(b, c, -1), p=2, dim=1
 94 |     )
 95 |     return descriptors
 96 | 
 97 | 
 98 | class SuperPoint(Extractor):
 99 |     """SuperPoint Convolutional Detector and Descriptor
100 | 
101 |     SuperPoint: Self-Supervised Interest Point Detection and
102 |     Description. Daniel DeTone, Tomasz Malisiewicz, and Andrew
103 |     Rabinovich. In CVPRW, 2019. https://arxiv.org/abs/1712.07629
104 | 
105 |     """
106 | 
107 |     default_conf = {
108 |         "descriptor_dim": 256,
109 |         "nms_radius": 4,
110 |         "max_num_keypoints": None,
111 |         "detection_threshold": 0.0005,
112 |         "remove_borders": 4,
113 |     }
114 | 
115 |     preprocess_conf = {
116 |         "resize": 1024,
117 |     }
118 | 
119 |     required_data_keys = ["image"]
120 | 
121 |     def __init__(self, **conf):
122 |         super().__init__(**conf)  # Update with default configuration.
123 |         self.relu = nn.ReLU(inplace=True)
124 |         self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
125 |         c1, c2, c3, c4, c5 = 64, 64, 128, 128, 256
126 | 
127 |         self.conv1a = nn.Conv2d(1, c1, kernel_size=3, stride=1, padding=1)
128 |         self.conv1b = nn.Conv2d(c1, c1, kernel_size=3, stride=1, padding=1)
129 |         self.conv2a = nn.Conv2d(c1, c2, kernel_size=3, stride=1, padding=1)
130 |         self.conv2b = nn.Conv2d(c2, c2, kernel_size=3, stride=1, padding=1)
131 |         self.conv3a = nn.Conv2d(c2, c3, kernel_size=3, stride=1, padding=1)
132 |         self.conv3b = nn.Conv2d(c3, c3, kernel_size=3, stride=1, padding=1)
133 |         self.conv4a = nn.Conv2d(c3, c4, kernel_size=3, stride=1, padding=1)
134 |         self.conv4b = nn.Conv2d(c4, c4, kernel_size=3, stride=1, padding=1)
135 | 
136 |         self.convPa = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1)
137 |         self.convPb = nn.Conv2d(c5, 65, kernel_size=1, stride=1, padding=0)
138 | 
139 |         self.convDa = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1)
140 |         self.convDb = nn.Conv2d(
141 |             c5, self.conf.descriptor_dim, kernel_size=1, stride=1, padding=0
142 |         )
143 | 
144 |         url = "https://github.com/cvg/LightGlue/releases/download/v0.1_arxiv/superpoint_v1.pth"  # noqa
145 |         self.load_state_dict(torch.hub.load_state_dict_from_url(url))
146 | 
147 |         if self.conf.max_num_keypoints is not None and self.conf.max_num_keypoints <= 0:
148 |             raise ValueError("max_num_keypoints must be positive or None")
149 | 
150 |     def forward(self, data: dict) -> dict:
151 |         """Compute keypoints, scores, descriptors for image"""
152 |         for key in self.required_data_keys:
153 |             assert key in data, f"Missing key {key} in data"
154 |         image = data["image"]
155 |         if image.shape[1] == 3:
156 |             image = rgb_to_grayscale(image)
157 | 
158 |         # Shared Encoder
159 |         x = self.relu(self.conv1a(image))
160 |         x = self.relu(self.conv1b(x))
161 |         x = self.pool(x)
162 |         x = self.relu(self.conv2a(x))
163 |         x = self.relu(self.conv2b(x))
164 |         x = self.pool(x)
165 |         x = self.relu(self.conv3a(x))
166 |         x = self.relu(self.conv3b(x))
167 |         x = self.pool(x)
168 |         x = self.relu(self.conv4a(x))
169 |         x = self.relu(self.conv4b(x))
170 | 
171 |         # Compute the dense keypoint scores
172 |         cPa = self.relu(self.convPa(x))
173 |         scores = self.convPb(cPa)
174 |         scores = torch.nn.functional.softmax(scores, 1)[:, :-1]
175 |         b, _, h, w = scores.shape
176 |         scores = scores.permute(0, 2, 3, 1).reshape(b, h, w, 8, 8)
177 |         scores = scores.permute(0, 1, 3, 2, 4).reshape(b, h * 8, w * 8)
178 |         scores = simple_nms(scores, self.conf.nms_radius)
179 | 
180 |         # Discard keypoints near the image borders
181 |         if self.conf.remove_borders:
182 |             pad = self.conf.remove_borders
183 |             scores[:, :pad] = -1
184 |             scores[:, :, :pad] = -1
185 |             scores[:, -pad:] = -1
186 |             scores[:, :, -pad:] = -1
187 | 
188 |         # Extract keypoints
189 |         best_kp = torch.where(scores > self.conf.detection_threshold)
190 |         scores = scores[best_kp]
191 | 
192 |         # Separate into batches
193 |         keypoints = [
194 |             torch.stack(best_kp[1:3], dim=-1)[best_kp[0] == i] for i in range(b)
195 |         ]
196 |         scores = [scores[best_kp[0] == i] for i in range(b)]
197 | 
198 |         # Keep the k keypoints with highest score
199 |         if self.conf.max_num_keypoints is not None:
200 |             keypoints, scores = list(
201 |                 zip(
202 |                     *[
203 |                         top_k_keypoints(k, s, self.conf.max_num_keypoints)
204 |                         for k, s in zip(keypoints, scores)
205 |                     ]
206 |                 )
207 |             )
208 | 
209 |         # Convert (h, w) to (x, y)
210 |         keypoints = [torch.flip(k, [1]).float() for k in keypoints]
211 | 
212 |         # Compute the dense descriptors
213 |         cDa = self.relu(self.convDa(x))
214 |         descriptors = self.convDb(cDa)
215 |         descriptors = torch.nn.functional.normalize(descriptors, p=2, dim=1)
216 | 
217 |         # Extract descriptors
218 |         descriptors = [
219 |             sample_descriptors(k[None], d[None], 8)[0]
220 |             for k, d in zip(keypoints, descriptors)
221 |         ]
222 | 
223 |         return {
224 |             "keypoints": torch.stack(keypoints, 0),
225 |             "keypoint_scores": torch.stack(scores, 0),
226 |             "descriptors": torch.stack(descriptors, 0).transpose(-1, -2).contiguous(),
227 |         }
228 | 


--------------------------------------------------------------------------------
/lightglue/utils.py:
--------------------------------------------------------------------------------
  1 | import collections.abc as collections
  2 | from pathlib import Path
  3 | from types import SimpleNamespace
  4 | from typing import Callable, List, Optional, Tuple, Union
  5 | 
  6 | import cv2
  7 | import kornia
  8 | import numpy as np
  9 | import torch
 10 | 
 11 | 
 12 | class ImagePreprocessor:
 13 |     default_conf = {
 14 |         "resize": None,  # target edge length, None for no resizing
 15 |         "side": "long",
 16 |         "interpolation": "bilinear",
 17 |         "align_corners": None,
 18 |         "antialias": True,
 19 |     }
 20 | 
 21 |     def __init__(self, **conf) -> None:
 22 |         super().__init__()
 23 |         self.conf = {**self.default_conf, **conf}
 24 |         self.conf = SimpleNamespace(**self.conf)
 25 | 
 26 |     def __call__(self, img: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
 27 |         """Resize and preprocess an image, return image and resize scale"""
 28 |         h, w = img.shape[-2:]
 29 |         if self.conf.resize is not None:
 30 |             img = kornia.geometry.transform.resize(
 31 |                 img,
 32 |                 self.conf.resize,
 33 |                 side=self.conf.side,
 34 |                 antialias=self.conf.antialias,
 35 |                 align_corners=self.conf.align_corners,
 36 |             )
 37 |         scale = torch.Tensor([img.shape[-1] / w, img.shape[-2] / h]).to(img)
 38 |         return img, scale
 39 | 
 40 | 
 41 | def map_tensor(input_, func: Callable):
 42 |     string_classes = (str, bytes)
 43 |     if isinstance(input_, string_classes):
 44 |         return input_
 45 |     elif isinstance(input_, collections.Mapping):
 46 |         return {k: map_tensor(sample, func) for k, sample in input_.items()}
 47 |     elif isinstance(input_, collections.Sequence):
 48 |         return [map_tensor(sample, func) for sample in input_]
 49 |     elif isinstance(input_, torch.Tensor):
 50 |         return func(input_)
 51 |     else:
 52 |         return input_
 53 | 
 54 | 
 55 | def batch_to_device(batch: dict, device: str = "cpu", non_blocking: bool = True):
 56 |     """Move batch (dict) to device"""
 57 | 
 58 |     def _func(tensor):
 59 |         return tensor.to(device=device, non_blocking=non_blocking).detach()
 60 | 
 61 |     return map_tensor(batch, _func)
 62 | 
 63 | 
 64 | def rbd(data: dict) -> dict:
 65 |     """Remove batch dimension from elements in data"""
 66 |     return {
 67 |         k: v[0] if isinstance(v, (torch.Tensor, np.ndarray, list)) else v
 68 |         for k, v in data.items()
 69 |     }
 70 | 
 71 | 
 72 | def read_image(path: Path, grayscale: bool = False) -> np.ndarray:
 73 |     """Read an image from path as RGB or grayscale"""
 74 |     if not Path(path).exists():
 75 |         raise FileNotFoundError(f"No image at path {path}.")
 76 |     mode = cv2.IMREAD_GRAYSCALE if grayscale else cv2.IMREAD_COLOR
 77 |     image = cv2.imread(str(path), mode)
 78 |     if image is None:
 79 |         raise IOError(f"Could not read image at {path}.")
 80 |     if not grayscale:
 81 |         image = image[..., ::-1]
 82 |     return image
 83 | 
 84 | 
 85 | def numpy_image_to_torch(image: np.ndarray) -> torch.Tensor:
 86 |     """Normalize the image tensor and reorder the dimensions."""
 87 |     if image.ndim == 3:
 88 |         image = image.transpose((2, 0, 1))  # HxWxC to CxHxW
 89 |     elif image.ndim == 2:
 90 |         image = image[None]  # add channel axis
 91 |     else:
 92 |         raise ValueError(f"Not an image: {image.shape}")
 93 |     return torch.tensor(image / 255.0, dtype=torch.float)
 94 | 
 95 | 
 96 | def resize_image(
 97 |     image: np.ndarray,
 98 |     size: Union[List[int], int],
 99 |     fn: str = "max",
100 |     interp: Optional[str] = "area",
101 | ) -> np.ndarray:
102 |     """Resize an image to a fixed size, or according to max or min edge."""
103 |     h, w = image.shape[:2]
104 | 
105 |     fn = {"max": max, "min": min}[fn]
106 |     if isinstance(size, int):
107 |         scale = size / fn(h, w)
108 |         h_new, w_new = int(round(h * scale)), int(round(w * scale))
109 |         scale = (w_new / w, h_new / h)
110 |     elif isinstance(size, (tuple, list)):
111 |         h_new, w_new = size
112 |         scale = (w_new / w, h_new / h)
113 |     else:
114 |         raise ValueError(f"Incorrect new size: {size}")
115 |     mode = {
116 |         "linear": cv2.INTER_LINEAR,
117 |         "cubic": cv2.INTER_CUBIC,
118 |         "nearest": cv2.INTER_NEAREST,
119 |         "area": cv2.INTER_AREA,
120 |     }[interp]
121 |     return cv2.resize(image, (w_new, h_new), interpolation=mode), scale
122 | 
123 | 
def load_image(path: Path, resize: int = None, **kwargs) -> torch.Tensor:
    """Read an image, optionally resize it, and convert it to a torch tensor.

    Extra keyword arguments are forwarded to ``resize_image``.
    """
    image = read_image(path)
    if resize is None:
        return numpy_image_to_torch(image)
    resized, _ = resize_image(image, resize, **kwargs)
    return numpy_image_to_torch(resized)
129 | 
130 | 
class Extractor(torch.nn.Module):
    """Base module for feature extractors.

    Subclasses are expected to provide ``default_conf``, ``preprocess_conf``
    and ``forward``; none of them is defined here.
    """

    def __init__(self, **conf):
        """Store ``default_conf`` updated with the keyword overrides."""
        super().__init__()
        merged = dict(self.default_conf)
        merged.update(conf)
        self.conf = SimpleNamespace(**merged)

    @torch.no_grad()
    def extract(self, img: torch.Tensor, **conf) -> dict:
        """Perform extraction with online resizing"""
        if img.dim() == 3:
            img = img[None]  # add batch dim
        assert img.dim() == 4 and img.shape[0] == 1
        shape = img.shape[-2:][::-1]  # (W, H) of the input before resizing
        preprocessor = ImagePreprocessor(**{**self.preprocess_conf, **conf})
        img, scales = preprocessor(img)
        feats = self.forward({"image": img})
        feats["image_size"] = torch.tensor(shape)[None].to(img).float()
        # Undo the resize: shift by half a pixel, rescale, then shift back.
        feats["keypoints"] = (feats["keypoints"] + 0.5) / scales[None] - 0.5
        return feats
148 | 
149 | 
def match_pair(
    extractor,
    matcher,
    image0: torch.Tensor,
    image1: torch.Tensor,
    device: str = "cpu",
    **preprocess,
):
    """Match a pair of images (image0, image1) with an extractor and matcher

    Extra keyword arguments are forwarded to ``extractor.extract``. Returns
    the features of both images and the matches, each without its batch
    dimension and moved to ``device``.
    """
    feats0 = extractor.extract(image0, **preprocess)
    feats1 = extractor.extract(image1, **preprocess)
    matches01 = matcher({"image0": feats0, "image1": feats1})
    # remove batch dim and move to target device
    feats0, feats1, matches01 = (
        batch_to_device(rbd(item), device) for item in (feats0, feats1, matches01)
    )
    return feats0, feats1, matches01
166 | 


--------------------------------------------------------------------------------
/lightglue/viz2d.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 2D visualization primitives based on Matplotlib.
  3 | 1) Plot images with `plot_images`.
  4 | 2) Call `plot_keypoints` or `plot_matches` any number of times.
  5 | 3) Optionally: save a .png or .pdf plot (nice in papers!) with `save_plot`.
  6 | """
  7 | 
  8 | import matplotlib
  9 | import matplotlib.patheffects as path_effects
 10 | import matplotlib.pyplot as plt
 11 | import numpy as np
 12 | import torch
 13 | 
 14 | 
 15 | def cm_RdGn(x):
 16 |     """Custom colormap: red (0) -> yellow (0.5) -> green (1)."""
 17 |     x = np.clip(x, 0, 1)[..., None] * 2
 18 |     c = x * np.array([[0, 1.0, 0]]) + (2 - x) * np.array([[1.0, 0, 0]])
 19 |     return np.clip(c, 0, 1)
 20 | 
 21 | 
 22 | def cm_BlRdGn(x_):
 23 |     """Custom colormap: blue (-1) -> red (0.0) -> green (1)."""
 24 |     x = np.clip(x_, 0, 1)[..., None] * 2
 25 |     c = x * np.array([[0, 1.0, 0, 1.0]]) + (2 - x) * np.array([[1.0, 0, 0, 1.0]])
 26 | 
 27 |     xn = -np.clip(x_, -1, 0)[..., None] * 2
 28 |     cn = xn * np.array([[0, 0.1, 1, 1.0]]) + (2 - xn) * np.array([[1.0, 0, 0, 1.0]])
 29 |     out = np.clip(np.where(x_[..., None] < 0, cn, c), 0, 1)
 30 |     return out
 31 | 
 32 | 
 33 | def cm_prune(x_):
 34 |     """Custom colormap to visualize pruning"""
 35 |     if isinstance(x_, torch.Tensor):
 36 |         x_ = x_.cpu().numpy()
 37 |     max_i = max(x_)
 38 |     norm_x = np.where(x_ == max_i, -1, (x_ - 1) / 9)
 39 |     return cm_BlRdGn(norm_x)
 40 | 
 41 | 
 42 | def plot_images(imgs, titles=None, cmaps="gray", dpi=100, pad=0.5, adaptive=True):
 43 |     """Plot a set of images horizontally.
 44 |     Args:
 45 |         imgs: list of NumPy RGB (H, W, 3) or PyTorch RGB (3, H, W) or mono (H, W).
 46 |         titles: a list of strings, as titles for each image.
 47 |         cmaps: colormaps for monochrome images.
 48 |         adaptive: whether the figure size should fit the image aspect ratios.
 49 |     """
 50 |     # conversion to (H, W, 3) for torch.Tensor
 51 |     imgs = [
 52 |         img.permute(1, 2, 0).cpu().numpy()
 53 |         if (isinstance(img, torch.Tensor) and img.dim() == 3)
 54 |         else img
 55 |         for img in imgs
 56 |     ]
 57 | 
 58 |     n = len(imgs)
 59 |     if not isinstance(cmaps, (list, tuple)):
 60 |         cmaps = [cmaps] * n
 61 | 
 62 |     if adaptive:
 63 |         ratios = [i.shape[1] / i.shape[0] for i in imgs]  # W / H
 64 |     else:
 65 |         ratios = [4 / 3] * n
 66 |     figsize = [sum(ratios) * 4.5, 4.5]
 67 |     fig, ax = plt.subplots(
 68 |         1, n, figsize=figsize, dpi=dpi, gridspec_kw={"width_ratios": ratios}
 69 |     )
 70 |     if n == 1:
 71 |         ax = [ax]
 72 |     for i in range(n):
 73 |         ax[i].imshow(imgs[i], cmap=plt.get_cmap(cmaps[i]))
 74 |         ax[i].get_yaxis().set_ticks([])
 75 |         ax[i].get_xaxis().set_ticks([])
 76 |         ax[i].set_axis_off()
 77 |         for spine in ax[i].spines.values():  # remove frame
 78 |             spine.set_visible(False)
 79 |         if titles:
 80 |             ax[i].set_title(titles[i])
 81 |     fig.tight_layout(pad=pad)
 82 | 
 83 | 
 84 | def plot_keypoints(kpts, colors="lime", ps=4, axes=None, a=1.0):
 85 |     """Plot keypoints for existing images.
 86 |     Args:
 87 |         kpts: list of ndarrays of size (N, 2).
 88 |         colors: string, or list of list of tuples (one for each keypoints).
 89 |         ps: size of the keypoints as float.
 90 |     """
 91 |     if not isinstance(colors, list):
 92 |         colors = [colors] * len(kpts)
 93 |     if not isinstance(a, list):
 94 |         a = [a] * len(kpts)
 95 |     if axes is None:
 96 |         axes = plt.gcf().axes
 97 |     for ax, k, c, alpha in zip(axes, kpts, colors, a):
 98 |         if isinstance(k, torch.Tensor):
 99 |             k = k.cpu().numpy()
100 |         ax.scatter(k[:, 0], k[:, 1], c=c, s=ps, linewidths=0, alpha=alpha)
101 | 
102 | 
def plot_matches(kpts0, kpts1, color=None, lw=1.5, ps=4, a=1.0, labels=None, axes=None):
    """Plot matches for a pair of existing images.
    Args:
        kpts0, kpts1: corresponding keypoints of size (N, 2).
        color: color of each match, string or RGB tuple. Random if not given.
        lw: width of the lines (no line if lw=0).
        ps: size of the end points (no endpoint if ps=0)
        a: alpha opacity of the match lines.
        labels: optional list with one label per match line.
        axes: pair of axes to draw on, defaults to the first two axes of
            the current figure.
    """
    fig = plt.gcf()
    if axes is None:
        figure_axes = fig.axes
        ax0 = figure_axes[0]
        ax1 = figure_axes[1]
    else:
        ax0, ax1 = axes
    if isinstance(kpts0, torch.Tensor):
        kpts0 = kpts0.cpu().numpy()
    if isinstance(kpts1, torch.Tensor):
        kpts1 = kpts1.cpu().numpy()
    assert len(kpts0) == len(kpts1)
    if color is None:
        color = matplotlib.cm.hsv(np.random.rand(len(kpts0))).tolist()
    elif len(color) > 0 and not isinstance(color[0], (tuple, list)):
        # A single color was given: repeat it for every match.
        color = [color] * len(kpts0)

    if lw > 0:
        for i in range(len(kpts0)):
            label = None if labels is None else labels[i]
            line = matplotlib.patches.ConnectionPatch(
                xyA=(kpts0[i, 0], kpts0[i, 1]),
                xyB=(kpts1[i, 0], kpts1[i, 1]),
                coordsA=ax0.transData,
                coordsB=ax1.transData,
                axesA=ax0,
                axesB=ax1,
                zorder=1,
                color=color[i],
                linewidth=lw,
                clip_on=True,
                alpha=a,
                label=label,
                picker=5.0,
            )
            line.set_annotation_clip(True)
            fig.add_artist(line)

    # freeze the axes to prevent the transform to change
    for axis in (ax0, ax1):
        axis.autoscale(enable=False)

    if ps > 0:
        ax0.scatter(kpts0[:, 0], kpts0[:, 1], c=color, s=ps)
        ax1.scatter(kpts1[:, 0], kpts1[:, 1], c=color, s=ps)
156 | 
157 | 
def add_text(
    idx,
    text,
    pos=(0.01, 0.99),
    fs=15,
    color="w",
    lcolor="k",
    lwidth=2,
    ha="left",
    va="top",
):
    """Write text on one of the axes of the current figure.

    Args:
        idx: index of the target axes in the current figure.
        text: string to display.
        pos: position in axes coordinates.
        fs: font size.
        color: text color.
        lcolor: outline color, None to draw no outline.
        lwidth: outline width.
        ha, va: horizontal and vertical alignment.
    """
    axis = plt.gcf().axes[idx]
    handle = axis.text(
        *pos, text, fontsize=fs, ha=ha, va=va, color=color, transform=axis.transAxes
    )
    if lcolor is None:
        return
    outline = path_effects.Stroke(linewidth=lwidth, foreground=lcolor)
    handle.set_path_effects([outline, path_effects.Normal()])
180 | 
181 | 
def save_plot(path, **kw):
    """Save the current figure without any white margin.

    Extra keyword arguments are forwarded to ``plt.savefig``.
    """
    tight = {"bbox_inches": "tight", "pad_inches": 0}
    plt.savefig(path, **tight, **kw)
185 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "lightglue"
 3 | description = "LightGlue: Local Feature Matching at Light Speed"
 4 | version = "0.0"
 5 | authors = [
 6 |     {name = "Philipp Lindenberger"},
 7 |     {name = "Paul-Edouard Sarlin"},
 8 | ]
 9 | readme = "README.md"
10 | requires-python = ">=3.6"
11 | license = {file = "LICENSE"}
12 | classifiers = [
13 |     "Programming Language :: Python :: 3",
14 |     "License :: OSI Approved :: Apache Software License",
15 |     "Operating System :: OS Independent",
16 | ]
17 | urls = {Repository = "https://github.com/cvg/LightGlue/"}
18 | dynamic = ["dependencies"]
19 | 
20 | [project.optional-dependencies]
21 | dev = ["black==23.12.1", "flake8", "isort"]
22 | 
23 | [tool.setuptools]
24 | packages = ["lightglue"]
25 | 
26 | [tool.setuptools.dynamic]
27 | dependencies = {file = ["requirements.txt"]}
28 | 
29 | [tool.isort]
30 | profile = "black"
31 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | torch>=1.9.1
2 | torchvision>=0.3
3 | numpy
4 | opencv-python
5 | matplotlib
6 | kornia>=0.6.11


--------------------------------------------------------------------------------