├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── assets ├── New.png └── demo.gif ├── cfgs ├── 7scenes.yaml ├── Cambridge.yaml ├── default.yaml └── indoor6.yaml ├── datasets ├── _base.py ├── augmentation.py ├── data_collection.py ├── dataloader.py └── test.py ├── detectors ├── line2d │ ├── DeepLSD │ │ └── deeplsd.py │ ├── LSD │ │ └── lsd.py │ ├── linebase_detector.py │ └── register_linedetector.py └── point2d │ ├── SuperPoint │ └── superpoint.py │ └── register_pointdetector.py ├── models ├── base_model.py ├── pipeline.py ├── pl2map.py ├── util.py └── util_learner.py ├── prepare_scripts ├── cambridge.sh ├── download_pre_trained_models.sh ├── indoor6.sh └── seven_scenes.sh ├── requirements.txt ├── runners ├── eval.py ├── evaluator.py ├── train.py └── trainer.py └── util ├── config.py ├── help_evaluation.py ├── io.py ├── logger.py ├── pose_estimator.py ├── read_write_model.py └── visualize.py /.gitignore: -------------------------------------------------------------------------------- 1 | datasets/gt_3Dmodels/ 2 | datasets/imgs_datasets/ 3 | detectors/point2d/SuperPoint/weights/ 4 | visualization/ 5 | visualization_all/ 6 | logs/ 7 | pre_train_logs/ 8 | experiments/ 9 | __pycache__/ 10 | train_test_datasets/ 11 | train_test_datasets_origin/ 12 | 13 | *.npy 14 | *.png 15 | *.jpg 16 | *.pyc 17 | *.tar 18 | *.obj 19 | *.pth 20 | *.gif 21 | *.mp4 22 | *.h5 23 | *.swp 24 | *.zip 25 | *.tar.gz 26 | *.th 27 | *.so 28 | 29 | .vscode -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "third_party/DeepLSD"] 2 | path = third_party/DeepLSD 3 | url = https://github.com/cvg/DeepLSD.git 4 | [submodule "third_party/pytlsd"] 5 | path = third_party/pytlsd 6 | url = https://github.com/iago-suarez/pytlsd.git 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2024 Thuan 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Point-Line to Map Regression for Camera Relocalization 2 | #### [Project Page](https://thpjp.github.io/pl2map/) | [PL2Map](https://arxiv.org/abs/2402.18011) | [PL2Map++](https://arxiv.org/pdf/2502.20814) (code for PL2Map++ is coming soon) 3 | ## Introduction 4 | 5 | ![demo_vid](assets/demo.gif) 6 | 7 | We introduce a lightweight neural network for visual localization that efficiently represents both 3D points and lines. Specifically, we use a single transformer block to convert line features into distinctive point-like descriptors. These features are then refined through self- and cross-attention in a graph-based framework before 3D map regression using simple MLPs. Our method outperforms [Hloc](https://github.com/cvg/Hierarchical-Localization) and [Limap](https://github.com/cvg/limap) in small-scale indoor localization and achieves the best results in outdoor settings, setting a new benchmark for learning-based approaches. It also operates in real time at ~16 FPS, compared to [Limap](https://github.com/cvg/limap)’s ~0.03 FPS, while requiring only 33 MB of network weights instead of [Limap](https://github.com/cvg/limap)’s multi-GB memory footprint. 8 | 9 | --- 10 | ## Papers 11 | **Improved 3D Point-Line Mapping Regression for Camera Relocalization** ![new](assets/New.png) 12 | Bach-Thuan Bui, Huy-Hoang Bui, Yasuyuki Fujii, Dinh-Tuan Tran, and Joo-Ho Lee. 13 | arXiv preprint arXiv:2502.20814, 2025. 14 | [pdf](https://arxiv.org/pdf/2502.20814) 15 | 16 | **Representing 3D sparse map points and lines for camera relocalization** 17 | Bach-Thuan Bui, Huy-Hoang Bui, Dinh-Tuan Tran, and Joo-Ho Lee. 18 | IEEE/RSJ International Conference on Intelligent Robots and Systems (**IROS**), 2024.
19 | [pdf](https://arxiv.org/abs/2402.18011) 20 | 21 | 22 | ## Installation 23 | Python 3.9 + required packages 24 | ``` 25 | git clone https://github.com/ais-lab/pl2map.git 26 | cd pl2map 27 | git submodule update --init --recursive 28 | conda create --name pl2map python=3.9 29 | conda activate pl2map 30 | # Refer to https://pytorch.org/get-started/previous-versions/ to install a PyTorch version compatible with your CUDA 31 | python -m pip install torch==1.12.0 torchvision==0.13.0 32 | python -m pip install -r requirements.txt 33 | ``` 34 | ## Supported datasets 35 | - [Microsoft 7scenes](https://www.microsoft.com/en-us/research/project/rgb-d-dataset-7-scenes/) 36 | - [Cambridge Landmarks](https://www.repository.cam.ac.uk/handle/1810/251342/) 37 | - [Indoor-6](https://github.com/microsoft/SceneLandmarkLocalization) 38 | 39 | Please download and prepare the preprocessed data by running the provided scripts: 40 | 41 | 7scenes 42 | ``` 43 | ./prepare_scripts/seven_scenes.sh 44 | ``` 45 | Cambridge Landmarks 46 | ``` 47 | ./prepare_scripts/cambridge.sh 48 | ``` 49 | Indoor-6 50 | ``` 51 | ./prepare_scripts/indoor6.sh 52 | ``` 53 | 54 | ## Evaluation with pre-trained models 55 | Please download the pre-trained models by running: 56 | ``` 57 | ./prepare_scripts/download_pre_trained_models.sh 58 | ``` 59 | For example, to evaluate the KingsCollege scene: 60 | ``` 61 | python runners/eval.py --dataset Cambridge --scene KingsCollege -expv pl2map 62 | ``` 63 | 64 | ## Training 65 | ``` 66 | python runners/train.py --dataset Cambridge --scene KingsCollege -expv pl2map_test 67 | ``` 68 | 69 | ## Supported detectors 70 | ### Lines 71 | - [LSD](https://github.com/iago-suarez/pytlsd) 72 | - [DeepLSD](https://github.com/cvg/DeepLSD) 73 | ### Points 74 | - [SuperPoint](https://github.com/rpautrat/SuperPoint) 75 | 76 | 77 | ## Citation 78 | If you use this code in your project, please consider citing the following papers: 79 | ```bibtex 80 | @inproceedings{bui2024pl2map, 81 | title={Representing 3D sparse map points and lines for camera relocalization}, 82 | author={Bui, Bach-Thuan and Bui, Huy-Hoang and Tran, Dinh-Tuan and Lee, Joo-Ho}, 83 | booktitle={2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)}, 84 | year={2024} 85 | } 86 | @article{bui2025improved, 87 | title={Improved 3D Point-Line Mapping Regression for Camera Relocalization}, 88 | author={Bui, Bach-Thuan and Bui, Huy-Hoang and Fujii, Yasuyuki and Tran, Dinh-Tuan and Lee, Joo-Ho}, 89 | journal={arXiv preprint arXiv:2502.20814}, 90 | year={2025} 91 | } 92 | ``` 93 | This code builds on a previous camera relocalization pipeline, [D2S](https://github.com/ais-lab/d2s); please also consider citing: 94 | ```bibtex 95 | @article{bui2024d2s, 96 | title={D2S: Representing sparse descriptors and 3D coordinates for camera relocalization}, 97 | author={Bui, Bach-Thuan and Bui, Huy-Hoang and Tran, Dinh-Tuan and Lee, Joo-Ho}, 98 | journal={IEEE Robotics and Automation Letters}, 99 | year={2024} 100 | } 101 | ``` 102 | 103 | ## Acknowledgement 104 | This code is built on top of [Limap](https://github.com/cvg/limap) and [LineTR](https://github.com/yosungho/LineTR). We thank the authors for their useful source code.
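As a usage note for the Evaluation and Training sections above: the same CLI can be looped over several scenes. The sketch below is illustrative only; the scene identifiers are assumed to match the folder names created by the prepare scripts (e.g. the standard Cambridge Landmarks scene names) and may need to be adjusted to your local data.
```
# Hypothetical batch evaluation over Cambridge scenes (scene names are assumptions, adjust as needed)
for SCENE in KingsCollege OldHospital ShopFacade StMarysChurch; do
    python runners/eval.py --dataset Cambridge --scene "$SCENE" -expv pl2map
done
```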
105 | 106 | 107 | -------------------------------------------------------------------------------- /assets/New.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ais-lab/pl2map/8d1a9289bd9505647e2fbdaf4719310e51ba8e8b/assets/New.png -------------------------------------------------------------------------------- /assets/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ais-lab/pl2map/8d1a9289bd9505647e2fbdaf4719310e51ba8e8b/assets/demo.gif -------------------------------------------------------------------------------- /cfgs/7scenes.yaml: -------------------------------------------------------------------------------- 1 | line2d: 2 | max_num_2d_segs: 3000 3 | do_merge_lines: False # Not implemented 4 | visualize: False 5 | save_l3dpp: False 6 | detector: 7 | name: "deeplsd" # ["lsd", "sold2", "deeplsd"] - for loading labeled 3D model 8 | name_test_model: "deeplsd" # ["lsd", "sold2", "deeplsd"] - for testing (not depending on pre-3D model) 9 | preprocessing: 10 | grayscale: True 11 | 12 | point2d: 13 | detector: 14 | name: "superpoint" # ["superpoint", "sift"] 15 | configs: 16 | force_num_keypoints: False 17 | nms_radius: 3 18 | max_keypoints: 2048 19 | preprocessing: 20 | grayscale: True 21 | resize_max: 1024 22 | resize_force: True 23 | interpolation: 'cv2_area' # pil_linear is more accurate but slower 24 | matcher: "NN-superpoint" # ["superglue", "gluestick"] # not implemented (for unlabeled learning) 25 | 26 | regressor: 27 | name: pl2map # ["pl2map", or others] 28 | use_line: True 29 | use_point: True 30 | n_line_keypoints: 10 # number of keypoints used to represent a line 31 | 32 | train: # train configs 33 | batch_size: 1 34 | num_iters: 1500000 # number training iterations 35 | loader_shuffle: True 36 | loader_num_workers: 8 37 | log_interval: 500 # log every n batches (visdom graph) 38 | use_depth: False # use SfM corrected by depth or not 39 | loss: 40 | reprojection: 41 | apply: True 42 | type: "dyntanh" # ["l1", "l1+sqrt", "l1+log", "tanh", "dyntanh"] 43 | soft_clamp: 50 44 | soft_clamp_min: 1 45 | circle_schedule: True # 'circle'(weight increasing) or 'linear' (weight decreasing) 46 | augmentation: 47 | apply: True 48 | on_rate: 0.5 # probability of applying augmentation 49 | brightness: 0.02 50 | contrast: 0.02 51 | homography: 52 | apply: False # if apply, augmented poses will be incorrect 53 | perspective: True 54 | scaling: True 55 | rotation: True 56 | translation: True 57 | n_scales: 5 58 | n_angles: 25 59 | scaling_amplitude: 0.1 60 | perspective_amplitude_x: 0.1 61 | perspective_amplitude_y: 0.1 62 | patch_ratio: 0.8 # ratio of the patch to the image 63 | max_angle: 45 # in degrees 64 | allow_artifacts: False 65 | dsacstar: # apply DSAC*-like augmentation 66 | apply: True # homography augmentation must be disabled 67 | aug_rotation: 30 # in degrees 68 | aug_scale_min: 0.666666666 # 2/3 69 | aug_scale_max: 1.5 # 3/2 70 | 71 | 72 | optimizer: 73 | method: adam 74 | base_lr: 0.0003 # base/start learning rate 75 | weight_decay: 0.0 76 | lr_decay: 0.5 # decay rate 77 | num_lr_decay_step: 7 # decay every n epochs 78 | 79 | localization: 80 | ransac: 81 | max_reproj_error: 12.0 82 | max_epipolar_error: 10.0 -------------------------------------------------------------------------------- /cfgs/Cambridge.yaml: -------------------------------------------------------------------------------- 1 | line2d: 2 | max_num_2d_segs: 3000 3 | 
do_merge_lines: False # Not implemented 4 | visualize: False 5 | save_l3dpp: False 6 | detector: 7 | name: "deeplsd" # ["lsd", "sold2", "deeplsd"] - for loading labeled 3D model 8 | name_test_model: "deeplsd" # ["lsd", "sold2", "deeplsd"] - for testing (not depending on pre-3D model) 9 | preprocessing: 10 | grayscale: True 11 | 12 | point2d: 13 | detector: 14 | name: "superpoint" # ["superpoint", "sift"] 15 | configs: 16 | force_num_keypoints: False 17 | nms_radius: 3 18 | max_keypoints: 2048 19 | preprocessing: 20 | grayscale: True 21 | resize_max: 1024 22 | resize_force: True 23 | interpolation: 'cv2_area' # pil_linear is more accurate but slower 24 | matcher: "NN-superpoint" # ["superglue", "gluestick"] # not implemented (for unlabeled learning) 25 | 26 | regressor: 27 | name: pl2map # ["pl2map", "d2s"] 28 | use_line: True 29 | use_point: True 30 | n_line_keypoints: 10 # number of keypoints used to represent a line 31 | 32 | train: # train configs 33 | batch_size: 1 34 | num_iters: 1500000 # number training iterations 35 | loader_shuffle: True 36 | loader_num_workers: 8 37 | log_interval: 500 # log every n batches (visdom graph) 38 | loss: 39 | reprojection: 40 | apply: True 41 | start_apply: 0.8 # start applying reprojection loss 42 | type: "dyntanh" # ["l1", "l1+sqrt", "l1+log", "tanh", "dyntanh"] 43 | soft_clamp: 100 44 | soft_clamp_min: 1 45 | circle_schedule: True # 'circle'(weight increasing) or 'linear' (weight decreasing) 46 | augmentation: 47 | apply: True 48 | on_rate: 0.5 # probability of applying augmentation 49 | brightness: 0.15 50 | contrast: 0.1 51 | homography: 52 | apply: False # if apply, augmented poses will be incorrect 53 | perspective: True 54 | scaling: True 55 | rotation: True 56 | translation: True 57 | n_scales: 5 58 | n_angles: 25 59 | scaling_amplitude: 0.1 60 | perspective_amplitude_x: 0.1 61 | perspective_amplitude_y: 0.1 62 | patch_ratio: 0.8 # ratio of the patch to the image 63 | max_angle: 45 # in degrees 64 | allow_artifacts: False 65 | dsacstar: # apply DSAC*-like augmentation 66 | apply: True # homography augmentation must be disabled 67 | aug_rotation: 30 # in degrees 68 | aug_scale_min: 0.666666666 # 2/3 69 | aug_scale_max: 1.5 # 3/2 70 | 71 | 72 | optimizer: 73 | method: adam 74 | base_lr: 0.0005 # base/start learning rate 75 | weight_decay: 0.0 76 | lr_decay: 0.5 # decay rate 77 | num_lr_decay_step: 7 # decay every n epochs, 7 78 | 79 | localization: 80 | ransac: 81 | max_reproj_error: 12.0 82 | max_epipolar_error: 10.0 83 | -------------------------------------------------------------------------------- /cfgs/default.yaml: -------------------------------------------------------------------------------- 1 | line2d: 2 | max_num_2d_segs: 3000 3 | do_merge_lines: False # Not implemented 4 | visualize: False 5 | save_l3dpp: False 6 | detector: 7 | name: "deeplsd" # ["lsd", "sold2", "deeplsd"] - for loading labeled 3D model 8 | name_test_model: "deeplsd" # ["lsd", "sold2", "deeplsd"] - for testing (not depending on pre-3D model) 9 | preprocessing: 10 | grayscale: True 11 | 12 | point2d: 13 | detector: 14 | name: "superpoint" # ["superpoint", "sift"] 15 | configs: 16 | force_num_keypoints: False 17 | nms_radius: 3 18 | max_keypoints: 2048 19 | preprocessing: 20 | grayscale: True 21 | resize_max: 1024 22 | resize_force: True 23 | interpolation: 'cv2_area' # pil_linear is more accurate but slower 24 | matcher: "NN-superpoint" # ["superglue", "gluestick"] # not implemented (for unlabeled learning) 25 | 26 | regressor: 27 | name: pl2map # ["pl2map", "d2s"] 
28 | use_line: True 29 | use_point: True 30 | n_line_keypoints: 10 # number of keypoints used to represent a line 31 | 32 | train: # train configs 33 | batch_size: 1 34 | num_iters: 2500000 # number training iterations 35 | loader_shuffle: True 36 | loader_num_workers: 8 37 | log_interval: 500 # log every n batches (visdom graph) 38 | use_depth: False # use SfM corrected by depth or not 39 | loss: 40 | reprojection: 41 | apply: False 42 | type: "dyntanh" # ["l1", "l1+sqrt", "l1+log", "tanh", "dyntanh"] 43 | soft_clamp: 50 44 | soft_clamp_min: 1 45 | circle_schedule: True # 'circle'(weight increasing) or 'linear' (weight decreasing) 46 | augmentation: 47 | apply: False 48 | on_rate: 0.5 # probability of applying augmentation 49 | brightness: 0.1 50 | contrast: 0.1 51 | homography: 52 | apply: False # if apply, augmented poses will be incorrect 53 | perspective: True 54 | scaling: True 55 | rotation: True 56 | translation: True 57 | n_scales: 5 58 | n_angles: 25 59 | scaling_amplitude: 0.1 60 | perspective_amplitude_x: 0.1 61 | perspective_amplitude_y: 0.1 62 | patch_ratio: 0.8 # ratio of the patch to the image 63 | max_angle: 45 # in degrees 64 | allow_artifacts: False 65 | dsacstar: # apply DSAC*-like augmentation 66 | apply: False # homography augmentation must be disabled 67 | aug_rotation: 30 # in degrees 68 | aug_scale_min: 0.666666666 # 2/3 69 | aug_scale_max: 1.5 # 3/2 70 | 71 | localization: 72 | 2d_matcher: "sold2" # ["epipolar", "sold2", "superglue_endpoints"] Other configs for superglue_endpoints are the same as in "line2d" section 73 | epipolar_filter: False # No use for epipolar matcher 74 | IoU_threshold: 0.2 75 | reprojection_filter: null # [null, "Perpendicular", "Midpoint", "Midpoint_Perpendicular"] 76 | ransac: 77 | method: "hybrid" # [null, "ransac", "solver", "hybrid"] 78 | thres: 10.0 # Only for normal & solver 79 | thres_point: 10.0 80 | thres_line: 10.0 81 | weight_point: 1.0 # data type weights for scoring 82 | weight_line: 1.0 # data type weights for scoring 83 | final_least_squares: True 84 | min_num_iterations: 100 85 | solver_flags: [True, True, True, True] 86 | optimize: 87 | loss_func: "TrivialLoss" 88 | loss_func_args: [] 89 | line_cost_func: "PerpendicularDist" 90 | line_weight: 1.0 # weight for optimization (cost function) 91 | hloc: 92 | skip_exists: False 93 | skip_exists: False 94 | -------------------------------------------------------------------------------- /cfgs/indoor6.yaml: -------------------------------------------------------------------------------- 1 | line2d: 2 | max_num_2d_segs: 3000 3 | do_merge_lines: False # Not implemented 4 | visualize: False 5 | save_l3dpp: False 6 | detector: 7 | name: "deeplsd" # ["lsd", "sold2", "deeplsd"] - for loading labeled 3D model 8 | name_test_model: "deeplsd" # ["lsd", "sold2", "deeplsd"] - for testing (not depending on pre-3D model) 9 | preprocessing: 10 | grayscale: True 11 | 12 | point2d: 13 | detector: 14 | name: "superpoint" # ["superpoint", "sift"] 15 | configs: 16 | force_num_keypoints: False 17 | nms_radius: 3 18 | max_keypoints: 2048 19 | preprocessing: 20 | grayscale: True 21 | resize_max: 1024 22 | resize_force: True 23 | interpolation: 'cv2_area' # pil_linear is more accurate but slower 24 | matcher: "NN-superpoint" # ["superglue", "gluestick"] # not implemented (for unlabeled learning) 25 | 26 | regressor: 27 | name: pl2map # pl2map_only_point or pl2map_sep 28 | use_line: True 29 | use_point: True 30 | n_line_keypoints: 10 # number of keypoints used to represent a line 31 | 32 | train: # train 
configs 33 | batch_size: 1 34 | num_iters: 1500000 # number training iterations 35 | loader_shuffle: True 36 | loader_num_workers: 8 37 | log_interval: 500 # log every n batches (visdom graph) 38 | loss: 39 | reprojection: 40 | apply: False 41 | start_apply: 0.05 # start applying reprojection loss 42 | type: "dyntanh" # ["l1", "l1+sqrt", "l1+log", "tanh", "dyntanh"] 43 | soft_clamp: 100 44 | soft_clamp_min: 1 45 | circle_schedule: True # 'circle'(weight increasing) or 'linear' (weight decreasing) 46 | augmentation: 47 | apply: True 48 | on_rate: 0.5 # probability of applying augmentation 49 | brightness: 0.15 50 | contrast: 0.1 51 | homography: 52 | apply: True # if apply, augmented poses will be incorrect 53 | perspective: True 54 | scaling: True 55 | rotation: True 56 | translation: True 57 | n_scales: 5 58 | n_angles: 25 59 | scaling_amplitude: 0.1 60 | perspective_amplitude_x: 0.1 61 | perspective_amplitude_y: 0.1 62 | patch_ratio: 0.8 # ratio of the patch to the image 63 | max_angle: 45 # in degrees 64 | allow_artifacts: False 65 | dsacstar: # apply DSAC*-like augmentation 66 | apply: False # homography augmentation must be disabled 67 | aug_rotation: 30 # in degrees 68 | aug_scale_min: 0.666666666 # 2/3 69 | aug_scale_max: 1.5 # 3/2 70 | 71 | 72 | optimizer: 73 | method: adam 74 | base_lr: 0.0002 # base/start learning rate 75 | weight_decay: 0.0 76 | lr_decay: 0.5 # decay rate 77 | num_lr_decay_step: 7 # decay every n epochs, 7 78 | 79 | localization: 80 | ransac: 81 | max_reproj_error: 12.0 82 | max_epipolar_error: 10.0 -------------------------------------------------------------------------------- /datasets/_base.py: -------------------------------------------------------------------------------- 1 | from typing import NamedTuple 2 | import numpy as np 3 | import os 4 | import sys 5 | import torch 6 | import math 7 | from scipy.spatial.transform import Rotation as R 8 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 9 | from detectors.line2d.register_linedetector import get_linedetector 10 | from detectors.point2d.register_pointdetector import get_pointdetector 11 | from util.io import read_image 12 | import copy 13 | import datasets.augmentation as aug 14 | 15 | 16 | def frame2tensor(frame, device): 17 | return torch.from_numpy(frame/255.).float()[None, None].to(device) 18 | 19 | class Camera(): 20 | ''' 21 | CameraModel(model_id=0, model_name="SIMPLE_PINHOLE", num_params=3), 22 | CameraModel(model_id=1, model_name="PINHOLE", num_params=4), 23 | CameraModel(model_id=2, model_name="SIMPLE_RADIAL", num_params=4), 24 | CameraModel(model_id=3, model_name="RADIAL", num_params=5), 25 | ''' 26 | model_name2id = {"SIMPLE_PINHOLE": 0, "PINHOLE": 1, 27 | "SIMPLE_RADIAL": 2, "RADIAL": 3} 28 | 29 | def __init__(self, camera, iscolmap=True) -> None: 30 | if iscolmap: 31 | self.name = camera.model 32 | self.get_camera_vector_colmap(camera) 33 | else: # list type 34 | self.name = camera[0] 35 | self.camera_array = np.array([self.model_name2id[self.name]] + camera[1:]) 36 | def get_camera_vector_colmap(self, camera): 37 | ''' 38 | Return a camera vector from a colmap camera object 39 | return: numpy array of camera vector 40 | [modelid, width, height, focal,..., cx, cy,...] 
41 | ''' 42 | id = self.model_name2id[camera.model] 43 | array = [id, camera.width, camera.height] 44 | array.extend(camera.params) 45 | self.camera_array = np.array(array) 46 | 47 | def update_scale(self, scale_factor): 48 | self.camera_array[1:] = self.camera_array[1:]*scale_factor 49 | 50 | def get_dict_camera(self): 51 | ''' 52 | Return a dictionary of camera 53 | ''' 54 | return {"model": self.name, "width": self.camera_array[1], "height": self.camera_array[2], 55 | "params": self.camera_array[3:].tolist()} 56 | 57 | class Line3D(): 58 | def __init__(self, start, end) -> None: 59 | self.start = np.asarray(start) 60 | self.end = np.asarray(end) 61 | def get_line3d_vector(self): 62 | return np.hstack([self.start, self.end]) 63 | 64 | class Pose(): 65 | def __init__(self, qvec, tvec) -> None: 66 | self.qvec = qvec # quaternion, [w,x,y,z] 67 | self.tvec = tvec # translation, [x,y,z] 68 | def get_pose_vector(self): 69 | """ 70 | Return a pose vector [tvec, qvec] 71 | """ 72 | return np.hstack([self.tvec, self.qvec]) 73 | def get_pose_Tmatrix(self): 74 | """ 75 | Return a pose matrix [R|t] 76 | """ 77 | # Convert the quaternion to a rotation matrix 78 | qvec = np.zeros(4) 79 | qvec[:3] = self.qvec[1:] # convert quaternion from [w,x,y,z] (colmap) to [x,y,z,w] (scipy) 80 | qvec[3] = self.qvec[0] 81 | rotation = R.from_quat(qvec) 82 | rotation_matrix = rotation.as_matrix() 83 | # Create a 4x4 transformation matrix 84 | T = np.eye(4) 85 | T[:3, :3] = rotation_matrix 86 | T[:3, 3] = self.tvec 87 | return T 88 | 89 | def rotate(self, angle): 90 | pose = self.get_pose_Tmatrix() 91 | angle = -angle * math.pi / 180 # convert to radian, and reverse direction != opencv 92 | pose_rot = np.eye(4) 93 | pose_rot[0, 0] = math.cos(angle) 94 | pose_rot[0, 1] = -math.sin(angle) 95 | pose_rot[1, 0] = math.sin(angle) 96 | pose_rot[1, 1] = math.cos(angle) 97 | pose = np.matmul(pose, pose_rot) 98 | self.tvec = pose[:3, 3] 99 | rotation = R.from_matrix(pose[:3, :3]) 100 | qvec = rotation.as_quat() 101 | self.qvec = np.hstack([qvec[3], qvec[:3]]) # convert quaternion from [x,y,z,w] to [w,x,y,z] colmap 102 | 103 | class Image_Class(): 104 | def __init__(self,imgname:str) -> None: 105 | ''' 106 | - image class for storing 2D & 3D points, 2D & 3D lines, camera vector, pose vector 107 | - comments with ### can be changed if augmenting data, otherwise must be fixed 108 | #@ no change, but can be reduced if augmenting data 109 | ''' 110 | self.points2Ds = None ### numpy matrix of 2D points (Nx2) 111 | self.points3Ds = None #@ numpy matrix of 3D points, including np.array[0,0,0] if not available 112 | self.validPoints = None # numpy array of valid 2D points (have 2D-3D points correspondence) 113 | self.line2Ds = None ### numpy matrix of 2D line segments (Nx4) 114 | self.line3Ds = None #@ list of 3D line segment objects, including None if not available 115 | self.line3Ds_matrix = None # numpy matrix of 3D line segments, including np.array[0,0,0,0,0,0] if not available 116 | self.validLines = None # numpy array of valid 3D lines (have 2D-3D lines correspondence) 117 | self.camera = None # camera class 118 | self.id = None 119 | self.imgname = imgname # string: image name 120 | self.pose = None ### Pose object 121 | def get_line3d_matrix(self): 122 | ''' 123 | Return a matrix of line3D vectors 124 | ''' 125 | self.line3Ds_matrix = np.stack([ii.get_line3d_vector() if ii is not None else 126 | np.array([0,0,0,0,0,0]) for ii in self.line3Ds], 0) 127 | self.validLines = np.stack([1 if ii is not None else 128 | 0 for ii in 
self.line3Ds], 0) 129 | 130 | 131 | class Base_Collection(): 132 | def __init__(self, args, cfg, mode) -> None: 133 | self.args = args 134 | self.cfg = cfg 135 | self.device = f'cuda:{args.cudaid}' if torch.cuda.is_available() else 'cpu' 136 | if mode == "test": 137 | self.get_detector_models() 138 | 139 | def get_point_detector_model(self): 140 | ''' 141 | Return a point detector model 142 | ''' 143 | configs = self.cfg.point2d.detector.configs 144 | method = self.cfg.point2d.detector.name 145 | return get_pointdetector(method = method, configs=configs) 146 | 147 | def get_line_detector_model(self): 148 | ''' 149 | Return a line detector model 150 | ''' 151 | max_num_2d_segs = self.cfg.line2d.max_num_2d_segs 152 | do_merge_lines = self.cfg.line2d.do_merge_lines 153 | visualize = self.cfg.line2d.visualize 154 | method = self.cfg.line2d.detector.name_test_model 155 | return get_linedetector(method= method, max_num_2d_segs=max_num_2d_segs, 156 | do_merge_lines=do_merge_lines, visualize=visualize, cudaid=self.args.cudaid) 157 | 158 | def get_detector_models(self): 159 | self.line_detector = self.get_line_detector_model() 160 | # self.point_detector = self.get_point_detector_model().eval().to(self.device) 161 | 162 | def do_augmentation(self, image, image_infor_class, debug = False): 163 | if not aug.is_apply_augment(self.cfg.train.augmentation.on_rate): 164 | # No apply augmentation 165 | return image, image_infor_class 166 | # Apply the brightness and contrast 167 | transf_image = aug.random_brightness_contrast(image, self.cfg.train.augmentation.brightness, 168 | self.cfg.train.augmentation.contrast) 169 | points2Ds = image_infor_class.points2Ds 170 | lines2Ds = image_infor_class.line2Ds 171 | camera = image_infor_class.camera 172 | pose = image_infor_class.pose 173 | if self.cfg.train.augmentation.homography.apply: 174 | # camera and pose are not correct after applying homography 175 | H,W = image.shape 176 | shape = np.array([H,W]) 177 | h_matrix = aug.sample_homography(shape, self.cfg.train.augmentation.homography) # sample homography matrix 178 | transf_image = aug.warpPerspective_forimage(transf_image, h_matrix) 179 | points2Ds = aug.perspectiveTransform_forpoints(image_infor_class.points2Ds, h_matrix) 180 | lines2Ds = aug.perspectiveTransform_forlines(image_infor_class.line2Ds, h_matrix) 181 | 182 | # dsacstar-like augmentation method. 
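        # (Descriptive note, based on dsacstar_augmentation in datasets/augmentation.py:
        # it randomly rescales the image together with the 2D points, 2D lines and camera
        # intrinsics, then applies a random in-plane rotation and rotates the ground-truth
        # pose by the same angle, so the 2D-3D supervision stays geometrically consistent.)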
183 | if self.cfg.train.augmentation.dsacstar.apply: 184 | # camera and pose will be corrected in this augmentation 185 | assert not self.cfg.train.augmentation.homography.apply, "dsacstar augmentation cannot be applied with homography augmentation" 186 | transf_image, points2Ds, lines2Ds, camera, pose = aug.dsacstar_augmentation( 187 | transf_image, self.cfg.train.augmentation.dsacstar, points2Ds, lines2Ds, camera, pose) 188 | 189 | if debug: 190 | from util.visualize import visualize_img_withlinesandpoints 191 | visualize_img_withlinesandpoints(image, image_infor_class.points2Ds,image_infor_class.line2Ds) 192 | image_infor_class.points2Ds = points2Ds 193 | image_infor_class.line2Ds = lines2Ds 194 | image_infor_class.camera = camera 195 | image_infor_class.pose = pose 196 | # correct points and lines inside image 197 | if self.cfg.train.augmentation.dsacstar.apply: 198 | image_infor_class = aug.correct_points_lines_inside_image(transf_image.shape, image_infor_class) 199 | if debug: 200 | visualize_img_withlinesandpoints(transf_image, image_infor_class.points2Ds,image_infor_class.line2Ds, True) 201 | return transf_image, image_infor_class 202 | 203 | def image_loader(self, image_name, augmentation=False, debug = False): 204 | ''' 205 | (use only for point2d detector model) 206 | Read an image, do augmentation if needed, preprocess it, and 207 | return a dictionary of image data and a Image_Class object 208 | ''' 209 | resize_max = self.cfg.point2d.detector.preprocessing.resize_max 210 | resize_force = self.cfg.point2d.detector.preprocessing.resize_force 211 | interpolation = self.cfg.point2d.detector.preprocessing.interpolation 212 | grayscale = self.cfg.point2d.detector.preprocessing.grayscale 213 | path_to_image = self.get_image_path(image_name) 214 | image = read_image(path_to_image, grayscale=grayscale) 215 | 216 | size = image.shape[:2][::-1] 217 | if resize_force and (max(size) > resize_max): 218 | scale = resize_max / max(size) 219 | size_new = tuple(int(round(x*scale)) for x in size) 220 | image = aug.resize_image(image, size_new, interpolation) 221 | # rescale 2D points and lines, camera focal length 222 | raise NotImplementedError 223 | 224 | image_infor_class = copy.deepcopy(self.imgname2imgclass[image_name]) 225 | if augmentation: 226 | image, image_infor_class = self.do_augmentation(image, image_infor_class, debug) 227 | if debug: 228 | print("Debugging image_loader") 229 | return None 230 | 231 | image = image.astype(np.float32) 232 | if grayscale: 233 | image = image[None] 234 | else: 235 | image = image.transpose((2, 0, 1)) # HxWxC to CxHxW 236 | image = image / 255. 237 | original_size = np.array(size) 238 | data = { 239 | 'image': image, 240 | 'original_size': original_size, 241 | } 242 | return data, image_infor_class 243 | 244 | def detect_points2D(self, image_name): 245 | ''' 246 | Read an image, preprocess it, and 247 | Return a keypoints from that image using 248 | loaded point detector model. 
249 | ''' 250 | point_detector = self.get_point_detector_model().eval().to(self.device) 251 | resize_force = self.cfg.point2d.detector.preprocessing.resize_force 252 | resize_max = self.cfg.point2d.detector.preprocessing.resize_max 253 | data,_ = self.image_loader(image_name, False) 254 | data['image'] = torch.from_numpy(data['image'][None]).float().to(self.device) 255 | keypointsdict = point_detector._forward_default(data) 256 | scale = resize_max / max(data['original_size']) 257 | if resize_force and (max(data['original_size']) > resize_max): 258 | keypointsdict['keypoints'][0] = (keypointsdict['keypoints'][0] + .5)/scale - .5 259 | else: 260 | keypointsdict['keypoints'][0] += .5 261 | return keypointsdict 262 | 263 | def detect_lines2D(self, image_name): 264 | ''' 265 | Return a list of lines2D in the image 266 | ''' 267 | grayscale = self.cfg.line2d.preprocessing.grayscale 268 | image_path = self.get_image_path(image_name) 269 | image = read_image(image_path, grayscale=grayscale) 270 | if self.line_detector.get_module_name() == "deeplsd": 271 | image = frame2tensor(image, self.device) 272 | segs = self.line_detector.detect(image) 273 | return segs 274 | def get_2dpoints_lines_for_testing(self, image_name): 275 | ''' 276 | Return a list of points2D and a list of lines2D in the image 277 | ''' 278 | raise NotImplementedError 279 | 280 | def get_image_path(self, image_name): 281 | ''' 282 | Return a path to image 283 | ''' 284 | img_path = os.path.join(self.args.dataset_dir, self.args.dataset, self.args.scene, image_name) 285 | return img_path 286 | 287 | 288 | -------------------------------------------------------------------------------- /datasets/augmentation.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import math 4 | import random 5 | import PIL.Image 6 | 7 | def resize_image(image, size, interp): 8 | if interp.startswith('cv2_'): 9 | interp = getattr(cv2, 'INTER_'+interp[len('cv2_'):].upper()) 10 | h, w = image.shape[:2] 11 | if interp == cv2.INTER_AREA and (w < size[0] or h < size[1]): 12 | interp = cv2.INTER_LINEAR 13 | resized = cv2.resize(image, size, interpolation=interp) 14 | elif interp.startswith('pil_'): 15 | interp = getattr(PIL.Image, interp[len('pil_'):].upper()) 16 | resized = PIL.Image.fromarray(image.astype(np.uint8)) 17 | resized = resized.resize(size, resample=interp) 18 | resized = np.asarray(resized, dtype=image.dtype) 19 | else: 20 | raise ValueError( 21 | f'Unknown interpolation {interp}.') 22 | return resized 23 | 24 | 25 | def sample_homography(shape, cfg): 26 | """Sample a random valid homography. 27 | 28 | Computes the homography transformation between a random patch in the original image 29 | and a warped projection with the same image size. 30 | As in `tf.contrib.image.transform`, it maps the output point (warped patch) to a 31 | transformed input point (original patch). 32 | The original patch, which is initialized with a simple half-size centered crop, is 33 | iteratively projected, scaled, rotated and translated. 34 | 35 | Arguments: 36 | shape: A numpy array [H,W] specifying the height and width of the original image. 37 | perspective: A boolean that enables the perspective and affine transformations. 38 | scaling: A boolean that enables the random scaling of the patch. 39 | rotation: A boolean that enables the random rotation of the patch. 40 | translation: A boolean that enables the random translation of the patch. 
41 | n_scales: The number of tentative scales that are sampled when scaling. 42 | n_angles: The number of tentatives angles that are sampled when rotating. 43 | scaling_amplitude: Controls the amount of scale. 44 | perspective_amplitude_x: Controls the perspective effect in x direction. 45 | perspective_amplitude_y: Controls the perspective effect in y direction. 46 | patch_ratio: Controls the size of the patches used to create the homography. 47 | max_angle: Maximum angle used in rotations. 48 | allow_artifacts: A boolean that enables artifacts when applying the homography. 49 | translation_overflow: Amount of border artifacts caused by translation. 50 | 51 | Returns: 52 | A numpy of shape 3x3 corresponding to the homography transform. 53 | """ 54 | shift=0 55 | perspective=cfg.perspective 56 | scaling=cfg.scaling 57 | rotation=cfg.rotation 58 | translation=cfg.translation 59 | n_scales=cfg.n_scales 60 | n_angles=cfg.n_angles 61 | scaling_amplitude=cfg.scaling_amplitude 62 | perspective_amplitude_x=cfg.perspective_amplitude_x 63 | perspective_amplitude_y=cfg.perspective_amplitude_y 64 | patch_ratio=cfg.patch_ratio 65 | max_angle=math.pi*(cfg.max_angle/180) 66 | allow_artifacts=cfg.allow_artifacts 67 | translation_overflow=0. 68 | 69 | # Corners of the output image 70 | pts1 = np.stack([[0., 0.], [0., 1.], [1., 1.], [1., 0.]], axis=0) 71 | # Corners of the input patch 72 | margin = (1 - patch_ratio) / 2 73 | pts2 = margin + np.array([[0, 0], [0, patch_ratio], 74 | [patch_ratio, patch_ratio], [patch_ratio, 0]]) 75 | 76 | from numpy.random import normal 77 | from numpy.random import uniform 78 | from scipy.stats import truncnorm 79 | 80 | # Random perspective and affine perturbations 81 | # lower, upper = 0, 2 82 | std_trunc = 2 83 | 84 | if perspective: 85 | if not allow_artifacts: 86 | perspective_amplitude_x = min(perspective_amplitude_x, margin) 87 | perspective_amplitude_y = min(perspective_amplitude_y, margin) 88 | 89 | perspective_displacement = truncnorm(-1*std_trunc, std_trunc, loc=0, scale=perspective_amplitude_y/2).rvs(1) 90 | h_displacement_left = truncnorm(-1*std_trunc, std_trunc, loc=0, scale=perspective_amplitude_x/2).rvs(1) 91 | h_displacement_right = truncnorm(-1*std_trunc, std_trunc, loc=0, scale=perspective_amplitude_x/2).rvs(1) 92 | pts2 += np.array([[h_displacement_left, perspective_displacement], 93 | [h_displacement_left, -perspective_displacement], 94 | [h_displacement_right, perspective_displacement], 95 | [h_displacement_right, -perspective_displacement]]).squeeze() 96 | 97 | # Random scaling 98 | # sample several scales, check collision with borders, randomly pick a valid one 99 | if scaling: 100 | scales = truncnorm(-1*std_trunc, std_trunc, loc=1, scale=scaling_amplitude/2).rvs(n_scales) 101 | scales = np.concatenate((np.array([1]), scales), axis=0) 102 | 103 | center = np.mean(pts2, axis=0, keepdims=True) 104 | scaled = (pts2 - center)[np.newaxis, :, :] * scales[:, np.newaxis, np.newaxis] + center 105 | if allow_artifacts: 106 | valid = np.arange(n_scales) # all scales are valid except scale=1 107 | else: 108 | valid = (scaled >= 0.) * (scaled < 1.) 
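            # a sampled scale is kept only if all four warped corners remain inside the unit square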
109 | valid = valid.prod(axis=1).prod(axis=1) 110 | valid = np.where(valid)[0] 111 | idx = valid[np.random.randint(valid.shape[0], size=1)].squeeze().astype(int) 112 | pts2 = scaled[idx,:,:] 113 | 114 | # Random translation 115 | if translation: 116 | t_min, t_max = np.min(pts2, axis=0), np.min(1 - pts2, axis=0) 117 | if allow_artifacts: 118 | t_min += translation_overflow 119 | t_max += translation_overflow 120 | pts2 += np.array([uniform(-t_min[0], t_max[0],1), uniform(-t_min[1], t_max[1], 1)]).T 121 | 122 | # Random rotation 123 | # sample several rotations, check collision with borders, randomly pick a valid one 124 | if rotation: 125 | angles = np.linspace(-max_angle, max_angle, num=n_angles) 126 | angles = np.concatenate((angles, np.array([0.])), axis=0) # in case no rotation is valid 127 | center = np.mean(pts2, axis=0, keepdims=True) 128 | rot_mat = np.reshape(np.stack([np.cos(angles), -np.sin(angles), np.sin(angles), 129 | np.cos(angles)], axis=1), [-1, 2, 2]) 130 | rotated = np.matmul( (pts2 - center)[np.newaxis,:,:], rot_mat) + center 131 | if allow_artifacts: 132 | valid = np.arange(n_angles) # all scales are valid except scale=1 133 | else: 134 | valid = (rotated >= 0.) * (rotated < 1.) 135 | valid = valid.prod(axis=1).prod(axis=1) 136 | valid = np.where(valid)[0] 137 | idx = valid[np.random.randint(valid.shape[0], size=1)].squeeze().astype(int) 138 | pts2 = rotated[idx,:,:] 139 | 140 | 141 | # Rescale to actual size 142 | shape = shape[::-1] # different convention [y, x] 143 | pts1 *= shape[np.newaxis,:] 144 | pts2 *= shape[np.newaxis,:] 145 | 146 | def ax(p, q): return [p[0], p[1], 1, 0, 0, 0, -p[0] * q[0], -p[1] * q[0]] 147 | def ay(p, q): return [0, 0, 0, p[0], p[1], 1, -p[0] * q[1], -p[1] * q[1]] 148 | 149 | homography = cv2.getPerspectiveTransform(np.float32(pts1+shift), np.float32(pts2+shift)) 150 | return homography 151 | 152 | def warpPerspective_forimage(ori_img, h_matrix): 153 | # Apply the homography transformation to the image 154 | transformed_image = cv2.warpPerspective(ori_img, h_matrix, (ori_img.shape[1], ori_img.shape[0])) 155 | return transformed_image 156 | 157 | def perspectiveTransform_forpoints(positions, h_matrix): 158 | # Apply the homography transformation to the list of positions 159 | transformed_positions = cv2.perspectiveTransform(np.array([positions]), h_matrix) 160 | return transformed_positions[0,:,:] 161 | 162 | def perspectiveTransform_forlines(lines, h_matrix): 163 | # Apply the homography transformation to the list of 2D lines 164 | start_points = lines[:,:2] 165 | end_points = lines[:,2:] 166 | transformed_start_points = cv2.perspectiveTransform(np.array([start_points]), h_matrix)[0,:,:] 167 | transformed_end_points = cv2.perspectiveTransform(np.array([end_points]), h_matrix)[0,:,:] 168 | transformed_lines = np.concatenate((transformed_start_points, transformed_end_points), axis=1) 169 | return transformed_lines 170 | 171 | def random_brightness_contrast(image, b_rate, c_rate): 172 | # Random the brightness and contrast values 173 | contrast = [1.0, 1.0+2.0*c_rate] 174 | brightness = [-100*b_rate, 100*b_rate] 175 | alpha = random.uniform(contrast[0], contrast[1]) 176 | beta = random.uniform(brightness[0], brightness[1]) 177 | return cv2.convertScaleAbs(image, alpha=alpha, beta=beta) 178 | 179 | 180 | def is_apply_augment(rate): 181 | ''' 182 | Return True if the augmentation is applied by select a random option 183 | ''' 184 | # Define the options and their probabilities 185 | options = [True, False] 186 | probabilities = [rate, 1-rate] 
187 | # Choose an option to turn on augmentation or not 188 | return random.choices(options, weights=probabilities, k=1)[0] 189 | 190 | def dsacstar_augmentation(image, cfg, points2d, lines2d, camera, pose, interpolation='cv2_area'): 191 | ''' 192 | Apply the augmentation to the input image, points, lines, camera, and pose 193 | args: 194 | image: input image np.array WxH 195 | cfg: configuration file .yaml 196 | points2d: 2D points np.array Nx2 197 | lines2d: 2D lines np.array Nx4 198 | camera: camera parameters np.array[w, h, f, cx, cy,...] 199 | pose: camera pose 'class _base.Pose' 200 | ''' 201 | # Random the scale factor and rotation angle 202 | scale_factor = random.uniform(cfg.aug_scale_min, cfg.aug_scale_max) 203 | angle = random.uniform(-cfg.aug_rotation, cfg.aug_rotation) 204 | 205 | # Apply the scale factor and rotation angle to the image 206 | new_shape = (int(image.shape[1] * scale_factor), int(image.shape[0] * scale_factor)) # height, width 207 | image = resize_image(image, new_shape, interpolation) 208 | 209 | # ajust the points and lines coordinates 210 | points2d = points2d * scale_factor 211 | lines2d = lines2d * scale_factor 212 | 213 | # ajust the camera parameters 214 | camera.update_scale(scale_factor) 215 | 216 | # rotate input image 217 | # Get the rotation matrix 218 | M = cv2.getRotationMatrix2D((new_shape[0] / 2, new_shape[1] / 2), angle, 1) 219 | # Rotate the image 220 | image = cv2.warpAffine(image, M, new_shape) 221 | points2d = rotate_points_dsacstar(points2d, M) 222 | lines2d = rotate_lines_dsacstar(lines2d, M) 223 | # rotate ground truth camera pose 224 | pose.rotate(angle) 225 | 226 | return image, points2d, lines2d, camera, pose 227 | 228 | def rotate_points_dsacstar(points, M): 229 | # Convert the points to homogeneous coordinates 230 | points_hom = np.hstack((points, np.ones((points.shape[0], 1)))) 231 | # Rotate the points 232 | rotated_points_hom = np.dot(M, points_hom.T).T 233 | # Convert the points back to 2D 234 | rotated_points = rotated_points_hom[:, :2] 235 | return rotated_points 236 | 237 | def rotate_lines_dsacstar(lines, M): 238 | start_points = lines[:,:2] 239 | end_points = lines[:,2:] 240 | start_points = rotate_points_dsacstar(start_points, M) 241 | end_points = rotate_points_dsacstar(end_points, M) 242 | rotated_lines = np.concatenate((start_points, end_points), axis=1) 243 | return rotated_lines 244 | 245 | def is_inside_img(points, img_shape): 246 | h, w = img_shape[0], img_shape[1] 247 | return (points[:, 0] >= 0) & (points[:, 0] < w) & (points[:, 1] >= 0) & (points[:, 1] < h) 248 | 249 | def correct_points_lines_inside_image(shape, image_infor_class): 250 | ''' 251 | Correct the points and lines coordinates to be inside the image 252 | if the points/lines are outside the image, remove them 253 | if lines have half inside and half outside, shrink the line to be inside the image 254 | Then, correct the 3D ground truth points coordinates 255 | Args: 256 | shape: image shape (height, width) 257 | image_infor_class: class _base.ImageInfor 258 | ''' 259 | H, W = shape[0], shape[1] 260 | # correct 2d points 261 | points2d = image_infor_class.points2Ds 262 | valid_points = is_inside_img(points2d, shape) 263 | points2d = points2d[valid_points] 264 | image_infor_class.points2Ds = points2d 265 | # correct 3d points 266 | image_infor_class.points3Ds = image_infor_class.points3Ds[valid_points] 267 | # correct id of valids 268 | image_infor_class.validPoints = image_infor_class.validPoints[valid_points] 269 | assert 
len(image_infor_class.points2Ds) == len(image_infor_class.points3Ds) == len(image_infor_class.validPoints) 270 | 271 | # correct 2d lines 272 | lines2d = image_infor_class.line2Ds 273 | lines3d = image_infor_class.line3Ds_matrix 274 | valids_lines2d = image_infor_class.validLines 275 | start_points = lines2d[:,:2] 276 | end_points = lines2d[:,2:] 277 | valid_start_points = is_inside_img(start_points, shape) 278 | valid_end_points = is_inside_img(end_points, shape) 279 | # remove lines that are outside the image 280 | valid_lines = valid_start_points | valid_end_points 281 | 282 | start_points = start_points[valid_lines] 283 | end_points = end_points[valid_lines] 284 | lines3d = lines3d[valid_lines] 285 | valids_lines2d = valids_lines2d[valid_lines] 286 | 287 | valid_start_points = valid_start_points[valid_lines] 288 | valid_end_points = valid_end_points[valid_lines] 289 | # shrink lines that are half inside and half outside the image 290 | indices = np.where(~valid_start_points)[0] 291 | for idx in indices: 292 | start = start_points[idx,:] # outside points 293 | end = end_points[idx,:] 294 | m, c = line_equation(start, end) # y = mx + c 295 | if start[0] < 0: 296 | start[0] = 0+1 297 | start[1] = compute_y(m, c, 0) 298 | elif start[0] > W: 299 | start[0] = W - 1 300 | start[1] = compute_y(m, c, W) 301 | if start[1] < 0: 302 | start[0] = compute_x(m, c, 0) 303 | start[1] = 0 + 1 304 | elif start[1] > H: 305 | start[0] = compute_x(m, c, H) 306 | start[1] = H - 1 307 | start_points[idx] = start 308 | indices = np.where(~valid_end_points)[0] 309 | for idx in indices: 310 | start = start_points[idx] 311 | end = end_points[idx] # outside points 312 | m, c = line_equation(start, end) # y = mx + c 313 | if end[0] < 0: 314 | end[0] = 0+1 315 | end[1] = compute_y(m, c, 0) 316 | elif end[0] > W: 317 | end[0] = W - 1 318 | end[1] = compute_y(m, c, W) 319 | if end[1] < 0: 320 | end[0] = compute_x(m, c, 0) 321 | end[1] = 0 + 1 322 | elif end[1] > H: 323 | end[0] = compute_x(m, c, H) 324 | end[1] = H - 1 325 | end_points[idx] = end 326 | 327 | assert np.all(is_inside_img(start_points, shape)) 328 | assert np.all(is_inside_img(end_points, shape)) 329 | lines2d = np.concatenate((start_points, end_points), axis=1) 330 | assert len(lines2d) == len(lines3d) == len(valids_lines2d) 331 | image_infor_class.line2Ds = lines2d 332 | image_infor_class.line3Ds_matrix = lines3d 333 | image_infor_class.validLines = valids_lines2d 334 | return image_infor_class 335 | 336 | def line_equation(start, end): 337 | # Calculate the slope 338 | m = (end[1] - start[1]) / (end[0] - start[0]) 339 | # Calculate the y-intercept 340 | c = start[1] - m * start[0] 341 | return m, c # y = mx + c 342 | def compute_x(m, c, y): 343 | # Calculate the x value that corresponds to the given y value 344 | # and the line equation y = mx + c 345 | return (y - c) / m 346 | def compute_y(m, c, x): 347 | # Calculate the y value that corresponds to the given x value 348 | # and the line equation y = mx + c 349 | return m * x + c -------------------------------------------------------------------------------- /datasets/data_collection.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import sys, os 4 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | from util.read_write_model import read_model 6 | import numpy as np 7 | from pathlib import Path 8 | from datasets._base import (Image_Class, Base_Collection, Line3D, Pose, 9 | Camera) 10 
| 11 | strlist2floatlist = lambda strlist: [float(s) for s in strlist]
12 | strlist2intlist = lambda strlist: [int(s) for s in strlist]
13 |
14 | class DataCollection(Base_Collection):
15 | def __init__(self, args:dict, cfg:dict, mode="train")->None:
16 | super(DataCollection, self).__init__(args, cfg, mode)
17 | self.gt_3Dmodels_path = self.args.sfm_dir / f"{self.args.dataset}/{self.args.scene}"
18 | self.SfM_with_depth = self.cfg.train.use_depth # whether to use SfM labels that have been corrected by depth
19 | self.train_imgs = [] # list of train image names
20 | self.test_imgs = [] # list of test image names
21 | self.imgname2limapID = {} # map from image name to limap image id
22 | self.limapID2imgname = {} # map from limap image id to image name
23 | # load all images' 2D & 3D points and create Image_Class objects
24 | self.imgname2imgclass = {} # map from image name to Image_Class object
25 | self.load_all_2Dpoints_by_dataset(self.args.dataset)
26 | self.load_imgname2limapID()
27 | # load lines data from Limap output
28 | self.load_all_2Dlines_data()
29 | # load alltracks data from Limap output
30 | self.load_alltracks_limap_3Dlines()
31 |
32 | def load_all_2Dpoints_by_dataset(self, dataset):
33 | if dataset == "7scenes":
34 | self.load_all_2Dpoints_7scenes()
35 | elif dataset == "Cambridge" or dataset == "indoor6":
36 | self.load_all_2Dpoints_Cambridge()
37 | else:
38 | raise NotImplementedError
39 |
40 |
41 | def load_all_2Dpoints_7scenes(self):
42 | # currently used for 7scenes.
43 | # load all 2d & 3d points from colmap output
44 | path_gt_3Dmodels_full = self.gt_3Dmodels_path/"sfm_sift_full"
45 | if self.SfM_with_depth:
46 | print("[INFOR] Using SfM labels corrected by depth.")
47 | path_gt_3Dmodels_train = self.gt_3Dmodels_path/"sfm_superpoint+superglue+depth"
48 | else:
49 | path_gt_3Dmodels_train = self.gt_3Dmodels_path/"sfm_superpoint+superglue"
50 | testlist_path = path_gt_3Dmodels_full/"list_test.txt"
51 | cameras_all, images_all, _ = read_model(path=path_gt_3Dmodels_full, ext=".bin")
52 | _, images_train, points3D_train = read_model(path=path_gt_3Dmodels_train, ext=".bin")
53 | name2id_train = {image.name: i for i, image in images_train.items()}
54 |
55 | if os.path.exists(testlist_path):
56 | with open(testlist_path, 'r') as f:
57 | testlist = f.read().rstrip().split('\n')
58 | else:
59 | raise ValueError("Error! 
Input file/directory {0} not found.".format(testlist_path)) 60 | for id_, image in images_all.items(): 61 | img_name = image.name 62 | self.imgname2imgclass[img_name] = Image_Class(img_name) 63 | if image.name in testlist: 64 | # fill data to TEST img classes 65 | self.test_imgs.append(img_name) 66 | self.imgname2imgclass[img_name].pose = Pose(image.qvec, image.tvec) 67 | self.imgname2imgclass[img_name].camera = Camera(cameras_all[image.camera_id], 68 | iscolmap=True) 69 | else: 70 | # fill data to TRAIN img classes 71 | self.train_imgs.append(img_name) 72 | self.imgname2imgclass[img_name].pose = Pose(image.qvec, image.tvec) 73 | image_train = images_train[name2id_train[img_name]] 74 | self.imgname2imgclass[img_name].points2Ds = image_train.xys 75 | self.imgname2imgclass[img_name].points3Ds = np.stack([points3D_train[ii].xyz if ii != -1 else 76 | np.array([0,0,0]) for ii in image_train.point3D_ids], 0) 77 | self.imgname2imgclass[img_name].validPoints = np.stack([1 if ii != -1 else 78 | 0 for ii in image_train.point3D_ids], 0) 79 | self.imgname2imgclass[img_name].camera = Camera(cameras_all[image.camera_id], 80 | iscolmap=True) 81 | 82 | def load_all_2Dpoints_Cambridge(self): 83 | # load all 2d & 3d points from colmap output 84 | path_gt_3Dmodels_full = self.gt_3Dmodels_path/"sfm_sift_full" 85 | 86 | # load query_list_with_intrinsics.txt 87 | query_list_with_intrinsics = self.gt_3Dmodels_path/"query_list_with_intrinsics.txt" 88 | if not os.path.exists(query_list_with_intrinsics): 89 | raise ValueError("Error! Input file/directory {0} not found.".format(query_list_with_intrinsics)) 90 | query_list_with_intrinsics = pd.read_csv(query_list_with_intrinsics, sep=" ", header=None) 91 | # get test dictionary with its intrinsic 92 | testimgname2intrinsic = {query_list_with_intrinsics.iloc[i,0]:list(query_list_with_intrinsics.iloc[i,1:]) 93 | for i in range(len(query_list_with_intrinsics))} 94 | 95 | # load id_to_origin_name.txt 96 | import json 97 | id_to_origin_name = self.gt_3Dmodels_path / "id_to_origin_name.txt" 98 | with open(id_to_origin_name, 'r') as f: 99 | id_to_origin_name = json.load(f) 100 | 101 | originalname2newimgname = {} 102 | for id, originalname in id_to_origin_name.items(): 103 | id = int(id) 104 | originalname2newimgname[originalname] = "image{0:08d}.png".format(id) 105 | 106 | 107 | # load the camera model from colmap output 108 | _, images_all, _ = read_model(path=path_gt_3Dmodels_full, ext=".bin") 109 | path_gt_3Dmodels_train = self.gt_3Dmodels_path/"sfm_superpoint+superglue" 110 | cameras_train, images_train, points3D_train = read_model(path=path_gt_3Dmodels_train, ext=".bin") 111 | name2id_train = {image.name: i for i, image in images_train.items()} 112 | 113 | for _, image in images_all.items(): 114 | img_name = image.name 115 | new_img_name = originalname2newimgname[img_name] 116 | self.imgname2imgclass[new_img_name] = Image_Class(new_img_name) 117 | if new_img_name in testimgname2intrinsic: 118 | # fill data to TEST img classes 119 | self.test_imgs.append(new_img_name) 120 | self.imgname2imgclass[new_img_name].pose = Pose(image.qvec, image.tvec) 121 | self.imgname2imgclass[new_img_name].camera = Camera(testimgname2intrinsic[new_img_name], 122 | iscolmap=False) 123 | else: 124 | # fill data to TRAIN img classes 125 | if new_img_name not in name2id_train: 126 | continue 127 | image_train = images_train[name2id_train[new_img_name]] 128 | if len(image_train.point3D_ids) == 0: 129 | continue 130 | self.train_imgs.append(new_img_name) 131 | 
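# Descriptive note for the train branch below: each training Image_Class stores the SfM 2D
# keypoints (image_train.xys), their triangulated 3D points (with a (0, 0, 0) placeholder
# wherever point3D_id == -1), a validPoints mask (1 when a keypoint has a valid 3D point,
# else 0), and the COLMAP camera. Test images, listed in query_list_with_intrinsics.txt,
# only receive a pose and a camera built from the provided intrinsics.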
self.imgname2imgclass[new_img_name].pose = Pose(image.qvec, image.tvec) 132 | self.imgname2imgclass[new_img_name].points2Ds = image_train.xys 133 | self.imgname2imgclass[new_img_name].points3Ds = np.stack([points3D_train[ii].xyz if ii != -1 else 134 | np.array([0,0,0]) for ii in image_train.point3D_ids], 0) 135 | self.imgname2imgclass[new_img_name].validPoints = np.stack([1 if ii != -1 else 136 | 0 for ii in image_train.point3D_ids], 0) 137 | self.imgname2imgclass[new_img_name].camera = Camera(cameras_train[image_train.camera_id], 138 | iscolmap=True) 139 | 140 | def load_imgname2limapID(self): 141 | # load path image list from limap output 142 | img_list_path = self.gt_3Dmodels_path/f"limap/{self.cfg.line2d.detector.name}/image_list.txt" 143 | if not os.path.exists(img_list_path): 144 | raise ValueError("Error! Input file/directory {0} not found.".format(img_list_path)) 145 | with open(img_list_path, 'r') as f: 146 | lines = f.readlines()[1:] # read all lines except the first one 147 | for line in lines: 148 | img_id, img_name = line.strip().split(',') # assuming two columns separated by comma 149 | self.imgname2limapID[img_name] = int(img_id) 150 | self.limapID2imgname[int(img_id)] = img_name 151 | 152 | 153 | 154 | def load_all_2Dlines_data(self): 155 | # load train all lines data from limap output (all exixting lines in all images) 156 | # then create line3D objects for each image 157 | segments_path = self.gt_3Dmodels_path /f"limap/{self.cfg.line2d.detector.name}/segments" 158 | def read_segments_file(img_id, segments_path): 159 | segments_file = segments_path / f"segments_{img_id}.txt" 160 | if not os.path.exists(segments_file): 161 | raise ValueError("Error! Input file/directory {0} not found.".format(segments_file)) 162 | segments_matrix = pd.read_csv(segments_file, sep=' ', skiprows=1, header=None).to_numpy() 163 | return segments_matrix 164 | for img_name in self.train_imgs: 165 | img_id = self.imgname2limapID[img_name] 166 | segments_matrix = read_segments_file(img_id, segments_path) 167 | length = segments_matrix.shape[0] 168 | self.imgname2imgclass[img_name].line2Ds = segments_matrix 169 | self.imgname2imgclass[img_name].line3Ds = [None for _ in range(length)] 170 | # if length < 80: 171 | # print(length, img_name) 172 | 173 | 174 | 175 | def load_alltracks_limap_3Dlines(self): 176 | # load all tracks data from limap output (training data only) 177 | track_file = "fitnmerge_alltracks.txt" if self.SfM_with_depth else "alltracks.txt" 178 | tracks_path = self.gt_3Dmodels_path/ f"limap/{self.cfg.line2d.detector.name}/{track_file}" 179 | if not os.path.exists(tracks_path): 180 | raise ValueError("Error! 
Input file/directory {0} not found.".format(tracks_path)) 181 | with open(tracks_path, 'r') as f: 182 | lines = f.readlines() 183 | number3Dlines = int(lines[0].strip()) 184 | i = 1 185 | length = len(lines) 186 | while i < length: 187 | i += 1 # skip the first line (3dline id, #2dlines, #imgs) 188 | start_3d = strlist2floatlist(lines[i].strip().split(' ')) 189 | i += 1 190 | end_3d = strlist2floatlist(lines[i].strip().split(' ')) 191 | i += 1 192 | # load img ids 193 | img_ids = strlist2intlist(lines[i].strip().split(' ')) 194 | i += 1 195 | # load 2d line ids 196 | line2d_ids = strlist2intlist(lines[i].strip().split(' ')) 197 | # fill data to Image_Class objects 198 | for img_id, line2d_id in zip(img_ids, line2d_ids): 199 | img_name = self.limapID2imgname[img_id] 200 | if img_name not in self.train_imgs: 201 | continue 202 | if self.imgname2imgclass[img_name].line3Ds[line2d_id] is not None: 203 | raise ValueError("Error! 3D line {0} in image {1} is already filled.".format(line2d_id, img_id)) 204 | self.imgname2imgclass[img_name].line3Ds[line2d_id] = Line3D(start_3d, end_3d) 205 | i += 1 206 | self.load_all_lines3D_matrix() 207 | 208 | def load_all_lines3D_matrix(self): 209 | # load all lines3D matrix from colmap output 210 | for img_name in self.train_imgs: 211 | self.imgname2imgclass[img_name].get_line3d_matrix() 212 | 213 | 214 | 215 | -------------------------------------------------------------------------------- /datasets/dataloader.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Dataset 3 | import sys, os 4 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | from datasets.data_collection import DataCollection 6 | import numpy as np 7 | 8 | class Collection_Loader(Dataset): 9 | def __init__(self, args, cfg, mode="train"): 10 | self.DataCol = DataCollection(args, cfg, mode=mode) 11 | self.mode = mode 12 | if "train" in mode: 13 | self.image_list = self.DataCol.train_imgs 14 | self.augmentation = cfg.train.augmentation.apply if mode == "train" else False 15 | if self.augmentation: print("[INFOR] Augmentation is applied") 16 | elif mode == "test": 17 | self.augmentation = False 18 | self.image_list = self.DataCol.test_imgs 19 | else: 20 | raise ValueError("Error! 
Mode {0} not supported.".format(mode)) 21 | 22 | def __len__(self): 23 | return len(self.image_list) 24 | 25 | def __getitem__(self, index): 26 | image_name = self.image_list[index] 27 | data, infor = self.DataCol.image_loader(image_name, augmentation=self.augmentation) # dict:{img, ori_img_size} 28 | target = {} 29 | if self.mode == "test": 30 | data['lines'] = self.DataCol.detect_lines2D(image_name)[:,:4] # detect lines2D 31 | data['keypoints'] = 'None' # to show there is no keypoints 32 | if "train" in self.mode: 33 | data['lines'] = infor.line2Ds 34 | data['keypoints'] = infor.points2Ds 35 | target['lines3D'] = infor.line3Ds_matrix.T 36 | target['points3D'] = infor.points3Ds.T 37 | target['validPoints'] = infor.validPoints 38 | target['validLines'] = infor.validLines 39 | assert data['lines'].shape[0] == target['lines3D'].shape[1] == target['validLines'].shape[0] 40 | assert data['keypoints'].shape[0] == target['points3D'].shape[1] == target['validPoints'].shape[0] 41 | target['pose'] = infor.pose.get_pose_vector() 42 | target['camera'] = infor.camera.camera_array 43 | data['imgname'] = image_name 44 | data = map_dict_to_torch(data) 45 | target = map_dict_to_torch(target) 46 | return data, target 47 | 48 | def map_dict_to_torch(data): 49 | for k, v in data.items(): 50 | if isinstance(v, str): 51 | continue 52 | elif isinstance(v, np.ndarray): 53 | data[k] = torch.from_numpy(v).float() 54 | else: 55 | raise ValueError("Error! Type {0} not supported.".format(type(v))) 56 | return data -------------------------------------------------------------------------------- /datasets/test.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import argparse 3 | from data_collection import DataCollection 4 | import sys, os 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 6 | import util.config as utilcfg 7 | import util.visualize as u_vis 8 | from omegaconf import OmegaConf 9 | 10 | def parse_config(): 11 | arg_parser = argparse.ArgumentParser(description='pre-processing for PL2Map dataset') 12 | arg_parser.add_argument('-d', '--dataset_dir', type=Path, default='datasets/imgs_datasets/', help='') 13 | arg_parser.add_argument('--dataset', type=str, default="7scenes", help='dataset name') 14 | arg_parser.add_argument('-s', '--scene', type=str, default="office", help='scene name(s)') 15 | arg_parser.add_argument('-cp','--checkpoint', type=int, default=0, choices=[0,1], help='use pre-trained model') 16 | arg_parser.add_argument('--visdom', type=int, default=1, choices=[0,1], help='visualize loss using visdom') 17 | arg_parser.add_argument('-c','--cudaid', type=int, default=0, help='specify cuda device id') 18 | arg_parser.add_argument('--use_depth', type=int, default=0, choices=[0,1], help='use SfM corrected by depth or not') 19 | arg_parser.add_argument('-o','--outputs', type=Path, default='logs/', 20 | help='Path to the output directory, default: %(default)s') 21 | arg_parser.add_argument('-expv', '--experiment_version', type=str, default="pl2map", help='experiment version folder') 22 | args, _ = arg_parser.parse_known_args() 23 | args.outputs = os.path.join(args.outputs, args.scene + "_" + args.experiment_version) 24 | print("Dataset: {} | Scene: {}".format(args.dataset, args.scene)) 25 | cfg = utilcfg.load_config(f'cfgs/{args.dataset}.yaml', default_path='cfgs/default.yaml') 26 | cfg = OmegaConf.create(cfg) 27 | utilcfg.mkdir(args.outputs) 28 | 29 | # Save the config file for evaluation purposes 30 | 
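# The merged OmegaConf config is saved next to the experiment outputs so that a later
# evaluation run can rebuild the same configuration. A minimal sketch of the eval-side
# reload (hypothetical usage, not part of this script):
#   cfg = OmegaConf.load(os.path.join(args.outputs, 'config.yaml'))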
config_file_path = os.path.join(args.outputs, 'config.yaml') 31 | OmegaConf.save(cfg, config_file_path) 32 | 33 | return args, cfg 34 | 35 | def main(): 36 | args, cfg = parse_config() 37 | dataset = DataCollection(args, cfg, mode="test") 38 | # img_name = "seq-06/frame-000780.color.png" 39 | 40 | # print(dataset.imgname2imgclass[img_name].camera.camera_array) 41 | # print(dataset.imgname2imgclass[img_name].pose.get_pose_vector()) 42 | 43 | # u_vis.visualize_2d_points_lines_from_collection(dataset, img_name, mode="online") 44 | # u_vis.visualize_2d_lines_from_collection(dataset, img_name, mode="online") 45 | # u_vis.visualize_2d_lines_from_collection(dataset, img_name, mode="offline") 46 | # u_vis.open3d_vis_3d_points_from_datacollection(dataset) 47 | # u_vis.open3d_vis_3d_lines_from_single_imgandcollection(dataset, img_name) 48 | u_vis.open3d_vis_3d_lines_from_datacollection(dataset) 49 | # u_vis.visualize_2d_points_from_collection(dataset, img_name, mode="online") 50 | # u_vis.visualize_2d_points_from_collection(dataset, img_name, mode="offline") 51 | # dataset.image_loader(img_name, cfg.train.augmentation.apply, debug=True) 52 | # img_name = "seq-06/frame-000499.color.png" 53 | # train_img_list = dataset.train_imgs 54 | # i = 0 55 | # for img_name in train_img_list: 56 | # i+=1 57 | # if i%5 == 0: 58 | # continue 59 | # print(img_name) 60 | # # u_vis.visualize_2d_points_from_collection(dataset, img_name, mode="offline") 61 | # # u_vis.visualize_2d_points_from_collection(dataset, img_name, mode="online") 62 | # u_vis.visualize_2d_lines_from_collection(dataset, img_name, mode="offline") 63 | # # u_vis.visualize_2d_lines_from_collection(dataset, img_name, mode="online") 64 | # # visualize 3D train lines 65 | # # u_vis.open3d_vis_3d_lines_from_datacollection(dataset) 66 | # if i > 2000: 67 | # break 68 | if __name__ == "__main__": 69 | main() 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /detectors/line2d/DeepLSD/deeplsd.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | from third_party.DeepLSD.deeplsd.models.deeplsd_inference import DeepLSD 5 | from ..linebase_detector import LineBaseDetector, BaseDetectorOptions 6 | 7 | class DeepLSDDetector(LineBaseDetector): 8 | def __init__(self, options = BaseDetectorOptions()): 9 | super(DeepLSDDetector, self).__init__(options) 10 | 11 | conf = { 12 | 'detect_lines': True, 13 | 'line_detection_params': { 14 | 'merge': False, 15 | 'grad_nfa': True, 16 | 'filtering': 'normal', 17 | 'grad_thresh': 3, 18 | }, 19 | } 20 | self.device = f'cuda:{self.cudaid}' if torch.cuda.is_available() else 'cpu' 21 | if self.weight_path is None: 22 | ckpt = os.path.join(os.path.dirname(__file__), 'deeplsd_md.tar') 23 | else: 24 | ckpt = os.path.join(self.weight_path, "line2d", "DeepLSD", 'deeplsd_md.tar') 25 | if not os.path.isfile(ckpt): 26 | self.download_model(ckpt) 27 | ckpt = torch.load(ckpt, map_location='cpu') 28 | print('Loaded DeepLSD model') 29 | self.net = DeepLSD(conf).eval() 30 | self.net.load_state_dict(ckpt['model']) 31 | self.net = self.net.to(self.device) 32 | 33 | def download_model(self, path): 34 | import subprocess 35 | if not os.path.exists(os.path.dirname(path)): 36 | os.makedirs(os.path.dirname(path)) 37 | link = "https://www.polybox.ethz.ch/index.php/s/XVb30sUyuJttFys/download" 38 | cmd = ["wget", link, "-O", path] 39 | print("Downloading DeepLSD model...") 40 | 
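# Note: this download shells out to wget, so it assumes wget is available on PATH.
# A pure-Python fallback sketch (an assumption, not used by the original code):
#   import urllib.request
#   urllib.request.urlretrieve(link, path)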
subprocess.run(cmd, check=True) 41 | 42 | def get_module_name(self): 43 | return "deeplsd" 44 | 45 | def detect(self, image): 46 | 47 | with torch.no_grad(): 48 | lines = self.net({'image': image})['lines'][0] 49 | 50 | # Use the line length as score 51 | lines = np.concatenate([ 52 | lines.reshape(-1, 4), 53 | np.linalg.norm(lines[:, 0] - lines[:, 1], axis=1, keepdims=True)], 54 | axis=1) 55 | return lines 56 | -------------------------------------------------------------------------------- /detectors/line2d/LSD/lsd.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytlsd 3 | import numpy as np 4 | from ..linebase_detector import LineBaseDetector, BaseDetectorOptions 5 | 6 | class LSDDetector(LineBaseDetector): 7 | def __init__(self, options = BaseDetectorOptions()): 8 | super(LSDDetector, self).__init__(options) 9 | 10 | def get_module_name(self): 11 | return "lsd" 12 | 13 | def detect(self, image): 14 | max_n_lines = None # 80 15 | min_length = 15 16 | lines, scores, valid_lines = [], [], [] 17 | if max_n_lines is None: 18 | b_segs = pytlsd.lsd(image) 19 | else: 20 | for s in [0.3, 0.4, 0.5, 0.7, 0.8, 1.0]: 21 | b_segs = pytlsd.lsd(image, scale=s) 22 | # print(len(b_segs)) 23 | if len(b_segs) >= max_n_lines: 24 | break 25 | # print(len(b_segs)) 26 | segs_length = np.linalg.norm(b_segs[:, 2:4] - b_segs[:, 0:2], axis=1) 27 | # Remove short lines 28 | # b_segs = b_segs[segs_length >= min_length] 29 | # segs_length = segs_length[segs_length >= min_length] 30 | b_scores = b_segs[:, -1] * np.sqrt(segs_length) 31 | # Take the most relevant segments with 32 | indices = np.argsort(-b_scores) 33 | if max_n_lines is not None: 34 | indices = indices[:max_n_lines] 35 | b_segs = b_segs[indices, :] 36 | # print(b_segs.shape) 37 | # segs = pytlsd.lsd(image) 38 | return b_segs 39 | 40 | -------------------------------------------------------------------------------- /detectors/line2d/linebase_detector.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from tqdm import tqdm 4 | import cv2 5 | from torch import nn 6 | 7 | #import limap.util.io as limapio 8 | #import limap.visualize as limapvis 9 | 10 | import collections 11 | from typing import NamedTuple 12 | class BaseDetectorOptions(NamedTuple): 13 | """ 14 | Base options for the line detector 15 | 16 | :param set_gray: whether to set the image to gray scale (sometimes depending on the detector) 17 | :param max_num_2d_segs: maximum number of detected line segments (default = 3000) 18 | :param do_merge_lines: whether to merge close similar lines at post-processing (default = False) 19 | :param visualize: whether to output visualizations into output folder along with the detections (default = False) 20 | :param weight_path: specify path to load weights (at default, weights will be downloaded to ~/.local) 21 | """ 22 | max_num_2d_segs: int = 3000 23 | do_merge_lines: bool = False 24 | visualize: bool = False 25 | weight_path: str = None 26 | cudaid: int = 0 27 | 28 | class LineBaseDetector(): 29 | """ 30 | Virtual class for line detector 31 | """ 32 | def __init__(self, options = BaseDetectorOptions()): 33 | self.max_num_2d_segs = options.max_num_2d_segs 34 | self.do_merge_lines = options.do_merge_lines 35 | self.visualize = options.visualize 36 | self.weight_path = options.weight_path 37 | self.cudaid = options.cudaid 38 | 39 | # Module name needs to be set 40 | def get_module_name(self): 41 | """ 42 | Virtual method 
(need to be implemented) - return the name of the module 43 | """ 44 | raise NotImplementedError 45 | # The functions below are required for detectors 46 | def detect(self, data): 47 | """ 48 | Virtual method (for detector) - detect 2D line segments 49 | 50 | Args: 51 | view (:class:`limap.base.CameraView`): The `limap.base.CameraView` instance corresponding to the image 52 | Returns: 53 | :class:`np.array` of shape (N, 5): line detections. Each row corresponds to x1, y1, x2, y2 and score. 54 | """ 55 | raise NotImplementedError 56 | # The functions below are required for extractors 57 | def extract(self, camview, segs): 58 | """ 59 | Virtual method (for extractor) - extract the features for the detected segments 60 | 61 | Args: 62 | view (:class:`limap.base.CameraView`): The `limap.base.CameraView` instance corresponding to the image 63 | segs: :class:`np.array` of shape (N, 5), line detections. Each row corresponds to x1, y1, x2, y2 and score. Computed from the `detect` method. 64 | Returns: 65 | The extracted feature 66 | """ 67 | raise NotImplementedError 68 | def get_descinfo_fname(self, descinfo_folder, img_id): 69 | """ 70 | Virtual method (for extractor) - Get the target filename of the extracted feature 71 | 72 | Args: 73 | descinfo_folder (str): The output folder 74 | img_id (int): The image id 75 | Returns: 76 | str: target filename 77 | """ 78 | raise NotImplementedError 79 | def save_descinfo(self, descinfo_folder, img_id, descinfo): 80 | """ 81 | Virtual method (for extractor) - Save the extracted feature to the target folder 82 | 83 | Args: 84 | descinfo_folder (str): The output folder 85 | img_id (int): The image id 86 | descinfo: The features extracted from the function `extract` 87 | """ 88 | raise NotImplementedError 89 | def read_descinfo(self, descinfo_folder, img_id): 90 | """ 91 | Virtual method (for extractor) - Read in the extracted feature. Dual function for `save_descinfo`. 92 | 93 | Args: 94 | descinfo_folder (str): The output folder 95 | img_id (int): The image id 96 | Returns: 97 | The extracted feature 98 | """ 99 | raise NotImplementedError 100 | # The functions below are required for double-functioning objects 101 | def detect_and_extract(self, camview): 102 | """ 103 | Virtual method (for dual-functional class that can perform both detection and extraction) - Detect and extract on a single image 104 | 105 | Args: 106 | view (:class:`limap.base.CameraView`): The `limap.base.CameraView` instance corresponding to the image 107 | Returns: 108 | segs (:class:`np.array`): of shape (N, 5), line detections. Each row corresponds to x1, y1, x2, y2 and score. Computed from the `detect` method. 109 | descinfo: The features extracted from the function `extract` 110 | """ 111 | raise NotImplementedError 112 | def sample_descinfo_by_indexes(self, descinfo, indexes): 113 | """ 114 | Virtual method (for dual-functional class that can perform both detection and extraction) - sample descriptors for a subset of images 115 | 116 | Args: 117 | descinfo: The features extracted from the function `extract`. 118 | indexes (list[int]): List of image ids for the subset. 
119 | """ 120 | raise NotImplementedError 121 | 122 | def get_segments_folder(self, output_folder): 123 | """ 124 | Return the folder path to the detected segments 125 | 126 | Args: 127 | output_folder (str): The output folder 128 | Returns: 129 | path_to_segments (str): The path to the saved segments 130 | """ 131 | return os.path.join(output_folder, "segments") 132 | 133 | def merge_lines(self, segs): 134 | from limap.line2d.line_utils import merge_lines 135 | segs = segs[:, :4].reshape(-1, 2, 2) 136 | segs = merge_lines(segs) 137 | segs = segs.reshape(-1, 4) 138 | return segs 139 | 140 | def take_longest_k(self, segs, max_num_2d_segs=3000): 141 | indexes = np.arange(0, segs.shape[0]) 142 | if max_num_2d_segs is None or max_num_2d_segs == -1: 143 | pass 144 | elif segs.shape[0] > max_num_2d_segs: 145 | lengths_squared = (segs[:,2] - segs[:,0]) ** 2 + (segs[:,3] - segs[:,1]) ** 2 146 | indexes = np.argsort(lengths_squared)[::-1][:max_num_2d_segs] 147 | segs = segs[indexes,:] 148 | return segs, indexes 149 | 150 | def visualize_segs(self, output_folder, imagecols, first_k=10): 151 | seg_folder = self.get_segments_folder(output_folder) 152 | n_vis_images = min(first_k, imagecols.NumImages()) 153 | vis_folder = os.path.join(output_folder, "visualize") 154 | limapio.check_makedirs(vis_folder) 155 | image_ids = imagecols.get_img_ids()[:n_vis_images] 156 | for img_id in image_ids: 157 | img = imagecols.read_image(img_id) 158 | segs = limapio.read_txt_segments(seg_folder, img_id) 159 | img = limapvis.draw_segments(img, segs, (0, 255, 0)) 160 | fname = os.path.join(vis_folder, "img_{0}_det.png".format(img_id)) 161 | cv2.imwrite(fname, img) 162 | 163 | def detect_all_images(self, output_folder, imagecols, skip_exists=False): 164 | """ 165 | Perform line detection on all images and save the line segments 166 | 167 | Args: 168 | output_folder (str): The output folder 169 | imagecols (:class:`limap.base.ImageCollection`): The input image collection 170 | skip_exists (bool): Whether to skip already processed images 171 | Returns: 172 | dict[int -> :class:`np.array`]: The line detection for each image indexed by the image id. Each segment is with shape (N, 5). Each row corresponds to x1, y1, x2, y2 and score. 
173 | """ 174 | seg_folder = self.get_segments_folder(output_folder) 175 | if not skip_exists: 176 | limapio.delete_folder(seg_folder) 177 | limapio.check_makedirs(seg_folder) 178 | if self.visualize: 179 | vis_folder = os.path.join(output_folder, "visualize") 180 | limapio.check_makedirs(vis_folder) 181 | for img_id in tqdm(imagecols.get_img_ids()): 182 | if skip_exists and limapio.exists_txt_segments(seg_folder, img_id): 183 | if self.visualize: 184 | segs = limapio.read_txt_segments(seg_folder, img_id) 185 | else: 186 | segs = self.detect(imagecols.camview(img_id)) 187 | if self.do_merge_lines: 188 | segs = self.merge_lines(segs) 189 | segs, _ = self.take_longest_k(segs, max_num_2d_segs=self.max_num_2d_segs) 190 | limapio.save_txt_segments(seg_folder, img_id, segs) 191 | if self.visualize: 192 | img = imagecols.read_image(img_id) 193 | img = limapvis.draw_segments(img, segs, (0, 255, 0)) 194 | fname = os.path.join(vis_folder, "img_{0}_det.png".format(img_id)) 195 | cv2.imwrite(fname, img) 196 | all_2d_segs = limapio.read_all_segments_from_folder(seg_folder) 197 | all_2d_segs = {id: all_2d_segs[id] for id in imagecols.get_img_ids()} 198 | return all_2d_segs -------------------------------------------------------------------------------- /detectors/line2d/register_linedetector.py: -------------------------------------------------------------------------------- 1 | from .linebase_detector import BaseDetectorOptions 2 | 3 | def get_linedetector(method="lsd", max_num_2d_segs=3000, 4 | do_merge_lines=False, visualize=False, weight_path=None, 5 | cudaid=0): 6 | """ 7 | Get a line detector 8 | """ 9 | options = BaseDetectorOptions() 10 | options = options._replace(max_num_2d_segs=max_num_2d_segs, 11 | do_merge_lines=do_merge_lines, visualize=visualize, weight_path=weight_path, 12 | cudaid=cudaid) 13 | 14 | if method == "lsd": 15 | from .LSD.lsd import LSDDetector 16 | return LSDDetector(options) 17 | elif method == "deeplsd": 18 | from .DeepLSD.deeplsd import DeepLSDDetector 19 | return DeepLSDDetector(options) 20 | else: 21 | raise NotImplementedError -------------------------------------------------------------------------------- /detectors/point2d/SuperPoint/superpoint.py: -------------------------------------------------------------------------------- 1 | # %BANNER_BEGIN% 2 | # --------------------------------------------------------------------- 3 | # %COPYRIGHT_BEGIN% 4 | # 5 | # Magic Leap, Inc. ("COMPANY") CONFIDENTIAL 6 | # 7 | # Unpublished Copyright (c) 2020 8 | # Magic Leap, Inc., All Rights Reserved. 9 | # 10 | # NOTICE: All information contained herein is, and remains the property 11 | # of COMPANY. The intellectual and technical concepts contained herein 12 | # are proprietary to COMPANY and may be covered by U.S. and Foreign 13 | # Patents, patents in process, and are protected by trade secret or 14 | # copyright law. Dissemination of this information or reproduction of 15 | # this material is strictly forbidden unless prior written permission is 16 | # obtained from COMPANY. Access to the source code contained herein is 17 | # hereby forbidden to anyone except current COMPANY employees, managers 18 | # or contractors who have executed Confidentiality and Non-disclosure 19 | # agreements explicitly covering such access. 20 | # 21 | # The copyright notice above does not evidence any actual or intended 22 | # publication or disclosure of this source code, which includes 23 | # information that is confidential and/or proprietary, and is a trade 24 | # secret, of COMPANY. 
ANY REPRODUCTION, MODIFICATION, DISTRIBUTION, 25 | # PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS 26 | # SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF COMPANY IS 27 | # STRICTLY PROHIBITED, AND IN VIOLATION OF APPLICABLE LAWS AND 28 | # INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS SOURCE 29 | # CODE AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS 30 | # TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE, 31 | # USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART. 32 | # 33 | # %COPYRIGHT_END% 34 | # ---------------------------------------------------------------------- 35 | # %AUTHORS_BEGIN% 36 | # 37 | # Originating Authors: Paul-Edouard Sarlin 38 | # 39 | # %AUTHORS_END% 40 | # --------------------------------------------------------------------*/ 41 | # %BANNER_END% 42 | 43 | import os 44 | import sys 45 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))) 46 | from models.base_model import BaseModel 47 | from pathlib import Path 48 | import torch 49 | from torch import nn 50 | 51 | def simple_nms(scores, nms_radius: int): 52 | """ Fast Non-maximum suppression to remove nearby points """ 53 | assert(nms_radius >= 0) 54 | 55 | def max_pool(x): 56 | return torch.nn.functional.max_pool2d( 57 | x, kernel_size=nms_radius*2+1, stride=1, padding=nms_radius) 58 | 59 | zeros = torch.zeros_like(scores) 60 | max_mask = scores == max_pool(scores) 61 | for _ in range(2): 62 | supp_mask = max_pool(max_mask.float()) > 0 63 | supp_scores = torch.where(supp_mask, zeros, scores) 64 | new_max_mask = supp_scores == max_pool(supp_scores) 65 | max_mask = max_mask | (new_max_mask & (~supp_mask)) 66 | return torch.where(max_mask, scores, zeros) 67 | 68 | 69 | def remove_borders(keypoints, scores, border: int, height: int, width: int): 70 | """ Removes keypoints too close to the border """ 71 | mask_h = (keypoints[:, 0] >= border) & (keypoints[:, 0] < (height - border)) 72 | mask_w = (keypoints[:, 1] >= border) & (keypoints[:, 1] < (width - border)) 73 | mask = mask_h & mask_w 74 | return keypoints[mask], scores[mask] 75 | 76 | 77 | def top_k_keypoints(keypoints, scores, k: int): 78 | if k >= len(keypoints): 79 | return keypoints, scores 80 | scores, indices = torch.topk(scores, k, dim=0) 81 | return keypoints[indices], scores 82 | 83 | 84 | def sample_descriptors(keypoints, descriptors, s: int = 8): 85 | """ Interpolate descriptors at keypoint locations """ 86 | b, c, h, w = descriptors.shape 87 | keypoints = keypoints - s / 2 + 0.5 88 | keypoints /= torch.tensor([(w*s - s/2 - 0.5), (h*s - s/2 - 0.5)], 89 | ).to(keypoints)[None] 90 | keypoints = keypoints*2 - 1 # normalize to (-1, 1) 91 | descriptors = torch.nn.functional.grid_sample( 92 | descriptors, keypoints.view(b, 1, -1, 2), mode='bilinear', 93 | align_corners=True) 94 | descriptors = torch.nn.functional.normalize( 95 | descriptors.reshape(b, c, -1), p=2, dim=1) 96 | return descriptors 97 | 98 | class SuperPoint(BaseModel): 99 | """SuperPoint Convolutional Detector and Descriptor 100 | 101 | SuperPoint: Self-Supervised Interest Point Detection and 102 | Description. Daniel DeTone, Tomasz Malisiewicz, and Andrew 103 | Rabinovich. In CVPRW, 2019. 
https://arxiv.org/abs/1712.07629 104 | 105 | """ 106 | default_conf = { 107 | 'name': 'SuperPoint', 108 | 'trainable': False, 109 | 'descriptor_dim': 256, 110 | 'nms_radius': 4, 111 | 'keypoint_threshold': 0.005, 112 | 'max_keypoints': -1, 113 | 'remove_borders': 4, 114 | 'weight_path': None, 115 | 'force_num_keypoints': False, 116 | } 117 | required_data_keys = ['image'] 118 | 119 | def _init(self, conf): 120 | if self.conf.force_num_keypoints: 121 | print('[WARNING]: \"force_num_keypoints\" is applied') 122 | self.conf.keypoint_threshold = 0.0 123 | self.relu = nn.ReLU(inplace=True) 124 | self.pool = nn.MaxPool2d(kernel_size=2, stride=2) 125 | c1, c2, c3, c4, c5 = 64, 64, 128, 128, 256 126 | 127 | self.conv1a = nn.Conv2d(1, c1, kernel_size=3, stride=1, padding=1) 128 | self.conv1b = nn.Conv2d(c1, c1, kernel_size=3, stride=1, padding=1) 129 | self.conv2a = nn.Conv2d(c1, c2, kernel_size=3, stride=1, padding=1) 130 | self.conv2b = nn.Conv2d(c2, c2, kernel_size=3, stride=1, padding=1) 131 | self.conv3a = nn.Conv2d(c2, c3, kernel_size=3, stride=1, padding=1) 132 | self.conv3b = nn.Conv2d(c3, c3, kernel_size=3, stride=1, padding=1) 133 | self.conv4a = nn.Conv2d(c3, c4, kernel_size=3, stride=1, padding=1) 134 | self.conv4b = nn.Conv2d(c4, c4, kernel_size=3, stride=1, padding=1) 135 | 136 | self.convPa = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1) 137 | self.convPb = nn.Conv2d(c5, 65, kernel_size=1, stride=1, padding=0) 138 | 139 | self.convDa = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1) 140 | self.convDb = nn.Conv2d( 141 | c5, self.conf.descriptor_dim, 142 | kernel_size=1, stride=1, padding=0) 143 | 144 | if self.conf.weight_path is None: 145 | path = Path(__file__).parent / 'weights/superpoint_v1.pth' 146 | else: 147 | path = os.path.join(self.conf.weight_path, "point2d", "superpoint", "weights/superpoint_v1.pth") 148 | if not os.path.isfile(path): 149 | self.download_model(path) 150 | self.load_state_dict(torch.load(str(path))) 151 | 152 | mk = self.conf.max_keypoints 153 | if mk == 0 or mk < -1: 154 | raise ValueError('\"max_keypoints\" must be positive or \"-1\"') 155 | 156 | print('Loaded SuperPoint model') 157 | 158 | def download_model(self, path): 159 | import subprocess 160 | if not os.path.exists(os.path.dirname(path)): 161 | os.makedirs(os.path.dirname(path)) 162 | link = "https://github.com/magicleap/SuperPointPretrainedNetwork/blob/master/superpoint_v1.pth?raw=true" 163 | cmd = ["wget", link, "-O", path] 164 | print("Downloading SuperPoint model...") 165 | subprocess.run(cmd, check=True) 166 | 167 | def compute_dense_descriptor(self, image): 168 | """ Compute keypoints, scores, descriptors for image """ 169 | # Shared Encoder 170 | x = self.relu(self.conv1a(image)) 171 | x = self.relu(self.conv1b(x)) 172 | x = self.pool(x) 173 | x = self.relu(self.conv2a(x)) 174 | x = self.relu(self.conv2b(x)) 175 | x = self.pool(x) 176 | x = self.relu(self.conv3a(x)) 177 | x = self.relu(self.conv3b(x)) 178 | x = self.pool(x) 179 | x = self.relu(self.conv4a(x)) 180 | x = self.relu(self.conv4b(x)) 181 | 182 | # Compute the dense keypoint scores 183 | cPa = self.relu(self.convPa(x)) 184 | scores = self.convPb(cPa) 185 | scores = torch.nn.functional.softmax(scores, 1)[:, :-1] 186 | b, _, h, w = scores.shape 187 | scores = scores.permute(0, 2, 3, 1).reshape(b, h, w, 8, 8) 188 | scores = scores.permute(0, 1, 3, 2, 4).reshape(b, h*8, w*8) 189 | scores = simple_nms(scores, self.conf.nms_radius) 190 | 191 | # Extract keypoints 192 | keypoints = [ 193 | torch.nonzero(s > 
self.conf.keypoint_threshold) 194 | for s in scores] 195 | scores = [s[tuple(k.t())] for s, k in zip(scores, keypoints)] 196 | 197 | # Discard keypoints near the image borders 198 | keypoints, scores = list(zip(*[ 199 | remove_borders(k, s, self.conf.remove_borders, h*8, w*8) 200 | for k, s in zip(keypoints, scores)])) 201 | 202 | # Keep the k keypoints with highest score 203 | if self.conf.max_keypoints >= 0: 204 | keypoints, scores = list(zip(*[ 205 | top_k_keypoints(k, s, self.conf.max_keypoints) 206 | for k, s in zip(keypoints, scores)])) 207 | 208 | # Convert (h, w) to (x, y) 209 | keypoints = [torch.flip(k, [1]).float() for k in keypoints] 210 | 211 | # Compute the dense descriptors 212 | cDa = self.relu(self.convDa(x)) 213 | descriptors = self.convDb(cDa) 214 | descriptors = torch.nn.functional.normalize(descriptors, p=2, dim=1) 215 | return keypoints, scores, descriptors 216 | 217 | def compute_dense_descriptor_and_score(self, image): 218 | """ Compute dense scores and descriptors for an image """ 219 | # Shared Encoder 220 | x = self.relu(self.conv1a(image)) 221 | x = self.relu(self.conv1b(x)) 222 | x = self.pool(x) 223 | x = self.relu(self.conv2a(x)) 224 | x = self.relu(self.conv2b(x)) 225 | x = self.pool(x) 226 | x = self.relu(self.conv3a(x)) 227 | x = self.relu(self.conv3b(x)) 228 | x = self.pool(x) 229 | x = self.relu(self.conv4a(x)) 230 | x = self.relu(self.conv4b(x)) 231 | 232 | # Compute the dense keypoint scores 233 | cPa = self.relu(self.convPa(x)) 234 | scores = self.convPb(cPa) 235 | scores = torch.nn.functional.softmax(scores, 1)[:, :-1] 236 | b, _, h, w = scores.shape 237 | scores = scores.permute(0, 2, 3, 1).reshape(b, h, w, 8, 8) 238 | scores = scores.permute(0, 1, 3, 2, 4).reshape(b, h*8, w*8) 239 | 240 | # Compute the dense descriptors 241 | cDa = self.relu(self.convDa(x)) 242 | descriptors = self.convDb(cDa) 243 | dense_descriptor = torch.nn.functional.normalize(descriptors, p=2, dim=1) 244 | return { 245 | 'dense_score': scores, 246 | 'dense_descriptor': dense_descriptor 247 | } 248 | 249 | def sample_descriptors(self, data, keypoints): 250 | _, _, descriptors = self.compute_dense_descriptor(data) 251 | 252 | # Extract descriptors 253 | descriptors = [sample_descriptors(k[None], d[None], 8)[0] 254 | for k, d in zip(keypoints, descriptors)] 255 | 256 | return { 257 | 'keypoints': keypoints, 258 | 'descriptors': descriptors 259 | } 260 | def _forward(self, datain): # sample_points_and_lines_descriptors 261 | data = datain[0] # image 262 | keypoints = datain[1] # keypoints 263 | line_keypoints = datain[2] # line_keypoints 264 | if isinstance(keypoints, list): 265 | # found test mode, then get keypoints too 266 | keypoints, _, descriptors = self.compute_dense_descriptor(data) 267 | else: 268 | _, _, descriptors = self.compute_dense_descriptor(data) 269 | # Extract keypoints descriptors 270 | points_descriptors = [sample_descriptors(k[None], d[None], 8)[0] 271 | for k, d in zip(keypoints, descriptors)] 272 | points_descriptors = torch.stack(points_descriptors, dim=0) 273 | 274 | # Extract line keypoints descriptors 275 | bs,nline,npoints,_ = line_keypoints.shape 276 | line_keypoints = line_keypoints.view(bs,nline*npoints,2) 277 | lines_descriptors = [sample_descriptors(k[None], d[None], 8)[0] 278 | for k, d in zip(line_keypoints, descriptors)] 279 | # reshape and merge lines_descriptors 280 | for i in range(bs): 281 | lines_descriptors[i] = lines_descriptors[i].view(-1, nline, npoints).permute(1,2,0) # -> nline x npoints x 256 282 | lines_descriptors = 
torch.stack(lines_descriptors, dim=0) 283 | return { 284 | 'points_descriptors': points_descriptors, 285 | 'lines_descriptors': lines_descriptors, 286 | 'keypoints': keypoints, 287 | } 288 | 289 | def _forward_default(self, data): 290 | image = data['image'] 291 | keypoints, scores, descriptors = self.compute_dense_descriptor(image) 292 | # Extract descriptors 293 | descriptors = [sample_descriptors(k[None], d[None], 8)[0] 294 | for k, d in zip(keypoints, descriptors)] 295 | return { 296 | 'keypoints': keypoints, 297 | 'scores': scores, 298 | 'descriptors': descriptors, 299 | } 300 | def loss(self, pred, data): 301 | raise NotImplementedError -------------------------------------------------------------------------------- /detectors/point2d/register_pointdetector.py: -------------------------------------------------------------------------------- 1 | def get_pointdetector(method="superpoint", configs=dict()): 2 | """ 3 | Get a point detector 4 | """ 5 | if method == "superpoint": 6 | from .SuperPoint.superpoint import SuperPoint 7 | return SuperPoint(configs) 8 | else: 9 | raise NotImplementedError -------------------------------------------------------------------------------- /models/base_model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from omegaconf import OmegaConf 3 | from abc import ABCMeta, abstractmethod 4 | 5 | class MetaModel(ABCMeta): 6 | def __prepare__(name, bases, **kwds): 7 | total_conf = OmegaConf.create() 8 | for base in bases: 9 | for key in ('base_default_conf', 'default_conf'): 10 | update = getattr(base, key, {}) 11 | if isinstance(update, dict): 12 | update = OmegaConf.create(update) 13 | total_conf = OmegaConf.merge(total_conf, update) 14 | return dict(base_default_conf=total_conf) 15 | 16 | class BaseModel(nn.Module, metaclass=MetaModel): 17 | default_conf = { 18 | 'name': None, 19 | 'trainable': False, 20 | } 21 | required_data = [] 22 | 23 | def __init__(self, conf): 24 | super().__init__() 25 | default_conf = OmegaConf.merge( 26 | self.base_default_conf, OmegaConf.create(self.default_conf)) 27 | self.conf = conf = OmegaConf.merge(default_conf, conf) 28 | self._init(conf) 29 | if not conf.trainable: 30 | for param in self.parameters(): 31 | param.requires_grad = False 32 | 33 | def forward(self, data): 34 | """Check the data and call the _forward method of the child model.""" 35 | def recursive_key_check(expected, given): 36 | for key in expected: 37 | assert key in given, f'Missing key {key} in data' 38 | if isinstance(expected, dict): 39 | recursive_key_check(expected[key], given[key]) 40 | recursive_key_check(self.required_data, data) 41 | return self._forward(data) 42 | 43 | @abstractmethod 44 | def _init(self, conf): 45 | """To be implemented by child class.""" 46 | raise NotImplementedError 47 | @abstractmethod 48 | def _forward(self, data): 49 | """To be implemented by child class.""" 50 | raise NotImplementedError 51 | @abstractmethod 52 | def loss(self, pred, data): 53 | """To be implemented by child class.""" 54 | raise NotImplementedError 55 | -------------------------------------------------------------------------------- /models/pipeline.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from models.base_model import BaseModel 5 | from models.util import get_model 6 | import os 7 | import os.path as osp 8 | 9 | class Pipeline(BaseModel): 10 | default_conf = { 11 | 'trainable': 
True, 12 | } 13 | required_data = ['image', 'original_size', 'keypoints', 'lines'] 14 | 15 | def _init(self, conf): 16 | # get detector model 17 | self.detector = get_model(conf.point2d.detector.name, "detector")(conf.point2d.detector.configs) 18 | assert self.detector.conf.trainable == False, "detector must be fixed, not trainable" 19 | # get regressor model 20 | self.regressor = get_model(conf.regressor.name, "regressor")(conf.regressor).train() 21 | assert self.regressor.conf.trainable == True, "regressor must be trainable" 22 | print(f'The model regresor {conf.regressor.name} has {count_parameters(self.regressor):,} trainable parameters') 23 | 24 | def _forward(self, data): 25 | # Pre process data 26 | # convert lines to line_keypoints | BxLx4 -> BxLx(4+n_line_keypoints*2) 27 | line_keypoints = get_line_keypoints(data['lines'], self.conf.regressor.n_line_keypoints) 28 | # sample descriptors using superpoint 29 | regressor_data = self.detector((data['image'], data['keypoints'], line_keypoints)) 30 | # regress descriptors to 3D points and lines 31 | pred = self.regressor(regressor_data) 32 | pred['keypoints'] = regressor_data['keypoints'] 33 | pred['lines'] = data['lines'] 34 | return pred 35 | 36 | def loss(self, pred, data): 37 | pass 38 | def save_checkpoint(self, path, name, epoch, final = False): 39 | if os.path.exists(path) == False: 40 | os.makedirs(path) 41 | filename = osp.join(path, '{}_final.pth.tar'.format(name)) if final \ 42 | else osp.join(path, '{}.pth.tar'.format(name)) 43 | checkpoint_dict =\ 44 | {'epoch': epoch, 'model_state_dict': self.regressor.state_dict()} 45 | torch.save(checkpoint_dict, filename) 46 | def load_checkpoint(self, path, exp_name): 47 | ''' Load regressor checkpoint from path''' 48 | filename = osp.join(path, '{}.pth.tar'.format(exp_name)) 49 | if not osp.exists(filename): 50 | raise FileNotFoundError(f'Cannot find checkpoint at {filename}') 51 | devide = torch.device(f'cuda:{torch.cuda.current_device()}' \ 52 | if torch.cuda.is_available() else 'cpu') 53 | checkpoint_dict = torch.load(filename, map_location=torch.device(devide)) 54 | self.regressor.load_state_dict(checkpoint_dict['model_state_dict']) 55 | print(f'[INFOR] Loaded checkpoint from {filename}') 56 | return checkpoint_dict['epoch'] 57 | 58 | def get_line_keypoints(lines, n_line_keypoints): 59 | # convert lines to line_keypoints | BxLx4 -> BxLx(n_line_keypoints+2)x2 60 | bs,n_line,_ = lines.shape 61 | total_points = n_line_keypoints + 2 # start point + end point + n_line_keypoints 62 | line_keypoints = lines.new_zeros((bs,n_line, total_points,2)) 63 | line_keypoints[:,:,0,:] = lines[:,:,:2] # start point 64 | line_keypoints[:,:,total_points-1,:] = lines[:,:,2:] # end point 65 | per_distance = (lines[:,:,2:] - lines[:,:,:2])/(n_line_keypoints+2-1) # stop - start point 66 | for i in range(n_line_keypoints): 67 | line_keypoints[:,:,i+1,:] = line_keypoints[:,:,0,:] + per_distance*(i+1) 68 | return line_keypoints 69 | 70 | def count_parameters(model): 71 | return sum(p.numel() for p in model.parameters() if p.requires_grad) 72 | 73 | -------------------------------------------------------------------------------- /models/pl2map.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from models.base_model import BaseModel 5 | from models.util import get_model 6 | from copy import deepcopy 7 | from typing import Tuple, List 8 | import torch.nn.functional as F 9 | 10 | class PL2Map(BaseModel): 11 | 
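# Descriptive summary of the regressor below: SuperPoint point descriptors are passed straight
# to the attentional GNN, while each line's descriptors (one per point sampled along the
# segment) are first summarized by the LineEncoder, which applies self-attention over the
# sampled points and keeps the first token as the line descriptor. The GNN then alternates
# self/cross attention between point and line descriptors, and two MLP heads map them to
# 4 values per point (XYZ plus a channel used as an uncertainty score in the criterion of
# util_learner.py) and 7 values per line (two 3D endpoints plus the same kind of channel).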
default_conf = { 12 | 'trainable': True, 13 | 'n_heads': 4, 14 | 'd_inner': 1024, 15 | 'n_att_layers': 1, 16 | 'feature_dim': 256, 17 | 'GNN_layers': ['self', 'cross', 'self', 'cross', 'self'], 18 | 'mapping_layers': [512, 1024, 512], 19 | } 20 | required_data = ['points_descriptors', 'lines_descriptors'] 21 | 22 | def _init(self, conf): 23 | self.line_encoder = LineEncoder(conf.feature_dim, conf.n_heads, conf.n_att_layers, conf.d_inner) 24 | self.gnn = AttentionalGNN( 25 | feature_dim=self.conf.feature_dim, layer_names=self.conf.GNN_layers) 26 | self.mapping_p = MLP([conf.feature_dim]+self.conf.mapping_layers+[4]) # mapping point descriptors to 3D points 27 | self.mapping_l = MLP([conf.feature_dim]+self.conf.mapping_layers+[7]) # mapping line descriptors to 3D lines 28 | 29 | def _forward(self, data): 30 | # get line descriptors 31 | p_desc = data['points_descriptors'] 32 | l_desc = self.line_encoder(data['lines_descriptors']) 33 | p_desc, l_desc = self.gnn(p_desc, l_desc) 34 | pred = {} 35 | pred['points3D'] = self.mapping_p(p_desc) 36 | pred['lines3D'] = self.mapping_l(l_desc) 37 | return pred 38 | def loss(self, pred, data): 39 | pass 40 | 41 | 42 | class ScaledDotProduct(nn.Module): 43 | """ Scaled Dot-Product Attention """ 44 | def __init__(self, scale, attn_dropout=0.1): 45 | super().__init__() 46 | self.scale = scale 47 | self.dropout = nn.Dropout(attn_dropout) 48 | 49 | def forward(self, q, k, v, mask=None): 50 | attn = torch.matmul(q / self.scale, k.transpose(3, 4)) 51 | if mask is not None: 52 | attn = attn.masked_fill(mask == 0, -1e9) 53 | attn = self.dropout(F.softmax(attn, dim=-1)) 54 | output = torch.matmul(attn, v) 55 | 56 | return output, attn 57 | 58 | class MultiHeadAttention_Line(nn.Module): 59 | """ Multi-Headed Attention """ 60 | def __init__(self, n_heads: int, d_feature: int, dropout=0.1): 61 | super().__init__() 62 | assert d_feature % n_heads == 0 63 | dim = d_feature // n_heads 64 | self.dim = dim 65 | self.n_heads = n_heads 66 | 67 | self.w_qs = nn.Linear(d_feature, n_heads * dim, bias=True) 68 | self.w_ks = nn.Linear(d_feature, n_heads * dim, bias=True) 69 | self.w_vs = nn.Linear(d_feature, n_heads * dim, bias=True) 70 | self.fc = nn.Linear(n_heads * dim, d_feature, bias=True) 71 | 72 | self.attention = ScaledDotProduct(scale = dim ** 0.5) 73 | 74 | self.dropout = nn.Dropout(dropout) 75 | self.layer_norm = nn.LayerNorm(d_feature, eps=1e-6) 76 | 77 | def forward(self, q, k, v, mask=None): 78 | d_k = self.dim 79 | d_v = self.dim 80 | n_heads = self.n_heads 81 | 82 | n_batches = q.size(0) 83 | n_sublines = q.size(1) 84 | n_words_q = q.size(2) 85 | n_words_k = k.size(2) 86 | n_words_v = v.size(2) 87 | 88 | residual = q 89 | 90 | q = self.w_qs(q).view(n_batches, n_sublines, n_words_q, n_heads, d_k) 91 | k = self.w_ks(k).view(n_batches, n_sublines, n_words_k, n_heads, d_k) 92 | v = self.w_vs(v).view(n_batches, n_sublines, n_words_v, n_heads, d_k) 93 | 94 | # Transpose for attention dot product: b x n x lq x dv 95 | q, k, v = q.transpose(2, 3), k.transpose(2, 3), v.transpose(2, 3) 96 | 97 | if mask is not None: 98 | mask = mask.unsqueeze(2) # For head axis broadcasting. 
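# Shape note: after the transpose above, q, k and v are
# (n_batches, n_sublines, n_heads, n_words, d_k), so ScaledDotProduct attends across the
# points sampled along each individual line (its matmul transposes the last two dims) and
# never mixes different lines or batch elements.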
99 | 100 | q, attn = self.attention(q, k, v, mask=mask) 101 | 102 | # Transpose to move the head dimension back: b x lq x n x dv 103 | # Combine the last two dimensions to concatenate all the heads together: b x lq x (n*dv) 104 | q = q.transpose(2,3).contiguous().view(n_batches, n_sublines, n_words_q, -1) 105 | q = self.dropout(self.fc(q)) 106 | 107 | q += residual 108 | q = self.layer_norm(q) 109 | 110 | return q, attn 111 | 112 | class FeedForward(nn.Module): 113 | """ Feed Forward layer """ 114 | def __init__(self, d_in, d_hid, dropout=0.1): 115 | super().__init__() 116 | self.w_1 = nn.Linear(d_in, d_hid) # d_in: 256, d_hid: 1024 117 | self.w_2 = nn.Linear(d_hid, d_in) # d_hid: 1024, d_in: 256 118 | self.layer_norm = nn.LayerNorm(d_in, eps=1e-6) 119 | self.dropout = nn.Dropout(dropout) 120 | def forward(self,x): 121 | residual = x 122 | x = self.w_2(F.gelu(self.w_1(x))) 123 | x = self.dropout(x) 124 | x += residual 125 | x = self.layer_norm(x) 126 | return x 127 | 128 | 129 | class LineDescriptiveEncoder(nn.Module): 130 | """ Line Descriptive Network using the transformer """ 131 | def __init__(self, d_feature: int, n_heads: int, d_inner: int, dropout=0.1): 132 | super().__init__() 133 | self.slf_attn = MultiHeadAttention_Line(n_heads, d_feature) 134 | self.pos_ffn = FeedForward(d_feature, d_inner, dropout=dropout) 135 | 136 | def forward(self, desc, slf_attn_mask=None): 137 | 138 | desc, enc_slf_attn = self.slf_attn(desc, desc, desc, mask=slf_attn_mask) 139 | desc = self.pos_ffn(desc) 140 | 141 | return desc, enc_slf_attn 142 | 143 | class LineEncoder(nn.Module): 144 | """ LineEncoder mimics the transformer model""" 145 | def __init__(self, feature_dim, n_heads, n_att_layers, d_inner, dropout=0.1): 146 | super().__init__() 147 | self.feature_dim = feature_dim 148 | self.desc_layers = nn.ModuleList([ 149 | LineDescriptiveEncoder(feature_dim, n_heads, d_inner, dropout=dropout) 150 | for _ in range(n_att_layers)]) 151 | 152 | def forward(self, desc, return_attns=False): 153 | enc_slf_attn_list = [] 154 | for desc_layer in self.desc_layers: 155 | enc_output, enc_slf_attn = desc_layer(desc) 156 | enc_slf_attn_list += [enc_slf_attn] if return_attns else [] 157 | # get the first token of each line 158 | sentence = enc_output[:,:,0,:].transpose(1,2) 159 | return sentence # line descriptors 160 | 161 | 162 | def MLP(channels:list): 163 | layers = [] 164 | n_chnls = len(channels) 165 | for i in range(1, n_chnls): 166 | layers.append(nn.Conv1d(channels[i-1], channels[i], 167 | kernel_size=1, bias=True)) 168 | if i < n_chnls-1: 169 | layers.append(nn.ReLU()) 170 | return nn.Sequential(*layers) 171 | 172 | 173 | def attention(query: torch.Tensor, key: torch.Tensor, value: torch.Tensor) -> Tuple[torch.Tensor,torch.Tensor]: 174 | dim = query.shape[1] 175 | scores = torch.einsum('bdhn,bdhm->bhnm', query, key) / dim**.5 176 | prob = torch.nn.functional.softmax(scores, dim=-1) 177 | return torch.einsum('bhnm,bdhm->bdhn', prob, value), prob 178 | 179 | 180 | class MultiHeadedAttention(nn.Module): 181 | """ Multi-head attention to increase model expressivitiy """ 182 | def __init__(self, num_heads: int, d_model: int): 183 | super().__init__() 184 | assert d_model % num_heads == 0 185 | self.dim = d_model // num_heads 186 | self.num_heads = num_heads 187 | self.merge = nn.Conv1d(d_model, d_model, kernel_size=1) 188 | self.proj = nn.ModuleList([deepcopy(self.merge) for _ in range(3)]) 189 | 190 | def forward(self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor) -> torch.Tensor: 191 | batch_dim 
= query.size(0) 192 | query, key, value = [l(x).view(batch_dim, self.dim, self.num_heads, -1) 193 | for l, x in zip(self.proj, (query, key, value))] 194 | x, _ = attention(query, key, value) 195 | return self.merge(x.contiguous().view(batch_dim, self.dim*self.num_heads, -1)) 196 | 197 | 198 | class AttentionalPropagation(nn.Module): 199 | def __init__(self, feature_dim: int, num_heads: int): 200 | super().__init__() 201 | self.attn = MultiHeadedAttention(num_heads, feature_dim) 202 | self.mlp = MLP([feature_dim*2, feature_dim*2, feature_dim]) 203 | nn.init.constant_(self.mlp[-1].bias, 0.0) 204 | 205 | def forward(self, x: torch.Tensor, source: torch.Tensor) -> torch.Tensor: 206 | message = self.attn(x, source, source) 207 | return self.mlp(torch.cat([x, message], dim=1)) 208 | 209 | class AttentionalGNN(nn.Module): 210 | def __init__(self, feature_dim: int, layer_names: List[str]) -> None: 211 | super().__init__() 212 | self.layers = nn.ModuleList([ 213 | AttentionalPropagation(feature_dim, 4) 214 | for _ in range(len(layer_names))]) 215 | self.names = layer_names 216 | def forward(self, desc0: torch.Tensor, desc1: torch.Tensor) -> Tuple[torch.Tensor,torch.Tensor]: 217 | for layer, name in zip(self.layers, self.names): 218 | if name == 'cross': 219 | src0, src1 = desc1, desc0 220 | else: # if name == 'self': 221 | src0, src1 = desc0, desc1 222 | delta0, delta1 = layer(desc0, src0), layer(desc1, src1) 223 | desc0, desc1 = (desc0 + delta0), (desc1 + delta1) 224 | return desc0, desc1 -------------------------------------------------------------------------------- /models/util.py: -------------------------------------------------------------------------------- 1 | import collections.abc as collections 2 | import torch 3 | 4 | 5 | def get_class(mod_path, BaseClass): 6 | """Get the class object which inherits from BaseClass and is defined in 7 | the module named mod_name, child of base_path. 8 | """ 9 | import inspect 10 | mod = __import__(mod_path, fromlist=['']) 11 | classes = inspect.getmembers(mod, inspect.isclass) 12 | # Filter classes defined in the module 13 | classes = [c for c in classes if c[1].__module__ == mod_path] 14 | # Filter classes inherited from BaseModel 15 | classes = [c for c in classes if issubclass(c[1], BaseClass)] 16 | assert len(classes) == 1, classes 17 | return classes[0][1] 18 | 19 | 20 | def get_model(name, _type = "detector"): 21 | from models.base_model import BaseModel 22 | if _type == "detector" and name == "superpoint": 23 | base_path = 'detectors.point2d.SuperPoint.' 24 | elif _type == "regressor": 25 | base_path = 'models.' 
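# Illustrative resolution (hypothetical names, assuming the config uses the module file names):
#   get_model("superpoint", "detector") -> detectors.point2d.SuperPoint.superpoint.SuperPoint
#   get_model("pl2map", "regressor")    -> models.pl2map.PL2Map
# get_class() returns the single class in that module which subclasses BaseModel. If name
# matches neither branch, base_path is never assigned and the return below raises NameError.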
26 | return get_class(base_path + name, BaseModel) 27 | 28 | 29 | def numpy_image_to_torch(image): 30 | """Normalize the image tensor and reorder the dimensions.""" 31 | if image.ndim == 3: 32 | image = image.transpose((2, 0, 1)) # HxWxC to CxHxW 33 | elif image.ndim == 2: 34 | image = image[None] # add channel axis 35 | else: 36 | raise ValueError(f'Not an image: {image.shape}') 37 | return torch.from_numpy(image / 255.).float() 38 | 39 | 40 | def map_tensor(input_, func): 41 | if isinstance(input_, (str, bytes)): 42 | return input_ 43 | elif isinstance(input_, collections.Mapping): 44 | return {k: map_tensor(sample, func) for k, sample in input_.items()} 45 | elif isinstance(input_, collections.Sequence): 46 | return [map_tensor(sample, func) for sample in input_] 47 | else: 48 | return func(input_) 49 | 50 | 51 | def batch_to_np(batch): 52 | return map_tensor(batch, lambda t: t.detach().cpu().numpy()[0]) 53 | -------------------------------------------------------------------------------- /models/util_learner.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.optim as optim 3 | import torch 4 | 5 | class CriterionPointLine(nn.Module): 6 | ''' 7 | Criterion for point and line''' 8 | def __init__(self, rpj_cfg, total_iterations=2000000): 9 | super(CriterionPointLine, self).__init__() 10 | self.rpj_cfg = rpj_cfg 11 | self.reprojection_loss = ReproLoss(total_iterations, self.rpj_cfg.soft_clamp, 12 | self.rpj_cfg.soft_clamp_min, self.rpj_cfg.type, 13 | self.rpj_cfg.circle_schedule) 14 | self.zero = fakezero() 15 | self.total_iterations = total_iterations 16 | 17 | def forward(self, pred, target, iteration=2000000): 18 | batch_size, _, _ = pred['points3D'].shape 19 | validPoints = target["validPoints"] 20 | validLines = target["validLines"] 21 | # get losses for points 22 | square_errors_points = torch.norm((pred['points3D'][:,:3,:] - target["points3D"]), dim = 1) 23 | loss_points = torch.sum(validPoints*square_errors_points)/batch_size 24 | uncer_loss_points = torch.sum(torch.norm(validPoints - 1/(1+100*torch.abs(pred['points3D'][:,3,:])), dim = 1))/batch_size 25 | # get losses for lines 26 | square_errors_lines = torch.norm((pred['lines3D'][:,:6,:] - target["lines3D"]), dim = 1) 27 | loss_lines = torch.sum(validLines*square_errors_lines)/batch_size 28 | uncer_loss_lines = torch.sum(torch.norm(validLines - 1/(1+100*torch.abs(pred['lines3D'][:,6,:])), dim = 1))/batch_size 29 | 30 | points_proj_loss = 0 31 | lines_proj_loss = 0 32 | 33 | if self.rpj_cfg.apply: 34 | # get projection losses for points 35 | for i in range(batch_size): # default batch_size = 1 36 | prp_error, prp= project_loss_points(pred['keypoints'][i,:,:], pred['points3D'][i,:3,:], 37 | target['pose'][i,:], target['camera'][i,:], validPoints[i,:]) 38 | points_proj_loss += self.reprojection_loss.compute_point(prp_error, prp, iteration, validPoints[i,:]) 39 | points_proj_loss = points_proj_loss / batch_size 40 | # get projection losses for lines 41 | 42 | for i in range(batch_size): 43 | prl_error, prp_s, prp_e = project_loss_lines(pred['lines'][i,:,:], pred['lines3D'][i,:6,:], 44 | target['pose'][i,:], target['camera'][i,:], validLines[i,:]) 45 | lines_proj_loss += self.reprojection_loss.compute_line(prl_error, prp_s, prp_e, iteration, validLines[i,:]) 46 | lines_proj_loss = lines_proj_loss / batch_size 47 | if iteration/self.total_iterations < self.rpj_cfg.start_apply: 48 | total_loss = loss_points + uncer_loss_points + loss_lines + uncer_loss_lines 49 
| else: 50 | total_loss = loss_points + uncer_loss_points + loss_lines + uncer_loss_lines + points_proj_loss + lines_proj_loss 51 | 52 | points_proj_loss = self.zero if (isinstance(points_proj_loss, int) or isinstance(points_proj_loss, float)) else points_proj_loss 53 | lines_proj_loss = self.zero if (isinstance(lines_proj_loss, int) or isinstance(lines_proj_loss, float)) else lines_proj_loss 54 | return total_loss, loss_points, uncer_loss_points, loss_lines, uncer_loss_lines, points_proj_loss, lines_proj_loss 55 | 56 | 57 | class fakezero(object): 58 | def __init__(self): 59 | pass 60 | def item(self): 61 | return 0 62 | 63 | 64 | def qvec2rotmat(qvec): 65 | return torch.tensor([ 66 | [1 - 2 * qvec[2]**2 - 2 * qvec[3]**2, 67 | 2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3], 68 | 2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2]], 69 | [2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3], 70 | 1 - 2 * qvec[1]**2 - 2 * qvec[3]**2, 71 | 2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1]], 72 | [2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2], 73 | 2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1], 74 | 1 - 2 * qvec[1]**2 - 2 * qvec[2]**2]]) 75 | 76 | def project_loss_points(gt_pt2Ds, pt3Ds, c_pose, camera, valids): 77 | ''' 78 | gt_pt2Ds: 2xN 79 | pt3Ds: 3xN 80 | c_pose: 1x7 81 | camera: 1x5 82 | valids: 1xN 83 | ''' 84 | device = pt3Ds.device 85 | R = qvec2rotmat(c_pose[3:]).to(device=device) 86 | t = torch.unsqueeze(c_pose[:3], dim = 1).to(device=device) 87 | if camera[0] == 0.0: # SIMPLE_PINHOLE 88 | fx = fy = camera[3] # focal length 89 | ppx = camera[4] 90 | ppy = camera[5] 91 | elif camera[0] == 1.0: # PINHOLE 92 | fx = camera[3] # focal length 93 | fy = camera[4] 94 | ppx = camera[5] 95 | ppy = camera[6] 96 | else: 97 | raise f"Camera type {camera[0]} is not implemented" 98 | prd_2Ds = R@pt3Ds + t 99 | # project 100 | px = fx*prd_2Ds[0,:]/prd_2Ds[2,:] + ppx 101 | py = fy*prd_2Ds[1,:]/prd_2Ds[2,:] + ppy 102 | errors_x = (gt_pt2Ds[:,0] - px)**2 103 | errors_y = (gt_pt2Ds[:,1] - py)**2 104 | # return torch.mean(valids * torch.sqrt(errors_x + errors_y)) 105 | return torch.sqrt(errors_x + errors_y), prd_2Ds # l2 distance error, and projected 2D points 106 | 107 | def project_loss_lines(gt_line2Ds, line3Ds, c_pose, camera, valids): 108 | ''' 109 | gt_line2Ds: 4xN 110 | line3Ds: 6xN 111 | c_pose: 1x7 # camera pose 112 | camera: 1x5 113 | valids: Nx1 114 | ''' 115 | device = line3Ds.device 116 | R = qvec2rotmat(c_pose[3:]).to(device=device) 117 | t = torch.unsqueeze(c_pose[:3], dim = 1).to(device=device) 118 | if camera[0] == 0.0: # SIMPLE_PINHOLE 119 | fx = fy = camera[3] # focal length 120 | ppx = camera[4] 121 | ppy = camera[5] 122 | elif camera[0] == 1.0: # PINHOLE 123 | fx = camera[3] # focal length 124 | fy = camera[4] 125 | ppx = camera[5] 126 | ppy = camera[6] 127 | else: 128 | raise f"Camera type {camera[0]} is not implemented" 129 | start_point = line3Ds[:3,:] 130 | end_point = line3Ds[3:,:] 131 | prd_2Ds_start = R@start_point + t 132 | prd_2Ds_end = R@end_point + t 133 | # project start point 134 | px_start = fx*prd_2Ds_start[0,:]/prd_2Ds_start[2,:] + ppx # (N,) 135 | py_start = fy*prd_2Ds_start[1,:]/prd_2Ds_start[2,:] + ppy # (N,) 136 | 137 | # project end point 138 | px_end = fx*prd_2Ds_end[0,:]/prd_2Ds_end[2,:] + ppx # (N,) 139 | py_end = fy*prd_2Ds_end[1,:]/prd_2Ds_end[2,:] + ppy # (N,) 140 | 141 | # project startpoint to line 142 | AB = gt_line2Ds[:,2:4] - gt_line2Ds[:,0:2] # ground truth line vector 143 | APstart = torch.stack([px_start - gt_line2Ds[:,0], py_start - gt_line2Ds[:,1]], dim = 1) 144 
| APend = torch.stack([px_end - gt_line2Ds[:,0], py_end - gt_line2Ds[:,1]], dim = 1) 145 | # calculate the cross product 146 | cross_product_start = APstart[:,0]*AB[:,1] - APstart[:,1]*AB[:,0] 147 | AB_magnitude = torch.sqrt((AB**2).sum(dim=1)) 148 | # calculate the distance 149 | distance_start = torch.abs(cross_product_start) / AB_magnitude 150 | cross_product_end = APend[:,0]*AB[:,1] - APend[:,1]*AB[:,0] 151 | # calculate the distance 152 | distance_end = torch.abs(cross_product_end) / AB_magnitude 153 | repr_error = distance_start + distance_end 154 | # return torch.mean(valids * (repr_error)) 155 | return repr_error, prd_2Ds_start, prd_2Ds_end # l2 distance, and projected 2D points 156 | 157 | 158 | 159 | 160 | def weighted_tanh(repro_errs, weight): 161 | # return weight * torch.tanh(repro_errs / weight).sum() 162 | return torch.mean(weight * torch.tanh(repro_errs / weight)) 163 | 164 | import numpy as np 165 | class ReproLoss: 166 | """ 167 | Original from: https://github.com/nianticlabs/ace 168 | Compute per-pixel reprojection loss using different configurable approaches. 169 | 170 | - tanh: tanh loss with a constant scale factor given by the `soft_clamp` parameter (when a pixel's reprojection 171 | error is equal to `soft_clamp`, its loss is equal to `soft_clamp * tanh(1)`). 172 | - dyntanh: Used in the paper, similar to the tanh loss above, but the scaling factor decreases during the course of 173 | the training from `soft_clamp` to `soft_clamp_min`. The decrease is linear, unless `circle_schedule` 174 | is True (default), in which case it applies a circular scheduling. See paper for details. 175 | - l1: Standard L1 loss, computed only on those pixels having an error lower than `soft_clamp` 176 | - l1+sqrt: L1 loss for pixels with reprojection error smaller than `soft_clamp` and 177 | `sqrt(soft_clamp * reprojection_error)` for pixels with a higher error. 178 | - l1+logl1: Similar to the above, but using log L1 for pixels with high reprojection error. 179 | """ 180 | 181 | def __init__(self, 182 | total_iterations, 183 | soft_clamp=50, 184 | soft_clamp_min=1, 185 | type='dyntanh', 186 | circle_schedule=True): 187 | 188 | self.total_iterations = total_iterations 189 | self.soft_clamp = soft_clamp 190 | self.soft_clamp_min = soft_clamp_min 191 | self.type = type 192 | self.circle_schedule = circle_schedule 193 | 194 | def compute_point(self, reprojection_error_b1, pred_cam_coords_b31, iteration, valids): 195 | 196 | # Predicted coordinates behind or close to camera plane. 197 | invalid_min_depth_b1 = pred_cam_coords_b31[2, :] < 0.1 # 0.1 is the min depth 198 | # Very large reprojection errors. 199 | invalid_repro_b1 = reprojection_error_b1 > 1000 # repro_loss_hard_clamp 200 | # Predicted coordinates beyond max distance. 201 | invalid_max_depth_b1 = pred_cam_coords_b31[2, :] > 1000 # 1000 is the max depth 202 | valids = valids.bool() 203 | # Invalid mask is the union of all these. Valid mask is the opposite. 204 | invalid_mask_b1 = (valids | invalid_min_depth_b1 | invalid_repro_b1 | invalid_max_depth_b1) 205 | valid_mask_b1 = ~invalid_mask_b1 206 | 207 | # Reprojection error for all valid scene coordinates. 208 | repro_errs_b1N = reprojection_error_b1[valid_mask_b1] # valid_reprojection_error_b1 209 | return self.final_compute(repro_errs_b1N, iteration) 210 | 211 | def compute_line(self, reprojection_error_b1, pred_cam_coords_b31_1, 212 | pred_cam_coords_b31_2, iteration, valids): 213 | # Predicted coordinates behind or close to camera plane. 
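# As in compute_point() above, a line is kept only if both predicted endpoints lie
# at a camera-frame depth between 0.1 and 1000 and its summed start/end
# point-to-line error stays below the hard clamp of 1000 px; entries flagged in
# `valids` are ORed into the invalid mask, so the loss is evaluated only on the
# remaining lines.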
214 | invalid_min_depth_b1_1 = pred_cam_coords_b31_1[2, :] < 0.1 # 0.1 is the min depth 215 | invalid_min_depth_b1_2 = pred_cam_coords_b31_2[2, :] < 0.1 # 0.1 is the min depth 216 | # Very large reprojection errors. 217 | invalid_repro_b1 = reprojection_error_b1 > 1000 # repro_loss_hard_clamp 218 | # Predicted coordinates beyond max distance. 219 | invalid_max_depth_b1_1 = pred_cam_coords_b31_1[2, :] > 1000 # 1000 is the max depth 220 | invalid_max_depth_b1_2 = pred_cam_coords_b31_2[2, :] > 1000 # 1000 is the max depth 221 | valids = valids.bool() 222 | # Invalid mask is the union of all these. Valid mask is the opposite. 223 | invalid_mask_b1 = (valids | invalid_min_depth_b1_1 | invalid_repro_b1 | invalid_max_depth_b1_1 224 | | invalid_min_depth_b1_2 | invalid_max_depth_b1_2) 225 | valid_mask_b1 = ~invalid_mask_b1 226 | 227 | # Reprojection error for all valid scene coordinates. 228 | repro_errs_b1N = reprojection_error_b1[valid_mask_b1] # valid_reprojection_error_b1 229 | return self.final_compute(repro_errs_b1N, iteration) 230 | 231 | def final_compute(self, repro_errs_b1N, iteration): 232 | 233 | if repro_errs_b1N.nelement() == 0: 234 | return 0 235 | 236 | if self.type == "tanh": 237 | return weighted_tanh(repro_errs_b1N, self.soft_clamp) 238 | 239 | elif self.type == "dyntanh": 240 | # Compute the progress over the training process. 241 | schedule_weight = iteration / self.total_iterations 242 | 243 | if self.circle_schedule: 244 | # Optionally scale it using the circular schedule. 245 | schedule_weight = 1 - np.sqrt(1 - schedule_weight ** 2) 246 | 247 | # Compute the weight to use in the tanh loss. 248 | loss_weight = (1 - schedule_weight) * self.soft_clamp + self.soft_clamp_min 249 | 250 | # Compute actual loss. 251 | return weighted_tanh(repro_errs_b1N, loss_weight) 252 | 253 | elif self.type == "l1": 254 | # L1 loss on all pixels with small-enough error. 255 | softclamp_mask_b1 = repro_errs_b1N > self.soft_clamp 256 | return repro_errs_b1N[~softclamp_mask_b1].sum() 257 | 258 | elif self.type == "l1+sqrt": 259 | # L1 loss on pixels with small errors and sqrt for the others. 260 | softclamp_mask_b1 = repro_errs_b1N > self.soft_clamp 261 | loss_l1 = repro_errs_b1N[~softclamp_mask_b1].sum() 262 | loss_sqrt = torch.sqrt(self.soft_clamp * repro_errs_b1N[softclamp_mask_b1]).sum() 263 | 264 | return loss_l1 + loss_sqrt 265 | 266 | else: 267 | # l1+logl1: same as above, but use log(L1) for pixels with a larger error. 
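# Worked example with the default soft_clamp of 50: a 10 px error falls in the L1
# branch and contributes 10 to the sum, whereas a 200 px outlier contributes
# log(1 + 50*200) ~= 9.2 here, versus sqrt(50*200) = 100 in the l1+sqrt branch
# above, so gross outliers are damped far more aggressively.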
268 | softclamp_mask_b1 = repro_errs_b1N > self.soft_clamp 269 | loss_l1 = repro_errs_b1N[~softclamp_mask_b1].sum() 270 | loss_logl1 = torch.log(1 + (self.soft_clamp * repro_errs_b1N[softclamp_mask_b1])).sum() 271 | 272 | return loss_l1 + loss_logl1 273 | 274 | #### Optimizer #### 275 | 276 | class Optimizer: 277 | """ 278 | Wrapper around torch.optim + learning rate 279 | """ 280 | def __init__(self, params, nepochs, **kwargs): 281 | self.method = kwargs.pop("method") 282 | self.base_lr = kwargs.pop("base_lr") 283 | self.lr = self.base_lr 284 | self.lr_decay_step = int(nepochs/kwargs.pop("num_lr_decay_step")) 285 | self.lr_decay = kwargs.pop('lr_decay') 286 | self.nfactor = 0 287 | if self.method == 'sgd': 288 | print("OPTIMIZER: --- sgd") 289 | self.learner = optim.SGD(params, lr=self.base_lr, 290 | weight_decay=kwargs.pop("weight_decay"), **kwargs) 291 | elif self.method == 'adam': 292 | print("OPTIMIZER: --- adam") 293 | self.learner = optim.Adam(params, lr=self.base_lr, 294 | weight_decay=kwargs.pop("weight_decay"), **kwargs) 295 | elif self.method == 'rmsprop': 296 | print("OPTIMIZER: --- rmsprop") 297 | self.learner = optim.RMSprop(params, lr=self.base_lr, 298 | weight_decay=kwargs.pop("weight_decay"), **kwargs) 299 | 300 | def adjust_lr(self, epoch): 301 | ''' Adjust learning rate based on epoch. 302 | Optional: call this function if keep training the model after loading checkpoint 303 | ''' 304 | if (self.method not in ['sgd', 'adam']) or (self.lr_decay_step == 0.0): 305 | return self.base_lr 306 | nfactor = epoch // self.lr_decay_step 307 | if nfactor > self.nfactor: 308 | decay_factor = (1-self.lr_decay)**nfactor 309 | self.lr = self.base_lr * decay_factor 310 | for param_group in self.learner.param_groups: 311 | param_group['lr'] = self.lr 312 | return self.lr 313 | 314 | 315 | -------------------------------------------------------------------------------- /prepare_scripts/cambridge.sh: -------------------------------------------------------------------------------- 1 | # Description: Prepare the directory structure for the Cambridge dataset 2 | 3 | if [ ! -d "train_test_datasets" ]; then 4 | mkdir train_test_datasets 5 | fi 6 | 7 | if [ ! -d "train_test_datasets/gt_3Dmodels" ]; then 8 | mkdir train_test_datasets/gt_3Dmodels 9 | fi 10 | 11 | if [ ! -d "train_test_datasets/imgs_datasets" ]; then 12 | mkdir train_test_datasets/imgs_datasets 13 | fi 14 | 15 | TARGET_FOLDER="train_test_datasets/gt_3Dmodels" 16 | OUTPUT_FILE="Cambridge.zip" 17 | FILE_ID="19LRQ5j9I4YdrUykkoavcRTR6ekygU5iU" 18 | 19 | # Download the file from Google Drive using gdown and save it in the target folder 20 | gdown --id $FILE_ID -O $TARGET_FOLDER/$OUTPUT_FILE 21 | 22 | # Unzip the downloaded file in the target folder 23 | unzip $TARGET_FOLDER/$OUTPUT_FILE -d $TARGET_FOLDER 24 | 25 | # Remove the zip file after extraction 26 | rm $TARGET_FOLDER/$OUTPUT_FILE 27 | 28 | echo "Download, extraction, and cleanup completed in $TARGET_FOLDER." 29 | 30 | TARGET_FOLDER="train_test_datasets/imgs_datasets" 31 | FILE_ID="1MZyLPu9Z7tKCeuM4DchseoX4STIhKyi7" 32 | 33 | # Download the file from Google Drive using gdown and save it in the target folder 34 | gdown --id $FILE_ID -O $TARGET_FOLDER/$OUTPUT_FILE 35 | 36 | # Unzip the downloaded file in the target folder 37 | unzip $TARGET_FOLDER/$OUTPUT_FILE -d $TARGET_FOLDER 38 | 39 | # Remove the zip file after extraction 40 | rm $TARGET_FOLDER/$OUTPUT_FILE 41 | 42 | echo "Download, extraction, and cleanup completed in $TARGET_FOLDER." 
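# Usage note (assuming the script is run from the repository root, so that the
# relative train_test_datasets/ paths above match the defaults in runners/train.py):
#   bash prepare_scripts/cambridge.sh
# Recent gdown releases deprecate the --id flag; if the download fails, passing the
# file id positionally (e.g. gdown $FILE_ID -O $TARGET_FOLDER/$OUTPUT_FILE) is the
# usual replacement.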
-------------------------------------------------------------------------------- /prepare_scripts/download_pre_trained_models.sh: -------------------------------------------------------------------------------- 1 | OUTPUT_FILE="logs.zip" 2 | FILE_ID="1iH8PfqgPPQod0q_I8T_ZSO_mSj5XRUuO" 3 | 4 | # Download the file from Google Drive using gdown and save it in the target folder 5 | gdown --id $FILE_ID -O $OUTPUT_FILE 6 | 7 | # Unzip the downloaded file in the target folder 8 | unzip $OUTPUT_FILE 9 | 10 | # Remove the zip file after extraction 11 | rm $OUTPUT_FILE 12 | 13 | echo "Download, extraction, and cleanup completed." -------------------------------------------------------------------------------- /prepare_scripts/indoor6.sh: -------------------------------------------------------------------------------- 1 | # Description: Prepare the directory structure for the indoor6 dataset 2 | 3 | if [ ! -d "train_test_datasets" ]; then 4 | mkdir train_test_datasets 5 | fi 6 | 7 | if [ ! -d "train_test_datasets/gt_3Dmodels" ]; then 8 | mkdir train_test_datasets/gt_3Dmodels 9 | fi 10 | 11 | if [ ! -d "train_test_datasets/imgs_datasets" ]; then 12 | mkdir train_test_datasets/imgs_datasets 13 | fi 14 | 15 | TARGET_FOLDER="train_test_datasets/gt_3Dmodels" 16 | OUTPUT_FILE="indoor6.zip" 17 | FILE_ID="1q28Tkldc--ucD4l7q15RDVsuZ7IN3CEV" 18 | 19 | # Download the file from Google Drive using gdown and save it in the target folder 20 | gdown --id $FILE_ID -O $TARGET_FOLDER/$OUTPUT_FILE 21 | 22 | # Unzip the downloaded file in the target folder 23 | unzip $TARGET_FOLDER/$OUTPUT_FILE -d $TARGET_FOLDER 24 | 25 | # Remove the zip file after extraction 26 | rm $TARGET_FOLDER/$OUTPUT_FILE 27 | 28 | echo "Download, extraction, and cleanup completed in $TARGET_FOLDER." 29 | 30 | TARGET_FOLDER="train_test_datasets/imgs_datasets" 31 | FILE_ID="1kzLPt7LuVJIqKrJMYSFicJ231KDDJxVh" 32 | 33 | # Download the file from Google Drive using gdown and save it in the target folder 34 | gdown --id $FILE_ID -O $TARGET_FOLDER/$OUTPUT_FILE 35 | 36 | # Unzip the downloaded file in the target folder 37 | unzip $TARGET_FOLDER/$OUTPUT_FILE -d $TARGET_FOLDER 38 | 39 | # Remove the zip file after extraction 40 | rm $TARGET_FOLDER/$OUTPUT_FILE 41 | 42 | echo "Download, extraction, and cleanup completed in $TARGET_FOLDER." -------------------------------------------------------------------------------- /prepare_scripts/seven_scenes.sh: -------------------------------------------------------------------------------- 1 | # Description: Prepare the directory structure for the seven scene dataset 2 | 3 | if [ ! -d "train_test_datasets" ]; then 4 | mkdir train_test_datasets 5 | fi 6 | 7 | if [ ! -d "train_test_datasets/gt_3Dmodels" ]; then 8 | mkdir train_test_datasets/gt_3Dmodels 9 | fi 10 | 11 | if [ ! -d "train_test_datasets/imgs_datasets" ]; then 12 | mkdir train_test_datasets/imgs_datasets 13 | fi 14 | 15 | TARGET_FOLDER="train_test_datasets/gt_3Dmodels" 16 | OUTPUT_FILE="7scenes.zip" 17 | FILE_ID="1X8_tV0Y4b_W-vPgeXKoqtFaDCQ5_csL3" 18 | 19 | # Download the file from Google Drive using gdown and save it in the target folder 20 | gdown --id $FILE_ID -O $TARGET_FOLDER/$OUTPUT_FILE 21 | 22 | # Unzip the downloaded file in the target folder 23 | unzip $TARGET_FOLDER/$OUTPUT_FILE -d $TARGET_FOLDER 24 | 25 | # Remove the zip file after extraction 26 | rm $TARGET_FOLDER/$OUTPUT_FILE 27 | 28 | echo "Download, extraction, and cleanup completed in $TARGET_FOLDER." 
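# The block below fetches the raw 7-Scenes RGB-D sequences directly from the
# Microsoft download server (one zip per scene); each scene archive contains
# further per-sequence zips, hence the inner unzip loop after extraction.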
29 | 30 | 31 | cd train_test_datasets/imgs_datasets 32 | mkdir 7scenes 33 | cd 7scenes 34 | 35 | # List of datasets 36 | datasets=("chess" "fire" "heads" "office" "pumpkin" "redkitchen" "stairs") 37 | 38 | # Loop through each dataset 39 | for ds in "${datasets[@]}"; do 40 | # Check if the dataset directory exists 41 | if [ ! -d "$ds" ]; then 42 | echo "=== Downloading 7scenes Data: $ds ===============================" 43 | 44 | # Download the dataset zip file 45 | wget "http://download.microsoft.com/download/2/8/5/28564B23-0828-408F-8631-23B1EFF1DAC8/$ds.zip" 46 | 47 | # Unzip the dataset 48 | unzip "$ds.zip" 49 | 50 | # Remove the zip file 51 | rm "$ds.zip" 52 | 53 | # Loop through the dataset folder and unzip any additional zip files 54 | for file in "$ds"/*.zip; do 55 | if [ -f "$file" ]; then 56 | echo "Unpacking $file" 57 | unzip "$file" -d "$ds" 58 | rm "$file" 59 | fi 60 | done 61 | else 62 | echo "Found data of scene $ds already. Assuming its complete and skipping download." 63 | fi 64 | done 65 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pathlib 2 | open3d 3 | omegaconf 4 | h5py 5 | numpy 6 | scipy 7 | matplotlib 8 | tqdm 9 | pyyaml 10 | opencv-python 11 | pathlib 12 | poselib 13 | visdom 14 | scikit-image 15 | numpy==1.26.3 16 | gdown 17 | 18 | ./third_party/pytlsd 19 | ./third_party/DeepLSD -------------------------------------------------------------------------------- /runners/eval.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import argparse 3 | import sys, os 4 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | import util.config as utilcfg 6 | from omegaconf import OmegaConf 7 | from evaluator import Evaluator 8 | from util.logger import DualLogger 9 | 10 | def parse_config(): 11 | arg_parser = argparse.ArgumentParser(description='pre-processing for PL2Map dataset') 12 | arg_parser.add_argument('-d', '--dataset_dir', type=Path, default='train_test_datasets/imgs_datasets/', help='') 13 | arg_parser.add_argument('--sfm_dir', type=Path, default='train_test_datasets/gt_3Dmodels/', help='sfm ground truth directory') 14 | arg_parser.add_argument('--dataset', type=str, default="7scenes", help='dataset name') 15 | arg_parser.add_argument('-s', '--scene', type=str, default="pumpkin", help='scene name(s)') 16 | arg_parser.add_argument('-c','--cudaid', type=int, default=0, help='specify cuda device id') 17 | arg_parser.add_argument('-o','--outputs', type=Path, default='logs/', 18 | help='Path to the output directory, default: %(default)s') 19 | arg_parser.add_argument('-expv', '--experiment_version', type=str, default="pl2map", help='experiment version folder') 20 | args, _ = arg_parser.parse_known_args() 21 | args.outputs = os.path.join(args.outputs, args.scene + "_" + args.experiment_version) 22 | path_to_eval_cfg = f'{args.outputs}/config.yaml' 23 | cfg = utilcfg.load_config(path_to_eval_cfg, default_path='cfgs/default.yaml') 24 | cfg = OmegaConf.create(cfg) 25 | return args, cfg 26 | 27 | def main(): 28 | eval_cfg = { 29 | "eval_train": False, # evaluate train_loader 30 | "eval_test": True, # evaluate test_loader 31 | "vis_point3d": False, # visualize predicted 3D points, if eval_train/test = True 32 | "vis_line3d": False, # visualize predicted 3D lines, if eval_train/test = True 33 | "pnp_point": True, # use point-mode-only for PnP 34 | 
"pnp_pointline": True, # use point+line mode for PnP 35 | "uncer_threshold_point": 0.5, # threshold to remove uncertain points 36 | "uncer_threshold_line": 0.02, # threshold to remove uncertain lines 37 | "exist_results":False, # if True, skip running model,then use the existing results in the outputs folder 38 | "save_3dmap": False, # save predicted 3D map 39 | } 40 | args, cfg = parse_config() 41 | sys.stdout = DualLogger(f'{args.outputs}/eval_log.txt') 42 | evaler = Evaluator(args, cfg, eval_cfg) 43 | evaler.eval() 44 | sys.stdout.log.close() 45 | 46 | if __name__ == "__main__": 47 | main() -------------------------------------------------------------------------------- /runners/evaluator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from tqdm import tqdm 4 | import sys, os 5 | from omegaconf import OmegaConf 6 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 7 | from models.pipeline import Pipeline 8 | from util.help_evaluation import Vis_Infor, pose_evaluator 9 | from datasets.dataloader import Collection_Loader 10 | from models.util_learner import CriterionPointLine 11 | from trainer import step_fwd, ShowLosses 12 | from util.pose_estimator import Pose_Estimator # require limap library 13 | from util.io import SAVING_MAP 14 | 15 | class Evaluator(): 16 | default_cfg = { 17 | "eval_train": True, # evaluate train_loader 18 | "eval_test": True, # evaluate test_loader 19 | "vis_point3d": False, # visualize predicted 3D points, if eval_train/test = True 20 | "vis_line3d": False, # visualize predicted 3D lines, if eval_train/test = True 21 | "pnp_point": True, # use point-mode-only for PnP 22 | "pnp_pointline": True, # use point+line mode for PnP 23 | "uncer_threshold_point": 0.5, # threshold to remove uncertain points 24 | "uncer_threshold_line": 0.1, # threshold to remove uncertain lines 25 | "exist_results":False, # if True, skip running model,then use the existing results in the outputs folder 26 | "save_3dmap": False, # save predicted 3D map 27 | } 28 | def __init__(self, args, cfg, eval_cfg=dict()): 29 | self.args = args 30 | self.cfg = cfg 31 | eval_cfg = eval_cfg if cfg.regressor.name == 'pl2map' \ 32 | else force_onlypoint_cfg(eval_cfg) 33 | self.eval_cfg = OmegaConf.merge(OmegaConf.create(self.default_cfg), eval_cfg) 34 | print(f"[INFO] Model: {cfg.regressor.name}") 35 | print("[INFO] Evaluation Config: ", self.eval_cfg) 36 | 37 | if not self.eval_cfg.exist_results: 38 | self.pipeline = Pipeline(cfg) 39 | self.criterion = CriterionPointLine(self.cfg.train.loss.reprojection, cfg.train.num_iters) 40 | self.device = torch.device(f'cuda:{args.cudaid}' \ 41 | if torch.cuda.is_available() else 'cpu') 42 | self.save_path = None 43 | # to device 44 | self.pipeline.to(self.device) 45 | self.criterion.to(self.device) 46 | # dataloader 47 | if self.eval_cfg.eval_train: self.train_collection = Collection_Loader(args, cfg, mode="traintest") 48 | self.eval_collection = Collection_Loader(args, cfg, mode="test") 49 | print("[INFO] Loaded data collection") 50 | if self.eval_cfg.eval_train: self.train_loader = torch.utils.data.DataLoader(self.train_collection, batch_size=1, 51 | shuffle=True) 52 | self.eval_loader = torch.utils.data.DataLoader(self.eval_collection, batch_size=1, 53 | shuffle=True) 54 | self.train_loss = ShowLosses() 55 | self.exp_name = str(args.dataset) + "_" + str(args.scene) + "_" + str(cfg.regressor.name) 56 | self.vis_infor_train = Vis_Infor(self.eval_cfg) 57 | 
self.vis_infor_test = Vis_Infor(self.eval_cfg) 58 | # self.vis_infor_test = Vis_Infor(self.eval_cfg, "seq-06/frame-000780.color.png", 20) 59 | if self.eval_cfg.save_3dmap: self.saving_map = SAVING_MAP(self.args.outputs) 60 | self.pose_estimator = Pose_Estimator(self.cfg.localization, self.eval_cfg, 61 | self.args.outputs) 62 | else: 63 | print("[INFO] Skip running model, then use the existing results in the outputs folder") 64 | 65 | def eval(self): 66 | if not self.eval_cfg.exist_results: 67 | epoch = self.pipeline.load_checkpoint(self.args.outputs, self.exp_name) 68 | self.pipeline.eval() 69 | print("[INFO] Start evaluating ...") 70 | if self.eval_cfg.eval_train: 71 | print("[INFO] Evaluating train_loader ...") 72 | for _, (data, target) in enumerate(tqdm(self.train_loader)): 73 | loss, output = step_fwd(self.pipeline, self.device, data,target, 74 | iteration=self.cfg.train.num_iters, 75 | criterion=self.criterion, train=True) 76 | self.train_loss.update(loss) 77 | self.vis_infor_train.update(output, data) 78 | self.pose_estimator.run(output, data, target, mode='train') 79 | self.train_loss.show(epoch) 80 | self.vis_infor_train.vis() 81 | if self.eval_cfg.eval_test: 82 | i = 0 83 | print("[INFO] Evaluating test_loader ...") 84 | for _, (data, target) in enumerate(tqdm(self.eval_loader)): 85 | _, output = step_fwd(self.pipeline, self.device, data, 86 | target, train=False) 87 | if self.eval_cfg.save_3dmap: self.saving_map.save(output, data) 88 | # if data['imgname'][0] == self.vis_infor_test.highlight_frame: 89 | pose_vis_infor = self.pose_estimator.run(output, data, target, mode='test') 90 | self.vis_infor_test.update(output, data, pose_vis_infor) 91 | # i += 1 92 | # if i > 20: break 93 | self.vis_infor_test.vis() 94 | else: 95 | print("[INFO] Skip evaluating and use the existing results") 96 | pose_evaluator(self.eval_cfg, self.args.outputs) 97 | print("[INFO] DONE evaluation") 98 | 99 | def force_onlypoint_cfg(cfg): 100 | ''' 101 | Force the evaluation config to be only point mode 102 | ''' 103 | if cfg["pnp_pointline"] or cfg["vis_line3d"]: # turn off line mode, if it is on 104 | print("[Warning] Force the evaluation config to be only point mode") 105 | cfg["vis_line3d"] = False 106 | cfg["pnp_pointline"] = False 107 | return cfg -------------------------------------------------------------------------------- /runners/train.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import argparse 3 | import sys, os 4 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | import util.config as utilcfg 6 | from omegaconf import OmegaConf 7 | from trainer import Trainer 8 | import time 9 | from util.logger import DualLogger 10 | 11 | def parse_config(): 12 | arg_parser = argparse.ArgumentParser(description='pre-processing for PL2Map dataset') 13 | arg_parser.add_argument('-d', '--dataset_dir', type=Path, default='train_test_datasets/imgs_datasets/', help='') 14 | arg_parser.add_argument('--sfm_dir', type=Path, default='train_test_datasets/gt_3Dmodels/', help='sfm ground truth directory') 15 | arg_parser.add_argument('--dataset', type=str, default="7scenes", help='dataset name') 16 | arg_parser.add_argument('-s', '--scene', type=str, default="pumpkin", help='scene name(s)') 17 | arg_parser.add_argument('-cp','--checkpoint', action= 'store_true', help='use pre-trained model') 18 | arg_parser.add_argument('--visdom', action= 'store_true', help='visualize loss using visdom') 19 | 
arg_parser.add_argument('-c','--cudaid', type=int, default=0, help='specify cuda device id') 20 | arg_parser.add_argument('-o','--outputs', type=Path, default='logs/', 21 | help='Path to the output directory, default: %(default)s') 22 | arg_parser.add_argument('-expv', '--experiment_version', type=str, default="pl2map", help='experiment version folder') 23 | args, _ = arg_parser.parse_known_args() 24 | args.outputs = os.path.join(args.outputs, args.scene + "_" + args.experiment_version) 25 | print("Dataset: {} | Scene: {}".format(args.dataset, args.scene)) 26 | cfg = utilcfg.load_config(f'cfgs/{args.dataset}.yaml', default_path='cfgs/default.yaml') 27 | cfg = OmegaConf.create(cfg) 28 | utilcfg.mkdir(args.outputs) 29 | 30 | # Save the config file for evaluation purposes 31 | config_file_path = os.path.join(args.outputs, 'config.yaml') 32 | OmegaConf.save(cfg, config_file_path) 33 | 34 | return args, cfg 35 | 36 | def main(): 37 | args, cfg = parse_config() 38 | sys.stdout = DualLogger(f'{args.outputs}/train_log.txt') 39 | trainer = Trainer(args, cfg) 40 | start_time = time.time() 41 | trainer.train() 42 | print("Training time: {:.2f} hours".format((time.time() - start_time) / (60*60))) 43 | sys.stdout.log.close() 44 | 45 | if __name__ == "__main__": 46 | main() 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /runners/trainer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import sys, os 4 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | from models.pipeline import Pipeline 6 | from datasets.dataloader import Collection_Loader 7 | from models.util_learner import CriterionPointLine, Optimizer 8 | from tqdm import tqdm 9 | torch.manual_seed(0) 10 | 11 | class Trainer(): 12 | def __init__(self, args, cfg): 13 | self.args = args 14 | print(f"[INFO] Model: {cfg.regressor.name}") 15 | self.log_name = str(args.dataset) + "_" + str(args.scene) + "_" + str(cfg.regressor.name) 16 | self.pipeline = Pipeline(cfg) 17 | self.criterion = CriterionPointLine(cfg.train.loss.reprojection, cfg.train.num_iters) 18 | self.device = torch.device(f'cuda:{args.cudaid}' if torch.cuda.is_available() else 'cpu') 19 | 20 | # to device 21 | self.pipeline.to(self.device) 22 | self.criterion.to(self.device) 23 | 24 | # dataloader 25 | train_collection = Collection_Loader(args, cfg, mode="train") 26 | print("[INFO] Loaded data collection") 27 | self.train_loader = torch.utils.data.DataLoader(train_collection, batch_size=cfg.train.batch_size, 28 | shuffle=cfg.train.loader_shuffle, num_workers=cfg.train.loader_num_workers, 29 | pin_memory=True) 30 | 31 | self.length_train_loader = len(self.train_loader) 32 | self.epochs = int(cfg.train.num_iters / self.length_train_loader) 33 | print(f"[INFO] Total epochs: {self.epochs}") 34 | self.optimizer = Optimizer(self.pipeline.regressor.parameters(), self.epochs, **cfg.optimizer) 35 | 36 | if self.args.checkpoint: 37 | # load checkpoint and resume training 38 | self.start_epoch = self.pipeline.load_checkpoint(self.args.outputs, self.log_name) 39 | # self.start_epoch = 2024 40 | self.lr = self.optimizer.adjust_lr(self.start_epoch) 41 | else: 42 | self.start_epoch = 0 43 | self.lr = self.optimizer.lr 44 | self.train_log = Train_Log(args, cfg, self.length_train_loader, self.start_epoch, self.epochs) 45 | 46 | 47 | def train(self): 48 | print("[INFO] Start training") 49 | for epoch in 
range(self.start_epoch, self.epochs): 50 | if self.train_log.is_save_checkpoint(): 51 | self.pipeline.save_checkpoint(self.args.outputs, self.log_name, epoch) # overwrite(save) checkpoint per epoch 52 | for batch_idx, (data, target) in enumerate(tqdm(self.train_loader)): 53 | iters = epoch*self.length_train_loader + batch_idx 54 | loss,_ = step_fwd(self.pipeline, self.device, data, target, iters, 55 | self.criterion, self.optimizer, train=True) 56 | self.train_log.update(epoch, batch_idx, loss, self.lr) 57 | self.lr = self.optimizer.adjust_lr(epoch) # adjust learning rate 58 | self.train_log.show(epoch) # show loss per epoch 59 | # self.pipeline.save_checkpoint(self.args.outputs, self.log_name, epoch, True) 60 | 61 | 62 | def step_fwd(model, device, data, target=None, iteration=2500000, 63 | criterion=None, optim=None, train=False): 64 | """ 65 | A training/validation step.""" 66 | if train: 67 | assert criterion is not None 68 | assert target is not None 69 | for k,v in data.items(): 70 | if isinstance(v,list): 71 | continue 72 | data[k] = data[k].to(device) 73 | if target is not None: 74 | for k,_ in target.items(): 75 | target[k] = target[k].to(device) 76 | output = model(data) 77 | loss = None 78 | if train: 79 | loss = criterion(output, target, iteration) 80 | if optim is not None: 81 | optim.learner.zero_grad() 82 | loss[0].backward() 83 | optim.learner.step() 84 | return loss, output 85 | 86 | class Train_Log(): 87 | def __init__(self, args, cfg, length_loader, start_epoch, total_epoch=0) -> None: 88 | self.args = args 89 | self.cfg = cfg 90 | self.total_epoch = total_epoch 91 | self.log_interval = cfg.train.log_interval 92 | self.length_train_loader = length_loader 93 | self.vis_env = str(args.dataset) + "_" + str(args.scene) + \ 94 | "_" + str(cfg.regressor.name) +"_"+ str(args.experiment_version) 95 | self.showloss = ShowLosses(total_epoch=self.total_epoch) 96 | self.list_fignames = ['total_loss', 'point_loss', 'point_uncer_loss', 97 | 'line_loss', 'line_uncer_loss', 'points_prj_loss', 98 | 'lines_prj_loss', 'learning_rate'] 99 | if self.args.visdom: 100 | from visdom import Visdom 101 | print("[INFOR] Visdom is used for log visualization") 102 | self.vis = Visdom() 103 | for name in self.list_fignames: 104 | self.add_fig(name, start_epoch) 105 | 106 | def add_fig(self, name, start_epoch): 107 | self.vis.line(X=np.asarray([start_epoch]), Y=np.zeros(1), win=name, 108 | opts={'legend': [name], 'xlabel': 'epochs', 109 | 'ylabel': name}, env=self.vis_env) 110 | def update_fig(self, idx, epoch_count, value): 111 | name = self.list_fignames[idx] 112 | self.vis.line(X=np.asarray([epoch_count]), Y=np.asarray([value]), win=name, 113 | update='append', env=self.vis_env) 114 | 115 | def update(self, epoch, batch_idx, loss, lr): 116 | self.showloss.update(loss) 117 | self.lr = lr 118 | if self.args.visdom: 119 | if batch_idx % self.log_interval == 0: 120 | n_iter = epoch*self.length_train_loader + batch_idx 121 | epoch_count = float(n_iter)/self.length_train_loader 122 | l = len(self.list_fignames) 123 | for idx in range(l-1): 124 | self.update_fig(idx, epoch_count, loss[idx].item()) 125 | self.update_fig(l-1, epoch_count, lr) 126 | 127 | def show(self, epoch): 128 | self.showloss.show(epoch, self.lr) 129 | def is_save_checkpoint(self): 130 | his_epoch_loss = self.showloss.dict_losses[0].his_epoch_loss 131 | if len(his_epoch_loss) == 0: 132 | return False 133 | if min(his_epoch_loss) >= his_epoch_loss[-1]: 134 | return True 135 | else: 136 | return False 137 | 138 | class His_Loss(): 139 | 
def __init__(self)->None: 140 | self.his_epoch_loss = [] 141 | self.temp_batch_loss = [] 142 | def update_loss(self, loss): 143 | self.temp_batch_loss.append(loss) 144 | def show(self): 145 | avg_loss = np.mean(self.temp_batch_loss) 146 | self.his_epoch_loss.append(avg_loss) 147 | self.temp_batch_loss = [] # reset 148 | return avg_loss 149 | 150 | class ShowLosses(): 151 | # for debugging, showing all losses if needed 152 | def __init__(self, list_display=[True, True, False, True, False, True, True], total_epoch=0): 153 | ''' 154 | corresponding to show following losses: 155 | ['total_loss', 'point_loss', 'point_uncer_loss', 156 | 'line_loss', 'line_uncer_loss', 'points_prj_loss', 157 | 'lines_prj_loss'] 158 | ''' 159 | self.list_display = list_display 160 | self.length = len(self.list_display) 161 | self.names = ['Avg total loss', 'A.P.L', 'A.P.U.L', 'A.L.L', 'A.L.U.L', 'A.P.P.L', 'A.P.L.L'] 162 | # A.P.L means average point loss, A.P.P.L means average point projection loss, etc. 163 | self.create_dict_losses() 164 | self.total_epoch = total_epoch 165 | 166 | def create_dict_losses(self): 167 | self.dict_losses = {} 168 | for i in range(self.length): 169 | if self.list_display[i]: 170 | self.dict_losses[i] = His_Loss() 171 | 172 | def update(self, loss): 173 | for k,_ in self.dict_losses.items(): 174 | self.dict_losses[k].update_loss(loss[k].item()) 175 | 176 | 177 | def show(self, epoch, lr=0.0): 178 | content = f"Epoch {epoch}/{self.total_epoch} | " 179 | for k,_ in self.dict_losses.items(): 180 | avg_loss = self.dict_losses[k].show() 181 | content += self.names[k] + f": {avg_loss:.5f} | " 182 | content = content + f"lr: {lr:.6f}" 183 | print(content) 184 | 185 | -------------------------------------------------------------------------------- /util/config.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | import os 3 | 4 | def update_recursive(dict1, dictinfo): 5 | for k, v in dictinfo.items(): 6 | if k not in dict1: 7 | dict1[k] = dict() 8 | if isinstance(v, dict): 9 | update_recursive(dict1[k], v) 10 | else: 11 | dict1[k] = v 12 | def load_config(config_file, default_path=None): 13 | with open(config_file, 'r') as f: 14 | cfg_loaded = yaml.load(f, Loader=yaml.Loader) 15 | 16 | base_config_file = cfg_loaded.get('base_config_file') 17 | if base_config_file is not None: 18 | cfg = load_config(base_config_file) 19 | elif (default_path is not None) and (config_file != default_path): 20 | cfg = load_config(default_path) 21 | else: 22 | cfg = dict() 23 | update_recursive(cfg, cfg_loaded) 24 | return cfg 25 | 26 | def mkdir(path): 27 | if not os.path.exists(path): 28 | os.makedirs(path) 29 | return path -------------------------------------------------------------------------------- /util/help_evaluation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import sys, os 4 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | from util.read_write_model import qvec2rotmat 6 | 7 | class Vis_Infor(): 8 | ''' 9 | Store and Merge the 3D lines output from the model (by remove line with high uncertainty) 10 | lines3D: (N, 6) 11 | points3D: (N, 3) 12 | Visualize the 3D lines and 3D points 13 | ''' 14 | def __init__(self, eval_cfg, highlight_frame=None, limit_n_frames=None, save_list_imgs=False, 15 | output_path=None)->None: 16 | ''' 17 | highlight_frame: "seq-06/frame-000612.color.png", for example 18 | limit_n_frames: limit the number 
of frames to visualize the 3D lines and 3D points 19 | ''' 20 | self.eval_cfg = eval_cfg 21 | self.highlight_frame = highlight_frame 22 | self.limit_n_frames = np.inf if limit_n_frames is None else limit_n_frames 23 | self.save_list_imgs = save_list_imgs 24 | self.output_path = output_path 25 | self.lines3D = None 26 | self.points3D = None 27 | self.hightlight_lines3D = None 28 | self.hightlight_points3D = None 29 | self.threshold_point = eval_cfg.uncer_threshold_point 30 | self.threshold_line = eval_cfg.uncer_threshold_line 31 | self.current_num_frames = 0 32 | self.list_images = [] # list of images to visualize 3D lines / 3D points 33 | self.cameras = [] 34 | self.prd_poses = [] 35 | self.gt_poses = [] 36 | def update(self, output, data, vis_pose_infor=None): 37 | ''' 38 | args: 39 | output: dict of model output 40 | data: dict of data 41 | ''' 42 | 43 | if self.current_num_frames < self.limit_n_frames: 44 | if self.eval_cfg.vis_line3d: 45 | lines3D,_ = getLine3D_from_modeloutput(output['lines3D'], self.threshold_line) 46 | self.lines3D = lines3D if self.lines3D is None else np.concatenate((self.lines3D, lines3D)) 47 | self.list_images.append(data['imgname'][0]) 48 | if vis_pose_infor is not None: 49 | self.cameras.append(vis_pose_infor[0]) 50 | self.prd_poses.append(vis_pose_infor[1]) 51 | self.gt_poses.append(vis_pose_infor[2]) 52 | 53 | if self.eval_cfg.vis_point3d: 54 | points3D,_ = getPoint3D_from_modeloutput(output['points3D'], self.threshold_point) 55 | self.points3D = points3D if self.points3D is None else np.concatenate((self.points3D, points3D)) 56 | self.list_images.append(data['imgname'][0]) 57 | 58 | if self.limit_n_frames is not None and self.highlight_frame is not None: 59 | # save visualizations for the highlight 3d lines and 3d points 60 | current_frame = data['imgname'][0] 61 | if self.highlight_frame == current_frame: 62 | print("FOUND HIGHLIGHT FRAME") 63 | if self.eval_cfg.vis_line3d: 64 | self.hightlight_lines3D,_ = getLine3D_from_modeloutput(output['lines3D'], self.threshold) 65 | if self.eval_cfg.vis_point3d: 66 | self.hightlight_points3D,_ = getPoint3D_from_modeloutput(output['points3D'], self.threshold) 67 | if self.current_num_frames >= self.limit_n_frames: 68 | self.save_vis_highlights() 69 | self.current_num_frames += 1 70 | 71 | def vis(self): 72 | if self.eval_cfg.vis_line3d: 73 | print("[INFOR] Visualizing predicted 3D lines ...") 74 | from util.visualize import open3d_vis_3d_lines 75 | # open3d_vis_3d_lines(self.lines3D) 76 | open3d_vis_3d_lines(self.lines3D, self.cameras, self.prd_poses, self.gt_poses) 77 | if self.eval_cfg.vis_point3d: 78 | print("[INFOR] Visualizing predicted 3D points ...") 79 | from util.visualize import open3d_vis_3d_points 80 | open3d_vis_3d_points(self.points3D) 81 | if self.save_list_imgs: 82 | print("[INFOR] Saving list of images to visualize 3D lines / 3D points ...") 83 | with open(os.path.join(self.output_path, "list_vis_imgs.txt"), "w") as f: 84 | for img in self.list_images: 85 | f.write(img + "\n") 86 | 87 | def save_vis_highlights(self): 88 | if self.hightlight_lines3D is not None: 89 | from util.visualize import open3d_vis_3d_lines_with_hightlightFrame 90 | open3d_vis_3d_lines_with_hightlightFrame(self.lines3D, self.hightlight_lines3D) 91 | if self.hightlight_points3D is not None: 92 | from util.visualize import open3d_vis_3d_points_with_hightlightFrame 93 | open3d_vis_3d_points_with_hightlightFrame(self.points3D, self.hightlight_points3D) 94 | 95 | def getLine3D_from_modeloutput(lines3D, threshold=0.5): 96 | ''' 97 | 
get uncertainty and remove line with high uncertainty 98 | args: 99 | lines3D: numpy array (1, 7, N) 100 | return: lines3D (N, 6) 101 | ''' 102 | lines3D = np.squeeze(lines3D.detach().cpu().numpy()) 103 | uncertainty = 1/(1+100*np.abs(lines3D[6,:])) 104 | lines3D = lines3D[:6,:] 105 | uncertainty = [True if tmpc >= threshold else False for tmpc in uncertainty] 106 | lines3D = lines3D.T[uncertainty,:] 107 | return lines3D, uncertainty 108 | 109 | def getPoint3D_from_modeloutput(points3D, threshold=0.5): 110 | ''' 111 | get uncertainty and remove point with high uncertainty 112 | args: 113 | points3D: numpy array (1, 4, N) 114 | return: points3D (N, 3) 115 | ''' 116 | points3D = np.squeeze(points3D.detach().cpu().numpy()) 117 | uncertainty = 1/(1+100*np.abs(points3D[3,:])) 118 | points3D = points3D[:3,:] 119 | uncertainty = [True if tmpc >= threshold else False for tmpc in uncertainty] 120 | points3D = points3D.T[uncertainty,:] 121 | return points3D, uncertainty 122 | 123 | def pose_evaluator(eval_cfg, spath): 124 | ''' 125 | Evaluate the estimated poses with ground truth poses 126 | args: 127 | eval_cfg: evaluation config 128 | spath: path to save the estimated poses and ground truth poses 129 | ''' 130 | def eval(eval_cfg, spath, mode): 131 | if eval_cfg.pnp_point: 132 | evaluate_pose_results(spath, mode=mode, pnp='point') 133 | if eval_cfg.pnp_pointline: 134 | evaluate_pose_results(spath, mode=mode, pnp='pointline') 135 | 136 | if eval_cfg.eval_train: 137 | mode = 'train' 138 | eval(eval_cfg, spath, mode) 139 | if eval_cfg.eval_test: 140 | mode = 'test' 141 | eval(eval_cfg, spath, mode) 142 | 143 | def evaluate_pose_results(spath, mode='train', pnp='pointline'): 144 | ''' 145 | Evaluate the estimated poses with ground truth poses 146 | args: 147 | spath: path to save the estimated poses and ground truth poses 148 | mode: 'train' or 'test' 149 | pnp: 'point' or 'pointline' 150 | ''' 151 | gt_path = os.path.join(spath, f"gt_poses_{mode}.txt") 152 | prd_path = os.path.join(spath, f"est_poses_{mode}_{pnp}.txt") 153 | gt = pd.read_csv(gt_path, header=None, sep=" ") 154 | prd = pd.read_csv(prd_path, header=None, sep =" ") 155 | # assert len(gt) == len(prd) 156 | errors_t = [] 157 | errors_R = [] 158 | for i in range(len(prd)): 159 | R_gt = qvec2rotmat(gt.iloc[i,3:7].to_numpy()) 160 | t_gt = gt.iloc[i,:3].to_numpy() 161 | t = prd.iloc[i,:3].to_numpy() 162 | R = qvec2rotmat(prd.iloc[i,3:].to_numpy()) 163 | e_t = np.linalg.norm(-R_gt.T @ t_gt + R.T @ t, axis=0) 164 | cos = np.clip((np.trace(np.dot(R_gt.T, R)) - 1) / 2, -1., 1.) 
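# e_t above is the distance between the two camera centers: for COLMAP-style
# world-to-camera poses the center is C = -R^T t, so ||C - C_gt|| =
# ||-R_gt^T t_gt + R^T t||. The rotation error uses trace(R_gt^T R) = 1 + 2*cos(theta);
# the clip guards against values drifting slightly outside [-1, 1] before arccos.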
165 | e_R = np.rad2deg(np.abs(np.arccos(cos))) 166 | errors_t.append(e_t) 167 | errors_R.append(e_R) 168 | errors_t = np.array(errors_t) 169 | errors_R = np.array(errors_R) 170 | med_t = np.median(errors_t) 171 | med_R = np.median(errors_R) 172 | print(f'Evaluation results on {mode} set ({len(gt)}imgs) & PnP {pnp}:') 173 | print('Median errors: {:.4f}m, {:.4f}deg'.format(med_t, med_R)) 174 | print('Average PnP time: {:.4f}s'.format(np.mean(prd.iloc[:,7].to_numpy()))) 175 | print('Percentage of test images localized within:') 176 | threshs_t = [0.01, 0.02, 0.03, 0.05, 0.10] 177 | threshs_R = [1.0, 2.0, 3.0, 5.0, 10.0] 178 | for th_t, th_R in zip(threshs_t, threshs_R): 179 | ratio = np.mean((errors_t < th_t) & (errors_R < th_R)) 180 | print('\t{:.0f}cm, {:.0f}deg : {:.2f}%'.format(th_t*100, th_R, ratio*100)) 181 | return med_t, med_R 182 | 183 | 184 | 185 | -------------------------------------------------------------------------------- /util/io.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import os 4 | 5 | def read_image(path, grayscale=False): 6 | if grayscale: 7 | mode = cv2.IMREAD_GRAYSCALE 8 | else: 9 | mode = cv2.IMREAD_COLOR 10 | image = cv2.imread(str(path), mode) 11 | if image is None: 12 | raise ValueError(f'Cannot read image {path}.') 13 | if not grayscale and len(image.shape) == 3: 14 | image = image[:, :, ::-1] # BGR to RGB 15 | return image 16 | 17 | 18 | class SAVING_MAP(): 19 | def __init__(self, save_path) -> None: 20 | print("[INFOR] Saving prediction 3D map") 21 | self.save_path = os.path.join(save_path, "Map_Prediction") 22 | if not os.path.exists(self.save_path): 23 | os.makedirs(self.save_path) 24 | self.image_list = [] 25 | self.idx = 0 26 | def save(self, output, data): 27 | image_name = data['imgname'][0] 28 | self.image_list.append(image_name) 29 | 30 | p2ds = output['keypoints'][0].detach().cpu().numpy() 31 | # save 2D points 32 | np.savetxt(os.path.join(self.save_path, str(self.idx) + "_p2d.txt"), p2ds) 33 | 34 | points3D = np.squeeze(output['points3D'].detach().cpu().numpy()) 35 | np.savetxt(os.path.join(self.save_path, str(self.idx) + "_p3d.txt"), points3D) 36 | 37 | l2ds = data['lines'][0].detach().cpu().numpy() 38 | np.savetxt(os.path.join(self.save_path, str(self.idx) + "_l2d.txt"), l2ds) 39 | 40 | lines3D = np.squeeze(output['lines3D'].detach().cpu().numpy()) 41 | np.savetxt(os.path.join(self.save_path, str(self.idx) + "_l3d.txt"), lines3D) 42 | 43 | with open(os.path.join(self.save_path, "images.txt"), "a") as f: 44 | f.write(str(self.idx) + " " + image_name + "\n") 45 | 46 | self.idx += 1 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /util/logger.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | class DualLogger: 5 | def __init__(self, filename): 6 | self.terminal = sys.stdout 7 | if os.path.exists(filename): 8 | os.remove(filename) 9 | self.log = open(filename, 'a') 10 | 11 | def write(self, message): 12 | self.terminal.write(message) 13 | self.log.write(message) 14 | 15 | def flush(self): 16 | # This method is needed for Python 3 compatibility. 17 | # This handles the flush command by doing nothing. 18 | # You might want to specify some behavior here. 
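# The no-op is acceptable here because the log file is closed (and therefore
# flushed) explicitly via sys.stdout.log.close() at the end of runners/train.py and
# runners/eval.py; forwarding flush() to self.terminal and self.log would be the
# straightforward change if more timely output is ever needed.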
19 | pass -------------------------------------------------------------------------------- /util/pose_estimator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from omegaconf import OmegaConf 3 | import sys, os 4 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | from util.help_evaluation import getLine3D_from_modeloutput, getPoint3D_from_modeloutput 6 | import time 7 | import poselib 8 | 9 | class Pose_Estimator(): 10 | def __init__(self, localize_cfg, eval_cfg, spath): 11 | self.localize_cfg = localize_cfg # config file for localization 12 | self.eval_cfg = eval_cfg # local config for evaluation 13 | self.spath = spath 14 | self.uncertainty_point = eval_cfg.uncer_threshold_point 15 | self.uncertainty_line = eval_cfg.uncer_threshold_line 16 | self.pnppoint = eval_cfg.pnp_point 17 | self.pnppointline = eval_cfg.pnp_pointline 18 | if not self.eval_cfg.exist_results: 19 | self.checkexist() 20 | def checkexist(self): 21 | ''' 22 | Check if the files exist, if yes, remove them 23 | ''' 24 | trainfiles_list = ['est_poses_train_pointline.txt', 'est_poses_train_point.txt', 25 | 'gt_poses_train.txt'] 26 | testfiles_list = ['est_poses_test_pointline.txt', 'est_poses_test_point.txt', 27 | 'gt_poses_test.txt'] 28 | if self.eval_cfg.eval_train: 29 | self.rmfiles(trainfiles_list) 30 | if self.eval_cfg.eval_test: 31 | self.rmfiles(testfiles_list) 32 | 33 | def rmfiles(self, rm_list): 34 | for file in rm_list: 35 | if os.path.exists(os.path.join(self.spath, file)): 36 | os.remove(os.path.join(self.spath, file)) 37 | 38 | def run(self, output, data, target, mode='train'): 39 | return camera_pose_estimation(self.localize_cfg, output, data, target, self.spath, mode=mode, 40 | uncertainty_point=self.uncertainty_point, uncertainty_line=self.uncertainty_line, 41 | pnppoint=self.pnppoint, pnppointline=self.pnppointline) 42 | 43 | def camera_pose_estimation(localize_cfg, output, data, target, spath, mode='train', 44 | uncertainty_point=0.5, uncertainty_line=0.5, pnppoint=False, pnppointline=True): 45 | ''' 46 | Creating same inputs for limap library and estimate camera pose 47 | ''' 48 | p3ds_, point_uncer = getPoint3D_from_modeloutput(output['points3D'], uncertainty_point) 49 | p3ds = [i for i in p3ds_] 50 | p2ds = output['keypoints'][0].detach().cpu().numpy() + 0.5 # COLMAP 51 | p2ds = p2ds[point_uncer,:] 52 | p2ds = [i for i in p2ds] 53 | camera = target['camera'][0].detach().cpu().numpy() 54 | camera_model = "PINHOLE" if camera[0] == 1.0 else "SIMPLE_PINHOLE" 55 | poselibcamera = {'model': camera_model, 'width': camera[2], 'height': camera[1], 'params': camera[3:]} 56 | image_name = data['imgname'][0] 57 | 58 | if pnppoint: 59 | start = time.time() 60 | pose_point, _ = poselib.estimate_absolute_pose(p2ds, p3ds, poselibcamera, {'max_reproj_error': 12.0}, {}) 61 | est_time = time.time() - start 62 | with open(os.path.join(spath, f"est_poses_{mode}_point.txt"), 'a') as f: 63 | f.write(f"{pose_point.t[0]} {pose_point.t[1]} {pose_point.t[2]} {pose_point.q[0]} {pose_point.q[1]} {pose_point.q[2]} {pose_point.q[3]} {est_time} {image_name}\n") 64 | target_pose = target['pose'][0].detach().cpu().numpy() 65 | with open(os.path.join(spath, f"gt_poses_{mode}.txt"), 'a') as f: 66 | f.write(f"{target_pose[0]} {target_pose[1]} {target_pose[2]} {target_pose[3]} {target_pose[4]} {target_pose[5]} {target_pose[6]}\n") 67 | if not pnppointline: 68 | return None 69 | # modify the limap pnp to poselib pnp 70 | 71 | 72 | l3ds, line_uncer 
= getLine3D_from_modeloutput(output['lines3D'], uncertainty_line) 73 | l3d_ids = [i for i in range(len(l3ds))] 74 | l2ds = data['lines'][0].detach().cpu().numpy() 75 | l2ds = l2ds[line_uncer,:] 76 | 77 | localize_cfg = OmegaConf.to_container(localize_cfg, resolve=True) 78 | 79 | if pnppointline: 80 | start = time.time() 81 | ransac_opt = {"max_reproj_error": 12.0, "max_epipolar_error": 10.0} 82 | l2d_1 = [i for i in l2ds[:,:2]] 83 | l2d_2 = [i for i in l2ds[:,2:]] 84 | l3d_1 = [i for i in l3ds[:,:3]] 85 | l3d_2 = [i for i in l3ds[:,3:]] 86 | pose, _ = poselib.estimate_absolute_pose_pnpl(p2ds, p3ds, l2d_1, l2d_2, l3d_1, l3d_2, poselibcamera, ransac_opt) 87 | est_time = time.time() - start 88 | with open(os.path.join(spath, f"est_poses_{mode}_pointline.txt"), 'a') as f: 89 | f.write(f"{pose.t[0]} {pose.t[1]} {pose.t[2]} {pose.q[0]} {pose.q[1]} {pose.q[2]} {pose.q[3]} {est_time} {image_name}\n") 90 | return [poselibcamera, np.array([pose.t[0], pose.t[1], pose.t[2], pose.q[0], pose.q[1], pose.q[2], pose.q[3]]), target_pose] -------------------------------------------------------------------------------- /util/read_write_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018, ETH Zurich and UNC Chapel Hill. 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # 10 | # * Redistributions in binary form must reproduce the above copyright 11 | # notice, this list of conditions and the following disclaimer in the 12 | # documentation and/or other materials provided with the distribution. 13 | # 14 | # * Neither the name of ETH Zurich and UNC Chapel Hill nor the names of 15 | # its contributors may be used to endorse or promote products derived 16 | # from this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE 22 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 | # POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Author: Johannes L. 
Schoenberger (jsch-at-demuc-dot-de) 31 | 32 | import os 33 | import collections 34 | import numpy as np 35 | import struct 36 | import argparse 37 | 38 | 39 | CameraModel = collections.namedtuple( 40 | "CameraModel", ["model_id", "model_name", "num_params"]) 41 | Camera = collections.namedtuple( 42 | "Camera", ["id", "model", "width", "height", "params"]) 43 | BaseImage = collections.namedtuple( 44 | "Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"]) 45 | Point3D = collections.namedtuple( 46 | "Point3D", ["id", "xyz", "rgb", "error", "image_ids", "point2D_idxs"]) 47 | 48 | 49 | class Image(BaseImage): 50 | def qvec2rotmat(self): 51 | return qvec2rotmat(self.qvec) 52 | 53 | 54 | CAMERA_MODELS = { 55 | CameraModel(model_id=0, model_name="SIMPLE_PINHOLE", num_params=3), 56 | CameraModel(model_id=1, model_name="PINHOLE", num_params=4), 57 | CameraModel(model_id=2, model_name="SIMPLE_RADIAL", num_params=4), 58 | CameraModel(model_id=3, model_name="RADIAL", num_params=5), 59 | CameraModel(model_id=4, model_name="OPENCV", num_params=8), 60 | CameraModel(model_id=5, model_name="OPENCV_FISHEYE", num_params=8), 61 | CameraModel(model_id=6, model_name="FULL_OPENCV", num_params=12), 62 | CameraModel(model_id=7, model_name="FOV", num_params=5), 63 | CameraModel(model_id=8, model_name="SIMPLE_RADIAL_FISHEYE", num_params=4), 64 | CameraModel(model_id=9, model_name="RADIAL_FISHEYE", num_params=5), 65 | CameraModel(model_id=10, model_name="THIN_PRISM_FISHEYE", num_params=12) 66 | } 67 | CAMERA_MODEL_IDS = dict([(camera_model.model_id, camera_model) 68 | for camera_model in CAMERA_MODELS]) 69 | CAMERA_MODEL_NAMES = dict([(camera_model.model_name, camera_model) 70 | for camera_model in CAMERA_MODELS]) 71 | 72 | 73 | def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"): 74 | """Read and unpack the next bytes from a binary file. 75 | :param fid: 76 | :param num_bytes: Sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc. 77 | :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}. 78 | :param endian_character: Any of {@, =, <, >, !} 79 | :return: Tuple of read and unpacked values. 80 | """ 81 | data = fid.read(num_bytes) 82 | return struct.unpack(endian_character + format_char_sequence, data) 83 | 84 | 85 | def write_next_bytes(fid, data, format_char_sequence, endian_character="<"): 86 | """pack and write to a binary file. 87 | :param fid: 88 | :param data: data to send, if multiple elements are sent at the same time, 89 | they should be encapsuled either in a list or a tuple 90 | :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}. 
91 | should be the same length as the data list or tuple 92 | :param endian_character: Any of {@, =, <, >, !} 93 | """ 94 | if isinstance(data, (list, tuple)): 95 | bytes = struct.pack(endian_character + format_char_sequence, *data) 96 | else: 97 | bytes = struct.pack(endian_character + format_char_sequence, data) 98 | fid.write(bytes) 99 | 100 | 101 | def read_cameras_text(path): 102 | """ 103 | see: src/base/reconstruction.cc 104 | void Reconstruction::WriteCamerasText(const std::string& path) 105 | void Reconstruction::ReadCamerasText(const std::string& path) 106 | """ 107 | cameras = {} 108 | with open(path, "r") as fid: 109 | while True: 110 | line = fid.readline() 111 | if not line: 112 | break 113 | line = line.strip() 114 | if len(line) > 0 and line[0] != "#": 115 | elems = line.split() 116 | camera_id = int(elems[0]) 117 | model = elems[1] 118 | width = int(elems[2]) 119 | height = int(elems[3]) 120 | params = np.array(tuple(map(float, elems[4:]))) 121 | cameras[camera_id] = Camera(id=camera_id, model=model, 122 | width=width, height=height, 123 | params=params) 124 | return cameras 125 | 126 | 127 | def read_cameras_binary(path_to_model_file): 128 | """ 129 | see: src/base/reconstruction.cc 130 | void Reconstruction::WriteCamerasBinary(const std::string& path) 131 | void Reconstruction::ReadCamerasBinary(const std::string& path) 132 | """ 133 | cameras = {} 134 | with open(path_to_model_file, "rb") as fid: 135 | num_cameras = read_next_bytes(fid, 8, "Q")[0] 136 | for _ in range(num_cameras): 137 | camera_properties = read_next_bytes( 138 | fid, num_bytes=24, format_char_sequence="iiQQ") 139 | camera_id = camera_properties[0] 140 | model_id = camera_properties[1] 141 | model_name = CAMERA_MODEL_IDS[camera_properties[1]].model_name 142 | width = camera_properties[2] 143 | height = camera_properties[3] 144 | num_params = CAMERA_MODEL_IDS[model_id].num_params 145 | params = read_next_bytes(fid, num_bytes=8*num_params, 146 | format_char_sequence="d"*num_params) 147 | cameras[camera_id] = Camera(id=camera_id, 148 | model=model_name, 149 | width=width, 150 | height=height, 151 | params=np.array(params)) 152 | assert len(cameras) == num_cameras 153 | return cameras 154 | 155 | 156 | def write_cameras_text(cameras, path): 157 | """ 158 | see: src/base/reconstruction.cc 159 | void Reconstruction::WriteCamerasText(const std::string& path) 160 | void Reconstruction::ReadCamerasText(const std::string& path) 161 | """ 162 | HEADER = "# Camera list with one line of data per camera:\n" + \ 163 | "# CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[]\n" + \ 164 | "# Number of cameras: {}\n".format(len(cameras)) 165 | with open(path, "w") as fid: 166 | fid.write(HEADER) 167 | for _, cam in cameras.items(): 168 | to_write = [cam.id, cam.model, cam.width, cam.height, *cam.params] 169 | line = " ".join([str(elem) for elem in to_write]) 170 | fid.write(line + "\n") 171 | 172 | 173 | def write_cameras_binary(cameras, path_to_model_file): 174 | """ 175 | see: src/base/reconstruction.cc 176 | void Reconstruction::WriteCamerasBinary(const std::string& path) 177 | void Reconstruction::ReadCamerasBinary(const std::string& path) 178 | """ 179 | with open(path_to_model_file, "wb") as fid: 180 | write_next_bytes(fid, len(cameras), "Q") 181 | for _, cam in cameras.items(): 182 | model_id = CAMERA_MODEL_NAMES[cam.model].model_id 183 | camera_properties = [cam.id, 184 | model_id, 185 | cam.width, 186 | cam.height] 187 | write_next_bytes(fid, camera_properties, "iiQQ") 188 | for p in cam.params: 189 | write_next_bytes(fid, 
float(p), "d") 190 | return cameras 191 | 192 | 193 | def read_images_text(path): 194 | """ 195 | see: src/base/reconstruction.cc 196 | void Reconstruction::ReadImagesText(const std::string& path) 197 | void Reconstruction::WriteImagesText(const std::string& path) 198 | """ 199 | images = {} 200 | with open(path, "r") as fid: 201 | while True: 202 | line = fid.readline() 203 | if not line: 204 | break 205 | line = line.strip() 206 | if len(line) > 0 and line[0] != "#": 207 | elems = line.split() 208 | image_id = int(elems[0]) 209 | qvec = np.array(tuple(map(float, elems[1:5]))) 210 | tvec = np.array(tuple(map(float, elems[5:8]))) 211 | camera_id = int(elems[8]) 212 | image_name = elems[9] 213 | elems = fid.readline().split() 214 | xys = np.column_stack([tuple(map(float, elems[0::3])), 215 | tuple(map(float, elems[1::3]))]) 216 | point3D_ids = np.array(tuple(map(int, elems[2::3]))) 217 | images[image_id] = Image( 218 | id=image_id, qvec=qvec, tvec=tvec, 219 | camera_id=camera_id, name=image_name, 220 | xys=xys, point3D_ids=point3D_ids) 221 | return images 222 | 223 | 224 | def read_images_binary(path_to_model_file): 225 | """ 226 | see: src/base/reconstruction.cc 227 | void Reconstruction::ReadImagesBinary(const std::string& path) 228 | void Reconstruction::WriteImagesBinary(const std::string& path) 229 | """ 230 | images = {} 231 | with open(path_to_model_file, "rb") as fid: 232 | num_reg_images = read_next_bytes(fid, 8, "Q")[0] 233 | for _ in range(num_reg_images): 234 | binary_image_properties = read_next_bytes( 235 | fid, num_bytes=64, format_char_sequence="idddddddi") 236 | image_id = binary_image_properties[0] 237 | qvec = np.array(binary_image_properties[1:5]) 238 | tvec = np.array(binary_image_properties[5:8]) 239 | camera_id = binary_image_properties[8] 240 | image_name = "" 241 | current_char = read_next_bytes(fid, 1, "c")[0] 242 | while current_char != b"\x00": # look for the ASCII 0 entry 243 | image_name += current_char.decode("utf-8") 244 | current_char = read_next_bytes(fid, 1, "c")[0] 245 | num_points2D = read_next_bytes(fid, num_bytes=8, 246 | format_char_sequence="Q")[0] 247 | x_y_id_s = read_next_bytes(fid, num_bytes=24*num_points2D, 248 | format_char_sequence="ddq"*num_points2D) 249 | xys = np.column_stack([tuple(map(float, x_y_id_s[0::3])), 250 | tuple(map(float, x_y_id_s[1::3]))]) 251 | point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3]))) 252 | images[image_id] = Image( 253 | id=image_id, qvec=qvec, tvec=tvec, 254 | camera_id=camera_id, name=image_name, 255 | xys=xys, point3D_ids=point3D_ids) 256 | return images 257 | 258 | 259 | def write_images_text(images, path): 260 | """ 261 | see: src/base/reconstruction.cc 262 | void Reconstruction::ReadImagesText(const std::string& path) 263 | void Reconstruction::WriteImagesText(const std::string& path) 264 | """ 265 | if len(images) == 0: 266 | mean_observations = 0 267 | else: 268 | mean_observations = sum((len(img.point3D_ids) for _, img in images.items()))/len(images) 269 | HEADER = "# Image list with two lines of data per image:\n" + \ 270 | "# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n" + \ 271 | "# POINTS2D[] as (X, Y, POINT3D_ID)\n" + \ 272 | "# Number of images: {}, mean observations per image: {}\n".format(len(images), mean_observations) 273 | 274 | with open(path, "w") as fid: 275 | fid.write(HEADER) 276 | for _, img in images.items(): 277 | image_header = [img.id, *img.qvec, *img.tvec, img.camera_id, img.name] 278 | first_line = " ".join(map(str, image_header)) 279 | fid.write(first_line + "\n") 280 
| 281 | points_strings = [] 282 | for xy, point3D_id in zip(img.xys, img.point3D_ids): 283 | points_strings.append(" ".join(map(str, [*xy, point3D_id]))) 284 | fid.write(" ".join(points_strings) + "\n") 285 | 286 | 287 | def write_images_binary(images, path_to_model_file): 288 | """ 289 | see: src/base/reconstruction.cc 290 | void Reconstruction::ReadImagesBinary(const std::string& path) 291 | void Reconstruction::WriteImagesBinary(const std::string& path) 292 | """ 293 | with open(path_to_model_file, "wb") as fid: 294 | write_next_bytes(fid, len(images), "Q") 295 | for _, img in images.items(): 296 | write_next_bytes(fid, img.id, "i") 297 | write_next_bytes(fid, img.qvec.tolist(), "dddd") 298 | write_next_bytes(fid, img.tvec.tolist(), "ddd") 299 | write_next_bytes(fid, img.camera_id, "i") 300 | for char in img.name: 301 | write_next_bytes(fid, char.encode("utf-8"), "c") 302 | write_next_bytes(fid, b"\x00", "c") 303 | write_next_bytes(fid, len(img.point3D_ids), "Q") 304 | for xy, p3d_id in zip(img.xys, img.point3D_ids): 305 | write_next_bytes(fid, [*xy, p3d_id], "ddq") 306 | 307 | 308 | def read_points3D_text(path): 309 | """ 310 | see: src/base/reconstruction.cc 311 | void Reconstruction::ReadPoints3DText(const std::string& path) 312 | void Reconstruction::WritePoints3DText(const std::string& path) 313 | """ 314 | points3D = {} 315 | with open(path, "r") as fid: 316 | while True: 317 | line = fid.readline() 318 | if not line: 319 | break 320 | line = line.strip() 321 | if len(line) > 0 and line[0] != "#": 322 | elems = line.split() 323 | point3D_id = int(elems[0]) 324 | xyz = np.array(tuple(map(float, elems[1:4]))) 325 | rgb = np.array(tuple(map(int, elems[4:7]))) 326 | error = float(elems[7]) 327 | image_ids = np.array(tuple(map(int, elems[8::2]))) 328 | point2D_idxs = np.array(tuple(map(int, elems[9::2]))) 329 | points3D[point3D_id] = Point3D(id=point3D_id, xyz=xyz, rgb=rgb, 330 | error=error, image_ids=image_ids, 331 | point2D_idxs=point2D_idxs) 332 | return points3D 333 | 334 | 335 | def read_points3D_binary(path_to_model_file): 336 | """ 337 | see: src/base/reconstruction.cc 338 | void Reconstruction::ReadPoints3DBinary(const std::string& path) 339 | void Reconstruction::WritePoints3DBinary(const std::string& path) 340 | """ 341 | points3D = {} 342 | with open(path_to_model_file, "rb") as fid: 343 | num_points = read_next_bytes(fid, 8, "Q")[0] 344 | for _ in range(num_points): 345 | binary_point_line_properties = read_next_bytes( 346 | fid, num_bytes=43, format_char_sequence="QdddBBBd") 347 | point3D_id = binary_point_line_properties[0] 348 | xyz = np.array(binary_point_line_properties[1:4]) 349 | rgb = np.array(binary_point_line_properties[4:7]) 350 | error = np.array(binary_point_line_properties[7]) 351 | track_length = read_next_bytes( 352 | fid, num_bytes=8, format_char_sequence="Q")[0] 353 | track_elems = read_next_bytes( 354 | fid, num_bytes=8*track_length, 355 | format_char_sequence="ii"*track_length) 356 | image_ids = np.array(tuple(map(int, track_elems[0::2]))) 357 | point2D_idxs = np.array(tuple(map(int, track_elems[1::2]))) 358 | points3D[point3D_id] = Point3D( 359 | id=point3D_id, xyz=xyz, rgb=rgb, 360 | error=error, image_ids=image_ids, 361 | point2D_idxs=point2D_idxs) 362 | return points3D 363 | 364 | 365 | def write_points3D_text(points3D, path): 366 | """ 367 | see: src/base/reconstruction.cc 368 | void Reconstruction::ReadPoints3DText(const std::string& path) 369 | void Reconstruction::WritePoints3DText(const std::string& path) 370 | """ 371 | if len(points3D) == 0: 
372 | mean_track_length = 0 373 | else: 374 | mean_track_length = sum((len(pt.image_ids) for _, pt in points3D.items()))/len(points3D) 375 | HEADER = "# 3D point list with one line of data per point:\n" + \ 376 | "# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, POINT2D_IDX)\n" + \ 377 | "# Number of points: {}, mean track length: {}\n".format(len(points3D), mean_track_length) 378 | 379 | with open(path, "w") as fid: 380 | fid.write(HEADER) 381 | for _, pt in points3D.items(): 382 | point_header = [pt.id, *pt.xyz, *pt.rgb, pt.error] 383 | fid.write(" ".join(map(str, point_header)) + " ") 384 | track_strings = [] 385 | for image_id, point2D in zip(pt.image_ids, pt.point2D_idxs): 386 | track_strings.append(" ".join(map(str, [image_id, point2D]))) 387 | fid.write(" ".join(track_strings) + "\n") 388 | 389 | 390 | def write_points3D_binary(points3D, path_to_model_file): 391 | """ 392 | see: src/base/reconstruction.cc 393 | void Reconstruction::ReadPoints3DBinary(const std::string& path) 394 | void Reconstruction::WritePoints3DBinary(const std::string& path) 395 | """ 396 | with open(path_to_model_file, "wb") as fid: 397 | write_next_bytes(fid, len(points3D), "Q") 398 | for _, pt in points3D.items(): 399 | write_next_bytes(fid, pt.id, "Q") 400 | write_next_bytes(fid, pt.xyz.tolist(), "ddd") 401 | write_next_bytes(fid, pt.rgb.tolist(), "BBB") 402 | write_next_bytes(fid, pt.error, "d") 403 | track_length = pt.image_ids.shape[0] 404 | write_next_bytes(fid, track_length, "Q") 405 | for image_id, point2D_id in zip(pt.image_ids, pt.point2D_idxs): 406 | write_next_bytes(fid, [image_id, point2D_id], "ii") 407 | 408 | 409 | def detect_model_format(path, ext): 410 | if os.path.isfile(os.path.join(path, "cameras" + ext)) and \ 411 | os.path.isfile(os.path.join(path, "images" + ext)) and \ 412 | os.path.isfile(os.path.join(path, "points3D" + ext)): 413 | print("Detected model format: '" + ext + "'") 414 | return True 415 | 416 | return False 417 | 418 | 419 | def read_model(path, ext=""): 420 | # try to detect the extension automatically 421 | if ext == "": 422 | if detect_model_format(path, ".bin"): 423 | ext = ".bin" 424 | elif detect_model_format(path, ".txt"): 425 | ext = ".txt" 426 | else: 427 | print("Provide model format: '.bin' or '.txt'") 428 | return 429 | 430 | if ext == ".txt": 431 | cameras = read_cameras_text(os.path.join(path, "cameras" + ext)) 432 | images = read_images_text(os.path.join(path, "images" + ext)) 433 | points3D = read_points3D_text(os.path.join(path, "points3D") + ext) 434 | else: 435 | cameras = read_cameras_binary(os.path.join(path, "cameras" + ext)) 436 | images = read_images_binary(os.path.join(path, "images" + ext)) 437 | points3D = read_points3D_binary(os.path.join(path, "points3D") + ext) 438 | return cameras, images, points3D 439 | 440 | 441 | def write_model(cameras, images, points3D, path, ext=".bin"): 442 | if ext == ".txt": 443 | write_cameras_text(cameras, os.path.join(path, "cameras" + ext)) 444 | write_images_text(images, os.path.join(path, "images" + ext)) 445 | write_points3D_text(points3D, os.path.join(path, "points3D") + ext) 446 | else: 447 | write_cameras_binary(cameras, os.path.join(path, "cameras" + ext)) 448 | write_images_binary(images, os.path.join(path, "images" + ext)) 449 | write_points3D_binary(points3D, os.path.join(path, "points3D") + ext) 450 | return cameras, images, points3D 451 | 452 | 453 | def qvec2rotmat(qvec): 454 | return np.array([ 455 | [1 - 2 * qvec[2]**2 - 2 * qvec[3]**2, 456 | 2 * qvec[1] * qvec[2] - 2 * qvec[0] * 
qvec[3], 457 | 2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2]], 458 | [2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3], 459 | 1 - 2 * qvec[1]**2 - 2 * qvec[3]**2, 460 | 2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1]], 461 | [2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2], 462 | 2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1], 463 | 1 - 2 * qvec[1]**2 - 2 * qvec[2]**2]]) 464 | 465 | 466 | def rotmat2qvec(R): 467 | Rxx, Ryx, Rzx, Rxy, Ryy, Rzy, Rxz, Ryz, Rzz = R.flat 468 | K = np.array([ 469 | [Rxx - Ryy - Rzz, 0, 0, 0], 470 | [Ryx + Rxy, Ryy - Rxx - Rzz, 0, 0], 471 | [Rzx + Rxz, Rzy + Ryz, Rzz - Rxx - Ryy, 0], 472 | [Ryz - Rzy, Rzx - Rxz, Rxy - Ryx, Rxx + Ryy + Rzz]]) / 3.0 473 | eigvals, eigvecs = np.linalg.eigh(K) 474 | qvec = eigvecs[[3, 0, 1, 2], np.argmax(eigvals)] 475 | if qvec[0] < 0: 476 | qvec *= -1 477 | return qvec 478 | 479 | 480 | def main(): 481 | parser = argparse.ArgumentParser(description="Read and write COLMAP binary and text models") 482 | parser.add_argument("--input_model", help="path to input model folder") 483 | parser.add_argument("--input_format", choices=[".bin", ".txt"], 484 | help="input model format", default="") 485 | parser.add_argument("--output_model", 486 | help="path to output model folder") 487 | parser.add_argument("--output_format", choices=[".bin", ".txt"], 488 | help="outut model format", default=".txt") 489 | args = parser.parse_args() 490 | 491 | cameras, images, points3D = read_model(path=args.input_model, ext=args.input_format) 492 | 493 | print("num_cameras:", len(cameras)) 494 | print("num_images:", len(images)) 495 | print("num_points3D:", len(points3D)) 496 | 497 | if args.output_model is not None: 498 | write_model(cameras, images, points3D, path=args.output_model, ext=args.output_format) 499 | 500 | 501 | if __name__ == "__main__": 502 | main() 503 | -------------------------------------------------------------------------------- /util/visualize.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import open3d as o3d 3 | import os 4 | import sys 5 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 6 | from datasets._base import Line3D 7 | from util.io import read_image 8 | import matplotlib.pyplot as plt 9 | 10 | def test_point_inside_ranges(point, ranges): 11 | point = np.array(point) 12 | if ~np.all(point > ranges[0]) or ~np.all(point < ranges[1]): 13 | return False 14 | return True 15 | 16 | def test_line_inside_ranges(line, ranges): 17 | if not test_point_inside_ranges(line.start, ranges): 18 | return False 19 | if not test_point_inside_ranges(line.end, ranges): 20 | return False 21 | return True 22 | 23 | def open3d_get_line_set(lines, color=[0.0, 0.0, 0.0], width=2, ranges=None, scale=1.0): 24 | """ 25 | convert a list of line3D objects to an Open3D lines set 26 | Args: 27 | lines (list[:class:`datasets._base.Line3D`] or numpy array of Nx6): The 3D line map 28 | color (list[float]): The color of the lines 29 | width (float, optional): width of the line 30 | """ 31 | o3d_points, o3d_lines, o3d_colors = [], [], [] 32 | counter = 0 33 | for line in lines: 34 | if isinstance(line, np.ndarray): 35 | line = Line3D(line[:3], line[3:]) 36 | if ranges is not None: 37 | if not test_line_inside_ranges(line, ranges): 38 | continue 39 | o3d_points.append(line.start * scale) 40 | o3d_points.append(line.end * scale) 41 | o3d_lines.append([2*counter, 2*counter+1]) 42 | counter += 1 43 | o3d_colors.append(color) 44 | line_set = o3d.geometry.LineSet() 45 | line_set.points = 
o3d.utility.Vector3dVector(o3d_points) 46 | line_set.lines = o3d.utility.Vector2iVector(o3d_lines) 47 | line_set.colors = o3d.utility.Vector3dVector(o3d_colors) 48 | return line_set 49 | 50 | 51 | def open3d_vis_3d_lines_with_hightlightFrame(lines3D, hightlight_lines3D, width=2, ranges=None, scale=1.0): 52 | """ 53 | Save 3D line map with `Open3D `_ 54 | 55 | Args: 56 | lines3D: numpy array of Nx6 57 | hightlight_lines3D: numpy array of Nx6 58 | width (float, optional): width of the line 59 | """ 60 | 61 | line_set = open3d_get_line_set(lines3D, width=width, ranges=ranges, scale=scale) 62 | line_set_highlight = open3d_get_line_set(hightlight_lines3D, color=[0.0, 1.0, 0.0], width=width*2, ranges=ranges, scale=scale) 63 | 64 | # Save the line_set 65 | o3d.io.write_line_set("visualization/line_set.ply", line_set) 66 | o3d.io.write_line_set("visualization/line_set_highlight.ply", line_set_highlight) 67 | 68 | ''' 69 | vis = o3d.visualization.Visualizer() 70 | vis.create_window(height=1080, width=1920) 71 | vis.add_geometry(line_set) 72 | vis.add_geometry(line_set_highlight) 73 | vis.run() 74 | vis.destroy_window() 75 | ''' 76 | 77 | 78 | 79 | 80 | def open3d_vis_3d_lines(lines3D, cameras=None, poses=None, gt_pose=None, width=2, ranges=None, scale=1.0): 81 | """ 82 | Visualize a 3D line map with `Open3D `_ 83 | 84 | Args: 85 | lines (list[:class:`datasets._base.Line3D` and/or None]): The 3D line map 86 | width (float, optional): width of the line 87 | """ 88 | if isinstance(lines3D, list): 89 | lines = [] 90 | for line in lines3D: 91 | if line is not None: 92 | lines.append(line) 93 | elif isinstance(lines3D, np.ndarray): 94 | lines = lines3D 95 | else: 96 | raise ValueError("lines3D must be either a list or a numpy array") 97 | 98 | vis = o3d.visualization.Visualizer() 99 | vis.create_window(height=1080, width=1920) 100 | 101 | prune = len(lines) 102 | # prune = int(0.8*len(lines)) 103 | line_set = open3d_get_line_set(lines[:prune,:], width=width, ranges=ranges, scale=scale) 104 | 105 | vis.add_geometry(line_set) 106 | if poses is not None: 107 | assert cameras is not None 108 | assert gt_pose is not None 109 | def get_t(pose): 110 | R = qvec2rotmat(pose[3:]) 111 | # translation 112 | t = pose[:3] 113 | # invert 114 | t = -R.T @ t 115 | return t 116 | connect_poses_lines = [] 117 | is_draws = [] 118 | for i in range(len(poses)): 119 | est_t = get_t(poses[i]) 120 | gt_t = get_t(gt_pose[i]) 121 | # calculate distance between two points 122 | is_draw = True if np.linalg.norm(est_t - gt_t) < 100 else False 123 | is_draws.append(is_draw) 124 | if is_draw: 125 | tmp_line = np.array([est_t[0], est_t[1], est_t[2], gt_t[0], gt_t[1], gt_t[2]]) 126 | connect_poses_lines.append(tmp_line) 127 | connect_line_set = open3d_get_line_set(connect_poses_lines, width=width, ranges=ranges, scale=scale, color=[0,1,0]) 128 | vis.add_geometry(connect_line_set) 129 | i = 0 130 | for pose, camera in zip(poses, cameras): 131 | if is_draws[i]: add_camera(vis, pose, camera, scale=0.2, gt = False) 132 | i+=1 133 | i = 0 134 | for pose, camera in zip(gt_pose, cameras): 135 | if is_draws[i]: add_camera(vis, pose, camera, scale=0.2, gt = True) 136 | i+=1 137 | vis.run() 138 | vis.destroy_window() 139 | 140 | def open3d_vis_3d_lines_from_datacollection(datacollection, train_or_test="train"): 141 | ''' 142 | Visualize 3D lines from datasetcollection 143 | Args: 144 | datacollection: DataCollection object 145 | train_or_test: string, "train" or "test"''' 146 | if train_or_test != "train": 147 | raise ValueError("Currently only 
support 'train' mode.") 148 | vis_lines = [] 149 | imgs_list = datacollection.train_imgs if train_or_test=="train" else datacollection.test_imgs 150 | import random 151 | random.shuffle(imgs_list) 152 | cameras = [] 153 | poses = [] 154 | i = 0 155 | for img in imgs_list: 156 | vis_lines += datacollection.imgname2imgclass[img].line3Ds 157 | poses.append(datacollection.imgname2imgclass[img].pose.get_pose_vector()) 158 | cameras.append(datacollection.imgname2imgclass[img].camera.get_dict_camera()) 159 | # i += 1 160 | # if i > 20: 161 | # break 162 | open3d_vis_3d_lines(vis_lines, cameras=cameras, poses=poses) 163 | 164 | def open3d_vis_3d_lines_from_single_imgandcollection(datacollection, img_name): 165 | ''' 166 | Visualize 3D lines from datasetcollection 167 | Args: 168 | datacollection: DataCollection object 169 | img_name: string, image name 170 | ''' 171 | if img_name in datacollection.test_imgs: 172 | raise ValueError("Only train images have 3D labeled lines.") 173 | vis_lines = datacollection.imgname2imgclass[img_name].line3Ds 174 | open3d_vis_3d_lines(vis_lines) 175 | 176 | def visualize_2d_lines(img_path, savename,lines2D, lines3D, save_path="visualization/"): 177 | """ Plot lines for existing images. 178 | Args: 179 | img_path: string, path to the image. 180 | lines2D: list of ndarrays of size (N, 4). 181 | lines3D: list of objects with size of (N, 1). 182 | save_path: string, path to save the image. 183 | """ 184 | save_path = os.path.join(save_path,savename) 185 | img = read_image(img_path) 186 | plt.figure() 187 | plt.imshow(img) 188 | length = lines2D.shape[0] 189 | for i in range(length): 190 | k = lines2D[i,:] 191 | x = [k[0], k[2]] 192 | y = [k[1], k[3]] 193 | if lines3D is not None: 194 | c = 'lime' if lines3D[i] is None else 'red' 195 | else: 196 | c = 'lime' 197 | plt.plot(x, y, color=c) 198 | plt.savefig(save_path) 199 | # Close the figure to free up memory 200 | plt.close() 201 | 202 | def visualize_2d_lines_from_collection(datacollection, img_name, mode="offline"): 203 | """ 204 | Visualize 2D lines from datasetcollection 205 | Args: 206 | datacollection: DataCollection object 207 | img_name: string, image name 208 | mode: string, "offline" (take from exiting labels) or "online" (use detector model to get 2D points) 209 | """ 210 | if mode == "offline": 211 | line2Ds = datacollection.imgname2imgclass[img_name].line2Ds 212 | line3Ds = datacollection.imgname2imgclass[img_name].line3Ds 213 | elif mode == "online": 214 | line2Ds = datacollection.detect_lines2D(img_name) 215 | line3Ds = None 216 | else: 217 | raise ValueError("mode must be either 'offline' or 'online'") 218 | img_path = datacollection.get_image_path(img_name) 219 | save_name = img_name.replace("/","_") + "_lines_" + mode +".png" 220 | visualize_2d_lines(img_path, save_name, line2Ds, line3Ds) 221 | 222 | # -------------------------------- end line visualization -------------------------------- 223 | ########################################################################################## 224 | # -------------------------------- start point visualization ----------------------------- 225 | 226 | 227 | def visualize_2d_points(img_path, points2D, savename, colors='lime', ps=4, save_path="visualization/"): 228 | """Plot keypoints for existing images. 229 | Args: 230 | img_path: string, path to the image. 231 | points2D: list of ndarrays of size (N, 2). 232 | colors: string, or list of list of tuples (one for each keypoints). 233 | ps: size of the keypoints as float. 
234 | save_path: string, path to save the image. 235 | """ 236 | save_path = os.path.join(save_path,savename) 237 | img = read_image(img_path) 238 | plt.figure() 239 | plt.imshow(img) 240 | if not isinstance(colors, list): 241 | colors = [colors] * len(points2D) 242 | for k, c in zip(points2D, colors): 243 | plt.scatter(k[0], k[1], c=c, s=ps, linewidths=0) 244 | plt.savefig(save_path) 245 | # Close the figure to free up memory 246 | plt.close() 247 | 248 | def visualize_2d_points_from_collection(datacollection, img_name, mode="offline"): 249 | """ 250 | Visualize 2D points from datasetcollection 251 | Args: 252 | datacollection: DataCollection object 253 | img_name: string, image name 254 | mode: string, "offline" (take from exiting labels) or "online" (use detector model to get 2D points) 255 | """ 256 | if mode == "offline": 257 | if img_name in datacollection.test_imgs: 258 | raise ValueError("Only train images have 2D labeled points.") 259 | points2D = datacollection.imgname2imgclass[img_name].points2Ds 260 | elif mode == "online": 261 | data = datacollection.detect_points2D(img_name) 262 | points2D = data["keypoints"][0].detach().cpu().numpy() 263 | else: 264 | raise ValueError("mode must be either 'offline' or 'online'") 265 | img_path = datacollection.get_image_path(img_name) 266 | save_name = img_name.replace("/","_") + "_points_" + mode +".png" 267 | visualize_2d_points(img_path, points2D, save_name) 268 | 269 | def open3d_get_point_set(points, color=[0.0, 0.0, 0.0], width=2, scale=1.0): 270 | """ 271 | convert a numpy array of points3D to an Open3D lines set 272 | Args: 273 | points (numpy array of Nx3): The 3D point map 274 | color (list[float]): The color of the lines 275 | width (float, optional): width of the line 276 | """ 277 | o3d_points, o3d_colors = [], [] 278 | for point in points: 279 | if np.sum(point) == 0: 280 | continue 281 | o3d_points.append(point) 282 | o3d_colors.append(color) 283 | point_set = o3d.geometry.PointCloud() 284 | point_set.points = o3d.utility.Vector3dVector(o3d_points) 285 | point_set.colors = o3d.utility.Vector3dVector(o3d_colors) 286 | return point_set 287 | 288 | def open3d_vis_3d_points(points3D:np.asanyarray, width=2, ranges=None, scale=1.0): 289 | """ 290 | Visualize a 3D point map with `Open3D `_ 291 | 292 | Args: 293 | points3D (list[:class:`datasets._base.Line3D` and/or None]): The 3D line map 294 | width (float, optional): width of the line 295 | """ 296 | 297 | vis = o3d.visualization.Visualizer() 298 | vis.create_window(height=1080, width=1920) 299 | point_set = open3d_get_point_set(points3D, width=width, scale=scale) 300 | vis.add_geometry(point_set) 301 | vis.run() 302 | vis.destroy_window() 303 | 304 | def open3d_vis_3d_points_from_datacollection(datacollection, mode="train"): 305 | ''' 306 | Visualize 3D points from datasetcollection 307 | Args: 308 | datacollection: DataCollection object 309 | mode: string, "train" or "test" 310 | ''' 311 | if mode != "train": 312 | raise ValueError("Currently only support 'train' mode.") 313 | vis_points = np.array([[0,0,0]]) 314 | imgs_list = datacollection.train_imgs if mode=="train" else datacollection.test_imgs 315 | for img in imgs_list: 316 | vis_points = np.concatenate((vis_points, datacollection.imgname2imgclass[img].points3Ds)) 317 | open3d_vis_3d_points(vis_points) 318 | 319 | 320 | def open3d_vis_3d_points_with_hightlightFrame(points3D, hightlight_points3D, width=2, ranges=None, scale=1.0): 321 | """ 322 | Save 3D point map with `Open3D `_ 323 | 324 | Args: 325 | points3D 
(list[:class:`datasets._base.Line3D` and/or None]): The 3D line map 326 | width (float, optional): width of the line 327 | """ 328 | 329 | point_set = open3d_get_point_set(points3D, width=width, scale=scale) 330 | highlight_point_set = open3d_get_point_set(hightlight_points3D, color=[0.0, 1.0, 0.0], width=width*2, scale=scale) 331 | 332 | # save the point_set 333 | o3d.io.write_point_cloud("visualization/point_set.ply", point_set) 334 | o3d.io.write_point_cloud("visualization/highlight_point_set.ply", highlight_point_set) 335 | ''' 336 | vis = o3d.visualization.Visualizer() 337 | vis.create_window(height=1080, width=1920) 338 | vis.add_geometry(point_set) 339 | vis.add_geometry(highlight_point_set) 340 | vis.run() 341 | vis.destroy_window() 342 | ''' 343 | 344 | 345 | ########################################################################################## 346 | # -------------------- merging points and lines for visualization together -------------- 347 | def visualize_2d_points_lines(img_path, points2D, lines2D, lines3D, savename, 348 | colors='lime', ps=4, save_path="visualization/"): 349 | """Plot keypoints for existing images. 350 | Args: 351 | img_path: string, path to the image. 352 | points2D: list of ndarrays of size (N, 2). 353 | lines2D: list of ndarrays of size (N, 4). 354 | lines3D: list of objects with size of (N, 1). 355 | colors: string, or list of list of tuples (one for each keypoints). 356 | ps: size of the keypoints as float. 357 | save_path: string, path to save the image. 358 | """ 359 | save_path = os.path.join(save_path,savename) 360 | img = read_image(img_path) 361 | plt.figure() 362 | plt.imshow(img) 363 | if not isinstance(colors, list): 364 | colors = [colors] * len(points2D) 365 | # visualize points 366 | for k, c in zip(points2D, colors): 367 | plt.scatter(k[0], k[1], c=c, s=ps, linewidths=0) 368 | 369 | # visualize lines 370 | length = lines2D.shape[0] 371 | for i in range(length): 372 | k = lines2D[i,:] 373 | x = [k[0], k[2]] 374 | y = [k[1], k[3]] 375 | if lines3D is not None: 376 | c = 'lime' if lines3D[i] is None else 'red' 377 | else: 378 | c = 'lime' 379 | plt.plot(x, y, color=c) 380 | plt.savefig(save_path) 381 | # Close the figure to free up memory 382 | plt.close() 383 | 384 | 385 | def visualize_2d_points_lines_from_collection(datacollection, img_name, mode="offline"): 386 | """ 387 | Visualize 2D points and lines from datasetcollection 388 | Args: 389 | datacollection: DataCollection object 390 | img_name: string, image name 391 | mode: string, "offline" (take from exiting labels) or "online" (use detector model to get 2D points) 392 | """ 393 | if mode == "offline": 394 | if img_name in datacollection.test_imgs: 395 | raise ValueError("Only train images have 2D labeled points.") 396 | points2D = datacollection.imgname2imgclass[img_name].points2Ds 397 | 398 | line2Ds = datacollection.imgname2imgclass[img_name].line2Ds 399 | line3Ds = datacollection.imgname2imgclass[img_name].line3Ds 400 | 401 | elif mode == "online": 402 | data = datacollection.detect_points2D(img_name) 403 | points2D = data["keypoints"][0].detach().cpu().numpy() 404 | 405 | line2Ds = datacollection.detect_lines2D(img_name) 406 | line3Ds = None 407 | else: 408 | raise ValueError("mode must be either 'offline' or 'online'") 409 | img_path = datacollection.get_image_path(img_name) 410 | save_name = img_name.replace("/","_") + "_points_lines_" + mode +".svg" 411 | 412 | visualize_2d_points_lines(img_path, points2D, line2Ds, line3Ds, save_name) 413 | 414 | 415 | 
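# ------------------------------------------------------------------------------------------
# Editor's hedged usage sketch (not part of the original file): the 2D visualization helpers
# above all read from a DataCollection object; the attribute and method names used here
# (train_imgs, detect_points2D, ...) are exactly the ones referenced in this module, but
# constructing the DataCollection itself is out of scope for this sketch. Nothing in the
# pipeline calls this function; it is illustrative only.
def _demo_visualize_first_train_image(datacollection):
    """Render point/line overlays for one training image, offline (labels) and online (detectors)."""
    img_name = datacollection.train_imgs[0]
    # keypoints from the stored labels vs. keypoints re-detected by the point detector
    visualize_2d_points_from_collection(datacollection, img_name, mode="offline")
    visualize_2d_points_from_collection(datacollection, img_name, mode="online")
    # line segments: drawn in red when a 3D line label exists, lime otherwise
    visualize_2d_lines_from_collection(datacollection, img_name, mode="offline")
    # combined overlay, saved as an .svg under the default "visualization/" folder
    visualize_2d_points_lines_from_collection(datacollection, img_name, mode="offline")
# ------------------------------------------------------------------------------------------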
########################################################################################## 416 | # -------------------------------- Augmentation Visualization Debug ---------------------- 417 | 418 | import cv2 419 | 420 | def visualize_img_withlinesandpoints(image, points, lines, augmented=False): 421 | 422 | save_path = "visualization/" 423 | point_size = 1 424 | 425 | # Draw the original positions on the original image 426 | for position in points: 427 | cv2.circle(image, (int(position[0]), int(position[1])), point_size, (0, 0, 255), -1) 428 | # Draw the original lines on the original image 429 | for line in lines: 430 | cv2.line(image, (int(line[0]), int(line[1])), (int(line[2]), int(line[3])), (255, 0, 0), 1) 431 | cv2.circle(image, (int(line[0]), int(line[1])), point_size*3, (0, 0, 255), -1) 432 | cv2.circle(image, (int(line[2]), int(line[3])), point_size*3, (0, 0, 255), -1) 433 | 434 | if augmented: 435 | cv2.imwrite(save_path+'Transformed_Image.jpg', image) 436 | else: 437 | cv2.imwrite(save_path+'Original_Image.jpg', image) 438 | 439 | ########################################################################################## 440 | # -------------------------------- draw camera poses ------------------------------------- 441 | from util.read_write_model import qvec2rotmat 442 | def draw_camera(K, R, t, w, h, 443 | scale=1, color=[1, 0, 0]): 444 | """Create axis, plane and pyramed geometries in Open3D format. 445 | :param K: calibration matrix (camera intrinsics) 446 | :param R: rotation matrix 447 | :param t: translation 448 | :param w: image width 449 | :param h: image height 450 | :param scale: camera model scale 451 | :param color: color of the image plane and pyramid lines 452 | :return: camera model geometries (axis, plane and pyramid) 453 | """ 454 | 455 | # intrinsics 456 | K = K.copy() / scale 457 | Kinv = np.linalg.inv(K) 458 | 459 | # 4x4 transformation 460 | T = np.column_stack((R, t)) 461 | T = np.vstack((T, (0, 0, 0, 1))) 462 | 463 | # axis 464 | axis = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.5 * scale) 465 | axis.transform(T) 466 | 467 | # points in pixel 468 | points_pixel = [ 469 | [0, 0, 0], 470 | [0, 0, 1], 471 | [w, 0, 1], 472 | [0, h, 1], 473 | [w, h, 1], 474 | ] 475 | 476 | # pixel to camera coordinate system 477 | points = [Kinv @ p for p in points_pixel] 478 | 479 | # image plane 480 | width = abs(points[1][0]) + abs(points[3][0]) 481 | height = abs(points[1][1]) + abs(points[3][1]) 482 | plane = o3d.geometry.TriangleMesh.create_box(width, height, depth=1e-6) 483 | # plane.paint_uniform_color([0.5,0,0]) 484 | # plane.paint_uniform_color(color) 485 | plane.translate([points[1][0], points[1][1], scale]) 486 | plane.transform(T) 487 | 488 | # pyramid 489 | points_in_world = [(R @ p + t) for p in points] 490 | lines = [ 491 | [0, 1], 492 | [0, 2], 493 | [0, 3], 494 | [0, 4], 495 | [1, 2], 496 | [2, 4], 497 | [4, 3], 498 | [3, 1], 499 | ] 500 | colors = [color for i in range(len(lines))] 501 | line_set = o3d.geometry.LineSet( 502 | points=o3d.utility.Vector3dVector(points_in_world), 503 | lines=o3d.utility.Vector2iVector(lines)) 504 | line_set.colors = o3d.utility.Vector3dVector(colors) 505 | 506 | # return as list in Open3D format 507 | # return [axis, plane, line_set] 508 | # return [plane, line_set] 509 | return [line_set] 510 | 511 | 512 | 513 | def add_camera(vis, pose, camera, scale=0.1, gt = False, othermethod = False): 514 | plane_scale = 1 515 | # rotation 516 | R = qvec2rotmat(pose[3:]) 517 | # translation 518 | t = pose[:3] 519 | # 
invert 520 | t = -R.T @ t 521 | R = R.T 522 | # intrinsics 523 | 524 | if camera['model'] in ("SIMPLE_PINHOLE", "SIMPLE_RADIAL", "RADIAL"): 525 | fx = fy = camera['params'][0] 526 | cx = camera['params'][1] 527 | cy = camera['params'][2] 528 | elif camera['model'] in ("PINHOLE", "OPENCV", "OPENCV_FISHEYE", "FULL_OPENCV"): 529 | fx = camera['params'][0] 530 | fy = camera['params'][1] 531 | cx = camera['params'][2] 532 | cy = camera['params'][3] 533 | else: 534 | raise Exception("Camera model not supported") 535 | 536 | # intrinsics 537 | K = np.identity(3) 538 | K[0, 0] = fx 539 | K[1, 1] = fy 540 | K[0, 2] = cx 541 | K[1, 2] = cy 542 | if othermethod: 543 | color = [0,1,0] 544 | else: 545 | color = [1, 0, 0] if gt else [0, 0, 1] 546 | # create axis, plane and pyramed geometries that will be drawn 547 | cam_model = draw_camera(K, R, t, camera['width']*plane_scale, camera['height']*plane_scale, scale, color) 548 | for i in cam_model: 549 | vis.add_geometry(i) 550 | 551 | --------------------------------------------------------------------------------
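Usage note (editor's addition, hedged): the sketch below is not a file from this repository; it only shows how the utilities above can be driven. Paths and the sample correspondences are placeholders, and the imports assume the repository root is on PYTHONPATH, as in util/pose_estimator.py.

# ------------------------------------------------------------------------------------------
# Editor's hedged usage sketch: exercising util/read_write_model.py and the poselib call
# pattern from util/pose_estimator.py. All paths and correspondences are placeholders.
import numpy as np
import poselib
from util.read_write_model import read_model, write_model, qvec2rotmat, rotmat2qvec

# 1) util/read_write_model.py as a library: read a COLMAP model, sanity-check the
#    qvec <-> rotation-matrix round trip, and re-export the model as text.
cameras, images, points3D = read_model("path/to/colmap_model")   # placeholder; .bin/.txt auto-detected
print("num_cameras:", len(cameras), "num_images:", len(images), "num_points3D:", len(points3D))

img = next(iter(images.values()))
R = img.qvec2rotmat()                                            # helper on the Image namedtuple above
assert np.allclose(qvec2rotmat(rotmat2qvec(R)), R, atol=1e-6)    # round trip, insensitive to quaternion sign
write_model(cameras, images, points3D, "path/to/output_model", ext=".txt")  # output folder must already exist

# The same conversion is also available from the command line:
#   python util/read_write_model.py --input_model path/to/colmap_model \
#       --output_model path/to/output_model --output_format .txt

# 2) The poselib call pattern used in util/pose_estimator.py, with random placeholder
#    correspondences and a COLMAP-style camera dict shaped like the one built in
#    camera_pose_estimation (the estimated pose is meaningless here; only the API is shown).
p2ds = [np.random.rand(2) * 100 for _ in range(50)]              # 2D keypoints (pixels)
p3ds = [np.random.rand(3) for _ in range(50)]                    # corresponding 3D points
camera = {"model": "PINHOLE", "width": 640, "height": 480, "params": [525.0, 525.0, 320.0, 240.0]}

pose, info = poselib.estimate_absolute_pose(p2ds, p3ds, camera, {"max_reproj_error": 12.0}, {})
print(pose.t, pose.q)   # translation and quaternion, in the order written to the est_poses_*.txt files
# ------------------------------------------------------------------------------------------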