├── README.md
├── assets
│   ├── PLV.png
│   ├── cover_2.png
│   ├── encoder.png
│   └── toy_result.png
├── configs
│   └── replica
│       ├── office0.yaml
│       ├── office0_custom.yaml
│       ├── office0_w_clip.yaml
│       └── office0_w_slam.yaml
├── demo.py
├── example
│   ├── office0
│   │   ├── depth000000.png
│   │   ├── depth000020.png
│   │   ├── frame000000.jpg
│   │   ├── frame000020.jpg
│   │   └── traj.txt
│   ├── render_w_lim.py
│   ├── toy.py
│   └── util.py
├── external
│   └── openseg
│       └── openseg_api.py
├── scripts
│   └── download_replica.sh
├── setup.py
├── uni
│   ├── __init__.py
│   ├── dataset
│   │   ├── 3dscene.py
│   │   ├── NICE_SLAM_config
│   │   │   └── demo.yaml
│   │   ├── NICE_SLAM_dataset.py
│   │   ├── __init__.py
│   │   ├── aug_icl.py
│   │   ├── azure.py
│   │   ├── bpnet_scannet.py
│   │   ├── custom.py
│   │   ├── custom_w_slam.py
│   │   ├── fountain.py
│   │   ├── icl_nuim.py
│   │   ├── latent_map.py
│   │   ├── matterport3d.py
│   │   ├── replica.py
│   │   ├── scannet.py
│   │   └── tum.py
│   ├── encoder
│   │   ├── __init__.py
│   │   ├── position_encoder.pth
│   │   ├── uni_encoder_v2.py
│   │   └── utility.py
│   ├── ext
│   │   ├── __init__.py
│   │   ├── imgproc
│   │   │   ├── common.cuh
│   │   │   ├── imgproc.cpp
│   │   │   ├── imgproc.cu
│   │   │   └── photometric.cu
│   │   ├── indexing
│   │   │   ├── indexing.cpp
│   │   │   └── indexing.cu
│   │   ├── marching_cubes
│   │   │   ├── mc.cpp
│   │   │   ├── mc_data.cuh
│   │   │   └── mc_interp_kernel.cu
│   │   └── pcproc
│   │       ├── cuda_kdtree.cu
│   │       ├── cuda_kdtree.cuh
│   │       ├── cutil_math.h
│   │       ├── pcproc.cpp
│   │       └── pcproc.cu
│   ├── mapper
│   │   ├── __init__.py
│   │   ├── base_map.py
│   │   ├── context_map_v2.py
│   │   ├── latent_map.py
│   │   └── surface_map.py
│   ├── tracker
│   │   ├── __init__.py
│   │   ├── cicp.py
│   │   └── tracker_custom.py
│   └── utils
│       ├── __init__.py
│       ├── exp_util.py
│       ├── linalg_util.py
│       ├── motion_util.py
│       ├── pt_util.py
│       ├── ray_cast.py
│       ├── torch_scatter.py
│       └── vis_util.py
└── vis_LIMs.py
/README.md:
--------------------------------------------------------------------------------
1 | # [Uni-Fusion: Universal Continuous Mapping](https://jarrome.github.io/Uni-Fusion/)
2 |
3 | [Yijun Yuan](https://jarrome.github.io/), [Andreas Nüchter](https://www.informatik.uni-wuerzburg.de/robotics/team/nuechter/)
4 |
5 | [Preprint](https://arxiv.org/abs/2303.12678) | [website](https://jarrome.github.io/Uni-Fusion/)
6 |
7 | #### Uni-Fusion has *nothing to do with NeRF!*
8 | #### It is a fusion method (forward pass and fusion only)!
9 |
10 |
11 |
12 |
13 |
14 |
15 | *Universal encoder: **no training data needed** | The picture on the right shows the voxel grid used for mapping*
16 |
17 | *Therefore, it supports **any mapping**:*
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 | Table of Contents
26 |
27 | - Installation
28 |
29 |
30 | - Demo
31 |
32 |
33 | - TODO
34 |
35 |
36 | - Citation
37 |
38 |
39 | - Acknowledgement
40 |
41 |
42 |
43 |
44 |
45 | ## Env setting and install
46 |
47 | Installation steps:
48 |
49 | * Create env
50 | ```bash
51 | conda create -n uni python=3.8
52 | conda activate uni
53 |
54 | conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=11.3 -c pytorch
55 | pip install torch-scatter torch-sparse torch-geometric -f https://data.pyg.org/whl/torch-1.12.0+cu113.html
56 | pip install ninja functorch==0.2.1 numba open3d opencv-python trimesh torchfile
57 | ```
58 |
59 | * install package
60 | ```bash
61 | git clone https://github.com/Jarrome/Uni-Fusion.git && cd Uni-Fusion
62 | # install uni package
63 | python setup.py install
64 | # build the CUDA extensions; this may take several minutes, use `top` or `ps` to check progress
65 | python uni/ext/__init__.py
66 | ```
67 |
68 | * train a uni encoder from scratch (no data needed) in 1 second
69 | ```bash
70 | python uni/encoder/uni_encoder_v2.py
71 | ```
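Once installed, the encoder can also be loaded directly from Python. A minimal sketch, mirroring how `example/util.py` and `example/render_w_lim.py` use it:
```python
import torch
from uni.encoder.uni_encoder_v2 import get_uni_model

device = torch.device("cuda", index=0)
uni_model = get_uni_model(device)   # builds the universal encoder, no training data needed
# latent sizes consumed by the surface map and the context (color) map
print(uni_model.surface_code_length, uni_model.color_code_length)
```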
72 |
73 |
74 |
75 | Optionally, you can install the [ORB-SLAM2 fork](https://github.com/Jarrome/Uni-Fusion-use-ORB-SLAM2) that we use for tracking:
76 |
77 | ```bash
78 | cd external
79 | git clone https://github.com/Jarrome/Uni-Fusion-use-ORB-SLAM2
80 | cd [this_folder]
81 | # this_folder is the absolute path of the cloned ORB-SLAM2
82 | # Add ORB_SLAM2/lib to PYTHONPATH and LD_LIBRARY_PATH environment variables
83 | # I suggest putting this in ~/.bashrc
84 | export PYTHONPATH=$PYTHONPATH:[this_folder]/lib
85 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:[this_folder]/lib
86 |
87 | ./build.sh && ./build_python.sh
88 | ```
89 |
90 |
91 |
92 | ## Demo
93 |
94 | ### 0. Quick try
95 | We provide a toy example to quickly try our algorithm.
96 | You can either run `python example/toy.py` or use the following code:
97 | ```python
98 | import torch
99 | import numpy as np
100 |
101 | from example.util import get_modules, get_example_data
102 |
103 | device = torch.device("cuda", index=0)
104 |
105 | # get mapper and tracker
106 | sm, cm, tracker, config = get_modules(device)
107 |
108 | # prepare data
109 | colors, depths, customs, calib, poses = get_example_data(device)
110 |
111 | for i in [0, 1]:
112 | # preprocess rgbd to point cloud
113 | frame_pose = tracker.track_camera(colors[i], depths[i], customs, calib, poses[i], scene = config.sequence_type)
114 | # transform data
115 | tracker_pc, tracker_normal, tracker_customs= tracker.last_processed_pc
116 | opt_depth = frame_pose @ tracker_pc
117 | opt_normal = frame_pose.rotation @ tracker_normal
118 | color_pc, color, color_normal = tracker.last_colored_pc
119 | color_pc = frame_pose @ color_pc
120 | color_normal = frame_pose.rotation @ color_normal if color_normal is not None else None
121 |
122 | # mapping pc
123 | sm.integrate_keyframe(opt_depth, opt_normal)
124 | cm.integrate_keyframe(color_pc, color, color_normal)
125 |
126 | # mesh extraction
127 | map_mesh = sm.extract_mesh(config.resolution, int(4e7), max_std=0.15, extract_async=False, interpolate=True)
128 |
129 | import open3d as o3d
130 | o3d.io.write_triangle_mesh('example/mesh.ply', map_mesh)
131 |
132 | ```
133 | You will get a mesh that looks like this (see `assets/toy_result.png`):
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 | ---
143 | Then
144 |
145 | * **All demo can be run with ```python demo.py [config]```**
146 | * **Mesh for color, style, infrad, semantic can be extracted with ```python vis_LIM.py [config]```**
147 | * **Rendering for RGB and Depth image can be extracted with ```python example/render_w_LIM.py [config] [optionally traj with GT poses]```**
148 |
149 | ### 1. Reconstruction Demo
150 | ```bash
151 | # download replica data
152 | source scripts/download_replica.sh
153 |
154 | # with gt pose
155 | python demo.py configs/replica/office0.yaml
156 |
157 | # with slam
158 | python demo.py configs/replica/office0_w_slam.yaml
159 | ```
160 |
161 | Then you can find the results in `output/replica/office0`, the `outdir` specified in the `[config]` file:
162 | ```console
163 | $ ls output/replica/office0
164 |
165 | surface.lim
166 | color.lim
167 | final_recons.ply
168 | pred_traj.txt
169 | ```
170 |
171 | * *In `[scene]_w_slam.yaml`, we can choose among 3 modes (see the sketch below the table):*
172 |
173 | |Usage| load_gt| slam|
174 | |---|---|---|
175 | |use SLAM track|False|True|
176 | |use SLAM pred pose|True|True|
177 | |use GT pose|True|False|
178 |
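The two columns correspond to the `sequence_kwargs.load_gt` and the top-level `slam` keys of the config. A minimal sketch of switching modes programmatically (the snippet and the output filename are hypothetical, not part of the repo; you can just as well edit the YAML by hand):
```python
import yaml

# load an existing config and flip the two mode flags
with open('configs/replica/office0_w_slam.yaml') as f:
    cfg = yaml.safe_load(f)

cfg['sequence_kwargs']['load_gt'] = False   # "load_gt" column of the table
cfg['slam'] = True                          # "slam" column -> use SLAM tracking

# write a new config, then run: python demo.py configs/replica/office0_my_mode.yaml
with open('configs/replica/office0_my_mode.yaml', 'w') as f:
    yaml.safe_dump(cfg, f)
```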
179 | * *You can set ```vis: True``` for online visualization (```False``` by default), in the style of DI-Fusion. In the GUI, press ',' to step and '.' to continue running.*
180 |
181 | * *LIM extraction for mesh*
182 | ```
183 | python vis_LIMs.py configs/replica/office0.yaml
184 | ```
185 |
186 | will generate `output/replica/office0/color_recons.ply`
187 |
188 | * *LIM rendering from the resulting LIMs*
189 | ```
190 | # with gt pose
191 | python example/render_w_lim.py configs/replica/office0.yaml data/replica/office0/traj.txt
192 |
193 | # otherwise
194 | python example/render_w_lim.py configs/replica/office0_w_slam.yaml
195 | ```
196 |
197 | This will create a `render` folder under `output/replica/office0`, the `outdir` specified in the `[config]` file:
198 |
199 | ```console
200 | $ ls output/replica/office0
201 |
202 | surface.lim
203 | color.lim
204 | final_recons.ply
205 | pred_traj.txt
206 | render/ # contains the rendered RGB and depth images
207 | ```
208 |
209 |
210 | ### 2. Custom context Demo
211 |
212 | [```office0_custom.yaml```](https://github.com/Jarrome/Uni-Fusion/blob/main/configs/replica/office0_custom.yaml) contains all the mappings you need
213 |
214 | ```bash
215 | # if you need saliency
216 | pip install transparent-background numba
217 | # if you need style
218 | cd external
219 | git clone https://github.com/Jarrome/PyTorch-Multi-Style-Transfer.git
220 | mv PyTorch-Multi-Style-Transfer style_transfer
221 | cd style_transfer/experiments
222 | bash models/download_model.sh
223 | cd ../../../
224 |
225 | # run demo
226 | python demo.py configs/replica/office0_custom.yaml
227 |
228 |
229 | # LIM extraction of custom property shown on mesh
230 | python vis_LIMs.py configs/replica/office0_custom.yaml
231 | ```
232 |
233 |
234 | ### 3. Open Vocabulary Scene Understanding Demo
235 | The text-visual CLIP model is from [OpenSeg](https://github.com/tensorflow/tpu/tree/641c1ac6e26ed788327b973582cbfa297d7d31e7/models/official/detection/projects/openseg)
236 | ```bash
237 | # install requirements
238 | pip install tensorflow==2.5.0
239 | pip install git+https://github.com/openai/CLIP.git
240 |
241 | # download openseg ckpt
242 | # can use `sudo snap install google-cloud-cli --classic` to install gsutil
243 | gsutil cp -r gs://cloud-tpu-checkpoints/detection/projects/openseg/colab/exported_model ./external/openseg/
244 |
245 | python demo.py configs/replica/office0_w_clip.yaml
246 |
247 | # LIM extraction of semantic shown on mesh
248 | python vis_LIMs.py configs/replica/office0_w_clip.yaml
249 | ```
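For reference, a rough sketch of how the helpers exported by `external/openseg/openseg_api.py` fit together; the import path and the glue code below are assumptions (see `demo.py` and `configs/replica/office0_w_clip.yaml` for the actual wiring):
```python
import sys
import torch

sys.path.append('external/openseg')
from openseg_api import get_api                      # loads CLIP and the OpenSeg model

f_im, f_tx, classify, feat_dim = get_api()           # feat_dim == 768

text_feat = f_tx('chair,table,floor,other')          # (num_classes, feat_dim) text embeddings
with open('example/office0/frame000000.jpg', 'rb') as f:
    img_bytes = f.read()
img_feat = f_im([img_bytes], H=680, W=1200)          # (H, W, feat_dim) per-pixel image features

device = torch.device('cuda', index=0)
logits = classify(torch.from_numpy(img_feat).reshape(-1, feat_dim).to(device),
                  torch.from_numpy(text_feat).to(device))   # (H*W, num_classes)
labels = logits.argmax(dim=-1).reshape(680, 1200)           # per-pixel class ids
```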
250 |
251 | ### 4. Self-captured data
252 | #### Azure capturing
253 | We provide a script to extract RGB, depth (D) and IR images from the Azure recording (azure.mp4): [azure_process](https://github.com/Jarrome/azure_process).
254 |
255 | The captured apartment data is stored [here](https://robotik.informatik.uni-wuerzburg.de/telematics/download/appartment2.tgz).
256 |
257 | ---
258 | ## TODO:
259 | - [x] Upload the uni-encoder src (Jan.3)
260 | - [x] Upload the env script (Jan.4)
261 | - [x] Upload the recon. application (By Jan.8)
262 | - [x] Upload the used ORB-SLAM2 support (Jan.8)
263 | - [x] Upload the azure process for RGB,D,IR (Jan.8)
264 | - [x] Upload the seman. application (Jan.14)
265 | - [x] Upload the Custom context demo (Jan.14)
266 | - [x] Toy example for quickly assembling Uni-Fusion into a custom project
267 | - [x] Extraction of mesh with properties from Latent Implicit Maps (LIMs) (Jun.26) [Sorry for the delay... Yijun just got some free time...]
268 | - [x] Rendering of RGB and Depth images from Latent Implicit Maps (LIMs) (Jun.26)
269 | - [ ] Our new project [SceneFactory](https://jarrome.github.io/SceneFactory/) has a better tracking option; we plan to replace ORB-SLAM2 with it once that work is openly released.
270 |
271 | ---
272 | ## Citation
273 | If you find this work interesting, please cite us:
274 | ```bibtex
275 | @article{yuan2024uni,
276 | title={Uni-Fusion: Universal Continuous Mapping},
277 | author={Yuan, Yijun and N{\"u}chter, Andreas},
278 | journal={IEEE Transactions on Robotics},
279 | year={2024},
280 | publisher={IEEE}
281 | }
282 | ```
283 |
284 | ## Acknowledgement
285 | * This implementation is on top of [DI-Fusion](https://github.com/huangjh-pub/di-fusion).
286 | * We also borrow some dataset code from [NICE-SLAM](https://github.com/cvg/nice-slam).
287 | * We thank Kejie Li, Björn Michele, Songyou Peng and Golnaz Ghiasi for their detailed responses to our questions.
288 |
--------------------------------------------------------------------------------
/assets/PLV.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jarrome/Uni-Fusion/c49461d5aa53375380ca2eb7fbdf4ce8e14aeda0/assets/PLV.png
--------------------------------------------------------------------------------
/assets/cover_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jarrome/Uni-Fusion/c49461d5aa53375380ca2eb7fbdf4ce8e14aeda0/assets/cover_2.png
--------------------------------------------------------------------------------
/assets/encoder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jarrome/Uni-Fusion/c49461d5aa53375380ca2eb7fbdf4ce8e14aeda0/assets/encoder.png
--------------------------------------------------------------------------------
/assets/toy_result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jarrome/Uni-Fusion/c49461d5aa53375380ca2eb7fbdf4ce8e14aeda0/assets/toy_result.png
--------------------------------------------------------------------------------
/configs/replica/office0.yaml:
--------------------------------------------------------------------------------
1 | # Sequence parameters
2 | sequence_type: "replica.ReplicaRGBDDataset"
3 | sequence_kwargs:
4 | path: "data/Replica/office0/results/"
5 | start_frame: 0
6 | end_frame: -1 # Run all frames
7 | first_tq: [0, 0, 0.0, 0.0, -1.0, 0.0, 0.0]
8 | load_gt: True # use gt traj or not
9 |
10 | mesh_gt: "data/Replica/office0_mesh.ply"
11 |
12 | # Network parameters (network structure, etc. will be inherited from the training config)
13 | training_hypers: "ckpt/default/hyper.json"
14 | using_epoch: 300
15 |
16 |
17 | # Separate tracking and meshing.
18 | run_async: True
19 | # Enable visualization
20 | vis: False
21 | resolution: 4
22 |
23 | # These two define the range of depth observations to be cropped. Unit is meter.
24 | depth_cut_min: 0.1
25 | depth_cut_max: 10.0
26 |
27 | meshing_interval: 10
28 | integrate_interval: 10
29 | track_interval: 5
30 | #color_integrate_interval: 20
31 |
32 |
33 | # Mapping parameters
34 | surface_mapping:
35 | GPIS_mode: "sample"
36 | margin: .1
37 |
38 | # Bound of the scene to be reconstructed
39 | bound_min: [-10., -5., -10.]
40 | bound_max: [10., 5., 10.]
41 |
42 | voxel_size: 0.05
43 | # Prune observations if detected as noise.
44 | prune_min_vox_obs: 1
45 | ignore_count_th: 1.0
46 | encoder_count_th: 60000.0
47 |
48 | # Mapping parameters
49 | context_mapping:
50 | # Bound of the scene to be reconstructed
51 | bound_min: [-10., -5., -10.]
52 | bound_max: [10., 5., 10.]
53 | voxel_size: .02
54 | # Prune observations if detected as noise.
55 | prune_min_vox_obs: 1
56 | ignore_count_th: 1.0
57 | encoder_count_th: 60000.0
58 |
59 | outdir: "./output/replica/office0/"
60 |
61 |
62 | # Tracking parameters
63 | tracking:
64 | # An array defining how the camera pose is optimized.
65 | # Each element is a dictionary:
66 | # For example {"n": 2, "type": [['sdf'], ['rgb', 1]]} means to optimize the summation of sdf term and rgb term
67 | # at the 1st level pyramid for 2 iterations.
68 | iter_config:
69 | #- {"n": 10, "type": [['rgb', 2]]}
70 | - {"n": 5, "type": [['sdf'], ['rgb', 1]]}
71 | - {"n": 10, "type": [['sdf'], ['rgb', 0]]}
72 | sdf:
73 | robust_kernel: "huber"
74 | robust_k: 5.0
75 | subsample: 0.5
76 | rgb:
77 | weight: 500.0
78 | robust_kernel: null
79 | robust_k: 0.01
80 | min_grad_scale: 0.0
81 | max_depth_delta: 0.2
82 |
--------------------------------------------------------------------------------
/configs/replica/office0_custom.yaml:
--------------------------------------------------------------------------------
1 | # Sequence parameters
2 | sequence_type: "custom_w_slam.CustomReplicawSLAM"
3 | sequence_kwargs:
4 | path: "data/Replica/office0/results/"
5 | start_frame: 0
6 | end_frame: -1 # Run all frames
7 | #first_tq: [-1.2, 1.3, 1.0, 0.0, -1.0, 0.0, 0.0] # Starting pose
8 | first_tq: [0, 0, 0.0, 0.0, -1.0, 0.0, 0.0]
9 | load_gt: True
10 |
11 | mesh_gt: "data/Replica/office0_mesh.ply"
12 |
13 | outdir: "./output/custom/office0/"
14 | slam: False
15 | # Network parameters (network structure, etc. will be inherited from the training config)
16 | training_hypers: "ckpt/default/hyper.json"
17 | using_epoch: 300
18 |
19 |
20 | # Separate tracking and meshing.
21 | run_async: True
22 | # Enable visualization
23 | vis: False
24 | resolution: 4
25 |
26 | # These two define the range of depth observations to be cropped. Unit is meter.
27 | depth_cut_min: 0.1
28 | depth_cut_max: 10.0
29 |
30 | meshing_interval: 10
31 | integrate_interval: 10
32 | track_interval: 5
33 | #color_integrate_interval: 20
34 |
35 |
36 | # Mapping parameters
37 | surface_mapping:
38 | GPIS_mode: "sample"
39 | margin: .1
40 |
41 | # Bound of the scene to be reconstructed
42 | bound_min: [-10., -5., -10.]
43 | bound_max: [10., 5., 10.]
44 |
45 | voxel_size: 0.05
46 | # Prune observations if detected as noise.
47 | prune_min_vox_obs: 1
48 | ignore_count_th: 1.0
49 | encoder_count_th: 60000.0
50 |
51 | # Mapping parameters
52 | context_mapping:
53 | # Bound of the scene to be reconstructed
54 | bound_min: [-10., -5., -10.]
55 | bound_max: [10., 5., 10.]
56 | voxel_size: .02
57 | # Prune observations if detected as noise.
58 | prune_min_vox_obs: 1
59 | ignore_count_th: 1.0
60 | encoder_count_th: 60000.0
61 |
62 | # Mapping parameters
63 | saliency_mapping:
64 | # Bound of the scene to be reconstructed
65 | bound_min: [-10., -5., -10.]
66 | bound_max: [10., 5., 10.]
67 | voxel_size: .1
68 | # Prune observations if detected as noise.
69 | prune_min_vox_obs: 1
70 | ignore_count_th: 1.0
71 | encoder_count_th: 60000.0
72 |
73 | # Mapping parameters
74 | style_mapping:
75 | # Bound of the scene to be reconstructed
76 | bound_min: [-10., -5., -10.]
77 | bound_max: [10., 5., 10.]
78 | voxel_size: .1
79 | # Prune observations if detected as noise.
80 | prune_min_vox_obs: 1
81 | ignore_count_th: 1.0
82 | encoder_count_th: 60000.0
83 |
84 |
85 | # Mapping parameters
86 | nlatent_mapping:
87 | # Bound of the scene to be reconstructed
88 | bound_min: [-10., -5., -10.]
89 | bound_max: [10., 5., 10.]
90 | voxel_size: .1
91 | # Prune observations if detected as noise.
92 | prune_min_vox_obs: 1
93 | ignore_count_th: 1.0
94 | encoder_count_th: 60000.0
95 |
96 |
97 |
98 |
99 |
100 | # Tracking parameters
101 | tracking:
102 | # An array defining how the camera pose is optimized.
103 | # Each element is a dictionary:
104 | # For example {"n": 2, "type": [['sdf'], ['rgb', 1]]} means to optimize the summation of sdf term and rgb term
105 | # at the 1st level pyramid for 2 iterations.
106 | iter_config:
107 | #- {"n": 10, "type": [['rgb', 2]]}
108 | - {"n": 5, "type": [['sdf'], ['rgb', 1]]}
109 | - {"n": 10, "type": [['sdf'], ['rgb', 0]]}
110 | sdf:
111 | robust_kernel: "huber"
112 | robust_k: 5.0
113 | subsample: 0.5
114 | rgb:
115 | weight: 500.0
116 | robust_kernel: null
117 | robust_k: 0.01
118 | min_grad_scale: 0.0
119 | max_depth_delta: 0.2
120 |
--------------------------------------------------------------------------------
/configs/replica/office0_w_clip.yaml:
--------------------------------------------------------------------------------
1 | # Sequence parameters
2 | sequence_type: "custom_w_slam.CustomReplicawSLAM"
3 | sequence_kwargs:
4 | path: "data/Replica/office0/results/"
5 | start_frame: 0
6 | end_frame: -1 # Run all frames
7 | first_tq: [0, 0, 0.0, 0.0, -1.0, 0.0, 0.0]
8 | load_gt: True
9 |
10 | mesh_gt: "data/Replica/office0_mesh.ply"
11 |
12 | outdir: "./output/w_clip/replica/office0/"
13 |
14 | slam: False
15 |
16 | # Network parameters (network structure, etc. will be inherited from the training config)
17 | training_hypers: "ckpt/default/hyper.json"
18 | using_epoch: 300
19 |
20 |
21 | # Separate tracking and meshing.
22 | run_async: True
23 | # Enable visualization
24 | vis: False
25 | resolution: 4
26 |
27 | # These two define the range of depth observations to be cropped. Unit is meter.
28 | depth_cut_min: 0.1
29 | depth_cut_max: 10.0
30 |
31 | meshing_interval: 10
32 | integrate_interval: 10
33 | track_interval: 10
34 | #color_integrate_interval: 20
35 |
36 |
37 | # Mapping parameters
38 | surface_mapping:
39 | GPIS_mode: "sample"
40 | margin: .1
41 |
42 | # Bound of the scene to be reconstructed
43 | bound_min: [-10., -5., -10.]
44 | bound_max: [10., 5., 10.]
45 |
46 | voxel_size: 0.05
47 | # Prune observations if detected as noise.
48 | prune_min_vox_obs: 1
49 | ignore_count_th: 1.0
50 | encoder_count_th: 60000.0
51 |
52 | # Mapping parameters
53 | context_mapping:
54 | # Bound of the scene to be reconstructed
55 | bound_min: [-10., -5., -10.]
56 | bound_max: [10., 5., 10.]
57 | voxel_size: .02
58 | # Prune observations if detected as noise.
59 | prune_min_vox_obs: 1
60 | ignore_count_th: 1.0
61 | encoder_count_th: 60000.0
62 |
63 | # Mapping parameters
64 | latent_mapping:
65 | # Bound of the scene to be reconstructed
66 | bound_min: [-10., -5., -10.]
67 | bound_max: [10., 5., 10.]
68 | voxel_size: .1
69 | # Prune observations if detected as noise.
70 | prune_min_vox_obs: 1
71 | ignore_count_th: 1.0
72 | encoder_count_th: 60000000.0
73 |
74 |
75 |
76 |
77 |
78 | # Tracking parameters
79 | tracking:
80 | # An array defining how the camera pose is optimized.
81 | # Each element is a dictionary:
82 | # For example {"n": 2, "type": [['sdf'], ['rgb', 1]]} means to optimize the summation of sdf term and rgb term
83 | # at the 1st level pyramid for 2 iterations.
84 | iter_config:
85 | #- {"n": 10, "type": [['rgb', 2]]}
86 | - {"n": 5, "type": [['sdf'], ['rgb', 1]]}
87 | - {"n": 10, "type": [['sdf'], ['rgb', 0]]}
88 | sdf:
89 | robust_kernel: "huber"
90 | robust_k: 5.0
91 | subsample: 0.5
92 | rgb:
93 | weight: 500.0
94 | robust_kernel: null
95 | robust_k: 0.01
96 | min_grad_scale: 0.0
97 | max_depth_delta: 0.2
98 |
--------------------------------------------------------------------------------
/configs/replica/office0_w_slam.yaml:
--------------------------------------------------------------------------------
1 | # Sequence parameters
2 | sequence_type: "custom_w_slam.CustomReplicawSLAM"
3 | sequence_kwargs:
4 | path: "data/Replica/office0/results/"
5 | start_frame: 0
6 | end_frame: -1 # Run all frames
7 | first_tq: [0, 0, 0.0, 0.0, -1.0, 0.0, 0.0]
8 | load_gt: False
9 |
10 | mesh_gt: "data/Replica/office0_mesh.ply"
11 |
12 | outdir: "./output/w_slam/replica/office0/"
13 |
14 | slam: True
15 |
16 | # Network parameters (network structure, etc. will be inherited from the training config)
17 | training_hypers: "ckpt/default/hyper.json"
18 | using_epoch: 300
19 |
20 |
21 | # Separate tracking and meshing.
22 | run_async: True
23 | # Enable visualization
24 | vis: False
25 | resolution: 4
26 |
27 | # These two define the range of depth observations to be cropped. Unit is meter.
28 | depth_cut_min: 0.1
29 | depth_cut_max: 10.0
30 |
31 | meshing_interval: 10
32 | integrate_interval: 10
33 | track_interval: 10
34 | #color_integrate_interval: 20
35 |
36 |
37 | # Mapping parameters
38 | surface_mapping:
39 | GPIS_mode: "sample"
40 | margin: .1
41 |
42 | # Bound of the scene to be reconstructed
43 | bound_min: [-10., -5., -10.]
44 | bound_max: [10., 5., 10.]
45 |
46 | voxel_size: 0.05
47 | # Prune observations if detected as noise.
48 | prune_min_vox_obs: 1
49 | ignore_count_th: 1.0
50 | encoder_count_th: 60000.0
51 |
52 | # Mapping parameters
53 | context_mapping:
54 | # Bound of the scene to be reconstructed
55 | bound_min: [-10., -5., -10.]
56 | bound_max: [10., 5., 10.]
57 | voxel_size: .02
58 | # Prune observations if detected as noise.
59 | prune_min_vox_obs: 1
60 | ignore_count_th: 1.0
61 | encoder_count_th: 60000.0
62 |
63 |
64 |
65 | # Tracking parameters
66 | tracking:
67 | # An array defining how the camera pose is optimized.
68 | # Each element is a dictionary:
69 | # For example {"n": 2, "type": [['sdf'], ['rgb', 1]]} means to optimize the summation of sdf term and rgb term
70 | # at the 1st level pyramid for 2 iterations.
71 | iter_config:
72 | #- {"n": 10, "type": [['rgb', 2]]}
73 | - {"n": 5, "type": [['sdf'], ['rgb', 1]]}
74 | - {"n": 10, "type": [['sdf'], ['rgb', 0]]}
75 | sdf:
76 | robust_kernel: "huber"
77 | robust_k: 5.0
78 | subsample: 0.5
79 | rgb:
80 | weight: 500.0
81 | robust_kernel: null
82 | robust_k: 0.01
83 | min_grad_scale: 0.0
84 | max_depth_delta: 0.2
85 |
--------------------------------------------------------------------------------
/example/office0/depth000000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jarrome/Uni-Fusion/c49461d5aa53375380ca2eb7fbdf4ce8e14aeda0/example/office0/depth000000.png
--------------------------------------------------------------------------------
/example/office0/depth000020.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jarrome/Uni-Fusion/c49461d5aa53375380ca2eb7fbdf4ce8e14aeda0/example/office0/depth000020.png
--------------------------------------------------------------------------------
/example/office0/frame000000.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jarrome/Uni-Fusion/c49461d5aa53375380ca2eb7fbdf4ce8e14aeda0/example/office0/frame000000.jpg
--------------------------------------------------------------------------------
/example/office0/frame000020.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jarrome/Uni-Fusion/c49461d5aa53375380ca2eb7fbdf4ce8e14aeda0/example/office0/frame000020.jpg
--------------------------------------------------------------------------------
/example/render_w_lim.py:
--------------------------------------------------------------------------------
1 | import sys, os
2 | import pathlib
3 | import importlib
4 | import open3d as o3d
5 | import trimesh
6 | import argparse
7 | from pathlib import Path
8 | import logging
9 | from time import time
10 | import torch
11 |
12 | import cv2
13 | import numpy as np
14 | from tqdm import tqdm
15 |
16 |
17 |
18 | p = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
19 | sys.path.append(p)
20 |
21 |
22 | from uni.encoder.uni_encoder_v2 import get_uni_model
23 | from uni.mapper.context_map_v2 import ContextMap
24 | from uni.utils import exp_util, vis_util, motion_util
25 | from pyquaternion import Quaternion
26 | from uni.utils.ray_cast import RayCaster
27 |
28 | import pdb
29 |
30 |
31 |
32 |
33 | #from utils.ray_cast import RayCaster
34 |
35 | def depth2pc(depth_im, calib_mat):
36 | H,W = depth_im.shape
37 |
38 | d = depth_im.reshape(-1)
39 |
40 | fx = calib_mat[0,0]
41 | fy = calib_mat[1,1]
42 | cx = calib_mat[0,2]
43 | cy = calib_mat[1,2]
44 |
45 | x = np.arange(W)
46 | y = np.arange(H)
47 | yv, xv = np.meshgrid(y, x, indexing='ij') # HxW
48 |
49 | yv = yv.reshape(-1) # HW
50 | xv = xv.reshape(-1) # HW
51 |
52 | pc = np.zeros((H*W,3))
53 | pc[:,0] = (xv - cx) / fx * d
54 | pc[:,1] = (yv - cy) / fy * d
55 | pc[:,2] = d
56 | return pc
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 | if __name__ == '__main__':
71 | config_path = sys.argv[1]
72 |
73 | args = exp_util.parse_config_yaml(Path(config_path))
74 | mesh_path = args.outdir+'/final_recons.ply'
75 | render_path = args.outdir+'/render/'
76 |
77 |
78 | if not hasattr(args,'slam'):
79 | slamed = False
80 | else:
81 | slamed = args.slam
82 | use_gt = args.sequence_kwargs['load_gt'] and not slamed
83 |
84 | if use_gt:
85 | traj_path = sys.argv[2]
86 | else:
87 | traj_path = args.outdir+'pred_traj.txt'
88 |
89 |
90 |
91 |
92 | args.context_mapping = exp_util.dict_to_args(args.context_mapping)
93 | main_device = torch.device('cuda',index=0)
94 | uni_model = get_uni_model(main_device)
95 | context_map = ContextMap(uni_model,
96 | args.context_mapping, uni_model.color_code_length, device=main_device,
97 | enable_async=args.run_async)
98 |
99 | context_map.load(args.outdir+'/color.lim')
100 |
101 | # Load in sequence.
102 | seq_package, seq_class = args.sequence_type.split(".")
103 | sequence_module = importlib.import_module("uni.dataset." + seq_package)
104 | sequence_module = getattr(sequence_module, seq_class)
105 | sequence = sequence_module(**args.sequence_kwargs)
106 |
107 |
108 | mesh = o3d.io.read_triangle_mesh(mesh_path)
109 | traj_data = np.genfromtxt(traj_path)
110 |
111 | render_path = pathlib.Path(render_path)
112 | render_path.mkdir(parents=True, exist_ok=True)
113 |
114 |
115 | calib_matrix = np.eye(3)
116 | calib_matrix[0,0] = 600.
117 | calib_matrix[1,1] = 600.
118 | calib_matrix[0,2] = 599.5
119 | calib_matrix[1,2] = 339.5
120 |
121 | H = 680
122 | W = 1200
123 |
124 |
125 | # if using gt_traj for trajectory, change_mat == I
126 | # else will need inv(traj_data[0])
127 | if use_gt:
128 | change_mat = np.eye(4)
129 | else:
130 | change_mat = np.linalg.inv(traj_data[0].reshape(4,4))
131 |
132 | ray_caster = RayCaster(mesh, H, W, calib_matrix)
133 | for id, pose in tqdm(enumerate(traj_data)):
134 | pose = pose.reshape((4,4))
135 | pose = change_mat.dot(pose)
136 | # ray cast the mesh at this pose to get depth along each ray
137 | ans, ray_direction = ray_caster.ray_cast(pose) # N,3
138 | depth_on_ray = ans['t_hit'].numpy().reshape((H,W))
139 | facing_direction = pose[:3,:3].dot(np.array([[0.,0.,1.]]).T).T # 1,3
140 | facing_direction = facing_direction / np.linalg.norm(facing_direction)
141 | # depth_im is on z axis
142 | depth_im = (ray_direction * facing_direction).sum(-1).reshape((H,W)) * depth_on_ray
143 |
144 | mask_valid = ~np.isinf(depth_im.reshape(-1))
145 |
146 | pc = depth2pc(depth_im, calib_matrix)
147 |
148 |
149 | pose_ = sequence.first_iso.matrix.dot(np.linalg.inv(traj_data[0].reshape(4,4)).dot(pose))
150 |
151 | pc = (pose_[:3,:3].dot(pc[mask_valid,:].T) + pose_[:3,(3,)]).T
152 | color, pinds = context_map.infer(torch.from_numpy(pc).to(main_device).float())
153 | color = torch.clip(color, 0., 1.)
154 |
155 | color_im = np.zeros((H*W,3))
156 | color_im[mask_valid,:] = color.cpu().numpy() * 255
157 | color_im = color_im.reshape((H,W,3))
158 |
159 |
160 | # inpainting to fill the hole
161 | mask = (~mask_valid.reshape((H,W,1))).astype(np.uint8)
162 | color_im = color_im.astype(np.uint8)
163 | color_im = cv2.inpaint(color_im, mask,3,cv2.INPAINT_TELEA)
164 |
165 |
166 | #cv2.imwrite(str(render_path)+'/%d.jpg'%(id), color_im[:,:,::-1])
167 | cv2.imwrite(str(render_path)+'/%d.jpg'%(id), color_im[:,:,::-1])
168 | cv2.imwrite(str(render_path)+'/%d.png'%(id), (depth_im*6553.5).astype(np.uint16))
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
--------------------------------------------------------------------------------
/example/toy.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 |
4 | from example.util import get_modules, get_example_data
5 | import pdb
6 |
7 | device = torch.device("cuda", index=0)
8 |
9 | # get mapper and tracker
10 | sm, cm, tracker, config = get_modules(device)
11 |
12 | # prepare data
13 | colors, depths, customs, calib, poses = get_example_data(device)
14 |
15 | for i in [0, 1]:
16 | # preprocess rgbd to point cloud
17 | frame_pose = tracker.track_camera(colors[i], depths[i], customs, calib, poses[i], scene = config.sequence_type)
18 | # transform data
19 | tracker_pc, tracker_normal, tracker_customs= tracker.last_processed_pc
20 | opt_depth = frame_pose @ tracker_pc
21 | opt_normal = frame_pose.rotation @ tracker_normal
22 | color_pc, color, color_normal = tracker.last_colored_pc
23 | color_pc = frame_pose @ color_pc
24 | color_normal = frame_pose.rotation @ color_normal if color_normal is not None else None
25 |
26 | # mapping pc
27 | sm.integrate_keyframe(opt_depth, opt_normal)
28 | cm.integrate_keyframe(color_pc, color, color_normal)
29 |
30 | # mesh extraction
31 | map_mesh = sm.extract_mesh(config.resolution, int(4e7), max_std=0.15, extract_async=False, interpolate=True)
32 |
33 | import open3d as o3d
34 | o3d.io.write_triangle_mesh('example/mesh.ply', map_mesh)
35 |
36 |
37 |
--------------------------------------------------------------------------------
/example/util.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import cv2
4 | import yaml
5 | import argparse
6 | from pyquaternion import Quaternion
7 | from uni.encoder.uni_encoder_v2 import get_uni_model
8 | from uni.mapper.surface_map import SurfaceMap
9 | from uni.mapper.context_map_v2 import ContextMap # 8 points
10 | import uni.tracker.tracker_custom as tracker
11 |
12 | from uni.dataset import FrameIntrinsic
13 | from uni.utils import motion_util
14 |
15 | import pdb
16 |
17 |
18 | def get_modules(main_device='cuda:0'):
19 | with open('configs/replica/office0.yaml') as f:
20 | configs = yaml.load(f, Loader=yaml.FullLoader)
21 | args = argparse.Namespace(**configs)
22 |
23 | args.surface_mapping = argparse.Namespace(**(args.surface_mapping))
24 | args.context_mapping = argparse.Namespace(**(args.context_mapping))
25 | args.tracking = argparse.Namespace(**(args.tracking))
26 |
27 | uni_model = get_uni_model(main_device)
28 | cm = ContextMap(uni_model,
29 | args.context_mapping,
30 | uni_model.color_code_length,
31 | device=main_device,
32 | enable_async=False)
33 |
34 | sm = SurfaceMap(uni_model,
35 | cm,
36 | args.surface_mapping,
37 | uni_model.surface_code_length,
38 | device=main_device,
39 | enable_async=False)
40 |
41 | tk = tracker.SDFTracker(sm, args.tracking)
42 |
43 | return sm, cm, tk, args
44 |
45 | def get_example_data(main_device='cuda:0'):
46 |
47 | colors, depths, poses = [], [], []
48 | for name_rgb, name_depth in [('example/office0/frame000000.jpg', 'example/office0/depth000000.png'),
49 | ('example/office0/frame000020.jpg', 'example/office0/depth000020.png')]:
50 | rgb = cv2.imread(name_rgb,-1)
51 | depth = cv2.imread(name_depth,-1)
52 |
53 | color = torch.from_numpy(rgb).to(main_device).float() / 255.
54 | depth = torch.from_numpy(depth.astype(np.float32)).to(main_device).float() / 6553.5
55 |
56 | colors.append(color)
57 | depths.append(depth)
58 |
59 |
60 |
61 | customs = [None] * 4
62 | calib = FrameIntrinsic(600., 600., 599.5, 339.5, 6553.5)
63 |
64 | first_iso = motion_util.Isometry(q=Quaternion(array=[0.0, -1.0, 0.0, 0.0]))
65 | traj_mat = np.genfromtxt('example/office0/traj.txt').reshape((-1,4,4))
66 | for i in [0,20]:
67 | T = traj_mat[i,:,:]
68 | pose = first_iso.dot(motion_util.Isometry.from_matrix(T))
69 | poses.append(pose)
70 |
71 | return colors, depths, customs, calib, poses
72 |
--------------------------------------------------------------------------------
/external/openseg/openseg_api.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | #sys.path.append(os.path.dirname(__file__))
4 |
5 | import numpy as np
6 | import torch
7 | import clip
8 |
9 | import tensorflow.compat.v1 as tf
10 | import tensorflow as tf2
11 |
12 | from tqdm import tqdm
13 |
14 | import pdb
15 | '''
16 | gpus = tf2.config.experimental.list_physical_devices('GPU')
17 | for gpu in gpus:
18 | tf2.config.experimental.set_memory_growth(gpu, True)
19 | '''
20 | gpus = tf.config.experimental.list_physical_devices('GPU')
21 | if gpus:
22 | # Restrict TensorFlow to a 12 GB memory limit on the second GPU (gpus[1])
23 | try:
24 | tf.config.experimental.set_virtual_device_configuration(
25 | gpus[1],
26 | [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1024*12)]) # 12 GB limit
27 | logical_gpus = tf.config.experimental.list_logical_devices('GPU')
28 | print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
29 |
30 | #tf.config.experimental.set_memory_growth(gpus[1], True)
31 |
32 | except RuntimeError as e:
33 | # Virtual devices must be set before GPUs have been initialized
34 | print(e)
35 |
36 |
37 |
38 | clip.available_models()
39 | model, preprocess = clip.load("ViT-L/14@336px")
40 | '''
41 | def f_tx(label_src, device):
42 | #args.label_src = 'plant,grass,cat,stone,other'
43 |
44 |
45 | labels = []
46 | print('** Input label value: {} **'.format(label_src))
47 | lines = label_src.split(',')
48 | for line in lines:
49 | label = line
50 | labels.append(label)
51 |
52 | outputs = model.net.text_encode(labels, device)
53 | return outputs
54 | '''
55 |
56 | def build_text_embedding(categories):
57 | run_on_gpu = torch.cuda.is_available()
58 | with torch.no_grad():
59 | all_text_embeddings = []
60 | print("Building text embeddings...")
61 | for category in tqdm(categories):
62 | texts = clip.tokenize(category) #tokenize
63 | if run_on_gpu:
64 | texts = texts.cuda(1)
65 | text_embeddings = model.encode_text(texts) #embed with text encoder
66 |
67 | text_embeddings /= text_embeddings.norm(dim=-1, keepdim=True)
68 |
69 | text_embedding = text_embeddings.mean(dim=0)
70 |
71 | text_embedding /= text_embedding.norm()
72 |
73 | all_text_embeddings.append(text_embedding)
74 |
75 | all_text_embeddings = torch.stack(all_text_embeddings, dim=1)
76 |
77 | if run_on_gpu:
78 | all_text_embeddings = all_text_embeddings.cuda(0)
79 | return all_text_embeddings.cpu().numpy().T
80 |
81 |
82 | def f_tx(label_src):
83 | categories = label_src.split(',')
84 | feat = build_text_embedding(categories)
85 | return feat
86 |
87 |
88 | saved_model_dir = os.path.dirname(__file__)+'/exported_model' #@param {type:"string"}
89 | with tf.device('/GPU:1'):
90 | openseg_model = tf2.saved_model.load(saved_model_dir, tags=[tf.saved_model.tag_constants.SERVING],)
91 |
92 | classes = [
93 | 'unannotated', # 0
94 | 'wall', # 1
95 | 'floor', # 2
96 | 'chair', # 3
97 | 'table', # 4
98 | 'desk', # 5
99 | 'bed', # 6
100 | 'bookshelf', # 7
101 | 'sofa', # 8
102 | 'sink', # 9
103 | 'bathtub', # 10
104 | 'toilet', # 11
105 | 'curtain', # 12
106 | 'counter', # 13
107 | 'door', # 14
108 | 'window', # 15
109 | 'shower curtain', # 16
110 | 'refrigerator', # 17
111 | 'picture', # 18
112 | 'cabinet', # 19
113 | 'otherfurniture', # 20
114 | ]
115 |
116 |
117 |
118 | text = classes
119 | text[0] = 'other'
120 | text = ','.join(text)
121 |
122 |
123 | text_embedding = f_tx(text)#f_tx('desk,table')
124 | num_words_per_category = 1
125 | with tf.device('/GPU:1'):
126 | text_embedding = tf.reshape(
127 | text_embedding, [-1, num_words_per_category, text_embedding.shape[-1]])
128 | text_embedding = tf.cast(text_embedding, tf.float32)
129 |
130 | def f_im(np_str,H=320,W=240):
131 | with tf.device('/GPU:1'):
132 | output = openseg_model.signatures['serving_default'](
133 | inp_image_bytes=tf.convert_to_tensor(np_str[0]),
134 | inp_text_emb=text_embedding)
135 | #feat = output['image_embedding_feat'][0,:480,:,:] # 1,640,640,768 -> 480,640,768
136 |
137 | # if scannet
138 | '''
139 | feat = output['ppixel_ave_feat'][0,:480,:,:] # 1,640,640,768 -> 480,640,768
140 | feat = tf.image.resize(feat, [240, 320]) # 240,320,768
141 | '''
142 | # if 2D-3D-S
143 | feat = output['ppixel_ave_feat'][0,:,:,:]
144 | feat_h, feat_w = feat.shape[:2]
145 | H_ov_W = float(H)/float(W)
146 | feat_cropped = feat[:int(H_ov_W*feat_h),:]
147 |
148 | feat = tf.image.resize(feat_cropped, [H,W])
149 |
150 |
151 | #feat = tf.image.resize(feat, [320, 320])
152 |
153 | feat = feat.numpy()
154 | #feat = feat / np.linalg.norm(feat, axis=-1, keepdims=True)
155 |
156 | return feat
157 |
158 | def classify(image_features, text_features):
159 | '''
160 | both in np
161 | F_im is N,c
162 | F_tx is k,c where k is the classes
163 | '''
164 | #image_features = image_features / image_features.norm(dim=-1, keepdim=True)
165 | #text_features = text_features / text_features.norm(dim=-1, keepdim=True)
166 | logits_per_image = image_features.half() @ text_features.T # N,k
167 | return logits_per_image
168 |
169 | def get_api():
170 | return f_im, f_tx, classify, 768
171 |
172 |
173 |
174 |
175 |
176 | '''
177 | For each category you can list different names. Use ';' to separate different categories and use ',' to separate different names of a category.
178 | E.g. 'lady, ladies, girl, girls; book' creates two categories of 'lady or ladies or girl or girls' and 'book'.
179 | '''
180 |
181 |
--------------------------------------------------------------------------------
/scripts/download_replica.sh:
--------------------------------------------------------------------------------
1 | # script from NICE-SLAM
2 |
3 | mkdir -p data
4 | cd data
5 | wget https://cvg-data.inf.ethz.ch/nice-slam/data/Replica.zip
6 | unzip Replica.zip
7 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
3 | setup(name='uni',
4 | version='0.1',
5 | description='Uni-Fusion',
6 | author='Yijun Yuan',
7 | url='https://github.com/Jarrome/Uni-Fusion',
8 | packages=find_packages(),
9 | )
10 |
--------------------------------------------------------------------------------
/uni/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jarrome/Uni-Fusion/c49461d5aa53375380ca2eb7fbdf4ce8e14aeda0/uni/__init__.py
--------------------------------------------------------------------------------
/uni/dataset/3dscene.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | import os
4 | import time
5 | import torch
6 |
7 | from collections import defaultdict, namedtuple
8 |
9 | from threading import Thread, Lock
10 | from uni.dataset import *
11 | from uni.utils import motion_util
12 | from pyquaternion import Quaternion
13 |
14 | import open3d as o3d
15 |
16 | import pdb
17 |
18 |
19 | class ImageReader(object):
20 | def __init__(self, ids, timestamps=None, cam=None, is_rgb=False):
21 | self.ids = ids
22 | self.timestamps = timestamps
23 | self.cam = cam
24 | self.cache = dict()
25 | self.idx = 0
26 |
27 | self.is_rgb = is_rgb
28 |
29 | self.ahead = 10 # 10 images ahead of current index
30 | self.waiting = 1.5 # waiting time
31 |
32 | self.preload_thread = Thread(target=self.preload)
33 | self.thread_started = False
34 |
35 | def read(self, path):
36 | img = cv2.imread(path, -1)
37 | if self.is_rgb:
38 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
39 |
40 | if self.cam is None:
41 | return img
42 | else:
43 | return self.cam.rectify(img)
44 |
45 | def preload(self):
46 | idx = self.idx
47 | t = float('inf')
48 | while True:
49 | if time.time() - t > self.waiting:
50 | return
51 | if self.idx == idx:
52 | time.sleep(1e-2)
53 | continue
54 |
55 | for i in range(self.idx, self.idx + self.ahead):
56 | if i not in self.cache and i < len(self.ids):
57 | self.cache[i] = self.read(self.ids[i])
58 | if self.idx + self.ahead > len(self.ids):
59 | return
60 | idx = self.idx
61 | t = time.time()
62 |
63 | def __len__(self):
64 | return len(self.ids)
65 |
66 | def __getitem__(self, idx):
67 | self.idx = idx
68 | # if not self.thread_started:
69 | # self.thread_started = True
70 | # self.preload_thread.start()
71 |
72 | if idx in self.cache:
73 | img = self.cache[idx]
74 | del self.cache[idx]
75 | else:
76 | img = self.read(self.ids[idx])
77 |
78 | return img
79 |
80 | def __iter__(self):
81 | for i, timestamp in enumerate(self.timestamps):
82 | yield timestamp, self[i]
83 |
84 | @property
85 | def dtype(self):
86 | return self[0].dtype
87 | @property
88 | def shape(self):
89 | return self[0].shape
90 |
91 |
92 |
93 |
94 |
95 |
96 | def make_pair(matrix, threshold=1):
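    """Greedily pair RGB and depth frames by timestamp.

    matrix[i, j] holds |t_rgb[i] - t_depth[j]|. Each step takes the closer of
    (best rgb match for depth[0], best depth match for rgb[0]), keeps the pair
    only if its gap is below `threshold`, then crops the matrix past the
    matched row and column and repeats until the matrix is empty.
    """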
97 | assert (matrix >= 0).all()
98 | pairs = []
99 | base = defaultdict(int)
100 | while True:
101 | i = matrix[:, 0].argmin()
102 | min0 = matrix[i, 0]
103 | j = matrix[0, :].argmin()
104 | min1 = matrix[0, j]
105 |
106 | if min0 < min1:
107 | i, j = i, 0
108 | else:
109 | i, j = 0, j
110 | if min(min1, min0) < threshold:
111 | pairs.append((i + base['i'], j + base['j']))
112 |
113 | matrix = matrix[i + 1:, j + 1:]
114 | base['i'] += (i + 1)
115 | base['j'] += (j + 1)
116 |
117 | if min(matrix.shape) == 0:
118 | break
119 | return pairs
120 |
121 |
122 | class ThreeDSceneRGBDDataset:
123 | def __init__(self, path, start_frame: int = 0, end_frame: int = -1, first_tq: list = None, load_gt: bool = False, register=True, mesh_gt: str = None):
124 | path = os.path.expanduser(path)
125 |
126 | self.calib = FrameIntrinsic(525., 525., 319.5, 239.5, 1000)
127 |
128 |
129 | self.first_iso = motion_util.Isometry(q=Quaternion(array=[0.0, -1.0, 0.0, 0.0]))
130 |
131 |
132 | if not register:
133 | rgb_ids, rgb_timestamps = self.listdir(path, 'color')
134 | depth_ids, depth_timestamps = self.listdir(path, 'depth')
135 | else:
136 | rgb_imgs, rgb_timestamps = self.listdir(path, 'color', ext='.png')
137 | depth_imgs, depth_timestamps = self.listdir(path, 'depth')
138 |
139 | interval = (rgb_timestamps[1:] - rgb_timestamps[:-1]).mean() * 2/3
140 | matrix = np.abs(rgb_timestamps[:, np.newaxis] - depth_timestamps)
141 | pairs = make_pair(matrix, interval)
142 |
143 | rgb_ids = []
144 | depth_ids = []
145 | for i, j in pairs:
146 | rgb_ids.append(rgb_imgs[i])
147 | depth_ids.append(depth_imgs[j])
148 |
149 | self.rgb = ImageReader(rgb_ids, rgb_timestamps, is_rgb=True)
150 | self.depth = ImageReader(depth_ids, depth_timestamps)
151 | self.timestamps = rgb_timestamps
152 |
153 | self.frame_id = 0
154 |
155 | if load_gt:
156 | path = path[:-1] if path[-1]=='/' else path
157 | scene = path.split('/')[-1]
158 | gt_traj_path = os.path.join(path,scene+'_trajectory.log')
159 | self.gt_trajectory = self._parse_traj_file(gt_traj_path)
160 | self.gt_trajectory = self.gt_trajectory[start_frame:end_frame]
161 | #change_iso = self.first_iso.dot(self.gt_trajectory[0].inv())
162 | #self.gt_trajectory = [change_iso.dot(t) for t in self.gt_trajectory]
163 | assert len(self.gt_trajectory) == len(self.rgb)
164 | self.T_gt2uni = np.eye(4) #change_iso.matrix
165 |
166 | else:
167 | self.gt_trajectory = None
168 | self.T_gt2uni = self.first_iso.matrix
169 |
170 |
171 |
172 | if mesh_gt is None:
173 | print("using reconstruction mesh")
174 | else:
175 | if mesh_gt != '' and load_gt:
176 | self.gt_mesh = self.get_ground_truth_mesh(mesh_gt)#, gt_traj_path)
177 | def get_ground_truth_mesh(self, mesh_path):#, gt_traj_path):
178 | import trimesh
179 | '''
180 | with open(gt_traj_path, 'r') as f:
181 | ls = f.readlines()
182 | traj_data = []
183 | for i in range(1):
184 | mat = []
185 | for j in range(1,5):
186 | mat.append([float(item) for item in ls[i*5+j].strip().split('\t')])
187 | traj_data.append(np.array(mat))
188 |
189 |
190 | T0 = traj_data[0].reshape((4,4))
191 | change_mat = (self.first_iso.matrix.dot(np.linalg.inv(T0)))
192 | '''
193 |
194 | change_mat = self.T_gt2uni
195 |
196 |
197 |
198 | mesh_gt = trimesh.load(mesh_path)
199 | mesh_gt.apply_transform(change_mat)
200 | return mesh_gt.as_open3d
201 |
202 |
203 |
204 |
205 |
206 | def _parse_traj_file(self,traj_path):
207 | camera_ext = {}
208 | with open(traj_path, 'r') as f:
209 | ls = f.readlines()
210 | traj_data = []
211 | for i in range(int(len(ls)/5)):
212 | mat = []
213 | for j in range(1,5):
214 | mat.append([float(item) for item in ls[i*5+j].strip().split('\t')])
215 | traj_data.append(np.array(mat))
216 |
217 | #cano_quat = motion_util.Isometry(q=Quaternion(axis=[0.0, 0.0, 1.0], degrees=180.0))
218 | for id, cur_p in enumerate(traj_data):
219 | T = cur_p.reshape((4,4))
220 | #cur_q = T[:3,:3]
221 | #cur_t = T[:3, 3]
222 | cur_iso = motion_util.Isometry.from_matrix(T, ortho=True) #(q=Quaternion(matrix=cur_q), t=cur_t)
223 | camera_ext[id] = cur_iso #cano_quat.dot(cur_iso)
224 | camera_ext[len(camera_ext)] = camera_ext[len(camera_ext)-1]
225 | return [camera_ext[t] for t in range(len(camera_ext))]
226 |
227 |
228 |
229 |
230 |
231 | def sort(self, xs, st = 3):
232 | return sorted(xs, key=lambda x:float(x[st:-4]))
233 |
234 | def listdir(self, path, split='rgb', ext='.png'):
235 | imgs, timestamps = [], []
236 | files = [x for x in os.listdir(os.path.join(path, split)) if x.endswith(ext)]
237 | st = 0
238 | for name in self.sort(files,st):
239 | imgs.append(os.path.join(path, split, name))
240 | timestamp = float(name[st:-len(ext)].rstrip('.'))
241 | timestamps.append(timestamp)
242 |
243 | return imgs, np.array(timestamps)
244 |
245 | def __getitem__(self, idx):
246 | frame_data = FrameData()
247 | if self.gt_trajectory is not None:
248 | frame_data.gt_pose = self.gt_trajectory[idx]
249 | else:
250 | frame_data.gt_pose = None
251 | frame_data.calib = self.calib
252 | frame_data.depth = torch.from_numpy(self.depth[idx].astype(np.float32)).cuda().float() / 1000.
253 | frame_data.rgb = torch.from_numpy(self.rgb[idx]).cuda().float() / 255.
254 | return frame_data
255 |
256 |
257 | def __next__(self):
258 | frame_data = FrameData()
259 | if self.gt_trajectory is not None:
260 | frame_data.gt_pose = self.gt_trajectory[self.frame_id]
261 | else:
262 | frame_data.gt_pose = None
263 | frame_data.calib = self.calib
264 | frame_data.depth = torch.from_numpy(self.depth[self.frame_id].astype(np.float32)).cuda().float() / 1000.
265 | frame_data.rgb = torch.from_numpy(self.rgb[self.frame_id]).cuda().float() / 255.
266 | self.frame_id += 1
267 | return frame_data
268 |
269 | def __len__(self):
270 | return len(self.rgb)
271 |
272 |
273 |
--------------------------------------------------------------------------------
/uni/dataset/NICE_SLAM_config/demo.yaml:
--------------------------------------------------------------------------------
1 | dataset: 'scannet'
2 | sync_method: loose
3 | coarse: True
4 | verbose: False
5 | meshing:
6 | resolution: 256
7 | tracking:
8 | vis_freq: 50
9 | vis_inside_freq: 25
10 | ignore_edge_W: 20
11 | ignore_edge_H: 20
12 | seperate_LR: False
13 | const_speed_assumption: True
14 | lr: 0.0005
15 | pixels: 1000
16 | iters: 30
17 | mapping:
18 | every_frame: 10
19 | vis_freq: 50
20 | vis_inside_freq: 30
21 | mesh_freq: 50
22 | ckpt_freq: 500
23 | keyframe_every: 50
24 | mapping_window_size: 10
25 | pixels: 1000
26 | iters_first: 400
27 | iters: 10
28 | bound: [[0.0,6.5],[0.0,4.0],[0,3.5]]
29 | marching_cubes_bound: [[0.0,6.5],[0.0,4.0],[0,3.5]]
30 | cam:
31 | H: 480
32 | W: 640
33 | fx: 577.590698
34 | fy: 578.729797
35 | cx: 318.905426
36 | cy: 242.683609
37 | png_depth_scale: 1000. #for depth image in png format
38 | crop_edge: 0
39 | data:
40 | input_folder: Datasets/Demo
41 | output: output/Demo
42 |
43 |
--------------------------------------------------------------------------------
/uni/dataset/__init__.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | class FrameIntrinsic:
5 | def __init__(self, fx, fy, cx, cy, dscale):
6 | self.cx = cx
7 | self.cy = cy
8 | self.fx = fx
9 | self.fy = fy
10 | self.dscale = dscale
11 |
12 | def to_K(self):
13 | return np.asarray([
14 | [self.fx, 0.0, self.cx],
15 | [0.0, self.fy, self.cy],
16 | [0.0, 0.0, 1.0]
17 | ])
18 |
19 |
20 | class FrameData:
21 | def __init__(self):
22 | self.rgb = None
23 | self.depth = None
24 | self.gt_pose = None
25 | self.calib = None
26 |
27 |
28 | class RGBDSequence:
29 | def __init__(self):
30 | self.frame_id = 0
31 |
32 | def __iter__(self):
33 | return self
34 |
35 | def __len__(self):
36 | raise NotImplementedError
37 |
38 | def __next__(self):
39 | raise NotImplementedError
40 |
--------------------------------------------------------------------------------
/uni/dataset/aug_icl.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | import os
4 | import time
5 | import torch
6 |
7 | from collections import defaultdict, namedtuple
8 |
9 | from threading import Thread, Lock
10 | from uni.dataset import *
11 | from uni.utils import motion_util
12 | from pyquaternion import Quaternion
13 |
14 | import open3d as o3d
15 |
16 | import pdb
17 |
18 |
19 | class ImageReader(object):
20 | def __init__(self, ids, timestamps=None, cam=None, is_rgb=False):
21 | self.ids = ids
22 | self.timestamps = timestamps
23 | self.cam = cam
24 | self.cache = dict()
25 | self.idx = 0
26 |
27 | self.is_rgb = is_rgb
28 |
29 | self.ahead = 10 # 10 images ahead of current index
30 | self.waiting = 1.5 # waiting time
31 |
32 | self.preload_thread = Thread(target=self.preload)
33 | self.thread_started = False
34 |
35 | def read(self, path):
36 | img = cv2.imread(path, -1)
37 | if self.is_rgb:
38 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
39 |
40 | if self.cam is None:
41 | return img
42 | else:
43 | return self.cam.rectify(img)
44 |
45 | def preload(self):
46 | idx = self.idx
47 | t = float('inf')
48 | while True:
49 | if time.time() - t > self.waiting:
50 | return
51 | if self.idx == idx:
52 | time.sleep(1e-2)
53 | continue
54 |
55 | for i in range(self.idx, self.idx + self.ahead):
56 | if i not in self.cache and i < len(self.ids):
57 | self.cache[i] = self.read(self.ids[i])
58 | if self.idx + self.ahead > len(self.ids):
59 | return
60 | idx = self.idx
61 | t = time.time()
62 |
63 | def __len__(self):
64 | return len(self.ids)
65 |
66 | def __getitem__(self, idx):
67 | self.idx = idx
68 | # if not self.thread_started:
69 | # self.thread_started = True
70 | # self.preload_thread.start()
71 |
72 | if idx in self.cache:
73 | img = self.cache[idx]
74 | del self.cache[idx]
75 | else:
76 | img = self.read(self.ids[idx])
77 |
78 | return img
79 |
80 | def __iter__(self):
81 | for i, timestamp in enumerate(self.timestamps):
82 | yield timestamp, self[i]
83 |
84 | @property
85 | def dtype(self):
86 | return self[0].dtype
87 | @property
88 | def shape(self):
89 | return self[0].shape
90 |
91 |
92 |
93 |
94 |
95 |
96 | def make_pair(matrix, threshold=1):
97 | assert (matrix >= 0).all()
98 | pairs = []
99 | base = defaultdict(int)
100 | while True:
101 | i = matrix[:, 0].argmin()
102 | min0 = matrix[i, 0]
103 | j = matrix[0, :].argmin()
104 | min1 = matrix[0, j]
105 |
106 | if min0 < min1:
107 | i, j = i, 0
108 | else:
109 | i, j = 0, j
110 | if min(min1, min0) < threshold:
111 | pairs.append((i + base['i'], j + base['j']))
112 |
113 | matrix = matrix[i + 1:, j + 1:]
114 | base['i'] += (i + 1)
115 | base['j'] += (j + 1)
116 |
117 | if min(matrix.shape) == 0:
118 | break
119 | return pairs
120 |
121 |
122 | class AugICLRGBDDataset:
123 | def __init__(self, path, start_frame: int = 0, end_frame: int = -1, first_tq: list = None, load_gt: bool = False, register=False, mesh_gt: str = None):
124 | path = os.path.expanduser(path)
125 |
126 | self.calib = FrameIntrinsic(525., 525., 319.5, 239.5, 1000)
127 |
128 |
129 | self.first_iso = motion_util.Isometry(q=Quaternion(array=[0.0, -1.0, 0.0, 0.0]))
130 |
131 |
132 | if not register:
133 | rgb_ids, rgb_timestamps = self.listdir(path, 'color',ext='.jpg')
134 | depth_ids, depth_timestamps = self.listdir(path, 'depth_w_noise')
135 | else:
136 | assert False, "not implemented"
137 |
138 | self.rgb = ImageReader(rgb_ids, rgb_timestamps, is_rgb=True)
139 | self.depth = ImageReader(depth_ids, depth_timestamps)
140 | self.timestamps = rgb_timestamps
141 |
142 | self.frame_id = 0
143 |
144 | if load_gt:
145 | path = path[:-1] if path[-1]=='/' else path
146 | scene = path.split('/')[-1]
147 | gt_traj_path = os.path.join(path,scene+'-traj.txt')
148 | self.gt_trajectory = self._parse_traj_file(gt_traj_path)
149 | self.gt_trajectory = self.gt_trajectory[start_frame:end_frame]
150 | change_iso = self.first_iso.dot(self.gt_trajectory[0].inv())
151 | self.gt_trajectory = [change_iso.dot(t) for t in self.gt_trajectory]
152 | assert len(self.gt_trajectory) == len(self.rgb)
153 | self.T_gt2uni = change_iso.matrix
154 |
155 | else:
156 | self.gt_trajectory = None
157 | self.T_gt2uni = self.first_iso.matrix
158 |
159 |
160 |
161 | if mesh_gt is None:
162 | print("using reconstruction mesh")
163 | else:
164 | if mesh_gt != '' and load_gt:
165 | self.gt_mesh = self.get_ground_truth_mesh(mesh_gt)#, gt_traj_path)
166 | def get_ground_truth_mesh(self, mesh_path):#, gt_traj_path):
167 | import trimesh
168 | '''
169 | with open(gt_traj_path, 'r') as f:
170 | ls = f.readlines()
171 | traj_data = []
172 | for i in range(1):
173 | mat = []
174 | for j in range(1,5):
175 | mat.append([float(item) for item in ls[i*5+j].strip().split(' ')])
176 | traj_data.append(np.array(mat))
177 |
178 |
179 | T0 = traj_data[0].reshape((4,4))
180 | change_mat = (self.first_iso.matrix.dot(np.linalg.inv(T0)))
181 | '''
182 | change_mat = self.T_gt2uni
183 |
184 | mesh_gt = trimesh.load(mesh_path)
185 | mesh_gt.apply_transform(change_mat)
186 | return mesh_gt#.as_open3d
187 |
188 |
189 |
190 |
191 |
192 | def _parse_traj_file(self,traj_path):
193 | camera_ext = {}
194 | with open(traj_path, 'r') as f:
195 | ls = f.readlines()
196 | traj_data = []
197 | for i in range(int(len(ls)/5)):
198 | mat = []
199 | for j in range(1,5):
200 | mat.append([float(item) for item in ls[i*5+j].strip().split(' ')])
201 | traj_data.append(np.array(mat))
202 |
203 | #cano_quat = motion_util.Isometry(q=Quaternion(axis=[0.0, 0.0, 1.0], degrees=180.0))
204 | for id, cur_p in enumerate(traj_data):
205 | T = cur_p.reshape((4,4))
206 | #cur_q = T[:3,:3]
207 | #cur_t = T[:3, 3]
208 | cur_iso = motion_util.Isometry.from_matrix(T, ortho=True) #(q=Quaternion(matrix=cur_q), t=cur_t)
209 | camera_ext[id] = cur_iso #cano_quat.dot(cur_iso)
210 | camera_ext[len(camera_ext)] = camera_ext[len(camera_ext)-1]
211 | return [camera_ext[t] for t in range(len(camera_ext))]
212 |
213 |
214 |
215 |
216 |
217 | def sort(self, xs, st = 3):
218 | return sorted(xs, key=lambda x:float(x[st:-4]))
219 |
220 | def listdir(self, path, split='rgb', ext='.png'):
221 | imgs, timestamps = [], []
222 | files = [x for x in os.listdir(os.path.join(path, split)) if x.endswith(ext)]
223 | st = 0
224 | for name in self.sort(files,st):
225 | imgs.append(os.path.join(path, split, name))
226 | timestamp = float(name[st:-len(ext)].rstrip('.'))
227 | timestamps.append(timestamp)
228 |
229 | return imgs, np.array(timestamps)
230 |
231 | def __getitem__(self, idx):
232 | return self.rgb[idx], self.depth[idx]
233 | def __next__(self):
234 | frame_data = FrameData()
235 | if self.gt_trajectory is not None:
236 | frame_data.gt_pose = self.gt_trajectory[self.frame_id]
237 | else:
238 | frame_data.gt_pose = None
239 | frame_data.calib = self.calib
240 | frame_data.depth = torch.from_numpy(self.depth[self.frame_id].astype(np.float32)).cuda().float() / 1000.
241 | frame_data.rgb = torch.from_numpy(self.rgb[self.frame_id]).cuda().float() / 255.
242 | self.frame_id += 1
243 | return frame_data
244 |
245 | def __len__(self):
246 | return len(self.rgb)
247 |
248 |
249 |
--------------------------------------------------------------------------------
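All the dataset loaders in `uni/dataset` follow the pattern above: `__next__` packs one `FrameData` with the calibration, a metric depth map and a normalized RGB tensor already on the GPU. The sketch below is a minimal consumption loop, assuming the augmented ICL sequence is on disk in the `color/`, `depth_w_noise/` and `<scene>-traj.txt` layout the loader expects and that a CUDA device is available; the path is a placeholder.

```python
# Minimal sketch (placeholder path): iterate AugICLRGBDDataset frame by frame.
from uni.dataset.aug_icl import AugICLRGBDDataset

seq = AugICLRGBDDataset('/data/aug_icl/livingroom1', load_gt=True)
for _ in range(len(seq)):
    frame = next(seq)          # FrameData with .rgb, .depth, .calib, .gt_pose
    depth_m = frame.depth      # (H, W) float32 CUDA tensor in meters (raw / 1000)
    rgb = frame.rgb            # (H, W, 3) float32 CUDA tensor in [0, 1]
    print(depth_m.shape, rgb.shape, frame.gt_pose is None)
```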
/uni/dataset/azure.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | import os
4 | import time
5 | import torch
6 |
7 | from collections import defaultdict, namedtuple
8 |
9 | from threading import Thread, Lock
10 | from uni.dataset import *
11 | from uni.utils import motion_util
12 | from pyquaternion import Quaternion
13 |
14 | import open3d as o3d
15 |
16 | from PIL import Image
17 |
18 |
19 | import pdb
20 |
21 |
22 | class ImageReader(object):
23 | def __init__(self, ids, timestamps=None, cam=None, is_rgb=False):
24 | self.ids = ids
25 | self.timestamps = timestamps
26 | self.cam = cam
27 | self.cache = dict()
28 | self.idx = 0
29 |
30 | self.is_rgb = is_rgb
31 |
32 | self.ahead = 10 # 10 images ahead of current index
33 | self.waiting = 1.5 # waiting time
34 |
35 | self.preload_thread = Thread(target=self.preload)
36 | self.thread_started = False
37 |
38 | def read(self, path):
39 | img = cv2.imread(path, -1)
40 | if self.is_rgb:
41 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
42 |
43 | if self.cam is None:
44 | return img
45 | else:
46 | return self.cam.rectify(img)
47 |
48 | def preload(self):
49 | idx = self.idx
50 | t = float('inf')
51 | while True:
52 | if time.time() - t > self.waiting:
53 | return
54 | if self.idx == idx:
55 | time.sleep(1e-2)
56 | continue
57 |
58 | for i in range(self.idx, self.idx + self.ahead):
59 | if i not in self.cache and i < len(self.ids):
60 | self.cache[i] = self.read(self.ids[i])
61 | if self.idx + self.ahead > len(self.ids):
62 | return
63 | idx = self.idx
64 | t = time.time()
65 |
66 | def __len__(self):
67 | return len(self.ids)
68 |
69 | def __getitem__(self, idx):
70 | self.idx = idx
71 | # if not self.thread_started:
72 | # self.thread_started = True
73 | # self.preload_thread.start()
74 |
75 | if idx in self.cache:
76 | img = self.cache[idx]
77 | del self.cache[idx]
78 | else:
79 | img = self.read(self.ids[idx])
80 |
81 | return img
82 |
83 | def __iter__(self):
84 | for i, timestamp in enumerate(self.timestamps):
85 | yield timestamp, self[i]
86 |
87 | @property
88 | def dtype(self):
89 | return self[0].dtype
90 | @property
91 | def shape(self):
92 | return self[0].shape
93 |
94 |
95 |
96 | def read_orbslam2_file(traj_file):
97 | with open(traj_file) as f:
98 | lines = f.readlines()
99 | poses = []
100 | frame_ids = []
101 | for line_id, line in enumerate(lines):
102 | vs = [float(v) for v in line.strip().split(' ')]
103 | frame_id = round(vs[0]*30)
104 | #frame_id = round(vs[0])
105 | v_t = vs[1:4]
106 | #v_q = vs[4:] # xyzw
107 | v_q = Quaternion(vs[-1],*vs[4:-1])
108 | pose = v_q.transformation_matrix
109 | pose[:3,3] = np.array(v_t)
110 | poses.append(pose)
111 | frame_ids.append(frame_id)
112 | return frame_ids, poses
113 |
114 |
115 |
116 |
117 | class AzureRGBDIDataset(object):
118 |
119 | def __init__(self, path, start_frame: int = 0, end_frame: int = -1, first_tq: list = None, load_gt: bool = False, register=True, mesh_gt: str = None):
120 | path = os.path.expanduser(path)
121 |
122 |
123 | cam = np.genfromtxt(path+'/intrinsic.txt')
124 | self.cam = namedtuple('camera', 'fx fy cx cy scale')(
125 | *(cam.tolist()), 1000)
126 |
127 | self.first_iso = motion_util.Isometry(q=Quaternion(array=[0.0, -1.0, 0.0, 0.0]))
128 |
129 |
130 | self.start_frame = start_frame
131 | self.end_frame = end_frame
132 |
133 | rgb_ids, rgb_timestamps = self.listdir(path, 'color')
134 | depth_ids, depth_timestamps = self.listdir(path, 'depth')
135 | ir_ids, ir_timestamps = self.listdir(path, 'ir')
136 |
137 |
138 | if load_gt:
139 | traj_path = path+'/traj_orbslam2.txt'
140 | frame_ids, poses = read_orbslam2_file(traj_path)
141 | rgb_ids = [rgb_ids[frame_id] for frame_id in frame_ids]
142 | depth_ids = [depth_ids[frame_id] for frame_id in frame_ids]
143 | ir_ids = [ir_ids[frame_id] for frame_id in frame_ids]
144 | rgb_timestamps = [rgb_timestamps[frame_id] for frame_id in frame_ids]
145 | '''
146 | now_id = 0
147 | frame_ids_ = []
148 | poses_ = []
149 | for idx in range(0,frame_ids[-1]+1):
150 | frame_ids_.append(idx)
151 | if idx in frame_ids:
152 | poses_.append(poses[now_id])
153 | now_id += 1
154 | else:
155 | poses_.append(np.eye(4))
156 | frame_ids = frame_ids_
157 | poses = poses_
158 | '''
159 |
160 | self.gt_trajectory = self._parse_traj(poses)
161 | self.gt_trajectory = self.gt_trajectory[start_frame:end_frame]
162 | change_iso = self.first_iso.dot(self.gt_trajectory[0].inv())
163 | self.gt_trajectory = [change_iso.dot(t) for t in self.gt_trajectory]
164 | self.T_gt2uni = change_iso.matrix
165 |
166 | else:
167 | self.gt_trajectory = None
168 | self.T_gt2uni = self.first_iso.matrix
169 |
170 |
171 |
172 | self.rgb_ids = rgb_ids
173 | self.rgb = ImageReader(rgb_ids, rgb_timestamps, is_rgb=True)
174 | self.depth = ImageReader(depth_ids, depth_timestamps)
175 | self.ir = ImageReader(ir_ids, ir_timestamps)
176 |
177 | self.timestamps = rgb_timestamps
178 |
179 | self.frame_id = 0
180 |
181 | if mesh_gt is None:
182 | print("using reconstruction mesh")
183 | else:
184 | if mesh_gt != '' and load_gt:
185 | self.gt_mesh = self.get_ground_truth_mesh(mesh_gt)#, gt_traj_path)
186 |
187 | # saliency
188 | from transparent_background import Remover
189 | self.saliency_detector = Remover()
190 |
191 | # style
192 | from thirdparts.style_transfer.experiments import style_api
193 | self.style_painting = style_api.get_api()
194 |
195 |
196 | def _parse_traj(self,traj_data):
197 | camera_ext = {}
198 | for id, cur_p in enumerate(traj_data):
199 | T = cur_p.reshape((4,4))
200 | #cur_q = Quaternion(imaginary=cur_p[4:7], real=cur_p[-1]).rotation_matrix
201 | cur_q = T[:3,:3]
202 | cur_t = T[:3, 3]
203 | cur_iso = motion_util.Isometry(q=Quaternion(matrix=cur_q), t=cur_t)
204 | camera_ext[id] = cur_iso #cano_quat.dot(cur_iso)
205 | camera_ext[len(camera_ext)] = camera_ext[len(camera_ext)-1]
206 | return [camera_ext[t] for t in range(len(camera_ext))]
207 |
208 |
209 |
210 |
211 |
212 | def sort(self, xs, st = 3):
213 | return sorted(xs, key=lambda x:float(x[st:-4]))
214 |
215 | def listdir(self, path, split='rgb', ext='.png'):
216 | imgs, timestamps = [], []
217 | files = [x for x in os.listdir(os.path.join(path, split)) if x.endswith(ext)]
218 | st = 0
219 | for name in self.sort(files,st):
220 | imgs.append(os.path.join(path, split, name))
221 | timestamp = float(name[st:-len(ext)].rstrip('.'))
222 | timestamps.append(timestamp)
223 |
224 | imgs = imgs[self.start_frame: self.end_frame]
225 | timestamps = timestamps[self.start_frame: self.end_frame]
226 |
227 | return imgs, np.array(timestamps)
228 |
229 | def __getitem__(self, idx):
230 | frame_data = FrameData()
231 | if self.gt_trajectory is not None:
232 | frame_data.gt_pose = self.gt_trajectory[idx]
233 | else:
234 | frame_data.gt_pose = None
235 | frame_data.calib = FrameIntrinsic(self.cam.fx, self.cam.fy, self.cam.cx, self.cam.cy, self.cam.scale)
236 | frame_data.depth = torch.from_numpy(self.depth[idx].astype(np.float32)).cuda().float() / self.cam.scale
237 | frame_data.rgb = torch.from_numpy(self.rgb[idx]).cuda().float() / 255.
238 | frame_data.ir = torch.from_numpy(self.ir[idx].astype(np.float32)).cuda().float().unsqueeze(-1)
239 |
240 | img = Image.fromarray(self.rgb[idx]).convert('RGB')
241 | frame_data.saliency = torch.from_numpy(
242 | self.saliency_detector.process(img,type='map')
243 | ).cuda().float() /255
244 |
245 |
246 | frame_data.style = torch.from_numpy(
247 | self.style_painting(img)).cuda().float() / 255.
248 |
249 |
250 | return frame_data
251 |
252 |
253 | def __next__(self):
254 | frame_data = FrameData()
255 | if self.gt_trajectory is not None:
256 | frame_data.gt_pose = self.gt_trajectory[self.frame_id]
257 | else:
258 | frame_data.gt_pose = None
259 | frame_data.calib = FrameIntrinsic(self.cam.fx, self.cam.fy, self.cam.cx, self.cam.cy, self.cam.scale)
260 | frame_data.depth = torch.from_numpy(self.depth[self.frame_id].astype(np.float32)).cuda().float() / self.cam.scale
261 | frame_data.rgb = torch.from_numpy(self.rgb[self.frame_id]).cuda().float() / 255.
262 | frame_data.ir = torch.from_numpy(self.ir[self.frame_id].astype(np.float32)).cuda().float()
263 |
264 |         img = Image.fromarray(self.rgb[self.frame_id]).convert('RGB')
265 | frame_data.saliency = torch.from_numpy(
266 | self.saliency_detector.process(img,type='map').astype(np.float32)
267 | ).cuda().float()
268 |
269 |
270 | frame_data.style = torch.from_numpy(
271 | self.style_painting(img)).cuda().float() / 255.
272 |
273 |
274 | self.frame_id += 1
275 | return frame_data
276 |
277 | def __len__(self):
278 | return len(self.rgb)
279 |
280 |
--------------------------------------------------------------------------------
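`read_orbslam2_file` above assumes one TUM-style line per pose, `timestamp tx ty tz qx qy qz qw`, recorded at 30 fps. The snippet below repeats that conversion for a single made-up line, so the quaternion ordering (w first for pyquaternion) and the translation overwrite are easy to verify in isolation.

```python
# Hedged sketch: one ORB-SLAM2 trajectory line -> 4x4 pose, mirroring read_orbslam2_file.
import numpy as np
from pyquaternion import Quaternion

line = "0.033333 0.1 0.2 0.3 0.0 0.0 0.0 1.0"   # t tx ty tz qx qy qz qw (made up)
vs = [float(v) for v in line.strip().split(' ')]
frame_id = round(vs[0] * 30)                    # recording assumed to run at 30 fps
q = Quaternion(vs[-1], *vs[4:-1])               # pyquaternion takes (w, x, y, z)
pose = q.transformation_matrix                  # 4x4, zero translation so far
pose[:3, 3] = np.array(vs[1:4])                 # fill in the translation
print(frame_id)
print(pose)
```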
/uni/dataset/custom.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import cv2
3 | import numpy as np
4 | import open3d as o3d
5 | from PIL import Image
6 | import tensorflow.compat.v1 as tf
7 |
8 |
9 | from uni.dataset import *
10 | from uni.dataset.latent_map import ScanNetDataset
11 | from uni.dataset.replica import ReplicaRGBDDataset
12 | from uni.dataset.bpnet_scannet import ScanNetLatentDataset
13 | from uni.dataset.azure import AzureRGBDIDataset
14 |
15 | from tqdm import tqdm
16 |
17 |
18 | import pdb
19 |
20 |
21 | class CustomScanNet(ScanNetDataset):
22 | def __init__(self, path, start_frame: int = 0, end_frame: int = -1, first_tq: list = None, load_gt: bool = False, train=True, mesh_gt = None, style_idx=1, has_style=True, has_ir=False, has_saliency=True, has_latent=False, f_im=None):
23 | super().__init__(path, start_frame, end_frame, first_tq, load_gt, train, mesh_gt)
24 | self.has_ir = has_ir
25 | self.has_saliency = has_saliency
26 | self.has_style = has_style
27 | self.has_latent = has_latent
28 | # saliency
29 | from transparent_background import Remover
30 | self.saliency_detector = Remover()
31 |
32 | # style
33 | from thirdparts.style_transfer.experiments import style_api
34 | self.style_painting = style_api.get_api(style_idx)
35 |
36 | # latent
37 | self.latent_func = f_im
38 |
39 | # np_str
40 | self.np_image_strings = []
41 | for rgb_id in self.inner_dataset.color_paths:
42 | with tf.gfile.GFile(rgb_id, 'rb') as f:
43 | np_image_string = np.array([f.read()])
44 | self.np_image_strings.append(np_image_string)
45 |
46 |
47 |
48 | def __getitem__(self, idx):
49 | index, rgb, depth, pose = self.inner_dataset[idx]
50 |
51 | frame_data = FrameData()
52 | frame_data.calib = FrameIntrinsic(self.fx, self.fy, self.cx, self.cy, self.depth_scaling_factor)
53 | frame_data.depth = depth.cuda(0).float()# torch.from_numpy(self.depth[idx_id].astype(np.float32)).cuda().float()# / self.depth_scaling_factor
54 | frame_data.rgb = rgb.cuda(0).float() #torch.from_numpy(self.rgb[idx_id]).cuda().float() #/ 255.
55 |
56 | frame_data.gt_pose = self.gt_trajectory[idx]
57 |
58 |
59 | frame_data.ir = None
60 | img = Image.fromarray((rgb.cpu().numpy()*255).astype(np.ubyte)).convert('RGB')
61 | frame_data.saliency = torch.from_numpy(
62 | self.saliency_detector.process(img,type='map').astype(np.float32)
63 | ).cuda(0).float() / 255. if self.has_saliency else None
64 |
65 | frame_data.style = torch.from_numpy(
66 | self.style_painting(img)).cuda(0).float() / 255. if self.has_style else None
67 |
68 | H,W,_ = frame_data.rgb.shape
69 |
70 | frame_data.latent = torch.from_numpy(self.latent_func(self.np_image_strings[idx], H, W)).cuda(0).float() if self.has_latent else None
71 |
72 |
73 |
74 | frame_data.customs = [frame_data.ir, frame_data.saliency, frame_data.style, frame_data.latent]
75 | return frame_data
76 |
77 | class CustomAzure(AzureRGBDIDataset):
78 | def __init__(self, path, start_frame: int = 0, end_frame: int = -1, first_tq: list = None, load_gt: bool = False, train=True, mesh_gt = None, style_idx=1, has_style=True, has_ir=False, has_saliency=True, has_latent=False, f_im=None):
79 | super().__init__(path, start_frame, end_frame, first_tq, load_gt, train, mesh_gt)
80 | self.has_ir = has_ir
81 | self.has_saliency = has_saliency
82 | self.has_style = has_style
83 | self.has_latent = has_latent
84 | # saliency
85 | from transparent_background import Remover
86 | self.saliency_detector = Remover()
87 |
88 | # style
89 | from thirdparts.style_transfer.experiments import style_api
90 | self.style_painting = style_api.get_api(style_idx)
91 |
92 | # latent
93 | self.latent_func = f_im
94 |
95 | # np_str
96 | if has_latent:
97 | self.np_image_strings = []
98 | for rgb_id in tqdm(self.rgb_ids):
99 | with tf.gfile.GFile(rgb_id, 'rb') as f:
100 | np_image_string = np.array([f.read()])
101 | self.np_image_strings.append(np_image_string)
102 |
103 |
104 |
105 | def __getitem__(self, idx):
106 | frame_data = FrameData()
107 | if self.gt_trajectory is not None:
108 | frame_data.gt_pose = self.gt_trajectory[idx]
109 | else:
110 | frame_data.gt_pose = None
111 | frame_data.calib = FrameIntrinsic(self.cam.fx, self.cam.fy, self.cam.cx, self.cam.cy, self.cam.scale)
112 | frame_data.depth = torch.from_numpy(self.depth[idx].astype(np.float32)).cuda(0).float() / self.cam.scale
113 | frame_data.rgb = torch.from_numpy(self.rgb[idx]).cuda(0).float() / 255.
114 | frame_data.ir = torch.from_numpy(self.ir[idx].astype(np.float32)).cuda(0).float().unsqueeze(-1) if self.has_ir else None
115 |
116 | img = Image.fromarray(self.rgb[idx]).convert('RGB')
117 | frame_data.saliency = torch.from_numpy(
118 | self.saliency_detector.process(img,type='map')
119 | ).cuda(0).float() /255. if self.has_saliency else None
120 |
121 |
122 |
123 | frame_data.style = torch.from_numpy(
124 | self.style_painting(img)).cuda(0).float() / 255. if self.has_style else None
125 |
126 | H,W,_ = frame_data.rgb.shape
127 |
128 | frame_data.latent = torch.from_numpy(self.latent_func(self.np_image_strings[idx], H, W)).cuda(0).float() if self.has_latent else None
129 |
130 |
131 | frame_data.customs = [frame_data.ir, frame_data.saliency, frame_data.style, frame_data.latent]
132 | return frame_data
133 |
134 |
135 |
136 | class CustomReplica(ReplicaRGBDDataset):
137 | def __init__(self, path, start_frame: int = 0, end_frame: int = -1, first_tq: list = None, load_gt: bool = False, train=True, mesh_gt = None, style_idx=1, has_style=True, has_ir=False, has_saliency=True, has_latent=False, f_im=None):
138 | super().__init__(path, start_frame, end_frame, first_tq, load_gt, train, mesh_gt)
139 | self.has_ir = has_ir
140 | self.has_saliency = has_saliency
141 | self.has_style = has_style
142 | self.has_latent = has_latent
143 | # saliency
144 | from transparent_background import Remover
145 | self.saliency_detector = Remover()
146 |
147 | # style
148 | from thirdparts.style_transfer.experiments import style_api
149 | self.style_painting = style_api.get_api(style_idx)
150 |
151 | # latent
152 | self.latent_func = f_im
153 |
154 |
155 | def __getitem__(self, idx):
156 | #return self.rgb[idx], self.depth[idx]
157 | frame_data = FrameData()
158 | if self.gt_trajectory is not None:
159 | frame_data.gt_pose = self.gt_trajectory[idx]
160 | else:
161 | frame_data.gt_pose = None
162 |
163 | frame_data.calib = FrameIntrinsic(600., 600., 599.5, 339.5, 6553.5)
164 | frame_data.depth = torch.from_numpy(self.depth[idx].astype(np.float32)).cuda().float() / 6553.5
165 | frame_data.rgb = torch.from_numpy(self.rgb[idx]).cuda().float() / 255.
166 |
167 | img = Image.fromarray((frame_data.rgb.cpu().numpy()*255).astype(np.ubyte)).convert('RGB')
168 |
169 | frame_data.ir = None
170 | frame_data.saliency = torch.from_numpy(
171 | self.saliency_detector.process(img,type='map').astype(np.float32)
172 | ).cuda().float() / 255. if self.has_saliency else None
173 |
174 | frame_data.style = torch.from_numpy(
175 | self.style_painting(img)).cuda().float() / 255. if self.has_style else None
176 |
177 | frame_data.latent = self.latent_func(frame_data.rgb).permute(2,3,1,0).squeeze(-1) if self.has_latent else None
178 |
179 | frame_data.customs = [frame_data.ir, frame_data.saliency, frame_data.style, frame_data.latent]
180 |
181 | return frame_data
182 |
183 |
184 | class CustomBPNetScanNet(ScanNetLatentDataset):
185 | def __init__(self, path, start_frame: int = 0, end_frame: int = -1, first_tq: list = None, load_gt: bool = False, train=True, mesh_gt = None, style_idx=1, has_style=True, has_ir=False, has_saliency=True, has_latent=False, f_im=None):
186 | super().__init__(path, f_im, start_frame, end_frame, first_tq, load_gt, train, mesh_gt)
187 | self.has_ir = has_ir
188 | self.has_saliency = has_saliency
189 | self.has_style = has_style
190 | self.has_latent = has_latent
191 | # saliency
192 | from transparent_background import Remover
193 | self.saliency_detector = Remover()
194 |
195 | # style
196 | from thirdparts.style_transfer.experiments import style_api
197 | self.style_painting = style_api.get_api(style_idx)
198 |
199 | # latent
200 | self.latent_func = f_im
201 |
202 |
203 | def __getitem__(self, idx):
204 | frame_data = FrameData()
205 | if self.gt_trajectory is not None:
206 | frame_data.gt_pose = self.gt_trajectory[idx]
207 | else:
208 | frame_data.gt_pose = None
209 | frame_data.calib = FrameIntrinsic(*self.calib)
210 | frame_data.depth = torch.from_numpy(self.depth[idx].astype(np.float32)).cuda(0).float() / 1000
211 | frame_data.rgb = torch.from_numpy(self.rgb[idx]).cuda(0).float() / 255.
212 |
213 | frame_data.ir = None
214 | img = Image.fromarray((frame_data.rgb.cpu().numpy()*255).astype(np.ubyte)).convert('RGB')
215 | frame_data.saliency = torch.from_numpy(
216 | self.saliency_detector.process(img,type='map').astype(np.float32)
217 | ).cuda().float() / 255. if self.has_saliency else None
218 |
219 | frame_data.style = torch.from_numpy(
220 | self.style_painting(img)).cuda().float() / 255. if self.has_style else None
221 |
222 | H,W,_ = self.rgb.shape
223 |
224 | frame_data.latent = torch.from_numpy(self.f_im(self.np_image_strings[idx], H, W)).cuda(0).float() if self.has_latent else None
225 |
226 |
227 |
228 | frame_data.customs = [frame_data.ir, frame_data.saliency, frame_data.style, frame_data.latent]
229 | return frame_data
230 |
231 |
232 |
--------------------------------------------------------------------------------
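All four `Custom*` wrappers expose the optional per-pixel properties through `frame_data.customs = [ir, saliency, style, latent]`, with an entry set to `None` whenever the corresponding `has_*` flag is off. A consumer can therefore filter on `None` instead of re-checking the flags; the helper below is a hypothetical sketch, not part of the repository.

```python
# Hedged sketch: collect the property maps a Custom* dataset actually produced.
# `frame` stands for one FrameData returned by, e.g., CustomReplica[idx].
def active_property_maps(frame):
    names = ['ir', 'saliency', 'style', 'latent']   # order used by frame_data.customs
    return {n: t for n, t in zip(names, frame.customs) if t is not None}

# maps = active_property_maps(frame)
# for name, tensor in maps.items():
#     print(name, tuple(tensor.shape), tensor.dtype)
```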
/uni/dataset/fountain.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import os
3 | import torch
4 | from uni.dataset import *
5 | from pyquaternion import Quaternion
6 | from pathlib import Path
7 | from uni.utils import motion_util
8 | import pdb
9 |
10 |
11 | class FountainSequence(RGBDSequence):
12 | def __init__(self, path: str, start_frame: int = 0, end_frame: int = -1, first_tq: list = None, load_gt: bool = False):
13 | super().__init__()
14 | self.path = Path(path)
15 |
16 | with open(self.path/"depth_vga.match", 'r') as f:
17 | lines = f.readlines()
18 | pairs = [l.strip().split(' ') for l in lines]
19 | self.color_names = []
20 | self.depth_names = []
21 | for pair in pairs:
22 | self.depth_names.append(pair[0])
23 | self.color_names.append(pair[-1])
24 |
25 | #self.color_names = sorted([f"rgb/{t}" for t in os.listdir(self.path / "rgb")], key=lambda t: int(t[4:].split(".")[0]))
26 | #self.depth_names = [f"depth/{t}.png" for t in range(len(self.color_names))]
27 | self.calib = [525., 525.0, 319.50, 239.50, 1000.0]
28 | if first_tq is not None:
29 | self.first_iso = motion_util.Isometry(q=Quaternion(array=first_tq[3:]), t=np.array(first_tq[:3]))
30 | else:
31 | self.first_iso = motion_util.Isometry(q=Quaternion(array=[0.0, -1.0, 0.0, 0.0]))
32 |
33 | if end_frame == -1:
34 | end_frame = len(self.color_names)
35 |
36 | self.color_names = self.color_names[start_frame:end_frame]
37 | self.depth_names = self.depth_names[start_frame:end_frame]
38 |
39 | if load_gt:
40 | gt_traj_path = list(self.path.glob("fountain_key.log"))[0]
41 | self.gt_trajectory = self._parse_traj_file(gt_traj_path)
42 | self.gt_trajectory = self.gt_trajectory[start_frame:end_frame]
43 | change_iso = self.first_iso.dot(self.gt_trajectory[0].inv())
44 | self.gt_trajectory = [change_iso.dot(t) for t in self.gt_trajectory]
45 | assert len(self.gt_trajectory) == len(self.color_names)
46 | else:
47 | self.gt_trajectory = None
48 |
49 |     def _parse_traj_file(self, traj_path):
50 |         camera_ext = {}
51 |         # fountain_key.log stores one block of 5 lines per camera:
52 |         # a header line followed by the rows of a 4x4 pose matrix.
53 |         with open(traj_path, 'r') as f:
54 |             ls = f.readlines()
55 |         traj_data = []
56 |         for i in range(len(ls) // 5):
57 |             mat = []
58 |             for j in range(1, 5):
59 |                 mat.append([float(item) for item in ls[i * 5 + j].strip().split(' ')])
60 |             traj_data.append(np.array(mat))
61 |
62 |         cano_quat = motion_util.Isometry(q=Quaternion(axis=[0.0, 0.0, 1.0], degrees=180.0))
63 |         for idx, T in enumerate(traj_data):
64 |             cur_iso = motion_util.Isometry(q=Quaternion(matrix=T[:3, :3], atol=1e-5, rtol=1e-5), t=T[:3, 3])
65 |             camera_ext[idx] = cano_quat.dot(cur_iso)
66 |         return [camera_ext[t] for t in range(len(camera_ext))]
69 |
70 | def __len__(self):
71 | return len(self.color_names)
72 |
73 | def __next__(self):
74 | if self.frame_id >= len(self):
75 | raise StopIteration
76 | depth_img_path = self.path / self.depth_names[self.frame_id]
77 | rgb_img_path = self.path / self.color_names[self.frame_id]
78 |
79 | # Convert depth image into point cloud.
80 | depth_data = cv2.imread(str(depth_img_path), cv2.IMREAD_UNCHANGED)
81 | depth_data = torch.from_numpy(depth_data.astype(np.float32)).cuda() / self.calib[4]
82 | rgb_data = cv2.imread(str(rgb_img_path))
83 | rgb_data = cv2.cvtColor(rgb_data, cv2.COLOR_BGR2RGB)
84 | rgb_data = torch.from_numpy(rgb_data).cuda().float() / 255.
85 |
86 | frame_data = FrameData()
87 | frame_data.gt_pose = self.gt_trajectory[self.frame_id] if self.gt_trajectory is not None else None
88 | frame_data.calib = FrameIntrinsic(self.calib[0], self.calib[1], self.calib[2], self.calib[3], self.calib[4])
89 | frame_data.depth = depth_data
90 | frame_data.rgb = rgb_data
91 |
92 | self.frame_id += 1
93 | return frame_data
94 |
--------------------------------------------------------------------------------
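Both `FountainSequence` and `ICLNUIMSequence` accept an optional `first_tq` that pins the first camera pose; it is a 7-vector `[tx, ty, tz, qw, qx, qy, qz]`, since `Quaternion(array=...)` in pyquaternion puts the scalar part first. A hypothetical call (placeholder path; the quaternion reproduces the default `first_iso`):

```python
# Hedged sketch: pin the first pose via first_tq (translation, then w-first quaternion).
from uni.dataset.fountain import FountainSequence

first_tq = [0.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0]    # same rotation as the default first_iso
seq = FountainSequence('/data/fountain_all', load_gt=True, first_tq=first_tq)
```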
/uni/dataset/icl_nuim.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import os
3 | import torch
4 | from uni.dataset import *
5 | from pyquaternion import Quaternion
6 | from pathlib import Path
7 | from uni.utils import motion_util
8 | import pdb
9 |
10 |
11 | class ICLNUIMSequence(RGBDSequence):
12 | def __init__(self, path: str, start_frame: int = 0, end_frame: int = -1, first_tq: list = None, load_gt: bool = False):
13 | super().__init__()
14 | self.path = Path(path)
15 | self.color_names = sorted([f"rgb/{t}" for t in os.listdir(self.path / "rgb")], key=lambda t: int(t[4:].split(".")[0]))
16 | self.depth_names = [f"depth/{t}.png" for t in range(len(self.color_names))]
17 | self.calib = [481.2, 480.0, 319.50, 239.50, 5000.0]
18 | if first_tq is not None:
19 | self.first_iso = motion_util.Isometry(q=Quaternion(array=first_tq[3:]), t=np.array(first_tq[:3]))
20 | else:
21 | self.first_iso = motion_util.Isometry(q=Quaternion(array=[0.0, -1.0, 0.0, 0.0]))
22 |
23 | if end_frame == -1:
24 | end_frame = len(self.color_names)
25 |
26 | self.color_names = self.color_names[start_frame:end_frame]
27 | self.depth_names = self.depth_names[start_frame:end_frame]
28 |
29 | if load_gt:
30 | gt_traj_path = (list(self.path.glob("*.freiburg")) + list(self.path.glob("groundtruth.txt")))[0]
31 | self.gt_trajectory = self._parse_traj_file(gt_traj_path)
32 | self.gt_trajectory = self.gt_trajectory[start_frame:end_frame]
33 | change_iso = self.first_iso.dot(self.gt_trajectory[0].inv())
34 | self.gt_trajectory = [change_iso.dot(t) for t in self.gt_trajectory]
35 | assert len(self.gt_trajectory) == len(self.color_names)
36 | else:
37 | self.gt_trajectory = None
38 |
39 | def _parse_traj_file(self, traj_path):
40 | camera_ext = {}
41 | traj_data = np.genfromtxt(traj_path)
42 | cano_quat = motion_util.Isometry(q=Quaternion(axis=[0.0, 0.0, 1.0], degrees=180.0))
43 | for cur_p in traj_data:
44 | cur_q = Quaternion(imaginary=cur_p[4:7], real=cur_p[-1]).rotation_matrix
45 | cur_t = cur_p[1:4]
46 | cur_q[1] = -cur_q[1]
47 | cur_q[:, 1] = -cur_q[:, 1]
48 | cur_t[1] = -cur_t[1]
49 | cur_iso = motion_util.Isometry(q=Quaternion(matrix=cur_q), t=cur_t)
50 | camera_ext[cur_p[0]] = cano_quat.dot(cur_iso)
51 | camera_ext[0] = camera_ext[1]
52 | return [camera_ext[t] for t in range(len(camera_ext))]
53 |
54 | def __len__(self):
55 | return len(self.color_names)
56 |
57 | def __next__(self):
58 | if self.frame_id >= len(self):
59 | raise StopIteration
60 |
61 | depth_img_path = self.path / self.depth_names[self.frame_id]
62 | rgb_img_path = self.path / self.color_names[self.frame_id]
63 |
64 | # Convert depth image into point cloud.
65 | depth_data = cv2.imread(str(depth_img_path), cv2.IMREAD_UNCHANGED)
66 | depth_data = torch.from_numpy(depth_data.astype(np.float32)).cuda() / self.calib[4]
67 | rgb_data = cv2.imread(str(rgb_img_path))
68 | rgb_data = cv2.cvtColor(rgb_data, cv2.COLOR_BGR2RGB)
69 | rgb_data = torch.from_numpy(rgb_data).cuda().float() / 255.
70 |
71 | frame_data = FrameData()
72 | frame_data.gt_pose = self.gt_trajectory[self.frame_id] if self.gt_trajectory is not None else None
73 | frame_data.calib = FrameIntrinsic(self.calib[0], self.calib[1], self.calib[2], self.calib[3], self.calib[4])
74 | frame_data.depth = depth_data
75 | frame_data.rgb = rgb_data
76 |
77 | self.frame_id += 1
78 | return frame_data
79 |
--------------------------------------------------------------------------------
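`ICLNUIMSequence._parse_traj_file` reads the TUM-format ground truth (`t tx ty tz qx qy qz qw`) and flips the y axis of both rotation and translation, presumably to move from the dataset's axis convention into the mapper's frame. The snippet isolates that flip on one made-up row so it can be sanity-checked on its own.

```python
# Hedged sketch: the per-pose y-flip applied in ICLNUIMSequence._parse_traj_file.
import numpy as np
from pyquaternion import Quaternion

cur_p = np.array([0.0, 0.1, 0.2, 0.3, 0.0, 0.0, 0.0, 1.0])  # t tx ty tz qx qy qz qw (made up)
cur_q = Quaternion(imaginary=cur_p[4:7], real=cur_p[-1]).rotation_matrix
cur_t = cur_p[1:4].copy()
cur_q[1] = -cur_q[1]          # negate the y row of the rotation ...
cur_q[:, 1] = -cur_q[:, 1]    # ... and the y column (determinant stays +1)
cur_t[1] = -cur_t[1]          # and the y component of the translation
print(Quaternion(matrix=cur_q), cur_t)
```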
/uni/dataset/matterport3d.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | import os
4 | import time
5 | import torch
6 |
7 | from collections import defaultdict, namedtuple
8 |
9 | from threading import Thread, Lock
10 | from uni.dataset import *
11 | from uni.utils import motion_util
12 | from pyquaternion import Quaternion
13 |
14 | import open3d as o3d
15 |
16 | import pdb
17 |
18 |
19 |
20 |
21 | class Matterport3DRGBDDataset():
22 | '''
23 | follow https://github.com/otakuxiang/circle/blob/master/torch/sample_matterport.py
24 | '''
25 |
26 | def __init__(self, path, start_frame: int = 0, end_frame: int = -1, first_tq: list = None, load_gt: bool = False, mesh_gt: str = None):
27 | path = os.path.expanduser(path)
28 |
29 | self.first_iso = motion_util.Isometry(q=Quaternion(array=[0.0, -1.0, 0.0, 0.0]))
30 |
31 | self.depth_path = os.path.join(path,"matterport_depth_images")
32 | self.rgb_path = os.path.join(path,"matterport_color_images")
33 | self.pose_path = os.path.join(path,"matterport_camera_poses")
34 | self.intri_path = os.path.join(path,"matterport_camera_intrinsics")
35 | tripod_numbers = [ins[:ins.find("_")] for ins in os.listdir(self.intri_path)]
36 | self.depthMapFactor = 4000
37 |
38 |
39 | self.frames = []
40 | for tripod_number in tripod_numbers:
41 | for camera_id in range(3):
42 | for frame_id in range(6):
43 | self.frames.append([tripod_number,camera_id,frame_id])
44 | self.frame_ids = list(range(len(self.frames)))
45 |
46 |
47 | if load_gt:
48 | self.gt_trajectory = self._parse_traj_file(self.pose_path)
49 | self.gt_trajectory = self.gt_trajectory[start_frame:end_frame]
50 | change_iso = self.first_iso.dot(self.gt_trajectory[0].inv())
51 | self.gt_trajectory = [change_iso.dot(t) for t in self.gt_trajectory]
52 | #assert len(self.gt_trajectory) == len(self.rgb)
53 | self.T_gt2uni = change_iso.matrix
54 |
55 | else:
56 | self.gt_trajectory = None
57 | self.T_gt2uni = self.first_iso.matrix
58 |
59 |
60 |
61 |
62 |
63 |
64 | self.frame_id = 0
65 |
66 | def __len__(self):
67 | return len(self.frames)
68 |
69 | def _parse_traj_file(self, traj_path):
70 | traj_data = []
71 | for frame_id in range(len(self)):
72 | tripod_number,camera_id,frame_idx = self.frames[frame_id]
73 | f = open(os.path.join(self.pose_path,f"{tripod_number}_pose_{camera_id}_{frame_idx}.txt"))
74 | pose = np.zeros((4,4))
75 | for idx,line in enumerate(f):
76 | ss = line.strip().split(" ")
77 | for k in range(0,4):
78 | pose[idx,k] = float(ss[k])
79 | # pose = np.linalg.inv(pose)
80 | traj_data.append(pose)
81 |
82 | f.close()
83 |
84 | camera_ext = {}
85 | for id, cur_p in enumerate(traj_data):
86 | T = cur_p
87 | cur_q = T[:3,:3]
88 | cur_t = T[:3, 3]
89 | cur_iso = motion_util.Isometry(q=Quaternion(matrix=cur_q, atol=1e-5, rtol=1e-5), t=cur_t)
90 | camera_ext[id] = cur_iso
91 | camera_ext[len(camera_ext)] = camera_ext[len(camera_ext)-1]
92 | return [camera_ext[t] for t in range(len(camera_ext))]
93 |
94 |
95 |
96 |
97 |
98 | def __getitem__(self, frame_id):
99 | tripod_number,camera_id,frame_idx = self.frames[frame_id]
100 | '''
101 | f = open(os.path.join(self.pose_path,f"{tripod_number}_pose_{camera_id}_{frame_idx}.txt"))
102 | pose = np.zeros((4,4))
103 | for idx,line in enumerate(f):
104 | ss = line.strip().split(" ")
105 | for k in range(0,4):
106 | pose[idx,k] = float(ss[k])
107 | # pose = np.linalg.inv(pose)
108 | pose = torch.from_numpy(pose).float()
109 |
110 | f.close()
111 | '''
112 | K_depth = np.zeros((3,3))
113 | f = open(os.path.join(self.intri_path,f"{tripod_number}_intrinsics_{camera_id}.txt"))
114 | p = np.zeros((4))
115 | for idx,line in enumerate(f):
116 | ss = line.strip().split(" ")
117 | for j in range(4):
118 | p[j] = float(ss[j+2])
119 | f.close()
120 | K_depth[0,0] = p[0]
121 | K_depth[1,1] = p[1]
122 | K_depth[2,2] = 1
123 | K_depth[0,2] = p[2]
124 | K_depth[1,2] = p[3]
125 | depth_path = os.path.join(self.depth_path,tripod_number+"_d"+str(camera_id)+"_"+str(frame_idx)+".png")
126 | depth =cv2.imread(depth_path,-1)
127 | rgb_path = os.path.join(self.rgb_path,tripod_number+"_i"+str(camera_id)+"_"+str(frame_idx)+".jpg")
128 | rgb = cv2.imread(rgb_path, -1)
129 | rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)
130 |
131 | ins = torch.from_numpy(K_depth).float()
132 | if depth is None:
133 | print("get None image!")
134 | print(depth_path)
135 | return None
136 |
137 | depth = depth.astype(np.float32) / self.depthMapFactor
138 | depth = torch.from_numpy(depth).float()
139 |
140 | rgb = rgb.astype(np.float32) / 255
141 | rgb = torch.from_numpy(rgb).float()
142 |
143 | assert depth.shape[:2] == rgb.shape[:2], 'depth shape should == rgb shape'
144 |
145 | return rgb,depth,ins
146 | def __next__(self):
147 | rgb, depth, K = self[self.frame_id]
148 |
149 | frame_data = FrameData()
150 | frame_data.calib = FrameIntrinsic(K[0,0],K[1,1],K[0,2],K[1,2],self.depthMapFactor)
151 | frame_data.depth = depth.cuda()
152 | frame_data.rgb = rgb.cuda()
153 | frame_data.gt_pose = self.gt_trajectory[self.frame_id]
154 |
155 | self.frame_id += 1
156 | return frame_data
157 |
158 |
159 |
--------------------------------------------------------------------------------
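`Matterport3DRGBDDataset.__getitem__` rebuilds the 3x3 intrinsic matrix from four values `(fx, fy, cx, cy)` that start at the third column of the intrinsics file. A compact, self-contained version of that assembly, handy for testing the parsing on one made-up line:

```python
# Hedged sketch: assemble K the way Matterport3DRGBDDataset.__getitem__ does.
import numpy as np

line = "1280 1024 1075.1 1075.8 629.7 522.3"     # columns 2..5 hold fx, fy, cx, cy (made up)
fx, fy, cx, cy = [float(s) for s in line.strip().split(' ')[2:6]]
K = np.array([[fx, 0.0, cx],
              [0.0, fy, cy],
              [0.0, 0.0, 1.0]])
print(K)
```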
/uni/dataset/scannet.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | import os
4 | import time
5 | import torch
6 |
7 | from collections import defaultdict, namedtuple
8 |
9 | from threading import Thread, Lock
10 | from uni.dataset import *
11 | from uni.utils import motion_util
12 | from pyquaternion import Quaternion
13 |
14 | import open3d as o3d
15 | from tqdm import tqdm
16 | import glob
17 |
18 | import pdb
19 |
20 |
21 | class ImageReader(object):
22 | def __init__(self, ids, timestamps=None, cam=None, is_rgb=False, resize_shape=None):
23 | self.ids = ids
24 | self.timestamps = timestamps
25 | self.cam = cam
26 | self.cache = dict()
27 | self.idx = 0
28 |
29 | self.resize_shape = resize_shape
30 | self.is_rgb = is_rgb
31 |
32 | self.ahead = 10 # 10 images ahead of current index
33 | self.waiting = 1.5 # waiting time
34 |
35 | self.preload_thread = Thread(target=self.preload)
36 | self.thread_started = False
37 |
38 | def read(self, path):
39 | img = cv2.imread(path, -1)
40 | if self.is_rgb:
41 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
42 |
43 | if self.resize_shape is not None:
44 | img = cv2.resize(img, self.resize_shape)
45 |
46 |
47 | if self.cam is None:
48 | return img
49 | else:
50 | return self.cam.rectify(img)
51 |
52 | def preload(self):
53 | idx = self.idx
54 | t = float('inf')
55 | while True:
56 | if time.time() - t > self.waiting:
57 | return
58 | if self.idx == idx:
59 | time.sleep(1e-2)
60 | continue
61 |
62 | for i in range(self.idx, self.idx + self.ahead):
63 | if i not in self.cache and i < len(self.ids):
64 | self.cache[i] = self.read(self.ids[i])
65 | if self.idx + self.ahead > len(self.ids):
66 | return
67 | idx = self.idx
68 | t = time.time()
69 |
70 | def __len__(self):
71 | return len(self.ids)
72 |
73 | def __getitem__(self, idx):
74 | self.idx = idx
75 | # if not self.thread_started:
76 | # self.thread_started = True
77 | # self.preload_thread.start()
78 |
79 | if idx in self.cache:
80 | img = self.cache[idx]
81 | del self.cache[idx]
82 | else:
83 | img = self.read(self.ids[idx])
84 |
85 | return img
86 |
87 | def __iter__(self):
88 | for i, timestamp in enumerate(self.timestamps):
89 | yield timestamp, self[i]
90 |
91 | @property
92 | def dtype(self):
93 | return self[0].dtype
94 | @property
95 | def shape(self):
96 | return self[0].shape
97 |
98 |
99 |
100 |
101 |
102 |
103 | def make_pair(matrix, threshold=1):
104 | assert (matrix >= 0).all()
105 | pairs = []
106 | base = defaultdict(int)
107 | while True:
108 | i = matrix[:, 0].argmin()
109 | min0 = matrix[i, 0]
110 | j = matrix[0, :].argmin()
111 | min1 = matrix[0, j]
112 |
113 | if min0 < min1:
114 | i, j = i, 0
115 | else:
116 | i, j = 0, j
117 | if min(min1, min0) < threshold:
118 | pairs.append((i + base['i'], j + base['j']))
119 |
120 | matrix = matrix[i + 1:, j + 1:]
121 | base['i'] += (i + 1)
122 | base['j'] += (j + 1)
123 |
124 | if min(matrix.shape) == 0:
125 | break
126 | return pairs
127 |
128 |
129 |
130 | class ScanNetRGBDDataset(object):
131 | '''
132 |     path example: 'path/to/your/ScanNet/scans/scene0000_00'
133 | '''
134 |
135 | def __init__(self, path, start_frame: int = 0, end_frame: int = -1, first_tq: list = None, load_gt: bool = False, register=True, mesh_gt: str = None):
136 | path = os.path.expanduser(path)
137 |
138 | self.first_iso = motion_util.Isometry(q=Quaternion(array=[0.0, -1.0, 0.0, 0.0]))
139 |
140 | if load_gt:
141 | gt_traj_path = sorted(glob.glob(path+'/pose/*.txt'), key=lambda x: int(os.path.basename(x)[:-4]))
142 | self.gt_trajectory = self._parse_traj_file(gt_traj_path)
143 | # some pose is None
144 | invalid_id = [i for i, pose in enumerate(self.gt_trajectory) if pose is None]
145 | for i in invalid_id[::-1]:
146 | del self.gt_trajectory[i]
147 |
148 |
149 | self.gt_trajectory = self.gt_trajectory[start_frame:end_frame]
150 | change_iso = self.first_iso.dot(self.gt_trajectory[0].inv())
151 | self.gt_trajectory = [change_iso.dot(t) for t in self.gt_trajectory]
152 | #assert len(self.gt_trajectory) == len(self.rgb)
153 | self.T_gt2uni = change_iso.matrix
154 |
155 | else:
156 | self.gt_trajectory = None
157 | self.T_gt2uni = self.first_iso.matrix
158 | rgb_ids, rgb_timestamps = self.listdir(path, 'color', ext='.jpg')
159 | depth_ids, depth_timestamps = self.listdir(path, 'depth')
160 | if load_gt:
161 | for i in invalid_id[::-1]:
162 | del(rgb_ids[i])
163 | del(depth_ids[i])
164 |
165 |             rgb_timestamps = np.delete(rgb_timestamps, invalid_id)
166 |             depth_timestamps = np.delete(depth_timestamps, invalid_id)
167 |
168 | rgb_ids = rgb_ids[start_frame:end_frame]
169 | depth_ids = depth_ids[start_frame:end_frame]
170 | rgb_timestamps = rgb_timestamps[start_frame:end_frame]
171 |
172 | self.rgb_ids = rgb_ids
173 |
174 | self.depth = ImageReader(depth_ids, depth_timestamps)
175 | H, W = self.depth[0].shape
176 | self.rgb = ImageReader(rgb_ids, rgb_timestamps, is_rgb=True, resize_shape=(W,H))
177 |
178 | self.timestamps = rgb_timestamps
179 |
180 | self.frame_id = 0
181 |
182 |
183 |
184 | if mesh_gt is None:
185 | print("using reconstruction mesh")
186 | else:
187 | if mesh_gt != '' and load_gt:
188 | self.gt_mesh = self.get_ground_truth_mesh(mesh_gt)#, gt_traj_path)
189 | def get_ground_truth_mesh(self, mesh_path):#, gt_traj_path):
190 | import trimesh
191 | '''
192 | traj_data = np.genfromtxt(gt_traj_path)
193 | T0 = traj_data[0].reshape((4,4))
194 | change_mat = (self.first_iso.matrix.dot(np.linalg.inv(T0)))
195 | '''
196 | change_mat = self.T_gt2uni
197 |
198 |
199 |
200 | mesh_gt = trimesh.load(mesh_path)
201 | mesh_gt.apply_transform(change_mat)
202 | return mesh_gt.as_open3d
203 |
204 |
205 |
206 |
207 |
208 | def _parse_traj_file(self,traj_path):
209 | camera_ext = {}
210 | traj_data = [np.genfromtxt(traj_file) for traj_file in traj_path]
211 | #cano_quat = motion_util.Isometry(q=Quaternion(axis=[0.0, 0.0, 1.0], degrees=180.0))
212 | for id, cur_p in enumerate(traj_data):
213 | T = cur_p.reshape((4,4))
214 | #cur_q = Quaternion(imaginary=cur_p[4:7], real=cur_p[-1]).rotation_matrix
215 | cur_q = T[:3,:3]
216 | cur_t = T[:3, 3]
217 | #cur_q[1] = -cur_q[1]
218 | #cur_q[:, 1] = -cur_q[:, 1]
219 | #cur_t[1] = -cur_t[1]
220 | try:
221 | cur_iso = motion_util.Isometry(q=Quaternion(matrix=cur_q, atol=1e-5, rtol=1e-5), t=cur_t)
222 | except Exception as e:
223 | cur_iso = None
224 | camera_ext[id] = cur_iso #cano_quat.dot(cur_iso)
225 | #camera_ext[len(camera_ext)] = camera_ext[len(camera_ext)-1]
226 | return [camera_ext[t] for t in range(len(camera_ext))]
227 |
228 |
229 |
230 | def __len__(self):
231 | return len(self.rgb)
232 |
233 |
234 |
235 |
236 | def sort(self, xs, st = 3):
237 | return sorted(xs, key=lambda x:float(x[st:-4]))
238 |
239 | def listdir(self, path, split='rgb', ext='.png'):
240 | imgs, timestamps = [], []
241 | files = [x for x in os.listdir(os.path.join(path, split)) if x.endswith(ext)]
242 | st = 0
243 | for name in self.sort(files,st):
244 | imgs.append(os.path.join(path, split, name))
245 | timestamp = float(name[st:-len(ext)].rstrip('.'))
246 | timestamps.append(timestamp)
247 |
248 | return imgs, np.array(timestamps)
249 |
250 | def __getitem__(self, idx):
251 | #return self.rgb[idx], self.depth[idx]
252 | frame_data = FrameData()
253 | if self.gt_trajectory is not None:
254 | frame_data.gt_pose = self.gt_trajectory[idx]
255 | else:
256 | frame_data.gt_pose = None
257 | frame_data.calib = FrameIntrinsic(600., 600., 599.5, 339.5, 6553.5)
258 | frame_data.depth = torch.from_numpy(self.depth[idx].astype(np.float32)).cuda().float() / 6553.5
259 | frame_data.rgb = torch.from_numpy(self.rgb[idx]).cuda().float() / 255.
260 | return frame_data
261 |
262 |
263 |
264 |
--------------------------------------------------------------------------------
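`make_pair` at the top of this file (and of `aug_icl.py`) greedily associates two timestamp streams from their pairwise absolute-difference matrix. A small check with made-up timestamps, assuming the installed `uni` package resolves the module's imports:

```python
# Hedged sketch: associate RGB and depth timestamps with make_pair.
import numpy as np
from uni.dataset.scannet import make_pair

rgb_t = np.array([0.00, 0.05, 0.10])
depth_t = np.array([0.01, 0.06, 0.11])
diff = np.abs(rgb_t[:, None] - depth_t[None, :])   # pairwise |t_rgb - t_depth|
pairs = make_pair(diff, threshold=0.02)            # pairs of (rgb_idx, depth_idx): (0,0), (1,1), (2,2)
print(pairs)
```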
/uni/dataset/tum.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | import os
4 | import time
5 | import torch
6 |
7 | from collections import defaultdict, namedtuple
8 |
9 | from threading import Thread, Lock
10 | from uni.dataset import *
11 | from uni.utils import motion_util
12 | from pyquaternion import Quaternion
13 |
14 | import open3d as o3d
15 |
16 | import pdb
17 |
18 |
19 | class ImageReader(object):
20 | def __init__(self, ids, timestamps=None, cam=None, is_rgb=False):
21 | self.ids = ids
22 | self.timestamps = timestamps
23 | self.cam = cam
24 | self.cache = dict()
25 | self.idx = 0
26 |
27 | self.is_rgb = is_rgb
28 |
29 | self.ahead = 10 # 10 images ahead of current index
30 | self.waiting = 1.5 # waiting time
31 |
32 | self.preload_thread = Thread(target=self.preload)
33 | self.thread_started = False
34 |
35 | def read(self, path):
36 | img = cv2.imread(path, -1)
37 | if self.is_rgb:
38 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
39 |
40 | if self.cam is None:
41 | return img
42 | else:
43 | return self.cam.rectify(img)
44 |
45 | def preload(self):
46 | idx = self.idx
47 | t = float('inf')
48 | while True:
49 | if time.time() - t > self.waiting:
50 | return
51 | if self.idx == idx:
52 | time.sleep(1e-2)
53 | continue
54 |
55 | for i in range(self.idx, self.idx + self.ahead):
56 | if i not in self.cache and i < len(self.ids):
57 | self.cache[i] = self.read(self.ids[i])
58 | if self.idx + self.ahead > len(self.ids):
59 | return
60 | idx = self.idx
61 | t = time.time()
62 |
63 | def __len__(self):
64 | return len(self.ids)
65 |
66 | def __getitem__(self, idx):
67 | self.idx = idx
68 | # if not self.thread_started:
69 | # self.thread_started = True
70 | # self.preload_thread.start()
71 |
72 | if idx in self.cache:
73 | img = self.cache[idx]
74 | del self.cache[idx]
75 | else:
76 | img = self.read(self.ids[idx])
77 |
78 | return img
79 |
80 | def __iter__(self):
81 | for i, timestamp in enumerate(self.timestamps):
82 | yield timestamp, self[i]
83 |
84 | @property
85 | def dtype(self):
86 | return self[0].dtype
87 | @property
88 | def shape(self):
89 | return self[0].shape
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 | class TUMRGBDDataset():
98 | def __init__(self, path, start_frame: int = 0, end_frame: int = -1, first_tq: list = None, load_gt: bool = False, register=True, mesh_gt: str = None):
99 | path = os.path.expanduser(path)
100 | self.first_iso = motion_util.Isometry(q=Quaternion(array=[0.0, -1.0, 0.0, 0.0]))
101 | rgb_ids = []
102 | depth_ids = []
103 | self.timestamps = []
104 | with open(os.path.join(path,'asso.txt'),'r') as f:
105 | ls = f.readlines()
106 | for l in ls:
107 | elems = l.strip().split(' ')
108 | rgb_id = elems[1]
109 | depth_id = elems[3]
110 | timestamp = elems[0]
111 | rgb_ids.append(os.path.join(path,rgb_id))
112 | depth_ids.append(os.path.join(path,depth_id))
113 | self.timestamps.append(timestamp)
114 |
115 |
116 |
117 | self.rgb = ImageReader(rgb_ids)
118 | self.depth = ImageReader(depth_ids)
119 |
120 | self.frame_id = 0
121 |
122 |
123 |
124 | assert load_gt == False, "NO TUM GT TRAJECTORY"
125 | self.gt_trajectory = None
126 | self.T_gt2uni = self.first_iso.matrix
127 |
128 |
129 |
130 |
131 | def sort(self, xs):
132 | return sorted(xs, key=lambda x:float(x[:-4]))
133 |
134 | def __getitem__(self, idx):
135 | frame_data = FrameData()
136 | frame_data.gt_pose = None
137 | frame_data.calib = FrameIntrinsic(525., 525., 319.5, 239.5, 5000)
138 | frame_data.depth = torch.from_numpy(self.depth[idx].astype(np.float32)).cuda().float() / 5000
139 | frame_data.rgb = torch.from_numpy(self.rgb[idx]).cuda().float() / 255.
140 | return frame_data
141 |
142 |
143 | def __next__(self):
144 | frame_data = FrameData()
145 | frame_data.gt_pose = None
146 | frame_data.calib = FrameIntrinsic(525., 525., 319.5, 239.5, 5000)
147 | frame_data.depth = torch.from_numpy(self.depth[self.frame_id].astype(np.float32)).cuda().float() / 5000
148 | frame_data.rgb = torch.from_numpy(self.rgb[self.frame_id]).cuda().float() / 255.
149 | self.frame_id += 1
150 | return frame_data
151 |
152 |
153 |
154 | def __len__(self):
155 | return len(self.rgb)
156 |
--------------------------------------------------------------------------------
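`TUMRGBDDataset` does not read the raw `rgb.txt`/`depth.txt` files; it expects a pre-associated `asso.txt` whose columns it indexes as `elems[0]` (timestamp), `elems[1]` (RGB path) and `elems[3]` (depth path). The sketch below shows the assumed line layout with example values in the usual TUM RGB-D style.

```python
# Hedged sketch: the asso.txt layout expected by TUMRGBDDataset
# (rgb_timestamp rgb_path depth_timestamp depth_path per line).
line = "1305031102.175304 rgb/1305031102.175304.png 1305031102.160407 depth/1305031102.160407.png"
elems = line.strip().split(' ')
timestamp, rgb_rel, depth_rel = elems[0], elems[1], elems[3]
print(timestamp, rgb_rel, depth_rel)
```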
/uni/encoder/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jarrome/Uni-Fusion/c49461d5aa53375380ca2eb7fbdf4ce8e14aeda0/uni/encoder/__init__.py
--------------------------------------------------------------------------------
/uni/encoder/position_encoder.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jarrome/Uni-Fusion/c49461d5aa53375380ca2eb7fbdf4ce8e14aeda0/uni/encoder/position_encoder.pth
--------------------------------------------------------------------------------
/uni/encoder/utility.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | import math
3 | import torch
4 | import torch.nn as nn
5 | import logging
6 |
7 | p_dir = os.path.dirname(os.path.dirname(__file__))
8 | sys.path.append(p_dir)
9 |
10 | from uni.utils import exp_util
11 |
12 | from pathlib import Path
13 | import importlib
14 | import pdb
15 |
16 |
17 | class Networks:
18 | def __init__(self):
19 | self.decoder = None
20 | self.encoder = None
21 |
22 | def eval(self):
23 | if self.encoder is not None:
24 | self.encoder.eval()
25 | if self.decoder is not None:
26 | self.decoder.eval()
27 |
28 |
29 | def load_model(training_hyper_path: str, use_epoch: int = -1):
30 | """
31 | Load in the model and hypers used.
32 | :param training_hyper_path:
33 | :param use_epoch: if -1, will load the latest model.
34 | :return: Networks
35 | """
36 | training_hyper_path = Path(training_hyper_path)
37 |
38 | if training_hyper_path.name.split(".")[-1] == "json":
39 | args = exp_util.parse_config_json(training_hyper_path)
40 | exp_dir = training_hyper_path.parent
41 | model_paths = exp_dir.glob('model_*.pth.tar')
42 | model_paths = {int(str(t).split("model_")[-1].split(".pth")[0]): t for t in model_paths}
43 | assert use_epoch in model_paths.keys(), f"{use_epoch} not found in {sorted(list(model_paths.keys()))}"
44 | args.checkpoint = model_paths[use_epoch]
45 | else:
46 | args = exp_util.parse_config_yaml(Path('configs/training_defaults.yaml'))
47 | args = exp_util.parse_config_yaml(training_hyper_path, args)
48 | logging.warning("Loaded a un-initialized model.")
49 | args.checkpoint = None
50 |
51 | model = Networks()
52 | net_module = importlib.import_module("network." + args.network_name)
53 | model.decoder = net_module.Model(args.code_length, **args.network_specs).cuda()
54 | if args.encoder_name is not None:
55 | encoder_module = importlib.import_module("network." + args.encoder_name)
56 | model.encoder = encoder_module.Model(**args.encoder_specs).cuda()
57 | if args.checkpoint is not None:
58 | if model.decoder is not None:
59 | state_dict = torch.load(args.checkpoint)["model_state"]
60 | model.decoder.load_state_dict(state_dict)
61 | if model.encoder is not None:
62 | state_dict = torch.load(Path(args.checkpoint).parent / f"encoder_{use_epoch}.pth.tar")["model_state"]
63 | model.encoder.load_state_dict(state_dict)
64 |
65 | return model, args
66 |
67 |
68 | def forward_model(model: nn.Module, network_input: torch.Tensor = None,
69 | latent_input: torch.Tensor = None,
70 | xyz_input: torch.Tensor = None,
71 | loss_func=None, max_sample: int = 2 ** 32,
72 | no_detach: bool = False,
73 | verbose: bool = False):
74 | """
75 | Forward the neural network model. (if loss_func is not None, will also compute the gradient w.r.t. the loss)
76 | Either network_input or (latent_input, xyz_input) tuple could be provided.
77 | :param model: MLP model.
78 | :param network_input: (N, 128)
79 | :param latent_input: (N, 125)
80 | :param xyz_input: (N, 3)
81 | :param loss_func:
82 |     :param max_sample: maximum number of samples per forward chunk.
83 | :return: [(N, X)] several values
84 | """
85 | if latent_input is not None and xyz_input is not None:
86 | assert network_input is None
87 | network_input = torch.cat((latent_input, xyz_input), dim=1)
88 |
89 | assert network_input.ndimension() == 2
90 |
91 | n_chunks = math.ceil(network_input.size(0) / max_sample)
92 | assert not no_detach or n_chunks == 1
93 |
94 | network_input = torch.chunk(network_input, n_chunks)
95 |
96 | if verbose:
97 | logging.debug(f"Network input chunks = {n_chunks}, each chunk = {network_input[0].size()}")
98 |
99 | head = 0
100 | output_chunks = None
101 | for chunk_i, input_chunk in enumerate(network_input):
102 | # (N, 1)
103 | network_output = model.surface_decoding(input_chunk)
104 | if not isinstance(network_output, tuple):
105 | network_output = [network_output, ]
106 |
107 | if chunk_i == 0:
108 | output_chunks = [[] for _ in range(len(network_output))]
109 |
110 | if loss_func is not None:
111 | # The 'graph' in pytorch stores how the final variable is computed to its current form.
112 | # Under normal situations, we can delete this path right after the gradient is computed because the path
113 | # will be re-constructed on next forward call.
114 | # However, in our case, self.latent_vec is the leaf node requesting the gradient, the specific computation:
115 | # vec = self.latent_vec[inds] && cat(vec, xyz)
116 |             # will be forgotten, too, if we delete the entire graph.
117 | # Indeed, the above computation is the ONLY part that we do not re-build during the next forwarding.
118 | # So, we set retain_graph to True.
119 | # According to https://github.com/pytorch/pytorch/issues/31185, if we delete the head loss immediately
120 |             # after the backward(retain_graph=True), the un-referenced part of the graph will be deleted too,
121 | # hence keeping only the needed part (a sub-graph). Perfect :)
122 | loss_func(network_output,
123 | torch.arange(head, head + network_output[0].size(0), device=network_output[0].device)
124 | ).backward(retain_graph=(chunk_i != n_chunks - 1))
125 | if not no_detach:
126 | network_output = [t.detach() for t in network_output]
127 |
128 | for payload_i, payload in enumerate(network_output):
129 | output_chunks[payload_i].append(payload)
130 | head += network_output[0].size(0)
131 |
132 | output_chunks = [torch.cat(t, dim=0) for t in output_chunks]
133 | return output_chunks
134 |
135 |
136 | def get_samples(r: int, device: torch.device, a: float = 0.0, b: float = None):
137 | """
138 |     Get samples within a cube; the voxel size is (b-a)/(r-1) and the samples span [a, b].
139 |     :param r: number of samples per axis (returns r**3 points)
140 | :param a: bound min
141 | :param b: bound max
142 | :return: (r*r*r, 3)
143 | """
144 | overall_index = torch.arange(0, r ** 3, 1, device=device, dtype=torch.long)
145 | r = int(r)
146 |
147 | if b is None:
148 | b = 1. - 1. / r
149 |
150 | vsize = (b - a) / (r - 1)
151 | samples = torch.zeros(r ** 3, 3, device=device, dtype=torch.float32)
152 | samples[:, 0] = (overall_index // (r * r)) * vsize + a
153 | samples[:, 1] = ((overall_index // r) % r) * vsize + a
154 | samples[:, 2] = (overall_index % r) * vsize + a
155 |
156 | return samples
157 |
158 |
159 | def pack_samples(sample_indexer: torch.Tensor, count: int,
160 | sample_values: torch.Tensor = None):
161 | """
162 | Pack a set of samples into batches. Each element in the batch is a random subsampling of the sample_values
163 | :param sample_indexer: (N, )
164 | :param count: C
165 | :param sample_values: (N, L), if None, will return packed_inds instead of packed.
166 | :return: packed (B, C, L) or packed_inds (B, C), mapping: (B, ).
167 | """
168 |     from uni.ext import pack_batch
169 |
170 | # First shuffle the samples to avoid biased samples.
171 | shuffle_inds = torch.randperm(sample_indexer.size(0), device=sample_indexer.device)
172 | sample_indexer = sample_indexer[shuffle_inds]
173 |
174 | mapping, pinds, pcount = torch.unique(sample_indexer, return_inverse=True, return_counts=True)
175 |
176 | n_batch = mapping.size(0)
177 | packed_inds = pack_batch(pinds, n_batch, count * 2) # (B, 2C)
178 |
179 | pcount.clamp_(max=count * 2 - 1)
180 | packed_inds_ind = torch.floor(torch.rand((n_batch, count), device=pcount.device) * pcount.unsqueeze(-1)).long() # (B, C)
181 |
182 | packed_inds = torch.gather(packed_inds, 1, packed_inds_ind) # (B, C)
183 | packed_inds = shuffle_inds[packed_inds] # (B, C)
184 |
185 | if sample_values is not None:
186 | assert sample_values.size(0) == sample_indexer.size(0)
187 | packed = torch.index_select(sample_values, 0, packed_inds.view(-1)).view(n_batch, count, sample_values.size(-1))
188 | return packed, mapping
189 | else:
190 | return packed_inds, mapping
191 |
192 |
193 | def groupby_reduce(sample_indexer: torch.Tensor, sample_values: torch.Tensor, op: str = "max"):
194 | """
195 | Group-By and Reduce sample_values according to their indices, the reduction operation is defined in `op`.
196 | :param sample_indexer: (N,). An index, must start from 0 and go to the (max-1), can be obtained using torch.unique.
197 | :param sample_values: (N, L)
198 |     :param op: has to be one of 'mean', 'sum'
199 | :return: reduced values: (C, L)
200 | """
201 | C = sample_indexer.max() + 1
202 | n_samples = sample_indexer.size(0)
203 |
204 | assert n_samples == sample_values.size(0), "Indexer and Values must agree on sample count!"
205 |
206 | if op == 'mean':
207 |         from uni.ext import groupby_sum
208 | values_sum, values_count = groupby_sum(sample_values, sample_indexer, C)
209 | return values_sum / values_count.unsqueeze(-1)
210 | elif op == 'sum':
211 |         from uni.ext import groupby_sum
212 | values_sum, _ = groupby_sum(sample_values, sample_indexer, C)
213 | return values_sum
214 | else:
215 | raise NotImplementedError
216 |
217 |
218 | def fix_weight_norm_pickle(net: torch.nn.Module):
219 | from torch.nn.utils.weight_norm import WeightNorm
220 | for mdl in net.modules():
221 | fix_name = None
222 | if isinstance(mdl, torch.nn.Linear):
223 | for k, hook in mdl._forward_pre_hooks.items():
224 | if isinstance(hook, WeightNorm):
225 | fix_name = hook.name
226 | if fix_name is not None:
227 | delattr(mdl, fix_name)
228 |
--------------------------------------------------------------------------------
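`get_samples` above enumerates an axis-aligned `r**3` grid with spacing `(b-a)/(r-1)`. A quick check on the CPU, assuming the installed `uni` package resolves the module's imports:

```python
# Hedged sketch: sample a 4x4x4 grid spanning [-1, 1] with get_samples.
import torch
from uni.encoder.utility import get_samples

pts = get_samples(4, torch.device('cpu'), a=-1.0, b=1.0)   # (64, 3), spacing 2/3
print(pts.shape, pts.min().item(), pts.max().item())       # torch.Size([64, 3]) -1.0 1.0
```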
/uni/ext/__init__.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from torch.utils.cpp_extension import load
3 |
4 |
5 | def p(rel_path):
6 | abs_path = Path(__file__).parent / rel_path
7 | return str(abs_path)
8 |
9 |
10 | __COMPILE_VERBOSE = False
11 | optimize_flags = {'extra_cflags': ['-O3'], 'extra_cuda_cflags': ['-O3']}
12 | #optimize_flags = {}
13 |
14 | # Load in Marching cubes.
15 | _marching_cubes_module = load(name='marching_cubes',
16 | sources=[p('marching_cubes/mc.cpp'),
17 | p('marching_cubes/mc_interp_kernel.cu')],
18 | verbose=__COMPILE_VERBOSE, **optimize_flags)
19 | marching_cubes_interp = _marching_cubes_module.marching_cubes_sparse_interp
20 |
21 | # Load in Image processing modules.
22 | _imgproc_module = load(name='imgproc',
23 | sources=[p('imgproc/imgproc.cu'), p('imgproc/imgproc.cpp'), p('imgproc/photometric.cu')],
24 | verbose=__COMPILE_VERBOSE, **optimize_flags)
25 | unproject_depth = _imgproc_module.unproject_depth
26 | compute_normal_weight = _imgproc_module.compute_normal_weight
27 | compute_normal_weight_robust = _imgproc_module.compute_normal_weight_robust
28 | filter_depth = _imgproc_module.filter_depth
29 | rgb_odometry = _imgproc_module.rgb_odometry
30 | gradient_xy = _imgproc_module.gradient_xy
31 |
32 | # Load in Indexing modules. (which deal with complicated indexing scheme)
33 | _indexing_module = load(name='indexing',
34 | sources=[p('indexing/indexing.cpp'), p('indexing/indexing.cu')],
35 | verbose=__COMPILE_VERBOSE, **optimize_flags)
36 | pack_batch = _indexing_module.pack_batch
37 | groupby_sum = _indexing_module.groupby_sum
38 |
39 | # We need point cloud processing module.
40 | _pcproc_module = load(name='pcproc',
41 | sources=[p('pcproc/pcproc.cpp'), p('pcproc/pcproc.cu'), p('pcproc/cuda_kdtree.cu')],
42 | verbose=__COMPILE_VERBOSE, **optimize_flags)
43 | remove_radius_outlier = _pcproc_module.remove_radius_outlier
44 | estimate_normals = _pcproc_module.estimate_normals
45 |
--------------------------------------------------------------------------------
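Importing `uni.ext` JIT-compiles all four CUDA extensions, which (as the README notes) can take several minutes on the first run. The declared C++ signature of `unproject_depth` takes a depth map plus pinhole intrinsics; the call below is a hedged sketch with a synthetic depth map, and the `(H, W, 3)` output shape is an expectation based on how the point map is consumed elsewhere.

```python
# Hedged sketch: lift a metric depth map to a per-pixel point map with unproject_depth.
import torch
import uni.ext as ext   # first import triggers JIT compilation of the CUDA kernels

depth = torch.rand(480, 640, device='cuda') + 0.5            # synthetic metric depth (m)
pc_map = ext.unproject_depth(depth, 525.0, 525.0, 319.5, 239.5)
print(pc_map.shape)                                           # expected: (480, 640, 3)
```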
/uni/ext/imgproc/common.cuh:
--------------------------------------------------------------------------------
1 | #include <torch/extension.h>
2 |
3 | #include <cuda.h>
4 | #include <cuda_runtime.h>
5 | #include <vector>
6 |
7 | #define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x " must be a CUDA tensor")
8 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
9 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
10 |
11 | using DepthAccessor = torch::PackedTensorAccessor32<float, 2, torch::RestrictPtrTraits>;
12 | using PCMapAccessor = torch::PackedTensorAccessor32<float, 3, torch::RestrictPtrTraits>;
13 | using IntensityAccessor = torch::PackedTensorAccessor32<float, 2, torch::RestrictPtrTraits>;
14 | using GradientAccessor = torch::PackedTensorAccessor32<float, 3, torch::RestrictPtrTraits>;
15 | //using MaskAccessor = torch::PackedTensorAccessor32<bool, 2, torch::RestrictPtrTraits>;
16 |
17 | struct matrix3
18 | {
19 | float3 r1{0.0, 0.0, 0.0};
20 | float3 r2{0.0, 0.0, 0.0};
21 | float3 r3{0.0, 0.0, 0.0};
22 |
23 | explicit matrix3(const std::vector<float>& data) {
24 | r1.x = data[0]; r1.y = data[1]; r1.z = data[2];
25 | r2.x = data[3]; r2.y = data[4]; r2.z = data[5];
26 | r3.x = data[6]; r3.y = data[7]; r3.z = data[8];
27 | }
28 |
29 | __host__ __device__ float3 operator*(const float3& rv) const {
30 | return make_float3(
31 | r1.x * rv.x + r1.y * rv.y + r1.z * rv.z,
32 | r2.x * rv.x + r2.y * rv.y + r2.z * rv.z,
33 | r3.x * rv.x + r3.y * rv.y + r3.z * rv.z);
34 | }
35 | };
36 |
37 | static uint div_up(const uint a, const uint b) {
38 | return (a + b - 1) / b;
39 | }
40 |
41 | __device__ __forceinline__ static float3 get_vec3(const PCMapAccessor map, const uint i, const uint j) {
42 | return make_float3(map[i][j][0], map[i][j][1], map[i][j][2]);
43 | }
44 |
45 | inline __host__ __device__ float3 operator+(const float3& a, const float3& b) {
46 | return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
47 | }
48 |
49 | inline __host__ __device__ float3 operator-(const float3& a, const float3& b) {
50 | return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
51 | }
52 |
53 | inline __host__ __device__ void operator/=(float3 &a, float b) {
54 | a.x /= b; a.y /= b; a.z /= b;
55 | }
56 |
57 | inline __host__ __device__ void operator+=(float3 &a, const float3& b) {
58 | a.x += b.x; a.y += b.y; a.z += b.z;
59 | }
60 |
61 | inline __host__ __device__ void operator-=(float3 &a, const float3& b) {
62 | a.x -= b.x; a.y -= b.y; a.z -= b.z;
63 | }
64 |
65 | inline __host__ __device__ float3 cross(const float3& a, const float3& b) {
66 | return make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x);
67 | }
68 |
69 | inline __host__ __device__ float dot(const float3& a, const float3& b) {
70 | return a.x * b.x + a.y * b.y + a.z * b.z;
71 | }
72 |
73 | inline __host__ __device__ float length(const float3& v) {
74 | return sqrt(v.x * v.x + v.y * v.y + v.z * v.z);
75 | }
76 |
77 | inline __host__ __device__ float squared_length(const float3& v) {
78 | return v.x * v.x + v.y * v.y + v.z * v.z;
79 | }
80 |
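These `float3` helpers (cross, dot, length, and the component-wise operators) are the vector toolbox the image-processing kernels build on, e.g. when turning the unprojected point map into normals. Purely as an illustration of that idea (not the exact kernel logic), a torch version of "central differences plus a cross product" looks like this:

```python
import torch
import torch.nn.functional as F

def normals_from_point_map(pc_map):
    # pc_map: (H, W, 3) per-pixel 3D points (e.g. the output of unproject_depth).
    du = pc_map[1:-1, 2:, :] - pc_map[1:-1, :-2, :]   # finite difference along u
    dv = pc_map[2:, 1:-1, :] - pc_map[:-2, 1:-1, :]   # finite difference along v
    n = torch.cross(du, dv, dim=-1)                   # mirrors cross() above
    n = n / torch.linalg.norm(n, dim=-1, keepdim=True).clamp(min=1e-8)
    # pad the one-pixel border back so the output is (H, W, 3) again
    return F.pad(n.permute(2, 0, 1), (1, 1, 1, 1)).permute(1, 2, 0)
```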
--------------------------------------------------------------------------------
/uni/ext/imgproc/imgproc.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/extension.h>
2 |
3 | torch::Tensor unproject_depth(torch::Tensor depth, float fx, float fy, float cx, float cy);
4 | //torch::Tensor unproject_depth(torch::Tensor depth, float fx, float fy, float cx, float cy);
5 |
6 | // We might do this several times; this interface enables memory re-use.
7 | void filter_depth(torch::Tensor depth_in, torch::Tensor depth_out);
8 | torch::Tensor compute_normal_weight(torch::Tensor pc_map);
9 | torch::Tensor compute_normal_weight_robust(torch::Tensor pc_map);
10 |
11 | // Compute RGB-image based odometry. Will return per-correspondence residual (M, ) and Jacobian (M, 6) w.r.t. the Lie algebra
12 | // ... based on the current pose estimate ( T(xi) * prev_xyz = cur_xyz ).
13 | // prev_intensity (H, W), float32, ranging from 0 to 1.
14 | std::vector<torch::Tensor> rgb_odometry(
15 | torch::Tensor prev_intensity, torch::Tensor prev_depth,
16 | torch::Tensor cur_intensity, torch::Tensor cur_depth, torch::Tensor cur_dIdxy,
17 | const std::vector<float>& intr,
18 | const std::vector<float>& krkinv_data,
19 | const std::vector<float>& kt_data,
20 | float min_grad_scale, float max_depth_delta, bool compute_J);
21 | torch::Tensor gradient_xy(torch::Tensor cur_intensity);
22 |
23 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
24 | m.def("unproject_depth", &unproject_depth, "Unproject Depth (CUDA)");
25 | m.def("filter_depth", &filter_depth, "Filter Depth (CUDA)");
26 | m.def("compute_normal_weight", &compute_normal_weight, "Compute normal and weight (CUDA)");
27 | m.def("compute_normal_weight_robust", &compute_normal_weight_robust, "Compute normal and weight (CUDA)");
28 | m.def("rgb_odometry", &rgb_odometry, "Compute the function value and gradient for RGB Odometry (CUDA)");
29 | m.def("gradient_xy", &gradient_xy, "Compute Gradient of an image for jacobian computation. (CUDA)");
30 | }
31 |
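The `krkinv_data` and `kt_data` arguments carry the standard warp decomposition of a relative pose: with intrinsics K and a transform (R, t) taking current-frame points into the previous frame (which is how the kernel in `photometric.cu` uses them), pixels are warped through K R K^-1 and K t. A hedged sketch of how a caller might assemble these flat lists; the intrinsics and pose are placeholders, and the row-major flattening matches the `matrix3` constructor in `common.cuh`:

```python
import torch

def make_warp_terms(K, T_prev_cur):
    # K: (3, 3) intrinsics; T_prev_cur: (4, 4) pose mapping current-frame points into the previous frame.
    R, t = T_prev_cur[:3, :3], T_prev_cur[:3, 3]
    krkinv = K @ R @ torch.linalg.inv(K)
    kt = K @ t
    # rgb_odometry takes flat float lists; matrix3 reads krkinv row-major.
    return krkinv.reshape(-1).tolist(), kt.tolist()

K = torch.tensor([[525.0, 0.0, 319.5],
                  [0.0, 525.0, 239.5],
                  [0.0, 0.0, 1.0]])
krkinv_data, kt_data = make_warp_terms(K, torch.eye(4))
```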
--------------------------------------------------------------------------------
/uni/ext/imgproc/photometric.cu:
--------------------------------------------------------------------------------
1 | #include "common.cuh"
2 |
3 | __global__ static void gradient_xy_kernel(const IntensityAccessor intensity, GradientAccessor out_grad) {
4 | const uint v = blockIdx.x * blockDim.x + threadIdx.x;
5 | const uint u = blockIdx.y * blockDim.y + threadIdx.y;
6 | if (v > intensity.size(0) - 1 || u > intensity.size(1) - 1) { return; }
7 | if (v < 1 || v > intensity.size(0) - 2 || u < 1 || u > intensity.size(1) - 2) {
8 | out_grad[v][u][0] = out_grad[v][u][1] = NAN;
9 | return;
10 | }
11 |
12 | // Sobel morph.
13 | float u_d1 = intensity[v - 1][u + 1] - intensity[v - 1][u - 1];
14 | float u_d2 = intensity[v][u + 1] - intensity[v][u - 1];
15 | float u_d3 = intensity[v + 1][u + 1] - intensity[v + 1][u - 1];
16 | out_grad[v][u][0] = (u_d1 + 2 * u_d2 + u_d3) / 8.0f;
17 |
18 | float v_d1 = intensity[v + 1][u - 1] - intensity[v - 1][u - 1];
19 | float v_d2 = intensity[v + 1][u] - intensity[v - 1][u];
20 | float v_d3 = intensity[v + 1][u + 1] - intensity[v - 1][u + 1];
21 | out_grad[v][u][1] = (v_d1 + 2 * v_d2 + v_d3) / 8.0f;
22 | }
23 |
24 | __global__ static void evaluate_fJ(const IntensityAccessor prev_img, const DepthAccessor prev_depth,
25 | const IntensityAccessor cur_img, const DepthAccessor cur_depth,
26 | const GradientAccessor cur_dIdxy, const float min_grad_scale, const float max_depth_delta,
27 | matrix3 krkinv, float3 kt, float4 calib,
28 | IntensityAccessor f_val, GradientAccessor J_val, bool compute_J) {
29 |
30 | const uint v = blockIdx.x * blockDim.x + threadIdx.x;
31 | const uint u = blockIdx.y * blockDim.y + threadIdx.y;
32 | const uint img_h = prev_img.size(0);
33 | const uint img_w = prev_img.size(1);
34 |
35 | // The boundary will not be valid anyway.
36 | if (v > img_h - 1 || u > img_w - 1) { return; }
37 |
38 | f_val[v][u] = NAN;
39 |
40 | // Also prune if gradient is too small (which is useless for pose estimation)
41 | float dI_dx = cur_dIdxy[v][u][0], dI_dy = cur_dIdxy[v][u][1];
42 | float mTwo = (dI_dx * dI_dx) + (dI_dy * dI_dy);
43 | if (mTwo < min_grad_scale || isnan(mTwo)) {
44 | return;
45 | }
46 |
47 | float d1 = cur_depth[v][u];
48 | if (isnan(d1)) {
49 | return;
50 | }
51 |
52 | float warpped_d1 = d1 * (krkinv.r3.x * u + krkinv.r3.y * v + krkinv.r3.z) + kt.z;
53 | int u0 = __float2int_rn((d1 * (krkinv.r1.x * u + krkinv.r1.y * v + krkinv.r1.z) + kt.x) / warpped_d1);
54 | int v0 = __float2int_rn((d1 * (krkinv.r2.x * u + krkinv.r2.y * v + krkinv.r2.z) + kt.y) / warpped_d1);
55 |
56 | if (u0 >= 0 && u0 < img_w && v0 >= 0 && v0 < img_h) {
57 | float d0 = prev_depth[v0][u0];
58 | // Make sure this pair of observations is not an outlier and is really observed.
59 | if (!isnan(d0) && abs(warpped_d1 - d0) <= max_depth_delta && d0 > 0.0) {
60 | // Compute function value.
61 | f_val[v][u] = cur_img[v][u] - prev_img[v0][u0];
62 | // Compute gradient w.r.t. xi.
63 | if (compute_J) {
64 | float3 G = make_float3(d0 * (u0 - calib.z) / calib.x, d0 * (v0 - calib.w) / calib.y, d0);
65 | float p0 = dI_dx * calib.x / G.z;
66 | float p1 = dI_dy * calib.y / G.z;
67 | float p2 = -(p0 * G.x + p1 * G.y) / G.z;
68 | J_val[v][u][0] = p0;
69 | J_val[v][u][1] = p1;
70 | J_val[v][u][2] = p2;
71 | J_val[v][u][3] = -G.z * p1 + G.y * p2;
72 | J_val[v][u][4] = G.z * p0 - G.x * p2;
73 | J_val[v][u][5] = -G.y * p0 + G.x * p1;
74 | }
75 | }
76 | }
77 | }
78 |
79 | torch::Tensor gradient_xy(torch::Tensor cur_intensity) {
80 | CHECK_INPUT(cur_intensity);
81 | const uint img_h = cur_intensity.size(0);
82 | const uint img_w = cur_intensity.size(1);
83 |
84 | dim3 dimBlock = dim3(16, 16);
85 | dim3 dimGrid = dim3(div_up(img_h, dimBlock.x), div_up(img_w, dimBlock.y));
86 |
87 | torch::Tensor cur_dIdxy = torch::empty({img_h, img_w, 2}, torch::dtype(torch::kFloat32).device(torch::kCUDA));
88 | gradient_xy_kernel<<<dimGrid, dimBlock>>>(
89 | cur_intensity.packed_accessor32<float, 2, torch::RestrictPtrTraits>(),
90 | cur_dIdxy.packed_accessor32<float, 3, torch::RestrictPtrTraits>()
91 | );
92 | return cur_dIdxy;
93 | }
94 |
95 | std::vector<torch::Tensor> rgb_odometry(
96 | torch::Tensor prev_intensity, torch::Tensor prev_depth,
97 | torch::Tensor cur_intensity, torch::Tensor cur_depth, torch::Tensor cur_dIdxy,
98 | const std::vector<float>& intr,
99 | const std::vector<float>& krkinv_data,
100 | const std::vector<float>& kt_data,
101 | float min_grad_scale, float max_depth_delta, bool compute_J) {
102 |
103 | CHECK_INPUT(prev_intensity); CHECK_INPUT(prev_depth);
104 | CHECK_INPUT(cur_intensity); CHECK_INPUT(cur_depth); CHECK_INPUT(cur_dIdxy);
105 |
106 | const uint img_h = cur_intensity.size(0);
107 | const uint img_w = cur_intensity.size(1);
108 |
109 | dim3 dimBlock = dim3(16, 16);
110 | dim3 dimGrid = dim3(div_up(img_h, dimBlock.x), div_up(img_w, dimBlock.y));
111 |
112 | matrix3 krkinv(krkinv_data);
113 | float3 kt{kt_data[0], kt_data[1], kt_data[2]};
114 | float4 calib{intr[0], intr[1], intr[2], intr[3]};
115 |
116 | torch::Tensor f_img = torch::empty({img_h, img_w}, torch::dtype(torch::kFloat32).device(torch::kCUDA));
117 | torch::Tensor J_img = torch::empty({0, 0, 6}, torch::dtype(torch::kFloat32).device(torch::kCUDA));
118 | if (compute_J) {
119 | J_img = torch::empty({img_h, img_w, 6}, torch::dtype(torch::kFloat32).device(torch::kCUDA));
120 | }
121 |
122 | evaluate_fJ<<<dimGrid, dimBlock>>>(
123 | prev_intensity.packed_accessor32<float, 2, torch::RestrictPtrTraits>(),
124 | prev_depth.packed_accessor32<float, 2, torch::RestrictPtrTraits>(),
125 | cur_intensity.packed_accessor32<float, 2, torch::RestrictPtrTraits>(),
126 | cur_depth.packed_accessor32<float, 2, torch::RestrictPtrTraits>(),
127 | cur_dIdxy.packed_accessor32<float, 3, torch::RestrictPtrTraits>(),
128 | min_grad_scale, max_depth_delta,
129 | krkinv, kt, calib,
130 | f_img.packed_accessor32<float, 2, torch::RestrictPtrTraits>(),
131 | J_img.packed_accessor32<float, 3, torch::RestrictPtrTraits>(), compute_J);
132 |
133 | if (compute_J) {
134 | return {f_img, J_img};
135 | } else {
136 | return {f_img};
137 | }
138 | }
139 |
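`rgb_odometry` hands back the residual image `f_img` (NaN at pixels that were pruned) and, when `compute_J` is true, the (H, W, 6) Jacobian w.r.t. the se(3) increment. On top of those two tensors, a single Gauss-Newton step could look roughly like the sketch below; the sign convention and the damping term are illustrative assumptions, not the repo's tracker code.

```python
import torch

def gauss_newton_step(f_img, J_img, damping=1e-6):
    valid = torch.isfinite(f_img)          # keep pixels with a valid residual
    f = f_img[valid]                       # (M,)
    J = J_img[valid]                       # (M, 6)
    H = J.t() @ J + damping * torch.eye(6, device=J.device)
    g = J.t() @ f
    # Solve H * xi = -g; the sign depends on how the residual is defined.
    return torch.linalg.solve(H, -g)
```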
--------------------------------------------------------------------------------
/uni/ext/indexing/indexing.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/extension.h>
2 |
3 | torch::Tensor pack_batch(torch::Tensor indices, uint n_batch, uint n_point);
4 | std::vector<torch::Tensor> groupby_sum(torch::Tensor values, torch::Tensor indices, uint C);
5 |
6 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
7 | m.def("pack_batch", &pack_batch, "Pack Batch (CUDA)");
8 | m.def("groupby_sum", &groupby_sum, "GroupBy Sum (CUDA)");
9 | }
10 |
--------------------------------------------------------------------------------
/uni/ext/indexing/indexing.cu:
--------------------------------------------------------------------------------
1 | #include <torch/extension.h>
2 | #include <thrust/device_vector.h>
3 |
4 | #define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x " must be a CUDA tensor")
5 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
6 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
7 |
8 | using CountAccessor = torch::PackedTensorAccessor32<int, 1, torch::RestrictPtrTraits>;
9 | using IndexAccessor = torch::PackedTensorAccessor32<long, 1, torch::RestrictPtrTraits>;
10 | using PackedIndAccessor = torch::PackedTensorAccessor32<long, 2, torch::RestrictPtrTraits>;
11 | using ValueAccessor = torch::PackedTensorAccessor32<float, 2, torch::RestrictPtrTraits>;
12 |
13 | static uint div_up(const uint a, const uint b) {
14 | return (a + b - 1) / b;
15 | }
16 |
17 | __global__ static void pack_batch_kernel(const IndexAccessor indices, PackedIndAccessor packed_inds, const uint n_all,
18 | const uint n_batch, const uint n_point, int* __restrict__ filled_count) {
19 | const uint i_data = blockIdx.x * blockDim.x + threadIdx.x;
20 | if (i_data >= n_all) {
21 | return;
22 | }
23 |
24 | long i_group = indices[i_data];
25 | if (i_group >= n_batch) {
26 | return;
27 | }
28 |
29 | // Get one valid id.
30 | int cur_count = atomicAdd(filled_count + i_group, 1);
31 | if (cur_count >= n_point) {
32 | return;
33 | }
34 | packed_inds[i_group][cur_count] = i_data;
35 | }
36 |
37 | __device__ static float atomicMax(float* __restrict__ address, float val) {
38 | int* address_as_i = (int*) address;
39 | int old = *address_as_i, assumed;
40 | do {
41 | assumed = old;
42 | old = ::atomicCAS(address_as_i, assumed,
43 | __float_as_int(::fmaxf(val, __int_as_float(assumed))));
44 | } while (assumed != old);
45 | return __int_as_float(old);
46 | }
47 |
48 | __global__ static void groupby_max_kernel(const ValueAccessor values, const IndexAccessor indices, ValueAccessor reduced_values) {
49 | const uint i = blockIdx.x;
50 | const uint l = threadIdx.x;
51 |
52 | float value = values[i][l];
53 | long index = indices[i];
54 |
55 | float* rptr = reduced_values[index].data() + l;
56 | atomicMax(rptr, value);
57 | }
58 |
59 | __global__ static void groupby_sum_kernel(const ValueAccessor values, const IndexAccessor indices, ValueAccessor sum_values, CountAccessor sum_counts) {
60 | const uint i = blockIdx.x;
61 | const uint l = threadIdx.x;
62 |
63 | float value = values[i][l];
64 | long index = indices[i];
65 |
66 | float* rptr = sum_values[index].data() + l;
67 | int* iptr = &(sum_counts[index]);
68 |
69 | atomicAdd(rptr, value);
70 | atomicAdd(iptr, 1);
71 | }
72 |
73 | torch::Tensor pack_batch(torch::Tensor indices, uint n_batch, uint n_point) {
74 | CHECK_INPUT(indices);
75 | torch::Tensor packed_inds = torch::empty({n_batch, n_point}, torch::dtype(torch::kLong).device(torch::kCUDA));
76 | thrust::device_vector<int> filled_count(n_batch, 0);
77 | const uint n_all = indices.size(0);
78 |
79 | dim3 dimBlock = dim3(128);
80 | dim3 dimGrid = dim3(div_up(n_all, dimBlock.x));
81 | pack_batch_kernel<<<dimGrid, dimBlock>>>(
82 | indices.packed_accessor32<long, 1, torch::RestrictPtrTraits>(),
83 | packed_inds.packed_accessor32<long, 2, torch::RestrictPtrTraits>(),
84 | n_all, n_batch, n_point, filled_count.data().get());
85 | return packed_inds;
86 | }
87 |
88 |
89 | std::vector<torch::Tensor> groupby_sum(torch::Tensor values, torch::Tensor indices, uint C) {
90 | CHECK_INPUT(values);
91 | CHECK_INPUT(indices);
92 |
93 | const uint N = values.size(0);
94 | const uint L = values.size(1);
95 |
96 | torch::Tensor sum_values = torch::zeros({C, L}, torch::dtype(torch::kFloat32).device(torch::kCUDA));
97 | torch::Tensor sum_count = torch::zeros({C}, torch::dtype(torch::kInt32).device(torch::kCUDA));
98 |
99 | dim3 dimBlock = dim3(L);
100 | dim3 dimGrid = dim3(N);
101 | groupby_sum_kernel<<<dimGrid, dimBlock>>>(
102 | values.packed_accessor32<float, 2, torch::RestrictPtrTraits>(),
103 | indices.packed_accessor32<long, 1, torch::RestrictPtrTraits>(),
104 | sum_values.packed_accessor32<float, 2, torch::RestrictPtrTraits>(),
105 | sum_count.packed_accessor32<int, 1, torch::RestrictPtrTraits>()
106 | );
107 |
108 | return {sum_values, sum_count};
109 | }
110 |
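For reading `pack_batch_kernel`: each data index is appended to the slot list of its group, groups with id >= n_batch are ignored, and elements beyond the first `n_point` per group are dropped; because the output is allocated with `torch::empty`, unfilled slots contain garbage. The CPU reference below mirrors those semantics (it additionally marks unfilled slots with -1 and returns the fill counts, which the kernel keeps internal):

```python
import torch

def pack_batch_reference(indices, n_batch, n_point):
    # indices: (N,) long tensor; indices[i] is the group id of data element i.
    packed_inds = torch.full((n_batch, n_point), -1, dtype=torch.long)
    filled = torch.zeros(n_batch, dtype=torch.long)
    for i_data, i_group in enumerate(indices.tolist()):
        if i_group >= n_batch:
            continue
        slot = filled[i_group].item()
        filled[i_group] += 1
        if slot >= n_point:
            continue   # group already full; extra elements are dropped, as in the kernel
        packed_inds[i_group, slot] = i_data
    return packed_inds, filled
```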
--------------------------------------------------------------------------------
/uni/ext/marching_cubes/mc.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/extension.h>
2 |
3 | std::vector<torch::Tensor> marching_cubes_sparse_interp_cuda(
4 | torch::Tensor indexer, // (nx, ny, nz) -> data_id
5 | torch::Tensor valid_blocks, // (K, )
6 | torch::Tensor vec_batch_mapping,
7 | torch::Tensor cube_sdf, // (M, rx, ry, rz)
8 | torch::Tensor cube_std, // (M, rx, ry, rz)
9 | int max_n_triangles, // Maximum number of triangles in the output buffer.
10 | const std::vector<int>& n_xyz, // [nx, ny, nz]
11 | float max_std // Prune all vertices whose std exceeds this value.
12 | );
13 |
14 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
15 | m.def("marching_cubes_sparse_interp", &marching_cubes_sparse_interp_cuda, "Marching Cubes with Interpolation (CUDA)");
16 | }
17 |
--------------------------------------------------------------------------------
/uni/ext/pcproc/cuda_kdtree.cuh:
--------------------------------------------------------------------------------
1 | #ifndef FLANN_CUDA_KD_TREE_BUILDER_H_
2 | #define FLANN_CUDA_KD_TREE_BUILDER_H_
3 |
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include "cutil_math.h"
11 | #include
12 | #include