├── .gitignore ├── .gitmodules ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── configs ├── Custom │ └── custom_template.yaml ├── Dynamic │ ├── Bonn │ │ ├── bonn_balloon.yaml │ │ ├── bonn_balloon2.yaml │ │ ├── bonn_crowd.yaml │ │ ├── bonn_crowd2.yaml │ │ ├── bonn_dynamic.yaml │ │ ├── bonn_moving_nonobstructing_box.yaml │ │ ├── bonn_moving_nonobstructing_box2.yaml │ │ ├── bonn_person_tracking.yaml │ │ └── bonn_person_tracking2.yaml │ ├── TUM_RGBD │ │ ├── freiburg2_desk_with_person.yaml │ │ ├── freiburg3_sitting_halfsphere.yaml │ │ ├── freiburg3_sitting_halfsphere_static.yaml │ │ ├── freiburg3_sitting_rpy.yaml │ │ ├── freiburg3_sitting_xyz.yaml │ │ ├── freiburg3_walking_halfsphere.yaml │ │ ├── freiburg3_walking_halfsphere_static.yaml │ │ ├── freiburg3_walking_rpy.yaml │ │ ├── freiburg3_walking_xyz.yaml │ │ └── tum_dynamic.yaml │ ├── Wild_SLAM_Mocap │ │ ├── ANYmal1.yaml │ │ ├── ANYmal2.yaml │ │ ├── ball.yaml │ │ ├── crowd.yaml │ │ ├── crowd_demo.yaml │ │ ├── person_tracking.yaml │ │ ├── racket.yaml │ │ ├── stones.yaml │ │ ├── table_tracking1.yaml │ │ ├── table_tracking2.yaml │ │ ├── umbrella.yaml │ │ └── wild_slam_mocap.yaml │ └── Wild_SLAM_iPhone │ │ ├── horse.yaml │ │ ├── parking.yaml │ │ ├── piano.yaml │ │ ├── shopping.yaml │ │ ├── street.yaml │ │ ├── tower.yaml │ │ └── wild_slam_iphone.yaml ├── Static │ └── TUM_RGBD │ │ ├── freiburg1_desk.yaml │ │ ├── freiburg2_xyz.yaml │ │ ├── freiburg3_office.yaml │ │ └── tum.yaml └── wildgs_slam.yaml ├── media └── teaser.png ├── requirements.txt ├── run.py ├── scripts_downloading ├── download_bonn.sh ├── download_demo_data.sh ├── download_tum.sh ├── download_wild_slam_iphone.sh ├── download_wild_slam_mocap_scene1.sh └── download_wild_slam_mocap_scene2.sh ├── scripts_run ├── run_bonn_all.sh ├── run_tum_dynamic_all.sh ├── run_wild_slam_mocap_all.sh └── summarize_pose_eval.py ├── setup.py ├── src ├── __init__.py ├── backend.py ├── config.py ├── depth_video.py ├── factor_graph.py ├── frontend.py ├── geom │ ├── __init__.py │ ├── ba.py │ ├── chol.py │ └── projective_ops.py ├── gui │ ├── gl_render │ │ ├── LICENSE │ │ ├── __init__.py │ │ ├── render_ogl.py │ │ ├── shaders │ │ │ ├── gau_frag.glsl │ │ │ └── gau_vert.glsl │ │ ├── util.py │ │ └── util_gau.py │ ├── gui_utils.py │ └── slam_gui.py ├── lib │ ├── altcorr_kernel.cu │ ├── correlation_kernels.cu │ ├── droid.cpp │ └── droid_kernels.cu ├── mapper.py ├── modules │ └── droid_net │ │ ├── __init__.py │ │ ├── clipping.py │ │ ├── corr.py │ │ ├── droid_net.py │ │ ├── extractor.py │ │ └── gru.py ├── motion_filter.py ├── slam.py ├── tracker.py ├── trajectory_filler.py └── utils │ ├── Printer.py │ ├── camera_utils.py │ ├── common.py │ ├── datasets.py │ ├── dyn_uncertainty │ ├── __init__.py │ ├── mapping_utils.py │ ├── median_filter.py │ └── uncertainty_model.py │ ├── eval_traj.py │ ├── eval_utils.py │ ├── mono_priors │ ├── img_feature_extractors.py │ └── metric_depth_estimators.py │ ├── plot_utils.py │ ├── pose_utils.py │ └── slam_utils.py └── thirdparty ├── __init__.py ├── depth_anything_v2 ├── DA-2K.md ├── LICENSE ├── README.md ├── app.py ├── assets │ ├── DA-2K.png │ ├── examples │ │ ├── demo01.jpg │ │ ├── demo02.jpg │ │ ├── demo03.jpg │ │ ├── demo04.jpg │ │ ├── demo05.jpg │ │ ├── demo06.jpg │ │ ├── demo07.jpg │ │ ├── demo08.jpg │ │ ├── demo09.jpg │ │ ├── demo10.jpg │ │ ├── demo11.jpg │ │ ├── demo12.jpg │ │ ├── demo13.jpg │ │ ├── demo14.jpg │ │ ├── demo15.jpg │ │ ├── demo16.jpg │ │ ├── demo17.jpg │ │ ├── demo18.jpg │ │ ├── demo19.jpg │ │ └── demo20.jpg │ ├── examples_video │ │ ├── basketball.mp4 │ │ └── 
ferris_wheel.mp4 │ └── teaser.png ├── depth_anything_v2 │ ├── dinov2.py │ ├── dinov2_layers │ │ ├── __init__.py │ │ ├── attention.py │ │ ├── block.py │ │ ├── drop_path.py │ │ ├── layer_scale.py │ │ ├── mlp.py │ │ ├── patch_embed.py │ │ └── swiglu_ffn.py │ ├── dpt.py │ └── util │ │ ├── blocks.py │ │ └── transform.py ├── metric_depth │ ├── README.md │ ├── assets │ │ └── compare_zoedepth.png │ ├── dataset │ │ ├── hypersim.py │ │ ├── kitti.py │ │ ├── splits │ │ │ ├── hypersim │ │ │ │ ├── train.txt │ │ │ │ └── val.txt │ │ │ ├── kitti │ │ │ │ └── val.txt │ │ │ └── vkitti2 │ │ │ │ └── train.txt │ │ ├── transform.py │ │ └── vkitti2.py │ ├── depth_anything_v2 │ │ ├── dinov2.py │ │ ├── dinov2_layers │ │ │ ├── __init__.py │ │ │ ├── attention.py │ │ │ ├── block.py │ │ │ ├── drop_path.py │ │ │ ├── layer_scale.py │ │ │ ├── mlp.py │ │ │ ├── patch_embed.py │ │ │ └── swiglu_ffn.py │ │ ├── dpt.py │ │ └── util │ │ │ ├── blocks.py │ │ │ └── transform.py │ ├── depth_to_pointcloud.py │ ├── dist_train.sh │ ├── requirements.txt │ ├── run.py │ ├── train.py │ └── util │ │ ├── dist_helper.py │ │ ├── loss.py │ │ ├── metric.py │ │ └── utils.py ├── requirements.txt ├── run.py └── run_video.py └── gaussian_splatting ├── LICENSE.md ├── __init__.py ├── gaussian_renderer └── __init__.py ├── scene └── gaussian_model.py └── utils ├── general_utils.py ├── graphics_utils.py ├── image_utils.py ├── loss_utils.py ├── sh_utils.py └── system_utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | datasets/ 2 | build/ 3 | *.egg-info/ 4 | 5 | __pycache__/ 6 | *.pyc 7 | *.so 8 | 9 | 10 | pretrained/ 11 | 12 | output*/ 13 | 14 | .vscode/ 15 | 16 | temp/ -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "thirdparty/evaluate_3d_reconstruction_lib"] 2 | path = thirdparty/evaluate_3d_reconstruction_lib 3 | url = https://github.com/eriksandstroem/evaluate_3d_reconstruction_lib.git 4 | [submodule "thirdparty/lietorch"] 5 | path = thirdparty/lietorch 6 | url = https://github.com/princeton-vl/lietorch.git 7 | [submodule "thirdparty/diff-gaussian-rasterization-w-pose"] 8 | path = thirdparty/diff-gaussian-rasterization-w-pose 9 | url = https://github.com/rmurai0610/diff-gaussian-rasterization-w-pose.git 10 | [submodule "thirdparty/simple-knn"] 11 | path = thirdparty/simple-knn 12 | url = https://github.com/camenduru/simple-knn.git 13 | [submodule "thirdparty/eigen"] 14 | path = thirdparty/eigen 15 | url = https://gitlab.com/libeigen/eigen.git 16 | [submodule "thirdparty/fit3d"] 17 | path = thirdparty/fit3d 18 | url = git@github.com:ywyue/FiT3D.git 19 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to contribute 2 | 3 | We'd love to accept your patches and contributions to this project. 4 | 5 | ## Before you begin 6 | 7 | ### Sign our Contributor License Agreement 8 | 9 | Contributions to this project must be accompanied by a 10 | [Contributor License Agreement](https://cla.developers.google.com/about) (CLA). 11 | You (or your employer) retain the copyright to your contribution; this simply 12 | gives us permission to use and redistribute your contributions as part of the 13 | project. 
14 | 15 | If you or your current employer have already signed the Google CLA (even if it 16 | was for a different project), you probably don't need to do it again. 17 | 18 | Visit <https://cla.developers.google.com/> to see your current agreements or to 19 | sign a new one. 20 | 21 | ### Review our community guidelines 22 | 23 | This project follows 24 | [Google's Open Source Community Guidelines](https://opensource.google/conduct/). 25 | 26 | ## Contribution process 27 | 28 | ### Code reviews 29 | 30 | All submissions, including submissions by project members, require review. We 31 | use GitHub pull requests for this purpose. Consult 32 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 33 | information on using pull requests. -------------------------------------------------------------------------------- /configs/Custom/custom_template.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/wildgs_slam.yaml 2 | scene: custom_scene # Replace with your scene name 3 | 4 | dataset: 'wild_slam_iphone' 5 | data: 6 | input_folder: ./datasets/{Path_to_your_data} 7 | output: ./output/Custom 8 | 9 | cam: 10 | H: 1242 11 | W: 2208 12 | H_out: 360 13 | W_out: 480 14 | fx: 1974.4219 15 | fy: 1974.4219 16 | cx: 1134.8486 17 | cy: 655.6515 18 | # H_edge: 0 # Uncomment this and the following line if you have edge cropping like in TUM datasets 19 | # W_edge: 0 20 | # distortion: [0.0, 0.0, 0.0, 0.0, 0.0] # Uncomment if you have distortion coefficients 21 | 22 | mapping: 23 | Training: 24 | alpha: 0.8 # Increase this value to make the rendering loss weight rgb more heavily than depth 25 | uncertainty_params: 26 | # For outdoor datasets where the metric depth estimation is unstable, 27 | # I recommend setting this value to 0.1 or even 0.
28 | uncer_depth_mult: 0.2 29 | 30 | 31 | # # Uncomment the following lines to enable fast mode and GUI 32 | # fast_mode: True 33 | # gui: True 34 | 35 | # # Uncomment the following lines to save online plotting data 36 | # mapping: 37 | # online_plotting: True -------------------------------------------------------------------------------- /configs/Dynamic/Bonn/bonn_balloon.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/Bonn/bonn_dynamic.yaml 2 | scene: bonn_balloon 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/rgbd_bonn_balloon -------------------------------------------------------------------------------- /configs/Dynamic/Bonn/bonn_balloon2.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/Bonn/bonn_dynamic.yaml 2 | scene: bonn_balloon2 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/rgbd_bonn_balloon2 -------------------------------------------------------------------------------- /configs/Dynamic/Bonn/bonn_crowd.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/Bonn/bonn_dynamic.yaml 2 | scene: bonn_crowd 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/rgbd_bonn_crowd -------------------------------------------------------------------------------- /configs/Dynamic/Bonn/bonn_crowd2.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/Bonn/bonn_dynamic.yaml 2 | scene: bonn_crowd2 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/rgbd_bonn_crowd2 -------------------------------------------------------------------------------- /configs/Dynamic/Bonn/bonn_dynamic.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/wildgs_slam.yaml 2 | 3 | dataset: 'bonn_dynamic' 4 | 5 | data: 6 | root_folder: ./datasets/Bonn 7 | output: ./output/Bonn 8 | 9 | cam: #intrinsic is different per scene in TUM, this cam does not have distortion 10 | H: 480 11 | W: 640 12 | fx: 542.822841 13 | fy: 542.576870 14 | cx: 315.593520 15 | cy: 237.756098 16 | distortion: [0.039903, -0.099343, -0.000730, -0.000144, 0.000000] 17 | H_out: 384 18 | W_out: 512 -------------------------------------------------------------------------------- /configs/Dynamic/Bonn/bonn_moving_nonobstructing_box.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/Bonn/bonn_dynamic.yaml 2 | scene: bonn_moving_nonobstructing_box 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/rgbd_bonn_moving_nonobstructing_box 6 | -------------------------------------------------------------------------------- /configs/Dynamic/Bonn/bonn_moving_nonobstructing_box2.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/Bonn/bonn_dynamic.yaml 2 | scene: bonn_moving_nonobstructing_box2 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/rgbd_bonn_moving_nonobstructing_box2 -------------------------------------------------------------------------------- /configs/Dynamic/Bonn/bonn_person_tracking.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/Bonn/bonn_dynamic.yaml 2 | scene: bonn_person_tracking 3 | 4 | data: 5 | input_folder: 
ROOT_FOLDER_PLACEHOLDER/rgbd_bonn_person_tracking 6 | -------------------------------------------------------------------------------- /configs/Dynamic/Bonn/bonn_person_tracking2.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/Bonn/bonn_dynamic.yaml 2 | scene: bonn_person_tracking2 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/rgbd_bonn_person_tracking2 -------------------------------------------------------------------------------- /configs/Dynamic/TUM_RGBD/freiburg2_desk_with_person.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/TUM_RGBD/tum_dynamic.yaml 2 | scene: freiburg2_desk_with_person 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/rgbd_dataset_freiburg2_desk_with_person 6 | cam: #intrinsic is different per scene in TUM, this cam does not have distortion 7 | H: 480 8 | W: 640 9 | fx: 520.9 10 | fy: 521.0 11 | cx: 325.1 12 | cy: 249.7 13 | distortion: [0.2312, -0.7849, -0.0033, -0.0001, 0.9172] 14 | H_edge: 8 15 | W_edge: 8 16 | H_out: 240 17 | W_out: 320 18 | 19 | tracking: 20 | # This sequence is too long 21 | force_keyframe_every_n_frames: -1 -------------------------------------------------------------------------------- /configs/Dynamic/TUM_RGBD/freiburg3_sitting_halfsphere.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/TUM_RGBD/tum_dynamic.yaml 2 | scene: freiburg3_sitting_halfsphere 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/rgbd_dataset_freiburg3_sitting_halfsphere 6 | cam: #intrinsic is different per scene in TUM, this cam does not have distortion 7 | H: 480 8 | W: 640 9 | fx: 535.4 10 | fy: 539.2 11 | cx: 320.1 12 | cy: 247.6 -------------------------------------------------------------------------------- /configs/Dynamic/TUM_RGBD/freiburg3_sitting_halfsphere_static.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/TUM_RGBD/tum_dynamic.yaml 2 | scene: freiburg3_sitting_static 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/rgbd_dataset_freiburg3_sitting_static 6 | cam: #intrinsic is different per scene in TUM, this cam does not have distortion 7 | H: 480 8 | W: 640 9 | fx: 535.4 10 | fy: 539.2 11 | cx: 320.1 12 | cy: 247.6 -------------------------------------------------------------------------------- /configs/Dynamic/TUM_RGBD/freiburg3_sitting_rpy.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/TUM_RGBD/tum_dynamic.yaml 2 | scene: freiburg3_sitting_rpy 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/rgbd_dataset_freiburg3_sitting_rpy 6 | cam: #intrinsic is different per scene in TUM, this cam does not have distortion 7 | H: 480 8 | W: 640 9 | fx: 535.4 10 | fy: 539.2 11 | cx: 320.1 12 | cy: 247.6 -------------------------------------------------------------------------------- /configs/Dynamic/TUM_RGBD/freiburg3_sitting_xyz.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/TUM_RGBD/tum_dynamic.yaml 2 | scene: freiburg3_sitting_xyz 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/rgbd_dataset_freiburg3_sitting_xyz 6 | cam: #intrinsic is different per scene in TUM, this cam does not have distortion 7 | H: 480 8 | W: 640 9 | fx: 535.4 10 | fy: 539.2 11 | cx: 320.1 12 | cy: 247.6 
-------------------------------------------------------------------------------- /configs/Dynamic/TUM_RGBD/freiburg3_walking_halfsphere.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/TUM_RGBD/tum_dynamic.yaml 2 | scene: freiburg3_walking_halfsphere 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/rgbd_dataset_freiburg3_walking_halfsphere 6 | cam: #intrinsic is different per scene in TUM, this cam does not have distortion 7 | H: 480 8 | W: 640 9 | fx: 535.4 10 | fy: 539.2 11 | cx: 320.1 12 | cy: 247.6 -------------------------------------------------------------------------------- /configs/Dynamic/TUM_RGBD/freiburg3_walking_halfsphere_static.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/TUM_RGBD/tum_dynamic.yaml 2 | scene: freiburg3_walking_static 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/rgbd_dataset_freiburg3_walking_static 6 | cam: #intrinsic is different per scene in TUM, this cam does not have distortion 7 | H: 480 8 | W: 640 9 | fx: 535.4 10 | fy: 539.2 11 | cx: 320.1 12 | cy: 247.6 -------------------------------------------------------------------------------- /configs/Dynamic/TUM_RGBD/freiburg3_walking_rpy.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/TUM_RGBD/tum_dynamic.yaml 2 | scene: freiburg3_walking_rpy 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/rgbd_dataset_freiburg3_walking_rpy 6 | cam: #intrinsic is different per scene in TUM, this cam does not have distortion 7 | H: 480 8 | W: 640 9 | fx: 535.4 10 | fy: 539.2 11 | cx: 320.1 12 | cy: 247.6 -------------------------------------------------------------------------------- /configs/Dynamic/TUM_RGBD/freiburg3_walking_xyz.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/TUM_RGBD/tum_dynamic.yaml 2 | scene: freiburg3_walking_xyz 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/rgbd_dataset_freiburg3_walking_xyz 6 | cam: #intrinsic is different per scene in TUM, this cam does not have distortion 7 | H: 480 8 | W: 640 9 | fx: 535.4 10 | fy: 539.2 11 | cx: 320.1 12 | cy: 247.6 -------------------------------------------------------------------------------- /configs/Dynamic/TUM_RGBD/tum_dynamic.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/wildgs_slam.yaml 2 | 3 | dataset: 'tumrgbd' 4 | 5 | tracking: 6 | buffer: 350 7 | 8 | # Less weight on the depth loss for TUM 9 | mapping: 10 | Training: 11 | alpha: 0.8 12 | 13 | data: 14 | root_folder: ./datasets/TUM_RGBD 15 | output: ./output/TUM_RGBD 16 | 17 | cam: #NOTE: intrinsic is different per scene in TUM 18 | # refer to https://vision.in.tum.de/data/datasets/rgbd-dataset/file_formats#intrinsic_camera_calibration_of_the_kinect 19 | png_depth_scale: 5000.0 #for depth image in png format 20 | ### target/output camera settings, camera_size -> resize -> crop -> target_size 21 | H: 480 22 | W: 640 23 | fx: 535.4 24 | fy: 539.2 25 | cx: 320.1 26 | cy: 247.6 27 | H_edge: 8 28 | W_edge: 8 29 | H_out: 384 30 | W_out: 512 31 | -------------------------------------------------------------------------------- /configs/Dynamic/Wild_SLAM_Mocap/ANYmal1.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: 
./configs/Dynamic/Wild_SLAM_Mocap/wild_slam_mocap.yaml 2 | scene: ANYmal1 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/scene2/ANYmal1 6 | 7 | cam: 8 | fx: 647.7445068359375 9 | fy: 646.9425659179688 -------------------------------------------------------------------------------- /configs/Dynamic/Wild_SLAM_Mocap/ANYmal2.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/Wild_SLAM_Mocap/wild_slam_mocap.yaml 2 | scene: ANYmal2 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/scene2/ANYmal2 6 | 7 | cam: 8 | fx: 647.7445068359375 9 | fy: 646.9425659179688 -------------------------------------------------------------------------------- /configs/Dynamic/Wild_SLAM_Mocap/ball.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/Wild_SLAM_Mocap/wild_slam_mocap.yaml 2 | scene: basketball 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/scene1/ball -------------------------------------------------------------------------------- /configs/Dynamic/Wild_SLAM_Mocap/crowd.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/Wild_SLAM_Mocap/wild_slam_mocap.yaml 2 | scene: crowd 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/scene1/crowd -------------------------------------------------------------------------------- /configs/Dynamic/Wild_SLAM_Mocap/crowd_demo.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/Wild_SLAM_Mocap/wild_slam_mocap.yaml 2 | scene: crowd_demo 3 | 4 | fast_mode: True 5 | gui: True 6 | mapping: 7 | online_plotting: True 8 | 9 | data: 10 | input_folder: ROOT_FOLDER_PLACEHOLDER/scene1/crowd 11 | output: ./output/Wild_SLAM_Mocap_demo -------------------------------------------------------------------------------- /configs/Dynamic/Wild_SLAM_Mocap/person_tracking.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/Wild_SLAM_Mocap/wild_slam_mocap.yaml 2 | scene: person_tracking 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/scene1/person_tracking 6 | 7 | cam: 8 | fx: 647.5684814453125 9 | fy: 646.766845703125 -------------------------------------------------------------------------------- /configs/Dynamic/Wild_SLAM_Mocap/racket.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/Wild_SLAM_Mocap/wild_slam_mocap.yaml 2 | scene: racket 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/scene1/racket 6 | 7 | cam: 8 | fx: 647.3926391601562 9 | fy: 646.5911254882812 -------------------------------------------------------------------------------- /configs/Dynamic/Wild_SLAM_Mocap/stones.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/Wild_SLAM_Mocap/wild_slam_mocap.yaml 2 | scene: stones 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/scene1/stones 6 | 7 | cam: 8 | fx: 647.7445068359375 9 | fy: 646.9425659179688 -------------------------------------------------------------------------------- /configs/Dynamic/Wild_SLAM_Mocap/table_tracking1.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/Wild_SLAM_Mocap/wild_slam_mocap.yaml 2 | scene: table_tracking1 3 | 4 | data: 5 | 
input_folder: ROOT_FOLDER_PLACEHOLDER/scene1/table_tracking1 6 | 7 | cam: 8 | fx: 647.9204711914062 9 | fy: 647.1183471679688 -------------------------------------------------------------------------------- /configs/Dynamic/Wild_SLAM_Mocap/table_tracking2.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/Wild_SLAM_Mocap/wild_slam_mocap.yaml 2 | scene: table_tracking2 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/scene1/table_tracking2 6 | 7 | cam: 8 | fx: 647.5684814453125 9 | fy: 646.766845703125 -------------------------------------------------------------------------------- /configs/Dynamic/Wild_SLAM_Mocap/umbrella.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/Wild_SLAM_Mocap/wild_slam_mocap.yaml 2 | scene: umbrella 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/scene1/umbrella 6 | 7 | cam: 8 | fx: 647.7445068359375 9 | fy: 646.9425659179688 -------------------------------------------------------------------------------- /configs/Dynamic/Wild_SLAM_Mocap/wild_slam_mocap.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/wildgs_slam.yaml 2 | 3 | dataset: 'wild_slam_mocap' 4 | 5 | data: 6 | root_folder: ./datasets/Wild_SLAM_Mocap 7 | output: ./output/Wild_SLAM_Mocap 8 | 9 | cam: #intrinsic is slightly different per seq 10 | H: 720 11 | W: 1280 12 | fx: 647.2167358398438 13 | fy: 646.4154663085938 14 | cx: 643.1209716796875 15 | cy: 365.55963134765625 16 | distortion: [-0.0550149604678154, 0.06560786068439484,-0.0005061274860054255,0.0004771310486830771,-0.021717390045523643] 17 | H_out: 360 18 | W_out: 640 -------------------------------------------------------------------------------- /configs/Dynamic/Wild_SLAM_iPhone/horse.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/Wild_SLAM_iPhone/wild_slam_iphone.yaml 2 | scene: iphone_horse 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/horse 6 | 7 | cam: 8 | fx: 1341.1414794921875 9 | fy: 1341.1414794921875 10 | cx: 960.2431640625 11 | cy: 729.904052734375 -------------------------------------------------------------------------------- /configs/Dynamic/Wild_SLAM_iPhone/parking.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/Wild_SLAM_iPhone/wild_slam_iphone.yaml 2 | scene: iphone_parking 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/parking 6 | 7 | cam: 8 | fx: 1336.74609375 9 | fy: 1336.74609375 10 | cx: 957.005859375 11 | cy: 726.88409423828125 -------------------------------------------------------------------------------- /configs/Dynamic/Wild_SLAM_iPhone/piano.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/Wild_SLAM_iPhone/wild_slam_iphone.yaml 2 | scene: iphone_piano 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/piano 6 | 7 | cam: 8 | fx: 1351.06982421875 9 | fy: 1351.06982421875 10 | cx: 961.050537109375 11 | cy: 730.18597412109375 -------------------------------------------------------------------------------- /configs/Dynamic/Wild_SLAM_iPhone/shopping.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/Wild_SLAM_iPhone/wild_slam_iphone.yaml 2 | scene: iphone_shopping 3 | 4 | data: 5 | 
input_folder: ROOT_FOLDER_PLACEHOLDER/shopping 6 | 7 | cam: 8 | fx: 1340.6441650390625 9 | fy: 1340.6441650390625 10 | cx: 960.7640380859375 11 | cy: 730.26397705078125 -------------------------------------------------------------------------------- /configs/Dynamic/Wild_SLAM_iPhone/street.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/Wild_SLAM_iPhone/wild_slam_iphone.yaml 2 | scene: iphone_street 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/street 6 | 7 | cam: 8 | fx: 1331.6123046875 9 | fy: 1331.6123046875 10 | cx: 956.61676025390625 11 | cy: 727.839599609375 -------------------------------------------------------------------------------- /configs/Dynamic/Wild_SLAM_iPhone/tower.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Dynamic/Wild_SLAM_iPhone/wild_slam_iphone.yaml 2 | scene: iphone_tower 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/tower 6 | 7 | cam: 8 | fx: 1338.494140625 9 | fy: 1338.494140625 10 | cx: 960.17327880859375 11 | cy: 730.55328369140625 -------------------------------------------------------------------------------- /configs/Dynamic/Wild_SLAM_iPhone/wild_slam_iphone.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/wildgs_slam.yaml 2 | 3 | dataset: 'wild_slam_iphone' 4 | 5 | data: 6 | root_folder: ./datasets/Wild_SLAM_iPhone 7 | output: ./output/Wild_SLAM_iPhone 8 | 9 | mapping: 10 | Training: 11 | alpha: 0.8 # Increase this value to make the rendering loss weight rgb more heavily than depth 12 | uncertainty_params: 13 | # This parameter weights the depth loss when training the uncertainty MLP. 14 | # It's lambda_1 in equation 4 in the paper. 15 | # We set it to 0 here as the metric depth is not reliable in the iPhone dataset. 16 | # However, feel free to fine-tune this parameter when running on your own dataset.
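# (Added illustrative note, not from the original config — a rough reading of the comment above:
#  the uncertainty-MLP training loss combines an RGB term with uncer_depth_mult times a depth term,
#  so e.g. 0.5 would weight depth residuals at roughly half the RGB term, while the 0.0 used below
#  disables the depth term entirely because monocular metric depth is unreliable on these handheld
#  iPhone sequences.)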
17 | uncer_depth_mult: 0.0 18 | 19 | cam: 20 | H: 1440 21 | W: 1920 22 | H_out: 360 23 | W_out: 480 -------------------------------------------------------------------------------- /configs/Static/TUM_RGBD/freiburg1_desk.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Static/TUM_RGBD/tum.yaml 2 | scene: freiburg1_desk 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/rgbd_dataset_freiburg1_desk 6 | cam: 7 | H: 480 8 | W: 640 9 | fx: 517.3 10 | fy: 516.5 11 | cx: 318.6 12 | cy: 255.3 13 | distortion: [0.2624, -0.9531, -0.0054, 0.0026, 1.1633] 14 | -------------------------------------------------------------------------------- /configs/Static/TUM_RGBD/freiburg2_xyz.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Static/TUM_RGBD/tum.yaml 2 | scene: freiburg2_xyz 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/rgbd_dataset_freiburg2_xyz 6 | cam: #intrinsic is different per scene in TUM 7 | H: 480 8 | W: 640 9 | fx: 520.9 10 | fy: 521.0 11 | cx: 325.1 12 | cy: 249.7 13 | distortion: [0.2312, -0.7849, -0.0033, -0.0001, 0.9172] 14 | H_out: 240 15 | W_out: 320 16 | 17 | -------------------------------------------------------------------------------- /configs/Static/TUM_RGBD/freiburg3_office.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/Static/TUM_RGBD/tum.yaml 2 | scene: freiburg3_long_office_household 3 | 4 | data: 5 | input_folder: ROOT_FOLDER_PLACEHOLDER/rgbd_dataset_freiburg3_long_office_household 6 | 7 | cam: #intrinsic is different per scene in TUM, this cam does not have distortion 8 | H: 480 9 | W: 640 10 | fx: 535.4 11 | fy: 539.2 12 | cx: 320.1 13 | cy: 247.6 -------------------------------------------------------------------------------- /configs/Static/TUM_RGBD/tum.yaml: -------------------------------------------------------------------------------- 1 | inherit_from: ./configs/wildgs_slam.yaml 2 | 3 | dataset: 'tumrgbd' 4 | 5 | mapping: 6 | Calibration: 7 | depth_scale: 5000.0 8 | 9 | tracking: 10 | buffer: 500 11 | warmup: 12 12 | multiview_filter: 13 | visible_num: 2 14 | frontend: 15 | keyframe_thresh: 3.0 16 | radius: 2 17 | backend: 18 | loop_nms: 10 19 | 20 | cam: #NOTE: intrinsic is different per scene in TUM 21 | # refer to https://vision.in.tum.de/data/datasets/rgbd-dataset/file_formats#intrinsic_camera_calibration_of_the_kinect 22 | png_depth_scale: 5000.0 #for depth image in png format 23 | ### target/output camera settings, camera_size -> resize -> crop -> target_size 24 | H_edge: 8 25 | W_edge: 8 26 | H_out: 384 27 | W_out: 512 28 | 29 | data: 30 | root_folder: ./datasets/TUM_RGBD 31 | output: ./output/TUM_RGBD 32 | -------------------------------------------------------------------------------- /media/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/media/teaser.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pillow 2 | imageio 3 | joblib 4 | pandas 5 | scikit-image 6 | scikit-learn 7 | scipy 8 | seaborn 9 | PyOpenGL-accelerate 10 | pyrender 11 | ninja 12 | setuptools 13 | timm==0.9.10 14 | plyfile==0.8.1 15 | tqdm 16 | 
opencv-python==4.8.1.78 17 | munch 18 | evo 19 | open3d==0.17.0 20 | torchmetrics 21 | imgviz 22 | lpips 23 | rich 24 | kornia 25 | PyQt5 26 | glfw 27 | PyGLM 28 | mmengine -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import argparse 4 | import os 5 | 6 | from src import config 7 | from src.slam import SLAM 8 | from src.utils.datasets import get_dataset 9 | from time import gmtime, strftime 10 | from colorama import Fore,Style 11 | 12 | import random 13 | def setup_seed(seed): 14 | torch.manual_seed(seed) 15 | torch.cuda.manual_seed_all(seed) 16 | np.random.seed(seed) 17 | random.seed(seed) 18 | torch.backends.cudnn.deterministic = True 19 | 20 | if __name__ == '__main__': 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument('config', type=str, help='Path to config file.') 23 | args = parser.parse_args() 24 | 25 | torch.multiprocessing.set_start_method('spawn') 26 | 27 | cfg = config.load_config(args.config) 28 | setup_seed(cfg['setup_seed']) 29 | if cfg['fast_mode']: 30 | # Force the final refine iterations to be 3000 if in fast mode 31 | cfg['mapping']['final_refine_iters'] = 3000 32 | 33 | output_dir = cfg['data']['output'] 34 | output_dir = output_dir+f"/{cfg['scene']}" 35 | 36 | start_time = strftime("%Y-%m-%d %H:%M:%S", gmtime()) 37 | start_info = "-"*30+Fore.LIGHTRED_EX+\ 38 | f"\nStart WildGS-SLAM at {start_time},\n"+Style.RESET_ALL+ \ 39 | f" scene: {cfg['dataset']}-{cfg['scene']},\n" \ 40 | f" output: {output_dir}\n"+ \ 41 | "-"*30 42 | print(start_info) 43 | 44 | if not os.path.exists(output_dir): 45 | os.makedirs(output_dir) 46 | 47 | config.save_config(cfg, f'{output_dir}/cfg.yaml') 48 | 49 | dataset = get_dataset(cfg) 50 | 51 | slam = SLAM(cfg,dataset) 52 | slam.run() 53 | 54 | end_time = strftime("%Y-%m-%d %H:%M:%S", gmtime()) 55 | print("-"*30+Fore.LIGHTRED_EX+f"\nWildGS-SLAM finishes!\n"+Style.RESET_ALL+f"{end_time}\n"+"-"*30) 56 | 57 | -------------------------------------------------------------------------------- /scripts_downloading/download_bonn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir -p datasets/Bonn 4 | cd datasets/Bonn 5 | 6 | scenes=( 7 | "balloon" 8 | "balloon2" 9 | "crowd" 10 | "crowd2" 11 | "person_tracking" 12 | "person_tracking2" 13 | "moving_nonobstructing_box" 14 | "moving_nonobstructing_box2" 15 | ) 16 | 17 | for scene in "${scenes[@]}" 18 | do 19 | echo "Processing scene: $scene" 20 | 21 | # Check if the folder already exists 22 | if [ -d "$scene" ]; then 23 | echo "Folder $scene already exists, skipping download" 24 | else 25 | zip_file="rgbd_bonn_${scene}.zip" 26 | wget "https://www.ipb.uni-bonn.de/html/projects/rgbd_dynamic2019/${zip_file}" 27 | 28 | if [ $? -eq 0 ]; then 29 | echo "Successfully downloaded ${zip_file}" 30 | unzip -q "${zip_file}" 31 | if [ $? 
-eq 0 ]; then 32 | echo "Successfully extracted ${zip_file}" 33 | rm "${zip_file}" 34 | echo "Removed ${zip_file}" 35 | else 36 | echo "Failed to extract ${zip_file}" 37 | fi 38 | else 39 | echo "Failed to download ${zip_file}" 40 | fi 41 | fi 42 | 43 | echo "Finished processing ${scene}" 44 | echo "-----------------------------" 45 | done -------------------------------------------------------------------------------- /scripts_downloading/download_demo_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir -p datasets/Wild_SLAM_Mocap/scene1 4 | cd datasets/Wild_SLAM_Mocap/scene1 5 | 6 | scenes=( 7 | "crowd" 8 | ) 9 | 10 | for scene in "${scenes[@]}" 11 | do 12 | echo "Processing scene: $scene" 13 | 14 | # Check if the folder already exists 15 | if [ -d "$scene" ]; then 16 | echo "Folder $scene already exists, skipping download" 17 | else 18 | zip_file="${scene}.zip" 19 | wget "https://huggingface.co/datasets/gradient-spaces/Wild-SLAM/resolve/main/Mocap/scene1/${zip_file}" 20 | 21 | if [ $? -eq 0 ]; then 22 | echo "Successfully downloaded ${zip_file}" 23 | unzip -q "${zip_file}" 24 | if [ $? -eq 0 ]; then 25 | echo "Successfully extracted ${zip_file}" 26 | rm "${zip_file}" 27 | echo "Removed ${zip_file}" 28 | else 29 | echo "Failed to extract ${zip_file}" 30 | fi 31 | else 32 | echo "Failed to download ${zip_file}" 33 | fi 34 | fi 35 | 36 | echo "Finished processing ${scene}" 37 | echo "-----------------------------" 38 | done -------------------------------------------------------------------------------- /scripts_downloading/download_tum.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir -p datasets/TUM_RGBD 4 | cd datasets/TUM_RGBD 5 | 6 | wget https://cvg.cit.tum.de/rgbd/dataset/freiburg2/rgbd_dataset_freiburg2_desk_with_person.tgz 7 | tar -xvzf rgbd_dataset_freiburg2_desk_with_person.tgz 8 | rm rgbd_dataset_freiburg2_desk_with_person.tgz 9 | 10 | wget https://cvg.cit.tum.de/rgbd/dataset/freiburg3/rgbd_dataset_freiburg3_sitting_static.tgz 11 | tar -xvzf rgbd_dataset_freiburg3_sitting_static.tgz 12 | rm rgbd_dataset_freiburg3_sitting_static.tgz 13 | 14 | wget https://cvg.cit.tum.de/rgbd/dataset/freiburg3/rgbd_dataset_freiburg3_sitting_xyz.tgz 15 | tar -xvzf rgbd_dataset_freiburg3_sitting_xyz.tgz 16 | rm rgbd_dataset_freiburg3_sitting_xyz.tgz 17 | 18 | wget https://cvg.cit.tum.de/rgbd/dataset/freiburg3/rgbd_dataset_freiburg3_sitting_halfsphere.tgz 19 | tar -xvzf rgbd_dataset_freiburg3_sitting_halfsphere.tgz 20 | rm rgbd_dataset_freiburg3_sitting_halfsphere.tgz 21 | 22 | wget https://cvg.cit.tum.de/rgbd/dataset/freiburg3/rgbd_dataset_freiburg3_sitting_rpy.tgz 23 | tar -xvzf rgbd_dataset_freiburg3_sitting_rpy.tgz 24 | rm rgbd_dataset_freiburg3_sitting_rpy.tgz 25 | 26 | wget https://cvg.cit.tum.de/rgbd/dataset/freiburg3/rgbd_dataset_freiburg3_walking_static.tgz 27 | tar -xvzf rgbd_dataset_freiburg3_walking_static.tgz 28 | rm rgbd_dataset_freiburg3_walking_static.tgz 29 | 30 | wget https://cvg.cit.tum.de/rgbd/dataset/freiburg3/rgbd_dataset_freiburg3_walking_xyz.tgz 31 | tar -xvzf rgbd_dataset_freiburg3_walking_xyz.tgz 32 | rm rgbd_dataset_freiburg3_walking_xyz.tgz 33 | 34 | wget https://cvg.cit.tum.de/rgbd/dataset/freiburg3/rgbd_dataset_freiburg3_walking_halfsphere.tgz 35 | tar -xvzf rgbd_dataset_freiburg3_walking_halfsphere.tgz 36 | rm rgbd_dataset_freiburg3_walking_halfsphere.tgz 37 | 38 | wget 
https://cvg.cit.tum.de/rgbd/dataset/freiburg3/rgbd_dataset_freiburg3_walking_rpy.tgz 39 | tar -xvzf rgbd_dataset_freiburg3_walking_rpy.tgz 40 | rm rgbd_dataset_freiburg3_walking_rpy.tgz 41 | 42 | -------------------------------------------------------------------------------- /scripts_downloading/download_wild_slam_iphone.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir -p datasets/Wild_SLAM_iPhone 4 | cd datasets/Wild_SLAM_iPhone 5 | 6 | scenes=( 7 | "parking" 8 | "piano" 9 | "shopping" 10 | "street" 11 | "tower" 12 | "wall" 13 | "wandering" 14 | ) 15 | 16 | for scene in "${scenes[@]}" 17 | do 18 | echo "Processing scene: $scene" 19 | 20 | # Check if the folder already exists 21 | if [ -d "$scene" ]; then 22 | echo "Folder $scene already exists, skipping download" 23 | else 24 | zip_file="${scene}.zip" 25 | wget "https://huggingface.co/datasets/gradient-spaces/Wild-SLAM/resolve/main/iPhone/${zip_file}" 26 | 27 | if [ $? -eq 0 ]; then 28 | echo "Successfully downloaded ${zip_file}" 29 | unzip -q "${zip_file}" 30 | if [ $? -eq 0 ]; then 31 | echo "Successfully extracted ${zip_file}" 32 | rm "${zip_file}" 33 | echo "Removed ${zip_file}" 34 | else 35 | echo "Failed to extract ${zip_file}" 36 | fi 37 | else 38 | echo "Failed to download ${zip_file}" 39 | fi 40 | fi 41 | 42 | echo "Finished processing ${scene}" 43 | echo "-----------------------------" 44 | done -------------------------------------------------------------------------------- /scripts_downloading/download_wild_slam_mocap_scene1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir -p datasets/Wild_SLAM_Mocap/scene1 4 | cd datasets/Wild_SLAM_Mocap/scene1 5 | 6 | scenes=( 7 | "ball" 8 | "crowd" 9 | "person_tracking" 10 | "racket" 11 | "stones" 12 | "table_tracking1" 13 | "table_tracking2" 14 | "umbrella" 15 | ) 16 | 17 | for scene in "${scenes[@]}" 18 | do 19 | echo "Processing scene: $scene" 20 | 21 | # Check if the folder already exists 22 | if [ -d "$scene" ]; then 23 | echo "Folder $scene already exists, skipping download" 24 | else 25 | zip_file="${scene}.zip" 26 | wget "https://huggingface.co/datasets/gradient-spaces/Wild-SLAM/resolve/main/Mocap/scene1/${zip_file}" 27 | 28 | if [ $? -eq 0 ]; then 29 | echo "Successfully downloaded ${zip_file}" 30 | unzip -q "${zip_file}" 31 | if [ $? -eq 0 ]; then 32 | echo "Successfully extracted ${zip_file}" 33 | rm "${zip_file}" 34 | echo "Removed ${zip_file}" 35 | else 36 | echo "Failed to extract ${zip_file}" 37 | fi 38 | else 39 | echo "Failed to download ${zip_file}" 40 | fi 41 | fi 42 | 43 | echo "Finished processing ${scene}" 44 | echo "-----------------------------" 45 | done -------------------------------------------------------------------------------- /scripts_downloading/download_wild_slam_mocap_scene2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir -p datasets/Wild_SLAM_Mocap/scene2 4 | cd datasets/Wild_SLAM_Mocap/scene2 5 | 6 | scenes=( 7 | "ANYmal1" 8 | "ANYmal2" 9 | ) 10 | 11 | for scene in "${scenes[@]}" 12 | do 13 | echo "Processing scene: $scene" 14 | 15 | # Check if the folder already exists 16 | if [ -d "$scene" ]; then 17 | echo "Folder $scene already exists, skipping download" 18 | else 19 | zip_file="${scene}.zip" 20 | wget "https://huggingface.co/datasets/gradient-spaces/Wild-SLAM/resolve/main/Mocap/scene2/${zip_file}" 21 | 22 | if [ $? 
-eq 0 ]; then 23 | echo "Successfully downloaded ${zip_file}" 24 | unzip -q "${zip_file}" 25 | if [ $? -eq 0 ]; then 26 | echo "Successfully extracted ${zip_file}" 27 | rm "${zip_file}" 28 | echo "Removed ${zip_file}" 29 | else 30 | echo "Failed to extract ${zip_file}" 31 | fi 32 | else 33 | echo "Failed to download ${zip_file}" 34 | fi 35 | fi 36 | 37 | echo "Finished processing ${scene}" 38 | echo "-----------------------------" 39 | done -------------------------------------------------------------------------------- /scripts_run/run_bonn_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python run.py ./configs/Dynamic/Bonn/bonn_balloon.yaml 4 | python run.py ./configs/Dynamic/Bonn/bonn_balloon2.yaml 5 | python run.py ./configs/Dynamic/Bonn/bonn_crowd.yaml 6 | python run.py ./configs/Dynamic/Bonn/bonn_crowd2.yaml 7 | python run.py ./configs/Dynamic/Bonn/bonn_moving_nonobstructing_box.yaml 8 | python run.py ./configs/Dynamic/Bonn/bonn_moving_nonobstructing_box2.yaml 9 | python run.py ./configs/Dynamic/Bonn/bonn_person_tracking.yaml 10 | python run.py ./configs/Dynamic/Bonn/bonn_person_tracking2.yaml 11 | -------------------------------------------------------------------------------- /scripts_run/run_tum_dynamic_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python run.py ./configs/Dynamic/TUM_RGBD/freiburg2_desk_with_person.yaml 4 | python run.py ./configs/Dynamic/TUM_RGBD/freiburg3_sitting_halfsphere_static.yaml 5 | python run.py ./configs/Dynamic/TUM_RGBD/freiburg3_sitting_halfsphere.yaml 6 | python run.py ./configs/Dynamic/TUM_RGBD/freiburg3_sitting_rpy.yaml 7 | python run.py ./configs/Dynamic/TUM_RGBD/freiburg3_sitting_xyz.yaml 8 | python run.py ./configs/Dynamic/TUM_RGBD/freiburg3_walking_halfsphere_static.yaml 9 | python run.py ./configs/Dynamic/TUM_RGBD/freiburg3_walking_halfsphere.yaml 10 | python run.py ./configs/Dynamic/TUM_RGBD/freiburg3_walking_rpy.yaml 11 | python run.py ./configs/Dynamic/TUM_RGBD/freiburg3_walking_xyz.yaml -------------------------------------------------------------------------------- /scripts_run/run_wild_slam_mocap_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python run.py ./configs/Dynamic/Wild_SLAM_Mocap/ball.yaml 4 | python run.py ./configs/Dynamic/Wild_SLAM_Mocap/crowd.yaml 5 | python run.py ./configs/Dynamic/Wild_SLAM_Mocap/person_tracking.yaml 6 | python run.py ./configs/Dynamic/Wild_SLAM_Mocap/racket.yaml 7 | python run.py ./configs/Dynamic/Wild_SLAM_Mocap/stones.yaml 8 | python run.py ./configs/Dynamic/Wild_SLAM_Mocap/table_tracking1.yaml 9 | python run.py ./configs/Dynamic/Wild_SLAM_Mocap/table_tracking2.yaml 10 | python run.py ./configs/Dynamic/Wild_SLAM_Mocap/umbrella.yaml 11 | python run.py ./configs/Dynamic/Wild_SLAM_Mocap/ANYmal1.yaml 12 | python run.py ./configs/Dynamic/Wild_SLAM_Mocap/ANYmal2.yaml 13 | -------------------------------------------------------------------------------- /scripts_run/summarize_pose_eval.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import json 3 | import pandas as pd 4 | import os 5 | 6 | datasets = os.listdir('./output') 7 | for dataset in datasets: 8 | if not os.path.isdir(os.path.join('output', dataset)): 9 | continue 10 | dataset_path = os.path.join('output', dataset) 11 | scenes = sorted(os.listdir(dataset_path)) 12 | 13 | data = {scene: [] for 
scene in scenes} 14 | averages = [] 15 | 16 | row_data = [] 17 | rmses = [] 18 | for scene in scenes: 19 | exp_folder = os.path.join(dataset_path, scene) 20 | # metrics_full_traj, metrics_kf_traj, metrics_kf_traj_before_ba 21 | result_file = os.path.join(exp_folder, "traj/metrics_full_traj.txt") 22 | if os.path.exists(result_file): 23 | # Load the JSON file 24 | with open(result_file, "r") as f: 25 | output = f.readlines() 26 | 27 | rmse = float(output[8].split(',')[0].replace("{'rmse': ",'')) 28 | 29 | # Add metrics to the row 30 | row_data.append(f"{rmse*1e2:.2f}") 31 | rmses.append(rmse) 32 | else: 33 | row_data.append("N/A") # If file doesn't exist, mark it as N/A 34 | avg_rmse = np.nanmean(rmses) 35 | averages.append(f"{avg_rmse*1e2:.2f}") 36 | for scene, value in zip(scenes, row_data): 37 | data[scene].append(value) 38 | 39 | data['Average'] = averages 40 | 41 | # Convert the data to a Pandas DataFrame 42 | df = pd.DataFrame(data, index=['wildgs-slam']) 43 | 44 | # Save the DataFrame as a CSV file 45 | csv_path = f"./output/{dataset}_eval.csv" 46 | df.to_csv(csv_path) 47 | 48 | # Output the CSV file path 49 | print(f"Results saved to {csv_path}") 50 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | import os.path as osp 5 | ROOT = osp.dirname(osp.abspath(__file__)) 6 | 7 | setup( 8 | name='droid_backends', 9 | ext_modules=[ 10 | CUDAExtension('droid_backends', 11 | include_dirs=[osp.join(ROOT, 'thirdparty/lietorch/eigen')], 12 | sources=[ 13 | 'src/lib/droid.cpp', 14 | 'src/lib/droid_kernels.cu', 15 | 'src/lib/correlation_kernels.cu', 16 | 'src/lib/altcorr_kernel.cu', 17 | ], 18 | extra_compile_args={ 19 | 'cxx': ['-O3'], 20 | 'nvcc': ['-O3', 21 | '-gencode=arch=compute_60,code=sm_60', 22 | '-gencode=arch=compute_61,code=sm_61', 23 | '-gencode=arch=compute_70,code=sm_70', 24 | '-gencode=arch=compute_75,code=sm_75', 25 | '-gencode=arch=compute_80,code=sm_80', 26 | '-gencode=arch=compute_86,code=sm_86', 27 | ] 28 | }), 29 | ], 30 | cmdclass={ 'build_ext' : BuildExtension } 31 | ) 32 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/src/__init__.py -------------------------------------------------------------------------------- /src/backend.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The GlORIE-SLAM Authors. 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
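# Module overview (comment added for orientation, not in the original source):
# Backend runs the global and loop-closure bundle adjustment over the keyframe
# factor graph (see dense_ba / loop_ba below), reusing the DROID-style update
# operator passed in as net.update.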
14 | 15 | import torch 16 | from src.factor_graph import FactorGraph 17 | from copy import deepcopy 18 | 19 | class Backend: 20 | def __init__(self, net, video, cfg): 21 | self.cfg = cfg 22 | self.video = video 23 | self.update_op = net.update 24 | self.device = cfg['device'] 25 | # global optimization window 26 | self.t0 = 0 27 | self.t1 = 0 28 | 29 | self.beta = cfg['tracking']['beta'] 30 | self.backend_thresh = cfg['tracking']['backend']['thresh'] 31 | self.backend_radius = cfg['tracking']['backend']['radius'] 32 | self.backend_nms = cfg['tracking']['backend']['nms'] 33 | self.backend_normalize = cfg['tracking']['backend']['normalize'] 34 | self.output = f"{cfg['data']['output']}/{cfg['scene']}" 35 | 36 | self.backend_loop_window = cfg['tracking']['backend']['loop_window'] 37 | self.backend_loop_thresh = cfg['tracking']['backend']['loop_thresh'] 38 | self.backend_loop_radius = cfg['tracking']['backend']['loop_radius'] 39 | self.backend_loop_nms = cfg['tracking']['backend']['loop_nms'] 40 | 41 | @torch.no_grad() 42 | def backend_ba(self, t_start, t_end, steps, graph, nms, radius, thresh, max_factors, t_start_loop=None, loop=False, motion_only=False, enable_wq=True): 43 | """ main update """ 44 | if self.cfg['tracking']["uncertainty_params"]['activate']: 45 | self.video.update_all_uncertainty_mask() 46 | 47 | if t_start_loop is None or not loop: 48 | t_start_loop = t_start 49 | assert t_start_loop >= t_start, f'short: {t_start_loop}, long: {t_start}.' 50 | edge_num = graph.add_backend_proximity_factors(t_start,t_end,nms,radius,thresh,max_factors,self.beta, t_start_loop,loop) 51 | if edge_num == 0: 52 | graph.clear_edges() 53 | return 0 54 | 55 | graph.update_lowmem( 56 | t0=t_start_loop+1, # fix the start point to avoid drift, be sure to use t_start_loop rather than t_start here. 
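# (Added clarification, inferred from the comment above: poses/depths with index
#  below t0 are kept fixed during this optimization, which anchors the already
#  estimated trajectory so the loop-closure update cannot drift the earlier keyframes.)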
57 | t1=t_end, 58 | itrs=2, 59 | use_inactive=False, 60 | steps=steps, 61 | enable_wq = enable_wq 62 | ) 63 | 64 | graph.clear_edges() 65 | return edge_num 66 | 67 | @torch.no_grad() 68 | def dense_ba(self, steps=6, enable_wq=True): 69 | t_start = 0 70 | t_end = self.video.counter.value 71 | nms = self.backend_nms 72 | radius = self.backend_radius 73 | thresh = self.backend_thresh 74 | n = t_end - t_start 75 | max_factors = ((radius + 2) * 2) * n 76 | if self.backend_normalize: 77 | self.video.normalize() 78 | graph = FactorGraph(self.video, self.update_op, device=self.device, 79 | corr_impl='alt', max_factors=max_factors) 80 | n_edges = self.backend_ba(t_start, t_end, steps, graph, nms, radius, 81 | thresh, max_factors, motion_only=False, enable_wq=enable_wq) 82 | 83 | del graph 84 | torch.cuda.empty_cache() 85 | self.video.set_dirty(t_start,t_end) 86 | self.video.update_valid_depth_mask() 87 | return n, n_edges 88 | 89 | 90 | 91 | @torch.no_grad() 92 | def loop_ba(self, t_start, t_end, steps=6, motion_only=False, local_graph=None, enable_wq=True): 93 | ''' loop closure, add edges with high-covisiablity''' 94 | radius = self.backend_loop_radius 95 | window = self.backend_loop_window 96 | max_factors = 8 * window 97 | nms = self.backend_loop_nms 98 | thresh = self.backend_loop_thresh 99 | t_start_loop = max(0, t_end - window) 100 | 101 | graph = FactorGraph(self.video, self.update_op, device=self.device, corr_impl='alt', max_factors=max_factors) 102 | if local_graph is not None: 103 | copy_attr = ['ii', 'jj', 'age', 'net', 'target', 'weight'] 104 | for key in copy_attr: 105 | val = getattr(local_graph, key) 106 | if val is not None: 107 | setattr(graph, key, deepcopy(val)) 108 | 109 | left_factors = max_factors - len(graph.ii) 110 | n_edges = self.backend_ba(t_start, t_end, steps, graph, nms, radius, thresh, 111 | left_factors, t_start_loop=t_start_loop, loop=True, 112 | motion_only=motion_only, enable_wq=enable_wq) 113 | del graph 114 | torch.cuda.empty_cache() 115 | return t_end - t_start_loop, n_edges 116 | 117 | -------------------------------------------------------------------------------- /src/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The GlORIE-SLAM Authors. 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import yaml 16 | 17 | 18 | def load_config(path, default_path=None): 19 | """ 20 | Load config file 21 | Args: 22 | path: (str), path to config file 23 | default_path: (str, optional), whether to use default path. 
24 | 25 | Returns: 26 | cfg: (dict), config dict 27 | 28 | """ 29 | # load configuration from file itself 30 | with open(path, 'r' ) as f: 31 | cfg_special = yaml.full_load(f) 32 | 33 | # check if we should inherit from a config 34 | inherit_from = cfg_special.get('inherit_from') 35 | 36 | # if yes, load this config first as default 37 | # if no, use the default path 38 | if inherit_from is not None: 39 | cfg = load_config(inherit_from, default_path) 40 | elif default_path is not None: 41 | with open(default_path, 'r') as f: 42 | cfg = yaml.full_load(f) 43 | else: 44 | cfg = dict() 45 | 46 | # include main configuration 47 | update_recursive(cfg, cfg_special) 48 | 49 | return cfg 50 | 51 | def save_config(cfg, path): 52 | with open(path, 'w+') as fp: 53 | yaml.dump(cfg, fp) 54 | 55 | 56 | def update_recursive(dict1, dict2): 57 | """ 58 | update two config dictionaries recursively 59 | Args: 60 | dict1: (dict), first dictionary to be updated 61 | dict2: (dict), second dictionary whose entries should be used 62 | 63 | Returns: 64 | 65 | """ 66 | for k, v in dict2.items(): 67 | if k not in dict1: 68 | dict1[k] = dict() 69 | if isinstance(v, dict): 70 | update_recursive(dict1[k], v) 71 | else: 72 | dict1[k] = v -------------------------------------------------------------------------------- /src/geom/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/src/geom/__init__.py -------------------------------------------------------------------------------- /src/geom/chol.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The GlORIE-SLAM Authors. 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
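# Module overview (comment added for orientation, not in the original source):
# Cholesky-based linear solvers used by the bundle adjustment: block_solve for the
# plain damped normal equations and schur_solve for the Schur-complement system
# over camera poses and dense per-pixel disparity variables.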
14 | 15 | import torch 16 | import torch.nn.functional as F 17 | import src.geom.projective_ops as pops 18 | 19 | # class CholeskySolver(torch.autograd.Function): 20 | class CholeskySolver(): 21 | @staticmethod 22 | 23 | def apply(H,b): 24 | try: 25 | U = torch.linalg.cholesky(H) 26 | xs = torch.cholesky_solve(b, U) 27 | except Exception as e: 28 | print(e) 29 | xs = torch.zeros_like(b) 30 | 31 | return xs 32 | 33 | def __call__(ctx, H, b): 34 | # don't crash training if cholesky decomp fails 35 | try: 36 | U = torch.linalg.cholesky(H) 37 | xs = torch.cholesky_solve(b, U) 38 | ctx.save_for_backward(U, xs) 39 | ctx.failed = False 40 | except Exception as e: 41 | print(e) 42 | ctx.failed = True 43 | xs = torch.zeros_like(b) 44 | 45 | return xs 46 | 47 | @staticmethod 48 | def backward(ctx, grad_x): 49 | if ctx.failed: 50 | return None, None 51 | 52 | U, xs = ctx.saved_tensors 53 | dz = torch.cholesky_solve(grad_x, U) 54 | dH = -torch.matmul(xs, dz.transpose(-1,-2)) 55 | 56 | return dH, dz 57 | 58 | def block_solve(H, b, ep=0.1, lm=0.0001): 59 | """ solve normal equations """ 60 | B, N, _, D, _ = H.shape 61 | I = torch.eye(D).to(H.device) 62 | H = H + (ep + lm*H) * I 63 | 64 | H = H.permute(0,1,3,2,4) 65 | H = H.reshape(B, N*D, N*D) 66 | b = b.reshape(B, N*D, 1) 67 | 68 | x = CholeskySolver.apply(H,b) 69 | return x.reshape(B, N, D) 70 | 71 | 72 | def schur_solve(H, E, C, v, w, ep=0.1, lm=0.0001, sless=False): 73 | """ solve using shur complement """ 74 | 75 | B, P, M, D, HW = E.shape 76 | H = H.permute(0,1,3,2,4).reshape(B, P*D, P*D) 77 | E = E.permute(0,1,3,2,4).reshape(B, P*D, M*HW) 78 | Q = (1.0 / C).view(B, M*HW, 1) 79 | 80 | # damping 81 | I = torch.eye(P*D).to(H.device) 82 | H = H + (ep + lm*H) * I 83 | 84 | v = v.reshape(B, P*D, 1) 85 | w = w.reshape(B, M*HW, 1) 86 | 87 | Et = E.transpose(1,2) 88 | S = H - torch.matmul(E, Q*Et) 89 | v = v - torch.matmul(E, Q*w) 90 | 91 | dx = CholeskySolver.apply(S, v) 92 | if sless: 93 | return dx.reshape(B, P, D) 94 | 95 | dz = Q * (w - Et @ dx) 96 | dx = dx.reshape(B, P, D) 97 | dz = dz.reshape(B, M, HW) 98 | 99 | return dx, dz -------------------------------------------------------------------------------- /src/geom/projective_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The GlORIE-SLAM Authors. 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
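# Module overview (comment added for orientation, not in the original source):
# Pinhole projection utilities (iproj / proj / actp) and projective_transform, which
# maps points between keyframe pairs (ii -> jj) with optional Jacobians for the
# bundle adjustment; induced_flow returns the optical flow induced by camera motion.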
14 | 15 | import torch 16 | import torch.nn.functional as F 17 | 18 | from lietorch import SE3, Sim3 19 | 20 | MIN_DEPTH = 0.2 21 | 22 | def extract_intrinsics(intrinsics): 23 | return intrinsics[...,None,None,:].unbind(dim=-1) 24 | 25 | def coords_grid(ht, wd, device): 26 | y, x = torch.meshgrid( 27 | torch.arange(ht).to(device).float(), 28 | torch.arange(wd).to(device).float(),indexing="ij") 29 | 30 | return torch.stack([x, y], dim=-1) 31 | 32 | def iproj(disps, intrinsics, jacobian=False): 33 | """ pinhole camera inverse projection """ 34 | ht, wd = disps.shape[2:] 35 | fx, fy, cx, cy = extract_intrinsics(intrinsics) 36 | 37 | y, x = torch.meshgrid( 38 | torch.arange(ht).to(disps.device).float(), 39 | torch.arange(wd).to(disps.device).float(),indexing="ij") 40 | 41 | i = torch.ones_like(disps) 42 | X = (x - cx) / fx 43 | Y = (y - cy) / fy 44 | pts = torch.stack([X, Y, i, disps], dim=-1) 45 | 46 | if jacobian: 47 | J = torch.zeros_like(pts) 48 | J[...,-1] = 1.0 49 | return pts, J 50 | 51 | return pts, None 52 | 53 | def proj(Xs, intrinsics, jacobian=False, return_depth=False): 54 | """ pinhole camera projection """ 55 | fx, fy, cx, cy = extract_intrinsics(intrinsics) 56 | X, Y, Z, D = Xs.unbind(dim=-1) 57 | 58 | Z = torch.where(Z < 0.5*MIN_DEPTH, torch.ones_like(Z), Z) 59 | d = 1.0 / Z 60 | 61 | x = fx * (X * d) + cx 62 | y = fy * (Y * d) + cy 63 | if return_depth: 64 | coords = torch.stack([x, y, D*d], dim=-1) 65 | else: 66 | coords = torch.stack([x, y], dim=-1) 67 | 68 | if jacobian: 69 | B, N, H, W = d.shape 70 | o = torch.zeros_like(d) 71 | proj_jac = torch.stack([ 72 | fx*d, o, -fx*X*d*d, o, 73 | o, fy*d, -fy*Y*d*d, o, 74 | # o, o, -D*d*d, d, 75 | ], dim=-1).view(B, N, H, W, 2, 4) 76 | 77 | return coords, proj_jac 78 | 79 | return coords, None 80 | 81 | def actp(Gij, X0, jacobian=False): 82 | """ action on point cloud """ 83 | X1 = Gij[:,:,None,None] * X0 84 | 85 | if jacobian: 86 | X, Y, Z, d = X1.unbind(dim=-1) 87 | o = torch.zeros_like(d) 88 | B, N, H, W = d.shape 89 | 90 | if isinstance(Gij, SE3): 91 | Ja = torch.stack([ 92 | d, o, o, o, Z, -Y, 93 | o, d, o, -Z, o, X, 94 | o, o, d, Y, -X, o, 95 | o, o, o, o, o, o, 96 | ], dim=-1).view(B, N, H, W, 4, 6) 97 | 98 | elif isinstance(Gij, Sim3): 99 | Ja = torch.stack([ 100 | d, o, o, o, Z, -Y, X, 101 | o, d, o, -Z, o, X, Y, 102 | o, o, d, Y, -X, o, Z, 103 | o, o, o, o, o, o, o 104 | ], dim=-1).view(B, N, H, W, 4, 7) 105 | 106 | return X1, Ja 107 | 108 | return X1, None 109 | 110 | def projective_transform(poses, depths, intrinsics, ii, jj, jacobian=False, return_depth=False): 111 | """ map points from ii->jj """ 112 | 113 | # inverse project (pinhole) 114 | X0, Jz = iproj(depths[:,ii], intrinsics[:,ii], jacobian=jacobian) 115 | 116 | # transform 117 | Gij = poses[:,jj] * poses[:,ii].inv() 118 | 119 | Gij.data[:,ii==jj] = torch.as_tensor([-0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], device="cuda") 120 | X1, Ja = actp(Gij, X0, jacobian=jacobian) 121 | 122 | # project (pinhole) 123 | x1, Jp = proj(X1, intrinsics[:,jj], jacobian=jacobian, return_depth=return_depth) 124 | 125 | # exclude points too close to camera 126 | valid = ((X1[...,2] > MIN_DEPTH) & (X0[...,2] > MIN_DEPTH)).float() 127 | valid = valid.unsqueeze(-1) 128 | 129 | if jacobian: 130 | # Ji transforms according to dual adjoint 131 | Jj = torch.matmul(Jp, Ja) 132 | Ji = -Gij[:,:,None,None,None].adjT(Jj) 133 | 134 | Jz = Gij[:,:,None,None] * Jz 135 | Jz = torch.matmul(Jp, Jz.unsqueeze(-1)) 136 | 137 | return x1, valid, (Ji, Jj, Jz) 138 | 139 | return x1, valid 140 | 141 | def 
induced_flow(poses, disps, intrinsics, ii, jj): 142 | """ optical flow induced by camera motion """ 143 | 144 | ht, wd = disps.shape[2:] 145 | y, x = torch.meshgrid( 146 | torch.arange(ht).to(disps.device).float(), 147 | torch.arange(wd).to(disps.device).float(),indexing="ij") 148 | 149 | coords0 = torch.stack([x, y], dim=-1) 150 | coords1, valid = projective_transform(poses, disps, intrinsics, ii, jj, False) 151 | 152 | return coords1[...,:2] - coords0, valid 153 | 154 | -------------------------------------------------------------------------------- /src/gui/gl_render/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Li Ma 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /src/gui/gl_render/__init__.py: -------------------------------------------------------------------------------- 1 | from . import render_ogl, util, util_gau 2 | -------------------------------------------------------------------------------- /src/gui/gl_render/render_ogl.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import torch 5 | from OpenGL import GL as gl 6 | 7 | from . 
import util, util_gau 8 | 9 | _sort_buffer_xyz = None 10 | _sort_buffer_gausid = None # used to tell whether gaussian is reloaded 11 | 12 | 13 | def _sort_gaussian_torch(gaus, view_mat): 14 | global _sort_buffer_gausid, _sort_buffer_xyz 15 | if _sort_buffer_gausid != id(gaus): 16 | _sort_buffer_xyz = torch.tensor(gaus.xyz).cuda() 17 | _sort_buffer_gausid = id(gaus) 18 | 19 | xyz = torch.tensor(gaus.xyz).cuda() 20 | view_mat = torch.tensor(view_mat).cuda() 21 | xyz_view = view_mat[None, :3, :3] @ xyz[..., None] + view_mat[None, :3, 3, None] 22 | depth = xyz_view[:, 2, 0] 23 | index = torch.argsort(depth) 24 | index = index.type(torch.int32).reshape(-1, 1).cpu().numpy() 25 | return index 26 | 27 | 28 | # Decide which sort to use 29 | _sort_gaussian = None 30 | if not torch.cuda.is_available(): 31 | raise ImportError 32 | _sort_gaussian = _sort_gaussian_torch 33 | 34 | 35 | class GaussianRenderBase: 36 | def __init__(self): 37 | self.gaussians = None 38 | 39 | def update_gaussian_data(self, gaus: util_gau.GaussianData): 40 | raise NotImplementedError() 41 | 42 | def sort_and_update(self): 43 | raise NotImplementedError() 44 | 45 | def set_scale_modifier(self, modifier: float): 46 | raise NotImplementedError() 47 | 48 | def set_render_mod(self, mod: int): 49 | raise NotImplementedError() 50 | 51 | def update_camera_pose(self, camera: util.Camera): 52 | raise NotImplementedError() 53 | 54 | def update_camera_intrin(self, camera: util.Camera): 55 | raise NotImplementedError() 56 | 57 | def draw(self): 58 | raise NotImplementedError() 59 | 60 | def set_render_reso(self, w, h): 61 | raise NotImplementedError() 62 | 63 | 64 | class OpenGLRenderer(GaussianRenderBase): 65 | def __init__(self, w, h): 66 | super().__init__() 67 | gl.glViewport(0, 0, w, h) 68 | cur_path = os.path.dirname(os.path.abspath(__file__)) 69 | self.program = util.load_shaders( 70 | os.path.join(cur_path, "shaders/gau_vert.glsl"), 71 | os.path.join(cur_path, "shaders/gau_frag.glsl"), 72 | ) 73 | 74 | # Vertex data for a quad 75 | self.quad_v = np.array([-1, 1, 1, 1, 1, -1, -1, -1], dtype=np.float32).reshape( 76 | 4, 2 77 | ) 78 | self.quad_f = np.array([0, 1, 2, 0, 2, 3], dtype=np.uint32).reshape(2, 3) 79 | 80 | # load quad geometry 81 | vao, buffer_id = util.set_attributes(self.program, ["position"], [self.quad_v]) 82 | util.set_faces_tovao(vao, self.quad_f) 83 | self.vao = vao 84 | self.gau_bufferid = None 85 | self.index_bufferid = None 86 | 87 | # opengl settings 88 | gl.glDisable(gl.GL_CULL_FACE) 89 | gl.glEnable(gl.GL_BLEND) 90 | gl.glBlendFunc(gl.GL_SRC_ALPHA, gl.GL_ONE_MINUS_SRC_ALPHA) 91 | 92 | def update_gaussian_data(self, gaus: util_gau.GaussianData): 93 | self.gaussians = gaus 94 | # load gaussian geometry 95 | gaussian_data = gaus.flat() 96 | self.gau_bufferid = util.set_storage_buffer_data( 97 | self.program, "gaussian_data", gaussian_data, bind_idx=0, 98 | buffer_id=self.gau_bufferid 99 | ) 100 | util.set_uniform_1int(self.program, gaus.sh_dim, "sh_dim") 101 | 102 | def sort_and_update(self, camera: util.Camera): 103 | index = _sort_gaussian(self.gaussians, camera.get_view_matrix()) 104 | self.index_bufferid = util.set_storage_buffer_data(self.program, "gi", index, bind_idx=1, 105 | buffer_id=self.index_bufferid) 106 | return 107 | 108 | def set_scale_modifier(self, modifier): 109 | util.set_uniform_1f(self.program, modifier, "scale_modifier") 110 | 111 | def set_render_mod(self, mod: int): 112 | util.set_uniform_1int(self.program, mod, "render_mod") 113 | 114 | def set_render_reso(self, w, h): 115 | 
gl.glViewport(0, 0, w, h) 116 | 117 | def update_camera_pose(self, camera: util.Camera): 118 | view_mat = camera.get_view_matrix() 119 | util.set_uniform_mat4(self.program, view_mat, "view_matrix") 120 | util.set_uniform_v3(self.program, camera.position, "cam_pos") 121 | 122 | def update_camera_intrin(self, camera: util.Camera): 123 | proj_mat = camera.get_project_matrix() 124 | util.set_uniform_mat4(self.program, proj_mat, "projection_matrix") 125 | util.set_uniform_v3(self.program, camera.get_htanfovxy_focal(), "hfovxy_focal") 126 | 127 | def draw(self): 128 | gl.glUseProgram(self.program) 129 | gl.glBindVertexArray(self.vao) 130 | num_gau = len(self.gaussians) 131 | gl.glDrawElementsInstanced( 132 | gl.GL_TRIANGLES, 133 | len(self.quad_f.reshape(-1)), 134 | gl.GL_UNSIGNED_INT, 135 | None, 136 | num_gau, 137 | ) 138 | -------------------------------------------------------------------------------- /src/gui/gl_render/shaders/gau_frag.glsl: -------------------------------------------------------------------------------- 1 | #version 430 core 2 | 3 | in vec3 color; 4 | in float alpha; 5 | in vec3 conic; 6 | in vec2 coordxy; // local coordinate in quad, unit in pixel 7 | 8 | uniform int render_mod; // > 0 render 0-ith SH dim, -1 depth, -2 bill board, -3 flat ball, -4 gaussian ball 9 | 10 | out vec4 FragColor; 11 | 12 | void main() 13 | { 14 | if (render_mod == -2) 15 | { 16 | FragColor = vec4(color, 1.f); 17 | return; 18 | } 19 | 20 | float power = -0.5f * (conic.x * coordxy.x * coordxy.x + conic.z * coordxy.y * coordxy.y) - conic.y * coordxy.x * coordxy.y; 21 | if (power > 0.f) 22 | discard; 23 | float opacity = min(0.99f, alpha * exp(power)); 24 | if (opacity < 1.f / 255.f) 25 | discard; 26 | FragColor = vec4(color, opacity); 27 | 28 | // handling special shading effect 29 | if (render_mod == -3) 30 | FragColor.a = FragColor.a > 0.22 ? 1 : 0; 31 | else if (render_mod == -4) 32 | { 33 | FragColor.a = FragColor.a > 0.4 ? 1 : 0; 34 | FragColor.rgb = FragColor.rgb * exp(power); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/gui/gl_render/util_gau.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import numpy as np 4 | 5 | 6 | @dataclass 7 | class GaussianData: 8 | xyz: np.ndarray 9 | rot: np.ndarray 10 | scale: np.ndarray 11 | opacity: np.ndarray 12 | sh: np.ndarray 13 | 14 | def flat(self) -> np.ndarray: 15 | ret = np.concatenate( 16 | [self.xyz, self.rot, self.scale, self.opacity, self.sh], axis=-1 17 | ) 18 | return np.ascontiguousarray(ret) 19 | 20 | def __len__(self): 21 | return len(self.xyz) 22 | 23 | @property 24 | def sh_dim(self): 25 | return self.sh.shape[-1] 26 | -------------------------------------------------------------------------------- /src/modules/droid_net/__init__.py: -------------------------------------------------------------------------------- 1 | from .clipping import GradientClip 2 | from .gru import ConvGRU 3 | from .extractor import BasicEncoder 4 | from .corr import CorrBlock, AltCorrBlock 5 | from .droid_net import DroidNet, cvx_upsample -------------------------------------------------------------------------------- /src/modules/droid_net/clipping.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The GlORIE-SLAM Authors. 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import torch 16 | import torch.nn as nn 17 | 18 | 19 | GRAD_CLIP = 0.01 20 | 21 | class GradClip(torch.autograd.Function): 22 | @staticmethod 23 | def forward(ctx, x): 24 | return x 25 | 26 | @staticmethod 27 | def backward(ctx, grad_x): 28 | o = torch.zeros_like(grad_x) 29 | grad_x = torch.where(grad_x.abs() > GRAD_CLIP, o, grad_x) 30 | grad_x = torch.where(torch.isnan(grad_x), o, grad_x) 31 | 32 | return grad_x 33 | 34 | 35 | class GradientClip(nn.Module): 36 | def __init__(self): 37 | super(GradientClip, self).__init__() 38 | 39 | def forward(self, x): 40 | return GradClip.apply(x) -------------------------------------------------------------------------------- /src/modules/droid_net/droid_net.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The GlORIE-SLAM Authors. 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
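# Illustrative sketch for GradientClip from src/modules/droid_net/clipping.py
# above: the forward pass is the identity, while the backward pass zeroes any
# gradient entry whose magnitude exceeds GRAD_CLIP (0.01) or that is NaN.
# The concrete numbers below are demonstration values only.
import torch
from src.modules.droid_net.clipping import GradientClip

clip = GradientClip()
x = torch.ones(3, requires_grad=True)
y = clip(x)
y.backward(torch.tensor([0.005, 0.5, float("nan")]))
print(x.grad)   # tensor([0.0050, 0.0000, 0.0000])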
14 | 15 | import torch 16 | import torch.nn as nn 17 | import torch.nn.functional as F 18 | from torch_scatter import scatter_mean 19 | 20 | from src.modules.droid_net import ConvGRU, BasicEncoder, GradientClip 21 | 22 | 23 | def cvx_upsample(data, mask): 24 | """ upsample pixel-wise transformation field """ 25 | batch, ht, wd, dim = data.shape 26 | data = data.permute(0, 3, 1, 2).contiguous() 27 | mask = mask.view(batch, 1, 9, 8, 8, ht, wd) 28 | mask = torch.softmax(mask, dim=2) 29 | 30 | up_data = F.unfold(data, kernel_size=(3, 3), padding=(1, 1)) 31 | up_data = up_data.view(batch, dim, 9, 1, 1, ht, wd) 32 | 33 | up_data = torch.sum(mask * up_data, dim=2, keepdim=False) 34 | up_data = up_data.permute(0, 4, 2, 5, 3, 1).contiguous() 35 | up_data = up_data.reshape(batch, 8*ht, 8*wd, dim) 36 | 37 | return up_data 38 | 39 | 40 | def upsample_disp(disp, mask): 41 | batch, num, ht, wd = disp.shape 42 | disp = disp.view(batch*num, ht, wd, 1) 43 | mask = mask.view(batch*num, -1, ht, wd) 44 | 45 | return cvx_upsample(disp, mask).view(batch, num, 8*ht, 8*wd) 46 | 47 | 48 | class GraphAgg(nn.Module): 49 | def __init__(self): 50 | super(GraphAgg, self).__init__() 51 | self.conv1 = nn.Conv2d(128, 128, kernel_size=(3, 3), padding=(1, 1)) 52 | self.conv2 = nn.Conv2d(128, 128, kernel_size=(3, 3), padding=(1, 1)) 53 | self.relu = nn.ReLU(inplace=True) 54 | 55 | self.eta = nn.Sequential( 56 | nn.Conv2d(128, 1, kernel_size=(3, 3), padding=(1, 1)), 57 | GradientClip(), 58 | nn.Softplus(), 59 | ) 60 | 61 | self.upmask = nn.Sequential( 62 | nn.Conv2d(128, 8*8*9, kernel_size=(1, 1), padding=(0, 0)) 63 | ) 64 | 65 | def forward(self, net, ii): 66 | batch, num, ch, ht, wd = net.shape 67 | net = net.view(batch*num, ch, ht, wd) 68 | 69 | _, ix = torch.unique(ii, sorted=True, return_inverse=True) 70 | net = self.relu(self.conv1(net)) 71 | net =net.view(batch, num, 128, ht, wd) 72 | 73 | net = scatter_mean(net, ix, dim=1) 74 | net = net.view(-1, 128, ht, wd) 75 | 76 | net = self.relu(self.conv2(net)) 77 | eta = self.eta(net).view(batch, -1, ht, wd) 78 | upmask = self.upmask(net).view(batch, -1, 8*8*9, ht, wd) 79 | 80 | return 0.01 * eta, upmask 81 | 82 | 83 | class UpdateModule(nn.Module): 84 | def __init__(self): 85 | super(UpdateModule, self).__init__() 86 | cor_planes = 4 * (2*3+1)**2 87 | 88 | self.corr_encoder = nn.Sequential( 89 | nn.Conv2d(cor_planes, 128, kernel_size=(1, 1), padding=(0, 0)), 90 | nn.ReLU(inplace=True), 91 | nn.Conv2d(128, 128, kernel_size=(3, 3), padding=(1, 1)), 92 | nn.ReLU(inplace=True), 93 | ) 94 | 95 | self.flow_encoder = nn.Sequential( 96 | nn.Conv2d(4, 128, kernel_size=(7, 7), padding=(3, 3)), 97 | nn.ReLU(inplace=True), 98 | nn.Conv2d(128, 64, kernel_size=(3, 3), padding=(1, 1)), 99 | nn.ReLU(inplace=True), 100 | ) 101 | 102 | self.weight = nn.Sequential( 103 | nn.Conv2d(128, 128, kernel_size=(3, 3), padding=(1, 1)), 104 | nn.ReLU(inplace=True), 105 | nn.Conv2d(128, 2, kernel_size=(3, 3), padding=(1, 1)), 106 | GradientClip(), 107 | nn.Sigmoid(), 108 | ) 109 | 110 | self.delta = nn.Sequential( 111 | nn.Conv2d(128, 128, kernel_size=(3, 3), padding=(1, 1)), 112 | nn.ReLU(inplace=True), 113 | nn.Conv2d(128, 2, kernel_size=(3, 3), padding=(1, 1)), 114 | GradientClip(), 115 | ) 116 | 117 | self.gru = ConvGRU(128, 128+128+64) 118 | self.agg = GraphAgg() 119 | 120 | def forward(self, net, inp, corr, flow=None, ii=None, jj=None): 121 | """ update operation """ 122 | 123 | batch, num, ch, ht, wd = net.shape 124 | device = net.device 125 | 126 | if flow is None: 127 | flow = torch.zeros(batch, 
num, 4, ht, wd, device=device) 128 | 129 | out_dim = (batch, num, -1, ht, wd) 130 | 131 | net = net.view(batch*num, -1, ht, wd) 132 | inp = inp.view(batch*num, -1, ht, wd) 133 | corr = corr.view(batch*num, -1, ht, wd) 134 | flow = flow.view(batch*num, -1, ht, wd) 135 | 136 | corr = self.corr_encoder(corr) 137 | flow = self.flow_encoder(flow) 138 | net = self.gru(net, inp, corr, flow) 139 | 140 | ### update variables ### 141 | delta = self.delta(net).view(*out_dim) 142 | weight = self.weight(net).view(*out_dim) 143 | 144 | delta = delta.permute(0, 1, 3, 4, 2)[..., :2].contiguous() 145 | weight = weight.permute(0, 1, 3, 4, 2)[..., :2].contiguous() 146 | 147 | net = net.view(*out_dim) 148 | 149 | if ii is not None: 150 | eta, upmask = self.agg(net, ii.to(device)) 151 | return net, delta, weight, eta, upmask 152 | else: 153 | return net, delta, weight 154 | 155 | 156 | class DroidNet(nn.Module): 157 | def __init__(self): 158 | super(DroidNet, self).__init__() 159 | self.fnet = BasicEncoder(out_dim=128, norm_fn='instance') 160 | self.cnet = BasicEncoder(out_dim=256, norm_fn='none') 161 | self.update = UpdateModule() 162 | 163 | -------------------------------------------------------------------------------- /src/modules/droid_net/extractor.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The GlORIE-SLAM Authors. 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
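# Illustrative sketch for the convex upsampling in droid_net.py above:
# upsample_disp lifts a 1/8-resolution disparity map to full resolution using a
# predicted mask with 9 * 8 * 8 weights per coarse pixel (softmaxed over the 9
# neighbours inside cvx_upsample). Shapes below are arbitrary demonstration values.
import torch
from src.modules.droid_net.droid_net import upsample_disp

B, N, ht, wd = 1, 2, 30, 40                     # two low-resolution disparity maps
disp = torch.rand(B, N, ht, wd)
mask = torch.randn(B, N, 9 * 8 * 8, ht, wd)     # per-pixel convex combination weights
up = upsample_disp(disp, mask)
print(up.shape)                                 # torch.Size([1, 2, 240, 320])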
14 | 15 | import torch.nn as nn 16 | 17 | 18 | class ResidualBlock(nn.Module): 19 | def __init__(self, in_planes, planes, norm_fn='group', stride=1): 20 | super(ResidualBlock, self).__init__() 21 | 22 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, padding=1, stride=stride) 23 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1) 24 | self.relu = nn.ReLU(inplace=True) 25 | 26 | num_groups = planes // 8 27 | if norm_fn == 'group': 28 | self.norm1 = nn.GroupNorm(num_groups=num_groups, num_channels=planes) 29 | self.norm2 = nn.GroupNorm(num_groups=num_groups, num_channels=planes) 30 | if stride > 1: 31 | self.norm3 = nn.GroupNorm(num_groups=num_groups, num_channels=planes) 32 | 33 | elif norm_fn == 'batch': 34 | self.norm1 = nn.BatchNorm2d(planes) 35 | self.norm2 = nn.BatchNorm2d(planes) 36 | if stride > 1: 37 | self.norm3 = nn.BatchNorm2d(planes) 38 | 39 | elif norm_fn == 'instance': 40 | self.norm1 = nn.InstanceNorm2d(planes) 41 | self.norm2 = nn.InstanceNorm2d(planes) 42 | if stride > 1: 43 | self.norm3 = nn.InstanceNorm2d(planes) 44 | 45 | elif norm_fn == 'none': 46 | self.norm1 = nn.Sequential() 47 | self.norm2 = nn.Sequential() 48 | if stride > 1: 49 | self.norm3 = nn.Sequential() 50 | else: 51 | raise TypeError(norm_fn) 52 | 53 | if stride == 1: 54 | self.downsample = None 55 | else: 56 | self.downsample = nn.Sequential( 57 | nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, padding=0), 58 | self.norm3, 59 | ) 60 | 61 | def forward(self, x): 62 | y = x 63 | y = self.relu(self.norm1(self.conv1(y))) 64 | y = self.relu(self.norm2(self.conv2(y))) 65 | 66 | if self.downsample is not None: 67 | x = self.downsample(x) 68 | 69 | return self.relu(x+y) 70 | 71 | 72 | DIM = 32 73 | 74 | 75 | class BasicEncoder(nn.Module): 76 | def __init__(self, out_dim, norm_fn='batch'): 77 | super(BasicEncoder, self).__init__() 78 | self.out_dim = out_dim 79 | self.norm_fn = norm_fn 80 | 81 | if norm_fn == 'group': 82 | self.norm1 = nn.GroupNorm(num_groups=8, num_channels=DIM) 83 | 84 | elif norm_fn == 'batch': 85 | self.norm1 = nn.BatchNorm2d(DIM) 86 | 87 | elif norm_fn == 'instance': 88 | self.norm1 = nn.InstanceNorm2d(DIM) 89 | 90 | elif self.norm_fn == 'none': 91 | self.norm1 = nn.Sequential() 92 | 93 | else: 94 | raise TypeError(self.norm_fn) 95 | 96 | self.conv1 = nn.Conv2d(3, DIM, 7, 2, 3) 97 | self.relu1 = nn.ReLU(inplace=True) 98 | 99 | self.in_planes = DIM 100 | self.layer1 = self._make_layer(DIM, stride=1) 101 | self.layer2 = self._make_layer(2*DIM, stride=2) 102 | self.layer3 = self._make_layer(4*DIM, stride=2) 103 | 104 | self.conv2 = nn.Conv2d(4*DIM, out_dim, kernel_size=(1, 1)) 105 | 106 | for m in self.modules(): 107 | if isinstance(m, nn.Conv2d): 108 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 109 | elif isinstance(m, (nn.BatchNorm2d, nn.InstanceNorm2d, nn.GroupNorm)): 110 | if m.weight is not None: 111 | nn.init.constant_(m.weight, 1) 112 | if m.bias is not None: 113 | nn.init.constant_(m.bias, 0) 114 | 115 | def _make_layer(self, dim, stride=1): 116 | layer1 = ResidualBlock(self.in_planes, dim, self.norm_fn, stride=stride) 117 | layer2 = ResidualBlock(dim, dim, self.norm_fn, stride=1) 118 | layers = [layer1, layer2] 119 | 120 | self.in_planes = dim 121 | 122 | return nn.Sequential(*layers) 123 | 124 | def forward(self, x): 125 | b, n, c1, h1, w1 = x.shape 126 | x = x.view(b*n, c1, h1, w1) 127 | 128 | x = self.conv1(x) 129 | x = self.norm1(x) 130 | x = self.relu1(x) 131 | 132 | x = self.layer1(x) 133 | x = self.layer2(x) 134 | x = 
self.layer3(x) 135 | 136 | x = self.conv2(x) 137 | 138 | _, c2, h2, w2 = x.shape 139 | x = x.view(b, n, c2, h2, w2) 140 | 141 | return x -------------------------------------------------------------------------------- /src/modules/droid_net/gru.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The GlORIE-SLAM Authors. 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import torch 16 | import torch.nn as nn 17 | 18 | 19 | class ConvGRU(nn.Module): 20 | def __init__(self, h_planes=128, i_planes=128): 21 | super(ConvGRU, self).__init__() 22 | self.do_checkpoint = False 23 | 24 | self.convz = nn.Conv2d(h_planes+i_planes, h_planes, kernel_size=(3, 3), padding=(1, 1)) 25 | self.convr = nn.Conv2d(h_planes+i_planes, h_planes, kernel_size=(3, 3), padding=(1, 1)) 26 | self.convq = nn.Conv2d(h_planes+i_planes, h_planes, kernel_size=(3, 3), padding=(1, 1)) 27 | 28 | self.w = nn.Conv2d(h_planes, h_planes, kernel_size=(1, 1), padding=(0, 0)) 29 | 30 | self.convz_glo = nn.Conv2d(h_planes, h_planes, kernel_size=(1, 1), padding=(0, 0)) 31 | self.convr_glo = nn.Conv2d(h_planes, h_planes, kernel_size=(1, 1), padding=(0, 0)) 32 | self.convq_glo = nn.Conv2d(h_planes, h_planes, kernel_size=(1, 1), padding=(0, 0)) 33 | 34 | def forward(self, net, *inputs): 35 | inp = torch.cat(inputs, dim=1) 36 | net_inp = torch.cat([net, inp], dim=1) 37 | 38 | b, c, h, w = net.shape 39 | glo = torch.sigmoid(self.w(net)) * net 40 | glo = glo.view(b, c, h*w).mean(dim=-1, keepdim=True).view(b, c, 1, 1) 41 | 42 | z = torch.sigmoid(self.convz(net_inp) + self.convz_glo(glo)) 43 | r = torch.sigmoid(self.convr(net_inp) + self.convr_glo(glo)) 44 | q = torch.tanh(self.convq(torch.cat([r*net, inp], dim=1)) + self.convq_glo(glo)) 45 | 46 | net = (1 - z) * net + z * q 47 | 48 | return net -------------------------------------------------------------------------------- /src/tracker.py: -------------------------------------------------------------------------------- 1 | from src.motion_filter import MotionFilter 2 | from src.frontend import Frontend 3 | from src.backend import Backend 4 | import torch 5 | from colorama import Fore, Style 6 | from multiprocessing.connection import Connection 7 | from src.utils.datasets import BaseDataset 8 | from src.utils.Printer import Printer,FontColor 9 | class Tracker: 10 | def __init__(self, slam, pipe:Connection): 11 | self.cfg = slam.cfg 12 | self.device = self.cfg['device'] 13 | self.net = slam.droid_net 14 | self.video = slam.video 15 | self.verbose = slam.verbose 16 | self.pipe = pipe 17 | self.output = slam.save_dir 18 | 19 | # filter incoming frames so that there is enough motion 20 | self.frontend_window = self.cfg['tracking']['frontend']['window'] 21 | filter_thresh = self.cfg['tracking']['motion_filter']['thresh'] 22 | self.motion_filter = MotionFilter(self.net, self.video, self.cfg, thresh=filter_thresh, device=self.device) 23 | self.enable_online_ba = 
self.cfg['tracking']['frontend']['enable_online_ba'] 24 | # frontend process 25 | self.frontend = Frontend(self.net, self.video, self.cfg) 26 | self.online_ba = Backend(self.net,self.video, self.cfg) 27 | self.ba_freq = self.cfg['tracking']['backend']['ba_freq'] 28 | 29 | self.printer:Printer = slam.printer 30 | 31 | def run(self, stream:BaseDataset): 32 | ''' 33 | Trigger the tracking process. 34 | 1. check whether there is enough motion between the current frame and last keyframe by motion_filter 35 | 2. use frontend to do local bundle adjustment, to estimate camera pose and depth image, 36 | also delete the current keyframe if it is too close to the previous keyframe after local BA. 37 | 3. run online global BA periodically by backend 38 | 4. send the estimated pose and depth to mapper, 39 | and wait until the mapper finish its current mapping optimization. 40 | ''' 41 | prev_kf_idx = 0 42 | curr_kf_idx = 0 43 | prev_ba_idx = 0 44 | 45 | intrinsic = stream.get_intrinsic() 46 | # for (timestamp, image, _, _) in tqdm(stream): 47 | for i in range(len(stream)): 48 | timestamp, image, _, _ = stream[i] 49 | with torch.no_grad(): 50 | starting_count = self.video.counter.value 51 | ### check there is enough motion 52 | force_to_add_keyframe = self.motion_filter.track(timestamp, image, intrinsic) 53 | 54 | # local bundle adjustment 55 | self.frontend(force_to_add_keyframe) 56 | 57 | if (starting_count < self.video.counter.value) and self.cfg['mapping']['full_resolution']: 58 | if self.motion_filter.uncertainty_aware: 59 | img_full = stream.get_color_full_resol(i) 60 | self.motion_filter.get_img_feature(timestamp,img_full,suffix='full') 61 | curr_kf_idx = self.video.counter.value - 1 62 | 63 | if curr_kf_idx != prev_kf_idx and self.frontend.is_initialized: 64 | if self.video.counter.value == self.frontend.warmup: 65 | ## We just finish the initialization 66 | self.pipe.send({"is_keyframe":True, "video_idx":curr_kf_idx, 67 | "timestamp":timestamp, "just_initialized": True, 68 | "end":False}) 69 | self.pipe.recv() 70 | self.frontend.initialize_second_stage() 71 | else: 72 | if self.enable_online_ba and curr_kf_idx >= prev_ba_idx + self.ba_freq: 73 | # run online global BA every {self.ba_freq} keyframes 74 | self.printer.print(f"Online BA at {curr_kf_idx}th keyframe, frame index: {timestamp}",FontColor.TRACKER) 75 | self.online_ba.dense_ba(2) 76 | prev_ba_idx = curr_kf_idx 77 | # inform the mapper that the estimation of current pose and depth is finished 78 | self.pipe.send({"is_keyframe":True, "video_idx":curr_kf_idx, 79 | "timestamp":timestamp, "just_initialized": False, 80 | "end":False}) 81 | self.pipe.recv() 82 | 83 | prev_kf_idx = curr_kf_idx 84 | self.printer.update_pbar() 85 | 86 | self.pipe.send({"is_keyframe":True, "video_idx":None, 87 | "timestamp":None, "just_initialized": False, 88 | "end":True}) 89 | 90 | 91 | -------------------------------------------------------------------------------- /src/trajectory_filler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import lietorch 3 | from lietorch import SE3 4 | from src.factor_graph import FactorGraph 5 | from tqdm import tqdm 6 | from src.utils.datasets import BaseDataset 7 | from src.utils.Printer import FontColor 8 | from src.utils.mono_priors.img_feature_extractors import predict_img_features, get_feature_extractor 9 | 10 | class PoseTrajectoryFiller: 11 | """ This class is used to fill in non-keyframe poses 12 | mainly inherited from DROID-SLAM 13 | """ 14 | def __init__(self, 
cfg, net, video, printer, device='cuda:0'): 15 | self.cfg = cfg 16 | 17 | # split net modules 18 | self.cnet = net.cnet 19 | self.fnet = net.fnet 20 | self.update = net.update 21 | 22 | self.count = 0 23 | self.video = video 24 | self.device = device 25 | self.printer = printer 26 | 27 | # mean, std for image normalization 28 | self.MEAN = torch.tensor([0.485, 0.456, 0.406], device=device)[:, None, None] 29 | self.STDV = torch.tensor([0.229, 0.224, 0.225], device=device)[:, None, None] 30 | 31 | self.uncertainty_aware = cfg['tracking']["uncertainty_params"]['activate'] 32 | 33 | def setup_feature_extractor(self): 34 | if self.uncertainty_aware: 35 | self.feat_extractor = get_feature_extractor(self.cfg) 36 | 37 | @torch.amp.autocast('cuda',enabled=True) 38 | def __feature_encoder(self, image): 39 | """ features for correlation volume """ 40 | return self.fnet(image) 41 | 42 | def __fill(self, timestamps, images, depths, intrinsics, dino_features): 43 | """ fill operator """ 44 | tt = torch.tensor(timestamps, device=self.device) 45 | images = torch.stack(images, dim=0) 46 | if depths is not None: 47 | depths = torch.stack(depths, dim=0) 48 | intrinsics = torch.stack(intrinsics, 0) 49 | if dino_features is not None: 50 | dino_features = torch.stack(dino_features, dim=0).to(self.device) 51 | inputs = images.to(self.device) 52 | 53 | ### linear pose interpolation ### 54 | N = self.video.counter.value 55 | M = len(timestamps) 56 | 57 | ts = self.video.timestamp[:N] 58 | Ps = SE3(self.video.poses[:N]) 59 | 60 | # found the location of current timestamp in keyframe queue 61 | t0 = torch.tensor([ts[ts<=t].shape[0] - 1 for t in timestamps]) 62 | t1 = torch.where(t0 < N-1, t0+1, t0) 63 | 64 | # time interval between nearby keyframes 65 | dt = ts[t1] - ts[t0] + 1e-3 66 | dP = Ps[t1] * Ps[t0].inv() 67 | 68 | v = dP.log() / dt.unsqueeze(dim=-1) 69 | w = v * (tt - ts[t0]).unsqueeze(dim=-1) 70 | Gs = SE3.exp(w) * Ps[t0] 71 | 72 | # extract features (no need for context features) 73 | inputs = inputs.sub_(self.MEAN).div_(self.STDV) 74 | fmap = self.__feature_encoder(inputs) 75 | 76 | # temporally put the non-keyframe at the end of keyframe queue 77 | self.video.counter.value += M 78 | self.video[N:N+M] = (tt, images[:, 0], Gs.data, 1, depths, intrinsics / 8.0, fmap, None, None, dino_features) 79 | 80 | if self.uncertainty_aware: 81 | self.video.update_uncertainty_mask_given_index(range(N,N+M)) 82 | 83 | graph = FactorGraph(self.video, self.update) 84 | # build edge between current frame and nearby keyframes for optimization 85 | graph.add_factors(t0.cuda(), torch.arange(N, N+M).cuda()) 86 | graph.add_factors(t1.cuda(), torch.arange(N, N+M).cuda()) 87 | 88 | for _ in range(12): 89 | graph.update(N, N+M, motion_only=True) 90 | 91 | Gs = SE3(self.video.poses[N:N+M].clone()) 92 | self.video.counter.value -= M 93 | 94 | return [Gs] 95 | 96 | @torch.no_grad() 97 | def __call__(self, image_stream:BaseDataset): 98 | """ fill in poses of non-keyframe images. 
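        Non-keyframe poses are filled in chunks of 16 frames. Each pose is first
        initialised by linear interpolation on SE(3) between its two neighbouring
        keyframes (t0, P0) and (t1, P1),

            G(t) = exp( (t - t0) / (t1 - t0 + 1e-3) * log(P1 * P0^-1) ) * P0,

        and then refined by 12 motion-only factor-graph updates against those two
        keyframes (see __fill above). When uncertainty-aware tracking is enabled,
        per-frame DINO features are also extracted and returned alongside the poses.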
""" 99 | 100 | # store all camera poses 101 | pose_list = [] 102 | dino_feats = None 103 | if self.uncertainty_aware: 104 | dino_feats = [] 105 | 106 | timestamps = [] 107 | images = [] 108 | intrinsics = [] 109 | dino_features = [] 110 | 111 | self.printer.print("Filling full trajectory ...",FontColor.INFO) 112 | intrinsic = image_stream.get_intrinsic() 113 | for (timestamp, image, _ , _) in tqdm(image_stream): 114 | timestamps.append(timestamp) 115 | images.append(image) 116 | intrinsics.append(intrinsic) 117 | if self.uncertainty_aware: 118 | dino_feature = predict_img_features(self.feat_extractor, 119 | timestamp,image, 120 | self.cfg, 121 | self.device, 122 | save_feat=False) 123 | dino_features.append(dino_feature) 124 | else: 125 | dino_features = None 126 | 127 | if len(timestamps) == 16: 128 | pose_list += self.__fill(timestamps, images, None, intrinsics, dino_features) 129 | if dino_features is not None: 130 | dino_feats += dino_features 131 | timestamps, images, intrinsics, dino_features = [], [], [], [] 132 | 133 | if len(timestamps) > 0: 134 | pose_list += self.__fill(timestamps, images, None, intrinsics, dino_features) 135 | if dino_features is not None: 136 | dino_feats += dino_features 137 | 138 | # stitch pose segments together 139 | return lietorch.cat(pose_list, dim=0), dino_feats -------------------------------------------------------------------------------- /src/utils/Printer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Splat-SLAM Authors. 2 | # Licensed under the Apache License, Version 2.0 3 | # available at: https://github.com/google-research/Splat-SLAM/blob/main/LICENSE 4 | 5 | from colorama import Fore, Style 6 | import torch.multiprocessing as mp 7 | 8 | 9 | class FontColor(object): 10 | MAPPER=Fore.CYAN 11 | TRACKER=Fore.BLUE 12 | INFO=Fore.YELLOW 13 | ERROR=Fore.RED 14 | PCL=Fore.GREEN 15 | EVAL=Fore.MAGENTA 16 | MESH="yellow" 17 | 18 | 19 | def get_msg_prefix(color): 20 | if color == FontColor.MAPPER: 21 | msg_prefix = color + "[MAPPER] " + Style.RESET_ALL 22 | elif color == FontColor.TRACKER: 23 | msg_prefix = color + "[TRACKER] " + Style.RESET_ALL 24 | elif color == FontColor.INFO: 25 | msg_prefix = color + "[INFO] " + Style.RESET_ALL 26 | elif color == FontColor.ERROR: 27 | msg_prefix = color + "[ERROR] " + Style.RESET_ALL 28 | elif color == FontColor.PCL: 29 | msg_prefix = color + "[POINTCLOUD] " + Style.RESET_ALL 30 | elif color == FontColor.EVAL: 31 | msg_prefix = color + "[EVALUATION] " + Style.RESET_ALL 32 | elif color == FontColor.MESH: 33 | msg_prefix = FontColor.INFO + "[MESH] " + Style.RESET_ALL 34 | else: 35 | msg_prefix = Style.RESET_ALL 36 | return msg_prefix 37 | 38 | class TrivialPrinter(object): 39 | def print(self,msg:str,color=None): 40 | msg_prefix = get_msg_prefix(color) 41 | msg = msg_prefix + msg + Style.RESET_ALL 42 | print(msg) 43 | 44 | class Printer(TrivialPrinter): 45 | def __init__(self, total_img_num): 46 | self.msg_lock = mp.Lock() 47 | self.msg_queue = mp.Queue() 48 | self.progress_counter = mp.Value('i', 0) 49 | process = mp.Process(target=self.printer_process, args=(total_img_num,)) 50 | process.start() 51 | def print(self,msg:str,color=None): 52 | msg_prefix = get_msg_prefix(color) 53 | msg = msg_prefix + msg + Style.RESET_ALL 54 | with self.msg_lock: 55 | self.msg_queue.put(msg) 56 | def update_pbar(self): 57 | with self.msg_lock: 58 | self.progress_counter.value += 1 59 | self.msg_queue.put(f"PROGRESS") 60 | def pbar_ready(self): 61 | with self.msg_lock: 62 
| self.msg_queue.put(f"READY") 63 | 64 | def printer_process(self,total_img_num): 65 | from tqdm import tqdm 66 | while True: 67 | message = self.msg_queue.get() 68 | if message == "READY": 69 | break 70 | else: 71 | print(message) 72 | with tqdm(total=total_img_num) as pbar: 73 | while self.progress_counter.value < total_img_num: 74 | message = self.msg_queue.get() 75 | if message == "DONE": 76 | break 77 | elif message.startswith("PROGRESS"): 78 | with self.msg_lock: 79 | completed = self.progress_counter.value 80 | pbar.set_description(FontColor.TRACKER+f"[TRACKER] "+Style.RESET_ALL) 81 | pbar.n = completed 82 | pbar.refresh() 83 | else: 84 | pbar.write(message) 85 | while True: 86 | message = self.msg_queue.get() 87 | if message == "DONE": 88 | break 89 | else: 90 | print(message) 91 | 92 | 93 | def terminate(self): 94 | self.msg_queue.put("DONE") 95 | 96 | 97 | -------------------------------------------------------------------------------- /src/utils/common.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Splat-SLAM Authors. 2 | # Licensed under the Apache License, Version 2.0 3 | # available at: https://github.com/google-research/Splat-SLAM/blob/main/LICENSE 4 | 5 | import numpy as np 6 | import random 7 | import torch 8 | 9 | 10 | def setup_seed(seed): 11 | torch.manual_seed(seed) 12 | torch.cuda.manual_seed_all(seed) 13 | np.random.seed(seed) 14 | random.seed(seed) 15 | torch.backends.cudnn.deterministic = True 16 | torch.backends.cudnn.benchmark = False 17 | 18 | 19 | def as_intrinsics_matrix(intrinsics): 20 | """ 21 | Get matrix representation of intrinsics. 22 | 23 | """ 24 | K = torch.eye(3) 25 | K[0, 0] = intrinsics[0] 26 | K[1, 1] = intrinsics[1] 27 | K[0, 2] = intrinsics[2] 28 | K[1, 2] = intrinsics[3] 29 | return K 30 | 31 | 32 | def update_cam(cfg): 33 | """ 34 | Update the camera intrinsics according to the pre-processing config, 35 | such as resize or edge crop 36 | """ 37 | # resize the input images to crop_size(variable name used in lietorch) 38 | H, W = cfg['cam']['H'], cfg['cam']['W'] 39 | fx, fy = cfg['cam']['fx'], cfg['cam']['fy'] 40 | cx, cy = cfg['cam']['cx'], cfg['cam']['cy'] 41 | 42 | h_edge, w_edge = cfg['cam']['H_edge'], cfg['cam']['W_edge'] 43 | H_out, W_out = cfg['cam']['H_out'], cfg['cam']['W_out'] 44 | 45 | fx = fx * (W_out + w_edge * 2) / W 46 | fy = fy * (H_out + h_edge * 2) / H 47 | cx = cx * (W_out + w_edge * 2) / W 48 | cy = cy * (H_out + h_edge * 2) / H 49 | H, W = H_out, W_out 50 | 51 | cx = cx - w_edge 52 | cy = cy - h_edge 53 | return H,W,fx,fy,cx,cy 54 | 55 | 56 | @torch.no_grad() 57 | def align_scale_and_shift(prediction, target, weights): 58 | 59 | ''' 60 | weighted least squares problem to solve scale and shift: 61 | min sum{ 62 | weight[i,j] * 63 | (prediction[i,j] * scale + shift - target[i,j])^2 64 | } 65 | 66 | prediction: [B,H,W] 67 | target: [B,H,W] 68 | weights: [B,H,W] 69 | ''' 70 | 71 | if weights is None: 72 | weights = torch.ones_like(prediction).to(prediction.device) 73 | if len(prediction.shape)<3: 74 | prediction = prediction.unsqueeze(0) 75 | target = target.unsqueeze(0) 76 | weights = weights.unsqueeze(0) 77 | a_00 = torch.sum(weights * prediction * prediction, dim=[1,2]) 78 | a_01 = torch.sum(weights * prediction, dim=[1,2]) 79 | a_11 = torch.sum(weights, dim=[1,2]) 80 | # right hand side: b = [b_0, b_1] 81 | b_0 = torch.sum(weights * prediction * target, dim=[1,2]) 82 | b_1 = torch.sum(weights * target, dim=[1,2]) 83 | # solution: x = A^-1 . 
b = [[a_11, -a_01], [-a_10, a_00]] / (a_00 * a_11 - a_01 * a_10) . b 84 | det = a_00 * a_11 - a_01 * a_01 85 | scale = (a_11 * b_0 - a_01 * b_1) / det 86 | shift = (-a_01 * b_0 + a_00 * b_1) / det 87 | error = (scale[:,None,None]*prediction+shift[:,None,None]-target).abs() 88 | masked_error = error*weights 89 | error_sum = masked_error.sum(dim=[1,2]) 90 | error_num = weights.sum(dim=[1,2]) 91 | avg_error = error_sum/error_num 92 | 93 | return scale,shift,avg_error -------------------------------------------------------------------------------- /src/utils/dyn_uncertainty/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/src/utils/dyn_uncertainty/__init__.py -------------------------------------------------------------------------------- /src/utils/dyn_uncertainty/median_filter.py: -------------------------------------------------------------------------------- 1 | # Based on https://gist.github.com/rwightman/f2d3849281624be7c0f11c85c87c1598 2 | import math 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torch.nn.modules.utils import _pair, _quadruple 7 | 8 | 9 | class MedianPool2d(nn.Module): 10 | """ Median pool module. 11 | 12 | This is used to smooth the thin line in ssim loss. 13 | 14 | Args: 15 | kernel_size: size of pooling kernel, int or 2-tuple 16 | stride: pool stride, int or 2-tuple 17 | padding: pool padding, int or 4-tuple (l, r, t, b) as in pytorch F.pad 18 | same: override padding and enforce same padding, boolean 19 | """ 20 | def __init__(self, kernel_size=3, stride=1, padding=0, same=False): 21 | super(MedianPool2d, self).__init__() 22 | self.k = _pair(kernel_size) 23 | self.stride = _pair(stride) 24 | self.padding = _quadruple(padding) # convert to l, r, t, b 25 | self.same = same 26 | 27 | def _padding(self, x): 28 | if self.same: 29 | ih, iw = x.size()[2:] 30 | if ih % self.stride[0] == 0: 31 | ph = max(self.k[0] - self.stride[0], 0) 32 | else: 33 | ph = max(self.k[0] - (ih % self.stride[0]), 0) 34 | if iw % self.stride[1] == 0: 35 | pw = max(self.k[1] - self.stride[1], 0) 36 | else: 37 | pw = max(self.k[1] - (iw % self.stride[1]), 0) 38 | pl = pw // 2 39 | pr = pw - pl 40 | pt = ph // 2 41 | pb = ph - pt 42 | padding = (pl, pr, pt, pb) 43 | else: 44 | padding = self.padding 45 | return padding 46 | 47 | def forward(self, x): 48 | # using existing pytorch functions and tensor ops so that we get autograd, 49 | # would likely be more efficient to implement from scratch at C/Cuda level 50 | x = F.pad(x, self._padding(x), mode='reflect') 51 | x = x.unfold(2, self.k[0], self.stride[0]).unfold(3, self.k[1], self.stride[1]) 52 | x = x.contiguous().view(x.size()[:4] + (-1,)).median(dim=-1)[0] 53 | return x -------------------------------------------------------------------------------- /src/utils/dyn_uncertainty/uncertainty_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class MLPNetwork(nn.Module): 6 | def __init__(self, input_dim: int = 384, hidden_dim: int = 64, output_dim: int = 1, 7 | net_depth: int = 2, net_activation=F.relu, weight_init: str = 'he_uniform'): 8 | super(MLPNetwork, self).__init__() 9 | 10 | self.output_layer_input_dim = hidden_dim 11 | 12 | # Initialize MLP layers 13 | self.layers = nn.ModuleList() 14 | for i in range(net_depth): 15 | dense_layer 
= nn.Linear(input_dim if i == 0 else hidden_dim, hidden_dim) 16 | 17 | # Apply weight initialization 18 | if weight_init == 'he_uniform': 19 | nn.init.kaiming_uniform_(dense_layer.weight, nonlinearity='relu') 20 | elif weight_init == 'xavier_uniform': 21 | nn.init.xavier_uniform_(dense_layer.weight) 22 | else: 23 | raise NotImplementedError(f"Unknown Weight initialization method {weight_init}") 24 | 25 | self.layers.append(dense_layer) 26 | 27 | # Initialize output layer 28 | self.output_layer = nn.Linear(self.output_layer_input_dim, output_dim) 29 | nn.init.kaiming_uniform_(self.output_layer.weight, nonlinearity='relu') 30 | 31 | # Set activation function 32 | self.net_activation = net_activation 33 | self.softplus = nn.Softplus() 34 | 35 | def forward(self, x: torch.Tensor) -> torch.Tensor: 36 | # Get input dimensions 37 | H, W, C = x.shape[-3:] 38 | input_with_batch_dim = True 39 | 40 | # Add batch dimension if not present 41 | if len(x.shape) == 3: 42 | input_with_batch_dim = False 43 | x = x.unsqueeze(0) 44 | batch_size = 1 45 | else: 46 | batch_size = x.shape[0] 47 | 48 | # Flatten input for MLP 49 | x = x.view(-1, x.size()[-1]) 50 | 51 | # Pass through MLP layers 52 | for layer in self.layers: 53 | x = layer(x) 54 | x = self.net_activation(x) 55 | x = F.dropout(x, p=0.2) 56 | 57 | # Pass through output layer and apply softplus activation 58 | x = self.output_layer(x) 59 | x = self.softplus(x) 60 | 61 | # Reshape output to original dimensions 62 | if input_with_batch_dim: 63 | x = x.view(batch_size, H, W) 64 | else: 65 | x = x.view(H, W) 66 | 67 | return x 68 | 69 | def generate_uncertainty_mlp(n_features: int) -> MLPNetwork: 70 | # Create and return an MLP network with the specified input dimensions 71 | network = MLPNetwork(input_dim=n_features).cuda() 72 | return network -------------------------------------------------------------------------------- /src/utils/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/src/utils/eval_utils.py -------------------------------------------------------------------------------- /src/utils/mono_priors/metric_depth_estimators.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn.functional as F 4 | from torchvision import transforms 5 | import torchvision.transforms.functional as TF 6 | from typing import Dict, Tuple, Union 7 | 8 | from thirdparty.depth_anything_v2.metric_depth.depth_anything_v2.dpt import ( 9 | DepthAnythingV2, 10 | ) 11 | 12 | 13 | def get_metric_depth_estimator(cfg: Dict) -> torch.nn.Module: 14 | """ 15 | Get the metric depth estimator model based on the configuration. 16 | 17 | Args: 18 | cfg (Dict): Configuration dictionary. 19 | 20 | Returns: 21 | torch.nn.Module: The metric depth estimator model. 
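        Example (illustrative; the exact model string and device are assumptions):
            cfg = {"device": "cuda:0",
                   "mono_prior": {"depth": "dpt2_vitl_hypersim_20"}}
            model = get_metric_depth_estimator(cfg)
            # A "dpt2_<encoder>_<dataset>_<max_depth>" string selects a
            # Depth-Anything-V2 metric head and loads
            # pretrained/depth_anything_v2_metric_<dataset>_<encoder>.pth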
22 | """ 23 | device = cfg["device"] 24 | depth_model = cfg["mono_prior"]["depth"] 25 | 26 | if "metric3d_vit" in depth_model: 27 | # Options: metric3d_vit_small, metric3d_vit_large, metric3d_vit_giant2 28 | model = torch.hub.load("yvanyin/metric3d", depth_model, pretrain=True) 29 | elif "dpt2" in depth_model: 30 | model = _create_dpt2_model(depth_model) 31 | else: 32 | # If use other metric depth estimator as prior, write the code here 33 | raise NotImplementedError("Unsupported depth model") 34 | return model.to(device).eval() 35 | 36 | 37 | def _create_dpt2_model(depth_model: str) -> DepthAnythingV2: 38 | """ 39 | Create a DPT2 model based on the depth model string. 40 | 41 | Args: 42 | depth_model (str): Depth model configuration string. 43 | 44 | Returns: 45 | DepthAnythingV2: Configured DPT2 model. 46 | """ 47 | model_configs = { 48 | "vits": {"encoder": "vits", "features": 64, "out_channels": [48, 96, 192, 384]}, 49 | "vitb": { 50 | "encoder": "vitb", 51 | "features": 128, 52 | "out_channels": [96, 192, 384, 768], 53 | }, 54 | "vitl": { 55 | "encoder": "vitl", 56 | "features": 256, 57 | "out_channels": [256, 512, 1024, 1024], 58 | }, 59 | } 60 | 61 | encoder, dataset, max_depth = depth_model.split("_")[1:4] 62 | config = {**model_configs[encoder], "max_depth": int(max_depth)} 63 | model = DepthAnythingV2(**config) 64 | 65 | weights_path = f"pretrained/depth_anything_v2_metric_{dataset}_{encoder}.pth" 66 | model.load_state_dict( 67 | torch.load(weights_path, map_location="cpu", weights_only=True) 68 | ) 69 | 70 | return model 71 | 72 | 73 | @torch.no_grad() 74 | def predict_metric_depth( 75 | model: torch.nn.Module, 76 | idx: int, 77 | input_tensor: torch.Tensor, 78 | cfg: Dict, 79 | device: str, 80 | save_depth: bool = True, 81 | ) -> torch.Tensor: 82 | """ 83 | Predict metric depth using the given model. 84 | 85 | Args: 86 | model (torch.nn.Module): The depth estimation model. 87 | idx (int): Image index. 88 | input_tensor (torch.Tensor): Input image tensor of shape (1, 3, H, W). 89 | cfg (Dict): Configuration dictionary. 90 | device (str): Device to run the model on. 91 | save_depth (bool): Whether to save the depth map. 92 | 93 | Returns: 94 | torch.Tensor: Predicted depth map. 
95 | """ 96 | depth_model = cfg["mono_prior"]["depth"] 97 | if "metric3d_vit" in depth_model: 98 | output = _predict_metric3d_depth(model, input_tensor, cfg, device) 99 | elif "dpt2" in depth_model: 100 | # dpt2 model takes np.uint8 as the dtype of input 101 | input_numpy = (255.0 * input.squeeze().permute(1, 2, 0).cpu().numpy()).astype( 102 | np.uint8 103 | ) 104 | depth = model.infer_image(input_numpy, input_size=518) 105 | output = torch.tensor(depth).to(device) 106 | else: 107 | # If use other metric depth estimator as prior, write the code here 108 | raise NotImplementedError("Unsupported depth model") 109 | 110 | if save_depth: 111 | _save_depth_map(output, cfg, idx) 112 | 113 | return output 114 | 115 | 116 | def _predict_metric3d_depth( 117 | model: torch.nn.Module, input_tensor: torch.Tensor, cfg: Dict, device: str 118 | ) -> torch.Tensor: 119 | # Refer from: https://github.com/YvanYin/Metric3D/blob/34afafe58d9543f13c01b65222255dab53333838/hubconf.py#L181 120 | image_size = (616, 1064) 121 | h, w = input_tensor.shape[-2:] 122 | scale = min(image_size[0] / h, image_size[1] / w) 123 | 124 | trans_totensor = transforms.Compose( 125 | [ 126 | transforms.Resize((int(h * scale), int(w * scale))), 127 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 128 | ] 129 | ) 130 | img_tensor = trans_totensor(input_tensor).to(device) 131 | 132 | pad_h, pad_w = image_size[0] - int(h * scale), image_size[1] - int(w * scale) 133 | pad_h_half, pad_w_half = pad_h // 2, pad_w // 2 134 | img_tensor = TF.pad( 135 | img_tensor, 136 | (pad_w_half, pad_h_half, pad_w - pad_w_half, pad_h - pad_h_half), 137 | padding_mode="constant", 138 | fill=0.0, 139 | ) 140 | 141 | pad_info = [pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half] 142 | pred_depth, _, _ = model.inference({"input": img_tensor}) 143 | pred_depth = pred_depth.squeeze() 144 | pred_depth = pred_depth[ 145 | pad_info[0] : pred_depth.shape[0] - pad_info[1], 146 | pad_info[2] : pred_depth.shape[1] - pad_info[3], 147 | ] 148 | pred_depth = F.interpolate( 149 | pred_depth[None, None, :, :], (h, w), mode="bicubic" 150 | ).squeeze() 151 | 152 | canonical_to_real_scale = cfg["cam"]["fx"] / 1000.0 153 | pred_depth = pred_depth * canonical_to_real_scale 154 | return torch.clamp(pred_depth, 0, 300) 155 | 156 | 157 | def _save_depth_map(depth_map: torch.Tensor, cfg: Dict, idx: int) -> None: 158 | output_dir = f"{cfg['data']['output']}/{cfg['scene']}" 159 | output_path = f"{output_dir}/mono_priors/depths/{idx:05d}.npy" 160 | final_depth = depth_map.detach().cpu().float().numpy() 161 | np.save(output_path, final_depth) 162 | -------------------------------------------------------------------------------- /src/utils/plot_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | from PIL import Image 3 | import re 4 | 5 | 6 | def create_gif_from_directory(directory_path, output_filename, duration=100, online=True): 7 | """ 8 | Creates a GIF from all PNG images in a given directory. 9 | 10 | :param directory_path: Path to the directory containing PNG images. 11 | :param output_filename: Output filename for the GIF. 12 | :param duration: Duration of each frame in the GIF (in milliseconds). 
13 | """ 14 | # Function to extract the number from the filename 15 | def extract_number(filename): 16 | # Pattern to find a number followed by '.png' 17 | match = re.search(r'(\d+)\.png$', filename) 18 | if match: 19 | return int(match.group(1)) 20 | else: 21 | return None 22 | 23 | 24 | if online: 25 | # Get all PNG files in the directory 26 | image_files = [os.path.join(directory_path, file) for file in os.listdir(directory_path) if file.endswith('.png')] 27 | 28 | # Sort the files based on the number in the filename 29 | image_files.sort(key=extract_number) 30 | else: 31 | # Get all PNG files in the directory 32 | image_files = [os.path.join(directory_path, file) for file in os.listdir(directory_path) if file.endswith('.png')] 33 | 34 | # Sort the files based on the number in the filename 35 | image_files.sort() 36 | 37 | # Load images 38 | images = [Image.open(file) for file in image_files] 39 | 40 | # Convert images to the same mode and size for consistency 41 | images = [img.convert('RGBA') for img in images] 42 | base_size = images[0].size 43 | resized_images = [img.resize(base_size, Image.LANCZOS) for img in images] 44 | 45 | # Save as GIF 46 | resized_images[0].save(output_filename, save_all=True, append_images=resized_images[1:], optimize=False, duration=duration, loop=0) -------------------------------------------------------------------------------- /src/utils/pose_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The MonoGS Authors. 2 | 3 | # Licensed under the License issued by the MonoGS Authors 4 | # available here: https://github.com/muskie82/MonoGS/blob/main/LICENSE.md 5 | 6 | import numpy as np 7 | import torch 8 | 9 | 10 | def rt2mato(R, T): # TODO: remove? 11 | mat = np.eye(4) 12 | mat[0:3, 0:3] = R 13 | mat[0:3, 3] = T 14 | return mat 15 | 16 | 17 | def skew_sym_mat(x): 18 | device = x.device 19 | dtype = x.dtype 20 | ssm = torch.zeros(3, 3, device=device, dtype=dtype) 21 | ssm[0, 1] = -x[2] 22 | ssm[0, 2] = x[1] 23 | ssm[1, 0] = x[2] 24 | ssm[1, 2] = -x[0] 25 | ssm[2, 0] = -x[1] 26 | ssm[2, 1] = x[0] 27 | return ssm 28 | 29 | 30 | def SO3_exp(theta): 31 | device = theta.device 32 | dtype = theta.dtype 33 | 34 | W = skew_sym_mat(theta) 35 | W2 = W @ W 36 | angle = torch.norm(theta) 37 | I = torch.eye(3, device=device, dtype=dtype) 38 | if angle < 1e-5: 39 | return I + W + 0.5 * W2 40 | else: 41 | return ( 42 | I 43 | + (torch.sin(angle) / angle) * W 44 | + ((1 - torch.cos(angle)) / (angle**2)) * W2 45 | ) 46 | 47 | 48 | def V(theta): 49 | dtype = theta.dtype 50 | device = theta.device 51 | I = torch.eye(3, device=device, dtype=dtype) 52 | W = skew_sym_mat(theta) 53 | W2 = W @ W 54 | angle = torch.norm(theta) 55 | if angle < 1e-5: 56 | V = I + 0.5 * W + (1.0 / 6.0) * W2 57 | else: 58 | V = ( 59 | I 60 | + W * ((1.0 - torch.cos(angle)) / (angle**2)) 61 | + W2 * ((angle - torch.sin(angle)) / (angle**3)) 62 | ) 63 | return V 64 | 65 | 66 | def SE3_exp(tau): 67 | dtype = tau.dtype 68 | device = tau.device 69 | 70 | rho = tau[:3] 71 | theta = tau[3:] 72 | R = SO3_exp(theta) 73 | t = V(theta) @ rho 74 | 75 | T = torch.eye(4, device=device, dtype=dtype) 76 | T[:3, :3] = R 77 | T[:3, 3] = t 78 | return T 79 | 80 | 81 | def update_pose(camera, converged_threshold=1e-4): 82 | tau = torch.cat([camera.cam_trans_delta, camera.cam_rot_delta], axis=0) 83 | 84 | T_w2c = torch.eye(4, device=tau.device) 85 | T_w2c[0:3, 0:3] = camera.R 86 | T_w2c[0:3, 3] = camera.T 87 | 88 | new_w2c = SE3_exp(tau) @ T_w2c 89 | 90 | new_R = 
new_w2c[0:3, 0:3] 91 | new_T = new_w2c[0:3, 3] 92 | 93 | converged = tau.norm() < converged_threshold 94 | camera.update_RT(new_R, new_T) 95 | 96 | camera.cam_rot_delta.data.fill_(0) 97 | camera.cam_trans_delta.data.fill_(0) 98 | return converged 99 | -------------------------------------------------------------------------------- /thirdparty/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/thirdparty/__init__.py -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/DA-2K.md: -------------------------------------------------------------------------------- 1 | # DA-2K Evaluation Benchmark 2 | 3 | ## Introduction 4 | 5 | ![DA-2K](assets/DA-2K.png) 6 | 7 | DA-2K is proposed in [Depth Anything V2](https://depth-anything-v2.github.io) to evaluate the relative depth estimation capability. It encompasses eight representative scenarios of `indoor`, `outdoor`, `non_real`, `transparent_reflective`, `adverse_style`, `aerial`, `underwater`, and `object`. It consists of 1K diverse high-quality images and 2K precise pair-wise relative depth annotations. 8 | 9 | Please refer to our [paper](https://arxiv.org/abs/2406.09414) for details in constructing this benchmark. 10 | 11 | 12 | ## Usage 13 | 14 | Please first [download the benchmark](https://huggingface.co/datasets/depth-anything/DA-2K/tree/main). 15 | 16 | All annotations are stored in `annotations.json`. The annotation file is a JSON object where each key is the path to an image file, and the value is a list of annotations associated with that image. Each annotation describes two points and identifies which point is closer to the camera. The structure is detailed below: 17 | 18 | ``` 19 | { 20 | "image_path": [ 21 | { 22 | "point1": [h1, w1], # (vertical position, horizontal position) 23 | "point2": [h2, w2], # (vertical position, horizontal position) 24 | "closer_point": "point1" # we always set "point1" as the closer one 25 | }, 26 | ... 27 | ], 28 | ... 29 | } 30 | ``` 31 | 32 | To visualize the annotations: 33 | ```bash 34 | python visualize.py [--scene-type ] 35 | ``` 36 | 37 | **Options** 38 | - `--scene-type ` (optional): Specify the scene type (`indoor`, `outdoor`, `non_real`, `transparent_reflective`, `adverse_style`, `aerial`, `underwater`, and `object`). Skip this argument or set as `""` to include all scene types. 
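To consume the annotations programmatically, a minimal sketch (assuming `annotations.json` sits in the benchmark root next to the image folders) is:

```python
import json

with open("annotations.json") as f:
    annotations = json.load(f)

for image_path, pairs in annotations.items():
    for pair in pairs:
        h1, w1 = pair["point1"]            # point1 is always the closer point
        h2, w2 = pair["point2"]
        assert pair["closer_point"] == "point1"
```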
39 | 40 | ## Citation 41 | 42 | If you find this benchmark useful, please consider citing: 43 | 44 | ```bibtex 45 | @article{depth_anything_v2, 46 | title={Depth Anything V2}, 47 | author={Yang, Lihe and Kang, Bingyi and Huang, Zilong and Zhao, Zhen and Xu, Xiaogang and Feng, Jiashi and Zhao, Hengshuang}, 48 | journal={arXiv:2406.09414}, 49 | year={2024} 50 | } 51 | ``` -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/app.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import gradio as gr 3 | import matplotlib 4 | import numpy as np 5 | from PIL import Image 6 | import torch 7 | import tempfile 8 | from gradio_imageslider import ImageSlider 9 | 10 | from depth_anything_v2.dpt import DepthAnythingV2 11 | 12 | css = """ 13 | #img-display-container { 14 | max-height: 100vh; 15 | } 16 | #img-display-input { 17 | max-height: 80vh; 18 | } 19 | #img-display-output { 20 | max-height: 80vh; 21 | } 22 | #download { 23 | height: 62px; 24 | } 25 | """ 26 | DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu' 27 | model_configs = { 28 | 'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]}, 29 | 'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]}, 30 | 'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]}, 31 | 'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]} 32 | } 33 | encoder = 'vitl' 34 | model = DepthAnythingV2(**model_configs[encoder]) 35 | state_dict = torch.load(f'checkpoints/depth_anything_v2_{encoder}.pth', map_location="cpu") 36 | model.load_state_dict(state_dict) 37 | model = model.to(DEVICE).eval() 38 | 39 | title = "# Depth Anything V2" 40 | description = """Official demo for **Depth Anything V2**. 
41 | Please refer to our [paper](https://arxiv.org/abs/2406.09414), [project page](https://depth-anything-v2.github.io), or [github](https://github.com/DepthAnything/Depth-Anything-V2) for more details.""" 42 | 43 | def predict_depth(image): 44 | return model.infer_image(image) 45 | 46 | with gr.Blocks(css=css) as demo: 47 | gr.Markdown(title) 48 | gr.Markdown(description) 49 | gr.Markdown("### Depth Prediction demo") 50 | 51 | with gr.Row(): 52 | input_image = gr.Image(label="Input Image", type='numpy', elem_id='img-display-input') 53 | depth_image_slider = ImageSlider(label="Depth Map with Slider View", elem_id='img-display-output', position=0.5) 54 | submit = gr.Button(value="Compute Depth") 55 | gray_depth_file = gr.File(label="Grayscale depth map", elem_id="download",) 56 | raw_file = gr.File(label="16-bit raw output (can be considered as disparity)", elem_id="download",) 57 | 58 | cmap = matplotlib.colormaps.get_cmap('Spectral_r') 59 | 60 | def on_submit(image): 61 | original_image = image.copy() 62 | 63 | h, w = image.shape[:2] 64 | 65 | depth = predict_depth(image[:, :, ::-1]) 66 | 67 | raw_depth = Image.fromarray(depth.astype('uint16')) 68 | tmp_raw_depth = tempfile.NamedTemporaryFile(suffix='.png', delete=False) 69 | raw_depth.save(tmp_raw_depth.name) 70 | 71 | depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0 72 | depth = depth.astype(np.uint8) 73 | colored_depth = (cmap(depth)[:, :, :3] * 255).astype(np.uint8) 74 | 75 | gray_depth = Image.fromarray(depth) 76 | tmp_gray_depth = tempfile.NamedTemporaryFile(suffix='.png', delete=False) 77 | gray_depth.save(tmp_gray_depth.name) 78 | 79 | return [(original_image, colored_depth), tmp_gray_depth.name, tmp_raw_depth.name] 80 | 81 | submit.click(on_submit, inputs=[input_image], outputs=[depth_image_slider, gray_depth_file, raw_file]) 82 | 83 | example_files = glob.glob('assets/examples/*') 84 | examples = gr.Examples(examples=example_files, inputs=[input_image], outputs=[depth_image_slider, gray_depth_file, raw_file], fn=on_submit) 85 | 86 | 87 | if __name__ == '__main__': 88 | demo.queue().launch() -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/assets/DA-2K.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/thirdparty/depth_anything_v2/assets/DA-2K.png -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/assets/examples/demo01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/thirdparty/depth_anything_v2/assets/examples/demo01.jpg -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/assets/examples/demo02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/thirdparty/depth_anything_v2/assets/examples/demo02.jpg -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/assets/examples/demo03.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/thirdparty/depth_anything_v2/assets/examples/demo03.jpg -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/assets/examples/demo04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/thirdparty/depth_anything_v2/assets/examples/demo04.jpg -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/assets/examples/demo05.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/thirdparty/depth_anything_v2/assets/examples/demo05.jpg -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/assets/examples/demo06.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/thirdparty/depth_anything_v2/assets/examples/demo06.jpg -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/assets/examples/demo07.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/thirdparty/depth_anything_v2/assets/examples/demo07.jpg -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/assets/examples/demo08.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/thirdparty/depth_anything_v2/assets/examples/demo08.jpg -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/assets/examples/demo09.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/thirdparty/depth_anything_v2/assets/examples/demo09.jpg -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/assets/examples/demo10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/thirdparty/depth_anything_v2/assets/examples/demo10.jpg -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/assets/examples/demo11.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/thirdparty/depth_anything_v2/assets/examples/demo11.jpg -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/assets/examples/demo12.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/thirdparty/depth_anything_v2/assets/examples/demo12.jpg -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/assets/examples/demo13.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/thirdparty/depth_anything_v2/assets/examples/demo13.jpg -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/assets/examples/demo14.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/thirdparty/depth_anything_v2/assets/examples/demo14.jpg -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/assets/examples/demo15.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/thirdparty/depth_anything_v2/assets/examples/demo15.jpg -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/assets/examples/demo16.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/thirdparty/depth_anything_v2/assets/examples/demo16.jpg -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/assets/examples/demo17.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/thirdparty/depth_anything_v2/assets/examples/demo17.jpg -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/assets/examples/demo18.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/thirdparty/depth_anything_v2/assets/examples/demo18.jpg -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/assets/examples/demo19.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/thirdparty/depth_anything_v2/assets/examples/demo19.jpg -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/assets/examples/demo20.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/thirdparty/depth_anything_v2/assets/examples/demo20.jpg -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/assets/examples_video/basketball.mp4: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/thirdparty/depth_anything_v2/assets/examples_video/basketball.mp4 -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/assets/examples_video/ferris_wheel.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/thirdparty/depth_anything_v2/assets/examples_video/ferris_wheel.mp4 -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/assets/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/thirdparty/depth_anything_v2/assets/teaser.png -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/depth_anything_v2/dinov2_layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .mlp import Mlp 8 | from .patch_embed import PatchEmbed 9 | from .swiglu_ffn import SwiGLUFFN, SwiGLUFFNFused 10 | from .block import NestedTensorBlock 11 | from .attention import MemEffAttention 12 | -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/depth_anything_v2/dinov2_layers/attention.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | # References: 8 | # https://github.com/facebookresearch/dino/blob/master/vision_transformer.py 9 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm/models/vision_transformer.py 10 | 11 | import logging 12 | 13 | from torch import Tensor 14 | from torch import nn 15 | 16 | 17 | logger = logging.getLogger("dinov2") 18 | 19 | 20 | try: 21 | from xformers.ops import memory_efficient_attention, unbind, fmha 22 | 23 | XFORMERS_AVAILABLE = True 24 | except ImportError: 25 | logger.warning("xFormers not available") 26 | XFORMERS_AVAILABLE = False 27 | 28 | 29 | class Attention(nn.Module): 30 | def __init__( 31 | self, 32 | dim: int, 33 | num_heads: int = 8, 34 | qkv_bias: bool = False, 35 | proj_bias: bool = True, 36 | attn_drop: float = 0.0, 37 | proj_drop: float = 0.0, 38 | ) -> None: 39 | super().__init__() 40 | self.num_heads = num_heads 41 | head_dim = dim // num_heads 42 | self.scale = head_dim**-0.5 43 | 44 | self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) 45 | self.attn_drop = nn.Dropout(attn_drop) 46 | self.proj = nn.Linear(dim, dim, bias=proj_bias) 47 | self.proj_drop = nn.Dropout(proj_drop) 48 | 49 | def forward(self, x: Tensor) -> Tensor: 50 | B, N, C = x.shape 51 | qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) 52 | 53 | q, k, v = qkv[0] * self.scale, qkv[1], qkv[2] 54 | attn = q @ k.transpose(-2, -1) 55 | 56 | attn = attn.softmax(dim=-1) 57 | attn = self.attn_drop(attn) 58 | 59 | x = (attn @ v).transpose(1, 2).reshape(B, N, C) 60 | x = self.proj(x) 61 | x = self.proj_drop(x) 62 | return x 63 | 64 | 65 | class MemEffAttention(Attention): 66 | def forward(self, x: Tensor, attn_bias=None) -> Tensor: 67 | if not XFORMERS_AVAILABLE: 68 | assert attn_bias is None, "xFormers is required for nested tensors usage" 69 | return super().forward(x) 70 | 71 | B, N, C = x.shape 72 | qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads) 73 | 74 | q, k, v = unbind(qkv, 2) 75 | 76 | x = memory_efficient_attention(q, k, v, attn_bias=attn_bias) 77 | x = x.reshape([B, N, C]) 78 | 79 | x = self.proj(x) 80 | x = self.proj_drop(x) 81 | return x 82 | 83 | -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/depth_anything_v2/dinov2_layers/drop_path.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | # References: 8 | # https://github.com/facebookresearch/dino/blob/master/vision_transformer.py 9 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/drop.py 10 | 11 | 12 | from torch import nn 13 | 14 | 15 | def drop_path(x, drop_prob: float = 0.0, training: bool = False): 16 | if drop_prob == 0.0 or not training: 17 | return x 18 | keep_prob = 1 - drop_prob 19 | shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets 20 | random_tensor = x.new_empty(shape).bernoulli_(keep_prob) 21 | if keep_prob > 0.0: 22 | random_tensor.div_(keep_prob) 23 | output = x * random_tensor 24 | return output 25 | 26 | 27 | class DropPath(nn.Module): 28 | """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).""" 29 | 30 | def __init__(self, drop_prob=None): 31 | super(DropPath, self).__init__() 32 | self.drop_prob = drop_prob 33 | 34 | def forward(self, x): 35 | return drop_path(x, self.drop_prob, self.training) 36 | -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/depth_anything_v2/dinov2_layers/layer_scale.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | # Modified from: https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/vision_transformer.py#L103-L110 8 | 9 | from typing import Union 10 | 11 | import torch 12 | from torch import Tensor 13 | from torch import nn 14 | 15 | 16 | class LayerScale(nn.Module): 17 | def __init__( 18 | self, 19 | dim: int, 20 | init_values: Union[float, Tensor] = 1e-5, 21 | inplace: bool = False, 22 | ) -> None: 23 | super().__init__() 24 | self.inplace = inplace 25 | self.gamma = nn.Parameter(init_values * torch.ones(dim)) 26 | 27 | def forward(self, x: Tensor) -> Tensor: 28 | return x.mul_(self.gamma) if self.inplace else x * self.gamma 29 | -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/depth_anything_v2/dinov2_layers/mlp.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | # References: 8 | # https://github.com/facebookresearch/dino/blob/master/vision_transformer.py 9 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/mlp.py 10 | 11 | 12 | from typing import Callable, Optional 13 | 14 | from torch import Tensor, nn 15 | 16 | 17 | class Mlp(nn.Module): 18 | def __init__( 19 | self, 20 | in_features: int, 21 | hidden_features: Optional[int] = None, 22 | out_features: Optional[int] = None, 23 | act_layer: Callable[..., nn.Module] = nn.GELU, 24 | drop: float = 0.0, 25 | bias: bool = True, 26 | ) -> None: 27 | super().__init__() 28 | out_features = out_features or in_features 29 | hidden_features = hidden_features or in_features 30 | self.fc1 = nn.Linear(in_features, hidden_features, bias=bias) 31 | self.act = act_layer() 32 | self.fc2 = nn.Linear(hidden_features, out_features, bias=bias) 33 | self.drop = nn.Dropout(drop) 34 | 35 | def forward(self, x: Tensor) -> Tensor: 36 | x = self.fc1(x) 37 | x = self.act(x) 38 | x = self.drop(x) 39 | x = self.fc2(x) 40 | x = self.drop(x) 41 | return x 42 | -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/depth_anything_v2/dinov2_layers/patch_embed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | # References: 8 | # https://github.com/facebookresearch/dino/blob/master/vision_transformer.py 9 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/patch_embed.py 10 | 11 | from typing import Callable, Optional, Tuple, Union 12 | 13 | from torch import Tensor 14 | import torch.nn as nn 15 | 16 | 17 | def make_2tuple(x): 18 | if isinstance(x, tuple): 19 | assert len(x) == 2 20 | return x 21 | 22 | assert isinstance(x, int) 23 | return (x, x) 24 | 25 | 26 | class PatchEmbed(nn.Module): 27 | """ 28 | 2D image to patch embedding: (B,C,H,W) -> (B,N,D) 29 | 30 | Args: 31 | img_size: Image size. 32 | patch_size: Patch token size. 33 | in_chans: Number of input image channels. 34 | embed_dim: Number of linear projection output channels. 35 | norm_layer: Normalization layer. 
36 | """ 37 | 38 | def __init__( 39 | self, 40 | img_size: Union[int, Tuple[int, int]] = 224, 41 | patch_size: Union[int, Tuple[int, int]] = 16, 42 | in_chans: int = 3, 43 | embed_dim: int = 768, 44 | norm_layer: Optional[Callable] = None, 45 | flatten_embedding: bool = True, 46 | ) -> None: 47 | super().__init__() 48 | 49 | image_HW = make_2tuple(img_size) 50 | patch_HW = make_2tuple(patch_size) 51 | patch_grid_size = ( 52 | image_HW[0] // patch_HW[0], 53 | image_HW[1] // patch_HW[1], 54 | ) 55 | 56 | self.img_size = image_HW 57 | self.patch_size = patch_HW 58 | self.patches_resolution = patch_grid_size 59 | self.num_patches = patch_grid_size[0] * patch_grid_size[1] 60 | 61 | self.in_chans = in_chans 62 | self.embed_dim = embed_dim 63 | 64 | self.flatten_embedding = flatten_embedding 65 | 66 | self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_HW, stride=patch_HW) 67 | self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity() 68 | 69 | def forward(self, x: Tensor) -> Tensor: 70 | _, _, H, W = x.shape 71 | patch_H, patch_W = self.patch_size 72 | 73 | assert H % patch_H == 0, f"Input image height {H} is not a multiple of patch height {patch_H}" 74 | assert W % patch_W == 0, f"Input image width {W} is not a multiple of patch width: {patch_W}" 75 | 76 | x = self.proj(x) # B C H W 77 | H, W = x.size(2), x.size(3) 78 | x = x.flatten(2).transpose(1, 2) # B HW C 79 | x = self.norm(x) 80 | if not self.flatten_embedding: 81 | x = x.reshape(-1, H, W, self.embed_dim) # B H W C 82 | return x 83 | 84 | def flops(self) -> float: 85 | Ho, Wo = self.patches_resolution 86 | flops = Ho * Wo * self.embed_dim * self.in_chans * (self.patch_size[0] * self.patch_size[1]) 87 | if self.norm is not None: 88 | flops += Ho * Wo * self.embed_dim 89 | return flops 90 | -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/depth_anything_v2/dinov2_layers/swiglu_ffn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | from typing import Callable, Optional 8 | 9 | from torch import Tensor, nn 10 | import torch.nn.functional as F 11 | 12 | 13 | class SwiGLUFFN(nn.Module): 14 | def __init__( 15 | self, 16 | in_features: int, 17 | hidden_features: Optional[int] = None, 18 | out_features: Optional[int] = None, 19 | act_layer: Callable[..., nn.Module] = None, 20 | drop: float = 0.0, 21 | bias: bool = True, 22 | ) -> None: 23 | super().__init__() 24 | out_features = out_features or in_features 25 | hidden_features = hidden_features or in_features 26 | self.w12 = nn.Linear(in_features, 2 * hidden_features, bias=bias) 27 | self.w3 = nn.Linear(hidden_features, out_features, bias=bias) 28 | 29 | def forward(self, x: Tensor) -> Tensor: 30 | x12 = self.w12(x) 31 | x1, x2 = x12.chunk(2, dim=-1) 32 | hidden = F.silu(x1) * x2 33 | return self.w3(hidden) 34 | 35 | 36 | try: 37 | from xformers.ops import SwiGLU 38 | 39 | XFORMERS_AVAILABLE = True 40 | except ImportError: 41 | SwiGLU = SwiGLUFFN 42 | XFORMERS_AVAILABLE = False 43 | 44 | 45 | class SwiGLUFFNFused(SwiGLU): 46 | def __init__( 47 | self, 48 | in_features: int, 49 | hidden_features: Optional[int] = None, 50 | out_features: Optional[int] = None, 51 | act_layer: Callable[..., nn.Module] = None, 52 | drop: float = 0.0, 53 | bias: bool = True, 54 | ) -> None: 55 | out_features = out_features or in_features 56 | hidden_features = hidden_features or in_features 57 | hidden_features = (int(hidden_features * 2 / 3) + 7) // 8 * 8 58 | super().__init__( 59 | in_features=in_features, 60 | hidden_features=hidden_features, 61 | out_features=out_features, 62 | bias=bias, 63 | ) 64 | -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/depth_anything_v2/util/blocks.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | def _make_scratch(in_shape, out_shape, groups=1, expand=False): 5 | scratch = nn.Module() 6 | 7 | out_shape1 = out_shape 8 | out_shape2 = out_shape 9 | out_shape3 = out_shape 10 | if len(in_shape) >= 4: 11 | out_shape4 = out_shape 12 | 13 | if expand: 14 | out_shape1 = out_shape 15 | out_shape2 = out_shape * 2 16 | out_shape3 = out_shape * 4 17 | if len(in_shape) >= 4: 18 | out_shape4 = out_shape * 8 19 | 20 | scratch.layer1_rn = nn.Conv2d(in_shape[0], out_shape1, kernel_size=3, stride=1, padding=1, bias=False, groups=groups) 21 | scratch.layer2_rn = nn.Conv2d(in_shape[1], out_shape2, kernel_size=3, stride=1, padding=1, bias=False, groups=groups) 22 | scratch.layer3_rn = nn.Conv2d(in_shape[2], out_shape3, kernel_size=3, stride=1, padding=1, bias=False, groups=groups) 23 | if len(in_shape) >= 4: 24 | scratch.layer4_rn = nn.Conv2d(in_shape[3], out_shape4, kernel_size=3, stride=1, padding=1, bias=False, groups=groups) 25 | 26 | return scratch 27 | 28 | 29 | class ResidualConvUnit(nn.Module): 30 | """Residual convolution module. 31 | """ 32 | 33 | def __init__(self, features, activation, bn): 34 | """Init. 
35 | 36 | Args: 37 | features (int): number of features 38 | """ 39 | super().__init__() 40 | 41 | self.bn = bn 42 | 43 | self.groups=1 44 | 45 | self.conv1 = nn.Conv2d(features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups) 46 | 47 | self.conv2 = nn.Conv2d(features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups) 48 | 49 | if self.bn == True: 50 | self.bn1 = nn.BatchNorm2d(features) 51 | self.bn2 = nn.BatchNorm2d(features) 52 | 53 | self.activation = activation 54 | 55 | self.skip_add = nn.quantized.FloatFunctional() 56 | 57 | def forward(self, x): 58 | """Forward pass. 59 | 60 | Args: 61 | x (tensor): input 62 | 63 | Returns: 64 | tensor: output 65 | """ 66 | 67 | out = self.activation(x) 68 | out = self.conv1(out) 69 | if self.bn == True: 70 | out = self.bn1(out) 71 | 72 | out = self.activation(out) 73 | out = self.conv2(out) 74 | if self.bn == True: 75 | out = self.bn2(out) 76 | 77 | if self.groups > 1: 78 | out = self.conv_merge(out) 79 | 80 | return self.skip_add.add(out, x) 81 | 82 | 83 | class FeatureFusionBlock(nn.Module): 84 | """Feature fusion block. 85 | """ 86 | 87 | def __init__( 88 | self, 89 | features, 90 | activation, 91 | deconv=False, 92 | bn=False, 93 | expand=False, 94 | align_corners=True, 95 | size=None 96 | ): 97 | """Init. 98 | 99 | Args: 100 | features (int): number of features 101 | """ 102 | super(FeatureFusionBlock, self).__init__() 103 | 104 | self.deconv = deconv 105 | self.align_corners = align_corners 106 | 107 | self.groups=1 108 | 109 | self.expand = expand 110 | out_features = features 111 | if self.expand == True: 112 | out_features = features // 2 113 | 114 | self.out_conv = nn.Conv2d(features, out_features, kernel_size=1, stride=1, padding=0, bias=True, groups=1) 115 | 116 | self.resConfUnit1 = ResidualConvUnit(features, activation, bn) 117 | self.resConfUnit2 = ResidualConvUnit(features, activation, bn) 118 | 119 | self.skip_add = nn.quantized.FloatFunctional() 120 | 121 | self.size=size 122 | 123 | def forward(self, *xs, size=None): 124 | """Forward pass. 125 | 126 | Returns: 127 | tensor: output 128 | """ 129 | output = xs[0] 130 | 131 | if len(xs) == 2: 132 | res = self.resConfUnit1(xs[1]) 133 | output = self.skip_add.add(output, res) 134 | 135 | output = self.resConfUnit2(output) 136 | 137 | if (size is None) and (self.size is None): 138 | modifier = {"scale_factor": 2} 139 | elif size is None: 140 | modifier = {"size": self.size} 141 | else: 142 | modifier = {"size": size} 143 | 144 | output = nn.functional.interpolate(output, **modifier, mode="bilinear", align_corners=self.align_corners) 145 | 146 | output = self.out_conv(output) 147 | 148 | return output 149 | -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/metric_depth/README.md: -------------------------------------------------------------------------------- 1 | # Depth Anything V2 for Metric Depth Estimation 2 | 3 | ![teaser](./assets/compare_zoedepth.png) 4 | 5 | We here provide a simple codebase to fine-tune our Depth Anything V2 pre-trained encoder for metric depth estimation. Built on our powerful encoder, we use a simple DPT head to regress the depth. We fine-tune our pre-trained encoder on synthetic Hypersim / Virtual KITTI datasets for indoor / outdoor metric depth estimation, respectively. 6 | 7 | 8 | # Pre-trained Models 9 | 10 | We provide **six metric depth models** of three scales for indoor and outdoor scenes, respectively. 
11 | 12 | | Base Model | Params | Indoor (Hypersim) | Outdoor (Virtual KITTI 2) | 13 | |:-|-:|:-:|:-:| 14 | | Depth-Anything-V2-Small | 24.8M | [Download](https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-Hypersim-Small/resolve/main/depth_anything_v2_metric_hypersim_vits.pth?download=true) | [Download](https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-VKITTI-Small/resolve/main/depth_anything_v2_metric_vkitti_vits.pth?download=true) | 15 | | Depth-Anything-V2-Base | 97.5M | [Download](https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-Hypersim-Base/resolve/main/depth_anything_v2_metric_hypersim_vitb.pth?download=true) | [Download](https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-VKITTI-Base/resolve/main/depth_anything_v2_metric_vkitti_vitb.pth?download=true) | 16 | | Depth-Anything-V2-Large | 335.3M | [Download](https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-Hypersim-Large/resolve/main/depth_anything_v2_metric_hypersim_vitl.pth?download=true) | [Download](https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-VKITTI-Large/resolve/main/depth_anything_v2_metric_vkitti_vitl.pth?download=true) | 17 | 18 | *We recommend first trying our larger models (if the computational cost is affordable) and the indoor version.* 19 | 20 | ## Usage 21 | 22 | ### Preparation 23 | 24 | ```bash 25 | git clone https://github.com/DepthAnything/Depth-Anything-V2 26 | cd Depth-Anything-V2/metric_depth 27 | pip install -r requirements.txt 28 | ``` 29 | 30 | Download the checkpoints listed [here](#pre-trained-models) and put them under the `checkpoints` directory. 31 | 32 | ### Use our models 33 | ```python 34 | import cv2 35 | import torch 36 | 37 | from depth_anything_v2.dpt import DepthAnythingV2 38 | 39 | model_configs = { 40 | 'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]}, 41 | 'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]}, 42 | 'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]} 43 | } 44 | 45 | encoder = 'vitl' # or 'vits', 'vitb' 46 | dataset = 'hypersim' # 'hypersim' for indoor model, 'vkitti' for outdoor model 47 | max_depth = 20 # 20 for indoor model, 80 for outdoor model 48 | 49 | model = DepthAnythingV2(**{**model_configs[encoder], 'max_depth': max_depth}) 50 | model.load_state_dict(torch.load(f'checkpoints/depth_anything_v2_metric_{dataset}_{encoder}.pth', map_location='cpu')) 51 | model.eval() 52 | 53 | raw_img = cv2.imread('your/image/path') 54 | depth = model.infer_image(raw_img) # HxW depth map in meters (numpy array) 55 | ``` 56 | 57 | ### Running script on images 58 | 59 | Here, we take the `vitl` encoder as an example. You can also use `vitb` or `vits` encoders. 
60 | 61 | ```bash 62 | # indoor scenes 63 | python run.py \ 64 | --encoder vitl \ 65 | --load-from checkpoints/depth_anything_v2_metric_hypersim_vitl.pth \ 66 | --max-depth 20 \ 67 | --img-path --outdir [--input-size ] [--save-numpy] 68 | 69 | # outdoor scenes 70 | python run.py \ 71 | --encoder vitl \ 72 | --load-from checkpoints/depth_anything_v2_metric_vkitti_vitl.pth \ 73 | --max-depth 80 \ 74 | --img-path --outdir [--input-size ] [--save-numpy] 75 | ``` 76 | 77 | ### Project 2D images to point clouds: 78 | 79 | ```bash 80 | python depth_to_pointcloud.py \ 81 | --encoder vitl \ 82 | --load-from checkpoints/depth_anything_v2_metric_hypersim_vitl.pth \ 83 | --max-depth 20 \ 84 | --img-path --outdir 85 | ``` 86 | 87 | ### Reproduce training 88 | 89 | Please first prepare the [Hypersim](https://github.com/apple/ml-hypersim) and [Virtual KITTI 2](https://europe.naverlabs.com/research/computer-vision/proxy-virtual-worlds-vkitti-2/) datasets. Then: 90 | 91 | ```bash 92 | bash dist_train.sh 93 | ``` 94 | 95 | 96 | ## Citation 97 | 98 | If you find this project useful, please consider citing: 99 | 100 | ```bibtex 101 | @article{depth_anything_v2, 102 | title={Depth Anything V2}, 103 | author={Yang, Lihe and Kang, Bingyi and Huang, Zilong and Zhao, Zhen and Xu, Xiaogang and Feng, Jiashi and Zhao, Hengshuang}, 104 | journal={arXiv:2406.09414}, 105 | year={2024} 106 | } 107 | 108 | @inproceedings{depth_anything_v1, 109 | title={Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data}, 110 | author={Yang, Lihe and Kang, Bingyi and Huang, Zilong and Xu, Xiaogang and Feng, Jiashi and Zhao, Hengshuang}, 111 | booktitle={CVPR}, 112 | year={2024} 113 | } 114 | ``` 115 | -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/metric_depth/assets/compare_zoedepth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/thirdparty/depth_anything_v2/metric_depth/assets/compare_zoedepth.png -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/metric_depth/dataset/hypersim.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import h5py 3 | import numpy as np 4 | import torch 5 | from torch.utils.data import Dataset 6 | from torchvision.transforms import Compose 7 | 8 | from dataset.transform import Resize, NormalizeImage, PrepareForNet, Crop 9 | 10 | 11 | def hypersim_distance_to_depth(npyDistance): 12 | intWidth, intHeight, fltFocal = 1024, 768, 886.81 13 | 14 | npyImageplaneX = np.linspace((-0.5 * intWidth) + 0.5, (0.5 * intWidth) - 0.5, intWidth).reshape( 15 | 1, intWidth).repeat(intHeight, 0).astype(np.float32)[:, :, None] 16 | npyImageplaneY = np.linspace((-0.5 * intHeight) + 0.5, (0.5 * intHeight) - 0.5, 17 | intHeight).reshape(intHeight, 1).repeat(intWidth, 1).astype(np.float32)[:, :, None] 18 | npyImageplaneZ = np.full([intHeight, intWidth, 1], fltFocal, np.float32) 19 | npyImageplane = np.concatenate( 20 | [npyImageplaneX, npyImageplaneY, npyImageplaneZ], 2) 21 | 22 | npyDepth = npyDistance / np.linalg.norm(npyImageplane, 2, 2) * fltFocal 23 | return npyDepth 24 | 25 | 26 | class Hypersim(Dataset): 27 | def __init__(self, filelist_path, mode, size=(518, 518)): 28 | 29 | self.mode = mode 30 | self.size = size 31 | 32 | with open(filelist_path, 'r') as f: 33 | self.filelist = f.read().splitlines() 34 
| 35 | net_w, net_h = size 36 | self.transform = Compose([ 37 | Resize( 38 | width=net_w, 39 | height=net_h, 40 | resize_target=True if mode == 'train' else False, 41 | keep_aspect_ratio=True, 42 | ensure_multiple_of=14, 43 | resize_method='lower_bound', 44 | image_interpolation_method=cv2.INTER_CUBIC, 45 | ), 46 | NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 47 | PrepareForNet(), 48 | ] + ([Crop(size[0])] if self.mode == 'train' else [])) 49 | 50 | def __getitem__(self, item): 51 | img_path = self.filelist[item].split(' ')[0] 52 | depth_path = self.filelist[item].split(' ')[1] 53 | 54 | image = cv2.imread(img_path) 55 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0 56 | 57 | depth_fd = h5py.File(depth_path, "r") 58 | distance_meters = np.array(depth_fd['dataset']) 59 | depth = hypersim_distance_to_depth(distance_meters) 60 | 61 | sample = self.transform({'image': image, 'depth': depth}) 62 | 63 | sample['image'] = torch.from_numpy(sample['image']) 64 | sample['depth'] = torch.from_numpy(sample['depth']) 65 | 66 | sample['valid_mask'] = (torch.isnan(sample['depth']) == 0) 67 | sample['depth'][sample['valid_mask'] == 0] = 0 68 | 69 | sample['image_path'] = self.filelist[item].split(' ')[0] 70 | 71 | return sample 72 | 73 | def __len__(self): 74 | return len(self.filelist) -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/metric_depth/dataset/kitti.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import torch 3 | from torch.utils.data import Dataset 4 | from torchvision.transforms import Compose 5 | 6 | from dataset.transform import Resize, NormalizeImage, PrepareForNet 7 | 8 | 9 | class KITTI(Dataset): 10 | def __init__(self, filelist_path, mode, size=(518, 518)): 11 | if mode != 'val': 12 | raise NotImplementedError 13 | 14 | self.mode = mode 15 | self.size = size 16 | 17 | with open(filelist_path, 'r') as f: 18 | self.filelist = f.read().splitlines() 19 | 20 | net_w, net_h = size 21 | self.transform = Compose([ 22 | Resize( 23 | width=net_w, 24 | height=net_h, 25 | resize_target=True if mode == 'train' else False, 26 | keep_aspect_ratio=True, 27 | ensure_multiple_of=14, 28 | resize_method='lower_bound', 29 | image_interpolation_method=cv2.INTER_CUBIC, 30 | ), 31 | NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 32 | PrepareForNet(), 33 | ]) 34 | 35 | def __getitem__(self, item): 36 | img_path = self.filelist[item].split(' ')[0] 37 | depth_path = self.filelist[item].split(' ')[1] 38 | 39 | image = cv2.imread(img_path) 40 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0 41 | 42 | depth = cv2.imread(depth_path, cv2.IMREAD_UNCHANGED).astype('float32') 43 | 44 | sample = self.transform({'image': image, 'depth': depth}) 45 | 46 | sample['image'] = torch.from_numpy(sample['image']) 47 | sample['depth'] = torch.from_numpy(sample['depth']) 48 | sample['depth'] = sample['depth'] / 256.0 # convert in meters 49 | 50 | sample['valid_mask'] = sample['depth'] > 0 51 | 52 | sample['image_path'] = self.filelist[item].split(' ')[0] 53 | 54 | return sample 55 | 56 | def __len__(self): 57 | return len(self.filelist) -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/metric_depth/dataset/vkitti2.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import torch 3 | from torch.utils.data import Dataset 4 | from 
torchvision.transforms import Compose 5 | 6 | from dataset.transform import Resize, NormalizeImage, PrepareForNet, Crop 7 | 8 | 9 | class VKITTI2(Dataset): 10 | def __init__(self, filelist_path, mode, size=(518, 518)): 11 | 12 | self.mode = mode 13 | self.size = size 14 | 15 | with open(filelist_path, 'r') as f: 16 | self.filelist = f.read().splitlines() 17 | 18 | net_w, net_h = size 19 | self.transform = Compose([ 20 | Resize( 21 | width=net_w, 22 | height=net_h, 23 | resize_target=True if mode == 'train' else False, 24 | keep_aspect_ratio=True, 25 | ensure_multiple_of=14, 26 | resize_method='lower_bound', 27 | image_interpolation_method=cv2.INTER_CUBIC, 28 | ), 29 | NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 30 | PrepareForNet(), 31 | ] + ([Crop(size[0])] if self.mode == 'train' else [])) 32 | 33 | def __getitem__(self, item): 34 | img_path = self.filelist[item].split(' ')[0] 35 | depth_path = self.filelist[item].split(' ')[1] 36 | 37 | image = cv2.imread(img_path) 38 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0 39 | 40 | depth = cv2.imread(depth_path, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH) / 100.0 # cm to m 41 | 42 | sample = self.transform({'image': image, 'depth': depth}) 43 | 44 | sample['image'] = torch.from_numpy(sample['image']) 45 | sample['depth'] = torch.from_numpy(sample['depth']) 46 | 47 | sample['valid_mask'] = (sample['depth'] <= 80) 48 | 49 | sample['image_path'] = self.filelist[item].split(' ')[0] 50 | 51 | return sample 52 | 53 | def __len__(self): 54 | return len(self.filelist) -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .mlp import Mlp 8 | from .patch_embed import PatchEmbed 9 | from .swiglu_ffn import SwiGLUFFN, SwiGLUFFNFused 10 | from .block import NestedTensorBlock 11 | from .attention import MemEffAttention 12 | -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/attention.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | # References: 8 | # https://github.com/facebookresearch/dino/blob/master/vision_transformer.py 9 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm/models/vision_transformer.py 10 | 11 | import logging 12 | 13 | from torch import Tensor 14 | from torch import nn 15 | 16 | 17 | logger = logging.getLogger("dinov2") 18 | 19 | 20 | try: 21 | from xformers.ops import memory_efficient_attention, unbind, fmha 22 | 23 | XFORMERS_AVAILABLE = True 24 | except ImportError: 25 | logger.warning("xFormers not available") 26 | XFORMERS_AVAILABLE = False 27 | 28 | 29 | class Attention(nn.Module): 30 | def __init__( 31 | self, 32 | dim: int, 33 | num_heads: int = 8, 34 | qkv_bias: bool = False, 35 | proj_bias: bool = True, 36 | attn_drop: float = 0.0, 37 | proj_drop: float = 0.0, 38 | ) -> None: 39 | super().__init__() 40 | self.num_heads = num_heads 41 | head_dim = dim // num_heads 42 | self.scale = head_dim**-0.5 43 | 44 | self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) 45 | self.attn_drop = nn.Dropout(attn_drop) 46 | self.proj = nn.Linear(dim, dim, bias=proj_bias) 47 | self.proj_drop = nn.Dropout(proj_drop) 48 | 49 | def forward(self, x: Tensor) -> Tensor: 50 | B, N, C = x.shape 51 | qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) 52 | 53 | q, k, v = qkv[0] * self.scale, qkv[1], qkv[2] 54 | attn = q @ k.transpose(-2, -1) 55 | 56 | attn = attn.softmax(dim=-1) 57 | attn = self.attn_drop(attn) 58 | 59 | x = (attn @ v).transpose(1, 2).reshape(B, N, C) 60 | x = self.proj(x) 61 | x = self.proj_drop(x) 62 | return x 63 | 64 | 65 | class MemEffAttention(Attention): 66 | def forward(self, x: Tensor, attn_bias=None) -> Tensor: 67 | if not XFORMERS_AVAILABLE: 68 | assert attn_bias is None, "xFormers is required for nested tensors usage" 69 | return super().forward(x) 70 | 71 | B, N, C = x.shape 72 | qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads) 73 | 74 | q, k, v = unbind(qkv, 2) 75 | 76 | x = memory_efficient_attention(q, k, v, attn_bias=attn_bias) 77 | x = x.reshape([B, N, C]) 78 | 79 | x = self.proj(x) 80 | x = self.proj_drop(x) 81 | return x 82 | 83 | -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/drop_path.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | # References: 8 | # https://github.com/facebookresearch/dino/blob/master/vision_transformer.py 9 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/drop.py 10 | 11 | 12 | from torch import nn 13 | 14 | 15 | def drop_path(x, drop_prob: float = 0.0, training: bool = False): 16 | if drop_prob == 0.0 or not training: 17 | return x 18 | keep_prob = 1 - drop_prob 19 | shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets 20 | random_tensor = x.new_empty(shape).bernoulli_(keep_prob) 21 | if keep_prob > 0.0: 22 | random_tensor.div_(keep_prob) 23 | output = x * random_tensor 24 | return output 25 | 26 | 27 | class DropPath(nn.Module): 28 | """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).""" 29 | 30 | def __init__(self, drop_prob=None): 31 | super(DropPath, self).__init__() 32 | self.drop_prob = drop_prob 33 | 34 | def forward(self, x): 35 | return drop_path(x, self.drop_prob, self.training) 36 | -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/layer_scale.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | # Modified from: https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/vision_transformer.py#L103-L110 8 | 9 | from typing import Union 10 | 11 | import torch 12 | from torch import Tensor 13 | from torch import nn 14 | 15 | 16 | class LayerScale(nn.Module): 17 | def __init__( 18 | self, 19 | dim: int, 20 | init_values: Union[float, Tensor] = 1e-5, 21 | inplace: bool = False, 22 | ) -> None: 23 | super().__init__() 24 | self.inplace = inplace 25 | self.gamma = nn.Parameter(init_values * torch.ones(dim)) 26 | 27 | def forward(self, x: Tensor) -> Tensor: 28 | return x.mul_(self.gamma) if self.inplace else x * self.gamma 29 | -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/mlp.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | # References: 8 | # https://github.com/facebookresearch/dino/blob/master/vision_transformer.py 9 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/mlp.py 10 | 11 | 12 | from typing import Callable, Optional 13 | 14 | from torch import Tensor, nn 15 | 16 | 17 | class Mlp(nn.Module): 18 | def __init__( 19 | self, 20 | in_features: int, 21 | hidden_features: Optional[int] = None, 22 | out_features: Optional[int] = None, 23 | act_layer: Callable[..., nn.Module] = nn.GELU, 24 | drop: float = 0.0, 25 | bias: bool = True, 26 | ) -> None: 27 | super().__init__() 28 | out_features = out_features or in_features 29 | hidden_features = hidden_features or in_features 30 | self.fc1 = nn.Linear(in_features, hidden_features, bias=bias) 31 | self.act = act_layer() 32 | self.fc2 = nn.Linear(hidden_features, out_features, bias=bias) 33 | self.drop = nn.Dropout(drop) 34 | 35 | def forward(self, x: Tensor) -> Tensor: 36 | x = self.fc1(x) 37 | x = self.act(x) 38 | x = self.drop(x) 39 | x = self.fc2(x) 40 | x = self.drop(x) 41 | return x 42 | -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/patch_embed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | # References: 8 | # https://github.com/facebookresearch/dino/blob/master/vision_transformer.py 9 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/patch_embed.py 10 | 11 | from typing import Callable, Optional, Tuple, Union 12 | 13 | from torch import Tensor 14 | import torch.nn as nn 15 | 16 | 17 | def make_2tuple(x): 18 | if isinstance(x, tuple): 19 | assert len(x) == 2 20 | return x 21 | 22 | assert isinstance(x, int) 23 | return (x, x) 24 | 25 | 26 | class PatchEmbed(nn.Module): 27 | """ 28 | 2D image to patch embedding: (B,C,H,W) -> (B,N,D) 29 | 30 | Args: 31 | img_size: Image size. 32 | patch_size: Patch token size. 33 | in_chans: Number of input image channels. 34 | embed_dim: Number of linear projection output channels. 35 | norm_layer: Normalization layer. 
36 | """ 37 | 38 | def __init__( 39 | self, 40 | img_size: Union[int, Tuple[int, int]] = 224, 41 | patch_size: Union[int, Tuple[int, int]] = 16, 42 | in_chans: int = 3, 43 | embed_dim: int = 768, 44 | norm_layer: Optional[Callable] = None, 45 | flatten_embedding: bool = True, 46 | ) -> None: 47 | super().__init__() 48 | 49 | image_HW = make_2tuple(img_size) 50 | patch_HW = make_2tuple(patch_size) 51 | patch_grid_size = ( 52 | image_HW[0] // patch_HW[0], 53 | image_HW[1] // patch_HW[1], 54 | ) 55 | 56 | self.img_size = image_HW 57 | self.patch_size = patch_HW 58 | self.patches_resolution = patch_grid_size 59 | self.num_patches = patch_grid_size[0] * patch_grid_size[1] 60 | 61 | self.in_chans = in_chans 62 | self.embed_dim = embed_dim 63 | 64 | self.flatten_embedding = flatten_embedding 65 | 66 | self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_HW, stride=patch_HW) 67 | self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity() 68 | 69 | def forward(self, x: Tensor) -> Tensor: 70 | _, _, H, W = x.shape 71 | patch_H, patch_W = self.patch_size 72 | 73 | assert H % patch_H == 0, f"Input image height {H} is not a multiple of patch height {patch_H}" 74 | assert W % patch_W == 0, f"Input image width {W} is not a multiple of patch width: {patch_W}" 75 | 76 | x = self.proj(x) # B C H W 77 | H, W = x.size(2), x.size(3) 78 | x = x.flatten(2).transpose(1, 2) # B HW C 79 | x = self.norm(x) 80 | if not self.flatten_embedding: 81 | x = x.reshape(-1, H, W, self.embed_dim) # B H W C 82 | return x 83 | 84 | def flops(self) -> float: 85 | Ho, Wo = self.patches_resolution 86 | flops = Ho * Wo * self.embed_dim * self.in_chans * (self.patch_size[0] * self.patch_size[1]) 87 | if self.norm is not None: 88 | flops += Ho * Wo * self.embed_dim 89 | return flops 90 | -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/swiglu_ffn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | from typing import Callable, Optional 8 | 9 | from torch import Tensor, nn 10 | import torch.nn.functional as F 11 | 12 | 13 | class SwiGLUFFN(nn.Module): 14 | def __init__( 15 | self, 16 | in_features: int, 17 | hidden_features: Optional[int] = None, 18 | out_features: Optional[int] = None, 19 | act_layer: Callable[..., nn.Module] = None, 20 | drop: float = 0.0, 21 | bias: bool = True, 22 | ) -> None: 23 | super().__init__() 24 | out_features = out_features or in_features 25 | hidden_features = hidden_features or in_features 26 | self.w12 = nn.Linear(in_features, 2 * hidden_features, bias=bias) 27 | self.w3 = nn.Linear(hidden_features, out_features, bias=bias) 28 | 29 | def forward(self, x: Tensor) -> Tensor: 30 | x12 = self.w12(x) 31 | x1, x2 = x12.chunk(2, dim=-1) 32 | hidden = F.silu(x1) * x2 33 | return self.w3(hidden) 34 | 35 | 36 | try: 37 | from xformers.ops import SwiGLU 38 | 39 | XFORMERS_AVAILABLE = True 40 | except ImportError: 41 | SwiGLU = SwiGLUFFN 42 | XFORMERS_AVAILABLE = False 43 | 44 | 45 | class SwiGLUFFNFused(SwiGLU): 46 | def __init__( 47 | self, 48 | in_features: int, 49 | hidden_features: Optional[int] = None, 50 | out_features: Optional[int] = None, 51 | act_layer: Callable[..., nn.Module] = None, 52 | drop: float = 0.0, 53 | bias: bool = True, 54 | ) -> None: 55 | out_features = out_features or in_features 56 | hidden_features = hidden_features or in_features 57 | hidden_features = (int(hidden_features * 2 / 3) + 7) // 8 * 8 58 | super().__init__( 59 | in_features=in_features, 60 | hidden_features=hidden_features, 61 | out_features=out_features, 62 | bias=bias, 63 | ) 64 | -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/metric_depth/depth_anything_v2/util/blocks.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | def _make_scratch(in_shape, out_shape, groups=1, expand=False): 5 | scratch = nn.Module() 6 | 7 | out_shape1 = out_shape 8 | out_shape2 = out_shape 9 | out_shape3 = out_shape 10 | if len(in_shape) >= 4: 11 | out_shape4 = out_shape 12 | 13 | if expand: 14 | out_shape1 = out_shape 15 | out_shape2 = out_shape * 2 16 | out_shape3 = out_shape * 4 17 | if len(in_shape) >= 4: 18 | out_shape4 = out_shape * 8 19 | 20 | scratch.layer1_rn = nn.Conv2d(in_shape[0], out_shape1, kernel_size=3, stride=1, padding=1, bias=False, groups=groups) 21 | scratch.layer2_rn = nn.Conv2d(in_shape[1], out_shape2, kernel_size=3, stride=1, padding=1, bias=False, groups=groups) 22 | scratch.layer3_rn = nn.Conv2d(in_shape[2], out_shape3, kernel_size=3, stride=1, padding=1, bias=False, groups=groups) 23 | if len(in_shape) >= 4: 24 | scratch.layer4_rn = nn.Conv2d(in_shape[3], out_shape4, kernel_size=3, stride=1, padding=1, bias=False, groups=groups) 25 | 26 | return scratch 27 | 28 | 29 | class ResidualConvUnit(nn.Module): 30 | """Residual convolution module. 31 | """ 32 | 33 | def __init__(self, features, activation, bn): 34 | """Init. 
35 | 36 | Args: 37 | features (int): number of features 38 | """ 39 | super().__init__() 40 | 41 | self.bn = bn 42 | 43 | self.groups=1 44 | 45 | self.conv1 = nn.Conv2d(features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups) 46 | 47 | self.conv2 = nn.Conv2d(features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups) 48 | 49 | if self.bn == True: 50 | self.bn1 = nn.BatchNorm2d(features) 51 | self.bn2 = nn.BatchNorm2d(features) 52 | 53 | self.activation = activation 54 | 55 | self.skip_add = nn.quantized.FloatFunctional() 56 | 57 | def forward(self, x): 58 | """Forward pass. 59 | 60 | Args: 61 | x (tensor): input 62 | 63 | Returns: 64 | tensor: output 65 | """ 66 | 67 | out = self.activation(x) 68 | out = self.conv1(out) 69 | if self.bn == True: 70 | out = self.bn1(out) 71 | 72 | out = self.activation(out) 73 | out = self.conv2(out) 74 | if self.bn == True: 75 | out = self.bn2(out) 76 | 77 | if self.groups > 1: 78 | out = self.conv_merge(out) 79 | 80 | return self.skip_add.add(out, x) 81 | 82 | 83 | class FeatureFusionBlock(nn.Module): 84 | """Feature fusion block. 85 | """ 86 | 87 | def __init__( 88 | self, 89 | features, 90 | activation, 91 | deconv=False, 92 | bn=False, 93 | expand=False, 94 | align_corners=True, 95 | size=None 96 | ): 97 | """Init. 98 | 99 | Args: 100 | features (int): number of features 101 | """ 102 | super(FeatureFusionBlock, self).__init__() 103 | 104 | self.deconv = deconv 105 | self.align_corners = align_corners 106 | 107 | self.groups=1 108 | 109 | self.expand = expand 110 | out_features = features 111 | if self.expand == True: 112 | out_features = features // 2 113 | 114 | self.out_conv = nn.Conv2d(features, out_features, kernel_size=1, stride=1, padding=0, bias=True, groups=1) 115 | 116 | self.resConfUnit1 = ResidualConvUnit(features, activation, bn) 117 | self.resConfUnit2 = ResidualConvUnit(features, activation, bn) 118 | 119 | self.skip_add = nn.quantized.FloatFunctional() 120 | 121 | self.size=size 122 | 123 | def forward(self, *xs, size=None): 124 | """Forward pass. 125 | 126 | Returns: 127 | tensor: output 128 | """ 129 | output = xs[0] 130 | 131 | if len(xs) == 2: 132 | res = self.resConfUnit1(xs[1]) 133 | output = self.skip_add.add(output, res) 134 | 135 | output = self.resConfUnit2(output) 136 | 137 | if (size is None) and (self.size is None): 138 | modifier = {"scale_factor": 2} 139 | elif size is None: 140 | modifier = {"size": self.size} 141 | else: 142 | modifier = {"size": size} 143 | 144 | output = nn.functional.interpolate(output, **modifier, mode="bilinear", align_corners=self.align_corners) 145 | 146 | output = self.out_conv(output) 147 | 148 | return output 149 | -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/metric_depth/depth_to_pointcloud.py: -------------------------------------------------------------------------------- 1 | """ 2 | Born out of Depth Anything V1 Issue 36 3 | Make sure you have the necessary libraries installed. 4 | Code by @1ssb 5 | 6 | This script processes a set of images to generate depth maps and corresponding point clouds. 7 | The resulting point clouds are saved in the specified output directory. 8 | 9 | Usage: 10 | python script.py --encoder vitl --load-from path_to_model --max-depth 20 --img-path path_to_images --outdir output_directory --focal-length-x 470.4 --focal-length-y 470.4 11 | 12 | Arguments: 13 | --encoder: Model encoder to use. Choices are ['vits', 'vitb', 'vitl', 'vitg']. 
14 | --load-from: Path to the pre-trained model weights. 15 | --max-depth: Maximum depth value for the depth map. 16 | --img-path: Path to the input image or directory containing images. 17 | --outdir: Directory to save the output point clouds. 18 | --focal-length-x: Focal length along the x-axis. 19 | --focal-length-y: Focal length along the y-axis. 20 | """ 21 | 22 | import argparse 23 | import cv2 24 | import glob 25 | import numpy as np 26 | import open3d as o3d 27 | import os 28 | from PIL import Image 29 | import torch 30 | 31 | from depth_anything_v2.dpt import DepthAnythingV2 32 | 33 | 34 | def main(): 35 | # Parse command-line arguments 36 | parser = argparse.ArgumentParser(description='Generate depth maps and point clouds from images.') 37 | parser.add_argument('--encoder', default='vitl', type=str, choices=['vits', 'vitb', 'vitl', 'vitg'], 38 | help='Model encoder to use.') 39 | parser.add_argument('--load-from', default='', type=str, required=True, 40 | help='Path to the pre-trained model weights.') 41 | parser.add_argument('--max-depth', default=20, type=float, 42 | help='Maximum depth value for the depth map.') 43 | parser.add_argument('--img-path', type=str, required=True, 44 | help='Path to the input image or directory containing images.') 45 | parser.add_argument('--outdir', type=str, default='./vis_pointcloud', 46 | help='Directory to save the output point clouds.') 47 | parser.add_argument('--focal-length-x', default=470.4, type=float, 48 | help='Focal length along the x-axis.') 49 | parser.add_argument('--focal-length-y', default=470.4, type=float, 50 | help='Focal length along the y-axis.') 51 | 52 | args = parser.parse_args() 53 | 54 | # Determine the device to use (CUDA, MPS, or CPU) 55 | DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu' 56 | 57 | # Model configuration based on the chosen encoder 58 | model_configs = { 59 | 'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]}, 60 | 'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]}, 61 | 'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]}, 62 | 'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]} 63 | } 64 | 65 | # Initialize the DepthAnythingV2 model with the specified configuration 66 | depth_anything = DepthAnythingV2(**{**model_configs[args.encoder], 'max_depth': args.max_depth}) 67 | depth_anything.load_state_dict(torch.load(args.load_from, map_location='cpu')) 68 | depth_anything = depth_anything.to(DEVICE).eval() 69 | 70 | # Get the list of image files to process 71 | if os.path.isfile(args.img_path): 72 | if args.img_path.endswith('txt'): 73 | with open(args.img_path, 'r') as f: 74 | filenames = f.read().splitlines() 75 | else: 76 | filenames = [args.img_path] 77 | else: 78 | filenames = glob.glob(os.path.join(args.img_path, '**/*'), recursive=True) 79 | 80 | # Create the output directory if it doesn't exist 81 | os.makedirs(args.outdir, exist_ok=True) 82 | 83 | # Process each image file 84 | for k, filename in enumerate(filenames): 85 | print(f'Processing {k+1}/{len(filenames)}: {filename}') 86 | 87 | # Load the image 88 | color_image = Image.open(filename).convert('RGB') 89 | width, height = color_image.size 90 | 91 | # Read the image using OpenCV 92 | image = cv2.imread(filename) 93 | pred = depth_anything.infer_image(image, height) 94 | 95 | # Resize depth prediction to match the original image size 96 | resized_pred = 
Image.fromarray(pred).resize((width, height), Image.NEAREST) 97 | 98 | # Generate mesh grid and calculate point cloud coordinates 99 | x, y = np.meshgrid(np.arange(width), np.arange(height)) 100 | x = (x - width / 2) / args.focal_length_x 101 | y = (y - height / 2) / args.focal_length_y 102 | z = np.array(resized_pred) 103 | points = np.stack((np.multiply(x, z), np.multiply(y, z), z), axis=-1).reshape(-1, 3) 104 | colors = np.array(color_image).reshape(-1, 3) / 255.0 105 | 106 | # Create the point cloud and save it to the output directory 107 | pcd = o3d.geometry.PointCloud() 108 | pcd.points = o3d.utility.Vector3dVector(points) 109 | pcd.colors = o3d.utility.Vector3dVector(colors) 110 | o3d.io.write_point_cloud(os.path.join(args.outdir, os.path.splitext(os.path.basename(filename))[0] + ".ply"), pcd) 111 | 112 | 113 | if __name__ == '__main__': 114 | main() 115 | -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/metric_depth/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | now=$(date +"%Y%m%d_%H%M%S") 3 | 4 | epoch=120 5 | bs=4 6 | gpus=8 7 | lr=0.000005 8 | encoder=vitl 9 | dataset=hypersim # vkitti 10 | img_size=518 11 | min_depth=0.001 12 | max_depth=20 # 80 for virtual kitti 13 | pretrained_from=../checkpoints/depth_anything_v2_${encoder}.pth 14 | save_path=exp/hypersim # exp/vkitti 15 | 16 | mkdir -p $save_path 17 | 18 | python3 -m torch.distributed.launch \ 19 | --nproc_per_node=$gpus \ 20 | --nnodes 1 \ 21 | --node_rank=0 \ 22 | --master_addr=localhost \ 23 | --master_port=20596 \ 24 | train.py --epoch $epoch --encoder $encoder --bs $bs --lr $lr --save-path $save_path --dataset $dataset \ 25 | --img-size $img_size --min-depth $min_depth --max-depth $max_depth --pretrained-from $pretrained_from \ 26 | --port 20596 2>&1 | tee -a $save_path/$now.log 27 | -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/metric_depth/requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | opencv-python 3 | open3d 4 | torch 5 | torchvision 6 | -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/metric_depth/run.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import cv2 3 | import glob 4 | import matplotlib 5 | import numpy as np 6 | import os 7 | import torch 8 | 9 | from depth_anything_v2.dpt import DepthAnythingV2 10 | 11 | 12 | if __name__ == '__main__': 13 | parser = argparse.ArgumentParser(description='Depth Anything V2 Metric Depth Estimation') 14 | 15 | parser.add_argument('--img-path', type=str) 16 | parser.add_argument('--input-size', type=int, default=518) 17 | parser.add_argument('--outdir', type=str, default='./vis_depth') 18 | 19 | parser.add_argument('--encoder', type=str, default='vitl', choices=['vits', 'vitb', 'vitl', 'vitg']) 20 | parser.add_argument('--load-from', type=str, default='checkpoints/depth_anything_v2_metric_hypersim_vitl.pth') 21 | parser.add_argument('--max-depth', type=float, default=20) 22 | 23 | parser.add_argument('--save-numpy', dest='save_numpy', action='store_true', help='save the model raw output') 24 | parser.add_argument('--pred-only', dest='pred_only', action='store_true', help='only display the prediction') 25 | parser.add_argument('--grayscale', dest='grayscale', action='store_true', help='do not 
apply colorful palette') 26 | 27 | args = parser.parse_args() 28 | 29 | DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu' 30 | 31 | model_configs = { 32 | 'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]}, 33 | 'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]}, 34 | 'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]}, 35 | 'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]} 36 | } 37 | 38 | depth_anything = DepthAnythingV2(**{**model_configs[args.encoder], 'max_depth': args.max_depth}) 39 | depth_anything.load_state_dict(torch.load(args.load_from, map_location='cpu')) 40 | depth_anything = depth_anything.to(DEVICE).eval() 41 | 42 | if os.path.isfile(args.img_path): 43 | if args.img_path.endswith('txt'): 44 | with open(args.img_path, 'r') as f: 45 | filenames = f.read().splitlines() 46 | else: 47 | filenames = [args.img_path] 48 | else: 49 | filenames = glob.glob(os.path.join(args.img_path, '**/*'), recursive=True) 50 | 51 | os.makedirs(args.outdir, exist_ok=True) 52 | 53 | cmap = matplotlib.colormaps.get_cmap('Spectral') 54 | 55 | for k, filename in enumerate(filenames): 56 | print(f'Progress {k+1}/{len(filenames)}: {filename}') 57 | 58 | raw_image = cv2.imread(filename) 59 | 60 | depth = depth_anything.infer_image(raw_image, args.input_size) 61 | 62 | if args.save_numpy: 63 | output_path = os.path.join(args.outdir, os.path.splitext(os.path.basename(filename))[0] + '_raw_depth_meter.npy') 64 | np.save(output_path, depth) 65 | 66 | depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0 67 | depth = depth.astype(np.uint8) 68 | 69 | if args.grayscale: 70 | depth = np.repeat(depth[..., np.newaxis], 3, axis=-1) 71 | else: 72 | depth = (cmap(depth)[:, :, :3] * 255)[:, :, ::-1].astype(np.uint8) 73 | 74 | output_path = os.path.join(args.outdir, os.path.splitext(os.path.basename(filename))[0] + '.png') 75 | if args.pred_only: 76 | cv2.imwrite(output_path, depth) 77 | else: 78 | split_region = np.ones((raw_image.shape[0], 50, 3), dtype=np.uint8) * 255 79 | combined_result = cv2.hconcat([raw_image, split_region, depth]) 80 | 81 | cv2.imwrite(output_path, combined_result) -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/metric_depth/util/dist_helper.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | 4 | import torch 5 | import torch.distributed as dist 6 | 7 | 8 | def setup_distributed(backend="nccl", port=None): 9 | """AdaHessian Optimizer 10 | Lifted from https://github.com/BIGBALLON/distribuuuu/blob/master/distribuuuu/utils.py 11 | Originally licensed MIT, Copyright (c) 2020 Wei Li 12 | """ 13 | num_gpus = torch.cuda.device_count() 14 | 15 | if "SLURM_JOB_ID" in os.environ: 16 | rank = int(os.environ["SLURM_PROCID"]) 17 | world_size = int(os.environ["SLURM_NTASKS"]) 18 | node_list = os.environ["SLURM_NODELIST"] 19 | addr = subprocess.getoutput(f"scontrol show hostname {node_list} | head -n1") 20 | # specify master port 21 | if port is not None: 22 | os.environ["MASTER_PORT"] = str(port) 23 | elif "MASTER_PORT" not in os.environ: 24 | os.environ["MASTER_PORT"] = "10685" 25 | if "MASTER_ADDR" not in os.environ: 26 | os.environ["MASTER_ADDR"] = addr 27 | os.environ["WORLD_SIZE"] = str(world_size) 28 | os.environ["LOCAL_RANK"] = str(rank % num_gpus) 29 | os.environ["RANK"] = 
str(rank) 30 | else: 31 | rank = int(os.environ["RANK"]) 32 | world_size = int(os.environ["WORLD_SIZE"]) 33 | 34 | torch.cuda.set_device(rank % num_gpus) 35 | 36 | dist.init_process_group( 37 | backend=backend, 38 | world_size=world_size, 39 | rank=rank, 40 | ) 41 | return rank, world_size 42 | -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/metric_depth/util/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class SiLogLoss(nn.Module): 6 | def __init__(self, lambd=0.5): 7 | super().__init__() 8 | self.lambd = lambd 9 | 10 | def forward(self, pred, target, valid_mask): 11 | valid_mask = valid_mask.detach() 12 | diff_log = torch.log(target[valid_mask]) - torch.log(pred[valid_mask]) 13 | loss = torch.sqrt(torch.pow(diff_log, 2).mean() - 14 | self.lambd * torch.pow(diff_log.mean(), 2)) 15 | 16 | return loss 17 | -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/metric_depth/util/metric.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def eval_depth(pred, target): 5 | assert pred.shape == target.shape 6 | 7 | thresh = torch.max((target / pred), (pred / target)) 8 | 9 | d1 = torch.sum(thresh < 1.25).float() / len(thresh) 10 | d2 = torch.sum(thresh < 1.25 ** 2).float() / len(thresh) 11 | d3 = torch.sum(thresh < 1.25 ** 3).float() / len(thresh) 12 | 13 | diff = pred - target 14 | diff_log = torch.log(pred) - torch.log(target) 15 | 16 | abs_rel = torch.mean(torch.abs(diff) / target) 17 | sq_rel = torch.mean(torch.pow(diff, 2) / target) 18 | 19 | rmse = torch.sqrt(torch.mean(torch.pow(diff, 2))) 20 | rmse_log = torch.sqrt(torch.mean(torch.pow(diff_log , 2))) 21 | 22 | log10 = torch.mean(torch.abs(torch.log10(pred) - torch.log10(target))) 23 | silog = torch.sqrt(torch.pow(diff_log, 2).mean() - 0.5 * torch.pow(diff_log.mean(), 2)) 24 | 25 | return {'d1': d1.item(), 'd2': d2.item(), 'd3': d3.item(), 'abs_rel': abs_rel.item(), 'sq_rel': sq_rel.item(), 26 | 'rmse': rmse.item(), 'rmse_log': rmse_log.item(), 'log10':log10.item(), 'silog':silog.item()} -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/metric_depth/util/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import numpy as np 4 | import logging 5 | 6 | logs = set() 7 | 8 | 9 | def init_log(name, level=logging.INFO): 10 | if (name, level) in logs: 11 | return 12 | logs.add((name, level)) 13 | logger = logging.getLogger(name) 14 | logger.setLevel(level) 15 | ch = logging.StreamHandler() 16 | ch.setLevel(level) 17 | if "SLURM_PROCID" in os.environ: 18 | rank = int(os.environ["SLURM_PROCID"]) 19 | logger.addFilter(lambda record: rank == 0) 20 | else: 21 | rank = 0 22 | format_str = "[%(asctime)s][%(levelname)8s] %(message)s" 23 | formatter = logging.Formatter(format_str) 24 | ch.setFormatter(formatter) 25 | logger.addHandler(ch) 26 | return logger 27 | -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/requirements.txt: -------------------------------------------------------------------------------- 1 | gradio_imageslider 2 | gradio==4.29.0 3 | matplotlib 4 | opencv-python 5 | torch 6 | torchvision 7 | -------------------------------------------------------------------------------- 
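The `SiLogLoss` and `eval_depth` helpers shown above (in `metric_depth/util/loss.py` and `metric_depth/util/metric.py`) are small enough to exercise on their own. A minimal sketch, assuming `metric_depth` is the working directory so that the `util` package is importable; the random tensors are illustrative stand-ins for real depth maps:

```python
import torch

from util.loss import SiLogLoss     # scale-invariant log loss defined above
from util.metric import eval_depth  # depth-accuracy metrics defined above

# Fake positive depth maps in metres; real usage would pass network output and GT depth.
pred = torch.rand(2, 480, 640) * 10 + 0.1
target = torch.rand(2, 480, 640) * 10 + 0.1
valid_mask = target > 0.001  # boolean mask of pixels with usable ground truth

criterion = SiLogLoss(lambd=0.5)
loss = criterion(pred, target, valid_mask)  # scalar tensor

metrics = eval_depth(pred[valid_mask], target[valid_mask])  # dict of floats
print(f"silog {loss.item():.4f}  abs_rel {metrics['abs_rel']:.4f}  d1 {metrics['d1']:.4f}")
```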
/thirdparty/depth_anything_v2/run.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import cv2 3 | import glob 4 | import matplotlib 5 | import numpy as np 6 | import os 7 | import torch 8 | 9 | from depth_anything_v2.dpt import DepthAnythingV2 10 | 11 | 12 | if __name__ == '__main__': 13 | parser = argparse.ArgumentParser(description='Depth Anything V2') 14 | 15 | parser.add_argument('--img-path', type=str) 16 | parser.add_argument('--input-size', type=int, default=518) 17 | parser.add_argument('--outdir', type=str, default='./vis_depth') 18 | 19 | parser.add_argument('--encoder', type=str, default='vitl', choices=['vits', 'vitb', 'vitl', 'vitg']) 20 | 21 | parser.add_argument('--pred-only', dest='pred_only', action='store_true', help='only display the prediction') 22 | parser.add_argument('--grayscale', dest='grayscale', action='store_true', help='do not apply colorful palette') 23 | 24 | args = parser.parse_args() 25 | 26 | DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu' 27 | 28 | model_configs = { 29 | 'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]}, 30 | 'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]}, 31 | 'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]}, 32 | 'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]} 33 | } 34 | 35 | depth_anything = DepthAnythingV2(**model_configs[args.encoder]) 36 | depth_anything.load_state_dict(torch.load(f'checkpoints/depth_anything_v2_{args.encoder}.pth', map_location='cpu')) 37 | depth_anything = depth_anything.to(DEVICE).eval() 38 | 39 | if os.path.isfile(args.img_path): 40 | if args.img_path.endswith('txt'): 41 | with open(args.img_path, 'r') as f: 42 | filenames = f.read().splitlines() 43 | else: 44 | filenames = [args.img_path] 45 | else: 46 | filenames = glob.glob(os.path.join(args.img_path, '**/*'), recursive=True) 47 | 48 | os.makedirs(args.outdir, exist_ok=True) 49 | 50 | cmap = matplotlib.colormaps.get_cmap('Spectral_r') 51 | 52 | for k, filename in enumerate(filenames): 53 | print(f'Progress {k+1}/{len(filenames)}: {filename}') 54 | 55 | raw_image = cv2.imread(filename) 56 | 57 | depth = depth_anything.infer_image(raw_image, args.input_size) 58 | 59 | depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0 60 | depth = depth.astype(np.uint8) 61 | 62 | if args.grayscale: 63 | depth = np.repeat(depth[..., np.newaxis], 3, axis=-1) 64 | else: 65 | depth = (cmap(depth)[:, :, :3] * 255)[:, :, ::-1].astype(np.uint8) 66 | 67 | if args.pred_only: 68 | cv2.imwrite(os.path.join(args.outdir, os.path.splitext(os.path.basename(filename))[0] + '.png'), depth) 69 | else: 70 | split_region = np.ones((raw_image.shape[0], 50, 3), dtype=np.uint8) * 255 71 | combined_result = cv2.hconcat([raw_image, split_region, depth]) 72 | 73 | cv2.imwrite(os.path.join(args.outdir, os.path.splitext(os.path.basename(filename))[0] + '.png'), combined_result) -------------------------------------------------------------------------------- /thirdparty/depth_anything_v2/run_video.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import cv2 3 | import glob 4 | import matplotlib 5 | import numpy as np 6 | import os 7 | import torch 8 | 9 | from depth_anything_v2.dpt import DepthAnythingV2 10 | 11 | 12 | if __name__ == '__main__': 13 | parser = 
argparse.ArgumentParser(description='Depth Anything V2') 14 | 15 | parser.add_argument('--video-path', type=str) 16 | parser.add_argument('--input-size', type=int, default=518) 17 | parser.add_argument('--outdir', type=str, default='./vis_video_depth') 18 | 19 | parser.add_argument('--encoder', type=str, default='vitl', choices=['vits', 'vitb', 'vitl', 'vitg']) 20 | 21 | parser.add_argument('--pred-only', dest='pred_only', action='store_true', help='only display the prediction') 22 | parser.add_argument('--grayscale', dest='grayscale', action='store_true', help='do not apply colorful palette') 23 | 24 | args = parser.parse_args() 25 | 26 | DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu' 27 | 28 | model_configs = { 29 | 'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]}, 30 | 'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]}, 31 | 'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]}, 32 | 'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]} 33 | } 34 | 35 | depth_anything = DepthAnythingV2(**model_configs[args.encoder]) 36 | depth_anything.load_state_dict(torch.load(f'checkpoints/depth_anything_v2_{args.encoder}.pth', map_location='cpu')) 37 | depth_anything = depth_anything.to(DEVICE).eval() 38 | 39 | if os.path.isfile(args.video_path): 40 | if args.video_path.endswith('txt'): 41 | with open(args.video_path, 'r') as f: 42 | lines = f.read().splitlines() 43 | else: 44 | filenames = [args.video_path] 45 | else: 46 | filenames = glob.glob(os.path.join(args.video_path, '**/*'), recursive=True) 47 | 48 | os.makedirs(args.outdir, exist_ok=True) 49 | 50 | margin_width = 50 51 | cmap = matplotlib.colormaps.get_cmap('Spectral_r') 52 | 53 | for k, filename in enumerate(filenames): 54 | print(f'Progress {k+1}/{len(filenames)}: {filename}') 55 | 56 | raw_video = cv2.VideoCapture(filename) 57 | frame_width, frame_height = int(raw_video.get(cv2.CAP_PROP_FRAME_WIDTH)), int(raw_video.get(cv2.CAP_PROP_FRAME_HEIGHT)) 58 | frame_rate = int(raw_video.get(cv2.CAP_PROP_FPS)) 59 | 60 | if args.pred_only: 61 | output_width = frame_width 62 | else: 63 | output_width = frame_width * 2 + margin_width 64 | 65 | output_path = os.path.join(args.outdir, os.path.splitext(os.path.basename(filename))[0] + '.mp4') 66 | out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*"mp4v"), frame_rate, (output_width, frame_height)) 67 | 68 | while raw_video.isOpened(): 69 | ret, raw_frame = raw_video.read() 70 | if not ret: 71 | break 72 | 73 | depth = depth_anything.infer_image(raw_frame, args.input_size) 74 | 75 | depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0 76 | depth = depth.astype(np.uint8) 77 | 78 | if args.grayscale: 79 | depth = np.repeat(depth[..., np.newaxis], 3, axis=-1) 80 | else: 81 | depth = (cmap(depth)[:, :, :3] * 255)[:, :, ::-1].astype(np.uint8) 82 | 83 | if args.pred_only: 84 | out.write(depth) 85 | else: 86 | split_region = np.ones((frame_height, margin_width, 3), dtype=np.uint8) * 255 87 | combined_frame = cv2.hconcat([raw_frame, split_region, depth]) 88 | 89 | out.write(combined_frame) 90 | 91 | raw_video.release() 92 | out.release() 93 | -------------------------------------------------------------------------------- /thirdparty/gaussian_splatting/LICENSE.md: -------------------------------------------------------------------------------- 1 | Gaussian-Splatting License 2 | =========================== 3 | 
4 | **Inria** and **the Max Planck Institut for Informatik (MPII)** hold all the ownership rights on the *Software* named **gaussian-splatting**. 5 | The *Software* is in the process of being registered with the Agence pour la Protection des 6 | Programmes (APP). 7 | 8 | The *Software* is still being developed by the *Licensor*. 9 | 10 | *Licensor*'s goal is to allow the research community to use, test and evaluate 11 | the *Software*. 12 | 13 | ## 1. Definitions 14 | 15 | *Licensee* means any person or entity that uses the *Software* and distributes 16 | its *Work*. 17 | 18 | *Licensor* means the owners of the *Software*, i.e Inria and MPII 19 | 20 | *Software* means the original work of authorship made available under this 21 | License ie gaussian-splatting. 22 | 23 | *Work* means the *Software* and any additions to or derivative works of the 24 | *Software* that are made available under this License. 25 | 26 | 27 | ## 2. Purpose 28 | This license is intended to define the rights granted to the *Licensee* by 29 | Licensors under the *Software*. 30 | 31 | ## 3. Rights granted 32 | 33 | For the above reasons Licensors have decided to distribute the *Software*. 34 | Licensors grant non-exclusive rights to use the *Software* for research purposes 35 | to research users (both academic and industrial), free of charge, without right 36 | to sublicense.. The *Software* may be used "non-commercially", i.e., for research 37 | and/or evaluation purposes only. 38 | 39 | Subject to the terms and conditions of this License, you are granted a 40 | non-exclusive, royalty-free, license to reproduce, prepare derivative works of, 41 | publicly display, publicly perform and distribute its *Work* and any resulting 42 | derivative works in any form. 43 | 44 | ## 4. Limitations 45 | 46 | **4.1 Redistribution.** You may reproduce or distribute the *Work* only if (a) you do 47 | so under this License, (b) you include a complete copy of this License with 48 | your distribution, and (c) you retain without modification any copyright, 49 | patent, trademark, or attribution notices that are present in the *Work*. 50 | 51 | **4.2 Derivative Works.** You may specify that additional or different terms apply 52 | to the use, reproduction, and distribution of your derivative works of the *Work* 53 | ("Your Terms") only if (a) Your Terms provide that the use limitation in 54 | Section 2 applies to your derivative works, and (b) you identify the specific 55 | derivative works that are subject to Your Terms. Notwithstanding Your Terms, 56 | this License (including the redistribution requirements in Section 3.1) will 57 | continue to apply to the *Work* itself. 58 | 59 | **4.3** Any other use without of prior consent of Licensors is prohibited. Research 60 | users explicitly acknowledge having received from Licensors all information 61 | allowing to appreciate the adequacy between of the *Software* and their needs and 62 | to undertake all necessary precautions for its execution and use. 63 | 64 | **4.4** The *Software* is provided both as a compiled library file and as source 65 | code. In case of using the *Software* for a publication or other results obtained 66 | through the use of the *Software*, users are strongly encouraged to cite the 67 | corresponding publications as explained in the documentation of the *Software*. 68 | 69 | ## 5. Disclaimer 70 | 71 | THE USER CANNOT USE, EXPLOIT OR DISTRIBUTE THE *SOFTWARE* FOR COMMERCIAL PURPOSES 72 | WITHOUT PRIOR AND EXPLICIT CONSENT OF LICENSORS. 
YOU MUST CONTACT INRIA FOR ANY 73 | UNAUTHORIZED USE: stip-sophia.transfert@inria.fr . ANY SUCH ACTION WILL 74 | CONSTITUTE A FORGERY. THIS *SOFTWARE* IS PROVIDED "AS IS" WITHOUT ANY WARRANTIES 75 | OF ANY NATURE AND ANY EXPRESS OR IMPLIED WARRANTIES, WITH REGARDS TO COMMERCIAL 76 | USE, PROFESSIONNAL USE, LEGAL OR NOT, OR OTHER, OR COMMERCIALISATION OR 77 | ADAPTATION. UNLESS EXPLICITLY PROVIDED BY LAW, IN NO EVENT, SHALL INRIA OR THE 78 | AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 79 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 80 | GOODS OR SERVICES, LOSS OF USE, DATA, OR PROFITS OR BUSINESS INTERRUPTION) 81 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 82 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING FROM, OUT OF OR 83 | IN CONNECTION WITH THE *SOFTWARE* OR THE USE OR OTHER DEALINGS IN THE *SOFTWARE*. 84 | 85 | ## 6. Files subject to permissive licenses 86 | The contents of the file ```utils/loss_utils.py``` are based on publicly available code authored by Evan Su, which falls under the permissive MIT license. 87 | 88 | Title: pytorch-ssim\ 89 | Project code: https://github.com/Po-Hsun-Su/pytorch-ssim\ 90 | Copyright Evan Su, 2017\ 91 | License: https://github.com/Po-Hsun-Su/pytorch-ssim/blob/master/LICENSE.txt (MIT) -------------------------------------------------------------------------------- /thirdparty/gaussian_splatting/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GradientSpaces/WildGS-SLAM/24e6abf400d978955e2b26b3c451817aa6a6a11a/thirdparty/gaussian_splatting/__init__.py -------------------------------------------------------------------------------- /thirdparty/gaussian_splatting/gaussian_renderer/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import math 13 | 14 | import torch 15 | from diff_gaussian_rasterization import ( 16 | GaussianRasterizationSettings, 17 | GaussianRasterizer, 18 | ) 19 | 20 | from thirdparty.gaussian_splatting.scene.gaussian_model import GaussianModel 21 | from thirdparty.gaussian_splatting.utils.sh_utils import eval_sh 22 | 23 | 24 | def render( 25 | viewpoint_camera, 26 | pc: GaussianModel, 27 | pipe, 28 | bg_color: torch.Tensor, 29 | scaling_modifier=1.0, 30 | override_color=None, 31 | mask=None, 32 | ): 33 | """ 34 | Render the scene. 35 | 36 | Background tensor (bg_color) must be on GPU! 37 | """ 38 | 39 | # Create zero tensor. 
We will use it to make pytorch return gradients of the 2D (screen-space) means 40 | if pc.get_xyz.shape[0] == 0: 41 | return None 42 | 43 | screenspace_points = ( 44 | torch.zeros_like( 45 | pc.get_xyz, dtype=pc.get_xyz.dtype, requires_grad=True, device="cuda" 46 | ) 47 | + 0 48 | ) 49 | try: 50 | screenspace_points.retain_grad() 51 | except Exception: 52 | pass 53 | 54 | # Set up rasterization configuration 55 | tanfovx = math.tan(viewpoint_camera.FoVx * 0.5) 56 | tanfovy = math.tan(viewpoint_camera.FoVy * 0.5) 57 | 58 | raster_settings = GaussianRasterizationSettings( 59 | image_height=int(viewpoint_camera.image_height), 60 | image_width=int(viewpoint_camera.image_width), 61 | tanfovx=tanfovx, 62 | tanfovy=tanfovy, 63 | bg=bg_color, 64 | scale_modifier=scaling_modifier, 65 | viewmatrix=viewpoint_camera.world_view_transform, 66 | projmatrix=viewpoint_camera.full_proj_transform, 67 | projmatrix_raw=viewpoint_camera.projection_matrix, 68 | sh_degree=pc.active_sh_degree, 69 | campos=viewpoint_camera.camera_center, 70 | prefiltered=False, 71 | debug=False, 72 | ) 73 | 74 | rasterizer = GaussianRasterizer(raster_settings=raster_settings) 75 | 76 | means3D = pc.get_xyz 77 | means2D = screenspace_points 78 | opacity = pc.get_opacity 79 | 80 | # If precomputed 3d covariance is provided, use it. If not, then it will be computed from 81 | # scaling / rotation by the rasterizer. 82 | scales = None 83 | rotations = None 84 | cov3D_precomp = None 85 | if pipe.compute_cov3D_python: 86 | cov3D_precomp = pc.get_covariance(scaling_modifier) 87 | else: 88 | # check if the covariance is isotropic 89 | if pc.get_scaling.shape[-1] == 1: 90 | scales = pc.get_scaling.repeat(1, 3) 91 | else: 92 | scales = pc.get_scaling 93 | rotations = pc.get_rotation 94 | 95 | # If precomputed colors are provided, use them. Otherwise, if it is desired to precompute colors 96 | # from SHs in Python, do it. If not, then SH -> RGB conversion will be done by rasterizer. 97 | shs = None 98 | colors_precomp = None 99 | if colors_precomp is None: 100 | if pipe.convert_SHs_python: 101 | shs_view = pc.get_features.transpose(1, 2).view( 102 | -1, 3, (pc.max_sh_degree + 1) ** 2 103 | ) 104 | dir_pp = pc.get_xyz - viewpoint_camera.camera_center.repeat( 105 | pc.get_features.shape[0], 1 106 | ) 107 | dir_pp_normalized = dir_pp / dir_pp.norm(dim=1, keepdim=True) 108 | sh2rgb = eval_sh(pc.active_sh_degree, shs_view, dir_pp_normalized) 109 | colors_precomp = torch.clamp_min(sh2rgb + 0.5, 0.0) 110 | else: 111 | shs = pc.get_features 112 | else: 113 | colors_precomp = override_color 114 | 115 | # Rasterize visible Gaussians to image, obtain their radii (on screen). 
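    # Note: both branches below feed the Gaussian parameters to the pose-aware rasterizer;
    # `theta`/`rho` forward the camera's `cam_rot_delta`/`cam_trans_delta`, presumably so
    # that pose corrections can receive gradients through rasterization. As written, the
    # masked branch unpacks four return values and never binds `n_touched` (the unmasked
    # branch unpacks five), yet the dictionary returned at the end of render() references
    # `n_touched`; the masked path therefore appears to be unused, or would fail if taken.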
116 | if mask is not None: 117 | rendered_image, radii, depth, opacity = rasterizer( 118 | means3D=means3D[mask], 119 | means2D=means2D[mask], 120 | shs=shs[mask], 121 | colors_precomp=colors_precomp[mask] if colors_precomp is not None else None, 122 | opacities=opacity[mask], 123 | scales=scales[mask], 124 | rotations=rotations[mask], 125 | cov3D_precomp=cov3D_precomp[mask] if cov3D_precomp is not None else None, 126 | theta=viewpoint_camera.cam_rot_delta, 127 | rho=viewpoint_camera.cam_trans_delta, 128 | ) 129 | else: 130 | rendered_image, radii, depth, opacity, n_touched = rasterizer( 131 | means3D=means3D, 132 | means2D=means2D, 133 | shs=shs, 134 | colors_precomp=colors_precomp, 135 | opacities=opacity, 136 | scales=scales, 137 | rotations=rotations, 138 | cov3D_precomp=cov3D_precomp, 139 | theta=viewpoint_camera.cam_rot_delta, 140 | rho=viewpoint_camera.cam_trans_delta, 141 | ) 142 | 143 | # Those Gaussians that were frustum culled or had a radius of 0 were not visible. 144 | # They will be excluded from value updates used in the splitting criteria. 145 | return { 146 | "render": rendered_image, 147 | "viewspace_points": screenspace_points, 148 | "visibility_filter": radii > 0, 149 | "radii": radii, 150 | "depth": depth, 151 | "opacity": opacity, 152 | "n_touched": n_touched, 153 | } 154 | -------------------------------------------------------------------------------- /thirdparty/gaussian_splatting/utils/graphics_utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import math 13 | from typing import NamedTuple 14 | 15 | import numpy as np 16 | import torch 17 | 18 | 19 | class BasicPointCloud(NamedTuple): 20 | points: np.array 21 | colors: np.array 22 | normals: np.array 23 | 24 | 25 | def getWorld2View(R, t): 26 | Rt = np.zeros((4, 4)) 27 | Rt[:3, :3] = R.transpose() 28 | Rt[:3, 3] = t 29 | Rt[3, 3] = 1.0 30 | return np.float32(Rt) 31 | 32 | 33 | def getWorld2View2(R, t, translate=torch.tensor([0.0, 0.0, 0.0]), scale=1.0): 34 | translate = translate.to(R.device) 35 | Rt = torch.zeros((4, 4), device=R.device) 36 | # Rt[:3, :3] = R.transpose() 37 | Rt[:3, :3] = R 38 | Rt[:3, 3] = t 39 | Rt[3, 3] = 1.0 40 | 41 | C2W = torch.linalg.inv(Rt) 42 | cam_center = C2W[:3, 3] 43 | cam_center = (cam_center + translate) * scale 44 | C2W[:3, 3] = cam_center 45 | Rt = torch.linalg.inv(C2W) 46 | return Rt 47 | 48 | 49 | def getProjectionMatrix(znear, zfar, fovX, fovY): 50 | tanHalfFovY = math.tan((fovY / 2)) 51 | tanHalfFovX = math.tan((fovX / 2)) 52 | 53 | top = tanHalfFovY * znear 54 | bottom = -top 55 | right = tanHalfFovX * znear 56 | left = -right 57 | 58 | P = torch.zeros(4, 4) 59 | 60 | z_sign = 1.0 61 | 62 | P[0, 0] = 2.0 * znear / (right - left) 63 | P[1, 1] = 2.0 * znear / (top - bottom) 64 | P[0, 2] = (right + left) / (right - left) 65 | P[1, 2] = (top + bottom) / (top - bottom) 66 | P[3, 2] = z_sign 67 | P[2, 2] = -(zfar + znear) / (zfar - znear) 68 | P[2, 3] = -2 * (zfar * znear) / (zfar - znear) 69 | return P 70 | 71 | 72 | def getProjectionMatrix2(znear, zfar, cx, cy, fx, fy, W, H): 73 | left = ((2 * cx - W) / W - 1.0) * W / 2.0 74 | right = ((2 * cx - W) / W + 1.0) * W / 2.0 75 | top = ((2 * cy - H) / H + 1.0) * H / 2.0 76 | bottom = ((2 * cy - H) / H - 1.0) * H / 2.0 77 | left = znear / fx * left 78 | right = znear / fx * right 79 | top = znear / fy * top 80 | bottom = znear / fy * bottom 81 | P = torch.zeros(4, 4) 82 | 83 | z_sign = 1.0 84 | 85 | P[0, 0] = 2.0 * znear / (right - left) 86 | P[1, 1] = 2.0 * znear / (top - bottom) 87 | P[0, 2] = (right + left) / (right - left) 88 | P[1, 2] = (top + bottom) / (top - bottom) 89 | P[3, 2] = z_sign 90 | P[2, 2] = z_sign * zfar / (zfar - znear) 91 | P[2, 3] = -(zfar * znear) / (zfar - znear) 92 | 93 | return P 94 | 95 | 96 | def fov2focal(fov, pixels): 97 | return pixels / (2 * math.tan(fov / 2)) 98 | 99 | 100 | def focal2fov(focal, pixels): 101 | return 2 * math.atan(pixels / (2 * focal)) 102 | -------------------------------------------------------------------------------- /thirdparty/gaussian_splatting/utils/image_utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | 14 | 15 | def mse(img1, img2): 16 | return ((img1 - img2) ** 2).view(img1.shape[0], -1).mean(1, keepdim=True) 17 | 18 | 19 | def psnr(img1, img2): 20 | mse = ((img1 - img2) ** 2).view(img1.shape[0], -1).mean(1, keepdim=True) 21 | return 20 * torch.log10(1.0 / torch.sqrt(mse)) 22 | -------------------------------------------------------------------------------- /thirdparty/gaussian_splatting/utils/loss_utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | from math import exp 13 | 14 | import cv2 15 | import numpy as np 16 | import torch 17 | import torch.nn.functional as F 18 | from torch.autograd import Variable 19 | 20 | 21 | def l1_loss(network_output, gt): 22 | return torch.abs((network_output - gt)).mean() 23 | 24 | 25 | def l1_loss_weight(network_output, gt): 26 | image = gt.detach().cpu().numpy().transpose((1, 2, 0)) 27 | rgb_raw_gray = np.dot(image[..., :3], [0.2989, 0.5870, 0.1140]) 28 | sobelx = cv2.Sobel(rgb_raw_gray, cv2.CV_64F, 1, 0, ksize=5) 29 | sobely = cv2.Sobel(rgb_raw_gray, cv2.CV_64F, 0, 1, ksize=5) 30 | sobel_merge = np.sqrt(sobelx * sobelx + sobely * sobely) + 1e-10 31 | sobel_merge = np.exp(sobel_merge) 32 | sobel_merge /= np.max(sobel_merge) 33 | sobel_merge = torch.from_numpy(sobel_merge)[None, ...].to(gt.device) 34 | 35 | return torch.abs((network_output - gt) * sobel_merge).mean() 36 | 37 | 38 | def l2_loss(network_output, gt): 39 | return ((network_output - gt) ** 2).mean() 40 | 41 | 42 | def gaussian(window_size, sigma): 43 | gauss = torch.Tensor( 44 | [ 45 | exp(-((x - window_size // 2) ** 2) / float(2 * sigma**2)) 46 | for x in range(window_size) 47 | ] 48 | ) 49 | return gauss / gauss.sum() 50 | 51 | 52 | def create_window(window_size, channel): 53 | _1D_window = gaussian(window_size, 1.5).unsqueeze(1) 54 | _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0) 55 | window = Variable( 56 | _2D_window.expand(channel, 1, window_size, window_size).contiguous() 57 | ) 58 | return window 59 | 60 | 61 | def ssim(img1, img2, window_size=11, size_average=True): 62 | channel = img1.size(-3) 63 | window = create_window(window_size, channel) 64 | 65 | if img1.is_cuda: 66 | window = window.cuda(img1.get_device()) 67 | window = window.type_as(img1) 68 | 69 | return _ssim(img1, img2, window, window_size, channel, size_average) 70 | 71 | 72 | def _ssim(img1, img2, window, window_size, channel, size_average=True): 73 | mu1 = F.conv2d(img1, window, padding=window_size // 2, groups=channel) 74 | mu2 = F.conv2d(img2, window, padding=window_size // 2, groups=channel) 75 | 76 | mu1_sq = mu1.pow(2) 77 | mu2_sq = mu2.pow(2) 78 | mu1_mu2 = mu1 * mu2 79 | 80 | sigma1_sq = ( 81 | F.conv2d(img1 * img1, window, padding=window_size // 2, groups=channel) - mu1_sq 82 | ) 83 | sigma2_sq = ( 84 | F.conv2d(img2 * img2, window, padding=window_size // 2, groups=channel) - mu2_sq 85 | ) 86 | sigma12 = ( 87 | F.conv2d(img1 * img2, window, padding=window_size // 2, groups=channel) 88 | - mu1_mu2 89 | ) 90 | 91 | C1 = 0.01**2 92 | C2 = 0.03**2 93 | 94 | ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ( 95 | 
(mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2) 96 | ) 97 | 98 | if size_average: 99 | return ssim_map.mean() 100 | else: 101 | return ssim_map.mean(1).mean(1).mean(1) 102 | -------------------------------------------------------------------------------- /thirdparty/gaussian_splatting/utils/sh_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The PlenOctree Authors. 2 | # Redistribution and use in source and binary forms, with or without 3 | # modification, are permitted provided that the following conditions are met: 4 | # 5 | # 1. Redistributions of source code must retain the above copyright notice, 6 | # this list of conditions and the following disclaimer. 7 | # 8 | # 2. Redistributions in binary form must reproduce the above copyright notice, 9 | # this list of conditions and the following disclaimer in the documentation 10 | # and/or other materials provided with the distribution. 11 | # 12 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 13 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 14 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 15 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 16 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 17 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 18 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 19 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 20 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 21 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 22 | # POSSIBILITY OF SUCH DAMAGE. 23 | 24 | C0 = 0.28209479177387814 25 | C1 = 0.4886025119029199 26 | C2 = [ 27 | 1.0925484305920792, 28 | -1.0925484305920792, 29 | 0.31539156525252005, 30 | -1.0925484305920792, 31 | 0.5462742152960396, 32 | ] 33 | C3 = [ 34 | -0.5900435899266435, 35 | 2.890611442640554, 36 | -0.4570457994644658, 37 | 0.3731763325901154, 38 | -0.4570457994644658, 39 | 1.445305721320277, 40 | -0.5900435899266435, 41 | ] 42 | C4 = [ 43 | 2.5033429417967046, 44 | -1.7701307697799304, 45 | 0.9461746957575601, 46 | -0.6690465435572892, 47 | 0.10578554691520431, 48 | -0.6690465435572892, 49 | 0.47308734787878004, 50 | -1.7701307697799304, 51 | 0.6258357354491761, 52 | ] 53 | 54 | 55 | def eval_sh(deg, sh, dirs): 56 | """ 57 | Evaluate spherical harmonics at unit directions 58 | using hardcoded SH polynomials. 59 | Works with torch/np/jnp. 60 | ... Can be 0 or more batch dimensions. 61 | Args: 62 | deg: int SH deg. 
Currently, 0-3 supported 63 | sh: jnp.ndarray SH coeffs [..., C, (deg + 1) ** 2] 64 | dirs: jnp.ndarray unit directions [..., 3] 65 | Returns: 66 | [..., C] 67 | """ 68 | assert deg <= 4 and deg >= 0 69 | coeff = (deg + 1) ** 2 70 | assert sh.shape[-1] >= coeff 71 | 72 | result = C0 * sh[..., 0] 73 | if deg > 0: 74 | x, y, z = dirs[..., 0:1], dirs[..., 1:2], dirs[..., 2:3] 75 | result = ( 76 | result - C1 * y * sh[..., 1] + C1 * z * sh[..., 2] - C1 * x * sh[..., 3] 77 | ) 78 | 79 | if deg > 1: 80 | xx, yy, zz = x * x, y * y, z * z 81 | xy, yz, xz = x * y, y * z, x * z 82 | result = ( 83 | result 84 | + C2[0] * xy * sh[..., 4] 85 | + C2[1] * yz * sh[..., 5] 86 | + C2[2] * (2.0 * zz - xx - yy) * sh[..., 6] 87 | + C2[3] * xz * sh[..., 7] 88 | + C2[4] * (xx - yy) * sh[..., 8] 89 | ) 90 | 91 | if deg > 2: 92 | result = ( 93 | result 94 | + C3[0] * y * (3 * xx - yy) * sh[..., 9] 95 | + C3[1] * xy * z * sh[..., 10] 96 | + C3[2] * y * (4 * zz - xx - yy) * sh[..., 11] 97 | + C3[3] * z * (2 * zz - 3 * xx - 3 * yy) * sh[..., 12] 98 | + C3[4] * x * (4 * zz - xx - yy) * sh[..., 13] 99 | + C3[5] * z * (xx - yy) * sh[..., 14] 100 | + C3[6] * x * (xx - 3 * yy) * sh[..., 15] 101 | ) 102 | 103 | if deg > 3: 104 | result = ( 105 | result 106 | + C4[0] * xy * (xx - yy) * sh[..., 16] 107 | + C4[1] * yz * (3 * xx - yy) * sh[..., 17] 108 | + C4[2] * xy * (7 * zz - 1) * sh[..., 18] 109 | + C4[3] * yz * (7 * zz - 3) * sh[..., 19] 110 | + C4[4] * (zz * (35 * zz - 30) + 3) * sh[..., 20] 111 | + C4[5] * xz * (7 * zz - 3) * sh[..., 21] 112 | + C4[6] * (xx - yy) * (7 * zz - 1) * sh[..., 22] 113 | + C4[7] * xz * (xx - 3 * yy) * sh[..., 23] 114 | + C4[8] 115 | * (xx * (xx - 3 * yy) - yy * (3 * xx - yy)) 116 | * sh[..., 24] 117 | ) 118 | return result 119 | 120 | 121 | def RGB2SH(rgb): 122 | return (rgb - 0.5) / C0 123 | 124 | 125 | def SH2RGB(sh): 126 | return sh * C0 + 0.5 127 | -------------------------------------------------------------------------------- /thirdparty/gaussian_splatting/utils/system_utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import os 13 | from errno import EEXIST 14 | from os import makedirs, path 15 | 16 | 17 | def mkdir_p(folder_path): 18 | # Creates a directory. equivalent to using mkdir -p on the command line 19 | try: 20 | makedirs(folder_path) 21 | except OSError as exc: # Python >2.5 22 | if exc.errno == EEXIST and path.isdir(folder_path): 23 | pass 24 | else: 25 | raise 26 | 27 | 28 | def searchForMaxIteration(folder): 29 | saved_iters = [int(fname.split("_")[-1]) for fname in os.listdir(folder)] 30 | return max(saved_iters) 31 | --------------------------------------------------------------------------------
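The spherical-harmonics helpers in `sh_utils.py` above are easiest to sanity-check at degree 0, where `eval_sh` reduces to `C0 * sh[..., 0]` and where `RGB2SH`/`SH2RGB` are exact inverses. A minimal sketch; the import path and tensors are illustrative assumptions, and the `+ 0.5` mirrors the `sh2rgb + 0.5` step used by the renderer above:

```python
import torch

# Import path assumed for a checkout of this repository; adjust to your layout.
from thirdparty.gaussian_splatting.utils.sh_utils import eval_sh, RGB2SH, SH2RGB

rgb = torch.tensor([[0.8, 0.2, 0.1]])   # one point with an RGB colour in [0, 1]
sh_dc = RGB2SH(rgb)                     # degree-0 (DC) SH coefficient per channel
sh = sh_dc.unsqueeze(-1)                # shape [N, 3, (deg + 1) ** 2] with deg = 0

dirs = torch.tensor([[0.0, 0.0, 1.0]])  # unit view direction (unused at degree 0)
out = eval_sh(0, sh, dirs)              # = C0 * sh[..., 0] = rgb - 0.5

assert torch.allclose(out + 0.5, rgb)            # matches the renderer's sh2rgb + 0.5
assert torch.allclose(SH2RGB(RGB2SH(rgb)), rgb)  # RGB2SH and SH2RGB invert each other
```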