├── .gitmodules ├── LICENSE.md ├── README.md ├── arguments └── __init__.py ├── bash_scripts ├── 0_train.sh ├── 1_preprocess_tnt.sh ├── 2_extract_normal_dsine.sh ├── 3_extract_mask.sh ├── 4_extract_normal_geow.sh ├── convert.sh └── install.sh ├── configs ├── 360_v2 │ └── base.yaml ├── config.py ├── config_base.yaml ├── dtu │ ├── base.yaml │ └── dtu_scan24.yaml ├── reconstruct.yaml ├── scannetpp │ └── base.yaml └── tnt │ ├── Barn.yaml │ ├── Caterpillar.yaml │ ├── Courthouse.yaml │ ├── Ignatius.yaml │ ├── Meetingroom.yaml │ ├── Truck.yaml │ └── base.yaml ├── environment.yml ├── evaluation ├── crop_mesh.py ├── eval_dtu │ ├── eval.py │ ├── evaluate_single_scene.py │ └── render_utils.py ├── eval_tnt.py ├── full_eval.py ├── lpipsPyTorch │ ├── __init__.py │ └── modules │ │ ├── lpips.py │ │ ├── networks.py │ │ └── utils.py ├── metrics.py ├── render.py └── tnt_eval │ ├── README.md │ ├── config.py │ ├── evaluation.py │ ├── images │ ├── f-score.jpg │ ├── precision.jpg │ └── recall.jpg │ ├── plot.py │ ├── registration.py │ ├── requirements.txt │ ├── run.py │ ├── trajectory_io.py │ └── util.py ├── gaussian_renderer ├── __init__.py └── network_gui.py ├── media └── VCR-GauS.jpg ├── process_data ├── convert.py ├── convert_360_to_json.py ├── convert_data_to_json.py ├── convert_dtu_to_json.py ├── convert_tnt_to_json.py ├── extract_mask.py ├── extract_normal.py ├── extract_normal_geo.py ├── visualize_colmap.ipynb └── visualize_transforms.ipynb ├── pyproject.toml ├── python_scripts ├── run_base.py ├── run_dtu.py ├── run_mipnerf360.py ├── run_tnt.py ├── show_360.py ├── show_dtu.py └── show_tnt.py ├── requirements.txt ├── scene ├── __init__.py ├── appearance_network.py ├── cameras.py ├── colmap_loader.py ├── dataset_readers.py └── gaussian_model.py ├── tools ├── __init__.py ├── camera.py ├── camera_utils.py ├── crop_mesh.py ├── denoise_pcd.py ├── depth2mesh.py ├── distributed.py ├── general_utils.py ├── graphics_utils.py ├── image_utils.py ├── loss_utils.py ├── math_utils.py ├── mcube_utils.py ├── mesh_utils.py ├── normal_utils.py ├── prune.py ├── render_utils.py ├── semantic_id.py ├── sh_utils.py ├── system_utils.py ├── termcolor.py ├── visualization.py └── visualize.py ├── train.py └── trainer.py /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "submodules/simple-knn"] 2 | path = submodules/simple-knn 3 | url = https://gitlab.inria.fr/bkerbl/simple-knn.git 4 | [submodule "submodules/diff-gaussian-rasterization"] 5 | path = submodules/diff-gaussian-rasterization 6 | url = https://github.com/HLinChen/diff-gaussian-rasterization 7 | [submodule "SIBR_viewers"] 8 | path = SIBR_viewers 9 | url = https://gitlab.inria.fr/sibr/sibr_core.git 10 | [submodule "submodules/colmap"] 11 | path = submodules/colmap 12 | url = https://github.com/colmap/colmap.git 13 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Gaussian-Splatting License 2 | =========================== 3 | 4 | **Inria** and **the Max Planck Institut for Informatik (MPII)** hold all the ownership rights on the *Software* named **gaussian-splatting**. 5 | The *Software* is in the process of being registered with the Agence pour la Protection des 6 | Programmes (APP). 7 | 8 | The *Software* is still being developed by the *Licensor*. 9 | 10 | *Licensor*'s goal is to allow the research community to use, test and evaluate 11 | the *Software*. 12 | 13 | ## 1. 
Definitions 14 | 15 | *Licensee* means any person or entity that uses the *Software* and distributes 16 | its *Work*. 17 | 18 | *Licensor* means the owners of the *Software*, i.e Inria and MPII 19 | 20 | *Software* means the original work of authorship made available under this 21 | License ie gaussian-splatting. 22 | 23 | *Work* means the *Software* and any additions to or derivative works of the 24 | *Software* that are made available under this License. 25 | 26 | 27 | ## 2. Purpose 28 | This license is intended to define the rights granted to the *Licensee* by 29 | Licensors under the *Software*. 30 | 31 | ## 3. Rights granted 32 | 33 | For the above reasons Licensors have decided to distribute the *Software*. 34 | Licensors grant non-exclusive rights to use the *Software* for research purposes 35 | to research users (both academic and industrial), free of charge, without right 36 | to sublicense.. The *Software* may be used "non-commercially", i.e., for research 37 | and/or evaluation purposes only. 38 | 39 | Subject to the terms and conditions of this License, you are granted a 40 | non-exclusive, royalty-free, license to reproduce, prepare derivative works of, 41 | publicly display, publicly perform and distribute its *Work* and any resulting 42 | derivative works in any form. 43 | 44 | ## 4. Limitations 45 | 46 | **4.1 Redistribution.** You may reproduce or distribute the *Work* only if (a) you do 47 | so under this License, (b) you include a complete copy of this License with 48 | your distribution, and (c) you retain without modification any copyright, 49 | patent, trademark, or attribution notices that are present in the *Work*. 50 | 51 | **4.2 Derivative Works.** You may specify that additional or different terms apply 52 | to the use, reproduction, and distribution of your derivative works of the *Work* 53 | ("Your Terms") only if (a) Your Terms provide that the use limitation in 54 | Section 2 applies to your derivative works, and (b) you identify the specific 55 | derivative works that are subject to Your Terms. Notwithstanding Your Terms, 56 | this License (including the redistribution requirements in Section 3.1) will 57 | continue to apply to the *Work* itself. 58 | 59 | **4.3** Any other use without of prior consent of Licensors is prohibited. Research 60 | users explicitly acknowledge having received from Licensors all information 61 | allowing to appreciate the adequacy between of the *Software* and their needs and 62 | to undertake all necessary precautions for its execution and use. 63 | 64 | **4.4** The *Software* is provided both as a compiled library file and as source 65 | code. In case of using the *Software* for a publication or other results obtained 66 | through the use of the *Software*, users are strongly encouraged to cite the 67 | corresponding publications as explained in the documentation of the *Software*. 68 | 69 | ## 5. Disclaimer 70 | 71 | THE USER CANNOT USE, EXPLOIT OR DISTRIBUTE THE *SOFTWARE* FOR COMMERCIAL PURPOSES 72 | WITHOUT PRIOR AND EXPLICIT CONSENT OF LICENSORS. YOU MUST CONTACT INRIA FOR ANY 73 | UNAUTHORIZED USE: stip-sophia.transfert@inria.fr . ANY SUCH ACTION WILL 74 | CONSTITUTE A FORGERY. THIS *SOFTWARE* IS PROVIDED "AS IS" WITHOUT ANY WARRANTIES 75 | OF ANY NATURE AND ANY EXPRESS OR IMPLIED WARRANTIES, WITH REGARDS TO COMMERCIAL 76 | USE, PROFESSIONNAL USE, LEGAL OR NOT, OR OTHER, OR COMMERCIALISATION OR 77 | ADAPTATION. 
UNLESS EXPLICITLY PROVIDED BY LAW, IN NO EVENT, SHALL INRIA OR THE 78 | AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 79 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 80 | GOODS OR SERVICES, LOSS OF USE, DATA, OR PROFITS OR BUSINESS INTERRUPTION) 81 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 82 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING FROM, OUT OF OR 83 | IN CONNECTION WITH THE *SOFTWARE* OR THE USE OR OTHER DEALINGS IN THE *SOFTWARE*. 84 | -------------------------------------------------------------------------------- /arguments/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | from argparse import ArgumentParser, Namespace 13 | import sys 14 | import os 15 | 16 | class GroupParams: 17 | pass 18 | 19 | class ParamGroup: 20 | def __init__(self, parser: ArgumentParser, name : str, fill_none = False): 21 | group = parser.add_argument_group(name) 22 | for key, value in vars(self).items(): 23 | shorthand = False 24 | if key.startswith("_"): 25 | shorthand = True 26 | key = key[1:] 27 | t = type(value) 28 | value = value if not fill_none else None 29 | if shorthand: 30 | if t == bool: 31 | group.add_argument("--" + key, ("-" + key[0:1]), default=value, action="store_true") 32 | else: 33 | group.add_argument("--" + key, ("-" + key[0:1]), default=value, type=t) 34 | else: 35 | if t == bool: 36 | group.add_argument("--" + key, default=value, action="store_true") 37 | else: 38 | group.add_argument("--" + key, default=value, type=t) 39 | 40 | def extract(self, args): 41 | group = GroupParams() 42 | for arg in vars(args).items(): 43 | if arg[0] in vars(self) or ("_" + arg[0]) in vars(self): 44 | setattr(group, arg[0], arg[1]) 45 | return group 46 | 47 | class ModelParams(ParamGroup): 48 | def __init__(self, parser, sentinel=False): 49 | self.sh_degree = 3 50 | self._source_path = "" 51 | self._model_path = "" 52 | self._images = "images" 53 | self._resolution = -1 54 | self._white_background = False 55 | self.data_device = "cuda" 56 | self.eval = False 57 | super().__init__(parser, "Loading Parameters", sentinel) 58 | 59 | def extract(self, args): 60 | g = super().extract(args) 61 | g.source_path = os.path.abspath(g.source_path) 62 | return g 63 | 64 | class PipelineParams(ParamGroup): 65 | def __init__(self, parser): 66 | self.convert_SHs_python = False 67 | self.compute_cov3D_python = False 68 | self.debug = False 69 | super().__init__(parser, "Pipeline Parameters") 70 | 71 | class OptimizationParams(ParamGroup): 72 | def __init__(self, parser): 73 | self.iterations = 30_000 74 | self.position_lr_init = 0.00016 75 | self.position_lr_final = 0.0000016 76 | self.position_lr_delay_mult = 0.01 77 | self.position_lr_max_steps = 30_000 78 | self.feature_lr = 0.0025 79 | self.opacity_lr = 0.05 80 | self.scaling_lr = 0.005 81 | self.rotation_lr = 0.001 82 | self.percent_dense = 0.01 83 | self.lambda_dssim = 0.2 84 | self.densification_interval = 100 85 | self.opacity_reset_interval = 3000 86 | self.densify_from_iter = 500 87 | self.densify_until_iter = 15_000 88 | self.densify_grad_threshold = 0.0002 89 | 
self.random_background = False 90 | super().__init__(parser, "Optimization Parameters") 91 | 92 | def get_combined_args(parser : ArgumentParser): 93 | cmdlne_string = sys.argv[1:] 94 | cfgfile_string = "Namespace()" 95 | args_cmdline = parser.parse_args(cmdlne_string) 96 | 97 | try: 98 | cfgfilepath = os.path.join(args_cmdline.model_path, "cfg_args") 99 | print("Looking for config file in", cfgfilepath) 100 | with open(cfgfilepath) as cfg_file: 101 | print("Config file found: {}".format(cfgfilepath)) 102 | cfgfile_string = cfg_file.read() 103 | except TypeError: 104 | print("Config file not found at") 105 | pass 106 | args_cfgfile = eval(cfgfile_string) 107 | 108 | merged_dict = vars(args_cfgfile).copy() 109 | for k,v in vars(args_cmdline).items(): 110 | if v != None: 111 | merged_dict[k] = v 112 | return Namespace(**merged_dict) 113 | -------------------------------------------------------------------------------- /bash_scripts/0_train.sh: -------------------------------------------------------------------------------- 1 | GPU=0 2 | export CUDA_VISIBLE_DEVICES=${GPU} 3 | ls 4 | 5 | 6 | DATASET=tnt 7 | SCENE=Barn 8 | NAME=${SCENE} 9 | 10 | PROJECT=vcr_gaus 11 | 12 | TRIAL_NAME=vcr_gaus 13 | 14 | CFG=configs/${DATASET}/${SCENE}.yaml 15 | 16 | DIR=/your/log/path/${PROJECT}/${DATASET}/${NAME}/${TRIAL_NAME} 17 | 18 | python train.py \ 19 | --config=${CFG} \ 20 | --port=-1 \ 21 | --logdir=${DIR} \ 22 | --model.source_path=/your/data/path/${DATASET}/${SCENE}/ \ 23 | --model.resolution=1 \ 24 | --model.data_device=cpu \ 25 | --wandb \ 26 | --wandb_name ${PROJECT} 27 | -------------------------------------------------------------------------------- /bash_scripts/1_preprocess_tnt.sh: -------------------------------------------------------------------------------- 1 | echo "Compute intrinsics, undistort images and generate json files. 
This may take a while" 2 | python process_data/convert_tnt_to_json.py \ 3 | --tnt_path /your/data/path \ 4 | --run_colmap \ 5 | --export_json -------------------------------------------------------------------------------- /bash_scripts/2_extract_normal_dsine.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES=0 2 | 3 | DOMAIN_TYPE=indoor 4 | DATADIR=/your/data/path 5 | 6 | CODE_PATH=/your/dsine/code/path 7 | CKPT=/your/dsine/code/path/checkpoints/dsine.pt 8 | 9 | for SCENE in Barn Caterpillar Courthouse Ignatius Meetingroom Truck; 10 | do 11 | SCENE_PATH=${DATADIR}/${SCENE} 12 | # dsine 13 | python -W ignore process_data/extract_normal.py \ 14 | --dsine_path ${CODE_PATH} \ 15 | --ckpt ${CKPT} \ 16 | --img_path ${SCENE_PATH}/images \ 17 | --intrins_path ${SCENE_PATH}/ \ 18 | --output_path ${SCENE_PATH}/normals 19 | done -------------------------------------------------------------------------------- /bash_scripts/3_extract_mask.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES=0 2 | 3 | DATADIR=/your/data/path 4 | GSAM_PATH=~/code/gsam 5 | CKPT_PATH=${GSAM_PATH} 6 | 7 | for SCENE in Barn Caterpillar Courthouse Ignatius Meetingroom Truck; 8 | do 9 | SCENE_PATH=${DATADIR}/${SCENE} 10 | # meething room scene_tye: indoor, others: outdoor 11 | if [ ${SCENE} = "Meetingroom" ]; then 12 | SCENE_TYPE="indoor" 13 | else 14 | SCENE_TYPE="outdoor" 15 | fi 16 | python -W ignore process_data/extract_mask.py \ 17 | --config ${GSAM_PATH}/GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py \ 18 | --grounded_checkpoint ${CKPT_PATH}/groundingdino_swint_ogc.pth \ 19 | --sam_hq_checkpoint ${CKPT_PATH}/sam_hq_vit_h.pth \ 20 | --gsam_path ${GSAM_PATH} \ 21 | --use_sam_hq \ 22 | --input_image ${SCENE_PATH}/images/ \ 23 | --output_dir ${SCENE_PATH}/masks \ 24 | --box_threshold 0.5 \ 25 | --text_threshold 0.2 \ 26 | --scene ${SCENE} \ 27 | --scene_type ${SCENE_TYPE} \ 28 | --device "cuda" 29 | done 30 | -------------------------------------------------------------------------------- /bash_scripts/4_extract_normal_geow.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES=0 2 | 3 | # DOMAIN_TYPE=outdoor 4 | # DOMAIN_TYPE=indoor 5 | DOMAIN_TYPE=object 6 | DATADIR=/your/data/path/DTU_mask 7 | 8 | CODE_PATH=/your/geowizard/path 9 | 10 | 11 | for SCENE in scan106 scan114 scan122 scan37 scan55 scan65 scan83 scan105 scan110 scan118 scan24 scan40 scan63 scan69 scan97; 12 | do 13 | SCENE_PATH=${DATADIR}/${SCENE} 14 | python process_data/extract_normal_geo.py \ 15 | --code_path ${CODE_PATH} \ 16 | --input_dir ${SCENE_PATH}/images/ \ 17 | --output_dir ${SCENE_PATH}/ \ 18 | --ensemble_size 3 \ 19 | --denoise_steps 10 \ 20 | --seed 0 \ 21 | --domain ${DOMAIN_TYPE} 22 | done -------------------------------------------------------------------------------- /bash_scripts/convert.sh: -------------------------------------------------------------------------------- 1 | SCENE=Truck 2 | DATA_ROOT=/your/data/path/${SCENE} 3 | 4 | python convert.py -s $DATA_ROOT # [--resize] #If not resizing, ImageMagick is not needed 5 | 6 | 7 | -------------------------------------------------------------------------------- /bash_scripts/install.sh: -------------------------------------------------------------------------------- 1 | env=vcr 2 | conda create -n $env -y python=3.10 3 | conda activate $env 4 | pip install -e ".[train]" 5 | export 
CUDA_HOME=/usr/local/cuda-11.2 6 | pip install -r requirements.txt -------------------------------------------------------------------------------- /configs/360_v2/base.yaml: -------------------------------------------------------------------------------- 1 | _parent_: configs/reconstruct.yaml 2 | 3 | model: 4 | eval: True 5 | llffhold: 8 6 | split: False 7 | 8 | optim: 9 | mask_depth_thr: 1 10 | densify_large: 11 | percent_dense: 5e-2 12 | sample_cams: 13 | random: False 14 | num: 100 15 | loss_weight: 16 | semantic: 0 17 | l1_scale: 1 -------------------------------------------------------------------------------- /configs/config_base.yaml: -------------------------------------------------------------------------------- 1 | logdir: "/your/log/path/debug/" 2 | ip: 127.0.0.1 3 | port: -1 4 | detect_anomaly: False 5 | silent: 0 6 | seed: 0 7 | 8 | model: 9 | sh_degree: 3 10 | source_path: "/your/data/path/tnt/Barn/" 11 | model_path: "/your/log/path/" 12 | images: "images" 13 | resolution: -1 14 | white_background: False 15 | data_device: "cuda" 16 | eval: False 17 | llffhold: 1 18 | init_ply: "sparse/points3D.ply" 19 | max_init_points: 20 | split: False 21 | sphere: False 22 | load_depth: False 23 | load_normal: False 24 | load_mask: False 25 | normal_folder: 'normals' 26 | depth_folder: 'depths' 27 | use_decoupled_appearance: False 28 | ch_sem_feat: 0 29 | num_cls: 0 30 | max_mem: 22 31 | load_mask: False 32 | use_decoupled_appearance: False 33 | use_decoupled_dnormal: False 34 | ratio: 0 35 | mesh: 36 | voxel_size: 3e-3 37 | depth_type: 'traditional' 38 | 39 | optim: 40 | iterations: 30000 41 | position_lr_init: 0.00016 42 | position_lr_final: 0.0000016 43 | position_lr_delay_mult: 0.01 44 | position_lr_max_steps: 30000 45 | feature_lr: 0.0025 46 | sdf_lr: 0.001 47 | weight_decay: 1e-2 48 | opacity_lr: 0.05 49 | scaling_lr: 0.005 50 | rotation_lr: 0.001 51 | appearance_embeddings_lr: 0.001 52 | appearance_network_lr: 0.001 53 | cls_lr: 5e-4 54 | percent_dense: 0.01 55 | densification_interval: 100 56 | opacity_reset_interval: 3000 57 | densify_from_iter: 500 58 | densify_until_iter: 15000 59 | densify_grad_threshold: 0.0005 60 | random_background: False 61 | rand_pts: 20000 62 | edge_thr: 0 63 | mask_depth_thr: 0 64 | loss_weight: 65 | l1: 0.8 66 | ssim: 0.2 67 | distortion: 0. 
68 | semantic: 0 69 | mono_depth: 0 70 | mono_normal: 0 71 | depth_normal: 0 72 | prune: 73 | iterations: [] 74 | percent: 0.5 75 | decay: 0.6 76 | v_pow: 0.1 77 | 78 | pipline: 79 | convert_SHs_python: False 80 | compute_cov3D_python: False 81 | debug: False 82 | 83 | data: 84 | name: dummy 85 | 86 | train: 87 | test_iterations: [7000, 30000] 88 | save_iterations: [7000, 30000] 89 | checkpoint_iterations: [30000] 90 | save_splat: False 91 | start_checkpoint: 92 | debug_from: -1 93 | 94 | -------------------------------------------------------------------------------- /configs/dtu/base.yaml: -------------------------------------------------------------------------------- 1 | _parent_: configs/reconstruct.yaml 2 | 3 | model: 4 | use_decoupled_appearance: False 5 | use_decoupled_dnormal: False 6 | normal_folder: 'normal_npz_indoor' 7 | eval: False 8 | 9 | optim: 10 | exp_t: 0.01 11 | mask_depth_thr: 0 12 | loss_weight: 13 | l1_scale: 0.5 14 | consistent_normal_from_iter: 15000 15 | close_depth_from_iter: 15000 16 | densify_large: 17 | percent_dense: 1e-2 18 | sample_cams: 19 | random: False 20 | num: 30 21 | loss_weight: 22 | semantic: 0 23 | depth_normal: 0 24 | mono_normal: 0.01 25 | consistent_normal: 0.05 26 | distortion: 1000 27 | depth_var: 0 28 | random_background: False 29 | -------------------------------------------------------------------------------- /configs/dtu/dtu_scan24.yaml: -------------------------------------------------------------------------------- 1 | _parent_: configs/dtu/base.yaml 2 | -------------------------------------------------------------------------------- /configs/reconstruct.yaml: -------------------------------------------------------------------------------- 1 | _parent_: configs/config_base.yaml 2 | 3 | 4 | model: 5 | load_mask: False 6 | use_decoupled_appearance: False 7 | use_decoupled_dnormal: False 8 | ch_sem_feat: 2 9 | num_cls: 2 10 | depth_type: 'intersection' 11 | optim: 12 | mask_depth_thr: 0.8 13 | edge_thr: 0 14 | exp_t: 0.01 15 | cos_thr: -1 16 | close_depth_from_iter: 0 17 | normal_from_iter: 0 18 | dnormal_from_iter: 0 19 | consistent_normal_from_iter: 0 20 | curv_from_iter: 0 21 | loss_weight: 22 | l1: 0.8 23 | ssim: 0.2 24 | l1_scale: 1 25 | entropy: 0 26 | depth_var: 0. 
27 | mono_depth: 0 28 | mono_normal: 0.01 29 | depth_normal: 0.01 30 | consistent_normal: 0 31 | prune: 32 | iterations: [15000, 25000] 33 | percent: 0.5 34 | decay: 0.6 35 | v_pow: 0.1 36 | densify_large: 37 | percent_dense: 2e-3 38 | interval: 1 39 | sample_cams: 40 | random: True 41 | num: 200 42 | up: True 43 | around: True 44 | look_mode: 'target' 45 | random_background: True 46 | 47 | 48 | train: 49 | checkpoint_iterations: [] 50 | save_mesh: False 51 | save_iterations: [30000] -------------------------------------------------------------------------------- /configs/scannetpp/base.yaml: -------------------------------------------------------------------------------- 1 | _parent_: configs/reconstruct.yaml 2 | 3 | model: 4 | split: True 5 | eval: True 6 | use_decoupled_appearance: False 7 | use_decoupled_dnormal: False 8 | mesh: 9 | voxel_size: 1.5e-2 10 | 11 | optim: 12 | mask_depth_thr: 0 13 | curv_from_iter: 15000 14 | densify_large: 15 | percent_dense: 1e-2 16 | sample_cams: 17 | random: False 18 | loss_weight: 19 | semantic: 0 20 | curv: 0.05 -------------------------------------------------------------------------------- /configs/tnt/Barn.yaml: -------------------------------------------------------------------------------- 1 | _parent_: configs/tnt/base.yaml 2 | -------------------------------------------------------------------------------- /configs/tnt/Caterpillar.yaml: -------------------------------------------------------------------------------- 1 | _parent_: configs/tnt/base.yaml 2 | -------------------------------------------------------------------------------- /configs/tnt/Courthouse.yaml: -------------------------------------------------------------------------------- 1 | _parent_: configs/tnt/base.yaml 2 | -------------------------------------------------------------------------------- /configs/tnt/Ignatius.yaml: -------------------------------------------------------------------------------- 1 | _parent_: configs/tnt/base.yaml 2 | -------------------------------------------------------------------------------- /configs/tnt/Meetingroom.yaml: -------------------------------------------------------------------------------- 1 | _parent_: configs/tnt/base.yaml 2 | 3 | optim: 4 | exp_t: 1e-3 5 | mask_depth_thr: 0 6 | densify_large: 7 | percent_dense: 5e-3 8 | sample_cams: 9 | random: False 10 | loss_weight: 11 | semantic: 0 12 | model: 13 | num_cls: 3 14 | use_decoupled_appearance: False -------------------------------------------------------------------------------- /configs/tnt/Truck.yaml: -------------------------------------------------------------------------------- 1 | _parent_: configs/tnt/base.yaml 2 | -------------------------------------------------------------------------------- /configs/tnt/base.yaml: -------------------------------------------------------------------------------- 1 | _parent_: configs/reconstruct.yaml 2 | 3 | model: 4 | use_decoupled_appearance: True 5 | use_decoupled_dnormal: False 6 | eval: False 7 | llffhold: 5 8 | 9 | optim: 10 | exp_t: 5e-3 11 | loss_weight: 12 | depth_normal: 0.015 13 | semantic: 0.005 14 | l1_scale: 1 -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: fast_render 2 | channels: 3 | - pytorch 4 | - nvidia 5 | - conda-forge 6 | - defaults 7 | dependencies: 8 | - python=3.10 9 | - pytorch==2.0.1 10 | - torchvision==0.15.2 11 | - torchaudio==2.0.2 12 | - pytorch-cuda=11.8 13 | - pip: 14 
| - open3d 15 | - plyfile 16 | - ninja 17 | - GPUtil 18 | - opencv-python 19 | - lpips 20 | - trimesh 21 | - pymeshlab 22 | - termcolor 23 | - wandb 24 | - imageio 25 | - scikit-image 26 | - torchmetrics 27 | - mediapy 28 | - "git+https://github.com/facebookresearch/pytorch3d.git" 29 | - submodules/diff-gaussian-rasterization 30 | - submodules/simple-knn -------------------------------------------------------------------------------- /evaluation/crop_mesh.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import plyfile 4 | import argparse 5 | # import open3d as o3d 6 | import numpy as np 7 | # from tqdm import tqdm 8 | import trimesh 9 | from sklearn.cluster import DBSCAN 10 | 11 | 12 | def align_gt_with_cam(pts, trans): 13 | trans_inv = np.linalg.inv(trans) 14 | pts_aligned = pts @ trans_inv[:3, :3].transpose(-1, -2) + trans_inv[:3, -1] 15 | return pts_aligned 16 | 17 | 18 | def main(args): 19 | assert os.path.exists(args.ply_path), f"PLY file {args.ply_path} does not exist." 20 | gt_trans = np.loadtxt(args.align_path) 21 | 22 | mesh_rec = trimesh.load(args.ply_path, process=False) 23 | mesh_gt = trimesh.load(args.gt_path, process=False) 24 | 25 | mesh_gt.vertices = align_gt_with_cam(mesh_gt.vertices, gt_trans) 26 | 27 | to_align, _ = trimesh.bounds.oriented_bounds(mesh_gt) 28 | mesh_gt.vertices = (to_align[:3, :3] @ mesh_gt.vertices.T + to_align[:3, 3:]).T 29 | mesh_rec.vertices = (to_align[:3, :3] @ mesh_rec.vertices.T + to_align[:3, 3:]).T 30 | 31 | min_points = mesh_gt.vertices.min(axis=0) 32 | max_points = mesh_gt.vertices.max(axis=0) 33 | 34 | mask_min = (mesh_rec.vertices - min_points[None]) > 0 35 | mask_max = (mesh_rec.vertices - max_points[None]) < 0 36 | 37 | mask = np.concatenate((mask_min, mask_max), axis=1).all(axis=1) 38 | face_mask = mask[mesh_rec.faces].all(axis=1) 39 | 40 | mesh_rec.update_vertices(mask) 41 | mesh_rec.update_faces(face_mask) 42 | 43 | mesh_rec.vertices = (to_align[:3, :3].T @ mesh_rec.vertices.T - to_align[:3, :3].T @ to_align[:3, 3:]).T 44 | mesh_gt.vertices = (to_align[:3, :3].T @ mesh_gt.vertices.T - to_align[:3, :3].T @ to_align[:3, 3:]).T 45 | 46 | # save mesh_rec and mesh_rec in args.out_path 47 | mesh_rec.export(args.out_path) 48 | 49 | # downsample mesh_gt 50 | 51 | idx = np.random.choice(np.arange(len(mesh_gt.vertices)), 5000000) 52 | mesh_gt.vertices = mesh_gt.vertices[idx] 53 | mesh_gt.colors = mesh_gt.colors[idx] 54 | 55 | mesh_gt.export(args.gt_path.replace('.ply', '_trans.ply')) 56 | 57 | 58 | return 59 | 60 | 61 | 62 | if __name__ == '__main__': 63 | parser = argparse.ArgumentParser() 64 | parser.add_argument( 65 | "--gt_path", 66 | type=str, 67 | default='/your/path//Barn_GT.ply', 68 | help="path to a dataset/scene directory containing X.json, X.ply, ...", 69 | ) 70 | parser.add_argument( 71 | "--align_path", 72 | type=str, 73 | default='/your/path//Barn_trans.txt', 74 | help="path to a dataset/scene directory containing X.json, X.ply, ...", 75 | ) 76 | parser.add_argument( 77 | "--ply_path", 78 | type=str, 79 | default='/your/path//Barn_lowres.ply', 80 | help="path to reconstruction ply file", 81 | ) 82 | parser.add_argument( 83 | "--scene", 84 | type=str, 85 | default='Barn', 86 | help="path to reconstruction ply file", 87 | ) 88 | parser.add_argument( 89 | "--out_path", 90 | type=str, 91 | default='/your/path//Barn_lowres_crop.ply', 92 | help= 93 | "output directory, default: an evaluation directory is created in the directory of the ply file", 94 | ) 95 | args = 
parser.parse_args() 96 | 97 | main(args) -------------------------------------------------------------------------------- /evaluation/eval_dtu/eval.py: -------------------------------------------------------------------------------- 1 | # adapted from https://github.com/jzhangbs/DTUeval-python 2 | import numpy as np 3 | import open3d as o3d 4 | import sklearn.neighbors as skln 5 | from tqdm import tqdm 6 | from scipy.io import loadmat 7 | import multiprocessing as mp 8 | import argparse 9 | 10 | def sample_single_tri(input_): 11 | n1, n2, v1, v2, tri_vert = input_ 12 | c = np.mgrid[:n1+1, :n2+1] 13 | c += 0.5 14 | c[0] /= max(n1, 1e-7) 15 | c[1] /= max(n2, 1e-7) 16 | c = np.transpose(c, (1,2,0)) 17 | k = c[c.sum(axis=-1) < 1] # m2 18 | q = v1 * k[:,:1] + v2 * k[:,1:] + tri_vert 19 | return q 20 | 21 | def write_vis_pcd(file, points, colors): 22 | pcd = o3d.geometry.PointCloud() 23 | pcd.points = o3d.utility.Vector3dVector(points) 24 | pcd.colors = o3d.utility.Vector3dVector(colors) 25 | o3d.io.write_point_cloud(file, pcd) 26 | 27 | if __name__ == '__main__': 28 | mp.freeze_support() 29 | 30 | parser = argparse.ArgumentParser() 31 | parser.add_argument('--data', type=str, default='data_in.ply') 32 | parser.add_argument('--scan', type=int, default=1) 33 | parser.add_argument('--mode', type=str, default='mesh', choices=['mesh', 'pcd']) 34 | parser.add_argument('--dataset_dir', type=str, default='.') 35 | parser.add_argument('--vis_out_dir', type=str, default='.') 36 | parser.add_argument('--downsample_density', type=float, default=0.2) 37 | parser.add_argument('--patch_size', type=float, default=60) 38 | parser.add_argument('--max_dist', type=float, default=20) 39 | parser.add_argument('--visualize_threshold', type=float, default=10) 40 | args = parser.parse_args() 41 | 42 | thresh = args.downsample_density 43 | if args.mode == 'mesh': 44 | pbar = tqdm(total=9) 45 | pbar.set_description('read data mesh') 46 | data_mesh = o3d.io.read_triangle_mesh(args.data) 47 | 48 | vertices = np.asarray(data_mesh.vertices) 49 | triangles = np.asarray(data_mesh.triangles) 50 | tri_vert = vertices[triangles] 51 | 52 | pbar.update(1) 53 | pbar.set_description('sample pcd from mesh') 54 | v1 = tri_vert[:,1] - tri_vert[:,0] 55 | v2 = tri_vert[:,2] - tri_vert[:,0] 56 | l1 = np.linalg.norm(v1, axis=-1, keepdims=True) 57 | l2 = np.linalg.norm(v2, axis=-1, keepdims=True) 58 | area2 = np.linalg.norm(np.cross(v1, v2), axis=-1, keepdims=True) 59 | non_zero_area = (area2 > 0)[:,0] 60 | l1, l2, area2, v1, v2, tri_vert = [ 61 | arr[non_zero_area] for arr in [l1, l2, area2, v1, v2, tri_vert] 62 | ] 63 | thr = thresh * np.sqrt(l1 * l2 / area2) 64 | n1 = np.floor(l1 / thr) 65 | n2 = np.floor(l2 / thr) 66 | 67 | with mp.Pool() as mp_pool: 68 | new_pts = mp_pool.map(sample_single_tri, ((n1[i,0], n2[i,0], v1[i:i+1], v2[i:i+1], tri_vert[i:i+1,0]) for i in range(len(n1))), chunksize=1024) 69 | 70 | new_pts = np.concatenate(new_pts, axis=0) 71 | data_pcd = np.concatenate([vertices, new_pts], axis=0) 72 | 73 | elif args.mode == 'pcd': 74 | pbar = tqdm(total=8) 75 | pbar.set_description('read data pcd') 76 | data_pcd_o3d = o3d.io.read_point_cloud(args.data) 77 | data_pcd = np.asarray(data_pcd_o3d.points) 78 | 79 | pbar.update(1) 80 | pbar.set_description('random shuffle pcd index') 81 | shuffle_rng = np.random.default_rng() 82 | shuffle_rng.shuffle(data_pcd, axis=0) 83 | 84 | pbar.update(1) 85 | pbar.set_description('downsample pcd') 86 | nn_engine = skln.NearestNeighbors(n_neighbors=1, radius=thresh, algorithm='kd_tree', n_jobs=-1) 
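# Greedy radius-based downsampling: each surviving point suppresses every neighbor the
# kd-tree finds within `thresh` (the --downsample_density radius), so the retained samples
# are roughly evenly spaced before the chamfer-style distance computation below.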
87 | nn_engine.fit(data_pcd) 88 | rnn_idxs = nn_engine.radius_neighbors(data_pcd, radius=thresh, return_distance=False) 89 | mask = np.ones(data_pcd.shape[0], dtype=np.bool_) 90 | for curr, idxs in enumerate(rnn_idxs): 91 | if mask[curr]: 92 | mask[idxs] = 0 93 | mask[curr] = 1 94 | data_down = data_pcd[mask] 95 | 96 | pbar.update(1) 97 | pbar.set_description('masking data pcd') 98 | obs_mask_file = loadmat(f'{args.dataset_dir}/ObsMask/ObsMask{args.scan}_10.mat') 99 | ObsMask, BB, Res = [obs_mask_file[attr] for attr in ['ObsMask', 'BB', 'Res']] 100 | BB = BB.astype(np.float32) 101 | 102 | patch = args.patch_size 103 | inbound = ((data_down >= BB[:1]-patch) & (data_down < BB[1:]+patch*2)).sum(axis=-1) ==3 104 | data_in = data_down[inbound] 105 | 106 | data_grid = np.around((data_in - BB[:1]) / Res).astype(np.int32) 107 | grid_inbound = ((data_grid >= 0) & (data_grid < np.expand_dims(ObsMask.shape, 0))).sum(axis=-1) ==3 108 | data_grid_in = data_grid[grid_inbound] 109 | in_obs = ObsMask[data_grid_in[:,0], data_grid_in[:,1], data_grid_in[:,2]].astype(np.bool_) 110 | data_in_obs = data_in[grid_inbound][in_obs] 111 | 112 | pbar.update(1) 113 | pbar.set_description('read STL pcd') 114 | stl_pcd = o3d.io.read_point_cloud(f'{args.dataset_dir}/Points/stl/stl{args.scan:03}_total.ply') 115 | stl = np.asarray(stl_pcd.points) 116 | 117 | pbar.update(1) 118 | pbar.set_description('compute data2stl') 119 | nn_engine.fit(stl) 120 | dist_d2s, idx_d2s = nn_engine.kneighbors(data_in_obs, n_neighbors=1, return_distance=True) 121 | max_dist = args.max_dist 122 | mean_d2s = dist_d2s[dist_d2s < max_dist].mean() 123 | 124 | pbar.update(1) 125 | pbar.set_description('compute stl2data') 126 | ground_plane = loadmat(f'{args.dataset_dir}/ObsMask/Plane{args.scan}.mat')['P'] 127 | 128 | stl_hom = np.concatenate([stl, np.ones_like(stl[:,:1])], -1) 129 | above = (ground_plane.reshape((1,4)) * stl_hom).sum(-1) > 0 130 | stl_above = stl[above] 131 | 132 | nn_engine.fit(data_in) 133 | dist_s2d, idx_s2d = nn_engine.kneighbors(stl_above, n_neighbors=1, return_distance=True) 134 | mean_s2d = dist_s2d[dist_s2d < max_dist].mean() 135 | 136 | pbar.update(1) 137 | pbar.set_description('visualize error') 138 | vis_dist = args.visualize_threshold 139 | R = np.array([[1,0,0]], dtype=np.float64) 140 | G = np.array([[0,1,0]], dtype=np.float64) 141 | B = np.array([[0,0,1]], dtype=np.float64) 142 | W = np.array([[1,1,1]], dtype=np.float64) 143 | data_color = np.tile(B, (data_down.shape[0], 1)) 144 | data_alpha = dist_d2s.clip(max=vis_dist) / vis_dist 145 | data_color[ np.where(inbound)[0][grid_inbound][in_obs] ] = R * data_alpha + W * (1-data_alpha) 146 | data_color[ np.where(inbound)[0][grid_inbound][in_obs][dist_d2s[:,0] >= max_dist] ] = G 147 | write_vis_pcd(f'{args.vis_out_dir}/vis_{args.scan:03}_d2s.ply', data_down, data_color) 148 | stl_color = np.tile(B, (stl.shape[0], 1)) 149 | stl_alpha = dist_s2d.clip(max=vis_dist) / vis_dist 150 | stl_color[ np.where(above)[0] ] = R * stl_alpha + W * (1-stl_alpha) 151 | stl_color[ np.where(above)[0][dist_s2d[:,0] >= max_dist] ] = G 152 | write_vis_pcd(f'{args.vis_out_dir}/vis_{args.scan:03}_s2d.ply', stl, stl_color) 153 | 154 | pbar.update(1) 155 | pbar.set_description('done') 156 | pbar.close() 157 | over_all = (mean_d2s + mean_s2d) / 2 158 | print(mean_d2s, mean_s2d, over_all) 159 | 160 | import json 161 | with open(f'{args.vis_out_dir}/results.json', 'w') as fp: 162 | json.dump({ 163 | 'mean_d2s': mean_d2s, 164 | 'mean_s2d': mean_s2d, 165 | 'overall': over_all, 166 | }, fp, indent=True) 167 
| 168 | 169 | -------------------------------------------------------------------------------- /evaluation/eval_dtu/evaluate_single_scene.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import cv2 5 | import numpy as np 6 | import os 7 | import glob 8 | from skimage.morphology import binary_dilation, disk 9 | import argparse 10 | 11 | import trimesh 12 | from pathlib import Path 13 | from tqdm import tqdm 14 | 15 | import sys 16 | 17 | sys.path.append(os.getcwd()) 18 | 19 | import evaluation.eval_dtu.render_utils as rend_util 20 | 21 | 22 | def cull_scan(scan, mesh_path, result_mesh_file, instance_dir): 23 | 24 | # load poses 25 | image_dir = '{0}/images'.format(instance_dir) 26 | image_paths = sorted(glob.glob(os.path.join(image_dir, "*.png"))) 27 | n_images = len(image_paths) 28 | cam_file = '{0}/cameras.npz'.format(instance_dir) 29 | camera_dict = np.load(cam_file) 30 | scale_mats = [camera_dict['scale_mat_%d' % idx].astype(np.float32) for idx in range(n_images)] 31 | world_mats = [camera_dict['world_mat_%d' % idx].astype(np.float32) for idx in range(n_images)] 32 | 33 | intrinsics_all = [] 34 | pose_all = [] 35 | for scale_mat, world_mat in zip(scale_mats, world_mats): 36 | P = world_mat @ scale_mat 37 | P = P[:3, :4] 38 | intrinsics, pose = rend_util.load_K_Rt_from_P(None, P) 39 | intrinsics_all.append(torch.from_numpy(intrinsics).float()) 40 | pose_all.append(torch.from_numpy(pose).float()) 41 | 42 | # load mask 43 | mask_dir = '{0}/mask'.format(instance_dir) 44 | mask_paths = sorted(glob.glob(os.path.join(mask_dir, "*.png"))) 45 | masks = [] 46 | for p in mask_paths: 47 | mask = cv2.imread(p) 48 | masks.append(mask) 49 | 50 | # hard-coded image shape 51 | W, H = 1600, 1200 52 | 53 | # load mesh 54 | mesh = trimesh.load(mesh_path) 55 | 56 | # load transformation matrix 57 | 58 | vertices = mesh.vertices 59 | 60 | # project and filter 61 | vertices = torch.from_numpy(vertices).cuda() 62 | vertices = torch.cat((vertices, torch.ones_like(vertices[:, :1])), dim=-1) 63 | vertices = vertices.permute(1, 0) 64 | vertices = vertices.float() 65 | 66 | sampled_masks = [] 67 | for i in tqdm(range(n_images), desc="Culling mesh given masks"): 68 | pose = pose_all[i] 69 | w2c = torch.inverse(pose).cuda() 70 | intrinsic = intrinsics_all[i].cuda() 71 | 72 | with torch.no_grad(): 73 | # transform and project 74 | cam_points = intrinsic @ w2c @ vertices 75 | pix_coords = cam_points[:2, :] / (cam_points[2, :].unsqueeze(0) + 1e-6) 76 | pix_coords = pix_coords.permute(1, 0) 77 | pix_coords[..., 0] /= W - 1 78 | pix_coords[..., 1] /= H - 1 79 | pix_coords = (pix_coords - 0.5) * 2 80 | valid = ((pix_coords > -1. ) & (pix_coords < 1.)).all(dim=-1).float() 81 | 82 | # dialate mask similar to unisurf 83 | maski = masks[i][:, :, 0].astype(np.float32) / 256. 84 | maski = torch.from_numpy(binary_dilation(maski, disk(24))).float()[None, None].cuda() 85 | 86 | sampled_mask = F.grid_sample(maski, pix_coords[None, None], mode='nearest', padding_mode='zeros', align_corners=True)[0, -1, 0] 87 | 88 | sampled_mask = sampled_mask + (1. 
- valid) 89 | sampled_masks.append(sampled_mask) 90 | 91 | sampled_masks = torch.stack(sampled_masks, -1) 92 | # filter 93 | 94 | mask = (sampled_masks > 0.).all(dim=-1).cpu().numpy() 95 | face_mask = mask[mesh.faces].all(axis=1) 96 | 97 | mesh.update_vertices(mask) 98 | mesh.update_faces(face_mask) 99 | 100 | # transform vertices to world 101 | scale_mat = scale_mats[0] 102 | mesh.vertices = mesh.vertices * scale_mat[0, 0] + scale_mat[:3, 3][None] 103 | 104 | # Taking the biggest connected component 105 | print("Taking the biggest connected component") 106 | components = mesh.split(only_watertight=False) 107 | areas = np.array([c.area for c in components], dtype=np.float32) 108 | mesh = components[areas.argmax()] 109 | 110 | mesh.export(result_mesh_file) 111 | del mesh 112 | 113 | 114 | if __name__ == "__main__": 115 | 116 | parser = argparse.ArgumentParser( 117 | description='Arguments to evaluate the mesh.' 118 | ) 119 | 120 | parser.add_argument('--input_mesh', type=str, help='path to the mesh to be evaluated') 121 | parser.add_argument('--scan_id', type=str, help='scan id of the input mesh') 122 | parser.add_argument('--output_dir', type=str, default='evaluation_results_single', help='path to the output folder') 123 | parser.add_argument('--mask_dir', type=str, default='mask', help='path to uncropped mask') 124 | parser.add_argument('--DTU', type=str, default='Offical_DTU_Dataset', help='path to the GT DTU point clouds') 125 | args = parser.parse_args() 126 | 127 | Offical_DTU_Dataset = args.DTU 128 | out_dir = args.output_dir 129 | Path(out_dir).mkdir(parents=True, exist_ok=True) 130 | 131 | scan = args.scan_id 132 | ply_file = args.input_mesh 133 | print("cull mesh ....") 134 | result_mesh_file = os.path.join(out_dir, "culled_mesh.ply") 135 | cull_scan(scan, ply_file, result_mesh_file, instance_dir=os.path.join(args.mask_dir, f'scan{args.scan_id}')) 136 | 137 | script_dir = os.path.dirname(os.path.abspath(__file__)) 138 | cmd = f"python {script_dir}/eval.py --data {result_mesh_file} --scan {scan} --mode mesh --dataset_dir {Offical_DTU_Dataset} --vis_out_dir {out_dir}" 139 | os.system(cmd) -------------------------------------------------------------------------------- /evaluation/eval_dtu/render_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import imageio 3 | import skimage 4 | import cv2 5 | import torch 6 | from torch.nn import functional as F 7 | 8 | 9 | def get_psnr(img1, img2, normalize_rgb=False): 10 | if normalize_rgb: # [-1,1] --> [0,1] 11 | img1 = (img1 + 1.) / 2. 12 | img2 = (img2 + 1. ) / 2. 13 | 14 | mse = torch.mean((img1 - img2) ** 2) 15 | psnr = -10. * torch.log(mse) / torch.log(torch.Tensor([10.]).cuda()) 16 | 17 | return psnr 18 | 19 | 20 | def load_rgb(path, normalize_rgb = False): 21 | img = imageio.imread(path) 22 | img = skimage.img_as_float32(img) 23 | 24 | if normalize_rgb: # [-1,1] --> [0,1] 25 | img -= 0.5 26 | img *= 2. 
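# Reorder from H x W x C (imageio/skimage layout) to C x H x W for torch-style consumers.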
27 | img = img.transpose(2, 0, 1) 28 | return img 29 | 30 | 31 | def load_K_Rt_from_P(filename, P=None): 32 | if P is None: 33 | lines = open(filename).read().splitlines() 34 | if len(lines) == 4: 35 | lines = lines[1:] 36 | lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] 37 | P = np.asarray(lines).astype(np.float32).squeeze() 38 | 39 | out = cv2.decomposeProjectionMatrix(P) 40 | K = out[0] 41 | R = out[1] 42 | t = out[2] 43 | 44 | K = K/K[2,2] 45 | intrinsics = np.eye(4) 46 | intrinsics[:3, :3] = K 47 | 48 | pose = np.eye(4, dtype=np.float32) 49 | pose[:3, :3] = R.transpose() 50 | pose[:3,3] = (t[:3] / t[3])[:,0] 51 | 52 | return intrinsics, pose 53 | 54 | 55 | def get_camera_params(uv, pose, intrinsics): 56 | if pose.shape[1] == 7: #In case of quaternion vector representation 57 | cam_loc = pose[:, 4:] 58 | R = quat_to_rot(pose[:,:4]) 59 | p = torch.eye(4).repeat(pose.shape[0],1,1).cuda().float() 60 | p[:, :3, :3] = R 61 | p[:, :3, 3] = cam_loc 62 | else: # In case of pose matrix representation 63 | cam_loc = pose[:, :3, 3] 64 | p = pose 65 | 66 | batch_size, num_samples, _ = uv.shape 67 | 68 | depth = torch.ones((batch_size, num_samples)).cuda() 69 | x_cam = uv[:, :, 0].view(batch_size, -1) 70 | y_cam = uv[:, :, 1].view(batch_size, -1) 71 | z_cam = depth.view(batch_size, -1) 72 | 73 | pixel_points_cam = lift(x_cam, y_cam, z_cam, intrinsics=intrinsics) 74 | 75 | # permute for batch matrix product 76 | pixel_points_cam = pixel_points_cam.permute(0, 2, 1) 77 | 78 | world_coords = torch.bmm(p, pixel_points_cam).permute(0, 2, 1)[:, :, :3] 79 | ray_dirs = world_coords - cam_loc[:, None, :] 80 | ray_dirs = F.normalize(ray_dirs, dim=2) 81 | 82 | return ray_dirs, cam_loc 83 | 84 | 85 | def get_camera_for_plot(pose): 86 | if pose.shape[1] == 7: #In case of quaternion vector representation 87 | cam_loc = pose[:, 4:].detach() 88 | R = quat_to_rot(pose[:,:4].detach()) 89 | else: # In case of pose matrix representation 90 | cam_loc = pose[:, :3, 3] 91 | R = pose[:, :3, :3] 92 | cam_dir = R[:, :3, 2] 93 | return cam_loc, cam_dir 94 | 95 | 96 | def lift(x, y, z, intrinsics): 97 | # parse intrinsics 98 | intrinsics = intrinsics.cuda() 99 | fx = intrinsics[:, 0, 0] 100 | fy = intrinsics[:, 1, 1] 101 | cx = intrinsics[:, 0, 2] 102 | cy = intrinsics[:, 1, 2] 103 | sk = intrinsics[:, 0, 1] 104 | 105 | x_lift = (x - cx.unsqueeze(-1) + cy.unsqueeze(-1)*sk.unsqueeze(-1)/fy.unsqueeze(-1) - sk.unsqueeze(-1)*y/fy.unsqueeze(-1)) / fx.unsqueeze(-1) * z 106 | y_lift = (y - cy.unsqueeze(-1)) / fy.unsqueeze(-1) * z 107 | 108 | # homogeneous 109 | return torch.stack((x_lift, y_lift, z, torch.ones_like(z).cuda()), dim=-1) 110 | 111 | 112 | def quat_to_rot(q): 113 | batch_size, _ = q.shape 114 | q = F.normalize(q, dim=1) 115 | R = torch.ones((batch_size, 3,3)).cuda() 116 | qr=q[:,0] 117 | qi = q[:, 1] 118 | qj = q[:, 2] 119 | qk = q[:, 3] 120 | R[:, 0, 0]=1-2 * (qj**2 + qk**2) 121 | R[:, 0, 1] = 2 * (qj *qi -qk*qr) 122 | R[:, 0, 2] = 2 * (qi * qk + qr * qj) 123 | R[:, 1, 0] = 2 * (qj * qi + qk * qr) 124 | R[:, 1, 1] = 1-2 * (qi**2 + qk**2) 125 | R[:, 1, 2] = 2*(qj*qk - qi*qr) 126 | R[:, 2, 0] = 2 * (qk * qi-qj * qr) 127 | R[:, 2, 1] = 2 * (qj*qk + qi*qr) 128 | R[:, 2, 2] = 1-2 * (qi**2 + qj**2) 129 | return R 130 | 131 | 132 | def rot_to_quat(R): 133 | batch_size, _,_ = R.shape 134 | q = torch.ones((batch_size, 4)).cuda() 135 | 136 | R00 = R[:, 0,0] 137 | R01 = R[:, 0, 1] 138 | R02 = R[:, 0, 2] 139 | R10 = R[:, 1, 0] 140 | R11 = R[:, 1, 1] 141 | R12 = R[:, 1, 2] 142 | R20 = R[:, 2, 0] 143 | R21 
= R[:, 2, 1] 144 | R22 = R[:, 2, 2] 145 | 146 | q[:,0]=torch.sqrt(1.0+R00+R11+R22)/2 147 | q[:, 1]=(R21-R12)/(4*q[:,0]) 148 | q[:, 2] = (R02 - R20) / (4 * q[:, 0]) 149 | q[:, 3] = (R10 - R01) / (4 * q[:, 0]) 150 | return q 151 | 152 | 153 | def get_sphere_intersections(cam_loc, ray_directions, r = 1.0): 154 | # Input: n_rays x 3 ; n_rays x 3 155 | # Output: n_rays x 1, n_rays x 1 (close and far) 156 | 157 | ray_cam_dot = torch.bmm(ray_directions.view(-1, 1, 3), 158 | cam_loc.view(-1, 3, 1)).squeeze(-1) 159 | under_sqrt = ray_cam_dot ** 2 - (cam_loc.norm(2, 1, keepdim=True) ** 2 - r ** 2) 160 | 161 | # sanity check 162 | if (under_sqrt <= 0).sum() > 0: 163 | print('BOUNDING SPHERE PROBLEM!') 164 | exit() 165 | 166 | sphere_intersections = torch.sqrt(under_sqrt) * torch.Tensor([-1, 1]).cuda().float() - ray_cam_dot 167 | sphere_intersections = sphere_intersections.clamp_min(0.0) 168 | 169 | return sphere_intersections -------------------------------------------------------------------------------- /evaluation/eval_tnt.py: -------------------------------------------------------------------------------- 1 | import os 2 | import trimesh 3 | import argparse 4 | import numpy as np 5 | import open3d as o3d 6 | from sklearn.neighbors import KDTree 7 | 8 | 9 | def nn_correspondance(verts1, verts2): 10 | indices = [] 11 | distances = [] 12 | if len(verts1) == 0 or len(verts2) == 0: 13 | return indices, distances 14 | 15 | kdtree = KDTree(verts1) 16 | distances, indices = kdtree.query(verts2) 17 | distances = distances.reshape(-1) 18 | 19 | return distances 20 | 21 | 22 | def evaluate(mesh_pred, mesh_trgt, threshold=.05, down_sample=.02): 23 | pcd_trgt = o3d.geometry.PointCloud() 24 | pcd_pred = o3d.geometry.PointCloud() 25 | 26 | pcd_trgt.points = o3d.utility.Vector3dVector(mesh_trgt.vertices[:, :3]) 27 | pcd_pred.points = o3d.utility.Vector3dVector(mesh_pred.vertices[:, :3]) 28 | 29 | if down_sample: 30 | pcd_pred = pcd_pred.voxel_down_sample(down_sample) 31 | pcd_trgt = pcd_trgt.voxel_down_sample(down_sample) 32 | 33 | verts_pred = np.asarray(pcd_pred.points) 34 | verts_trgt = np.asarray(pcd_trgt.points) 35 | 36 | dist1 = nn_correspondance(verts_pred, verts_trgt) 37 | dist2 = nn_correspondance(verts_trgt, verts_pred) 38 | 39 | precision = np.mean((dist2 < threshold).astype('float')) 40 | recal = np.mean((dist1 < threshold).astype('float')) 41 | fscore = 2 * precision * recal / (precision + recal) 42 | metrics = { 43 | 'Acc': np.mean(dist2), 44 | 'Comp': np.mean(dist1), 45 | 'Prec': precision, 46 | 'Recal': recal, 47 | 'F-score': fscore, 48 | } 49 | return metrics 50 | 51 | 52 | def main(args): 53 | assert os.path.exists(args.ply_path), f"PLY file {args.ply_path} does not exist." 
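    # Pipeline below: load the reconstructed and ground-truth meshes, rotate both into the
    # GT mesh's oriented-bounding-box frame, crop the reconstruction to the GT's axis-aligned
    # extent in that frame, then score it with nearest-neighbor accuracy/completeness and
    # precision/recall/F-score at the default 0.05 distance threshold (after 0.02 voxel downsampling).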
54 | 55 | mesh_rec = trimesh.load(args.ply_path, process=False) 56 | mesh_gt = trimesh.load(args.gt_path, process=False) 57 | 58 | to_align, _ = trimesh.bounds.oriented_bounds(mesh_gt) 59 | mesh_gt.vertices = (to_align[:3, :3] @ mesh_gt.vertices.T + to_align[:3, 3:]).T 60 | mesh_rec.vertices = (to_align[:3, :3] @ mesh_rec.vertices.T + to_align[:3, 3:]).T 61 | 62 | min_points = mesh_gt.vertices.min(axis=0) 63 | max_points = mesh_gt.vertices.max(axis=0) 64 | 65 | mask_min = (mesh_rec.vertices - min_points[None]) > 0 66 | mask_max = (mesh_rec.vertices - max_points[None]) < 0 67 | 68 | mask = np.concatenate((mask_min, mask_max), axis=1).all(axis=1) 69 | face_mask = mask[mesh_rec.faces].all(axis=1) 70 | 71 | mesh_rec.update_vertices(mask) 72 | mesh_rec.update_faces(face_mask) 73 | 74 | metrics = evaluate(mesh_rec, mesh_gt) 75 | 76 | metrics_path = os.path.join(os.path.dirname(args.ply_path), 'metrics.txt') 77 | with open(metrics_path, 'w') as f: 78 | for k, v in metrics.items(): 79 | f.write(f'{k}: {v}\n') 80 | 81 | print('Scene: {} F-score: {}'.format(args.scene, metrics['F-score'])) 82 | 83 | mesh_rec.vertices = (to_align[:3, :3].T @ mesh_rec.vertices.T - to_align[:3, :3].T @ to_align[:3, 3:]).T 84 | mesh_rec.export(args.ply_path.replace('.ply', '_crop.ply')) 85 | 86 | return 87 | 88 | 89 | if __name__ == '__main__': 90 | parser = argparse.ArgumentParser() 91 | parser.add_argument( 92 | "--gt_path", 93 | type=str, 94 | default='/your/path//Barn_GT.ply', 95 | help="path to a dataset/scene directory containing X.json, X.ply, ...", 96 | ) 97 | parser.add_argument( 98 | "--ply_path", 99 | type=str, 100 | default='/your/path//Barn_lowres.ply', 101 | help="path to reconstruction ply file", 102 | ) 103 | parser.add_argument( 104 | "--scene", 105 | type=str, 106 | default='Barn', 107 | help="path to reconstruction ply file", 108 | ) 109 | args = parser.parse_args() 110 | 111 | main(args) 112 | 113 | -------------------------------------------------------------------------------- /evaluation/full_eval.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import os 13 | from argparse import ArgumentParser 14 | 15 | mipnerf360_outdoor_scenes = ["bicycle", "flowers", "garden", "stump", "treehill"] 16 | mipnerf360_indoor_scenes = ["room", "counter", "kitchen", "bonsai"] 17 | tanks_and_temples_scenes = ["truck", "train"] 18 | deep_blending_scenes = ["drjohnson", "playroom"] 19 | 20 | parser = ArgumentParser(description="Full evaluation script parameters") 21 | parser.add_argument("--skip_training", action="store_true") 22 | parser.add_argument("--skip_rendering", action="store_true") 23 | parser.add_argument("--skip_metrics", action="store_true") 24 | parser.add_argument("--output_path", default="./eval") 25 | args, _ = parser.parse_known_args() 26 | 27 | all_scenes = [] 28 | all_scenes.extend(mipnerf360_outdoor_scenes) 29 | all_scenes.extend(mipnerf360_indoor_scenes) 30 | all_scenes.extend(tanks_and_temples_scenes) 31 | all_scenes.extend(deep_blending_scenes) 32 | 33 | if not args.skip_training or not args.skip_rendering: 34 | parser.add_argument('--mipnerf360', "-m360", required=True, type=str) 35 | parser.add_argument("--tanksandtemples", "-tat", required=True, type=str) 36 | parser.add_argument("--deepblending", "-db", required=True, type=str) 37 | args = parser.parse_args() 38 | 39 | if not args.skip_training: 40 | common_args = " --quiet --eval --test_iterations -1 " 41 | for scene in mipnerf360_outdoor_scenes: 42 | source = args.mipnerf360 + "/" + scene 43 | os.system("python train.py -s " + source + " -i images_4 -m " + args.output_path + "/" + scene + common_args) 44 | for scene in mipnerf360_indoor_scenes: 45 | source = args.mipnerf360 + "/" + scene 46 | os.system("python train.py -s " + source + " -i images_2 -m " + args.output_path + "/" + scene + common_args) 47 | for scene in tanks_and_temples_scenes: 48 | source = args.tanksandtemples + "/" + scene 49 | os.system("python train.py -s " + source + " -m " + args.output_path + "/" + scene + common_args) 50 | for scene in deep_blending_scenes: 51 | source = args.deepblending + "/" + scene 52 | os.system("python train.py -s " + source + " -m " + args.output_path + "/" + scene + common_args) 53 | 54 | if not args.skip_rendering: 55 | all_sources = [] 56 | for scene in mipnerf360_outdoor_scenes: 57 | all_sources.append(args.mipnerf360 + "/" + scene) 58 | for scene in mipnerf360_indoor_scenes: 59 | all_sources.append(args.mipnerf360 + "/" + scene) 60 | for scene in tanks_and_temples_scenes: 61 | all_sources.append(args.tanksandtemples + "/" + scene) 62 | for scene in deep_blending_scenes: 63 | all_sources.append(args.deepblending + "/" + scene) 64 | 65 | common_args = " --quiet --eval --skip_train" 66 | for scene, source in zip(all_scenes, all_sources): 67 | os.system("python render.py --iteration 7000 -s " + source + " -m " + args.output_path + "/" + scene + common_args) 68 | os.system("python render.py --iteration 30000 -s " + source + " -m " + args.output_path + "/" + scene + common_args) 69 | 70 | if not args.skip_metrics: 71 | scenes_string = "" 72 | for scene in all_scenes: 73 | scenes_string += "\"" + args.output_path + "/" + scene + "\" " 74 | 75 | os.system("python metrics.py -m " + scenes_string) -------------------------------------------------------------------------------- /evaluation/lpipsPyTorch/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .modules.lpips import LPIPS 4 | 5 | 6 | def lpips(x: torch.Tensor, 7 | y: 
torch.Tensor, 8 | net_type: str = 'alex', 9 | version: str = '0.1'): 10 | r"""Function that measures 11 | Learned Perceptual Image Patch Similarity (LPIPS). 12 | 13 | Arguments: 14 | x, y (torch.Tensor): the input tensors to compare. 15 | net_type (str): the network type to compare the features: 16 | 'alex' | 'squeeze' | 'vgg'. Default: 'alex'. 17 | version (str): the version of LPIPS. Default: 0.1. 18 | """ 19 | device = x.device 20 | criterion = LPIPS(net_type, version).to(device) 21 | return criterion(x, y) 22 | -------------------------------------------------------------------------------- /evaluation/lpipsPyTorch/modules/lpips.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .networks import get_network, LinLayers 5 | from .utils import get_state_dict 6 | 7 | 8 | class LPIPS(nn.Module): 9 | r"""Creates a criterion that measures 10 | Learned Perceptual Image Patch Similarity (LPIPS). 11 | 12 | Arguments: 13 | net_type (str): the network type to compare the features: 14 | 'alex' | 'squeeze' | 'vgg'. Default: 'alex'. 15 | version (str): the version of LPIPS. Default: 0.1. 16 | """ 17 | def __init__(self, net_type: str = 'alex', version: str = '0.1'): 18 | 19 | assert version in ['0.1'], 'v0.1 is only supported now' 20 | 21 | super(LPIPS, self).__init__() 22 | 23 | # pretrained network 24 | self.net = get_network(net_type) 25 | 26 | # linear layers 27 | self.lin = LinLayers(self.net.n_channels_list) 28 | self.lin.load_state_dict(get_state_dict(net_type, version)) 29 | 30 | def forward(self, x: torch.Tensor, y: torch.Tensor): 31 | feat_x, feat_y = self.net(x), self.net(y) 32 | 33 | diff = [(fx - fy) ** 2 for fx, fy in zip(feat_x, feat_y)] 34 | res = [l(d).mean((2, 3), True) for d, l in zip(diff, self.lin)] 35 | 36 | return torch.sum(torch.cat(res, 0), 0, True) 37 | -------------------------------------------------------------------------------- /evaluation/lpipsPyTorch/modules/networks.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence 2 | 3 | from itertools import chain 4 | 5 | import torch 6 | import torch.nn as nn 7 | from torchvision import models 8 | 9 | from .utils import normalize_activation 10 | 11 | 12 | def get_network(net_type: str): 13 | if net_type == 'alex': 14 | return AlexNet() 15 | elif net_type == 'squeeze': 16 | return SqueezeNet() 17 | elif net_type == 'vgg': 18 | return VGG16() 19 | else: 20 | raise NotImplementedError('choose net_type from [alex, squeeze, vgg].') 21 | 22 | 23 | class LinLayers(nn.ModuleList): 24 | def __init__(self, n_channels_list: Sequence[int]): 25 | super(LinLayers, self).__init__([ 26 | nn.Sequential( 27 | nn.Identity(), 28 | nn.Conv2d(nc, 1, 1, 1, 0, bias=False) 29 | ) for nc in n_channels_list 30 | ]) 31 | 32 | for param in self.parameters(): 33 | param.requires_grad = False 34 | 35 | 36 | class BaseNet(nn.Module): 37 | def __init__(self): 38 | super(BaseNet, self).__init__() 39 | 40 | # register buffer 41 | self.register_buffer( 42 | 'mean', torch.Tensor([-.030, -.088, -.188])[None, :, None, None]) 43 | self.register_buffer( 44 | 'std', torch.Tensor([.458, .448, .450])[None, :, None, None]) 45 | 46 | def set_requires_grad(self, state: bool): 47 | for param in chain(self.parameters(), self.buffers()): 48 | param.requires_grad = state 49 | 50 | def z_score(self, x: torch.Tensor): 51 | return (x - self.mean) / self.std 52 | 53 | def forward(self, x: torch.Tensor): 54 | x = self.z_score(x) 
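        # Pass the normalized image through the frozen backbone, keeping a channel-normalized
        # copy of the activation at each target layer; LPIPS later weights these per-layer
        # feature differences with its learned 1x1 linear heads.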
55 | 56 | output = [] 57 | for i, (_, layer) in enumerate(self.layers._modules.items(), 1): 58 | x = layer(x) 59 | if i in self.target_layers: 60 | output.append(normalize_activation(x)) 61 | if len(output) == len(self.target_layers): 62 | break 63 | return output 64 | 65 | 66 | class SqueezeNet(BaseNet): 67 | def __init__(self): 68 | super(SqueezeNet, self).__init__() 69 | 70 | self.layers = models.squeezenet1_1(True).features 71 | self.target_layers = [2, 5, 8, 10, 11, 12, 13] 72 | self.n_channels_list = [64, 128, 256, 384, 384, 512, 512] 73 | 74 | self.set_requires_grad(False) 75 | 76 | 77 | class AlexNet(BaseNet): 78 | def __init__(self): 79 | super(AlexNet, self).__init__() 80 | 81 | self.layers = models.alexnet(True).features 82 | self.target_layers = [2, 5, 8, 10, 12] 83 | self.n_channels_list = [64, 192, 384, 256, 256] 84 | 85 | self.set_requires_grad(False) 86 | 87 | 88 | class VGG16(BaseNet): 89 | def __init__(self): 90 | super(VGG16, self).__init__() 91 | 92 | self.layers = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1).features 93 | self.target_layers = [4, 9, 16, 23, 30] 94 | self.n_channels_list = [64, 128, 256, 512, 512] 95 | 96 | self.set_requires_grad(False) 97 | -------------------------------------------------------------------------------- /evaluation/lpipsPyTorch/modules/utils.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch 4 | 5 | 6 | def normalize_activation(x, eps=1e-10): 7 | norm_factor = torch.sqrt(torch.sum(x ** 2, dim=1, keepdim=True)) 8 | return x / (norm_factor + eps) 9 | 10 | 11 | def get_state_dict(net_type: str = 'alex', version: str = '0.1'): 12 | # build url 13 | url = 'https://raw.githubusercontent.com/richzhang/PerceptualSimilarity/' \ 14 | + f'master/lpips/weights/v{version}/{net_type}.pth' 15 | 16 | # download 17 | old_state_dict = torch.hub.load_state_dict_from_url( 18 | url, progress=True, 19 | map_location=None if torch.cuda.is_available() else torch.device('cpu') 20 | ) 21 | 22 | # rename keys 23 | new_state_dict = OrderedDict() 24 | for key, val in old_state_dict.items(): 25 | new_key = key 26 | new_key = new_key.replace('lin', '') 27 | new_key = new_key.replace('model.', '') 28 | new_state_dict[new_key] = val 29 | 30 | return new_state_dict 31 | -------------------------------------------------------------------------------- /evaluation/metrics.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import os 13 | import sys 14 | import json 15 | import torch 16 | from PIL import Image 17 | from tqdm import tqdm 18 | from pathlib import Path 19 | import torchvision.transforms.functional as tf 20 | sys.path.append(os.getcwd()) 21 | 22 | from tools.loss_utils import ssim 23 | from lpipsPyTorch import lpips 24 | from tools.image_utils import psnr 25 | from argparse import ArgumentParser 26 | from configs.config import Config 27 | from tools.general_utils import set_random_seed 28 | 29 | 30 | def readImages(renders_dir, gt_dir): 31 | renders = [] 32 | gts = [] 33 | image_names = [] 34 | for fname in os.listdir(renders_dir): 35 | render = Image.open(renders_dir / fname) 36 | gt = Image.open(gt_dir / fname) 37 | renders.append(tf.to_tensor(render).unsqueeze(0)[:, :3, :, :].cuda()) 38 | gts.append(tf.to_tensor(gt).unsqueeze(0)[:, :3, :, :].cuda()) 39 | image_names.append(fname) 40 | return renders, gts, image_names 41 | 42 | def evaluate(model_paths): 43 | 44 | full_dict = {} 45 | per_view_dict = {} 46 | full_dict_polytopeonly = {} 47 | per_view_dict_polytopeonly = {} 48 | print("") 49 | 50 | for scene_dir in model_paths: 51 | try: 52 | print("Scene:", scene_dir) 53 | full_dict[scene_dir] = {} 54 | per_view_dict[scene_dir] = {} 55 | full_dict_polytopeonly[scene_dir] = {} 56 | per_view_dict_polytopeonly[scene_dir] = {} 57 | 58 | test_dir = Path(scene_dir) / "test" 59 | 60 | for method in os.listdir(test_dir): 61 | print("Method:", method) 62 | 63 | full_dict[scene_dir][method] = {} 64 | per_view_dict[scene_dir][method] = {} 65 | full_dict_polytopeonly[scene_dir][method] = {} 66 | per_view_dict_polytopeonly[scene_dir][method] = {} 67 | 68 | method_dir = test_dir / method 69 | gt_dir = method_dir/ "gt" 70 | renders_dir = method_dir / "renders" 71 | renders, gts, image_names = readImages(renders_dir, gt_dir) 72 | 73 | ssims = [] 74 | psnrs = [] 75 | lpipss = [] 76 | 77 | for idx in tqdm(range(len(renders)), desc="Metric evaluation progress"): 78 | ssims.append(ssim(renders[idx], gts[idx])) 79 | psnrs.append(psnr(renders[idx], gts[idx])) 80 | lpipss.append(lpips(renders[idx], gts[idx], net_type='vgg')) 81 | 82 | 83 | full_dict[scene_dir][method].update({"SSIM": torch.tensor(ssims).mean().item(), 84 | "PSNR": torch.tensor(psnrs).mean().item(), 85 | "LPIPS": torch.tensor(lpipss).mean().item()}) 86 | per_view_dict[scene_dir][method].update({"SSIM": {name: ssim for ssim, name in zip(torch.tensor(ssims).tolist(), image_names)}, 87 | "PSNR": {name: psnr for psnr, name in zip(torch.tensor(psnrs).tolist(), image_names)}, 88 | "LPIPS": {name: lp for lp, name in zip(torch.tensor(lpipss).tolist(), image_names)}}) 89 | 90 | with open(scene_dir + "/results.json", 'w') as fp: 91 | json.dump(full_dict[scene_dir], fp, indent=True) 92 | with open(scene_dir + "/per_view.json", 'w') as fp: 93 | json.dump(per_view_dict[scene_dir], fp, indent=True) 94 | except: 95 | print("Unable to compute metrics for model", scene_dir) 96 | 97 | if __name__ == "__main__": 98 | device = torch.device("cuda:0") 99 | torch.cuda.set_device(device) 100 | 101 | # Set up command line argument parser 102 | parser = ArgumentParser(description="Training script parameters") 103 | parser.add_argument('--cfg_path', type=str, default='configs/config_base.yaml') 104 | args = parser.parse_args() 105 | 106 | cfg = Config(args.cfg_path) 107 | cfg.model.data_device = 'cpu' 108 | cfg.model.load_normal = False 109 | 110 | set_random_seed(cfg.seed) 111 | 112 | 
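# evaluate() walks <model_path>/test/<method>/{renders, gt}, averages SSIM,
# PSNR and LPIPS (VGG backbone) over the image pairs, and writes results.json
# and per_view.json into the scene directory. A hedged usage sketch for scoring
# several runs at once (the paths are illustrative, not part of this repo):
#   evaluate(['output/dtu_scan24', 'output/tnt_Barn'])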
evaluate([cfg.model.model_path]) 113 | -------------------------------------------------------------------------------- /evaluation/render.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import os 13 | import sys 14 | import torch 15 | import torchvision 16 | from tqdm import tqdm 17 | from argparse import ArgumentParser 18 | sys.path.append(os.getcwd()) 19 | 20 | from scene import Scene 21 | from gaussian_renderer import render, render_fast 22 | from gaussian_renderer import GaussianModel 23 | from configs.config import Config 24 | from tools.general_utils import set_random_seed 25 | from tools.loss_utils import cos_weight 26 | 27 | 28 | def render_set(model_path, name, iteration, views, gaussians, cfg, background): 29 | render_path = os.path.join(model_path, name, "ours_{}".format(iteration), "renders") 30 | gts_path = os.path.join(model_path, name, "ours_{}".format(iteration), "gt") 31 | 32 | os.makedirs(render_path, exist_ok=True) 33 | os.makedirs(gts_path, exist_ok=True) 34 | alphas = [] 35 | 36 | for idx, view in enumerate(tqdm(views, desc="Rendering progress")): 37 | outs = render(view, gaussians, cfg, background) 38 | # outs = render_fast(view, gaussians, cfg, background) 39 | 40 | rendering = outs["render"] 41 | gt = view.original_image[0:3, :, :] 42 | torchvision.utils.save_image(rendering, os.path.join(render_path, '{0:05d}'.format(idx) + ".png")) 43 | torchvision.utils.save_image(gt, os.path.join(gts_path, '{0:05d}'.format(idx) + ".png")) 44 | alphas.append(outs["alpha"].detach().clone().view(-1).cpu()) 45 | 46 | if False: 47 | normal_map = outs["normal"].detach().clone() 48 | normal_gt = view.normal.cuda() 49 | cos = cos_weight(normal_gt, normal_map, cfg.optim.exp_t, cfg.optim.cos_thr) 50 | torchvision.utils.save_image(cos, os.path.join(render_path, '{0:05d}_cosine'.format(idx) + ".png")) 51 | 52 | # alphas = torch.cat(alphas, dim=0) 53 | # print("Alpha min: {}, max: {}".format(alphas.min(), alphas.max())) 54 | # print("Alpha mean: {}, std: {}".format(alphas.mean(), alphas.std())) 55 | # print("Alpha median: {}".format(alphas.median())) 56 | 57 | 58 | def render_sets(cfg, iteration : int, skip_train : bool, skip_test : bool): 59 | with torch.no_grad(): 60 | gaussians = GaussianModel(cfg.model) 61 | scene = Scene(cfg.model, gaussians, load_iteration=iteration, shuffle=False) 62 | # gaussians.extent = scene.cameras_extent 63 | 64 | bg_color = [1,1,1] if cfg.model.white_background else [0, 0, 0] 65 | background = torch.tensor(bg_color, dtype=torch.float32, device="cuda") 66 | 67 | if not skip_train: 68 | render_set(cfg.model.model_path, "train", scene.loaded_iter, scene.getTrainCameras(), gaussians, cfg, background) 69 | 70 | if not skip_test: 71 | render_set(cfg.model.model_path, "test", scene.loaded_iter, scene.getTestCameras(), gaussians, cfg, background) 72 | 73 | 74 | if __name__ == "__main__": 75 | # Set up command line argument parser 76 | parser = ArgumentParser() 77 | parser.add_argument('--cfg_path', type=str, default='configs/config_base.yaml') 78 | parser.add_argument("--iteration", default=-1, type=int) 79 | parser.add_argument("--skip_train", action="store_true") 80 | parser.add_argument("--skip_test", 
action="store_true") 81 | args = parser.parse_args() 82 | 83 | cfg = Config(args.cfg_path) 84 | cfg.model.data_device = 'cuda' 85 | cfg.model.load_normal = False 86 | cfg.model.load_mask = False 87 | 88 | set_random_seed(cfg.seed) 89 | 90 | # Initialize system state (RNG) 91 | # safe_state(args.quiet) 92 | 93 | render_sets(cfg, args.iteration, args.skip_train, args.skip_test) -------------------------------------------------------------------------------- /evaluation/tnt_eval/README.md: -------------------------------------------------------------------------------- 1 | # Python Toolbox for Evaluation 2 | 3 | This Python script evaluates **training** dataset of TanksAndTemples benchmark. 4 | The script requires ``Open3D`` and a few Python packages such as ``matplotlib``, ``json``, and ``numpy``. 5 | 6 | ## How to use: 7 | **Step 0**. Reconstruct 3D models and recover camera poses from the training dataset. 8 | The raw videos of the training dataset can be found from: 9 | https://tanksandtemples.org/download/ 10 | 11 | **Step 1**. Download evaluation data (ground truth geometry + reference reconstruction) using 12 | [this link](https://drive.google.com/open?id=1UoKPiUUsKa0AVHFOrnMRhc5hFngjkE-t). In this example, we regard ``TanksAndTemples/evaluation/data/`` as a dataset folder. 13 | 14 | **Step 2**. Install Open3D. Follow instructions in http://open3d.org/docs/getting_started.html 15 | 16 | **Step 3**. Run the evaluation script and grab some coffee. 17 | ``` 18 | python run.py --dataset-dir path/to/TanksAndTemples/evaluation/data/Ignatius --traj-path path/to/TanksAndTemples/evaluation/data/Ignatius/Ignatius_COLMAP_SfM.log --ply-path path/to/TanksAndTemples/evaluation/data/Ignatius/Ignatius_COLMAP.ply 19 | ``` 20 | Output (evaluation of Ignatius): 21 | ``` 22 | =========================== 23 | Evaluating Ignatius 24 | =========================== 25 | path/to/TanksAndTemples/evaluation/data/Ignatius/Ignatius_COLMAP.ply 26 | Reading PLY: [========================================] 100% 27 | Read PointCloud: 6929586 vertices. 28 | path/to/TanksAndTemples/evaluation/data/Ignatius/Ignatius.ply 29 | Reading PLY: [========================================] 100% 30 | : 31 | ICP Iteration #0: Fitness 0.9980, RMSE 0.0044 32 | ICP Iteration #1: Fitness 0.9980, RMSE 0.0043 33 | ICP Iteration #2: Fitness 0.9980, RMSE 0.0043 34 | ICP Iteration #3: Fitness 0.9980, RMSE 0.0043 35 | ICP Iteration #4: Fitness 0.9980, RMSE 0.0042 36 | ICP Iteration #5: Fitness 0.9980, RMSE 0.0042 37 | ICP Iteration #6: Fitness 0.9979, RMSE 0.0042 38 | ICP Iteration #7: Fitness 0.9979, RMSE 0.0042 39 | ICP Iteration #8: Fitness 0.9979, RMSE 0.0042 40 | ICP Iteration #9: Fitness 0.9979, RMSE 0.0042 41 | ICP Iteration #10: Fitness 0.9979, RMSE 0.0042 42 | [EvaluateHisto] 43 | Cropping geometry: [========================================] 100% 44 | Pointcloud down sampled from 6929586 points to 1449840 points. 45 | Pointcloud down sampled from 1449840 points to 1365628 points. 46 | path/to/TanksAndTemples/evaluation/data/Ignatius/evaluation//Ignatius.precision.ply 47 | Cropping geometry: [========================================] 100% 48 | Pointcloud down sampled from 5016769 points to 4957123 points. 49 | Pointcloud down sampled from 4957123 points to 4181506 points. 
50 | [compute_point_cloud_to_point_cloud_distance] 51 | [compute_point_cloud_to_point_cloud_distance] 52 | : 53 | [ViewDistances] Add color coding to visualize error 54 | [ViewDistances] Add color coding to visualize error 55 | : 56 | [get_f1_score_histo2] 57 | ============================== 58 | evaluation result : Ignatius 59 | ============================== 60 | distance tau : 0.003 61 | precision : 0.7679 62 | recall : 0.7937 63 | f-score : 0.7806 64 | ============================== 65 | ``` 66 | 67 | **Step 5**. Go to the evaluation folder. ``TanksAndTemples/evaluation/data/Ignatius/evaluation/`` will have the following outputs. 68 | 69 | 70 | 71 | ``PR_Ignatius_@d_th_0_0030.pdf`` (Precision and recall curves with a F-score) 72 | 73 | | | | 74 | |--|--| 75 | | ``Ignatius.precision.ply`` | ``Ignatius.recall.ply`` | 76 | 77 | (3D visualization of precision and recall. Each mesh is color coded using hot colormap) 78 | 79 | # Requirements 80 | 81 | - Python 3 82 | - open3d v0.9.0 83 | - matplotlib 84 | -------------------------------------------------------------------------------- /evaluation/tnt_eval/config.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # - TanksAndTemples Website Toolbox - 3 | # - http://www.tanksandtemples.org - 4 | # ---------------------------------------------------------------------------- 5 | # The MIT License (MIT) 6 | # 7 | # Copyright (c) 2017 8 | # Arno Knapitsch 9 | # Jaesik Park 10 | # Qian-Yi Zhou 11 | # Vladlen Koltun 12 | # 13 | # Permission is hereby granted, free of charge, to any person obtaining a copy 14 | # of this software and associated documentation files (the "Software"), to deal 15 | # in the Software without restriction, including without limitation the rights 16 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | # copies of the Software, and to permit persons to whom the Software is 18 | # furnished to do so, subject to the following conditions: 19 | # 20 | # The above copyright notice and this permission notice shall be included in 21 | # all copies or substantial portions of the Software. 22 | # 23 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 | # THE SOFTWARE. 
30 | # ---------------------------------------------------------------------------- 31 | 32 | # some global parameters - do not modify 33 | scenes_tau_dict = { 34 | "Barn": 0.01, 35 | "Caterpillar": 0.005, 36 | "Church": 0.025, 37 | "Courthouse": 0.025, 38 | "Ignatius": 0.003, 39 | "Meetingroom": 0.01, 40 | "Truck": 0.005, 41 | } 42 | -------------------------------------------------------------------------------- /evaluation/tnt_eval/evaluation.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # - TanksAndTemples Website Toolbox - 3 | # - http://www.tanksandtemples.org - 4 | # ---------------------------------------------------------------------------- 5 | # The MIT License (MIT) 6 | # 7 | # Copyright (c) 2017 8 | # Arno Knapitsch 9 | # Jaesik Park 10 | # Qian-Yi Zhou 11 | # Vladlen Koltun 12 | # 13 | # Permission is hereby granted, free of charge, to any person obtaining a copy 14 | # of this software and associated documentation files (the "Software"), to deal 15 | # in the Software without restriction, including without limitation the rights 16 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | # copies of the Software, and to permit persons to whom the Software is 18 | # furnished to do so, subject to the following conditions: 19 | # 20 | # The above copyright notice and this permission notice shall be included in 21 | # all copies or substantial portions of the Software. 22 | # 23 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 | # THE SOFTWARE. 
30 | # ---------------------------------------------------------------------------- 31 | # 32 | # This python script is for downloading dataset from www.tanksandtemples.org 33 | # The dataset has a different license, please refer to 34 | # https://tanksandtemples.org/license/ 35 | 36 | import json 37 | import copy 38 | import os 39 | import numpy as np 40 | import open3d as o3d 41 | import matplotlib.pyplot as plt 42 | 43 | 44 | def read_alignment_transformation(filename): 45 | with open(filename) as data_file: 46 | data = json.load(data_file) 47 | return np.asarray(data["transformation"]).reshape((4, 4)).transpose() 48 | 49 | 50 | def write_color_distances(path, pcd, distances, max_distance): 51 | o3d.utility.set_verbosity_level(o3d.utility.VerbosityLevel.Debug) 52 | # cmap = plt.get_cmap("afmhot") 53 | cmap = plt.get_cmap("hot_r") 54 | distances = np.array(distances) 55 | colors = cmap(np.minimum(distances, max_distance) / max_distance)[:, :3] 56 | pcd.colors = o3d.utility.Vector3dVector(colors) 57 | o3d.io.write_point_cloud(path, pcd) 58 | 59 | 60 | def EvaluateHisto( 61 | source, 62 | target, 63 | trans, 64 | crop_volume, 65 | voxel_size, 66 | threshold, 67 | filename_mvs, 68 | plot_stretch, 69 | scene_name, 70 | verbose=True, 71 | ): 72 | print("[EvaluateHisto]") 73 | o3d.utility.set_verbosity_level(o3d.utility.VerbosityLevel.Debug) 74 | s = copy.deepcopy(source) 75 | s.transform(trans) 76 | s = crop_volume.crop_point_cloud(s) 77 | s = s.voxel_down_sample(voxel_size) 78 | s.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamKNN(knn=20)) 79 | print(filename_mvs + "/" + scene_name + ".precision.ply") 80 | 81 | t = copy.deepcopy(target) 82 | t = crop_volume.crop_point_cloud(t) 83 | t = t.voxel_down_sample(voxel_size) 84 | t.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamKNN(knn=20)) 85 | print("[compute_point_cloud_to_point_cloud_distance]") 86 | distance1 = s.compute_point_cloud_distance(t) 87 | print("[compute_point_cloud_to_point_cloud_distance]") 88 | distance2 = t.compute_point_cloud_distance(s) 89 | 90 | # write the distances to bin files 91 | # np.array(distance1).astype("float64").tofile( 92 | # filename_mvs + "/" + scene_name + ".precision.bin" 93 | # ) 94 | # np.array(distance2).astype("float64").tofile( 95 | # filename_mvs + "/" + scene_name + ".recall.bin" 96 | # ) 97 | 98 | # Colorize the poincloud files prith the precision and recall values 99 | # o3d.io.write_point_cloud( 100 | # filename_mvs + "/" + scene_name + ".precision.ply", s 101 | # ) 102 | # o3d.io.write_point_cloud( 103 | # filename_mvs + "/" + scene_name + ".precision.ncb.ply", s 104 | # ) 105 | # o3d.io.write_point_cloud(filename_mvs + "/" + scene_name + ".recall.ply", t) 106 | 107 | source_n_fn = filename_mvs + "/" + scene_name + ".precision.ply" 108 | target_n_fn = filename_mvs + "/" + scene_name + ".recall.ply" 109 | 110 | print("[ViewDistances] Add color coding to visualize error") 111 | # eval_str_viewDT = ( 112 | # OPEN3D_EXPERIMENTAL_BIN_PATH 113 | # + "ViewDistances " 114 | # + source_n_fn 115 | # + " --max_distance " 116 | # + str(threshold * 3) 117 | # + " --write_color_back --without_gui" 118 | # ) 119 | # os.system(eval_str_viewDT) 120 | write_color_distances(source_n_fn, s, distance1, 3 * threshold) 121 | 122 | print("[ViewDistances] Add color coding to visualize error") 123 | # eval_str_viewDT = ( 124 | # OPEN3D_EXPERIMENTAL_BIN_PATH 125 | # + "ViewDistances " 126 | # + target_n_fn 127 | # + " --max_distance " 128 | # + str(threshold * 3) 129 | # + " --write_color_back 
--without_gui" 130 | # ) 131 | # os.system(eval_str_viewDT) 132 | write_color_distances(target_n_fn, t, distance2, 3 * threshold) 133 | 134 | # get histogram and f-score 135 | [ 136 | precision, 137 | recall, 138 | fscore, 139 | edges_source, 140 | cum_source, 141 | edges_target, 142 | cum_target, 143 | ] = get_f1_score_histo2(threshold, filename_mvs, plot_stretch, distance1, 144 | distance2) 145 | np.savetxt(filename_mvs + "/" + scene_name + ".recall.txt", cum_target) 146 | np.savetxt(filename_mvs + "/" + scene_name + ".precision.txt", cum_source) 147 | np.savetxt( 148 | filename_mvs + "/" + scene_name + ".prf_tau_plotstr.txt", 149 | np.array([precision, recall, fscore, threshold, plot_stretch]), 150 | ) 151 | 152 | return [ 153 | precision, 154 | recall, 155 | fscore, 156 | edges_source, 157 | cum_source, 158 | edges_target, 159 | cum_target, 160 | ] 161 | 162 | 163 | def get_f1_score_histo2(threshold, 164 | filename_mvs, 165 | plot_stretch, 166 | distance1, 167 | distance2, 168 | verbose=True): 169 | print("[get_f1_score_histo2]") 170 | dist_threshold = threshold 171 | if len(distance1) and len(distance2): 172 | 173 | recall = float(sum(d < threshold for d in distance2)) / float( 174 | len(distance2)) 175 | precision = float(sum(d < threshold for d in distance1)) / float( 176 | len(distance1)) 177 | fscore = 2 * recall * precision / (recall + precision) 178 | num = len(distance1) 179 | bins = np.arange(0, dist_threshold * plot_stretch, dist_threshold / 100) 180 | hist, edges_source = np.histogram(distance1, bins) 181 | cum_source = np.cumsum(hist).astype(float) / num 182 | 183 | num = len(distance2) 184 | bins = np.arange(0, dist_threshold * plot_stretch, dist_threshold / 100) 185 | hist, edges_target = np.histogram(distance2, bins) 186 | cum_target = np.cumsum(hist).astype(float) / num 187 | 188 | else: 189 | precision = 0 190 | recall = 0 191 | fscore = 0 192 | edges_source = np.array([0]) 193 | cum_source = np.array([0]) 194 | edges_target = np.array([0]) 195 | cum_target = np.array([0]) 196 | 197 | return [ 198 | precision, 199 | recall, 200 | fscore, 201 | edges_source, 202 | cum_source, 203 | edges_target, 204 | cum_target, 205 | ] 206 | -------------------------------------------------------------------------------- /evaluation/tnt_eval/images/f-score.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HLinChen/VCR-GauS/aa715d19bfacfa9d491f477c572eab1839dcee3e/evaluation/tnt_eval/images/f-score.jpg -------------------------------------------------------------------------------- /evaluation/tnt_eval/images/precision.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HLinChen/VCR-GauS/aa715d19bfacfa9d491f477c572eab1839dcee3e/evaluation/tnt_eval/images/precision.jpg -------------------------------------------------------------------------------- /evaluation/tnt_eval/images/recall.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HLinChen/VCR-GauS/aa715d19bfacfa9d491f477c572eab1839dcee3e/evaluation/tnt_eval/images/recall.jpg -------------------------------------------------------------------------------- /evaluation/tnt_eval/plot.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # - TanksAndTemples Website Toolbox - 3 | # - http://www.tanksandtemples.org - 4 | 
# ---------------------------------------------------------------------------- 5 | # The MIT License (MIT) 6 | # 7 | # Copyright (c) 2017 8 | # Arno Knapitsch 9 | # Jaesik Park 10 | # Qian-Yi Zhou 11 | # Vladlen Koltun 12 | # 13 | # Permission is hereby granted, free of charge, to any person obtaining a copy 14 | # of this software and associated documentation files (the "Software"), to deal 15 | # in the Software without restriction, including without limitation the rights 16 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | # copies of the Software, and to permit persons to whom the Software is 18 | # furnished to do so, subject to the following conditions: 19 | # 20 | # The above copyright notice and this permission notice shall be included in 21 | # all copies or substantial portions of the Software. 22 | # 23 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 | # THE SOFTWARE. 30 | # ---------------------------------------------------------------------------- 31 | # 32 | # This python script is for downloading dataset from www.tanksandtemples.org 33 | # The dataset has a different license, please refer to 34 | # https://tanksandtemples.org/license/ 35 | 36 | import matplotlib.pyplot as plt 37 | from cycler import cycler 38 | 39 | 40 | def plot_graph( 41 | scene, 42 | fscore, 43 | dist_threshold, 44 | edges_source, 45 | cum_source, 46 | edges_target, 47 | cum_target, 48 | plot_stretch, 49 | mvs_outpath, 50 | show_figure=False, 51 | ): 52 | f = plt.figure() 53 | plt_size = [14, 7] 54 | pfontsize = "medium" 55 | 56 | ax = plt.subplot(111) 57 | label_str = "precision" 58 | ax.plot( 59 | edges_source[1::], 60 | cum_source * 100, 61 | c="red", 62 | label=label_str, 63 | linewidth=2.0, 64 | ) 65 | 66 | label_str = "recall" 67 | ax.plot( 68 | edges_target[1::], 69 | cum_target * 100, 70 | c="blue", 71 | label=label_str, 72 | linewidth=2.0, 73 | ) 74 | 75 | ax.grid(True) 76 | plt.rcParams["figure.figsize"] = plt_size 77 | plt.rc("axes", prop_cycle=cycler("color", ["r", "g", "b", "y"])) 78 | plt.title("Precision and Recall: " + scene + ", " + "%02.2f f-score" % 79 | (fscore * 100)) 80 | plt.axvline(x=dist_threshold, c="black", ls="dashed", linewidth=2.0) 81 | 82 | plt.ylabel("# of points (%)", fontsize=15) 83 | plt.xlabel("Meters", fontsize=15) 84 | plt.axis([0, dist_threshold * plot_stretch, 0, 100]) 85 | ax.legend(shadow=True, fancybox=True, fontsize=pfontsize) 86 | # plt.axis([0, dist_threshold*plot_stretch, 0, 100]) 87 | 88 | plt.setp(ax.get_legend().get_texts(), fontsize=pfontsize) 89 | 90 | plt.legend(loc=2, borderaxespad=0.0, fontsize=pfontsize) 91 | plt.legend(loc=4) 92 | leg = plt.legend(loc="lower right") 93 | 94 | box = ax.get_position() 95 | ax.set_position([box.x0, box.y0, box.width * 0.8, box.height]) 96 | 97 | # Put a legend to the right of the current axis 98 | ax.legend(loc="center left", bbox_to_anchor=(1, 0.5)) 99 | plt.setp(ax.get_legend().get_texts(), fontsize=pfontsize) 100 | png_name = mvs_outpath + "/PR_{0}_@d_th_0_{1}.png".format( 101 | scene, "%04d" % (dist_threshold * 10000)) 102 | pdf_name = mvs_outpath + 
"/PR_{0}_@d_th_0_{1}.pdf".format( 103 | scene, "%04d" % (dist_threshold * 10000)) 104 | 105 | # save figure and display 106 | f.savefig(png_name, format="png", bbox_inches="tight") 107 | f.savefig(pdf_name, format="pdf", bbox_inches="tight") 108 | if show_figure: 109 | plt.show() 110 | -------------------------------------------------------------------------------- /evaluation/tnt_eval/registration.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # - TanksAndTemples Website Toolbox - 3 | # - http://www.tanksandtemples.org - 4 | # ---------------------------------------------------------------------------- 5 | # The MIT License (MIT) 6 | # 7 | # Copyright (c) 2017 8 | # Arno Knapitsch 9 | # Jaesik Park 10 | # Qian-Yi Zhou 11 | # Vladlen Koltun 12 | # 13 | # Permission is hereby granted, free of charge, to any person obtaining a copy 14 | # of this software and associated documentation files (the "Software"), to deal 15 | # in the Software without restriction, including without limitation the rights 16 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | # copies of the Software, and to permit persons to whom the Software is 18 | # furnished to do so, subject to the following conditions: 19 | # 20 | # The above copyright notice and this permission notice shall be included in 21 | # all copies or substantial portions of the Software. 22 | # 23 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 | # THE SOFTWARE. 
30 | # ---------------------------------------------------------------------------- 31 | # 32 | # This python script is for downloading dataset from www.tanksandtemples.org 33 | # The dataset has a different license, please refer to 34 | # https://tanksandtemples.org/license/ 35 | 36 | from trajectory_io import read_trajectory, convert_trajectory_to_pointcloud 37 | import copy 38 | import numpy as np 39 | import open3d as o3d 40 | 41 | MAX_POINT_NUMBER = 4e6 42 | 43 | 44 | def read_mapping(filename): 45 | mapping = [] 46 | with open(filename, "r") as f: 47 | n_sampled_frames = int(f.readline()) 48 | n_total_frames = int(f.readline()) 49 | mapping = np.zeros(shape=(n_sampled_frames, 2)) 50 | metastr = f.readline() 51 | for iter in range(n_sampled_frames): 52 | metadata = list(map(int, metastr.split())) 53 | mapping[iter, :] = metadata 54 | metastr = f.readline() 55 | return [n_sampled_frames, n_total_frames, mapping] 56 | 57 | 58 | def gen_sparse_trajectory(mapping, f_trajectory): 59 | sparse_traj = [] 60 | for m in mapping: 61 | sparse_traj.append(f_trajectory[int(m[1] - 1)]) 62 | return sparse_traj 63 | 64 | 65 | def trajectory_alignment(map_file, traj_to_register, gt_traj_col, gt_trans, 66 | scene): 67 | traj_pcd_col = convert_trajectory_to_pointcloud(gt_traj_col) 68 | traj_pcd_col.transform(gt_trans) 69 | corres = o3d.utility.Vector2iVector( 70 | np.asarray(list(map(lambda x: [x, x], range(len(gt_traj_col)))))) 71 | rr = o3d.registration.RANSACConvergenceCriteria() 72 | rr.max_iteration = 100000 73 | rr.max_validation = 100000 74 | 75 | # in this case a log file was used which contains 76 | # every movie frame (see tutorial for details) 77 | if len(traj_to_register) > 1600: 78 | n_sampled_frames, n_total_frames, mapping = read_mapping(map_file) 79 | traj_col2 = gen_sparse_trajectory(mapping, traj_to_register) 80 | traj_to_register_pcd = convert_trajectory_to_pointcloud(traj_col2) 81 | else: 82 | traj_to_register_pcd = convert_trajectory_to_pointcloud( 83 | traj_to_register) 84 | randomvar = 0.0 85 | nr_of_cam_pos = len(traj_to_register_pcd.points) 86 | rand_number_added = np.asanyarray(traj_to_register_pcd.points) * ( 87 | np.random.rand(nr_of_cam_pos, 3) * randomvar - randomvar / 2.0 + 1) 88 | list_rand = list(rand_number_added) 89 | traj_to_register_pcd_rand = o3d.geometry.PointCloud() 90 | for elem in list_rand: 91 | traj_to_register_pcd_rand.points.append(elem) 92 | 93 | # Rough registration based on aligned colmap SfM data 94 | reg = o3d.registration.registration_ransac_based_on_correspondence( 95 | traj_to_register_pcd_rand, 96 | traj_pcd_col, 97 | corres, 98 | 0.2, 99 | o3d.registration.TransformationEstimationPointToPoint(True), 100 | 6, 101 | rr, 102 | ) 103 | return reg.transformation 104 | 105 | 106 | def crop_and_downsample( 107 | pcd, 108 | crop_volume, 109 | down_sample_method="voxel", 110 | voxel_size=0.01, 111 | trans=np.identity(4), 112 | ): 113 | pcd_copy = copy.deepcopy(pcd) 114 | pcd_copy.transform(trans) 115 | pcd_crop = crop_volume.crop_point_cloud(pcd_copy) 116 | if down_sample_method == "voxel": 117 | # return voxel_down_sample(pcd_crop, voxel_size) 118 | return pcd_crop.voxel_down_sample(voxel_size) 119 | elif down_sample_method == "uniform": 120 | n_points = len(pcd_crop.points) 121 | if n_points > MAX_POINT_NUMBER: 122 | ds_rate = int(round(n_points / float(MAX_POINT_NUMBER))) 123 | return pcd_crop.uniform_down_sample(ds_rate) 124 | return pcd_crop 125 | 126 | 127 | def registration_unif( 128 | source, 129 | gt_target, 130 | init_trans, 131 | crop_volume, 132 
| threshold, 133 | max_itr, 134 | max_size=4 * MAX_POINT_NUMBER, 135 | verbose=True, 136 | ): 137 | if verbose: 138 | print("[Registration] threshold: %f" % threshold) 139 | o3d.utility.set_verbosity_level(o3d.utility.VerbosityLevel.Debug) 140 | s = crop_and_downsample(source, 141 | crop_volume, 142 | down_sample_method="uniform", 143 | trans=init_trans) 144 | t = crop_and_downsample(gt_target, 145 | crop_volume, 146 | down_sample_method="uniform") 147 | reg = o3d.registration.registration_icp( 148 | s, 149 | t, 150 | threshold, 151 | np.identity(4), 152 | o3d.registration.TransformationEstimationPointToPoint(True), 153 | o3d.registration.ICPConvergenceCriteria(1e-6, max_itr), 154 | ) 155 | reg.transformation = np.matmul(reg.transformation, init_trans) 156 | return reg 157 | 158 | 159 | def registration_vol_ds( 160 | source, 161 | gt_target, 162 | init_trans, 163 | crop_volume, 164 | voxel_size, 165 | threshold, 166 | max_itr, 167 | verbose=True, 168 | ): 169 | if verbose: 170 | print("[Registration] voxel_size: %f, threshold: %f" % 171 | (voxel_size, threshold)) 172 | o3d.utility.set_verbosity_level(o3d.utility.VerbosityLevel.Debug) 173 | s = crop_and_downsample( 174 | source, 175 | crop_volume, 176 | down_sample_method="voxel", 177 | voxel_size=voxel_size, 178 | trans=init_trans, 179 | ) 180 | t = crop_and_downsample( 181 | gt_target, 182 | crop_volume, 183 | down_sample_method="voxel", 184 | voxel_size=voxel_size, 185 | ) 186 | 187 | s = crop_based_target(s, t) 188 | 189 | reg = o3d.registration.registration_icp( 190 | s, 191 | t, 192 | threshold, 193 | np.identity(4), 194 | o3d.registration.TransformationEstimationPointToPoint(True), 195 | o3d.registration.ICPConvergenceCriteria(1e-6, max_itr), 196 | ) 197 | reg.transformation = np.matmul(reg.transformation, init_trans) 198 | return reg 199 | 200 | 201 | def crop_based_target(s, t): 202 | bbox_t = t.get_axis_aligned_bounding_box() 203 | 204 | min_bound = bbox_t.get_min_bound() 205 | max_bound = bbox_t.get_max_bound() 206 | 207 | s_filtered = o3d.geometry.PointCloud() 208 | 209 | valid = np.logical_and(np.all(s.points >= min_bound, axis=1), np.all(s.points <= max_bound, axis=1)) 210 | s_filtered.points = o3d.utility.Vector3dVector(np.asarray(s.points)[valid]) 211 | 212 | return s_filtered -------------------------------------------------------------------------------- /evaluation/tnt_eval/requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib>=1.3 2 | open3d==0.9 3 | -------------------------------------------------------------------------------- /evaluation/tnt_eval/run.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # - TanksAndTemples Website Toolbox - 3 | # - http://www.tanksandtemples.org - 4 | # ---------------------------------------------------------------------------- 5 | # The MIT License (MIT) 6 | # 7 | # Copyright (c) 2017 8 | # Arno Knapitsch 9 | # Jaesik Park 10 | # Qian-Yi Zhou 11 | # Vladlen Koltun 12 | # 13 | # Permission is hereby granted, free of charge, to any person obtaining a copy 14 | # of this software and associated documentation files (the "Software"), to deal 15 | # in the Software without restriction, including without limitation the rights 16 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | # copies of the Software, and to permit persons to whom the Software is 18 | # furnished to do so, 
subject to the following conditions: 19 | # 20 | # The above copyright notice and this permission notice shall be included in 21 | # all copies or substantial portions of the Software. 22 | # 23 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 | # THE SOFTWARE. 30 | # ---------------------------------------------------------------------------- 31 | # 32 | # This python script is for downloading dataset from www.tanksandtemples.org 33 | # The dataset has a different license, please refer to 34 | # https://tanksandtemples.org/license/ 35 | 36 | # this script requires Open3D python binding 37 | # please follow the intructions in setup.py before running this script. 38 | import numpy as np 39 | import open3d as o3d 40 | import os 41 | import argparse 42 | import sys 43 | sys.path.append(os.getcwd()) 44 | 45 | from config import scenes_tau_dict 46 | from registration import ( 47 | trajectory_alignment, 48 | registration_vol_ds, 49 | registration_unif, 50 | read_trajectory, 51 | ) 52 | from evaluation import EvaluateHisto 53 | from util import make_dir 54 | from plot import plot_graph 55 | 56 | 57 | 58 | def run_evaluation(dataset_dir, traj_path, ply_path, out_dir): 59 | scene = os.path.basename(os.path.normpath(dataset_dir)) 60 | 61 | if scene not in scenes_tau_dict: 62 | print(dataset_dir, scene) 63 | raise Exception("invalid dataset-dir, not in scenes_tau_dict") 64 | 65 | print("") 66 | print("===========================") 67 | print("Evaluating %s" % scene) 68 | print("===========================") 69 | 70 | dTau = scenes_tau_dict[scene] 71 | # put the crop-file, the GT file, the COLMAP SfM log file and 72 | # the alignment of the according scene in a folder of 73 | # the same scene name in the dataset_dir 74 | colmap_ref_logfile = os.path.join(dataset_dir, scene + "_COLMAP_SfM.log") 75 | alignment = os.path.join(dataset_dir, scene + "_trans.txt") 76 | gt_filen = os.path.join(dataset_dir, scene + ".ply") 77 | # gt_filen = os.path.join(dataset_dir, scene + "_GT.ply") 78 | cropfile = os.path.join(dataset_dir, scene + ".json") 79 | map_file = os.path.join(dataset_dir, scene + "_mapping_reference.txt") 80 | 81 | make_dir(out_dir) 82 | 83 | assert os.path.exists(ply_path), f"ply_path {ply_path} does not exist" 84 | 85 | # Load reconstruction and according GT 86 | print(gt_filen) 87 | gt_pcd = o3d.io.read_point_cloud(gt_filen) 88 | print(ply_path) 89 | # pcd = o3d.io.read_point_cloud(ply_path) 90 | mesh = o3d.io.read_triangle_mesh(ply_path) 91 | pcd = mesh.sample_points_uniformly(len(gt_pcd.points)) 92 | 93 | gt_trans = np.loadtxt(alignment) 94 | traj_to_register = read_trajectory(traj_path) 95 | gt_traj_col = read_trajectory(colmap_ref_logfile) 96 | 97 | trajectory_transform = trajectory_alignment(map_file, traj_to_register, 98 | gt_traj_col, gt_trans, scene) 99 | 100 | # Refine alignment by using the actual GT and MVS pointclouds 101 | vol = o3d.visualization.read_selection_polygon_volume(cropfile) 102 | # big pointclouds will be downlsampled to this number to speed up alignment 103 | dist_threshold = dTau 104 | 105 | # Registration refinment in 3 iterations 106 | 
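# Hedged reading of the three refinement stages below: two voxel-downsampled
# ICP passes (voxel size dTau with distance threshold 80*dTau, then dTau/2 with
# 20*dTau) are followed by a uniformly-downsampled ICP pass at threshold 2*dTau;
# each stage is initialized with the previous stage's transformation, and the
# final result is the alignment passed to EvaluateHisto.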
r2 = registration_vol_ds(pcd, gt_pcd, trajectory_transform, vol, dTau, 107 | dTau * 80, 20) 108 | r3 = registration_vol_ds(pcd, gt_pcd, r2.transformation, vol, dTau / 2.0, 109 | dTau * 20, 20) 110 | r = registration_unif(pcd, gt_pcd, r3.transformation, vol, 2 * dTau, 20) 111 | 112 | # Histogramms and P/R/F1 113 | plot_stretch = 5 114 | [ 115 | precision, 116 | recall, 117 | fscore, 118 | edges_source, 119 | cum_source, 120 | edges_target, 121 | cum_target, 122 | ] = EvaluateHisto( 123 | pcd, 124 | gt_pcd, 125 | r.transformation, 126 | vol, 127 | dTau / 2.0, 128 | dTau, 129 | out_dir, 130 | plot_stretch, 131 | scene, 132 | ) 133 | eva = [precision, recall, fscore] 134 | # eva = [i*100 for i in eva] 135 | print("==============================") 136 | print("evaluation result : %s" % scene) 137 | print("==============================") 138 | print("distance tau : %.3f" % dTau) 139 | print("precision : %.4f" % eva[0]) 140 | print("recall : %.4f" % eva[1]) 141 | print("f-score : %.4f" % eva[2]) 142 | print("==============================") 143 | 144 | with open(os.path.join(out_dir, "evaluation.txt"), "w") as f: 145 | f.write("evaluation result : %s\n" % scene) 146 | f.write("distance tau : %.3f\n" % dTau) 147 | f.write("precision : %.4f\n" % eva[0]) 148 | f.write("recall : %.4f\n" % eva[1]) 149 | f.write("f-score : %.4f\n" % eva[2]) 150 | 151 | # Plotting 152 | plot_graph( 153 | scene, 154 | fscore, 155 | dist_threshold, 156 | edges_source, 157 | cum_source, 158 | edges_target, 159 | cum_target, 160 | plot_stretch, 161 | out_dir, 162 | ) 163 | 164 | 165 | if __name__ == "__main__": 166 | parser = argparse.ArgumentParser() 167 | parser.add_argument( 168 | "--dataset-dir", 169 | type=str, 170 | required=True, 171 | help="path to a dataset/scene directory containing X.json, X.ply, ...", 172 | ) 173 | parser.add_argument( 174 | "--traj-path", 175 | type=str, 176 | required=True, 177 | help= 178 | "path to trajectory file. 
See `convert_to_logfile.py` to create this file.", 179 | ) 180 | parser.add_argument( 181 | "--ply-path", 182 | type=str, 183 | required=True, 184 | help="path to reconstruction ply file", 185 | ) 186 | parser.add_argument( 187 | "--out-dir", 188 | type=str, 189 | default="", 190 | help= 191 | "output directory, default: an evaluation directory is created in the directory of the ply file", 192 | ) 193 | args = parser.parse_args() 194 | 195 | if args.out_dir.strip() == "": 196 | args.out_dir = os.path.join(os.path.dirname(args.ply_path), 197 | "evaluation") 198 | 199 | run_evaluation( 200 | dataset_dir=args.dataset_dir, 201 | traj_path=args.traj_path, 202 | ply_path=args.ply_path, 203 | out_dir=args.out_dir, 204 | ) 205 | -------------------------------------------------------------------------------- /evaluation/tnt_eval/trajectory_io.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import open3d as o3d 3 | 4 | 5 | class CameraPose: 6 | 7 | def __init__(self, meta, mat): 8 | self.metadata = meta 9 | self.pose = mat 10 | 11 | def __str__(self): 12 | return ("Metadata : " + " ".join(map(str, self.metadata)) + "\n" + 13 | "Pose : " + "\n" + np.array_str(self.pose)) 14 | 15 | 16 | def convert_trajectory_to_pointcloud(traj): 17 | pcd = o3d.geometry.PointCloud() 18 | for t in traj: 19 | pcd.points.append(t.pose[:3, 3]) 20 | return pcd 21 | 22 | 23 | def read_trajectory(filename): 24 | traj = [] 25 | with open(filename, "r") as f: 26 | metastr = f.readline() 27 | while metastr: 28 | metadata = map(int, metastr.split()) 29 | mat = np.zeros(shape=(4, 4)) 30 | for i in range(4): 31 | matstr = f.readline() 32 | mat[i, :] = np.fromstring(matstr, dtype=float, sep=" \t") 33 | traj.append(CameraPose(metadata, mat)) 34 | metastr = f.readline() 35 | return traj 36 | 37 | 38 | def write_trajectory(traj, filename): 39 | with open(filename, "w") as f: 40 | for x in traj: 41 | p = x.pose.tolist() 42 | f.write(" ".join(map(str, x.metadata)) + "\n") 43 | f.write("\n".join( 44 | " ".join(map("{0:.12f}".format, p[i])) for i in range(4))) 45 | f.write("\n") 46 | -------------------------------------------------------------------------------- /evaluation/tnt_eval/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def make_dir(path): 5 | if not os.path.exists(path): 6 | os.makedirs(path) 7 | -------------------------------------------------------------------------------- /gaussian_renderer/network_gui.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | import traceback 14 | import socket 15 | import json 16 | from scene.cameras import MiniCam 17 | 18 | host = "127.0.0.1" 19 | port = 6009 20 | 21 | conn = None 22 | addr = None 23 | 24 | listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 25 | 26 | def init(wish_host, wish_port): 27 | global host, port, listener 28 | host = wish_host 29 | port = wish_port 30 | listener.bind((host, port)) 31 | listener.listen() 32 | listener.settimeout(0) 33 | 34 | def try_connect(): 35 | global conn, addr, listener 36 | try: 37 | conn, addr = listener.accept() 38 | print(f"\nConnected by {addr}") 39 | conn.settimeout(None) 40 | except Exception as inst: 41 | pass 42 | 43 | def read(): 44 | global conn 45 | messageLength = conn.recv(4) 46 | messageLength = int.from_bytes(messageLength, 'little') 47 | message = conn.recv(messageLength) 48 | return json.loads(message.decode("utf-8")) 49 | 50 | def send(message_bytes, verify): 51 | global conn 52 | if message_bytes != None: 53 | conn.sendall(message_bytes) 54 | conn.sendall(len(verify).to_bytes(4, 'little')) 55 | conn.sendall(bytes(verify, 'ascii')) 56 | 57 | def receive(): 58 | message = read() 59 | 60 | width = message["resolution_x"] 61 | height = message["resolution_y"] 62 | 63 | if width != 0 and height != 0: 64 | try: 65 | do_training = bool(message["train"]) 66 | fovy = message["fov_y"] 67 | fovx = message["fov_x"] 68 | znear = message["z_near"] 69 | zfar = message["z_far"] 70 | do_shs_python = bool(message["shs_python"]) 71 | do_rot_scale_python = bool(message["rot_scale_python"]) 72 | keep_alive = bool(message["keep_alive"]) 73 | scaling_modifier = message["scaling_modifier"] 74 | world_view_transform = torch.reshape(torch.tensor(message["view_matrix"]), (4, 4)).cuda() 75 | world_view_transform[:,1] = -world_view_transform[:,1] 76 | world_view_transform[:,2] = -world_view_transform[:,2] 77 | full_proj_transform = torch.reshape(torch.tensor(message["view_projection_matrix"]), (4, 4)).cuda() 78 | full_proj_transform[:,1] = -full_proj_transform[:,1] 79 | custom_cam = MiniCam(width, height, fovy, fovx, znear, zfar, world_view_transform, full_proj_transform) 80 | except Exception as e: 81 | print("") 82 | traceback.print_exc() 83 | raise e 84 | return custom_cam, do_training, do_shs_python, do_rot_scale_python, keep_alive, scaling_modifier 85 | else: 86 | return None, None, None, None, None, None -------------------------------------------------------------------------------- /media/VCR-GauS.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HLinChen/VCR-GauS/aa715d19bfacfa9d491f477c572eab1839dcee3e/media/VCR-GauS.jpg -------------------------------------------------------------------------------- /process_data/convert_360_to_json.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | import sys 5 | from pathlib import Path 6 | from argparse import ArgumentParser 7 | import trimesh 8 | 9 | dir_path = Path(os.path.dirname(os.path.realpath(__file__))).parents[0] 10 | sys.path.append(dir_path.__str__()) 11 | 12 | from process_data.convert_data_to_json import export_to_json, get_split_dict, bound_by_pose # NOQA 13 | 14 | from submodules.colmap.scripts.python.database import COLMAPDatabase # NOQA 15 | from submodules.colmap.scripts.python.read_write_model import read_model, rotmat2qvec # NOQA 16 | 17 | 18 
| def create_init_files(pinhole_dict_file, db_file, out_dir): 19 | # Partially adapted from https://github.com/Kai-46/nerfplusplus/blob/master/colmap_runner/run_colmap_posed.py 20 | 21 | if not os.path.exists(out_dir): 22 | os.mkdir(out_dir) 23 | 24 | # create template 25 | with open(pinhole_dict_file) as fp: 26 | pinhole_dict = json.load(fp) 27 | 28 | template = {} 29 | cameras_line_template = '{camera_id} RADIAL {width} {height} {f} {cx} {cy} {k1} {k2}\n' 30 | images_line_template = '{image_id} {qw} {qx} {qy} {qz} {tx} {ty} {tz} {camera_id} {image_name}\n\n' 31 | 32 | for img_name in pinhole_dict: 33 | # w, h, fx, fy, cx, cy, qvec, t 34 | params = pinhole_dict[img_name] 35 | w = params[0] 36 | h = params[1] 37 | fx = params[2] 38 | # fy = params[3] 39 | cx = params[4] 40 | cy = params[5] 41 | qvec = params[6:10] 42 | tvec = params[10:13] 43 | 44 | cam_line = cameras_line_template.format( 45 | camera_id="{camera_id}", width=w, height=h, f=fx, cx=cx, cy=cy, k1=0, k2=0) 46 | img_line = images_line_template.format(image_id="{image_id}", qw=qvec[0], qx=qvec[1], qy=qvec[2], qz=qvec[3], 47 | tx=tvec[0], ty=tvec[1], tz=tvec[2], camera_id="{camera_id}", 48 | image_name=img_name) 49 | template[img_name] = (cam_line, img_line) 50 | 51 | # read database 52 | db = COLMAPDatabase.connect(db_file) 53 | table_images = db.execute("SELECT * FROM images") 54 | img_name2id_dict = {} 55 | for row in table_images: 56 | img_name2id_dict[row[1]] = row[0] 57 | 58 | cameras_txt_lines = [template[img_name][0].format(camera_id=1)] 59 | images_txt_lines = [] 60 | for img_name, img_id in img_name2id_dict.items(): 61 | image_line = template[img_name][1].format(image_id=img_id, camera_id=1) 62 | images_txt_lines.append(image_line) 63 | 64 | with open(os.path.join(out_dir, 'cameras.txt'), 'w') as fp: 65 | fp.writelines(cameras_txt_lines) 66 | 67 | with open(os.path.join(out_dir, 'images.txt'), 'w') as fp: 68 | fp.writelines(images_txt_lines) 69 | fp.write('\n') 70 | 71 | # create an empty points3D.txt 72 | fp = open(os.path.join(out_dir, 'points3D.txt'), 'w') 73 | fp.close() 74 | 75 | 76 | def convert_cam_dict_to_pinhole_dict(cam_dict, pinhole_dict_file): 77 | # Partially adapted from https://github.com/Kai-46/nerfplusplus/blob/master/colmap_runner/run_colmap_posed.py 78 | 79 | print('Writing pinhole_dict to: ', pinhole_dict_file) 80 | h = 1080 81 | w = 1920 82 | 83 | pinhole_dict = {} 84 | for img_name in cam_dict: 85 | W2C = cam_dict[img_name] 86 | 87 | # params 88 | fx = 0.6 * w 89 | fy = 0.6 * w 90 | cx = w / 2.0 91 | cy = h / 2.0 92 | 93 | qvec = rotmat2qvec(W2C[:3, :3]) 94 | tvec = W2C[:3, 3] 95 | 96 | params = [w, h, fx, fy, cx, cy, 97 | qvec[0], qvec[1], qvec[2], qvec[3], 98 | tvec[0], tvec[1], tvec[2]] 99 | pinhole_dict[img_name] = params 100 | 101 | with open(pinhole_dict_file, 'w') as fp: 102 | json.dump(pinhole_dict, fp, indent=2, sort_keys=True) 103 | 104 | 105 | def load_COLMAP_poses(cam_file, img_dir, tf='w2c'): 106 | # load img_dir namges 107 | names = sorted(os.listdir(img_dir)) 108 | 109 | with open(cam_file) as f: 110 | lines = f.readlines() 111 | 112 | # C2W 113 | poses = {} 114 | for idx, line in enumerate(lines): 115 | if idx % 5 == 0: # header 116 | img_idx, valid, _ = line.split(' ') 117 | if valid != '-1': 118 | poses[int(img_idx)] = np.eye(4) 119 | poses[int(img_idx)] 120 | else: 121 | if int(img_idx) in poses: 122 | num = np.array([float(n) for n in line.split(' ')]) 123 | poses[int(img_idx)][idx % 5-1, :] = num 124 | 125 | if tf == 'c2w': 126 | return poses 127 | else: 128 | # convert to W2C 
(follow nerf convention) 129 | poses_w2c = {} 130 | for k, v in poses.items(): 131 | poses_w2c[names[k]] = np.linalg.inv(v) 132 | return poses_w2c 133 | 134 | 135 | def load_transformation(trans_file): 136 | with open(trans_file) as f: 137 | lines = f.readlines() 138 | 139 | trans = np.eye(4) 140 | for idx, line in enumerate(lines): 141 | num = np.array([float(n) for n in line.split(' ')]) 142 | trans[idx, :] = num 143 | 144 | return trans 145 | 146 | 147 | def align_gt_with_cam(pts, trans): 148 | trans_inv = np.linalg.inv(trans) 149 | pts_aligned = pts @ trans_inv[:3, :3].transpose(-1, -2) + trans_inv[:3, -1] 150 | return pts_aligned 151 | 152 | 153 | def main(args): 154 | assert args.data_path, "Provide path to 360 dataset" 155 | scene_list = os.listdir(args.data_path) 156 | scene_list = sorted(scene_list) 157 | 158 | for scene in scene_list: 159 | scene_path = os.path.join(args.data_path, scene) 160 | if not os.path.isdir(scene_path): continue 161 | 162 | cameras, images, points3D = read_model(os.path.join(scene_path, "sparse/0"), ext=".bin") 163 | 164 | trans, scale, bounding_box = bound_by_pose(images) 165 | trans = trans.tolist() 166 | 167 | export_to_json(trans, scale, scene_path, 'meta.json') 168 | print('Writing data to json file: ', os.path.join(scene_path, 'meta.json')) 169 | 170 | 171 | if __name__ == '__main__': 172 | parser = ArgumentParser() 173 | parser.add_argument('--data_path', type=str, default=None, help='Path to tanks and temples dataset') 174 | parser.add_argument('--run_colmap', action='store_true', help='Run colmap') 175 | parser.add_argument('--export_json', action='store_true', help='export json') 176 | 177 | args = parser.parse_args() 178 | 179 | main(args) 180 | -------------------------------------------------------------------------------- /process_data/convert_data_to_json.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 
10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import numpy as np 14 | from argparse import ArgumentParser 15 | import os 16 | import sys 17 | from pathlib import Path 18 | import json 19 | import trimesh 20 | 21 | dir_path = Path(os.path.dirname(os.path.realpath(__file__))).parents[0] 22 | sys.path.append(dir_path.__str__()) 23 | 24 | from submodules.colmap.scripts.python.read_write_model import read_model, qvec2rotmat # NOQA 25 | 26 | 27 | def find_closest_point(p1, d1, p2, d2): 28 | # Calculate the direction vectors of the lines 29 | d1_norm = d1 / np.linalg.norm(d1) 30 | d2_norm = d2 / np.linalg.norm(d2) 31 | 32 | # Create the coefficient matrix A and the constant vector b 33 | A = np.vstack((d1_norm, -d2_norm)).T 34 | b = p2 - p1 35 | 36 | # Solve the linear system to find the parameters t1 and t2 37 | t1, t2 = np.linalg.lstsq(A, b, rcond=None)[0] 38 | 39 | # Calculate the closest point on each line 40 | closest_point1 = p1 + d1_norm * t1 41 | closest_point2 = p2 + d2_norm * t2 42 | 43 | # Calculate the average of the two closest points 44 | closest_point = 0.5 * (closest_point1 + closest_point2) 45 | 46 | return closest_point 47 | 48 | 49 | def bound_by_pose(images): 50 | poses = [] 51 | for img in images.values(): 52 | rotation = qvec2rotmat(img.qvec) 53 | translation = img.tvec.reshape(3, 1) 54 | w2c = np.concatenate([rotation, translation], 1) 55 | w2c = np.concatenate([w2c, np.array([0, 0, 0, 1])[None]], 0) 56 | c2w = np.linalg.inv(w2c) 57 | poses.append(c2w) 58 | 59 | center = np.array([0.0, 0.0, 0.0]) 60 | for f in poses: 61 | src_frame = f[0:3, :] 62 | for g in poses: 63 | tgt_frame = g[0:3, :] 64 | p = find_closest_point(src_frame[:, 3], src_frame[:, 2], tgt_frame[:, 3], tgt_frame[:, 2]) 65 | center += p 66 | center /= len(poses) ** 2 67 | 68 | radius = 0.0 69 | for f in poses: 70 | radius += np.linalg.norm(f[0:3, 3]) 71 | radius /= len(poses) 72 | bounding_box = [ 73 | [center[0] - radius, center[0] + radius], 74 | [center[1] - radius, center[1] + radius], 75 | [center[2] - radius, center[2] + radius], 76 | ] 77 | return center, radius, bounding_box 78 | 79 | 80 | def bound_by_points(points3D): 81 | if not isinstance(points3D, np.ndarray): 82 | xyzs = np.stack([point.xyz for point in points3D.values()]) 83 | else: 84 | xyzs = points3D 85 | center = xyzs.mean(axis=0) 86 | std = xyzs.std(axis=0) 87 | # radius = float(std.max() * 2) # use 2*std to define the region, equivalent to 95% percentile 88 | radius = np.abs(xyzs).max(0) * 1.1 89 | bounding_box = [ 90 | [center[0] - std[0] * 3, center[0] + std[0] * 3], 91 | [center[1] - std[1] * 3, center[1] + std[1] * 3], 92 | [center[2] - std[2] * 3, center[2] + std[2] * 3], 93 | ] 94 | return center, radius, bounding_box 95 | 96 | 97 | def compute_oriented_bound(pts): 98 | to_align, _ = trimesh.bounds.oriented_bounds(pts) 99 | 100 | scale = (np.abs((to_align[:3, :3] @ pts.vertices.T + to_align[:3, 3:]).T).max(0) * 1.2).tolist() 101 | 102 | return to_align.tolist(), scale 103 | 104 | 105 | def split_data(names, split=10): 106 | split_dict = {'train': [], 'test': []} 107 | names = sorted(names) 108 | 109 | for i, name in enumerate(names): 110 | if i % split == 0: 111 | split_dict['test'].append(name) 112 | else: 113 | split_dict['train'].append(name) 114 | 115 | split_dict['train'] = sorted(split_dict['train']) 116 | split_dict['test'] = sorted(split_dict['test']) 117 | return split_dict 118 | 119 | 120 | def get_split_dict(scene_path): 121 | split_dict = None 122 | 123 | if 
os.path.exists(os.path.join(scene_path, 'train_test_lists.json')): 124 | image_names = os.listdir(os.path.join(scene_path, "images")) 125 | image_names = sorted(['{:06}'.format(int(i.split(".")[0])) for i in image_names]) 126 | 127 | with open(os.path.join(scene_path, 'train_test_lists.json'), 'r') as fp: 128 | split_dict = json.load(fp) 129 | 130 | test_split = sorted([i.split(".")[0] for i in split_dict['test']]) 131 | train_split = [i for i in image_names if i not in test_split] 132 | 133 | assert len(train_split) + len(test_split) == len(image_names), "train and test split do not cover all images" 134 | 135 | split_dict = { 136 | 'train': train_split, 137 | 'test': test_split, 138 | } 139 | 140 | return split_dict 141 | 142 | 143 | def check_concentric(images, ang_tol=np.pi / 6.0, radii_tol=0.5, pose_tol=0.5): 144 | look_at = [] 145 | cam_loc = [] 146 | for img in images.values(): 147 | rotation = qvec2rotmat(img.qvec) 148 | translation = img.tvec.reshape(3, 1) 149 | w2c = np.concatenate([rotation, translation], 1) 150 | w2c = np.concatenate([w2c, np.array([0, 0, 0, 1])[None]], 0) 151 | c2w = np.linalg.inv(w2c) 152 | cam_loc.append(c2w[:3, -1]) 153 | look_at.append(c2w[:3, 2]) 154 | look_at = np.stack(look_at) 155 | look_at = look_at / np.linalg.norm(look_at, axis=1, keepdims=True) 156 | cam_loc = np.stack(cam_loc) 157 | num_images = cam_loc.shape[0] 158 | 159 | center = cam_loc.mean(axis=0) 160 | vec = center - cam_loc 161 | radii = np.linalg.norm(vec, axis=1, keepdims=True) 162 | vec_unit = vec / radii 163 | ang = np.arccos((look_at * vec_unit).sum(axis=-1, keepdims=True)) 164 | ang_valid = ang < ang_tol 165 | print(f"Fraction of images looking at the center: {ang_valid.sum()/num_images:.2f}.") 166 | 167 | radius_mean = radii.mean() 168 | radii_valid = np.isclose(radius_mean, radii, rtol=radii_tol) 169 | print(f"Fraction of images positioned around the center: {radii_valid.sum()/num_images:.2f}.") 170 | 171 | valid = ang_valid * radii_valid 172 | print(f"Valid fraction of concentric images: {valid.sum()/num_images:.2f}.") 173 | 174 | return valid.sum() / num_images > pose_tol 175 | 176 | 177 | def export_to_json(trans, scale, scene_path, file_name, split_dict=None, do_split=False): 178 | out = { 179 | "trans": trans, 180 | "scale": scale, 181 | } 182 | 183 | if do_split: 184 | if split_dict is None: 185 | image_names = os.listdir(os.path.join(scene_path, "images")) 186 | image_names = ['{:06}'.format(int(i.split(".")[0])) for i in image_names] 187 | split_dict = split_data(image_names, split=10) 188 | 189 | out.update(split_dict) 190 | 191 | with open(os.path.join(scene_path, file_name), "w") as outputfile: 192 | json.dump(out, outputfile, indent=4) 193 | 194 | return 195 | 196 | 197 | def data_to_json(args): 198 | cameras, images, points3D = read_model(os.path.join(args.data_dir, "sparse"), ext=".bin") 199 | 200 | # define bounding regions based on scene type 201 | if args.scene_type == "outdoor": 202 | if check_concentric(images): 203 | center, scale, bounding_box = bound_by_pose(images) 204 | else: 205 | center, scale, bounding_box = bound_by_points(points3D) 206 | elif args.scene_type == "indoor": 207 | # use sfm points as a proxy to define bounding regions 208 | center, scale, bounding_box = bound_by_points(points3D) 209 | elif args.scene_type == "object": 210 | # use poses as a proxy to define bounding regions 211 | center, scale, bounding_box = bound_by_pose(images) 212 | else: 213 | raise TypeError("Unknown scene type") 214 | 215 | # export json file 216 | 
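# The call below writes meta.json holding the estimated scene center ("trans")
# and scale; when do_split is enabled, export_to_json additionally records a
# train/test image split (split_data assigns every 10th image to the test set).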
export_to_json(list(center), scale, args.data_dir, "meta.json") 217 | print("Writing data to json file: ", os.path.join(args.data_dir, "meta.json")) 218 | return 219 | 220 | 221 | if __name__ == "__main__": 222 | parser = ArgumentParser() 223 | parser.add_argument("--data_dir", type=str, default=None, help="Path to data") 224 | parser.add_argument( 225 | "--scene_type", 226 | type=str, 227 | default="outdoor", 228 | choices=["outdoor", "indoor", "object"], 229 | help="Select scene type. Outdoor for building-scale reconstruction; " 230 | "indoor for room-scale reconstruction; object for object-centric scene reconstruction.", 231 | ) 232 | args = parser.parse_args() 233 | data_to_json(args) 234 | -------------------------------------------------------------------------------- /process_data/visualize_transforms.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "8b8d7b17-af50-42cd-b531-ef61c49c9e61", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "# Set the work directory to the imaginaire root.\n", 11 | "import os, sys, time\n", 12 | "import pathlib\n", 13 | "root_dir = pathlib.Path().absolute().parents[2]\n", 14 | "os.chdir(root_dir)\n", 15 | "print(f\"Root Directory Path: {root_dir}\")" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "id": "2b5b9e2f-841c-4815-92e0-0c76ed46da62", 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "# Import Python libraries.\n", 26 | "import numpy as np\n", 27 | "import torch\n", 28 | "import k3d\n", 29 | "import json\n", 30 | "from collections import OrderedDict\n", 31 | "# Import imaginaire modules.\n", 32 | "from projects.nerf.utils import camera, visualize\n", 33 | "from third_party.colmap.scripts.python.read_write_model import read_model" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "id": "97bedecf-da68-44b1-96cf-580ef7e7f3f0", 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "# Read the COLMAP data.\n", 44 | "colmap_path = \"datasets/lego_ds2\"\n", 45 | "json_fname = f\"{colmap_path}/transforms.json\"\n", 46 | "with open(json_fname) as file:\n", 47 | " meta = json.load(file)\n", 48 | "center = meta[\"sphere_center\"]\n", 49 | "radius = meta[\"sphere_radius\"]\n", 50 | "# Convert camera poses.\n", 51 | "poses = []\n", 52 | "for frame in meta[\"frames\"]:\n", 53 | " c2w = torch.tensor(frame[\"transform_matrix\"])\n", 54 | " c2w[:, 1:3] *= -1\n", 55 | " w2c = c2w.inverse()\n", 56 | " pose = w2c[:3] # [3,4]\n", 57 | " poses.append(pose)\n", 58 | "poses = torch.stack(poses, dim=0)\n", 59 | "print(f\"# images: {len(poses)}\")" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "id": "2016d20c-1e58-407f-9810-cbe76dc5ccec", 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "vis_depth = 0.2\n", 70 | "k3d_textures = []" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "id": "d7168a09-6654-4660-b140-66b9dfd6f1e8", 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "# (optional) visualize the images.\n", 81 | "# This block can be skipped if we don't want to visualize the image observations.\n", 82 | "for i, frame in enumerate(meta[\"frames\"]):\n", 83 | " image_fname = frame[\"file_path\"]\n", 84 | " image_path = f\"{colmap_path}/{image_fname}\"\n", 85 | " with open(image_path, \"rb\") as file:\n", 86 | " binary = file.read()\n", 87 | " # 
Compute the corresponding image corners in 3D.\n", 88 | " pose = poses[i]\n", 89 | " corners = torch.tensor([[-0.5, 0.5, 1], [0.5, 0.5, 1], [-0.5, -0.5, 1]])\n", 90 | " corners *= vis_depth\n", 91 | " corners = camera.cam2world(corners, pose)\n", 92 | " puv = [corners[0].tolist(), (corners[1]-corners[0]).tolist(), (corners[2]-corners[0]).tolist()]\n", 93 | " k3d_texture = k3d.texture(binary, file_format=\"jpg\", puv=puv)\n", 94 | " k3d_textures.append(k3d_texture)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "id": "b6cf60ec-fe6a-43ba-9aaf-e3c7afd88208", 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "# Visualize the bounding sphere.\n", 105 | "json_fname = f\"{colmap_path}/transforms.json\"\n", 106 | "with open(json_fname) as file:\n", 107 | " meta = json.load(file)\n", 108 | "center = meta[\"sphere_center\"]\n", 109 | "radius = meta[\"sphere_radius\"]\n", 110 | "# ------------------------------------------------------------------------------------\n", 111 | "# These variables can be adjusted to make the bounding sphere fit the region of interest.\n", 112 | "# The adjusted values can then be set in the config as data.readjust.center and data.readjust.scale\n", 113 | "readjust_center = np.array([0., 0., 0.])\n", 114 | "readjust_scale = 1.\n", 115 | "# ------------------------------------------------------------------------------------\n", 116 | "center += readjust_center\n", 117 | "radius *= readjust_scale\n", 118 | "# Make some points to hallucinate a bounding sphere.\n", 119 | "sphere_points = np.random.randn(100000, 3)\n", 120 | "sphere_points = sphere_points / np.linalg.norm(sphere_points, axis=-1, keepdims=True)\n", 121 | "sphere_points = sphere_points * radius + center" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "id": "fdde170b-4546-4617-9162-a9fcb936347d", 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "# Visualize with K3D.\n", 132 | "plot = k3d.plot(name=\"poses\", height=800, camera_rotate_speed=5.0, camera_zoom_speed=3.0, camera_pan_speed=1.0)\n", 133 | "k3d_objects = visualize.k3d_visualize_pose(poses, vis_depth=vis_depth, xyz_length=0.02, center_size=0.01, xyz_width=0.005, mesh_opacity=0.)\n", 134 | "for k3d_object in k3d_objects:\n", 135 | " plot += k3d_object\n", 136 | "for k3d_texture in k3d_textures:\n", 137 | " plot += k3d_texture\n", 138 | "plot += k3d.points(sphere_points, color=0x4488ff, point_size=0.01, shader=\"flat\")\n", 139 | "plot.display()\n", 140 | "plot.camera_fov = 30.0" 141 | ] 142 | } 143 | ], 144 | "metadata": { 145 | "kernelspec": { 146 | "display_name": "Python 3 (ipykernel)", 147 | "language": "python", 148 | "name": "python3" 149 | }, 150 | "language_info": { 151 | "codemirror_mode": { 152 | "name": "ipython", 153 | "version": 3 154 | }, 155 | "file_extension": ".py", 156 | "mimetype": "text/x-python", 157 | "name": "python", 158 | "nbconvert_exporter": "python", 159 | "pygments_lexer": "ipython3", 160 | "version": "3.8.13" 161 | } 162 | }, 163 | "nbformat": 4, 164 | "nbformat_minor": 5 165 | } 166 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 240 3 | 4 | [build-system] 5 | requires = ["setuptools>=61.0"] 6 | build-backend = "setuptools.build_meta" 7 | 8 | [project] 9 | name = "vcr-gaus" 10 | version = "0.0.0.dev0" 11 | description = "VCR-GauS: View Consistent 
Depth-Normal Regularizer for Gaussian Surface Reconstruction" 12 | readme = "README.md" 13 | requires-python = ">=3.8" 14 | classifiers = [ 15 | "Programming Language :: Python :: 3", 16 | "License :: OSI Approved :: Apache Software License", 17 | ] 18 | 19 | [project.optional-dependencies] 20 | 21 | f1eval = [ 22 | "open3d==0.10.0", 23 | "numpy" 24 | ] 25 | 26 | train = [ 27 | "torch==2.0.1", 28 | "torchvision==0.15.2", 29 | "torchaudio==2.0.2", 30 | "numpy==1.26.1", 31 | "open3d", 32 | "plyfile", 33 | "ninja", 34 | "GPUtil", 35 | "opencv-python", 36 | "lpips", 37 | "trimesh", 38 | "pymeshlab", 39 | "termcolor", 40 | "wandb", 41 | "imageio", 42 | "scikit-image", 43 | "torchmetrics", 44 | "mediapy", 45 | ] 46 | 47 | [project.urls] 48 | "Homepage" = "https://hlinchen.github.io/projects/VCR-GauS/" 49 | "Bug Tracker" = "https://github.com/HLinChen/VCR-GauS/issues" 50 | 51 | [tool.setuptools.packages.find] 52 | include = ["vcr*", "trl*"] 53 | exclude = [ 54 | "assets*", 55 | "benchmark*", 56 | "docs", 57 | "dist*", 58 | "playground*", 59 | "scripts*", 60 | "tests*", 61 | "checkpoints*", 62 | "project_checkpoints*", 63 | "debug_checkpoints*", 64 | "mlx_configs*", 65 | "wandb*", 66 | "notebooks*", 67 | ] 68 | 69 | [tool.wheel] 70 | exclude = [ 71 | "assets*", 72 | "benchmark*", 73 | "docs", 74 | "dist*", 75 | "playground*", 76 | "scripts*", 77 | "tests*", 78 | "checkpoints*", 79 | "project_checkpoints*", 80 | "debug_checkpoints*", 81 | "mlx_configs*", 82 | "wandb*", 83 | "notebooks*", 84 | ] 85 | -------------------------------------------------------------------------------- /python_scripts/run_base.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import GPUtil 4 | 5 | 6 | def worker(gpu, scene, factor, fn): 7 | print(f"Starting job on GPU {gpu} with scene {scene}\n") 8 | fn(gpu, scene, factor) 9 | print(f"Finished job on GPU {gpu} with scene {scene}\n") 10 | # This worker function starts a job and returns when it's done. 11 | 12 | 13 | def dispatch_jobs(jobs, executor, excluded_gpus, fn): 14 | future_to_job = {} 15 | reserved_gpus = set() # GPUs that are slated for work but may not be active yet 16 | 17 | while jobs or future_to_job: 18 | # Get the list of available GPUs, not including those that are reserved. 19 | all_available_gpus = set(GPUtil.getAvailable(order="first", limit=10, maxMemory=0.1, maxLoad=0.1)) 20 | available_gpus = list(all_available_gpus - reserved_gpus - excluded_gpus) 21 | 22 | # Launch new jobs on available GPUs 23 | while available_gpus and jobs: 24 | gpu = available_gpus.pop(0) 25 | job = jobs.pop(0) 26 | future = executor.submit(worker, gpu, *job, fn) # Unpacking job as arguments to worker 27 | future_to_job[future] = (gpu, job) 28 | 29 | reserved_gpus.add(gpu) # Reserve this GPU until the job starts processing 30 | 31 | # Check for completed jobs and remove them from the list of running jobs. 32 | # Also, release the GPUs they were using. 33 | done_futures = [future for future in future_to_job if future.done()] 34 | for future in done_futures: 35 | job = future_to_job.pop(future) # Remove the job associated with the completed future 36 | gpu = job[0] # The GPU is the first element in each job tuple 37 | reserved_gpus.discard(gpu) # Release this GPU 38 | print(f"Job {job} has finished, releasing GPU {gpu}") 39 | # (Optional) You might want to introduce a small delay here to prevent this loop from spinning very fast 40 | # when there are no GPUs available.
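# The 5-second sleep below sets the dispatcher's polling cadence: a shorter interval
# reacts faster to freed GPUs, a longer one spends less time querying GPUtil.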
41 | time.sleep(5) 42 | 43 | print("All jobs have been processed.") 44 | 45 | 46 | def check_finish(scene, path, type='mesh'): 47 | if not os.path.exists(path): 48 | print(f"Scene \033[1;31m{scene}\033[0m failed in \033[1;31m{type}\033[0m") 49 | return False 50 | return True 51 | 52 | 53 | train_cmd = "OMP_NUM_THREADS=4 CUDA_VISIBLE_DEVICES={gpu} \ 54 | python train.py \ 55 | --config=configs/{dataset}/{cfg}.yaml \ 56 | --logdir={log_dir} \ 57 | --model.source_path={data_dir}/{scene}/ \ 58 | --train.debug_from={debug_from} \ 59 | --model.data_device={data_device} \ 60 | --model.resolution={resolution} \ 61 | --wandb \ 62 | --wandb_name {project}" 63 | 64 | 65 | train_cmd_new = "OMP_NUM_THREADS=4 CUDA_VISIBLE_DEVICES={gpu} \ 66 | python train.py \ 67 | --config={cfg} \ 68 | --logdir={log_dir} \ 69 | --model.source_path={data_dir}/{scene}/ \ 70 | --train.debug_from={debug_from} \ 71 | --model.data_device={data_device} \ 72 | --model.resolution={resolution} \ 73 | --wandb \ 74 | --wandb_name {project}" 75 | 76 | 77 | extract_mesh_cmd = "OMP_NUM_THREADS=4 CUDA_VISIBLE_DEVICES={gpu} \ 78 | python tools/depth2mesh.py \ 79 | --mesh_name {ply} \ 80 | --split {step} \ 81 | --method {fuse_method} \ 82 | --voxel_size {voxel_size} \ 83 | --num_cluster {num_cluster} \ 84 | --max_depth {max_depth} \ 85 | --clean \ 86 | --prob_thres {prob_thr} \ 87 | --cfg_path {log_dir}/config.yaml" 88 | 89 | 90 | eval_tnt_cmd = "OMP_NUM_THREADS={num_threads} CUDA_VISIBLE_DEVICES={gpu} \ 91 | conda run -n {eval_env} \ 92 | python evaluation/tnt_eval/run.py \ 93 | --dataset-dir {data_dir}/{scene}/ \ 94 | --traj-path {data_dir}/{scene}/{scene}_COLMAP_SfM.log \ 95 | --ply-path {log_dir}/{ply} > {log_dir}/fscore.txt" 96 | 97 | 98 | eval_cd_cmd = "OMP_NUM_THREADS={num_threads} CUDA_VISIBLE_DEVICES={gpu} \ 99 | python evaluation/eval_dtu/evaluate_single_scene.py \ 100 | --input_mesh {tri_mesh_path} \ 101 | --scan_id {scan_id} --output_dir {output_dir} \ 102 | --mask_dir {data_dir} \ 103 | --DTU {data_dir}" 104 | 105 | 106 | render_cmd = "CUDA_VISIBLE_DEVICES={gpu} \ 107 | python evaluation/render.py \ 108 | --cfg_path {log_dir}/config.yaml \ 109 | --iteration 30000 \ 110 | --skip_train" 111 | 112 | eval_psnr_cmd = "CUDA_VISIBLE_DEVICES={gpu} \ 113 | python evaluation/metrics.py \ 114 | --cfg_path {log_dir}/config.yaml" 115 | 116 | eval_replica_cmd = "OMP_NUM_THREADS={num_threads} CUDA_VISIBLE_DEVICES={gpu} \ 117 | python evaluation/replica_eval/evaluate_single_scene.py \ 118 | --input_mesh {tri_mesh_path} \ 119 | --scene {scene} \ 120 | --output_dir {output_dir} \ 121 | --data_dir {data_dir}" -------------------------------------------------------------------------------- /python_scripts/run_dtu.py: -------------------------------------------------------------------------------- 1 | # training scripts for the DTU dataset 2 | import os 3 | import sys 4 | import time 5 | from concurrent.futures import ThreadPoolExecutor 6 | 7 | sys.path.append(os.getcwd()) 8 | from python_scripts.run_base import dispatch_jobs, train_cmd, extract_mesh_cmd, eval_cd_cmd, check_finish 9 | from python_scripts.show_dtu import show_matrix 10 | 11 | TRIAL_NAME = 'vcr_gaus' 12 | PROJECT = 'vcr_gaus' 13 | PROJECT_wandb = 'vcr_gaus_dtu' 14 | DATASET = 'dtu' 15 | base_dir = "/your/path" 16 | output_dir = f"{base_dir}/output/{PROJECT}/{DATASET}" 17 | data_dir = f"{base_dir}/data/DTU_mask" 18 | 19 | do_train = False 20 | do_extract_mesh = False 21 | do_cd = True 22 | dry_run = False 23 | 24 | node = 0 25 | max_workers = 15 26 | be = node*max_workers 27 | 
excluded_gpus = set([]) 28 | 29 | total_list = [ 30 | 'scan24', 'scan37', 'scan40', 'scan55', 'scan63', 'scan65', 'scan69', 31 | 'scan83', 'scan97', 'scan105', 'scan106', 'scan110', 'scan114', 'scan118', 'scan122' 32 | ] 33 | training_list = [ 34 | 'scan24', 'scan37', 'scan40', 'scan55', 'scan63', 'scan65', 'scan69', 35 | 'scan83', 'scan97', 'scan105', 'scan106', 'scan110', 'scan114', 'scan118', 'scan122' 36 | ] 37 | 38 | training_list = training_list[be: be + max_workers] 39 | scenes = training_list 40 | 41 | factors = [-1] * len(scenes) 42 | debug_from = -1 43 | 44 | eval_env = 'pt' 45 | data_device = 'cuda' 46 | voxel_size = 0.004 47 | step = 1 48 | PLY = f"ours.ply" 49 | TOTAL_THREADS = 64 50 | NUM_THREADS = TOTAL_THREADS // max_workers 51 | prob_thr = 0.15 52 | num_cluster = 1 53 | max_depth = 3 54 | fuse_method = 'tsdf_cpu' 55 | 56 | jobs = list(zip(scenes, factors)) 57 | 58 | def train_scene(gpu, scene, factor): 59 | time.sleep(2*gpu) 60 | os.system('ulimit -n 9000') 61 | log_dir = f"{output_dir}/{scene}/{TRIAL_NAME}" 62 | 63 | fail = 0 64 | 65 | if not dry_run: 66 | if do_train: 67 | cmd = train_cmd.format(gpu=gpu, dataset=DATASET, cfg='base', 68 | scene=scene, log_dir=log_dir, 69 | data_dir=data_dir, debug_from=debug_from, 70 | data_device=data_device, resolution=factor, project=PROJECT_wandb) 71 | print(cmd) 72 | fail = os.system(cmd) 73 | 74 | if fail == 0: 75 | if not dry_run: 76 | # fusion 77 | if do_extract_mesh: 78 | if not check_finish(scene, f"{log_dir}/point_cloud", 'train'): return False 79 | cmd = extract_mesh_cmd.format(gpu=gpu, ply=PLY, step=step, fuse_method=fuse_method, voxel_size=voxel_size, num_cluster=num_cluster, max_depth=max_depth, log_dir=log_dir, prob_thr=prob_thr) 80 | fail = os.system(cmd) 81 | print(cmd) 82 | 83 | # evaluation 84 | # evaluate the mesh 85 | scan_id = scene[4:] 86 | cmd = eval_cd_cmd.format(num_threads=NUM_THREADS, gpu=gpu, tri_mesh_path=f'{log_dir}/{PLY}', scan_id=scan_id, output_dir=log_dir, data_dir=data_dir) 87 | if fail == 0: 88 | if not dry_run: 89 | if do_cd: 90 | if not check_finish(scene, f"{log_dir}/{PLY}", 'mesh'): return False 91 | print(cmd) 92 | fail = os.system(cmd) 93 | if not check_finish(scene, f"{log_dir}/results.json", 'cd'): return False 94 | return fail == 0 95 | 96 | 97 | # Using ThreadPoolExecutor to manage the thread pool 98 | with ThreadPoolExecutor(max_workers) as executor: 99 | dispatch_jobs(jobs, executor, excluded_gpus, train_scene) 100 | 101 | show_matrix(total_list, [output_dir], TRIAL_NAME) 102 | print(TRIAL_NAME, " done") -------------------------------------------------------------------------------- /python_scripts/run_mipnerf360.py: -------------------------------------------------------------------------------- 1 | # Training script for the Mip-NeRF 360 dataset 2 | import os 3 | import sys 4 | import time 5 | from concurrent.futures import ThreadPoolExecutor 6 | 7 | sys.path.append(os.getcwd()) 8 | from python_scripts.run_base import dispatch_jobs, train_cmd, extract_mesh_cmd, check_finish, render_cmd, eval_psnr_cmd 9 | from python_scripts.show_360 import show_matrix 10 | 11 | 12 | TRIAL_NAME = 'vcr_gaus' 13 | PROJECT = 'vcr_gaus' 14 | PROJECT_wandb = 'vcr_gaus_360' 15 | 16 | do_train = True 17 | do_render = True 18 | do_eval = True 19 | do_extract_mesh = True 20 | dry_run = False 21 | 22 | node = 0 23 | max_workers = 9 24 | be = node*max_workers 25 | excluded_gpus = set([]) 26 | 27 | total_list = [ 28 | "bicycle", "bonsai", "counter", "flowers", "garden", "stump", "treehill", "kitchen", "room" 29 | ] 
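# total_list is only used for the final metrics summary via show_matrix; the slice of
# training_list taken below (training_list[be: be + max_workers]) is what this node trains.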
30 | training_list = [ 31 | "bicycle", "bonsai", "counter", "flowers", "garden", "stump", "treehill", "kitchen", "room" 32 | ] 33 | 34 | training_list = training_list[be: be + max_workers] 35 | scenes = training_list 36 | 37 | factors = [-1] * len(scenes) 38 | 39 | debug_from = -1 40 | 41 | DATASET = '360_v2' 42 | eval_env = 'pt' 43 | data_device = 'cpu' 44 | step = 1 45 | max_depth = 6.0 46 | voxel_size = 8e-3 47 | PLY = f"fused_mesh_split{step}.ply" 48 | TOTAL_THREADS = 64 49 | NUM_THREADS = TOTAL_THREADS // max_workers 50 | prob_thr = 0.15 51 | num_cluster = 1000 52 | fuse_method = 'tsdf' 53 | 54 | base_dir = "/your/path" 55 | output_dir = f"{base_dir}/output/{PROJECT}/{DATASET}" 56 | data_dir = f"{base_dir}/data/{DATASET}" 57 | 58 | jobs = list(zip(scenes, factors)) 59 | 60 | 61 | def train_scene(gpu, scene, factor): 62 | time.sleep(2*gpu) 63 | os.system('ulimit -n 9000') 64 | log_dir = f"{output_dir}/{scene}/{TRIAL_NAME}" 65 | 66 | fail = 0 67 | 68 | if not dry_run: 69 | if do_train: 70 | cmd = train_cmd.format(gpu=gpu, dataset=DATASET, cfg='base', 71 | scene=scene, log_dir=log_dir, 72 | data_dir=data_dir, debug_from=debug_from, 73 | data_device=data_device, resolution=factor, project=PROJECT_wandb) 74 | print(cmd) 75 | fail = os.system(cmd) 76 | 77 | if fail == 0: 78 | if not dry_run: 79 | # render 80 | cmd = render_cmd.format(gpu=gpu, log_dir=log_dir) 81 | if fail == 0: 82 | if not dry_run: 83 | if do_render: 84 | print(cmd) 85 | fail = os.system(cmd) 86 | if not check_finish(scene, f"{log_dir}/test/ours_30000/renders", 'render'): return False 87 | 88 | # eval 89 | cmd = eval_psnr_cmd.format(gpu=gpu, log_dir=log_dir) 90 | if fail == 0: 91 | if not dry_run: 92 | if do_eval: 93 | print(cmd) 94 | fail = os.system(cmd) 95 | if not check_finish(scene, f"{log_dir}/results.json", 'eval'): return False 96 | 97 | # fusion 98 | if do_extract_mesh: 99 | if not check_finish(scene, f"{log_dir}/point_cloud", 'train'): return False 100 | cmd = extract_mesh_cmd.format(gpu=gpu, ply=PLY, step=step, fuse_method=fuse_method, voxel_size=voxel_size, num_cluster=num_cluster, max_depth=max_depth, log_dir=log_dir, prob_thr=prob_thr) 101 | fail = os.system(cmd) 102 | print(cmd) 103 | 104 | return fail == 0 105 | 106 | 107 | # Using ThreadPoolExecutor to manage the thread pool 108 | with ThreadPoolExecutor(max_workers) as executor: 109 | dispatch_jobs(jobs, executor, excluded_gpus, train_scene) 110 | 111 | show_matrix(total_list, [output_dir], TRIAL_NAME) 112 | print(TRIAL_NAME, " done") 113 | -------------------------------------------------------------------------------- /python_scripts/run_tnt.py: -------------------------------------------------------------------------------- 1 | # training scripts for the TNT datasets 2 | import os 3 | import sys 4 | import time 5 | from concurrent.futures import ThreadPoolExecutor 6 | 7 | sys.path.append(os.getcwd()) 8 | from python_scripts.run_base import dispatch_jobs, train_cmd, extract_mesh_cmd, eval_tnt_cmd, check_finish 9 | from python_scripts.show_tnt import show_matrix 10 | 11 | 12 | TRIAL_NAME = 'vcr_gaus' 13 | PROJECT = 'vcr_gaus' 14 | DATASET = 'tnt' 15 | base_dir = "/your/path" 16 | output_dir = f"{base_dir}/output/{PROJECT}/{DATASET}" 17 | data_dir = f"{base_dir}/data/{DATASET}" 18 | 19 | do_train = True 20 | do_extract_mesh = True 21 | do_f1 = True 22 | dry_run = False 23 | 24 | node = 0 25 | max_workers = 4 26 | be = node*max_workers 27 | excluded_gpus = set([]) 28 | 29 | total_list = [ 30 | 'Barn', 'Caterpillar', 'Courthouse', 'Ignatius', 31 | 
'Meetingroom', 'Truck' 32 | ] 33 | training_list = [ 34 | 'Barn', 'Caterpillar', 'Courthouse', 'Ignatius', 35 | 'Meetingroom', 'Truck' 36 | ] 37 | 38 | training_list = training_list[be: be + max_workers] 39 | scenes = training_list 40 | 41 | factors = [1] * len(scenes) 42 | debug_from = -1 # enable wandb 43 | 44 | eval_env = 'f1eval' 45 | data_device = 'cpu' 46 | step = 3 47 | voxel_size = [0.02, 0.015, 0.01] + [x / 1000.0 for x in range(2, 10, 1)][::-1] 48 | voxel_size = sorted(voxel_size) 49 | PLY = f"ours.ply" 50 | TOTAL_THREADS = 128 51 | NUM_THREADS = TOTAL_THREADS // max_workers 52 | prob_thr = 0.3 53 | num_cluster = 1000 54 | fuse_method = 'tsdf' 55 | max_depth = 8 56 | 57 | 58 | jobs = list(zip(scenes, factors)) 59 | 60 | 61 | def train_scene(gpu, scene, factor): 62 | time.sleep(2*gpu) 63 | os.system('ulimit -n 9000') 64 | log_dir = f"{output_dir}/{scene}/{TRIAL_NAME}" 65 | 66 | fail = 0 67 | 68 | if not dry_run: 69 | if do_train: 70 | cmd = train_cmd.format(gpu=gpu, dataset=DATASET, cfg=scene, 71 | scene=scene, log_dir=log_dir, 72 | data_dir=data_dir, debug_from=debug_from, 73 | data_device=data_device, resolution=factor, project=PROJECT) 74 | print(cmd) 75 | fail = os.system(cmd) 76 | 77 | if fail == 0: 78 | if not dry_run: 79 | # fusion 80 | if do_extract_mesh: 81 | if not check_finish(scene, f"{log_dir}/point_cloud", 'train'): return False 82 | for vs in voxel_size: 83 | cmd = extract_mesh_cmd.format(gpu=gpu, ply=PLY, step=step, fuse_method=fuse_method, voxel_size=vs, num_cluster=num_cluster, max_depth=max_depth, log_dir=log_dir, prob_thr=prob_thr) 84 | fail = os.system(cmd) 85 | if fail == 0: break 86 | print(cmd) 87 | 88 | # evaluation 89 | # You need to install open3d==0.9 for evaluation 90 | # evaluate the mesh 91 | cmd = eval_tnt_cmd.format(num_threads=NUM_THREADS, gpu=gpu, eval_env=eval_env, data_dir=data_dir, scene=scene, log_dir=log_dir, ply=PLY) 92 | if fail == 0: 93 | if not dry_run: 94 | if do_f1: 95 | if not check_finish(scene, f"{log_dir}/{PLY}", 'mesh'): return False 96 | print(cmd) 97 | fail = os.system(cmd) 98 | if not check_finish(scene, f"{log_dir}/evaluation/evaluation.txt", 'f1'): return False 99 | # return True 100 | return fail == 0 101 | 102 | 103 | # Using ThreadPoolExecutor to manage the thread pool 104 | with ThreadPoolExecutor(max_workers) as executor: 105 | dispatch_jobs(jobs, executor, excluded_gpus, train_scene) 106 | 107 | show_matrix(total_list, [output_dir], TRIAL_NAME) 108 | print(TRIAL_NAME, " done") -------------------------------------------------------------------------------- /python_scripts/show_360.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | 4 | scenes = ['bicycle', 'flowers', 'garden', 'stump', 'treehill', 'room', 'counter', 'kitchen', 'bonsai'] 5 | 6 | output_dirs = ["exp_360/release"] 7 | 8 | outdoor_scenes = ["bicycle", "flowers", "garden", "stump", "treehill"] 9 | indoor_scenes = ["room", "counter", "kitchen", "bonsai"] 10 | 11 | all_metrics = {"PSNR": [], "SSIM": [], "LPIPS": [], 'scene': []} 12 | indoor_metrics = {"PSNR": [], "SSIM": [], "LPIPS": [], 'scene': []} 13 | outdoor_metrics = {"PSNR": [], "SSIM": [], "LPIPS": [], 'scene': []} 14 | TRIAL_NAME = 'vcr_gaus' 15 | 16 | def show_matrix(scenes, output_dirs, TRIAL_NAME): 17 | 18 | for scene in scenes: 19 | for output in output_dirs: 20 | json_file = f"{output}/{scene}/{TRIAL_NAME}/results.json" 21 | data = json.load(open(json_file)) 22 | data = data['ours_30000'] 23 | 24 | for k in ["PSNR", 
"SSIM", "LPIPS"]: 25 | all_metrics[k].append(data[k]) 26 | if scene in indoor_scenes: 27 | indoor_metrics[k].append(data[k]) 28 | else: 29 | outdoor_metrics[k].append(data[k]) 30 | all_metrics['scene'].append(scene) 31 | if scene in indoor_scenes: 32 | indoor_metrics['scene'].append(scene) 33 | else: 34 | outdoor_metrics['scene'].append(scene) 35 | 36 | latex = [] 37 | for k in ["PSNR", "SSIM", "LPIPS"]: 38 | numbers = np.asarray(all_metrics[k]).mean(axis=0).tolist() 39 | numbers = [numbers] 40 | if k == "PSNR": 41 | numbers = [f"{x:.2f}" for x in numbers] 42 | else: 43 | numbers = [f"{x:.3f}" for x in numbers] 44 | latex.extend([k+': ', numbers[-1]+' ']) 45 | 46 | indoor_latex = [] 47 | for k in ["PSNR", "SSIM", "LPIPS"]: 48 | numbers = np.asarray(indoor_metrics[k]).mean(axis=0).tolist() 49 | numbers = [numbers] 50 | if k == "PSNR": 51 | numbers = [f"{x:.2f}" for x in numbers] 52 | else: 53 | numbers = [f"{x:.3f}" for x in numbers] 54 | indoor_latex.extend([k+': ', numbers[-1]+' ']) 55 | 56 | outdoor_latex = [] 57 | for k in ["PSNR", "SSIM", "LPIPS"]: 58 | numbers = np.asarray(outdoor_metrics[k]).mean(axis=0).tolist() 59 | numbers = [numbers] 60 | if k == "PSNR": 61 | numbers = [f"{x:.2f}" for x in numbers] 62 | else: 63 | numbers = [f"{x:.3f}" for x in numbers] 64 | outdoor_latex.extend([k+': ', numbers[-1]+' ']) 65 | 66 | print('Outdoor scenes') 67 | for i in range(len(outdoor_metrics['scene'])): 68 | print('PSNR: {:.3f}, SSIM: {:.3f}, LPIPS: {:.3f}, scene: {}'.format(outdoor_metrics['PSNR'][i], outdoor_metrics['SSIM'][i], outdoor_metrics['LPIPS'][i], outdoor_metrics['scene'][i])) 69 | 70 | print('Indoor scenes') 71 | for i in range(len(indoor_metrics['scene'])): 72 | print('PSNR: {:.3f}, SSIM: {:.3f}, LPIPS: {:.3f}, scene: {}'.format(indoor_metrics['PSNR'][i], indoor_metrics['SSIM'][i], indoor_metrics['LPIPS'][i], indoor_metrics['scene'][i])) 73 | 74 | print('Outdoor:') 75 | print("".join(outdoor_latex)) 76 | print('Indoor:') 77 | print("".join(indoor_latex)) 78 | 79 | if __name__ == "__main__": 80 | show_matrix(scenes, output_dirs, TRIAL_NAME) 81 | -------------------------------------------------------------------------------- /python_scripts/show_dtu.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | 5 | scenes = [24, 37, 40, 55, 63, 65, 69, 83, 97, 105, 106, 110, 114, 118, 122] 6 | output_dirs = ["exp_dtu/release"] 7 | TRIAL_NAME = 'vcr_gaus' 8 | 9 | 10 | def show_matrix_old(scenes, output_dirs, TRIAL_NAME): 11 | all_metrics = {"mean_d2s": [], "mean_s2d": [], "overall": []} 12 | print(output_dirs) 13 | 14 | for scene in scenes: 15 | print(scene,end=" ") 16 | for output in output_dirs: 17 | json_file = f"{output}/scan{scene}/test/ours_30000/tsdf/results.json" 18 | data = json.load(open(json_file)) 19 | 20 | for k in ["mean_d2s", "mean_s2d", "overall"]: 21 | all_metrics[k].append(data[k]) 22 | print(f"{data[k]:.3f}", end=" ") 23 | print() 24 | 25 | latex = [] 26 | for k in ["mean_d2s", "mean_s2d", "overall"]: 27 | numbers = np.asarray(all_metrics[k]).mean(axis=0).tolist() 28 | 29 | numbers = all_metrics[k] + [numbers] 30 | 31 | numbers = [f"{x:.2f}" for x in numbers] 32 | if k == "overall": 33 | latex.extend(numbers) 34 | 35 | print(" & ".join(latex)) 36 | 37 | 38 | def show_matrix(scenes, output_dirs, TRIAL_NAME): 39 | all_metrics = {"mean_d2s": [], "mean_s2d": [], "overall": [], 'scene': []} 40 | 41 | for scene in scenes: 42 | for output in output_dirs: 43 | json_file = 
f"{output}/{scene}/{TRIAL_NAME}/results.json" 44 | if not os.path.exists(json_file): 45 | print(f"Scene \033[1;31m{scene}\033[0m was not evaluated.") 46 | continue 47 | data = json.load(open(json_file)) 48 | 49 | for k in ["mean_d2s", "mean_s2d", "overall"]: 50 | all_metrics[k].append(data[k]) 51 | all_metrics['scene'].append(scene) 52 | 53 | latex = [] 54 | for k in ["mean_d2s", "mean_s2d", "overall"]: 55 | numbers = np.asarray(all_metrics[k]).mean(axis=0).tolist() 56 | 57 | numbers = all_metrics[k] + [numbers] 58 | 59 | numbers = [f"{x:.2f}" for x in numbers] 60 | latex.extend([k+': ', numbers[-1]+' ']) 61 | 62 | for i in range(len(all_metrics['scene'])): 63 | print('d2s: {:.3f}, s2d: {:.3f}, overall: {:.3f}, scene: {}'.format(all_metrics['mean_d2s'][i], all_metrics['mean_s2d'][i], all_metrics['overall'][i], all_metrics['scene'][i])) 64 | 65 | print("".join(latex)) 66 | 67 | 68 | if __name__ == "__main__": 69 | show_matrix(scenes, output_dirs, TRIAL_NAME) -------------------------------------------------------------------------------- /python_scripts/show_tnt.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | training_list = [ 5 | 'Barn', 'Caterpillar', 'Courthouse', 'Ignatius', 'Meetingroom', 'Truck' 6 | ] 7 | 8 | scenes = training_list 9 | 10 | DATASET = 'tnt' 11 | base_dir = "/your/log/path/" 12 | TRIAL_NAME = 'vcr_gaus' 13 | PROJECT = 'sq_gs' 14 | output_dirs = [f"{base_dir}/{PROJECT}/{DATASET}"] 15 | 16 | 17 | def show_matrix(scenes, output_dirs, TRIAL_NAME): 18 | all_metrics = {"precision": [], "recall": [], "f-score": [], 'scene': []} 19 | for scene in scenes: 20 | for output in output_dirs: 21 | # precision 22 | eval_file = os.path.join(output, scene, f"{TRIAL_NAME}/evaluation/evaluation.txt") 23 | 24 | if not os.path.exists(eval_file): 25 | print(f"Scene \033[1;31m{scene}\033[0m was not evaluated.") 26 | continue 27 | with open(eval_file, 'r') as f: 28 | matrix = f.readlines() 29 | 30 | precision = float(matrix[2].split(" ")[-1]) 31 | recall = float(matrix[3].split(" ")[-1]) 32 | f_score = float(matrix[4].split(" ")[-1]) 33 | 34 | all_metrics["precision"].append(precision) 35 | all_metrics["recall"].append(recall) 36 | all_metrics["f-score"].append(f_score) 37 | all_metrics['scene'].append(scene) 38 | 39 | 40 | latex = [] 41 | for k in ["precision","recall", "f-score"]: 42 | numbers = all_metrics[k] 43 | mean = np.mean(numbers) 44 | numbers = numbers + [mean] 45 | 46 | numbers = [f"{x:.3f}" for x in numbers] 47 | latex.extend([k+': ', numbers[-1]+' ']) 48 | 49 | for i in range(len(all_metrics['scene'])): 50 | print('precision: {:.3f}, recall: {:.3f}, f-score: {:.3f}, scene: {}'.format(all_metrics['precision'][i], all_metrics['recall'][i], all_metrics['f-score'][i], all_metrics['scene'][i])) 51 | 52 | print("".join(latex)) 53 | 54 | return 55 | 56 | 57 | if __name__ == "__main__": 58 | show_matrix(scenes, output_dirs, TRIAL_NAME) 59 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | submodules/diff-gaussian-rasterization 2 | submodules/simple-knn/ 3 | git+https://github.com/facebookresearch/pytorch3d.git@stable -------------------------------------------------------------------------------- /scene/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, 
https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import os 13 | import random 14 | import json 15 | import torch 16 | 17 | from arguments import ModelParams 18 | from scene.gaussian_model import GaussianModel 19 | from tools.system_utils import searchForMaxIteration 20 | from scene.dataset_readers import sceneLoadTypeCallbacks 21 | from tools.camera_utils import cameraList_from_camInfos, camera_to_JSON 22 | from tools.graphics_utils import get_all_px_dir 23 | 24 | class Scene: 25 | 26 | gaussians : GaussianModel 27 | 28 | def __init__(self, args : ModelParams, gaussians : GaussianModel, load_iteration=None, shuffle=True, resolution_scales=[1.0]): 29 | """b 30 | :param path: Path to colmap scene main folder. 31 | """ 32 | self.model_path = args.model_path 33 | self.loaded_iter = None 34 | self.gaussians = gaussians 35 | self.split = args.split 36 | load_depth = args.load_depth 37 | load_normal = args.load_normal 38 | load_mask = args.load_mask 39 | 40 | if load_iteration: 41 | if load_iteration == -1: 42 | self.loaded_iter = searchForMaxIteration(os.path.join(self.model_path, "point_cloud")) 43 | else: 44 | self.loaded_iter = load_iteration 45 | print("Loading trained model at iteration {}".format(self.loaded_iter)) 46 | 47 | self.train_cameras = {} 48 | self.test_cameras = {} 49 | 50 | if os.path.exists(os.path.join(args.source_path, "sparse")): 51 | scene_info = sceneLoadTypeCallbacks["Colmap"](args.source_path, args.images, args.eval, args.llffhold, args.ratio, split=self.split, load_depth=load_depth, load_normal=load_normal, load_mask=load_mask, normal_folder=args.normal_folder, depth_folder=args.depth_folder) 52 | elif os.path.exists(os.path.join(args.source_path, "transforms_train.json")): 53 | print("Found transforms_train.json file, assuming Blender data set!") 54 | scene_info = sceneLoadTypeCallbacks["Blender"](args.source_path, args.white_background, args.eval) 55 | else: 56 | assert False, "Could not recognize scene type!" 
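# At this point scene_info bundles the loaded cameras, the SfM point cloud, the scene
# normalization (trans/scale) and the train/test split used below.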
57 | 58 | self.trans = scene_info.trans 59 | self.scale = scene_info.scale 60 | 61 | if not self.loaded_iter: 62 | with open(scene_info.ply_path, 'rb') as src_file, open(os.path.join(self.model_path, "input.ply") , 'wb') as dest_file: 63 | dest_file.write(src_file.read()) 64 | json_cams = [] 65 | camlist = [] 66 | if scene_info.test_cameras: 67 | camlist.extend(scene_info.test_cameras) 68 | if scene_info.train_cameras: 69 | camlist.extend(scene_info.train_cameras) 70 | for id, cam in enumerate(camlist): 71 | json_cams.append(camera_to_JSON(id, cam)) 72 | with open(os.path.join(self.model_path, "cameras.json"), 'w') as file: 73 | json.dump(json_cams, file) 74 | 75 | if shuffle: 76 | random.shuffle(scene_info.train_cameras) # Multi-res consistent random shuffling 77 | # random.shuffle(scene_info.test_cameras) # Multi-res consistent random shuffling 78 | 79 | self.cameras_extent = scene_info.nerf_normalization["radius"] 80 | gaussians.extent = self.cameras_extent 81 | 82 | for resolution_scale in resolution_scales: 83 | print("Loading Training Cameras") 84 | self.train_cameras[resolution_scale] = cameraList_from_camInfos(scene_info.train_cameras, resolution_scale, args) 85 | print("Loading Test Cameras") 86 | self.test_cameras[resolution_scale] = cameraList_from_camInfos(scene_info.test_cameras, resolution_scale, args) 87 | 88 | for idx, camera in enumerate(self.train_cameras[resolution_scale] + self.test_cameras[resolution_scale]): 89 | camera.idx = idx 90 | 91 | if self.loaded_iter: 92 | self.gaussians.load_ply(os.path.join(self.model_path, 93 | "point_cloud", 94 | "iteration_" + str(self.loaded_iter), 95 | "point_cloud.ply")) 96 | else: 97 | self.gaussians.create_from_pcd(scene_info.point_cloud, self.cameras_extent) 98 | 99 | if args.depth_type == "traditional": 100 | self.dirs = None 101 | elif args.depth_type == "intersection": 102 | self.dirs = get_all_px_dir(self.getTrainCameras()[0].intr, self.getTrainCameras()[0].image_height, self.getTrainCameras()[0].image_width).cuda() 103 | self.first_name = scene_info.first_name 104 | 105 | def save(self, iteration, visi=None, surf=None, save_splat=False): 106 | point_cloud_path = os.path.join(self.model_path, "point_cloud/iteration_{}".format(iteration)) 107 | self.gaussians.save_ply(os.path.join(point_cloud_path, "point_cloud.ply")) 108 | self.gaussians.save_inside_ply(os.path.join(point_cloud_path, "point_cloud_inside.ply")) 109 | 110 | if visi is not None: 111 | self.gaussians.save_visi_ply(os.path.join(point_cloud_path, "visi.ply"), visi) 112 | 113 | if surf is not None: 114 | self.gaussians.save_visi_ply(os.path.join(point_cloud_path, "surf.ply"), surf) 115 | 116 | if save_splat: 117 | self.gaussians.save_splat(os.path.join(point_cloud_path, "pcd.splat")) 118 | 119 | def getTrainCameras(self, scale=1.0): 120 | return self.train_cameras[scale] 121 | 122 | def getTestCameras(self, scale=1.0): 123 | return self.test_cameras[scale] 124 | 125 | def getFullCameras(self, scale=1.0): 126 | if self.split: 127 | return self.train_cameras[scale] + self.test_cameras[scale] 128 | else: 129 | return self.train_cameras[scale] 130 | 131 | def getUpCameras(self): 132 | return self.random_cameras_up 133 | 134 | def getAroundCameras(self): 135 | return self.random_cameras_around 136 | 137 | def getRandCameras(self, n, up=False, around=True, sample_mode='uniform'): 138 | if up and around: 139 | n = n // 2 140 | 141 | cameras = [] 142 | if up: 143 | up_cameras = self.getUpCameras().copy() 144 | idx = torch.randperm(len(up_cameras))[: n] 145 | if n == 1: 146 | 
cameras.append(up_cameras[idx]) 147 | else: 148 | cameras.extend(up_cameras[idx]) 149 | if around: 150 | around_cameras = self.getAroundCameras() 151 | 152 | if sample_mode == 'random': 153 | idx = torch.randperm(len(around_cameras))[: n] 154 | elif sample_mode == 'uniform': 155 | idx = torch.arange(len(around_cameras))[::len(around_cameras)//n] 156 | else: 157 | assert False, f"Unknown sample_mode: {sample_mode}" 158 | 159 | if n == 1: 160 | cameras.append(around_cameras[idx]) 161 | else: 162 | cameras.extend(around_cameras[idx]) 163 | return cameras -------------------------------------------------------------------------------- /scene/appearance_network.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class UpsampleBlock(nn.Module): 6 | def __init__(self, num_input_channels, num_output_channels): 7 | super(UpsampleBlock, self).__init__() 8 | self.pixel_shuffle = nn.PixelShuffle(2) 9 | self.conv = nn.Conv2d(num_input_channels // (2 * 2), num_output_channels, 3, stride=1, padding=1) 10 | self.relu = nn.ReLU() 11 | 12 | def forward(self, x): 13 | x = self.pixel_shuffle(x) 14 | x = self.conv(x) 15 | x = self.relu(x) 16 | return x 17 | 18 | 19 | class AppearanceNetwork(nn.Module): 20 | def __init__(self, num_input_channels, num_output_channels): 21 | super(AppearanceNetwork, self).__init__() 22 | 23 | self.conv1 = nn.Conv2d(num_input_channels, 256, 3, stride=1, padding=1) 24 | self.up1 = UpsampleBlock(256, 128) 25 | self.up2 = UpsampleBlock(128, 64) 26 | self.up3 = UpsampleBlock(64, 32) 27 | self.up4 = UpsampleBlock(32, 16) 28 | 29 | self.conv2 = nn.Conv2d(16, 16, 3, stride=1, padding=1) 30 | self.conv3 = nn.Conv2d(16, num_output_channels, 3, stride=1, padding=1) 31 | self.relu = nn.ReLU() 32 | self.sigmoid = nn.Sigmoid() 33 | 34 | def forward(self, x): 35 | x = self.conv1(x) 36 | x = self.relu(x) 37 | x = self.up1(x) 38 | x = self.up2(x) 39 | x = self.up3(x) 40 | x = self.up4(x) 41 | # bilinear interpolation 42 | x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True) 43 | x = self.conv2(x) 44 | x = self.relu(x) 45 | x = self.conv3(x) 46 | x = self.sigmoid(x) 47 | return x 48 | 49 | 50 | if __name__ == "__main__": 51 | H, W = 1200//32, 1600//32 52 | input_channels = 3 + 64 53 | output_channels = 3 54 | input = torch.randn(1, input_channels, H, W).cuda() 55 | model = AppearanceNetwork(input_channels, output_channels).cuda() 56 | 57 | output = model(input) 58 | print(output.shape) -------------------------------------------------------------------------------- /scene/cameras.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | from torch import nn 14 | import numpy as np 15 | 16 | from tools.graphics_utils import getWorld2View2, getProjectionMatrix, getIntrinsic 17 | 18 | 19 | class Camera(nn.Module): 20 | def __init__(self, colmap_id, R, T, FoVx, FoVy, image, gt_alpha_mask, 21 | image_name, uid, depth=None, normal=None, mask=None, 22 | trans=np.array([0.0, 0.0, 0.0]), scale=1.0, data_device = "cuda" 23 | ): 24 | super(Camera, self).__init__() 25 | 26 | self.uid = uid 27 | self.colmap_id = colmap_id 28 | self.R = R 29 | self.T = T 30 | self.FoVx = FoVx 31 | self.FoVy = FoVy 32 | self.image_name = image_name 33 | 34 | try: 35 | self.data_device = torch.device(data_device) 36 | except Exception as e: 37 | print(e) 38 | print(f"[Warning] Custom device {data_device} failed, fallback to default cuda device" ) 39 | self.data_device = torch.device("cuda") 40 | 41 | self.original_image = image.clamp(0.0, 1.0).to(self.data_device) 42 | self.image_width = self.original_image.shape[2] 43 | self.image_height = self.original_image.shape[1] 44 | 45 | if gt_alpha_mask is not None: 46 | self.gt_alpha_mask = gt_alpha_mask 47 | if mask is not None: 48 | mask = mask.squeeze(-1).cuda() 49 | mask[self.gt_alpha_mask[0] == 0] = 0 50 | else: 51 | mask = self.gt_alpha_mask.bool().squeeze(0).cuda() 52 | else: 53 | self.original_image *= torch.ones((1, self.image_height, self.image_width), device=self.data_device) 54 | self.gt_alpha_mask = None 55 | 56 | self.depth = depth.to(data_device) if depth is not None else None 57 | self.normal = normal.to(data_device) if normal is not None else None 58 | 59 | if mask is not None: 60 | self.mask = mask.squeeze(-1).cuda() 61 | 62 | self.zfar = 100.0 63 | self.znear = 0.01 64 | 65 | self.trans = trans 66 | self.scale = scale 67 | 68 | self.world_view_transform = torch.tensor(getWorld2View2(R, T, trans, scale)).transpose(0, 1).cuda() # w2c 69 | self.projection_matrix = getProjectionMatrix(znear=self.znear, zfar=self.zfar, fovX=self.FoVx, fovY=self.FoVy).transpose(0,1).cuda() 70 | self.full_proj_transform = (self.world_view_transform.unsqueeze(0).bmm(self.projection_matrix.unsqueeze(0))).squeeze(0) # w2c2image 71 | self.camera_center = self.world_view_transform.inverse()[3, :3] 72 | intr = getIntrinsic(self.FoVx, self.FoVy, self.image_height, self.image_width).cuda() 73 | self.intr = intr 74 | 75 | 76 | class MiniCam: 77 | def __init__(self, width, height, fovy, fovx, znear, zfar, world_view_transform, full_proj_transform): 78 | self.image_width = width 79 | self.image_height = height 80 | self.FoVy = fovy 81 | self.FoVx = fovx 82 | self.znear = znear 83 | self.zfar = zfar 84 | self.world_view_transform = world_view_transform 85 | self.full_proj_transform = full_proj_transform 86 | view_inv = torch.inverse(self.world_view_transform) 87 | self.camera_center = view_inv[3][:3] 88 | 89 | 90 | class SampleCam(nn.Module): 91 | def __init__(self, w2c, width, height, FoVx, FoVy, device='cuda'): 92 | super(SampleCam, self).__init__() 93 | 94 | self.FoVx = FoVx 95 | self.FoVy = FoVy 96 | self.image_width = width 97 | self.image_height = height 98 | 99 | self.zfar = 100.0 100 | self.znear = 0.01 101 | 102 | try: 103 | self.data_device = torch.device(device) 104 | except Exception as e: 105 | print(e) 106 | print(f"[Warning] Custom device {device} failed, fallback to default cuda device" ) 107 | self.data_device = torch.device("cuda") 108 | 109 | w2c = w2c.to(self.data_device) 110 | self.world_view_transform = 
w2c.transpose(0, 1) 111 | self.projection_matrix = getProjectionMatrix(znear=self.znear, zfar=self.zfar, fovX=self.FoVx, fovY=self.FoVy).transpose(0,1).to(w2c.device) 112 | self.full_proj_transform = self.world_view_transform @ self.projection_matrix 113 | self.camera_center = self.world_view_transform.inverse()[3, :3] 114 | 115 | class MiniCam2: 116 | def __init__(self, c2w, width, height, fovy, fovx, znear, zfar): 117 | # c2w (pose) should be in NeRF convention. 118 | 119 | self.image_width = width 120 | self.image_height = height 121 | self.FoVy = fovy 122 | self.FoVx = fovx 123 | self.znear = znear 124 | self.zfar = zfar 125 | 126 | w2c = np.linalg.inv(c2w) 127 | 128 | # rectify... 129 | w2c[1:3, :3] *= -1 130 | w2c[:3, 3] *= -1 131 | 132 | self.world_view_transform = torch.tensor(w2c).transpose(0, 1).cuda() 133 | self.projection_matrix = ( 134 | getProjectionMatrix( 135 | znear=self.znear, zfar=self.zfar, fovX=self.FoVx, fovY=self.FoVy 136 | ) 137 | .transpose(0, 1) 138 | .cuda() 139 | ) 140 | self.full_proj_transform = self.world_view_transform @ self.projection_matrix 141 | self.camera_center = -torch.tensor(c2w[:3, 3]).cuda() -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HLinChen/VCR-GauS/aa715d19bfacfa9d491f477c572eab1839dcee3e/tools/__init__.py -------------------------------------------------------------------------------- /tools/crop_mesh.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import numpy as np 4 | import trimesh 5 | 6 | 7 | def align_gt_with_cam(pts, trans): 8 | trans_inv = np.linalg.inv(trans) 9 | pts_aligned = pts @ trans_inv[:3, :3].transpose(-1, -2) + trans_inv[:3, -1] 10 | return pts_aligned 11 | 12 | 13 | def filter_largest_cc(mesh): 14 | components = mesh.split(only_watertight=False) 15 | areas = np.array([c.area for c in components], dtype=float) 16 | if len(areas) > 0 and mesh.vertices.shape[0] > 0: 17 | new_mesh = components[areas.argmax()] 18 | else: 19 | new_mesh = trimesh.Trimesh() 20 | return new_mesh 21 | 22 | 23 | def main(args): 24 | assert os.path.exists(args.ply_path), f"PLY file {args.ply_path} does not exist." 
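# The alignment file stores a 4x4 transform relating the ground-truth mesh frame to the
# reconstruction (COLMAP) frame; align_gt_with_cam applies its inverse to bring the GT
# vertices into the reconstruction's coordinate system.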
25 | gt_trans = np.loadtxt(args.align_path) 26 | 27 | mesh_rec = trimesh.load(args.ply_path, process=False) 28 | mesh_gt = trimesh.load(args.gt_path, process=False) 29 | 30 | mesh_gt.vertices = align_gt_with_cam(mesh_gt.vertices, gt_trans) 31 | 32 | to_align, _ = trimesh.bounds.oriented_bounds(mesh_gt) 33 | mesh_gt.vertices = (to_align[:3, :3] @ mesh_gt.vertices.T + to_align[:3, 3:]).T 34 | mesh_rec.vertices = (to_align[:3, :3] @ mesh_rec.vertices.T + to_align[:3, 3:]).T 35 | 36 | min_points = mesh_gt.vertices.min(axis=0) 37 | max_points = mesh_gt.vertices.max(axis=0) 38 | 39 | mask_min = (mesh_rec.vertices - min_points[None]) > 0 40 | mask_max = (mesh_rec.vertices - max_points[None]) < 0 41 | 42 | mask = np.concatenate((mask_min, mask_max), axis=1).all(axis=1) 43 | face_mask = mask[mesh_rec.faces].all(axis=1) 44 | 45 | mesh_rec.update_vertices(mask) 46 | mesh_rec.update_faces(face_mask) 47 | 48 | mesh_rec.vertices = (to_align[:3, :3].T @ mesh_rec.vertices.T - to_align[:3, :3].T @ to_align[:3, 3:]).T 49 | mesh_gt.vertices = (to_align[:3, :3].T @ mesh_gt.vertices.T - to_align[:3, :3].T @ to_align[:3, 3:]).T 50 | 51 | 52 | # save mesh_rec and mesh_rec in args.out_path 53 | mesh_rec.export(args.out_path) 54 | 55 | # downsample mesh_gt 56 | 57 | idx = np.random.choice(np.arange(len(mesh_gt.vertices)), 5000000) 58 | mesh_gt.vertices = mesh_gt.vertices[idx] 59 | mesh_gt.colors = mesh_gt.colors[idx] 60 | 61 | mesh_gt.export(args.gt_path.replace('.ply', '_trans.ply')) 62 | 63 | 64 | return 65 | 66 | 67 | 68 | if __name__ == '__main__': 69 | parser = argparse.ArgumentParser() 70 | parser.add_argument( 71 | "--gt_path", 72 | type=str, 73 | default='/your/path//Barn_GT.ply', 74 | help="path to a dataset/scene directory containing X.json, X.ply, ...", 75 | ) 76 | parser.add_argument( 77 | "--align_path", 78 | type=str, 79 | default='/your/path//Barn_trans.txt', 80 | help="path to a dataset/scene directory containing X.json, X.ply, ...", 81 | ) 82 | parser.add_argument( 83 | "--ply_path", 84 | type=str, 85 | default='/your/path//Barn_lowres.ply', 86 | help="path to reconstruction ply file", 87 | ) 88 | parser.add_argument( 89 | "--scene", 90 | type=str, 91 | default='Barn', 92 | help="path to reconstruction ply file", 93 | ) 94 | parser.add_argument( 95 | "--out_path", 96 | type=str, 97 | default='/your/path//Barn_lowres_crop.ply', 98 | help= 99 | "output directory, default: an evaluation directory is created in the directory of the ply file", 100 | ) 101 | args = parser.parse_args() 102 | 103 | main(args) -------------------------------------------------------------------------------- /tools/denoise_pcd.py: -------------------------------------------------------------------------------- 1 | from pytorch3d.ops import ball_query, knn_points 2 | 3 | 4 | def remove_radius_outlier(xyz, nb_points=5, radius=0.1): 5 | if xyz.dim() == 2: xyz = xyz[None] 6 | nn_dists, nn_idx, nn = ball_query(xyz, xyz, K=nb_points+1, radius=radius) 7 | valid = ~(nn_idx[0]==-1).any(-1) 8 | 9 | return valid 10 | 11 | 12 | def remove_statistical_outlier(xyz, nb_points=20, std_ratio=20.): 13 | if xyz.dim() == 2: xyz = xyz[None] 14 | nn_dists, nn_idx, nn = knn_points(xyz, xyz, K=nb_points, return_sorted=False) 15 | 16 | # Compute distances to neighbors 17 | distances = nn_dists.squeeze(0) # Shape: (N, nb_neighbors) 18 | 19 | # Compute mean and standard deviation of distances 20 | mean_distances = distances.mean(dim=-1) 21 | std_distances = distances.std(dim=-1) 22 | 23 | # Identify points that are not outliers 24 | threshold = 
mean_distances + std_ratio * std_distances 25 | valid = (distances <= threshold.unsqueeze(1)).any(dim=1) 26 | 27 | return valid 28 | 29 | 30 | if __name__ == '__main__': 31 | import torch 32 | import time 33 | 34 | gpu = 0 35 | device = torch.device('cuda:{:d}'.format(gpu) if torch.cuda.is_available() else 'cpu') 36 | t1 = time.time() 37 | xyz = torch.rand(int(1e7), 3).to(device) 38 | remove_statistical_outlier(xyz) 39 | print('time:', time.time()-t1, 's') 40 | 41 | -------------------------------------------------------------------------------- /tools/distributed.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import functools 14 | import ctypes 15 | 16 | import torch 17 | import torch.distributed as dist 18 | from contextlib import contextmanager 19 | 20 | 21 | def init_dist(local_rank, backend='nccl', **kwargs): 22 | r"""Initialize distributed training""" 23 | if dist.is_available(): 24 | if dist.is_initialized(): 25 | return torch.cuda.current_device() 26 | torch.cuda.set_device(local_rank) 27 | dist.init_process_group(backend=backend, init_method='env://', **kwargs) 28 | 29 | # Increase the L2 fetch granularity for faster speed. 30 | _libcudart = ctypes.CDLL('libcudart.so') 31 | # Set device limit on the current device 32 | # cudaLimitMaxL2FetchGranularity = 0x05 33 | pValue = ctypes.cast((ctypes.c_int * 1)(), ctypes.POINTER(ctypes.c_int)) 34 | _libcudart.cudaDeviceSetLimit(ctypes.c_int(0x05), ctypes.c_int(128)) 35 | _libcudart.cudaDeviceGetLimit(pValue, ctypes.c_int(0x05)) 36 | 37 | 38 | def get_rank(): 39 | r"""Get rank of the thread.""" 40 | rank = 0 41 | if dist.is_available(): 42 | if dist.is_initialized(): 43 | rank = dist.get_rank() 44 | return rank 45 | 46 | 47 | def get_world_size(): 48 | r"""Get world size. How many GPUs are available in this job.""" 49 | world_size = 1 50 | if dist.is_available(): 51 | if dist.is_initialized(): 52 | world_size = dist.get_world_size() 53 | return world_size 54 | 55 | 56 | def broadcast_object_list(message, src=0): 57 | r"""Broadcast object list from the master to the others""" 58 | # Send logdir from master to all workers. 
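# The list is modified in place on non-source ranks and also returned, so callers can
# write e.g. `logdir = broadcast_object_list([logdir])[0]` on every rank.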
59 | if dist.is_available(): 60 | if dist.is_initialized(): 61 | torch.distributed.broadcast_object_list(message, src=src) 62 | return message 63 | 64 | 65 | def master_only(func): 66 | r"""Apply this function only to the master GPU.""" 67 | @functools.wraps(func) 68 | def wrapper(*args, **kwargs): 69 | r"""Simple function wrapper for the master function""" 70 | if get_rank() == 0: 71 | return func(*args, **kwargs) 72 | else: 73 | return None 74 | return wrapper 75 | 76 | 77 | def is_master(): 78 | r"""check if current process is the master""" 79 | return get_rank() == 0 80 | 81 | 82 | def is_dist(): 83 | return dist.is_initialized() 84 | 85 | 86 | def barrier(): 87 | if is_dist(): 88 | dist.barrier() 89 | 90 | 91 | @contextmanager 92 | def master_first(): 93 | if not is_master(): 94 | barrier() 95 | yield 96 | if dist.is_initialized() and is_master(): 97 | barrier() 98 | 99 | 100 | def is_local_master(): 101 | return torch.cuda.current_device() == 0 102 | 103 | 104 | @master_only 105 | def master_only_print(*args): 106 | r"""master-only print""" 107 | print(*args) 108 | 109 | 110 | def dist_reduce_tensor(tensor, rank=0, reduce='mean'): 111 | r""" Reduce to rank 0 """ 112 | world_size = get_world_size() 113 | if world_size < 2: 114 | return tensor 115 | with torch.no_grad(): 116 | dist.reduce(tensor, dst=rank) 117 | if get_rank() == rank: 118 | if reduce == 'mean': 119 | tensor /= world_size 120 | elif reduce == 'sum': 121 | pass 122 | else: 123 | raise NotImplementedError 124 | return tensor 125 | 126 | 127 | def dist_all_reduce_tensor(tensor, reduce='mean'): 128 | r""" Reduce to all ranks """ 129 | world_size = get_world_size() 130 | if world_size < 2: 131 | return tensor 132 | with torch.no_grad(): 133 | dist.all_reduce(tensor) 134 | if reduce == 'mean': 135 | tensor /= world_size 136 | elif reduce == 'sum': 137 | pass 138 | else: 139 | raise NotImplementedError 140 | return tensor 141 | 142 | 143 | def dist_all_gather_tensor(tensor): 144 | r""" gather to all ranks """ 145 | world_size = get_world_size() 146 | if world_size < 2: 147 | return [tensor] 148 | tensor_list = [ 149 | torch.ones_like(tensor) for _ in range(dist.get_world_size())] 150 | with torch.no_grad(): 151 | dist.all_gather(tensor_list, tensor) 152 | return tensor_list 153 | -------------------------------------------------------------------------------- /tools/general_utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | import sys 14 | from datetime import datetime 15 | import numpy as np 16 | import random 17 | import torchvision.transforms.functional as torchvision_F 18 | from PIL import ImageFile 19 | ImageFile.LOAD_TRUNCATED_IMAGES = True 20 | 21 | 22 | def inverse_sigmoid(x): 23 | return torch.log(x/(1-x)) 24 | 25 | 26 | def PILtoTorch(pil_image, resolution): 27 | resized_image_PIL = pil_image.resize(resolution) 28 | resized_image = torch.from_numpy(np.array(resized_image_PIL)) 29 | if len(resized_image.shape) == 3: 30 | return resized_image.permute(2, 0, 1) 31 | else: 32 | return resized_image.unsqueeze(dim=-1).permute(2, 0, 1) 33 | 34 | 35 | def NumpytoTorch(image, resolution): 36 | image = torch.from_numpy(image) 37 | if image.ndim == 4: image = image.squeeze(0) 38 | if image.shape[-1] == 3 or image.shape[-1] == 1: 39 | image = image.permute(2, 0, 1) 40 | _, orig_h, orig_w = image.shape 41 | if resolution == [orig_h, orig_w]: 42 | resized_image = image 43 | else: 44 | resized_image = torchvision_F.resize(image, resolution, antialias=True) 45 | 46 | return resized_image 47 | 48 | 49 | def get_expon_lr_func( 50 | lr_init, lr_final, lr_delay_steps=0, lr_delay_mult=1.0, max_steps=1000000 51 | ): 52 | """ 53 | Copied from Plenoxels 54 | 55 | Continuous learning rate decay function. Adapted from JaxNeRF 56 | The returned rate is lr_init when step=0 and lr_final when step=max_steps, and 57 | is log-linearly interpolated elsewhere (equivalent to exponential decay). 58 | If lr_delay_steps>0 then the learning rate will be scaled by some smooth 59 | function of lr_delay_mult, such that the initial learning rate is 60 | lr_init*lr_delay_mult at the beginning of optimization but will be eased back 61 | to the normal learning rate when steps>lr_delay_steps. 62 | :param conf: config subtree 'lr' or similar 63 | :param max_steps: int, the number of steps during optimization. 64 | :return HoF which takes step as input 65 | """ 66 | 67 | def helper(step): 68 | if step < 0 or (lr_init == 0.0 and lr_final == 0.0): 69 | # Disable this parameter 70 | return 0.0 71 | if lr_delay_steps > 0: 72 | # A kind of reverse cosine decay. 
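# delay_rate ramps from lr_delay_mult (at step 0) to 1 (at lr_delay_steps) along a quarter
# sine wave, easing the learning rate in before the log-linear decay below takes over.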
73 | delay_rate = lr_delay_mult + (1 - lr_delay_mult) * np.sin( 74 | 0.5 * np.pi * np.clip(step / lr_delay_steps, 0, 1) 75 | ) 76 | else: 77 | delay_rate = 1.0 78 | t = np.clip(step / max_steps, 0, 1) 79 | log_lerp = np.exp(np.log(lr_init) * (1 - t) + np.log(lr_final) * t) 80 | return delay_rate * log_lerp 81 | 82 | return helper 83 | 84 | def strip_lowerdiag(L): 85 | uncertainty = torch.zeros((L.shape[0], 6), dtype=torch.float, device="cuda") 86 | 87 | uncertainty[:, 0] = L[:, 0, 0] 88 | uncertainty[:, 1] = L[:, 0, 1] 89 | uncertainty[:, 2] = L[:, 0, 2] 90 | uncertainty[:, 3] = L[:, 1, 1] 91 | uncertainty[:, 4] = L[:, 1, 2] 92 | uncertainty[:, 5] = L[:, 2, 2] 93 | return uncertainty 94 | 95 | def strip_symmetric(sym): 96 | return strip_lowerdiag(sym) 97 | 98 | def build_rotation(r): 99 | norm = torch.sqrt(r[:,0]*r[:,0] + r[:,1]*r[:,1] + r[:,2]*r[:,2] + r[:,3]*r[:,3]) 100 | 101 | q = r / norm[:, None] 102 | 103 | R = torch.zeros((q.size(0), 3, 3), device='cuda') 104 | 105 | r = q[:, 0] 106 | x = q[:, 1] 107 | y = q[:, 2] 108 | z = q[:, 3] 109 | 110 | R[:, 0, 0] = 1 - 2 * (y*y + z*z) 111 | R[:, 0, 1] = 2 * (x*y - r*z) 112 | R[:, 0, 2] = 2 * (x*z + r*y) 113 | R[:, 1, 0] = 2 * (x*y + r*z) 114 | R[:, 1, 1] = 1 - 2 * (x*x + z*z) 115 | R[:, 1, 2] = 2 * (y*z - r*x) 116 | R[:, 2, 0] = 2 * (x*z - r*y) 117 | R[:, 2, 1] = 2 * (y*z + r*x) 118 | R[:, 2, 2] = 1 - 2 * (x*x + y*y) 119 | return R 120 | 121 | def build_scaling_rotation(s, r): 122 | L = torch.zeros((s.shape[0], 3, 3), dtype=torch.float, device="cuda") 123 | R = build_rotation(r) 124 | 125 | L[:,0,0] = s[:,0] 126 | L[:,1,1] = s[:,1] 127 | L[:,2,2] = s[:,2] 128 | 129 | L = R @ L 130 | return L 131 | 132 | def safe_state(silent): 133 | old_f = sys.stdout 134 | class F: 135 | def __init__(self, silent): 136 | self.silent = silent 137 | 138 | def write(self, x): 139 | if not self.silent: 140 | if x.endswith("\n"): 141 | old_f.write(x.replace("\n", " [{}]\n".format(str(datetime.now().strftime("%d/%m %H:%M:%S"))))) 142 | else: 143 | old_f.write(x) 144 | 145 | def flush(self): 146 | old_f.flush() 147 | 148 | sys.stdout = F(silent) 149 | 150 | 151 | def set_random_seed(seed): 152 | r"""Set random seeds for everything, including random, numpy, torch.manual_seed, torch.cuda_manual_seed. 153 | torch.cuda.manual_seed_all is not necessary (included in torch.manual_seed) 154 | 155 | Args: 156 | seed (int): Random seed. 157 | """ 158 | print(f"Using random seed {seed}") 159 | random.seed(seed) 160 | np.random.seed(seed) 161 | torch.manual_seed(seed) # sets seed on the current CPU & all GPUs 162 | torch.cuda.manual_seed(seed) # sets seed on current GPU 163 | # torch.cuda.manual_seed_all(seed) # included in torch.manual_seed 164 | torch.cuda.set_device(torch.device("cuda:0")) 165 | -------------------------------------------------------------------------------- /tools/graphics_utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | import math 14 | import numpy as np 15 | from typing import NamedTuple 16 | 17 | class BasicPointCloud(NamedTuple): 18 | points : np.array 19 | colors : np.array 20 | normals : np.array 21 | 22 | def geom_transform_points(points, transf_matrix): 23 | P, _ = points.shape 24 | ones = torch.ones(P, 1, dtype=points.dtype, device=points.device) 25 | points_hom = torch.cat([points, ones], dim=1) 26 | points_out = torch.matmul(points_hom, transf_matrix.unsqueeze(0)) 27 | 28 | denom = points_out[..., 3:] + 0.0000001 29 | return (points_out[..., :3] / denom).squeeze(dim=0) 30 | 31 | def getWorld2View(R, t): 32 | Rt = np.zeros((4, 4)) 33 | Rt[:3, :3] = R.transpose() 34 | Rt[:3, 3] = t 35 | Rt[3, 3] = 1.0 36 | return np.float32(Rt) 37 | 38 | def getWorld2View2(R, t, translate=np.array([.0, .0, .0]), scale=1.0): 39 | Rt = np.zeros((4, 4)) # w2c 40 | Rt[:3, :3] = R.transpose() # w2c 41 | Rt[:3, 3] = t # w2c 42 | Rt[3, 3] = 1.0 43 | 44 | C2W = np.linalg.inv(Rt) # c2w 45 | cam_center = C2W[:3, 3] 46 | cam_center = (cam_center + translate) * scale 47 | C2W[:3, 3] = cam_center 48 | Rt = np.linalg.inv(C2W) # w2c 49 | return np.float32(Rt) 50 | 51 | def getView2World(R, t): 52 | ''' 53 | R: w2c 54 | t: w2c 55 | ''' 56 | Rt = np.zeros((4, 4)) 57 | Rt[:3, :3] = R.transpose() # c2w 58 | Rt[:3, 3] = -R.transpose() @ t # c2w 59 | Rt[3, 3] = 1.0 60 | 61 | return Rt 62 | 63 | def getProjectionMatrix(znear, zfar, fovX, fovY): 64 | ''' 65 | normalized intrinsics 66 | ''' 67 | tanHalfFovY = math.tan((fovY / 2)) 68 | tanHalfFovX = math.tan((fovX / 2)) 69 | 70 | top = tanHalfFovY * znear 71 | bottom = -top 72 | right = tanHalfFovX * znear 73 | left = -right 74 | 75 | P = torch.zeros(4, 4) 76 | 77 | z_sign = 1.0 78 | 79 | P[0, 0] = 2.0 * znear / (right - left) 80 | P[1, 1] = 2.0 * znear / (top - bottom) 81 | P[0, 2] = (right + left) / (right - left) 82 | P[1, 2] = (top + bottom) / (top - bottom) 83 | P[3, 2] = z_sign 84 | P[2, 2] = z_sign * zfar / (zfar - znear) 85 | P[2, 3] = -(zfar * znear) / (zfar - znear) 86 | return P 87 | 88 | 89 | def getIntrinsic(fovX, fovY, h, w): 90 | focal_length_y = fov2focal(fovY, h) 91 | focal_length_x = fov2focal(fovX, w) 92 | 93 | intrinsic = np.eye(3) 94 | intrinsic = torch.eye(3, dtype=torch.float32) 95 | 96 | intrinsic[0, 0] = focal_length_x # FovX 97 | intrinsic[1, 1] = focal_length_y # FovY 98 | intrinsic[0, 2] = w / 2 99 | intrinsic[1, 2] = h / 2 100 | 101 | return intrinsic 102 | 103 | 104 | def fov2focal(fov, pixels): 105 | return pixels / (2 * math.tan(fov / 2)) 106 | 107 | def focal2fov(focal, pixels): 108 | return 2*math.atan(pixels/(2*focal)) 109 | 110 | 111 | def ndc_2_cam(ndc_xyz, intrinsic, W, H): 112 | inv_scale = torch.tensor([[W - 1, H - 1]], device=ndc_xyz.device) 113 | cam_z = ndc_xyz[..., 2:3] 114 | cam_xy = ndc_xyz[..., :2] * inv_scale * cam_z 115 | cam_xyz = torch.cat([cam_xy, cam_z], dim=-1) 116 | cam_xyz = cam_xyz @ torch.inverse(intrinsic[0, ...].t()) 117 | return cam_xyz 118 | 119 | 120 | def depth2point_cam(sampled_depth, ref_intrinsic): 121 | B, N, C, H, W = sampled_depth.shape 122 | valid_z = sampled_depth 123 | valid_x = torch.arange(W, dtype=torch.float32, device=sampled_depth.device).add_(0.5) / (W - 1) 124 | valid_y = torch.arange(H, dtype=torch.float32, device=sampled_depth.device).add_(0.5) / (H - 1) 125 | valid_y, valid_x = torch.meshgrid(valid_y, valid_x, indexing='ij') 126 | # B,N,H,W 127 | valid_x = valid_x[None, None, None, ...].expand(B, N, C, -1, -1) 
128 | valid_y = valid_y[None, None, None, ...].expand(B, N, C, -1, -1) 129 | ndc_xyz = torch.stack([valid_x, valid_y, valid_z], dim=-1).view(B, N, C, H, W, 3) # 1, 1, 5, 512, 640, 3 130 | cam_xyz = ndc_2_cam(ndc_xyz, ref_intrinsic, W, H) # 1, 1, 5, 512, 640, 3 131 | return ndc_xyz, cam_xyz 132 | 133 | 134 | def depth2point(depth_image, intrinsic_matrix, extrinsic_matrix): 135 | _, xyz_cam = depth2point_cam(depth_image[None,None,None,...], intrinsic_matrix[None,...]) 136 | xyz_cam = xyz_cam.reshape(-1,3) 137 | xyz_world = torch.cat([xyz_cam, torch.ones_like(xyz_cam[...,0:1])], axis=-1) @ torch.inverse(extrinsic_matrix).transpose(0,1) 138 | xyz_world = xyz_world[...,:3] 139 | 140 | return xyz_cam.reshape(*depth_image.shape, 3), xyz_world.reshape(*depth_image.shape, 3) 141 | 142 | 143 | @torch.no_grad() 144 | def get_all_px_dir(intrinsics, height, width): 145 | """ 146 | # Calculate the view direction for all pixels/rays in the image. 147 | # This is used for intersection calculation between ray and voxel textures. 148 | # """ 149 | 150 | a, ray_dir = depth2point_cam(torch.ones(1, 1, 1, height, width).cuda(), intrinsics[None]) 151 | a, ray_dir = a.squeeze(), ray_dir.squeeze() 152 | ray_dir = torch.nn.functional.normalize(ray_dir, dim=-1) 153 | 154 | ray_dir = ray_dir.permute(2, 0, 1) # 3, H, W 155 | return ray_dir -------------------------------------------------------------------------------- /tools/image_utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | 14 | def mse(img1, img2): 15 | return (((img1 - img2)) ** 2).view(img1.shape[0], -1).mean(1, keepdim=True) 16 | 17 | def psnr(img1, img2): 18 | mse = (((img1 - img2)) ** 2).view(img1.shape[0], -1).mean(1, keepdim=True) 19 | return 20 * torch.log10(1.0 / torch.sqrt(mse)) 20 | -------------------------------------------------------------------------------- /tools/math_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def eps_sqrt(squared, eps=1e-17): 5 | """ 6 | Prepare for the input for sqrt, make sure the input positive and 7 | larger than eps 8 | """ 9 | return torch.clamp(squared.abs(), eps) 10 | 11 | 12 | def ndc_to_pix(p, resolution): 13 | """ 14 | Reverse of pytorch3d pix_to_ndc function 15 | Args: 16 | p (float tensor): (..., 3) 17 | resolution (scalar): image resolution (for now, supports only aspectratio = 1) 18 | Returns: 19 | pix (long tensor): (..., 2) 20 | """ 21 | pix = resolution - ((p[..., :2] + 1.0) * resolution - 1.0) / 2 22 | return pix 23 | 24 | 25 | def decompose_to_R_and_t(transform_mat, row_major=True): 26 | """ decompose a 4x4 transform matrix to R (3,3) and t (1,3)""" 27 | assert(transform_mat.shape[-2:] == (4, 4)), \ 28 | "Expecting batches of 4x4 matrice" 29 | # ... 
3x3 30 | if not row_major: 31 | transform_mat = transform_mat.transpose(-2, -1) 32 | 33 | R = transform_mat[..., :3, :3] 34 | t = transform_mat[..., -1, :3] 35 | 36 | return R, t 37 | 38 | 39 | def to_homogen(x, dim=-1): 40 | """ append one to the specified dimension """ 41 | if dim < 0: 42 | dim = x.ndim + dim 43 | shp = x.shape 44 | new_shp = shp[:dim] + (1, ) + shp[dim + 1:] 45 | x_homogen = x.new_ones(new_shp) 46 | x_homogen = torch.cat([x, x_homogen], dim=dim) 47 | return x_homogen 48 | 49 | 50 | def normalize_pts(pts, trans, scale): 51 | ''' 52 | trans: (4, 4), world to 53 | ''' 54 | if trans.ndim == 1: 55 | pts = (pts - trans) / scale 56 | else: 57 | pts = ((trans[:3, :3] @ pts.T + trans[:3, 3:]).T) / scale 58 | return pts 59 | 60 | 61 | def inv_normalize_pts(pts, trans, scale): 62 | if trans.ndim == 1: 63 | pts = pts * scale + trans 64 | else: 65 | pts = (pts * scale[None] - trans[:3, 3:].T) @ trans[:3, :3] 66 | 67 | return pts 68 | 69 | 70 | def get_inside_normalized(xyz, trans, scale): 71 | pts = normalize_pts(xyz, trans, scale) 72 | with torch.no_grad(): 73 | inside = torch.all(torch.abs(pts) < 1, dim=-1) 74 | return inside, pts -------------------------------------------------------------------------------- /tools/mcube_utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2024, ShanghaiTech 3 | # SVIP research group, https://github.com/svip-lab 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact huangbb@shanghaitech.edu.cn 10 | # 11 | 12 | import numpy as np 13 | import torch 14 | import trimesh 15 | from skimage import measure 16 | # modified from here https://github.com/autonomousvision/sdfstudio/blob/370902a10dbef08cb3fe4391bd3ed1e227b5c165/nerfstudio/utils/marching_cubes.py#L201 17 | def marching_cubes_with_contraction( 18 | sdf, 19 | resolution=512, 20 | bounding_box_min=(-1.0, -1.0, -1.0), 21 | bounding_box_max=(1.0, 1.0, 1.0), 22 | return_mesh=False, 23 | level=0, 24 | simplify_mesh=True, 25 | inv_contraction=None, 26 | max_range=32.0, 27 | ): 28 | assert resolution % 512 == 0 29 | 30 | resN = resolution 31 | cropN = 512 32 | level = 0 33 | N = resN // cropN 34 | 35 | grid_min = bounding_box_min 36 | grid_max = bounding_box_max 37 | xs = np.linspace(grid_min[0], grid_max[0], N + 1) 38 | ys = np.linspace(grid_min[1], grid_max[1], N + 1) 39 | zs = np.linspace(grid_min[2], grid_max[2], N + 1) 40 | 41 | meshes = [] 42 | for i in range(N): 43 | for j in range(N): 44 | for k in range(N): 45 | print(i, j, k) 46 | x_min, x_max = xs[i], xs[i + 1] 47 | y_min, y_max = ys[j], ys[j + 1] 48 | z_min, z_max = zs[k], zs[k + 1] 49 | 50 | x = np.linspace(x_min, x_max, cropN) 51 | y = np.linspace(y_min, y_max, cropN) 52 | z = np.linspace(z_min, z_max, cropN) 53 | 54 | xx, yy, zz = np.meshgrid(x, y, z, indexing="ij") 55 | points = torch.tensor(np.vstack([xx.ravel(), yy.ravel(), zz.ravel()]).T, dtype=torch.float).cuda() 56 | 57 | @torch.no_grad() 58 | def evaluate(points): 59 | z = [] 60 | for _, pnts in enumerate(torch.split(points, 256**3, dim=0)): 61 | z.append(sdf(pnts)) 62 | z = torch.cat(z, axis=0) 63 | return z 64 | 65 | # construct point pyramids 66 | points = points.reshape(cropN, cropN, cropN, 3) 67 | points = points.reshape(-1, 3) 68 | pts_sdf = evaluate(points.contiguous()) 69 | z = pts_sdf.detach().cpu().numpy() 70 | if not (np.min(z) > level or np.max(z) < level): 71 | z = 
z.astype(np.float32) 72 | verts, faces, normals, _ = measure.marching_cubes( 73 | volume=z.reshape(cropN, cropN, cropN), 74 | level=level, 75 | spacing=( 76 | (x_max - x_min) / (cropN - 1), 77 | (y_max - y_min) / (cropN - 1), 78 | (z_max - z_min) / (cropN - 1), 79 | ), 80 | ) 81 | verts = verts + np.array([x_min, y_min, z_min]) 82 | meshcrop = trimesh.Trimesh(verts, faces, normals) 83 | meshes.append(meshcrop) 84 | 85 | print("finished one block") 86 | 87 | combined = trimesh.util.concatenate(meshes) 88 | combined.merge_vertices(digits_vertex=6) 89 | 90 | # inverse contraction and clipping the points range 91 | if inv_contraction is not None: 92 | combined.vertices = inv_contraction(torch.from_numpy(combined.vertices).float().cuda()).cpu().numpy() 93 | combined.vertices = np.clip(combined.vertices, -max_range, max_range) 94 | 95 | return combined -------------------------------------------------------------------------------- /tools/normal_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | from tools.graphics_utils import depth2point_cam 5 | 6 | 7 | def get_normal_sign(normals, begin=None, end=None, trans=None, mode='origin', vec=None): 8 | if mode == 'origin': 9 | if vec is None: 10 | if begin is None: 11 | # center 12 | if trans is not None: 13 | begin = - trans[:3, :3].T @ trans[:3, 3] \ 14 | if trans.ndim != 1 else trans 15 | else: 16 | begin = end.mean(0) 17 | begin[1] += 1 18 | vec = end - begin 19 | cos = (normals * vec).sum(-1, keepdim=True) 20 | 21 | return cos 22 | 23 | 24 | def compute_gradient(img): 25 | dy = torch.gradient(img, dim=0)[0] 26 | dx = torch.gradient(img, dim=1)[0] 27 | return dx, dy 28 | 29 | 30 | def compute_normals(depth_map, K): 31 | # Assuming depth_map is a PyTorch tensor of shape [H, W] 32 | # K_inv is the inverse of the intrinsic matrix 33 | 34 | _, cam_coords = depth2point_cam(depth_map[None, None], K[None]) 35 | cam_coords = cam_coords.squeeze(0).squeeze(0).squeeze(0) # [H, W, 3] 36 | 37 | dx, dy = compute_gradient(cam_coords) 38 | # Cross product of gradients gives normal 39 | normals = torch.cross(dx, dy, dim=-1) 40 | normals = F.normalize(normals, p=2, dim=-1) 41 | return normals 42 | 43 | 44 | def compute_edge(image, k=11, thr=0.01): 45 | dx, dy = compute_gradient(image) 46 | 47 | edge = torch.sqrt(dx**2 + dy**2) 48 | edge = edge / edge.max() 49 | 50 | p = (k - 1) // 2 51 | edge = F.max_pool2d(edge[None], kernel_size=k, stride=1, padding=p)[0] 52 | 53 | edge[edge>thr] = 1 54 | return edge 55 | 56 | 57 | def get_edge_aware_distortion_map(gt_image, distortion_map): 58 | grad_img_left = torch.mean(torch.abs(gt_image[:, 1:-1, 1:-1] - gt_image[:, 1:-1, :-2]), 0) 59 | grad_img_right = torch.mean(torch.abs(gt_image[:, 1:-1, 1:-1] - gt_image[:, 1:-1, 2:]), 0) 60 | grad_img_top = torch.mean(torch.abs(gt_image[:, 1:-1, 1:-1] - gt_image[:, :-2, 1:-1]), 0) 61 | grad_img_bottom = torch.mean(torch.abs(gt_image[:, 1:-1, 1:-1] - gt_image[:, 2:, 1:-1]), 0) 62 | max_grad = torch.max(torch.stack([grad_img_left, grad_img_right, grad_img_top, grad_img_bottom], dim=-1), dim=-1)[0] 63 | # pad 64 | max_grad = torch.exp(-max_grad) 65 | max_grad = torch.nn.functional.pad(max_grad, (1, 1, 1, 1), mode="constant", value=0) 66 | return distortion_map * max_grad -------------------------------------------------------------------------------- /tools/prune.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from 
gaussian_renderer import count_render, visi_acc_render 4 | 5 | 6 | def calculate_v_imp_score(gaussians, imp_list, v_pow): 7 | """ 8 | :param gaussians: A data structure containing Gaussian components with a get_scaling method. 9 | :param imp_list: The importance scores for each Gaussian component. 10 | :param v_pow: The power to which the volume ratios are raised. 11 | :return: A list of adjusted values (v_list) used for pruning. 12 | """ 13 | # Calculate the volume of each Gaussian component 14 | volume = torch.prod(gaussians.get_scaling, dim=1) 15 | # Determine the kth_percent_largest value 16 | index = int(len(volume) * 0.9) 17 | sorted_volume, _ = torch.sort(volume, descending=True) 18 | kth_percent_largest = sorted_volume[index] 19 | # Calculate v_list 20 | v_list = torch.pow(volume / kth_percent_largest, v_pow) 21 | v_list = v_list * imp_list 22 | return v_list 23 | 24 | 25 | def prune_list(gaussians, viewpoint_stack, pipe, background): 26 | gaussian_list, imp_list = None, None 27 | viewpoint_cam = viewpoint_stack.pop() 28 | render_pkg = count_render(viewpoint_cam, gaussians, pipe, background) 29 | gaussian_list, imp_list = ( 30 | render_pkg["gaussians_count"], 31 | render_pkg["important_score"], 32 | ) 33 | 34 | 35 | for iteration in range(len(viewpoint_stack)): 36 | # Pick a random Camera 37 | # prunning 38 | viewpoint_cam = viewpoint_stack.pop() 39 | render_pkg = count_render(viewpoint_cam, gaussians, pipe, background) 40 | gaussians_count, important_score = ( 41 | render_pkg["gaussians_count"].detach(), 42 | render_pkg["important_score"].detach(), 43 | ) 44 | gaussian_list += gaussians_count 45 | imp_list += important_score 46 | 47 | return gaussian_list, imp_list 48 | 49 | 50 | v_render = visi_acc_render 51 | def get_visi_list(gaussians, viewpoint_stack, pipe, background): 52 | out = {} 53 | gaussian_list = None 54 | viewpoint_cam = viewpoint_stack.pop() 55 | render_pkg = v_render(viewpoint_cam, gaussians, pipe, background) 56 | gaussian_list = render_pkg["countlist"] 57 | 58 | for i in range(len(viewpoint_stack)): 59 | # Pick a random Camera 60 | # prunning 61 | viewpoint_cam = viewpoint_stack.pop() 62 | render_pkg = v_render(viewpoint_cam, gaussians, pipe, background) 63 | gaussians_count = render_pkg["countlist"].detach() 64 | gaussian_list += gaussians_count 65 | 66 | visi = gaussian_list > 0 67 | 68 | out["visi"] = visi 69 | return out 70 | 71 | -------------------------------------------------------------------------------- /tools/semantic_id.py: -------------------------------------------------------------------------------- 1 | 2 | BACKGROUND = 0 3 | text_label_dict = { 4 | 'window': BACKGROUND, 5 | 'sky': BACKGROUND, 6 | 'sky window': BACKGROUND, 7 | 'window sky': BACKGROUND, 8 | 'floor': 2, 9 | } 10 | -------------------------------------------------------------------------------- /tools/sh_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The PlenOctree Authors. 2 | # Redistribution and use in source and binary forms, with or without 3 | # modification, are permitted provided that the following conditions are met: 4 | # 5 | # 1. Redistributions of source code must retain the above copyright notice, 6 | # this list of conditions and the following disclaimer. 7 | # 8 | # 2. Redistributions in binary form must reproduce the above copyright notice, 9 | # this list of conditions and the following disclaimer in the documentation 10 | # and/or other materials provided with the distribution. 
11 | # 12 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 13 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 14 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 15 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 16 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 17 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 18 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 19 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 20 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 21 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 22 | # POSSIBILITY OF SUCH DAMAGE. 23 | 24 | import torch 25 | 26 | C0 = 0.28209479177387814 27 | C1 = 0.4886025119029199 28 | C2 = [ 29 | 1.0925484305920792, 30 | -1.0925484305920792, 31 | 0.31539156525252005, 32 | -1.0925484305920792, 33 | 0.5462742152960396 34 | ] 35 | C3 = [ 36 | -0.5900435899266435, 37 | 2.890611442640554, 38 | -0.4570457994644658, 39 | 0.3731763325901154, 40 | -0.4570457994644658, 41 | 1.445305721320277, 42 | -0.5900435899266435 43 | ] 44 | C4 = [ 45 | 2.5033429417967046, 46 | -1.7701307697799304, 47 | 0.9461746957575601, 48 | -0.6690465435572892, 49 | 0.10578554691520431, 50 | -0.6690465435572892, 51 | 0.47308734787878004, 52 | -1.7701307697799304, 53 | 0.6258357354491761, 54 | ] 55 | 56 | 57 | def eval_sh(deg, sh, dirs): 58 | """ 59 | Evaluate spherical harmonics at unit directions 60 | using hardcoded SH polynomials. 61 | Works with torch/np/jnp. 62 | ... Can be 0 or more batch dimensions. 63 | Args: 64 | deg: int SH deg. Currently, 0-3 supported 65 | sh: jnp.ndarray SH coeffs [..., C, (deg + 1) ** 2] 66 | dirs: jnp.ndarray unit directions [..., 3] 67 | Returns: 68 | [..., C] 69 | """ 70 | assert deg <= 4 and deg >= 0 71 | coeff = (deg + 1) ** 2 72 | assert sh.shape[-1] >= coeff 73 | 74 | result = C0 * sh[..., 0] 75 | if deg > 0: 76 | x, y, z = dirs[..., 0:1], dirs[..., 1:2], dirs[..., 2:3] 77 | result = (result - 78 | C1 * y * sh[..., 1] + 79 | C1 * z * sh[..., 2] - 80 | C1 * x * sh[..., 3]) 81 | 82 | if deg > 1: 83 | xx, yy, zz = x * x, y * y, z * z 84 | xy, yz, xz = x * y, y * z, x * z 85 | result = (result + 86 | C2[0] * xy * sh[..., 4] + 87 | C2[1] * yz * sh[..., 5] + 88 | C2[2] * (2.0 * zz - xx - yy) * sh[..., 6] + 89 | C2[3] * xz * sh[..., 7] + 90 | C2[4] * (xx - yy) * sh[..., 8]) 91 | 92 | if deg > 2: 93 | result = (result + 94 | C3[0] * y * (3 * xx - yy) * sh[..., 9] + 95 | C3[1] * xy * z * sh[..., 10] + 96 | C3[2] * y * (4 * zz - xx - yy)* sh[..., 11] + 97 | C3[3] * z * (2 * zz - 3 * xx - 3 * yy) * sh[..., 12] + 98 | C3[4] * x * (4 * zz - xx - yy) * sh[..., 13] + 99 | C3[5] * z * (xx - yy) * sh[..., 14] + 100 | C3[6] * x * (xx - 3 * yy) * sh[..., 15]) 101 | 102 | if deg > 3: 103 | result = (result + C4[0] * xy * (xx - yy) * sh[..., 16] + 104 | C4[1] * yz * (3 * xx - yy) * sh[..., 17] + 105 | C4[2] * xy * (7 * zz - 1) * sh[..., 18] + 106 | C4[3] * yz * (7 * zz - 3) * sh[..., 19] + 107 | C4[4] * (zz * (35 * zz - 30) + 3) * sh[..., 20] + 108 | C4[5] * xz * (7 * zz - 3) * sh[..., 21] + 109 | C4[6] * (xx - yy) * (7 * zz - 1) * sh[..., 22] + 110 | C4[7] * xz * (xx - 3 * yy) * sh[..., 23] + 111 | C4[8] * (xx * (xx - 3 * yy) - yy * (3 * xx - yy)) * sh[..., 24]) 112 | return result 113 | 114 | def RGB2SH(rgb): 115 | return (rgb - 0.5) / C0 116 | 117 | def SH2RGB(sh): 118 | 
return sh * C0 + 0.5 -------------------------------------------------------------------------------- /tools/system_utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | from errno import EEXIST 13 | from os import makedirs, path 14 | import os 15 | 16 | def mkdir_p(folder_path): 17 | # Creates a directory. equivalent to using mkdir -p on the command line 18 | try: 19 | makedirs(folder_path) 20 | except OSError as exc: # Python >2.5 21 | if exc.errno == EEXIST and path.isdir(folder_path): 22 | pass 23 | else: 24 | raise 25 | 26 | def searchForMaxIteration(folder): 27 | saved_iters = [int(fname.split("_")[-1]) for fname in os.listdir(folder)] 28 | return max(saved_iters) 29 | -------------------------------------------------------------------------------- /tools/termcolor.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import pprint 14 | 15 | import termcolor 16 | 17 | 18 | def red(x): return termcolor.colored(str(x), color="red") 19 | def green(x): return termcolor.colored(str(x), color="green") 20 | def blue(x): return termcolor.colored(str(x), color="blue") 21 | def cyan(x): return termcolor.colored(str(x), color="cyan") 22 | def yellow(x): return termcolor.colored(str(x), color="yellow") 23 | def magenta(x): return termcolor.colored(str(x), color="magenta") 24 | def grey(x): return termcolor.colored(str(x), color="grey") 25 | 26 | 27 | COLORS = { 28 | 'red': red, 'green': green, 'blue': blue, 'cyan': cyan, 'yellow': yellow, 'magenta': magenta, 'grey': grey 29 | } 30 | 31 | 32 | def PP(x): 33 | string = pprint.pformat(x, indent=2) 34 | if isinstance(x, dict): 35 | string = '{\n ' + string[1:-1] + '\n}' 36 | return string 37 | 38 | 39 | def alert(x, color='red'): 40 | color = COLORS[color] 41 | print(color('-' * 32)) 42 | print(color(f'* {x}')) 43 | print(color('-' * 32)) 44 | -------------------------------------------------------------------------------- /tools/visualization.py: -------------------------------------------------------------------------------- 1 | import wandb 2 | import imageio 3 | import torch 4 | import torchvision 5 | 6 | from matplotlib import pyplot as plt 7 | from torchvision.transforms import functional as torchvision_F 8 | 9 | 10 | PALETTE = [ 11 | (0, 0, 0), 12 | (174, 199, 232), (152, 223, 138), (31, 119, 180), (255, 187, 120), (188, 189, 34), 13 | (140, 86, 75), (255, 152, 150), (214, 39, 40), (197, 176, 213), (148, 103, 189), 14 | (196, 156, 148), (23, 190, 207), (247, 182, 210), (219, 219, 141), (255, 127, 14), 15 | (158, 218, 229), (44, 
160, 44), (112, 128, 144), (227, 119, 194), (82, 84, 163), 16 | ] 17 | PALETTE = torch.tensor(PALETTE, dtype=torch.uint8) 18 | 19 | 20 | def wandb_image(images, from_range=(0, 1)): 21 | images = preprocess_image(images, from_range=from_range) 22 | wandb_image = wandb.Image(images) 23 | return wandb_image 24 | 25 | 26 | def preprocess_image(images, from_range=(0, 1), cmap="viridis"): 27 | min, max = from_range 28 | images = (images - min) / (max - min) 29 | images = images.detach().cpu().float().clamp_(min=0, max=1) 30 | if images.shape[0] == 1: 31 | images = get_heatmap(images, cmap=cmap) 32 | images = tensor2pil(images) 33 | return images 34 | 35 | 36 | def wandb_sem(image, palette=PALETTE): 37 | image = image.detach().long().cpu() 38 | image = PALETTE[image].float().permute(2, 0, 1)[None] 39 | image = tensor2pil(image) 40 | wandb_image = wandb.Image(image) 41 | return wandb_image 42 | 43 | 44 | def tensor2pil(images): 45 | image_grid = torchvision.utils.make_grid(images, nrow=1, pad_value=1) 46 | image_grid = torchvision_F.to_pil_image(image_grid) 47 | return image_grid 48 | 49 | 50 | def get_heatmap(gray, cmap): # [N,H,W] 51 | color = plt.get_cmap(cmap)(gray.numpy()) 52 | color = torch.from_numpy(color[..., :3]).permute(0, 3, 1, 2).float() # [N,3,H,W] 53 | return color 54 | 55 | 56 | def save_render(render, path): 57 | image = torch.clamp(render, 0.0, 1.0).detach().cpu() 58 | image = (image.permute(1, 2, 0).numpy() * 255).astype('uint8') # [..., ::-1] 59 | imageio.imsave(path, image) 60 | 61 | 62 | -------------------------------------------------------------------------------- /tools/visualize.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 
10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import numpy as np 14 | import torch 15 | import matplotlib.pyplot as plt 16 | import plotly.graph_objs as go 17 | import k3d 18 | 19 | from tools import camera 20 | 21 | 22 | def get_camera_mesh(pose, depth=1): 23 | vertices = torch.tensor([[-0.5, -0.5, 1], 24 | [0.5, -0.5, 1], 25 | [0.5, 0.5, 1], 26 | [-0.5, 0.5, 1], 27 | [0, 0, 0]]) * depth # [6,3] 28 | faces = torch.tensor([[0, 1, 2], 29 | [0, 2, 3], 30 | [0, 1, 4], 31 | [1, 2, 4], 32 | [2, 3, 4], 33 | [3, 0, 4]]) # [6,3] 34 | vertices = camera.cam2world(vertices[None], pose) # [N,6,3] 35 | wireframe = vertices[:, [0, 1, 2, 3, 0, 4, 1, 2, 4, 3]] # [N,10,3] 36 | return vertices, faces, wireframe 37 | 38 | 39 | def merge_meshes(vertices, faces): 40 | mesh_N, vertex_N = vertices.shape[:2] 41 | faces_merged = torch.cat([faces + i * vertex_N for i in range(mesh_N)], dim=0) 42 | vertices_merged = vertices.view(-1, vertices.shape[-1]) 43 | return vertices_merged, faces_merged 44 | 45 | 46 | def merge_wireframes_k3d(wireframe): 47 | wf_first, wf_last, wf_dummy = wireframe[:, :1], wireframe[:, -1:], wireframe[:, :1] * np.nan 48 | wireframe_merged = torch.cat([wf_first, wireframe, wf_last, wf_dummy], dim=1) 49 | return wireframe_merged 50 | 51 | 52 | def merge_wireframes_plotly(wireframe): 53 | wf_dummy = wireframe[:, :1] * np.nan 54 | wireframe_merged = torch.cat([wireframe, wf_dummy], dim=1).view(-1, 3) 55 | return wireframe_merged 56 | 57 | 58 | def get_xyz_indicators(pose, length=0.1): 59 | xyz = torch.eye(4, 3)[None] * length 60 | xyz = camera.cam2world(xyz, pose) 61 | return xyz 62 | 63 | 64 | def merge_xyz_indicators_k3d(xyz): # [N,4,3] 65 | xyz = xyz[:, [[-1, 0], [-1, 1], [-1, 2]]] # [N,3,2,3] 66 | xyz_0, xyz_1 = xyz.unbind(dim=2) # [N,3,3] 67 | xyz_dummy = xyz_0 * np.nan 68 | xyz_merged = torch.stack([xyz_0, xyz_0, xyz_1, xyz_1, xyz_dummy], dim=2) # [N,3,5,3] 69 | return xyz_merged 70 | 71 | 72 | def merge_xyz_indicators_plotly(xyz): # [N,4,3] 73 | xyz = xyz[:, [[-1, 0], [-1, 1], [-1, 2]]] # [N,3,2,3] 74 | xyz_0, xyz_1 = xyz.unbind(dim=2) # [N,3,3] 75 | xyz_dummy = xyz_0 * np.nan 76 | xyz_merged = torch.stack([xyz_0, xyz_1, xyz_dummy], dim=2) # [N,3,3,3] 77 | xyz_merged = xyz_merged.view(-1, 3) 78 | return xyz_merged 79 | 80 | 81 | def k3d_visualize_pose(poses, vis_depth=0.5, xyz_length=0.1, center_size=0.1, xyz_width=0.02, mesh_opacity=0.05): 82 | # poses has shape [N,3,4] potentially in sequential order 83 | N = len(poses) 84 | centers_cam = torch.zeros(N, 1, 3) 85 | centers_world = camera.cam2world(centers_cam, poses) 86 | centers_world = centers_world[:, 0] 87 | # Get the camera wireframes. 88 | vertices, faces, wireframe = get_camera_mesh(poses, depth=vis_depth) 89 | xyz = get_xyz_indicators(poses, length=xyz_length) 90 | vertices_merged, faces_merged = merge_meshes(vertices, faces) 91 | wireframe_merged = merge_wireframes_k3d(wireframe) 92 | xyz_merged = merge_xyz_indicators_k3d(xyz) 93 | # Set the color map for the camera trajectory and the xyz indicators. 94 | color_map = plt.get_cmap("gist_rainbow") 95 | center_color = [] 96 | vertices_merged_color = [] 97 | wireframe_color = [] 98 | xyz_color = [] 99 | x_hex, y_hex, z_hex = int(255) << 16, int(255) << 8, int(255) 100 | for i in range(N): 101 | # Set the camera pose colors (with a smooth gradient color map). 
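# k3d expects packed 24-bit integer colors, so each RGB triplet sampled from the
# colormap is darkened slightly (x0.8) and encoded as 0xRRGGBB below.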
102 | r, g, b, _ = color_map(i / (N - 1)) 103 | r, g, b = r * 0.8, g * 0.8, b * 0.8 104 | pose_rgb_hex = (int(r * 255) << 16) + (int(g * 255) << 8) + int(b * 255) 105 | center_color += [pose_rgb_hex] 106 | vertices_merged_color += [pose_rgb_hex] * 5 107 | wireframe_color += [pose_rgb_hex] * 13 108 | # Set the xyz indicator colors. 109 | xyz_color += [x_hex] * 5 + [y_hex] * 5 + [z_hex] * 5 110 | # Plot in K3D. 111 | k3d_objects = [ 112 | k3d.points(centers_world, colors=center_color, point_size=center_size, shader="3d"), 113 | k3d.mesh(vertices_merged, faces_merged, colors=vertices_merged_color, side="double", opacity=mesh_opacity), 114 | k3d.line(wireframe_merged, colors=wireframe_color, shader="simple"), 115 | k3d.line(xyz_merged, colors=xyz_color, shader="thick", width=xyz_width), 116 | ] 117 | return k3d_objects 118 | 119 | 120 | def plotly_visualize_pose(poses, vis_depth=0.5, xyz_length=0.5, center_size=2, xyz_width=5, mesh_opacity=0.05): 121 | # poses has shape [N,3,4] potentially in sequential order 122 | N = len(poses) 123 | centers_cam = torch.zeros(N, 1, 3) 124 | centers_world = camera.cam2world(centers_cam, poses) 125 | centers_world = centers_world[:, 0] 126 | # Get the camera wireframes. 127 | vertices, faces, wireframe = get_camera_mesh(poses, depth=vis_depth) 128 | xyz = get_xyz_indicators(poses, length=xyz_length) 129 | vertices_merged, faces_merged = merge_meshes(vertices, faces) 130 | wireframe_merged = merge_wireframes_plotly(wireframe) 131 | xyz_merged = merge_xyz_indicators_plotly(xyz) 132 | # Break up (x,y,z) coordinates. 133 | wireframe_x, wireframe_y, wireframe_z = wireframe_merged.unbind(dim=-1) 134 | xyz_x, xyz_y, xyz_z = xyz_merged.unbind(dim=-1) 135 | centers_x, centers_y, centers_z = centers_world.unbind(dim=-1) 136 | vertices_x, vertices_y, vertices_z = vertices_merged.unbind(dim=-1) 137 | # Set the color map for the camera trajectory and the xyz indicators. 138 | color_map = plt.get_cmap("gist_rainbow") 139 | center_color = [] 140 | faces_merged_color = [] 141 | wireframe_color = [] 142 | xyz_color = [] 143 | x_color, y_color, z_color = *np.eye(3).T, 144 | for i in range(N): 145 | # Set the camera pose colors (with a smooth gradient color map). 146 | r, g, b, _ = color_map(i / (N - 1)) 147 | rgb = np.array([r, g, b]) * 0.8 148 | wireframe_color += [rgb] * 11 149 | center_color += [rgb] 150 | faces_merged_color += [rgb] * 6 151 | xyz_color += [x_color] * 3 + [y_color] * 3 + [z_color] * 3 152 | # Plot in plotly. 
153 | plotly_traces = [ 154 | go.Scatter3d(x=wireframe_x, y=wireframe_y, z=wireframe_z, mode="lines", 155 | line=dict(color=wireframe_color, width=1)), 156 | go.Scatter3d(x=xyz_x, y=xyz_y, z=xyz_z, mode="lines", line=dict(color=xyz_color, width=xyz_width)), 157 | go.Scatter3d(x=centers_x, y=centers_y, z=centers_z, mode="markers", 158 | marker=dict(color=center_color, size=center_size, opacity=1)), 159 | go.Mesh3d(x=vertices_x, y=vertices_y, z=vertices_z, 160 | i=[f[0] for f in faces_merged], j=[f[1] for f in faces_merged], k=[f[2] for f in faces_merged], 161 | facecolor=faces_merged_color, opacity=mesh_opacity), 162 | ] 163 | return plotly_traces 164 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | sys.path.append(os.getcwd()) 5 | 6 | from configs.config import Config, recursive_update_strict, parse_cmdline_arguments 7 | from trainer import Trainer 8 | 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser(description='Training') 12 | parser.add_argument('--config', help='Path to the training config file.', required=True) 13 | parser.add_argument('--wandb', action='store_true', help="Enable using Weights & Biases as the logger") 14 | parser.add_argument('--wandb_name', default='default', type=str) 15 | args, cfg_cmd = parser.parse_known_args() 16 | return args, cfg_cmd 17 | 18 | 19 | def main(): 20 | args, cfg_cmd = parse_args() 21 | cfg = Config(args.config) 22 | 23 | cfg_cmd = parse_cmdline_arguments(cfg_cmd) 24 | recursive_update_strict(cfg, cfg_cmd) 25 | 26 | trainer = Trainer(cfg) 27 | cfg.save_config(cfg.logdir) 28 | 29 | trainer.init_wandb(cfg, 30 | project=args.wandb_name, 31 | mode="disabled" if cfg.train.debug_from > -1 or not args.wandb else "online", 32 | use_group=True) 33 | 34 | trainer.train() 35 | trainer.finalize() 36 | 37 | return 38 | 39 | 40 | if __name__ == "__main__": 41 | main() 42 | --------------------------------------------------------------------------------
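train.py drives training from a single YAML config: parse_cmdline_arguments collects any extra command-line options and recursive_update_strict merges them into the loaded Config before the Trainer is built, so individual fields can be overridden at launch time. A minimal sketch of a typical invocation, assuming the Barn config shipped under configs/tnt/ (the wandb project name is illustrative):

    python train.py --config configs/tnt/Barn.yaml --wandb --wandb_name vcr-gaus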