├── .gitignore ├── .gitmodules ├── LICENSE.md ├── README.md ├── arguments └── __init__.py ├── convert.py ├── docs ├── index.html └── static │ ├── css │ ├── bulma-carousel.min.css │ ├── bulma-slider.min.css │ ├── bulma.css.map.txt │ ├── bulma.min.css │ ├── fontawesome.all.min.css │ └── index.css │ ├── images │ ├── approach.png │ ├── bench.png │ ├── comparison_table.png │ ├── comparison_vis.png │ ├── favicon.svg │ ├── figurines-insertion.png │ ├── lawn.png │ ├── overview.png │ ├── room.png │ └── teaser.png │ ├── js │ ├── bulma-carousel.js │ ├── bulma-carousel.min.js │ ├── bulma-slider.js │ ├── bulma-slider.min.js │ ├── fontawesome.all.min.js │ └── index.js │ └── videos │ ├── ._Icon │ ├── bouquet.mp4 │ ├── bouquet_query.mp4 │ ├── bouquet_sg.mp4 │ ├── figurines.mp4 │ ├── figurines_query.mp4 │ ├── figurines_sg.mp4 │ ├── insertion_query.mp4 │ ├── insertion_vg.mp4 │ ├── ramen.mp4 │ ├── ramen_query.mp4 │ ├── ramen_sg.mp4 │ ├── replay.mp4 │ ├── shoe_rack.mp4 │ ├── shoe_rack_query.mp4 │ ├── shoe_rack_sg.mp4 │ ├── teaser.mp4 │ ├── teatime.mp4 │ ├── teatime_pca.mp4 │ ├── teatime_query.mp4 │ ├── teatime_sg.mp4 │ ├── waldo_kitchen.mp4 │ ├── waldo_kitchen_query.mp4 │ └── waldo_kitchen_sg.mp4 ├── environment.yml ├── eval ├── colormaps.py ├── colors.py ├── eval_utils.py ├── evaluate_iou_3dovs.py ├── openclip_encoder.py └── utils.py ├── eval_3DOVS.sh ├── feature_map_renderer.py ├── full_eval.py ├── gaussian_feature_extractor.py ├── gaussian_renderer ├── __init__.py └── network_gui.py ├── lpipsPyTorch ├── __init__.py └── modules │ ├── lpips.py │ ├── networks.py │ └── utils.py ├── metrics.py ├── render.py ├── requirements.txt ├── run_3DOVS.sh ├── run_lerf.sh ├── scene ├── __init__.py ├── cameras.py ├── colmap_loader.py ├── dataset_readers.py └── gaussian_model.py ├── train.py └── utils ├── camera_utils.py ├── general_utils.py ├── graphics_utils.py ├── image_utils.py ├── loss_utils.py ├── make_depth_scale.py ├── read_write_model.py ├── sh_utils.py └── system_utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .vscode 3 | output 4 | build 5 | diff_rasterization/diff_rast.egg-info 6 | diff_rasterization/dist 7 | tensorboard_3d 8 | screenshots 9 | eval_results -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "submodules/simple-knn"] 2 | path = submodules/simple-knn 3 | url = https://gitlab.inria.fr/bkerbl/simple-knn.git 4 | [submodule "submodules/diff-gaussian-rasterization"] 5 | path = submodules/diff-gaussian-rasterization 6 | url = https://github.com/graphdeco-inria/diff-gaussian-rasterization.git 7 | branch = dr_aa 8 | [submodule "SIBR_viewers"] 9 | path = SIBR_viewers 10 | url = https://gitlab.inria.fr/sibr/sibr_core.git 11 | [submodule "submodules/fused-ssim"] 12 | path = submodules/fused-ssim 13 | url = https://github.com/rahul-goel/fused-ssim.git 14 | [submodule "submodules/gsplat"] 15 | path = submodules/gsplat 16 | url = git@github.com:JoannaCCJH/gsplat.git 17 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Gaussian-Splatting License 2 | =========================== 3 | 4 | **Inria** and **the Max Planck Institut for Informatik (MPII)** hold all the ownership rights on the *Software* named **gaussian-splatting**. 
5 | The *Software* is in the process of being registered with the Agence pour la Protection des 6 | Programmes (APP). 7 | 8 | The *Software* is still being developed by the *Licensor*. 9 | 10 | *Licensor*'s goal is to allow the research community to use, test and evaluate 11 | the *Software*. 12 | 13 | ## 1. Definitions 14 | 15 | *Licensee* means any person or entity that uses the *Software* and distributes 16 | its *Work*. 17 | 18 | *Licensor* means the owners of the *Software*, i.e Inria and MPII 19 | 20 | *Software* means the original work of authorship made available under this 21 | License ie gaussian-splatting. 22 | 23 | *Work* means the *Software* and any additions to or derivative works of the 24 | *Software* that are made available under this License. 25 | 26 | 27 | ## 2. Purpose 28 | This license is intended to define the rights granted to the *Licensee* by 29 | Licensors under the *Software*. 30 | 31 | ## 3. Rights granted 32 | 33 | For the above reasons Licensors have decided to distribute the *Software*. 34 | Licensors grant non-exclusive rights to use the *Software* for research purposes 35 | to research users (both academic and industrial), free of charge, without right 36 | to sublicense.. The *Software* may be used "non-commercially", i.e., for research 37 | and/or evaluation purposes only. 38 | 39 | Subject to the terms and conditions of this License, you are granted a 40 | non-exclusive, royalty-free, license to reproduce, prepare derivative works of, 41 | publicly display, publicly perform and distribute its *Work* and any resulting 42 | derivative works in any form. 43 | 44 | ## 4. Limitations 45 | 46 | **4.1 Redistribution.** You may reproduce or distribute the *Work* only if (a) you do 47 | so under this License, (b) you include a complete copy of this License with 48 | your distribution, and (c) you retain without modification any copyright, 49 | patent, trademark, or attribution notices that are present in the *Work*. 50 | 51 | **4.2 Derivative Works.** You may specify that additional or different terms apply 52 | to the use, reproduction, and distribution of your derivative works of the *Work* 53 | ("Your Terms") only if (a) Your Terms provide that the use limitation in 54 | Section 2 applies to your derivative works, and (b) you identify the specific 55 | derivative works that are subject to Your Terms. Notwithstanding Your Terms, 56 | this License (including the redistribution requirements in Section 3.1) will 57 | continue to apply to the *Work* itself. 58 | 59 | **4.3** Any other use without of prior consent of Licensors is prohibited. Research 60 | users explicitly acknowledge having received from Licensors all information 61 | allowing to appreciate the adequacy between of the *Software* and their needs and 62 | to undertake all necessary precautions for its execution and use. 63 | 64 | **4.4** The *Software* is provided both as a compiled library file and as source 65 | code. In case of using the *Software* for a publication or other results obtained 66 | through the use of the *Software*, users are strongly encouraged to cite the 67 | corresponding publications as explained in the documentation of the *Software*. 68 | 69 | ## 5. Disclaimer 70 | 71 | THE USER CANNOT USE, EXPLOIT OR DISTRIBUTE THE *SOFTWARE* FOR COMMERCIAL PURPOSES 72 | WITHOUT PRIOR AND EXPLICIT CONSENT OF LICENSORS. YOU MUST CONTACT INRIA FOR ANY 73 | UNAUTHORIZED USE: stip-sophia.transfert@inria.fr . ANY SUCH ACTION WILL 74 | CONSTITUTE A FORGERY. 
THIS *SOFTWARE* IS PROVIDED "AS IS" WITHOUT ANY WARRANTIES 75 | OF ANY NATURE AND ANY EXPRESS OR IMPLIED WARRANTIES, WITH REGARDS TO COMMERCIAL 76 | USE, PROFESSIONNAL USE, LEGAL OR NOT, OR OTHER, OR COMMERCIALISATION OR 77 | ADAPTATION. UNLESS EXPLICITLY PROVIDED BY LAW, IN NO EVENT, SHALL INRIA OR THE 78 | AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 79 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 80 | GOODS OR SERVICES, LOSS OF USE, DATA, OR PROFITS OR BUSINESS INTERRUPTION) 81 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 82 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING FROM, OUT OF OR 83 | IN CONNECTION WITH THE *SOFTWARE* OR THE USE OR OTHER DEALINGS IN THE *SOFTWARE*. 84 | 85 | ## 6. Files subject to permissive licenses 86 | The contents of the file ```utils/loss_utils.py``` are based on publicly available code authored by Evan Su, which falls under the permissive MIT license. 87 | 88 | Title: pytorch-ssim\ 89 | Project code: https://github.com/Po-Hsun-Su/pytorch-ssim\ 90 | Copyright Evan Su, 2017\ 91 | License: https://github.com/Po-Hsun-Su/pytorch-ssim/blob/master/LICENSE.txt (MIT) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Occam's LGS: An Efficient Approach for Language Gaussian Splatting 2 | 3 | [![arXiv](https://img.shields.io/badge/arXiv-2412.01807-b31b1b.svg)](https://arxiv.org/abs/2412.01807) 4 | [![Project Page](https://img.shields.io/badge/Project-Page-blue)](https://insait-institute.github.io/OccamLGS/) 5 | 6 | This is the official implementation of "Occam's LGS: An Efficient Approach for Language Gaussian Splatting". 7 | 8 | ## Overview 9 | 10 | Occam's LGS is a simple, training-free approach for Language-guided 3D Gaussian Splatting that achieves state-of-the-art results with a 100x speed improvement. 
Our method: 11 | 12 | - 🎯 Lifts 2D language features to 3D Gaussian Splats without complex modules or training 13 | - 🚀 Provides 100x faster optimization compared to existing methods 14 | - 🧩 Works with any feature dimension without compression 15 | - 🎨 Enables easy scene manipulation and object insertion 16 | 17 | ## Installation Guide 18 | 19 | ### System Requirements 20 | We use the following setup to run OccamLGS: 21 | 22 | - NVIDIA GPU with CUDA support 23 | - PyTorch 2.2.2 24 | - Python 3.10 25 | - GCC 11.4.0 26 | 27 | ### Clone Repository 28 | ```bash 29 | git clone git@github.com:JoannaCCJH/occamlgs.git --recursive 30 | ``` 31 | 32 | ### Environment Setup 33 | ```bash 34 | micromamba create -n occamlgs python=3.10 35 | micromamba activate occamlgs 36 | pip install torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2 --index-url https://download.pytorch.org/whl/cu121 37 | ``` 38 | 39 | ### Project Dependencies 40 | ```bash 41 | pip install -r requirements.txt 42 | micromamba install -c conda-forge gxx=11.4.0 43 | ``` 44 | 45 | ### Submodules 46 | ```bash 47 | pip install -e submodules/gsplat[dev] 48 | pip install -e submodules/simple-knn 49 | ``` 50 | 51 | ## Dataset Preparation 52 | ### Input Dataset 53 | The dataset follows a structured format where each 3D scene is organized as follows: 54 | ``` 55 | lerf_ovs/ 56 | └── scene_name/ # Name of the specific scene (e.g., teatime) 57 | ├── distorted/ 58 | ├── images/ # Contains the original, unprocessed scene images 59 | ├── language_features/ # Pre-extracted language embeddings 60 | │ ├── frame_00001_f.npy 61 | │ ├── frame_00001_s.npy 62 | │ └── ... 63 | ├── sparse/0/ 64 | │ ├── test.txt # Testing image list 65 | │ ├── cameras.bin 66 | │ ├── images.bin 67 | │ └── points3D.bin 68 | ├── stereo/ 69 | ``` 70 | Notes: 71 | - Language features are pre-extracted and stored as 512-dimensional vectors. 72 | - For detailed information about feature levels and the language feature extraction methodology, please refer to the [LangSplat repository](https://github.com/minghanqin/LangSplat). 73 | 74 | ### Output Directory Structure 75 | The pre-trained RGB model outputs are organized as follows: 76 | ``` 77 | output/ 78 | └── dataset_name/ 79 | └── scene_name/ 80 | ├── point_cloud/ 81 | │ └── iteration_30000/ 82 | │ └── point_cloud.ply # Point cloud at 30K iterations 83 | ├── cameras.json 84 | ├── cfg_args 85 | ├── chkpnt30000.pth # Model checkpoint at 30K iterations 86 | └── input.ply 87 | 88 | ``` 89 | After running `gaussian_feature_extractor.py` for all three feature levels, three additional checkpoint files are added: 90 | 91 | ``` 92 | output/ 93 | └── dataset_name/ 94 | └── scene_name/ 95 | ├── point_cloud/ 96 | │ └── iteration_30000/ 97 | │ └── point_cloud.ply # Point cloud at 30K iterations 98 | ├── cameras.json 99 | ├── cfg_args 100 | ├── chkpnt30000.pth # RGB model checkpoint 101 | ├── input.ply 102 | ├── chkpnt30000_langfeat_1.pth # Language features level 1 103 | ├── chkpnt30000_langfeat_2.pth # Language features level 2 104 | └── chkpnt30000_langfeat_3.pth # Language features level 3 105 | 106 | ``` 107 | 108 | Note: The script `gaussian_feature_extractor.py` generates three new semantic checkpoints, each containing a different level of language features while maintaining the same RGB model weights from the original checkpoint.
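As a quick sanity check of the pre-extracted language features described above, the sketch below loads one frame's feature/segment pair and prints its shape. This is a minimal, hypothetical helper rather than part of the repository: the scene path and frame name are placeholders, and the assumption that `*_f.npy` stores the 512-dimensional feature vectors while `*_s.npy` stores the corresponding segmentation maps follows LangSplat's convention, so adapt it to your own data layout.

```python
# Hypothetical sanity-check snippet, not part of this repository.
# Assumes the LangSplat-style layout described above:
#   <scene>/language_features/<frame>_f.npy   # per-segment feature vectors (expected 512-dim)
#   <scene>/language_features/<frame>_s.npy   # segmentation maps associating pixels with segments
import numpy as np

scene_dir = "lerf_ovs/teatime"   # placeholder: path to your scene
frame = "frame_00001"            # placeholder: any frame with extracted features

feats = np.load(f"{scene_dir}/language_features/{frame}_f.npy")
segs = np.load(f"{scene_dir}/language_features/{frame}_s.npy")

print("features:", feats.shape, feats.dtype)   # last dimension should be 512
print("segments:", segs.shape, segs.dtype)
```

If the feature dimension or file layout differs, double-check the extraction step against the LangSplat repository before running `gaussian_feature_extractor.py`.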
109 | 110 | ## Usage 111 | 112 | 113 | ### Prerequisites 114 | 115 | - A pre-trained RGB Gaussian model (use the `train.py` and `render.py` commands below to train a model on your scene with the gsplat renderer) 116 | - A `test.txt` file in `scene_name/sparse/0/` defining the test set 117 | 118 | 119 | #### 1. Train and Render RGB Gaussian Model 120 | ```bash 121 | # Train the Gaussian model 122 | python train.py -s $DATA_SOURCE_PATH -m $MODEL_OUTPUT_PATH --iterations 30000 123 | 124 | # Render the trained model 125 | python render.py -m $MODEL_OUTPUT_PATH --iteration 30000 126 | ``` 127 | 128 | #### 2. Feature Extraction and Visualization 129 | ```bash 130 | # Extract per-Gaussian feature vectors 131 | python gaussian_feature_extractor.py -m $MODEL_OUTPUT_PATH --iteration 30000 --eval --feature_level 1 132 | 133 | # Render feature maps 134 | python feature_map_renderer.py -m $MODEL_OUTPUT_PATH --iteration 30000 --eval --feature_level 1 135 | ``` 136 | ### Example Pipeline 137 | Check `run_lerf.sh` for a complete example using the "teatime" scene from the LERF_OVS dataset and `run_3DOVS.sh` for a complete example using the "bench" scene from the 3D-OVS dataset. 138 | 139 | ## Evaluation 140 | ### LERF 141 | We follow the evaluation methodology established by LangSplat for our LERF assessments. For detailed information about the evaluation metrics and procedures, please refer to the LangSplat repository. 142 | 143 | ### 3DOVS 144 | Here are the instructions for evaluating the 3DOVS dataset. 145 | 1. Configure Parameters: Open `eval_3DOVS.sh` and adjust the following: 146 | - `DATASET_NAME`: Set to your 3DOVS dataset split (e.g., "bench") 147 | - `GT_FOLDER`: Path to your preprocessed 3DOVS data 148 | - `FEAT_FOLDER_NAME`: Name of your model's feature output folder 149 | 2. Run the evaluation script: 150 | ```bash 151 | sh eval_3DOVS.sh 152 | ``` 153 | 3. View Results: Evaluation metrics and visualizations will be saved to the `eval_results` directory. 154 | 155 | **Configuration Options** 156 | 157 | The evaluation script supports several parameters: 158 | 159 | - `--stability_thresh`: Threshold for stability analysis (default: 0.4) 160 | - `--min_mask_size`: Minimum valid mask size (default: 0.005) 161 | - `--max_mask_size`: Maximum valid mask size (default: 0.9) 162 | 163 | For detailed information about our evaluation methodology, please refer to the supplementary materials in our paper. 164 | 165 | 166 | ## TODO 167 | - [x] Training and rendering code released 168 | - [x] GSplat rasterizer code released 169 | - [x] Evaluation code released 170 | - [ ] Corrected room scene labels to be released 171 | - [ ] Autoencoder for any-dimensional features to be released 172 | 173 | ## Acknowledgement 174 | Our code is built on [LangSplat](https://github.com/minghanqin/LangSplat), [3DGS](https://github.com/graphdeco-inria/gaussian-splatting), and [gsplat](https://github.com/nerfstudio-project/gsplat). We greatly appreciate their open-source contributions!
175 | 176 | ## BibTeX 177 | 178 | ```bibtex 179 | @article{cheng2024occamslgssimpleapproach, 180 | title={Occam's LGS: A Simple Approach for Language Gaussian Splatting}, 181 | author={Jiahuan Cheng and Jan-Nico Zaech and Luc Van Gool and Danda Pani Paudel}, 182 | year={2024}, 183 | eprint={2412.01807} 184 | } 185 | -------------------------------------------------------------------------------- /arguments/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | from argparse import ArgumentParser, Namespace 13 | import sys 14 | import os 15 | 16 | class GroupParams: 17 | pass 18 | 19 | class ParamGroup: 20 | def __init__(self, parser: ArgumentParser, name : str, fill_none = False): 21 | group = parser.add_argument_group(name) 22 | for key, value in vars(self).items(): 23 | shorthand = False 24 | if key.startswith("_"): 25 | shorthand = True 26 | key = key[1:] 27 | t = type(value) 28 | value = value if not fill_none else None 29 | if shorthand: 30 | if t == bool: 31 | group.add_argument("--" + key, ("-" + key[0:1]), default=value, action="store_true") 32 | else: 33 | group.add_argument("--" + key, ("-" + key[0:1]), default=value, type=t) 34 | else: 35 | if t == bool: 36 | group.add_argument("--" + key, default=value, action="store_true") 37 | else: 38 | group.add_argument("--" + key, default=value, type=t) 39 | 40 | def extract(self, args): 41 | group = GroupParams() 42 | for arg in vars(args).items(): 43 | if arg[0] in vars(self) or ("_" + arg[0]) in vars(self): 44 | setattr(group, arg[0], arg[1]) 45 | return group 46 | 47 | class ModelParams(ParamGroup): 48 | def __init__(self, parser, sentinel=False): 49 | self.sh_degree = 3 50 | self._source_path = "" 51 | self._model_path = "" 52 | self._images = "images" 53 | self._depths = "" 54 | self._resolution = -1 55 | self._white_background = False 56 | self.train_test_exp = False 57 | self.data_device = "cuda" 58 | self.eval = False 59 | 60 | self._language_features_name = "language_features" 61 | self._feature_level = 2 62 | super().__init__(parser, "Loading Parameters", sentinel) 63 | 64 | def extract(self, args): 65 | g = super().extract(args) 66 | g.source_path = os.path.abspath(g.source_path) 67 | try : 68 | g.lf_path = os.path.join(g.source_path, g.language_features_name) 69 | except: 70 | pass 71 | return g 72 | 73 | class PipelineParams(ParamGroup): 74 | def __init__(self, parser): 75 | self.convert_SHs_python = False 76 | self.compute_cov3D_python = False 77 | self.debug = False 78 | self.antialiasing = False 79 | super().__init__(parser, "Pipeline Parameters") 80 | 81 | class OptimizationParams(ParamGroup): 82 | def __init__(self, parser): 83 | self.iterations = 30_000 84 | self.position_lr_init = 0.00016 85 | self.position_lr_final = 0.0000016 86 | self.position_lr_delay_mult = 0.01 87 | self.position_lr_max_steps = 30_000 88 | self.feature_lr = 0.0025 89 | self.opacity_lr = 0.025 90 | self.scaling_lr = 0.005 91 | self.rotation_lr = 0.001 92 | self.percent_dense = 0.01 93 | self.lambda_dssim = 0.2 94 | self.densification_interval = 100 95 | self.opacity_reset_interval = 3000 96 | self.densify_from_iter = 500 97 | self.densify_until_iter = 15_000 98 | 
self.densify_grad_threshold = 0.0002 99 | self.depth_l1_weight_init = 1.0 100 | self.depth_l1_weight_final = 0.01 101 | self.random_background = False 102 | self.optimizer_type = "default" 103 | 104 | super().__init__(parser, "Optimization Parameters") 105 | 106 | def get_combined_args(parser : ArgumentParser): 107 | cmdlne_string = sys.argv[1:] 108 | cfgfile_string = "Namespace()" 109 | args_cmdline = parser.parse_args(cmdlne_string) 110 | 111 | try: 112 | cfgfilepath = os.path.join(args_cmdline.model_path, "cfg_args") 113 | print("Looking for config file in", cfgfilepath) 114 | with open(cfgfilepath) as cfg_file: 115 | print("Config file found: {}".format(cfgfilepath)) 116 | cfgfile_string = cfg_file.read() 117 | except TypeError: 118 | print("Config file not found at") 119 | pass 120 | args_cfgfile = eval(cfgfile_string) 121 | 122 | merged_dict = vars(args_cfgfile).copy() 123 | for k,v in vars(args_cmdline).items(): 124 | if v != None: 125 | merged_dict[k] = v 126 | return Namespace(**merged_dict) 127 | -------------------------------------------------------------------------------- /convert.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import os 13 | import logging 14 | from argparse import ArgumentParser 15 | import shutil 16 | 17 | # This Python script is based on the shell converter script provided in the MipNerF 360 repository. 18 | parser = ArgumentParser("Colmap converter") 19 | parser.add_argument("--no_gpu", action='store_true') 20 | parser.add_argument("--skip_matching", action='store_true') 21 | parser.add_argument("--source_path", "-s", required=True, type=str) 22 | parser.add_argument("--camera", default="OPENCV", type=str) 23 | parser.add_argument("--colmap_executable", default="", type=str) 24 | parser.add_argument("--resize", action="store_true") 25 | parser.add_argument("--magick_executable", default="", type=str) 26 | args = parser.parse_args() 27 | colmap_command = '"{}"'.format(args.colmap_executable) if len(args.colmap_executable) > 0 else "colmap" 28 | magick_command = '"{}"'.format(args.magick_executable) if len(args.magick_executable) > 0 else "magick" 29 | use_gpu = 1 if not args.no_gpu else 0 30 | 31 | if not args.skip_matching: 32 | os.makedirs(args.source_path + "/distorted/sparse", exist_ok=True) 33 | 34 | ## Feature extraction 35 | feat_extracton_cmd = colmap_command + " feature_extractor "\ 36 | "--database_path " + args.source_path + "/distorted/database.db \ 37 | --image_path " + args.source_path + "/input \ 38 | --ImageReader.single_camera 1 \ 39 | --ImageReader.camera_model " + args.camera + " \ 40 | --SiftExtraction.use_gpu " + str(use_gpu) 41 | exit_code = os.system(feat_extracton_cmd) 42 | if exit_code != 0: 43 | logging.error(f"Feature extraction failed with code {exit_code}. Exiting.") 44 | exit(exit_code) 45 | 46 | ## Feature matching 47 | feat_matching_cmd = colmap_command + " exhaustive_matcher \ 48 | --database_path " + args.source_path + "/distorted/database.db \ 49 | --SiftMatching.use_gpu " + str(use_gpu) 50 | exit_code = os.system(feat_matching_cmd) 51 | if exit_code != 0: 52 | logging.error(f"Feature matching failed with code {exit_code}. 
Exiting.") 53 | exit(exit_code) 54 | 55 | ### Bundle adjustment 56 | # The default Mapper tolerance is unnecessarily large, 57 | # decreasing it speeds up bundle adjustment steps. 58 | mapper_cmd = (colmap_command + " mapper \ 59 | --database_path " + args.source_path + "/distorted/database.db \ 60 | --image_path " + args.source_path + "/input \ 61 | --output_path " + args.source_path + "/distorted/sparse \ 62 | --Mapper.ba_global_function_tolerance=0.000001") 63 | exit_code = os.system(mapper_cmd) 64 | if exit_code != 0: 65 | logging.error(f"Mapper failed with code {exit_code}. Exiting.") 66 | exit(exit_code) 67 | 68 | ### Image undistortion 69 | ## We need to undistort our images into ideal pinhole intrinsics. 70 | img_undist_cmd = (colmap_command + " image_undistorter \ 71 | --image_path " + args.source_path + "/input \ 72 | --input_path " + args.source_path + "/distorted/sparse/0 \ 73 | --output_path " + args.source_path + "\ 74 | --output_type COLMAP") 75 | exit_code = os.system(img_undist_cmd) 76 | if exit_code != 0: 77 | logging.error(f"Mapper failed with code {exit_code}. Exiting.") 78 | exit(exit_code) 79 | 80 | files = os.listdir(args.source_path + "/sparse") 81 | os.makedirs(args.source_path + "/sparse/0", exist_ok=True) 82 | # Copy each file from the source directory to the destination directory 83 | for file in files: 84 | if file == '0': 85 | continue 86 | source_file = os.path.join(args.source_path, "sparse", file) 87 | destination_file = os.path.join(args.source_path, "sparse", "0", file) 88 | shutil.move(source_file, destination_file) 89 | 90 | if(args.resize): 91 | print("Copying and resizing...") 92 | 93 | # Resize images. 94 | os.makedirs(args.source_path + "/images_2", exist_ok=True) 95 | os.makedirs(args.source_path + "/images_4", exist_ok=True) 96 | os.makedirs(args.source_path + "/images_8", exist_ok=True) 97 | # Get the list of files in the source directory 98 | files = os.listdir(args.source_path + "/images") 99 | # Copy each file from the source directory to the destination directory 100 | for file in files: 101 | source_file = os.path.join(args.source_path, "images", file) 102 | 103 | destination_file = os.path.join(args.source_path, "images_2", file) 104 | shutil.copy2(source_file, destination_file) 105 | exit_code = os.system(magick_command + " mogrify -resize 50% " + destination_file) 106 | if exit_code != 0: 107 | logging.error(f"50% resize failed with code {exit_code}. Exiting.") 108 | exit(exit_code) 109 | 110 | destination_file = os.path.join(args.source_path, "images_4", file) 111 | shutil.copy2(source_file, destination_file) 112 | exit_code = os.system(magick_command + " mogrify -resize 25% " + destination_file) 113 | if exit_code != 0: 114 | logging.error(f"25% resize failed with code {exit_code}. Exiting.") 115 | exit(exit_code) 116 | 117 | destination_file = os.path.join(args.source_path, "images_8", file) 118 | shutil.copy2(source_file, destination_file) 119 | exit_code = os.system(magick_command + " mogrify -resize 12.5% " + destination_file) 120 | if exit_code != 0: 121 | logging.error(f"12.5% resize failed with code {exit_code}. 
Exiting.") 122 | exit(exit_code) 123 | 124 | print("Done.") 125 | -------------------------------------------------------------------------------- /docs/static/css/bulma-carousel.min.css: -------------------------------------------------------------------------------- 1 | @-webkit-keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}@keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}.slider{position:relative;width:100%}.slider-container{display:flex;flex-wrap:nowrap;flex-direction:row;overflow:hidden;-webkit-transform:translate3d(0,0,0);transform:translate3d(0,0,0);min-height:100%}.slider-container.is-vertical{flex-direction:column}.slider-container .slider-item{flex:none}.slider-container .slider-item .image.is-covered img{-o-object-fit:cover;object-fit:cover;-o-object-position:center center;object-position:center center;height:100%;width:100%}.slider-container .slider-item .video-container{height:0;padding-bottom:0;padding-top:56.25%;margin:0;position:relative}.slider-container .slider-item .video-container.is-1by1,.slider-container .slider-item .video-container.is-square{padding-top:100%}.slider-container .slider-item .video-container.is-4by3{padding-top:75%}.slider-container .slider-item .video-container.is-21by9{padding-top:42.857143%}.slider-container .slider-item .video-container embed,.slider-container .slider-item .video-container iframe,.slider-container .slider-item .video-container object{position:absolute;top:0;left:0;width:100%!important;height:100%!important}.slider-navigation-next,.slider-navigation-previous{display:flex;justify-content:center;align-items:center;position:absolute;width:42px;height:42px;background:#fff center center no-repeat;background-size:20px 20px;border:1px solid #fff;border-radius:25091983px;box-shadow:0 2px 5px #3232321a;top:50%;margin-top:-20px;left:0;cursor:pointer;transition:opacity .3s,-webkit-transform .3s;transition:transform .3s,opacity .3s;transition:transform .3s,opacity .3s,-webkit-transform .3s}.slider-navigation-next:hover,.slider-navigation-previous:hover{-webkit-transform:scale(1.2);transform:scale(1.2)}.slider-navigation-next.is-hidden,.slider-navigation-previous.is-hidden{display:none;opacity:0}.slider-navigation-next svg,.slider-navigation-previous svg{width:25%}.slider-navigation-next{left:auto;right:0;background:#fff center center no-repeat;background-size:20px 20px}.slider-pagination{display:none;justify-content:center;align-items:center;position:absolute;bottom:0;left:0;right:0;padding:.5rem 1rem;text-align:center}.slider-pagination .slider-page{background:#fff;width:10px;height:10px;border-radius:25091983px;display:inline-block;margin:0 3px;box-shadow:0 2px 5px #3232321a;transition:-webkit-transform .3s;transition:transform .3s;transition:transform .3s,-webkit-transform .3s;cursor:pointer}.slider-pagination .slider-page.is-active,.slider-pagination .slider-page:hover{-webkit-transform:scale(1.4);transform:scale(1.4)}@media screen and (min-width:800px){.slider-pagination{display:flex}}.hero.has-carousel{position:relative}.hero.has-carousel+.hero-body,.hero.has-carousel+.hero-footer,.hero.has-carousel+.hero-head{z-index:10;overflow:hidden}.hero.has-carousel .hero-carousel{position:absolute;top:0;left:0;bottom:0;right:0;height:auto;border:none;margin:auto;padding:0;z-index:0}.hero.has-carousel .hero-carousel 
.slider{width:100%;max-width:100%;overflow:hidden;height:100%!important;max-height:100%;z-index:0}.hero.has-carousel .hero-carousel .slider .has-background{max-height:100%}.hero.has-carousel .hero-carousel .slider .has-background .is-background{-o-object-fit:cover;object-fit:cover;-o-object-position:center center;object-position:center center;height:100%;width:100%}.hero.has-carousel .hero-body{margin:0 3rem;z-index:10} -------------------------------------------------------------------------------- /docs/static/css/bulma-slider.min.css: -------------------------------------------------------------------------------- 1 | @-webkit-keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}@keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}input[type=range].slider{-webkit-appearance:none;-moz-appearance:none;appearance:none;margin:1rem 0;background:0 0;touch-action:none}input[type=range].slider.is-fullwidth{display:block;width:100%}input[type=range].slider:focus{outline:0}input[type=range].slider:not([orient=vertical])::-webkit-slider-runnable-track{width:100%}input[type=range].slider:not([orient=vertical])::-moz-range-track{width:100%}input[type=range].slider:not([orient=vertical])::-ms-track{width:100%}input[type=range].slider:not([orient=vertical]).has-output+output,input[type=range].slider:not([orient=vertical]).has-output-tooltip+output{width:3rem;background:#4a4a4a;border-radius:4px;padding:.4rem .8rem;font-size:.75rem;line-height:.75rem;text-align:center;text-overflow:ellipsis;white-space:nowrap;color:#fff;overflow:hidden;pointer-events:none;z-index:200}input[type=range].slider:not([orient=vertical]).has-output-tooltip:disabled+output,input[type=range].slider:not([orient=vertical]).has-output:disabled+output{opacity:.5}input[type=range].slider:not([orient=vertical]).has-output{display:inline-block;vertical-align:middle;width:calc(100% - (4.2rem))}input[type=range].slider:not([orient=vertical]).has-output+output{display:inline-block;margin-left:.75rem;vertical-align:middle}input[type=range].slider:not([orient=vertical]).has-output-tooltip{display:block}input[type=range].slider:not([orient=vertical]).has-output-tooltip+output{position:absolute;left:0;top:-.1rem}input[type=range].slider[orient=vertical]{-webkit-appearance:slider-vertical;-moz-appearance:slider-vertical;appearance:slider-vertical;-webkit-writing-mode:bt-lr;-ms-writing-mode:bt-lr;writing-mode:bt-lr}input[type=range].slider[orient=vertical]::-webkit-slider-runnable-track{height:100%}input[type=range].slider[orient=vertical]::-moz-range-track{height:100%}input[type=range].slider[orient=vertical]::-ms-track{height:100%}input[type=range].slider::-webkit-slider-runnable-track{cursor:pointer;animate:.2s;box-shadow:0 0 0 #7a7a7a;background:#dbdbdb;border-radius:4px;border:0 solid #7a7a7a}input[type=range].slider::-moz-range-track{cursor:pointer;animate:.2s;box-shadow:0 0 0 #7a7a7a;background:#dbdbdb;border-radius:4px;border:0 solid #7a7a7a}input[type=range].slider::-ms-track{cursor:pointer;animate:.2s;box-shadow:0 0 0 #7a7a7a;background:#dbdbdb;border-radius:4px;border:0 solid #7a7a7a}input[type=range].slider::-ms-fill-lower{background:#dbdbdb;border-radius:4px}input[type=range].slider::-ms-fill-upper{background:#dbdbdb;border-radius:4px}input[type=range].slider::-webkit-slider-thumb{box-shadow:none;border:1px solid 
#b5b5b5;border-radius:4px;background:#fff;cursor:pointer}input[type=range].slider::-moz-range-thumb{box-shadow:none;border:1px solid #b5b5b5;border-radius:4px;background:#fff;cursor:pointer}input[type=range].slider::-ms-thumb{box-shadow:none;border:1px solid #b5b5b5;border-radius:4px;background:#fff;cursor:pointer}input[type=range].slider::-webkit-slider-thumb{-webkit-appearance:none;appearance:none}input[type=range].slider.is-circle::-webkit-slider-thumb{border-radius:290486px}input[type=range].slider.is-circle::-moz-range-thumb{border-radius:290486px}input[type=range].slider.is-circle::-ms-thumb{border-radius:290486px}input[type=range].slider:active::-webkit-slider-thumb{-webkit-transform:scale(1.25);transform:scale(1.25)}input[type=range].slider:active::-moz-range-thumb{transform:scale(1.25)}input[type=range].slider:active::-ms-thumb{transform:scale(1.25)}input[type=range].slider:disabled{opacity:.5;cursor:not-allowed}input[type=range].slider:disabled::-webkit-slider-thumb{cursor:not-allowed;-webkit-transform:scale(1);transform:scale(1)}input[type=range].slider:disabled::-moz-range-thumb{cursor:not-allowed;transform:scale(1)}input[type=range].slider:disabled::-ms-thumb{cursor:not-allowed;transform:scale(1)}input[type=range].slider:not([orient=vertical]){min-height:calc((1rem + 2px) * 1.25)}input[type=range].slider:not([orient=vertical])::-webkit-slider-runnable-track{height:.5rem}input[type=range].slider:not([orient=vertical])::-moz-range-track{height:.5rem}input[type=range].slider:not([orient=vertical])::-ms-track{height:.5rem}input[type=range].slider[orient=vertical]::-webkit-slider-runnable-track{width:.5rem}input[type=range].slider[orient=vertical]::-moz-range-track{width:.5rem}input[type=range].slider[orient=vertical]::-ms-track{width:.5rem}input[type=range].slider::-webkit-slider-thumb{height:1rem;width:1rem}input[type=range].slider::-moz-range-thumb{height:1rem;width:1rem}input[type=range].slider::-ms-thumb{height:1rem;width:1rem}input[type=range].slider::-ms-thumb{margin-top:0}input[type=range].slider::-webkit-slider-thumb{margin-top:-.25rem}input[type=range].slider[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.25rem}input[type=range].slider.is-small:not([orient=vertical]){min-height:calc((.75rem + 2px) * 1.25)}input[type=range].slider.is-small:not([orient=vertical])::-webkit-slider-runnable-track{height:.375rem}input[type=range].slider.is-small:not([orient=vertical])::-moz-range-track{height:.375rem}input[type=range].slider.is-small:not([orient=vertical])::-ms-track{height:.375rem}input[type=range].slider.is-small[orient=vertical]::-webkit-slider-runnable-track{width:.375rem}input[type=range].slider.is-small[orient=vertical]::-moz-range-track{width:.375rem}input[type=range].slider.is-small[orient=vertical]::-ms-track{width:.375rem}input[type=range].slider.is-small::-webkit-slider-thumb{height:.75rem;width:.75rem}input[type=range].slider.is-small::-moz-range-thumb{height:.75rem;width:.75rem}input[type=range].slider.is-small::-ms-thumb{height:.75rem;width:.75rem}input[type=range].slider.is-small::-ms-thumb{margin-top:0}input[type=range].slider.is-small::-webkit-slider-thumb{margin-top:-.1875rem}input[type=range].slider.is-small[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.1875rem}input[type=range].slider.is-medium:not([orient=vertical]){min-height:calc((1.25rem + 2px) * 
1.25)}input[type=range].slider.is-medium:not([orient=vertical])::-webkit-slider-runnable-track{height:.625rem}input[type=range].slider.is-medium:not([orient=vertical])::-moz-range-track{height:.625rem}input[type=range].slider.is-medium:not([orient=vertical])::-ms-track{height:.625rem}input[type=range].slider.is-medium[orient=vertical]::-webkit-slider-runnable-track{width:.625rem}input[type=range].slider.is-medium[orient=vertical]::-moz-range-track{width:.625rem}input[type=range].slider.is-medium[orient=vertical]::-ms-track{width:.625rem}input[type=range].slider.is-medium::-webkit-slider-thumb{height:1.25rem;width:1.25rem}input[type=range].slider.is-medium::-moz-range-thumb{height:1.25rem;width:1.25rem}input[type=range].slider.is-medium::-ms-thumb{height:1.25rem;width:1.25rem}input[type=range].slider.is-medium::-ms-thumb{margin-top:0}input[type=range].slider.is-medium::-webkit-slider-thumb{margin-top:-.3125rem}input[type=range].slider.is-medium[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.3125rem}input[type=range].slider.is-large:not([orient=vertical]){min-height:calc((1.5rem + 2px) * 1.25)}input[type=range].slider.is-large:not([orient=vertical])::-webkit-slider-runnable-track{height:.75rem}input[type=range].slider.is-large:not([orient=vertical])::-moz-range-track{height:.75rem}input[type=range].slider.is-large:not([orient=vertical])::-ms-track{height:.75rem}input[type=range].slider.is-large[orient=vertical]::-webkit-slider-runnable-track{width:.75rem}input[type=range].slider.is-large[orient=vertical]::-moz-range-track{width:.75rem}input[type=range].slider.is-large[orient=vertical]::-ms-track{width:.75rem}input[type=range].slider.is-large::-webkit-slider-thumb{height:1.5rem;width:1.5rem}input[type=range].slider.is-large::-moz-range-thumb{height:1.5rem;width:1.5rem}input[type=range].slider.is-large::-ms-thumb{height:1.5rem;width:1.5rem}input[type=range].slider.is-large::-ms-thumb{margin-top:0}input[type=range].slider.is-large::-webkit-slider-thumb{margin-top:-.375rem}input[type=range].slider.is-large[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.375rem}input[type=range].slider.is-white::-moz-range-track{background:#fff!important}input[type=range].slider.is-white::-webkit-slider-runnable-track{background:#fff!important}input[type=range].slider.is-white::-ms-track{background:#fff!important}input[type=range].slider.is-white::-ms-fill-lower{background:#fff}input[type=range].slider.is-white::-ms-fill-upper{background:#fff}input[type=range].slider.is-white .has-output-tooltip+output,input[type=range].slider.is-white.has-output+output{background-color:#fff;color:#0a0a0a}input[type=range].slider.is-black::-moz-range-track{background:#0a0a0a!important}input[type=range].slider.is-black::-webkit-slider-runnable-track{background:#0a0a0a!important}input[type=range].slider.is-black::-ms-track{background:#0a0a0a!important}input[type=range].slider.is-black::-ms-fill-lower{background:#0a0a0a}input[type=range].slider.is-black::-ms-fill-upper{background:#0a0a0a}input[type=range].slider.is-black 
.has-output-tooltip+output,input[type=range].slider.is-black.has-output+output{background-color:#0a0a0a;color:#fff}input[type=range].slider.is-light::-moz-range-track{background:#f5f5f5!important}input[type=range].slider.is-light::-webkit-slider-runnable-track{background:#f5f5f5!important}input[type=range].slider.is-light::-ms-track{background:#f5f5f5!important}input[type=range].slider.is-light::-ms-fill-lower{background:#f5f5f5}input[type=range].slider.is-light::-ms-fill-upper{background:#f5f5f5}input[type=range].slider.is-light .has-output-tooltip+output,input[type=range].slider.is-light.has-output+output{background-color:#f5f5f5;color:#363636}input[type=range].slider.is-dark::-moz-range-track{background:#363636!important}input[type=range].slider.is-dark::-webkit-slider-runnable-track{background:#363636!important}input[type=range].slider.is-dark::-ms-track{background:#363636!important}input[type=range].slider.is-dark::-ms-fill-lower{background:#363636}input[type=range].slider.is-dark::-ms-fill-upper{background:#363636}input[type=range].slider.is-dark .has-output-tooltip+output,input[type=range].slider.is-dark.has-output+output{background-color:#363636;color:#f5f5f5}input[type=range].slider.is-primary::-moz-range-track{background:#00d1b2!important}input[type=range].slider.is-primary::-webkit-slider-runnable-track{background:#00d1b2!important}input[type=range].slider.is-primary::-ms-track{background:#00d1b2!important}input[type=range].slider.is-primary::-ms-fill-lower{background:#00d1b2}input[type=range].slider.is-primary::-ms-fill-upper{background:#00d1b2}input[type=range].slider.is-primary .has-output-tooltip+output,input[type=range].slider.is-primary.has-output+output{background-color:#00d1b2;color:#fff}input[type=range].slider.is-link::-moz-range-track{background:#3273dc!important}input[type=range].slider.is-link::-webkit-slider-runnable-track{background:#3273dc!important}input[type=range].slider.is-link::-ms-track{background:#3273dc!important}input[type=range].slider.is-link::-ms-fill-lower{background:#3273dc}input[type=range].slider.is-link::-ms-fill-upper{background:#3273dc}input[type=range].slider.is-link .has-output-tooltip+output,input[type=range].slider.is-link.has-output+output{background-color:#3273dc;color:#fff}input[type=range].slider.is-info::-moz-range-track{background:#209cee!important}input[type=range].slider.is-info::-webkit-slider-runnable-track{background:#209cee!important}input[type=range].slider.is-info::-ms-track{background:#209cee!important}input[type=range].slider.is-info::-ms-fill-lower{background:#209cee}input[type=range].slider.is-info::-ms-fill-upper{background:#209cee}input[type=range].slider.is-info .has-output-tooltip+output,input[type=range].slider.is-info.has-output+output{background-color:#209cee;color:#fff}input[type=range].slider.is-success::-moz-range-track{background:#23d160!important}input[type=range].slider.is-success::-webkit-slider-runnable-track{background:#23d160!important}input[type=range].slider.is-success::-ms-track{background:#23d160!important}input[type=range].slider.is-success::-ms-fill-lower{background:#23d160}input[type=range].slider.is-success::-ms-fill-upper{background:#23d160}input[type=range].slider.is-success 
.has-output-tooltip+output,input[type=range].slider.is-success.has-output+output{background-color:#23d160;color:#fff}input[type=range].slider.is-warning::-moz-range-track{background:#ffdd57!important}input[type=range].slider.is-warning::-webkit-slider-runnable-track{background:#ffdd57!important}input[type=range].slider.is-warning::-ms-track{background:#ffdd57!important}input[type=range].slider.is-warning::-ms-fill-lower{background:#ffdd57}input[type=range].slider.is-warning::-ms-fill-upper{background:#ffdd57}input[type=range].slider.is-warning .has-output-tooltip+output,input[type=range].slider.is-warning.has-output+output{background-color:#ffdd57;color:rgba(0,0,0,.7)}input[type=range].slider.is-danger::-moz-range-track{background:#ff3860!important}input[type=range].slider.is-danger::-webkit-slider-runnable-track{background:#ff3860!important}input[type=range].slider.is-danger::-ms-track{background:#ff3860!important}input[type=range].slider.is-danger::-ms-fill-lower{background:#ff3860}input[type=range].slider.is-danger::-ms-fill-upper{background:#ff3860}input[type=range].slider.is-danger .has-output-tooltip+output,input[type=range].slider.is-danger.has-output+output{background-color:#ff3860;color:#fff} -------------------------------------------------------------------------------- /docs/static/css/index.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: 'Noto Sans', sans-serif; 3 | } 4 | 5 | 6 | .footer .icon-link { 7 | font-size: 25px; 8 | color: #000; 9 | } 10 | 11 | .link-block a { 12 | margin-top: 5px; 13 | margin-bottom: 5px; 14 | } 15 | 16 | .dnerf { 17 | font-variant: small-caps; 18 | } 19 | 20 | 21 | .teaser .hero-body { 22 | padding-top: 0; 23 | padding-bottom: 3rem; 24 | } 25 | 26 | .teaser { 27 | font-family: 'Google Sans', sans-serif; 28 | } 29 | 30 | 31 | .publication-title { 32 | } 33 | 34 | .publication-banner { 35 | max-height: parent; 36 | 37 | } 38 | 39 | .publication-banner video { 40 | position: relative; 41 | left: auto; 42 | top: auto; 43 | transform: none; 44 | object-fit: fit; 45 | } 46 | 47 | .publication-header .hero-body { 48 | } 49 | 50 | .publication-title { 51 | font-family: 'Google Sans', sans-serif; 52 | } 53 | 54 | .publication-authors { 55 | font-family: 'Google Sans', sans-serif; 56 | } 57 | 58 | .publication-venue { 59 | color: #555; 60 | width: fit-content; 61 | font-weight: bold; 62 | } 63 | 64 | .publication-awards { 65 | color: #ff3860; 66 | width: fit-content; 67 | font-weight: bolder; 68 | } 69 | 70 | .publication-authors { 71 | } 72 | 73 | .publication-authors a { 74 | color: hsl(204, 86%, 53%) !important; 75 | } 76 | 77 | .publication-authors a:hover { 78 | text-decoration: underline; 79 | } 80 | 81 | .author-block { 82 | display: inline-block; 83 | } 84 | 85 | .publication-banner img { 86 | } 87 | 88 | .publication-authors { 89 | /*color: #4286f4;*/ 90 | } 91 | 92 | .publication-video { 93 | position: relative; 94 | width: 100%; 95 | height: 0; 96 | padding-bottom: 56.25%; 97 | 98 | overflow: hidden; 99 | border-radius: 10px !important; 100 | } 101 | 102 | .publication-video iframe { 103 | position: absolute; 104 | top: 0; 105 | left: 0; 106 | width: 100%; 107 | height: 100%; 108 | } 109 | 110 | .publication-body img { 111 | } 112 | 113 | .results-carousel { 114 | overflow: hidden; 115 | } 116 | 117 | .results-carousel .item { 118 | margin: 5px; 119 | overflow: hidden; 120 | border: 1px solid #bbb; 121 | border-radius: 10px; 122 | padding: 0; 123 | font-size: 0; 124 | } 125 | 126 | 
.results-carousel video { 127 | margin: 0; 128 | } 129 | 130 | 131 | .interpolation-panel { 132 | background: #f5f5f5; 133 | border-radius: 10px; 134 | } 135 | 136 | .interpolation-panel .interpolation-image { 137 | width: 100%; 138 | border-radius: 5px; 139 | } 140 | 141 | .interpolation-video-column { 142 | } 143 | 144 | .interpolation-panel .slider { 145 | margin: 0 !important; 146 | } 147 | 148 | .interpolation-panel .slider { 149 | margin: 0 !important; 150 | } 151 | 152 | #interpolation-image-wrapper { 153 | width: 100%; 154 | } 155 | #interpolation-image-wrapper img { 156 | border-radius: 5px; 157 | } 158 | 159 | .row { 160 | display: flex; 161 | flex-direction: row; 162 | width: 70%; 163 | height: auto; 164 | margin: 10px 0; 165 | } 166 | 167 | .carousel { 168 | overflow: hidden; 169 | max-width: 100%; 170 | } 171 | .card{ 172 | margin-left: 1rem; 173 | margin-right: 1rem; 174 | margin-bottom: 1.5rem; 175 | } 176 | .carousel video{ 177 | width: 100%; 178 | height: auto; 179 | } 180 | .clustr { 181 | display: none; 182 | } 183 | -------------------------------------------------------------------------------- /docs/static/images/approach.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/images/approach.png -------------------------------------------------------------------------------- /docs/static/images/bench.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/images/bench.png -------------------------------------------------------------------------------- /docs/static/images/comparison_table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/images/comparison_table.png -------------------------------------------------------------------------------- /docs/static/images/comparison_vis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/images/comparison_vis.png -------------------------------------------------------------------------------- /docs/static/images/favicon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /docs/static/images/figurines-insertion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/images/figurines-insertion.png -------------------------------------------------------------------------------- /docs/static/images/lawn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/images/lawn.png -------------------------------------------------------------------------------- /docs/static/images/overview.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/images/overview.png -------------------------------------------------------------------------------- /docs/static/images/room.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/images/room.png -------------------------------------------------------------------------------- /docs/static/images/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/images/teaser.png -------------------------------------------------------------------------------- /docs/static/js/bulma-slider.min.js: -------------------------------------------------------------------------------- 1 | !function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?exports.bulmaSlider=e():t.bulmaSlider=e()}("undefined"!=typeof self?self:this,function(){return function(n){var r={};function i(t){if(r[t])return r[t].exports;var e=r[t]={i:t,l:!1,exports:{}};return n[t].call(e.exports,e,e.exports,i),e.l=!0,e.exports}return i.m=n,i.c=r,i.d=function(t,e,n){i.o(t,e)||Object.defineProperty(t,e,{configurable:!1,enumerable:!0,get:n})},i.n=function(t){var e=t&&t.__esModule?function(){return t.default}:function(){return t};return i.d(e,"a",e),e},i.o=function(t,e){return Object.prototype.hasOwnProperty.call(t,e)},i.p="",i(i.s=0)}([function(t,e,n){"use strict";Object.defineProperty(e,"__esModule",{value:!0}),n.d(e,"isString",function(){return l});var r=n(1),i=Object.assign||function(t){for(var e=1;e=l.length&&(s=!0)):s=!0),s&&(t.once&&(u[e]=null),t.callback(r))});-1!==u.indexOf(null);)u.splice(u.indexOf(null),1)}}]),e}();e.a=i}]).default}); -------------------------------------------------------------------------------- /docs/static/js/index.js: -------------------------------------------------------------------------------- 1 | window.HELP_IMPROVE_VIDEOJS = false; 2 | 3 | var INTERP_BASE = "./static/interpolation/stacked"; 4 | var NUM_INTERP_FRAMES = 240; 5 | 6 | var interp_images = []; 7 | function preloadInterpolationImages() { 8 | for (var i = 0; i < NUM_INTERP_FRAMES; i++) { 9 | var path = INTERP_BASE + '/' + String(i).padStart(6, '0') + '.jpg'; 10 | interp_images[i] = new Image(); 11 | interp_images[i].src = path; 12 | } 13 | } 14 | 15 | function setInterpolationImage(i) { 16 | var image = interp_images[i]; 17 | image.ondragstart = function() { return false; }; 18 | image.oncontextmenu = function() { return false; }; 19 | $('#interpolation-image-wrapper').empty().append(image); 20 | } 21 | 22 | 23 | $(document).ready(function() { 24 | // Check for click events on the navbar burger icon 25 | $(".navbar-burger").click(function() { 26 | // Toggle the "is-active" class on both the "navbar-burger" and the "navbar-menu" 27 | $(".navbar-burger").toggleClass("is-active"); 28 | $(".navbar-menu").toggleClass("is-active"); 29 | 30 | }); 31 | 32 | var options = { 33 | slidesToScroll: 1, 34 | slidesToShow: 3, 35 | loop: true, 36 | infinite: true, 37 | autoplay: false, 38 | autoplaySpeed: 3000, 39 | } 40 | 41 | // Initialize all div with carousel class 42 | var carousels = 
bulmaCarousel.attach('.carousel', options); 43 | 44 | // Loop on each carousel initialized 45 | for(var i = 0; i < carousels.length; i++) { 46 | // Add listener to event 47 | carousels[i].on('before:show', state => { 48 | console.log(state); 49 | }); 50 | } 51 | 52 | // Access to bulmaCarousel instance of an element 53 | var element = document.querySelector('#my-element'); 54 | if (element && element.bulmaCarousel) { 55 | // bulmaCarousel instance is available as element.bulmaCarousel 56 | element.bulmaCarousel.on('before-show', function(state) { 57 | console.log(state); 58 | }); 59 | } 60 | 61 | /*var player = document.getElementById('interpolation-video'); 62 | player.addEventListener('loadedmetadata', function() { 63 | $('#interpolation-slider').on('input', function(event) { 64 | console.log(this.value, player.duration); 65 | player.currentTime = player.duration / 100 * this.value; 66 | }) 67 | }, false);*/ 68 | preloadInterpolationImages(); 69 | 70 | $('#interpolation-slider').on('input', function(event) { 71 | setInterpolationImage(this.value); 72 | }); 73 | setInterpolationImage(0); 74 | $('#interpolation-slider').prop('max', NUM_INTERP_FRAMES - 1); 75 | 76 | bulmaSlider.attach(); 77 | 78 | }) 79 | 80 | document.addEventListener('DOMContentLoaded', function() { 81 | bulmaCarousel.attach('#carousel-query', { 82 | slidesToScroll: 1, 83 | slidesToShow: 4, 84 | navigation: true, 85 | loop: true, 86 | }); 87 | }); 88 | 89 | -------------------------------------------------------------------------------- /docs/static/videos/._Icon: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/._Icon -------------------------------------------------------------------------------- /docs/static/videos/bouquet.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/bouquet.mp4 -------------------------------------------------------------------------------- /docs/static/videos/bouquet_query.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/bouquet_query.mp4 -------------------------------------------------------------------------------- /docs/static/videos/bouquet_sg.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/bouquet_sg.mp4 -------------------------------------------------------------------------------- /docs/static/videos/figurines.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/figurines.mp4 -------------------------------------------------------------------------------- /docs/static/videos/figurines_query.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/figurines_query.mp4 -------------------------------------------------------------------------------- 
/docs/static/videos/figurines_sg.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/figurines_sg.mp4 -------------------------------------------------------------------------------- /docs/static/videos/insertion_query.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/insertion_query.mp4 -------------------------------------------------------------------------------- /docs/static/videos/insertion_vg.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/insertion_vg.mp4 -------------------------------------------------------------------------------- /docs/static/videos/ramen.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/ramen.mp4 -------------------------------------------------------------------------------- /docs/static/videos/ramen_query.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/ramen_query.mp4 -------------------------------------------------------------------------------- /docs/static/videos/ramen_sg.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/ramen_sg.mp4 -------------------------------------------------------------------------------- /docs/static/videos/replay.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/replay.mp4 -------------------------------------------------------------------------------- /docs/static/videos/shoe_rack.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/shoe_rack.mp4 -------------------------------------------------------------------------------- /docs/static/videos/shoe_rack_query.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/shoe_rack_query.mp4 -------------------------------------------------------------------------------- /docs/static/videos/shoe_rack_sg.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/shoe_rack_sg.mp4 -------------------------------------------------------------------------------- /docs/static/videos/teaser.mp4: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/teaser.mp4 -------------------------------------------------------------------------------- /docs/static/videos/teatime.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/teatime.mp4 -------------------------------------------------------------------------------- /docs/static/videos/teatime_pca.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/teatime_pca.mp4 -------------------------------------------------------------------------------- /docs/static/videos/teatime_query.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/teatime_query.mp4 -------------------------------------------------------------------------------- /docs/static/videos/teatime_sg.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/teatime_sg.mp4 -------------------------------------------------------------------------------- /docs/static/videos/waldo_kitchen.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/waldo_kitchen.mp4 -------------------------------------------------------------------------------- /docs/static/videos/waldo_kitchen_query.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/waldo_kitchen_query.mp4 -------------------------------------------------------------------------------- /docs/static/videos/waldo_kitchen_sg.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/waldo_kitchen_sg.mp4 -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: gaussian_splatting 2 | channels: 3 | - pytorch 4 | - conda-forge 5 | - defaults 6 | dependencies: 7 | - cudatoolkit=11.6 8 | - plyfile 9 | - python=3.7.13 10 | - pip=22.3.1 11 | - pytorch=1.12.1 12 | - torchaudio=0.12.1 13 | - torchvision=0.13.1 14 | - tqdm 15 | - pip: 16 | - submodules/diff-gaussian-rasterization 17 | - submodules/simple-knn 18 | - submodules/fused-ssim 19 | - opencv-python 20 | - joblib 21 | -------------------------------------------------------------------------------- /eval/colormaps.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """ Helper functions for visualizing outputs """ 16 | 17 | from dataclasses import dataclass 18 | from typing import Optional 19 | 20 | import matplotlib 21 | import torch 22 | from jaxtyping import Bool, Float 23 | from torch import Tensor 24 | import colors 25 | 26 | # Colormaps = Literal["default", "turbo", "viridis", "magma", "inferno", "cividis", "gray", "pca"] 27 | Colormaps = "turbo" 28 | 29 | @dataclass(frozen=True) 30 | class ColormapOptions: 31 | """Options for colormap""" 32 | 33 | colormap: Colormaps = "default" 34 | """ The colormap to use """ 35 | normalize: bool = False 36 | """ Whether to normalize the input tensor image """ 37 | colormap_min: float = 0 38 | """ Minimum value for the output colormap """ 39 | colormap_max: float = 1 40 | """ Maximum value for the output colormap """ 41 | invert: bool = False 42 | """ Whether to invert the output colormap """ 43 | 44 | 45 | def apply_colormap( 46 | image: Float[Tensor, "*bs channels"], 47 | colormap_options: ColormapOptions = ColormapOptions(), 48 | eps: float = 1e-9, 49 | ): 50 | """ 51 | Applies a colormap to a tensor image. 52 | If single channel, applies a colormap to the image. 53 | If 3 channel, treats the channels as RGB. 54 | If more than 3 channel, applies a PCA reduction on the dimensions to 3 channels 55 | 56 | Args: 57 | image: Input tensor image. 58 | eps: Epsilon value for numerical stability. 59 | 60 | Returns: 61 | Tensor with the colormap applied. 62 | """ 63 | 64 | # default for rgb images 65 | if image.shape[-1] == 3: 66 | return image 67 | 68 | # rendering depth outputs 69 | if image.shape[-1] == 1 and torch.is_floating_point(image): 70 | output = image 71 | if colormap_options.normalize: 72 | output = output - torch.min(output) 73 | output = output / (torch.max(output) + eps) 74 | output = ( 75 | output * (colormap_options.colormap_max - colormap_options.colormap_min) + colormap_options.colormap_min 76 | ) 77 | output = torch.clip(output, 0, 1) 78 | if colormap_options.invert: 79 | output = 1 - output 80 | return apply_float_colormap(output, colormap=colormap_options.colormap) 81 | 82 | # rendering boolean outputs 83 | if image.dtype == torch.bool: 84 | return apply_boolean_colormap(image) 85 | 86 | if image.shape[-1] > 3: 87 | return apply_pca_colormap(image) 88 | 89 | raise NotImplementedError 90 | 91 | 92 | def apply_float_colormap(image: Float[Tensor, "*bs 1"], colormap: Colormaps = "viridis"): 93 | """Convert single channel to a color image. 94 | 95 | Args: 96 | image: Single channel image. 97 | colormap: Colormap for image. 
98 | 99 | Returns: 100 | Tensor: Colored image with colors in [0, 1] 101 | """ 102 | if colormap == "default": 103 | colormap = "turbo" 104 | 105 | image = torch.nan_to_num(image, 0) 106 | if colormap == "gray": 107 | return image.repeat(1, 1, 3) 108 | image_long = (image * 255).long() 109 | image_long_min = torch.min(image_long) 110 | image_long_max = torch.max(image_long) 111 | assert image_long_min >= 0, f"the min value is {image_long_min}" 112 | assert image_long_max <= 255, f"the max value is {image_long_max}" 113 | return torch.tensor(matplotlib.colormaps[colormap].colors, device=image.device)[image_long[..., 0]] 114 | 115 | 116 | def apply_depth_colormap( 117 | depth: Float[Tensor, "*bs 1"], 118 | accumulation: Optional[Float[Tensor, "*bs 1"]] = None, 119 | near_plane: Optional[float] = None, 120 | far_plane: Optional[float] = None, 121 | colormap_options: ColormapOptions = ColormapOptions(), 122 | ): 123 | """Converts a depth image to color for easier analysis. 124 | 125 | Args: 126 | depth: Depth image. 127 | accumulation: Ray accumulation used for masking vis. 128 | near_plane: Closest depth to consider. If None, use min image value. 129 | far_plane: Furthest depth to consider. If None, use max image value. 130 | colormap: Colormap to apply. 131 | 132 | Returns: 133 | Colored depth image with colors in [0, 1] 134 | """ 135 | 136 | near_plane = near_plane or float(torch.min(depth)) 137 | far_plane = far_plane or float(torch.max(depth)) 138 | 139 | depth = (depth - near_plane) / (far_plane - near_plane + 1e-10) 140 | depth = torch.clip(depth, 0, 1) 141 | # depth = torch.nan_to_num(depth, nan=0.0) # TODO(ethan): remove this 142 | 143 | colored_image = apply_colormap(depth, colormap_options=colormap_options) 144 | 145 | if accumulation is not None: 146 | colored_image = colored_image * accumulation + (1 - accumulation) 147 | 148 | return colored_image 149 | 150 | 151 | def apply_boolean_colormap( 152 | image: Bool[Tensor, "*bs 1"], 153 | true_color = colors.WHITE, 154 | false_color = colors.BLACK, 155 | ): 156 | """Converts a depth image to color for easier analysis. 157 | 158 | Args: 159 | image: Boolean image. 160 | true_color: Color to use for True. 161 | false_color: Color to use for False. 162 | 163 | Returns: 164 | Colored boolean image 165 | """ 166 | 167 | colored_image = torch.ones(image.shape[:-1] + (3,)) 168 | colored_image[image[..., 0], :] = true_color 169 | colored_image[~image[..., 0], :] = false_color 170 | return colored_image 171 | 172 | 173 | def apply_pca_colormap(image: Float[Tensor, "*bs dim"]): 174 | """Convert feature image to 3-channel RGB via PCA. 
The first three principal 175 | components are used for the color channels, with outlier rejection per-channel. 176 | 177 | Args: 178 | image: image of arbitrary vectors 179 | 180 | Returns: 181 | Tensor: Colored image 182 | """ 183 | original_shape = image.shape 184 | image = image.view(-1, image.shape[-1]) 185 | _, _, v = torch.pca_lowrank(image) 186 | image = torch.matmul(image, v[..., :3]) 187 | d = torch.abs(image - torch.median(image, dim=0).values) 188 | mdev = torch.median(d, dim=0).values 189 | s = d / mdev 190 | m = 3.0 # threshold in median absolute deviations beyond which a channel value is treated as an outlier 191 | rins = image[s[:, 0] < m, 0] 192 | gins = image[s[:, 1] < m, 1] 193 | bins = image[s[:, 2] < m, 2] 194 | 195 | if len(rins) == 0 or len(gins) == 0 or len(bins) == 0: 196 | return image.new_zeros(*original_shape[:-1], 3) 197 | 198 | image[:, 0] -= rins.min() 199 | image[:, 1] -= gins.min() 200 | image[:, 2] -= bins.min() 201 | 202 | image[:, 0] /= rins.max() - rins.min() 203 | image[:, 1] /= gins.max() - gins.min() 204 | image[:, 2] /= bins.max() - bins.min() 205 | 206 | image = torch.clamp(image, 0, 1) 207 | image_long = (image * 255).long() 208 | image_long_min = torch.min(image_long) 209 | image_long_max = torch.max(image_long) 210 | assert image_long_min >= 0, f"the min value is {image_long_min}" 211 | assert image_long_max <= 255, f"the max value is {image_long_max}" 212 | return image.view(*original_shape[:-1], 3) 213 | -------------------------------------------------------------------------------- /eval/colors.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | """Common Colors""" 16 | from typing import Union 17 | 18 | import torch 19 | from jaxtyping import Float 20 | from torch import Tensor 21 | 22 | WHITE = torch.tensor([1.0, 1.0, 1.0]) 23 | BLACK = torch.tensor([0.0, 0.0, 0.0]) 24 | RED = torch.tensor([1.0, 0.0, 0.0]) 25 | GREEN = torch.tensor([0.0, 1.0, 0.0]) 26 | BLUE = torch.tensor([0.0, 0.0, 1.0]) 27 | 28 | COLORS_DICT = { 29 | "white": WHITE, 30 | "black": BLACK, 31 | "red": RED, 32 | "green": GREEN, 33 | "blue": BLUE, 34 | } 35 | 36 | 37 | def get_color(color: Union[str, list]) -> Float[Tensor, "3"]: 38 | """ 39 | Args: 40 | Color as a string or a rgb list 41 | 42 | Returns: 43 | Parsed color 44 | """ 45 | if isinstance(color, str): 46 | color = color.lower() 47 | if color not in COLORS_DICT: 48 | raise ValueError(f"{color} is not a valid preset color") 49 | return COLORS_DICT[color] 50 | if isinstance(color, list): 51 | if len(color) != 3: 52 | raise ValueError(f"Color should be 3 values (RGB) instead got {color}") 53 | return torch.tensor(color) 54 | 55 | raise ValueError(f"Color should be an RGB list or string, instead got {type(color)}") 56 | -------------------------------------------------------------------------------- /eval/eval_utils.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | from scipy.signal import medfilt 4 | import numpy as np 5 | import torch 6 | 7 | def calculate_stability_metrics(scores, mask_sizes, thresh_range, eval_params=None): 8 | """ 9 | Calculate stability metrics for both score and mask size across different thresholds. 10 | 11 | This function evaluates how stable segmentation masks are to threshold variations by 12 | measuring the rate of change (gradient) in both relevancy scores and mask sizes. 13 | Stable segmentations show minimal changes in mask configuration when thresholds are 14 | slightly adjusted. 
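    A minimal usage sketch (editor's note, not part of the original file; the two curves are synthetic and only illustrate the expected inputs):
        thresh_range = np.arange(0.01, 1, 0.01)
        scores = np.exp(-thresh_range)            # synthetic score-vs-threshold curve
        mask_sizes = 1.0 - thresh_range           # synthetic mask-size-vs-threshold curve
        metrics = calculate_stability_metrics(scores, mask_sizes, thresh_range, eval_params={"min_mask_size": 1e-5, "max_mask_size": 0.95})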
15 | 16 | Args: 17 | scores: Array of segmentation confidence scores at different thresholds 18 | mask_sizes: Array of corresponding mask sizes (as proportion of image) at different thresholds 19 | thresh_range: Array of threshold values used for evaluation 20 | eval_params: Dictionary containing parameters for evaluation: 21 | - "min_mask_size": Minimum valid mask size as proportion (e.g., 0.00001) 22 | - "max_mask_size": Maximum valid mask size as proportion (e.g., 0.95) 23 | 24 | Returns: 25 | Dictionary containing stability metrics: 26 | - 'smooth_score_grad': Smoothed gradient of scores (lower values indicate more stability) 27 | - 'smooth_mask_grad': Smoothed gradient of mask sizes (lower values indicate more stability) 28 | - 'valid_regions': Boolean mask indicating regions where mask size falls within valid range 29 | 30 | """ 31 | # Calculate gradients 32 | score_gradient = np.abs(np.gradient(scores, thresh_range)) 33 | mask_gradient = np.abs(np.gradient(mask_sizes, thresh_range)) 34 | 35 | # Smooth gradients 36 | smooth_score_grad = medfilt(score_gradient, kernel_size=5) 37 | smooth_mask_grad = medfilt(mask_gradient, kernel_size=5) 38 | 39 | # Filter out regions where mask_size > 0.95 and < 0.00001 40 | valid_regions = (np.array(mask_sizes) > eval_params["min_mask_size"]) & (np.array(mask_sizes) < eval_params["max_mask_size"]) 41 | 42 | assert len(smooth_score_grad[valid_regions]) != 0, "No valid regions found" 43 | 44 | return { 45 | 'smooth_score_grad': smooth_score_grad, 46 | 'smooth_mask_grad': smooth_mask_grad, 47 | 'valid_regions': valid_regions 48 | } 49 | 50 | def find_stable_regions(stability_metrics, eval_params=None): 51 | """ 52 | Find continuous regions where both score and mask size gradients are stable. 53 | 54 | This function identifies threshold ranges where segmentation results remain 55 | consistent (stable), which indicates reliable segmentation performance. 
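    A minimal usage sketch (editor's note, not part of the original file; continues the synthetic example above, so `metrics` is the dictionary returned by calculate_stability_metrics):
        stable_regions = find_stable_regions(metrics, eval_params={"stability_thresh": 0.3})
        # e.g. [(10, 60)] -- index ranges into thresh_range where both smoothed gradients stay below the threshold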
56 | 57 | Args: 58 | stability_metrics: Dictionary containing stability metrics: 59 | - 'smooth_score_grad': Smoothed gradient of scores 60 | - 'smooth_mask_grad': Smoothed gradient of mask sizes 61 | - 'valid_regions': Boolean mask of valid regions 62 | eval_params: Dictionary with evaluation parameters: 63 | - "stability_thresh": Maximum gradient value considered stable 64 | min_region_length: Minimum length of a region to be considered stable 65 | 66 | Returns: 67 | List of tuples containing (start_index, end_index) of stable regions 68 | """ 69 | 70 | score_stable = stability_metrics['smooth_score_grad'] < eval_params["stability_thresh"] 71 | mask_stable = stability_metrics['smooth_mask_grad'] < eval_params["stability_thresh"] 72 | valid_regions = stability_metrics['valid_regions'] 73 | 74 | # Both metrics must be stable 75 | combined_stable = score_stable & mask_stable & valid_regions 76 | 77 | # Find continuous stable regions 78 | stable_regions = [] 79 | start_idx = None 80 | 81 | for i in range(len(combined_stable)): 82 | if combined_stable[i]: 83 | if start_idx is None: 84 | start_idx = i 85 | else: 86 | if start_idx is not None and i - start_idx >= 5: 87 | # Region ends, must be at least 5 points long 88 | stable_regions.append((start_idx, i)) 89 | start_idx = None 90 | 91 | # Handle the case where the last region extends to the end 92 | if start_idx is not None and len(combined_stable) - start_idx >= 5: 93 | stable_regions.append((start_idx, len(combined_stable)-1)) 94 | 95 | return stable_regions 96 | 97 | def compute_dynamic_threshold(valid_map, object_name, eval_params=None, thresh_range=np.arange(0.01, 1, 0.01)): 98 | 99 | """ 100 | Computes the optimal threshold for segmentation by analyzing stability across three levels. 101 | 102 | This function normalizes outputs from each feature level, evaluates segmentation performance 103 | across a range of thresholds, identifies stable regions, and selects the feature level and threshold 104 | that demonstrate the most stable segmentation behavior. 105 | 106 | Process: 107 | 1. For each feature level, normalizes the relevancy scores to [0,1] 108 | 2. Evaluates scores and mask sizes at each threshold value 109 | 3. Calculates stability metrics based on how scores and mask sizes change with threshold 110 | 4. Identifies continuous regions where both metrics are stable 111 | 5. For each level, calculates a score sensitivity metric from the stable region 112 | 6. Selects the level with the lowest score sensitivity (most stable) 113 | 7. 
Returns the chosen level and its optimal threshold value 114 | """ 115 | n_head = valid_map.shape[0] 116 | total_pixels = valid_map.shape[1] * valid_map.shape[2] 117 | score_gradients = [] 118 | thresholds = [] 119 | 120 | for head_idx in range(n_head): 121 | output = valid_map[head_idx] 122 | 123 | output = output - torch.min(output) 124 | output = output / (torch.max(output) - torch.min(output) + 1e-9) 125 | output = output.numpy() 126 | 127 | # Calculate metrics 128 | scores = [] 129 | pixel_counts = [] 130 | 131 | for thresh in thresh_range: 132 | mask = output > thresh 133 | score = np.mean(output[mask]) if np.any(mask) else 0 134 | scores.append(score) 135 | 136 | normalized_count = np.sum(mask) / total_pixels 137 | pixel_counts.append(normalized_count) 138 | 139 | # Calculate stability metrics 140 | stability = calculate_stability_metrics(scores, pixel_counts, thresh_range, eval_params=eval_params) 141 | stable_regions = find_stable_regions(stability, eval_params=eval_params) 142 | 143 | if len(stable_regions) == 0: 144 | print(f"Warning: Found {len(stable_regions)} stable regions for {object_name} head {head_idx}") 145 | score_gradients.append(999) 146 | thresholds.append(0.5) 147 | else: 148 | valid_mask = stability['valid_regions'] 149 | # Find the last stable region 150 | (start_idx, end_idx) = stable_regions[-1] 151 | # Find the longest stable region 152 | # longest_region = max(stable_regions, key=lambda region: region[1] - region[0]) 153 | # (start_idx, end_idx) = longest_region 154 | if np.any(valid_mask[start_idx:end_idx+1]): 155 | score_sensitivity = (scores[end_idx]- scores[start_idx]) / (thresh_range[end_idx] - thresh_range[start_idx] + 1e-9) 156 | score_gradients.append(score_sensitivity) 157 | thresholds.append((thresh_range[start_idx] + thresh_range[end_idx]) / 2) # take the median threshold 158 | else: 159 | score_gradients.append(999) 160 | thresholds.append(0.5) 161 | 162 | chosen_lvl = np.argmin(score_gradients) 163 | threshold = thresholds[chosen_lvl] 164 | 165 | return chosen_lvl, threshold 166 | 167 | 168 | def plot_relevancy_and_threshold(relevancy_map, prompt_name, head_idx, save_path, threshold=0.5): 169 | """ 170 | Plot relevancy map and thresholded areas side by side 171 | """ 172 | if torch.is_tensor(relevancy_map): 173 | relevancy_map = relevancy_map.numpy() 174 | 175 | # Create threshold mask 176 | threshold_mask = relevancy_map > threshold 177 | 178 | # Create figure with two subplots 179 | fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8)) 180 | 181 | # Plot relevancy map 182 | im1 = ax1.imshow(relevancy_map, cmap='viridis') 183 | ax1.set_title(f'Relevancy Map\n{prompt_name}, Level {head_idx}') 184 | fig.colorbar(im1, ax=ax1, label='Relevancy Score') 185 | ax1.axis('off') 186 | 187 | # Plot thresholded map 188 | im2 = ax2.imshow(threshold_mask, cmap='binary') 189 | ax2.set_title(f'Thresholded Map (>{threshold})\n{prompt_name}, Level {head_idx}') 190 | ax2.axis('off') 191 | 192 | plt.tight_layout() 193 | plt.savefig(save_path, dpi=300, bbox_inches='tight') 194 | plt.close() -------------------------------------------------------------------------------- /eval/evaluate_iou_3dovs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import annotations 3 | 4 | import os 5 | import glob 6 | import random 7 | from collections import defaultdict 8 | from pathlib import Path 9 | from typing import Dict, Union 10 | from argparse import ArgumentParser 11 | import logging 12 | import 
cv2 13 | import numpy as np 14 | import torch 15 | import time 16 | from tqdm import tqdm 17 | from PIL import Image 18 | 19 | import sys 20 | sys.path.append("..") 21 | import colormaps 22 | from openclip_encoder import OpenCLIPNetwork 23 | from utils import smooth, vis_mask_save, stack_mask 24 | 25 | from eval_utils import plot_relevancy_and_threshold, compute_dynamic_threshold 26 | 27 | 28 | def get_logger(name, log_file=None, log_level=logging.INFO, file_mode='w'): 29 | logger = logging.getLogger(name) 30 | stream_handler = logging.StreamHandler() 31 | handlers = [stream_handler] 32 | 33 | if log_file is not None: 34 | file_handler = logging.FileHandler(log_file, file_mode) 35 | handlers.append(file_handler) 36 | 37 | formatter = logging.Formatter( 38 | '%(asctime)s - %(name)s - %(levelname)s - %(message)s') 39 | for handler in handlers: 40 | handler.setFormatter(formatter) 41 | handler.setLevel(log_level) 42 | logger.addHandler(handler) 43 | logger.setLevel(log_level) 44 | return logger 45 | 46 | 47 | def eval_gt_3dovsdata(dataset_folder: Union[str, Path] = None, output_path: Path = None) -> tuple: 48 | """ 49 | Organizes ground truth annotations from the 3DOVS dataset. 50 | 51 | Args: 52 | dataset_folder: Path to the root directory containing the 3DOVS dataset 53 | output_path: Optional path where visualization images will be saved 54 | 55 | Returns: 56 | Tuple containing: 57 | - gt_ann: Dictionary where: 58 | - keys: Frame indices as strings 59 | - values: Nested dictionary where: 60 | - keys: Class labels 61 | - values: Dictionary containing 'mask' with the segmentation mask 62 | - image_dimensions: Tuple of (height, width) 63 | - img_paths: Dictionary mapping frame indices to paths of the original images 64 | """ 65 | gt_folder = os.path.join(dataset_folder, 'segmentations') 66 | image_folder = os.path.join(dataset_folder, 'images') 67 | 68 | gt_paths = [os.path.join(gt_folder, name) for name in os.listdir(gt_folder) if os.path.isdir(os.path.join(gt_folder, name))] 69 | gt_paths = sorted(gt_paths, key=lambda x: int(x.split('/')[-1])) 70 | img_paths = {} 71 | with open(os.path.join(gt_folder, 'classes.txt'), 'r') as f: 72 | class_names = [line.strip() for line in f] 73 | 74 | gt_ann = {} 75 | for gt_path in gt_paths: 76 | img_ann = defaultdict(dict) 77 | 78 | idx = int(gt_path.split('/')[-1]) 79 | img_path = os.path.join(image_folder, f"{gt_path.split('/')[-1]}.jpg") 80 | img_paths[idx] = img_path 81 | with Image.open(img_path) as img: 82 | w, h = img.size 83 | 84 | for prompt_data in class_names: 85 | label = prompt_data 86 | mask_path = os.path.join(gt_path, f"{label}.png") 87 | mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE) 88 | mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST) 89 | if img_ann[label].get('mask', None) is not None: 90 | mask = stack_mask(img_ann[label]['mask'], mask) 91 | img_ann[label]['mask'] = mask 92 | 93 | # save for visualization 94 | save_path = output_path / 'gt' / str(idx) / f'{label}.jpg' 95 | save_path.parent.mkdir(exist_ok=True, parents=True) 96 | vis_mask_save(mask, save_path) 97 | gt_ann[f'{idx}'] = img_ann 98 | 99 | return gt_ann, (h, w), img_paths 100 | 101 | 102 | def activate_stream(sem_map, 103 | clip_model, 104 | image_name: Path = None, 105 | img_ann: Dict = None, 106 | eval_params: Dict = None): 107 | 108 | valid_map = clip_model.get_max_across(sem_map) 109 | n_head, n_prompt, h, w = valid_map.shape 110 | valid_map = valid_map.cpu() 111 | 112 | # positive prompts 113 | chosen_iou_list, chosen_lvl_list = [], [] 114 | 115 | for k in range(n_prompt): 116 | 
117 | chosen_lvl, thresh = compute_dynamic_threshold(valid_map[:, k], clip_model.positives[k], eval_params=eval_params) 118 | 119 | for i in range(n_head): 120 | 121 | # NOTE [mask] truncate the heatmap into mask 122 | output = valid_map[i][k] 123 | output = output - torch.min(output) 124 | output = output / (torch.max(output) - torch.min(output) + 1e-9) 125 | 126 | save_path = image_name / 'comparison_maps' / f'{clip_model.positives[k]}_level{i}_comparison.png' 127 | save_path.parent.mkdir(exist_ok=True, parents=True) 128 | plot_relevancy_and_threshold(output, clip_model.positives[k], i, save_path, threshold=thresh) 129 | 130 | if i == chosen_lvl: 131 | # Create Binary Mask through thresholding: 132 | mask_pred = (output.numpy() > thresh).astype(np.uint8) 133 | mask_pred = smooth(mask_pred) 134 | mask_gt = img_ann[clip_model.positives[k]]['mask'].astype(np.uint8) 135 | 136 | intersection = np.logical_and(mask_gt, mask_pred).sum() 137 | union = np.logical_or(mask_gt, mask_pred).sum() 138 | iou = intersection / (union + 1e-9) # Avoid division by zero 139 | 140 | chosen_iou_list.append(iou) 141 | chosen_lvl_list.append(chosen_lvl) 142 | 143 | # save for visualization 144 | save_path = image_name / f'chosen_{clip_model.positives[k]}.png' 145 | vis_mask_save(mask_pred, save_path) 146 | 147 | return chosen_iou_list, chosen_lvl_list 148 | 149 | 150 | def evaluate(feat_dir, output_path, gt_path, logger, eval_params): 151 | 152 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 153 | # colormap_options = colormaps.ColormapOptions( 154 | # colormap="turbo", 155 | # normalize=True, 156 | # colormap_min=-1.0, 157 | # colormap_max=1.0, 158 | # ) 159 | 160 | gt_ann, image_shape, image_paths = eval_gt_3dovsdata(Path(gt_path), Path(output_path)) 161 | 162 | eval_index_list = [int(idx) for idx in list(gt_ann.keys())] 163 | feat_paths_lvl = [] 164 | for i in range(len(feat_dir)): 165 | # Create a mapping of index to file path 166 | index_to_file = {} 167 | for file_path in glob.glob(os.path.join(feat_dir[i], '*.npy')): 168 | file_idx = int(os.path.basename(file_path).split(".npy")[0]) 169 | index_to_file[file_idx] = file_path 170 | 171 | feat_paths_lvl.append(index_to_file) 172 | 173 | assert len(feat_paths_lvl) == len(feat_dir) 174 | 175 | # instantiate openclip 176 | clip_model = OpenCLIPNetwork(device) 177 | 178 | chosen_iou_all, chosen_lvl_list = [], [] 179 | for j, idx in enumerate(tqdm(eval_index_list)): 180 | image_name = Path(output_path) / f'{idx:0>2}' 181 | image_name.mkdir(exist_ok=True, parents=True) 182 | 183 | compressed_sem_feats = np.zeros((len(feat_dir), *image_shape, 512), dtype=np.float32) # compressed_sem_feats: (len(feat_dir), h, w, 512) -> (granularity, h, w, c) 184 | for i in range(len(feat_dir)): 185 | if idx not in feat_paths_lvl[i]: 186 | raise ValueError(f"Missing feature file for index {idx} in directory {feat_dir[i]}") 187 | compressed_sem_feats[i] = np.load(feat_paths_lvl[i][idx], mmap_mode='r') 188 | 189 | sem_feat = torch.from_numpy(compressed_sem_feats).float().to(device) 190 | # rgb_img = cv2.imread(image_paths[idx])[..., ::-1] 191 | # rgb_img = (rgb_img / 255.0).astype(np.float32) 192 | # rgb_img = torch.from_numpy(rgb_img).to(device) 193 | print(f"j: {j}, idx: {idx}, image_name: {image_name}, image_path: {image_paths[idx]}") 194 | 195 | img_ann = gt_ann[f'{idx}'] # -> a dictionary keyed by class label, each value holding its binary 'mask' 196 | clip_model.set_positives(list(img_ann.keys())) 197 | 198 | c_iou_list, c_lvl = activate_stream(sem_feat, clip_model, 
image_name, img_ann, 200 | eval_params=eval_params) 201 | 202 | chosen_iou_all.extend(c_iou_list) 203 | chosen_lvl_list.extend(c_lvl) 204 | 205 | # iou 206 | mean_iou_chosen = sum(chosen_iou_all) / len(chosen_iou_all) 207 | logger.info(f"iou chosen: {mean_iou_chosen:.4f}") 208 | logger.info(f"chosen_lvl: \n{chosen_lvl_list}") 209 | 210 | 211 | def seed_everything(seed_value): 212 | random.seed(seed_value) 213 | np.random.seed(seed_value) 214 | torch.manual_seed(seed_value) 215 | os.environ['PYTHONHASHSEED'] = str(seed_value) 216 | 217 | if torch.cuda.is_available(): 218 | torch.cuda.manual_seed(seed_value) 219 | torch.cuda.manual_seed_all(seed_value) 220 | torch.backends.cudnn.deterministic = True 221 | torch.backends.cudnn.benchmark = True 222 | 223 | 224 | if __name__ == "__main__": 225 | seed_num = 42 226 | seed_everything(seed_num) 227 | 228 | parser = ArgumentParser(description="prompt any label") 229 | parser.add_argument("--dataset_name", type=str, default=None) 230 | parser.add_argument("--gt_folder", type=str, default=None) 231 | parser.add_argument("--feat_folder", type=str, default=None) 232 | parser.add_argument("--stability_thresh", type=float, default=0.3) 233 | parser.add_argument("--min_mask_size", type=float, default=0.001) 234 | parser.add_argument("--max_mask_size", type=float, default=0.95) 235 | args = parser.parse_args() 236 | 237 | eval_params = { 238 | "stability_thresh": args.stability_thresh, 239 | "min_mask_size": args.min_mask_size, 240 | "max_mask_size": args.max_mask_size, 241 | } 242 | dataset_name = args.dataset_name 243 | feat_dir = [f"./output/3DOVS/{args.dataset_name}/test/{args.feat_folder}_1/renders_npy", 244 | f"./output/3DOVS/{args.dataset_name}/test/{args.feat_folder}_2/renders_npy", 245 | f"./output/3DOVS/{args.dataset_name}/test/{args.feat_folder}_3/renders_npy"] 246 | output_path = f"./eval_results/3DOVS/{args.dataset_name}" 247 | gt_path = args.gt_folder 248 | 249 | timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) 250 | os.makedirs(output_path, exist_ok=True) 251 | log_file = os.path.join(output_path, f'{dataset_name}.log') 252 | logger = get_logger(f'{dataset_name}', log_file=log_file, log_level=logging.INFO) 253 | 254 | evaluate(feat_dir, output_path, gt_path, logger, eval_params) -------------------------------------------------------------------------------- /eval/openclip_encoder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import torch 3 | import torchvision 4 | import open_clip 5 | 6 | 7 | class OpenCLIPNetwork: 8 | def __init__(self, device): 9 | self.process = torchvision.transforms.Compose( 10 | [ 11 | torchvision.transforms.Resize((224, 224)), 12 | torchvision.transforms.Normalize( 13 | mean=[0.48145466, 0.4578275, 0.40821073], 14 | std=[0.26862954, 0.26130258, 0.27577711], 15 | ), 16 | ] 17 | ) 18 | self.clip_model_type = "ViT-B-16" 19 | self.clip_model_pretrained = 'laion2b_s34b_b88k' 20 | self.clip_n_dims = 512 21 | model, _, _ = open_clip.create_model_and_transforms( 22 | self.clip_model_type, 23 | pretrained=self.clip_model_pretrained, 24 | precision="fp16", 25 | ) 26 | model.eval() 27 | 28 | self.tokenizer = open_clip.get_tokenizer(self.clip_model_type) 29 | self.model = model.to(device) 30 | 31 | self.negatives = ("object", "things", "stuff", "texture") 32 | self.positives = (" ",) 33 | with torch.no_grad(): 34 | tok_phrases = torch.cat([self.tokenizer(phrase) for phrase in self.positives]).to(device) 35 | self.pos_embeds = 
model.encode_text(tok_phrases) 36 | tok_phrases = torch.cat([self.tokenizer(phrase) for phrase in self.negatives]).to(device) 37 | self.neg_embeds = model.encode_text(tok_phrases) 38 | self.pos_embeds /= self.pos_embeds.norm(dim=-1, keepdim=True) 39 | self.neg_embeds /= self.neg_embeds.norm(dim=-1, keepdim=True) 40 | 41 | @torch.no_grad() 42 | def get_relevancy(self, embed: torch.Tensor, positive_id: int) -> torch.Tensor: 43 | # embed: torch.Size([721240, 512]) -> (n_pixels, embed_dim), positive_id -> phrase id 44 | phrases_embeds = torch.cat([self.pos_embeds, self.neg_embeds], dim=0) 45 | # print(f'phrases_embeds: {phrases_embeds.shape}') # torch.Size([11, 512]) 46 | # print(f'pos_embeds: {self.pos_embeds.shape}') # torch.Size([7, 512]) 47 | # print(f'neg_embeds: {self.neg_embeds.shape}') # torch.Size([4, 512]) 48 | p = phrases_embeds.to(embed.dtype) 49 | output = torch.mm(embed, p.T) # 721240x512 * 512x11 -> 721240x11 50 | positive_vals = output[..., positive_id : positive_id + 1] # (721240, 1) similarities between the rendered embeddings and the positive query phrase 51 | negative_vals = output[..., len(self.positives) :] # (721240, 4) similarities between the rendered embeddings and the negative query phrases [object, things,...] 52 | repeated_pos = positive_vals.repeat(1, len(self.negatives)) # (721240, 1) -> (721240, 4) 53 | 54 | sims = torch.stack((repeated_pos, negative_vals), dim=-1) # torch.Size([721240, 4, 2]) 55 | softmax = torch.softmax(10 * sims, dim=-1) # torch.Size([721240, 4, 2]) 56 | best_id = softmax[..., 0].argmin(dim=1) # torch.Size([721240]) 57 | 58 | return torch.gather(softmax, 1, best_id[..., None, None].expand(best_id.shape[0], len(self.negatives), 2))[ 59 | :, 0, : 60 | ] 61 | 62 | def encode_image(self, input, mask=None): 63 | processed_input = self.process(input).half() 64 | return self.model.encode_image(processed_input, mask=mask) 65 | 66 | def encode_text(self, text_list, device): 67 | text = self.tokenizer(text_list).to(device) 68 | return self.model.encode_text(text) 69 | 70 | def set_positives(self, text_list): 71 | self.positives = text_list 72 | with torch.no_grad(): 73 | tok_phrases = torch.cat( 74 | [self.tokenizer(phrase) for phrase in self.positives] 75 | ).to(self.neg_embeds.device) 76 | self.pos_embeds = self.model.encode_text(tok_phrases) 77 | self.pos_embeds /= self.pos_embeds.norm(dim=-1, keepdim=True) 78 | 79 | def set_semantics(self, text_list): 80 | self.semantic_labels = text_list 81 | with torch.no_grad(): 82 | tok_phrases = torch.cat([self.tokenizer(phrase) for phrase in self.semantic_labels]).to("cuda") 83 | self.semantic_embeds = self.model.encode_text(tok_phrases) 84 | self.semantic_embeds /= self.semantic_embeds.norm(dim=-1, keepdim=True) 85 | 86 | def get_semantic_map(self, sem_map: torch.Tensor) -> torch.Tensor: 87 | # embed: 3xhxwx512 88 | n_levels, h, w, c = sem_map.shape 89 | pos_num = self.semantic_embeds.shape[0] 90 | phrases_embeds = torch.cat([self.semantic_embeds, self.neg_embeds], dim=0) 91 | p = phrases_embeds.to(sem_map.dtype) 92 | sem_pred = torch.zeros(n_levels, h, w) 93 | for i in range(n_levels): 94 | output = torch.mm(sem_map[i].view(-1, c), p.T) 95 | softmax = torch.softmax(10 * output, dim=-1) 96 | sem_pred[i] = torch.argmax(softmax, dim=-1).view(h, w) 97 | sem_pred[i][sem_pred[i] >= pos_num] = -1 98 | return sem_pred.long() 99 | 100 | def get_max_across(self, sem_map): # sem_map: torch.Size([3, 731, 988, 512]) -> (granuity, h, w, embed_dim) 101 | ''' 102 | processes a semantic map and returns a relevance map, 103 
| highlighting the regions of the input image that are most relevant to specific phrases. 104 | ''' 105 | n_phrases = len(self.positives) 106 | n_phrases_sims = [None for _ in range(n_phrases)] 107 | 108 | n_levels, h, w, _ = sem_map.shape 109 | clip_output = sem_map.permute(1, 2, 0, 3).flatten(0, 1) # 3x731x988x512 -> 731x988x3x512 -> 721240x3x512 110 | 111 | n_levels_sims = [None for _ in range(n_levels)] 112 | for i in range(n_levels): 113 | for j in range(n_phrases): 114 | probs = self.get_relevancy(clip_output[..., i, :], j) # clip_output[..., i, :] -> 721240, j -> phrase id 115 | pos_prob = probs[..., 0:1] # pos_prob -> torch.Size([721240, 1]) 116 | n_phrases_sims[j] = pos_prob # phrase's level relevance score 117 | n_levels_sims[i] = torch.stack(n_phrases_sims) # each granularity level's relevance score for all phrases 118 | 119 | relev_map = torch.stack(n_levels_sims).view(n_levels, n_phrases, h, w) 120 | return relev_map -------------------------------------------------------------------------------- /eval/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import matplotlib.patches as patches 4 | import matplotlib.pyplot as plt 5 | import mediapy as media 6 | import cv2 7 | import colormaps 8 | from pathlib import Path 9 | 10 | 11 | def show_points(coords, labels, ax, marker_size=100): 12 | pos_points = coords[labels==1] 13 | neg_points = coords[labels==0] 14 | ax.scatter(pos_points[:, 0], pos_points[:, 1], color='firebrick', marker='o', 15 | s=marker_size, edgecolor='black', linewidth=2.5, alpha=1) 16 | ax.scatter(neg_points[:, 0], neg_points[:, 1], color='red', marker='o', 17 | s=marker_size, edgecolor='black', linewidth=1.5, alpha=1) 18 | 19 | 20 | def show_box(boxes, ax, color=None): 21 | if type(color) == str and color == 'random': 22 | color = np.random.random(3) 23 | elif color is None: 24 | color = 'black' 25 | for box in boxes.reshape(-1, 4): 26 | x0, y0 = box[0], box[1] 27 | w, h = box[2] - box[0], box[3] - box[1] 28 | ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor=color, facecolor=(0,0,0,0), lw=4, 29 | capstyle='round', joinstyle='round', linestyle='dotted')) 30 | 31 | 32 | def show_result(image, point, bbox, save_path): 33 | plt.figure() 34 | plt.imshow(image) 35 | rect = patches.Rectangle((0, 0), image.shape[1]-1, image.shape[0]-1, linewidth=0, edgecolor='none', facecolor='white', alpha=0.3) 36 | plt.gca().add_patch(rect) 37 | input_point = point.reshape(1,-1) 38 | input_label = np.array([1]) 39 | show_points(input_point, input_label, plt.gca()) 40 | show_box(bbox, plt.gca()) 41 | plt.axis('off') 42 | plt.savefig(save_path, bbox_inches='tight', pad_inches=0.0, dpi=200) 43 | plt.close() 44 | 45 | 46 | def smooth(mask): 47 | h, w = mask.shape[:2] 48 | im_smooth = mask.copy() 49 | scale = 3 50 | for i in range(h): 51 | for j in range(w): 52 | square = mask[max(0, i-scale) : min(i+scale+1, h-1), 53 | max(0, j-scale) : min(j+scale+1, w-1)] 54 | im_smooth[i, j] = np.argmax(np.bincount(square.reshape(-1))) 55 | return im_smooth 56 | 57 | 58 | def colormap_saving(image: torch.Tensor, colormap_options, save_path): 59 | """ 60 | if image's shape is (h, w, 1): draw colored relevance map; 61 | if image's shape is (h, w, 3): return it directly; 62 | if image's shape is (h, w, c): execute PCA and transform it into (h, w, 3). 
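    A minimal usage sketch (editor's note, not part of the original file; the tensor is random and save_path=None skips writing to disk):
        feat = torch.rand(64, 64, 512)                                       # (h, w, c) feature image
        rgb = colormap_saving(feat, colormaps.ColormapOptions(), save_path=None)
        # rgb is a (64, 64, 3) numpy array with values in [0, 1]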
63 | """ 64 | output_image = ( 65 | colormaps.apply_colormap( 66 | image=image, 67 | colormap_options=colormap_options, 68 | ).cpu().numpy() 69 | ) 70 | if save_path is not None: 71 | media.write_image(save_path.with_suffix(".png"), output_image, fmt="png") 72 | return output_image 73 | 74 | 75 | def vis_mask_save(mask, save_path: Path = None): 76 | mask_save = mask.copy() 77 | mask_save[mask == 1] = 255 78 | save_path.parent.mkdir(exist_ok=True, parents=True) 79 | cv2.imwrite(str(save_path), mask_save) 80 | 81 | 82 | def polygon_to_mask(img_shape, points_list): 83 | points = np.asarray(points_list, dtype=np.int32) 84 | mask = np.zeros(img_shape, dtype=np.uint8) 85 | cv2.fillPoly(mask, [points], 1) 86 | return mask 87 | 88 | 89 | def stack_mask(mask_base, mask_add): 90 | mask = mask_base.copy() 91 | mask[mask_add != 0] = 1 92 | return mask -------------------------------------------------------------------------------- /eval_3DOVS.sh: -------------------------------------------------------------------------------- 1 | # Set the dataset name 2 | DATASET_NAME="bench" 3 | 4 | # Path to the preprocessed 3DOVS dataset 5 | GT_FOLDER="/path/to/your/3DOVS-preprocess-full/$DATASET_NAME" 6 | 7 | # Name of the folder containing extracted features 8 | FEAT_FOLDER_NAME="ours_30000_langfeat" # Replace with your model's feature folder 9 | 10 | python eval/evaluate_iou_3dovs.py \ 11 | --dataset_name ${DATASET_NAME} \ 12 | --gt_folder ${GT_FOLDER} \ 13 | --feat_folder ${FEAT_FOLDER_NAME} \ 14 | --stability_thresh 0.4 \ 15 | --min_mask_size 0.005 \ 16 | --max_mask_size 0.9 17 | -------------------------------------------------------------------------------- /feature_map_renderer.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | from scene import Scene 14 | import os 15 | from tqdm import tqdm 16 | from os import makedirs 17 | from gaussian_renderer import render 18 | import torchvision 19 | from utils.general_utils import safe_state 20 | from argparse import ArgumentParser 21 | from arguments import ModelParams, PipelineParams, OptimizationParams, get_combined_args 22 | from gaussian_renderer import GaussianModel 23 | import numpy as np 24 | from sklearn.decomposition import PCA 25 | import torch.utils.dlpack 26 | import matplotlib.pyplot as plt 27 | 28 | def render_set(model_path, name, iteration, source_path, views, gaussians, pipeline, background, feature_level): 29 | 30 | save_path = os.path.join(model_path, name, "ours_{}_langfeat_{}".format(iteration, feature_level)) 31 | render_path = os.path.join(save_path, "renders") 32 | gts_path = os.path.join(save_path, "gt") 33 | render_npy_path = os.path.join(save_path, "renders_npy") 34 | gts_npy_path = os.path.join(save_path,"gt_npy") 35 | 36 | os.makedirs(render_path, exist_ok=True) 37 | os.makedirs(gts_path, exist_ok=True) 38 | os.makedirs(render_npy_path, exist_ok=True) 39 | os.makedirs(gts_npy_path, exist_ok=True) 40 | 41 | 42 | for idx, view in enumerate(tqdm(views, desc="Rendering progress")): 43 | render_pkg = render(view, gaussians, pipeline, background, include_feature=True) 44 | rendering = render_pkg["render"] 45 | gt, mask = view.get_language_feature(language_feature_dir=f"{source_path}/language_features", feature_level=feature_level) #! modified 46 | 47 | np.save(os.path.join(render_npy_path, view.image_name.split('.')[0] + ".npy"),rendering.permute(1,2,0).cpu().numpy()) 48 | np.save(os.path.join(gts_npy_path, view.image_name.split('.')[0] + ".npy"),gt.permute(1,2,0).cpu().numpy()) 49 | 50 | _, H, W = gt.shape 51 | gt = gt.reshape(512, -1).T.cpu().numpy() 52 | rendering = rendering.reshape(512, -1).T.cpu().numpy() # (H*W, 512) 53 | 54 | pca = PCA(n_components=3) 55 | 56 | combined_np = np.concatenate((gt, rendering), axis=0) 57 | combined_features = pca.fit_transform(combined_np) # ((n+m)*H*W, 3) 58 | normalized_features = (combined_features - combined_features.min(axis=0)) / (combined_features.max(axis=0) - combined_features.min(axis=0)) 59 | reshaped_combined_features = normalized_features.reshape(2, H, W, 3) 60 | 61 | reduced_rendering = reshaped_combined_features[1] 62 | reduced_gt = reshaped_combined_features[0] 63 | 64 | rendering = torch.tensor(reduced_rendering).permute(2, 0, 1) 65 | gt = torch.tensor(reduced_gt).permute(2, 0, 1) 66 | 67 | torchvision.utils.save_image(rendering, os.path.join(render_path, view.image_name )) 68 | torchvision.utils.save_image(gt, os.path.join(gts_path, view.image_name)) 69 | 70 | def render_sets(dataset : ModelParams, opt : OptimizationParams, iteration : int, pipeline : PipelineParams, skip_train : bool, skip_test : bool, feature_level : int): 71 | 72 | with torch.no_grad(): 73 | gaussians = GaussianModel(dataset.sh_degree) 74 | scene = Scene(dataset, gaussians, load_iteration=iteration, shuffle=False, include_feature=True) 75 | 76 | checkpoint = os.path.join(args.model_path, f'chkpnt{iteration}_langfeat_{feature_level}.pth') 77 | (model_params, first_iter) = torch.load(checkpoint) 78 | gaussians.restore_language_features(model_params, opt) 79 | bg_color = [1,1,1] if dataset.white_background else [0, 0, 0] 80 | background = torch.tensor(bg_color, dtype=torch.float32, device="cuda") 81 | 82 | if not skip_train: 83 | 
render_set(args.model_path, "train", scene.loaded_iter, dataset.source_path, scene.getTrainCameras(), gaussians, pipeline, background, feature_level) 84 | 85 | if not skip_test: 86 | render_set(args.model_path, "test", scene.loaded_iter, dataset.source_path, scene.getTestCameras(), gaussians, pipeline, background, feature_level) 87 | 88 | 89 | if __name__ == "__main__": 90 | # Set up command line argument parser 91 | parser = ArgumentParser(description="Testing script parameters") 92 | model = ModelParams(parser, sentinel=True) 93 | pipeline = PipelineParams(parser) 94 | opt = OptimizationParams(parser) 95 | parser.add_argument("--iteration", default=-1, type=int) 96 | parser.add_argument("--skip_train", action="store_true") 97 | parser.add_argument("--skip_test", action="store_true") 98 | parser.add_argument("--quiet", action="store_true") 99 | args = get_combined_args(parser) 100 | 101 | # Initialize system state (RNG) 102 | safe_state(args.quiet) 103 | 104 | render_sets(model.extract(args), opt.extract(args), args.iteration, pipeline.extract(args), args.skip_train, args.skip_test, args.feature_level) -------------------------------------------------------------------------------- /full_eval.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import os 13 | from argparse import ArgumentParser 14 | import time 15 | 16 | mipnerf360_outdoor_scenes = ["bicycle", "flowers", "garden", "stump", "treehill"] 17 | mipnerf360_indoor_scenes = ["room", "counter", "kitchen", "bonsai"] 18 | tanks_and_temples_scenes = ["truck", "train"] 19 | deep_blending_scenes = ["drjohnson", "playroom"] 20 | 21 | parser = ArgumentParser(description="Full evaluation script parameters") 22 | parser.add_argument("--skip_training", action="store_true") 23 | parser.add_argument("--skip_rendering", action="store_true") 24 | parser.add_argument("--skip_metrics", action="store_true") 25 | parser.add_argument("--output_path", default="./eval") 26 | parser.add_argument("--use_depth", action="store_true") 27 | parser.add_argument("--use_expcomp", action="store_true") 28 | parser.add_argument("--fast", action="store_true") 29 | parser.add_argument("--aa", action="store_true") 30 | 31 | 32 | 33 | 34 | args, _ = parser.parse_known_args() 35 | 36 | all_scenes = [] 37 | all_scenes.extend(mipnerf360_outdoor_scenes) 38 | all_scenes.extend(mipnerf360_indoor_scenes) 39 | all_scenes.extend(tanks_and_temples_scenes) 40 | all_scenes.extend(deep_blending_scenes) 41 | 42 | if not args.skip_training or not args.skip_rendering: 43 | parser.add_argument('--mipnerf360', "-m360", required=True, type=str) 44 | parser.add_argument("--tanksandtemples", "-tat", required=True, type=str) 45 | parser.add_argument("--deepblending", "-db", required=True, type=str) 46 | args = parser.parse_args() 47 | if not args.skip_training: 48 | common_args = " --disable_viewer --quiet --eval --test_iterations -1 " 49 | 50 | if args.aa: 51 | common_args += " --antialiasing " 52 | if args.use_depth: 53 | common_args += " -d depths2/ " 54 | 55 | if args.use_expcomp: 56 | common_args += " --exposure_lr_init 0.001 --exposure_lr_final 0.0001 --exposure_lr_delay_steps 5000 --exposure_lr_delay_mult 0.001 
--train_test_exp " 57 | 58 | if args.fast: 59 | common_args += " --optimizer_type sparse_adam " 60 | 61 | start_time = time.time() 62 | for scene in mipnerf360_outdoor_scenes: 63 | source = args.mipnerf360 + "/" + scene 64 | os.system("python train.py -s " + source + " -i images_4 -m " + args.output_path + "/" + scene + common_args) 65 | for scene in mipnerf360_indoor_scenes: 66 | source = args.mipnerf360 + "/" + scene 67 | os.system("python train.py -s " + source + " -i images_2 -m " + args.output_path + "/" + scene + common_args) 68 | m360_timing = (time.time() - start_time)/60.0 69 | 70 | start_time = time.time() 71 | for scene in tanks_and_temples_scenes: 72 | source = args.tanksandtemples + "/" + scene 73 | os.system("python train.py -s " + source + " -m " + args.output_path + "/" + scene + common_args) 74 | tandt_timing = (time.time() - start_time)/60.0 75 | 76 | start_time = time.time() 77 | for scene in deep_blending_scenes: 78 | source = args.deepblending + "/" + scene 79 | os.system("python train.py -s " + source + " -m " + args.output_path + "/" + scene + common_args) 80 | db_timing = (time.time() - start_time)/60.0 81 | 82 | with open(os.path.join(args.output_path,"timing.txt"), 'w') as file: 83 | file.write(f"m360: {m360_timing} minutes \n tandt: {tandt_timing} minutes \n db: {db_timing} minutes\n") 84 | 85 | if not args.skip_rendering: 86 | all_sources = [] 87 | for scene in mipnerf360_outdoor_scenes: 88 | all_sources.append(args.mipnerf360 + "/" + scene) 89 | for scene in mipnerf360_indoor_scenes: 90 | all_sources.append(args.mipnerf360 + "/" + scene) 91 | for scene in tanks_and_temples_scenes: 92 | all_sources.append(args.tanksandtemples + "/" + scene) 93 | for scene in deep_blending_scenes: 94 | all_sources.append(args.deepblending + "/" + scene) 95 | 96 | common_args = " --quiet --eval --skip_train" 97 | 98 | if args.aa: 99 | common_args += " --antialiasing " 100 | if args.use_expcomp: 101 | common_args += " --train_test_exp " 102 | 103 | for scene, source in zip(all_scenes, all_sources): 104 | os.system("python render.py --iteration 7000 -s " + source + " -m " + args.output_path + "/" + scene + common_args) 105 | os.system("python render.py --iteration 30000 -s " + source + " -m " + args.output_path + "/" + scene + common_args) 106 | 107 | if not args.skip_metrics: 108 | scenes_string = "" 109 | for scene in all_scenes: 110 | scenes_string += "\"" + args.output_path + "/" + scene + "\" " 111 | 112 | os.system("python metrics.py -m " + scenes_string) 113 | -------------------------------------------------------------------------------- /gaussian_feature_extractor.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | from scene import Scene 14 | import os 15 | from tqdm import tqdm 16 | from os import makedirs 17 | from gaussian_renderer import render 18 | import torchvision 19 | from utils.general_utils import safe_state 20 | from argparse import ArgumentParser 21 | from arguments import ModelParams, PipelineParams, OptimizationParams, get_combined_args 22 | from gaussian_renderer import GaussianModel 23 | import numpy as np 24 | from sklearn.decomposition import PCA 25 | import torch.utils.dlpack 26 | import matplotlib.pyplot as plt 27 | import time 28 | 29 | 30 | def extract_gaussian_features(model_path, iteration, source_path, views, gaussians, pipeline, background, feature_level): 31 | 32 | language_feature_save_path = os.path.join(model_path, f'chkpnt{iteration}_langfeat_{feature_level}.pth') 33 | 34 | for _, view in enumerate(tqdm(views, desc="Rendering progress")): 35 | 36 | render_pkg= render(view, gaussians, pipeline, background) 37 | 38 | gt_language_feature, gt_mask = view.get_language_feature(language_feature_dir=f"{source_path}/language_features", feature_level=feature_level) 39 | activated = render_pkg["info"]["activated"] 40 | significance = render_pkg["info"]["significance"] 41 | means2D = render_pkg["info"]["means2d"] 42 | 43 | mask = activated[0] > 0 44 | gaussians.accumulate_gaussian_feature_per_view(gt_language_feature.permute(1, 2, 0), gt_mask.squeeze(0), mask, significance[0,mask], means2D[0, mask]) 45 | 46 | gaussians.finalize_gaussian_features() 47 | 48 | torch.save((gaussians.capture_language_feature(), 0), language_feature_save_path) 49 | print("checkpoint saved to: ", language_feature_save_path) 50 | 51 | def process_scene_language_features(dataset : ModelParams, opt : OptimizationParams, iteration : int, pipeline : PipelineParams, feature_level : int): 52 | 53 | with torch.no_grad(): 54 | gaussians = GaussianModel(dataset.sh_degree) 55 | scene = Scene(dataset, gaussians, load_iteration=iteration, shuffle=False, include_feature=True) 56 | 57 | checkpoint = os.path.join(args.model_path, f'chkpnt{iteration}.pth') 58 | (model_params, _) = torch.load(checkpoint) 59 | gaussians.restore_rgb(model_params, opt) 60 | bg_color = [1,1,1] if dataset.white_background else [0, 0, 0] 61 | background = torch.tensor(bg_color, dtype=torch.float32, device="cuda") 62 | 63 | extract_gaussian_features(args.model_path, iteration, dataset.source_path, scene.getTrainCameras(), gaussians, pipeline, background, feature_level) 64 | 65 | 66 | if __name__ == "__main__": 67 | # Set up command line argument parser 68 | parser = ArgumentParser(description="Testing script parameters") 69 | model = ModelParams(parser, sentinel=True) 70 | pipeline = PipelineParams(parser) 71 | opt = OptimizationParams(parser) 72 | parser.add_argument("--iteration", default=-1, type=int) 73 | parser.add_argument("--quiet", action="store_true") 74 | args = get_combined_args(parser) 75 | 76 | # Initialize system state (RNG) 77 | safe_state(args.quiet) 78 | 79 | process_scene_language_features(model.extract(args), opt.extract(args), args.iteration, pipeline.extract(args), args.feature_level) -------------------------------------------------------------------------------- /gaussian_renderer/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 
5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | import math 14 | # from diff_gaussian_rasterization import GaussianRasterizationSettings, GaussianRasterizer 15 | from scene.gaussian_model import GaussianModel 16 | from utils.sh_utils import eval_sh 17 | from gsplat import rasterization 18 | 19 | def render(viewpoint_camera, pc : GaussianModel, pipe, bg_color : torch.Tensor, scaling_modifier = 1.0, separate_sh = False, override_color = None, use_trained_exp=False, include_feature = False): 20 | """ 21 | Render the scene. 22 | 23 | Background tensor (bg_color) must be on GPU! 24 | """ 25 | 26 | # Set up rasterization configuration 27 | tanfovx = math.tan(viewpoint_camera.FoVx * 0.5) 28 | tanfovy = math.tan(viewpoint_camera.FoVy * 0.5) 29 | 30 | focal_length_x = viewpoint_camera.image_width / (2 * tanfovx) 31 | focal_length_y = viewpoint_camera.image_height / (2 * tanfovy) 32 | 33 | K = torch.tensor( 34 | [ 35 | [focal_length_x, 0, viewpoint_camera.image_width / 2.0], 36 | [0, focal_length_y, viewpoint_camera.image_height / 2.0], 37 | [0, 0, 1], 38 | ], 39 | device="cuda", 40 | ) 41 | 42 | means3D = pc.get_xyz 43 | opacity = pc.get_opacity 44 | 45 | scales = pc.get_scaling * scaling_modifier 46 | rotations = pc.get_rotation 47 | 48 | if include_feature: 49 | features = pc.get_language_feature 50 | else: 51 | if override_color is not None: 52 | colors = override_color # [N, 3] 53 | sh_degree = None 54 | else: 55 | colors = pc.get_features # [N, K, 3] 56 | sh_degree = pc.active_sh_degree 57 | 58 | viewmat = viewpoint_camera.world_view_transform.transpose(0, 1) # [4, 4] 59 | if include_feature: 60 | render_colors, render_alphas, info = rasterization( 61 | means=means3D, # [N, 3] 62 | quats=rotations, # [N, 4] 63 | scales=scales, # [N, 3] 64 | opacities=opacity.squeeze(-1), # [N,] 65 | colors=features, # [N, D] 66 | viewmats=viewmat[None], # [1, 4, 4] 67 | Ks=K[None], # [1, 3, 3] 68 | width=int(viewpoint_camera.image_width), 69 | height=int(viewpoint_camera.image_height), 70 | packed=False 71 | ) 72 | else: 73 | # Rasterize visible Gaussians to image, obtain their radii (on screen). 74 | render_colors, render_alphas, info = rasterization( 75 | means=means3D, # [N, 3] 76 | quats=rotations, # [N, 4] 77 | scales=scales, # [N, 3] 78 | opacities=opacity.squeeze(-1), # [N,] 79 | colors=colors, 80 | viewmats=viewmat[None], # [1, 4, 4] 81 | Ks=K[None], # [1, 3, 3] 82 | backgrounds=bg_color[None], 83 | width=int(viewpoint_camera.image_width), 84 | height=int(viewpoint_camera.image_height), 85 | packed=False, 86 | sh_degree=sh_degree, 87 | ) 88 | 89 | rendered_image = render_colors[0].permute(2, 0, 1) 90 | radii = info["radii"].squeeze(0) # [N,] 91 | try: 92 | info["means2d"].retain_grad() # [1, N, 2] 93 | except: 94 | pass 95 | out = { 96 | "render": rendered_image, 97 | "viewspace_points": info["means2d"], 98 | "visibility_filter" : radii > 0, 99 | "radii": radii, 100 | "info": info, 101 | } 102 | 103 | return out 104 | -------------------------------------------------------------------------------- /gaussian_renderer/network_gui.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 
5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | import traceback 14 | import socket 15 | import json 16 | from scene.cameras import MiniCam 17 | 18 | host = "127.0.0.1" 19 | port = 6009 20 | 21 | conn = None 22 | addr = None 23 | 24 | listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 25 | 26 | def init(wish_host, wish_port): 27 | global host, port, listener 28 | host = wish_host 29 | port = wish_port 30 | listener.bind((host, port)) 31 | listener.listen() 32 | listener.settimeout(0) 33 | 34 | def try_connect(): 35 | global conn, addr, listener 36 | try: 37 | conn, addr = listener.accept() 38 | print(f"\nConnected by {addr}") 39 | conn.settimeout(None) 40 | except Exception as inst: 41 | pass 42 | 43 | def read(): 44 | global conn 45 | messageLength = conn.recv(4) 46 | messageLength = int.from_bytes(messageLength, 'little') 47 | message = conn.recv(messageLength) 48 | return json.loads(message.decode("utf-8")) 49 | 50 | def send(message_bytes, verify): 51 | global conn 52 | if message_bytes != None: 53 | conn.sendall(message_bytes) 54 | conn.sendall(len(verify).to_bytes(4, 'little')) 55 | conn.sendall(bytes(verify, 'ascii')) 56 | 57 | def receive(): 58 | message = read() 59 | 60 | width = message["resolution_x"] 61 | height = message["resolution_y"] 62 | 63 | if width != 0 and height != 0: 64 | try: 65 | do_training = bool(message["train"]) 66 | fovy = message["fov_y"] 67 | fovx = message["fov_x"] 68 | znear = message["z_near"] 69 | zfar = message["z_far"] 70 | do_shs_python = bool(message["shs_python"]) 71 | do_rot_scale_python = bool(message["rot_scale_python"]) 72 | keep_alive = bool(message["keep_alive"]) 73 | scaling_modifier = message["scaling_modifier"] 74 | world_view_transform = torch.reshape(torch.tensor(message["view_matrix"]), (4, 4)).cuda() 75 | world_view_transform[:,1] = -world_view_transform[:,1] 76 | world_view_transform[:,2] = -world_view_transform[:,2] 77 | full_proj_transform = torch.reshape(torch.tensor(message["view_projection_matrix"]), (4, 4)).cuda() 78 | full_proj_transform[:,1] = -full_proj_transform[:,1] 79 | custom_cam = MiniCam(width, height, fovy, fovx, znear, zfar, world_view_transform, full_proj_transform) 80 | except Exception as e: 81 | print("") 82 | traceback.print_exc() 83 | raise e 84 | return custom_cam, do_training, do_shs_python, do_rot_scale_python, keep_alive, scaling_modifier 85 | else: 86 | return None, None, None, None, None, None -------------------------------------------------------------------------------- /lpipsPyTorch/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .modules.lpips import LPIPS 4 | 5 | 6 | def lpips(x: torch.Tensor, 7 | y: torch.Tensor, 8 | net_type: str = 'alex', 9 | version: str = '0.1'): 10 | r"""Function that measures 11 | Learned Perceptual Image Patch Similarity (LPIPS). 12 | 13 | Arguments: 14 | x, y (torch.Tensor): the input tensors to compare. 15 | net_type (str): the network type to compare the features: 16 | 'alex' | 'squeeze' | 'vgg'. Default: 'alex'. 17 | version (str): the version of LPIPS. Default: 0.1. 
18 | """ 19 | device = x.device 20 | criterion = LPIPS(net_type, version).to(device) 21 | return criterion(x, y) 22 | -------------------------------------------------------------------------------- /lpipsPyTorch/modules/lpips.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .networks import get_network, LinLayers 5 | from .utils import get_state_dict 6 | 7 | 8 | class LPIPS(nn.Module): 9 | r"""Creates a criterion that measures 10 | Learned Perceptual Image Patch Similarity (LPIPS). 11 | 12 | Arguments: 13 | net_type (str): the network type to compare the features: 14 | 'alex' | 'squeeze' | 'vgg'. Default: 'alex'. 15 | version (str): the version of LPIPS. Default: 0.1. 16 | """ 17 | def __init__(self, net_type: str = 'alex', version: str = '0.1'): 18 | 19 | assert version in ['0.1'], 'v0.1 is only supported now' 20 | 21 | super(LPIPS, self).__init__() 22 | 23 | # pretrained network 24 | self.net = get_network(net_type) 25 | 26 | # linear layers 27 | self.lin = LinLayers(self.net.n_channels_list) 28 | self.lin.load_state_dict(get_state_dict(net_type, version)) 29 | 30 | def forward(self, x: torch.Tensor, y: torch.Tensor): 31 | feat_x, feat_y = self.net(x), self.net(y) 32 | 33 | diff = [(fx - fy) ** 2 for fx, fy in zip(feat_x, feat_y)] 34 | res = [l(d).mean((2, 3), True) for d, l in zip(diff, self.lin)] 35 | 36 | return torch.sum(torch.cat(res, 0), 0, True) 37 | -------------------------------------------------------------------------------- /lpipsPyTorch/modules/networks.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence 2 | 3 | from itertools import chain 4 | 5 | import torch 6 | import torch.nn as nn 7 | from torchvision import models 8 | 9 | from .utils import normalize_activation 10 | 11 | 12 | def get_network(net_type: str): 13 | if net_type == 'alex': 14 | return AlexNet() 15 | elif net_type == 'squeeze': 16 | return SqueezeNet() 17 | elif net_type == 'vgg': 18 | return VGG16() 19 | else: 20 | raise NotImplementedError('choose net_type from [alex, squeeze, vgg].') 21 | 22 | 23 | class LinLayers(nn.ModuleList): 24 | def __init__(self, n_channels_list: Sequence[int]): 25 | super(LinLayers, self).__init__([ 26 | nn.Sequential( 27 | nn.Identity(), 28 | nn.Conv2d(nc, 1, 1, 1, 0, bias=False) 29 | ) for nc in n_channels_list 30 | ]) 31 | 32 | for param in self.parameters(): 33 | param.requires_grad = False 34 | 35 | 36 | class BaseNet(nn.Module): 37 | def __init__(self): 38 | super(BaseNet, self).__init__() 39 | 40 | # register buffer 41 | self.register_buffer( 42 | 'mean', torch.Tensor([-.030, -.088, -.188])[None, :, None, None]) 43 | self.register_buffer( 44 | 'std', torch.Tensor([.458, .448, .450])[None, :, None, None]) 45 | 46 | def set_requires_grad(self, state: bool): 47 | for param in chain(self.parameters(), self.buffers()): 48 | param.requires_grad = state 49 | 50 | def z_score(self, x: torch.Tensor): 51 | return (x - self.mean) / self.std 52 | 53 | def forward(self, x: torch.Tensor): 54 | x = self.z_score(x) 55 | 56 | output = [] 57 | for i, (_, layer) in enumerate(self.layers._modules.items(), 1): 58 | x = layer(x) 59 | if i in self.target_layers: 60 | output.append(normalize_activation(x)) 61 | if len(output) == len(self.target_layers): 62 | break 63 | return output 64 | 65 | 66 | class SqueezeNet(BaseNet): 67 | def __init__(self): 68 | super(SqueezeNet, self).__init__() 69 | 70 | self.layers = 
models.squeezenet1_1(True).features 71 | self.target_layers = [2, 5, 8, 10, 11, 12, 13] 72 | self.n_channels_list = [64, 128, 256, 384, 384, 512, 512] 73 | 74 | self.set_requires_grad(False) 75 | 76 | 77 | class AlexNet(BaseNet): 78 | def __init__(self): 79 | super(AlexNet, self).__init__() 80 | 81 | self.layers = models.alexnet(True).features 82 | self.target_layers = [2, 5, 8, 10, 12] 83 | self.n_channels_list = [64, 192, 384, 256, 256] 84 | 85 | self.set_requires_grad(False) 86 | 87 | 88 | class VGG16(BaseNet): 89 | def __init__(self): 90 | super(VGG16, self).__init__() 91 | 92 | self.layers = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1).features 93 | self.target_layers = [4, 9, 16, 23, 30] 94 | self.n_channels_list = [64, 128, 256, 512, 512] 95 | 96 | self.set_requires_grad(False) 97 | -------------------------------------------------------------------------------- /lpipsPyTorch/modules/utils.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch 4 | 5 | 6 | def normalize_activation(x, eps=1e-10): 7 | norm_factor = torch.sqrt(torch.sum(x ** 2, dim=1, keepdim=True)) 8 | return x / (norm_factor + eps) 9 | 10 | 11 | def get_state_dict(net_type: str = 'alex', version: str = '0.1'): 12 | # build url 13 | url = 'https://raw.githubusercontent.com/richzhang/PerceptualSimilarity/' \ 14 | + f'master/lpips/weights/v{version}/{net_type}.pth' 15 | 16 | # download 17 | old_state_dict = torch.hub.load_state_dict_from_url( 18 | url, progress=True, 19 | map_location=None if torch.cuda.is_available() else torch.device('cpu') 20 | ) 21 | 22 | # rename keys 23 | new_state_dict = OrderedDict() 24 | for key, val in old_state_dict.items(): 25 | new_key = key 26 | new_key = new_key.replace('lin', '') 27 | new_key = new_key.replace('model.', '') 28 | new_state_dict[new_key] = val 29 | 30 | return new_state_dict 31 | -------------------------------------------------------------------------------- /metrics.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | from pathlib import Path 13 | import os 14 | from PIL import Image 15 | import torch 16 | import torchvision.transforms.functional as tf 17 | from utils.loss_utils import ssim 18 | from lpipsPyTorch import lpips 19 | import json 20 | from tqdm import tqdm 21 | from utils.image_utils import psnr 22 | from argparse import ArgumentParser 23 | 24 | def readImages(renders_dir, gt_dir): 25 | renders = [] 26 | gts = [] 27 | image_names = [] 28 | for fname in os.listdir(renders_dir): 29 | render = Image.open(renders_dir / fname) 30 | gt = Image.open(gt_dir / fname) 31 | renders.append(tf.to_tensor(render).unsqueeze(0)[:, :3, :, :].cuda()) 32 | gts.append(tf.to_tensor(gt).unsqueeze(0)[:, :3, :, :].cuda()) 33 | image_names.append(fname) 34 | return renders, gts, image_names 35 | 36 | def evaluate(model_paths): 37 | 38 | full_dict = {} 39 | per_view_dict = {} 40 | full_dict_polytopeonly = {} 41 | per_view_dict_polytopeonly = {} 42 | print("") 43 | 44 | for scene_dir in model_paths: 45 | try: 46 | print("Scene:", scene_dir) 47 | full_dict[scene_dir] = {} 48 | per_view_dict[scene_dir] = {} 49 | full_dict_polytopeonly[scene_dir] = {} 50 | per_view_dict_polytopeonly[scene_dir] = {} 51 | 52 | test_dir = Path(scene_dir) / "test" 53 | 54 | for method in os.listdir(test_dir): 55 | print("Method:", method) 56 | 57 | full_dict[scene_dir][method] = {} 58 | per_view_dict[scene_dir][method] = {} 59 | full_dict_polytopeonly[scene_dir][method] = {} 60 | per_view_dict_polytopeonly[scene_dir][method] = {} 61 | 62 | method_dir = test_dir / method 63 | gt_dir = method_dir/ "gt" 64 | renders_dir = method_dir / "renders" 65 | renders, gts, image_names = readImages(renders_dir, gt_dir) 66 | 67 | ssims = [] 68 | psnrs = [] 69 | lpipss = [] 70 | 71 | for idx in tqdm(range(len(renders)), desc="Metric evaluation progress"): 72 | ssims.append(ssim(renders[idx], gts[idx])) 73 | psnrs.append(psnr(renders[idx], gts[idx])) 74 | lpipss.append(lpips(renders[idx], gts[idx], net_type='vgg')) 75 | 76 | print(" SSIM : {:>12.7f}".format(torch.tensor(ssims).mean(), ".5")) 77 | print(" PSNR : {:>12.7f}".format(torch.tensor(psnrs).mean(), ".5")) 78 | print(" LPIPS: {:>12.7f}".format(torch.tensor(lpipss).mean(), ".5")) 79 | print("") 80 | 81 | full_dict[scene_dir][method].update({"SSIM": torch.tensor(ssims).mean().item(), 82 | "PSNR": torch.tensor(psnrs).mean().item(), 83 | "LPIPS": torch.tensor(lpipss).mean().item()}) 84 | per_view_dict[scene_dir][method].update({"SSIM": {name: ssim for ssim, name in zip(torch.tensor(ssims).tolist(), image_names)}, 85 | "PSNR": {name: psnr for psnr, name in zip(torch.tensor(psnrs).tolist(), image_names)}, 86 | "LPIPS": {name: lp for lp, name in zip(torch.tensor(lpipss).tolist(), image_names)}}) 87 | 88 | with open(scene_dir + "/results.json", 'w') as fp: 89 | json.dump(full_dict[scene_dir], fp, indent=True) 90 | with open(scene_dir + "/per_view.json", 'w') as fp: 91 | json.dump(per_view_dict[scene_dir], fp, indent=True) 92 | except: 93 | print("Unable to compute metrics for model", scene_dir) 94 | 95 | if __name__ == "__main__": 96 | device = torch.device("cuda:0") 97 | torch.cuda.set_device(device) 98 | 99 | # Set up command line argument parser 100 | parser = ArgumentParser(description="Training script parameters") 101 | parser.add_argument('--model_paths', '-m', required=True, nargs="+", type=str, default=[]) 102 | args = parser.parse_args() 103 | evaluate(args.model_paths) 104 | 
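For reference, the per-image computation inside evaluate() reduces to the minimal sketch below. The paths are hypothetical placeholders that follow the renders/gt layout written by render.py (test/ours_<iteration>/renders and .../gt); the helpers are the same ones metrics.py imports. evaluate() then averages the three values over all images of a method and writes results.json and per_view.json into the scene directory.

import torchvision.transforms.functional as tf
from PIL import Image
from utils.loss_utils import ssim
from utils.image_utils import psnr
from lpipsPyTorch import lpips

# Hypothetical example paths; evaluate() walks <model_path>/test/<method>/renders and .../gt instead.
render_path = "output/lerf/teatime/test/ours_30000/renders/00000.png"
gt_path = "output/lerf/teatime/test/ours_30000/gt/00000.png"

# Load both images as [1, 3, H, W] CUDA tensors, exactly as readImages() does.
render = tf.to_tensor(Image.open(render_path)).unsqueeze(0)[:, :3, :, :].cuda()
gt = tf.to_tensor(Image.open(gt_path)).unsqueeze(0)[:, :3, :, :].cuda()

print("SSIM :", ssim(render, gt).item())
print("PSNR :", psnr(render, gt).mean().item())
print("LPIPS:", lpips(render, gt, net_type="vgg").item())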
-------------------------------------------------------------------------------- /render.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | from scene import Scene 14 | import os 15 | from tqdm import tqdm 16 | from os import makedirs 17 | from gaussian_renderer import render 18 | import torchvision 19 | from utils.general_utils import safe_state 20 | from argparse import ArgumentParser 21 | from arguments import ModelParams, PipelineParams, get_combined_args 22 | from gaussian_renderer import GaussianModel 23 | try: 24 | from diff_gaussian_rasterization import SparseGaussianAdam 25 | SPARSE_ADAM_AVAILABLE = True 26 | except: 27 | SPARSE_ADAM_AVAILABLE = False 28 | 29 | 30 | def render_set(model_path, name, iteration, views, gaussians, pipeline, background, train_test_exp, separate_sh): 31 | render_path = os.path.join(model_path, name, "ours_{}".format(iteration), "renders") 32 | gts_path = os.path.join(model_path, name, "ours_{}".format(iteration), "gt") 33 | 34 | makedirs(render_path, exist_ok=True) 35 | makedirs(gts_path, exist_ok=True) 36 | 37 | for idx, view in enumerate(tqdm(views, desc="Rendering progress")): 38 | rendering = render(view, gaussians, pipeline, background, use_trained_exp=train_test_exp, separate_sh=separate_sh)["render"] 39 | gt = view.original_image[0:3, :, :] 40 | 41 | torchvision.utils.save_image(rendering, os.path.join(render_path, '{0:05d}'.format(idx) + ".png")) 42 | torchvision.utils.save_image(gt, os.path.join(gts_path, '{0:05d}'.format(idx) + ".png")) 43 | 44 | def render_sets(dataset : ModelParams, iteration : int, pipeline : PipelineParams, skip_train : bool, skip_test : bool, separate_sh: bool): 45 | with torch.no_grad(): 46 | gaussians = GaussianModel(dataset.sh_degree) 47 | scene = Scene(dataset, gaussians, load_iteration=iteration, shuffle=False) 48 | 49 | bg_color = [1,1,1] if dataset.white_background else [0, 0, 0] 50 | background = torch.tensor(bg_color, dtype=torch.float32, device="cuda") 51 | 52 | if not skip_train: 53 | render_set(dataset.model_path, "train", scene.loaded_iter, scene.getTrainCameras(), gaussians, pipeline, background, dataset.train_test_exp, separate_sh) 54 | 55 | if not skip_test: 56 | render_set(dataset.model_path, "test", scene.loaded_iter, scene.getTestCameras(), gaussians, pipeline, background, dataset.train_test_exp, separate_sh) 57 | 58 | if __name__ == "__main__": 59 | # Set up command line argument parser 60 | parser = ArgumentParser(description="Testing script parameters") 61 | model = ModelParams(parser, sentinel=True) 62 | pipeline = PipelineParams(parser) 63 | parser.add_argument("--iteration", default=-1, type=int) 64 | parser.add_argument("--skip_train", action="store_true") 65 | parser.add_argument("--skip_test", action="store_true") 66 | parser.add_argument("--quiet", action="store_true") 67 | args = get_combined_args(parser) 68 | print("Rendering " + args.model_path) 69 | 70 | # Initialize system state (RNG) 71 | safe_state(args.quiet) 72 | 73 | render_sets(model.extract(args), args.iteration, pipeline.extract(args), args.skip_train, args.skip_test, SPARSE_ADAM_AVAILABLE) 
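render.py above exercises the RGB path of render(). The same function (see gaussian_renderer/__init__.py) also has a feature path: with include_feature=True it rasterizes the per-Gaussian language features instead of colors and returns a D-channel feature image under "render". The sketch below is only an illustration of that path, not the repository's feature_map_renderer.py; it assumes view, gaussians, pipeline and background are prepared as in render_sets() above and that language features have already been attached to the model (e.g. by gaussian_feature_extractor.py), and it projects the feature image to three channels with PCA for quick visual inspection.

import numpy as np
import torch
from sklearn.decomposition import PCA
from gaussian_renderer import render

with torch.no_grad():
    # With include_feature=True, "render" is a [D, H, W] feature image.
    feat = render(view, gaussians, pipeline, background, include_feature=True)["render"]

D, H, W = feat.shape
flat = feat.permute(1, 2, 0).reshape(-1, D).cpu().numpy()         # [H*W, D]
proj = PCA(n_components=3).fit_transform(flat)                    # [H*W, 3]
proj = (proj - proj.min(axis=0)) / (np.ptp(proj, axis=0) + 1e-8)  # scale each channel to [0, 1]
feature_vis = torch.from_numpy(proj).reshape(H, W, 3).permute(2, 0, 1).float()

The resulting feature_vis tensor can then be written out with torchvision.utils.save_image, just like the RGB renders above.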
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | open-clip-torch 2 | plyfile 3 | tqdm 4 | opencv-python 5 | tensorboard 6 | jaxtyping 7 | matplotlib 8 | typing 9 | pathlib 10 | mediapy 11 | scikit-learn 12 | -------------------------------------------------------------------------------- /run_3DOVS.sh: -------------------------------------------------------------------------------- 1 | DATASET_NAME="bench" 2 | OUTPUT_DIR="/home/joanna_cheng/workspace/occamlgs_new/output/3DOVS" 3 | 4 | cd ~/workspace/occamlgs_new 5 | 6 | python train.py -s /scratch/joanna_cheng/3DOVS-preprocess-full/$DATASET_NAME -m $OUTPUT_DIR/$DATASET_NAME --iterations 30000 7 | python render.py -m $OUTPUT_DIR/$DATASET_NAME --iteration 30000 8 | 9 | python gaussian_feature_extractor.py -m $OUTPUT_DIR/$DATASET_NAME --iteration 30000 --eval --feature_level 3 10 | python feature_map_renderer.py -m $OUTPUT_DIR/$DATASET_NAME --iteration 30000 --eval --feature_level 1 --skip_train -------------------------------------------------------------------------------- /run_lerf.sh: -------------------------------------------------------------------------------- 1 | DATASET_NAME="teatime" 2 | OUTPUT_DIR="/home/joanna_cheng/workspace/occamlgs/output/lerf" 3 | 4 | cd ~/workspace/occamlgs 5 | 6 | python train.py -s /scratch/joanna_cheng/lerf_ovs/$DATASET_NAME -m $OUTPUT_DIR/$DATASET_NAME --iterations 30000 7 | python render.py -m $OUTPUT_DIR/$DATASET_NAME --iteration 30000 8 | 9 | python gaussian_feature_extractor.py -m $OUTPUT_DIR/$DATASET_NAME --iteration 30000 --eval --feature_level 1 10 | python feature_map_renderer.py -m $OUTPUT_DIR/$DATASET_NAME --iteration 30000 --eval --feature_level 1 -------------------------------------------------------------------------------- /scene/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import os 13 | import random 14 | import json 15 | from utils.system_utils import searchForMaxIteration 16 | from scene.dataset_readers import sceneLoadTypeCallbacks 17 | from scene.gaussian_model import GaussianModel 18 | from arguments import ModelParams 19 | from utils.camera_utils import cameraList_from_camInfos, camera_to_JSON 20 | 21 | class Scene: 22 | 23 | gaussians : GaussianModel 24 | 25 | def __init__(self, args : ModelParams, gaussians : GaussianModel, load_iteration=None, shuffle=True, resolution_scales=[1.0], include_feature=False): 26 | """ 27 | :param path: Path to colmap scene main folder.
28 | """ 29 | self.model_path = args.model_path 30 | self.loaded_iter = None 31 | self.gaussians = gaussians 32 | 33 | if load_iteration: 34 | if load_iteration == -1: 35 | self.loaded_iter = searchForMaxIteration(os.path.join(self.model_path, "point_cloud")) 36 | else: 37 | self.loaded_iter = load_iteration 38 | print("Loading trained model at iteration {}".format(self.loaded_iter)) 39 | 40 | self.train_cameras = {} 41 | self.test_cameras = {} 42 | if include_feature: 43 | llffhold=None 44 | else: 45 | llffhold=8 46 | if os.path.exists(os.path.join(args.source_path, "sparse")): 47 | scene_info = sceneLoadTypeCallbacks["Colmap"](args.source_path, args.images, args.depths, args.eval, args.train_test_exp, llffhold=llffhold) 48 | elif os.path.exists(os.path.join(args.source_path, "transforms_train.json")): 49 | print("Found transforms_train.json file, assuming Blender data set!") 50 | scene_info = sceneLoadTypeCallbacks["Blender"](args.source_path, args.white_background, args.depths, args.eval) 51 | else: 52 | assert False, "Could not recognize scene type!" 53 | 54 | if not self.loaded_iter: 55 | with open(scene_info.ply_path, 'rb') as src_file, open(os.path.join(self.model_path, "input.ply") , 'wb') as dest_file: 56 | dest_file.write(src_file.read()) 57 | json_cams = [] 58 | camlist = [] 59 | if scene_info.test_cameras: 60 | camlist.extend(scene_info.test_cameras) 61 | if scene_info.train_cameras: 62 | camlist.extend(scene_info.train_cameras) 63 | for id, cam in enumerate(camlist): 64 | json_cams.append(camera_to_JSON(id, cam)) 65 | with open(os.path.join(self.model_path, "cameras.json"), 'w') as file: 66 | json.dump(json_cams, file) 67 | 68 | if shuffle: 69 | random.shuffle(scene_info.train_cameras) # Multi-res consistent random shuffling 70 | random.shuffle(scene_info.test_cameras) # Multi-res consistent random shuffling 71 | 72 | self.cameras_extent = scene_info.nerf_normalization["radius"] 73 | 74 | for resolution_scale in resolution_scales: 75 | print("Loading Training Cameras") 76 | self.train_cameras[resolution_scale] = cameraList_from_camInfos(scene_info.train_cameras, resolution_scale, args, scene_info.is_nerf_synthetic, False) 77 | print("Loading Test Cameras") 78 | self.test_cameras[resolution_scale] = cameraList_from_camInfos(scene_info.test_cameras, resolution_scale, args, scene_info.is_nerf_synthetic, True) 79 | 80 | if self.loaded_iter: 81 | self.gaussians.load_ply(os.path.join(self.model_path, 82 | "point_cloud", 83 | "iteration_" + str(self.loaded_iter), 84 | "point_cloud.ply"), args.train_test_exp) 85 | else: 86 | self.gaussians.create_from_pcd(scene_info.point_cloud, scene_info.train_cameras, self.cameras_extent) 87 | 88 | def save(self, iteration): 89 | point_cloud_path = os.path.join(self.model_path, "point_cloud/iteration_{}".format(iteration)) 90 | self.gaussians.save_ply(os.path.join(point_cloud_path, "point_cloud.ply")) 91 | 92 | def getTrainCameras(self, scale=1.0): 93 | return self.train_cameras[scale] 94 | 95 | def getTestCameras(self, scale=1.0): 96 | return self.test_cameras[scale] 97 | def getCameras(self): 98 | return self.cameras 99 | -------------------------------------------------------------------------------- /scene/cameras.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 
5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | from torch import nn 14 | import numpy as np 15 | from utils.graphics_utils import getWorld2View2, getProjectionMatrix 16 | from utils.general_utils import PILtoTorch 17 | import cv2 18 | import os 19 | import pickle 20 | 21 | class Camera(nn.Module): 22 | def __init__(self, resolution, colmap_id, R, T, FoVx, FoVy, depth_params, image, invdepthmap, 23 | image_name, uid, 24 | trans=np.array([0.0, 0.0, 0.0]), scale=1.0, data_device = "cuda", 25 | train_test_exp = False, is_test_dataset = False, is_test_view = False 26 | ): 27 | super(Camera, self).__init__() 28 | 29 | self.uid = uid 30 | self.colmap_id = colmap_id 31 | self.R = R 32 | self.T = T 33 | self.FoVx = FoVx 34 | self.FoVy = FoVy 35 | self.image_name = image_name 36 | 37 | try: 38 | self.data_device = torch.device(data_device) 39 | except Exception as e: 40 | print(e) 41 | print(f"[Warning] Custom device {data_device} failed, fallback to default cuda device" ) 42 | self.data_device = torch.device("cuda") 43 | 44 | resized_image_rgb = PILtoTorch(image, resolution) 45 | gt_image = resized_image_rgb[:3, ...] 46 | self.alpha_mask = None 47 | if resized_image_rgb.shape[0] == 4: 48 | self.alpha_mask = resized_image_rgb[3:4, ...].to(self.data_device) 49 | else: 50 | self.alpha_mask = torch.ones_like(resized_image_rgb[0:1, ...].to(self.data_device)) 51 | 52 | if train_test_exp and is_test_view: 53 | if is_test_dataset: 54 | self.alpha_mask[..., :self.alpha_mask.shape[-1] // 2] = 0 55 | else: 56 | self.alpha_mask[..., self.alpha_mask.shape[-1] // 2:] = 0 57 | 58 | self.original_image = gt_image.clamp(0.0, 1.0).to(self.data_device) 59 | self.image_width = self.original_image.shape[2] 60 | self.image_height = self.original_image.shape[1] 61 | 62 | self.invdepthmap = None 63 | self.depth_reliable = False 64 | if invdepthmap is not None: 65 | self.depth_mask = torch.ones_like(self.alpha_mask) 66 | self.invdepthmap = cv2.resize(invdepthmap, resolution) 67 | self.invdepthmap[self.invdepthmap < 0] = 0 68 | self.depth_reliable = True 69 | 70 | if depth_params is not None: 71 | if depth_params["scale"] < 0.2 * depth_params["med_scale"] or depth_params["scale"] > 5 * depth_params["med_scale"]: 72 | self.depth_reliable = False 73 | self.depth_mask *= 0 74 | 75 | if depth_params["scale"] > 0: 76 | self.invdepthmap = self.invdepthmap * depth_params["scale"] + depth_params["offset"] 77 | 78 | if self.invdepthmap.ndim != 2: 79 | self.invdepthmap = self.invdepthmap[..., 0] 80 | self.invdepthmap = torch.from_numpy(self.invdepthmap[None]).to(self.data_device) 81 | 82 | self.zfar = 100.0 83 | self.znear = 0.01 84 | 85 | self.trans = trans 86 | self.scale = scale 87 | 88 | self.world_view_transform = torch.tensor(getWorld2View2(R, T, trans, scale)).transpose(0, 1).cuda() 89 | self.projection_matrix = getProjectionMatrix(znear=self.znear, zfar=self.zfar, fovX=self.FoVx, fovY=self.FoVy).transpose(0,1).cuda() 90 | self.full_proj_transform = (self.world_view_transform.unsqueeze(0).bmm(self.projection_matrix.unsqueeze(0))).squeeze(0) 91 | self.camera_center = self.world_view_transform.inverse()[3, :3] 92 | 93 | y, x = torch.meshgrid(torch.arange(0, self.image_height, device='cuda'), torch.arange(0, self.image_width, device='cuda')) 94 | self.x = x.reshape(-1, 1) 95 | self.y = y.reshape(-1, 1) 96 | 97 | def get_language_feature(self, 
language_feature_dir, feature_level): 98 | 99 | language_feature_name = os.path.join(language_feature_dir, self.image_name.split('.')[0]) 100 | 101 | seg_map = torch.from_numpy(np.load(language_feature_name + '_s.npy')) # seg_map: torch.Size([4, 730, 988]) 102 | feature_map = torch.from_numpy(np.load(language_feature_name + '_f.npy')) # feature_map: torch.Size([281, 512]) 103 | seg_map = seg_map.cuda() 104 | feature_map = feature_map.cuda() 105 | 106 | seg = seg_map[:, self.y, self.x].squeeze(-1).long() 107 | mask = seg != -1 108 | if feature_level == 0: # default 109 | point_feature1 = feature_map[seg[0:1]].squeeze(0) 110 | mask = mask[0:1].reshape(1, self.image_height, self.image_width) 111 | elif feature_level == 1: # s 112 | point_feature1 = feature_map[seg[1:2]].squeeze(0) 113 | mask = mask[1:2].reshape(1, self.image_height, self.image_width) 114 | elif feature_level == 2: # m 115 | point_feature1 = feature_map[seg[2:3]].squeeze(0) 116 | mask = mask[2:3].reshape(1, self.image_height, self.image_width) 117 | elif feature_level == 3: # l 118 | point_feature1 = feature_map[seg[3:4]].squeeze(0) 119 | mask = mask[3:4].reshape(1, self.image_height, self.image_width) 120 | else: 121 | raise ValueError("feature_level=", feature_level) 122 | 123 | point_feature = point_feature1.reshape(self.image_height, self.image_width, -1).permute(2, 0, 1) 124 | 125 | return point_feature, mask 126 | 127 | class MiniCam: 128 | def __init__(self, width, height, fovy, fovx, znear, zfar, world_view_transform, full_proj_transform): 129 | self.image_width = width 130 | self.image_height = height 131 | self.FoVy = fovy 132 | self.FoVx = fovx 133 | self.znear = znear 134 | self.zfar = zfar 135 | self.world_view_transform = world_view_transform 136 | self.full_proj_transform = full_proj_transform 137 | view_inv = torch.inverse(self.world_view_transform) 138 | self.camera_center = view_inv[3][:3] 139 | 140 | -------------------------------------------------------------------------------- /scene/colmap_loader.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import numpy as np 13 | import collections 14 | import struct 15 | 16 | CameraModel = collections.namedtuple( 17 | "CameraModel", ["model_id", "model_name", "num_params"]) 18 | Camera = collections.namedtuple( 19 | "Camera", ["id", "model", "width", "height", "params"]) 20 | BaseImage = collections.namedtuple( 21 | "Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"]) 22 | Point3D = collections.namedtuple( 23 | "Point3D", ["id", "xyz", "rgb", "error", "image_ids", "point2D_idxs"]) 24 | CAMERA_MODELS = { 25 | CameraModel(model_id=0, model_name="SIMPLE_PINHOLE", num_params=3), 26 | CameraModel(model_id=1, model_name="PINHOLE", num_params=4), 27 | CameraModel(model_id=2, model_name="SIMPLE_RADIAL", num_params=4), 28 | CameraModel(model_id=3, model_name="RADIAL", num_params=5), 29 | CameraModel(model_id=4, model_name="OPENCV", num_params=8), 30 | CameraModel(model_id=5, model_name="OPENCV_FISHEYE", num_params=8), 31 | CameraModel(model_id=6, model_name="FULL_OPENCV", num_params=12), 32 | CameraModel(model_id=7, model_name="FOV", num_params=5), 33 | CameraModel(model_id=8, model_name="SIMPLE_RADIAL_FISHEYE", num_params=4), 34 | CameraModel(model_id=9, model_name="RADIAL_FISHEYE", num_params=5), 35 | CameraModel(model_id=10, model_name="THIN_PRISM_FISHEYE", num_params=12) 36 | } 37 | CAMERA_MODEL_IDS = dict([(camera_model.model_id, camera_model) 38 | for camera_model in CAMERA_MODELS]) 39 | CAMERA_MODEL_NAMES = dict([(camera_model.model_name, camera_model) 40 | for camera_model in CAMERA_MODELS]) 41 | 42 | 43 | def qvec2rotmat(qvec): 44 | return np.array([ 45 | [1 - 2 * qvec[2]**2 - 2 * qvec[3]**2, 46 | 2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3], 47 | 2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2]], 48 | [2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3], 49 | 1 - 2 * qvec[1]**2 - 2 * qvec[3]**2, 50 | 2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1]], 51 | [2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2], 52 | 2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1], 53 | 1 - 2 * qvec[1]**2 - 2 * qvec[2]**2]]) 54 | 55 | def rotmat2qvec(R): 56 | Rxx, Ryx, Rzx, Rxy, Ryy, Rzy, Rxz, Ryz, Rzz = R.flat 57 | K = np.array([ 58 | [Rxx - Ryy - Rzz, 0, 0, 0], 59 | [Ryx + Rxy, Ryy - Rxx - Rzz, 0, 0], 60 | [Rzx + Rxz, Rzy + Ryz, Rzz - Rxx - Ryy, 0], 61 | [Ryz - Rzy, Rzx - Rxz, Rxy - Ryx, Rxx + Ryy + Rzz]]) / 3.0 62 | eigvals, eigvecs = np.linalg.eigh(K) 63 | qvec = eigvecs[[3, 0, 1, 2], np.argmax(eigvals)] 64 | if qvec[0] < 0: 65 | qvec *= -1 66 | return qvec 67 | 68 | class Image(BaseImage): 69 | def qvec2rotmat(self): 70 | return qvec2rotmat(self.qvec) 71 | 72 | def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"): 73 | """Read and unpack the next bytes from a binary file. 74 | :param fid: 75 | :param num_bytes: Sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc. 76 | :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}. 77 | :param endian_character: Any of {@, =, <, >, !} 78 | :return: Tuple of read and unpacked values. 
79 | """ 80 | data = fid.read(num_bytes) 81 | return struct.unpack(endian_character + format_char_sequence, data) 82 | 83 | def read_points3D_text(path): 84 | """ 85 | see: src/base/reconstruction.cc 86 | void Reconstruction::ReadPoints3DText(const std::string& path) 87 | void Reconstruction::WritePoints3DText(const std::string& path) 88 | """ 89 | xyzs = None 90 | rgbs = None 91 | errors = None 92 | num_points = 0 93 | with open(path, "r") as fid: 94 | while True: 95 | line = fid.readline() 96 | if not line: 97 | break 98 | line = line.strip() 99 | if len(line) > 0 and line[0] != "#": 100 | num_points += 1 101 | 102 | 103 | xyzs = np.empty((num_points, 3)) 104 | rgbs = np.empty((num_points, 3)) 105 | errors = np.empty((num_points, 1)) 106 | count = 0 107 | with open(path, "r") as fid: 108 | while True: 109 | line = fid.readline() 110 | if not line: 111 | break 112 | line = line.strip() 113 | if len(line) > 0 and line[0] != "#": 114 | elems = line.split() 115 | xyz = np.array(tuple(map(float, elems[1:4]))) 116 | rgb = np.array(tuple(map(int, elems[4:7]))) 117 | error = np.array(float(elems[7])) 118 | xyzs[count] = xyz 119 | rgbs[count] = rgb 120 | errors[count] = error 121 | count += 1 122 | 123 | return xyzs, rgbs, errors 124 | 125 | def read_points3D_binary(path_to_model_file): 126 | """ 127 | see: src/base/reconstruction.cc 128 | void Reconstruction::ReadPoints3DBinary(const std::string& path) 129 | void Reconstruction::WritePoints3DBinary(const std::string& path) 130 | """ 131 | 132 | 133 | with open(path_to_model_file, "rb") as fid: 134 | num_points = read_next_bytes(fid, 8, "Q")[0] 135 | 136 | xyzs = np.empty((num_points, 3)) 137 | rgbs = np.empty((num_points, 3)) 138 | errors = np.empty((num_points, 1)) 139 | 140 | for p_id in range(num_points): 141 | binary_point_line_properties = read_next_bytes( 142 | fid, num_bytes=43, format_char_sequence="QdddBBBd") 143 | xyz = np.array(binary_point_line_properties[1:4]) 144 | rgb = np.array(binary_point_line_properties[4:7]) 145 | error = np.array(binary_point_line_properties[7]) 146 | track_length = read_next_bytes( 147 | fid, num_bytes=8, format_char_sequence="Q")[0] 148 | track_elems = read_next_bytes( 149 | fid, num_bytes=8*track_length, 150 | format_char_sequence="ii"*track_length) 151 | xyzs[p_id] = xyz 152 | rgbs[p_id] = rgb 153 | errors[p_id] = error 154 | return xyzs, rgbs, errors 155 | 156 | def read_intrinsics_text(path): 157 | """ 158 | Taken from https://github.com/colmap/colmap/blob/dev/scripts/python/read_write_model.py 159 | """ 160 | cameras = {} 161 | with open(path, "r") as fid: 162 | while True: 163 | line = fid.readline() 164 | if not line: 165 | break 166 | line = line.strip() 167 | if len(line) > 0 and line[0] != "#": 168 | elems = line.split() 169 | camera_id = int(elems[0]) 170 | model = elems[1] 171 | assert model == "PINHOLE", "While the loader support other types, the rest of the code assumes PINHOLE" 172 | width = int(elems[2]) 173 | height = int(elems[3]) 174 | params = np.array(tuple(map(float, elems[4:]))) 175 | cameras[camera_id] = Camera(id=camera_id, model=model, 176 | width=width, height=height, 177 | params=params) 178 | return cameras 179 | 180 | def read_extrinsics_binary(path_to_model_file): 181 | """ 182 | see: src/base/reconstruction.cc 183 | void Reconstruction::ReadImagesBinary(const std::string& path) 184 | void Reconstruction::WriteImagesBinary(const std::string& path) 185 | """ 186 | images = {} 187 | with open(path_to_model_file, "rb") as fid: 188 | num_reg_images = read_next_bytes(fid, 8, 
"Q")[0] 189 | for _ in range(num_reg_images): 190 | binary_image_properties = read_next_bytes( 191 | fid, num_bytes=64, format_char_sequence="idddddddi") 192 | image_id = binary_image_properties[0] 193 | qvec = np.array(binary_image_properties[1:5]) 194 | tvec = np.array(binary_image_properties[5:8]) 195 | camera_id = binary_image_properties[8] 196 | image_name = "" 197 | current_char = read_next_bytes(fid, 1, "c")[0] 198 | while current_char != b"\x00": # look for the ASCII 0 entry 199 | image_name += current_char.decode("utf-8") 200 | current_char = read_next_bytes(fid, 1, "c")[0] 201 | num_points2D = read_next_bytes(fid, num_bytes=8, 202 | format_char_sequence="Q")[0] 203 | x_y_id_s = read_next_bytes(fid, num_bytes=24*num_points2D, 204 | format_char_sequence="ddq"*num_points2D) 205 | xys = np.column_stack([tuple(map(float, x_y_id_s[0::3])), 206 | tuple(map(float, x_y_id_s[1::3]))]) 207 | point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3]))) 208 | images[image_id] = Image( 209 | id=image_id, qvec=qvec, tvec=tvec, 210 | camera_id=camera_id, name=image_name, 211 | xys=xys, point3D_ids=point3D_ids) 212 | return images 213 | 214 | 215 | def read_intrinsics_binary(path_to_model_file): 216 | """ 217 | see: src/base/reconstruction.cc 218 | void Reconstruction::WriteCamerasBinary(const std::string& path) 219 | void Reconstruction::ReadCamerasBinary(const std::string& path) 220 | """ 221 | cameras = {} 222 | with open(path_to_model_file, "rb") as fid: 223 | num_cameras = read_next_bytes(fid, 8, "Q")[0] 224 | for _ in range(num_cameras): 225 | camera_properties = read_next_bytes( 226 | fid, num_bytes=24, format_char_sequence="iiQQ") 227 | camera_id = camera_properties[0] 228 | model_id = camera_properties[1] 229 | model_name = CAMERA_MODEL_IDS[camera_properties[1]].model_name 230 | width = camera_properties[2] 231 | height = camera_properties[3] 232 | num_params = CAMERA_MODEL_IDS[model_id].num_params 233 | params = read_next_bytes(fid, num_bytes=8*num_params, 234 | format_char_sequence="d"*num_params) 235 | cameras[camera_id] = Camera(id=camera_id, 236 | model=model_name, 237 | width=width, 238 | height=height, 239 | params=np.array(params)) 240 | assert len(cameras) == num_cameras 241 | return cameras 242 | 243 | 244 | def read_extrinsics_text(path): 245 | """ 246 | Taken from https://github.com/colmap/colmap/blob/dev/scripts/python/read_write_model.py 247 | """ 248 | images = {} 249 | with open(path, "r") as fid: 250 | while True: 251 | line = fid.readline() 252 | if not line: 253 | break 254 | line = line.strip() 255 | if len(line) > 0 and line[0] != "#": 256 | elems = line.split() 257 | image_id = int(elems[0]) 258 | qvec = np.array(tuple(map(float, elems[1:5]))) 259 | tvec = np.array(tuple(map(float, elems[5:8]))) 260 | camera_id = int(elems[8]) 261 | image_name = elems[9] 262 | elems = fid.readline().split() 263 | xys = np.column_stack([tuple(map(float, elems[0::3])), 264 | tuple(map(float, elems[1::3]))]) 265 | point3D_ids = np.array(tuple(map(int, elems[2::3]))) 266 | images[image_id] = Image( 267 | id=image_id, qvec=qvec, tvec=tvec, 268 | camera_id=camera_id, name=image_name, 269 | xys=xys, point3D_ids=point3D_ids) 270 | return images 271 | 272 | 273 | def read_colmap_bin_array(path): 274 | """ 275 | Taken from https://github.com/colmap/colmap/blob/dev/scripts/python/read_dense.py 276 | 277 | :param path: path to the colmap binary file. 
278 | :return: nd array with the floating point values in the value 279 | """ 280 | with open(path, "rb") as fid: 281 | width, height, channels = np.genfromtxt(fid, delimiter="&", max_rows=1, 282 | usecols=(0, 1, 2), dtype=int) 283 | fid.seek(0) 284 | num_delimiter = 0 285 | byte = fid.read(1) 286 | while True: 287 | if byte == b"&": 288 | num_delimiter += 1 289 | if num_delimiter >= 3: 290 | break 291 | byte = fid.read(1) 292 | array = np.fromfile(fid, np.float32) 293 | array = array.reshape((width, height, channels), order="F") 294 | return np.transpose(array, (1, 0, 2)).squeeze() 295 | -------------------------------------------------------------------------------- /scene/dataset_readers.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import os 13 | import sys 14 | from PIL import Image 15 | from typing import NamedTuple 16 | from scene.colmap_loader import read_extrinsics_text, read_intrinsics_text, qvec2rotmat, \ 17 | read_extrinsics_binary, read_intrinsics_binary, read_points3D_binary, read_points3D_text 18 | from utils.graphics_utils import getWorld2View2, focal2fov, fov2focal 19 | import numpy as np 20 | import json 21 | from pathlib import Path 22 | from plyfile import PlyData, PlyElement 23 | from utils.sh_utils import SH2RGB 24 | from scene.gaussian_model import BasicPointCloud 25 | 26 | class CameraInfo(NamedTuple): 27 | uid: int 28 | R: np.array 29 | T: np.array 30 | FovY: np.array 31 | FovX: np.array 32 | depth_params: dict 33 | image_path: str 34 | image_name: str 35 | depth_path: str 36 | width: int 37 | height: int 38 | is_test: bool 39 | 40 | class SceneInfo(NamedTuple): 41 | point_cloud: BasicPointCloud 42 | train_cameras: list 43 | test_cameras: list 44 | nerf_normalization: dict 45 | ply_path: str 46 | is_nerf_synthetic: bool 47 | 48 | def getNerfppNorm(cam_info): 49 | def get_center_and_diag(cam_centers): 50 | cam_centers = np.hstack(cam_centers) 51 | avg_cam_center = np.mean(cam_centers, axis=1, keepdims=True) 52 | center = avg_cam_center 53 | dist = np.linalg.norm(cam_centers - center, axis=0, keepdims=True) 54 | diagonal = np.max(dist) 55 | return center.flatten(), diagonal 56 | 57 | cam_centers = [] 58 | 59 | for cam in cam_info: 60 | W2C = getWorld2View2(cam.R, cam.T) 61 | C2W = np.linalg.inv(W2C) 62 | cam_centers.append(C2W[:3, 3:4]) 63 | 64 | center, diagonal = get_center_and_diag(cam_centers) 65 | radius = diagonal * 1.1 66 | 67 | translate = -center 68 | 69 | return {"translate": translate, "radius": radius} 70 | 71 | def readColmapCameras(cam_extrinsics, cam_intrinsics, depths_params, images_folder, depths_folder, test_cam_names_list): 72 | cam_infos = [] 73 | for idx, key in enumerate(cam_extrinsics): 74 | sys.stdout.write('\r') 75 | # the exact output you're looking for: 76 | sys.stdout.write("Reading camera {}/{}".format(idx+1, len(cam_extrinsics))) 77 | sys.stdout.flush() 78 | 79 | extr = cam_extrinsics[key] 80 | intr = cam_intrinsics[extr.camera_id] 81 | height = intr.height 82 | width = intr.width 83 | 84 | uid = intr.id 85 | R = np.transpose(qvec2rotmat(extr.qvec)) 86 | T = np.array(extr.tvec) 87 | 88 | if intr.model=="SIMPLE_PINHOLE": 89 | focal_length_x = intr.params[0] 90 | FovY = 
focal2fov(focal_length_x, height) 91 | FovX = focal2fov(focal_length_x, width) 92 | elif intr.model=="PINHOLE": 93 | focal_length_x = intr.params[0] 94 | focal_length_y = intr.params[1] 95 | FovY = focal2fov(focal_length_y, height) 96 | FovX = focal2fov(focal_length_x, width) 97 | else: 98 | assert False, "Colmap camera model not handled: only undistorted datasets (PINHOLE or SIMPLE_PINHOLE cameras) supported!" 99 | 100 | n_remove = len(extr.name.split('.')[-1]) + 1 101 | depth_params = None 102 | if depths_params is not None: 103 | try: 104 | depth_params = depths_params[extr.name[:-n_remove]] 105 | except: 106 | print("\n", key, "not found in depths_params") 107 | 108 | image_path = os.path.join(images_folder, extr.name) 109 | image_name = extr.name 110 | depth_path = os.path.join(depths_folder, f"{extr.name[:-n_remove]}.png") if depths_folder != "" else "" 111 | 112 | cam_info = CameraInfo(uid=uid, R=R, T=T, FovY=FovY, FovX=FovX, depth_params=depth_params, 113 | image_path=image_path, image_name=image_name, depth_path=depth_path, 114 | width=width, height=height, is_test=image_name in test_cam_names_list) 115 | cam_infos.append(cam_info) 116 | 117 | sys.stdout.write('\n') 118 | return cam_infos 119 | 120 | def fetchPly(path): 121 | plydata = PlyData.read(path) 122 | vertices = plydata['vertex'] 123 | positions = np.vstack([vertices['x'], vertices['y'], vertices['z']]).T 124 | colors = np.vstack([vertices['red'], vertices['green'], vertices['blue']]).T / 255.0 125 | normals = np.vstack([vertices['nx'], vertices['ny'], vertices['nz']]).T 126 | return BasicPointCloud(points=positions, colors=colors, normals=normals) 127 | 128 | def storePly(path, xyz, rgb): 129 | # Define the dtype for the structured array 130 | dtype = [('x', 'f4'), ('y', 'f4'), ('z', 'f4'), 131 | ('nx', 'f4'), ('ny', 'f4'), ('nz', 'f4'), 132 | ('red', 'u1'), ('green', 'u1'), ('blue', 'u1')] 133 | 134 | normals = np.zeros_like(xyz) 135 | 136 | elements = np.empty(xyz.shape[0], dtype=dtype) 137 | attributes = np.concatenate((xyz, normals, rgb), axis=1) 138 | elements[:] = list(map(tuple, attributes)) 139 | 140 | # Create the PlyData object and write to file 141 | vertex_element = PlyElement.describe(elements, 'vertex') 142 | ply_data = PlyData([vertex_element]) 143 | ply_data.write(path) 144 | 145 | def readColmapSceneInfo(path, images, depths, eval, train_test_exp, llffhold=8): 146 | try: 147 | cameras_extrinsic_file = os.path.join(path, "sparse/0", "images.bin") 148 | cameras_intrinsic_file = os.path.join(path, "sparse/0", "cameras.bin") 149 | cam_extrinsics = read_extrinsics_binary(cameras_extrinsic_file) 150 | cam_intrinsics = read_intrinsics_binary(cameras_intrinsic_file) 151 | except: 152 | cameras_extrinsic_file = os.path.join(path, "sparse/0", "images.txt") 153 | cameras_intrinsic_file = os.path.join(path, "sparse/0", "cameras.txt") 154 | cam_extrinsics = read_extrinsics_text(cameras_extrinsic_file) 155 | cam_intrinsics = read_intrinsics_text(cameras_intrinsic_file) 156 | 157 | depth_params_file = os.path.join(path, "sparse/0", "depth_params.json") 158 | ## if depth_params_file isnt there AND depths file is here -> throw error 159 | depths_params = None 160 | if depths != "": 161 | try: 162 | with open(depth_params_file, "r") as f: 163 | depths_params = json.load(f) 164 | all_scales = np.array([depths_params[key]["scale"] for key in depths_params]) 165 | if (all_scales > 0).sum(): 166 | med_scale = np.median(all_scales[all_scales > 0]) 167 | else: 168 | med_scale = 0 169 | for key in depths_params: 170 | 
depths_params[key]["med_scale"] = med_scale 171 | 172 | except FileNotFoundError: 173 | print(f"Error: depth_params.json file not found at path '{depth_params_file}'.") 174 | sys.exit(1) 175 | except Exception as e: 176 | print(f"An unexpected error occurred when trying to open depth_params.json file: {e}") 177 | sys.exit(1) 178 | 179 | if eval: 180 | if "360" in path: 181 | llffhold = 8 182 | if llffhold: 183 | print("------------LLFF HOLD-------------") 184 | cam_names = [cam_extrinsics[cam_id].name for cam_id in cam_extrinsics] 185 | cam_names = sorted(cam_names) 186 | test_cam_names_list = [name for idx, name in enumerate(cam_names) if idx % llffhold == 0] 187 | else: 188 | with open(os.path.join(path, "sparse/0", "test.txt"), 'r') as file: 189 | test_cam_names_list = [line.strip() for line in file] 190 | else: 191 | test_cam_names_list = [] 192 | 193 | reading_dir = "images" if images == None else images 194 | cam_infos_unsorted = readColmapCameras( 195 | cam_extrinsics=cam_extrinsics, cam_intrinsics=cam_intrinsics, depths_params=depths_params, 196 | images_folder=os.path.join(path, reading_dir), 197 | depths_folder=os.path.join(path, depths) if depths != "" else "", test_cam_names_list=test_cam_names_list) 198 | cam_infos = sorted(cam_infos_unsorted.copy(), key = lambda x : x.image_name) 199 | 200 | train_cam_infos = [c for c in cam_infos if train_test_exp or not c.is_test] 201 | test_cam_infos = [c for c in cam_infos if c.is_test] 202 | 203 | nerf_normalization = getNerfppNorm(train_cam_infos) 204 | 205 | ply_path = os.path.join(path, "sparse/0/points3D.ply") 206 | bin_path = os.path.join(path, "sparse/0/points3D.bin") 207 | txt_path = os.path.join(path, "sparse/0/points3D.txt") 208 | if not os.path.exists(ply_path): 209 | print("Converting point3d.bin to .ply, will happen only the first time you open the scene.") 210 | try: 211 | xyz, rgb, _ = read_points3D_binary(bin_path) 212 | except: 213 | xyz, rgb, _ = read_points3D_text(txt_path) 214 | storePly(ply_path, xyz, rgb) 215 | try: 216 | pcd = fetchPly(ply_path) 217 | except: 218 | pcd = None 219 | 220 | scene_info = SceneInfo(point_cloud=pcd, 221 | train_cameras=train_cam_infos, 222 | test_cameras=test_cam_infos, 223 | nerf_normalization=nerf_normalization, 224 | ply_path=ply_path, 225 | is_nerf_synthetic=False) 226 | return scene_info 227 | 228 | def readCamerasFromTransforms(path, transformsfile, depths_folder, white_background, is_test, extension=".png"): 229 | cam_infos = [] 230 | 231 | with open(os.path.join(path, transformsfile)) as json_file: 232 | contents = json.load(json_file) 233 | fovx = contents["camera_angle_x"] 234 | 235 | frames = contents["frames"] 236 | for idx, frame in enumerate(frames): 237 | cam_name = os.path.join(path, frame["file_path"] + extension) 238 | 239 | # NeRF 'transform_matrix' is a camera-to-world transform 240 | c2w = np.array(frame["transform_matrix"]) 241 | # change from OpenGL/Blender camera axes (Y up, Z back) to COLMAP (Y down, Z forward) 242 | c2w[:3, 1:3] *= -1 243 | 244 | # get the world-to-camera transform and set R, T 245 | w2c = np.linalg.inv(c2w) 246 | R = np.transpose(w2c[:3,:3]) # R is stored transposed due to 'glm' in CUDA code 247 | T = w2c[:3, 3] 248 | 249 | image_path = os.path.join(path, cam_name) 250 | image_name = Path(cam_name).stem 251 | image = Image.open(image_path) 252 | 253 | im_data = np.array(image.convert("RGBA")) 254 | 255 | bg = np.array([1,1,1]) if white_background else np.array([0, 0, 0]) 256 | 257 | norm_data = im_data / 255.0 258 | arr = norm_data[:,:,:3] * 
norm_data[:, :, 3:4] + bg * (1 - norm_data[:, :, 3:4]) 259 | image = Image.fromarray(np.array(arr*255.0, dtype=np.byte), "RGB") 260 | 261 | fovy = focal2fov(fov2focal(fovx, image.size[0]), image.size[1]) 262 | FovY = fovy 263 | FovX = fovx 264 | 265 | depth_path = os.path.join(depths_folder, f"{image_name}.png") if depths_folder != "" else "" 266 | 267 | cam_infos.append(CameraInfo(uid=idx, R=R, T=T, FovY=FovY, FovX=FovX, 268 | image_path=image_path, image_name=image_name, 269 | width=image.size[0], height=image.size[1], depth_path=depth_path, depth_params=None, is_test=is_test)) 270 | 271 | return cam_infos 272 | 273 | def readNerfSyntheticInfo(path, white_background, depths, eval, extension=".png"): 274 | 275 | depths_folder=os.path.join(path, depths) if depths != "" else "" 276 | print("Reading Training Transforms") 277 | train_cam_infos = readCamerasFromTransforms(path, "transforms_train.json", depths_folder, white_background, False, extension) 278 | print("Reading Test Transforms") 279 | test_cam_infos = readCamerasFromTransforms(path, "transforms_test.json", depths_folder, white_background, True, extension) 280 | 281 | if not eval: 282 | train_cam_infos.extend(test_cam_infos) 283 | test_cam_infos = [] 284 | 285 | nerf_normalization = getNerfppNorm(train_cam_infos) 286 | 287 | ply_path = os.path.join(path, "points3d.ply") 288 | if not os.path.exists(ply_path): 289 | # Since this data set has no colmap data, we start with random points 290 | num_pts = 100_000 291 | print(f"Generating random point cloud ({num_pts})...") 292 | 293 | # We create random points inside the bounds of the synthetic Blender scenes 294 | xyz = np.random.random((num_pts, 3)) * 2.6 - 1.3 295 | shs = np.random.random((num_pts, 3)) / 255.0 296 | pcd = BasicPointCloud(points=xyz, colors=SH2RGB(shs), normals=np.zeros((num_pts, 3))) 297 | 298 | storePly(ply_path, xyz, SH2RGB(shs) * 255) 299 | try: 300 | pcd = fetchPly(ply_path) 301 | except: 302 | pcd = None 303 | 304 | scene_info = SceneInfo(point_cloud=pcd, 305 | train_cameras=train_cam_infos, 306 | test_cameras=test_cam_infos, 307 | nerf_normalization=nerf_normalization, 308 | ply_path=ply_path, 309 | is_nerf_synthetic=True) 310 | return scene_info 311 | 312 | sceneLoadTypeCallbacks = { 313 | "Colmap": readColmapSceneInfo, 314 | "Blender" : readNerfSyntheticInfo 315 | } -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import os 13 | import torch 14 | from random import randint 15 | from utils.loss_utils import l1_loss, ssim 16 | from gaussian_renderer import render 17 | import sys 18 | from scene import Scene, GaussianModel 19 | from utils.general_utils import safe_state, get_expon_lr_func 20 | import uuid 21 | from tqdm import tqdm 22 | from utils.image_utils import psnr 23 | from argparse import ArgumentParser, Namespace 24 | from arguments import ModelParams, PipelineParams, OptimizationParams 25 | # try: 26 | # from torch.utils.tensorboard import SummaryWriter 27 | # TENSORBOARD_FOUND = True 28 | # except ImportError: 29 | # TENSORBOARD_FOUND = False 30 | TENSORBOARD_FOUND = False 31 | 32 | def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoint_iterations, checkpoint, debug_from): 33 | 34 | first_iter = 0 35 | tb_writer = prepare_output_and_logger(dataset) 36 | gaussians = GaussianModel(dataset.sh_degree, opt.optimizer_type) 37 | scene = Scene(dataset, gaussians) 38 | gaussians.training_setup(opt) 39 | if checkpoint: 40 | (model_params, first_iter) = torch.load(checkpoint) 41 | gaussians.restore_rgb(model_params, opt) 42 | 43 | bg_color = [1, 1, 1] if dataset.white_background else [0, 0, 0] 44 | background = torch.tensor(bg_color, dtype=torch.float32, device="cuda") 45 | 46 | iter_start = torch.cuda.Event(enable_timing = True) 47 | iter_end = torch.cuda.Event(enable_timing = True) 48 | 49 | viewpoint_stack = scene.getTrainCameras().copy() 50 | viewpoint_indices = list(range(len(viewpoint_stack))) 51 | ema_loss_for_log = 0.0 52 | 53 | progress_bar = tqdm(range(first_iter, opt.iterations), desc="Training progress") 54 | first_iter += 1 55 | for iteration in range(first_iter, opt.iterations + 1): 56 | 57 | iter_start.record() 58 | 59 | gaussians.update_learning_rate(iteration) 60 | 61 | # Every 1000 its we increase the levels of SH up to a maximum degree 62 | if iteration % 1000 == 0: 63 | gaussians.oneupSHdegree() 64 | 65 | # Pick a random Camera 66 | if not viewpoint_stack: 67 | viewpoint_stack = scene.getTrainCameras().copy() 68 | viewpoint_indices = list(range(len(viewpoint_stack))) 69 | rand_idx = randint(0, len(viewpoint_indices) - 1) 70 | viewpoint_cam = viewpoint_stack.pop(rand_idx) 71 | vind = viewpoint_indices.pop(rand_idx) 72 | 73 | # Render 74 | if (iteration - 1) == debug_from: 75 | pipe.debug = True 76 | 77 | bg = torch.rand((3), device="cuda") if opt.random_background else background 78 | 79 | render_pkg = render(viewpoint_cam, gaussians, pipe, bg) 80 | image, viewspace_point_tensor, visibility_filter, radii = render_pkg["render"], render_pkg["viewspace_points"], render_pkg["visibility_filter"], render_pkg["radii"] 81 | 82 | # Loss 83 | gt_image = viewpoint_cam.original_image.cuda() 84 | Ll1 = l1_loss(image, gt_image) 85 | 86 | ssim_value = ssim(image, gt_image) 87 | loss = (1.0 - opt.lambda_dssim) * Ll1 + opt.lambda_dssim * (1.0 - ssim_value) 88 | 89 | loss.backward() 90 | 91 | iter_end.record() 92 | 93 | with torch.no_grad(): 94 | # Progress bar 95 | ema_loss_for_log = 0.4 * loss.item() + 0.6 * ema_loss_for_log 96 | 97 | if iteration % 10 == 0: 98 | progress_bar.set_postfix({"Loss": f"{ema_loss_for_log:.{7}f}"}) 99 | progress_bar.update(10) 100 | if iteration == opt.iterations: 101 | progress_bar.close() 102 | 103 | # Log and save 104 | training_report(tb_writer, iteration, Ll1, loss, l1_loss, iter_start.elapsed_time(iter_end), testing_iterations, scene, render, (pipe, background)) 
105 | if (iteration in saving_iterations): 106 | print("\n[ITER {}] Saving Gaussians".format(iteration)) 107 | scene.save(iteration) 108 | 109 | # Densification 110 | if iteration < opt.densify_until_iter: 111 | # Keep track of max radii in image-space for pruning 112 | gaussians.max_radii2D[visibility_filter] = torch.max(gaussians.max_radii2D[visibility_filter], radii[visibility_filter]) 113 | gaussians.add_densification_stats(viewspace_point_tensor, visibility_filter, image.shape[2], image.shape[1]) 114 | 115 | if iteration > opt.densify_from_iter and iteration % opt.densification_interval == 0: 116 | size_threshold = 20 if iteration > opt.opacity_reset_interval else None 117 | gaussians.densify_and_prune(opt.densify_grad_threshold, 0.005, scene.cameras_extent, size_threshold, radii) 118 | 119 | if iteration % opt.opacity_reset_interval == 0 or (dataset.white_background and iteration == opt.densify_from_iter): 120 | gaussians.reset_opacity() 121 | 122 | # Optimizer step 123 | if iteration < opt.iterations: 124 | gaussians.optimizer.step() 125 | gaussians.optimizer.zero_grad(set_to_none = True) 126 | 127 | if (iteration in checkpoint_iterations): 128 | print("\n[ITER {}] Saving Checkpoint".format(iteration)) 129 | torch.save((gaussians.capture_rgb(), iteration), scene.model_path + "/chkpnt" + str(iteration) + ".pth") 130 | 131 | def prepare_output_and_logger(args): 132 | if not args.model_path: 133 | if os.getenv('OAR_JOB_ID'): 134 | unique_str=os.getenv('OAR_JOB_ID') 135 | else: 136 | unique_str = str(uuid.uuid4()) 137 | args.model_path = os.path.join("./output/", unique_str[0:10]) 138 | 139 | # Set up output folder 140 | print("Output folder: {}".format(args.model_path)) 141 | os.makedirs(args.model_path, exist_ok = True) 142 | with open(os.path.join(args.model_path, "cfg_args"), 'w') as cfg_log_f: 143 | cfg_log_f.write(str(Namespace(**vars(args)))) 144 | 145 | # Create Tensorboard writer 146 | tb_writer = None 147 | # if TENSORBOARD_FOUND: 148 | # tb_writer = SummaryWriter(args.model_path) 149 | # else: 150 | # print("Tensorboard not available: not logging progress") 151 | return tb_writer 152 | 153 | def training_report(tb_writer, iteration, Ll1, loss, l1_loss, elapsed, testing_iterations, scene : Scene, renderFunc, renderArgs): 154 | if tb_writer: 155 | tb_writer.add_scalar('train_loss_patches/l1_loss', Ll1.item(), iteration) 156 | tb_writer.add_scalar('train_loss_patches/total_loss', loss.item(), iteration) 157 | tb_writer.add_scalar('iter_time', elapsed, iteration) 158 | 159 | # Report test and samples of training set 160 | if iteration in testing_iterations: 161 | torch.cuda.empty_cache() 162 | validation_configs = ({'name': 'test', 'cameras' : scene.getTestCameras()}, 163 | {'name': 'train', 'cameras' : [scene.getTrainCameras()[idx % len(scene.getTrainCameras())] for idx in range(5, 30, 5)]}) 164 | 165 | for config in validation_configs: 166 | if config['cameras'] and len(config['cameras']) > 0: 167 | l1_test = 0.0 168 | psnr_test = 0.0 169 | for idx, viewpoint in enumerate(config['cameras']): 170 | image = torch.clamp(renderFunc(viewpoint, scene.gaussians, *renderArgs)["render"], 0.0, 1.0) 171 | gt_image = torch.clamp(viewpoint.original_image.to("cuda"), 0.0, 1.0) 172 | if tb_writer and (idx < 5): 173 | tb_writer.add_images(config['name'] + "_view_{}/render".format(viewpoint.image_name), image[None], global_step=iteration) 174 | if iteration == testing_iterations[0]: 175 | tb_writer.add_images(config['name'] + "_view_{}/ground_truth".format(viewpoint.image_name), gt_image[None], 
global_step=iteration) 176 | l1_test += l1_loss(image, gt_image).mean().double() 177 | psnr_test += psnr(image, gt_image).mean().double() 178 | psnr_test /= len(config['cameras']) 179 | l1_test /= len(config['cameras']) 180 | print("\n[ITER {}] Evaluating {}: L1 {} PSNR {}".format(iteration, config['name'], l1_test, psnr_test)) 181 | if tb_writer: 182 | tb_writer.add_scalar(config['name'] + '/loss_viewpoint - l1_loss', l1_test, iteration) 183 | tb_writer.add_scalar(config['name'] + '/loss_viewpoint - psnr', psnr_test, iteration) 184 | 185 | if tb_writer: 186 | tb_writer.add_histogram("scene/opacity_histogram", scene.gaussians.get_opacity, iteration) 187 | tb_writer.add_scalar('total_points', scene.gaussians.get_xyz.shape[0], iteration) 188 | torch.cuda.empty_cache() 189 | 190 | if __name__ == "__main__": 191 | # Set up command line argument parser 192 | parser = ArgumentParser(description="Training script parameters") 193 | lp = ModelParams(parser) 194 | op = OptimizationParams(parser) 195 | pp = PipelineParams(parser) 196 | parser.add_argument('--ip', type=str, default="127.0.0.1") 197 | parser.add_argument('--port', type=int, default=6009) 198 | parser.add_argument('--debug_from', type=int, default=-1) 199 | parser.add_argument('--detect_anomaly', action='store_true', default=False) 200 | parser.add_argument("--test_iterations", nargs="+", type=int, default=[7_000, 30_000]) 201 | parser.add_argument("--save_iterations", nargs="+", type=int, default=[7_000, 30_000]) 202 | parser.add_argument("--quiet", action="store_true") 203 | parser.add_argument("--checkpoint_iterations", nargs="+", type=int, default=[7_000, 30_000]) 204 | parser.add_argument("--start_checkpoint", type=str, default = None) 205 | args = parser.parse_args(sys.argv[1:]) 206 | args.save_iterations.append(args.iterations) 207 | 208 | # Initialize system state (RNG) 209 | safe_state(args.quiet) 210 | 211 | torch.autograd.set_detect_anomaly(args.detect_anomaly) 212 | training(lp.extract(args), op.extract(args), pp.extract(args), args.test_iterations, args.save_iterations, args.checkpoint_iterations, args.start_checkpoint, args.debug_from) 213 | -------------------------------------------------------------------------------- /utils/camera_utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | from scene.cameras import Camera 13 | import numpy as np 14 | from utils.graphics_utils import fov2focal 15 | from PIL import Image 16 | import cv2 17 | 18 | WARNED = False 19 | 20 | def loadCam(args, id, cam_info, resolution_scale, is_nerf_synthetic, is_test_dataset): 21 | image = Image.open(cam_info.image_path) 22 | 23 | if cam_info.depth_path != "": 24 | try: 25 | if is_nerf_synthetic: 26 | invdepthmap = cv2.imread(cam_info.depth_path, -1).astype(np.float32) / 512 27 | else: 28 | invdepthmap = cv2.imread(cam_info.depth_path, -1).astype(np.float32) / float(2**16) 29 | 30 | except FileNotFoundError: 31 | print(f"Error: The depth file at path '{cam_info.depth_path}' was not found.") 32 | raise 33 | except IOError: 34 | print(f"Error: Unable to open the image file '{cam_info.depth_path}'. 
It may be corrupted or an unsupported format.") 35 | raise 36 | except Exception as e: 37 | print(f"An unexpected error occurred when trying to read depth at {cam_info.depth_path}: {e}") 38 | raise 39 | else: 40 | invdepthmap = None 41 | 42 | orig_w, orig_h = image.size 43 | if args.resolution in [1, 2, 4, 8]: 44 | resolution = round(orig_w/(resolution_scale * args.resolution)), round(orig_h/(resolution_scale * args.resolution)) 45 | else: # should be a type that converts to float 46 | if args.resolution == -1: 47 | if orig_w > 1600: 48 | global WARNED 49 | if not WARNED: 50 | print("[ INFO ] Encountered quite large input images (>1.6K pixels width), rescaling to 1.6K.\n " 51 | "If this is not desired, please explicitly specify '--resolution/-r' as 1") 52 | WARNED = True 53 | global_down = orig_w / 1600 54 | else: 55 | global_down = 1 56 | else: 57 | global_down = orig_w / args.resolution 58 | 59 | 60 | scale = float(global_down) * float(resolution_scale) 61 | resolution = (int(orig_w / scale), int(orig_h / scale)) 62 | 63 | return Camera(resolution, colmap_id=cam_info.uid, R=cam_info.R, T=cam_info.T, 64 | FoVx=cam_info.FovX, FoVy=cam_info.FovY, depth_params=cam_info.depth_params, 65 | image=image, invdepthmap=invdepthmap, 66 | image_name=cam_info.image_name, uid=id, data_device=args.data_device, 67 | train_test_exp=args.train_test_exp, is_test_dataset=is_test_dataset, is_test_view=cam_info.is_test) 68 | 69 | def cameraList_from_camInfos(cam_infos, resolution_scale, args, is_nerf_synthetic, is_test_dataset): 70 | camera_list = [] 71 | 72 | for id, c in enumerate(cam_infos): 73 | camera_list.append(loadCam(args, id, c, resolution_scale, is_nerf_synthetic, is_test_dataset)) 74 | 75 | return camera_list 76 | 77 | def camera_to_JSON(id, camera : Camera): 78 | Rt = np.zeros((4, 4)) 79 | Rt[:3, :3] = camera.R.transpose() 80 | Rt[:3, 3] = camera.T 81 | Rt[3, 3] = 1.0 82 | 83 | W2C = np.linalg.inv(Rt) 84 | pos = W2C[:3, 3] 85 | rot = W2C[:3, :3] 86 | serializable_array_2d = [x.tolist() for x in rot] 87 | camera_entry = { 88 | 'id' : id, 89 | 'img_name' : camera.image_name, 90 | 'width' : camera.width, 91 | 'height' : camera.height, 92 | 'position': pos.tolist(), 93 | 'rotation': serializable_array_2d, 94 | 'fy' : fov2focal(camera.FovY, camera.height), 95 | 'fx' : fov2focal(camera.FovX, camera.width) 96 | } 97 | return camera_entry -------------------------------------------------------------------------------- /utils/general_utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | import sys 14 | from datetime import datetime 15 | import numpy as np 16 | import random 17 | 18 | def inverse_sigmoid(x): 19 | return torch.log(x/(1-x)) 20 | 21 | def PILtoTorch(pil_image, resolution): 22 | resized_image_PIL = pil_image.resize(resolution) 23 | resized_image = torch.from_numpy(np.array(resized_image_PIL)) / 255.0 24 | if len(resized_image.shape) == 3: 25 | return resized_image.permute(2, 0, 1) 26 | else: 27 | return resized_image.unsqueeze(dim=-1).permute(2, 0, 1) 28 | 29 | def get_expon_lr_func( 30 | lr_init, lr_final, lr_delay_steps=0, lr_delay_mult=1.0, max_steps=1000000 31 | ): 32 | """ 33 | Copied from Plenoxels 34 | 35 | Continuous learning rate decay function. Adapted from JaxNeRF 36 | The returned rate is lr_init when step=0 and lr_final when step=max_steps, and 37 | is log-linearly interpolated elsewhere (equivalent to exponential decay). 38 | If lr_delay_steps>0 then the learning rate will be scaled by some smooth 39 | function of lr_delay_mult, such that the initial learning rate is 40 | lr_init*lr_delay_mult at the beginning of optimization but will be eased back 41 | to the normal learning rate when steps>lr_delay_steps. 42 | :param conf: config subtree 'lr' or similar 43 | :param max_steps: int, the number of steps during optimization. 44 | :return HoF which takes step as input 45 | """ 46 | 47 | def helper(step): 48 | if step < 0 or (lr_init == 0.0 and lr_final == 0.0): 49 | # Disable this parameter 50 | return 0.0 51 | if lr_delay_steps > 0: 52 | # A kind of reverse cosine decay. 53 | delay_rate = lr_delay_mult + (1 - lr_delay_mult) * np.sin( 54 | 0.5 * np.pi * np.clip(step / lr_delay_steps, 0, 1) 55 | ) 56 | else: 57 | delay_rate = 1.0 58 | t = np.clip(step / max_steps, 0, 1) 59 | log_lerp = np.exp(np.log(lr_init) * (1 - t) + np.log(lr_final) * t) 60 | return delay_rate * log_lerp 61 | 62 | return helper 63 | 64 | def strip_lowerdiag(L): 65 | uncertainty = torch.zeros((L.shape[0], 6), dtype=torch.float, device="cuda") 66 | 67 | uncertainty[:, 0] = L[:, 0, 0] 68 | uncertainty[:, 1] = L[:, 0, 1] 69 | uncertainty[:, 2] = L[:, 0, 2] 70 | uncertainty[:, 3] = L[:, 1, 1] 71 | uncertainty[:, 4] = L[:, 1, 2] 72 | uncertainty[:, 5] = L[:, 2, 2] 73 | return uncertainty 74 | 75 | def strip_symmetric(sym): 76 | return strip_lowerdiag(sym) 77 | 78 | def build_rotation(r): 79 | norm = torch.sqrt(r[:,0]*r[:,0] + r[:,1]*r[:,1] + r[:,2]*r[:,2] + r[:,3]*r[:,3]) 80 | 81 | q = r / norm[:, None] 82 | 83 | R = torch.zeros((q.size(0), 3, 3), device='cuda') 84 | 85 | r = q[:, 0] 86 | x = q[:, 1] 87 | y = q[:, 2] 88 | z = q[:, 3] 89 | 90 | R[:, 0, 0] = 1 - 2 * (y*y + z*z) 91 | R[:, 0, 1] = 2 * (x*y - r*z) 92 | R[:, 0, 2] = 2 * (x*z + r*y) 93 | R[:, 1, 0] = 2 * (x*y + r*z) 94 | R[:, 1, 1] = 1 - 2 * (x*x + z*z) 95 | R[:, 1, 2] = 2 * (y*z - r*x) 96 | R[:, 2, 0] = 2 * (x*z - r*y) 97 | R[:, 2, 1] = 2 * (y*z + r*x) 98 | R[:, 2, 2] = 1 - 2 * (x*x + y*y) 99 | return R 100 | 101 | def build_scaling_rotation(s, r): 102 | L = torch.zeros((s.shape[0], 3, 3), dtype=torch.float, device="cuda") 103 | R = build_rotation(r) 104 | 105 | L[:,0,0] = s[:,0] 106 | L[:,1,1] = s[:,1] 107 | L[:,2,2] = s[:,2] 108 | 109 | L = R @ L 110 | return L 111 | 112 | def safe_state(silent): 113 | old_f = sys.stdout 114 | class F: 115 | def __init__(self, silent): 116 | self.silent = silent 117 | 118 | def write(self, x): 119 | if not self.silent: 120 | if x.endswith("\n"): 121 | old_f.write(x.replace("\n", " 
[{}]\n".format(str(datetime.now().strftime("%d/%m %H:%M:%S"))))) 122 | else: 123 | old_f.write(x) 124 | 125 | def flush(self): 126 | old_f.flush() 127 | 128 | sys.stdout = F(silent) 129 | 130 | random.seed(0) 131 | np.random.seed(0) 132 | torch.manual_seed(0) 133 | torch.cuda.set_device(torch.device("cuda:0")) 134 | -------------------------------------------------------------------------------- /utils/graphics_utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | import math 14 | import numpy as np 15 | from typing import NamedTuple 16 | 17 | class BasicPointCloud(NamedTuple): 18 | points : np.array 19 | colors : np.array 20 | normals : np.array 21 | 22 | def geom_transform_points(points, transf_matrix): 23 | P, _ = points.shape 24 | ones = torch.ones(P, 1, dtype=points.dtype, device=points.device) 25 | points_hom = torch.cat([points, ones], dim=1) 26 | points_out = torch.matmul(points_hom, transf_matrix.unsqueeze(0)) 27 | 28 | denom = points_out[..., 3:] + 0.0000001 29 | return (points_out[..., :3] / denom).squeeze(dim=0) 30 | 31 | def getWorld2View(R, t): 32 | Rt = np.zeros((4, 4)) 33 | Rt[:3, :3] = R.transpose() 34 | Rt[:3, 3] = t 35 | Rt[3, 3] = 1.0 36 | return np.float32(Rt) 37 | 38 | def getWorld2View2(R, t, translate=np.array([.0, .0, .0]), scale=1.0): 39 | Rt = np.zeros((4, 4)) 40 | Rt[:3, :3] = R.transpose() 41 | Rt[:3, 3] = t 42 | Rt[3, 3] = 1.0 43 | 44 | C2W = np.linalg.inv(Rt) 45 | cam_center = C2W[:3, 3] 46 | cam_center = (cam_center + translate) * scale 47 | C2W[:3, 3] = cam_center 48 | Rt = np.linalg.inv(C2W) 49 | return np.float32(Rt) 50 | 51 | def getProjectionMatrix(znear, zfar, fovX, fovY): 52 | tanHalfFovY = math.tan((fovY / 2)) 53 | tanHalfFovX = math.tan((fovX / 2)) 54 | 55 | top = tanHalfFovY * znear 56 | bottom = -top 57 | right = tanHalfFovX * znear 58 | left = -right 59 | 60 | P = torch.zeros(4, 4) 61 | 62 | z_sign = 1.0 63 | 64 | P[0, 0] = 2.0 * znear / (right - left) 65 | P[1, 1] = 2.0 * znear / (top - bottom) 66 | P[0, 2] = (right + left) / (right - left) 67 | P[1, 2] = (top + bottom) / (top - bottom) 68 | P[3, 2] = z_sign 69 | P[2, 2] = z_sign * zfar / (zfar - znear) 70 | P[2, 3] = -(zfar * znear) / (zfar - znear) 71 | return P 72 | 73 | def fov2focal(fov, pixels): 74 | return pixels / (2 * math.tan(fov / 2)) 75 | 76 | def focal2fov(focal, pixels): 77 | return 2*math.atan(pixels/(2*focal)) -------------------------------------------------------------------------------- /utils/image_utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | 14 | def mse(img1, img2): 15 | return (((img1 - img2)) ** 2).view(img1.shape[0], -1).mean(1, keepdim=True) 16 | 17 | def psnr(img1, img2): 18 | mse = (((img1 - img2)) ** 2).view(img1.shape[0], -1).mean(1, keepdim=True) 19 | return 20 * torch.log10(1.0 / torch.sqrt(mse)) 20 | -------------------------------------------------------------------------------- /utils/loss_utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | import torch.nn.functional as F 14 | from torch.autograd import Variable 15 | from math import exp 16 | try: 17 | from diff_gaussian_rasterization._C import fusedssim, fusedssim_backward 18 | except: 19 | pass 20 | 21 | C1 = 0.01 ** 2 22 | C2 = 0.03 ** 2 23 | 24 | class FusedSSIMMap(torch.autograd.Function): 25 | @staticmethod 26 | def forward(ctx, C1, C2, img1, img2): 27 | ssim_map = fusedssim(C1, C2, img1, img2) 28 | ctx.save_for_backward(img1.detach(), img2) 29 | ctx.C1 = C1 30 | ctx.C2 = C2 31 | return ssim_map 32 | 33 | @staticmethod 34 | def backward(ctx, opt_grad): 35 | img1, img2 = ctx.saved_tensors 36 | C1, C2 = ctx.C1, ctx.C2 37 | grad = fusedssim_backward(C1, C2, img1, img2, opt_grad) 38 | return None, None, grad, None 39 | 40 | def l1_loss(network_output, gt): 41 | return torch.abs((network_output - gt)).mean() 42 | 43 | def l2_loss(network_output, gt): 44 | return ((network_output - gt) ** 2).mean() 45 | 46 | def gaussian(window_size, sigma): 47 | gauss = torch.Tensor([exp(-(x - window_size // 2) ** 2 / float(2 * sigma ** 2)) for x in range(window_size)]) 48 | return gauss / gauss.sum() 49 | 50 | def create_window(window_size, channel): 51 | _1D_window = gaussian(window_size, 1.5).unsqueeze(1) 52 | _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0) 53 | window = Variable(_2D_window.expand(channel, 1, window_size, window_size).contiguous()) 54 | return window 55 | 56 | def ssim(img1, img2, window_size=11, size_average=True): 57 | channel = img1.size(-3) 58 | window = create_window(window_size, channel) 59 | 60 | if img1.is_cuda: 61 | window = window.cuda(img1.get_device()) 62 | window = window.type_as(img1) 63 | 64 | return _ssim(img1, img2, window, window_size, channel, size_average) 65 | 66 | def _ssim(img1, img2, window, window_size, channel, size_average=True): 67 | mu1 = F.conv2d(img1, window, padding=window_size // 2, groups=channel) 68 | mu2 = F.conv2d(img2, window, padding=window_size // 2, groups=channel) 69 | 70 | mu1_sq = mu1.pow(2) 71 | mu2_sq = mu2.pow(2) 72 | mu1_mu2 = mu1 * mu2 73 | 74 | sigma1_sq = F.conv2d(img1 * img1, window, padding=window_size // 2, groups=channel) - mu1_sq 75 | sigma2_sq = F.conv2d(img2 * img2, window, padding=window_size // 2, groups=channel) - mu2_sq 76 | sigma12 = F.conv2d(img1 * img2, window, padding=window_size // 2, groups=channel) - mu1_mu2 77 | 78 | C1 = 0.01 ** 2 79 | C2 = 0.03 ** 2 80 | 81 | ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2)) 82 | 83 | if size_average: 84 | return ssim_map.mean() 85 | else: 86 | return ssim_map.mean(1).mean(1).mean(1) 87 | 
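# Note: fast_ssim (below) computes SSIM through the fused CUDA kernel
# (fusedssim / fusedssim_backward) imported at the top of this file; if that
# optional import failed, FusedSSIMMap.forward raises a NameError at call time,
# so the pure-PyTorch ssim() above is the path to use in that case.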
88 | 89 | def fast_ssim(img1, img2): 90 | ssim_map = FusedSSIMMap.apply(C1, C2, img1, img2) 91 | return ssim_map.mean() 92 | -------------------------------------------------------------------------------- /utils/make_depth_scale.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import argparse 3 | import cv2 4 | from joblib import delayed, Parallel 5 | import json 6 | from read_write_model import * 7 | 8 | def get_scales(key, cameras, images, points3d_ordered, args): 9 | image_meta = images[key] 10 | cam_intrinsic = cameras[image_meta.camera_id] 11 | 12 | pts_idx = images_metas[key].point3D_ids 13 | 14 | mask = pts_idx >= 0 15 | mask *= pts_idx < len(points3d_ordered) 16 | 17 | pts_idx = pts_idx[mask] 18 | valid_xys = image_meta.xys[mask] 19 | 20 | if len(pts_idx) > 0: 21 | pts = points3d_ordered[pts_idx] 22 | else: 23 | pts = np.array([0, 0, 0]) 24 | 25 | R = qvec2rotmat(image_meta.qvec) 26 | pts = np.dot(pts, R.T) + image_meta.tvec 27 | 28 | invcolmapdepth = 1. / pts[..., 2] 29 | n_remove = len(image_meta.name.split('.')[-1]) + 1 30 | invmonodepthmap = cv2.imread(f"{args.depths_dir}/{image_meta.name[:-n_remove]}.png", cv2.IMREAD_UNCHANGED) 31 | 32 | if invmonodepthmap is None: 33 | return None 34 | 35 | if invmonodepthmap.ndim != 2: 36 | invmonodepthmap = invmonodepthmap[..., 0] 37 | 38 | invmonodepthmap = invmonodepthmap.astype(np.float32) / (2**16) 39 | s = invmonodepthmap.shape[0] / cam_intrinsic.height 40 | 41 | maps = (valid_xys * s).astype(np.float32) 42 | valid = ( 43 | (maps[..., 0] >= 0) * 44 | (maps[..., 1] >= 0) * 45 | (maps[..., 0] < cam_intrinsic.width * s) * 46 | (maps[..., 1] < cam_intrinsic.height * s) * (invcolmapdepth > 0)) 47 | 48 | if valid.sum() > 10 and (invcolmapdepth.max() - invcolmapdepth.min()) > 1e-3: 49 | maps = maps[valid, :] 50 | invcolmapdepth = invcolmapdepth[valid] 51 | invmonodepth = cv2.remap(invmonodepthmap, maps[..., 0], maps[..., 1], interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REPLICATE)[..., 0] 52 | 53 | ## Median / dev 54 | t_colmap = np.median(invcolmapdepth) 55 | s_colmap = np.mean(np.abs(invcolmapdepth - t_colmap)) 56 | 57 | t_mono = np.median(invmonodepth) 58 | s_mono = np.mean(np.abs(invmonodepth - t_mono)) 59 | scale = s_colmap / s_mono 60 | offset = t_colmap - t_mono * scale 61 | else: 62 | scale = 0 63 | offset = 0 64 | return {"image_name": image_meta.name[:-n_remove], "scale": scale, "offset": offset} 65 | 66 | if __name__ == '__main__': 67 | parser = argparse.ArgumentParser() 68 | parser.add_argument('--base_dir', default="../data/big_gaussians/standalone_chunks/campus") 69 | parser.add_argument('--depths_dir', default="../data/big_gaussians/standalone_chunks/campus/depths_any") 70 | parser.add_argument('--model_type', default="bin") 71 | args = parser.parse_args() 72 | 73 | 74 | cam_intrinsics, images_metas, points3d = read_model(os.path.join(args.base_dir, "sparse", "0"), ext=f".{args.model_type}") 75 | 76 | pts_indices = np.array([points3d[key].id for key in points3d]) 77 | pts_xyzs = np.array([points3d[key].xyz for key in points3d]) 78 | points3d_ordered = np.zeros([pts_indices.max()+1, 3]) 79 | points3d_ordered[pts_indices] = pts_xyzs 80 | 81 | # depth_param_list = [get_scales(key, cam_intrinsics, images_metas, points3d_ordered, args) for key in images_metas] 82 | depth_param_list = Parallel(n_jobs=-1, backend="threading")( 83 | delayed(get_scales)(key, cam_intrinsics, images_metas, points3d_ordered, args) for key in images_metas 84 | ) 85 | 86 | depth_params = 
{ 87 | depth_param["image_name"]: {"scale": depth_param["scale"], "offset": depth_param["offset"]} 88 | for depth_param in depth_param_list if depth_param != None 89 | } 90 | 91 | with open(f"{args.base_dir}/sparse/0/depth_params.json", "w") as f: 92 | json.dump(depth_params, f, indent=2) 93 | 94 | print(0) 95 | -------------------------------------------------------------------------------- /utils/sh_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The PlenOctree Authors. 2 | # Redistribution and use in source and binary forms, with or without 3 | # modification, are permitted provided that the following conditions are met: 4 | # 5 | # 1. Redistributions of source code must retain the above copyright notice, 6 | # this list of conditions and the following disclaimer. 7 | # 8 | # 2. Redistributions in binary form must reproduce the above copyright notice, 9 | # this list of conditions and the following disclaimer in the documentation 10 | # and/or other materials provided with the distribution. 11 | # 12 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 13 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 14 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 15 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 16 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 17 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 18 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 19 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 20 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 21 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 22 | # POSSIBILITY OF SUCH DAMAGE. 23 | 24 | import torch 25 | 26 | C0 = 0.28209479177387814 27 | C1 = 0.4886025119029199 28 | C2 = [ 29 | 1.0925484305920792, 30 | -1.0925484305920792, 31 | 0.31539156525252005, 32 | -1.0925484305920792, 33 | 0.5462742152960396 34 | ] 35 | C3 = [ 36 | -0.5900435899266435, 37 | 2.890611442640554, 38 | -0.4570457994644658, 39 | 0.3731763325901154, 40 | -0.4570457994644658, 41 | 1.445305721320277, 42 | -0.5900435899266435 43 | ] 44 | C4 = [ 45 | 2.5033429417967046, 46 | -1.7701307697799304, 47 | 0.9461746957575601, 48 | -0.6690465435572892, 49 | 0.10578554691520431, 50 | -0.6690465435572892, 51 | 0.47308734787878004, 52 | -1.7701307697799304, 53 | 0.6258357354491761, 54 | ] 55 | 56 | 57 | def eval_sh(deg, sh, dirs): 58 | """ 59 | Evaluate spherical harmonics at unit directions 60 | using hardcoded SH polynomials. 61 | Works with torch/np/jnp. 62 | ... Can be 0 or more batch dimensions. 63 | Args: 64 | deg: int SH deg. 
Currently, 0-3 supported 65 | sh: jnp.ndarray SH coeffs [..., C, (deg + 1) ** 2] 66 | dirs: jnp.ndarray unit directions [..., 3] 67 | Returns: 68 | [..., C] 69 | """ 70 | assert deg <= 4 and deg >= 0 71 | coeff = (deg + 1) ** 2 72 | assert sh.shape[-1] >= coeff 73 | 74 | result = C0 * sh[..., 0] 75 | if deg > 0: 76 | x, y, z = dirs[..., 0:1], dirs[..., 1:2], dirs[..., 2:3] 77 | result = (result - 78 | C1 * y * sh[..., 1] + 79 | C1 * z * sh[..., 2] - 80 | C1 * x * sh[..., 3]) 81 | 82 | if deg > 1: 83 | xx, yy, zz = x * x, y * y, z * z 84 | xy, yz, xz = x * y, y * z, x * z 85 | result = (result + 86 | C2[0] * xy * sh[..., 4] + 87 | C2[1] * yz * sh[..., 5] + 88 | C2[2] * (2.0 * zz - xx - yy) * sh[..., 6] + 89 | C2[3] * xz * sh[..., 7] + 90 | C2[4] * (xx - yy) * sh[..., 8]) 91 | 92 | if deg > 2: 93 | result = (result + 94 | C3[0] * y * (3 * xx - yy) * sh[..., 9] + 95 | C3[1] * xy * z * sh[..., 10] + 96 | C3[2] * y * (4 * zz - xx - yy)* sh[..., 11] + 97 | C3[3] * z * (2 * zz - 3 * xx - 3 * yy) * sh[..., 12] + 98 | C3[4] * x * (4 * zz - xx - yy) * sh[..., 13] + 99 | C3[5] * z * (xx - yy) * sh[..., 14] + 100 | C3[6] * x * (xx - 3 * yy) * sh[..., 15]) 101 | 102 | if deg > 3: 103 | result = (result + C4[0] * xy * (xx - yy) * sh[..., 16] + 104 | C4[1] * yz * (3 * xx - yy) * sh[..., 17] + 105 | C4[2] * xy * (7 * zz - 1) * sh[..., 18] + 106 | C4[3] * yz * (7 * zz - 3) * sh[..., 19] + 107 | C4[4] * (zz * (35 * zz - 30) + 3) * sh[..., 20] + 108 | C4[5] * xz * (7 * zz - 3) * sh[..., 21] + 109 | C4[6] * (xx - yy) * (7 * zz - 1) * sh[..., 22] + 110 | C4[7] * xz * (xx - 3 * yy) * sh[..., 23] + 111 | C4[8] * (xx * (xx - 3 * yy) - yy * (3 * xx - yy)) * sh[..., 24]) 112 | return result 113 | 114 | def RGB2SH(rgb): 115 | return (rgb - 0.5) / C0 116 | 117 | def SH2RGB(sh): 118 | return sh * C0 + 0.5 -------------------------------------------------------------------------------- /utils/system_utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | from errno import EEXIST 13 | from os import makedirs, path 14 | import os 15 | 16 | def mkdir_p(folder_path): 17 | # Creates a directory. equivalent to using mkdir -p on the command line 18 | try: 19 | makedirs(folder_path) 20 | except OSError as exc: # Python >2.5 21 | if exc.errno == EEXIST and path.isdir(folder_path): 22 | pass 23 | else: 24 | raise 25 | 26 | def searchForMaxIteration(folder): 27 | saved_iters = [int(fname.split("_")[-1]) for fname in os.listdir(folder)] 28 | return max(saved_iters) 29 | --------------------------------------------------------------------------------
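Usage sketch (not part of the repository sources): the closure returned by get_expon_lr_func in utils/general_utils.py decays log-linearly from lr_init at step 0 to lr_final at max_steps, with an optional sine-eased warm-up when lr_delay_steps > 0. The snippet below simply queries such a schedule; it assumes the repository root is on PYTHONPATH with torch and numpy installed, and the argument values are illustrative only, not the project's configured defaults.

    from utils.general_utils import get_expon_lr_func

    # Build a decay schedule (example values, not the repository's defaults).
    xyz_lr = get_expon_lr_func(
        lr_init=1.6e-4,
        lr_final=1.6e-6,
        lr_delay_steps=0,   # no warm-up easing in this sketch
        lr_delay_mult=1.0,
        max_steps=30_000,
    )

    # lr(0) == lr_init, lr(max_steps) == lr_final, log-linear in between.
    for step in (0, 7_000, 30_000):
        print(step, xyz_lr(step))

Similarly, a minimal sanity check of the spherical-harmonics helpers in utils/sh_utils.py (same assumptions as above): RGB2SH stores a colour as a degree-0 (DC) coefficient, and eval_sh at degree 0 reproduces it up to the 0.5 offset regardless of view direction.

    import torch
    from utils.sh_utils import RGB2SH, SH2RGB, eval_sh

    rgb = torch.tensor([[0.2, 0.5, 0.8]])        # one point, RGB in [0, 1]
    sh_dc = RGB2SH(rgb)                          # DC (degree-0) SH coefficient
    assert torch.allclose(SH2RGB(sh_dc), rgb)    # exact round trip

    # eval_sh expects coefficients shaped [..., C, (deg + 1) ** 2]; at degree 0
    # the direction is ignored and the result is C0 * sh, i.e. the colour minus 0.5.
    sh = sh_dc.unsqueeze(-1)                     # shape [1, 3, 1]
    dirs = torch.tensor([[0.0, 0.0, 1.0]])       # any unit direction
    assert torch.allclose(eval_sh(0, sh, dirs) + 0.5, rgb)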