├── .gitignore ├── .gitmodules ├── LICENSE.md ├── README.md ├── arguments └── __init__.py ├── convert.py ├── docs ├── index.html └── static │ ├── css │ ├── bulma-carousel.min.css │ ├── bulma-slider.min.css │ ├── bulma.css.map.txt │ ├── bulma.min.css │ ├── fontawesome.all.min.css │ └── index.css │ ├── images │ ├── approach.png │ ├── bench.png │ ├── comparison_table.png │ ├── comparison_vis.png │ ├── favicon.svg │ ├── figurines-insertion.png │ ├── lawn.png │ ├── overview.png │ ├── room.png │ └── teaser.png │ ├── js │ ├── bulma-carousel.js │ ├── bulma-carousel.min.js │ ├── bulma-slider.js │ ├── bulma-slider.min.js │ ├── fontawesome.all.min.js │ └── index.js │ └── videos │ ├── ._Icon │ ├── bouquet.mp4 │ ├── bouquet_query.mp4 │ ├── bouquet_sg.mp4 │ ├── figurines.mp4 │ ├── figurines_query.mp4 │ ├── figurines_sg.mp4 │ ├── insertion_query.mp4 │ ├── insertion_vg.mp4 │ ├── ramen.mp4 │ ├── ramen_query.mp4 │ ├── ramen_sg.mp4 │ ├── replay.mp4 │ ├── shoe_rack.mp4 │ ├── shoe_rack_query.mp4 │ ├── shoe_rack_sg.mp4 │ ├── teaser.mp4 │ ├── teatime.mp4 │ ├── teatime_pca.mp4 │ ├── teatime_query.mp4 │ ├── teatime_sg.mp4 │ ├── waldo_kitchen.mp4 │ ├── waldo_kitchen_query.mp4 │ └── waldo_kitchen_sg.mp4 ├── environment.yml ├── eval ├── colormaps.py ├── colors.py ├── eval_utils.py ├── evaluate_iou_3dovs.py ├── openclip_encoder.py └── utils.py ├── eval_3DOVS.sh ├── feature_map_renderer.py ├── full_eval.py ├── gaussian_feature_extractor.py ├── gaussian_renderer ├── __init__.py └── network_gui.py ├── lpipsPyTorch ├── __init__.py └── modules │ ├── lpips.py │ ├── networks.py │ └── utils.py ├── metrics.py ├── render.py ├── requirements.txt ├── run_3DOVS.sh ├── run_lerf.sh ├── scene ├── __init__.py ├── cameras.py ├── colmap_loader.py ├── dataset_readers.py └── gaussian_model.py ├── train.py └── utils ├── camera_utils.py ├── general_utils.py ├── graphics_utils.py ├── image_utils.py ├── loss_utils.py ├── make_depth_scale.py ├── read_write_model.py ├── sh_utils.py └── system_utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .vscode 3 | output 4 | build 5 | diff_rasterization/diff_rast.egg-info 6 | diff_rasterization/dist 7 | tensorboard_3d 8 | screenshots 9 | eval_results -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "submodules/simple-knn"] 2 | path = submodules/simple-knn 3 | url = https://gitlab.inria.fr/bkerbl/simple-knn.git 4 | [submodule "submodules/diff-gaussian-rasterization"] 5 | path = submodules/diff-gaussian-rasterization 6 | url = https://github.com/graphdeco-inria/diff-gaussian-rasterization.git 7 | branch = dr_aa 8 | [submodule "SIBR_viewers"] 9 | path = SIBR_viewers 10 | url = https://gitlab.inria.fr/sibr/sibr_core.git 11 | [submodule "submodules/fused-ssim"] 12 | path = submodules/fused-ssim 13 | url = https://github.com/rahul-goel/fused-ssim.git 14 | [submodule "submodules/gsplat"] 15 | path = submodules/gsplat 16 | url = git@github.com:JoannaCCJH/gsplat.git 17 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Gaussian-Splatting License 2 | =========================== 3 | 4 | **Inria** and **the Max Planck Institut for Informatik (MPII)** hold all the ownership rights on the *Software* named **gaussian-splatting**. 
5 | The *Software* is in the process of being registered with the Agence pour la Protection des 6 | Programmes (APP). 7 | 8 | The *Software* is still being developed by the *Licensor*. 9 | 10 | *Licensor*'s goal is to allow the research community to use, test and evaluate 11 | the *Software*. 12 | 13 | ## 1. Definitions 14 | 15 | *Licensee* means any person or entity that uses the *Software* and distributes 16 | its *Work*. 17 | 18 | *Licensor* means the owners of the *Software*, i.e Inria and MPII 19 | 20 | *Software* means the original work of authorship made available under this 21 | License ie gaussian-splatting. 22 | 23 | *Work* means the *Software* and any additions to or derivative works of the 24 | *Software* that are made available under this License. 25 | 26 | 27 | ## 2. Purpose 28 | This license is intended to define the rights granted to the *Licensee* by 29 | Licensors under the *Software*. 30 | 31 | ## 3. Rights granted 32 | 33 | For the above reasons Licensors have decided to distribute the *Software*. 34 | Licensors grant non-exclusive rights to use the *Software* for research purposes 35 | to research users (both academic and industrial), free of charge, without right 36 | to sublicense.. The *Software* may be used "non-commercially", i.e., for research 37 | and/or evaluation purposes only. 38 | 39 | Subject to the terms and conditions of this License, you are granted a 40 | non-exclusive, royalty-free, license to reproduce, prepare derivative works of, 41 | publicly display, publicly perform and distribute its *Work* and any resulting 42 | derivative works in any form. 43 | 44 | ## 4. Limitations 45 | 46 | **4.1 Redistribution.** You may reproduce or distribute the *Work* only if (a) you do 47 | so under this License, (b) you include a complete copy of this License with 48 | your distribution, and (c) you retain without modification any copyright, 49 | patent, trademark, or attribution notices that are present in the *Work*. 50 | 51 | **4.2 Derivative Works.** You may specify that additional or different terms apply 52 | to the use, reproduction, and distribution of your derivative works of the *Work* 53 | ("Your Terms") only if (a) Your Terms provide that the use limitation in 54 | Section 2 applies to your derivative works, and (b) you identify the specific 55 | derivative works that are subject to Your Terms. Notwithstanding Your Terms, 56 | this License (including the redistribution requirements in Section 3.1) will 57 | continue to apply to the *Work* itself. 58 | 59 | **4.3** Any other use without of prior consent of Licensors is prohibited. Research 60 | users explicitly acknowledge having received from Licensors all information 61 | allowing to appreciate the adequacy between of the *Software* and their needs and 62 | to undertake all necessary precautions for its execution and use. 63 | 64 | **4.4** The *Software* is provided both as a compiled library file and as source 65 | code. In case of using the *Software* for a publication or other results obtained 66 | through the use of the *Software*, users are strongly encouraged to cite the 67 | corresponding publications as explained in the documentation of the *Software*. 68 | 69 | ## 5. Disclaimer 70 | 71 | THE USER CANNOT USE, EXPLOIT OR DISTRIBUTE THE *SOFTWARE* FOR COMMERCIAL PURPOSES 72 | WITHOUT PRIOR AND EXPLICIT CONSENT OF LICENSORS. YOU MUST CONTACT INRIA FOR ANY 73 | UNAUTHORIZED USE: stip-sophia.transfert@inria.fr . ANY SUCH ACTION WILL 74 | CONSTITUTE A FORGERY. 
THIS *SOFTWARE* IS PROVIDED "AS IS" WITHOUT ANY WARRANTIES 75 | OF ANY NATURE AND ANY EXPRESS OR IMPLIED WARRANTIES, WITH REGARDS TO COMMERCIAL 76 | USE, PROFESSIONNAL USE, LEGAL OR NOT, OR OTHER, OR COMMERCIALISATION OR 77 | ADAPTATION. UNLESS EXPLICITLY PROVIDED BY LAW, IN NO EVENT, SHALL INRIA OR THE 78 | AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 79 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 80 | GOODS OR SERVICES, LOSS OF USE, DATA, OR PROFITS OR BUSINESS INTERRUPTION) 81 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 82 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING FROM, OUT OF OR 83 | IN CONNECTION WITH THE *SOFTWARE* OR THE USE OR OTHER DEALINGS IN THE *SOFTWARE*. 84 | 85 | ## 6. Files subject to permissive licenses 86 | The contents of the file ```utils/loss_utils.py``` are based on publicly available code authored by Evan Su, which falls under the permissive MIT license. 87 | 88 | Title: pytorch-ssim\ 89 | Project code: https://github.com/Po-Hsun-Su/pytorch-ssim\ 90 | Copyright Evan Su, 2017\ 91 | License: https://github.com/Po-Hsun-Su/pytorch-ssim/blob/master/LICENSE.txt (MIT) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Occam's LGS: An Efficient Approach for Language Gaussian Splatting 2 | 3 | [![arXiv](https://img.shields.io/badge/arXiv-2412.01807-b31b1b.svg)](https://arxiv.org/abs/2412.01807) 4 | [![Project Page](https://img.shields.io/badge/Project-Page-blue)](https://insait-institute.github.io/OccamLGS/) 5 | 6 | This is the official implementation of "Occam's LGS: An Efficient Approach for Language Gaussian Splatting". 7 | 8 | ## Overview 9 | 10 | Occam's LGS is a simple, training-free approach for Language-guided 3D Gaussian Splatting that achieves state-of-the-art results with a 100x speed improvement. 
Our method: 11 | 12 | - 🎯 Lifts 2D language features to 3D Gaussian Splats without complex modules or training 13 | - 🚀 Provides 100x faster optimization compared to existing methods 14 | - 🧩 Works with any feature dimension without compression 15 | - 🎨 Enables easy scene manipulation and object insertion 16 | 17 | ## Installation Guide 18 | 19 | ### System Requirements 20 | We use the following setup to run OccamLGS: 21 | 22 | - NVIDIA GPU with CUDA support 23 | - PyTorch 2.2.2 24 | - Python 3.10 25 | - GCC 11.4.0 26 | 27 | ### Clone Repository 28 | ```bash 29 | git clone git@github.com:JoannaCCJH/occamlgs.git --recursive 30 | ``` 31 | 32 | ### Environment Setup 33 | ```bash 34 | micromamba create -n occamlgs python=3.10 35 | micromamba activate occamlgs 36 | pip install torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2 --index-url https://download.pytorch.org/whl/cu121 37 | ``` 38 | 39 | ### Project Dependencies 40 | ```bash 41 | pip install -r requirements.txt 42 | micromamba install -c conda-forge gxx=11.4.0 43 | ``` 44 | 45 | ### Submodules 46 | ```bash 47 | pip install -e submodules/gsplat[dev] 48 | pip install -e submodules/simple-knn 49 | ``` 50 | 51 | ## Dataset Preparation 52 | ### Input Dataset 53 | The dataset follows a structured format where each 3D scene is organized as follows: 54 | ``` 55 | lerf_ovs/ 56 | └── scene_name/ # Name of the specific scene (e.g., teatime) 57 | ├── distorted/ 58 | ├── images/ # Contains the original, unprocessed scene images 59 | ├── language_features/ # Pre-extracted language embeddings 60 | │ ├── frame_00001_f.npy 61 | │ ├── frame_00001_s.npy 62 | │ └── ... 63 | ├── sparse/0/ 64 | │ ├── test.txt # Testing image list 65 | │ ├── cameras.bin 66 | │ ├── images.bin 67 | │ └── points3D.bin 68 | ├── stereo/ 69 | ``` 70 | Notes: 71 | - Language features are pre-extracted and stored as 512-dimensional vectors. 72 | - For detailed information about feature levels and the language feature extraction methodology, please refer to the [LangSplat repository](https://github.com/minghanqin/LangSplat). 73 | 74 | ### Output Directory Structure 75 | The pre-trained RGB model outputs are organized as follows: 76 | ``` 77 | output/ 78 | └── dataset_name/ 79 | └── scene_name/ 80 | ├── point_cloud/ 81 | │ └── iteration_30000/ 82 | │ └── point_cloud.ply # Point cloud at 30K iterations 83 | ├── cameras.json 84 | ├── cfg_args 85 | ├── chkpnt30000.pth # Model checkpoint at 30K iterations 86 | └── input.ply 87 | 88 | ``` 89 | After running `gaussian_feature_extractor.py` for all three feature levels, three additional checkpoint files are added: 90 | 91 | ``` 92 | output/ 93 | └── dataset_name/ 94 | └── scene_name/ 95 | ├── point_cloud/ 96 | │ └── iteration_30000/ 97 | │ └── point_cloud.ply # Point cloud at 30K iterations 98 | ├── cameras.json 99 | ├── cfg_args 100 | ├── chkpnt30000.pth # RGB model checkpoint 101 | ├── input.ply 102 | ├── chkpnt30000_langfeat_1.pth # Language features level 1 103 | ├── chkpnt30000_langfeat_2.pth # Language features level 2 104 | └── chkpnt30000_langfeat_3.pth # Language features level 3 105 | 106 | ``` 107 | 108 | Note: The script `gaussian_feature_extractor.py` generates three new semantic checkpoints, each containing a different level of language features while maintaining the same RGB model weights from the original checkpoint.
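As a quick sanity check of the pre-extracted language features described above, the sketch below loads one frame's feature/segment pair and prints its shape. This is a minimal, hypothetical helper rather than part of the repository: the scene path and frame name are placeholders, and the assumption that `*_f.npy` stores the 512-dimensional feature vectors while `*_s.npy` stores the corresponding segmentation maps follows LangSplat's convention, so adapt it to your own data layout.

```python
# Hypothetical sanity-check snippet, not part of this repository.
# Assumes the LangSplat-style layout described above:
#   <scene>/language_features/<frame>_f.npy   # per-segment feature vectors (expected 512-dim)
#   <scene>/language_features/<frame>_s.npy   # segmentation maps associating pixels with segments
import numpy as np

scene_dir = "lerf_ovs/teatime"   # placeholder: path to your scene
frame = "frame_00001"            # placeholder: any frame with extracted features

feats = np.load(f"{scene_dir}/language_features/{frame}_f.npy")
segs = np.load(f"{scene_dir}/language_features/{frame}_s.npy")

print("features:", feats.shape, feats.dtype)   # last dimension should be 512
print("segments:", segs.shape, segs.dtype)
```

If the feature dimension or file layout differs, double-check the extraction step against the LangSplat repository before running `gaussian_feature_extractor.py`.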
109 | 110 | ## Usage 111 | 112 | 113 | ### Prerequisites 114 | 115 | - A pre-trained RGB Gaussian model (use the `train.py` and `render.py` commands below to train a model on your scene with the gsplat renderer) 116 | - A `test.txt` file in `scene_name/sparse/0/` defining the test set 117 | 118 | 119 | #### 1. Train and Render RGB Gaussian Model 120 | ```bash 121 | # Train the Gaussian model 122 | python train.py -s $DATA_SOURCE_PATH -m $MODEL_OUTPUT_PATH --iterations 30000 123 | 124 | # Render the trained model 125 | python render.py -m $MODEL_OUTPUT_PATH --iteration 30000 126 | ``` 127 | 128 | #### 2. Feature Extraction and Visualization 129 | ```bash 130 | # Extract per-Gaussian feature vectors 131 | python gaussian_feature_extractor.py -m $MODEL_OUTPUT_PATH --iteration 30000 --eval --feature_level 1 132 | 133 | # Render feature maps 134 | python feature_map_renderer.py -m $MODEL_OUTPUT_PATH --iteration 30000 --eval --feature_level 1 135 | ``` 136 | ### Example Pipeline 137 | Check `run_lerf.sh` for a complete example using the "teatime" scene from the LERF_OVS dataset and `run_3DOVS.sh` for a complete example using the "bench" scene from the 3D-OVS dataset. 138 | 139 | ## Evaluation 140 | ### LERF 141 | We follow the evaluation methodology established by LangSplat for our LERF assessments. For detailed information about the evaluation metrics and procedures, please refer to the LangSplat repository. 142 | 143 | ### 3DOVS 144 | Here are the instructions for evaluating the 3DOVS dataset. 145 | 1. Configure Parameters: Open `eval_3DOVS.sh` and adjust the following: 146 | - `DATASET_NAME`: Set to your 3DOVS dataset split (e.g., "bench") 147 | - `GT_FOLDER`: Path to your preprocessed 3DOVS data 148 | - `FEAT_FOLDER_NAME`: Name of your model's feature output folder 149 | 2. Run the evaluation script: 150 | ```bash 151 | sh eval_3DOVS.sh 152 | ``` 153 | 3. View Results: Evaluation metrics and visualizations will be saved to the `eval_results` directory. 154 | 155 | **Configuration Options** 156 | 157 | The evaluation script supports several parameters: 158 | 159 | - `--stability_thresh`: Threshold for stability analysis (default: 0.4) 160 | - `--min_mask_size`: Minimum valid mask size (default: 0.005) 161 | - `--max_mask_size`: Maximum valid mask size (default: 0.9) 162 | 163 | For detailed information about our evaluation methodology, please refer to the supplementary materials in our paper. 164 | 165 | 166 | ## TODO 167 | - [x] Training and rendering code released 168 | - [x] GSplat rasterizer code released 169 | - [x] Evaluation code released 170 | - [ ] Corrected room scene labels to be released 171 | - [ ] Autoencoder for any-dimensional features to be released 172 | 173 | ## Acknowledgement 174 | Our code is built on [LangSplat](https://github.com/minghanqin/LangSplat), [3DGS](https://github.com/graphdeco-inria/gaussian-splatting), and [gsplat](https://github.com/nerfstudio-project/gsplat). We greatly appreciate their open-source contributions!
175 | 176 | ## BibTeX 177 | 178 | ```bibtex 179 | @article{cheng2024occamslgssimpleapproach, 180 | title={Occam's LGS: A Simple Approach for Language Gaussian Splatting}, 181 | author={Jiahuan Cheng and Jan-Nico Zaech and Luc Van Gool and Danda Pani Paudel}, 182 | year={2024}, 183 | eprint={2412.01807} 184 | } 185 | -------------------------------------------------------------------------------- /arguments/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | from argparse import ArgumentParser, Namespace 13 | import sys 14 | import os 15 | 16 | class GroupParams: 17 | pass 18 | 19 | class ParamGroup: 20 | def __init__(self, parser: ArgumentParser, name : str, fill_none = False): 21 | group = parser.add_argument_group(name) 22 | for key, value in vars(self).items(): 23 | shorthand = False 24 | if key.startswith("_"): 25 | shorthand = True 26 | key = key[1:] 27 | t = type(value) 28 | value = value if not fill_none else None 29 | if shorthand: 30 | if t == bool: 31 | group.add_argument("--" + key, ("-" + key[0:1]), default=value, action="store_true") 32 | else: 33 | group.add_argument("--" + key, ("-" + key[0:1]), default=value, type=t) 34 | else: 35 | if t == bool: 36 | group.add_argument("--" + key, default=value, action="store_true") 37 | else: 38 | group.add_argument("--" + key, default=value, type=t) 39 | 40 | def extract(self, args): 41 | group = GroupParams() 42 | for arg in vars(args).items(): 43 | if arg[0] in vars(self) or ("_" + arg[0]) in vars(self): 44 | setattr(group, arg[0], arg[1]) 45 | return group 46 | 47 | class ModelParams(ParamGroup): 48 | def __init__(self, parser, sentinel=False): 49 | self.sh_degree = 3 50 | self._source_path = "" 51 | self._model_path = "" 52 | self._images = "images" 53 | self._depths = "" 54 | self._resolution = -1 55 | self._white_background = False 56 | self.train_test_exp = False 57 | self.data_device = "cuda" 58 | self.eval = False 59 | 60 | self._language_features_name = "language_features" 61 | self._feature_level = 2 62 | super().__init__(parser, "Loading Parameters", sentinel) 63 | 64 | def extract(self, args): 65 | g = super().extract(args) 66 | g.source_path = os.path.abspath(g.source_path) 67 | try : 68 | g.lf_path = os.path.join(g.source_path, g.language_features_name) 69 | except: 70 | pass 71 | return g 72 | 73 | class PipelineParams(ParamGroup): 74 | def __init__(self, parser): 75 | self.convert_SHs_python = False 76 | self.compute_cov3D_python = False 77 | self.debug = False 78 | self.antialiasing = False 79 | super().__init__(parser, "Pipeline Parameters") 80 | 81 | class OptimizationParams(ParamGroup): 82 | def __init__(self, parser): 83 | self.iterations = 30_000 84 | self.position_lr_init = 0.00016 85 | self.position_lr_final = 0.0000016 86 | self.position_lr_delay_mult = 0.01 87 | self.position_lr_max_steps = 30_000 88 | self.feature_lr = 0.0025 89 | self.opacity_lr = 0.025 90 | self.scaling_lr = 0.005 91 | self.rotation_lr = 0.001 92 | self.percent_dense = 0.01 93 | self.lambda_dssim = 0.2 94 | self.densification_interval = 100 95 | self.opacity_reset_interval = 3000 96 | self.densify_from_iter = 500 97 | self.densify_until_iter = 15_000 98 | 
self.densify_grad_threshold = 0.0002 99 | self.depth_l1_weight_init = 1.0 100 | self.depth_l1_weight_final = 0.01 101 | self.random_background = False 102 | self.optimizer_type = "default" 103 | 104 | super().__init__(parser, "Optimization Parameters") 105 | 106 | def get_combined_args(parser : ArgumentParser): 107 | cmdlne_string = sys.argv[1:] 108 | cfgfile_string = "Namespace()" 109 | args_cmdline = parser.parse_args(cmdlne_string) 110 | 111 | try: 112 | cfgfilepath = os.path.join(args_cmdline.model_path, "cfg_args") 113 | print("Looking for config file in", cfgfilepath) 114 | with open(cfgfilepath) as cfg_file: 115 | print("Config file found: {}".format(cfgfilepath)) 116 | cfgfile_string = cfg_file.read() 117 | except TypeError: 118 | print("Config file not found at") 119 | pass 120 | args_cfgfile = eval(cfgfile_string) 121 | 122 | merged_dict = vars(args_cfgfile).copy() 123 | for k,v in vars(args_cmdline).items(): 124 | if v != None: 125 | merged_dict[k] = v 126 | return Namespace(**merged_dict) 127 | -------------------------------------------------------------------------------- /convert.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import os 13 | import logging 14 | from argparse import ArgumentParser 15 | import shutil 16 | 17 | # This Python script is based on the shell converter script provided in the MipNerF 360 repository. 18 | parser = ArgumentParser("Colmap converter") 19 | parser.add_argument("--no_gpu", action='store_true') 20 | parser.add_argument("--skip_matching", action='store_true') 21 | parser.add_argument("--source_path", "-s", required=True, type=str) 22 | parser.add_argument("--camera", default="OPENCV", type=str) 23 | parser.add_argument("--colmap_executable", default="", type=str) 24 | parser.add_argument("--resize", action="store_true") 25 | parser.add_argument("--magick_executable", default="", type=str) 26 | args = parser.parse_args() 27 | colmap_command = '"{}"'.format(args.colmap_executable) if len(args.colmap_executable) > 0 else "colmap" 28 | magick_command = '"{}"'.format(args.magick_executable) if len(args.magick_executable) > 0 else "magick" 29 | use_gpu = 1 if not args.no_gpu else 0 30 | 31 | if not args.skip_matching: 32 | os.makedirs(args.source_path + "/distorted/sparse", exist_ok=True) 33 | 34 | ## Feature extraction 35 | feat_extracton_cmd = colmap_command + " feature_extractor "\ 36 | "--database_path " + args.source_path + "/distorted/database.db \ 37 | --image_path " + args.source_path + "/input \ 38 | --ImageReader.single_camera 1 \ 39 | --ImageReader.camera_model " + args.camera + " \ 40 | --SiftExtraction.use_gpu " + str(use_gpu) 41 | exit_code = os.system(feat_extracton_cmd) 42 | if exit_code != 0: 43 | logging.error(f"Feature extraction failed with code {exit_code}. Exiting.") 44 | exit(exit_code) 45 | 46 | ## Feature matching 47 | feat_matching_cmd = colmap_command + " exhaustive_matcher \ 48 | --database_path " + args.source_path + "/distorted/database.db \ 49 | --SiftMatching.use_gpu " + str(use_gpu) 50 | exit_code = os.system(feat_matching_cmd) 51 | if exit_code != 0: 52 | logging.error(f"Feature matching failed with code {exit_code}. 
Exiting.") 53 | exit(exit_code) 54 | 55 | ### Bundle adjustment 56 | # The default Mapper tolerance is unnecessarily large, 57 | # decreasing it speeds up bundle adjustment steps. 58 | mapper_cmd = (colmap_command + " mapper \ 59 | --database_path " + args.source_path + "/distorted/database.db \ 60 | --image_path " + args.source_path + "/input \ 61 | --output_path " + args.source_path + "/distorted/sparse \ 62 | --Mapper.ba_global_function_tolerance=0.000001") 63 | exit_code = os.system(mapper_cmd) 64 | if exit_code != 0: 65 | logging.error(f"Mapper failed with code {exit_code}. Exiting.") 66 | exit(exit_code) 67 | 68 | ### Image undistortion 69 | ## We need to undistort our images into ideal pinhole intrinsics. 70 | img_undist_cmd = (colmap_command + " image_undistorter \ 71 | --image_path " + args.source_path + "/input \ 72 | --input_path " + args.source_path + "/distorted/sparse/0 \ 73 | --output_path " + args.source_path + "\ 74 | --output_type COLMAP") 75 | exit_code = os.system(img_undist_cmd) 76 | if exit_code != 0: 77 | logging.error(f"Mapper failed with code {exit_code}. Exiting.") 78 | exit(exit_code) 79 | 80 | files = os.listdir(args.source_path + "/sparse") 81 | os.makedirs(args.source_path + "/sparse/0", exist_ok=True) 82 | # Copy each file from the source directory to the destination directory 83 | for file in files: 84 | if file == '0': 85 | continue 86 | source_file = os.path.join(args.source_path, "sparse", file) 87 | destination_file = os.path.join(args.source_path, "sparse", "0", file) 88 | shutil.move(source_file, destination_file) 89 | 90 | if(args.resize): 91 | print("Copying and resizing...") 92 | 93 | # Resize images. 94 | os.makedirs(args.source_path + "/images_2", exist_ok=True) 95 | os.makedirs(args.source_path + "/images_4", exist_ok=True) 96 | os.makedirs(args.source_path + "/images_8", exist_ok=True) 97 | # Get the list of files in the source directory 98 | files = os.listdir(args.source_path + "/images") 99 | # Copy each file from the source directory to the destination directory 100 | for file in files: 101 | source_file = os.path.join(args.source_path, "images", file) 102 | 103 | destination_file = os.path.join(args.source_path, "images_2", file) 104 | shutil.copy2(source_file, destination_file) 105 | exit_code = os.system(magick_command + " mogrify -resize 50% " + destination_file) 106 | if exit_code != 0: 107 | logging.error(f"50% resize failed with code {exit_code}. Exiting.") 108 | exit(exit_code) 109 | 110 | destination_file = os.path.join(args.source_path, "images_4", file) 111 | shutil.copy2(source_file, destination_file) 112 | exit_code = os.system(magick_command + " mogrify -resize 25% " + destination_file) 113 | if exit_code != 0: 114 | logging.error(f"25% resize failed with code {exit_code}. Exiting.") 115 | exit(exit_code) 116 | 117 | destination_file = os.path.join(args.source_path, "images_8", file) 118 | shutil.copy2(source_file, destination_file) 119 | exit_code = os.system(magick_command + " mogrify -resize 12.5% " + destination_file) 120 | if exit_code != 0: 121 | logging.error(f"12.5% resize failed with code {exit_code}. 
Exiting.") 122 | exit(exit_code) 123 | 124 | print("Done.") 125 | -------------------------------------------------------------------------------- /docs/static/css/bulma-carousel.min.css: -------------------------------------------------------------------------------- 1 | @-webkit-keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}@keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}.slider{position:relative;width:100%}.slider-container{display:flex;flex-wrap:nowrap;flex-direction:row;overflow:hidden;-webkit-transform:translate3d(0,0,0);transform:translate3d(0,0,0);min-height:100%}.slider-container.is-vertical{flex-direction:column}.slider-container .slider-item{flex:none}.slider-container .slider-item .image.is-covered img{-o-object-fit:cover;object-fit:cover;-o-object-position:center center;object-position:center center;height:100%;width:100%}.slider-container .slider-item .video-container{height:0;padding-bottom:0;padding-top:56.25%;margin:0;position:relative}.slider-container .slider-item .video-container.is-1by1,.slider-container .slider-item .video-container.is-square{padding-top:100%}.slider-container .slider-item .video-container.is-4by3{padding-top:75%}.slider-container .slider-item .video-container.is-21by9{padding-top:42.857143%}.slider-container .slider-item .video-container embed,.slider-container .slider-item .video-container iframe,.slider-container .slider-item .video-container object{position:absolute;top:0;left:0;width:100%!important;height:100%!important}.slider-navigation-next,.slider-navigation-previous{display:flex;justify-content:center;align-items:center;position:absolute;width:42px;height:42px;background:#fff center center no-repeat;background-size:20px 20px;border:1px solid #fff;border-radius:25091983px;box-shadow:0 2px 5px #3232321a;top:50%;margin-top:-20px;left:0;cursor:pointer;transition:opacity .3s,-webkit-transform .3s;transition:transform .3s,opacity .3s;transition:transform .3s,opacity .3s,-webkit-transform .3s}.slider-navigation-next:hover,.slider-navigation-previous:hover{-webkit-transform:scale(1.2);transform:scale(1.2)}.slider-navigation-next.is-hidden,.slider-navigation-previous.is-hidden{display:none;opacity:0}.slider-navigation-next svg,.slider-navigation-previous svg{width:25%}.slider-navigation-next{left:auto;right:0;background:#fff center center no-repeat;background-size:20px 20px}.slider-pagination{display:none;justify-content:center;align-items:center;position:absolute;bottom:0;left:0;right:0;padding:.5rem 1rem;text-align:center}.slider-pagination .slider-page{background:#fff;width:10px;height:10px;border-radius:25091983px;display:inline-block;margin:0 3px;box-shadow:0 2px 5px #3232321a;transition:-webkit-transform .3s;transition:transform .3s;transition:transform .3s,-webkit-transform .3s;cursor:pointer}.slider-pagination .slider-page.is-active,.slider-pagination .slider-page:hover{-webkit-transform:scale(1.4);transform:scale(1.4)}@media screen and (min-width:800px){.slider-pagination{display:flex}}.hero.has-carousel{position:relative}.hero.has-carousel+.hero-body,.hero.has-carousel+.hero-footer,.hero.has-carousel+.hero-head{z-index:10;overflow:hidden}.hero.has-carousel .hero-carousel{position:absolute;top:0;left:0;bottom:0;right:0;height:auto;border:none;margin:auto;padding:0;z-index:0}.hero.has-carousel .hero-carousel 
.slider{width:100%;max-width:100%;overflow:hidden;height:100%!important;max-height:100%;z-index:0}.hero.has-carousel .hero-carousel .slider .has-background{max-height:100%}.hero.has-carousel .hero-carousel .slider .has-background .is-background{-o-object-fit:cover;object-fit:cover;-o-object-position:center center;object-position:center center;height:100%;width:100%}.hero.has-carousel .hero-body{margin:0 3rem;z-index:10} -------------------------------------------------------------------------------- /docs/static/css/bulma-slider.min.css: -------------------------------------------------------------------------------- 1 | @-webkit-keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}@keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}input[type=range].slider{-webkit-appearance:none;-moz-appearance:none;appearance:none;margin:1rem 0;background:0 0;touch-action:none}input[type=range].slider.is-fullwidth{display:block;width:100%}input[type=range].slider:focus{outline:0}input[type=range].slider:not([orient=vertical])::-webkit-slider-runnable-track{width:100%}input[type=range].slider:not([orient=vertical])::-moz-range-track{width:100%}input[type=range].slider:not([orient=vertical])::-ms-track{width:100%}input[type=range].slider:not([orient=vertical]).has-output+output,input[type=range].slider:not([orient=vertical]).has-output-tooltip+output{width:3rem;background:#4a4a4a;border-radius:4px;padding:.4rem .8rem;font-size:.75rem;line-height:.75rem;text-align:center;text-overflow:ellipsis;white-space:nowrap;color:#fff;overflow:hidden;pointer-events:none;z-index:200}input[type=range].slider:not([orient=vertical]).has-output-tooltip:disabled+output,input[type=range].slider:not([orient=vertical]).has-output:disabled+output{opacity:.5}input[type=range].slider:not([orient=vertical]).has-output{display:inline-block;vertical-align:middle;width:calc(100% - (4.2rem))}input[type=range].slider:not([orient=vertical]).has-output+output{display:inline-block;margin-left:.75rem;vertical-align:middle}input[type=range].slider:not([orient=vertical]).has-output-tooltip{display:block}input[type=range].slider:not([orient=vertical]).has-output-tooltip+output{position:absolute;left:0;top:-.1rem}input[type=range].slider[orient=vertical]{-webkit-appearance:slider-vertical;-moz-appearance:slider-vertical;appearance:slider-vertical;-webkit-writing-mode:bt-lr;-ms-writing-mode:bt-lr;writing-mode:bt-lr}input[type=range].slider[orient=vertical]::-webkit-slider-runnable-track{height:100%}input[type=range].slider[orient=vertical]::-moz-range-track{height:100%}input[type=range].slider[orient=vertical]::-ms-track{height:100%}input[type=range].slider::-webkit-slider-runnable-track{cursor:pointer;animate:.2s;box-shadow:0 0 0 #7a7a7a;background:#dbdbdb;border-radius:4px;border:0 solid #7a7a7a}input[type=range].slider::-moz-range-track{cursor:pointer;animate:.2s;box-shadow:0 0 0 #7a7a7a;background:#dbdbdb;border-radius:4px;border:0 solid #7a7a7a}input[type=range].slider::-ms-track{cursor:pointer;animate:.2s;box-shadow:0 0 0 #7a7a7a;background:#dbdbdb;border-radius:4px;border:0 solid #7a7a7a}input[type=range].slider::-ms-fill-lower{background:#dbdbdb;border-radius:4px}input[type=range].slider::-ms-fill-upper{background:#dbdbdb;border-radius:4px}input[type=range].slider::-webkit-slider-thumb{box-shadow:none;border:1px solid 
#b5b5b5;border-radius:4px;background:#fff;cursor:pointer}input[type=range].slider::-moz-range-thumb{box-shadow:none;border:1px solid #b5b5b5;border-radius:4px;background:#fff;cursor:pointer}input[type=range].slider::-ms-thumb{box-shadow:none;border:1px solid #b5b5b5;border-radius:4px;background:#fff;cursor:pointer}input[type=range].slider::-webkit-slider-thumb{-webkit-appearance:none;appearance:none}input[type=range].slider.is-circle::-webkit-slider-thumb{border-radius:290486px}input[type=range].slider.is-circle::-moz-range-thumb{border-radius:290486px}input[type=range].slider.is-circle::-ms-thumb{border-radius:290486px}input[type=range].slider:active::-webkit-slider-thumb{-webkit-transform:scale(1.25);transform:scale(1.25)}input[type=range].slider:active::-moz-range-thumb{transform:scale(1.25)}input[type=range].slider:active::-ms-thumb{transform:scale(1.25)}input[type=range].slider:disabled{opacity:.5;cursor:not-allowed}input[type=range].slider:disabled::-webkit-slider-thumb{cursor:not-allowed;-webkit-transform:scale(1);transform:scale(1)}input[type=range].slider:disabled::-moz-range-thumb{cursor:not-allowed;transform:scale(1)}input[type=range].slider:disabled::-ms-thumb{cursor:not-allowed;transform:scale(1)}input[type=range].slider:not([orient=vertical]){min-height:calc((1rem + 2px) * 1.25)}input[type=range].slider:not([orient=vertical])::-webkit-slider-runnable-track{height:.5rem}input[type=range].slider:not([orient=vertical])::-moz-range-track{height:.5rem}input[type=range].slider:not([orient=vertical])::-ms-track{height:.5rem}input[type=range].slider[orient=vertical]::-webkit-slider-runnable-track{width:.5rem}input[type=range].slider[orient=vertical]::-moz-range-track{width:.5rem}input[type=range].slider[orient=vertical]::-ms-track{width:.5rem}input[type=range].slider::-webkit-slider-thumb{height:1rem;width:1rem}input[type=range].slider::-moz-range-thumb{height:1rem;width:1rem}input[type=range].slider::-ms-thumb{height:1rem;width:1rem}input[type=range].slider::-ms-thumb{margin-top:0}input[type=range].slider::-webkit-slider-thumb{margin-top:-.25rem}input[type=range].slider[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.25rem}input[type=range].slider.is-small:not([orient=vertical]){min-height:calc((.75rem + 2px) * 1.25)}input[type=range].slider.is-small:not([orient=vertical])::-webkit-slider-runnable-track{height:.375rem}input[type=range].slider.is-small:not([orient=vertical])::-moz-range-track{height:.375rem}input[type=range].slider.is-small:not([orient=vertical])::-ms-track{height:.375rem}input[type=range].slider.is-small[orient=vertical]::-webkit-slider-runnable-track{width:.375rem}input[type=range].slider.is-small[orient=vertical]::-moz-range-track{width:.375rem}input[type=range].slider.is-small[orient=vertical]::-ms-track{width:.375rem}input[type=range].slider.is-small::-webkit-slider-thumb{height:.75rem;width:.75rem}input[type=range].slider.is-small::-moz-range-thumb{height:.75rem;width:.75rem}input[type=range].slider.is-small::-ms-thumb{height:.75rem;width:.75rem}input[type=range].slider.is-small::-ms-thumb{margin-top:0}input[type=range].slider.is-small::-webkit-slider-thumb{margin-top:-.1875rem}input[type=range].slider.is-small[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.1875rem}input[type=range].slider.is-medium:not([orient=vertical]){min-height:calc((1.25rem + 2px) * 
1.25)}input[type=range].slider.is-medium:not([orient=vertical])::-webkit-slider-runnable-track{height:.625rem}input[type=range].slider.is-medium:not([orient=vertical])::-moz-range-track{height:.625rem}input[type=range].slider.is-medium:not([orient=vertical])::-ms-track{height:.625rem}input[type=range].slider.is-medium[orient=vertical]::-webkit-slider-runnable-track{width:.625rem}input[type=range].slider.is-medium[orient=vertical]::-moz-range-track{width:.625rem}input[type=range].slider.is-medium[orient=vertical]::-ms-track{width:.625rem}input[type=range].slider.is-medium::-webkit-slider-thumb{height:1.25rem;width:1.25rem}input[type=range].slider.is-medium::-moz-range-thumb{height:1.25rem;width:1.25rem}input[type=range].slider.is-medium::-ms-thumb{height:1.25rem;width:1.25rem}input[type=range].slider.is-medium::-ms-thumb{margin-top:0}input[type=range].slider.is-medium::-webkit-slider-thumb{margin-top:-.3125rem}input[type=range].slider.is-medium[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.3125rem}input[type=range].slider.is-large:not([orient=vertical]){min-height:calc((1.5rem + 2px) * 1.25)}input[type=range].slider.is-large:not([orient=vertical])::-webkit-slider-runnable-track{height:.75rem}input[type=range].slider.is-large:not([orient=vertical])::-moz-range-track{height:.75rem}input[type=range].slider.is-large:not([orient=vertical])::-ms-track{height:.75rem}input[type=range].slider.is-large[orient=vertical]::-webkit-slider-runnable-track{width:.75rem}input[type=range].slider.is-large[orient=vertical]::-moz-range-track{width:.75rem}input[type=range].slider.is-large[orient=vertical]::-ms-track{width:.75rem}input[type=range].slider.is-large::-webkit-slider-thumb{height:1.5rem;width:1.5rem}input[type=range].slider.is-large::-moz-range-thumb{height:1.5rem;width:1.5rem}input[type=range].slider.is-large::-ms-thumb{height:1.5rem;width:1.5rem}input[type=range].slider.is-large::-ms-thumb{margin-top:0}input[type=range].slider.is-large::-webkit-slider-thumb{margin-top:-.375rem}input[type=range].slider.is-large[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.375rem}input[type=range].slider.is-white::-moz-range-track{background:#fff!important}input[type=range].slider.is-white::-webkit-slider-runnable-track{background:#fff!important}input[type=range].slider.is-white::-ms-track{background:#fff!important}input[type=range].slider.is-white::-ms-fill-lower{background:#fff}input[type=range].slider.is-white::-ms-fill-upper{background:#fff}input[type=range].slider.is-white .has-output-tooltip+output,input[type=range].slider.is-white.has-output+output{background-color:#fff;color:#0a0a0a}input[type=range].slider.is-black::-moz-range-track{background:#0a0a0a!important}input[type=range].slider.is-black::-webkit-slider-runnable-track{background:#0a0a0a!important}input[type=range].slider.is-black::-ms-track{background:#0a0a0a!important}input[type=range].slider.is-black::-ms-fill-lower{background:#0a0a0a}input[type=range].slider.is-black::-ms-fill-upper{background:#0a0a0a}input[type=range].slider.is-black 
.has-output-tooltip+output,input[type=range].slider.is-black.has-output+output{background-color:#0a0a0a;color:#fff}input[type=range].slider.is-light::-moz-range-track{background:#f5f5f5!important}input[type=range].slider.is-light::-webkit-slider-runnable-track{background:#f5f5f5!important}input[type=range].slider.is-light::-ms-track{background:#f5f5f5!important}input[type=range].slider.is-light::-ms-fill-lower{background:#f5f5f5}input[type=range].slider.is-light::-ms-fill-upper{background:#f5f5f5}input[type=range].slider.is-light .has-output-tooltip+output,input[type=range].slider.is-light.has-output+output{background-color:#f5f5f5;color:#363636}input[type=range].slider.is-dark::-moz-range-track{background:#363636!important}input[type=range].slider.is-dark::-webkit-slider-runnable-track{background:#363636!important}input[type=range].slider.is-dark::-ms-track{background:#363636!important}input[type=range].slider.is-dark::-ms-fill-lower{background:#363636}input[type=range].slider.is-dark::-ms-fill-upper{background:#363636}input[type=range].slider.is-dark .has-output-tooltip+output,input[type=range].slider.is-dark.has-output+output{background-color:#363636;color:#f5f5f5}input[type=range].slider.is-primary::-moz-range-track{background:#00d1b2!important}input[type=range].slider.is-primary::-webkit-slider-runnable-track{background:#00d1b2!important}input[type=range].slider.is-primary::-ms-track{background:#00d1b2!important}input[type=range].slider.is-primary::-ms-fill-lower{background:#00d1b2}input[type=range].slider.is-primary::-ms-fill-upper{background:#00d1b2}input[type=range].slider.is-primary .has-output-tooltip+output,input[type=range].slider.is-primary.has-output+output{background-color:#00d1b2;color:#fff}input[type=range].slider.is-link::-moz-range-track{background:#3273dc!important}input[type=range].slider.is-link::-webkit-slider-runnable-track{background:#3273dc!important}input[type=range].slider.is-link::-ms-track{background:#3273dc!important}input[type=range].slider.is-link::-ms-fill-lower{background:#3273dc}input[type=range].slider.is-link::-ms-fill-upper{background:#3273dc}input[type=range].slider.is-link .has-output-tooltip+output,input[type=range].slider.is-link.has-output+output{background-color:#3273dc;color:#fff}input[type=range].slider.is-info::-moz-range-track{background:#209cee!important}input[type=range].slider.is-info::-webkit-slider-runnable-track{background:#209cee!important}input[type=range].slider.is-info::-ms-track{background:#209cee!important}input[type=range].slider.is-info::-ms-fill-lower{background:#209cee}input[type=range].slider.is-info::-ms-fill-upper{background:#209cee}input[type=range].slider.is-info .has-output-tooltip+output,input[type=range].slider.is-info.has-output+output{background-color:#209cee;color:#fff}input[type=range].slider.is-success::-moz-range-track{background:#23d160!important}input[type=range].slider.is-success::-webkit-slider-runnable-track{background:#23d160!important}input[type=range].slider.is-success::-ms-track{background:#23d160!important}input[type=range].slider.is-success::-ms-fill-lower{background:#23d160}input[type=range].slider.is-success::-ms-fill-upper{background:#23d160}input[type=range].slider.is-success 
.has-output-tooltip+output,input[type=range].slider.is-success.has-output+output{background-color:#23d160;color:#fff}input[type=range].slider.is-warning::-moz-range-track{background:#ffdd57!important}input[type=range].slider.is-warning::-webkit-slider-runnable-track{background:#ffdd57!important}input[type=range].slider.is-warning::-ms-track{background:#ffdd57!important}input[type=range].slider.is-warning::-ms-fill-lower{background:#ffdd57}input[type=range].slider.is-warning::-ms-fill-upper{background:#ffdd57}input[type=range].slider.is-warning .has-output-tooltip+output,input[type=range].slider.is-warning.has-output+output{background-color:#ffdd57;color:rgba(0,0,0,.7)}input[type=range].slider.is-danger::-moz-range-track{background:#ff3860!important}input[type=range].slider.is-danger::-webkit-slider-runnable-track{background:#ff3860!important}input[type=range].slider.is-danger::-ms-track{background:#ff3860!important}input[type=range].slider.is-danger::-ms-fill-lower{background:#ff3860}input[type=range].slider.is-danger::-ms-fill-upper{background:#ff3860}input[type=range].slider.is-danger .has-output-tooltip+output,input[type=range].slider.is-danger.has-output+output{background-color:#ff3860;color:#fff} -------------------------------------------------------------------------------- /docs/static/css/index.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: 'Noto Sans', sans-serif; 3 | } 4 | 5 | 6 | .footer .icon-link { 7 | font-size: 25px; 8 | color: #000; 9 | } 10 | 11 | .link-block a { 12 | margin-top: 5px; 13 | margin-bottom: 5px; 14 | } 15 | 16 | .dnerf { 17 | font-variant: small-caps; 18 | } 19 | 20 | 21 | .teaser .hero-body { 22 | padding-top: 0; 23 | padding-bottom: 3rem; 24 | } 25 | 26 | .teaser { 27 | font-family: 'Google Sans', sans-serif; 28 | } 29 | 30 | 31 | .publication-title { 32 | } 33 | 34 | .publication-banner { 35 | max-height: parent; 36 | 37 | } 38 | 39 | .publication-banner video { 40 | position: relative; 41 | left: auto; 42 | top: auto; 43 | transform: none; 44 | object-fit: fit; 45 | } 46 | 47 | .publication-header .hero-body { 48 | } 49 | 50 | .publication-title { 51 | font-family: 'Google Sans', sans-serif; 52 | } 53 | 54 | .publication-authors { 55 | font-family: 'Google Sans', sans-serif; 56 | } 57 | 58 | .publication-venue { 59 | color: #555; 60 | width: fit-content; 61 | font-weight: bold; 62 | } 63 | 64 | .publication-awards { 65 | color: #ff3860; 66 | width: fit-content; 67 | font-weight: bolder; 68 | } 69 | 70 | .publication-authors { 71 | } 72 | 73 | .publication-authors a { 74 | color: hsl(204, 86%, 53%) !important; 75 | } 76 | 77 | .publication-authors a:hover { 78 | text-decoration: underline; 79 | } 80 | 81 | .author-block { 82 | display: inline-block; 83 | } 84 | 85 | .publication-banner img { 86 | } 87 | 88 | .publication-authors { 89 | /*color: #4286f4;*/ 90 | } 91 | 92 | .publication-video { 93 | position: relative; 94 | width: 100%; 95 | height: 0; 96 | padding-bottom: 56.25%; 97 | 98 | overflow: hidden; 99 | border-radius: 10px !important; 100 | } 101 | 102 | .publication-video iframe { 103 | position: absolute; 104 | top: 0; 105 | left: 0; 106 | width: 100%; 107 | height: 100%; 108 | } 109 | 110 | .publication-body img { 111 | } 112 | 113 | .results-carousel { 114 | overflow: hidden; 115 | } 116 | 117 | .results-carousel .item { 118 | margin: 5px; 119 | overflow: hidden; 120 | border: 1px solid #bbb; 121 | border-radius: 10px; 122 | padding: 0; 123 | font-size: 0; 124 | } 125 | 126 | 
.results-carousel video { 127 | margin: 0; 128 | } 129 | 130 | 131 | .interpolation-panel { 132 | background: #f5f5f5; 133 | border-radius: 10px; 134 | } 135 | 136 | .interpolation-panel .interpolation-image { 137 | width: 100%; 138 | border-radius: 5px; 139 | } 140 | 141 | .interpolation-video-column { 142 | } 143 | 144 | .interpolation-panel .slider { 145 | margin: 0 !important; 146 | } 147 | 148 | .interpolation-panel .slider { 149 | margin: 0 !important; 150 | } 151 | 152 | #interpolation-image-wrapper { 153 | width: 100%; 154 | } 155 | #interpolation-image-wrapper img { 156 | border-radius: 5px; 157 | } 158 | 159 | .row { 160 | display: flex; 161 | flex-direction: row; 162 | width: 70%; 163 | height: auto; 164 | margin: 10px 0; 165 | } 166 | 167 | .carousel { 168 | overflow: hidden; 169 | max-width: 100%; 170 | } 171 | .card{ 172 | margin-left: 1rem; 173 | margin-right: 1rem; 174 | margin-bottom: 1.5rem; 175 | } 176 | .carousel video{ 177 | width: 100%; 178 | height: auto; 179 | } 180 | .clustr { 181 | display: none; 182 | } 183 | -------------------------------------------------------------------------------- /docs/static/images/approach.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/images/approach.png -------------------------------------------------------------------------------- /docs/static/images/bench.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/images/bench.png -------------------------------------------------------------------------------- /docs/static/images/comparison_table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/images/comparison_table.png -------------------------------------------------------------------------------- /docs/static/images/comparison_vis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/images/comparison_vis.png -------------------------------------------------------------------------------- /docs/static/images/favicon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /docs/static/images/figurines-insertion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/images/figurines-insertion.png -------------------------------------------------------------------------------- /docs/static/images/lawn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/images/lawn.png -------------------------------------------------------------------------------- /docs/static/images/overview.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/images/overview.png -------------------------------------------------------------------------------- /docs/static/images/room.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/images/room.png -------------------------------------------------------------------------------- /docs/static/images/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/images/teaser.png -------------------------------------------------------------------------------- /docs/static/js/bulma-slider.min.js: -------------------------------------------------------------------------------- 1 | !function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?exports.bulmaSlider=e():t.bulmaSlider=e()}("undefined"!=typeof self?self:this,function(){return function(n){var r={};function i(t){if(r[t])return r[t].exports;var e=r[t]={i:t,l:!1,exports:{}};return n[t].call(e.exports,e,e.exports,i),e.l=!0,e.exports}return i.m=n,i.c=r,i.d=function(t,e,n){i.o(t,e)||Object.defineProperty(t,e,{configurable:!1,enumerable:!0,get:n})},i.n=function(t){var e=t&&t.__esModule?function(){return t.default}:function(){return t};return i.d(e,"a",e),e},i.o=function(t,e){return Object.prototype.hasOwnProperty.call(t,e)},i.p="",i(i.s=0)}([function(t,e,n){"use strict";Object.defineProperty(e,"__esModule",{value:!0}),n.d(e,"isString",function(){return l});var r=n(1),i=Object.assign||function(t){for(var e=1;e=l.length&&(s=!0)):s=!0),s&&(t.once&&(u[e]=null),t.callback(r))});-1!==u.indexOf(null);)u.splice(u.indexOf(null),1)}}]),e}();e.a=i}]).default}); -------------------------------------------------------------------------------- /docs/static/js/index.js: -------------------------------------------------------------------------------- 1 | window.HELP_IMPROVE_VIDEOJS = false; 2 | 3 | var INTERP_BASE = "./static/interpolation/stacked"; 4 | var NUM_INTERP_FRAMES = 240; 5 | 6 | var interp_images = []; 7 | function preloadInterpolationImages() { 8 | for (var i = 0; i < NUM_INTERP_FRAMES; i++) { 9 | var path = INTERP_BASE + '/' + String(i).padStart(6, '0') + '.jpg'; 10 | interp_images[i] = new Image(); 11 | interp_images[i].src = path; 12 | } 13 | } 14 | 15 | function setInterpolationImage(i) { 16 | var image = interp_images[i]; 17 | image.ondragstart = function() { return false; }; 18 | image.oncontextmenu = function() { return false; }; 19 | $('#interpolation-image-wrapper').empty().append(image); 20 | } 21 | 22 | 23 | $(document).ready(function() { 24 | // Check for click events on the navbar burger icon 25 | $(".navbar-burger").click(function() { 26 | // Toggle the "is-active" class on both the "navbar-burger" and the "navbar-menu" 27 | $(".navbar-burger").toggleClass("is-active"); 28 | $(".navbar-menu").toggleClass("is-active"); 29 | 30 | }); 31 | 32 | var options = { 33 | slidesToScroll: 1, 34 | slidesToShow: 3, 35 | loop: true, 36 | infinite: true, 37 | autoplay: false, 38 | autoplaySpeed: 3000, 39 | } 40 | 41 | // Initialize all div with carousel class 42 | var carousels = 
bulmaCarousel.attach('.carousel', options); 43 | 44 | // Loop on each carousel initialized 45 | for(var i = 0; i < carousels.length; i++) { 46 | // Add listener to event 47 | carousels[i].on('before:show', state => { 48 | console.log(state); 49 | }); 50 | } 51 | 52 | // Access to bulmaCarousel instance of an element 53 | var element = document.querySelector('#my-element'); 54 | if (element && element.bulmaCarousel) { 55 | // bulmaCarousel instance is available as element.bulmaCarousel 56 | element.bulmaCarousel.on('before-show', function(state) { 57 | console.log(state); 58 | }); 59 | } 60 | 61 | /*var player = document.getElementById('interpolation-video'); 62 | player.addEventListener('loadedmetadata', function() { 63 | $('#interpolation-slider').on('input', function(event) { 64 | console.log(this.value, player.duration); 65 | player.currentTime = player.duration / 100 * this.value; 66 | }) 67 | }, false);*/ 68 | preloadInterpolationImages(); 69 | 70 | $('#interpolation-slider').on('input', function(event) { 71 | setInterpolationImage(this.value); 72 | }); 73 | setInterpolationImage(0); 74 | $('#interpolation-slider').prop('max', NUM_INTERP_FRAMES - 1); 75 | 76 | bulmaSlider.attach(); 77 | 78 | }) 79 | 80 | document.addEventListener('DOMContentLoaded', function() { 81 | bulmaCarousel.attach('#carousel-query', { 82 | slidesToScroll: 1, 83 | slidesToShow: 4, 84 | navigation: true, 85 | loop: true, 86 | }); 87 | }); 88 | 89 | -------------------------------------------------------------------------------- /docs/static/videos/._Icon: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/._Icon -------------------------------------------------------------------------------- /docs/static/videos/bouquet.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/bouquet.mp4 -------------------------------------------------------------------------------- /docs/static/videos/bouquet_query.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/bouquet_query.mp4 -------------------------------------------------------------------------------- /docs/static/videos/bouquet_sg.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/bouquet_sg.mp4 -------------------------------------------------------------------------------- /docs/static/videos/figurines.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/figurines.mp4 -------------------------------------------------------------------------------- /docs/static/videos/figurines_query.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/figurines_query.mp4 -------------------------------------------------------------------------------- 
/docs/static/videos/figurines_sg.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/figurines_sg.mp4 -------------------------------------------------------------------------------- /docs/static/videos/insertion_query.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/insertion_query.mp4 -------------------------------------------------------------------------------- /docs/static/videos/insertion_vg.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/insertion_vg.mp4 -------------------------------------------------------------------------------- /docs/static/videos/ramen.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/ramen.mp4 -------------------------------------------------------------------------------- /docs/static/videos/ramen_query.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/ramen_query.mp4 -------------------------------------------------------------------------------- /docs/static/videos/ramen_sg.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/ramen_sg.mp4 -------------------------------------------------------------------------------- /docs/static/videos/replay.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/replay.mp4 -------------------------------------------------------------------------------- /docs/static/videos/shoe_rack.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/shoe_rack.mp4 -------------------------------------------------------------------------------- /docs/static/videos/shoe_rack_query.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/shoe_rack_query.mp4 -------------------------------------------------------------------------------- /docs/static/videos/shoe_rack_sg.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/shoe_rack_sg.mp4 -------------------------------------------------------------------------------- /docs/static/videos/teaser.mp4: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/teaser.mp4 -------------------------------------------------------------------------------- /docs/static/videos/teatime.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/teatime.mp4 -------------------------------------------------------------------------------- /docs/static/videos/teatime_pca.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/teatime_pca.mp4 -------------------------------------------------------------------------------- /docs/static/videos/teatime_query.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/teatime_query.mp4 -------------------------------------------------------------------------------- /docs/static/videos/teatime_sg.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/teatime_sg.mp4 -------------------------------------------------------------------------------- /docs/static/videos/waldo_kitchen.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/waldo_kitchen.mp4 -------------------------------------------------------------------------------- /docs/static/videos/waldo_kitchen_query.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/waldo_kitchen_query.mp4 -------------------------------------------------------------------------------- /docs/static/videos/waldo_kitchen_sg.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/insait-institute/OccamLGS/70d2e6967421147fdc048c79cf1d35cf87487913/docs/static/videos/waldo_kitchen_sg.mp4 -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: gaussian_splatting 2 | channels: 3 | - pytorch 4 | - conda-forge 5 | - defaults 6 | dependencies: 7 | - cudatoolkit=11.6 8 | - plyfile 9 | - python=3.7.13 10 | - pip=22.3.1 11 | - pytorch=1.12.1 12 | - torchaudio=0.12.1 13 | - torchvision=0.13.1 14 | - tqdm 15 | - pip: 16 | - submodules/diff-gaussian-rasterization 17 | - submodules/simple-knn 18 | - submodules/fused-ssim 19 | - opencv-python 20 | - joblib 21 | -------------------------------------------------------------------------------- /eval/colormaps.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """ Helper functions for visualizing outputs """ 16 | 17 | from dataclasses import dataclass 18 | from typing import Optional 19 | 20 | import matplotlib 21 | import torch 22 | from jaxtyping import Bool, Float 23 | from torch import Tensor 24 | import colors 25 | 26 | # Colormaps = Literal["default", "turbo", "viridis", "magma", "inferno", "cividis", "gray", "pca"] 27 | Colormaps = "turbo" 28 | 29 | @dataclass(frozen=True) 30 | class ColormapOptions: 31 | """Options for colormap""" 32 | 33 | colormap: Colormaps = "default" 34 | """ The colormap to use """ 35 | normalize: bool = False 36 | """ Whether to normalize the input tensor image """ 37 | colormap_min: float = 0 38 | """ Minimum value for the output colormap """ 39 | colormap_max: float = 1 40 | """ Maximum value for the output colormap """ 41 | invert: bool = False 42 | """ Whether to invert the output colormap """ 43 | 44 | 45 | def apply_colormap( 46 | image: Float[Tensor, "*bs channels"], 47 | colormap_options: ColormapOptions = ColormapOptions(), 48 | eps: float = 1e-9, 49 | ): 50 | """ 51 | Applies a colormap to a tensor image. 52 | If single channel, applies a colormap to the image. 53 | If 3 channel, treats the channels as RGB. 54 | If more than 3 channel, applies a PCA reduction on the dimensions to 3 channels 55 | 56 | Args: 57 | image: Input tensor image. 58 | eps: Epsilon value for numerical stability. 59 | 60 | Returns: 61 | Tensor with the colormap applied. 62 | """ 63 | 64 | # default for rgb images 65 | if image.shape[-1] == 3: 66 | return image 67 | 68 | # rendering depth outputs 69 | if image.shape[-1] == 1 and torch.is_floating_point(image): 70 | output = image 71 | if colormap_options.normalize: 72 | output = output - torch.min(output) 73 | output = output / (torch.max(output) + eps) 74 | output = ( 75 | output * (colormap_options.colormap_max - colormap_options.colormap_min) + colormap_options.colormap_min 76 | ) 77 | output = torch.clip(output, 0, 1) 78 | if colormap_options.invert: 79 | output = 1 - output 80 | return apply_float_colormap(output, colormap=colormap_options.colormap) 81 | 82 | # rendering boolean outputs 83 | if image.dtype == torch.bool: 84 | return apply_boolean_colormap(image) 85 | 86 | if image.shape[-1] > 3: 87 | return apply_pca_colormap(image) 88 | 89 | raise NotImplementedError 90 | 91 | 92 | def apply_float_colormap(image: Float[Tensor, "*bs 1"], colormap: Colormaps = "viridis"): 93 | """Convert single channel to a color image. 94 | 95 | Args: 96 | image: Single channel image. 97 | colormap: Colormap for image. 
98 | 99 | Returns: 100 | Tensor: Colored image with colors in [0, 1] 101 | """ 102 | if colormap == "default": 103 | colormap = "turbo" 104 | 105 | image = torch.nan_to_num(image, 0) 106 | if colormap == "gray": 107 | return image.repeat(1, 1, 3) 108 | image_long = (image * 255).long() 109 | image_long_min = torch.min(image_long) 110 | image_long_max = torch.max(image_long) 111 | assert image_long_min >= 0, f"the min value is {image_long_min}" 112 | assert image_long_max <= 255, f"the max value is {image_long_max}" 113 | return torch.tensor(matplotlib.colormaps[colormap].colors, device=image.device)[image_long[..., 0]] 114 | 115 | 116 | def apply_depth_colormap( 117 | depth: Float[Tensor, "*bs 1"], 118 | accumulation: Optional[Float[Tensor, "*bs 1"]] = None, 119 | near_plane: Optional[float] = None, 120 | far_plane: Optional[float] = None, 121 | colormap_options: ColormapOptions = ColormapOptions(), 122 | ): 123 | """Converts a depth image to color for easier analysis. 124 | 125 | Args: 126 | depth: Depth image. 127 | accumulation: Ray accumulation used for masking vis. 128 | near_plane: Closest depth to consider. If None, use min image value. 129 | far_plane: Furthest depth to consider. If None, use max image value. 130 | colormap: Colormap to apply. 131 | 132 | Returns: 133 | Colored depth image with colors in [0, 1] 134 | """ 135 | 136 | near_plane = near_plane or float(torch.min(depth)) 137 | far_plane = far_plane or float(torch.max(depth)) 138 | 139 | depth = (depth - near_plane) / (far_plane - near_plane + 1e-10) 140 | depth = torch.clip(depth, 0, 1) 141 | # depth = torch.nan_to_num(depth, nan=0.0) # TODO(ethan): remove this 142 | 143 | colored_image = apply_colormap(depth, colormap_options=colormap_options) 144 | 145 | if accumulation is not None: 146 | colored_image = colored_image * accumulation + (1 - accumulation) 147 | 148 | return colored_image 149 | 150 | 151 | def apply_boolean_colormap( 152 | image: Bool[Tensor, "*bs 1"], 153 | true_color = colors.WHITE, 154 | false_color = colors.BLACK, 155 | ): 156 | """Converts a depth image to color for easier analysis. 157 | 158 | Args: 159 | image: Boolean image. 160 | true_color: Color to use for True. 161 | false_color: Color to use for False. 162 | 163 | Returns: 164 | Colored boolean image 165 | """ 166 | 167 | colored_image = torch.ones(image.shape[:-1] + (3,)) 168 | colored_image[image[..., 0], :] = true_color 169 | colored_image[~image[..., 0], :] = false_color 170 | return colored_image 171 | 172 | 173 | def apply_pca_colormap(image: Float[Tensor, "*bs dim"]): 174 | """Convert feature image to 3-channel RGB via PCA. 
The first three principal 175 | components are used for the color channels, with outlier rejection per-channel. 176 | 177 | Args: 178 | image: image of arbitrary vectors 179 | 180 | Returns: 181 | Tensor: Colored image 182 | """ 183 | original_shape = image.shape 184 | image = image.view(-1, image.shape[-1]) 185 | _, _, v = torch.pca_lowrank(image) 186 | image = torch.matmul(image, v[..., :3]) 187 | d = torch.abs(image - torch.median(image, dim=0).values) 188 | mdev = torch.median(d, dim=0).values 189 | s = d / mdev 190 | m = 3.0 # threshold in median absolute deviations beyond which a channel value is treated as an outlier 191 | rins = image[s[:, 0] < m, 0] 192 | gins = image[s[:, 1] < m, 1] 193 | bins = image[s[:, 2] < m, 2] 194 | 195 | if len(rins) == 0 or len(gins) == 0 or len(bins) == 0: 196 | return image.new_zeros(*original_shape[:-1], 3) 197 | 198 | image[:, 0] -= rins.min() 199 | image[:, 1] -= gins.min() 200 | image[:, 2] -= bins.min() 201 | 202 | image[:, 0] /= rins.max() - rins.min() 203 | image[:, 1] /= gins.max() - gins.min() 204 | image[:, 2] /= bins.max() - bins.min() 205 | 206 | image = torch.clamp(image, 0, 1) 207 | image_long = (image * 255).long() 208 | image_long_min = torch.min(image_long) 209 | image_long_max = torch.max(image_long) 210 | assert image_long_min >= 0, f"the min value is {image_long_min}" 211 | assert image_long_max <= 255, f"the max value is {image_long_max}" 212 | return image.view(*original_shape[:-1], 3) 213 | -------------------------------------------------------------------------------- /eval/colors.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | """Common Colors""" 16 | from typing import Union 17 | 18 | import torch 19 | from jaxtyping import Float 20 | from torch import Tensor 21 | 22 | WHITE = torch.tensor([1.0, 1.0, 1.0]) 23 | BLACK = torch.tensor([0.0, 0.0, 0.0]) 24 | RED = torch.tensor([1.0, 0.0, 0.0]) 25 | GREEN = torch.tensor([0.0, 1.0, 0.0]) 26 | BLUE = torch.tensor([0.0, 0.0, 1.0]) 27 | 28 | COLORS_DICT = { 29 | "white": WHITE, 30 | "black": BLACK, 31 | "red": RED, 32 | "green": GREEN, 33 | "blue": BLUE, 34 | } 35 | 36 | 37 | def get_color(color: Union[str, list]) -> Float[Tensor, "3"]: 38 | """ 39 | Args: 40 | Color as a string or a rgb list 41 | 42 | Returns: 43 | Parsed color 44 | """ 45 | if isinstance(color, str): 46 | color = color.lower() 47 | if color not in COLORS_DICT: 48 | raise ValueError(f"{color} is not a valid preset color") 49 | return COLORS_DICT[color] 50 | if isinstance(color, list): 51 | if len(color) != 3: 52 | raise ValueError(f"Color should be 3 values (RGB) instead got {color}") 53 | return torch.tensor(color) 54 | 55 | raise ValueError(f"Color should be an RGB list or string, instead got {type(color)}") 56 | -------------------------------------------------------------------------------- /eval/eval_utils.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | from scipy.signal import medfilt 4 | import numpy as np 5 | import torch 6 | 7 | def calculate_stability_metrics(scores, mask_sizes, thresh_range, eval_params=None): 8 | """ 9 | Calculate stability metrics for both score and mask size across different thresholds. 10 | 11 | This function evaluates how stable segmentation masks are to threshold variations by 12 | measuring the rate of change (gradient) in both relevancy scores and mask sizes. 13 | Stable segmentations show minimal changes in mask configuration when thresholds are 14 | slightly adjusted. 
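    A minimal usage sketch (editor's note, not part of the original file; the two curves are synthetic and only illustrate the expected inputs):
        thresh_range = np.arange(0.01, 1, 0.01)
        scores = np.exp(-thresh_range)            # synthetic score-vs-threshold curve
        mask_sizes = 1.0 - thresh_range           # synthetic mask-size-vs-threshold curve
        metrics = calculate_stability_metrics(scores, mask_sizes, thresh_range, eval_params={"min_mask_size": 1e-5, "max_mask_size": 0.95})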
15 | 16 | Args: 17 | scores: Array of segmentation confidence scores at different thresholds 18 | mask_sizes: Array of corresponding mask sizes (as proportion of image) at different thresholds 19 | thresh_range: Array of threshold values used for evaluation 20 | eval_params: Dictionary containing parameters for evaluation: 21 | - "min_mask_size": Minimum valid mask size as proportion (e.g., 0.00001) 22 | - "max_mask_size": Maximum valid mask size as proportion (e.g., 0.95) 23 | 24 | Returns: 25 | Dictionary containing stability metrics: 26 | - 'smooth_score_grad': Smoothed gradient of scores (lower values indicate more stability) 27 | - 'smooth_mask_grad': Smoothed gradient of mask sizes (lower values indicate more stability) 28 | - 'valid_regions': Boolean mask indicating regions where mask size falls within valid range 29 | 30 | """ 31 | # Calculate gradients 32 | score_gradient = np.abs(np.gradient(scores, thresh_range)) 33 | mask_gradient = np.abs(np.gradient(mask_sizes, thresh_range)) 34 | 35 | # Smooth gradients 36 | smooth_score_grad = medfilt(score_gradient, kernel_size=5) 37 | smooth_mask_grad = medfilt(mask_gradient, kernel_size=5) 38 | 39 | # Filter out regions where mask_size > 0.95 and < 0.00001 40 | valid_regions = (np.array(mask_sizes) > eval_params["min_mask_size"]) & (np.array(mask_sizes) < eval_params["max_mask_size"]) 41 | 42 | assert len(smooth_score_grad[valid_regions]) != 0, "No valid regions found" 43 | 44 | return { 45 | 'smooth_score_grad': smooth_score_grad, 46 | 'smooth_mask_grad': smooth_mask_grad, 47 | 'valid_regions': valid_regions 48 | } 49 | 50 | def find_stable_regions(stability_metrics, eval_params=None): 51 | """ 52 | Find continuous regions where both score and mask size gradients are stable. 53 | 54 | This function identifies threshold ranges where segmentation results remain 55 | consistent (stable), which indicates reliable segmentation performance. 
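    A minimal usage sketch (editor's note, not part of the original file; continues the synthetic example above, so `metrics` is the dictionary returned by calculate_stability_metrics):
        stable_regions = find_stable_regions(metrics, eval_params={"stability_thresh": 0.3})
        # e.g. [(10, 60)] -- index ranges into thresh_range where both smoothed gradients stay below the threshold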
56 | 57 | Args: 58 | stability_metrics: Dictionary containing stability metrics: 59 | - 'smooth_score_grad': Smoothed gradient of scores 60 | - 'smooth_mask_grad': Smoothed gradient of mask sizes 61 | - 'valid_regions': Boolean mask of valid regions 62 | eval_params: Dictionary with evaluation parameters: 63 | - "stability_thresh": Maximum gradient value considered stable 64 | min_region_length: Minimum length of a region to be considered stable 65 | 66 | Returns: 67 | List of tuples containing (start_index, end_index) of stable regions 68 | """ 69 | 70 | score_stable = stability_metrics['smooth_score_grad'] < eval_params["stability_thresh"] 71 | mask_stable = stability_metrics['smooth_mask_grad'] < eval_params["stability_thresh"] 72 | valid_regions = stability_metrics['valid_regions'] 73 | 74 | # Both metrics must be stable 75 | combined_stable = score_stable & mask_stable & valid_regions 76 | 77 | # Find continuous stable regions 78 | stable_regions = [] 79 | start_idx = None 80 | 81 | for i in range(len(combined_stable)): 82 | if combined_stable[i]: 83 | if start_idx is None: 84 | start_idx = i 85 | else: 86 | if start_idx is not None and i - start_idx >= 5: 87 | # Region ends, must be at least 5 points long 88 | stable_regions.append((start_idx, i)) 89 | start_idx = None 90 | 91 | # Handle the case where the last region extends to the end 92 | if start_idx is not None and len(combined_stable) - start_idx >= 5: 93 | stable_regions.append((start_idx, len(combined_stable)-1)) 94 | 95 | return stable_regions 96 | 97 | def compute_dynamic_threshold(valid_map, object_name, eval_params=None, thresh_range=np.arange(0.01, 1, 0.01)): 98 | 99 | """ 100 | Computes the optimal threshold for segmentation by analyzing stability across three levels. 101 | 102 | This function normalizes outputs from each feature level, evaluates segmentation performance 103 | across a range of thresholds, identifies stable regions, and selects the feature level and threshold 104 | that demonstrate the most stable segmentation behavior. 105 | 106 | Process: 107 | 1. For each feature level, normalizes the relevancy scores to [0,1] 108 | 2. Evaluates scores and mask sizes at each threshold value 109 | 3. Calculates stability metrics based on how scores and mask sizes change with threshold 110 | 4. Identifies continuous regions where both metrics are stable 111 | 5. For each level, calculates a score sensitivity metric from the stable region 112 | 6. Selects the level with the lowest score sensitivity (most stable) 113 | 7. 
Returns the chosen level and its optimal threshold value 114 | """ 115 | n_head = valid_map.shape[0] 116 | total_pixels = valid_map.shape[1] * valid_map.shape[2] 117 | score_gradients = [] 118 | thresholds = [] 119 | 120 | for head_idx in range(n_head): 121 | output = valid_map[head_idx] 122 | 123 | output = output - torch.min(output) 124 | output = output / (torch.max(output) - torch.min(output) + 1e-9) 125 | output = output.numpy() 126 | 127 | # Calculate metrics 128 | scores = [] 129 | pixel_counts = [] 130 | 131 | for thresh in thresh_range: 132 | mask = output > thresh 133 | score = np.mean(output[mask]) if np.any(mask) else 0 134 | scores.append(score) 135 | 136 | normalized_count = np.sum(mask) / total_pixels 137 | pixel_counts.append(normalized_count) 138 | 139 | # Calculate stability metrics 140 | stability = calculate_stability_metrics(scores, pixel_counts, thresh_range, eval_params=eval_params) 141 | stable_regions = find_stable_regions(stability, eval_params=eval_params) 142 | 143 | if len(stable_regions) == 0: 144 | print(f"Warning: Found {len(stable_regions)} stable regions for {object_name} head {head_idx}") 145 | score_gradients.append(999) 146 | thresholds.append(0.5) 147 | else: 148 | valid_mask = stability['valid_regions'] 149 | # Find the last stable region 150 | (start_idx, end_idx) = stable_regions[-1] 151 | # Find the longest stable region 152 | # longest_region = max(stable_regions, key=lambda region: region[1] - region[0]) 153 | # (start_idx, end_idx) = longest_region 154 | if np.any(valid_mask[start_idx:end_idx+1]): 155 | score_sensitivity = (scores[end_idx]- scores[start_idx]) / (thresh_range[end_idx] - thresh_range[start_idx] + 1e-9) 156 | score_gradients.append(score_sensitivity) 157 | thresholds.append((thresh_range[start_idx] + thresh_range[end_idx]) / 2) # take the median threshold 158 | else: 159 | score_gradients.append(999) 160 | thresholds.append(0.5) 161 | 162 | chosen_lvl = np.argmin(score_gradients) 163 | threshold = thresholds[chosen_lvl] 164 | 165 | return chosen_lvl, threshold 166 | 167 | 168 | def plot_relevancy_and_threshold(relevancy_map, prompt_name, head_idx, save_path, threshold=0.5): 169 | """ 170 | Plot relevancy map and thresholded areas side by side 171 | """ 172 | if torch.is_tensor(relevancy_map): 173 | relevancy_map = relevancy_map.numpy() 174 | 175 | # Create threshold mask 176 | threshold_mask = relevancy_map > threshold 177 | 178 | # Create figure with two subplots 179 | fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8)) 180 | 181 | # Plot relevancy map 182 | im1 = ax1.imshow(relevancy_map, cmap='viridis') 183 | ax1.set_title(f'Relevancy Map\n{prompt_name}, Level {head_idx}') 184 | fig.colorbar(im1, ax=ax1, label='Relevancy Score') 185 | ax1.axis('off') 186 | 187 | # Plot thresholded map 188 | im2 = ax2.imshow(threshold_mask, cmap='binary') 189 | ax2.set_title(f'Thresholded Map (>{threshold})\n{prompt_name}, Level {head_idx}') 190 | ax2.axis('off') 191 | 192 | plt.tight_layout() 193 | plt.savefig(save_path, dpi=300, bbox_inches='tight') 194 | plt.close() -------------------------------------------------------------------------------- /eval/evaluate_iou_3dovs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import annotations 3 | 4 | import os 5 | import glob 6 | import random 7 | from collections import defaultdict 8 | from pathlib import Path 9 | from typing import Dict, Union 10 | from argparse import ArgumentParser 11 | import logging 12 | import 
cv2 13 | import numpy as np 14 | import torch 15 | import time 16 | from tqdm import tqdm 17 | from PIL import Image 18 | 19 | import sys 20 | sys.path.append("..") 21 | import colormaps 22 | from openclip_encoder import OpenCLIPNetwork 23 | from utils import smooth, vis_mask_save, stack_mask 24 | 25 | from eval_utils import plot_relevancy_and_threshold, compute_dynamic_threshold 26 | 27 | 28 | def get_logger(name, log_file=None, log_level=logging.INFO, file_mode='w'): 29 | logger = logging.getLogger(name) 30 | stream_handler = logging.StreamHandler() 31 | handlers = [stream_handler] 32 | 33 | if log_file is not None: 34 | file_handler = logging.FileHandler(log_file, file_mode) 35 | handlers.append(file_handler) 36 | 37 | formatter = logging.Formatter( 38 | '%(asctime)s - %(name)s - %(levelname)s - %(message)s') 39 | for handler in handlers: 40 | handler.setFormatter(formatter) 41 | handler.setLevel(log_level) 42 | logger.addHandler(handler) 43 | logger.setLevel(log_level) 44 | return logger 45 | 46 | 47 | def eval_gt_3dovsdata(dataset_folder: Union[str, Path] = None, output_path: Path = None) -> tuple: 48 | """ 49 | Organizes ground truth annotations from the 3DOVS dataset. 50 | 51 | Args: 52 | dataset_folder: Path to the root directory containing the 3DOVS dataset 53 | output_path: Optional path where visualization images will be saved 54 | 55 | Returns: 56 | Tuple containing: 57 | - gt_ann: Dictionary where: 58 | - keys: Frame indices as strings 59 | - values: Nested dictionary where: 60 | - keys: Class labels 61 | - values: Dictionary containing 'mask' with the segmentation mask 62 | - image_dimensions: Tuple of (height, width) 63 | - img_paths: Dictionary mapping frame indices to paths of the original images 64 | """ 65 | gt_folder = os.path.join(dataset_folder, 'segmentations') 66 | image_folder = os.path.join(dataset_folder, 'images') 67 | 68 | gt_paths = [os.path.join(gt_folder, name) for name in os.listdir(gt_folder) if os.path.isdir(os.path.join(gt_folder, name))] 69 | gt_paths = sorted(gt_paths, key=lambda x: int(x.split('/')[-1])) 70 | img_paths = {} 71 | with open(os.path.join(gt_folder, 'classes.txt'), 'r') as f: 72 | class_names = [line.strip() for line in f] 73 | 74 | gt_ann = {} 75 | for gt_path in gt_paths: 76 | img_ann = defaultdict(dict) 77 | 78 | idx = int(gt_path.split('/')[-1]) 79 | img_path = os.path.join(image_folder, f"{gt_path.split('/')[-1]}.jpg") 80 | img_paths[idx] = img_path 81 | with Image.open(img_path) as img: 82 | w, h = img.size 83 | 84 | for prompt_data in class_names: 85 | label = prompt_data 86 | mask_path = os.path.join(gt_path, f"{label}.png") 87 | mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE) 88 | mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST) 89 | if img_ann[label].get('mask', None) is not None: 90 | mask = stack_mask(img_ann[label]['mask'], mask) 91 | img_ann[label]['mask'] = mask 92 | 93 | # save for visualization 94 | save_path = output_path / 'gt' / str(idx) / f'{label}.jpg' 95 | save_path.parent.mkdir(exist_ok=True, parents=True) 96 | vis_mask_save(mask, save_path) 97 | gt_ann[f'{idx}'] = img_ann 98 | 99 | return gt_ann, (h, w), img_paths 100 | 101 | 102 | def activate_stream(sem_map, 103 | clip_model, 104 | image_name: Path = None, 105 | img_ann: Dict = None, 106 | eval_params: Dict = None): 107 | 108 | valid_map = clip_model.get_max_across(sem_map) 109 | n_head, n_prompt, h, w = valid_map.shape 110 | valid_map = valid_map.cpu() 111 | 112 | # positive prompts 113 | chosen_iou_list, chosen_lvl_list = [], [] 114 | 115 | for k in range(n_prompt): 116 | 
117 | chosen_lvl, thresh = compute_dynamic_threshold(valid_map[:, k], clip_model.positives[k], eval_params=eval_params) 118 | 119 | for i in range(n_head): 120 | 121 | # NOTE [mask] truncate the heatmap into mask 122 | output = valid_map[i][k] 123 | output = output - torch.min(output) 124 | output = output / (torch.max(output) - torch.min(output) + 1e-9) 125 | 126 | save_path = image_name / 'comparison_maps' / f'{clip_model.positives[k]}_level{i}_comparison.png' 127 | save_path.parent.mkdir(exist_ok=True, parents=True) 128 | plot_relevancy_and_threshold(output, clip_model.positives[k], i, save_path, threshold=thresh) 129 | 130 | if i == chosen_lvl: 131 | # Create Binary Mask through thresholding: 132 | mask_pred = (output.numpy() > thresh).astype(np.uint8) 133 | mask_pred = smooth(mask_pred) 134 | mask_gt = img_ann[clip_model.positives[k]]['mask'].astype(np.uint8) 135 | 136 | intersection = np.logical_and(mask_gt, mask_pred).sum() 137 | union = np.logical_or(mask_gt, mask_pred).sum() 138 | iou = intersection / (union + 1e-9) # Avoid division by zero 139 | 140 | chosen_iou_list.append(iou) 141 | chosen_lvl_list.append(chosen_lvl) 142 | 143 | # save for visualization 144 | save_path = image_name / f'chosen_{clip_model.positives[k]}.png' 145 | vis_mask_save(mask_pred, save_path) 146 | 147 | return chosen_iou_list, chosen_lvl_list 148 | 149 | 150 | def evaluate(feat_dir, output_path, gt_path, logger, eval_params): 151 | 152 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 153 | # colormap_options = colormaps.ColormapOptions( 154 | # colormap="turbo", 155 | # normalize=True, 156 | # colormap_min=-1.0, 157 | # colormap_max=1.0, 158 | # ) 159 | 160 | gt_ann, image_shape, image_paths = eval_gt_3dovsdata(Path(gt_path), Path(output_path)) 161 | 162 | eval_index_list = [int(idx) for idx in list(gt_ann.keys())] 163 | feat_paths_lvl = [] 164 | for i in range(len(feat_dir)): 165 | # Create a mapping of index to file path 166 | index_to_file = {} 167 | for file_path in glob.glob(os.path.join(feat_dir[i], '*.npy')): 168 | file_idx = int(os.path.basename(file_path).split(".npy")[0]) 169 | index_to_file[file_idx] = file_path 170 | 171 | feat_paths_lvl.append(index_to_file) 172 | 173 | assert len(feat_paths_lvl) == len(feat_dir) 174 | 175 | # instantiate openclip 176 | clip_model = OpenCLIPNetwork(device) 177 | 178 | chosen_iou_all, chosen_lvl_list = [], [] 179 | for j, idx in enumerate(tqdm(eval_index_list)): 180 | image_name = Path(output_path) / f'{idx:0>2}' 181 | image_name.mkdir(exist_ok=True, parents=True) 182 | 183 | compressed_sem_feats = np.zeros((len(feat_dir), *image_shape, 512), dtype=np.float32) # compressed_sem_feats: (len(feat_dir), h, w, 512) -> (granularity, h, w, c) 184 | for i in range(len(feat_dir)): 185 | if idx not in feat_paths_lvl[i]: 186 | raise ValueError(f"Missing feature file for index {idx} in directory {feat_dir[i]}") 187 | compressed_sem_feats[i] = np.load(feat_paths_lvl[i][idx], mmap_mode='r') 188 | 189 | sem_feat = torch.from_numpy(compressed_sem_feats).float().to(device) 190 | # rgb_img = cv2.imread(image_paths[idx])[..., ::-1] 191 | # rgb_img = (rgb_img / 255.0).astype(np.float32) 192 | # rgb_img = torch.from_numpy(rgb_img).to(device) 193 | print(f"j: {j}, idx: {idx}, image_name: {image_name}, image_path: {image_paths[idx]}") 194 | 195 | img_ann = gt_ann[f'{idx}'] # -> a dictionary keyed by class label, each value holding its binary 'mask' 196 | clip_model.set_positives(list(img_ann.keys())) 197 | 198 | c_iou_list, c_lvl = activate_stream(sem_feat, clip_model, 
image_name, img_ann, 200 | eval_params=eval_params) 201 | 202 | chosen_iou_all.extend(c_iou_list) 203 | chosen_lvl_list.extend(c_lvl) 204 | 205 | # iou 206 | mean_iou_chosen = sum(chosen_iou_all) / len(chosen_iou_all) 207 | logger.info(f"iou chosen: {mean_iou_chosen:.4f}") 208 | logger.info(f"chosen_lvl: \n{chosen_lvl_list}") 209 | 210 | 211 | def seed_everything(seed_value): 212 | random.seed(seed_value) 213 | np.random.seed(seed_value) 214 | torch.manual_seed(seed_value) 215 | os.environ['PYTHONHASHSEED'] = str(seed_value) 216 | 217 | if torch.cuda.is_available(): 218 | torch.cuda.manual_seed(seed_value) 219 | torch.cuda.manual_seed_all(seed_value) 220 | torch.backends.cudnn.deterministic = True 221 | torch.backends.cudnn.benchmark = True 222 | 223 | 224 | if __name__ == "__main__": 225 | seed_num = 42 226 | seed_everything(seed_num) 227 | 228 | parser = ArgumentParser(description="prompt any label") 229 | parser.add_argument("--dataset_name", type=str, default=None) 230 | parser.add_argument("--gt_folder", type=str, default=None) 231 | parser.add_argument("--feat_folder", type=str, default=None) 232 | parser.add_argument("--stability_thresh", type=float, default=0.3) 233 | parser.add_argument("--min_mask_size", type=float, default=0.001) 234 | parser.add_argument("--max_mask_size", type=float, default=0.95) 235 | args = parser.parse_args() 236 | 237 | eval_params = { 238 | "stability_thresh": args.stability_thresh, 239 | "min_mask_size": args.min_mask_size, 240 | "max_mask_size": args.max_mask_size, 241 | } 242 | dataset_name = args.dataset_name 243 | feat_dir = [f"./output/3DOVS/{args.dataset_name}/test/{args.feat_folder}_1/renders_npy", 244 | f"./output/3DOVS/{args.dataset_name}/test/{args.feat_folder}_2/renders_npy", 245 | f"./output/3DOVS/{args.dataset_name}/test/{args.feat_folder}_3/renders_npy"] 246 | output_path = f"./eval_results/3DOVS/{args.dataset_name}" 247 | gt_path = args.gt_folder 248 | 249 | timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) 250 | os.makedirs(output_path, exist_ok=True) 251 | log_file = os.path.join(output_path, f'{dataset_name}.log') 252 | logger = get_logger(f'{dataset_name}', log_file=log_file, log_level=logging.INFO) 253 | 254 | evaluate(feat_dir, output_path, gt_path, logger, eval_params) -------------------------------------------------------------------------------- /eval/openclip_encoder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import torch 3 | import torchvision 4 | import open_clip 5 | 6 | 7 | class OpenCLIPNetwork: 8 | def __init__(self, device): 9 | self.process = torchvision.transforms.Compose( 10 | [ 11 | torchvision.transforms.Resize((224, 224)), 12 | torchvision.transforms.Normalize( 13 | mean=[0.48145466, 0.4578275, 0.40821073], 14 | std=[0.26862954, 0.26130258, 0.27577711], 15 | ), 16 | ] 17 | ) 18 | self.clip_model_type = "ViT-B-16" 19 | self.clip_model_pretrained = 'laion2b_s34b_b88k' 20 | self.clip_n_dims = 512 21 | model, _, _ = open_clip.create_model_and_transforms( 22 | self.clip_model_type, 23 | pretrained=self.clip_model_pretrained, 24 | precision="fp16", 25 | ) 26 | model.eval() 27 | 28 | self.tokenizer = open_clip.get_tokenizer(self.clip_model_type) 29 | self.model = model.to(device) 30 | 31 | self.negatives = ("object", "things", "stuff", "texture") 32 | self.positives = (" ",) 33 | with torch.no_grad(): 34 | tok_phrases = torch.cat([self.tokenizer(phrase) for phrase in self.positives]).to(device) 35 | self.pos_embeds = 
model.encode_text(tok_phrases) 36 | tok_phrases = torch.cat([self.tokenizer(phrase) for phrase in self.negatives]).to(device) 37 | self.neg_embeds = model.encode_text(tok_phrases) 38 | self.pos_embeds /= self.pos_embeds.norm(dim=-1, keepdim=True) 39 | self.neg_embeds /= self.neg_embeds.norm(dim=-1, keepdim=True) 40 | 41 | @torch.no_grad() 42 | def get_relevancy(self, embed: torch.Tensor, positive_id: int) -> torch.Tensor: 43 | # embed: torch.Size([721240, 512]) -> (n_pixels, embed_dim), positive_id -> phrase id 44 | phrases_embeds = torch.cat([self.pos_embeds, self.neg_embeds], dim=0) 45 | # print(f'phrases_embeds: {phrases_embeds.shape}') # torch.Size([11, 512]) 46 | # print(f'pos_embeds: {self.pos_embeds.shape}') # torch.Size([7, 512]) 47 | # print(f'neg_embeds: {self.neg_embeds.shape}') # torch.Size([4, 512]) 48 | p = phrases_embeds.to(embed.dtype) 49 | output = torch.mm(embed, p.T) # 721240x512 * 512x11 -> 721240x11 50 | positive_vals = output[..., positive_id : positive_id + 1] # (721240, 1) similarities between the rendered embeddings and the positive query phrase 51 | negative_vals = output[..., len(self.positives) :] # (721240, 4) similarities between the rendered embeddings and the negative query phrases [object, things,...] 52 | repeated_pos = positive_vals.repeat(1, len(self.negatives)) # (721240, 1) -> (721240, 4) 53 | 54 | sims = torch.stack((repeated_pos, negative_vals), dim=-1) # torch.Size([721240, 4, 2]) 55 | softmax = torch.softmax(10 * sims, dim=-1) # torch.Size([721240, 4, 2]) 56 | best_id = softmax[..., 0].argmin(dim=1) # torch.Size([721240]) 57 | 58 | return torch.gather(softmax, 1, best_id[..., None, None].expand(best_id.shape[0], len(self.negatives), 2))[ 59 | :, 0, : 60 | ] 61 | 62 | def encode_image(self, input, mask=None): 63 | processed_input = self.process(input).half() 64 | return self.model.encode_image(processed_input, mask=mask) 65 | 66 | def encode_text(self, text_list, device): 67 | text = self.tokenizer(text_list).to(device) 68 | return self.model.encode_text(text) 69 | 70 | def set_positives(self, text_list): 71 | self.positives = text_list 72 | with torch.no_grad(): 73 | tok_phrases = torch.cat( 74 | [self.tokenizer(phrase) for phrase in self.positives] 75 | ).to(self.neg_embeds.device) 76 | self.pos_embeds = self.model.encode_text(tok_phrases) 77 | self.pos_embeds /= self.pos_embeds.norm(dim=-1, keepdim=True) 78 | 79 | def set_semantics(self, text_list): 80 | self.semantic_labels = text_list 81 | with torch.no_grad(): 82 | tok_phrases = torch.cat([self.tokenizer(phrase) for phrase in self.semantic_labels]).to("cuda") 83 | self.semantic_embeds = self.model.encode_text(tok_phrases) 84 | self.semantic_embeds /= self.semantic_embeds.norm(dim=-1, keepdim=True) 85 | 86 | def get_semantic_map(self, sem_map: torch.Tensor) -> torch.Tensor: 87 | # embed: 3xhxwx512 88 | n_levels, h, w, c = sem_map.shape 89 | pos_num = self.semantic_embeds.shape[0] 90 | phrases_embeds = torch.cat([self.semantic_embeds, self.neg_embeds], dim=0) 91 | p = phrases_embeds.to(sem_map.dtype) 92 | sem_pred = torch.zeros(n_levels, h, w) 93 | for i in range(n_levels): 94 | output = torch.mm(sem_map[i].view(-1, c), p.T) 95 | softmax = torch.softmax(10 * output, dim=-1) 96 | sem_pred[i] = torch.argmax(softmax, dim=-1).view(h, w) 97 | sem_pred[i][sem_pred[i] >= pos_num] = -1 98 | return sem_pred.long() 99 | 100 | def get_max_across(self, sem_map): # sem_map: torch.Size([3, 731, 988, 512]) -> (granuity, h, w, embed_dim) 101 | ''' 102 | processes a semantic map and returns a relevance map, 103 
| highlighting the regions of the input image that are most relevant to specific phrases. 104 | ''' 105 | n_phrases = len(self.positives) 106 | n_phrases_sims = [None for _ in range(n_phrases)] 107 | 108 | n_levels, h, w, _ = sem_map.shape 109 | clip_output = sem_map.permute(1, 2, 0, 3).flatten(0, 1) # 3x731x988x512 -> 731x988x3x512 -> 721240x3x512 110 | 111 | n_levels_sims = [None for _ in range(n_levels)] 112 | for i in range(n_levels): 113 | for j in range(n_phrases): 114 | probs = self.get_relevancy(clip_output[..., i, :], j) # clip_output[..., i, :] -> 721240, j -> phrase id 115 | pos_prob = probs[..., 0:1] # pos_prob -> torch.Size([721240, 1]) 116 | n_phrases_sims[j] = pos_prob # phrase's level relevance score 117 | n_levels_sims[i] = torch.stack(n_phrases_sims) # each granularity level's relevance score for all phrases 118 | 119 | relev_map = torch.stack(n_levels_sims).view(n_levels, n_phrases, h, w) 120 | return relev_map -------------------------------------------------------------------------------- /eval/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import matplotlib.patches as patches 4 | import matplotlib.pyplot as plt 5 | import mediapy as media 6 | import cv2 7 | import colormaps 8 | from pathlib import Path 9 | 10 | 11 | def show_points(coords, labels, ax, marker_size=100): 12 | pos_points = coords[labels==1] 13 | neg_points = coords[labels==0] 14 | ax.scatter(pos_points[:, 0], pos_points[:, 1], color='firebrick', marker='o', 15 | s=marker_size, edgecolor='black', linewidth=2.5, alpha=1) 16 | ax.scatter(neg_points[:, 0], neg_points[:, 1], color='red', marker='o', 17 | s=marker_size, edgecolor='black', linewidth=1.5, alpha=1) 18 | 19 | 20 | def show_box(boxes, ax, color=None): 21 | if type(color) == str and color == 'random': 22 | color = np.random.random(3) 23 | elif color is None: 24 | color = 'black' 25 | for box in boxes.reshape(-1, 4): 26 | x0, y0 = box[0], box[1] 27 | w, h = box[2] - box[0], box[3] - box[1] 28 | ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor=color, facecolor=(0,0,0,0), lw=4, 29 | capstyle='round', joinstyle='round', linestyle='dotted')) 30 | 31 | 32 | def show_result(image, point, bbox, save_path): 33 | plt.figure() 34 | plt.imshow(image) 35 | rect = patches.Rectangle((0, 0), image.shape[1]-1, image.shape[0]-1, linewidth=0, edgecolor='none', facecolor='white', alpha=0.3) 36 | plt.gca().add_patch(rect) 37 | input_point = point.reshape(1,-1) 38 | input_label = np.array([1]) 39 | show_points(input_point, input_label, plt.gca()) 40 | show_box(bbox, plt.gca()) 41 | plt.axis('off') 42 | plt.savefig(save_path, bbox_inches='tight', pad_inches=0.0, dpi=200) 43 | plt.close() 44 | 45 | 46 | def smooth(mask): 47 | h, w = mask.shape[:2] 48 | im_smooth = mask.copy() 49 | scale = 3 50 | for i in range(h): 51 | for j in range(w): 52 | square = mask[max(0, i-scale) : min(i+scale+1, h-1), 53 | max(0, j-scale) : min(j+scale+1, w-1)] 54 | im_smooth[i, j] = np.argmax(np.bincount(square.reshape(-1))) 55 | return im_smooth 56 | 57 | 58 | def colormap_saving(image: torch.Tensor, colormap_options, save_path): 59 | """ 60 | if image's shape is (h, w, 1): draw colored relevance map; 61 | if image's shape is (h, w, 3): return it directly; 62 | if image's shape is (h, w, c): execute PCA and transform it into (h, w, 3). 
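    A minimal usage sketch (editor's note, not part of the original file; the tensor is random and save_path=None skips writing to disk):
        feat = torch.rand(64, 64, 512)                                       # (h, w, c) feature image
        rgb = colormap_saving(feat, colormaps.ColormapOptions(), save_path=None)
        # rgb is a (64, 64, 3) numpy array with values in [0, 1]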
63 | """ 64 | output_image = ( 65 | colormaps.apply_colormap( 66 | image=image, 67 | colormap_options=colormap_options, 68 | ).cpu().numpy() 69 | ) 70 | if save_path is not None: 71 | media.write_image(save_path.with_suffix(".png"), output_image, fmt="png") 72 | return output_image 73 | 74 | 75 | def vis_mask_save(mask, save_path: Path = None): 76 | mask_save = mask.copy() 77 | mask_save[mask == 1] = 255 78 | save_path.parent.mkdir(exist_ok=True, parents=True) 79 | cv2.imwrite(str(save_path), mask_save) 80 | 81 | 82 | def polygon_to_mask(img_shape, points_list): 83 | points = np.asarray(points_list, dtype=np.int32) 84 | mask = np.zeros(img_shape, dtype=np.uint8) 85 | cv2.fillPoly(mask, [points], 1) 86 | return mask 87 | 88 | 89 | def stack_mask(mask_base, mask_add): 90 | mask = mask_base.copy() 91 | mask[mask_add != 0] = 1 92 | return mask -------------------------------------------------------------------------------- /eval_3DOVS.sh: -------------------------------------------------------------------------------- 1 | # Set the dataset name 2 | DATASET_NAME="bench" 3 | 4 | # Path to the preprocessed 3DOVS dataset 5 | GT_FOLDER="/path/to/your/3DOVS-preprocess-full/$DATASET_NAME" 6 | 7 | # Name of the folder containing extracted features 8 | FEAT_FOLDER_NAME="ours_30000_langfeat" # Replace with your model's feature folder 9 | 10 | python eval/evaluate_iou_3dovs.py \ 11 | --dataset_name ${DATASET_NAME} \ 12 | --gt_folder ${GT_FOLDER} \ 13 | --feat_folder ${FEAT_FOLDER_NAME} \ 14 | --stability_thresh 0.4 \ 15 | --min_mask_size 0.005 \ 16 | --max_mask_size 0.9 17 | -------------------------------------------------------------------------------- /feature_map_renderer.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | from scene import Scene 14 | import os 15 | from tqdm import tqdm 16 | from os import makedirs 17 | from gaussian_renderer import render 18 | import torchvision 19 | from utils.general_utils import safe_state 20 | from argparse import ArgumentParser 21 | from arguments import ModelParams, PipelineParams, OptimizationParams, get_combined_args 22 | from gaussian_renderer import GaussianModel 23 | import numpy as np 24 | from sklearn.decomposition import PCA 25 | import torch.utils.dlpack 26 | import matplotlib.pyplot as plt 27 | 28 | def render_set(model_path, name, iteration, source_path, views, gaussians, pipeline, background, feature_level): 29 | 30 | save_path = os.path.join(model_path, name, "ours_{}_langfeat_{}".format(iteration, feature_level)) 31 | render_path = os.path.join(save_path, "renders") 32 | gts_path = os.path.join(save_path, "gt") 33 | render_npy_path = os.path.join(save_path, "renders_npy") 34 | gts_npy_path = os.path.join(save_path,"gt_npy") 35 | 36 | os.makedirs(render_path, exist_ok=True) 37 | os.makedirs(gts_path, exist_ok=True) 38 | os.makedirs(render_npy_path, exist_ok=True) 39 | os.makedirs(gts_npy_path, exist_ok=True) 40 | 41 | 42 | for idx, view in enumerate(tqdm(views, desc="Rendering progress")): 43 | render_pkg = render(view, gaussians, pipeline, background, include_feature=True) 44 | rendering = render_pkg["render"] 45 | gt, mask = view.get_language_feature(language_feature_dir=f"{source_path}/language_features", feature_level=feature_level) #! modified 46 | 47 | np.save(os.path.join(render_npy_path, view.image_name.split('.')[0] + ".npy"),rendering.permute(1,2,0).cpu().numpy()) 48 | np.save(os.path.join(gts_npy_path, view.image_name.split('.')[0] + ".npy"),gt.permute(1,2,0).cpu().numpy()) 49 | 50 | _, H, W = gt.shape 51 | gt = gt.reshape(512, -1).T.cpu().numpy() 52 | rendering = rendering.reshape(512, -1).T.cpu().numpy() # (H*W, 512) 53 | 54 | pca = PCA(n_components=3) 55 | 56 | combined_np = np.concatenate((gt, rendering), axis=0) 57 | combined_features = pca.fit_transform(combined_np) # ((n+m)*H*W, 3) 58 | normalized_features = (combined_features - combined_features.min(axis=0)) / (combined_features.max(axis=0) - combined_features.min(axis=0)) 59 | reshaped_combined_features = normalized_features.reshape(2, H, W, 3) 60 | 61 | reduced_rendering = reshaped_combined_features[1] 62 | reduced_gt = reshaped_combined_features[0] 63 | 64 | rendering = torch.tensor(reduced_rendering).permute(2, 0, 1) 65 | gt = torch.tensor(reduced_gt).permute(2, 0, 1) 66 | 67 | torchvision.utils.save_image(rendering, os.path.join(render_path, view.image_name )) 68 | torchvision.utils.save_image(gt, os.path.join(gts_path, view.image_name)) 69 | 70 | def render_sets(dataset : ModelParams, opt : OptimizationParams, iteration : int, pipeline : PipelineParams, skip_train : bool, skip_test : bool, feature_level : int): 71 | 72 | with torch.no_grad(): 73 | gaussians = GaussianModel(dataset.sh_degree) 74 | scene = Scene(dataset, gaussians, load_iteration=iteration, shuffle=False, include_feature=True) 75 | 76 | checkpoint = os.path.join(args.model_path, f'chkpnt{iteration}_langfeat_{feature_level}.pth') 77 | (model_params, first_iter) = torch.load(checkpoint) 78 | gaussians.restore_language_features(model_params, opt) 79 | bg_color = [1,1,1] if dataset.white_background else [0, 0, 0] 80 | background = torch.tensor(bg_color, dtype=torch.float32, device="cuda") 81 | 82 | if not skip_train: 83 | 
render_set(args.model_path, "train", scene.loaded_iter, dataset.source_path, scene.getTrainCameras(), gaussians, pipeline, background, feature_level) 84 | 85 | if not skip_test: 86 | render_set(args.model_path, "test", scene.loaded_iter, dataset.source_path, scene.getTestCameras(), gaussians, pipeline, background, feature_level) 87 | 88 | 89 | if __name__ == "__main__": 90 | # Set up command line argument parser 91 | parser = ArgumentParser(description="Testing script parameters") 92 | model = ModelParams(parser, sentinel=True) 93 | pipeline = PipelineParams(parser) 94 | opt = OptimizationParams(parser) 95 | parser.add_argument("--iteration", default=-1, type=int) 96 | parser.add_argument("--skip_train", action="store_true") 97 | parser.add_argument("--skip_test", action="store_true") 98 | parser.add_argument("--quiet", action="store_true") 99 | args = get_combined_args(parser) 100 | 101 | # Initialize system state (RNG) 102 | safe_state(args.quiet) 103 | 104 | render_sets(model.extract(args), opt.extract(args), args.iteration, pipeline.extract(args), args.skip_train, args.skip_test, args.feature_level) -------------------------------------------------------------------------------- /full_eval.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import os 13 | from argparse import ArgumentParser 14 | import time 15 | 16 | mipnerf360_outdoor_scenes = ["bicycle", "flowers", "garden", "stump", "treehill"] 17 | mipnerf360_indoor_scenes = ["room", "counter", "kitchen", "bonsai"] 18 | tanks_and_temples_scenes = ["truck", "train"] 19 | deep_blending_scenes = ["drjohnson", "playroom"] 20 | 21 | parser = ArgumentParser(description="Full evaluation script parameters") 22 | parser.add_argument("--skip_training", action="store_true") 23 | parser.add_argument("--skip_rendering", action="store_true") 24 | parser.add_argument("--skip_metrics", action="store_true") 25 | parser.add_argument("--output_path", default="./eval") 26 | parser.add_argument("--use_depth", action="store_true") 27 | parser.add_argument("--use_expcomp", action="store_true") 28 | parser.add_argument("--fast", action="store_true") 29 | parser.add_argument("--aa", action="store_true") 30 | 31 | 32 | 33 | 34 | args, _ = parser.parse_known_args() 35 | 36 | all_scenes = [] 37 | all_scenes.extend(mipnerf360_outdoor_scenes) 38 | all_scenes.extend(mipnerf360_indoor_scenes) 39 | all_scenes.extend(tanks_and_temples_scenes) 40 | all_scenes.extend(deep_blending_scenes) 41 | 42 | if not args.skip_training or not args.skip_rendering: 43 | parser.add_argument('--mipnerf360', "-m360", required=True, type=str) 44 | parser.add_argument("--tanksandtemples", "-tat", required=True, type=str) 45 | parser.add_argument("--deepblending", "-db", required=True, type=str) 46 | args = parser.parse_args() 47 | if not args.skip_training: 48 | common_args = " --disable_viewer --quiet --eval --test_iterations -1 " 49 | 50 | if args.aa: 51 | common_args += " --antialiasing " 52 | if args.use_depth: 53 | common_args += " -d depths2/ " 54 | 55 | if args.use_expcomp: 56 | common_args += " --exposure_lr_init 0.001 --exposure_lr_final 0.0001 --exposure_lr_delay_steps 5000 --exposure_lr_delay_mult 0.001 
--train_test_exp " 57 | 58 | if args.fast: 59 | common_args += " --optimizer_type sparse_adam " 60 | 61 | start_time = time.time() 62 | for scene in mipnerf360_outdoor_scenes: 63 | source = args.mipnerf360 + "/" + scene 64 | os.system("python train.py -s " + source + " -i images_4 -m " + args.output_path + "/" + scene + common_args) 65 | for scene in mipnerf360_indoor_scenes: 66 | source = args.mipnerf360 + "/" + scene 67 | os.system("python train.py -s " + source + " -i images_2 -m " + args.output_path + "/" + scene + common_args) 68 | m360_timing = (time.time() - start_time)/60.0 69 | 70 | start_time = time.time() 71 | for scene in tanks_and_temples_scenes: 72 | source = args.tanksandtemples + "/" + scene 73 | os.system("python train.py -s " + source + " -m " + args.output_path + "/" + scene + common_args) 74 | tandt_timing = (time.time() - start_time)/60.0 75 | 76 | start_time = time.time() 77 | for scene in deep_blending_scenes: 78 | source = args.deepblending + "/" + scene 79 | os.system("python train.py -s " + source + " -m " + args.output_path + "/" + scene + common_args) 80 | db_timing = (time.time() - start_time)/60.0 81 | 82 | with open(os.path.join(args.output_path,"timing.txt"), 'w') as file: 83 | file.write(f"m360: {m360_timing} minutes \n tandt: {tandt_timing} minutes \n db: {db_timing} minutes\n") 84 | 85 | if not args.skip_rendering: 86 | all_sources = [] 87 | for scene in mipnerf360_outdoor_scenes: 88 | all_sources.append(args.mipnerf360 + "/" + scene) 89 | for scene in mipnerf360_indoor_scenes: 90 | all_sources.append(args.mipnerf360 + "/" + scene) 91 | for scene in tanks_and_temples_scenes: 92 | all_sources.append(args.tanksandtemples + "/" + scene) 93 | for scene in deep_blending_scenes: 94 | all_sources.append(args.deepblending + "/" + scene) 95 | 96 | common_args = " --quiet --eval --skip_train" 97 | 98 | if args.aa: 99 | common_args += " --antialiasing " 100 | if args.use_expcomp: 101 | common_args += " --train_test_exp " 102 | 103 | for scene, source in zip(all_scenes, all_sources): 104 | os.system("python render.py --iteration 7000 -s " + source + " -m " + args.output_path + "/" + scene + common_args) 105 | os.system("python render.py --iteration 30000 -s " + source + " -m " + args.output_path + "/" + scene + common_args) 106 | 107 | if not args.skip_metrics: 108 | scenes_string = "" 109 | for scene in all_scenes: 110 | scenes_string += "\"" + args.output_path + "/" + scene + "\" " 111 | 112 | os.system("python metrics.py -m " + scenes_string) 113 | -------------------------------------------------------------------------------- /gaussian_feature_extractor.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | from scene import Scene 14 | import os 15 | from tqdm import tqdm 16 | from os import makedirs 17 | from gaussian_renderer import render 18 | import torchvision 19 | from utils.general_utils import safe_state 20 | from argparse import ArgumentParser 21 | from arguments import ModelParams, PipelineParams, OptimizationParams, get_combined_args 22 | from gaussian_renderer import GaussianModel 23 | import numpy as np 24 | from sklearn.decomposition import PCA 25 | import torch.utils.dlpack 26 | import matplotlib.pyplot as plt 27 | import time 28 | 29 | 30 | def extract_gaussian_features(model_path, iteration, source_path, views, gaussians, pipeline, background, feature_level): 31 | 32 | language_feature_save_path = os.path.join(model_path, f'chkpnt{iteration}_langfeat_{feature_level}.pth') 33 | 34 | for _, view in enumerate(tqdm(views, desc="Rendering progress")): 35 | 36 | render_pkg= render(view, gaussians, pipeline, background) 37 | 38 | gt_language_feature, gt_mask = view.get_language_feature(language_feature_dir=f"{source_path}/language_features", feature_level=feature_level) 39 | activated = render_pkg["info"]["activated"] 40 | significance = render_pkg["info"]["significance"] 41 | means2D = render_pkg["info"]["means2d"] 42 | 43 | mask = activated[0] > 0 44 | gaussians.accumulate_gaussian_feature_per_view(gt_language_feature.permute(1, 2, 0), gt_mask.squeeze(0), mask, significance[0,mask], means2D[0, mask]) 45 | 46 | gaussians.finalize_gaussian_features() 47 | 48 | torch.save((gaussians.capture_language_feature(), 0), language_feature_save_path) 49 | print("checkpoint saved to: ", language_feature_save_path) 50 | 51 | def process_scene_language_features(dataset : ModelParams, opt : OptimizationParams, iteration : int, pipeline : PipelineParams, feature_level : int): 52 | 53 | with torch.no_grad(): 54 | gaussians = GaussianModel(dataset.sh_degree) 55 | scene = Scene(dataset, gaussians, load_iteration=iteration, shuffle=False, include_feature=True) 56 | 57 | checkpoint = os.path.join(args.model_path, f'chkpnt{iteration}.pth') 58 | (model_params, _) = torch.load(checkpoint) 59 | gaussians.restore_rgb(model_params, opt) 60 | bg_color = [1,1,1] if dataset.white_background else [0, 0, 0] 61 | background = torch.tensor(bg_color, dtype=torch.float32, device="cuda") 62 | 63 | extract_gaussian_features(args.model_path, iteration, dataset.source_path, scene.getTrainCameras(), gaussians, pipeline, background, feature_level) 64 | 65 | 66 | if __name__ == "__main__": 67 | # Set up command line argument parser 68 | parser = ArgumentParser(description="Testing script parameters") 69 | model = ModelParams(parser, sentinel=True) 70 | pipeline = PipelineParams(parser) 71 | opt = OptimizationParams(parser) 72 | parser.add_argument("--iteration", default=-1, type=int) 73 | parser.add_argument("--quiet", action="store_true") 74 | args = get_combined_args(parser) 75 | 76 | # Initialize system state (RNG) 77 | safe_state(args.quiet) 78 | 79 | process_scene_language_features(model.extract(args), opt.extract(args), args.iteration, pipeline.extract(args), args.feature_level) -------------------------------------------------------------------------------- /gaussian_renderer/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 
5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | import math 14 | # from diff_gaussian_rasterization import GaussianRasterizationSettings, GaussianRasterizer 15 | from scene.gaussian_model import GaussianModel 16 | from utils.sh_utils import eval_sh 17 | from gsplat import rasterization 18 | 19 | def render(viewpoint_camera, pc : GaussianModel, pipe, bg_color : torch.Tensor, scaling_modifier = 1.0, separate_sh = False, override_color = None, use_trained_exp=False, include_feature = False): 20 | """ 21 | Render the scene. 22 | 23 | Background tensor (bg_color) must be on GPU! 24 | """ 25 | 26 | # Set up rasterization configuration 27 | tanfovx = math.tan(viewpoint_camera.FoVx * 0.5) 28 | tanfovy = math.tan(viewpoint_camera.FoVy * 0.5) 29 | 30 | focal_length_x = viewpoint_camera.image_width / (2 * tanfovx) 31 | focal_length_y = viewpoint_camera.image_height / (2 * tanfovy) 32 | 33 | K = torch.tensor( 34 | [ 35 | [focal_length_x, 0, viewpoint_camera.image_width / 2.0], 36 | [0, focal_length_y, viewpoint_camera.image_height / 2.0], 37 | [0, 0, 1], 38 | ], 39 | device="cuda", 40 | ) 41 | 42 | means3D = pc.get_xyz 43 | opacity = pc.get_opacity 44 | 45 | scales = pc.get_scaling * scaling_modifier 46 | rotations = pc.get_rotation 47 | 48 | if include_feature: 49 | features = pc.get_language_feature 50 | else: 51 | if override_color is not None: 52 | colors = override_color # [N, 3] 53 | sh_degree = None 54 | else: 55 | colors = pc.get_features # [N, K, 3] 56 | sh_degree = pc.active_sh_degree 57 | 58 | viewmat = viewpoint_camera.world_view_transform.transpose(0, 1) # [4, 4] 59 | if include_feature: 60 | render_colors, render_alphas, info = rasterization( 61 | means=means3D, # [N, 3] 62 | quats=rotations, # [N, 4] 63 | scales=scales, # [N, 3] 64 | opacities=opacity.squeeze(-1), # [N,] 65 | colors=features, # [N, D] 66 | viewmats=viewmat[None], # [1, 4, 4] 67 | Ks=K[None], # [1, 3, 3] 68 | width=int(viewpoint_camera.image_width), 69 | height=int(viewpoint_camera.image_height), 70 | packed=False 71 | ) 72 | else: 73 | # Rasterize visible Gaussians to image, obtain their radii (on screen). 74 | render_colors, render_alphas, info = rasterization( 75 | means=means3D, # [N, 3] 76 | quats=rotations, # [N, 4] 77 | scales=scales, # [N, 3] 78 | opacities=opacity.squeeze(-1), # [N,] 79 | colors=colors, 80 | viewmats=viewmat[None], # [1, 4, 4] 81 | Ks=K[None], # [1, 3, 3] 82 | backgrounds=bg_color[None], 83 | width=int(viewpoint_camera.image_width), 84 | height=int(viewpoint_camera.image_height), 85 | packed=False, 86 | sh_degree=sh_degree, 87 | ) 88 | 89 | rendered_image = render_colors[0].permute(2, 0, 1) 90 | radii = info["radii"].squeeze(0) # [N,] 91 | try: 92 | info["means2d"].retain_grad() # [1, N, 2] 93 | except: 94 | pass 95 | out = { 96 | "render": rendered_image, 97 | "viewspace_points": info["means2d"], 98 | "visibility_filter" : radii > 0, 99 | "radii": radii, 100 | "info": info, 101 | } 102 | 103 | return out 104 | -------------------------------------------------------------------------------- /gaussian_renderer/network_gui.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 
5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | import traceback 14 | import socket 15 | import json 16 | from scene.cameras import MiniCam 17 | 18 | host = "127.0.0.1" 19 | port = 6009 20 | 21 | conn = None 22 | addr = None 23 | 24 | listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 25 | 26 | def init(wish_host, wish_port): 27 | global host, port, listener 28 | host = wish_host 29 | port = wish_port 30 | listener.bind((host, port)) 31 | listener.listen() 32 | listener.settimeout(0) 33 | 34 | def try_connect(): 35 | global conn, addr, listener 36 | try: 37 | conn, addr = listener.accept() 38 | print(f"\nConnected by {addr}") 39 | conn.settimeout(None) 40 | except Exception as inst: 41 | pass 42 | 43 | def read(): 44 | global conn 45 | messageLength = conn.recv(4) 46 | messageLength = int.from_bytes(messageLength, 'little') 47 | message = conn.recv(messageLength) 48 | return json.loads(message.decode("utf-8")) 49 | 50 | def send(message_bytes, verify): 51 | global conn 52 | if message_bytes != None: 53 | conn.sendall(message_bytes) 54 | conn.sendall(len(verify).to_bytes(4, 'little')) 55 | conn.sendall(bytes(verify, 'ascii')) 56 | 57 | def receive(): 58 | message = read() 59 | 60 | width = message["resolution_x"] 61 | height = message["resolution_y"] 62 | 63 | if width != 0 and height != 0: 64 | try: 65 | do_training = bool(message["train"]) 66 | fovy = message["fov_y"] 67 | fovx = message["fov_x"] 68 | znear = message["z_near"] 69 | zfar = message["z_far"] 70 | do_shs_python = bool(message["shs_python"]) 71 | do_rot_scale_python = bool(message["rot_scale_python"]) 72 | keep_alive = bool(message["keep_alive"]) 73 | scaling_modifier = message["scaling_modifier"] 74 | world_view_transform = torch.reshape(torch.tensor(message["view_matrix"]), (4, 4)).cuda() 75 | world_view_transform[:,1] = -world_view_transform[:,1] 76 | world_view_transform[:,2] = -world_view_transform[:,2] 77 | full_proj_transform = torch.reshape(torch.tensor(message["view_projection_matrix"]), (4, 4)).cuda() 78 | full_proj_transform[:,1] = -full_proj_transform[:,1] 79 | custom_cam = MiniCam(width, height, fovy, fovx, znear, zfar, world_view_transform, full_proj_transform) 80 | except Exception as e: 81 | print("") 82 | traceback.print_exc() 83 | raise e 84 | return custom_cam, do_training, do_shs_python, do_rot_scale_python, keep_alive, scaling_modifier 85 | else: 86 | return None, None, None, None, None, None -------------------------------------------------------------------------------- /lpipsPyTorch/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .modules.lpips import LPIPS 4 | 5 | 6 | def lpips(x: torch.Tensor, 7 | y: torch.Tensor, 8 | net_type: str = 'alex', 9 | version: str = '0.1'): 10 | r"""Function that measures 11 | Learned Perceptual Image Patch Similarity (LPIPS). 12 | 13 | Arguments: 14 | x, y (torch.Tensor): the input tensors to compare. 15 | net_type (str): the network type to compare the features: 16 | 'alex' | 'squeeze' | 'vgg'. Default: 'alex'. 17 | version (str): the version of LPIPS. Default: 0.1. 
18 | """ 19 | device = x.device 20 | criterion = LPIPS(net_type, version).to(device) 21 | return criterion(x, y) 22 | -------------------------------------------------------------------------------- /lpipsPyTorch/modules/lpips.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .networks import get_network, LinLayers 5 | from .utils import get_state_dict 6 | 7 | 8 | class LPIPS(nn.Module): 9 | r"""Creates a criterion that measures 10 | Learned Perceptual Image Patch Similarity (LPIPS). 11 | 12 | Arguments: 13 | net_type (str): the network type to compare the features: 14 | 'alex' | 'squeeze' | 'vgg'. Default: 'alex'. 15 | version (str): the version of LPIPS. Default: 0.1. 16 | """ 17 | def __init__(self, net_type: str = 'alex', version: str = '0.1'): 18 | 19 | assert version in ['0.1'], 'v0.1 is only supported now' 20 | 21 | super(LPIPS, self).__init__() 22 | 23 | # pretrained network 24 | self.net = get_network(net_type) 25 | 26 | # linear layers 27 | self.lin = LinLayers(self.net.n_channels_list) 28 | self.lin.load_state_dict(get_state_dict(net_type, version)) 29 | 30 | def forward(self, x: torch.Tensor, y: torch.Tensor): 31 | feat_x, feat_y = self.net(x), self.net(y) 32 | 33 | diff = [(fx - fy) ** 2 for fx, fy in zip(feat_x, feat_y)] 34 | res = [l(d).mean((2, 3), True) for d, l in zip(diff, self.lin)] 35 | 36 | return torch.sum(torch.cat(res, 0), 0, True) 37 | -------------------------------------------------------------------------------- /lpipsPyTorch/modules/networks.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence 2 | 3 | from itertools import chain 4 | 5 | import torch 6 | import torch.nn as nn 7 | from torchvision import models 8 | 9 | from .utils import normalize_activation 10 | 11 | 12 | def get_network(net_type: str): 13 | if net_type == 'alex': 14 | return AlexNet() 15 | elif net_type == 'squeeze': 16 | return SqueezeNet() 17 | elif net_type == 'vgg': 18 | return VGG16() 19 | else: 20 | raise NotImplementedError('choose net_type from [alex, squeeze, vgg].') 21 | 22 | 23 | class LinLayers(nn.ModuleList): 24 | def __init__(self, n_channels_list: Sequence[int]): 25 | super(LinLayers, self).__init__([ 26 | nn.Sequential( 27 | nn.Identity(), 28 | nn.Conv2d(nc, 1, 1, 1, 0, bias=False) 29 | ) for nc in n_channels_list 30 | ]) 31 | 32 | for param in self.parameters(): 33 | param.requires_grad = False 34 | 35 | 36 | class BaseNet(nn.Module): 37 | def __init__(self): 38 | super(BaseNet, self).__init__() 39 | 40 | # register buffer 41 | self.register_buffer( 42 | 'mean', torch.Tensor([-.030, -.088, -.188])[None, :, None, None]) 43 | self.register_buffer( 44 | 'std', torch.Tensor([.458, .448, .450])[None, :, None, None]) 45 | 46 | def set_requires_grad(self, state: bool): 47 | for param in chain(self.parameters(), self.buffers()): 48 | param.requires_grad = state 49 | 50 | def z_score(self, x: torch.Tensor): 51 | return (x - self.mean) / self.std 52 | 53 | def forward(self, x: torch.Tensor): 54 | x = self.z_score(x) 55 | 56 | output = [] 57 | for i, (_, layer) in enumerate(self.layers._modules.items(), 1): 58 | x = layer(x) 59 | if i in self.target_layers: 60 | output.append(normalize_activation(x)) 61 | if len(output) == len(self.target_layers): 62 | break 63 | return output 64 | 65 | 66 | class SqueezeNet(BaseNet): 67 | def __init__(self): 68 | super(SqueezeNet, self).__init__() 69 | 70 | self.layers = 
models.squeezenet1_1(True).features 71 | self.target_layers = [2, 5, 8, 10, 11, 12, 13] 72 | self.n_channels_list = [64, 128, 256, 384, 384, 512, 512] 73 | 74 | self.set_requires_grad(False) 75 | 76 | 77 | class AlexNet(BaseNet): 78 | def __init__(self): 79 | super(AlexNet, self).__init__() 80 | 81 | self.layers = models.alexnet(True).features 82 | self.target_layers = [2, 5, 8, 10, 12] 83 | self.n_channels_list = [64, 192, 384, 256, 256] 84 | 85 | self.set_requires_grad(False) 86 | 87 | 88 | class VGG16(BaseNet): 89 | def __init__(self): 90 | super(VGG16, self).__init__() 91 | 92 | self.layers = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1).features 93 | self.target_layers = [4, 9, 16, 23, 30] 94 | self.n_channels_list = [64, 128, 256, 512, 512] 95 | 96 | self.set_requires_grad(False) 97 | -------------------------------------------------------------------------------- /lpipsPyTorch/modules/utils.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch 4 | 5 | 6 | def normalize_activation(x, eps=1e-10): 7 | norm_factor = torch.sqrt(torch.sum(x ** 2, dim=1, keepdim=True)) 8 | return x / (norm_factor + eps) 9 | 10 | 11 | def get_state_dict(net_type: str = 'alex', version: str = '0.1'): 12 | # build url 13 | url = 'https://raw.githubusercontent.com/richzhang/PerceptualSimilarity/' \ 14 | + f'master/lpips/weights/v{version}/{net_type}.pth' 15 | 16 | # download 17 | old_state_dict = torch.hub.load_state_dict_from_url( 18 | url, progress=True, 19 | map_location=None if torch.cuda.is_available() else torch.device('cpu') 20 | ) 21 | 22 | # rename keys 23 | new_state_dict = OrderedDict() 24 | for key, val in old_state_dict.items(): 25 | new_key = key 26 | new_key = new_key.replace('lin', '') 27 | new_key = new_key.replace('model.', '') 28 | new_state_dict[new_key] = val 29 | 30 | return new_state_dict 31 | -------------------------------------------------------------------------------- /metrics.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | from pathlib import Path 13 | import os 14 | from PIL import Image 15 | import torch 16 | import torchvision.transforms.functional as tf 17 | from utils.loss_utils import ssim 18 | from lpipsPyTorch import lpips 19 | import json 20 | from tqdm import tqdm 21 | from utils.image_utils import psnr 22 | from argparse import ArgumentParser 23 | 24 | def readImages(renders_dir, gt_dir): 25 | renders = [] 26 | gts = [] 27 | image_names = [] 28 | for fname in os.listdir(renders_dir): 29 | render = Image.open(renders_dir / fname) 30 | gt = Image.open(gt_dir / fname) 31 | renders.append(tf.to_tensor(render).unsqueeze(0)[:, :3, :, :].cuda()) 32 | gts.append(tf.to_tensor(gt).unsqueeze(0)[:, :3, :, :].cuda()) 33 | image_names.append(fname) 34 | return renders, gts, image_names 35 | 36 | def evaluate(model_paths): 37 | 38 | full_dict = {} 39 | per_view_dict = {} 40 | full_dict_polytopeonly = {} 41 | per_view_dict_polytopeonly = {} 42 | print("") 43 | 44 | for scene_dir in model_paths: 45 | try: 46 | print("Scene:", scene_dir) 47 | full_dict[scene_dir] = {} 48 | per_view_dict[scene_dir] = {} 49 | full_dict_polytopeonly[scene_dir] = {} 50 | per_view_dict_polytopeonly[scene_dir] = {} 51 | 52 | test_dir = Path(scene_dir) / "test" 53 | 54 | for method in os.listdir(test_dir): 55 | print("Method:", method) 56 | 57 | full_dict[scene_dir][method] = {} 58 | per_view_dict[scene_dir][method] = {} 59 | full_dict_polytopeonly[scene_dir][method] = {} 60 | per_view_dict_polytopeonly[scene_dir][method] = {} 61 | 62 | method_dir = test_dir / method 63 | gt_dir = method_dir/ "gt" 64 | renders_dir = method_dir / "renders" 65 | renders, gts, image_names = readImages(renders_dir, gt_dir) 66 | 67 | ssims = [] 68 | psnrs = [] 69 | lpipss = [] 70 | 71 | for idx in tqdm(range(len(renders)), desc="Metric evaluation progress"): 72 | ssims.append(ssim(renders[idx], gts[idx])) 73 | psnrs.append(psnr(renders[idx], gts[idx])) 74 | lpipss.append(lpips(renders[idx], gts[idx], net_type='vgg')) 75 | 76 | print(" SSIM : {:>12.7f}".format(torch.tensor(ssims).mean(), ".5")) 77 | print(" PSNR : {:>12.7f}".format(torch.tensor(psnrs).mean(), ".5")) 78 | print(" LPIPS: {:>12.7f}".format(torch.tensor(lpipss).mean(), ".5")) 79 | print("") 80 | 81 | full_dict[scene_dir][method].update({"SSIM": torch.tensor(ssims).mean().item(), 82 | "PSNR": torch.tensor(psnrs).mean().item(), 83 | "LPIPS": torch.tensor(lpipss).mean().item()}) 84 | per_view_dict[scene_dir][method].update({"SSIM": {name: ssim for ssim, name in zip(torch.tensor(ssims).tolist(), image_names)}, 85 | "PSNR": {name: psnr for psnr, name in zip(torch.tensor(psnrs).tolist(), image_names)}, 86 | "LPIPS": {name: lp for lp, name in zip(torch.tensor(lpipss).tolist(), image_names)}}) 87 | 88 | with open(scene_dir + "/results.json", 'w') as fp: 89 | json.dump(full_dict[scene_dir], fp, indent=True) 90 | with open(scene_dir + "/per_view.json", 'w') as fp: 91 | json.dump(per_view_dict[scene_dir], fp, indent=True) 92 | except: 93 | print("Unable to compute metrics for model", scene_dir) 94 | 95 | if __name__ == "__main__": 96 | device = torch.device("cuda:0") 97 | torch.cuda.set_device(device) 98 | 99 | # Set up command line argument parser 100 | parser = ArgumentParser(description="Training script parameters") 101 | parser.add_argument('--model_paths', '-m', required=True, nargs="+", type=str, default=[]) 102 | args = parser.parse_args() 103 | evaluate(args.model_paths) 104 | 
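For reference, the per-image computation inside evaluate() reduces to the minimal sketch below. The paths are hypothetical placeholders that follow the renders/gt layout written by render.py (test/ours_<iteration>/renders and .../gt); the helpers are the same ones metrics.py imports. evaluate() then averages the three values over all images of a method and writes results.json and per_view.json into the scene directory.

import torchvision.transforms.functional as tf
from PIL import Image
from utils.loss_utils import ssim
from utils.image_utils import psnr
from lpipsPyTorch import lpips

# Hypothetical example paths; evaluate() walks <model_path>/test/<method>/renders and .../gt instead.
render_path = "output/lerf/teatime/test/ours_30000/renders/00000.png"
gt_path = "output/lerf/teatime/test/ours_30000/gt/00000.png"

# Load both images as [1, 3, H, W] CUDA tensors, exactly as readImages() does.
render = tf.to_tensor(Image.open(render_path)).unsqueeze(0)[:, :3, :, :].cuda()
gt = tf.to_tensor(Image.open(gt_path)).unsqueeze(0)[:, :3, :, :].cuda()

print("SSIM :", ssim(render, gt).item())
print("PSNR :", psnr(render, gt).mean().item())
print("LPIPS:", lpips(render, gt, net_type="vgg").item())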
-------------------------------------------------------------------------------- /render.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | from scene import Scene 14 | import os 15 | from tqdm import tqdm 16 | from os import makedirs 17 | from gaussian_renderer import render 18 | import torchvision 19 | from utils.general_utils import safe_state 20 | from argparse import ArgumentParser 21 | from arguments import ModelParams, PipelineParams, get_combined_args 22 | from gaussian_renderer import GaussianModel 23 | try: 24 | from diff_gaussian_rasterization import SparseGaussianAdam 25 | SPARSE_ADAM_AVAILABLE = True 26 | except: 27 | SPARSE_ADAM_AVAILABLE = False 28 | 29 | 30 | def render_set(model_path, name, iteration, views, gaussians, pipeline, background, train_test_exp, separate_sh): 31 | render_path = os.path.join(model_path, name, "ours_{}".format(iteration), "renders") 32 | gts_path = os.path.join(model_path, name, "ours_{}".format(iteration), "gt") 33 | 34 | makedirs(render_path, exist_ok=True) 35 | makedirs(gts_path, exist_ok=True) 36 | 37 | for idx, view in enumerate(tqdm(views, desc="Rendering progress")): 38 | rendering = render(view, gaussians, pipeline, background, use_trained_exp=train_test_exp, separate_sh=separate_sh)["render"] 39 | gt = view.original_image[0:3, :, :] 40 | 41 | torchvision.utils.save_image(rendering, os.path.join(render_path, '{0:05d}'.format(idx) + ".png")) 42 | torchvision.utils.save_image(gt, os.path.join(gts_path, '{0:05d}'.format(idx) + ".png")) 43 | 44 | def render_sets(dataset : ModelParams, iteration : int, pipeline : PipelineParams, skip_train : bool, skip_test : bool, separate_sh: bool): 45 | with torch.no_grad(): 46 | gaussians = GaussianModel(dataset.sh_degree) 47 | scene = Scene(dataset, gaussians, load_iteration=iteration, shuffle=False) 48 | 49 | bg_color = [1,1,1] if dataset.white_background else [0, 0, 0] 50 | background = torch.tensor(bg_color, dtype=torch.float32, device="cuda") 51 | 52 | if not skip_train: 53 | render_set(dataset.model_path, "train", scene.loaded_iter, scene.getTrainCameras(), gaussians, pipeline, background, dataset.train_test_exp, separate_sh) 54 | 55 | if not skip_test: 56 | render_set(dataset.model_path, "test", scene.loaded_iter, scene.getTestCameras(), gaussians, pipeline, background, dataset.train_test_exp, separate_sh) 57 | 58 | if __name__ == "__main__": 59 | # Set up command line argument parser 60 | parser = ArgumentParser(description="Testing script parameters") 61 | model = ModelParams(parser, sentinel=True) 62 | pipeline = PipelineParams(parser) 63 | parser.add_argument("--iteration", default=-1, type=int) 64 | parser.add_argument("--skip_train", action="store_true") 65 | parser.add_argument("--skip_test", action="store_true") 66 | parser.add_argument("--quiet", action="store_true") 67 | args = get_combined_args(parser) 68 | print("Rendering " + args.model_path) 69 | 70 | # Initialize system state (RNG) 71 | safe_state(args.quiet) 72 | 73 | render_sets(model.extract(args), args.iteration, pipeline.extract(args), args.skip_train, args.skip_test, SPARSE_ADAM_AVAILABLE) 
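render.py above exercises the RGB path of render(). The same function (see gaussian_renderer/__init__.py) also has a feature path: with include_feature=True it rasterizes the per-Gaussian language features instead of colors and returns a D-channel feature image under "render". The sketch below is only an illustration of that path, not the repository's feature_map_renderer.py; it assumes view, gaussians, pipeline and background are prepared as in render_sets() above and that language features have already been attached to the model (e.g. by gaussian_feature_extractor.py), and it projects the feature image to three channels with PCA for quick visual inspection.

import numpy as np
import torch
from sklearn.decomposition import PCA
from gaussian_renderer import render

with torch.no_grad():
    # With include_feature=True, "render" is a [D, H, W] feature image.
    feat = render(view, gaussians, pipeline, background, include_feature=True)["render"]

D, H, W = feat.shape
flat = feat.permute(1, 2, 0).reshape(-1, D).cpu().numpy()         # [H*W, D]
proj = PCA(n_components=3).fit_transform(flat)                    # [H*W, 3]
proj = (proj - proj.min(axis=0)) / (np.ptp(proj, axis=0) + 1e-8)  # scale each channel to [0, 1]
feature_vis = torch.from_numpy(proj).reshape(H, W, 3).permute(2, 0, 1).float()

The resulting feature_vis tensor can then be written out with torchvision.utils.save_image, just like the RGB renders above.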
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | open-clip-torch 2 | plyfile 3 | tqdm 4 | opencv-python 5 | tensorboard 6 | jaxtyping 7 | matplotlib 8 | typing 9 | pathlib 10 | mediapy 11 | scikit-learn 12 | -------------------------------------------------------------------------------- /run_3DOVS.sh: -------------------------------------------------------------------------------- 1 | DATASET_NAME="bench" 2 | OUTPUT_DIR="/home/joanna_cheng/workspace/occamlgs_new/output/3DOVS" 3 | 4 | cd ~/workspace/occamlgs_new 5 | 6 | python train.py -s /scratch/joanna_cheng/3DOVS-preprocess-full/$DATASET_NAME -m $OUTPUT_DIR/$DATASET_NAME --iterations 30000 7 | python render.py -m $OUTPUT_DIR/$DATASET_NAME --iteration 30000 8 | 9 | python gaussian_feature_extractor.py -m $OUTPUT_DIR/$DATASET_NAME --iteration 30000 --eval --feature_level 3 10 | python feature_map_renderer.py -m $OUTPUT_DIR/$DATASET_NAME --iteration 30000 --eval --feature_level 1 --skip_train -------------------------------------------------------------------------------- /run_lerf.sh: -------------------------------------------------------------------------------- 1 | DATASET_NAME="teatime" 2 | OUTPUT_DIR="/home/joanna_cheng/workspace/occamlgs/output/lerf" 3 | 4 | cd ~/workspace/occamlgs 5 | 6 | python train.py -s /scratch/joanna_cheng/lerf_ovs/$DATASET_NAME -m $OUTPUT_DIR/$DATASET_NAME --iterations 30000 7 | python render.py -m $OUTPUT_DIR/$DATASET_NAME --iteration 30000 8 | 9 | python gaussian_feature_extractor.py -m $OUTPUT_DIR/$DATASET_NAME --iteration 30000 --eval --feature_level 1 10 | python feature_map_renderer.py -m $OUTPUT_DIR/$DATASET_NAME --iteration 30000 --eval --feature_level 1 -------------------------------------------------------------------------------- /scene/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import os 13 | import random 14 | import json 15 | from utils.system_utils import searchForMaxIteration 16 | from scene.dataset_readers import sceneLoadTypeCallbacks 17 | from scene.gaussian_model import GaussianModel 18 | from arguments import ModelParams 19 | from utils.camera_utils import cameraList_from_camInfos, camera_to_JSON 20 | 21 | class Scene: 22 | 23 | gaussians : GaussianModel 24 | 25 | def __init__(self, args : ModelParams, gaussians : GaussianModel, load_iteration=None, shuffle=True, resolution_scales=[1.0], include_feature=False): 26 | """ 27 | :param path: Path to colmap scene main folder.
28 | """ 29 | self.model_path = args.model_path 30 | self.loaded_iter = None 31 | self.gaussians = gaussians 32 | 33 | if load_iteration: 34 | if load_iteration == -1: 35 | self.loaded_iter = searchForMaxIteration(os.path.join(self.model_path, "point_cloud")) 36 | else: 37 | self.loaded_iter = load_iteration 38 | print("Loading trained model at iteration {}".format(self.loaded_iter)) 39 | 40 | self.train_cameras = {} 41 | self.test_cameras = {} 42 | if include_feature: 43 | llffhold=None 44 | else: 45 | llffhold=8 46 | if os.path.exists(os.path.join(args.source_path, "sparse")): 47 | scene_info = sceneLoadTypeCallbacks["Colmap"](args.source_path, args.images, args.depths, args.eval, args.train_test_exp, llffhold=llffhold) 48 | elif os.path.exists(os.path.join(args.source_path, "transforms_train.json")): 49 | print("Found transforms_train.json file, assuming Blender data set!") 50 | scene_info = sceneLoadTypeCallbacks["Blender"](args.source_path, args.white_background, args.depths, args.eval) 51 | else: 52 | assert False, "Could not recognize scene type!" 53 | 54 | if not self.loaded_iter: 55 | with open(scene_info.ply_path, 'rb') as src_file, open(os.path.join(self.model_path, "input.ply") , 'wb') as dest_file: 56 | dest_file.write(src_file.read()) 57 | json_cams = [] 58 | camlist = [] 59 | if scene_info.test_cameras: 60 | camlist.extend(scene_info.test_cameras) 61 | if scene_info.train_cameras: 62 | camlist.extend(scene_info.train_cameras) 63 | for id, cam in enumerate(camlist): 64 | json_cams.append(camera_to_JSON(id, cam)) 65 | with open(os.path.join(self.model_path, "cameras.json"), 'w') as file: 66 | json.dump(json_cams, file) 67 | 68 | if shuffle: 69 | random.shuffle(scene_info.train_cameras) # Multi-res consistent random shuffling 70 | random.shuffle(scene_info.test_cameras) # Multi-res consistent random shuffling 71 | 72 | self.cameras_extent = scene_info.nerf_normalization["radius"] 73 | 74 | for resolution_scale in resolution_scales: 75 | print("Loading Training Cameras") 76 | self.train_cameras[resolution_scale] = cameraList_from_camInfos(scene_info.train_cameras, resolution_scale, args, scene_info.is_nerf_synthetic, False) 77 | print("Loading Test Cameras") 78 | self.test_cameras[resolution_scale] = cameraList_from_camInfos(scene_info.test_cameras, resolution_scale, args, scene_info.is_nerf_synthetic, True) 79 | 80 | if self.loaded_iter: 81 | self.gaussians.load_ply(os.path.join(self.model_path, 82 | "point_cloud", 83 | "iteration_" + str(self.loaded_iter), 84 | "point_cloud.ply"), args.train_test_exp) 85 | else: 86 | self.gaussians.create_from_pcd(scene_info.point_cloud, scene_info.train_cameras, self.cameras_extent) 87 | 88 | def save(self, iteration): 89 | point_cloud_path = os.path.join(self.model_path, "point_cloud/iteration_{}".format(iteration)) 90 | self.gaussians.save_ply(os.path.join(point_cloud_path, "point_cloud.ply")) 91 | 92 | def getTrainCameras(self, scale=1.0): 93 | return self.train_cameras[scale] 94 | 95 | def getTestCameras(self, scale=1.0): 96 | return self.test_cameras[scale] 97 | def getCameras(self): 98 | return self.cameras 99 | -------------------------------------------------------------------------------- /scene/cameras.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 
5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | from torch import nn 14 | import numpy as np 15 | from utils.graphics_utils import getWorld2View2, getProjectionMatrix 16 | from utils.general_utils import PILtoTorch 17 | import cv2 18 | import os 19 | import pickle 20 | 21 | class Camera(nn.Module): 22 | def __init__(self, resolution, colmap_id, R, T, FoVx, FoVy, depth_params, image, invdepthmap, 23 | image_name, uid, 24 | trans=np.array([0.0, 0.0, 0.0]), scale=1.0, data_device = "cuda", 25 | train_test_exp = False, is_test_dataset = False, is_test_view = False 26 | ): 27 | super(Camera, self).__init__() 28 | 29 | self.uid = uid 30 | self.colmap_id = colmap_id 31 | self.R = R 32 | self.T = T 33 | self.FoVx = FoVx 34 | self.FoVy = FoVy 35 | self.image_name = image_name 36 | 37 | try: 38 | self.data_device = torch.device(data_device) 39 | except Exception as e: 40 | print(e) 41 | print(f"[Warning] Custom device {data_device} failed, fallback to default cuda device" ) 42 | self.data_device = torch.device("cuda") 43 | 44 | resized_image_rgb = PILtoTorch(image, resolution) 45 | gt_image = resized_image_rgb[:3, ...] 46 | self.alpha_mask = None 47 | if resized_image_rgb.shape[0] == 4: 48 | self.alpha_mask = resized_image_rgb[3:4, ...].to(self.data_device) 49 | else: 50 | self.alpha_mask = torch.ones_like(resized_image_rgb[0:1, ...].to(self.data_device)) 51 | 52 | if train_test_exp and is_test_view: 53 | if is_test_dataset: 54 | self.alpha_mask[..., :self.alpha_mask.shape[-1] // 2] = 0 55 | else: 56 | self.alpha_mask[..., self.alpha_mask.shape[-1] // 2:] = 0 57 | 58 | self.original_image = gt_image.clamp(0.0, 1.0).to(self.data_device) 59 | self.image_width = self.original_image.shape[2] 60 | self.image_height = self.original_image.shape[1] 61 | 62 | self.invdepthmap = None 63 | self.depth_reliable = False 64 | if invdepthmap is not None: 65 | self.depth_mask = torch.ones_like(self.alpha_mask) 66 | self.invdepthmap = cv2.resize(invdepthmap, resolution) 67 | self.invdepthmap[self.invdepthmap < 0] = 0 68 | self.depth_reliable = True 69 | 70 | if depth_params is not None: 71 | if depth_params["scale"] < 0.2 * depth_params["med_scale"] or depth_params["scale"] > 5 * depth_params["med_scale"]: 72 | self.depth_reliable = False 73 | self.depth_mask *= 0 74 | 75 | if depth_params["scale"] > 0: 76 | self.invdepthmap = self.invdepthmap * depth_params["scale"] + depth_params["offset"] 77 | 78 | if self.invdepthmap.ndim != 2: 79 | self.invdepthmap = self.invdepthmap[..., 0] 80 | self.invdepthmap = torch.from_numpy(self.invdepthmap[None]).to(self.data_device) 81 | 82 | self.zfar = 100.0 83 | self.znear = 0.01 84 | 85 | self.trans = trans 86 | self.scale = scale 87 | 88 | self.world_view_transform = torch.tensor(getWorld2View2(R, T, trans, scale)).transpose(0, 1).cuda() 89 | self.projection_matrix = getProjectionMatrix(znear=self.znear, zfar=self.zfar, fovX=self.FoVx, fovY=self.FoVy).transpose(0,1).cuda() 90 | self.full_proj_transform = (self.world_view_transform.unsqueeze(0).bmm(self.projection_matrix.unsqueeze(0))).squeeze(0) 91 | self.camera_center = self.world_view_transform.inverse()[3, :3] 92 | 93 | y, x = torch.meshgrid(torch.arange(0, self.image_height, device='cuda'), torch.arange(0, self.image_width, device='cuda')) 94 | self.x = x.reshape(-1, 1) 95 | self.y = y.reshape(-1, 1) 96 | 97 | def get_language_feature(self, 
language_feature_dir, feature_level): 98 | 99 | language_feature_name = os.path.join(language_feature_dir, self.image_name.split('.')[0]) 100 | 101 | seg_map = torch.from_numpy(np.load(language_feature_name + '_s.npy')) # seg_map: torch.Size([4, 730, 988]) 102 | feature_map = torch.from_numpy(np.load(language_feature_name + '_f.npy')) # feature_map: torch.Size([281, 512]) 103 | seg_map = seg_map.cuda() 104 | feature_map = feature_map.cuda() 105 | 106 | seg = seg_map[:, self.y, self.x].squeeze(-1).long() 107 | mask = seg != -1 108 | if feature_level == 0: # default 109 | point_feature1 = feature_map[seg[0:1]].squeeze(0) 110 | mask = mask[0:1].reshape(1, self.image_height, self.image_width) 111 | elif feature_level == 1: # s 112 | point_feature1 = feature_map[seg[1:2]].squeeze(0) 113 | mask = mask[1:2].reshape(1, self.image_height, self.image_width) 114 | elif feature_level == 2: # m 115 | point_feature1 = feature_map[seg[2:3]].squeeze(0) 116 | mask = mask[2:3].reshape(1, self.image_height, self.image_width) 117 | elif feature_level == 3: # l 118 | point_feature1 = feature_map[seg[3:4]].squeeze(0) 119 | mask = mask[3:4].reshape(1, self.image_height, self.image_width) 120 | else: 121 | raise ValueError("feature_level=", feature_level) 122 | 123 | point_feature = point_feature1.reshape(self.image_height, self.image_width, -1).permute(2, 0, 1) 124 | 125 | return point_feature, mask 126 | 127 | class MiniCam: 128 | def __init__(self, width, height, fovy, fovx, znear, zfar, world_view_transform, full_proj_transform): 129 | self.image_width = width 130 | self.image_height = height 131 | self.FoVy = fovy 132 | self.FoVx = fovx 133 | self.znear = znear 134 | self.zfar = zfar 135 | self.world_view_transform = world_view_transform 136 | self.full_proj_transform = full_proj_transform 137 | view_inv = torch.inverse(self.world_view_transform) 138 | self.camera_center = view_inv[3][:3] 139 | 140 | -------------------------------------------------------------------------------- /scene/colmap_loader.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import numpy as np 13 | import collections 14 | import struct 15 | 16 | CameraModel = collections.namedtuple( 17 | "CameraModel", ["model_id", "model_name", "num_params"]) 18 | Camera = collections.namedtuple( 19 | "Camera", ["id", "model", "width", "height", "params"]) 20 | BaseImage = collections.namedtuple( 21 | "Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"]) 22 | Point3D = collections.namedtuple( 23 | "Point3D", ["id", "xyz", "rgb", "error", "image_ids", "point2D_idxs"]) 24 | CAMERA_MODELS = { 25 | CameraModel(model_id=0, model_name="SIMPLE_PINHOLE", num_params=3), 26 | CameraModel(model_id=1, model_name="PINHOLE", num_params=4), 27 | CameraModel(model_id=2, model_name="SIMPLE_RADIAL", num_params=4), 28 | CameraModel(model_id=3, model_name="RADIAL", num_params=5), 29 | CameraModel(model_id=4, model_name="OPENCV", num_params=8), 30 | CameraModel(model_id=5, model_name="OPENCV_FISHEYE", num_params=8), 31 | CameraModel(model_id=6, model_name="FULL_OPENCV", num_params=12), 32 | CameraModel(model_id=7, model_name="FOV", num_params=5), 33 | CameraModel(model_id=8, model_name="SIMPLE_RADIAL_FISHEYE", num_params=4), 34 | CameraModel(model_id=9, model_name="RADIAL_FISHEYE", num_params=5), 35 | CameraModel(model_id=10, model_name="THIN_PRISM_FISHEYE", num_params=12) 36 | } 37 | CAMERA_MODEL_IDS = dict([(camera_model.model_id, camera_model) 38 | for camera_model in CAMERA_MODELS]) 39 | CAMERA_MODEL_NAMES = dict([(camera_model.model_name, camera_model) 40 | for camera_model in CAMERA_MODELS]) 41 | 42 | 43 | def qvec2rotmat(qvec): 44 | return np.array([ 45 | [1 - 2 * qvec[2]**2 - 2 * qvec[3]**2, 46 | 2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3], 47 | 2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2]], 48 | [2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3], 49 | 1 - 2 * qvec[1]**2 - 2 * qvec[3]**2, 50 | 2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1]], 51 | [2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2], 52 | 2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1], 53 | 1 - 2 * qvec[1]**2 - 2 * qvec[2]**2]]) 54 | 55 | def rotmat2qvec(R): 56 | Rxx, Ryx, Rzx, Rxy, Ryy, Rzy, Rxz, Ryz, Rzz = R.flat 57 | K = np.array([ 58 | [Rxx - Ryy - Rzz, 0, 0, 0], 59 | [Ryx + Rxy, Ryy - Rxx - Rzz, 0, 0], 60 | [Rzx + Rxz, Rzy + Ryz, Rzz - Rxx - Ryy, 0], 61 | [Ryz - Rzy, Rzx - Rxz, Rxy - Ryx, Rxx + Ryy + Rzz]]) / 3.0 62 | eigvals, eigvecs = np.linalg.eigh(K) 63 | qvec = eigvecs[[3, 0, 1, 2], np.argmax(eigvals)] 64 | if qvec[0] < 0: 65 | qvec *= -1 66 | return qvec 67 | 68 | class Image(BaseImage): 69 | def qvec2rotmat(self): 70 | return qvec2rotmat(self.qvec) 71 | 72 | def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"): 73 | """Read and unpack the next bytes from a binary file. 74 | :param fid: 75 | :param num_bytes: Sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc. 76 | :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}. 77 | :param endian_character: Any of {@, =, <, >, !} 78 | :return: Tuple of read and unpacked values. 
79 | """ 80 | data = fid.read(num_bytes) 81 | return struct.unpack(endian_character + format_char_sequence, data) 82 | 83 | def read_points3D_text(path): 84 | """ 85 | see: src/base/reconstruction.cc 86 | void Reconstruction::ReadPoints3DText(const std::string& path) 87 | void Reconstruction::WritePoints3DText(const std::string& path) 88 | """ 89 | xyzs = None 90 | rgbs = None 91 | errors = None 92 | num_points = 0 93 | with open(path, "r") as fid: 94 | while True: 95 | line = fid.readline() 96 | if not line: 97 | break 98 | line = line.strip() 99 | if len(line) > 0 and line[0] != "#": 100 | num_points += 1 101 | 102 | 103 | xyzs = np.empty((num_points, 3)) 104 | rgbs = np.empty((num_points, 3)) 105 | errors = np.empty((num_points, 1)) 106 | count = 0 107 | with open(path, "r") as fid: 108 | while True: 109 | line = fid.readline() 110 | if not line: 111 | break 112 | line = line.strip() 113 | if len(line) > 0 and line[0] != "#": 114 | elems = line.split() 115 | xyz = np.array(tuple(map(float, elems[1:4]))) 116 | rgb = np.array(tuple(map(int, elems[4:7]))) 117 | error = np.array(float(elems[7])) 118 | xyzs[count] = xyz 119 | rgbs[count] = rgb 120 | errors[count] = error 121 | count += 1 122 | 123 | return xyzs, rgbs, errors 124 | 125 | def read_points3D_binary(path_to_model_file): 126 | """ 127 | see: src/base/reconstruction.cc 128 | void Reconstruction::ReadPoints3DBinary(const std::string& path) 129 | void Reconstruction::WritePoints3DBinary(const std::string& path) 130 | """ 131 | 132 | 133 | with open(path_to_model_file, "rb") as fid: 134 | num_points = read_next_bytes(fid, 8, "Q")[0] 135 | 136 | xyzs = np.empty((num_points, 3)) 137 | rgbs = np.empty((num_points, 3)) 138 | errors = np.empty((num_points, 1)) 139 | 140 | for p_id in range(num_points): 141 | binary_point_line_properties = read_next_bytes( 142 | fid, num_bytes=43, format_char_sequence="QdddBBBd") 143 | xyz = np.array(binary_point_line_properties[1:4]) 144 | rgb = np.array(binary_point_line_properties[4:7]) 145 | error = np.array(binary_point_line_properties[7]) 146 | track_length = read_next_bytes( 147 | fid, num_bytes=8, format_char_sequence="Q")[0] 148 | track_elems = read_next_bytes( 149 | fid, num_bytes=8*track_length, 150 | format_char_sequence="ii"*track_length) 151 | xyzs[p_id] = xyz 152 | rgbs[p_id] = rgb 153 | errors[p_id] = error 154 | return xyzs, rgbs, errors 155 | 156 | def read_intrinsics_text(path): 157 | """ 158 | Taken from https://github.com/colmap/colmap/blob/dev/scripts/python/read_write_model.py 159 | """ 160 | cameras = {} 161 | with open(path, "r") as fid: 162 | while True: 163 | line = fid.readline() 164 | if not line: 165 | break 166 | line = line.strip() 167 | if len(line) > 0 and line[0] != "#": 168 | elems = line.split() 169 | camera_id = int(elems[0]) 170 | model = elems[1] 171 | assert model == "PINHOLE", "While the loader support other types, the rest of the code assumes PINHOLE" 172 | width = int(elems[2]) 173 | height = int(elems[3]) 174 | params = np.array(tuple(map(float, elems[4:]))) 175 | cameras[camera_id] = Camera(id=camera_id, model=model, 176 | width=width, height=height, 177 | params=params) 178 | return cameras 179 | 180 | def read_extrinsics_binary(path_to_model_file): 181 | """ 182 | see: src/base/reconstruction.cc 183 | void Reconstruction::ReadImagesBinary(const std::string& path) 184 | void Reconstruction::WriteImagesBinary(const std::string& path) 185 | """ 186 | images = {} 187 | with open(path_to_model_file, "rb") as fid: 188 | num_reg_images = read_next_bytes(fid, 8, 
"Q")[0] 189 | for _ in range(num_reg_images): 190 | binary_image_properties = read_next_bytes( 191 | fid, num_bytes=64, format_char_sequence="idddddddi") 192 | image_id = binary_image_properties[0] 193 | qvec = np.array(binary_image_properties[1:5]) 194 | tvec = np.array(binary_image_properties[5:8]) 195 | camera_id = binary_image_properties[8] 196 | image_name = "" 197 | current_char = read_next_bytes(fid, 1, "c")[0] 198 | while current_char != b"\x00": # look for the ASCII 0 entry 199 | image_name += current_char.decode("utf-8") 200 | current_char = read_next_bytes(fid, 1, "c")[0] 201 | num_points2D = read_next_bytes(fid, num_bytes=8, 202 | format_char_sequence="Q")[0] 203 | x_y_id_s = read_next_bytes(fid, num_bytes=24*num_points2D, 204 | format_char_sequence="ddq"*num_points2D) 205 | xys = np.column_stack([tuple(map(float, x_y_id_s[0::3])), 206 | tuple(map(float, x_y_id_s[1::3]))]) 207 | point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3]))) 208 | images[image_id] = Image( 209 | id=image_id, qvec=qvec, tvec=tvec, 210 | camera_id=camera_id, name=image_name, 211 | xys=xys, point3D_ids=point3D_ids) 212 | return images 213 | 214 | 215 | def read_intrinsics_binary(path_to_model_file): 216 | """ 217 | see: src/base/reconstruction.cc 218 | void Reconstruction::WriteCamerasBinary(const std::string& path) 219 | void Reconstruction::ReadCamerasBinary(const std::string& path) 220 | """ 221 | cameras = {} 222 | with open(path_to_model_file, "rb") as fid: 223 | num_cameras = read_next_bytes(fid, 8, "Q")[0] 224 | for _ in range(num_cameras): 225 | camera_properties = read_next_bytes( 226 | fid, num_bytes=24, format_char_sequence="iiQQ") 227 | camera_id = camera_properties[0] 228 | model_id = camera_properties[1] 229 | model_name = CAMERA_MODEL_IDS[camera_properties[1]].model_name 230 | width = camera_properties[2] 231 | height = camera_properties[3] 232 | num_params = CAMERA_MODEL_IDS[model_id].num_params 233 | params = read_next_bytes(fid, num_bytes=8*num_params, 234 | format_char_sequence="d"*num_params) 235 | cameras[camera_id] = Camera(id=camera_id, 236 | model=model_name, 237 | width=width, 238 | height=height, 239 | params=np.array(params)) 240 | assert len(cameras) == num_cameras 241 | return cameras 242 | 243 | 244 | def read_extrinsics_text(path): 245 | """ 246 | Taken from https://github.com/colmap/colmap/blob/dev/scripts/python/read_write_model.py 247 | """ 248 | images = {} 249 | with open(path, "r") as fid: 250 | while True: 251 | line = fid.readline() 252 | if not line: 253 | break 254 | line = line.strip() 255 | if len(line) > 0 and line[0] != "#": 256 | elems = line.split() 257 | image_id = int(elems[0]) 258 | qvec = np.array(tuple(map(float, elems[1:5]))) 259 | tvec = np.array(tuple(map(float, elems[5:8]))) 260 | camera_id = int(elems[8]) 261 | image_name = elems[9] 262 | elems = fid.readline().split() 263 | xys = np.column_stack([tuple(map(float, elems[0::3])), 264 | tuple(map(float, elems[1::3]))]) 265 | point3D_ids = np.array(tuple(map(int, elems[2::3]))) 266 | images[image_id] = Image( 267 | id=image_id, qvec=qvec, tvec=tvec, 268 | camera_id=camera_id, name=image_name, 269 | xys=xys, point3D_ids=point3D_ids) 270 | return images 271 | 272 | 273 | def read_colmap_bin_array(path): 274 | """ 275 | Taken from https://github.com/colmap/colmap/blob/dev/scripts/python/read_dense.py 276 | 277 | :param path: path to the colmap binary file. 
278 | :return: nd array with the floating point values in the value 279 | """ 280 | with open(path, "rb") as fid: 281 | width, height, channels = np.genfromtxt(fid, delimiter="&", max_rows=1, 282 | usecols=(0, 1, 2), dtype=int) 283 | fid.seek(0) 284 | num_delimiter = 0 285 | byte = fid.read(1) 286 | while True: 287 | if byte == b"&": 288 | num_delimiter += 1 289 | if num_delimiter >= 3: 290 | break 291 | byte = fid.read(1) 292 | array = np.fromfile(fid, np.float32) 293 | array = array.reshape((width, height, channels), order="F") 294 | return np.transpose(array, (1, 0, 2)).squeeze() 295 | -------------------------------------------------------------------------------- /scene/dataset_readers.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import os 13 | import sys 14 | from PIL import Image 15 | from typing import NamedTuple 16 | from scene.colmap_loader import read_extrinsics_text, read_intrinsics_text, qvec2rotmat, \ 17 | read_extrinsics_binary, read_intrinsics_binary, read_points3D_binary, read_points3D_text 18 | from utils.graphics_utils import getWorld2View2, focal2fov, fov2focal 19 | import numpy as np 20 | import json 21 | from pathlib import Path 22 | from plyfile import PlyData, PlyElement 23 | from utils.sh_utils import SH2RGB 24 | from scene.gaussian_model import BasicPointCloud 25 | 26 | class CameraInfo(NamedTuple): 27 | uid: int 28 | R: np.array 29 | T: np.array 30 | FovY: np.array 31 | FovX: np.array 32 | depth_params: dict 33 | image_path: str 34 | image_name: str 35 | depth_path: str 36 | width: int 37 | height: int 38 | is_test: bool 39 | 40 | class SceneInfo(NamedTuple): 41 | point_cloud: BasicPointCloud 42 | train_cameras: list 43 | test_cameras: list 44 | nerf_normalization: dict 45 | ply_path: str 46 | is_nerf_synthetic: bool 47 | 48 | def getNerfppNorm(cam_info): 49 | def get_center_and_diag(cam_centers): 50 | cam_centers = np.hstack(cam_centers) 51 | avg_cam_center = np.mean(cam_centers, axis=1, keepdims=True) 52 | center = avg_cam_center 53 | dist = np.linalg.norm(cam_centers - center, axis=0, keepdims=True) 54 | diagonal = np.max(dist) 55 | return center.flatten(), diagonal 56 | 57 | cam_centers = [] 58 | 59 | for cam in cam_info: 60 | W2C = getWorld2View2(cam.R, cam.T) 61 | C2W = np.linalg.inv(W2C) 62 | cam_centers.append(C2W[:3, 3:4]) 63 | 64 | center, diagonal = get_center_and_diag(cam_centers) 65 | radius = diagonal * 1.1 66 | 67 | translate = -center 68 | 69 | return {"translate": translate, "radius": radius} 70 | 71 | def readColmapCameras(cam_extrinsics, cam_intrinsics, depths_params, images_folder, depths_folder, test_cam_names_list): 72 | cam_infos = [] 73 | for idx, key in enumerate(cam_extrinsics): 74 | sys.stdout.write('\r') 75 | # the exact output you're looking for: 76 | sys.stdout.write("Reading camera {}/{}".format(idx+1, len(cam_extrinsics))) 77 | sys.stdout.flush() 78 | 79 | extr = cam_extrinsics[key] 80 | intr = cam_intrinsics[extr.camera_id] 81 | height = intr.height 82 | width = intr.width 83 | 84 | uid = intr.id 85 | R = np.transpose(qvec2rotmat(extr.qvec)) 86 | T = np.array(extr.tvec) 87 | 88 | if intr.model=="SIMPLE_PINHOLE": 89 | focal_length_x = intr.params[0] 90 | FovY = 
focal2fov(focal_length_x, height) 91 | FovX = focal2fov(focal_length_x, width) 92 | elif intr.model=="PINHOLE": 93 | focal_length_x = intr.params[0] 94 | focal_length_y = intr.params[1] 95 | FovY = focal2fov(focal_length_y, height) 96 | FovX = focal2fov(focal_length_x, width) 97 | else: 98 | assert False, "Colmap camera model not handled: only undistorted datasets (PINHOLE or SIMPLE_PINHOLE cameras) supported!" 99 | 100 | n_remove = len(extr.name.split('.')[-1]) + 1 101 | depth_params = None 102 | if depths_params is not None: 103 | try: 104 | depth_params = depths_params[extr.name[:-n_remove]] 105 | except: 106 | print("\n", key, "not found in depths_params") 107 | 108 | image_path = os.path.join(images_folder, extr.name) 109 | image_name = extr.name 110 | depth_path = os.path.join(depths_folder, f"{extr.name[:-n_remove]}.png") if depths_folder != "" else "" 111 | 112 | cam_info = CameraInfo(uid=uid, R=R, T=T, FovY=FovY, FovX=FovX, depth_params=depth_params, 113 | image_path=image_path, image_name=image_name, depth_path=depth_path, 114 | width=width, height=height, is_test=image_name in test_cam_names_list) 115 | cam_infos.append(cam_info) 116 | 117 | sys.stdout.write('\n') 118 | return cam_infos 119 | 120 | def fetchPly(path): 121 | plydata = PlyData.read(path) 122 | vertices = plydata['vertex'] 123 | positions = np.vstack([vertices['x'], vertices['y'], vertices['z']]).T 124 | colors = np.vstack([vertices['red'], vertices['green'], vertices['blue']]).T / 255.0 125 | normals = np.vstack([vertices['nx'], vertices['ny'], vertices['nz']]).T 126 | return BasicPointCloud(points=positions, colors=colors, normals=normals) 127 | 128 | def storePly(path, xyz, rgb): 129 | # Define the dtype for the structured array 130 | dtype = [('x', 'f4'), ('y', 'f4'), ('z', 'f4'), 131 | ('nx', 'f4'), ('ny', 'f4'), ('nz', 'f4'), 132 | ('red', 'u1'), ('green', 'u1'), ('blue', 'u1')] 133 | 134 | normals = np.zeros_like(xyz) 135 | 136 | elements = np.empty(xyz.shape[0], dtype=dtype) 137 | attributes = np.concatenate((xyz, normals, rgb), axis=1) 138 | elements[:] = list(map(tuple, attributes)) 139 | 140 | # Create the PlyData object and write to file 141 | vertex_element = PlyElement.describe(elements, 'vertex') 142 | ply_data = PlyData([vertex_element]) 143 | ply_data.write(path) 144 | 145 | def readColmapSceneInfo(path, images, depths, eval, train_test_exp, llffhold=8): 146 | try: 147 | cameras_extrinsic_file = os.path.join(path, "sparse/0", "images.bin") 148 | cameras_intrinsic_file = os.path.join(path, "sparse/0", "cameras.bin") 149 | cam_extrinsics = read_extrinsics_binary(cameras_extrinsic_file) 150 | cam_intrinsics = read_intrinsics_binary(cameras_intrinsic_file) 151 | except: 152 | cameras_extrinsic_file = os.path.join(path, "sparse/0", "images.txt") 153 | cameras_intrinsic_file = os.path.join(path, "sparse/0", "cameras.txt") 154 | cam_extrinsics = read_extrinsics_text(cameras_extrinsic_file) 155 | cam_intrinsics = read_intrinsics_text(cameras_intrinsic_file) 156 | 157 | depth_params_file = os.path.join(path, "sparse/0", "depth_params.json") 158 | ## if depth_params_file isnt there AND depths file is here -> throw error 159 | depths_params = None 160 | if depths != "": 161 | try: 162 | with open(depth_params_file, "r") as f: 163 | depths_params = json.load(f) 164 | all_scales = np.array([depths_params[key]["scale"] for key in depths_params]) 165 | if (all_scales > 0).sum(): 166 | med_scale = np.median(all_scales[all_scales > 0]) 167 | else: 168 | med_scale = 0 169 | for key in depths_params: 170 | 
depths_params[key]["med_scale"] = med_scale 171 | 172 | except FileNotFoundError: 173 | print(f"Error: depth_params.json file not found at path '{depth_params_file}'.") 174 | sys.exit(1) 175 | except Exception as e: 176 | print(f"An unexpected error occurred when trying to open depth_params.json file: {e}") 177 | sys.exit(1) 178 | 179 | if eval: 180 | if "360" in path: 181 | llffhold = 8 182 | if llffhold: 183 | print("------------LLFF HOLD-------------") 184 | cam_names = [cam_extrinsics[cam_id].name for cam_id in cam_extrinsics] 185 | cam_names = sorted(cam_names) 186 | test_cam_names_list = [name for idx, name in enumerate(cam_names) if idx % llffhold == 0] 187 | else: 188 | with open(os.path.join(path, "sparse/0", "test.txt"), 'r') as file: 189 | test_cam_names_list = [line.strip() for line in file] 190 | else: 191 | test_cam_names_list = [] 192 | 193 | reading_dir = "images" if images == None else images 194 | cam_infos_unsorted = readColmapCameras( 195 | cam_extrinsics=cam_extrinsics, cam_intrinsics=cam_intrinsics, depths_params=depths_params, 196 | images_folder=os.path.join(path, reading_dir), 197 | depths_folder=os.path.join(path, depths) if depths != "" else "", test_cam_names_list=test_cam_names_list) 198 | cam_infos = sorted(cam_infos_unsorted.copy(), key = lambda x : x.image_name) 199 | 200 | train_cam_infos = [c for c in cam_infos if train_test_exp or not c.is_test] 201 | test_cam_infos = [c for c in cam_infos if c.is_test] 202 | 203 | nerf_normalization = getNerfppNorm(train_cam_infos) 204 | 205 | ply_path = os.path.join(path, "sparse/0/points3D.ply") 206 | bin_path = os.path.join(path, "sparse/0/points3D.bin") 207 | txt_path = os.path.join(path, "sparse/0/points3D.txt") 208 | if not os.path.exists(ply_path): 209 | print("Converting point3d.bin to .ply, will happen only the first time you open the scene.") 210 | try: 211 | xyz, rgb, _ = read_points3D_binary(bin_path) 212 | except: 213 | xyz, rgb, _ = read_points3D_text(txt_path) 214 | storePly(ply_path, xyz, rgb) 215 | try: 216 | pcd = fetchPly(ply_path) 217 | except: 218 | pcd = None 219 | 220 | scene_info = SceneInfo(point_cloud=pcd, 221 | train_cameras=train_cam_infos, 222 | test_cameras=test_cam_infos, 223 | nerf_normalization=nerf_normalization, 224 | ply_path=ply_path, 225 | is_nerf_synthetic=False) 226 | return scene_info 227 | 228 | def readCamerasFromTransforms(path, transformsfile, depths_folder, white_background, is_test, extension=".png"): 229 | cam_infos = [] 230 | 231 | with open(os.path.join(path, transformsfile)) as json_file: 232 | contents = json.load(json_file) 233 | fovx = contents["camera_angle_x"] 234 | 235 | frames = contents["frames"] 236 | for idx, frame in enumerate(frames): 237 | cam_name = os.path.join(path, frame["file_path"] + extension) 238 | 239 | # NeRF 'transform_matrix' is a camera-to-world transform 240 | c2w = np.array(frame["transform_matrix"]) 241 | # change from OpenGL/Blender camera axes (Y up, Z back) to COLMAP (Y down, Z forward) 242 | c2w[:3, 1:3] *= -1 243 | 244 | # get the world-to-camera transform and set R, T 245 | w2c = np.linalg.inv(c2w) 246 | R = np.transpose(w2c[:3,:3]) # R is stored transposed due to 'glm' in CUDA code 247 | T = w2c[:3, 3] 248 | 249 | image_path = os.path.join(path, cam_name) 250 | image_name = Path(cam_name).stem 251 | image = Image.open(image_path) 252 | 253 | im_data = np.array(image.convert("RGBA")) 254 | 255 | bg = np.array([1,1,1]) if white_background else np.array([0, 0, 0]) 256 | 257 | norm_data = im_data / 255.0 258 | arr = norm_data[:,:,:3] * 
norm_data[:, :, 3:4] + bg * (1 - norm_data[:, :, 3:4]) 259 | image = Image.fromarray(np.array(arr*255.0, dtype=np.byte), "RGB") 260 | 261 | fovy = focal2fov(fov2focal(fovx, image.size[0]), image.size[1]) 262 | FovY = fovy 263 | FovX = fovx 264 | 265 | depth_path = os.path.join(depths_folder, f"{image_name}.png") if depths_folder != "" else "" 266 | 267 | cam_infos.append(CameraInfo(uid=idx, R=R, T=T, FovY=FovY, FovX=FovX, 268 | image_path=image_path, image_name=image_name, 269 | width=image.size[0], height=image.size[1], depth_path=depth_path, depth_params=None, is_test=is_test)) 270 | 271 | return cam_infos 272 | 273 | def readNerfSyntheticInfo(path, white_background, depths, eval, extension=".png"): 274 | 275 | depths_folder=os.path.join(path, depths) if depths != "" else "" 276 | print("Reading Training Transforms") 277 | train_cam_infos = readCamerasFromTransforms(path, "transforms_train.json", depths_folder, white_background, False, extension) 278 | print("Reading Test Transforms") 279 | test_cam_infos = readCamerasFromTransforms(path, "transforms_test.json", depths_folder, white_background, True, extension) 280 | 281 | if not eval: 282 | train_cam_infos.extend(test_cam_infos) 283 | test_cam_infos = [] 284 | 285 | nerf_normalization = getNerfppNorm(train_cam_infos) 286 | 287 | ply_path = os.path.join(path, "points3d.ply") 288 | if not os.path.exists(ply_path): 289 | # Since this data set has no colmap data, we start with random points 290 | num_pts = 100_000 291 | print(f"Generating random point cloud ({num_pts})...") 292 | 293 | # We create random points inside the bounds of the synthetic Blender scenes 294 | xyz = np.random.random((num_pts, 3)) * 2.6 - 1.3 295 | shs = np.random.random((num_pts, 3)) / 255.0 296 | pcd = BasicPointCloud(points=xyz, colors=SH2RGB(shs), normals=np.zeros((num_pts, 3))) 297 | 298 | storePly(ply_path, xyz, SH2RGB(shs) * 255) 299 | try: 300 | pcd = fetchPly(ply_path) 301 | except: 302 | pcd = None 303 | 304 | scene_info = SceneInfo(point_cloud=pcd, 305 | train_cameras=train_cam_infos, 306 | test_cameras=test_cam_infos, 307 | nerf_normalization=nerf_normalization, 308 | ply_path=ply_path, 309 | is_nerf_synthetic=True) 310 | return scene_info 311 | 312 | sceneLoadTypeCallbacks = { 313 | "Colmap": readColmapSceneInfo, 314 | "Blender" : readNerfSyntheticInfo 315 | } -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import os 13 | import torch 14 | from random import randint 15 | from utils.loss_utils import l1_loss, ssim 16 | from gaussian_renderer import render 17 | import sys 18 | from scene import Scene, GaussianModel 19 | from utils.general_utils import safe_state, get_expon_lr_func 20 | import uuid 21 | from tqdm import tqdm 22 | from utils.image_utils import psnr 23 | from argparse import ArgumentParser, Namespace 24 | from arguments import ModelParams, PipelineParams, OptimizationParams 25 | # try: 26 | # from torch.utils.tensorboard import SummaryWriter 27 | # TENSORBOARD_FOUND = True 28 | # except ImportError: 29 | # TENSORBOARD_FOUND = False 30 | TENSORBOARD_FOUND = False 31 | 32 | def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoint_iterations, checkpoint, debug_from): 33 | 34 | first_iter = 0 35 | tb_writer = prepare_output_and_logger(dataset) 36 | gaussians = GaussianModel(dataset.sh_degree, opt.optimizer_type) 37 | scene = Scene(dataset, gaussians) 38 | gaussians.training_setup(opt) 39 | if checkpoint: 40 | (model_params, first_iter) = torch.load(checkpoint) 41 | gaussians.restore_rgb(model_params, opt) 42 | 43 | bg_color = [1, 1, 1] if dataset.white_background else [0, 0, 0] 44 | background = torch.tensor(bg_color, dtype=torch.float32, device="cuda") 45 | 46 | iter_start = torch.cuda.Event(enable_timing = True) 47 | iter_end = torch.cuda.Event(enable_timing = True) 48 | 49 | viewpoint_stack = scene.getTrainCameras().copy() 50 | viewpoint_indices = list(range(len(viewpoint_stack))) 51 | ema_loss_for_log = 0.0 52 | 53 | progress_bar = tqdm(range(first_iter, opt.iterations), desc="Training progress") 54 | first_iter += 1 55 | for iteration in range(first_iter, opt.iterations + 1): 56 | 57 | iter_start.record() 58 | 59 | gaussians.update_learning_rate(iteration) 60 | 61 | # Every 1000 its we increase the levels of SH up to a maximum degree 62 | if iteration % 1000 == 0: 63 | gaussians.oneupSHdegree() 64 | 65 | # Pick a random Camera 66 | if not viewpoint_stack: 67 | viewpoint_stack = scene.getTrainCameras().copy() 68 | viewpoint_indices = list(range(len(viewpoint_stack))) 69 | rand_idx = randint(0, len(viewpoint_indices) - 1) 70 | viewpoint_cam = viewpoint_stack.pop(rand_idx) 71 | vind = viewpoint_indices.pop(rand_idx) 72 | 73 | # Render 74 | if (iteration - 1) == debug_from: 75 | pipe.debug = True 76 | 77 | bg = torch.rand((3), device="cuda") if opt.random_background else background 78 | 79 | render_pkg = render(viewpoint_cam, gaussians, pipe, bg) 80 | image, viewspace_point_tensor, visibility_filter, radii = render_pkg["render"], render_pkg["viewspace_points"], render_pkg["visibility_filter"], render_pkg["radii"] 81 | 82 | # Loss 83 | gt_image = viewpoint_cam.original_image.cuda() 84 | Ll1 = l1_loss(image, gt_image) 85 | 86 | ssim_value = ssim(image, gt_image) 87 | loss = (1.0 - opt.lambda_dssim) * Ll1 + opt.lambda_dssim * (1.0 - ssim_value) 88 | 89 | loss.backward() 90 | 91 | iter_end.record() 92 | 93 | with torch.no_grad(): 94 | # Progress bar 95 | ema_loss_for_log = 0.4 * loss.item() + 0.6 * ema_loss_for_log 96 | 97 | if iteration % 10 == 0: 98 | progress_bar.set_postfix({"Loss": f"{ema_loss_for_log:.{7}f}"}) 99 | progress_bar.update(10) 100 | if iteration == opt.iterations: 101 | progress_bar.close() 102 | 103 | # Log and save 104 | training_report(tb_writer, iteration, Ll1, loss, l1_loss, iter_start.elapsed_time(iter_end), testing_iterations, scene, render, (pipe, background)) 
105 | if (iteration in saving_iterations): 106 | print("\n[ITER {}] Saving Gaussians".format(iteration)) 107 | scene.save(iteration) 108 | 109 | # Densification 110 | if iteration < opt.densify_until_iter: 111 | # Keep track of max radii in image-space for pruning 112 | gaussians.max_radii2D[visibility_filter] = torch.max(gaussians.max_radii2D[visibility_filter], radii[visibility_filter]) 113 | gaussians.add_densification_stats(viewspace_point_tensor, visibility_filter, image.shape[2], image.shape[1]) 114 | 115 | if iteration > opt.densify_from_iter and iteration % opt.densification_interval == 0: 116 | size_threshold = 20 if iteration > opt.opacity_reset_interval else None 117 | gaussians.densify_and_prune(opt.densify_grad_threshold, 0.005, scene.cameras_extent, size_threshold, radii) 118 | 119 | if iteration % opt.opacity_reset_interval == 0 or (dataset.white_background and iteration == opt.densify_from_iter): 120 | gaussians.reset_opacity() 121 | 122 | # Optimizer step 123 | if iteration < opt.iterations: 124 | gaussians.optimizer.step() 125 | gaussians.optimizer.zero_grad(set_to_none = True) 126 | 127 | if (iteration in checkpoint_iterations): 128 | print("\n[ITER {}] Saving Checkpoint".format(iteration)) 129 | torch.save((gaussians.capture_rgb(), iteration), scene.model_path + "/chkpnt" + str(iteration) + ".pth") 130 | 131 | def prepare_output_and_logger(args): 132 | if not args.model_path: 133 | if os.getenv('OAR_JOB_ID'): 134 | unique_str=os.getenv('OAR_JOB_ID') 135 | else: 136 | unique_str = str(uuid.uuid4()) 137 | args.model_path = os.path.join("./output/", unique_str[0:10]) 138 | 139 | # Set up output folder 140 | print("Output folder: {}".format(args.model_path)) 141 | os.makedirs(args.model_path, exist_ok = True) 142 | with open(os.path.join(args.model_path, "cfg_args"), 'w') as cfg_log_f: 143 | cfg_log_f.write(str(Namespace(**vars(args)))) 144 | 145 | # Create Tensorboard writer 146 | tb_writer = None 147 | # if TENSORBOARD_FOUND: 148 | # tb_writer = SummaryWriter(args.model_path) 149 | # else: 150 | # print("Tensorboard not available: not logging progress") 151 | return tb_writer 152 | 153 | def training_report(tb_writer, iteration, Ll1, loss, l1_loss, elapsed, testing_iterations, scene : Scene, renderFunc, renderArgs): 154 | if tb_writer: 155 | tb_writer.add_scalar('train_loss_patches/l1_loss', Ll1.item(), iteration) 156 | tb_writer.add_scalar('train_loss_patches/total_loss', loss.item(), iteration) 157 | tb_writer.add_scalar('iter_time', elapsed, iteration) 158 | 159 | # Report test and samples of training set 160 | if iteration in testing_iterations: 161 | torch.cuda.empty_cache() 162 | validation_configs = ({'name': 'test', 'cameras' : scene.getTestCameras()}, 163 | {'name': 'train', 'cameras' : [scene.getTrainCameras()[idx % len(scene.getTrainCameras())] for idx in range(5, 30, 5)]}) 164 | 165 | for config in validation_configs: 166 | if config['cameras'] and len(config['cameras']) > 0: 167 | l1_test = 0.0 168 | psnr_test = 0.0 169 | for idx, viewpoint in enumerate(config['cameras']): 170 | image = torch.clamp(renderFunc(viewpoint, scene.gaussians, *renderArgs)["render"], 0.0, 1.0) 171 | gt_image = torch.clamp(viewpoint.original_image.to("cuda"), 0.0, 1.0) 172 | if tb_writer and (idx < 5): 173 | tb_writer.add_images(config['name'] + "_view_{}/render".format(viewpoint.image_name), image[None], global_step=iteration) 174 | if iteration == testing_iterations[0]: 175 | tb_writer.add_images(config['name'] + "_view_{}/ground_truth".format(viewpoint.image_name), gt_image[None], 
global_step=iteration) 176 | l1_test += l1_loss(image, gt_image).mean().double() 177 | psnr_test += psnr(image, gt_image).mean().double() 178 | psnr_test /= len(config['cameras']) 179 | l1_test /= len(config['cameras']) 180 | print("\n[ITER {}] Evaluating {}: L1 {} PSNR {}".format(iteration, config['name'], l1_test, psnr_test)) 181 | if tb_writer: 182 | tb_writer.add_scalar(config['name'] + '/loss_viewpoint - l1_loss', l1_test, iteration) 183 | tb_writer.add_scalar(config['name'] + '/loss_viewpoint - psnr', psnr_test, iteration) 184 | 185 | if tb_writer: 186 | tb_writer.add_histogram("scene/opacity_histogram", scene.gaussians.get_opacity, iteration) 187 | tb_writer.add_scalar('total_points', scene.gaussians.get_xyz.shape[0], iteration) 188 | torch.cuda.empty_cache() 189 | 190 | if __name__ == "__main__": 191 | # Set up command line argument parser 192 | parser = ArgumentParser(description="Training script parameters") 193 | lp = ModelParams(parser) 194 | op = OptimizationParams(parser) 195 | pp = PipelineParams(parser) 196 | parser.add_argument('--ip', type=str, default="127.0.0.1") 197 | parser.add_argument('--port', type=int, default=6009) 198 | parser.add_argument('--debug_from', type=int, default=-1) 199 | parser.add_argument('--detect_anomaly', action='store_true', default=False) 200 | parser.add_argument("--test_iterations", nargs="+", type=int, default=[7_000, 30_000]) 201 | parser.add_argument("--save_iterations", nargs="+", type=int, default=[7_000, 30_000]) 202 | parser.add_argument("--quiet", action="store_true") 203 | parser.add_argument("--checkpoint_iterations", nargs="+", type=int, default=[7_000, 30_000]) 204 | parser.add_argument("--start_checkpoint", type=str, default = None) 205 | args = parser.parse_args(sys.argv[1:]) 206 | args.save_iterations.append(args.iterations) 207 | 208 | # Initialize system state (RNG) 209 | safe_state(args.quiet) 210 | 211 | torch.autograd.set_detect_anomaly(args.detect_anomaly) 212 | training(lp.extract(args), op.extract(args), pp.extract(args), args.test_iterations, args.save_iterations, args.checkpoint_iterations, args.start_checkpoint, args.debug_from) 213 | -------------------------------------------------------------------------------- /utils/camera_utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | from scene.cameras import Camera 13 | import numpy as np 14 | from utils.graphics_utils import fov2focal 15 | from PIL import Image 16 | import cv2 17 | 18 | WARNED = False 19 | 20 | def loadCam(args, id, cam_info, resolution_scale, is_nerf_synthetic, is_test_dataset): 21 | image = Image.open(cam_info.image_path) 22 | 23 | if cam_info.depth_path != "": 24 | try: 25 | if is_nerf_synthetic: 26 | invdepthmap = cv2.imread(cam_info.depth_path, -1).astype(np.float32) / 512 27 | else: 28 | invdepthmap = cv2.imread(cam_info.depth_path, -1).astype(np.float32) / float(2**16) 29 | 30 | except FileNotFoundError: 31 | print(f"Error: The depth file at path '{cam_info.depth_path}' was not found.") 32 | raise 33 | except IOError: 34 | print(f"Error: Unable to open the image file '{cam_info.depth_path}'. 
It may be corrupted or an unsupported format.") 35 | raise 36 | except Exception as e: 37 | print(f"An unexpected error occurred when trying to read depth at {cam_info.depth_path}: {e}") 38 | raise 39 | else: 40 | invdepthmap = None 41 | 42 | orig_w, orig_h = image.size 43 | if args.resolution in [1, 2, 4, 8]: 44 | resolution = round(orig_w/(resolution_scale * args.resolution)), round(orig_h/(resolution_scale * args.resolution)) 45 | else: # should be a type that converts to float 46 | if args.resolution == -1: 47 | if orig_w > 1600: 48 | global WARNED 49 | if not WARNED: 50 | print("[ INFO ] Encountered quite large input images (>1.6K pixels width), rescaling to 1.6K.\n " 51 | "If this is not desired, please explicitly specify '--resolution/-r' as 1") 52 | WARNED = True 53 | global_down = orig_w / 1600 54 | else: 55 | global_down = 1 56 | else: 57 | global_down = orig_w / args.resolution 58 | 59 | 60 | scale = float(global_down) * float(resolution_scale) 61 | resolution = (int(orig_w / scale), int(orig_h / scale)) 62 | 63 | return Camera(resolution, colmap_id=cam_info.uid, R=cam_info.R, T=cam_info.T, 64 | FoVx=cam_info.FovX, FoVy=cam_info.FovY, depth_params=cam_info.depth_params, 65 | image=image, invdepthmap=invdepthmap, 66 | image_name=cam_info.image_name, uid=id, data_device=args.data_device, 67 | train_test_exp=args.train_test_exp, is_test_dataset=is_test_dataset, is_test_view=cam_info.is_test) 68 | 69 | def cameraList_from_camInfos(cam_infos, resolution_scale, args, is_nerf_synthetic, is_test_dataset): 70 | camera_list = [] 71 | 72 | for id, c in enumerate(cam_infos): 73 | camera_list.append(loadCam(args, id, c, resolution_scale, is_nerf_synthetic, is_test_dataset)) 74 | 75 | return camera_list 76 | 77 | def camera_to_JSON(id, camera : Camera): 78 | Rt = np.zeros((4, 4)) 79 | Rt[:3, :3] = camera.R.transpose() 80 | Rt[:3, 3] = camera.T 81 | Rt[3, 3] = 1.0 82 | 83 | W2C = np.linalg.inv(Rt) 84 | pos = W2C[:3, 3] 85 | rot = W2C[:3, :3] 86 | serializable_array_2d = [x.tolist() for x in rot] 87 | camera_entry = { 88 | 'id' : id, 89 | 'img_name' : camera.image_name, 90 | 'width' : camera.width, 91 | 'height' : camera.height, 92 | 'position': pos.tolist(), 93 | 'rotation': serializable_array_2d, 94 | 'fy' : fov2focal(camera.FovY, camera.height), 95 | 'fx' : fov2focal(camera.FovX, camera.width) 96 | } 97 | return camera_entry -------------------------------------------------------------------------------- /utils/general_utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | import sys 14 | from datetime import datetime 15 | import numpy as np 16 | import random 17 | 18 | def inverse_sigmoid(x): 19 | return torch.log(x/(1-x)) 20 | 21 | def PILtoTorch(pil_image, resolution): 22 | resized_image_PIL = pil_image.resize(resolution) 23 | resized_image = torch.from_numpy(np.array(resized_image_PIL)) / 255.0 24 | if len(resized_image.shape) == 3: 25 | return resized_image.permute(2, 0, 1) 26 | else: 27 | return resized_image.unsqueeze(dim=-1).permute(2, 0, 1) 28 | 29 | def get_expon_lr_func( 30 | lr_init, lr_final, lr_delay_steps=0, lr_delay_mult=1.0, max_steps=1000000 31 | ): 32 | """ 33 | Copied from Plenoxels 34 | 35 | Continuous learning rate decay function. Adapted from JaxNeRF 36 | The returned rate is lr_init when step=0 and lr_final when step=max_steps, and 37 | is log-linearly interpolated elsewhere (equivalent to exponential decay). 38 | If lr_delay_steps>0 then the learning rate will be scaled by some smooth 39 | function of lr_delay_mult, such that the initial learning rate is 40 | lr_init*lr_delay_mult at the beginning of optimization but will be eased back 41 | to the normal learning rate when steps>lr_delay_steps. 42 | :param conf: config subtree 'lr' or similar 43 | :param max_steps: int, the number of steps during optimization. 44 | :return HoF which takes step as input 45 | """ 46 | 47 | def helper(step): 48 | if step < 0 or (lr_init == 0.0 and lr_final == 0.0): 49 | # Disable this parameter 50 | return 0.0 51 | if lr_delay_steps > 0: 52 | # A kind of reverse cosine decay. 53 | delay_rate = lr_delay_mult + (1 - lr_delay_mult) * np.sin( 54 | 0.5 * np.pi * np.clip(step / lr_delay_steps, 0, 1) 55 | ) 56 | else: 57 | delay_rate = 1.0 58 | t = np.clip(step / max_steps, 0, 1) 59 | log_lerp = np.exp(np.log(lr_init) * (1 - t) + np.log(lr_final) * t) 60 | return delay_rate * log_lerp 61 | 62 | return helper 63 | 64 | def strip_lowerdiag(L): 65 | uncertainty = torch.zeros((L.shape[0], 6), dtype=torch.float, device="cuda") 66 | 67 | uncertainty[:, 0] = L[:, 0, 0] 68 | uncertainty[:, 1] = L[:, 0, 1] 69 | uncertainty[:, 2] = L[:, 0, 2] 70 | uncertainty[:, 3] = L[:, 1, 1] 71 | uncertainty[:, 4] = L[:, 1, 2] 72 | uncertainty[:, 5] = L[:, 2, 2] 73 | return uncertainty 74 | 75 | def strip_symmetric(sym): 76 | return strip_lowerdiag(sym) 77 | 78 | def build_rotation(r): 79 | norm = torch.sqrt(r[:,0]*r[:,0] + r[:,1]*r[:,1] + r[:,2]*r[:,2] + r[:,3]*r[:,3]) 80 | 81 | q = r / norm[:, None] 82 | 83 | R = torch.zeros((q.size(0), 3, 3), device='cuda') 84 | 85 | r = q[:, 0] 86 | x = q[:, 1] 87 | y = q[:, 2] 88 | z = q[:, 3] 89 | 90 | R[:, 0, 0] = 1 - 2 * (y*y + z*z) 91 | R[:, 0, 1] = 2 * (x*y - r*z) 92 | R[:, 0, 2] = 2 * (x*z + r*y) 93 | R[:, 1, 0] = 2 * (x*y + r*z) 94 | R[:, 1, 1] = 1 - 2 * (x*x + z*z) 95 | R[:, 1, 2] = 2 * (y*z - r*x) 96 | R[:, 2, 0] = 2 * (x*z - r*y) 97 | R[:, 2, 1] = 2 * (y*z + r*x) 98 | R[:, 2, 2] = 1 - 2 * (x*x + y*y) 99 | return R 100 | 101 | def build_scaling_rotation(s, r): 102 | L = torch.zeros((s.shape[0], 3, 3), dtype=torch.float, device="cuda") 103 | R = build_rotation(r) 104 | 105 | L[:,0,0] = s[:,0] 106 | L[:,1,1] = s[:,1] 107 | L[:,2,2] = s[:,2] 108 | 109 | L = R @ L 110 | return L 111 | 112 | def safe_state(silent): 113 | old_f = sys.stdout 114 | class F: 115 | def __init__(self, silent): 116 | self.silent = silent 117 | 118 | def write(self, x): 119 | if not self.silent: 120 | if x.endswith("\n"): 121 | old_f.write(x.replace("\n", " 
[{}]\n".format(str(datetime.now().strftime("%d/%m %H:%M:%S"))))) 122 | else: 123 | old_f.write(x) 124 | 125 | def flush(self): 126 | old_f.flush() 127 | 128 | sys.stdout = F(silent) 129 | 130 | random.seed(0) 131 | np.random.seed(0) 132 | torch.manual_seed(0) 133 | torch.cuda.set_device(torch.device("cuda:0")) 134 | -------------------------------------------------------------------------------- /utils/graphics_utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | import math 14 | import numpy as np 15 | from typing import NamedTuple 16 | 17 | class BasicPointCloud(NamedTuple): 18 | points : np.array 19 | colors : np.array 20 | normals : np.array 21 | 22 | def geom_transform_points(points, transf_matrix): 23 | P, _ = points.shape 24 | ones = torch.ones(P, 1, dtype=points.dtype, device=points.device) 25 | points_hom = torch.cat([points, ones], dim=1) 26 | points_out = torch.matmul(points_hom, transf_matrix.unsqueeze(0)) 27 | 28 | denom = points_out[..., 3:] + 0.0000001 29 | return (points_out[..., :3] / denom).squeeze(dim=0) 30 | 31 | def getWorld2View(R, t): 32 | Rt = np.zeros((4, 4)) 33 | Rt[:3, :3] = R.transpose() 34 | Rt[:3, 3] = t 35 | Rt[3, 3] = 1.0 36 | return np.float32(Rt) 37 | 38 | def getWorld2View2(R, t, translate=np.array([.0, .0, .0]), scale=1.0): 39 | Rt = np.zeros((4, 4)) 40 | Rt[:3, :3] = R.transpose() 41 | Rt[:3, 3] = t 42 | Rt[3, 3] = 1.0 43 | 44 | C2W = np.linalg.inv(Rt) 45 | cam_center = C2W[:3, 3] 46 | cam_center = (cam_center + translate) * scale 47 | C2W[:3, 3] = cam_center 48 | Rt = np.linalg.inv(C2W) 49 | return np.float32(Rt) 50 | 51 | def getProjectionMatrix(znear, zfar, fovX, fovY): 52 | tanHalfFovY = math.tan((fovY / 2)) 53 | tanHalfFovX = math.tan((fovX / 2)) 54 | 55 | top = tanHalfFovY * znear 56 | bottom = -top 57 | right = tanHalfFovX * znear 58 | left = -right 59 | 60 | P = torch.zeros(4, 4) 61 | 62 | z_sign = 1.0 63 | 64 | P[0, 0] = 2.0 * znear / (right - left) 65 | P[1, 1] = 2.0 * znear / (top - bottom) 66 | P[0, 2] = (right + left) / (right - left) 67 | P[1, 2] = (top + bottom) / (top - bottom) 68 | P[3, 2] = z_sign 69 | P[2, 2] = z_sign * zfar / (zfar - znear) 70 | P[2, 3] = -(zfar * znear) / (zfar - znear) 71 | return P 72 | 73 | def fov2focal(fov, pixels): 74 | return pixels / (2 * math.tan(fov / 2)) 75 | 76 | def focal2fov(focal, pixels): 77 | return 2*math.atan(pixels/(2*focal)) -------------------------------------------------------------------------------- /utils/image_utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | 14 | def mse(img1, img2): 15 | return (((img1 - img2)) ** 2).view(img1.shape[0], -1).mean(1, keepdim=True) 16 | 17 | def psnr(img1, img2): 18 | mse = (((img1 - img2)) ** 2).view(img1.shape[0], -1).mean(1, keepdim=True) 19 | return 20 * torch.log10(1.0 / torch.sqrt(mse)) 20 | -------------------------------------------------------------------------------- /utils/loss_utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | import torch.nn.functional as F 14 | from torch.autograd import Variable 15 | from math import exp 16 | try: 17 | from diff_gaussian_rasterization._C import fusedssim, fusedssim_backward 18 | except: 19 | pass 20 | 21 | C1 = 0.01 ** 2 22 | C2 = 0.03 ** 2 23 | 24 | class FusedSSIMMap(torch.autograd.Function): 25 | @staticmethod 26 | def forward(ctx, C1, C2, img1, img2): 27 | ssim_map = fusedssim(C1, C2, img1, img2) 28 | ctx.save_for_backward(img1.detach(), img2) 29 | ctx.C1 = C1 30 | ctx.C2 = C2 31 | return ssim_map 32 | 33 | @staticmethod 34 | def backward(ctx, opt_grad): 35 | img1, img2 = ctx.saved_tensors 36 | C1, C2 = ctx.C1, ctx.C2 37 | grad = fusedssim_backward(C1, C2, img1, img2, opt_grad) 38 | return None, None, grad, None 39 | 40 | def l1_loss(network_output, gt): 41 | return torch.abs((network_output - gt)).mean() 42 | 43 | def l2_loss(network_output, gt): 44 | return ((network_output - gt) ** 2).mean() 45 | 46 | def gaussian(window_size, sigma): 47 | gauss = torch.Tensor([exp(-(x - window_size // 2) ** 2 / float(2 * sigma ** 2)) for x in range(window_size)]) 48 | return gauss / gauss.sum() 49 | 50 | def create_window(window_size, channel): 51 | _1D_window = gaussian(window_size, 1.5).unsqueeze(1) 52 | _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0) 53 | window = Variable(_2D_window.expand(channel, 1, window_size, window_size).contiguous()) 54 | return window 55 | 56 | def ssim(img1, img2, window_size=11, size_average=True): 57 | channel = img1.size(-3) 58 | window = create_window(window_size, channel) 59 | 60 | if img1.is_cuda: 61 | window = window.cuda(img1.get_device()) 62 | window = window.type_as(img1) 63 | 64 | return _ssim(img1, img2, window, window_size, channel, size_average) 65 | 66 | def _ssim(img1, img2, window, window_size, channel, size_average=True): 67 | mu1 = F.conv2d(img1, window, padding=window_size // 2, groups=channel) 68 | mu2 = F.conv2d(img2, window, padding=window_size // 2, groups=channel) 69 | 70 | mu1_sq = mu1.pow(2) 71 | mu2_sq = mu2.pow(2) 72 | mu1_mu2 = mu1 * mu2 73 | 74 | sigma1_sq = F.conv2d(img1 * img1, window, padding=window_size // 2, groups=channel) - mu1_sq 75 | sigma2_sq = F.conv2d(img2 * img2, window, padding=window_size // 2, groups=channel) - mu2_sq 76 | sigma12 = F.conv2d(img1 * img2, window, padding=window_size // 2, groups=channel) - mu1_mu2 77 | 78 | C1 = 0.01 ** 2 79 | C2 = 0.03 ** 2 80 | 81 | ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2)) 82 | 83 | if size_average: 84 | return ssim_map.mean() 85 | else: 86 | return ssim_map.mean(1).mean(1).mean(1) 87 | 
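# Note: fast_ssim (below) computes SSIM through the fused CUDA kernel
# (fusedssim / fusedssim_backward) imported at the top of this file; if that
# optional import failed, FusedSSIMMap.forward raises a NameError at call time,
# so the pure-PyTorch ssim() above is the path to use in that case.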
88 | 89 | def fast_ssim(img1, img2): 90 | ssim_map = FusedSSIMMap.apply(C1, C2, img1, img2) 91 | return ssim_map.mean() 92 | -------------------------------------------------------------------------------- /utils/make_depth_scale.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import argparse 3 | import cv2 4 | from joblib import delayed, Parallel 5 | import json 6 | from read_write_model import * 7 | 8 | def get_scales(key, cameras, images, points3d_ordered, args): 9 | image_meta = images[key] 10 | cam_intrinsic = cameras[image_meta.camera_id] 11 | 12 | pts_idx = images_metas[key].point3D_ids 13 | 14 | mask = pts_idx >= 0 15 | mask *= pts_idx < len(points3d_ordered) 16 | 17 | pts_idx = pts_idx[mask] 18 | valid_xys = image_meta.xys[mask] 19 | 20 | if len(pts_idx) > 0: 21 | pts = points3d_ordered[pts_idx] 22 | else: 23 | pts = np.array([0, 0, 0]) 24 | 25 | R = qvec2rotmat(image_meta.qvec) 26 | pts = np.dot(pts, R.T) + image_meta.tvec 27 | 28 | invcolmapdepth = 1. / pts[..., 2] 29 | n_remove = len(image_meta.name.split('.')[-1]) + 1 30 | invmonodepthmap = cv2.imread(f"{args.depths_dir}/{image_meta.name[:-n_remove]}.png", cv2.IMREAD_UNCHANGED) 31 | 32 | if invmonodepthmap is None: 33 | return None 34 | 35 | if invmonodepthmap.ndim != 2: 36 | invmonodepthmap = invmonodepthmap[..., 0] 37 | 38 | invmonodepthmap = invmonodepthmap.astype(np.float32) / (2**16) 39 | s = invmonodepthmap.shape[0] / cam_intrinsic.height 40 | 41 | maps = (valid_xys * s).astype(np.float32) 42 | valid = ( 43 | (maps[..., 0] >= 0) * 44 | (maps[..., 1] >= 0) * 45 | (maps[..., 0] < cam_intrinsic.width * s) * 46 | (maps[..., 1] < cam_intrinsic.height * s) * (invcolmapdepth > 0)) 47 | 48 | if valid.sum() > 10 and (invcolmapdepth.max() - invcolmapdepth.min()) > 1e-3: 49 | maps = maps[valid, :] 50 | invcolmapdepth = invcolmapdepth[valid] 51 | invmonodepth = cv2.remap(invmonodepthmap, maps[..., 0], maps[..., 1], interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REPLICATE)[..., 0] 52 | 53 | ## Median / dev 54 | t_colmap = np.median(invcolmapdepth) 55 | s_colmap = np.mean(np.abs(invcolmapdepth - t_colmap)) 56 | 57 | t_mono = np.median(invmonodepth) 58 | s_mono = np.mean(np.abs(invmonodepth - t_mono)) 59 | scale = s_colmap / s_mono 60 | offset = t_colmap - t_mono * scale 61 | else: 62 | scale = 0 63 | offset = 0 64 | return {"image_name": image_meta.name[:-n_remove], "scale": scale, "offset": offset} 65 | 66 | if __name__ == '__main__': 67 | parser = argparse.ArgumentParser() 68 | parser.add_argument('--base_dir', default="../data/big_gaussians/standalone_chunks/campus") 69 | parser.add_argument('--depths_dir', default="../data/big_gaussians/standalone_chunks/campus/depths_any") 70 | parser.add_argument('--model_type', default="bin") 71 | args = parser.parse_args() 72 | 73 | 74 | cam_intrinsics, images_metas, points3d = read_model(os.path.join(args.base_dir, "sparse", "0"), ext=f".{args.model_type}") 75 | 76 | pts_indices = np.array([points3d[key].id for key in points3d]) 77 | pts_xyzs = np.array([points3d[key].xyz for key in points3d]) 78 | points3d_ordered = np.zeros([pts_indices.max()+1, 3]) 79 | points3d_ordered[pts_indices] = pts_xyzs 80 | 81 | # depth_param_list = [get_scales(key, cam_intrinsics, images_metas, points3d_ordered, args) for key in images_metas] 82 | depth_param_list = Parallel(n_jobs=-1, backend="threading")( 83 | delayed(get_scales)(key, cam_intrinsics, images_metas, points3d_ordered, args) for key in images_metas 84 | ) 85 | 86 | depth_params = 
{ 87 | depth_param["image_name"]: {"scale": depth_param["scale"], "offset": depth_param["offset"]} 88 | for depth_param in depth_param_list if depth_param != None 89 | } 90 | 91 | with open(f"{args.base_dir}/sparse/0/depth_params.json", "w") as f: 92 | json.dump(depth_params, f, indent=2) 93 | 94 | print(0) 95 | -------------------------------------------------------------------------------- /utils/sh_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The PlenOctree Authors. 2 | # Redistribution and use in source and binary forms, with or without 3 | # modification, are permitted provided that the following conditions are met: 4 | # 5 | # 1. Redistributions of source code must retain the above copyright notice, 6 | # this list of conditions and the following disclaimer. 7 | # 8 | # 2. Redistributions in binary form must reproduce the above copyright notice, 9 | # this list of conditions and the following disclaimer in the documentation 10 | # and/or other materials provided with the distribution. 11 | # 12 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 13 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 14 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 15 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 16 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 17 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 18 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 19 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 20 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 21 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 22 | # POSSIBILITY OF SUCH DAMAGE. 23 | 24 | import torch 25 | 26 | C0 = 0.28209479177387814 27 | C1 = 0.4886025119029199 28 | C2 = [ 29 | 1.0925484305920792, 30 | -1.0925484305920792, 31 | 0.31539156525252005, 32 | -1.0925484305920792, 33 | 0.5462742152960396 34 | ] 35 | C3 = [ 36 | -0.5900435899266435, 37 | 2.890611442640554, 38 | -0.4570457994644658, 39 | 0.3731763325901154, 40 | -0.4570457994644658, 41 | 1.445305721320277, 42 | -0.5900435899266435 43 | ] 44 | C4 = [ 45 | 2.5033429417967046, 46 | -1.7701307697799304, 47 | 0.9461746957575601, 48 | -0.6690465435572892, 49 | 0.10578554691520431, 50 | -0.6690465435572892, 51 | 0.47308734787878004, 52 | -1.7701307697799304, 53 | 0.6258357354491761, 54 | ] 55 | 56 | 57 | def eval_sh(deg, sh, dirs): 58 | """ 59 | Evaluate spherical harmonics at unit directions 60 | using hardcoded SH polynomials. 61 | Works with torch/np/jnp. 62 | ... Can be 0 or more batch dimensions. 63 | Args: 64 | deg: int SH deg. 
Currently, 0-3 supported 65 | sh: jnp.ndarray SH coeffs [..., C, (deg + 1) ** 2] 66 | dirs: jnp.ndarray unit directions [..., 3] 67 | Returns: 68 | [..., C] 69 | """ 70 | assert deg <= 4 and deg >= 0 71 | coeff = (deg + 1) ** 2 72 | assert sh.shape[-1] >= coeff 73 | 74 | result = C0 * sh[..., 0] 75 | if deg > 0: 76 | x, y, z = dirs[..., 0:1], dirs[..., 1:2], dirs[..., 2:3] 77 | result = (result - 78 | C1 * y * sh[..., 1] + 79 | C1 * z * sh[..., 2] - 80 | C1 * x * sh[..., 3]) 81 | 82 | if deg > 1: 83 | xx, yy, zz = x * x, y * y, z * z 84 | xy, yz, xz = x * y, y * z, x * z 85 | result = (result + 86 | C2[0] * xy * sh[..., 4] + 87 | C2[1] * yz * sh[..., 5] + 88 | C2[2] * (2.0 * zz - xx - yy) * sh[..., 6] + 89 | C2[3] * xz * sh[..., 7] + 90 | C2[4] * (xx - yy) * sh[..., 8]) 91 | 92 | if deg > 2: 93 | result = (result + 94 | C3[0] * y * (3 * xx - yy) * sh[..., 9] + 95 | C3[1] * xy * z * sh[..., 10] + 96 | C3[2] * y * (4 * zz - xx - yy)* sh[..., 11] + 97 | C3[3] * z * (2 * zz - 3 * xx - 3 * yy) * sh[..., 12] + 98 | C3[4] * x * (4 * zz - xx - yy) * sh[..., 13] + 99 | C3[5] * z * (xx - yy) * sh[..., 14] + 100 | C3[6] * x * (xx - 3 * yy) * sh[..., 15]) 101 | 102 | if deg > 3: 103 | result = (result + C4[0] * xy * (xx - yy) * sh[..., 16] + 104 | C4[1] * yz * (3 * xx - yy) * sh[..., 17] + 105 | C4[2] * xy * (7 * zz - 1) * sh[..., 18] + 106 | C4[3] * yz * (7 * zz - 3) * sh[..., 19] + 107 | C4[4] * (zz * (35 * zz - 30) + 3) * sh[..., 20] + 108 | C4[5] * xz * (7 * zz - 3) * sh[..., 21] + 109 | C4[6] * (xx - yy) * (7 * zz - 1) * sh[..., 22] + 110 | C4[7] * xz * (xx - 3 * yy) * sh[..., 23] + 111 | C4[8] * (xx * (xx - 3 * yy) - yy * (3 * xx - yy)) * sh[..., 24]) 112 | return result 113 | 114 | def RGB2SH(rgb): 115 | return (rgb - 0.5) / C0 116 | 117 | def SH2RGB(sh): 118 | return sh * C0 + 0.5 -------------------------------------------------------------------------------- /utils/system_utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | from errno import EEXIST 13 | from os import makedirs, path 14 | import os 15 | 16 | def mkdir_p(folder_path): 17 | # Creates a directory. equivalent to using mkdir -p on the command line 18 | try: 19 | makedirs(folder_path) 20 | except OSError as exc: # Python >2.5 21 | if exc.errno == EEXIST and path.isdir(folder_path): 22 | pass 23 | else: 24 | raise 25 | 26 | def searchForMaxIteration(folder): 27 | saved_iters = [int(fname.split("_")[-1]) for fname in os.listdir(folder)] 28 | return max(saved_iters) 29 | --------------------------------------------------------------------------------
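Usage sketch (not part of the repository sources): the closure returned by get_expon_lr_func in utils/general_utils.py decays log-linearly from lr_init at step 0 to lr_final at max_steps, with an optional sine-eased warm-up when lr_delay_steps > 0. The snippet below simply queries such a schedule; it assumes the repository root is on PYTHONPATH with torch and numpy installed, and the argument values are illustrative only, not the project's configured defaults.

    from utils.general_utils import get_expon_lr_func

    # Build a decay schedule (example values, not the repository's defaults).
    xyz_lr = get_expon_lr_func(
        lr_init=1.6e-4,
        lr_final=1.6e-6,
        lr_delay_steps=0,   # no warm-up easing in this sketch
        lr_delay_mult=1.0,
        max_steps=30_000,
    )

    # lr(0) == lr_init, lr(max_steps) == lr_final, log-linear in between.
    for step in (0, 7_000, 30_000):
        print(step, xyz_lr(step))

Similarly, a minimal sanity check of the spherical-harmonics helpers in utils/sh_utils.py (same assumptions as above): RGB2SH stores a colour as a degree-0 (DC) coefficient, and eval_sh at degree 0 reproduces it up to the 0.5 offset regardless of view direction.

    import torch
    from utils.sh_utils import RGB2SH, SH2RGB, eval_sh

    rgb = torch.tensor([[0.2, 0.5, 0.8]])        # one point, RGB in [0, 1]
    sh_dc = RGB2SH(rgb)                          # DC (degree-0) SH coefficient
    assert torch.allclose(SH2RGB(sh_dc), rgb)    # exact round trip

    # eval_sh expects coefficients shaped [..., C, (deg + 1) ** 2]; at degree 0
    # the direction is ignored and the result is C0 * sh, i.e. the colour minus 0.5.
    sh = sh_dc.unsqueeze(-1)                     # shape [1, 3, 1]
    dirs = torch.tensor([[0.0, 0.0, 1.0]])       # any unit direction
    assert torch.allclose(eval_sh(0, sh, dirs) + 0.5, rgb)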