├── LICENSE ├── README.md ├── ZoeDepth ├── .gitignore ├── LICENSE ├── README.md ├── environment.yml ├── evaluate.py ├── hubconf.py ├── sanity.py ├── sanity_hub.py ├── train_mix.py ├── train_mono.py ├── train_test_inputs │ ├── kitti_eigen_test_files_with_gt.txt │ ├── kitti_eigen_train_files_with_gt.txt │ ├── nyudepthv2_test_files_with_gt.txt │ └── nyudepthv2_train_files_with_gt.txt ├── ui │ ├── app.py │ ├── gradio_depth_pred.py │ ├── gradio_im_to_3d.py │ ├── gradio_pano_to_3d.py │ └── ui_requirements.txt └── zoedepth │ ├── data │ ├── __init__.py │ ├── data_mono.py │ ├── ddad.py │ ├── diml_indoor_test.py │ ├── diml_outdoor_test.py │ ├── diode.py │ ├── hypersim.py │ ├── ibims.py │ ├── preprocess.py │ ├── sun_rgbd_loader.py │ ├── transforms.py │ ├── vkitti.py │ └── vkitti2.py │ ├── models │ ├── __init__.py │ ├── base_models │ │ ├── __init__.py │ │ └── midas.py │ ├── builder.py │ ├── depth_model.py │ ├── layers │ │ ├── attractor.py │ │ ├── dist_layers.py │ │ ├── localbins_layers.py │ │ └── patch_transformer.py │ ├── model_io.py │ ├── zoedepth │ │ ├── __init__.py │ │ ├── config_zoedepth.json │ │ ├── config_zoedepth_kitti.json │ │ └── zoedepth_v1.py │ └── zoedepth_nk │ │ ├── __init__.py │ │ ├── config_zoedepth_nk.json │ │ └── zoedepth_nk_v1.py │ ├── trainers │ ├── base_trainer.py │ ├── builder.py │ ├── loss.py │ ├── zoedepth_nk_trainer.py │ └── zoedepth_trainer.py │ └── utils │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── easydict │ └── __init__.py │ ├── geometry.py │ └── misc.py ├── arguments.py ├── bloomscene.py ├── cameras └── rotate360.json ├── environment.yml ├── examples ├── 01_childroom.png ├── 01_childroom.txt ├── 02_sunroom.png ├── 02_sunroom.txt ├── 03_beach.png ├── 03_beach.txt ├── 04_snow.png ├── 04_snow.txt ├── 05_christmas.png ├── 05_christmas.txt ├── 06_room.png ├── 06_room.txt ├── 07_museum.png ├── 07_museum.txt ├── 08_small_cabin.png ├── 08_small_cabin.txt ├── 08_small_cabin_negative.txt ├── 09_street.png └── 09_street.txt ├── gaussian_renderer └── __init__.py ├── images └── BloomScene.png ├── run.py ├── scene ├── __init__.py ├── cameras.py ├── dataset_readers.py └── gaussian_model.py ├── submodules ├── depth-diff-gaussian-rasterization │ ├── CMakeLists.txt │ ├── LICENSE.md │ ├── README.md │ ├── cuda_rasterizer │ │ ├── auxiliary.h │ │ ├── backward.cu │ │ ├── backward.h │ │ ├── config.h │ │ ├── forward.cu │ │ ├── forward.h │ │ ├── rasterizer.h │ │ ├── rasterizer_impl.cu │ │ └── rasterizer_impl.h │ ├── depth_diff_gaussian_rasterization │ │ └── __init__.py │ ├── ext.cpp │ ├── rasterize_points.cu │ ├── rasterize_points.h │ ├── setup.py │ └── third_party │ │ └── stbi_image_write.h ├── gridencoder │ ├── __init__.py │ ├── backend.py │ ├── grid.py │ ├── setup.py │ └── src │ │ ├── bindings.cpp │ │ ├── gridencoder.cu │ │ ├── gridencoder.h │ │ └── try.cu └── simple-knn │ ├── ext.cpp │ ├── setup.py │ ├── simple_knn.cu │ ├── simple_knn.h │ ├── simple_knn │ └── .gitkeep │ ├── spatial.cu │ └── spatial.h └── utils ├── __init__.py ├── camera.py ├── depth.py ├── encodings.py ├── entropy_models.py ├── general.py ├── graphics.py ├── loss.py ├── metrics.py ├── pose_noise_util.py ├── system.py └── trajectory.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 SparklingH 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the 
rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## BloomScene 2 | The official implementation of the AAAI 2025 paper "BloomScene: Lightweight Structured 3D Gaussian Splatting for Crossmodal Scene Generation". 3 | ![BloomScene](images/BloomScene.png) 4 | 5 | > ### [BloomScene: Lightweight Structured 3D Gaussian Splatting for Crossmodal Scene Generation](#) 6 | > **Xiaolu Hou***, **Mingcheng Li***, Dingkang Yang, Jiawei Chen, Ziyun Qian, Xiao Zhao, Yue Jiang, Jinjie Wei, Qingyao Xu, Lihua Zhang 7 | > *Accepted by AAAI 2025* 8 | 9 | ## Abstract 10 | With the widespread use of virtual reality applications, 3D scene generation has become a new challenging research frontier. 3D scenes have highly complex structures and need to ensure that the output is dense, coherent, and contains all necessary structures. Many current 3D scene generation methods rely on pre-trained text-to-image diffusion models and monocular depth estimators. However, the generated scenes occupy large amounts of storage space and often lack effective regularization methods, leading to geometric distortions. To this end, we propose BloomScene, a lightweight structured 3D Gaussian splatting for crossmodal scene generation, which creates diverse and high-quality 3D scenes from text or image inputs. Specifically, a crossmodal progressive scene generation framework is proposed to generate coherent scenes utilizing incremental point cloud reconstruction and 3D Gaussian splatting. Additionally, we propose a hierarchical depth prior-based regularization mechanism that utilizes multi-level constraints on depth accuracy and smoothness to enhance the realism and continuity of the generated scenes. Ultimately, we propose a structured context-guided compression mechanism that exploits structured hash grids to model the context of unorganized anchor attributes, which significantly eliminates structural redundancy and reduces storage overhead. Comprehensive experiments across multiple scenes demonstrate the significant potential and advantages of our framework compared with several baselines. 11 | 12 | ## Getting Started 13 | We provide a pretrained image inpainting model.
The download URLs are as follows: 14 | 15 | - Baidu Disk URL for [Image inpainting model (Runway)](https://pan.baidu.com/s/1kK110nhCK5maU1_oD-06yw?pwd=1pd2) 16 | 17 | - Google Drive URL for [Image inpainting model (Runway)](https://drive.google.com/drive/folders/1tP--db0MJtx1oaIRp-OU2hR0fzP_gWmm?usp=sharing) 18 | 19 | Please download the model file and put it under `./BloomScene/models--runwayml--stable-diffusion-inpainting`. 20 | 21 | ## Install 22 | ### Ubuntu 23 | We tested our code on a server with Ubuntu 18.04, CUDA 11.4, and gcc 9.4.0. 24 | 25 | #### Installation script 26 | 27 | ```bash 28 | conda env create --file environment.yml 29 | conda activate bloomscene 30 | # torch-scatter 31 | # Download https://data.pyg.org/whl/torch-2.0.0%2Bcu117/torch_scatter-2.1.2%2Bpt20cu117-cp39-cp39-linux_x86_64.whl 32 | pip install torch_scatter-2.1.2+pt20cu117-cp39-cp39-linux_x86_64.whl 33 | 34 | cd submodules/depth-diff-gaussian-rasterization 35 | python setup.py install 36 | cd ../simple-knn 37 | python setup.py install 38 | cd ../gridencoder 39 | python setup.py install 40 | cd ../.. 41 | ``` 42 | 43 | #### Run with your own samples 44 | ```bash 45 | # Default Example 46 | python run.py --image <path_to_image> --text <path_to_text> [Other options] 47 | ``` 48 | - Replace `<path_to_image>` and `<path_to_text>` with the paths to your image and text files. A concrete example using the bundled samples is shown below the options list. 49 | 50 | 51 | <details>
52 | <summary>Other options</summary> 53 | 54 | - `--image`: Input image for scene generation. 55 | - `--text`: Text prompt for scene generation. 56 | - `--neg_text`: Optional. Negative text prompt for scene generation. 57 | - `--lambdae`: Optional. Try different values for variable bitrate. 58 | - `--seed`: Manual seed for reproducibility. 59 | - `--dep_value`: Pixel-level depth regularization. 60 | - `--dep_value_lbd`: Lambda for pixel-level depth regularization. 61 | - `--dep_domin`: Distribution-level depth regularization. 62 | - `--dep_domin_lbd`: Lambda for distribution-level depth regularization. 63 | - `--dep_smooth`: Depth smoothness regularization. 64 | - `--dep_smooth_lbd`: Lambda for depth smoothness regularization. 65 | - `--diff_steps`: Optional. Number of inference steps for running Stable Diffusion Inpainting. 66 | - `--save_dir`: Optional. Directory to save the generated scenes and videos. Specify to organize outputs. 67 | - `--campath_gen`: Camera path for scene generation (options: `rotate360`). 68 | - `--campath_render`: Camera path for video rendering (options: `rotate360`). 69 | </details>
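For illustration, here is one way the command above might be invoked on the sample inputs shipped in `examples/`. The seed and the `outputs/...` directories are arbitrary choices for this sketch, not defaults prescribed by the repository:

```bash
# Generate a scene from the bundled child-room sample and render a rotate360 video.
# --seed and --save_dir values are illustrative; see the option list above.
python run.py --image examples/01_childroom.png --text examples/01_childroom.txt \
    --campath_gen rotate360 --campath_render rotate360 \
    --seed 1 --save_dir outputs/01_childroom

# The small-cabin sample also ships a negative prompt file:
python run.py --image examples/08_small_cabin.png --text examples/08_small_cabin.txt \
    --neg_text examples/08_small_cabin_negative.txt --save_dir outputs/08_small_cabin
```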
70 | 71 | 72 | 76 | 77 | 78 | ## Acknowledgement 79 | 80 | Many thanks to [LucidDreamer](https://github.com/luciddreamer-cvlab/LucidDreamer), [ZoeDepth](https://github.com/isl-org/ZoeDepth), [3DGS](https://github.com/graphdeco-inria/gaussian-splatting), [Scaffold-GS](https://github.com/city-super/Scaffold-GS), [HAC](https://github.com/YihangChen-ee/HAC) and Runway for their excellent codebase. -------------------------------------------------------------------------------- /ZoeDepth/.gitignore: -------------------------------------------------------------------------------- 1 | *.png 2 | **.gif 3 | .vscode/ 4 | *.rdb 5 | **.xml 6 | wandb/ 7 | slurm/ 8 | tmp/ 9 | .logs/ 10 | checkpoints/ 11 | external_jobs/ 12 | # Byte-compiled / optimized / DLL files 13 | __pycache__/ 14 | *.py[cod] 15 | *$py.class 16 | ptlflow_logs/ 17 | output/ 18 | log/ 19 | .idea/ 20 | # C extensions 21 | *.so 22 | results/ 23 | **.DS_Store 24 | **.pt 25 | demo/ 26 | # Distribution / packaging 27 | .Python 28 | build/ 29 | develop-eggs/ 30 | dist/ 31 | downloads/ 32 | eggs/ 33 | .eggs/ 34 | lib/ 35 | lib64/ 36 | parts/ 37 | sdist/ 38 | var/ 39 | wheels/ 40 | pip-wheel-metadata/ 41 | share/python-wheels/ 42 | *.egg-info/ 43 | .installed.cfg 44 | *.egg 45 | MANIFEST 46 | ~shortcuts/ 47 | **/wandb_logs/ 48 | **.db 49 | # PyInstaller 50 | # Usually these files are written by a python script from a template 51 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 52 | *.manifest 53 | *.spec 54 | 55 | # Installer logs 56 | pip-log.txt 57 | pip-delete-this-directory.txt 58 | 59 | # Unit test / coverage reports 60 | htmlcov/ 61 | .tox/ 62 | .nox/ 63 | .coverage 64 | .coverage.* 65 | .cache 66 | nosetests.xml 67 | coverage.xml 68 | *.cover 69 | *.py,cover 70 | .hypothesis/ 71 | .pytest_cache/ 72 | 73 | # Translations 74 | *.mo 75 | *.pot 76 | 77 | # Django stuff: 78 | *.log 79 | local_settings.py 80 | db.sqlite3 81 | db.sqlite3-journal 82 | 83 | # Flask stuff: 84 | instance/ 85 | .webassets-cache 86 | 87 | # Scrapy stuff: 88 | .scrapy 89 | 90 | # Sphinx documentation 91 | docs/_build/ 92 | 93 | # PyBuilder 94 | target/ 95 | 96 | # Jupyter Notebook 97 | .ipynb_checkpoints 98 | 99 | # IPython 100 | profile_default/ 101 | ipython_config.py 102 | 103 | # pyenv 104 | .python-version 105 | 106 | # pipenv 107 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 108 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 109 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 110 | # install all needed dependencies. 111 | #Pipfile.lock 112 | 113 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 114 | __pypackages__/ 115 | 116 | # Celery stuff 117 | celerybeat-schedule 118 | celerybeat.pid 119 | 120 | # SageMath parsed files 121 | *.sage.py 122 | 123 | # Environments 124 | .env 125 | .venv 126 | env/ 127 | venv/ 128 | ENV/ 129 | env.bak/ 130 | venv.bak/ 131 | 132 | # Spyder project settings 133 | .spyderproject 134 | .spyproject 135 | 136 | # Rope project settings 137 | .ropeproject 138 | 139 | # mkdocs documentation 140 | /site 141 | 142 | # mypy 143 | .mypy_cache/ 144 | .dmypy.json 145 | dmypy.json 146 | 147 | # Pyre type checker 148 | .pyre/ 149 | -------------------------------------------------------------------------------- /ZoeDepth/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /ZoeDepth/environment.yml: -------------------------------------------------------------------------------- 1 | name: zoe 2 | channels: 3 | - pytorch 4 | - nvidia 5 | - conda-forge 6 | dependencies: 7 | - cuda=11.7.1 8 | - h5py=3.7.0 9 | - hdf5=1.12.2 10 | - matplotlib=3.6.2 11 | - matplotlib-base=3.6.2 12 | - numpy=1.24.1 13 | - opencv=4.6.0 14 | - pip=22.3.1 15 | - python=3.9.7 16 | - pytorch=1.13.1 17 | - pytorch-cuda=11.7 18 | - pytorch-mutex=1.0 19 | - scipy=1.10.0 20 | - torchaudio=0.13.1 21 | - torchvision=0.14.1 22 | - pip: 23 | - huggingface-hub==0.11.1 24 | - timm==0.6.12 25 | - tqdm==4.64.1 26 | - wandb==0.13.9 27 | -------------------------------------------------------------------------------- /ZoeDepth/sanity.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 
14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import numpy as np 26 | from torchvision.transforms import ToTensor 27 | from PIL import Image 28 | from zoedepth.utils.misc import get_image_from_url, colorize 29 | import torch 30 | 31 | from zoedepth.models.builder import build_model 32 | from zoedepth.utils.config import get_config 33 | from pprint import pprint 34 | 35 | 36 | torch.hub.help("intel-isl/MiDaS", "DPT_BEiT_L_384", force_reload=True) 37 | 38 | DEVICE = "cuda" if torch.cuda.is_available() else "cpu" 39 | if DEVICE == "cpu": 40 | print("WARNING: Running on CPU. This will be slow. Check your CUDA installation.") 41 | 42 | print("*" * 20 + " Testing zoedepth " + "*" * 20) 43 | conf = get_config("zoedepth", "infer") 44 | 45 | 46 | print("Config:") 47 | pprint(conf) 48 | 49 | model = build_model(conf).to(DEVICE) 50 | model.eval() 51 | x = torch.rand(1, 3, 384, 512).to(DEVICE) 52 | 53 | print("-"*20 + "Testing on a random input" + "-"*20) 54 | 55 | with torch.no_grad(): 56 | out = model(x) 57 | 58 | if isinstance(out, dict): 59 | # print shapes of all outputs 60 | for k, v in out.items(): 61 | if v is not None: 62 | print(k, v.shape) 63 | else: 64 | print([o.shape for o in out if o is not None]) 65 | 66 | print("\n\n") 67 | print("-"*20 + " Testing on an indoor scene from url " + "-"*20) 68 | 69 | # Test img 70 | url = "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcS4W8H_Nxk_rs3Vje_zj6mglPOH7bnPhQitBH8WkqjlqQVotdtDEG37BsnGofME3_u6lDk&usqp=CAU" 71 | img = get_image_from_url(url) 72 | orig_size = img.size 73 | X = ToTensor()(img) 74 | X = X.unsqueeze(0).to(DEVICE) 75 | 76 | print("X.shape", X.shape) 77 | print("predicting") 78 | 79 | with torch.no_grad(): 80 | out = model.infer(X).cpu() 81 | 82 | # or just, 83 | # out = model.infer_pil(img) 84 | 85 | 86 | print("output.shape", out.shape) 87 | pred = Image.fromarray(colorize(out)) 88 | # Stack img and pred side by side for comparison and save 89 | pred = pred.resize(orig_size, Image.ANTIALIAS) 90 | stacked = Image.new("RGB", (orig_size[0]*2, orig_size[1])) 91 | stacked.paste(img, (0, 0)) 92 | stacked.paste(pred, (orig_size[0], 0)) 93 | 94 | stacked.save("pred.png") 95 | print("saved pred.png") 96 | 97 | 98 | model.infer_pil(img, output_type="pil").save("pred_raw.png") -------------------------------------------------------------------------------- /ZoeDepth/sanity_hub.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above 
copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | import numpy as np 27 | from torchvision.transforms import ToTensor 28 | from PIL import Image 29 | from zoedepth.utils.misc import get_image_from_url, colorize 30 | 31 | from zoedepth.models.builder import build_model 32 | from zoedepth.utils.config import get_config 33 | from pprint import pprint 34 | 35 | 36 | 37 | # Trigger reload of MiDaS 38 | torch.hub.help("intel-isl/MiDaS", "DPT_BEiT_L_384", force_reload=True) 39 | 40 | 41 | model = torch.hub.load(".", "ZoeD_K", source="local", pretrained=True) 42 | model = torch.hub.load(".", "ZoeD_NK", source="local", pretrained=True) 43 | model = torch.hub.load(".", "ZoeD_N", source="local", pretrained=True) 44 | -------------------------------------------------------------------------------- /ZoeDepth/train_mono.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from zoedepth.utils.misc import count_parameters, parallelize 26 | from zoedepth.utils.config import get_config 27 | from zoedepth.utils.arg_utils import parse_unknown 28 | from zoedepth.trainers.builder import get_trainer 29 | from zoedepth.models.builder import build_model 30 | from zoedepth.data.data_mono import DepthDataLoader 31 | import torch.utils.data.distributed 32 | import torch.multiprocessing as mp 33 | import torch 34 | import numpy as np 35 | from pprint import pprint 36 | import argparse 37 | import os 38 | 39 | os.environ["PYOPENGL_PLATFORM"] = "egl" 40 | os.environ["WANDB_START_METHOD"] = "thread" 41 | 42 | 43 | def fix_random_seed(seed: int): 44 | import random 45 | 46 | import numpy 47 | import torch 48 | 49 | random.seed(seed) 50 | numpy.random.seed(seed) 51 | torch.manual_seed(seed) 52 | torch.cuda.manual_seed(seed) 53 | torch.cuda.manual_seed_all(seed) 54 | 55 | torch.backends.cudnn.deterministic = True 56 | torch.backends.cudnn.benchmark = True 57 | 58 | 59 | def load_ckpt(config, model, checkpoint_dir="./checkpoints", ckpt_type="best"): 60 | import glob 61 | import os 62 | 63 | from zoedepth.models.model_io import load_wts 64 | 65 | if hasattr(config, "checkpoint"): 66 | checkpoint = config.checkpoint 67 | elif hasattr(config, "ckpt_pattern"): 68 | pattern = config.ckpt_pattern 69 | matches = glob.glob(os.path.join( 70 | checkpoint_dir, f"*{pattern}*{ckpt_type}*")) 71 | if not (len(matches) > 0): 72 | raise ValueError(f"No matches found for the pattern {pattern}") 73 | 74 | checkpoint = matches[0] 75 | 76 | else: 77 | return model 78 | model = load_wts(model, checkpoint) 79 | print("Loaded weights from {0}".format(checkpoint)) 80 | return model 81 | 82 | 83 | def main_worker(gpu, ngpus_per_node, config): 84 | try: 85 | seed = config.seed if 'seed' in config and config.seed else 43 86 | fix_random_seed(seed) 87 | 88 | config.gpu = gpu 89 | 90 | model = build_model(config) 91 | model = load_ckpt(config, model) 92 | model = parallelize(config, model) 93 | 94 | total_params = f"{round(count_parameters(model)/1e6,2)}M" 95 | config.total_params = total_params 96 | print(f"Total parameters : {total_params}") 97 | 98 | train_loader = DepthDataLoader(config, "train").data 99 | test_loader = DepthDataLoader(config, "online_eval").data 100 | 101 | trainer = get_trainer(config)( 102 | config, model, train_loader, test_loader, device=config.gpu) 103 | 104 | trainer.train() 105 | finally: 106 | import wandb 107 | wandb.finish() 108 | 109 | 110 | if __name__ == '__main__': 111 | mp.set_start_method('forkserver') 112 | 113 | parser = argparse.ArgumentParser() 114 | parser.add_argument("-m", "--model", type=str, default="synunet") 115 | parser.add_argument("-d", "--dataset", type=str, default='nyu') 116 | parser.add_argument("--trainer", type=str, default=None) 117 | 118 | args, unknown_args = parser.parse_known_args() 119 | overwrite_kwargs = parse_unknown(unknown_args) 120 | 121 | overwrite_kwargs["model"] = args.model 122 | if args.trainer is not None: 123 | overwrite_kwargs["trainer"] = args.trainer 124 | 125 | config = get_config(args.model, "train", args.dataset, **overwrite_kwargs) 126 | # git_commit() 127 | if config.use_shared_dict: 128 | shared_dict = mp.Manager().dict() 129 | else: 130 | shared_dict = None 131 | config.shared_dict = shared_dict 132 | 133 | config.batch_size = config.bs 134 | config.mode = 'train' 135 | if config.root != "." 
and not os.path.isdir(config.root): 136 | os.makedirs(config.root) 137 | 138 | try: 139 | node_str = os.environ['SLURM_JOB_NODELIST'].replace( 140 | '[', '').replace(']', '') 141 | nodes = node_str.split(',') 142 | 143 | config.world_size = len(nodes) 144 | config.rank = int(os.environ['SLURM_PROCID']) 145 | # config.save_dir = "/ibex/scratch/bhatsf/videodepth/checkpoints" 146 | 147 | except KeyError as e: 148 | # We are NOT using SLURM 149 | config.world_size = 1 150 | config.rank = 0 151 | nodes = ["127.0.0.1"] 152 | 153 | if config.distributed: 154 | 155 | print(config.rank) 156 | port = np.random.randint(15000, 15025) 157 | config.dist_url = 'tcp://{}:{}'.format(nodes[0], port) 158 | print(config.dist_url) 159 | config.dist_backend = 'nccl' 160 | config.gpu = None 161 | 162 | ngpus_per_node = torch.cuda.device_count() 163 | config.num_workers = config.workers 164 | config.ngpus_per_node = ngpus_per_node 165 | print("Config:") 166 | pprint(config) 167 | if config.distributed: 168 | config.world_size = ngpus_per_node * config.world_size 169 | mp.spawn(main_worker, nprocs=ngpus_per_node, 170 | args=(ngpus_per_node, config)) 171 | else: 172 | if ngpus_per_node == 1: 173 | config.gpu = 0 174 | main_worker(config.gpu, ngpus_per_node, config) 175 | -------------------------------------------------------------------------------- /ZoeDepth/ui/app.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import gradio as gr 26 | import torch 27 | 28 | from .gradio_depth_pred import create_demo as create_depth_pred_demo 29 | from .gradio_im_to_3d import create_demo as create_im_to_3d_demo 30 | from .gradio_pano_to_3d import create_demo as create_pano_to_3d_demo 31 | 32 | 33 | css = """ 34 | #img-display-container { 35 | max-height: 50vh; 36 | } 37 | #img-display-input { 38 | max-height: 40vh; 39 | } 40 | #img-display-output { 41 | max-height: 40vh; 42 | } 43 | 44 | """ 45 | DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' 46 | model = torch.hub.load('isl-org/ZoeDepth', "ZoeD_N", pretrained=True).to(DEVICE).eval() 47 | 48 | title = "# ZoeDepth" 49 | description = """Official demo for **ZoeDepth: Zero-shot Transfer by Combining Relative and Metric Depth**. 
50 | 51 | ZoeDepth is a deep learning model for metric depth estimation from a single image. 52 | 53 | Please refer to our [paper](https://arxiv.org/abs/2302.12288) or [github](https://github.com/isl-org/ZoeDepth) for more details.""" 54 | 55 | with gr.Blocks(css=css) as demo: 56 | gr.Markdown(title) 57 | gr.Markdown(description) 58 | with gr.Tab("Depth Prediction"): 59 | create_depth_pred_demo(model) 60 | with gr.Tab("Image to 3D"): 61 | create_im_to_3d_demo(model) 62 | with gr.Tab("360 Panorama to 3D"): 63 | create_pano_to_3d_demo(model) 64 | 65 | if __name__ == '__main__': 66 | demo.queue().launch() -------------------------------------------------------------------------------- /ZoeDepth/ui/gradio_depth_pred.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import gradio as gr 26 | from zoedepth.utils.misc import colorize 27 | from PIL import Image 28 | import tempfile 29 | 30 | def predict_depth(model, image): 31 | depth = model.infer_pil(image) 32 | return depth 33 | 34 | def create_demo(model): 35 | gr.Markdown("### Depth Prediction demo") 36 | with gr.Row(): 37 | input_image = gr.Image(label="Input Image", type='pil', elem_id='img-display-input').style(height="auto") 38 | depth_image = gr.Image(label="Depth Map", elem_id='img-display-output') 39 | raw_file = gr.File(label="16-bit raw depth, multiplier:256") 40 | submit = gr.Button("Submit") 41 | 42 | def on_submit(image): 43 | depth = predict_depth(model, image) 44 | colored_depth = colorize(depth, cmap='gray_r') 45 | tmp = tempfile.NamedTemporaryFile(suffix='.png', delete=False) 46 | raw_depth = Image.fromarray((depth*256).astype('uint16')) 47 | raw_depth.save(tmp.name) 48 | return [colored_depth, tmp.name] 49 | 50 | submit.click(on_submit, inputs=[input_image], outputs=[depth_image, raw_file]) 51 | # examples = gr.Examples(examples=["examples/person_1.jpeg", "examples/person_2.jpeg", "examples/person-leaves.png", "examples/living-room.jpeg"], 52 | # inputs=[input_image]) -------------------------------------------------------------------------------- /ZoeDepth/ui/gradio_im_to_3d.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import gradio as gr 26 | import numpy as np 27 | import trimesh 28 | from zoedepth.utils.geometry import depth_to_points, create_triangles 29 | from functools import partial 30 | import tempfile 31 | 32 | 33 | def depth_edges_mask(depth): 34 | """Returns a mask of edges in the depth map. 35 | Args: 36 | depth: 2D numpy array of shape (H, W) with dtype float32. 37 | Returns: 38 | mask: 2D numpy array of shape (H, W) with dtype bool. 39 | """ 40 | # Compute the x and y gradients of the depth map. 41 | depth_dx, depth_dy = np.gradient(depth) 42 | # Compute the gradient magnitude. 43 | depth_grad = np.sqrt(depth_dx ** 2 + depth_dy ** 2) 44 | # Compute the edge mask. 
45 | mask = depth_grad > 0.05 46 | return mask 47 | 48 | 49 | def predict_depth(model, image): 50 | depth = model.infer_pil(image) 51 | return depth 52 | 53 | def get_mesh(model, image, keep_edges=False): 54 | image.thumbnail((1024,1024)) # limit the size of the input image 55 | depth = predict_depth(model, image) 56 | pts3d = depth_to_points(depth[None]) 57 | pts3d = pts3d.reshape(-1, 3) 58 | 59 | # Create a trimesh mesh from the points 60 | # Each pixel is connected to its 4 neighbors 61 | # colors are the RGB values of the image 62 | 63 | verts = pts3d.reshape(-1, 3) 64 | image = np.array(image) 65 | if keep_edges: 66 | triangles = create_triangles(image.shape[0], image.shape[1]) 67 | else: 68 | triangles = create_triangles(image.shape[0], image.shape[1], mask=~depth_edges_mask(depth)) 69 | colors = image.reshape(-1, 3) 70 | mesh = trimesh.Trimesh(vertices=verts, faces=triangles, vertex_colors=colors) 71 | 72 | # Save as glb 73 | glb_file = tempfile.NamedTemporaryFile(suffix='.glb', delete=False) 74 | glb_path = glb_file.name 75 | mesh.export(glb_path) 76 | return glb_path 77 | 78 | def create_demo(model): 79 | 80 | gr.Markdown("### Image to 3D mesh") 81 | gr.Markdown("Convert a single 2D image to a 3D mesh") 82 | 83 | with gr.Row(): 84 | image = gr.Image(label="Input Image", type='pil') 85 | result = gr.Model3D(label="3d mesh reconstruction", clear_color=[ 86 | 1.0, 1.0, 1.0, 1.0]) 87 | 88 | checkbox = gr.Checkbox(label="Keep occlusion edges", value=False) 89 | submit = gr.Button("Submit") 90 | submit.click(partial(get_mesh, model), inputs=[image, checkbox], outputs=[result]) 91 | # examples = gr.Examples(examples=["examples/aerial_beach.jpeg", "examples/mountains.jpeg", "examples/person_1.jpeg", "examples/ancient-carved.jpeg"], 92 | # inputs=[image]) 93 | 94 | -------------------------------------------------------------------------------- /ZoeDepth/ui/gradio_pano_to_3d.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import gradio as gr 26 | import numpy as np 27 | import trimesh 28 | from zoedepth.utils.geometry import create_triangles 29 | from functools import partial 30 | import tempfile 31 | 32 | def depth_edges_mask(depth): 33 | """Returns a mask of edges in the depth map. 
34 | Args: 35 | depth: 2D numpy array of shape (H, W) with dtype float32. 36 | Returns: 37 | mask: 2D numpy array of shape (H, W) with dtype bool. 38 | """ 39 | # Compute the x and y gradients of the depth map. 40 | depth_dx, depth_dy = np.gradient(depth) 41 | # Compute the gradient magnitude. 42 | depth_grad = np.sqrt(depth_dx ** 2 + depth_dy ** 2) 43 | # Compute the edge mask. 44 | mask = depth_grad > 0.05 45 | return mask 46 | 47 | 48 | def pano_depth_to_world_points(depth): 49 | """ 50 | 360 depth to world points 51 | given 2D depth is an equirectangular projection of a spherical image 52 | Treat depth as radius 53 | 54 | longitude : -pi to pi 55 | latitude : -pi/2 to pi/2 56 | """ 57 | 58 | # Convert depth to radius 59 | radius = depth.flatten() 60 | 61 | lon = np.linspace(-np.pi, np.pi, depth.shape[1]) 62 | lat = np.linspace(-np.pi/2, np.pi/2, depth.shape[0]) 63 | 64 | lon, lat = np.meshgrid(lon, lat) 65 | lon = lon.flatten() 66 | lat = lat.flatten() 67 | 68 | # Convert to cartesian coordinates 69 | x = radius * np.cos(lat) * np.cos(lon) 70 | y = radius * np.cos(lat) * np.sin(lon) 71 | z = radius * np.sin(lat) 72 | 73 | pts3d = np.stack([x, y, z], axis=1) 74 | 75 | return pts3d 76 | 77 | 78 | def predict_depth(model, image): 79 | depth = model.infer_pil(image) 80 | return depth 81 | 82 | def get_mesh(model, image, keep_edges=False): 83 | image.thumbnail((1024,1024)) # limit the size of the image 84 | depth = predict_depth(model, image) 85 | pts3d = pano_depth_to_world_points(depth) 86 | 87 | # Create a trimesh mesh from the points 88 | # Each pixel is connected to its 4 neighbors 89 | # colors are the RGB values of the image 90 | 91 | verts = pts3d.reshape(-1, 3) 92 | image = np.array(image) 93 | if keep_edges: 94 | triangles = create_triangles(image.shape[0], image.shape[1]) 95 | else: 96 | triangles = create_triangles(image.shape[0], image.shape[1], mask=~depth_edges_mask(depth)) 97 | colors = image.reshape(-1, 3) 98 | mesh = trimesh.Trimesh(vertices=verts, faces=triangles, vertex_colors=colors) 99 | 100 | # Save as glb 101 | glb_file = tempfile.NamedTemporaryFile(suffix='.glb', delete=False) 102 | glb_path = glb_file.name 103 | mesh.export(glb_path) 104 | return glb_path 105 | 106 | def create_demo(model): 107 | gr.Markdown("### Panorama to 3D mesh") 108 | gr.Markdown("Convert a 360 spherical panorama to a 3D mesh") 109 | gr.Markdown("ZoeDepth was not trained on panoramic images. It doesn't know anything about panoramas or spherical projection. Here, we just treat the estimated depth as radius and some projection errors are expected. 
Nonetheless, ZoeDepth still works surprisingly well on 360 reconstruction.") 110 | 111 | with gr.Row(): 112 | input_image = gr.Image(label="Input Image", type='pil') 113 | result = gr.Model3D(label="3d mesh reconstruction", clear_color=[ 114 | 1.0, 1.0, 1.0, 1.0]) 115 | 116 | checkbox = gr.Checkbox(label="Keep occlusion edges", value=True) 117 | submit = gr.Button("Submit") 118 | submit.click(partial(get_mesh, model), inputs=[input_image, checkbox], outputs=[result]) 119 | # examples = gr.Examples(examples=["examples/pano_1.jpeg", "examples/pano_2.jpeg", "examples/pano_3.jpeg"], 120 | # inputs=[input_image]) -------------------------------------------------------------------------------- /ZoeDepth/ui/ui_requirements.txt: -------------------------------------------------------------------------------- 1 | gradio 2 | trimesh==3.9.42 -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/ddad.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self, resize_shape): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | self.resize = transforms.Resize(resize_shape) 40 | 41 | def __call__(self, sample): 42 | image, depth = sample['image'], sample['depth'] 43 | image = self.to_tensor(image) 44 | image = self.normalize(image) 45 | depth = self.to_tensor(depth) 46 | 47 | image = self.resize(image) 48 | 49 | return {'image': image, 'depth': depth, 'dataset': "ddad"} 50 | 51 | def to_tensor(self, pic): 52 | 53 | if isinstance(pic, np.ndarray): 54 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 55 | return img 56 | 57 | # # handle PIL Image 58 | if pic.mode == 'I': 59 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 60 | elif pic.mode == 'I;16': 61 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 62 | else: 63 | img = torch.ByteTensor( 64 | torch.ByteStorage.from_buffer(pic.tobytes())) 65 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 66 | if pic.mode == 'YCbCr': 67 | nchannel = 3 68 | elif pic.mode == 'I;16': 69 | nchannel = 1 70 | else: 71 | nchannel = len(pic.mode) 72 | img = img.view(pic.size[1], pic.size[0], nchannel) 73 | 74 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 75 | 76 | if isinstance(img, torch.ByteTensor): 77 | return img.float() 78 | else: 79 | return img 80 | 81 | 82 | class DDAD(Dataset): 83 | def __init__(self, data_dir_root, resize_shape): 84 | import glob 85 | 86 | # image paths are of the form /{outleft, depthmap}/*.png 87 | self.image_files = glob.glob(os.path.join(data_dir_root, '*.png')) 88 | self.depth_files = [r.replace("_rgb.png", "_depth.npy") 89 | for r in self.image_files] 90 | self.transform = ToTensor(resize_shape) 91 | 92 | def __getitem__(self, idx): 93 | 94 | image_path = self.image_files[idx] 95 | depth_path = self.depth_files[idx] 96 | 97 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 98 | depth = np.load(depth_path) # meters 99 | 100 | # depth[depth > 8] = -1 101 | depth = depth[..., None] 102 | 103 | sample = dict(image=image, depth=depth) 104 | sample = self.transform(sample) 105 | 106 | if idx == 0: 107 | print(sample["image"].shape) 108 | 109 | return sample 110 | 111 | def __len__(self): 112 | return len(self.image_files) 113 | 114 | 115 | def get_ddad_loader(data_dir_root, resize_shape, batch_size=1, **kwargs): 116 | dataset = DDAD(data_dir_root, resize_shape) 117 | return DataLoader(dataset, batch_size, **kwargs) 118 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/diml_indoor_test.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to 
deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | self.resize = transforms.Resize((480, 640)) 40 | 41 | def __call__(self, sample): 42 | image, depth = sample['image'], sample['depth'] 43 | image = self.to_tensor(image) 44 | image = self.normalize(image) 45 | depth = self.to_tensor(depth) 46 | 47 | image = self.resize(image) 48 | 49 | return {'image': image, 'depth': depth, 'dataset': "diml_indoor"} 50 | 51 | def to_tensor(self, pic): 52 | 53 | if isinstance(pic, np.ndarray): 54 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 55 | return img 56 | 57 | # # handle PIL Image 58 | if pic.mode == 'I': 59 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 60 | elif pic.mode == 'I;16': 61 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 62 | else: 63 | img = torch.ByteTensor( 64 | torch.ByteStorage.from_buffer(pic.tobytes())) 65 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 66 | if pic.mode == 'YCbCr': 67 | nchannel = 3 68 | elif pic.mode == 'I;16': 69 | nchannel = 1 70 | else: 71 | nchannel = len(pic.mode) 72 | img = img.view(pic.size[1], pic.size[0], nchannel) 73 | 74 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 75 | if isinstance(img, torch.ByteTensor): 76 | return img.float() 77 | else: 78 | return img 79 | 80 | 81 | class DIML_Indoor(Dataset): 82 | def __init__(self, data_dir_root): 83 | import glob 84 | 85 | # image paths are of the form /{HR, LR}//{color, depth_filled}/*.png 86 | self.image_files = glob.glob(os.path.join( 87 | data_dir_root, "LR", '*', 'color', '*.png')) 88 | self.depth_files = [r.replace("color", "depth_filled").replace( 89 | "_c.png", "_depth_filled.png") for r in self.image_files] 90 | self.transform = ToTensor() 91 | 92 | def __getitem__(self, idx): 93 | image_path = self.image_files[idx] 94 | depth_path = self.depth_files[idx] 95 | 96 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 97 | depth = np.asarray(Image.open(depth_path), 98 | dtype='uint16') / 1000.0 # mm to meters 99 | 100 | # print(np.shape(image)) 101 | # print(np.shape(depth)) 102 | 103 | # depth[depth > 8] = -1 104 | depth = depth[..., None] 105 | 106 | sample = dict(image=image, depth=depth) 107 
| 108 | # return sample 109 | sample = self.transform(sample) 110 | 111 | if idx == 0: 112 | print(sample["image"].shape) 113 | 114 | return sample 115 | 116 | def __len__(self): 117 | return len(self.image_files) 118 | 119 | 120 | def get_diml_indoor_loader(data_dir_root, batch_size=1, **kwargs): 121 | dataset = DIML_Indoor(data_dir_root) 122 | return DataLoader(dataset, batch_size, **kwargs) 123 | 124 | # get_diml_indoor_loader(data_dir_root="datasets/diml/indoor/test/HR") 125 | # get_diml_indoor_loader(data_dir_root="datasets/diml/indoor/test/LR") 126 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/diml_outdoor_test.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | 40 | def __call__(self, sample): 41 | image, depth = sample['image'], sample['depth'] 42 | image = self.to_tensor(image) 43 | image = self.normalize(image) 44 | depth = self.to_tensor(depth) 45 | 46 | return {'image': image, 'depth': depth, 'dataset': "diml_outdoor"} 47 | 48 | def to_tensor(self, pic): 49 | 50 | if isinstance(pic, np.ndarray): 51 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 52 | return img 53 | 54 | # # handle PIL Image 55 | if pic.mode == 'I': 56 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 57 | elif pic.mode == 'I;16': 58 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 59 | else: 60 | img = torch.ByteTensor( 61 | torch.ByteStorage.from_buffer(pic.tobytes())) 62 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 63 | if pic.mode == 'YCbCr': 64 | nchannel = 3 65 | elif pic.mode == 'I;16': 66 | nchannel = 1 67 | else: 68 | nchannel = len(pic.mode) 69 | img = img.view(pic.size[1], pic.size[0], nchannel) 70 | 71 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 72 | if isinstance(img, torch.ByteTensor): 73 | return img.float() 74 | else: 75 | return img 76 | 77 | 78 | class DIML_Outdoor(Dataset): 79 | def __init__(self, data_dir_root): 80 | import glob 81 | 82 | # image paths are of the form /{outleft, depthmap}/*.png 83 | self.image_files = glob.glob(os.path.join( 84 | data_dir_root, "*", 'outleft', '*.png')) 85 | self.depth_files = [r.replace("outleft", "depthmap") 86 | for r in self.image_files] 87 | self.transform = ToTensor() 88 | 89 | def __getitem__(self, idx): 90 | image_path = self.image_files[idx] 91 | depth_path = self.depth_files[idx] 92 | 93 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 94 | depth = np.asarray(Image.open(depth_path), 95 | dtype='uint16') / 1000.0 # mm to meters 96 | 97 | # depth[depth > 8] = -1 98 | depth = depth[..., None] 99 | 100 | sample = dict(image=image, depth=depth, dataset="diml_outdoor") 101 | 102 | # return sample 103 | return self.transform(sample) 104 | 105 | def __len__(self): 106 | return len(self.image_files) 107 | 108 | 109 | def get_diml_outdoor_loader(data_dir_root, batch_size=1, **kwargs): 110 | dataset = DIML_Outdoor(data_dir_root) 111 | return DataLoader(dataset, batch_size, **kwargs) 112 | 113 | # get_diml_outdoor_loader(data_dir_root="datasets/diml/outdoor/test/HR") 114 | # get_diml_outdoor_loader(data_dir_root="datasets/diml/outdoor/test/LR") 115 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/diode.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the 
Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | self.resize = transforms.Resize(480) 40 | 41 | def __call__(self, sample): 42 | image, depth = sample['image'], sample['depth'] 43 | image = self.to_tensor(image) 44 | image = self.normalize(image) 45 | depth = self.to_tensor(depth) 46 | 47 | image = self.resize(image) 48 | 49 | return {'image': image, 'depth': depth, 'dataset': "diode"} 50 | 51 | def to_tensor(self, pic): 52 | 53 | if isinstance(pic, np.ndarray): 54 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 55 | return img 56 | 57 | # # handle PIL Image 58 | if pic.mode == 'I': 59 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 60 | elif pic.mode == 'I;16': 61 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 62 | else: 63 | img = torch.ByteTensor( 64 | torch.ByteStorage.from_buffer(pic.tobytes())) 65 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 66 | if pic.mode == 'YCbCr': 67 | nchannel = 3 68 | elif pic.mode == 'I;16': 69 | nchannel = 1 70 | else: 71 | nchannel = len(pic.mode) 72 | img = img.view(pic.size[1], pic.size[0], nchannel) 73 | 74 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 75 | 76 | if isinstance(img, torch.ByteTensor): 77 | return img.float() 78 | else: 79 | return img 80 | 81 | 82 | class DIODE(Dataset): 83 | def __init__(self, data_dir_root): 84 | import glob 85 | 86 | # image paths are of the form /scene_#/scan_#/*.png 87 | self.image_files = glob.glob( 88 | os.path.join(data_dir_root, '*', '*', '*.png')) 89 | self.depth_files = [r.replace(".png", "_depth.npy") 90 | for r in self.image_files] 91 | self.depth_mask_files = [ 92 | r.replace(".png", "_depth_mask.npy") for r in self.image_files] 93 | self.transform = ToTensor() 94 | 95 | def __getitem__(self, idx): 96 | image_path = self.image_files[idx] 97 | depth_path = self.depth_files[idx] 98 | depth_mask_path = self.depth_mask_files[idx] 99 | 100 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 101 | depth = np.load(depth_path) # in meters 102 | valid = np.load(depth_mask_path) # binary 103 | 104 | # depth[depth > 8] = -1 105 | # depth = depth[..., None] 106 | 107 | sample = dict(image=image, depth=depth, valid=valid) 108 | 109 | # return sample 110 | sample = self.transform(sample) 111 | 112 | if idx == 0: 113 | print(sample["image"].shape) 114 | 115 | return sample 116 | 117 | def __len__(self): 118 | return len(self.image_files) 
119 | 120 | 121 | def get_diode_loader(data_dir_root, batch_size=1, **kwargs): 122 | dataset = DIODE(data_dir_root) 123 | return DataLoader(dataset, batch_size, **kwargs) 124 | 125 | # get_diode_loader(data_dir_root="datasets/diode/val/outdoor") 126 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/hypersim.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import glob 26 | import os 27 | 28 | import h5py 29 | import numpy as np 30 | import torch 31 | from PIL import Image 32 | from torch.utils.data import DataLoader, Dataset 33 | from torchvision import transforms 34 | 35 | 36 | def hypersim_distance_to_depth(npyDistance): 37 | intWidth, intHeight, fltFocal = 1024, 768, 886.81 38 | 39 | npyImageplaneX = np.linspace((-0.5 * intWidth) + 0.5, (0.5 * intWidth) - 0.5, intWidth).reshape( 40 | 1, intWidth).repeat(intHeight, 0).astype(np.float32)[:, :, None] 41 | npyImageplaneY = np.linspace((-0.5 * intHeight) + 0.5, (0.5 * intHeight) - 0.5, 42 | intHeight).reshape(intHeight, 1).repeat(intWidth, 1).astype(np.float32)[:, :, None] 43 | npyImageplaneZ = np.full([intHeight, intWidth, 1], fltFocal, np.float32) 44 | npyImageplane = np.concatenate( 45 | [npyImageplaneX, npyImageplaneY, npyImageplaneZ], 2) 46 | 47 | npyDepth = npyDistance / np.linalg.norm(npyImageplane, 2, 2) * fltFocal 48 | return npyDepth 49 | 50 | 51 | class ToTensor(object): 52 | def __init__(self): 53 | # self.normalize = transforms.Normalize( 54 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 55 | self.normalize = lambda x: x 56 | self.resize = transforms.Resize((480, 640)) 57 | 58 | def __call__(self, sample): 59 | image, depth = sample['image'], sample['depth'] 60 | image = self.to_tensor(image) 61 | image = self.normalize(image) 62 | depth = self.to_tensor(depth) 63 | 64 | image = self.resize(image) 65 | 66 | return {'image': image, 'depth': depth, 'dataset': "hypersim"} 67 | 68 | def to_tensor(self, pic): 69 | 70 | if isinstance(pic, np.ndarray): 71 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 72 | return img 73 | 74 | # # handle PIL Image 75 | if pic.mode == 'I': 76 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 77 | elif pic.mode == 'I;16': 78 | img = 
torch.from_numpy(np.array(pic, np.int16, copy=False)) 79 | else: 80 | img = torch.ByteTensor( 81 | torch.ByteStorage.from_buffer(pic.tobytes())) 82 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 83 | if pic.mode == 'YCbCr': 84 | nchannel = 3 85 | elif pic.mode == 'I;16': 86 | nchannel = 1 87 | else: 88 | nchannel = len(pic.mode) 89 | img = img.view(pic.size[1], pic.size[0], nchannel) 90 | 91 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 92 | if isinstance(img, torch.ByteTensor): 93 | return img.float() 94 | else: 95 | return img 96 | 97 | 98 | class HyperSim(Dataset): 99 | def __init__(self, data_dir_root): 100 | # image paths are of the form //images/scene_cam_#_final_preview/*.tonemap.jpg 101 | # depth paths are of the form //images/scene_cam_#_final_preview/*.depth_meters.hdf5 102 | self.image_files = glob.glob(os.path.join( 103 | data_dir_root, '*', 'images', 'scene_cam_*_final_preview', '*.tonemap.jpg')) 104 | self.depth_files = [r.replace("_final_preview", "_geometry_hdf5").replace( 105 | ".tonemap.jpg", ".depth_meters.hdf5") for r in self.image_files] 106 | self.transform = ToTensor() 107 | 108 | def __getitem__(self, idx): 109 | image_path = self.image_files[idx] 110 | depth_path = self.depth_files[idx] 111 | 112 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 113 | 114 | # depth from hdf5 115 | depth_fd = h5py.File(depth_path, "r") 116 | # in meters (Euclidean distance) 117 | distance_meters = np.array(depth_fd['dataset']) 118 | depth = hypersim_distance_to_depth( 119 | distance_meters) # in meters (planar depth) 120 | 121 | # depth[depth > 8] = -1 122 | depth = depth[..., None] 123 | 124 | sample = dict(image=image, depth=depth) 125 | sample = self.transform(sample) 126 | 127 | if idx == 0: 128 | print(sample["image"].shape) 129 | 130 | return sample 131 | 132 | def __len__(self): 133 | return len(self.image_files) 134 | 135 | 136 | def get_hypersim_loader(data_dir_root, batch_size=1, **kwargs): 137 | dataset = HyperSim(data_dir_root) 138 | return DataLoader(dataset, batch_size, **kwargs) 139 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/ibims.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
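# Illustrative usage sketch for zoedepth/data/hypersim.py (above); not part of the original file.
# hypersim_distance_to_depth converts HyperSim's per-pixel ray distances (Euclidean distance from the
# camera centre, hard-coded 1024x768 resolution and focal length 886.81) into planar depth along the
# optical axis. The constant 5.0 m distance map below is an assumption chosen only to show the effect.
#
#     import numpy as np
#     from zoedepth.data.hypersim import hypersim_distance_to_depth
#
#     distance = np.full((768, 1024), 5.0, dtype=np.float32)   # ray distance in metres
#     depth = hypersim_distance_to_depth(distance)             # planar depth, same shape
#     # depth matches the distance near the principal point and shrinks towards the image corners,
#     # so depth <= distance everywhere.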
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms as T 32 | 33 | 34 | class iBims(Dataset): 35 | def __init__(self, config): 36 | root_folder = config.ibims_root 37 | with open(os.path.join(root_folder, "imagelist.txt"), 'r') as f: 38 | imglist = f.read().split() 39 | 40 | samples = [] 41 | for basename in imglist: 42 | img_path = os.path.join(root_folder, 'rgb', basename + ".png") 43 | depth_path = os.path.join(root_folder, 'depth', basename + ".png") 44 | valid_mask_path = os.path.join( 45 | root_folder, 'mask_invalid', basename+".png") 46 | transp_mask_path = os.path.join( 47 | root_folder, 'mask_transp', basename+".png") 48 | 49 | samples.append( 50 | (img_path, depth_path, valid_mask_path, transp_mask_path)) 51 | 52 | self.samples = samples 53 | # self.normalize = T.Normalize( 54 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 55 | self.normalize = lambda x : x 56 | 57 | def __getitem__(self, idx): 58 | img_path, depth_path, valid_mask_path, transp_mask_path = self.samples[idx] 59 | 60 | img = np.asarray(Image.open(img_path), dtype=np.float32) / 255.0 61 | depth = np.asarray(Image.open(depth_path), 62 | dtype=np.uint16).astype('float')*50.0/65535 63 | 64 | mask_valid = np.asarray(Image.open(valid_mask_path)) 65 | mask_transp = np.asarray(Image.open(transp_mask_path)) 66 | 67 | # depth = depth * mask_valid * mask_transp 68 | depth = np.where(mask_valid * mask_transp, depth, -1) 69 | 70 | img = torch.from_numpy(img).permute(2, 0, 1) 71 | img = self.normalize(img) 72 | depth = torch.from_numpy(depth).unsqueeze(0) 73 | return dict(image=img, depth=depth, image_path=img_path, depth_path=depth_path, dataset='ibims') 74 | 75 | def __len__(self): 76 | return len(self.samples) 77 | 78 | 79 | def get_ibims_loader(config, batch_size=1, **kwargs): 80 | dataloader = DataLoader(iBims(config), batch_size=batch_size, **kwargs) 81 | return dataloader 82 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/preprocess.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
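# Illustrative usage sketch for zoedepth/data/ibims.py (above); not part of the original file.
# get_ibims_loader expects a config whose ibims_root points at an iBims-1 folder containing
# imagelist.txt plus rgb/, depth/, mask_invalid/ and mask_transp/ sub-folders. The path below is an
# assumption; EasyDict (zoedepth/utils/easydict) is used only for attribute-style access.
#
#     from zoedepth.data.ibims import get_ibims_loader
#     from zoedepth.utils.easydict import EasyDict
#
#     config = EasyDict(ibims_root="datasets/ibims")           # assumed dataset location
#     loader = get_ibims_loader(config, batch_size=1)
#     sample = next(iter(loader))
#     # sample["image"]: (1, 3, H, W) RGB in [0, 1]; sample["depth"]: (1, 1, H, W) in metres,
#     # set to -1 wherever the invalid or transparency mask is zero.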
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import numpy as np 26 | from dataclasses import dataclass 27 | from typing import Tuple, List 28 | 29 | # dataclass to store the crop parameters 30 | @dataclass 31 | class CropParams: 32 | top: int 33 | bottom: int 34 | left: int 35 | right: int 36 | 37 | 38 | 39 | def get_border_params(rgb_image, tolerance=0.1, cut_off=20, value=0, level_diff_threshold=5, channel_axis=-1, min_border=5) -> CropParams: 40 | gray_image = np.mean(rgb_image, axis=channel_axis) 41 | h, w = gray_image.shape 42 | 43 | 44 | def num_value_pixels(arr): 45 | return np.sum(np.abs(arr - value) < level_diff_threshold) 46 | 47 | def is_above_tolerance(arr, total_pixels): 48 | return (num_value_pixels(arr) / total_pixels) > tolerance 49 | 50 | # Crop top border until number of value pixels become below tolerance 51 | top = min_border 52 | while is_above_tolerance(gray_image[top, :], w) and top < h-1: 53 | top += 1 54 | if top > cut_off: 55 | break 56 | 57 | # Crop bottom border until number of value pixels become below tolerance 58 | bottom = h - min_border 59 | while is_above_tolerance(gray_image[bottom, :], w) and bottom > 0: 60 | bottom -= 1 61 | if h - bottom > cut_off: 62 | break 63 | 64 | # Crop left border until number of value pixels become below tolerance 65 | left = min_border 66 | while is_above_tolerance(gray_image[:, left], h) and left < w-1: 67 | left += 1 68 | if left > cut_off: 69 | break 70 | 71 | # Crop right border until number of value pixels become below tolerance 72 | right = w - min_border 73 | while is_above_tolerance(gray_image[:, right], h) and right > 0: 74 | right -= 1 75 | if w - right > cut_off: 76 | break 77 | 78 | 79 | return CropParams(top, bottom, left, right) 80 | 81 | 82 | def get_white_border(rgb_image, value=255, **kwargs) -> CropParams: 83 | """Crops the white border of the RGB. 84 | 85 | Args: 86 | rgb: RGB image, shape (H, W, 3). 87 | Returns: 88 | Crop parameters. 89 | """ 90 | if value == 255: 91 | # assert range of values in rgb image is [0, 255] 92 | assert np.max(rgb_image) <= 255 and np.min(rgb_image) >= 0, "RGB image values are not in range [0, 255]." 93 | assert rgb_image.max() > 1, "RGB image values are not in range [0, 255]." 94 | elif value == 1: 95 | # assert range of values in rgb image is [0, 1] 96 | assert np.max(rgb_image) <= 1 and np.min(rgb_image) >= 0, "RGB image values are not in range [0, 1]." 97 | 98 | return get_border_params(rgb_image, value=value, **kwargs) 99 | 100 | def get_black_border(rgb_image, **kwargs) -> CropParams: 101 | """Crops the black border of the RGB. 102 | 103 | Args: 104 | rgb: RGB image, shape (H, W, 3). 105 | 106 | Returns: 107 | Crop parameters. 108 | """ 109 | 110 | return get_border_params(rgb_image, value=0, **kwargs) 111 | 112 | def crop_image(image: np.ndarray, crop_params: CropParams) -> np.ndarray: 113 | """Crops the image according to the crop parameters. 114 | 115 | Args: 116 | image: RGB or depth image, shape (H, W, 3) or (H, W). 117 | crop_params: Crop parameters. 118 | 119 | Returns: 120 | Cropped image. 121 | """ 122 | return image[crop_params.top:crop_params.bottom, crop_params.left:crop_params.right] 123 | 124 | def crop_images(*images: np.ndarray, crop_params: CropParams) -> Tuple[np.ndarray]: 125 | """Crops the images according to the crop parameters. 126 | 127 | Args: 128 | images: RGB or depth images, shape (H, W, 3) or (H, W). 129 | crop_params: Crop parameters. 130 | 131 | Returns: 132 | Cropped images. 
133 | """ 134 | return tuple(crop_image(image, crop_params) for image in images) 135 | 136 | def crop_black_or_white_border(rgb_image, *other_images: np.ndarray, tolerance=0.1, cut_off=20, level_diff_threshold=5) -> Tuple[np.ndarray]: 137 | """Crops the white and black border of the RGB and depth images. 138 | 139 | Args: 140 | rgb: RGB image, shape (H, W, 3). This image is used to determine the border. 141 | other_images: The other images to crop according to the border of the RGB image. 142 | Returns: 143 | Cropped RGB and other images. 144 | """ 145 | # crop black border 146 | crop_params = get_black_border(rgb_image, tolerance=tolerance, cut_off=cut_off, level_diff_threshold=level_diff_threshold) 147 | cropped_images = crop_images(rgb_image, *other_images, crop_params=crop_params) 148 | 149 | # crop white border 150 | crop_params = get_white_border(cropped_images[0], tolerance=tolerance, cut_off=cut_off, level_diff_threshold=level_diff_threshold) 151 | cropped_images = crop_images(*cropped_images, crop_params=crop_params) 152 | 153 | return cropped_images 154 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/sun_rgbd_loader.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | 40 | def __call__(self, sample): 41 | image, depth = sample['image'], sample['depth'] 42 | image = self.to_tensor(image) 43 | image = self.normalize(image) 44 | depth = self.to_tensor(depth) 45 | 46 | return {'image': image, 'depth': depth, 'dataset': "sunrgbd"} 47 | 48 | def to_tensor(self, pic): 49 | 50 | if isinstance(pic, np.ndarray): 51 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 52 | return img 53 | 54 | # # handle PIL Image 55 | if pic.mode == 'I': 56 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 57 | elif pic.mode == 'I;16': 58 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 59 | else: 60 | img = torch.ByteTensor( 61 | torch.ByteStorage.from_buffer(pic.tobytes())) 62 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 63 | if pic.mode == 'YCbCr': 64 | nchannel = 3 65 | elif pic.mode == 'I;16': 66 | nchannel = 1 67 | else: 68 | nchannel = len(pic.mode) 69 | img = img.view(pic.size[1], pic.size[0], nchannel) 70 | 71 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 72 | if isinstance(img, torch.ByteTensor): 73 | return img.float() 74 | else: 75 | return img 76 | 77 | 78 | class SunRGBD(Dataset): 79 | def __init__(self, data_dir_root): 80 | # test_file_dirs = loadmat(train_test_file)['alltest'].squeeze() 81 | # all_test = [t[0].replace("/n/fs/sun3d/data/", "") for t in test_file_dirs] 82 | # self.all_test = [os.path.join(data_dir_root, t) for t in all_test] 83 | import glob 84 | self.image_files = glob.glob( 85 | os.path.join(data_dir_root, 'rgb', 'rgb', '*')) 86 | self.depth_files = [ 87 | r.replace("rgb/rgb", "gt/gt").replace("jpg", "png") for r in self.image_files] 88 | self.transform = ToTensor() 89 | 90 | def __getitem__(self, idx): 91 | image_path = self.image_files[idx] 92 | depth_path = self.depth_files[idx] 93 | 94 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 95 | depth = np.asarray(Image.open(depth_path), dtype='uint16') / 1000.0 96 | depth[depth > 8] = -1 97 | depth = depth[..., None] 98 | return self.transform(dict(image=image, depth=depth)) 99 | 100 | def __len__(self): 101 | return len(self.image_files) 102 | 103 | 104 | def get_sunrgbd_loader(data_dir_root, batch_size=1, **kwargs): 105 | dataset = SunRGBD(data_dir_root) 106 | return DataLoader(dataset, batch_size, **kwargs) 107 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/vkitti.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this 
permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | from torch.utils.data import Dataset, DataLoader 27 | from torchvision import transforms 28 | import os 29 | 30 | from PIL import Image 31 | import numpy as np 32 | import cv2 33 | 34 | 35 | class ToTensor(object): 36 | def __init__(self): 37 | self.normalize = transforms.Normalize( 38 | mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 39 | # self.resize = transforms.Resize((375, 1242)) 40 | 41 | def __call__(self, sample): 42 | image, depth = sample['image'], sample['depth'] 43 | 44 | image = self.to_tensor(image) 45 | image = self.normalize(image) 46 | depth = self.to_tensor(depth) 47 | 48 | # image = self.resize(image) 49 | 50 | return {'image': image, 'depth': depth, 'dataset': "vkitti"} 51 | 52 | def to_tensor(self, pic): 53 | 54 | if isinstance(pic, np.ndarray): 55 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 56 | return img 57 | 58 | # # handle PIL Image 59 | if pic.mode == 'I': 60 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 61 | elif pic.mode == 'I;16': 62 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 63 | else: 64 | img = torch.ByteTensor( 65 | torch.ByteStorage.from_buffer(pic.tobytes())) 66 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 67 | if pic.mode == 'YCbCr': 68 | nchannel = 3 69 | elif pic.mode == 'I;16': 70 | nchannel = 1 71 | else: 72 | nchannel = len(pic.mode) 73 | img = img.view(pic.size[1], pic.size[0], nchannel) 74 | 75 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 76 | if isinstance(img, torch.ByteTensor): 77 | return img.float() 78 | else: 79 | return img 80 | 81 | 82 | class VKITTI(Dataset): 83 | def __init__(self, data_dir_root, do_kb_crop=True): 84 | import glob 85 | # image paths are of the form /{HR, LR}//{color, depth_filled}/*.png 86 | self.image_files = glob.glob(os.path.join( 87 | data_dir_root, "test_color", '*.png')) 88 | self.depth_files = [r.replace("test_color", "test_depth") 89 | for r in self.image_files] 90 | self.do_kb_crop = True 91 | self.transform = ToTensor() 92 | 93 | def __getitem__(self, idx): 94 | image_path = self.image_files[idx] 95 | depth_path = self.depth_files[idx] 96 | 97 | image = Image.open(image_path) 98 | depth = Image.open(depth_path) 99 | depth = cv2.imread(depth_path, cv2.IMREAD_ANYCOLOR | 100 | cv2.IMREAD_ANYDEPTH) 101 | print("dpeth min max", depth.min(), depth.max()) 102 | 103 | # print(np.shape(image)) 104 | # print(np.shape(depth)) 105 | 106 | # depth[depth > 8] = -1 107 | 108 | if self.do_kb_crop and False: 109 | height = image.height 110 | width = image.width 111 | top_margin = int(height - 352) 112 | left_margin = int((width - 1216) / 2) 113 | depth = depth.crop( 114 | (left_margin, top_margin, left_margin + 1216, top_margin + 352)) 115 | image = image.crop( 116 | (left_margin, top_margin, left_margin + 1216, top_margin + 352)) 117 | # uv = uv[:, top_margin:top_margin + 352, 
left_margin:left_margin + 1216] 118 | 119 | image = np.asarray(image, dtype=np.float32) / 255.0 120 | # depth = np.asarray(depth, dtype=np.uint16) /1. 121 | depth = depth[..., None] 122 | sample = dict(image=image, depth=depth) 123 | 124 | # return sample 125 | sample = self.transform(sample) 126 | 127 | if idx == 0: 128 | print(sample["image"].shape) 129 | 130 | return sample 131 | 132 | def __len__(self): 133 | return len(self.image_files) 134 | 135 | 136 | def get_vkitti_loader(data_dir_root, batch_size=1, **kwargs): 137 | dataset = VKITTI(data_dir_root) 138 | return DataLoader(dataset, batch_size, **kwargs) 139 | 140 | 141 | if __name__ == "__main__": 142 | loader = get_vkitti_loader( 143 | data_dir_root="/home/bhatsf/shortcuts/datasets/vkitti_test") 144 | print("Total files", len(loader.dataset)) 145 | for i, sample in enumerate(loader): 146 | print(sample["image"].shape) 147 | print(sample["depth"].shape) 148 | print(sample["dataset"]) 149 | print(sample['depth'].min(), sample['depth'].max()) 150 | if i > 5: 151 | break 152 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/base_models/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 
14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/builder.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from importlib import import_module 26 | from zoedepth.models.depth_model import DepthModel 27 | 28 | def build_model(config) -> DepthModel: 29 | """Builds a model from a config. The model is specified by the model name and version in the config. The model is then constructed using the build_from_config function of the model interface. 30 | This function should be used to construct models for training and evaluation. 31 | 32 | Args: 33 | config (dict): Config dict. Config is constructed in utils/config.py. Each model has its own config file(s) saved in its root model folder. 34 | 35 | Returns: 36 | torch.nn.Module: Model corresponding to name and version as specified in config 37 | """ 38 | module_name = f"zoedepth.models.{config.model}" 39 | try: 40 | module = import_module(module_name) 41 | except ModuleNotFoundError as e: 42 | # print the original error message 43 | print(e) 44 | raise ValueError( 45 | f"Model {config.model} not found. 
Refer above error for details.") from e 46 | try: 47 | get_version = getattr(module, "get_version") 48 | except AttributeError as e: 49 | raise ValueError( 50 | f"Model {config.model} has no get_version function.") from e 51 | return get_version(config.version_name).build_from_config(config) 52 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/layers/dist_layers.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | import torch.nn as nn 27 | 28 | 29 | def log_binom(n, k, eps=1e-7): 30 | """ log(nCk) using stirling approximation """ 31 | n = n + eps 32 | k = k + eps 33 | return n * torch.log(n) - k * torch.log(k) - (n-k) * torch.log(n-k+eps) 34 | 35 | 36 | class LogBinomial(nn.Module): 37 | def __init__(self, n_classes=256, act=torch.softmax): 38 | """Compute log binomial distribution for n_classes 39 | 40 | Args: 41 | n_classes (int, optional): number of output classes. Defaults to 256. 42 | """ 43 | super().__init__() 44 | self.K = n_classes 45 | self.act = act 46 | self.register_buffer('k_idx', torch.arange( 47 | 0, n_classes).view(1, -1, 1, 1)) 48 | self.register_buffer('K_minus_1', torch.Tensor( 49 | [self.K-1]).view(1, -1, 1, 1)) 50 | 51 | def forward(self, x, t=1., eps=1e-4): 52 | """Compute log binomial distribution for x 53 | 54 | Args: 55 | x (torch.Tensor - NCHW): probabilities 56 | t (float, torch.Tensor - NCHW, optional): Temperature of distribution. Defaults to 1.. 57 | eps (float, optional): Small number for numerical stability. Defaults to 1e-4. 
58 | 59 | Returns: 60 | torch.Tensor -NCHW: log binomial distribution logbinomial(p;t) 61 | """ 62 | if x.ndim == 3: 63 | x = x.unsqueeze(1) # make it nchw 64 | 65 | one_minus_x = torch.clamp(1 - x, eps, 1) 66 | x = torch.clamp(x, eps, 1) 67 | y = log_binom(self.K_minus_1, self.k_idx) + self.k_idx * \ 68 | torch.log(x) + (self.K - 1 - self.k_idx) * torch.log(one_minus_x) 69 | return self.act(y/t, dim=1) 70 | 71 | 72 | class ConditionalLogBinomial(nn.Module): 73 | def __init__(self, in_features, condition_dim, n_classes=256, bottleneck_factor=2, p_eps=1e-4, max_temp=50, min_temp=1e-7, act=torch.softmax): 74 | """Conditional Log Binomial distribution 75 | 76 | Args: 77 | in_features (int): number of input channels in main feature 78 | condition_dim (int): number of input channels in condition feature 79 | n_classes (int, optional): Number of classes. Defaults to 256. 80 | bottleneck_factor (int, optional): Hidden dim factor. Defaults to 2. 81 | p_eps (float, optional): small eps value. Defaults to 1e-4. 82 | max_temp (float, optional): Maximum temperature of output distribution. Defaults to 50. 83 | min_temp (float, optional): Minimum temperature of output distribution. Defaults to 1e-7. 84 | """ 85 | super().__init__() 86 | self.p_eps = p_eps 87 | self.max_temp = max_temp 88 | self.min_temp = min_temp 89 | self.log_binomial_transform = LogBinomial(n_classes, act=act) 90 | bottleneck = (in_features + condition_dim) // bottleneck_factor 91 | self.mlp = nn.Sequential( 92 | nn.Conv2d(in_features + condition_dim, bottleneck, 93 | kernel_size=1, stride=1, padding=0), 94 | nn.GELU(), 95 | # 2 for p linear norm, 2 for t linear norm 96 | nn.Conv2d(bottleneck, 2+2, kernel_size=1, stride=1, padding=0), 97 | nn.Softplus() 98 | ) 99 | 100 | def forward(self, x, cond): 101 | """Forward pass 102 | 103 | Args: 104 | x (torch.Tensor - NCHW): Main feature 105 | cond (torch.Tensor - NCHW): condition feature 106 | 107 | Returns: 108 | torch.Tensor: Output log binomial distribution 109 | """ 110 | pt = self.mlp(torch.concat((x, cond), dim=1)) 111 | p, t = pt[:, :2, ...], pt[:, 2:, ...] 112 | 113 | p = p + self.p_eps 114 | p = p[:, 0, ...] / (p[:, 0, ...] + p[:, 1, ...]) 115 | 116 | t = t + self.p_eps 117 | t = t[:, 0, ...] / (t[:, 0, ...] + t[:, 1, ...]) 118 | t = t.unsqueeze(1) 119 | t = (self.max_temp - self.min_temp) * t + self.min_temp 120 | 121 | return self.log_binomial_transform(p, t) 122 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/layers/patch_transformer.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 
14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | import torch.nn as nn 27 | 28 | 29 | class PatchTransformerEncoder(nn.Module): 30 | def __init__(self, in_channels, patch_size=10, embedding_dim=128, num_heads=4, use_class_token=False): 31 | """ViT-like transformer block 32 | 33 | Args: 34 | in_channels (int): Input channels 35 | patch_size (int, optional): patch size. Defaults to 10. 36 | embedding_dim (int, optional): Embedding dimension in transformer model. Defaults to 128. 37 | num_heads (int, optional): number of attention heads. Defaults to 4. 38 | use_class_token (bool, optional): Whether to use extra token at the start for global accumulation (called as "class token"). Defaults to False. 39 | """ 40 | super(PatchTransformerEncoder, self).__init__() 41 | self.use_class_token = use_class_token 42 | encoder_layers = nn.TransformerEncoderLayer( 43 | embedding_dim, num_heads, dim_feedforward=1024) 44 | self.transformer_encoder = nn.TransformerEncoder( 45 | encoder_layers, num_layers=4) # takes shape S,N,E 46 | 47 | self.embedding_convPxP = nn.Conv2d(in_channels, embedding_dim, 48 | kernel_size=patch_size, stride=patch_size, padding=0) 49 | 50 | def positional_encoding_1d(self, sequence_length, batch_size, embedding_dim, device='cpu'): 51 | """Generate positional encodings 52 | 53 | Args: 54 | sequence_length (int): Sequence length 55 | embedding_dim (int): Embedding dimension 56 | 57 | Returns: 58 | torch.Tensor SBE: Positional encodings 59 | """ 60 | position = torch.arange( 61 | 0, sequence_length, dtype=torch.float32, device=device).unsqueeze(1) 62 | index = torch.arange( 63 | 0, embedding_dim, 2, dtype=torch.float32, device=device).unsqueeze(0) 64 | div_term = torch.exp(index * (-torch.log(torch.tensor(10000.0, device=device)) / embedding_dim)) 65 | pos_encoding = position * div_term 66 | pos_encoding = torch.cat([torch.sin(pos_encoding), torch.cos(pos_encoding)], dim=1) 67 | pos_encoding = pos_encoding.unsqueeze(1).repeat(1, batch_size, 1) 68 | return pos_encoding 69 | 70 | 71 | def forward(self, x): 72 | """Forward pass 73 | 74 | Args: 75 | x (torch.Tensor - NCHW): Input feature tensor 76 | 77 | Returns: 78 | torch.Tensor - SNE: Transformer output embeddings. S - sequence length (=HW/patch_size^2), N - batch size, E - embedding dim 79 | """ 80 | embeddings = self.embedding_convPxP(x).flatten( 81 | 2) # .shape = n,c,s = n, embedding_dim, s 82 | if self.use_class_token: 83 | # extra special token at start ? 
84 | embeddings = nn.functional.pad(embeddings, (1, 0)) 85 | 86 | # change to S,N,E format required by transformer 87 | embeddings = embeddings.permute(2, 0, 1) 88 | S, N, E = embeddings.shape 89 | embeddings = embeddings + self.positional_encoding_1d(S, N, E, device=embeddings.device) 90 | x = self.transformer_encoder(embeddings) # .shape = S, N, E 91 | return x 92 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/model_io.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | 27 | def load_state_dict(model, state_dict): 28 | """Load state_dict into model, handling DataParallel and DistributedDataParallel. Also checks for "model" key in state_dict. 29 | 30 | DataParallel prefixes state_dict keys with 'module.' when saving. 31 | If the model is not a DataParallel model but the state_dict is, then prefixes are removed. 32 | If the model is a DataParallel model but the state_dict is not, then prefixes are added. 33 | """ 34 | state_dict = state_dict.get('model', state_dict) 35 | # if model is a DataParallel model, then state_dict keys are prefixed with 'module.' 36 | 37 | do_prefix = isinstance( 38 | model, (torch.nn.DataParallel, torch.nn.parallel.DistributedDataParallel)) 39 | state = {} 40 | for k, v in state_dict.items(): 41 | if k.startswith('module.') and not do_prefix: 42 | k = k[7:] 43 | 44 | if not k.startswith('module.') and do_prefix: 45 | k = 'module.' + k 46 | 47 | state[k] = v 48 | 49 | model.load_state_dict(state) 50 | print("Loaded successfully") 51 | return model 52 | 53 | 54 | def load_wts(model, checkpoint_path): 55 | ckpt = torch.load(checkpoint_path, map_location='cpu') 56 | return load_state_dict(model, ckpt) 57 | 58 | 59 | def load_state_dict_from_url(model, url, **kwargs): 60 | state_dict = torch.hub.load_state_dict_from_url(url, map_location='cpu', **kwargs) 61 | return load_state_dict(model, state_dict) 62 | 63 | 64 | def load_state_from_resource(model, resource: str): 65 | """Loads weights to the model from a given resource. A resource can be of following types: 66 | 1. URL. Prefixed with "url::" 67 | e.g. url::http(s)://url.resource.com/ckpt.pt 68 | 69 | 2. Local path. Prefixed with "local::" 70 | e.g. 
local::/path/to/ckpt.pt 71 | 72 | 73 | Args: 74 | model (torch.nn.Module): Model 75 | resource (str): resource string 76 | 77 | Returns: 78 | torch.nn.Module: Model with loaded weights 79 | """ 80 | print(f"Using pretrained resource {resource}") 81 | 82 | if resource.startswith('url::'): 83 | url = resource.split('url::')[1] 84 | return load_state_dict_from_url(model, url, progress=True) 85 | 86 | elif resource.startswith('local::'): 87 | path = resource.split('local::')[1] 88 | return load_wts(model, path) 89 | 90 | else: 91 | raise ValueError("Invalid resource type, only url:: and local:: are supported") 92 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/zoedepth/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
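# Illustrative usage sketch for zoedepth/models/model_io.py (above); not part of the original file.
# load_state_from_resource dispatches on the "url::" / "local::" prefix described in its docstring, and
# load_state_dict also unwraps checkpoints saved as {"model": state_dict}. The toy Conv2d and the /tmp
# path are assumptions; in the real pipeline the model is a DepthModel and the resource comes from the
# config (e.g. "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_N.pt").
#
#     import torch
#     import torch.nn as nn
#     from zoedepth.models.model_io import load_state_from_resource
#
#     net = nn.Conv2d(3, 1, 3)
#     torch.save({"model": net.state_dict()}, "/tmp/toy_ckpt.pt")
#     net = load_state_from_resource(nn.Conv2d(3, 1, 3), "local::/tmp/toy_ckpt.pt")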
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from .zoedepth_v1 import ZoeDepth 26 | 27 | all_versions = { 28 | "v1": ZoeDepth, 29 | } 30 | 31 | get_version = lambda v : all_versions[v] -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/zoedepth/config_zoedepth.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "ZoeDepth", 4 | "version_name": "v1", 5 | "n_bins": 64, 6 | "bin_embedding_dim": 128, 7 | "bin_centers_type": "softplus", 8 | "n_attractors":[16, 8, 4, 1], 9 | "attractor_alpha": 1000, 10 | "attractor_gamma": 2, 11 | "attractor_kind" : "mean", 12 | "attractor_type" : "inv", 13 | "midas_model_type" : "DPT_BEiT_L_384", 14 | "min_temp": 0.0212, 15 | "max_temp": 50.0, 16 | "output_distribution": "logbinomial", 17 | "memory_efficient": true, 18 | "inverse_midas": false, 19 | "img_size": [384, 512] 20 | }, 21 | 22 | "train": { 23 | "train_midas": true, 24 | "use_pretrained_midas": true, 25 | "trainer": "zoedepth", 26 | "epochs": 5, 27 | "bs": 16, 28 | "optim_kwargs": {"lr": 0.000161, "wd": 0.01}, 29 | "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true}, 30 | "same_lr": false, 31 | "w_si": 1, 32 | "w_domain": 0.2, 33 | "w_reg": 0, 34 | "w_grad": 0, 35 | "avoid_boundary": false, 36 | "random_crop": false, 37 | "input_width": 640, 38 | "input_height": 480, 39 | "midas_lr_factor": 1, 40 | "encoder_lr_factor":10, 41 | "pos_enc_lr_factor":10, 42 | "freeze_midas_bn": true 43 | 44 | }, 45 | 46 | "infer":{ 47 | "train_midas": false, 48 | "use_pretrained_midas": false, 49 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_N.pt", 50 | "force_keep_ar": true 51 | }, 52 | 53 | "eval":{ 54 | "train_midas": false, 55 | "use_pretrained_midas": false, 56 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_N.pt" 57 | } 58 | } -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/zoedepth/config_zoedepth_kitti.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "bin_centers_type": "normed", 4 | "img_size": [384, 768] 5 | }, 6 | 7 | "train": { 8 | }, 9 | 10 | "infer":{ 11 | "train_midas": false, 12 | "use_pretrained_midas": false, 13 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt", 14 | "force_keep_ar": true 15 | }, 16 | 17 | "eval":{ 18 | "train_midas": false, 19 | "use_pretrained_midas": false, 20 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt" 21 | } 22 | } -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/zoedepth_nk/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the 
following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from .zoedepth_nk_v1 import ZoeDepthNK 26 | 27 | all_versions = { 28 | "v1": ZoeDepthNK, 29 | } 30 | 31 | get_version = lambda v : all_versions[v] -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/zoedepth_nk/config_zoedepth_nk.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "ZoeDepthNK", 4 | "version_name": "v1", 5 | "bin_conf" : [ 6 | { 7 | "name": "nyu", 8 | "n_bins": 64, 9 | "min_depth": 1e-3, 10 | "max_depth": 10.0 11 | }, 12 | { 13 | "name": "kitti", 14 | "n_bins": 64, 15 | "min_depth": 1e-3, 16 | "max_depth": 80.0 17 | } 18 | ], 19 | "bin_embedding_dim": 128, 20 | "bin_centers_type": "softplus", 21 | "n_attractors":[16, 8, 4, 1], 22 | "attractor_alpha": 1000, 23 | "attractor_gamma": 2, 24 | "attractor_kind" : "mean", 25 | "attractor_type" : "inv", 26 | "min_temp": 0.0212, 27 | "max_temp": 50.0, 28 | "memory_efficient": true, 29 | "midas_model_type" : "DPT_BEiT_L_384", 30 | "img_size": [384, 512] 31 | }, 32 | 33 | "train": { 34 | "train_midas": true, 35 | "use_pretrained_midas": true, 36 | "trainer": "zoedepth_nk", 37 | "epochs": 5, 38 | "bs": 16, 39 | "optim_kwargs": {"lr": 0.0002512, "wd": 0.01}, 40 | "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true}, 41 | "same_lr": false, 42 | "w_si": 1, 43 | "w_domain": 100, 44 | "avoid_boundary": false, 45 | "random_crop": false, 46 | "input_width": 640, 47 | "input_height": 480, 48 | "w_grad": 0, 49 | "w_reg": 0, 50 | "midas_lr_factor": 10, 51 | "encoder_lr_factor":10, 52 | "pos_enc_lr_factor":10 53 | }, 54 | 55 | "infer": { 56 | "train_midas": false, 57 | "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt", 58 | "use_pretrained_midas": false, 59 | "force_keep_ar": true 60 | }, 61 | 62 | "eval": { 63 | "train_midas": false, 64 | "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt", 65 | "use_pretrained_midas": false 66 | } 67 | } -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/trainers/builder.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished 
to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from importlib import import_module 26 | 27 | 28 | def get_trainer(config): 29 | """Builds and returns a trainer based on the config. 30 | 31 | Args: 32 | config (dict): the config dict (typically constructed using utils.config.get_config) 33 | config.trainer (str): the name of the trainer to use. The module named "{config.trainer}_trainer" must exist in trainers root module 34 | 35 | Raises: 36 | ValueError: If the specified trainer does not exist under trainers/ folder 37 | 38 | Returns: 39 | Trainer (inherited from zoedepth.trainers.BaseTrainer): The Trainer object 40 | """ 41 | assert "trainer" in config and config.trainer is not None and config.trainer != '', "Trainer not specified. Config: {0}".format( 42 | config) 43 | try: 44 | Trainer = getattr(import_module( 45 | f"zoedepth.trainers.{config.trainer}_trainer"), 'Trainer') 46 | except ModuleNotFoundError as e: 47 | raise ValueError(f"Trainer {config.trainer}_trainer not found.") from e 48 | return Trainer 49 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
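# Illustrative usage sketch for zoedepth/trainers/builder.py (above); not part of the original file.
# get_trainer resolves config.trainer (e.g. "zoedepth", as set in config_zoedepth.json) to the module
# zoedepth.trainers.zoedepth_trainer and returns its Trainer class, which the caller then instantiates.
# EasyDict is used here only for attribute-style access; the Trainer constructor arguments are not
# shown in this file, so instantiation is left to the training entry points.
#
#     from zoedepth.trainers.builder import get_trainer
#     from zoedepth.utils.easydict import EasyDict
#
#     Trainer = get_trainer(EasyDict(trainer="zoedepth"))
#     # Trainer is the class exported by zoedepth/trainers/zoedepth_trainer.py.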
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/utils/arg_utils.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def infer_type(x): # hacky way to infer type from string args 4 | if not isinstance(x, str): 5 | return x 6 | 7 | try: 8 | x = int(x) 9 | return x 10 | except ValueError: 11 | pass 12 | 13 | try: 14 | x = float(x) 15 | return x 16 | except ValueError: 17 | pass 18 | 19 | return x 20 | 21 | 22 | def parse_unknown(unknown_args): 23 | clean = [] 24 | for a in unknown_args: 25 | if "=" in a: 26 | k, v = a.split("=") 27 | clean.extend([k, v]) 28 | else: 29 | clean.append(a) 30 | 31 | keys = clean[::2] 32 | values = clean[1::2] 33 | return {k.replace("--", ""): infer_type(v) for k, v in zip(keys, values)} 34 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/utils/easydict/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | EasyDict 3 | Copy/pasted from https://github.com/makinacorpus/easydict 4 | Original author: Mathieu Leplatre 5 | """ 6 | 7 | class EasyDict(dict): 8 | """ 9 | Get attributes 10 | 11 | >>> d = EasyDict({'foo':3}) 12 | >>> d['foo'] 13 | 3 14 | >>> d.foo 15 | 3 16 | >>> d.bar 17 | Traceback (most recent call last): 18 | ... 19 | AttributeError: 'EasyDict' object has no attribute 'bar' 20 | 21 | Works recursively 22 | 23 | >>> d = EasyDict({'foo':3, 'bar':{'x':1, 'y':2}}) 24 | >>> isinstance(d.bar, dict) 25 | True 26 | >>> d.bar.x 27 | 1 28 | 29 | Bullet-proof 30 | 31 | >>> EasyDict({}) 32 | {} 33 | >>> EasyDict(d={}) 34 | {} 35 | >>> EasyDict(None) 36 | {} 37 | >>> d = {'a': 1} 38 | >>> EasyDict(**d) 39 | {'a': 1} 40 | >>> EasyDict((('a', 1), ('b', 2))) 41 | {'a': 1, 'b': 2} 42 | 43 | Set attributes 44 | 45 | >>> d = EasyDict() 46 | >>> d.foo = 3 47 | >>> d.foo 48 | 3 49 | >>> d.bar = {'prop': 'value'} 50 | >>> d.bar.prop 51 | 'value' 52 | >>> d 53 | {'foo': 3, 'bar': {'prop': 'value'}} 54 | >>> d.bar.prop = 'newer' 55 | >>> d.bar.prop 56 | 'newer' 57 | 58 | 59 | Values extraction 60 | 61 | >>> d = EasyDict({'foo':0, 'bar':[{'x':1, 'y':2}, {'x':3, 'y':4}]}) 62 | >>> isinstance(d.bar, list) 63 | True 64 | >>> from operator import attrgetter 65 | >>> list(map(attrgetter('x'), d.bar)) 66 | [1, 3] 67 | >>> list(map(attrgetter('y'), d.bar)) 68 | [2, 4] 69 | >>> d = EasyDict() 70 | >>> list(d.keys()) 71 | [] 72 | >>> d = EasyDict(foo=3, bar=dict(x=1, y=2)) 73 | >>> d.foo 74 | 3 75 | >>> d.bar.x 76 | 1 77 | 78 | Still like a dict though 79 | 80 | >>> o = EasyDict({'clean':True}) 81 | >>> list(o.items()) 82 | [('clean', True)] 83 | 84 | And like a class 85 | 86 | >>> class Flower(EasyDict): 87 | ... power = 1 88 | ... 89 | >>> f = Flower() 90 | >>> f.power 91 | 1 92 | >>> f = Flower({'height': 12}) 93 | >>> f.height 94 | 12 95 | >>> f['power'] 96 | 1 97 | >>> sorted(f.keys()) 98 | ['height', 'power'] 99 | 100 | update and pop items 101 | >>> d = EasyDict(a=1, b='2') 102 | >>> e = EasyDict(c=3.0, a=9.0) 103 | >>> d.update(e) 104 | >>> d.c 105 | 3.0 106 | >>> d['c'] 107 | 3.0 108 | >>> d.get('c') 109 | 3.0 110 | >>> d.update(a=4, b=4) 111 | >>> d.b 112 | 4 113 | >>> d.pop('a') 114 | 4 115 | >>> d.a 116 | Traceback (most recent call last): 117 | ... 
118 | AttributeError: 'EasyDict' object has no attribute 'a' 119 | """ 120 | def __init__(self, d=None, **kwargs): 121 | if d is None: 122 | d = {} 123 | else: 124 | d = dict(d) 125 | if kwargs: 126 | d.update(**kwargs) 127 | for k, v in d.items(): 128 | setattr(self, k, v) 129 | # Class attributes 130 | for k in self.__class__.__dict__.keys(): 131 | if not (k.startswith('__') and k.endswith('__')) and not k in ('update', 'pop'): 132 | setattr(self, k, getattr(self, k)) 133 | 134 | def __setattr__(self, name, value): 135 | if isinstance(value, (list, tuple)): 136 | value = [self.__class__(x) 137 | if isinstance(x, dict) else x for x in value] 138 | elif isinstance(value, dict) and not isinstance(value, self.__class__): 139 | value = self.__class__(value) 140 | super(EasyDict, self).__setattr__(name, value) 141 | super(EasyDict, self).__setitem__(name, value) 142 | 143 | __setitem__ = __setattr__ 144 | 145 | def update(self, e=None, **f): 146 | d = e or dict() 147 | d.update(f) 148 | for k in d: 149 | setattr(self, k, d[k]) 150 | 151 | def pop(self, k, d=None): 152 | delattr(self, k) 153 | return super(EasyDict, self).pop(k, d) 154 | 155 | 156 | if __name__ == "__main__": 157 | import doctest 158 | doctest.testmod() -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/utils/geometry.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import numpy as np 26 | 27 | def get_intrinsics(H,W): 28 | """ 29 | Intrinsics for a pinhole camera model. 30 | Assume fov of 55 degrees and central principal point. 
31 | """ 32 | f = 0.5 * W / np.tan(0.5 * 55 * np.pi / 180.0) 33 | cx = 0.5 * W 34 | cy = 0.5 * H 35 | return np.array([[f, 0, cx], 36 | [0, f, cy], 37 | [0, 0, 1]]) 38 | 39 | def depth_to_points(depth, R=None, t=None): 40 | 41 | K = get_intrinsics(depth.shape[1], depth.shape[2]) 42 | Kinv = np.linalg.inv(K) 43 | if R is None: 44 | R = np.eye(3) 45 | if t is None: 46 | t = np.zeros(3) 47 | 48 | # M converts from your coordinate to PyTorch3D's coordinate system 49 | M = np.eye(3) 50 | M[0, 0] = -1.0 51 | M[1, 1] = -1.0 52 | 53 | height, width = depth.shape[1:3] 54 | 55 | x = np.arange(width) 56 | y = np.arange(height) 57 | coord = np.stack(np.meshgrid(x, y), -1) 58 | coord = np.concatenate((coord, np.ones_like(coord)[:, :, [0]]), -1) # z=1 59 | coord = coord.astype(np.float32) 60 | # coord = torch.as_tensor(coord, dtype=torch.float32, device=device) 61 | coord = coord[None] # bs, h, w, 3 62 | 63 | D = depth[:, :, :, None, None] 64 | # print(D.shape, Kinv[None, None, None, ...].shape, coord[:, :, :, :, None].shape ) 65 | pts3D_1 = D * Kinv[None, None, None, ...] @ coord[:, :, :, :, None] 66 | # pts3D_1 live in your coordinate system. Convert them to Py3D's 67 | pts3D_1 = M[None, None, None, ...] @ pts3D_1 68 | # from reference to targe tviewpoint 69 | pts3D_2 = R[None, None, None, ...] @ pts3D_1 + t[None, None, None, :, None] 70 | # pts3D_2 = pts3D_1 71 | # depth_2 = pts3D_2[:, :, :, 2, :] # b,1,h,w 72 | return pts3D_2[:, :, :, :3, 0][0] 73 | 74 | 75 | def create_triangles(h, w, mask=None): 76 | """ 77 | Reference: https://github.com/google-research/google-research/blob/e96197de06613f1b027d20328e06d69829fa5a89/infinite_nature/render_utils.py#L68 78 | Creates mesh triangle indices from a given pixel grid size. 79 | This function is not and need not be differentiable as triangle indices are 80 | fixed. 81 | Args: 82 | h: (int) denoting the height of the image. 83 | w: (int) denoting the width of the image. 
84 | Returns: 85 | triangles: 2D numpy array of indices (int) with shape (2(W-1)(H-1) x 3) 86 | """ 87 | x, y = np.meshgrid(range(w - 1), range(h - 1)) 88 | tl = y * w + x 89 | tr = y * w + x + 1 90 | bl = (y + 1) * w + x 91 | br = (y + 1) * w + x + 1 92 | triangles = np.array([tl, bl, tr, br, tr, bl]) 93 | triangles = np.transpose(triangles, (1, 2, 0)).reshape( 94 | ((w - 1) * (h - 1) * 2, 3)) 95 | if mask is not None: 96 | mask = mask.reshape(-1) 97 | triangles = triangles[mask[triangles].all(1)] 98 | return triangles 99 | -------------------------------------------------------------------------------- /arguments.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class GSParams: 5 | def __init__(self): 6 | self.sh_degree = 3 7 | self.feat_dim = 32 8 | self.n_offsets = 10 9 | self.voxel_size = 0.001 10 | self.update_depth = 3 11 | self.update_init_factor = 16 12 | self.update_hierachy_factor = 4 13 | 14 | self.use_feat_bank = False 15 | self._source_path = "" 16 | self._model_path = "" 17 | self.images = "images" 18 | self.resolution = -1 19 | self.white_background = False 20 | self.data_device = "cuda" 21 | self.eval = True 22 | 23 | self.iterations = 2990 # 3_000 2990 24 | self.position_lr_init = 0.0016 # BloomScene: 0.00016 25 | self.position_lr_final = 0.0000016 26 | self.position_lr_delay_mult = 0.01 27 | self.position_lr_max_steps = 2990 # 3_000 28 | 29 | self.offset_lr_init = 0.01 30 | self.offset_lr_final = 0.0001 31 | self.offset_lr_delay_mult = 0.01 32 | self.offset_lr_max_steps = 2990 33 | 34 | self.mask_lr_init = 0.01 35 | self.mask_lr_final = 0.0001 36 | self.mask_lr_delay_mult = 0.01 37 | self.mask_lr_max_steps = 2990 38 | 39 | self.feature_lr = 0.0025 40 | self.opacity_lr = 0.05 41 | self.scaling_lr = 0.005 42 | self.rotation_lr = 0.001 43 | 44 | self.mlp_opacity_lr_init = 0.002 45 | self.mlp_opacity_lr_final = 0.00002 46 | self.mlp_opacity_lr_delay_mult = 0.01 47 | self.mlp_opacity_lr_max_steps = 2990 48 | 49 | self.mlp_cov_lr_init = 0.004 50 | self.mlp_cov_lr_final = 0.004 51 | self.mlp_cov_lr_delay_mult = 0.01 52 | self.mlp_cov_lr_max_steps = 2990 53 | 54 | self.mlp_color_lr_init = 0.008 55 | self.mlp_color_lr_final = 0.00005 56 | self.mlp_color_lr_delay_mult = 0.01 57 | self.mlp_color_lr_max_steps = 2990 58 | 59 | self.mlp_featurebank_lr_init = 0.01 60 | self.mlp_featurebank_lr_final = 0.00001 61 | self.mlp_featurebank_lr_delay_mult = 0.01 62 | self.mlp_featurebank_lr_max_steps = 2990 63 | 64 | self.encoding_xyz_lr_init = 0.005 65 | self.encoding_xyz_lr_final = 0.00001 66 | self.encoding_xyz_lr_delay_mult = 0.33 67 | self.encoding_xyz_lr_max_steps = 2990 68 | 69 | self.mlp_grid_lr_init = 0.005 70 | self.mlp_grid_lr_final = 0.00001 71 | self.mlp_grid_lr_delay_mult = 0.01 72 | self.mlp_grid_lr_max_steps = 2990 73 | 74 | self.mlp_deform_lr_init = 0.005 75 | self.mlp_deform_lr_final = 0.0005 76 | self.mlp_deform_lr_delay_mult = 0.01 77 | self.mlp_deform_lr_max_steps = 2990 78 | 79 | # for anchor densification 80 | self.start_stat = 200 81 | self.update_from = 500 82 | self.update_interval = 100 83 | self.update_until = 2000 84 | 85 | self.percent_dense = 0.01 86 | self.lambda_dssim = 0.2 87 | self.densification_interval = 100 88 | self.opacity_reset_interval = 2990 89 | self.densify_from_iter = 500 90 | self.densify_until_iter = 2990 91 | self.densify_grad_threshold = 0.0002 92 | 93 | self.min_opacity = 0.005 # 0.2 94 | self.success_threshold = 0.8 95 | 96 | self.convert_SHs_python = False 97 | 
self.compute_cov3D_python = False 98 | self.debug = False 99 | 100 | 101 | 102 | class CameraParams: 103 | def __init__(self, H: int = 512, W: int = 512): 104 | self.H = H 105 | self.W = W 106 | self.focal = (5.8269e+02, 5.8269e+02) 107 | self.fov = (2*np.arctan(self.W / (2*self.focal[0])), 2*np.arctan(self.H / (2*self.focal[1]))) 108 | self.K = np.array([ 109 | [self.focal[0], 0., self.W/2], 110 | [0., self.focal[1], self.H/2], 111 | [0., 0., 1.], 112 | ]).astype(np.float32) -------------------------------------------------------------------------------- /examples/01_childroom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparklingH/BloomScene/93c5855428895a19216685c2f3fb8bc75805dad4/examples/01_childroom.png -------------------------------------------------------------------------------- /examples/01_childroom.txt: -------------------------------------------------------------------------------- 1 | A children's room filled with toys and books, colorful, playful, highly detailed, vibrant. -------------------------------------------------------------------------------- /examples/02_sunroom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparklingH/BloomScene/93c5855428895a19216685c2f3fb8bc75805dad4/examples/02_sunroom.png -------------------------------------------------------------------------------- /examples/02_sunroom.txt: -------------------------------------------------------------------------------- 1 | A sunroom with floor-to-ceiling windows overlooking the garden, comfortable chairs, and a coffee table inside, bright, airy, photorealistic, high-resolution. -------------------------------------------------------------------------------- /examples/03_beach.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparklingH/BloomScene/93c5855428895a19216685c2f3fb8bc75805dad4/examples/03_beach.png -------------------------------------------------------------------------------- /examples/03_beach.txt: -------------------------------------------------------------------------------- 1 | A sunny beach with fine sand and blue water, with a backdrop of blue sky and white clouds, highly detailed, vibrant, photorealistic, clear skies. -------------------------------------------------------------------------------- /examples/04_snow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparklingH/BloomScene/93c5855428895a19216685c2f3fb8bc75805dad4/examples/04_snow.png -------------------------------------------------------------------------------- /examples/04_snow.txt: -------------------------------------------------------------------------------- 1 | A winter snow scene with snow-covered trees and houses, crisp, highly detailed, photorealistic, clear reflections. 
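For reference, the CameraParams class in arguments.py above pins the pinhole intrinsics to a 512x512 image with a focal length of 582.69 px, which works out to a field of view of roughly 47.4 degrees on each axis. A quick check of the stored values (illustrative only):

    import numpy as np
    f, W = 5.8269e+02, 512
    fov = 2 * np.arctan(W / (2 * f))   # ~0.828 rad
    print(np.degrees(fov))             # ~47.4 degrees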
-------------------------------------------------------------------------------- /examples/05_christmas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparklingH/BloomScene/93c5855428895a19216685c2f3fb8bc75805dad4/examples/05_christmas.png -------------------------------------------------------------------------------- /examples/05_christmas.txt: -------------------------------------------------------------------------------- 1 | a cozy living room in christmas -------------------------------------------------------------------------------- /examples/06_room.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparklingH/BloomScene/93c5855428895a19216685c2f3fb8bc75805dad4/examples/06_room.png -------------------------------------------------------------------------------- /examples/06_room.txt: -------------------------------------------------------------------------------- 1 | a living room with a lit furnace, couch and cozy curtains, bright lamps that make the room look well-lit. -------------------------------------------------------------------------------- /examples/07_museum.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparklingH/BloomScene/93c5855428895a19216685c2f3fb8bc75805dad4/examples/07_museum.png -------------------------------------------------------------------------------- /examples/07_museum.txt: -------------------------------------------------------------------------------- 1 | Simple museum, pictures, paintings, artistic, best quality, dimly lit -------------------------------------------------------------------------------- /examples/08_small_cabin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparklingH/BloomScene/93c5855428895a19216685c2f3fb8bc75805dad4/examples/08_small_cabin.png -------------------------------------------------------------------------------- /examples/08_small_cabin.txt: -------------------------------------------------------------------------------- 1 | A small cabin on top of a snowy mountain in the style of Disney, artstation -------------------------------------------------------------------------------- /examples/08_small_cabin_negative.txt: -------------------------------------------------------------------------------- 1 | low quality, ugly -------------------------------------------------------------------------------- /examples/09_street.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparklingH/BloomScene/93c5855428895a19216685c2f3fb8bc75805dad4/examples/09_street.png -------------------------------------------------------------------------------- /examples/09_street.txt: -------------------------------------------------------------------------------- 1 | A suburban street in North Carolina on a bright, sunny day -------------------------------------------------------------------------------- /images/BloomScene.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparklingH/BloomScene/93c5855428895a19216685c2f3fb8bc75805dad4/images/BloomScene.png -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | 
# -*- coding: utf-8 -*- 2 | import os 3 | import argparse 4 | import torch 5 | import datetime 6 | import json 7 | import random 8 | import numpy as np 9 | from PIL import Image 10 | from bloomscene import BloomScene 11 | from utils.metrics import clip_score_and_iqa, brisque_and_niqe_score 12 | 13 | 14 | def fix_random_seed(seed: int): 15 | random.seed(seed) 16 | np.random.seed(seed) 17 | torch.manual_seed(seed) 18 | torch.cuda.manual_seed(seed) 19 | torch.cuda.manual_seed_all(seed) 20 | 21 | torch.backends.cudnn.deterministic = True 22 | torch.backends.cudnn.benchmark = True 23 | 24 | if __name__ == "__main__": 25 | ### option 26 | parser = argparse.ArgumentParser(description='Arguments for BloomScene') 27 | # Input options 28 | parser.add_argument('--image', '-img', type=str, default='examples/01_childroom.png', help='Input image for scene generation') 29 | parser.add_argument('--text', '-t', type=str, default='examples/01_childroom.txt', help='Text prompt for scene generation') 30 | parser.add_argument('--neg_text', '-nt', type=str, default='', help='Negative text prompt for scene generation') 31 | 32 | # Camera options 33 | parser.add_argument('--campath_gen', '-cg', type=str, default='rotate360', choices=['rotate360'], help='Camera extrinsic trajectories for scene generation') 34 | parser.add_argument('--campath_render', '-cr', type=str, default='rotate360', choices=['rotate360'], help='Camera extrinsic trajectories for video rendering') 35 | 36 | # Inpainting options 37 | parser.add_argument('--seed', type=int, default=1, help='Manual seed for running Stable Diffusion inpainting') 38 | parser.add_argument('--diff_steps', type=int, default=50, help='Number of inference steps for running Stable Diffusion inpainting') 39 | 40 | # Save options 41 | parser.add_argument('--save_dir', '-s', type=str, default='', help='Save directory') 42 | 43 | # DPR options 44 | parser.add_argument('--dep_value', action='store_true', help='Pixel-level depth regularization or not') 45 | parser.add_argument('--dep_domin', action='store_true', help='Distribution-level depth regularization or not') 46 | parser.add_argument('--dep_smooth', action='store_true', help='Depth smoothness regularization or not') 47 | parser.add_argument('--dep_value_lbd', type=float, default=0.7, help='Depth regularization..') 48 | parser.add_argument('--dep_domin_lbd', type=float, default=0.1, help='Depth regularization..') 49 | parser.add_argument('--dep_smooth_lbd', type=float, default=1.0, help='Depth regularization..') 50 | 51 | # SCC options 52 | parser.add_argument("--n_features", type=int, default = 4) 53 | parser.add_argument("--log2", type=int, default = 13) 54 | parser.add_argument("--log2_2D", type=int, default = 15) 55 | parser.add_argument("--lambdae", type=float, default = 0.002) 56 | parser.add_argument("--testing_iterations", nargs="+", type=int, default=[2990]) 57 | parser.add_argument("--saving_iterations", nargs="+", type=int, default=[2990]) 58 | 59 | 60 | args = parser.parse_args() 61 | 62 | 63 | fix_random_seed(args.seed) 64 | 65 | ### input (example) 66 | rgb_cond = Image.open(args.image).resize((512,512)) 67 | 68 | 69 | if args.text.endswith('.txt'): 70 | with open(args.text, 'r') as f: 71 | txt_cond = f.readline() 72 | else: 73 | txt_cond = args.text 74 | 75 | if args.neg_text.endswith('.txt'): 76 | with open(args.neg_text, 'r') as f: 77 | neg_txt_cond = f.readline() 78 | else: 79 | neg_txt_cond = args.neg_text 80 | 81 | # Make default save directory if blank 82 | 83 | if args.save_dir == '': 84 | img_name = 
os.path.splitext(os.path.basename(args.image))[0] 85 | now_str = datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S') 86 | args.save_dir = f'./outputs/{img_name}_{args.campath_gen}_{args.seed}_{now_str}' 87 | 88 | 89 | if not os.path.exists(args.save_dir): 90 | os.makedirs(args.save_dir, exist_ok=True) 91 | 92 | 93 | with open(os.path.join(args.save_dir, "settings.json"), "w") as f: 94 | json.dump(vars(args), f, indent=4, sort_keys=True) 95 | 96 | 97 | bs = BloomScene(args, save_dir=args.save_dir) 98 | start_time = datetime.datetime.now() 99 | print("start..", start_time.strftime('%Y-%m-%d %H:%M:%S')) 100 | 101 | bs.create(rgb_cond, txt_cond, neg_txt_cond, args.campath_gen, args.seed, args.diff_steps) 102 | end_time = datetime.datetime.now() 103 | print("end..", end_time.strftime('%Y-%m-%d %H:%M:%S')) 104 | 105 | bs.render_video(args.campath_render) 106 | 107 | 108 | # # eval 109 | image_folder = os.path.join(args.save_dir, "eval", "render_rgb") 110 | clip_score_and_iqa(image_folder=image_folder, text=txt_cond, out_path=args.save_dir) 111 | brisque_and_niqe_score(image_folder=image_folder, out_path=args.save_dir) 112 | -------------------------------------------------------------------------------- /scene/__init__.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from arguments import GSParams 4 | from scene.dataset_readers import readDataInfo 5 | from scene.gaussian_model import GaussianModel 6 | from utils.pose_noise_util import apply_noise_bloomscene 7 | 8 | 9 | class Scene: 10 | gaussians: GaussianModel 11 | 12 | def __init__(self, traindata, gaussians: GaussianModel, opt: GSParams): 13 | self.traindata = traindata 14 | self.gaussians = gaussians 15 | 16 | info = readDataInfo(traindata, opt.white_background, opt.eval) 17 | random.shuffle(info.train_cameras) # Multi-res consistent random shuffling 18 | self.cameras_extent = info.nerf_normalization["radius"] 19 | 20 | print("Loading Training Cameras ...") 21 | self.train_cameras = info.train_cameras 22 | print("Loading Preset Cameras ...") 23 | self.preset_cameras = {} 24 | print("Loading Eval Cameras ...") 25 | self.eval_cameras = apply_noise_bloomscene(self.train_cameras) 26 | 27 | for campath in info.preset_cameras.keys(): 28 | self.preset_cameras[campath] = info.preset_cameras[campath] 29 | 30 | self.gaussians.create_from_pcd(info.point_cloud, self.cameras_extent) 31 | self.gaussians.training_setup(opt) 32 | 33 | def getTrainCameras(self): 34 | return self.train_cameras 35 | 36 | def getPresetCameras(self, preset): 37 | assert preset in self.preset_cameras 38 | return self.preset_cameras[preset] 39 | 40 | def getEvalCameras(self): 41 | return self.eval_cameras -------------------------------------------------------------------------------- /scene/cameras.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
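# Example invocation of the pipeline driven by run.py above (illustrative; the
# flags are the ones defined in its argument parser, and the output folder name
# simply follows the default save_dir pattern shown there):
#
#     python run.py --image examples/01_childroom.png \
#                   --text examples/01_childroom.txt \
#                   --campath_gen rotate360 --campath_render rotate360 \
#                   --dep_value --dep_domin --dep_smooth \
#                   --seed 1 --diff_steps 50
#
# Results land in ./outputs/01_childroom_rotate360_1_<timestamp>/ together with
# a settings.json snapshot of the arguments.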
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | import numpy as np 12 | 13 | import torch 14 | from torch import nn 15 | 16 | from utils.graphics import getWorld2View2, getProjectionMatrix 17 | from utils.loss import image2canny 18 | 19 | 20 | class Camera(nn.Module): 21 | def __init__(self, colmap_id, R, T, FoVx, FoVy, image, gt_alpha_mask, 22 | image_name, uid, original_depth, 23 | trans=np.array([0.0, 0.0, 0.0]), scale=1.0, data_device = "cuda" 24 | ): 25 | super(Camera, self).__init__() 26 | 27 | self.uid = uid 28 | self.colmap_id = colmap_id 29 | self.R = R 30 | self.T = T 31 | self.FoVx = FoVx 32 | self.FoVy = FoVy 33 | self.image_name = image_name 34 | 35 | try: 36 | self.data_device = torch.device(data_device) 37 | except Exception as e: 38 | print(e) 39 | print(f"[Warning] Custom device {data_device} failed, fallback to default cuda device" ) 40 | self.data_device = torch.device("cuda") 41 | 42 | 43 | self.original_image = image.clamp(0.0, 1.0).to(self.data_device) 44 | self.canny_mask = image2canny(self.original_image.permute(1,2,0), 50, 150, isEdge1=False).detach().to(self.data_device) 45 | self.image_width = self.original_image.shape[2] 46 | self.image_height = self.original_image.shape[1] 47 | 48 | if gt_alpha_mask is not None: 49 | self.original_image *= gt_alpha_mask.to(self.data_device) 50 | else: 51 | self.original_image *= torch.ones((1, self.image_height, self.image_width), device=self.data_device) 52 | 53 | self.zfar = 100.0 54 | self.znear = 0.01 55 | 56 | self.trans = trans 57 | self.scale = scale 58 | 59 | self.world_view_transform = torch.tensor(getWorld2View2(R, T, trans, scale)).transpose(0, 1).cuda() 60 | self.projection_matrix = getProjectionMatrix(znear=self.znear, zfar=self.zfar, fovX=self.FoVx, fovY=self.FoVy).transpose(0,1).cuda() 61 | self.full_proj_transform = (self.world_view_transform.unsqueeze(0).bmm(self.projection_matrix.unsqueeze(0))).squeeze(0) 62 | self.camera_center = self.world_view_transform.inverse()[3, :3] 63 | 64 | self.original_depth = original_depth 65 | 66 | 67 | class MiniCam: 68 | def __init__(self, width, height, fovy, fovx, znear, zfar, world_view_transform, full_proj_transform): 69 | self.image_width = width 70 | self.image_height = height 71 | self.FoVy = fovy 72 | self.FoVx = fovx 73 | self.znear = znear 74 | self.zfar = zfar 75 | self.world_view_transform = world_view_transform 76 | self.full_proj_transform = full_proj_transform 77 | view_inv = torch.inverse(self.world_view_transform) 78 | self.camera_center = view_inv[3][:3] 79 | 80 | -------------------------------------------------------------------------------- /scene/dataset_readers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from PIL import Image 4 | from typing import NamedTuple 5 | 6 | from scene.gaussian_model import BasicPointCloud 7 | from scene.cameras import MiniCam, Camera 8 | from utils.graphics import getWorld2View2, focal2fov, fov2focal 9 | from utils.graphics import getProjectionMatrix 10 | from utils.trajectory import get_camerapaths 11 | 12 | 13 | class CameraInfo(NamedTuple): 14 | uid: int 15 | R: np.array 16 | T: np.array 17 | FovY: np.array 18 | FovX: np.array 19 | image: np.array 20 | image_path: str 21 | image_name: str 22 | width: int 23 | height: int 24 | 25 | 26 | class SceneInfo(NamedTuple): 27 | point_cloud: BasicPointCloud 28 | train_cameras: list 29 | test_cameras: list 30 | preset_cameras: list 31 | nerf_normalization: dict 32 | ply_path: str 
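# Illustrative helper (not referenced elsewhere in this file): how the
# per-camera transforms consumed by the rasterizer are assembled from R, T and
# the two FoVs, mirroring scene/cameras.py above but with the .cuda() calls
# omitted. It relies on the getWorld2View2 / getProjectionMatrix imports at the
# top of this file; the znear/zfar defaults match the values used in this repo.
def _example_build_camera_transforms(R, T, FovX, FovY, znear=0.01, zfar=100.0):
    world_view = torch.tensor(getWorld2View2(R, T)).transpose(0, 1)
    projection = getProjectionMatrix(znear=znear, zfar=zfar, fovX=FovX, fovY=FovY).transpose(0, 1)
    # Row-vector convention: full projection = world-to-view followed by projection.
    full_proj = world_view.unsqueeze(0).bmm(projection.unsqueeze(0)).squeeze(0)
    # The camera center in world space is the translation row of the inverse view matrix.
    camera_center = world_view.inverse()[3, :3]
    return world_view, full_proj, camera_center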
33 | 34 | 35 | def getNerfppNorm(cam_info): 36 | def get_center_and_diag(cam_centers): 37 | cam_centers = np.hstack(cam_centers) 38 | avg_cam_center = np.mean(cam_centers, axis=1, keepdims=True) 39 | center = avg_cam_center 40 | dist = np.linalg.norm(cam_centers - center, axis=0, keepdims=True) 41 | diagonal = np.max(dist) 42 | return center.flatten(), diagonal 43 | 44 | cam_centers = [] 45 | 46 | for cam in cam_info: 47 | W2C = getWorld2View2(cam.R, cam.T) 48 | C2W = np.linalg.inv(W2C) 49 | cam_centers.append(C2W[:3, 3:4]) 50 | 51 | center, diagonal = get_center_and_diag(cam_centers) 52 | radius = diagonal * 1.1 53 | 54 | translate = -center 55 | 56 | return {"translate": translate, "radius": radius} 57 | 58 | 59 | 60 | def loadCamerasFromData(traindata, white_background): 61 | cameras = [] 62 | 63 | fovx = traindata["camera_angle_x"] 64 | frames = traindata["frames"] 65 | for idx, frame in enumerate(frames): 66 | # NeRF 'transform_matrix' is a camera-to-world transform 67 | c2w = np.array(frame["transform_matrix"]) 68 | # change from OpenGL/Blender camera axes (Y up, Z back) to COLMAP (Y down, Z forward) 69 | c2w[:3, 1:3] *= -1 70 | 71 | # get the world-to-camera transform and set R, T 72 | w2c = np.linalg.inv(c2w) 73 | R = np.transpose(w2c[:3,:3]) # R is stored transposed due to 'glm' in CUDA code 74 | T = w2c[:3, 3] 75 | 76 | image = frame["image"] if "image" in frame else None 77 | im_data = np.array(image.convert("RGBA")) 78 | 79 | bg = np.array([1,1,1]) if white_background else np.array([0, 0, 0]) 80 | 81 | norm_data = im_data / 255.0 82 | arr = norm_data[:,:,:3] * norm_data[:, :, 3:4] + bg * (1 - norm_data[:, :, 3:4]) 83 | image = Image.fromarray(np.array(arr*255.0, dtype=np.byte), "RGB") 84 | loaded_mask = np.ones_like(norm_data[:, :, 3:4]) 85 | 86 | fovy = focal2fov(fov2focal(fovx, image.size[1]), image.size[0]) 87 | FovY = fovy 88 | FovX = fovx 89 | 90 | image = torch.Tensor(arr).permute(2,0,1) # [3, 512, 512] 91 | loaded_mask = None #torch.Tensor(loaded_mask).permute(2,0,1) 92 | 93 | # depth 94 | original_depth = torch.Tensor(frame["depth"]) if frame["depth"] is not None else None 95 | 96 | cameras.append(Camera(colmap_id=idx, R=R, T=T, FoVx=FovX, FoVy=FovY, image=image, original_depth=original_depth, 97 | gt_alpha_mask=loaded_mask, image_name='', uid=idx, data_device='cuda')) 98 | 99 | return cameras 100 | 101 | 102 | def loadCameraPreset(traindata, presetdata): 103 | cam_infos = {} 104 | ## camera setting (for H, W and focal) 105 | fovx = traindata["camera_angle_x"] * 0.95 106 | W, H = traindata["frames"][0]["image"].size 107 | 108 | for camkey in presetdata: 109 | cam_infos[camkey] = [] 110 | for idx, frame in enumerate(presetdata[camkey]["frames"]): 111 | # NeRF 'transform_matrix' is a camera-to-world transform 112 | c2w = np.array(frame["transform_matrix"]) 113 | # change from OpenGL/Blender camera axes (Y up, Z back) to COLMAP (Y down, Z forward) 114 | c2w[:3, 1:3] *= -1 115 | 116 | # get the world-to-camera transform and set R, T 117 | w2c = np.linalg.inv(c2w) 118 | R = np.transpose(w2c[:3,:3]) # R is stored transposed due to 'glm' in CUDA code 119 | T = w2c[:3, 3] 120 | 121 | fovy = focal2fov(fov2focal(fovx, W), H) 122 | FovY = fovy 123 | FovX = fovx 124 | 125 | znear, zfar = 0.01, 100 126 | world_view_transform = torch.tensor(getWorld2View2(R, T, np.array([0.0, 0.0, 0.0]), 1.0)).transpose(0, 1).cuda() 127 | projection_matrix = getProjectionMatrix(znear=znear, zfar=zfar, fovX=FovX, fovY=FovY).transpose(0,1).cuda() 128 | full_proj_transform = 
(world_view_transform.unsqueeze(0).bmm(projection_matrix.unsqueeze(0))).squeeze(0) 129 | 130 | cam_infos[camkey].append(MiniCam(width=W, height=H, fovy=FovY, fovx=FovX, znear=znear, zfar=zfar, 131 | world_view_transform=world_view_transform, full_proj_transform=full_proj_transform)) 132 | 133 | return cam_infos 134 | 135 | 136 | 137 | def readDataInfo(traindata, white_background, eval): 138 | print("Reading Training Transforms") 139 | 140 | train_cameras = loadCamerasFromData(traindata, white_background) 141 | preset_minicams = loadCameraPreset(traindata, presetdata=get_camerapaths()) 142 | 143 | nerf_normalization = getNerfppNorm(train_cameras) 144 | 145 | pcd = BasicPointCloud(points=traindata['pcd_points'].T, colors=traindata['pcd_colors'], normals=None) 146 | 147 | 148 | scene_info = SceneInfo(point_cloud=pcd, 149 | train_cameras=train_cameras, 150 | test_cameras=[], 151 | preset_cameras=preset_minicams, 152 | nerf_normalization=nerf_normalization, 153 | ply_path='') 154 | return scene_info -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | cmake_minimum_required(VERSION 3.20) 13 | 14 | project(DiffRast LANGUAGES CUDA CXX) 15 | 16 | set(CMAKE_CXX_STANDARD 17) 17 | set(CMAKE_CXX_EXTENSIONS OFF) 18 | set(CMAKE_CUDA_STANDARD 17) 19 | 20 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") 21 | 22 | add_library(CudaRasterizer 23 | cuda_rasterizer/backward.h 24 | cuda_rasterizer/backward.cu 25 | cuda_rasterizer/forward.h 26 | cuda_rasterizer/forward.cu 27 | cuda_rasterizer/auxiliary.h 28 | cuda_rasterizer/rasterizer_impl.cu 29 | cuda_rasterizer/rasterizer_impl.h 30 | cuda_rasterizer/rasterizer.h 31 | ) 32 | 33 | set_target_properties(CudaRasterizer PROPERTIES CUDA_ARCHITECTURES "70;75;86") 34 | 35 | target_include_directories(CudaRasterizer PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/cuda_rasterizer) 36 | target_include_directories(CudaRasterizer PRIVATE third_party/glm ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) 37 | -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization/LICENSE.md: -------------------------------------------------------------------------------- 1 | Gaussian-Splatting License 2 | =========================== 3 | 4 | **Inria** and **the Max Planck Institut for Informatik (MPII)** hold all the ownership rights on the *Software* named **gaussian-splatting**. 5 | The *Software* is in the process of being registered with the Agence pour la Protection des 6 | Programmes (APP). 7 | 8 | The *Software* is still being developed by the *Licensor*. 9 | 10 | *Licensor*'s goal is to allow the research community to use, test and evaluate 11 | the *Software*. 12 | 13 | ## 1. Definitions 14 | 15 | *Licensee* means any person or entity that uses the *Software* and distributes 16 | its *Work*. 17 | 18 | *Licensor* means the owners of the *Software*, i.e Inria and MPII 19 | 20 | *Software* means the original work of authorship made available under this 21 | License ie gaussian-splatting. 
22 | 23 | *Work* means the *Software* and any additions to or derivative works of the 24 | *Software* that are made available under this License. 25 | 26 | 27 | ## 2. Purpose 28 | This license is intended to define the rights granted to the *Licensee* by 29 | Licensors under the *Software*. 30 | 31 | ## 3. Rights granted 32 | 33 | For the above reasons Licensors have decided to distribute the *Software*. 34 | Licensors grant non-exclusive rights to use the *Software* for research purposes 35 | to research users (both academic and industrial), free of charge, without right 36 | to sublicense.. The *Software* may be used "non-commercially", i.e., for research 37 | and/or evaluation purposes only. 38 | 39 | Subject to the terms and conditions of this License, you are granted a 40 | non-exclusive, royalty-free, license to reproduce, prepare derivative works of, 41 | publicly display, publicly perform and distribute its *Work* and any resulting 42 | derivative works in any form. 43 | 44 | ## 4. Limitations 45 | 46 | **4.1 Redistribution.** You may reproduce or distribute the *Work* only if (a) you do 47 | so under this License, (b) you include a complete copy of this License with 48 | your distribution, and (c) you retain without modification any copyright, 49 | patent, trademark, or attribution notices that are present in the *Work*. 50 | 51 | **4.2 Derivative Works.** You may specify that additional or different terms apply 52 | to the use, reproduction, and distribution of your derivative works of the *Work* 53 | ("Your Terms") only if (a) Your Terms provide that the use limitation in 54 | Section 2 applies to your derivative works, and (b) you identify the specific 55 | derivative works that are subject to Your Terms. Notwithstanding Your Terms, 56 | this License (including the redistribution requirements in Section 3.1) will 57 | continue to apply to the *Work* itself. 58 | 59 | **4.3** Any other use without of prior consent of Licensors is prohibited. Research 60 | users explicitly acknowledge having received from Licensors all information 61 | allowing to appreciate the adequacy between of the *Software* and their needs and 62 | to undertake all necessary precautions for its execution and use. 63 | 64 | **4.4** The *Software* is provided both as a compiled library file and as source 65 | code. In case of using the *Software* for a publication or other results obtained 66 | through the use of the *Software*, users are strongly encouraged to cite the 67 | corresponding publications as explained in the documentation of the *Software*. 68 | 69 | ## 5. Disclaimer 70 | 71 | THE USER CANNOT USE, EXPLOIT OR DISTRIBUTE THE *SOFTWARE* FOR COMMERCIAL PURPOSES 72 | WITHOUT PRIOR AND EXPLICIT CONSENT OF LICENSORS. YOU MUST CONTACT INRIA FOR ANY 73 | UNAUTHORIZED USE: stip-sophia.transfert@inria.fr . ANY SUCH ACTION WILL 74 | CONSTITUTE A FORGERY. THIS *SOFTWARE* IS PROVIDED "AS IS" WITHOUT ANY WARRANTIES 75 | OF ANY NATURE AND ANY EXPRESS OR IMPLIED WARRANTIES, WITH REGARDS TO COMMERCIAL 76 | USE, PROFESSIONNAL USE, LEGAL OR NOT, OR OTHER, OR COMMERCIALISATION OR 77 | ADAPTATION. 
UNLESS EXPLICITLY PROVIDED BY LAW, IN NO EVENT, SHALL INRIA OR THE 78 | AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 79 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 80 | GOODS OR SERVICES, LOSS OF USE, DATA, OR PROFITS OR BUSINESS INTERRUPTION) 81 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 82 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING FROM, OUT OF OR 83 | IN CONNECTION WITH THE *SOFTWARE* OR THE USE OR OTHER DEALINGS IN THE *SOFTWARE*. 84 | -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization/README.md: -------------------------------------------------------------------------------- 1 | # Differential Gaussian Rasterization 2 | 3 | Used as the rasterization engine for the paper "3D Gaussian Splatting for Real-Time Rendering of Radiance Fields". If you can make use of it in your own research, please be so kind to cite us. 4 | 5 |
6 |
7 |

## BibTeX

8 |
@Article{kerbl3Dgaussians,
 9 |       author       = {Kerbl, Bernhard and Kopanas, Georgios and Leimk{\"u}hler, Thomas and Drettakis, George},
10 |       title        = {3D Gaussian Splatting for Real-Time Radiance Field Rendering},
11 |       journal      = {ACM Transactions on Graphics},
12 |       number       = {4},
13 |       volume       = {42},
14 |       month        = {July},
15 |       year         = {2023},
16 |       url          = {https://repo-sam.inria.fr/fungraph/3d-gaussian-splatting/}
17 | }
18 |
19 |
-------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization/cuda_rasterizer/auxiliary.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_AUXILIARY_H_INCLUDED 13 | #define CUDA_RASTERIZER_AUXILIARY_H_INCLUDED 14 | 15 | #include "config.h" 16 | #include "stdio.h" 17 | 18 | #define BLOCK_SIZE (BLOCK_X * BLOCK_Y) 19 | #define NUM_WARPS (BLOCK_SIZE/32) 20 | 21 | // Spherical harmonics coefficients 22 | __device__ const float SH_C0 = 0.28209479177387814f; 23 | __device__ const float SH_C1 = 0.4886025119029199f; 24 | __device__ const float SH_C2[] = { 25 | 1.0925484305920792f, 26 | -1.0925484305920792f, 27 | 0.31539156525252005f, 28 | -1.0925484305920792f, 29 | 0.5462742152960396f 30 | }; 31 | __device__ const float SH_C3[] = { 32 | -0.5900435899266435f, 33 | 2.890611442640554f, 34 | -0.4570457994644658f, 35 | 0.3731763325901154f, 36 | -0.4570457994644658f, 37 | 1.445305721320277f, 38 | -0.5900435899266435f 39 | }; 40 | 41 | __forceinline__ __device__ float ndc2Pix(float v, int S) 42 | { 43 | return ((v + 1.0) * S - 1.0) * 0.5; 44 | } 45 | 46 | __forceinline__ __device__ void getRect(const float2 p, int max_radius, uint2& rect_min, uint2& rect_max, dim3 grid) 47 | { 48 | rect_min = { 49 | min(grid.x, max((int)0, (int)((p.x - max_radius) / BLOCK_X))), 50 | min(grid.y, max((int)0, (int)((p.y - max_radius) / BLOCK_Y))) 51 | }; 52 | rect_max = { 53 | min(grid.x, max((int)0, (int)((p.x + max_radius + BLOCK_X - 1) / BLOCK_X))), 54 | min(grid.y, max((int)0, (int)((p.y + max_radius + BLOCK_Y - 1) / BLOCK_Y))) 55 | }; 56 | } 57 | 58 | __forceinline__ __device__ float3 transformPoint4x3(const float3& p, const float* matrix) 59 | { 60 | float3 transformed = { 61 | matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z + matrix[12], 62 | matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z + matrix[13], 63 | matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z + matrix[14], 64 | }; 65 | return transformed; 66 | } 67 | 68 | __forceinline__ __device__ float4 transformPoint4x4(const float3& p, const float* matrix) 69 | { 70 | float4 transformed = { 71 | matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z + matrix[12], 72 | matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z + matrix[13], 73 | matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z + matrix[14], 74 | matrix[3] * p.x + matrix[7] * p.y + matrix[11] * p.z + matrix[15] 75 | }; 76 | return transformed; 77 | } 78 | 79 | __forceinline__ __device__ float3 transformVec4x3(const float3& p, const float* matrix) 80 | { 81 | float3 transformed = { 82 | matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z, 83 | matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z, 84 | matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z, 85 | }; 86 | return transformed; 87 | } 88 | 89 | __forceinline__ __device__ float3 transformVec4x3Transpose(const float3& p, const float* matrix) 90 | { 91 | float3 transformed = { 92 | matrix[0] * p.x + matrix[1] * p.y + matrix[2] * p.z, 93 | matrix[4] * p.x + matrix[5] * p.y + matrix[6] * p.z, 94 | matrix[8] * p.x + matrix[9] * p.y + matrix[10] * p.z, 95 | }; 96 | return transformed; 97 | } 98 | 
99 | __forceinline__ __device__ float dnormvdz(float3 v, float3 dv) 100 | { 101 | float sum2 = v.x * v.x + v.y * v.y + v.z * v.z; 102 | float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2); 103 | float dnormvdz = (-v.x * v.z * dv.x - v.y * v.z * dv.y + (sum2 - v.z * v.z) * dv.z) * invsum32; 104 | return dnormvdz; 105 | } 106 | 107 | __forceinline__ __device__ float3 dnormvdv(float3 v, float3 dv) 108 | { 109 | float sum2 = v.x * v.x + v.y * v.y + v.z * v.z; 110 | float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2); 111 | 112 | float3 dnormvdv; 113 | dnormvdv.x = ((+sum2 - v.x * v.x) * dv.x - v.y * v.x * dv.y - v.z * v.x * dv.z) * invsum32; 114 | dnormvdv.y = (-v.x * v.y * dv.x + (sum2 - v.y * v.y) * dv.y - v.z * v.y * dv.z) * invsum32; 115 | dnormvdv.z = (-v.x * v.z * dv.x - v.y * v.z * dv.y + (sum2 - v.z * v.z) * dv.z) * invsum32; 116 | return dnormvdv; 117 | } 118 | 119 | __forceinline__ __device__ float4 dnormvdv(float4 v, float4 dv) 120 | { 121 | float sum2 = v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w; 122 | float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2); 123 | 124 | float4 vdv = { v.x * dv.x, v.y * dv.y, v.z * dv.z, v.w * dv.w }; 125 | float vdv_sum = vdv.x + vdv.y + vdv.z + vdv.w; 126 | float4 dnormvdv; 127 | dnormvdv.x = ((sum2 - v.x * v.x) * dv.x - v.x * (vdv_sum - vdv.x)) * invsum32; 128 | dnormvdv.y = ((sum2 - v.y * v.y) * dv.y - v.y * (vdv_sum - vdv.y)) * invsum32; 129 | dnormvdv.z = ((sum2 - v.z * v.z) * dv.z - v.z * (vdv_sum - vdv.z)) * invsum32; 130 | dnormvdv.w = ((sum2 - v.w * v.w) * dv.w - v.w * (vdv_sum - vdv.w)) * invsum32; 131 | return dnormvdv; 132 | } 133 | 134 | __forceinline__ __device__ float sigmoid(float x) 135 | { 136 | return 1.0f / (1.0f + expf(-x)); 137 | } 138 | 139 | __forceinline__ __device__ bool in_frustum(int idx, 140 | const float* orig_points, 141 | const float* viewmatrix, 142 | const float* projmatrix, 143 | bool prefiltered, 144 | float3& p_view) 145 | { 146 | float3 p_orig = { orig_points[3 * idx], orig_points[3 * idx + 1], orig_points[3 * idx + 2] }; 147 | 148 | // Bring points to screen space 149 | float4 p_hom = transformPoint4x4(p_orig, projmatrix); 150 | float p_w = 1.0f / (p_hom.w + 0.0000001f); 151 | float3 p_proj = { p_hom.x * p_w, p_hom.y * p_w, p_hom.z * p_w }; 152 | p_view = transformPoint4x3(p_orig, viewmatrix); 153 | 154 | if (p_view.z <= 0.2f)// || ((p_proj.x < -1.3 || p_proj.x > 1.3 || p_proj.y < -1.3 || p_proj.y > 1.3))) 155 | { 156 | if (prefiltered) 157 | { 158 | printf("Point is filtered although prefiltered is set. This shouldn't happen!"); 159 | __trap(); 160 | } 161 | return false; 162 | } 163 | return true; 164 | } 165 | 166 | #define CHECK_CUDA(A, debug) \ 167 | A; if(debug) { \ 168 | auto ret = cudaDeviceSynchronize(); \ 169 | if (ret != cudaSuccess) { \ 170 | std::cerr << "\n[CUDA ERROR] in " << __FILE__ << "\nLine " << __LINE__ << ": " << cudaGetErrorString(ret); \ 171 | throw std::runtime_error(cudaGetErrorString(ret)); \ 172 | } \ 173 | } 174 | 175 | #endif -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization/cuda_rasterizer/backward.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 
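 *
 * Note on the helpers in auxiliary.h above: dnormvdz()/dnormvdv() evaluate the
 * directional derivative of the normalization map n(v) = v / |v| along a
 * perturbation dv, i.e. dn = (dv * |v|^2 - v * (v . dv)) / |v|^3; the float4
 * overload extends the same formula to 4-vectors (quaternions). These are the
 * kind of derivatives the backward kernels declared in this header need when
 * back-propagating through quantities that are normalized in the forward pass.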
8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_BACKWARD_H_INCLUDED 13 | #define CUDA_RASTERIZER_BACKWARD_H_INCLUDED 14 | 15 | #include 16 | #include "cuda_runtime.h" 17 | #include "device_launch_parameters.h" 18 | #define GLM_FORCE_CUDA 19 | #include 20 | 21 | namespace BACKWARD 22 | { 23 | void render( 24 | const dim3 grid, dim3 block, 25 | const uint2* ranges, 26 | const uint32_t* point_list, 27 | int W, int H, 28 | const float* bg_color, 29 | const float2* means2D, 30 | const float4* conic_opacity, 31 | const float3* means3D, 32 | const float* colors, 33 | const float* depths, 34 | const float* projmatrix, 35 | const float* final_Ts, 36 | const uint32_t* n_contrib, 37 | const float* dL_dpixels, 38 | const float* dL_depths, 39 | float3* dL_dmean2D, 40 | float4* dL_dconic2D, 41 | float3* dL_dmean3D, 42 | float* dL_dopacity, 43 | float* dL_dcolors); 44 | 45 | void preprocess( 46 | int P, int D, int M, 47 | const float3* means, 48 | const int* radii, 49 | const float* shs, 50 | const bool* clamped, 51 | const glm::vec3* scales, 52 | const glm::vec4* rotations, 53 | const float scale_modifier, 54 | const float* cov3Ds, 55 | const float* view, 56 | const float* proj, 57 | const float focal_x, float focal_y, 58 | const float tan_fovx, float tan_fovy, 59 | const glm::vec3* campos, 60 | const float3* dL_dmean2D, 61 | const float* dL_dconics, 62 | glm::vec3* dL_dmeans, 63 | float* dL_dcolor, 64 | float* dL_dcov3D, 65 | float* dL_dsh, 66 | glm::vec3* dL_dscale, 67 | glm::vec4* dL_drot); 68 | } 69 | 70 | #endif 71 | -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization/cuda_rasterizer/config.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_CONFIG_H_INCLUDED 13 | #define CUDA_RASTERIZER_CONFIG_H_INCLUDED 14 | 15 | #define NUM_CHANNELS 3 // Default 3, RGB 16 | #define BLOCK_X 16 17 | #define BLOCK_Y 16 18 | 19 | #endif -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization/cuda_rasterizer/forward.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_FORWARD_H_INCLUDED 13 | #define CUDA_RASTERIZER_FORWARD_H_INCLUDED 14 | 15 | #include 16 | #include "cuda_runtime.h" 17 | #include "device_launch_parameters.h" 18 | #define GLM_FORCE_CUDA 19 | #include 20 | 21 | namespace FORWARD 22 | { 23 | // Perform initial steps for each Gaussian prior to rasterization. 
24 | void preprocess(int P, int D, int M, 25 | const float* orig_points, 26 | const glm::vec3* scales, 27 | const float scale_modifier, 28 | const glm::vec4* rotations, 29 | const float* opacities, 30 | const float* shs, 31 | bool* clamped, 32 | const float* cov3D_precomp, 33 | const float* colors_precomp, 34 | const float* viewmatrix, 35 | const float* projmatrix, 36 | const glm::vec3* cam_pos, 37 | const int W, int H, 38 | const float focal_x, float focal_y, 39 | const float tan_fovx, float tan_fovy, 40 | int* radii, 41 | float2* points_xy_image, 42 | float* depths, 43 | float* cov3Ds, 44 | float* colors, 45 | float4* conic_opacity, 46 | const dim3 grid, 47 | uint32_t* tiles_touched, 48 | bool prefiltered); 49 | 50 | // Main rasterization method. 51 | void render( 52 | const dim3 grid, dim3 block, 53 | const uint2* ranges, 54 | const uint32_t* point_list, 55 | int W, int H, 56 | const float2* points_xy_image, 57 | const float* features, 58 | const float* depths, 59 | const float4* conic_opacity, 60 | float* final_T, 61 | uint32_t* n_contrib, 62 | const float* bg_color, 63 | float* out_color, 64 | float* out_depth); 65 | 66 | void filter_preprocess(int P, int M, 67 | const float* means3D, 68 | const glm::vec3* scales, 69 | const float scale_modifier, 70 | const glm::vec4* rotations, 71 | const float* cov3D_precomp, 72 | const float* viewmatrix, 73 | const float* projmatrix, 74 | const int W, int H, 75 | const float focal_x, float focal_y, 76 | const float tan_fovx, float tan_fovy, 77 | int* radii, 78 | float* cov3Ds, 79 | const dim3 grid, 80 | bool prefiltered); 81 | } 82 | 83 | 84 | #endif 85 | -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization/cuda_rasterizer/rasterizer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 
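 *
 * The FORWARD interface declared in forward.h above is the per-frame entry
 * point of the tile-based splatter: preprocess() projects every Gaussian,
 * derives its screen-space conic/opacity and counts the BLOCK_X x BLOCK_Y
 * (16x16, see config.h) tiles it touches, and render() then alpha-blends the
 * tile-sorted Gaussians into out_color and a per-pixel out_depth buffer.
 * filter_preprocess() computes only the projected radii/covariances and is
 * presumably what backs the visible_filter entry point declared below.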
8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_H_INCLUDED 13 | #define CUDA_RASTERIZER_H_INCLUDED 14 | 15 | #include 16 | #include 17 | 18 | namespace CudaRasterizer 19 | { 20 | class Rasterizer 21 | { 22 | public: 23 | 24 | static void markVisible( 25 | int P, 26 | float* means3D, 27 | float* viewmatrix, 28 | float* projmatrix, 29 | bool* present); 30 | 31 | static int forward( 32 | std::function geometryBuffer, 33 | std::function binningBuffer, 34 | std::function imageBuffer, 35 | const int P, int D, int M, 36 | const float* background, 37 | const int width, int height, 38 | const float* means3D, 39 | const float* shs, 40 | const float* colors_precomp, 41 | const float* opacities, 42 | const float* scales, 43 | const float scale_modifier, 44 | const float* rotations, 45 | const float* cov3D_precomp, 46 | const float* viewmatrix, 47 | const float* projmatrix, 48 | const float* cam_pos, 49 | const float tan_fovx, float tan_fovy, 50 | const bool prefiltered, 51 | float* out_color, 52 | float* out_depth, 53 | int* radii = nullptr, 54 | bool debug = false); 55 | 56 | 57 | static void visible_filter( 58 | std::function geometryBuffer, 59 | std::function binningBuffer, 60 | std::function imageBuffer, 61 | const int P, int M, 62 | const int width, int height, 63 | const float* means3D, 64 | const float* scales, 65 | const float scale_modifier, 66 | const float* rotations, 67 | const float* cov3D_precomp, 68 | const float* viewmatrix, 69 | const float* projmatrix, 70 | const float tan_fovx, float tan_fovy, 71 | const bool prefiltered, 72 | int* radii, 73 | bool debug); 74 | 75 | static void backward( 76 | const int P, int D, int M, int R, 77 | const float* background, 78 | const int width, int height, 79 | const float* means3D, 80 | const float* shs, 81 | const float* colors_precomp, 82 | const float* scales, 83 | const float scale_modifier, 84 | const float* rotations, 85 | const float* cov3D_precomp, 86 | const float* viewmatrix, 87 | const float* projmatrix, 88 | const float* campos, 89 | const float tan_fovx, float tan_fovy, 90 | const int* radii, 91 | char* geom_buffer, 92 | char* binning_buffer, 93 | char* image_buffer, 94 | const float* dL_dpix, 95 | const float* dL_depths, 96 | float* dL_dmean2D, 97 | float* dL_dconic, 98 | float* dL_dopacity, 99 | float* dL_dcolor, 100 | float* dL_dmean3D, 101 | float* dL_dcov3D, 102 | float* dL_dsh, 103 | float* dL_dscale, 104 | float* dL_drot, 105 | bool debug); 106 | }; 107 | }; 108 | 109 | #endif 110 | -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization/cuda_rasterizer/rasterizer_impl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 
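 *
 * A note on the Rasterizer interface declared in rasterizer.h above (an
 * interpretation, not stated in the headers): geometryBuffer, binningBuffer
 * and imageBuffer are resize callbacks, each receiving a byte count and
 * returning a pointer to at least that much scratch memory, so the caller
 * (here the PyTorch extension in this submodule) owns the temporary
 * allocations. The GeometryState / BinningState / ImageState structs below
 * describe how that scratch memory is laid out.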
8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #pragma once 13 | 14 | #include 15 | #include 16 | #include "rasterizer.h" 17 | #include 18 | 19 | namespace CudaRasterizer 20 | { 21 | template 22 | static void obtain(char*& chunk, T*& ptr, std::size_t count, std::size_t alignment) 23 | { 24 | std::size_t offset = (reinterpret_cast(chunk) + alignment - 1) & ~(alignment - 1); 25 | ptr = reinterpret_cast(offset); 26 | chunk = reinterpret_cast(ptr + count); 27 | } 28 | 29 | struct GeometryState 30 | { 31 | size_t scan_size; 32 | float* depths; 33 | char* scanning_space; 34 | bool* clamped; 35 | int* internal_radii; 36 | float2* means2D; 37 | float* cov3D; 38 | float4* conic_opacity; 39 | float* rgb; 40 | uint32_t* point_offsets; 41 | uint32_t* tiles_touched; 42 | 43 | static GeometryState fromChunk(char*& chunk, size_t P); 44 | }; 45 | 46 | struct ImageState 47 | { 48 | uint2* ranges; 49 | uint32_t* n_contrib; 50 | float* accum_alpha; 51 | 52 | static ImageState fromChunk(char*& chunk, size_t N); 53 | }; 54 | 55 | struct BinningState 56 | { 57 | size_t sorting_size; 58 | uint64_t* point_list_keys_unsorted; 59 | uint64_t* point_list_keys; 60 | uint32_t* point_list_unsorted; 61 | uint32_t* point_list; 62 | char* list_sorting_space; 63 | 64 | static BinningState fromChunk(char*& chunk, size_t P); 65 | }; 66 | 67 | template 68 | size_t required(size_t P) 69 | { 70 | char* size = nullptr; 71 | T::fromChunk(size, P); 72 | return ((size_t)size) + 128; 73 | } 74 | }; -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization/ext.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #include 13 | #include "rasterize_points.h" 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("rasterize_gaussians", &RasterizeGaussiansCUDA); 17 | m.def("rasterize_gaussians_backward", &RasterizeGaussiansBackwardCUDA); 18 | m.def("rasterize_aussians_filter", &RasterizeGaussiansfilterCUDA); 19 | m.def("mark_visible", &markVisible); 20 | } -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization/rasterize_points.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 
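 *
 * A worked example of the obtain() helper in rasterizer_impl.h above: with
 * alignment = 128 and the chunk pointer currently at address 1000, the offset
 * becomes (1000 + 127) & ~127 = 1024, so ptr is placed on the next 128-byte
 * boundary and chunk is advanced past count elements of T. required<T>(P)
 * replays the same layout pass starting from a null pointer to measure how
 * many bytes a state buffer for P elements needs, plus 128 bytes of slack for
 * the initial alignment.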
8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #pragma once 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | std::tuple 19 | RasterizeGaussiansCUDA( 20 | const torch::Tensor& background, 21 | const torch::Tensor& means3D, 22 | const torch::Tensor& colors, 23 | const torch::Tensor& opacity, 24 | const torch::Tensor& scales, 25 | const torch::Tensor& rotations, 26 | const float scale_modifier, 27 | const torch::Tensor& cov3D_precomp, 28 | const torch::Tensor& viewmatrix, 29 | const torch::Tensor& projmatrix, 30 | const float tan_fovx, 31 | const float tan_fovy, 32 | const int image_height, 33 | const int image_width, 34 | const torch::Tensor& sh, 35 | const int degree, 36 | const torch::Tensor& campos, 37 | const bool prefiltered, 38 | const bool debug); 39 | 40 | std::tuple 41 | RasterizeGaussiansBackwardCUDA( 42 | const torch::Tensor& background, 43 | const torch::Tensor& means3D, 44 | const torch::Tensor& radii, 45 | const torch::Tensor& colors, 46 | const torch::Tensor& scales, 47 | const torch::Tensor& rotations, 48 | const float scale_modifier, 49 | const torch::Tensor& cov3D_precomp, 50 | const torch::Tensor& viewmatrix, 51 | const torch::Tensor& projmatrix, 52 | const float tan_fovx, 53 | const float tan_fovy, 54 | const torch::Tensor& dL_dout_color, 55 | const torch::Tensor& dL_dout_depth, 56 | const torch::Tensor& sh, 57 | const int degree, 58 | const torch::Tensor& campos, 59 | const torch::Tensor& geomBuffer, 60 | const int R, 61 | const torch::Tensor& binningBuffer, 62 | const torch::Tensor& imageBuffer, 63 | const bool debug); 64 | 65 | torch::Tensor markVisible( 66 | torch::Tensor& means3D, 67 | torch::Tensor& viewmatrix, 68 | torch::Tensor& projmatrix); 69 | 70 | 71 | torch::Tensor 72 | RasterizeGaussiansfilterCUDA( 73 | const torch::Tensor& means3D, 74 | const torch::Tensor& scales, 75 | const torch::Tensor& rotations, 76 | const float scale_modifier, 77 | const torch::Tensor& cov3D_precomp, 78 | const torch::Tensor& viewmatrix, 79 | const torch::Tensor& projmatrix, 80 | const float tan_fovx, 81 | const float tan_fovy, 82 | const int image_height, 83 | const int image_width, 84 | const bool prefiltered, 85 | const bool debug); -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization/setup.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
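# Build note (an assumption based on the usual 3D Gaussian Splatting setup, not
# stated in this file): the extension is typically installed into the active
# environment with
#
#     pip install ./submodules/depth-diff-gaussian-rasterization
#
# which compiles the CUDA sources listed below against the local PyTorch/CUDA
# toolchain; the -I flag below expects the glm headers under third_party/glm/.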
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | from setuptools import setup 13 | from torch.utils.cpp_extension import CUDAExtension, BuildExtension 14 | import os 15 | os.path.dirname(os.path.abspath(__file__)) 16 | 17 | setup( 18 | name="depth_diff_gaussian_rasterization", 19 | packages=['depth_diff_gaussian_rasterization'], 20 | ext_modules=[ 21 | CUDAExtension( 22 | name="depth_diff_gaussian_rasterization._C", 23 | sources=[ 24 | "cuda_rasterizer/rasterizer_impl.cu", 25 | "cuda_rasterizer/forward.cu", 26 | "cuda_rasterizer/backward.cu", 27 | "rasterize_points.cu", 28 | "ext.cpp"], 29 | extra_compile_args={"nvcc": ["-I" + os.path.join(os.path.dirname(os.path.abspath(__file__)), "third_party/glm/")]}) 30 | ], 31 | cmdclass={ 32 | 'build_ext': BuildExtension 33 | } 34 | ) 35 | -------------------------------------------------------------------------------- /submodules/gridencoder/__init__.py: -------------------------------------------------------------------------------- 1 | from .grid import GridEncoder -------------------------------------------------------------------------------- /submodules/gridencoder/backend.py: -------------------------------------------------------------------------------- 1 | import os 2 | from torch.utils.cpp_extension import load 3 | 4 | _src_path = os.path.dirname(os.path.abspath(__file__)) 5 | 6 | nvcc_flags = [ 7 | '-O3', '-std=c++14', 8 | '-U__CUDA_NO_HALF_OPERATORS__', '-U__CUDA_NO_HALF_CONVERSIONS__', '-U__CUDA_NO_HALF2_OPERATORS__', 9 | ] 10 | 11 | if os.name == "posix": 12 | c_flags = ['-O3', '-std=c++14'] 13 | elif os.name == "nt": 14 | c_flags = ['/O2', '/std:c++17'] 15 | 16 | # find cl.exe 17 | def find_cl_path(): 18 | import glob 19 | for edition in ["Enterprise", "Professional", "BuildTools", "Community"]: 20 | paths = sorted(glob.glob(r"C:\\Program Files (x86)\\Microsoft Visual Studio\\*\\%s\\VC\\Tools\\MSVC\\*\\bin\\Hostx64\\x64" % edition), reverse=True) 21 | if paths: 22 | return paths[0] 23 | 24 | # If cl.exe is not on path, try to find it. 
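# find_cl_path() above scans the standard Visual Studio install locations and returns the newest MSVC Hostx64/x64 bin directory, or None if no install is found.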
25 | if os.system("where cl.exe >nul 2>nul") != 0: 26 | cl_path = find_cl_path() 27 | if cl_path is None: 28 | raise RuntimeError("Could not locate a supported Microsoft Visual C++ installation") 29 | os.environ["PATH"] += ";" + cl_path 30 | 31 | _backend = load(name='_grid_encoder', 32 | extra_cflags=c_flags, 33 | extra_cuda_cflags=nvcc_flags, 34 | sources=[os.path.join(_src_path, 'src', f) for f in [ 35 | 'gridencoder.cu', 36 | 'bindings.cpp', 37 | ]], 38 | ) 39 | 40 | __all__ = ['_backend'] -------------------------------------------------------------------------------- /submodules/gridencoder/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 4 | 5 | _src_path = os.path.dirname(os.path.abspath(__file__)) 6 | 7 | nvcc_flags = [ 8 | '-O3', '-std=c++14', 9 | '-U__CUDA_NO_HALF_OPERATORS__', '-U__CUDA_NO_HALF_CONVERSIONS__', '-U__CUDA_NO_HALF2_OPERATORS__', 10 | ] 11 | 12 | if os.name == "posix": 13 | c_flags = ['-O3', '-std=c++14'] 14 | elif os.name == "nt": 15 | c_flags = ['/O2', '/std:c++17'] 16 | 17 | # find cl.exe 18 | def find_cl_path(): 19 | import glob 20 | for edition in ["Enterprise", "Professional", "BuildTools", "Community"]: 21 | paths = sorted(glob.glob(r"C:\\Program Files (x86)\\Microsoft Visual Studio\\*\\%s\\VC\\Tools\\MSVC\\*\\bin\\Hostx64\\x64" % edition), reverse=True) 22 | if paths: 23 | return paths[0] 24 | 25 | # If cl.exe is not on path, try to find it. 26 | if os.system("where cl.exe >nul 2>nul") != 0: 27 | cl_path = find_cl_path() 28 | if cl_path is None: 29 | raise RuntimeError("Could not locate a supported Microsoft Visual C++ installation") 30 | os.environ["PATH"] += ";" + cl_path 31 | 32 | setup( 33 | name='gridencoder', # package name, import this to use python API 34 | ext_modules=[ 35 | CUDAExtension( 36 | name='_gridencoder', # extension name, import this to use CUDA API 37 | sources=[os.path.join(_src_path, 'src', f) for f in [ 38 | 'gridencoder.cu', 39 | 'bindings.cpp', 40 | ]], 41 | extra_compile_args={ 42 | 'cxx': c_flags, 43 | 'nvcc': nvcc_flags, 44 | } 45 | ), 46 | ], 47 | cmdclass={ 48 | 'build_ext': BuildExtension, 49 | } 50 | ) -------------------------------------------------------------------------------- /submodules/gridencoder/src/bindings.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "gridencoder.h" 4 | 5 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 6 | m.def("grid_encode_forward", &grid_encode_forward, "grid_encode_forward (CUDA)"); 7 | m.def("grid_encode_backward", &grid_encode_backward, "grid_encode_backward (CUDA)"); 8 | m.def("grid_encode_mix2D_forward", &grid_encode_mix2D_forward, "grid_encode_mix2D_forward (CUDA)"); 9 | m.def("grid_encode_mix2D_backward", &grid_encode_mix2D_backward, "grid_encode_mix2D_backward (CUDA)"); 10 | m.def("avg_2D_forward", &avg_2D_forward, "avg_2D_forward (CUDA)"); 11 | m.def("avg_2D_backward", &avg_2D_backward, "avg_2D_backward (CUDA)"); 12 | m.def("cnt_np_embed", &cnt_np_embed, "cnt_np_embed (CUDA)"); 13 | m.def("cnt_np_embed_backward", &cnt_np_embed_backward, "cnt_np_embed_backward (CUDA)"); 14 | } -------------------------------------------------------------------------------- /submodules/gridencoder/src/gridencoder.h: -------------------------------------------------------------------------------- 1 | #ifndef _HASH_ENCODE_H 2 | #define _HASH_ENCODE_H 3 | 4 | #include 5 | #include 
6 | 7 | // inputs: [N, num_dim], float, in [0, 1] 8 | // embeddings: [offsets[-1], n_features], float 9 | // offsets: [n_levels + 1], uint32_t 10 | // outputs: [N, n_levels * n_features], float 11 | 12 | void grid_encode_forward( 13 | const at::Tensor inputs, 14 | const at::Tensor embeddings, 15 | const at::Tensor offsets_list, 16 | const at::Tensor resolutions_list, 17 | at::Tensor outputs, 18 | const uint32_t N, const uint32_t num_dim, const uint32_t n_features, const uint32_t n_levels, const uint32_t max_level, const uint32_t Rb, const float PV, 19 | at::optional dy_dx, 20 | const at::optional binary_vxl, 21 | const at::optional min_level_id 22 | ); 23 | 24 | void grid_encode_backward( 25 | const at::Tensor grad, 26 | const at::Tensor inputs, 27 | const at::Tensor embeddings, 28 | const at::Tensor offsets_list, 29 | const at::Tensor resolutions_list, 30 | at::Tensor grad_embeddings, 31 | const uint32_t N, const uint32_t num_dim, const uint32_t n_features, const uint32_t n_levels, const uint32_t max_level, const uint32_t Rb, 32 | const at::optional dy_dx, 33 | at::optional grad_inputs, 34 | const at::optional binary_vxl, 35 | const at::optional min_level_id 36 | ); 37 | 38 | void grid_encode_mix2D_forward( 39 | const at::Tensor inputs_xy, const at::Tensor inputs_xz, const at::Tensor inputs_yz, 40 | const at::Tensor embeddings_xy, const at::Tensor embeddings_xz, const at::Tensor embeddings_yz, 41 | const at::Tensor offsets_list, 42 | const at::Tensor resolutions_list, 43 | at::Tensor outputs, 44 | const uint32_t N, const uint32_t num_dim, const uint32_t n_features, const uint32_t n_levels, const uint32_t max_level, const uint32_t Rb, const float PV, 45 | at::optional dy_dx, 46 | const at::optional binary_vxl_2D_xy, const at::optional binary_vxl_2D_xz, const at::optional binary_vxl_2D_yz, 47 | const at::optional min_level_id, const uint32_t xy_len, const uint32_t xz_len, const uint32_t yz_len 48 | ); 49 | 50 | void grid_encode_mix2D_backward( 51 | const at::Tensor grad, 52 | const at::Tensor inputs_xy, const at::Tensor inputs_xz, const at::Tensor inputs_yz, 53 | const at::Tensor embeddings_xy, const at::Tensor embeddings_xz, const at::Tensor embeddings_yz, 54 | const at::Tensor offsets_list, 55 | const at::Tensor resolutions_list, 56 | at::Tensor grad_embeddings, 57 | const uint32_t N, const uint32_t num_dim, const uint32_t n_features, const uint32_t n_levels, const uint32_t max_level, const uint32_t Rb, 58 | const at::optional dy_dx, 59 | at::optional grad_inputs, 60 | const at::optional binary_vxl_2D_xy, const at::optional binary_vxl_2D_xz, const at::optional binary_vxl_2D_yz, 61 | const at::optional min_level_id, 62 | const uint32_t xy_len, const uint32_t xz_len, const uint32_t yz_len, 63 | const uint32_t exy_len, const uint32_t exz_len, const uint32_t eyz_len 64 | ); 65 | 66 | void avg_2D_forward( 67 | const at::Tensor inputs, 68 | const at::Tensor embeddings, 69 | const at::Tensor offsets_list, 70 | const at::Tensor resolutions_list, 71 | at::Tensor outputs, 72 | const uint32_t N, const uint32_t n_features, const uint32_t n_levels, const uint32_t Rb, const uint32_t ref_scale, 73 | const at::Tensor binary_vxl 74 | ); 75 | 76 | void avg_2D_backward( 77 | const at::Tensor grad, 78 | const at::Tensor inputs, 79 | const at::Tensor embeddings, 80 | const at::Tensor offsets_list, 81 | const at::Tensor resolutions_list, 82 | at::Tensor grad_embeddings, 83 | const uint32_t N, const uint32_t n_features, const uint32_t n_levels, const uint32_t Rb, const uint32_t ref_scale, 84 | const at::Tensor 
binary_vxl 85 | ); 86 | 87 | void cnt_np_embed( 88 | const at::Tensor inputs, // [N, 4*4*4, 3] 89 | const at::Tensor embeddings_clip, // [520000, 4] 90 | at::Tensor outputs, // [512, 512, 4, 2] 91 | const uint32_t N, const uint32_t resolution, const uint32_t n_features, const uint32_t hashmap_size, const uint32_t axis 92 | ); 93 | 94 | void cnt_np_embed_backward( 95 | const at::Tensor inputs, // [N, 4*4*4, 3] 96 | const at::Tensor embeddings_clip, // [520000, 4] 97 | const at::Tensor outputs_sum, // [512, 512, 4, 1] 98 | const at::Tensor grad, // [512, 512, 4, 2] 99 | at::Tensor grad_embeddings, // [520000, 4] 100 | const uint32_t N, const uint32_t resolution, const uint32_t n_features, const uint32_t hashmap_size, const uint32_t axis 101 | ); 102 | 103 | #endif -------------------------------------------------------------------------------- /submodules/gridencoder/src/try.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparklingH/BloomScene/93c5855428895a19216685c2f3fb8bc75805dad4/submodules/gridencoder/src/try.cu -------------------------------------------------------------------------------- /submodules/simple-knn/ext.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #include 13 | #include "spatial.h" 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("distCUDA2", &distCUDA2); 17 | } 18 | -------------------------------------------------------------------------------- /submodules/simple-knn/setup.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | from setuptools import setup 13 | from torch.utils.cpp_extension import CUDAExtension, BuildExtension 14 | import os 15 | 16 | cxx_compiler_flags = [] 17 | 18 | if os.name == 'nt': 19 | cxx_compiler_flags.append("/wd4624") 20 | 21 | setup( 22 | name="simple_knn", 23 | ext_modules=[ 24 | CUDAExtension( 25 | name="simple_knn._C", 26 | sources=[ 27 | "spatial.cu", 28 | "simple_knn.cu", 29 | "ext.cpp"], 30 | extra_compile_args={"nvcc": [], "cxx": cxx_compiler_flags}) 31 | ], 32 | cmdclass={ 33 | 'build_ext': BuildExtension 34 | } 35 | ) 36 | -------------------------------------------------------------------------------- /submodules/simple-knn/simple_knn.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 
8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef SIMPLEKNN_H_INCLUDED 13 | #define SIMPLEKNN_H_INCLUDED 14 | 15 | class SimpleKNN 16 | { 17 | public: 18 | static void knn(int P, float3* points, float* meanDists); 19 | }; 20 | 21 | #endif -------------------------------------------------------------------------------- /submodules/simple-knn/simple_knn/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparklingH/BloomScene/93c5855428895a19216685c2f3fb8bc75805dad4/submodules/simple-knn/simple_knn/.gitkeep -------------------------------------------------------------------------------- /submodules/simple-knn/spatial.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #include "spatial.h" 13 | #include "simple_knn.h" 14 | 15 | torch::Tensor 16 | distCUDA2(const torch::Tensor& points) 17 | { 18 | const int P = points.size(0); 19 | 20 | auto float_opts = points.options().dtype(torch::kFloat32); 21 | torch::Tensor means = torch::full({P}, 0.0, float_opts); 22 | 23 | SimpleKNN::knn(P, (float3*)points.contiguous().data<float>(), means.contiguous().data<float>()); 24 | 25 | return means; 26 | } -------------------------------------------------------------------------------- /submodules/simple-knn/spatial.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #include <torch/extension.h> 13 | 14 | torch::Tensor distCUDA2(const torch::Tensor& points); -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparklingH/BloomScene/93c5855428895a19216685c2f3fb8bc75805dad4/utils/__init__.py -------------------------------------------------------------------------------- /utils/camera.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file.
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | import json 12 | import numpy as np 13 | import torch 14 | 15 | from scene.cameras import Camera, MiniCam 16 | from utils.general import PILtoTorch 17 | from utils.graphics import fov2focal, focal2fov, getWorld2View, getProjectionMatrix 18 | 19 | 20 | WARNED = False 21 | 22 | 23 | def load_json(path, H, W): 24 | cams = [] 25 | with open(path) as json_file: 26 | contents = json.load(json_file) 27 | FoVx = contents["camera_angle_x"] 28 | FoVy = focal2fov(fov2focal(FoVx, W), H) 29 | zfar = 100.0 30 | znear = 0.01 31 | 32 | frames = contents["frames"] 33 | for idx, frame in enumerate(frames): 34 | # NeRF 'transform_matrix' is a camera-to-world transform 35 | c2w = np.array(frame["transform_matrix"]) 36 | # change from OpenGL/Blender camera axes (Y up, Z back) to COLMAP (Y down, Z forward) 37 | c2w[:3, 1:3] *= -1 38 | if c2w.shape[0] == 3: 39 | one = np.zeros((1, 4)) 40 | one[0, -1] = 1 41 | c2w = np.concatenate((c2w, one), axis=0) 42 | 43 | # get the world-to-camera transform and set R, T 44 | w2c = np.linalg.inv(c2w) 45 | R = np.transpose(w2c[:3, :3]) # R is stored transposed due to 'glm' in CUDA code 46 | T = w2c[:3, 3] 47 | 48 | w2c = torch.as_tensor(getWorld2View(R, T)).T.cuda() 49 | proj = getProjectionMatrix(znear, zfar, FoVx, FoVy).T.cuda() 50 | cams.append(MiniCam(W, H, FoVx, FoVy, znear, zfar, w2c, w2c @ proj)) 51 | return cams 52 | 53 | 54 | def loadCam(args, id, cam_info, resolution_scale): 55 | orig_w, orig_h = cam_info.image.size 56 | 57 | if args.resolution in [1, 2, 4, 8]: 58 | resolution = round(orig_w/(resolution_scale * args.resolution)), round(orig_h/(resolution_scale * args.resolution)) 59 | else: # should be a type that converts to float 60 | if args.resolution == -1: 61 | if orig_w > 1600: 62 | global WARNED 63 | if not WARNED: 64 | print("[ INFO ] Encountered quite large input images (>1.6K pixels width), rescaling to 1.6K.\n " 65 | "If this is not desired, please explicitly specify '--resolution/-r' as 1") 66 | WARNED = True 67 | global_down = orig_w / 1600 68 | else: 69 | global_down = 1 70 | else: 71 | global_down = orig_w / args.resolution 72 | 73 | scale = float(global_down) * float(resolution_scale) 74 | resolution = (int(orig_w / scale), int(orig_h / scale)) 75 | 76 | resized_image_rgb = PILtoTorch(cam_info.image, resolution) 77 | 78 | gt_image = resized_image_rgb[:3, ...] 79 | loaded_mask = None 80 | 81 | if resized_image_rgb.shape[1] == 4: 82 | loaded_mask = resized_image_rgb[3:4, ...] 
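# For RGBA inputs, the alpha channel is kept as the gt_alpha_mask passed to Camera below; RGB inputs leave loaded_mask as None.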
83 | 84 | return Camera(colmap_id=cam_info.uid, R=cam_info.R, T=cam_info.T, 85 | FoVx=cam_info.FovX, FoVy=cam_info.FovY, 86 | image=gt_image, gt_alpha_mask=loaded_mask, 87 | image_name=cam_info.image_name, uid=id, data_device=args.data_device) 88 | 89 | 90 | def cameraList_from_camInfos(cam_infos, resolution_scale, args): 91 | camera_list = [] 92 | 93 | for id, c in enumerate(cam_infos): 94 | camera_list.append(loadCam(args, id, c, resolution_scale)) 95 | 96 | return camera_list 97 | 98 | 99 | def camera_to_JSON(id, camera : Camera): 100 | Rt = np.zeros((4, 4)) 101 | Rt[:3, :3] = camera.R.transpose() 102 | Rt[:3, 3] = camera.T 103 | Rt[3, 3] = 1.0 104 | 105 | W2C = np.linalg.inv(Rt) 106 | pos = W2C[:3, 3] 107 | rot = W2C[:3, :3] 108 | serializable_array_2d = [x.tolist() for x in rot] 109 | camera_entry = { 110 | 'id' : id, 111 | 'img_name' : camera.image_name, 112 | 'width' : camera.width, 113 | 'height' : camera.height, 114 | 'position': pos.tolist(), 115 | 'rotation': serializable_array_2d, 116 | 'fy' : fov2focal(camera.FovY, camera.height), 117 | 'fx' : fov2focal(camera.FovX, camera.width) 118 | } 119 | return camera_entry 120 | -------------------------------------------------------------------------------- /utils/depth.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | import matplotlib.cm 3 | import numpy as np 4 | import torch 5 | 6 | 7 | def colorize(value, vmin=None, vmax=None, cmap='jet', invalid_val=-99, invalid_mask=None, background_color=(128, 128, 128, 255), gamma_corrected=False, value_transform=None): 8 | """Converts a depth map to a color image. 9 | 10 | Args: 11 | value (torch.Tensor, numpy.ndarry): Input depth map. Shape: (H, W) or (1, H, W) or (1, 1, H, W). All singular dimensions are squeezed 12 | vmin (float, optional): vmin-valued entries are mapped to start color of cmap. If None, value.min() is used. Defaults to None. 13 | vmax (float, optional): vmax-valued entries are mapped to end color of cmap. If None, value.max() is used. Defaults to None. 14 | cmap (str, optional): matplotlib colormap to use. Defaults to 'magma_r'. 15 | invalid_val (int, optional): Specifies value of invalid pixels that should be colored as 'background_color'. Defaults to -99. 16 | invalid_mask (numpy.ndarray, optional): Boolean mask for invalid regions. Defaults to None. 17 | background_color (tuple[int], optional): 4-tuple RGB color to give to invalid pixels. Defaults to (128, 128, 128, 255). 18 | gamma_corrected (bool, optional): Apply gamma correction to colored image. Defaults to False. 19 | value_transform (Callable, optional): Apply transform function to valid pixels before coloring. Defaults to None. 20 | 21 | Returns: 22 | numpy.ndarray, dtype - uint8: Colored depth map. Shape: (H, W, 4) 23 | """ 24 | if isinstance(value, torch.Tensor): 25 | value = value.detach().cpu().numpy() 26 | 27 | value = value.squeeze() 28 | if invalid_mask is None: 29 | invalid_mask = value == invalid_val 30 | mask = np.logical_not(invalid_mask) 31 | 32 | # normalize 33 | vmin = np.percentile(value[mask],2) if vmin is None else vmin 34 | vmax = np.percentile(value[mask],98) if vmax is None else vmax 35 | if vmin != vmax: 36 | value = (value - vmin) / (vmax - vmin) # vmin..vmax 37 | else: 38 | # Avoid 0-division 39 | value = value * 0. 
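# vmin == vmax: the depth map is constant, so every valid pixel maps to the low end of the colormap.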
40 | 41 | # squeeze last dim if it exists 42 | # grey out the invalid values 43 | 44 | value[invalid_mask] = np.nan 45 | cmapper = matplotlib.cm.get_cmap(cmap) 46 | if value_transform: 47 | value = value_transform(value) 48 | # value = value / value.max() 49 | value = cmapper(value, bytes=True) # (nxmx4) 50 | 51 | # img = value[:, :, :] 52 | img = value[...] 53 | img[invalid_mask] = background_color 54 | 55 | # return img.transpose((2, 0, 1)) 56 | if gamma_corrected: 57 | # gamma correction 58 | img = img / 255 59 | img = np.power(img, 2.2) 60 | img = img * 255 61 | img = img.astype(np.uint8) 62 | return img -------------------------------------------------------------------------------- /utils/entropy_models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as nnf 4 | import numpy as np 5 | from torch.distributions.uniform import Uniform 6 | from utils.encodings import use_clamp 7 | 8 | 9 | 10 | class Entropy_gaussian(nn.Module): 11 | def __init__(self, Q=1): 12 | super(Entropy_gaussian, self).__init__() 13 | self.Q = Q 14 | def forward(self, x, mean, scale, Q=None, x_mean=None): 15 | if Q is None: # Q=1 16 | Q = self.Q 17 | if use_clamp: # True 18 | if x_mean is None: 19 | x_mean = x.mean() 20 | x_min = x_mean - 15_000 * Q 21 | x_max = x_mean + 15_000 * Q 22 | x = torch.clamp(x, min=x_min.detach(), max=x_max.detach()) 23 | scale = torch.clamp(scale, min=1e-9) 24 | m1 = torch.distributions.normal.Normal(mean, scale) 25 | lower = m1.cdf(x - 0.5*Q) 26 | upper = m1.cdf(x + 0.5*Q) 27 | likelihood = torch.abs(upper - lower) 28 | likelihood = Low_bound.apply(likelihood) 29 | 30 | bits = -torch.log2(likelihood) 31 | return bits 32 | 33 | 34 | 35 | class Low_bound(torch.autograd.Function): 36 | @staticmethod 37 | def forward(ctx, x): 38 | ctx.save_for_backward(x) 39 | x = torch.clamp(x, min=1e-6) 40 | return x 41 | 42 | @staticmethod 43 | def backward(ctx, g): 44 | x, = ctx.saved_tensors 45 | grad1 = g.clone() 46 | grad1[x < 1e-6] = 0 47 | pass_through_if = np.logical_or( 48 | x.cpu().numpy() >= 1e-6, g.cpu().numpy() < 0.0) 49 | t = torch.Tensor(pass_through_if+0.0).cuda() 50 | return grad1 * t 51 | 52 | 53 | -------------------------------------------------------------------------------- /utils/general.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | import sys 12 | import random 13 | from datetime import datetime 14 | import numpy as np 15 | import torch 16 | 17 | 18 | def inverse_sigmoid(x): 19 | return torch.log(x/(1-x)) 20 | 21 | 22 | def PILtoTorch(pil_image, resolution): 23 | resized_image_PIL = pil_image.resize(resolution) 24 | resized_image = torch.from_numpy(np.array(resized_image_PIL)) / 255.0 25 | if len(resized_image.shape) == 3: 26 | return resized_image.permute(2, 0, 1) 27 | else: 28 | return resized_image.unsqueeze(dim=-1).permute(2, 0, 1) 29 | 30 | 31 | 32 | 33 | def get_expon_lr_func( 34 | lr_init, lr_final, lr_delay_steps=0, lr_delay_mult=1.0, max_steps=1000000, step_sub=0, 35 | ): 36 | """ 37 | Copied from Plenoxels 38 | 39 | Continuous learning rate decay function. 
Adapted from JaxNeRF 40 | The returned rate is lr_init when step=0 and lr_final when step=max_steps, and 41 | is log-linearly interpolated elsewhere (equivalent to exponential decay). 42 | If lr_delay_steps>0 then the learning rate will be scaled by some smooth 43 | function of lr_delay_mult, such that the initial learning rate is 44 | lr_init*lr_delay_mult at the beginning of optimization but will be eased back 45 | to the normal learning rate when steps>lr_delay_steps. 46 | :param conf: config subtree 'lr' or similar 47 | :param max_steps: int, the number of steps during optimization. 48 | :return HoF which takes step as input 49 | """ 50 | 51 | def helper(step): 52 | if step < 0 or (lr_init == 0.0 and lr_final == 0.0): 53 | # Disable this parameter 54 | return 0.0 55 | if lr_delay_steps > 0: 56 | # A kind of reverse cosine decay. 57 | delay_rate = lr_delay_mult + (1 - lr_delay_mult) * np.sin( 58 | 0.5 * np.pi * np.clip(step / lr_delay_steps, 0, 1) 59 | ) 60 | else: 61 | delay_rate = 1.0 62 | t = np.clip((step-step_sub) / (max_steps-step_sub), 0, 1) 63 | log_lerp = np.exp(np.log(lr_init) * (1 - t) + np.log(lr_final) * t) 64 | return delay_rate * log_lerp 65 | 66 | return helper 67 | 68 | def strip_lowerdiag(L): 69 | uncertainty = torch.zeros((L.shape[0], 6), dtype=torch.float, device="cuda") 70 | 71 | uncertainty[:, 0] = L[:, 0, 0] 72 | uncertainty[:, 1] = L[:, 0, 1] 73 | uncertainty[:, 2] = L[:, 0, 2] 74 | uncertainty[:, 3] = L[:, 1, 1] 75 | uncertainty[:, 4] = L[:, 1, 2] 76 | uncertainty[:, 5] = L[:, 2, 2] 77 | return uncertainty 78 | 79 | 80 | def strip_symmetric(sym): 81 | return strip_lowerdiag(sym) 82 | 83 | 84 | 85 | def build_rotation(r): 86 | norm = torch.sqrt(r[:,0]*r[:,0] + r[:,1]*r[:,1] + r[:,2]*r[:,2] + r[:,3]*r[:,3]) 87 | 88 | q = r / norm[:, None] 89 | 90 | R = torch.zeros((q.size(0), 3, 3), device='cuda') 91 | 92 | r = q[:, 0] 93 | x = q[:, 1] 94 | y = q[:, 2] 95 | z = q[:, 3] 96 | 97 | R[:, 0, 0] = 1 - 2 * (y*y + z*z) 98 | R[:, 0, 1] = 2 * (x*y - r*z) 99 | R[:, 0, 2] = 2 * (x*z + r*y) 100 | R[:, 1, 0] = 2 * (x*y + r*z) 101 | R[:, 1, 1] = 1 - 2 * (x*x + z*z) 102 | R[:, 1, 2] = 2 * (y*z - r*x) 103 | R[:, 2, 0] = 2 * (x*z - r*y) 104 | R[:, 2, 1] = 2 * (y*z + r*x) 105 | R[:, 2, 2] = 1 - 2 * (x*x + y*y) 106 | return R 107 | 108 | 109 | 110 | def build_scaling_rotation(s, r): 111 | L = torch.zeros((s.shape[0], 3, 3), dtype=torch.float, device="cuda") 112 | R = build_rotation(r) 113 | 114 | L[:,0,0] = s[:,0] 115 | L[:,1,1] = s[:,1] 116 | L[:,2,2] = s[:,2] 117 | 118 | L = R @ L 119 | return L 120 | 121 | 122 | def safe_state(silent): 123 | old_f = sys.stdout 124 | class F: 125 | def __init__(self, silent): 126 | self.silent = silent 127 | 128 | def write(self, x): 129 | if not self.silent: 130 | if x.endswith("\n"): 131 | old_f.write(x.replace("\n", " [{}]\n".format(str(datetime.now().strftime("%d/%m %H:%M:%S"))))) 132 | else: 133 | old_f.write(x) 134 | 135 | def flush(self): 136 | old_f.flush() 137 | 138 | sys.stdout = F(silent) 139 | 140 | random.seed(0) 141 | np.random.seed(0) 142 | torch.manual_seed(0) 143 | torch.cuda.set_device(torch.device("cuda:0")) 144 | 145 | 146 | class LatentStorer: 147 | def __init__(self): 148 | self.latent = None 149 | 150 | def __call__(self, i, t, latent): 151 | self.latent = latent -------------------------------------------------------------------------------- /utils/graphics.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, 
https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | import math 12 | from typing import NamedTuple 13 | import numpy as np 14 | import torch 15 | import torch.nn.functional as F 16 | import cv2 17 | 18 | 19 | class BasicPointCloud(NamedTuple): 20 | points : np.array 21 | colors : np.array 22 | normals : np.array 23 | 24 | 25 | def geom_transform_points(points, transf_matrix): 26 | P, _ = points.shape 27 | ones = torch.ones(P, 1, dtype=points.dtype, device=points.device) 28 | points_hom = torch.cat([points, ones], dim=1) 29 | points_out = torch.matmul(points_hom, transf_matrix.unsqueeze(0)) 30 | 31 | denom = points_out[..., 3:] + 0.0000001 32 | return (points_out[..., :3] / denom).squeeze(dim=0) 33 | 34 | 35 | def getWorld2View(R, t): 36 | Rt = np.zeros((4, 4)) 37 | Rt[:3, :3] = R.transpose() 38 | Rt[:3, 3] = t 39 | Rt[3, 3] = 1.0 40 | return np.float32(Rt) 41 | 42 | 43 | def getWorld2View2(R, t, translate=np.array([.0, .0, .0]), scale=1.0): 44 | Rt = np.zeros((4, 4)) 45 | Rt[:3, :3] = R.transpose() 46 | Rt[:3, 3] = t 47 | Rt[3, 3] = 1.0 48 | 49 | C2W = np.linalg.inv(Rt) 50 | cam_center = C2W[:3, 3] 51 | cam_center = (cam_center + translate) * scale 52 | C2W[:3, 3] = cam_center 53 | Rt = np.linalg.inv(C2W) 54 | return np.float32(Rt) 55 | 56 | 57 | def getProjectionMatrix(znear, zfar, fovX, fovY): 58 | tanHalfFovY = math.tan((fovY / 2)) 59 | tanHalfFovX = math.tan((fovX / 2)) 60 | 61 | top = tanHalfFovY * znear 62 | bottom = -top 63 | right = tanHalfFovX * znear 64 | left = -right 65 | 66 | P = torch.zeros(4, 4) 67 | 68 | z_sign = 1.0 69 | 70 | P[0, 0] = 2.0 * znear / (right - left) 71 | P[1, 1] = 2.0 * znear / (top - bottom) 72 | P[0, 2] = (right + left) / (right - left) 73 | P[1, 2] = (top + bottom) / (top - bottom) 74 | P[3, 2] = z_sign 75 | P[2, 2] = z_sign * zfar / (zfar - znear) 76 | P[2, 3] = -(zfar * znear) / (zfar - znear) 77 | return P 78 | 79 | 80 | def fov2focal(fov, pixels): 81 | return pixels / (2 * math.tan(fov / 2)) 82 | 83 | 84 | def focal2fov(focal, pixels): 85 | return 2*math.atan(pixels/(2*focal)) 86 | -------------------------------------------------------------------------------- /utils/metrics.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import torch 4 | import numpy as np 5 | import pyiqa 6 | import wandb 7 | 8 | from tqdm.auto import tqdm 9 | from PIL import Image 10 | from transformers import CLIPProcessor, CLIPModel 11 | from torchmetrics.multimodal import CLIPImageQualityAssessment 12 | 13 | 14 | def pil_to_torch(img, device, normalize=True): 15 | img = torch.tensor(np.array(img), device=device).permute(2, 0, 1) 16 | if normalize: 17 | img = img / 255.0 18 | return img 19 | 20 | 21 | def clip_score_and_iqa(image_folder, text, out_path): 22 | model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16") 23 | processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16") 24 | 25 | prompt_metric = ("quality", "colorfullness", "sharpness") 26 | clipiqa_model = CLIPImageQualityAssessment(model_name_or_path="openai/clip-vit-base-patch16", prompts=prompt_metric, data_range=1.0) 27 | 28 | images = [Image.open(os.path.join(image_folder, f)) for f in os.listdir(image_folder) if "png" in f or "jpg" in f] 29 | if text.endswith('.txt'): 30 | with open(text, 'r') as f: 31 | prompt = 
f.readline() 32 | else: 33 | prompt = text 34 | 35 | scores = torch.zeros((len(prompt_metric), len(images)), device=clipiqa_model.device) 36 | clip_scores = torch.zeros(len(images), device=model.device) 37 | 38 | pbar = tqdm(images, desc="Calc CLIP Score and CLIP IQA") 39 | for idx, image in enumerate(pbar): 40 | img_torch = pil_to_torch(image, model.device, normalize=False) 41 | inputs = processor(text=[prompt], images=img_torch, return_tensors="pt", padding=True) 42 | outputs = model(**inputs) 43 | clip_scores[idx] = outputs.logits_per_image.detach() 44 | for prompt_idx in range(len(prompt_metric)): 45 | scores[prompt_idx][idx] = clipiqa_model(img_torch.unsqueeze(dim=0))[prompt_metric[prompt_idx]].detach() 46 | 47 | wandb.log({ 48 | 'clip_score': clip_scores.mean().cpu().numpy().item(), 49 | 'clipiqa-quality': scores[0].mean().cpu().numpy().item(), 50 | 'clipiqa-colorful': scores[1].mean().cpu().numpy().item(), 51 | 'clipiqa-sharp': scores[2].mean().cpu().numpy().item() 52 | }) 53 | 54 | print("CLIP Score", clip_scores.mean().cpu().numpy()) 55 | print("CLIP IQA") 56 | print("quality", scores[0].mean().cpu().numpy()) 57 | print("colorful", scores[1].mean().cpu().numpy()) 58 | print("sharp", scores[2].mean().cpu().numpy()) 59 | 60 | 61 | def brisque_and_niqe_score(image_folder, out_path): # pyiqa 62 | images = [Image.open(os.path.join(image_folder, f)) for f in os.listdir(image_folder) if "png" in f or "jpg" in f] 63 | images_tensor = [] 64 | for image in images: 65 | image_t = pil_to_torch(image, "cpu", normalize=True) 66 | images_tensor.append(image_t) 67 | stack_images_tensor = torch.stack(images_tensor, dim=0) 68 | 69 | 70 | brisque_metric = pyiqa.create_metric('brisque') 71 | brisque_scores = brisque_metric(stack_images_tensor).tolist() 72 | print("BRISQUE", np.mean(brisque_scores)) 73 | 74 | niqe_metric = pyiqa.create_metric('niqe') 75 | niqe_scores = niqe_metric(stack_images_tensor).tolist() 76 | print("NIQE", np.mean(niqe_scores)) 77 | 78 | wandb.log({ 79 | 'brisque': np.mean(brisque_scores), 80 | 'niqe': np.mean(niqe_scores) 81 | }) 82 | -------------------------------------------------------------------------------- /utils/pose_noise_util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from scene.cameras import Camera 4 | 5 | 6 | def sample_noise(n, r_max, t_max): 7 | nr = np.random.normal(0, scale=r_max/2.0, size=(n,3)) 8 | nr = np.clip(nr, a_min=-r_max, a_max=r_max) 9 | 10 | nt = np.random.normal(0, scale=t_max/2.0, size=(n,3)) 11 | nt = np.clip(nt, a_min=-t_max, a_max=t_max) 12 | 13 | return nr, nt 14 | 15 | 16 | def interpolate_noise(n, steps): 17 | last = np.linspace(n[-1], n[-1], num=steps) 18 | n = [np.linspace(n[i], n[i + 1], num=steps) for i in range(n.shape[0] - 1)] 19 | n.append(last) 20 | n = np.concatenate(n, axis=0) 21 | return n 22 | 23 | 24 | def to_degrees(x): 25 | return x * 180.0 / np.pi 26 | 27 | 28 | def to_radians(x): 29 | return x * np.pi / 180.0 30 | 31 | 32 | # Checks if a matrix is a valid rotation matrix. 33 | # https://www.learnopencv.com/rotation-matrix-to-euler-angles/ 34 | def isRotationMatrix(R): 35 | Rt = np.transpose(R) 36 | shouldBeIdentity = np.dot(Rt, R) 37 | I = np.identity(3, dtype=R.dtype) 38 | n = np.linalg.norm(I - shouldBeIdentity) 39 | return n < 1e-5 40 | 41 | 42 | # Calculates rotation matrix to euler angles 43 | # The result is the same as MATLAB except the order 44 | # of the euler angles ( x and z are swapped ). 
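# Angles are returned in radians, following the R = Rz(z) @ Ry(y) @ Rx(x) convention used by eulerAnglesToRotationMatrix below.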
45 | # https://www.learnopencv.com/rotation-matrix-to-euler-angles/ 46 | def rotationMatrixToEulerAngles(R): 47 | assert (isRotationMatrix(R)) 48 | 49 | sy = np.sqrt(R[0, 0] * R[0, 0] + R[1, 0] * R[1, 0]) 50 | 51 | singular = sy < 1e-6 52 | 53 | if not singular: 54 | x = np.arctan2(R[2, 1], R[2, 2]) 55 | y = np.arctan2(-R[2, 0], sy) 56 | z = np.arctan2(R[1, 0], R[0, 0]) 57 | else: 58 | x = np.arctan2(-R[1, 2], R[1, 1]) 59 | y = np.arctan2(-R[2, 0], sy) 60 | z = 0 61 | 62 | return np.array([x, y, z]) 63 | 64 | 65 | # Calculates Rotation Matrix given euler angles. 66 | # https://www.learnopencv.com/rotation-matrix-to-euler-angles/ 67 | def eulerAnglesToRotationMatrix(theta): 68 | R_x = np.array([[1, 0, 0], 69 | [0, np.cos(theta[0]), -np.sin(theta[0])], 70 | [0, np.sin(theta[0]), np.cos(theta[0])] 71 | ]) 72 | 73 | R_y = np.array([[np.cos(theta[1]), 0, np.sin(theta[1])], 74 | [0, 1, 0], 75 | [-np.sin(theta[1]), 0, np.cos(theta[1])] 76 | ]) 77 | 78 | R_z = np.array([[np.cos(theta[2]), -np.sin(theta[2]), 0], 79 | [np.sin(theta[2]), np.cos(theta[2]), 0], 80 | [0, 0, 1] 81 | ]) 82 | 83 | R = np.dot(R_z, np.dot(R_y, R_x)) 84 | 85 | return R 86 | 87 | 88 | # poses = self.train_cameras 89 | def apply_noise_bloomscene(poses, chunk_size=10, r_max=2.0, t_max=0.05): 90 | noisy_poses = [] 91 | 92 | # create noise vectors 93 | n = len(poses) // chunk_size + (len(poses) % chunk_size != 0) 94 | nr, nt = sample_noise(n, r_max, t_max) 95 | nr = interpolate_noise(nr, chunk_size) 96 | nt = interpolate_noise(nt, chunk_size) 97 | 98 | for idx in range(len(poses)): 99 | pose = poses[idx] 100 | if isinstance(pose.R, torch.Tensor): 101 | r = pose.R.numpy() 102 | # pose_numpy = p.numpy() 103 | elif isinstance(pose.T, torch.Tensor): 104 | t = pose.T.numpy() 105 | else: 106 | # pose_numpy = p 107 | r = pose.R 108 | t = pose.T 109 | 110 | # extract r, t 111 | # r = pose_numpy[:3, :3] 112 | r = rotationMatrixToEulerAngles(r) 113 | r = to_degrees(r) 114 | 115 | 116 | # get noise 117 | nr_i = nr[idx // chunk_size] # (3, ) 118 | nt_i = nt[idx // chunk_size] # (3, ) 119 | 120 | # apply noise 121 | r += nr_i 122 | t += nt_i 123 | 124 | # create pose noise 125 | r = to_radians(r) 126 | r = eulerAnglesToRotationMatrix(r) 127 | 128 | # p_noise[:3, :3] = r 129 | # p_noise[:3, 3] = t 130 | #pose.R = r 131 | #pose.T = t 132 | 133 | if isinstance(pose.R, torch.Tensor): 134 | pose.R = torch.from_numpy(pose.R) 135 | elif isinstance(pose.T, torch.Tensor): 136 | pose.T = torch.from_numpy(pose.T) 137 | 138 | 139 | # p_noise = torch.from_numpy(p_noise).to(p) 140 | 141 | # noisy_poses.append(p_noise) 142 | noisy_poses.append(Camera(colmap_id=pose.colmap_id, R=r, T=t, FoVx=pose.FoVx, FoVy=pose.FoVy, image=pose.original_image, original_depth=pose.original_depth, 143 | gt_alpha_mask=None, image_name='', uid=pose.uid, data_device='cuda')) 144 | 145 | return noisy_poses 146 | -------------------------------------------------------------------------------- /utils/system.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | from errno import EEXIST 12 | from os import makedirs, path 13 | import os 14 | 15 | 16 | def mkdir_p(folder_path): 17 | # Creates a directory. 
equivalent to using mkdir -p on the command line 18 | try: 19 | makedirs(folder_path) 20 | except OSError as exc: # Python >2.5 21 | if exc.errno == EEXIST and path.isdir(folder_path): 22 | pass 23 | else: 24 | raise 25 | 26 | 27 | def searchForMaxIteration(folder): 28 | saved_iters = [int(fname.split("_")[-1]) for fname in os.listdir(folder)] 29 | return max(saved_iters) 30 | -------------------------------------------------------------------------------- /utils/trajectory.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023, Computer Vision Lab, Seoul National University, https://cv.snu.ac.kr 2 | # 3 | # Copyright 2023 LucidDreamer Authors 4 | # 5 | # Computer Vision Lab, SNU, its affiliates and licensors retain all intellectual 6 | # property and proprietary rights in and to this material, related 7 | # documentation and any modifications thereto. Any use, reproduction, 8 | # disclosure or distribution of this material and related documentation 9 | # without an express license agreement from the Computer Vision Lab, SNU or 10 | # its affiliates is strictly prohibited. 11 | # 12 | # For permission requests, please contact robot0321@snu.ac.kr, esw0116@snu.ac.kr, namhj28@gmail.com, jarin.lee@gmail.com. 13 | import numpy as np 14 | 15 | 16 | def generate_seed_360(viewangle, n_views): 17 | N = n_views 18 | render_poses = np.zeros((N, 3, 4)) 19 | for i in range(N): 20 | th = (viewangle/N)*i/180*np.pi 21 | render_poses[i,:3,:3] = np.array([[np.cos(th), 0, np.sin(th)], [0, 1, 0], [-np.sin(th), 0, np.cos(th)]]) 22 | render_poses[i,:3,3:4] = np.random.randn(3,1)*0.0 # Transition vector 23 | 24 | return render_poses 25 | 26 | def my_generate_seed_360(viewangle, n_views): 27 | N = n_views 28 | render_poses = np.zeros((N, 3, 4)) 29 | th_list = [0, 1, 9, 2, 8, 3, 7, 4, 6, 5] 30 | for i in range(N): 31 | th = (viewangle/N)*th_list[i]/180*np.pi 32 | render_poses[i,:3,:3] = np.array([[np.cos(th), 0, np.sin(th)], [0, 1, 0], [-np.sin(th), 0, np.cos(th)]]) 33 | render_poses[i,:3,3:4] = np.random.randn(3,1)*0.0 # Transition vector 34 | 35 | return render_poses 36 | 37 | 38 | def generate_seed_360_half(viewangle, n_views): 39 | N = n_views // 2 40 | halfangle = viewangle / 2 41 | render_poses = np.zeros((N*2, 3, 4)) 42 | for i in range(N): 43 | th = (halfangle/N)*i/180*np.pi 44 | render_poses[i,:3,:3] = np.array([[np.cos(th), 0, np.sin(th)], [0, 1, 0], [-np.sin(th), 0, np.cos(th)]]) 45 | render_poses[i,:3,3:4] = np.random.randn(3,1)*0.0 # Transition vector 46 | for i in range(N): 47 | th = -(halfangle/N)*i/180*np.pi 48 | render_poses[i+N,:3,:3] = np.array([[np.cos(th), 0, np.sin(th)], [0, 1, 0], [-np.sin(th), 0, np.cos(th)]]) 49 | render_poses[i+N,:3,3:4] = np.random.randn(3,1)*0.0 # Transition vector 50 | return render_poses 51 | 52 | 53 | def generate_seed_hemisphere(center_depth, degree=5): 54 | degree = 5 55 | thlist = np.array([degree, 0, 0, 0, -degree]) 56 | philist = np.array([0, -degree, 0, degree, 0]) 57 | assert len(thlist) == len(philist) 58 | 59 | render_poses = np.zeros((len(thlist), 3, 4)) 60 | for i in range(len(thlist)): 61 | th = thlist[i] 62 | phi = philist[i] 63 | d = center_depth 64 | 65 | render_poses[i,:3,:3] = np.matmul(np.array([[np.cos(th/180*np.pi), 0, -np.sin(th/180*np.pi)], [0, 1, 0], [np.sin(th/180*np.pi), 0, np.cos(th/180*np.pi)]]), np.array([[1, 0, 0], [0, np.cos(phi/180*np.pi), -np.sin(phi/180*np.pi)], [0, np.sin(phi/180*np.pi), np.cos(phi/180*np.pi)]])) 66 | render_poses[i,:3,3:4] = np.array([d*np.sin(th/180*np.pi), 0, 
d-d*np.cos(th/180*np.pi)]).reshape(3,1) + np.array([0, d*np.sin(phi/180*np.pi), d-d*np.cos(phi/180*np.pi)]).reshape(3,1) 67 | 68 | return render_poses 69 | 70 | 71 | def my_generate_seed_hemisphere(center_depth, degree=5): 72 | degree = 5 73 | thlist = np.array([degree, 0, 0, 0, -degree]) 74 | philist = np.array([0, -degree, 0, degree, 0]) 75 | assert len(thlist) == len(philist) 76 | 77 | render_poses = np.zeros((len(thlist) * len(center_depth), 3, 4)) 78 | for j in range(len(center_depth)): 79 | per_center_depth = center_depth[j] 80 | for i in range(len(thlist)): 81 | th = thlist[i] 82 | phi = philist[i] 83 | d = per_center_depth 84 | 85 | idx = j * len(thlist) + i 86 | render_poses[idx,:3,:3] = np.matmul(np.array([[np.cos(th/180*np.pi), 0, -np.sin(th/180*np.pi)], [0, 1, 0], [np.sin(th/180*np.pi), 0, np.cos(th/180*np.pi)]]), np.array([[1, 0, 0], [0, np.cos(phi/180*np.pi), -np.sin(phi/180*np.pi)], [0, np.sin(phi/180*np.pi), np.cos(phi/180*np.pi)]])) 87 | render_poses[idx,:3,3:4] = np.array([d*np.sin(th/180*np.pi), 0, d-d*np.cos(th/180*np.pi)]).reshape(3,1) + np.array([0, d*np.sin(phi/180*np.pi), d-d*np.cos(phi/180*np.pi)]).reshape(3,1) 88 | 89 | return render_poses 90 | 91 | 92 | def get_pcdGenPoses(pcdgenpath, argdict={}): 93 | if pcdgenpath == 'rotate360': 94 | render_poses = my_generate_seed_360(360, 10) 95 | elif pcdgenpath == 'hemisphere': 96 | render_poses = my_generate_seed_hemisphere(argdict['center_depth']) 97 | else: 98 | raise("Invalid pcdgenpath") 99 | return render_poses 100 | 101 | 102 | def get_camerapaths(): 103 | preset_json = {} 104 | for cam_path in ['rotate360']: 105 | if cam_path == 'rotate360': 106 | render_poses = generate_seed_360(360, 180) 107 | else: 108 | raise("Unknown pass") 109 | 110 | yz_reverse = np.array([[1,0,0], [0,-1,0], [0,0,-1]]) 111 | blender_train_json = {"frames": []} 112 | for render_pose in render_poses: 113 | curr_frame = {} 114 | Rw2i = render_pose[:3,:3] 115 | Tw2i = render_pose[:3,3:4] 116 | Ri2w = np.matmul(yz_reverse, Rw2i).T 117 | Ti2w = -np.matmul(Ri2w, np.matmul(yz_reverse, Tw2i)) 118 | Pc2w = np.concatenate((Ri2w, Ti2w), axis=1) 119 | Pc2w = np.concatenate((Pc2w, np.array([0,0,0,1]).reshape((1,4))), axis=0) 120 | 121 | curr_frame["transform_matrix"] = Pc2w.tolist() 122 | blender_train_json["frames"].append(curr_frame) 123 | 124 | preset_json[cam_path] = blender_train_json 125 | 126 | return preset_json --------------------------------------------------------------------------------
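
A minimal usage sketch for the trajectory presets above (not part of the repository source; the output filename and the shape checks are illustrative assumptions, and only functions defined in utils/trajectory.py are used):

import json
import numpy as np
from utils.trajectory import get_pcdGenPoses, get_camerapaths

# 10 seed poses sweeping 360 degrees about the y-axis: shape (10, 3, 4),
# rotation in [:, :3, :3] and an all-zero translation in [:, :3, 3:4].
render_poses = get_pcdGenPoses('rotate360')
assert render_poses.shape == (10, 3, 4)

# 180-frame preset path; each frame carries a 4x4 camera-to-world
# 'transform_matrix', the field that load_json in utils/camera.py reads
# (load_json additionally expects a top-level 'camera_angle_x').
presets = get_camerapaths()
frames = presets['rotate360']['frames']
assert len(frames) == 180
assert np.array(frames[0]['transform_matrix']).shape == (4, 4)

# Illustrative dump path; the repository ships its preset as cameras/rotate360.json.
with open('rotate360_preset_example.json', 'w') as f:
    json.dump(presets['rotate360'], f, indent=2)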