├── LICENSE ├── README.md ├── ZoeDepth ├── .gitignore ├── LICENSE ├── README.md ├── environment.yml ├── evaluate.py ├── hubconf.py ├── sanity.py ├── sanity_hub.py ├── train_mix.py ├── train_mono.py ├── train_test_inputs │ ├── kitti_eigen_test_files_with_gt.txt │ ├── kitti_eigen_train_files_with_gt.txt │ ├── nyudepthv2_test_files_with_gt.txt │ └── nyudepthv2_train_files_with_gt.txt ├── ui │ ├── app.py │ ├── gradio_depth_pred.py │ ├── gradio_im_to_3d.py │ ├── gradio_pano_to_3d.py │ └── ui_requirements.txt └── zoedepth │ ├── data │ ├── __init__.py │ ├── data_mono.py │ ├── ddad.py │ ├── diml_indoor_test.py │ ├── diml_outdoor_test.py │ ├── diode.py │ ├── hypersim.py │ ├── ibims.py │ ├── preprocess.py │ ├── sun_rgbd_loader.py │ ├── transforms.py │ ├── vkitti.py │ └── vkitti2.py │ ├── models │ ├── __init__.py │ ├── base_models │ │ ├── __init__.py │ │ └── midas.py │ ├── builder.py │ ├── depth_model.py │ ├── layers │ │ ├── attractor.py │ │ ├── dist_layers.py │ │ ├── localbins_layers.py │ │ └── patch_transformer.py │ ├── model_io.py │ ├── zoedepth │ │ ├── __init__.py │ │ ├── config_zoedepth.json │ │ ├── config_zoedepth_kitti.json │ │ └── zoedepth_v1.py │ └── zoedepth_nk │ │ ├── __init__.py │ │ ├── config_zoedepth_nk.json │ │ └── zoedepth_nk_v1.py │ ├── trainers │ ├── base_trainer.py │ ├── builder.py │ ├── loss.py │ ├── zoedepth_nk_trainer.py │ └── zoedepth_trainer.py │ └── utils │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── easydict │ └── __init__.py │ ├── geometry.py │ └── misc.py ├── arguments.py ├── bloomscene.py ├── cameras └── rotate360.json ├── environment.yml ├── examples ├── 01_childroom.png ├── 01_childroom.txt ├── 02_sunroom.png ├── 02_sunroom.txt ├── 03_beach.png ├── 03_beach.txt ├── 04_snow.png ├── 04_snow.txt ├── 05_christmas.png ├── 05_christmas.txt ├── 06_room.png ├── 06_room.txt ├── 07_museum.png ├── 07_museum.txt ├── 08_small_cabin.png ├── 08_small_cabin.txt ├── 08_small_cabin_negative.txt ├── 09_street.png └── 09_street.txt ├── gaussian_renderer └── __init__.py ├── images └── BloomScene.png ├── run.py ├── scene ├── __init__.py ├── cameras.py ├── dataset_readers.py └── gaussian_model.py ├── submodules ├── depth-diff-gaussian-rasterization │ ├── CMakeLists.txt │ ├── LICENSE.md │ ├── README.md │ ├── cuda_rasterizer │ │ ├── auxiliary.h │ │ ├── backward.cu │ │ ├── backward.h │ │ ├── config.h │ │ ├── forward.cu │ │ ├── forward.h │ │ ├── rasterizer.h │ │ ├── rasterizer_impl.cu │ │ └── rasterizer_impl.h │ ├── depth_diff_gaussian_rasterization │ │ └── __init__.py │ ├── ext.cpp │ ├── rasterize_points.cu │ ├── rasterize_points.h │ ├── setup.py │ └── third_party │ │ └── stbi_image_write.h ├── gridencoder │ ├── __init__.py │ ├── backend.py │ ├── grid.py │ ├── setup.py │ └── src │ │ ├── bindings.cpp │ │ ├── gridencoder.cu │ │ ├── gridencoder.h │ │ └── try.cu └── simple-knn │ ├── ext.cpp │ ├── setup.py │ ├── simple_knn.cu │ ├── simple_knn.h │ ├── simple_knn │ └── .gitkeep │ ├── spatial.cu │ └── spatial.h └── utils ├── __init__.py ├── camera.py ├── depth.py ├── encodings.py ├── entropy_models.py ├── general.py ├── graphics.py ├── loss.py ├── metrics.py ├── pose_noise_util.py ├── system.py └── trajectory.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 SparklingH 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the 
rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## BloomScene 2 | The official implementation of the AAAI 2025 paper "BloomScene: Lightweight Structured 3D Gaussian Splatting for Crossmodal Scene Generation". 3 | ![BloomScene](images/BloomScene.png) 4 | 5 | > ### [BloomScene: Lightweight Structured 3D Gaussian Splatting for Crossmodal Scene Generation](#) 6 | > **Xiaolu Hou***, **Mingcheng Li***, Dingkang Yang, Jiawei Chen, Ziyun Qian, Xiao Zhao, Yue Jiang, Jinjie Wei, Qingyao Xu, Lihua Zhang 7 | > *Accepted by AAAI 2025* 8 | 9 | ## Abstract 10 | With the widespread use of virtual reality applications, 3D scene generation has become a new challenging research frontier. 3D scenes have highly complex structures and need to ensure that the output is dense, coherent, and contains all necessary structures. Many current 3D scene generation methods rely on pre-trained text-to-image diffusion models and monocular depth estimators. However, the generated scenes occupy large amounts of storage space and often lack effective regularization methods, leading to geometric distortions. To this end, we propose BloomScene, a lightweight structured 3D Gaussian splatting for crossmodal scene generation, which creates diverse and high-quality 3D scenes from text or image inputs. Specifically, a crossmodal progressive scene generation framework is proposed to generate coherent scenes utilizing incremental point cloud reconstruction and 3D Gaussian splatting. Additionally, we propose a hierarchical depth prior-based regularization mechanism that utilizes multi-level constraints on depth accuracy and smoothness to enhance the realism and continuity of the generated scenes. Ultimately, we propose a structured context-guided compression mechanism that exploits structured hash grids to model the context of unorganized anchor attributes, which significantly eliminates structural redundancy and reduces storage overhead. Comprehensive experiments across multiple scenes demonstrate the significant potential and advantages of our framework compared with several baselines. 11 | 12 | ## Getting Started 13 | We provide a pretrained image inpainting model.
The download URLs are as follows: 14 | 15 | - Baidu Disk URL for [Image inpainting model (Runway)](https://pan.baidu.com/s/1kK110nhCK5maU1_oD-06yw?pwd=1pd2) 16 | 17 | - Google Drive URL for [Image inpainting model (Runway)](https://drive.google.com/drive/folders/1tP--db0MJtx1oaIRp-OU2hR0fzP_gWmm?usp=sharing) 18 | 19 | Please download the model file and put it under `./BloomScene/models--runwayml--stable-diffusion-inpainting`. 20 | 21 | ## Install 22 | ### Ubuntu 23 | We tested our code on a server with Ubuntu 18.04, CUDA 11.4, and gcc 9.4.0. 24 | 25 | #### Installation script 26 | 27 | ```bash 28 | conda env create --file environment.yml 29 | conda activate bloomscene 30 | # torch-scatter 31 | # Download https://data.pyg.org/whl/torch-2.0.0%2Bcu117/torch_scatter-2.1.2%2Bpt20cu117-cp39-cp39-linux_x86_64.whl 32 | pip install torch_scatter-2.1.2+pt20cu117-cp39-cp39-linux_x86_64.whl 33 | 34 | cd submodules/depth-diff-gaussian-rasterization 35 | python setup.py install 36 | cd ../simple-knn 37 | python setup.py install 38 | cd ../gridencoder 39 | python setup.py install 40 | cd ../.. 41 | ``` 42 | 43 | #### Run with your own samples 44 | ```bash 45 | # Default Example 46 | python run.py --image <path_to_image> --text <path_to_text> [Other options] 47 | ``` 48 | - Replace `<path_to_image>` and `<path_to_text>` with the paths to your image and text files. A concrete example using the bundled samples is shown below the options list. 49 | 50 | 51 | <details>
52 | <summary>Other options</summary> 53 | 54 | - `--image`: Input image for scene generation. 55 | - `--text`: Text prompt for scene generation. 56 | - `--neg_text`: Optional. Negative text prompt for scene generation. 57 | - `--lambdae`: Optional. Try different values for variable bitrate. 58 | - `--seed`: Manual seed for reproducibility. 59 | - `--dep_value`: Pixel-level depth regularization. 60 | - `--dep_value_lbd`: Lambda for pixel-level depth regularization. 61 | - `--dep_domin`: Distribution-level depth regularization. 62 | - `--dep_domin_lbd`: Lambda for distribution-level depth regularization. 63 | - `--dep_smooth`: Depth smoothness regularization. 64 | - `--dep_smooth_lbd`: Lambda for depth smoothness regularization. 65 | - `--diff_steps`: Optional. Number of inference steps for running Stable Diffusion Inpainting. 66 | - `--save_dir`: Optional. Directory to save the generated scenes and videos. Specify to organize outputs. 67 | - `--campath_gen`: Camera path for scene generation (options: `rotate360`). 68 | - `--campath_render`: Camera path for video rendering (options: `rotate360`). 69 | </details>
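For illustration, here is one way the command above might be invoked on the sample inputs shipped in `examples/`. The seed and the `outputs/...` directories are arbitrary choices for this sketch, not defaults prescribed by the repository:

```bash
# Generate a scene from the bundled child-room sample and render a rotate360 video.
# --seed and --save_dir values are illustrative; see the option list above.
python run.py --image examples/01_childroom.png --text examples/01_childroom.txt \
    --campath_gen rotate360 --campath_render rotate360 \
    --seed 1 --save_dir outputs/01_childroom

# The small-cabin sample also ships a negative prompt file:
python run.py --image examples/08_small_cabin.png --text examples/08_small_cabin.txt \
    --neg_text examples/08_small_cabin_negative.txt --save_dir outputs/08_small_cabin
```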
70 | 71 | 72 | 76 | 77 | 78 | ## Acknowledgement 79 | 80 | Many thanks to [LucidDreamer](https://github.com/luciddreamer-cvlab/LucidDreamer), [ZoeDepth](https://github.com/isl-org/ZoeDepth), [3DGS](https://github.com/graphdeco-inria/gaussian-splatting), [Scaffold-GS](https://github.com/city-super/Scaffold-GS), [HAC](https://github.com/YihangChen-ee/HAC) and Runway for their excellent codebase. -------------------------------------------------------------------------------- /ZoeDepth/.gitignore: -------------------------------------------------------------------------------- 1 | *.png 2 | **.gif 3 | .vscode/ 4 | *.rdb 5 | **.xml 6 | wandb/ 7 | slurm/ 8 | tmp/ 9 | .logs/ 10 | checkpoints/ 11 | external_jobs/ 12 | # Byte-compiled / optimized / DLL files 13 | __pycache__/ 14 | *.py[cod] 15 | *$py.class 16 | ptlflow_logs/ 17 | output/ 18 | log/ 19 | .idea/ 20 | # C extensions 21 | *.so 22 | results/ 23 | **.DS_Store 24 | **.pt 25 | demo/ 26 | # Distribution / packaging 27 | .Python 28 | build/ 29 | develop-eggs/ 30 | dist/ 31 | downloads/ 32 | eggs/ 33 | .eggs/ 34 | lib/ 35 | lib64/ 36 | parts/ 37 | sdist/ 38 | var/ 39 | wheels/ 40 | pip-wheel-metadata/ 41 | share/python-wheels/ 42 | *.egg-info/ 43 | .installed.cfg 44 | *.egg 45 | MANIFEST 46 | ~shortcuts/ 47 | **/wandb_logs/ 48 | **.db 49 | # PyInstaller 50 | # Usually these files are written by a python script from a template 51 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 52 | *.manifest 53 | *.spec 54 | 55 | # Installer logs 56 | pip-log.txt 57 | pip-delete-this-directory.txt 58 | 59 | # Unit test / coverage reports 60 | htmlcov/ 61 | .tox/ 62 | .nox/ 63 | .coverage 64 | .coverage.* 65 | .cache 66 | nosetests.xml 67 | coverage.xml 68 | *.cover 69 | *.py,cover 70 | .hypothesis/ 71 | .pytest_cache/ 72 | 73 | # Translations 74 | *.mo 75 | *.pot 76 | 77 | # Django stuff: 78 | *.log 79 | local_settings.py 80 | db.sqlite3 81 | db.sqlite3-journal 82 | 83 | # Flask stuff: 84 | instance/ 85 | .webassets-cache 86 | 87 | # Scrapy stuff: 88 | .scrapy 89 | 90 | # Sphinx documentation 91 | docs/_build/ 92 | 93 | # PyBuilder 94 | target/ 95 | 96 | # Jupyter Notebook 97 | .ipynb_checkpoints 98 | 99 | # IPython 100 | profile_default/ 101 | ipython_config.py 102 | 103 | # pyenv 104 | .python-version 105 | 106 | # pipenv 107 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 108 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 109 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 110 | # install all needed dependencies. 111 | #Pipfile.lock 112 | 113 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 114 | __pypackages__/ 115 | 116 | # Celery stuff 117 | celerybeat-schedule 118 | celerybeat.pid 119 | 120 | # SageMath parsed files 121 | *.sage.py 122 | 123 | # Environments 124 | .env 125 | .venv 126 | env/ 127 | venv/ 128 | ENV/ 129 | env.bak/ 130 | venv.bak/ 131 | 132 | # Spyder project settings 133 | .spyderproject 134 | .spyproject 135 | 136 | # Rope project settings 137 | .ropeproject 138 | 139 | # mkdocs documentation 140 | /site 141 | 142 | # mypy 143 | .mypy_cache/ 144 | .dmypy.json 145 | dmypy.json 146 | 147 | # Pyre type checker 148 | .pyre/ 149 | -------------------------------------------------------------------------------- /ZoeDepth/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /ZoeDepth/environment.yml: -------------------------------------------------------------------------------- 1 | name: zoe 2 | channels: 3 | - pytorch 4 | - nvidia 5 | - conda-forge 6 | dependencies: 7 | - cuda=11.7.1 8 | - h5py=3.7.0 9 | - hdf5=1.12.2 10 | - matplotlib=3.6.2 11 | - matplotlib-base=3.6.2 12 | - numpy=1.24.1 13 | - opencv=4.6.0 14 | - pip=22.3.1 15 | - python=3.9.7 16 | - pytorch=1.13.1 17 | - pytorch-cuda=11.7 18 | - pytorch-mutex=1.0 19 | - scipy=1.10.0 20 | - torchaudio=0.13.1 21 | - torchvision=0.14.1 22 | - pip: 23 | - huggingface-hub==0.11.1 24 | - timm==0.6.12 25 | - tqdm==4.64.1 26 | - wandb==0.13.9 27 | -------------------------------------------------------------------------------- /ZoeDepth/sanity.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 
14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import numpy as np 26 | from torchvision.transforms import ToTensor 27 | from PIL import Image 28 | from zoedepth.utils.misc import get_image_from_url, colorize 29 | import torch 30 | 31 | from zoedepth.models.builder import build_model 32 | from zoedepth.utils.config import get_config 33 | from pprint import pprint 34 | 35 | 36 | torch.hub.help("intel-isl/MiDaS", "DPT_BEiT_L_384", force_reload=True) 37 | 38 | DEVICE = "cuda" if torch.cuda.is_available() else "cpu" 39 | if DEVICE == "cpu": 40 | print("WARNING: Running on CPU. This will be slow. Check your CUDA installation.") 41 | 42 | print("*" * 20 + " Testing zoedepth " + "*" * 20) 43 | conf = get_config("zoedepth", "infer") 44 | 45 | 46 | print("Config:") 47 | pprint(conf) 48 | 49 | model = build_model(conf).to(DEVICE) 50 | model.eval() 51 | x = torch.rand(1, 3, 384, 512).to(DEVICE) 52 | 53 | print("-"*20 + "Testing on a random input" + "-"*20) 54 | 55 | with torch.no_grad(): 56 | out = model(x) 57 | 58 | if isinstance(out, dict): 59 | # print shapes of all outputs 60 | for k, v in out.items(): 61 | if v is not None: 62 | print(k, v.shape) 63 | else: 64 | print([o.shape for o in out if o is not None]) 65 | 66 | print("\n\n") 67 | print("-"*20 + " Testing on an indoor scene from url " + "-"*20) 68 | 69 | # Test img 70 | url = "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcS4W8H_Nxk_rs3Vje_zj6mglPOH7bnPhQitBH8WkqjlqQVotdtDEG37BsnGofME3_u6lDk&usqp=CAU" 71 | img = get_image_from_url(url) 72 | orig_size = img.size 73 | X = ToTensor()(img) 74 | X = X.unsqueeze(0).to(DEVICE) 75 | 76 | print("X.shape", X.shape) 77 | print("predicting") 78 | 79 | with torch.no_grad(): 80 | out = model.infer(X).cpu() 81 | 82 | # or just, 83 | # out = model.infer_pil(img) 84 | 85 | 86 | print("output.shape", out.shape) 87 | pred = Image.fromarray(colorize(out)) 88 | # Stack img and pred side by side for comparison and save 89 | pred = pred.resize(orig_size, Image.ANTIALIAS) 90 | stacked = Image.new("RGB", (orig_size[0]*2, orig_size[1])) 91 | stacked.paste(img, (0, 0)) 92 | stacked.paste(pred, (orig_size[0], 0)) 93 | 94 | stacked.save("pred.png") 95 | print("saved pred.png") 96 | 97 | 98 | model.infer_pil(img, output_type="pil").save("pred_raw.png") -------------------------------------------------------------------------------- /ZoeDepth/sanity_hub.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above 
copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | import numpy as np 27 | from torchvision.transforms import ToTensor 28 | from PIL import Image 29 | from zoedepth.utils.misc import get_image_from_url, colorize 30 | 31 | from zoedepth.models.builder import build_model 32 | from zoedepth.utils.config import get_config 33 | from pprint import pprint 34 | 35 | 36 | 37 | # Trigger reload of MiDaS 38 | torch.hub.help("intel-isl/MiDaS", "DPT_BEiT_L_384", force_reload=True) 39 | 40 | 41 | model = torch.hub.load(".", "ZoeD_K", source="local", pretrained=True) 42 | model = torch.hub.load(".", "ZoeD_NK", source="local", pretrained=True) 43 | model = torch.hub.load(".", "ZoeD_N", source="local", pretrained=True) 44 | -------------------------------------------------------------------------------- /ZoeDepth/train_mono.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from zoedepth.utils.misc import count_parameters, parallelize 26 | from zoedepth.utils.config import get_config 27 | from zoedepth.utils.arg_utils import parse_unknown 28 | from zoedepth.trainers.builder import get_trainer 29 | from zoedepth.models.builder import build_model 30 | from zoedepth.data.data_mono import DepthDataLoader 31 | import torch.utils.data.distributed 32 | import torch.multiprocessing as mp 33 | import torch 34 | import numpy as np 35 | from pprint import pprint 36 | import argparse 37 | import os 38 | 39 | os.environ["PYOPENGL_PLATFORM"] = "egl" 40 | os.environ["WANDB_START_METHOD"] = "thread" 41 | 42 | 43 | def fix_random_seed(seed: int): 44 | import random 45 | 46 | import numpy 47 | import torch 48 | 49 | random.seed(seed) 50 | numpy.random.seed(seed) 51 | torch.manual_seed(seed) 52 | torch.cuda.manual_seed(seed) 53 | torch.cuda.manual_seed_all(seed) 54 | 55 | torch.backends.cudnn.deterministic = True 56 | torch.backends.cudnn.benchmark = True 57 | 58 | 59 | def load_ckpt(config, model, checkpoint_dir="./checkpoints", ckpt_type="best"): 60 | import glob 61 | import os 62 | 63 | from zoedepth.models.model_io import load_wts 64 | 65 | if hasattr(config, "checkpoint"): 66 | checkpoint = config.checkpoint 67 | elif hasattr(config, "ckpt_pattern"): 68 | pattern = config.ckpt_pattern 69 | matches = glob.glob(os.path.join( 70 | checkpoint_dir, f"*{pattern}*{ckpt_type}*")) 71 | if not (len(matches) > 0): 72 | raise ValueError(f"No matches found for the pattern {pattern}") 73 | 74 | checkpoint = matches[0] 75 | 76 | else: 77 | return model 78 | model = load_wts(model, checkpoint) 79 | print("Loaded weights from {0}".format(checkpoint)) 80 | return model 81 | 82 | 83 | def main_worker(gpu, ngpus_per_node, config): 84 | try: 85 | seed = config.seed if 'seed' in config and config.seed else 43 86 | fix_random_seed(seed) 87 | 88 | config.gpu = gpu 89 | 90 | model = build_model(config) 91 | model = load_ckpt(config, model) 92 | model = parallelize(config, model) 93 | 94 | total_params = f"{round(count_parameters(model)/1e6,2)}M" 95 | config.total_params = total_params 96 | print(f"Total parameters : {total_params}") 97 | 98 | train_loader = DepthDataLoader(config, "train").data 99 | test_loader = DepthDataLoader(config, "online_eval").data 100 | 101 | trainer = get_trainer(config)( 102 | config, model, train_loader, test_loader, device=config.gpu) 103 | 104 | trainer.train() 105 | finally: 106 | import wandb 107 | wandb.finish() 108 | 109 | 110 | if __name__ == '__main__': 111 | mp.set_start_method('forkserver') 112 | 113 | parser = argparse.ArgumentParser() 114 | parser.add_argument("-m", "--model", type=str, default="synunet") 115 | parser.add_argument("-d", "--dataset", type=str, default='nyu') 116 | parser.add_argument("--trainer", type=str, default=None) 117 | 118 | args, unknown_args = parser.parse_known_args() 119 | overwrite_kwargs = parse_unknown(unknown_args) 120 | 121 | overwrite_kwargs["model"] = args.model 122 | if args.trainer is not None: 123 | overwrite_kwargs["trainer"] = args.trainer 124 | 125 | config = get_config(args.model, "train", args.dataset, **overwrite_kwargs) 126 | # git_commit() 127 | if config.use_shared_dict: 128 | shared_dict = mp.Manager().dict() 129 | else: 130 | shared_dict = None 131 | config.shared_dict = shared_dict 132 | 133 | config.batch_size = config.bs 134 | config.mode = 'train' 135 | if config.root != "." 
and not os.path.isdir(config.root): 136 | os.makedirs(config.root) 137 | 138 | try: 139 | node_str = os.environ['SLURM_JOB_NODELIST'].replace( 140 | '[', '').replace(']', '') 141 | nodes = node_str.split(',') 142 | 143 | config.world_size = len(nodes) 144 | config.rank = int(os.environ['SLURM_PROCID']) 145 | # config.save_dir = "/ibex/scratch/bhatsf/videodepth/checkpoints" 146 | 147 | except KeyError as e: 148 | # We are NOT using SLURM 149 | config.world_size = 1 150 | config.rank = 0 151 | nodes = ["127.0.0.1"] 152 | 153 | if config.distributed: 154 | 155 | print(config.rank) 156 | port = np.random.randint(15000, 15025) 157 | config.dist_url = 'tcp://{}:{}'.format(nodes[0], port) 158 | print(config.dist_url) 159 | config.dist_backend = 'nccl' 160 | config.gpu = None 161 | 162 | ngpus_per_node = torch.cuda.device_count() 163 | config.num_workers = config.workers 164 | config.ngpus_per_node = ngpus_per_node 165 | print("Config:") 166 | pprint(config) 167 | if config.distributed: 168 | config.world_size = ngpus_per_node * config.world_size 169 | mp.spawn(main_worker, nprocs=ngpus_per_node, 170 | args=(ngpus_per_node, config)) 171 | else: 172 | if ngpus_per_node == 1: 173 | config.gpu = 0 174 | main_worker(config.gpu, ngpus_per_node, config) 175 | -------------------------------------------------------------------------------- /ZoeDepth/ui/app.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import gradio as gr 26 | import torch 27 | 28 | from .gradio_depth_pred import create_demo as create_depth_pred_demo 29 | from .gradio_im_to_3d import create_demo as create_im_to_3d_demo 30 | from .gradio_pano_to_3d import create_demo as create_pano_to_3d_demo 31 | 32 | 33 | css = """ 34 | #img-display-container { 35 | max-height: 50vh; 36 | } 37 | #img-display-input { 38 | max-height: 40vh; 39 | } 40 | #img-display-output { 41 | max-height: 40vh; 42 | } 43 | 44 | """ 45 | DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' 46 | model = torch.hub.load('isl-org/ZoeDepth', "ZoeD_N", pretrained=True).to(DEVICE).eval() 47 | 48 | title = "# ZoeDepth" 49 | description = """Official demo for **ZoeDepth: Zero-shot Transfer by Combining Relative and Metric Depth**. 
50 | 51 | ZoeDepth is a deep learning model for metric depth estimation from a single image. 52 | 53 | Please refer to our [paper](https://arxiv.org/abs/2302.12288) or [github](https://github.com/isl-org/ZoeDepth) for more details.""" 54 | 55 | with gr.Blocks(css=css) as demo: 56 | gr.Markdown(title) 57 | gr.Markdown(description) 58 | with gr.Tab("Depth Prediction"): 59 | create_depth_pred_demo(model) 60 | with gr.Tab("Image to 3D"): 61 | create_im_to_3d_demo(model) 62 | with gr.Tab("360 Panorama to 3D"): 63 | create_pano_to_3d_demo(model) 64 | 65 | if __name__ == '__main__': 66 | demo.queue().launch() -------------------------------------------------------------------------------- /ZoeDepth/ui/gradio_depth_pred.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import gradio as gr 26 | from zoedepth.utils.misc import colorize 27 | from PIL import Image 28 | import tempfile 29 | 30 | def predict_depth(model, image): 31 | depth = model.infer_pil(image) 32 | return depth 33 | 34 | def create_demo(model): 35 | gr.Markdown("### Depth Prediction demo") 36 | with gr.Row(): 37 | input_image = gr.Image(label="Input Image", type='pil', elem_id='img-display-input').style(height="auto") 38 | depth_image = gr.Image(label="Depth Map", elem_id='img-display-output') 39 | raw_file = gr.File(label="16-bit raw depth, multiplier:256") 40 | submit = gr.Button("Submit") 41 | 42 | def on_submit(image): 43 | depth = predict_depth(model, image) 44 | colored_depth = colorize(depth, cmap='gray_r') 45 | tmp = tempfile.NamedTemporaryFile(suffix='.png', delete=False) 46 | raw_depth = Image.fromarray((depth*256).astype('uint16')) 47 | raw_depth.save(tmp.name) 48 | return [colored_depth, tmp.name] 49 | 50 | submit.click(on_submit, inputs=[input_image], outputs=[depth_image, raw_file]) 51 | # examples = gr.Examples(examples=["examples/person_1.jpeg", "examples/person_2.jpeg", "examples/person-leaves.png", "examples/living-room.jpeg"], 52 | # inputs=[input_image]) -------------------------------------------------------------------------------- /ZoeDepth/ui/gradio_im_to_3d.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import gradio as gr 26 | import numpy as np 27 | import trimesh 28 | from zoedepth.utils.geometry import depth_to_points, create_triangles 29 | from functools import partial 30 | import tempfile 31 | 32 | 33 | def depth_edges_mask(depth): 34 | """Returns a mask of edges in the depth map. 35 | Args: 36 | depth: 2D numpy array of shape (H, W) with dtype float32. 37 | Returns: 38 | mask: 2D numpy array of shape (H, W) with dtype bool. 39 | """ 40 | # Compute the x and y gradients of the depth map. 41 | depth_dx, depth_dy = np.gradient(depth) 42 | # Compute the gradient magnitude. 43 | depth_grad = np.sqrt(depth_dx ** 2 + depth_dy ** 2) 44 | # Compute the edge mask. 
45 | mask = depth_grad > 0.05 46 | return mask 47 | 48 | 49 | def predict_depth(model, image): 50 | depth = model.infer_pil(image) 51 | return depth 52 | 53 | def get_mesh(model, image, keep_edges=False): 54 | image.thumbnail((1024,1024)) # limit the size of the input image 55 | depth = predict_depth(model, image) 56 | pts3d = depth_to_points(depth[None]) 57 | pts3d = pts3d.reshape(-1, 3) 58 | 59 | # Create a trimesh mesh from the points 60 | # Each pixel is connected to its 4 neighbors 61 | # colors are the RGB values of the image 62 | 63 | verts = pts3d.reshape(-1, 3) 64 | image = np.array(image) 65 | if keep_edges: 66 | triangles = create_triangles(image.shape[0], image.shape[1]) 67 | else: 68 | triangles = create_triangles(image.shape[0], image.shape[1], mask=~depth_edges_mask(depth)) 69 | colors = image.reshape(-1, 3) 70 | mesh = trimesh.Trimesh(vertices=verts, faces=triangles, vertex_colors=colors) 71 | 72 | # Save as glb 73 | glb_file = tempfile.NamedTemporaryFile(suffix='.glb', delete=False) 74 | glb_path = glb_file.name 75 | mesh.export(glb_path) 76 | return glb_path 77 | 78 | def create_demo(model): 79 | 80 | gr.Markdown("### Image to 3D mesh") 81 | gr.Markdown("Convert a single 2D image to a 3D mesh") 82 | 83 | with gr.Row(): 84 | image = gr.Image(label="Input Image", type='pil') 85 | result = gr.Model3D(label="3d mesh reconstruction", clear_color=[ 86 | 1.0, 1.0, 1.0, 1.0]) 87 | 88 | checkbox = gr.Checkbox(label="Keep occlusion edges", value=False) 89 | submit = gr.Button("Submit") 90 | submit.click(partial(get_mesh, model), inputs=[image, checkbox], outputs=[result]) 91 | # examples = gr.Examples(examples=["examples/aerial_beach.jpeg", "examples/mountains.jpeg", "examples/person_1.jpeg", "examples/ancient-carved.jpeg"], 92 | # inputs=[image]) 93 | 94 | -------------------------------------------------------------------------------- /ZoeDepth/ui/gradio_pano_to_3d.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import gradio as gr 26 | import numpy as np 27 | import trimesh 28 | from zoedepth.utils.geometry import create_triangles 29 | from functools import partial 30 | import tempfile 31 | 32 | def depth_edges_mask(depth): 33 | """Returns a mask of edges in the depth map. 
34 | Args: 35 | depth: 2D numpy array of shape (H, W) with dtype float32. 36 | Returns: 37 | mask: 2D numpy array of shape (H, W) with dtype bool. 38 | """ 39 | # Compute the x and y gradients of the depth map. 40 | depth_dx, depth_dy = np.gradient(depth) 41 | # Compute the gradient magnitude. 42 | depth_grad = np.sqrt(depth_dx ** 2 + depth_dy ** 2) 43 | # Compute the edge mask. 44 | mask = depth_grad > 0.05 45 | return mask 46 | 47 | 48 | def pano_depth_to_world_points(depth): 49 | """ 50 | 360 depth to world points 51 | given 2D depth is an equirectangular projection of a spherical image 52 | Treat depth as radius 53 | 54 | longitude : -pi to pi 55 | latitude : -pi/2 to pi/2 56 | """ 57 | 58 | # Convert depth to radius 59 | radius = depth.flatten() 60 | 61 | lon = np.linspace(-np.pi, np.pi, depth.shape[1]) 62 | lat = np.linspace(-np.pi/2, np.pi/2, depth.shape[0]) 63 | 64 | lon, lat = np.meshgrid(lon, lat) 65 | lon = lon.flatten() 66 | lat = lat.flatten() 67 | 68 | # Convert to cartesian coordinates 69 | x = radius * np.cos(lat) * np.cos(lon) 70 | y = radius * np.cos(lat) * np.sin(lon) 71 | z = radius * np.sin(lat) 72 | 73 | pts3d = np.stack([x, y, z], axis=1) 74 | 75 | return pts3d 76 | 77 | 78 | def predict_depth(model, image): 79 | depth = model.infer_pil(image) 80 | return depth 81 | 82 | def get_mesh(model, image, keep_edges=False): 83 | image.thumbnail((1024,1024)) # limit the size of the image 84 | depth = predict_depth(model, image) 85 | pts3d = pano_depth_to_world_points(depth) 86 | 87 | # Create a trimesh mesh from the points 88 | # Each pixel is connected to its 4 neighbors 89 | # colors are the RGB values of the image 90 | 91 | verts = pts3d.reshape(-1, 3) 92 | image = np.array(image) 93 | if keep_edges: 94 | triangles = create_triangles(image.shape[0], image.shape[1]) 95 | else: 96 | triangles = create_triangles(image.shape[0], image.shape[1], mask=~depth_edges_mask(depth)) 97 | colors = image.reshape(-1, 3) 98 | mesh = trimesh.Trimesh(vertices=verts, faces=triangles, vertex_colors=colors) 99 | 100 | # Save as glb 101 | glb_file = tempfile.NamedTemporaryFile(suffix='.glb', delete=False) 102 | glb_path = glb_file.name 103 | mesh.export(glb_path) 104 | return glb_path 105 | 106 | def create_demo(model): 107 | gr.Markdown("### Panorama to 3D mesh") 108 | gr.Markdown("Convert a 360 spherical panorama to a 3D mesh") 109 | gr.Markdown("ZoeDepth was not trained on panoramic images. It doesn't know anything about panoramas or spherical projection. Here, we just treat the estimated depth as radius and some projection errors are expected. 
Nonetheless, ZoeDepth still works surprisingly well on 360 reconstruction.") 110 | 111 | with gr.Row(): 112 | input_image = gr.Image(label="Input Image", type='pil') 113 | result = gr.Model3D(label="3d mesh reconstruction", clear_color=[ 114 | 1.0, 1.0, 1.0, 1.0]) 115 | 116 | checkbox = gr.Checkbox(label="Keep occlusion edges", value=True) 117 | submit = gr.Button("Submit") 118 | submit.click(partial(get_mesh, model), inputs=[input_image, checkbox], outputs=[result]) 119 | # examples = gr.Examples(examples=["examples/pano_1.jpeg", "examples/pano_2.jpeg", "examples/pano_3.jpeg"], 120 | # inputs=[input_image]) -------------------------------------------------------------------------------- /ZoeDepth/ui/ui_requirements.txt: -------------------------------------------------------------------------------- 1 | gradio 2 | trimesh==3.9.42 -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/ddad.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self, resize_shape): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | self.resize = transforms.Resize(resize_shape) 40 | 41 | def __call__(self, sample): 42 | image, depth = sample['image'], sample['depth'] 43 | image = self.to_tensor(image) 44 | image = self.normalize(image) 45 | depth = self.to_tensor(depth) 46 | 47 | image = self.resize(image) 48 | 49 | return {'image': image, 'depth': depth, 'dataset': "ddad"} 50 | 51 | def to_tensor(self, pic): 52 | 53 | if isinstance(pic, np.ndarray): 54 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 55 | return img 56 | 57 | # # handle PIL Image 58 | if pic.mode == 'I': 59 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 60 | elif pic.mode == 'I;16': 61 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 62 | else: 63 | img = torch.ByteTensor( 64 | torch.ByteStorage.from_buffer(pic.tobytes())) 65 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 66 | if pic.mode == 'YCbCr': 67 | nchannel = 3 68 | elif pic.mode == 'I;16': 69 | nchannel = 1 70 | else: 71 | nchannel = len(pic.mode) 72 | img = img.view(pic.size[1], pic.size[0], nchannel) 73 | 74 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 75 | 76 | if isinstance(img, torch.ByteTensor): 77 | return img.float() 78 | else: 79 | return img 80 | 81 | 82 | class DDAD(Dataset): 83 | def __init__(self, data_dir_root, resize_shape): 84 | import glob 85 | 86 | # image paths are of the form /{outleft, depthmap}/*.png 87 | self.image_files = glob.glob(os.path.join(data_dir_root, '*.png')) 88 | self.depth_files = [r.replace("_rgb.png", "_depth.npy") 89 | for r in self.image_files] 90 | self.transform = ToTensor(resize_shape) 91 | 92 | def __getitem__(self, idx): 93 | 94 | image_path = self.image_files[idx] 95 | depth_path = self.depth_files[idx] 96 | 97 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 98 | depth = np.load(depth_path) # meters 99 | 100 | # depth[depth > 8] = -1 101 | depth = depth[..., None] 102 | 103 | sample = dict(image=image, depth=depth) 104 | sample = self.transform(sample) 105 | 106 | if idx == 0: 107 | print(sample["image"].shape) 108 | 109 | return sample 110 | 111 | def __len__(self): 112 | return len(self.image_files) 113 | 114 | 115 | def get_ddad_loader(data_dir_root, resize_shape, batch_size=1, **kwargs): 116 | dataset = DDAD(data_dir_root, resize_shape) 117 | return DataLoader(dataset, batch_size, **kwargs) 118 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/diml_indoor_test.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to 
deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | self.resize = transforms.Resize((480, 640)) 40 | 41 | def __call__(self, sample): 42 | image, depth = sample['image'], sample['depth'] 43 | image = self.to_tensor(image) 44 | image = self.normalize(image) 45 | depth = self.to_tensor(depth) 46 | 47 | image = self.resize(image) 48 | 49 | return {'image': image, 'depth': depth, 'dataset': "diml_indoor"} 50 | 51 | def to_tensor(self, pic): 52 | 53 | if isinstance(pic, np.ndarray): 54 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 55 | return img 56 | 57 | # # handle PIL Image 58 | if pic.mode == 'I': 59 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 60 | elif pic.mode == 'I;16': 61 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 62 | else: 63 | img = torch.ByteTensor( 64 | torch.ByteStorage.from_buffer(pic.tobytes())) 65 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 66 | if pic.mode == 'YCbCr': 67 | nchannel = 3 68 | elif pic.mode == 'I;16': 69 | nchannel = 1 70 | else: 71 | nchannel = len(pic.mode) 72 | img = img.view(pic.size[1], pic.size[0], nchannel) 73 | 74 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 75 | if isinstance(img, torch.ByteTensor): 76 | return img.float() 77 | else: 78 | return img 79 | 80 | 81 | class DIML_Indoor(Dataset): 82 | def __init__(self, data_dir_root): 83 | import glob 84 | 85 | # image paths are of the form /{HR, LR}//{color, depth_filled}/*.png 86 | self.image_files = glob.glob(os.path.join( 87 | data_dir_root, "LR", '*', 'color', '*.png')) 88 | self.depth_files = [r.replace("color", "depth_filled").replace( 89 | "_c.png", "_depth_filled.png") for r in self.image_files] 90 | self.transform = ToTensor() 91 | 92 | def __getitem__(self, idx): 93 | image_path = self.image_files[idx] 94 | depth_path = self.depth_files[idx] 95 | 96 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 97 | depth = np.asarray(Image.open(depth_path), 98 | dtype='uint16') / 1000.0 # mm to meters 99 | 100 | # print(np.shape(image)) 101 | # print(np.shape(depth)) 102 | 103 | # depth[depth > 8] = -1 104 | depth = depth[..., None] 105 | 106 | sample = dict(image=image, depth=depth) 107 
| 108 | # return sample 109 | sample = self.transform(sample) 110 | 111 | if idx == 0: 112 | print(sample["image"].shape) 113 | 114 | return sample 115 | 116 | def __len__(self): 117 | return len(self.image_files) 118 | 119 | 120 | def get_diml_indoor_loader(data_dir_root, batch_size=1, **kwargs): 121 | dataset = DIML_Indoor(data_dir_root) 122 | return DataLoader(dataset, batch_size, **kwargs) 123 | 124 | # get_diml_indoor_loader(data_dir_root="datasets/diml/indoor/test/HR") 125 | # get_diml_indoor_loader(data_dir_root="datasets/diml/indoor/test/LR") 126 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/diml_outdoor_test.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | 40 | def __call__(self, sample): 41 | image, depth = sample['image'], sample['depth'] 42 | image = self.to_tensor(image) 43 | image = self.normalize(image) 44 | depth = self.to_tensor(depth) 45 | 46 | return {'image': image, 'depth': depth, 'dataset': "diml_outdoor"} 47 | 48 | def to_tensor(self, pic): 49 | 50 | if isinstance(pic, np.ndarray): 51 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 52 | return img 53 | 54 | # # handle PIL Image 55 | if pic.mode == 'I': 56 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 57 | elif pic.mode == 'I;16': 58 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 59 | else: 60 | img = torch.ByteTensor( 61 | torch.ByteStorage.from_buffer(pic.tobytes())) 62 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 63 | if pic.mode == 'YCbCr': 64 | nchannel = 3 65 | elif pic.mode == 'I;16': 66 | nchannel = 1 67 | else: 68 | nchannel = len(pic.mode) 69 | img = img.view(pic.size[1], pic.size[0], nchannel) 70 | 71 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 72 | if isinstance(img, torch.ByteTensor): 73 | return img.float() 74 | else: 75 | return img 76 | 77 | 78 | class DIML_Outdoor(Dataset): 79 | def __init__(self, data_dir_root): 80 | import glob 81 | 82 | # image paths are of the form /{outleft, depthmap}/*.png 83 | self.image_files = glob.glob(os.path.join( 84 | data_dir_root, "*", 'outleft', '*.png')) 85 | self.depth_files = [r.replace("outleft", "depthmap") 86 | for r in self.image_files] 87 | self.transform = ToTensor() 88 | 89 | def __getitem__(self, idx): 90 | image_path = self.image_files[idx] 91 | depth_path = self.depth_files[idx] 92 | 93 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 94 | depth = np.asarray(Image.open(depth_path), 95 | dtype='uint16') / 1000.0 # mm to meters 96 | 97 | # depth[depth > 8] = -1 98 | depth = depth[..., None] 99 | 100 | sample = dict(image=image, depth=depth, dataset="diml_outdoor") 101 | 102 | # return sample 103 | return self.transform(sample) 104 | 105 | def __len__(self): 106 | return len(self.image_files) 107 | 108 | 109 | def get_diml_outdoor_loader(data_dir_root, batch_size=1, **kwargs): 110 | dataset = DIML_Outdoor(data_dir_root) 111 | return DataLoader(dataset, batch_size, **kwargs) 112 | 113 | # get_diml_outdoor_loader(data_dir_root="datasets/diml/outdoor/test/HR") 114 | # get_diml_outdoor_loader(data_dir_root="datasets/diml/outdoor/test/LR") 115 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/diode.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the 
Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | self.resize = transforms.Resize(480) 40 | 41 | def __call__(self, sample): 42 | image, depth = sample['image'], sample['depth'] 43 | image = self.to_tensor(image) 44 | image = self.normalize(image) 45 | depth = self.to_tensor(depth) 46 | 47 | image = self.resize(image) 48 | 49 | return {'image': image, 'depth': depth, 'dataset': "diode"} 50 | 51 | def to_tensor(self, pic): 52 | 53 | if isinstance(pic, np.ndarray): 54 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 55 | return img 56 | 57 | # # handle PIL Image 58 | if pic.mode == 'I': 59 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 60 | elif pic.mode == 'I;16': 61 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 62 | else: 63 | img = torch.ByteTensor( 64 | torch.ByteStorage.from_buffer(pic.tobytes())) 65 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 66 | if pic.mode == 'YCbCr': 67 | nchannel = 3 68 | elif pic.mode == 'I;16': 69 | nchannel = 1 70 | else: 71 | nchannel = len(pic.mode) 72 | img = img.view(pic.size[1], pic.size[0], nchannel) 73 | 74 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 75 | 76 | if isinstance(img, torch.ByteTensor): 77 | return img.float() 78 | else: 79 | return img 80 | 81 | 82 | class DIODE(Dataset): 83 | def __init__(self, data_dir_root): 84 | import glob 85 | 86 | # image paths are of the form /scene_#/scan_#/*.png 87 | self.image_files = glob.glob( 88 | os.path.join(data_dir_root, '*', '*', '*.png')) 89 | self.depth_files = [r.replace(".png", "_depth.npy") 90 | for r in self.image_files] 91 | self.depth_mask_files = [ 92 | r.replace(".png", "_depth_mask.npy") for r in self.image_files] 93 | self.transform = ToTensor() 94 | 95 | def __getitem__(self, idx): 96 | image_path = self.image_files[idx] 97 | depth_path = self.depth_files[idx] 98 | depth_mask_path = self.depth_mask_files[idx] 99 | 100 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 101 | depth = np.load(depth_path) # in meters 102 | valid = np.load(depth_mask_path) # binary 103 | 104 | # depth[depth > 8] = -1 105 | # depth = depth[..., None] 106 | 107 | sample = dict(image=image, depth=depth, valid=valid) 108 | 109 | # return sample 110 | sample = self.transform(sample) 111 | 112 | if idx == 0: 113 | print(sample["image"].shape) 114 | 115 | return sample 116 | 117 | def __len__(self): 118 | return len(self.image_files) 
119 | 120 | 121 | def get_diode_loader(data_dir_root, batch_size=1, **kwargs): 122 | dataset = DIODE(data_dir_root) 123 | return DataLoader(dataset, batch_size, **kwargs) 124 | 125 | # get_diode_loader(data_dir_root="datasets/diode/val/outdoor") 126 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/hypersim.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import glob 26 | import os 27 | 28 | import h5py 29 | import numpy as np 30 | import torch 31 | from PIL import Image 32 | from torch.utils.data import DataLoader, Dataset 33 | from torchvision import transforms 34 | 35 | 36 | def hypersim_distance_to_depth(npyDistance): 37 | intWidth, intHeight, fltFocal = 1024, 768, 886.81 38 | 39 | npyImageplaneX = np.linspace((-0.5 * intWidth) + 0.5, (0.5 * intWidth) - 0.5, intWidth).reshape( 40 | 1, intWidth).repeat(intHeight, 0).astype(np.float32)[:, :, None] 41 | npyImageplaneY = np.linspace((-0.5 * intHeight) + 0.5, (0.5 * intHeight) - 0.5, 42 | intHeight).reshape(intHeight, 1).repeat(intWidth, 1).astype(np.float32)[:, :, None] 43 | npyImageplaneZ = np.full([intHeight, intWidth, 1], fltFocal, np.float32) 44 | npyImageplane = np.concatenate( 45 | [npyImageplaneX, npyImageplaneY, npyImageplaneZ], 2) 46 | 47 | npyDepth = npyDistance / np.linalg.norm(npyImageplane, 2, 2) * fltFocal 48 | return npyDepth 49 | 50 | 51 | class ToTensor(object): 52 | def __init__(self): 53 | # self.normalize = transforms.Normalize( 54 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 55 | self.normalize = lambda x: x 56 | self.resize = transforms.Resize((480, 640)) 57 | 58 | def __call__(self, sample): 59 | image, depth = sample['image'], sample['depth'] 60 | image = self.to_tensor(image) 61 | image = self.normalize(image) 62 | depth = self.to_tensor(depth) 63 | 64 | image = self.resize(image) 65 | 66 | return {'image': image, 'depth': depth, 'dataset': "hypersim"} 67 | 68 | def to_tensor(self, pic): 69 | 70 | if isinstance(pic, np.ndarray): 71 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 72 | return img 73 | 74 | # # handle PIL Image 75 | if pic.mode == 'I': 76 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 77 | elif pic.mode == 'I;16': 78 | img = 
torch.from_numpy(np.array(pic, np.int16, copy=False)) 79 | else: 80 | img = torch.ByteTensor( 81 | torch.ByteStorage.from_buffer(pic.tobytes())) 82 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 83 | if pic.mode == 'YCbCr': 84 | nchannel = 3 85 | elif pic.mode == 'I;16': 86 | nchannel = 1 87 | else: 88 | nchannel = len(pic.mode) 89 | img = img.view(pic.size[1], pic.size[0], nchannel) 90 | 91 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 92 | if isinstance(img, torch.ByteTensor): 93 | return img.float() 94 | else: 95 | return img 96 | 97 | 98 | class HyperSim(Dataset): 99 | def __init__(self, data_dir_root): 100 | # image paths are of the form //images/scene_cam_#_final_preview/*.tonemap.jpg 101 | # depth paths are of the form //images/scene_cam_#_final_preview/*.depth_meters.hdf5 102 | self.image_files = glob.glob(os.path.join( 103 | data_dir_root, '*', 'images', 'scene_cam_*_final_preview', '*.tonemap.jpg')) 104 | self.depth_files = [r.replace("_final_preview", "_geometry_hdf5").replace( 105 | ".tonemap.jpg", ".depth_meters.hdf5") for r in self.image_files] 106 | self.transform = ToTensor() 107 | 108 | def __getitem__(self, idx): 109 | image_path = self.image_files[idx] 110 | depth_path = self.depth_files[idx] 111 | 112 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 113 | 114 | # depth from hdf5 115 | depth_fd = h5py.File(depth_path, "r") 116 | # in meters (Euclidean distance) 117 | distance_meters = np.array(depth_fd['dataset']) 118 | depth = hypersim_distance_to_depth( 119 | distance_meters) # in meters (planar depth) 120 | 121 | # depth[depth > 8] = -1 122 | depth = depth[..., None] 123 | 124 | sample = dict(image=image, depth=depth) 125 | sample = self.transform(sample) 126 | 127 | if idx == 0: 128 | print(sample["image"].shape) 129 | 130 | return sample 131 | 132 | def __len__(self): 133 | return len(self.image_files) 134 | 135 | 136 | def get_hypersim_loader(data_dir_root, batch_size=1, **kwargs): 137 | dataset = HyperSim(data_dir_root) 138 | return DataLoader(dataset, batch_size, **kwargs) 139 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/ibims.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
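# Illustrative usage sketch for zoedepth/data/hypersim.py (above); not part of the original file.
# hypersim_distance_to_depth converts HyperSim's per-pixel ray distances (Euclidean distance from the
# camera centre, hard-coded 1024x768 resolution and focal length 886.81) into planar depth along the
# optical axis. The constant 5.0 m distance map below is an assumption chosen only to show the effect.
#
#     import numpy as np
#     from zoedepth.data.hypersim import hypersim_distance_to_depth
#
#     distance = np.full((768, 1024), 5.0, dtype=np.float32)   # ray distance in metres
#     depth = hypersim_distance_to_depth(distance)             # planar depth, same shape
#     # depth matches the distance near the principal point and shrinks towards the image corners,
#     # so depth <= distance everywhere.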
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms as T 32 | 33 | 34 | class iBims(Dataset): 35 | def __init__(self, config): 36 | root_folder = config.ibims_root 37 | with open(os.path.join(root_folder, "imagelist.txt"), 'r') as f: 38 | imglist = f.read().split() 39 | 40 | samples = [] 41 | for basename in imglist: 42 | img_path = os.path.join(root_folder, 'rgb', basename + ".png") 43 | depth_path = os.path.join(root_folder, 'depth', basename + ".png") 44 | valid_mask_path = os.path.join( 45 | root_folder, 'mask_invalid', basename+".png") 46 | transp_mask_path = os.path.join( 47 | root_folder, 'mask_transp', basename+".png") 48 | 49 | samples.append( 50 | (img_path, depth_path, valid_mask_path, transp_mask_path)) 51 | 52 | self.samples = samples 53 | # self.normalize = T.Normalize( 54 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 55 | self.normalize = lambda x : x 56 | 57 | def __getitem__(self, idx): 58 | img_path, depth_path, valid_mask_path, transp_mask_path = self.samples[idx] 59 | 60 | img = np.asarray(Image.open(img_path), dtype=np.float32) / 255.0 61 | depth = np.asarray(Image.open(depth_path), 62 | dtype=np.uint16).astype('float')*50.0/65535 63 | 64 | mask_valid = np.asarray(Image.open(valid_mask_path)) 65 | mask_transp = np.asarray(Image.open(transp_mask_path)) 66 | 67 | # depth = depth * mask_valid * mask_transp 68 | depth = np.where(mask_valid * mask_transp, depth, -1) 69 | 70 | img = torch.from_numpy(img).permute(2, 0, 1) 71 | img = self.normalize(img) 72 | depth = torch.from_numpy(depth).unsqueeze(0) 73 | return dict(image=img, depth=depth, image_path=img_path, depth_path=depth_path, dataset='ibims') 74 | 75 | def __len__(self): 76 | return len(self.samples) 77 | 78 | 79 | def get_ibims_loader(config, batch_size=1, **kwargs): 80 | dataloader = DataLoader(iBims(config), batch_size=batch_size, **kwargs) 81 | return dataloader 82 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/preprocess.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
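# Illustrative usage sketch for zoedepth/data/ibims.py (above); not part of the original file.
# get_ibims_loader expects a config whose ibims_root points at an iBims-1 folder containing
# imagelist.txt plus rgb/, depth/, mask_invalid/ and mask_transp/ sub-folders. The path below is an
# assumption; EasyDict (zoedepth/utils/easydict) is used only for attribute-style access.
#
#     from zoedepth.data.ibims import get_ibims_loader
#     from zoedepth.utils.easydict import EasyDict
#
#     config = EasyDict(ibims_root="datasets/ibims")           # assumed dataset location
#     loader = get_ibims_loader(config, batch_size=1)
#     sample = next(iter(loader))
#     # sample["image"]: (1, 3, H, W) RGB in [0, 1]; sample["depth"]: (1, 1, H, W) in metres,
#     # set to -1 wherever the invalid or transparency mask is zero.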
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import numpy as np 26 | from dataclasses import dataclass 27 | from typing import Tuple, List 28 | 29 | # dataclass to store the crop parameters 30 | @dataclass 31 | class CropParams: 32 | top: int 33 | bottom: int 34 | left: int 35 | right: int 36 | 37 | 38 | 39 | def get_border_params(rgb_image, tolerance=0.1, cut_off=20, value=0, level_diff_threshold=5, channel_axis=-1, min_border=5) -> CropParams: 40 | gray_image = np.mean(rgb_image, axis=channel_axis) 41 | h, w = gray_image.shape 42 | 43 | 44 | def num_value_pixels(arr): 45 | return np.sum(np.abs(arr - value) < level_diff_threshold) 46 | 47 | def is_above_tolerance(arr, total_pixels): 48 | return (num_value_pixels(arr) / total_pixels) > tolerance 49 | 50 | # Crop top border until number of value pixels become below tolerance 51 | top = min_border 52 | while is_above_tolerance(gray_image[top, :], w) and top < h-1: 53 | top += 1 54 | if top > cut_off: 55 | break 56 | 57 | # Crop bottom border until number of value pixels become below tolerance 58 | bottom = h - min_border 59 | while is_above_tolerance(gray_image[bottom, :], w) and bottom > 0: 60 | bottom -= 1 61 | if h - bottom > cut_off: 62 | break 63 | 64 | # Crop left border until number of value pixels become below tolerance 65 | left = min_border 66 | while is_above_tolerance(gray_image[:, left], h) and left < w-1: 67 | left += 1 68 | if left > cut_off: 69 | break 70 | 71 | # Crop right border until number of value pixels become below tolerance 72 | right = w - min_border 73 | while is_above_tolerance(gray_image[:, right], h) and right > 0: 74 | right -= 1 75 | if w - right > cut_off: 76 | break 77 | 78 | 79 | return CropParams(top, bottom, left, right) 80 | 81 | 82 | def get_white_border(rgb_image, value=255, **kwargs) -> CropParams: 83 | """Crops the white border of the RGB. 84 | 85 | Args: 86 | rgb: RGB image, shape (H, W, 3). 87 | Returns: 88 | Crop parameters. 89 | """ 90 | if value == 255: 91 | # assert range of values in rgb image is [0, 255] 92 | assert np.max(rgb_image) <= 255 and np.min(rgb_image) >= 0, "RGB image values are not in range [0, 255]." 93 | assert rgb_image.max() > 1, "RGB image values are not in range [0, 255]." 94 | elif value == 1: 95 | # assert range of values in rgb image is [0, 1] 96 | assert np.max(rgb_image) <= 1 and np.min(rgb_image) >= 0, "RGB image values are not in range [0, 1]." 97 | 98 | return get_border_params(rgb_image, value=value, **kwargs) 99 | 100 | def get_black_border(rgb_image, **kwargs) -> CropParams: 101 | """Crops the black border of the RGB. 102 | 103 | Args: 104 | rgb: RGB image, shape (H, W, 3). 105 | 106 | Returns: 107 | Crop parameters. 108 | """ 109 | 110 | return get_border_params(rgb_image, value=0, **kwargs) 111 | 112 | def crop_image(image: np.ndarray, crop_params: CropParams) -> np.ndarray: 113 | """Crops the image according to the crop parameters. 114 | 115 | Args: 116 | image: RGB or depth image, shape (H, W, 3) or (H, W). 117 | crop_params: Crop parameters. 118 | 119 | Returns: 120 | Cropped image. 121 | """ 122 | return image[crop_params.top:crop_params.bottom, crop_params.left:crop_params.right] 123 | 124 | def crop_images(*images: np.ndarray, crop_params: CropParams) -> Tuple[np.ndarray]: 125 | """Crops the images according to the crop parameters. 126 | 127 | Args: 128 | images: RGB or depth images, shape (H, W, 3) or (H, W). 129 | crop_params: Crop parameters. 130 | 131 | Returns: 132 | Cropped images. 
133 | """ 134 | return tuple(crop_image(image, crop_params) for image in images) 135 | 136 | def crop_black_or_white_border(rgb_image, *other_images: np.ndarray, tolerance=0.1, cut_off=20, level_diff_threshold=5) -> Tuple[np.ndarray]: 137 | """Crops the white and black border of the RGB and depth images. 138 | 139 | Args: 140 | rgb: RGB image, shape (H, W, 3). This image is used to determine the border. 141 | other_images: The other images to crop according to the border of the RGB image. 142 | Returns: 143 | Cropped RGB and other images. 144 | """ 145 | # crop black border 146 | crop_params = get_black_border(rgb_image, tolerance=tolerance, cut_off=cut_off, level_diff_threshold=level_diff_threshold) 147 | cropped_images = crop_images(rgb_image, *other_images, crop_params=crop_params) 148 | 149 | # crop white border 150 | crop_params = get_white_border(cropped_images[0], tolerance=tolerance, cut_off=cut_off, level_diff_threshold=level_diff_threshold) 151 | cropped_images = crop_images(*cropped_images, crop_params=crop_params) 152 | 153 | return cropped_images 154 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/sun_rgbd_loader.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | 40 | def __call__(self, sample): 41 | image, depth = sample['image'], sample['depth'] 42 | image = self.to_tensor(image) 43 | image = self.normalize(image) 44 | depth = self.to_tensor(depth) 45 | 46 | return {'image': image, 'depth': depth, 'dataset': "sunrgbd"} 47 | 48 | def to_tensor(self, pic): 49 | 50 | if isinstance(pic, np.ndarray): 51 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 52 | return img 53 | 54 | # # handle PIL Image 55 | if pic.mode == 'I': 56 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 57 | elif pic.mode == 'I;16': 58 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 59 | else: 60 | img = torch.ByteTensor( 61 | torch.ByteStorage.from_buffer(pic.tobytes())) 62 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 63 | if pic.mode == 'YCbCr': 64 | nchannel = 3 65 | elif pic.mode == 'I;16': 66 | nchannel = 1 67 | else: 68 | nchannel = len(pic.mode) 69 | img = img.view(pic.size[1], pic.size[0], nchannel) 70 | 71 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 72 | if isinstance(img, torch.ByteTensor): 73 | return img.float() 74 | else: 75 | return img 76 | 77 | 78 | class SunRGBD(Dataset): 79 | def __init__(self, data_dir_root): 80 | # test_file_dirs = loadmat(train_test_file)['alltest'].squeeze() 81 | # all_test = [t[0].replace("/n/fs/sun3d/data/", "") for t in test_file_dirs] 82 | # self.all_test = [os.path.join(data_dir_root, t) for t in all_test] 83 | import glob 84 | self.image_files = glob.glob( 85 | os.path.join(data_dir_root, 'rgb', 'rgb', '*')) 86 | self.depth_files = [ 87 | r.replace("rgb/rgb", "gt/gt").replace("jpg", "png") for r in self.image_files] 88 | self.transform = ToTensor() 89 | 90 | def __getitem__(self, idx): 91 | image_path = self.image_files[idx] 92 | depth_path = self.depth_files[idx] 93 | 94 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 95 | depth = np.asarray(Image.open(depth_path), dtype='uint16') / 1000.0 96 | depth[depth > 8] = -1 97 | depth = depth[..., None] 98 | return self.transform(dict(image=image, depth=depth)) 99 | 100 | def __len__(self): 101 | return len(self.image_files) 102 | 103 | 104 | def get_sunrgbd_loader(data_dir_root, batch_size=1, **kwargs): 105 | dataset = SunRGBD(data_dir_root) 106 | return DataLoader(dataset, batch_size, **kwargs) 107 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/vkitti.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this 
permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | from torch.utils.data import Dataset, DataLoader 27 | from torchvision import transforms 28 | import os 29 | 30 | from PIL import Image 31 | import numpy as np 32 | import cv2 33 | 34 | 35 | class ToTensor(object): 36 | def __init__(self): 37 | self.normalize = transforms.Normalize( 38 | mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 39 | # self.resize = transforms.Resize((375, 1242)) 40 | 41 | def __call__(self, sample): 42 | image, depth = sample['image'], sample['depth'] 43 | 44 | image = self.to_tensor(image) 45 | image = self.normalize(image) 46 | depth = self.to_tensor(depth) 47 | 48 | # image = self.resize(image) 49 | 50 | return {'image': image, 'depth': depth, 'dataset': "vkitti"} 51 | 52 | def to_tensor(self, pic): 53 | 54 | if isinstance(pic, np.ndarray): 55 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 56 | return img 57 | 58 | # # handle PIL Image 59 | if pic.mode == 'I': 60 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 61 | elif pic.mode == 'I;16': 62 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 63 | else: 64 | img = torch.ByteTensor( 65 | torch.ByteStorage.from_buffer(pic.tobytes())) 66 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 67 | if pic.mode == 'YCbCr': 68 | nchannel = 3 69 | elif pic.mode == 'I;16': 70 | nchannel = 1 71 | else: 72 | nchannel = len(pic.mode) 73 | img = img.view(pic.size[1], pic.size[0], nchannel) 74 | 75 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 76 | if isinstance(img, torch.ByteTensor): 77 | return img.float() 78 | else: 79 | return img 80 | 81 | 82 | class VKITTI(Dataset): 83 | def __init__(self, data_dir_root, do_kb_crop=True): 84 | import glob 85 | # image paths are of the form /{HR, LR}//{color, depth_filled}/*.png 86 | self.image_files = glob.glob(os.path.join( 87 | data_dir_root, "test_color", '*.png')) 88 | self.depth_files = [r.replace("test_color", "test_depth") 89 | for r in self.image_files] 90 | self.do_kb_crop = True 91 | self.transform = ToTensor() 92 | 93 | def __getitem__(self, idx): 94 | image_path = self.image_files[idx] 95 | depth_path = self.depth_files[idx] 96 | 97 | image = Image.open(image_path) 98 | depth = Image.open(depth_path) 99 | depth = cv2.imread(depth_path, cv2.IMREAD_ANYCOLOR | 100 | cv2.IMREAD_ANYDEPTH) 101 | print("dpeth min max", depth.min(), depth.max()) 102 | 103 | # print(np.shape(image)) 104 | # print(np.shape(depth)) 105 | 106 | # depth[depth > 8] = -1 107 | 108 | if self.do_kb_crop and False: 109 | height = image.height 110 | width = image.width 111 | top_margin = int(height - 352) 112 | left_margin = int((width - 1216) / 2) 113 | depth = depth.crop( 114 | (left_margin, top_margin, left_margin + 1216, top_margin + 352)) 115 | image = image.crop( 116 | (left_margin, top_margin, left_margin + 1216, top_margin + 352)) 117 | # uv = uv[:, top_margin:top_margin + 352, 
left_margin:left_margin + 1216] 118 | 119 | image = np.asarray(image, dtype=np.float32) / 255.0 120 | # depth = np.asarray(depth, dtype=np.uint16) /1. 121 | depth = depth[..., None] 122 | sample = dict(image=image, depth=depth) 123 | 124 | # return sample 125 | sample = self.transform(sample) 126 | 127 | if idx == 0: 128 | print(sample["image"].shape) 129 | 130 | return sample 131 | 132 | def __len__(self): 133 | return len(self.image_files) 134 | 135 | 136 | def get_vkitti_loader(data_dir_root, batch_size=1, **kwargs): 137 | dataset = VKITTI(data_dir_root) 138 | return DataLoader(dataset, batch_size, **kwargs) 139 | 140 | 141 | if __name__ == "__main__": 142 | loader = get_vkitti_loader( 143 | data_dir_root="/home/bhatsf/shortcuts/datasets/vkitti_test") 144 | print("Total files", len(loader.dataset)) 145 | for i, sample in enumerate(loader): 146 | print(sample["image"].shape) 147 | print(sample["depth"].shape) 148 | print(sample["dataset"]) 149 | print(sample['depth'].min(), sample['depth'].max()) 150 | if i > 5: 151 | break 152 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/base_models/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 
14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/builder.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from importlib import import_module 26 | from zoedepth.models.depth_model import DepthModel 27 | 28 | def build_model(config) -> DepthModel: 29 | """Builds a model from a config. The model is specified by the model name and version in the config. The model is then constructed using the build_from_config function of the model interface. 30 | This function should be used to construct models for training and evaluation. 31 | 32 | Args: 33 | config (dict): Config dict. Config is constructed in utils/config.py. Each model has its own config file(s) saved in its root model folder. 34 | 35 | Returns: 36 | torch.nn.Module: Model corresponding to name and version as specified in config 37 | """ 38 | module_name = f"zoedepth.models.{config.model}" 39 | try: 40 | module = import_module(module_name) 41 | except ModuleNotFoundError as e: 42 | # print the original error message 43 | print(e) 44 | raise ValueError( 45 | f"Model {config.model} not found. 
Refer above error for details.") from e 46 | try: 47 | get_version = getattr(module, "get_version") 48 | except AttributeError as e: 49 | raise ValueError( 50 | f"Model {config.model} has no get_version function.") from e 51 | return get_version(config.version_name).build_from_config(config) 52 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/layers/dist_layers.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | import torch.nn as nn 27 | 28 | 29 | def log_binom(n, k, eps=1e-7): 30 | """ log(nCk) using stirling approximation """ 31 | n = n + eps 32 | k = k + eps 33 | return n * torch.log(n) - k * torch.log(k) - (n-k) * torch.log(n-k+eps) 34 | 35 | 36 | class LogBinomial(nn.Module): 37 | def __init__(self, n_classes=256, act=torch.softmax): 38 | """Compute log binomial distribution for n_classes 39 | 40 | Args: 41 | n_classes (int, optional): number of output classes. Defaults to 256. 42 | """ 43 | super().__init__() 44 | self.K = n_classes 45 | self.act = act 46 | self.register_buffer('k_idx', torch.arange( 47 | 0, n_classes).view(1, -1, 1, 1)) 48 | self.register_buffer('K_minus_1', torch.Tensor( 49 | [self.K-1]).view(1, -1, 1, 1)) 50 | 51 | def forward(self, x, t=1., eps=1e-4): 52 | """Compute log binomial distribution for x 53 | 54 | Args: 55 | x (torch.Tensor - NCHW): probabilities 56 | t (float, torch.Tensor - NCHW, optional): Temperature of distribution. Defaults to 1.. 57 | eps (float, optional): Small number for numerical stability. Defaults to 1e-4. 
58 | 59 | Returns: 60 | torch.Tensor -NCHW: log binomial distribution logbinomial(p;t) 61 | """ 62 | if x.ndim == 3: 63 | x = x.unsqueeze(1) # make it nchw 64 | 65 | one_minus_x = torch.clamp(1 - x, eps, 1) 66 | x = torch.clamp(x, eps, 1) 67 | y = log_binom(self.K_minus_1, self.k_idx) + self.k_idx * \ 68 | torch.log(x) + (self.K - 1 - self.k_idx) * torch.log(one_minus_x) 69 | return self.act(y/t, dim=1) 70 | 71 | 72 | class ConditionalLogBinomial(nn.Module): 73 | def __init__(self, in_features, condition_dim, n_classes=256, bottleneck_factor=2, p_eps=1e-4, max_temp=50, min_temp=1e-7, act=torch.softmax): 74 | """Conditional Log Binomial distribution 75 | 76 | Args: 77 | in_features (int): number of input channels in main feature 78 | condition_dim (int): number of input channels in condition feature 79 | n_classes (int, optional): Number of classes. Defaults to 256. 80 | bottleneck_factor (int, optional): Hidden dim factor. Defaults to 2. 81 | p_eps (float, optional): small eps value. Defaults to 1e-4. 82 | max_temp (float, optional): Maximum temperature of output distribution. Defaults to 50. 83 | min_temp (float, optional): Minimum temperature of output distribution. Defaults to 1e-7. 84 | """ 85 | super().__init__() 86 | self.p_eps = p_eps 87 | self.max_temp = max_temp 88 | self.min_temp = min_temp 89 | self.log_binomial_transform = LogBinomial(n_classes, act=act) 90 | bottleneck = (in_features + condition_dim) // bottleneck_factor 91 | self.mlp = nn.Sequential( 92 | nn.Conv2d(in_features + condition_dim, bottleneck, 93 | kernel_size=1, stride=1, padding=0), 94 | nn.GELU(), 95 | # 2 for p linear norm, 2 for t linear norm 96 | nn.Conv2d(bottleneck, 2+2, kernel_size=1, stride=1, padding=0), 97 | nn.Softplus() 98 | ) 99 | 100 | def forward(self, x, cond): 101 | """Forward pass 102 | 103 | Args: 104 | x (torch.Tensor - NCHW): Main feature 105 | cond (torch.Tensor - NCHW): condition feature 106 | 107 | Returns: 108 | torch.Tensor: Output log binomial distribution 109 | """ 110 | pt = self.mlp(torch.concat((x, cond), dim=1)) 111 | p, t = pt[:, :2, ...], pt[:, 2:, ...] 112 | 113 | p = p + self.p_eps 114 | p = p[:, 0, ...] / (p[:, 0, ...] + p[:, 1, ...]) 115 | 116 | t = t + self.p_eps 117 | t = t[:, 0, ...] / (t[:, 0, ...] + t[:, 1, ...]) 118 | t = t.unsqueeze(1) 119 | t = (self.max_temp - self.min_temp) * t + self.min_temp 120 | 121 | return self.log_binomial_transform(p, t) 122 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/layers/patch_transformer.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 
14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | import torch.nn as nn 27 | 28 | 29 | class PatchTransformerEncoder(nn.Module): 30 | def __init__(self, in_channels, patch_size=10, embedding_dim=128, num_heads=4, use_class_token=False): 31 | """ViT-like transformer block 32 | 33 | Args: 34 | in_channels (int): Input channels 35 | patch_size (int, optional): patch size. Defaults to 10. 36 | embedding_dim (int, optional): Embedding dimension in transformer model. Defaults to 128. 37 | num_heads (int, optional): number of attention heads. Defaults to 4. 38 | use_class_token (bool, optional): Whether to use extra token at the start for global accumulation (called as "class token"). Defaults to False. 39 | """ 40 | super(PatchTransformerEncoder, self).__init__() 41 | self.use_class_token = use_class_token 42 | encoder_layers = nn.TransformerEncoderLayer( 43 | embedding_dim, num_heads, dim_feedforward=1024) 44 | self.transformer_encoder = nn.TransformerEncoder( 45 | encoder_layers, num_layers=4) # takes shape S,N,E 46 | 47 | self.embedding_convPxP = nn.Conv2d(in_channels, embedding_dim, 48 | kernel_size=patch_size, stride=patch_size, padding=0) 49 | 50 | def positional_encoding_1d(self, sequence_length, batch_size, embedding_dim, device='cpu'): 51 | """Generate positional encodings 52 | 53 | Args: 54 | sequence_length (int): Sequence length 55 | embedding_dim (int): Embedding dimension 56 | 57 | Returns: 58 | torch.Tensor SBE: Positional encodings 59 | """ 60 | position = torch.arange( 61 | 0, sequence_length, dtype=torch.float32, device=device).unsqueeze(1) 62 | index = torch.arange( 63 | 0, embedding_dim, 2, dtype=torch.float32, device=device).unsqueeze(0) 64 | div_term = torch.exp(index * (-torch.log(torch.tensor(10000.0, device=device)) / embedding_dim)) 65 | pos_encoding = position * div_term 66 | pos_encoding = torch.cat([torch.sin(pos_encoding), torch.cos(pos_encoding)], dim=1) 67 | pos_encoding = pos_encoding.unsqueeze(1).repeat(1, batch_size, 1) 68 | return pos_encoding 69 | 70 | 71 | def forward(self, x): 72 | """Forward pass 73 | 74 | Args: 75 | x (torch.Tensor - NCHW): Input feature tensor 76 | 77 | Returns: 78 | torch.Tensor - SNE: Transformer output embeddings. S - sequence length (=HW/patch_size^2), N - batch size, E - embedding dim 79 | """ 80 | embeddings = self.embedding_convPxP(x).flatten( 81 | 2) # .shape = n,c,s = n, embedding_dim, s 82 | if self.use_class_token: 83 | # extra special token at start ? 
84 | embeddings = nn.functional.pad(embeddings, (1, 0)) 85 | 86 | # change to S,N,E format required by transformer 87 | embeddings = embeddings.permute(2, 0, 1) 88 | S, N, E = embeddings.shape 89 | embeddings = embeddings + self.positional_encoding_1d(S, N, E, device=embeddings.device) 90 | x = self.transformer_encoder(embeddings) # .shape = S, N, E 91 | return x 92 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/model_io.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | 27 | def load_state_dict(model, state_dict): 28 | """Load state_dict into model, handling DataParallel and DistributedDataParallel. Also checks for "model" key in state_dict. 29 | 30 | DataParallel prefixes state_dict keys with 'module.' when saving. 31 | If the model is not a DataParallel model but the state_dict is, then prefixes are removed. 32 | If the model is a DataParallel model but the state_dict is not, then prefixes are added. 33 | """ 34 | state_dict = state_dict.get('model', state_dict) 35 | # if model is a DataParallel model, then state_dict keys are prefixed with 'module.' 36 | 37 | do_prefix = isinstance( 38 | model, (torch.nn.DataParallel, torch.nn.parallel.DistributedDataParallel)) 39 | state = {} 40 | for k, v in state_dict.items(): 41 | if k.startswith('module.') and not do_prefix: 42 | k = k[7:] 43 | 44 | if not k.startswith('module.') and do_prefix: 45 | k = 'module.' + k 46 | 47 | state[k] = v 48 | 49 | model.load_state_dict(state) 50 | print("Loaded successfully") 51 | return model 52 | 53 | 54 | def load_wts(model, checkpoint_path): 55 | ckpt = torch.load(checkpoint_path, map_location='cpu') 56 | return load_state_dict(model, ckpt) 57 | 58 | 59 | def load_state_dict_from_url(model, url, **kwargs): 60 | state_dict = torch.hub.load_state_dict_from_url(url, map_location='cpu', **kwargs) 61 | return load_state_dict(model, state_dict) 62 | 63 | 64 | def load_state_from_resource(model, resource: str): 65 | """Loads weights to the model from a given resource. A resource can be of following types: 66 | 1. URL. Prefixed with "url::" 67 | e.g. url::http(s)://url.resource.com/ckpt.pt 68 | 69 | 2. Local path. Prefixed with "local::" 70 | e.g. 
local::/path/to/ckpt.pt 71 | 72 | 73 | Args: 74 | model (torch.nn.Module): Model 75 | resource (str): resource string 76 | 77 | Returns: 78 | torch.nn.Module: Model with loaded weights 79 | """ 80 | print(f"Using pretrained resource {resource}") 81 | 82 | if resource.startswith('url::'): 83 | url = resource.split('url::')[1] 84 | return load_state_dict_from_url(model, url, progress=True) 85 | 86 | elif resource.startswith('local::'): 87 | path = resource.split('local::')[1] 88 | return load_wts(model, path) 89 | 90 | else: 91 | raise ValueError("Invalid resource type, only url:: and local:: are supported") 92 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/zoedepth/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
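# Illustrative usage sketch for zoedepth/models/model_io.py (above); not part of the original file.
# load_state_from_resource dispatches on the "url::" / "local::" prefix described in its docstring, and
# load_state_dict also unwraps checkpoints saved as {"model": state_dict}. The toy Conv2d and the /tmp
# path are assumptions; in the real pipeline the model is a DepthModel and the resource comes from the
# config (e.g. "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_N.pt").
#
#     import torch
#     import torch.nn as nn
#     from zoedepth.models.model_io import load_state_from_resource
#
#     net = nn.Conv2d(3, 1, 3)
#     torch.save({"model": net.state_dict()}, "/tmp/toy_ckpt.pt")
#     net = load_state_from_resource(nn.Conv2d(3, 1, 3), "local::/tmp/toy_ckpt.pt")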
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from .zoedepth_v1 import ZoeDepth 26 | 27 | all_versions = { 28 | "v1": ZoeDepth, 29 | } 30 | 31 | get_version = lambda v : all_versions[v] -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/zoedepth/config_zoedepth.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "ZoeDepth", 4 | "version_name": "v1", 5 | "n_bins": 64, 6 | "bin_embedding_dim": 128, 7 | "bin_centers_type": "softplus", 8 | "n_attractors":[16, 8, 4, 1], 9 | "attractor_alpha": 1000, 10 | "attractor_gamma": 2, 11 | "attractor_kind" : "mean", 12 | "attractor_type" : "inv", 13 | "midas_model_type" : "DPT_BEiT_L_384", 14 | "min_temp": 0.0212, 15 | "max_temp": 50.0, 16 | "output_distribution": "logbinomial", 17 | "memory_efficient": true, 18 | "inverse_midas": false, 19 | "img_size": [384, 512] 20 | }, 21 | 22 | "train": { 23 | "train_midas": true, 24 | "use_pretrained_midas": true, 25 | "trainer": "zoedepth", 26 | "epochs": 5, 27 | "bs": 16, 28 | "optim_kwargs": {"lr": 0.000161, "wd": 0.01}, 29 | "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true}, 30 | "same_lr": false, 31 | "w_si": 1, 32 | "w_domain": 0.2, 33 | "w_reg": 0, 34 | "w_grad": 0, 35 | "avoid_boundary": false, 36 | "random_crop": false, 37 | "input_width": 640, 38 | "input_height": 480, 39 | "midas_lr_factor": 1, 40 | "encoder_lr_factor":10, 41 | "pos_enc_lr_factor":10, 42 | "freeze_midas_bn": true 43 | 44 | }, 45 | 46 | "infer":{ 47 | "train_midas": false, 48 | "use_pretrained_midas": false, 49 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_N.pt", 50 | "force_keep_ar": true 51 | }, 52 | 53 | "eval":{ 54 | "train_midas": false, 55 | "use_pretrained_midas": false, 56 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_N.pt" 57 | } 58 | } -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/zoedepth/config_zoedepth_kitti.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "bin_centers_type": "normed", 4 | "img_size": [384, 768] 5 | }, 6 | 7 | "train": { 8 | }, 9 | 10 | "infer":{ 11 | "train_midas": false, 12 | "use_pretrained_midas": false, 13 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt", 14 | "force_keep_ar": true 15 | }, 16 | 17 | "eval":{ 18 | "train_midas": false, 19 | "use_pretrained_midas": false, 20 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt" 21 | } 22 | } -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/zoedepth_nk/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the 
following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from .zoedepth_nk_v1 import ZoeDepthNK 26 | 27 | all_versions = { 28 | "v1": ZoeDepthNK, 29 | } 30 | 31 | get_version = lambda v : all_versions[v] -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/zoedepth_nk/config_zoedepth_nk.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "ZoeDepthNK", 4 | "version_name": "v1", 5 | "bin_conf" : [ 6 | { 7 | "name": "nyu", 8 | "n_bins": 64, 9 | "min_depth": 1e-3, 10 | "max_depth": 10.0 11 | }, 12 | { 13 | "name": "kitti", 14 | "n_bins": 64, 15 | "min_depth": 1e-3, 16 | "max_depth": 80.0 17 | } 18 | ], 19 | "bin_embedding_dim": 128, 20 | "bin_centers_type": "softplus", 21 | "n_attractors":[16, 8, 4, 1], 22 | "attractor_alpha": 1000, 23 | "attractor_gamma": 2, 24 | "attractor_kind" : "mean", 25 | "attractor_type" : "inv", 26 | "min_temp": 0.0212, 27 | "max_temp": 50.0, 28 | "memory_efficient": true, 29 | "midas_model_type" : "DPT_BEiT_L_384", 30 | "img_size": [384, 512] 31 | }, 32 | 33 | "train": { 34 | "train_midas": true, 35 | "use_pretrained_midas": true, 36 | "trainer": "zoedepth_nk", 37 | "epochs": 5, 38 | "bs": 16, 39 | "optim_kwargs": {"lr": 0.0002512, "wd": 0.01}, 40 | "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true}, 41 | "same_lr": false, 42 | "w_si": 1, 43 | "w_domain": 100, 44 | "avoid_boundary": false, 45 | "random_crop": false, 46 | "input_width": 640, 47 | "input_height": 480, 48 | "w_grad": 0, 49 | "w_reg": 0, 50 | "midas_lr_factor": 10, 51 | "encoder_lr_factor":10, 52 | "pos_enc_lr_factor":10 53 | }, 54 | 55 | "infer": { 56 | "train_midas": false, 57 | "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt", 58 | "use_pretrained_midas": false, 59 | "force_keep_ar": true 60 | }, 61 | 62 | "eval": { 63 | "train_midas": false, 64 | "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt", 65 | "use_pretrained_midas": false 66 | } 67 | } -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/trainers/builder.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished 
to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from importlib import import_module 26 | 27 | 28 | def get_trainer(config): 29 | """Builds and returns a trainer based on the config. 30 | 31 | Args: 32 | config (dict): the config dict (typically constructed using utils.config.get_config) 33 | config.trainer (str): the name of the trainer to use. The module named "{config.trainer}_trainer" must exist in trainers root module 34 | 35 | Raises: 36 | ValueError: If the specified trainer does not exist under trainers/ folder 37 | 38 | Returns: 39 | Trainer (inherited from zoedepth.trainers.BaseTrainer): The Trainer object 40 | """ 41 | assert "trainer" in config and config.trainer is not None and config.trainer != '', "Trainer not specified. Config: {0}".format( 42 | config) 43 | try: 44 | Trainer = getattr(import_module( 45 | f"zoedepth.trainers.{config.trainer}_trainer"), 'Trainer') 46 | except ModuleNotFoundError as e: 47 | raise ValueError(f"Trainer {config.trainer}_trainer not found.") from e 48 | return Trainer 49 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
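# Illustrative usage sketch for zoedepth/trainers/builder.py (above); not part of the original file.
# get_trainer resolves config.trainer (e.g. "zoedepth", as set in config_zoedepth.json) to the module
# zoedepth.trainers.zoedepth_trainer and returns its Trainer class, which the caller then instantiates.
# EasyDict is used here only for attribute-style access; the Trainer constructor arguments are not
# shown in this file, so instantiation is left to the training entry points.
#
#     from zoedepth.trainers.builder import get_trainer
#     from zoedepth.utils.easydict import EasyDict
#
#     Trainer = get_trainer(EasyDict(trainer="zoedepth"))
#     # Trainer is the class exported by zoedepth/trainers/zoedepth_trainer.py.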
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/utils/arg_utils.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def infer_type(x): # hacky way to infer type from string args 4 | if not isinstance(x, str): 5 | return x 6 | 7 | try: 8 | x = int(x) 9 | return x 10 | except ValueError: 11 | pass 12 | 13 | try: 14 | x = float(x) 15 | return x 16 | except ValueError: 17 | pass 18 | 19 | return x 20 | 21 | 22 | def parse_unknown(unknown_args): 23 | clean = [] 24 | for a in unknown_args: 25 | if "=" in a: 26 | k, v = a.split("=") 27 | clean.extend([k, v]) 28 | else: 29 | clean.append(a) 30 | 31 | keys = clean[::2] 32 | values = clean[1::2] 33 | return {k.replace("--", ""): infer_type(v) for k, v in zip(keys, values)} 34 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/utils/easydict/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | EasyDict 3 | Copy/pasted from https://github.com/makinacorpus/easydict 4 | Original author: Mathieu Leplatre 5 | """ 6 | 7 | class EasyDict(dict): 8 | """ 9 | Get attributes 10 | 11 | >>> d = EasyDict({'foo':3}) 12 | >>> d['foo'] 13 | 3 14 | >>> d.foo 15 | 3 16 | >>> d.bar 17 | Traceback (most recent call last): 18 | ... 19 | AttributeError: 'EasyDict' object has no attribute 'bar' 20 | 21 | Works recursively 22 | 23 | >>> d = EasyDict({'foo':3, 'bar':{'x':1, 'y':2}}) 24 | >>> isinstance(d.bar, dict) 25 | True 26 | >>> d.bar.x 27 | 1 28 | 29 | Bullet-proof 30 | 31 | >>> EasyDict({}) 32 | {} 33 | >>> EasyDict(d={}) 34 | {} 35 | >>> EasyDict(None) 36 | {} 37 | >>> d = {'a': 1} 38 | >>> EasyDict(**d) 39 | {'a': 1} 40 | >>> EasyDict((('a', 1), ('b', 2))) 41 | {'a': 1, 'b': 2} 42 | 43 | Set attributes 44 | 45 | >>> d = EasyDict() 46 | >>> d.foo = 3 47 | >>> d.foo 48 | 3 49 | >>> d.bar = {'prop': 'value'} 50 | >>> d.bar.prop 51 | 'value' 52 | >>> d 53 | {'foo': 3, 'bar': {'prop': 'value'}} 54 | >>> d.bar.prop = 'newer' 55 | >>> d.bar.prop 56 | 'newer' 57 | 58 | 59 | Values extraction 60 | 61 | >>> d = EasyDict({'foo':0, 'bar':[{'x':1, 'y':2}, {'x':3, 'y':4}]}) 62 | >>> isinstance(d.bar, list) 63 | True 64 | >>> from operator import attrgetter 65 | >>> list(map(attrgetter('x'), d.bar)) 66 | [1, 3] 67 | >>> list(map(attrgetter('y'), d.bar)) 68 | [2, 4] 69 | >>> d = EasyDict() 70 | >>> list(d.keys()) 71 | [] 72 | >>> d = EasyDict(foo=3, bar=dict(x=1, y=2)) 73 | >>> d.foo 74 | 3 75 | >>> d.bar.x 76 | 1 77 | 78 | Still like a dict though 79 | 80 | >>> o = EasyDict({'clean':True}) 81 | >>> list(o.items()) 82 | [('clean', True)] 83 | 84 | And like a class 85 | 86 | >>> class Flower(EasyDict): 87 | ... power = 1 88 | ... 89 | >>> f = Flower() 90 | >>> f.power 91 | 1 92 | >>> f = Flower({'height': 12}) 93 | >>> f.height 94 | 12 95 | >>> f['power'] 96 | 1 97 | >>> sorted(f.keys()) 98 | ['height', 'power'] 99 | 100 | update and pop items 101 | >>> d = EasyDict(a=1, b='2') 102 | >>> e = EasyDict(c=3.0, a=9.0) 103 | >>> d.update(e) 104 | >>> d.c 105 | 3.0 106 | >>> d['c'] 107 | 3.0 108 | >>> d.get('c') 109 | 3.0 110 | >>> d.update(a=4, b=4) 111 | >>> d.b 112 | 4 113 | >>> d.pop('a') 114 | 4 115 | >>> d.a 116 | Traceback (most recent call last): 117 | ... 
118 | AttributeError: 'EasyDict' object has no attribute 'a' 119 | """ 120 | def __init__(self, d=None, **kwargs): 121 | if d is None: 122 | d = {} 123 | else: 124 | d = dict(d) 125 | if kwargs: 126 | d.update(**kwargs) 127 | for k, v in d.items(): 128 | setattr(self, k, v) 129 | # Class attributes 130 | for k in self.__class__.__dict__.keys(): 131 | if not (k.startswith('__') and k.endswith('__')) and not k in ('update', 'pop'): 132 | setattr(self, k, getattr(self, k)) 133 | 134 | def __setattr__(self, name, value): 135 | if isinstance(value, (list, tuple)): 136 | value = [self.__class__(x) 137 | if isinstance(x, dict) else x for x in value] 138 | elif isinstance(value, dict) and not isinstance(value, self.__class__): 139 | value = self.__class__(value) 140 | super(EasyDict, self).__setattr__(name, value) 141 | super(EasyDict, self).__setitem__(name, value) 142 | 143 | __setitem__ = __setattr__ 144 | 145 | def update(self, e=None, **f): 146 | d = e or dict() 147 | d.update(f) 148 | for k in d: 149 | setattr(self, k, d[k]) 150 | 151 | def pop(self, k, d=None): 152 | delattr(self, k) 153 | return super(EasyDict, self).pop(k, d) 154 | 155 | 156 | if __name__ == "__main__": 157 | import doctest 158 | doctest.testmod() -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/utils/geometry.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import numpy as np 26 | 27 | def get_intrinsics(H,W): 28 | """ 29 | Intrinsics for a pinhole camera model. 30 | Assume fov of 55 degrees and central principal point. 
31 | """ 32 | f = 0.5 * W / np.tan(0.5 * 55 * np.pi / 180.0) 33 | cx = 0.5 * W 34 | cy = 0.5 * H 35 | return np.array([[f, 0, cx], 36 | [0, f, cy], 37 | [0, 0, 1]]) 38 | 39 | def depth_to_points(depth, R=None, t=None): 40 | 41 | K = get_intrinsics(depth.shape[1], depth.shape[2]) 42 | Kinv = np.linalg.inv(K) 43 | if R is None: 44 | R = np.eye(3) 45 | if t is None: 46 | t = np.zeros(3) 47 | 48 | # M converts from your coordinate to PyTorch3D's coordinate system 49 | M = np.eye(3) 50 | M[0, 0] = -1.0 51 | M[1, 1] = -1.0 52 | 53 | height, width = depth.shape[1:3] 54 | 55 | x = np.arange(width) 56 | y = np.arange(height) 57 | coord = np.stack(np.meshgrid(x, y), -1) 58 | coord = np.concatenate((coord, np.ones_like(coord)[:, :, [0]]), -1) # z=1 59 | coord = coord.astype(np.float32) 60 | # coord = torch.as_tensor(coord, dtype=torch.float32, device=device) 61 | coord = coord[None] # bs, h, w, 3 62 | 63 | D = depth[:, :, :, None, None] 64 | # print(D.shape, Kinv[None, None, None, ...].shape, coord[:, :, :, :, None].shape ) 65 | pts3D_1 = D * Kinv[None, None, None, ...] @ coord[:, :, :, :, None] 66 | # pts3D_1 live in your coordinate system. Convert them to Py3D's 67 | pts3D_1 = M[None, None, None, ...] @ pts3D_1 68 | # from reference to targe tviewpoint 69 | pts3D_2 = R[None, None, None, ...] @ pts3D_1 + t[None, None, None, :, None] 70 | # pts3D_2 = pts3D_1 71 | # depth_2 = pts3D_2[:, :, :, 2, :] # b,1,h,w 72 | return pts3D_2[:, :, :, :3, 0][0] 73 | 74 | 75 | def create_triangles(h, w, mask=None): 76 | """ 77 | Reference: https://github.com/google-research/google-research/blob/e96197de06613f1b027d20328e06d69829fa5a89/infinite_nature/render_utils.py#L68 78 | Creates mesh triangle indices from a given pixel grid size. 79 | This function is not and need not be differentiable as triangle indices are 80 | fixed. 81 | Args: 82 | h: (int) denoting the height of the image. 83 | w: (int) denoting the width of the image. 
84 | Returns: 85 | triangles: 2D numpy array of indices (int) with shape (2(W-1)(H-1) x 3) 86 | """ 87 | x, y = np.meshgrid(range(w - 1), range(h - 1)) 88 | tl = y * w + x 89 | tr = y * w + x + 1 90 | bl = (y + 1) * w + x 91 | br = (y + 1) * w + x + 1 92 | triangles = np.array([tl, bl, tr, br, tr, bl]) 93 | triangles = np.transpose(triangles, (1, 2, 0)).reshape( 94 | ((w - 1) * (h - 1) * 2, 3)) 95 | if mask is not None: 96 | mask = mask.reshape(-1) 97 | triangles = triangles[mask[triangles].all(1)] 98 | return triangles 99 | -------------------------------------------------------------------------------- /arguments.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class GSParams: 5 | def __init__(self): 6 | self.sh_degree = 3 7 | self.feat_dim = 32 8 | self.n_offsets = 10 9 | self.voxel_size = 0.001 10 | self.update_depth = 3 11 | self.update_init_factor = 16 12 | self.update_hierachy_factor = 4 13 | 14 | self.use_feat_bank = False 15 | self._source_path = "" 16 | self._model_path = "" 17 | self.images = "images" 18 | self.resolution = -1 19 | self.white_background = False 20 | self.data_device = "cuda" 21 | self.eval = True 22 | 23 | self.iterations = 2990 # 3_000 2990 24 | self.position_lr_init = 0.0016 # BloomScene: 0.00016 25 | self.position_lr_final = 0.0000016 26 | self.position_lr_delay_mult = 0.01 27 | self.position_lr_max_steps = 2990 # 3_000 28 | 29 | self.offset_lr_init = 0.01 30 | self.offset_lr_final = 0.0001 31 | self.offset_lr_delay_mult = 0.01 32 | self.offset_lr_max_steps = 2990 33 | 34 | self.mask_lr_init = 0.01 35 | self.mask_lr_final = 0.0001 36 | self.mask_lr_delay_mult = 0.01 37 | self.mask_lr_max_steps = 2990 38 | 39 | self.feature_lr = 0.0025 40 | self.opacity_lr = 0.05 41 | self.scaling_lr = 0.005 42 | self.rotation_lr = 0.001 43 | 44 | self.mlp_opacity_lr_init = 0.002 45 | self.mlp_opacity_lr_final = 0.00002 46 | self.mlp_opacity_lr_delay_mult = 0.01 47 | self.mlp_opacity_lr_max_steps = 2990 48 | 49 | self.mlp_cov_lr_init = 0.004 50 | self.mlp_cov_lr_final = 0.004 51 | self.mlp_cov_lr_delay_mult = 0.01 52 | self.mlp_cov_lr_max_steps = 2990 53 | 54 | self.mlp_color_lr_init = 0.008 55 | self.mlp_color_lr_final = 0.00005 56 | self.mlp_color_lr_delay_mult = 0.01 57 | self.mlp_color_lr_max_steps = 2990 58 | 59 | self.mlp_featurebank_lr_init = 0.01 60 | self.mlp_featurebank_lr_final = 0.00001 61 | self.mlp_featurebank_lr_delay_mult = 0.01 62 | self.mlp_featurebank_lr_max_steps = 2990 63 | 64 | self.encoding_xyz_lr_init = 0.005 65 | self.encoding_xyz_lr_final = 0.00001 66 | self.encoding_xyz_lr_delay_mult = 0.33 67 | self.encoding_xyz_lr_max_steps = 2990 68 | 69 | self.mlp_grid_lr_init = 0.005 70 | self.mlp_grid_lr_final = 0.00001 71 | self.mlp_grid_lr_delay_mult = 0.01 72 | self.mlp_grid_lr_max_steps = 2990 73 | 74 | self.mlp_deform_lr_init = 0.005 75 | self.mlp_deform_lr_final = 0.0005 76 | self.mlp_deform_lr_delay_mult = 0.01 77 | self.mlp_deform_lr_max_steps = 2990 78 | 79 | # for anchor densification 80 | self.start_stat = 200 81 | self.update_from = 500 82 | self.update_interval = 100 83 | self.update_until = 2000 84 | 85 | self.percent_dense = 0.01 86 | self.lambda_dssim = 0.2 87 | self.densification_interval = 100 88 | self.opacity_reset_interval = 2990 89 | self.densify_from_iter = 500 90 | self.densify_until_iter = 2990 91 | self.densify_grad_threshold = 0.0002 92 | 93 | self.min_opacity = 0.005 # 0.2 94 | self.success_threshold = 0.8 95 | 96 | self.convert_SHs_python = False 97 | 
self.compute_cov3D_python = False 98 | self.debug = False 99 | 100 | 101 | 102 | class CameraParams: 103 | def __init__(self, H: int = 512, W: int = 512): 104 | self.H = H 105 | self.W = W 106 | self.focal = (5.8269e+02, 5.8269e+02) 107 | self.fov = (2*np.arctan(self.W / (2*self.focal[0])), 2*np.arctan(self.H / (2*self.focal[1]))) 108 | self.K = np.array([ 109 | [self.focal[0], 0., self.W/2], 110 | [0., self.focal[1], self.H/2], 111 | [0., 0., 1.], 112 | ]).astype(np.float32) -------------------------------------------------------------------------------- /examples/01_childroom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparklingH/BloomScene/93c5855428895a19216685c2f3fb8bc75805dad4/examples/01_childroom.png -------------------------------------------------------------------------------- /examples/01_childroom.txt: -------------------------------------------------------------------------------- 1 | A children's room filled with toys and books, colorful, playful, highly detailed, vibrant. -------------------------------------------------------------------------------- /examples/02_sunroom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparklingH/BloomScene/93c5855428895a19216685c2f3fb8bc75805dad4/examples/02_sunroom.png -------------------------------------------------------------------------------- /examples/02_sunroom.txt: -------------------------------------------------------------------------------- 1 | A sunroom with floor-to-ceiling windows overlooking the garden, comfortable chairs, and a coffee table inside, bright, airy, photorealistic, high-resolution. -------------------------------------------------------------------------------- /examples/03_beach.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparklingH/BloomScene/93c5855428895a19216685c2f3fb8bc75805dad4/examples/03_beach.png -------------------------------------------------------------------------------- /examples/03_beach.txt: -------------------------------------------------------------------------------- 1 | A sunny beach with fine sand and blue water, with a backdrop of blue sky and white clouds, highly detailed, vibrant, photorealistic, clear skies. -------------------------------------------------------------------------------- /examples/04_snow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparklingH/BloomScene/93c5855428895a19216685c2f3fb8bc75805dad4/examples/04_snow.png -------------------------------------------------------------------------------- /examples/04_snow.txt: -------------------------------------------------------------------------------- 1 | A winter snow scene with snow-covered trees and houses, crisp, highly detailed, photorealistic, clear reflections. 
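For reference, the CameraParams class in arguments.py above pins the pinhole intrinsics to a 512x512 image with a focal length of 582.69 px, which works out to a field of view of roughly 47.4 degrees on each axis. A quick check of the stored values (illustrative only):

    import numpy as np
    f, W = 5.8269e+02, 512
    fov = 2 * np.arctan(W / (2 * f))   # ~0.828 rad
    print(np.degrees(fov))             # ~47.4 degrees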
-------------------------------------------------------------------------------- /examples/05_christmas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparklingH/BloomScene/93c5855428895a19216685c2f3fb8bc75805dad4/examples/05_christmas.png -------------------------------------------------------------------------------- /examples/05_christmas.txt: -------------------------------------------------------------------------------- 1 | a cozy living room in christmas -------------------------------------------------------------------------------- /examples/06_room.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparklingH/BloomScene/93c5855428895a19216685c2f3fb8bc75805dad4/examples/06_room.png -------------------------------------------------------------------------------- /examples/06_room.txt: -------------------------------------------------------------------------------- 1 | a living room with a lit furnace, couch and cozy curtains, bright lamps that make the room look well-lit. -------------------------------------------------------------------------------- /examples/07_museum.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparklingH/BloomScene/93c5855428895a19216685c2f3fb8bc75805dad4/examples/07_museum.png -------------------------------------------------------------------------------- /examples/07_museum.txt: -------------------------------------------------------------------------------- 1 | Simple museum, pictures, paintings, artistic, best quality, dimly lit -------------------------------------------------------------------------------- /examples/08_small_cabin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparklingH/BloomScene/93c5855428895a19216685c2f3fb8bc75805dad4/examples/08_small_cabin.png -------------------------------------------------------------------------------- /examples/08_small_cabin.txt: -------------------------------------------------------------------------------- 1 | A small cabin on top of a snowy mountain in the style of Disney, artstation -------------------------------------------------------------------------------- /examples/08_small_cabin_negative.txt: -------------------------------------------------------------------------------- 1 | low quality, ugly -------------------------------------------------------------------------------- /examples/09_street.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparklingH/BloomScene/93c5855428895a19216685c2f3fb8bc75805dad4/examples/09_street.png -------------------------------------------------------------------------------- /examples/09_street.txt: -------------------------------------------------------------------------------- 1 | A suburban street in North Carolina on a bright, sunny day -------------------------------------------------------------------------------- /images/BloomScene.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparklingH/BloomScene/93c5855428895a19216685c2f3fb8bc75805dad4/images/BloomScene.png -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | 
# -*- coding: utf-8 -*- 2 | import os 3 | import argparse 4 | import torch 5 | import datetime 6 | import json 7 | import random 8 | import numpy as np 9 | from PIL import Image 10 | from bloomscene import BloomScene 11 | from utils.metrics import clip_score_and_iqa, brisque_and_niqe_score 12 | 13 | 14 | def fix_random_seed(seed: int): 15 | random.seed(seed) 16 | np.random.seed(seed) 17 | torch.manual_seed(seed) 18 | torch.cuda.manual_seed(seed) 19 | torch.cuda.manual_seed_all(seed) 20 | 21 | torch.backends.cudnn.deterministic = True 22 | torch.backends.cudnn.benchmark = True 23 | 24 | if __name__ == "__main__": 25 | ### option 26 | parser = argparse.ArgumentParser(description='Arguments for BloomScene') 27 | # Input options 28 | parser.add_argument('--image', '-img', type=str, default='examples/01_childroom.png', help='Input image for scene generation') 29 | parser.add_argument('--text', '-t', type=str, default='examples/01_childroom.txt', help='Text prompt for scene generation') 30 | parser.add_argument('--neg_text', '-nt', type=str, default='', help='Negative text prompt for scene generation') 31 | 32 | # Camera options 33 | parser.add_argument('--campath_gen', '-cg', type=str, default='rotate360', choices=['rotate360'], help='Camera extrinsic trajectories for scene generation') 34 | parser.add_argument('--campath_render', '-cr', type=str, default='rotate360', choices=['rotate360'], help='Camera extrinsic trajectories for video rendering') 35 | 36 | # Inpainting options 37 | parser.add_argument('--seed', type=int, default=1, help='Manual seed for running Stable Diffusion inpainting') 38 | parser.add_argument('--diff_steps', type=int, default=50, help='Number of inference steps for running Stable Diffusion inpainting') 39 | 40 | # Save options 41 | parser.add_argument('--save_dir', '-s', type=str, default='', help='Save directory') 42 | 43 | # DPR options 44 | parser.add_argument('--dep_value', action='store_true', help='Pixel-level depth regularization or not') 45 | parser.add_argument('--dep_domin', action='store_true', help='Distribution-level depth regularization or not') 46 | parser.add_argument('--dep_smooth', action='store_true', help='Depth smoothness regularization or not') 47 | parser.add_argument('--dep_value_lbd', type=float, default=0.7, help='Depth regularization..') 48 | parser.add_argument('--dep_domin_lbd', type=float, default=0.1, help='Depth regularization..') 49 | parser.add_argument('--dep_smooth_lbd', type=float, default=1.0, help='Depth regularization..') 50 | 51 | # SCC options 52 | parser.add_argument("--n_features", type=int, default = 4) 53 | parser.add_argument("--log2", type=int, default = 13) 54 | parser.add_argument("--log2_2D", type=int, default = 15) 55 | parser.add_argument("--lambdae", type=float, default = 0.002) 56 | parser.add_argument("--testing_iterations", nargs="+", type=int, default=[2990]) 57 | parser.add_argument("--saving_iterations", nargs="+", type=int, default=[2990]) 58 | 59 | 60 | args = parser.parse_args() 61 | 62 | 63 | fix_random_seed(args.seed) 64 | 65 | ### input (example) 66 | rgb_cond = Image.open(args.image).resize((512,512)) 67 | 68 | 69 | if args.text.endswith('.txt'): 70 | with open(args.text, 'r') as f: 71 | txt_cond = f.readline() 72 | else: 73 | txt_cond = args.text 74 | 75 | if args.neg_text.endswith('.txt'): 76 | with open(args.neg_text, 'r') as f: 77 | neg_txt_cond = f.readline() 78 | else: 79 | neg_txt_cond = args.neg_text 80 | 81 | # Make default save directory if blank 82 | 83 | if args.save_dir == '': 84 | img_name = 
os.path.splitext(os.path.basename(args.image))[0] 85 | now_str = datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S') 86 | args.save_dir = f'./outputs/{img_name}_{args.campath_gen}_{args.seed}_{now_str}' 87 | 88 | 89 | if not os.path.exists(args.save_dir): 90 | os.makedirs(args.save_dir, exist_ok=True) 91 | 92 | 93 | with open(os.path.join(args.save_dir, "settings.json"), "w") as f: 94 | json.dump(vars(args), f, indent=4, sort_keys=True) 95 | 96 | 97 | bs = BloomScene(args, save_dir=args.save_dir) 98 | start_time = datetime.datetime.now() 99 | print("start..", start_time.strftime('%Y-%m-%d %H:%M:%S')) 100 | 101 | bs.create(rgb_cond, txt_cond, neg_txt_cond, args.campath_gen, args.seed, args.diff_steps) 102 | end_time = datetime.datetime.now() 103 | print("end..", end_time.strftime('%Y-%m-%d %H:%M:%S')) 104 | 105 | bs.render_video(args.campath_render) 106 | 107 | 108 | # # eval 109 | image_folder = os.path.join(args.save_dir, "eval", "render_rgb") 110 | clip_score_and_iqa(image_folder=image_folder, text=txt_cond, out_path=args.save_dir) 111 | brisque_and_niqe_score(image_folder=image_folder, out_path=args.save_dir) 112 | -------------------------------------------------------------------------------- /scene/__init__.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from arguments import GSParams 4 | from scene.dataset_readers import readDataInfo 5 | from scene.gaussian_model import GaussianModel 6 | from utils.pose_noise_util import apply_noise_bloomscene 7 | 8 | 9 | class Scene: 10 | gaussians: GaussianModel 11 | 12 | def __init__(self, traindata, gaussians: GaussianModel, opt: GSParams): 13 | self.traindata = traindata 14 | self.gaussians = gaussians 15 | 16 | info = readDataInfo(traindata, opt.white_background, opt.eval) 17 | random.shuffle(info.train_cameras) # Multi-res consistent random shuffling 18 | self.cameras_extent = info.nerf_normalization["radius"] 19 | 20 | print("Loading Training Cameras ...") 21 | self.train_cameras = info.train_cameras 22 | print("Loading Preset Cameras ...") 23 | self.preset_cameras = {} 24 | print("Loading Eval Cameras ...") 25 | self.eval_cameras = apply_noise_bloomscene(self.train_cameras) 26 | 27 | for campath in info.preset_cameras.keys(): 28 | self.preset_cameras[campath] = info.preset_cameras[campath] 29 | 30 | self.gaussians.create_from_pcd(info.point_cloud, self.cameras_extent) 31 | self.gaussians.training_setup(opt) 32 | 33 | def getTrainCameras(self): 34 | return self.train_cameras 35 | 36 | def getPresetCameras(self, preset): 37 | assert preset in self.preset_cameras 38 | return self.preset_cameras[preset] 39 | 40 | def getEvalCameras(self): 41 | return self.eval_cameras -------------------------------------------------------------------------------- /scene/cameras.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
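# Example invocation of the pipeline driven by run.py above (illustrative; the
# flags are the ones defined in its argument parser, and the output folder name
# simply follows the default save_dir pattern shown there):
#
#     python run.py --image examples/01_childroom.png \
#                   --text examples/01_childroom.txt \
#                   --campath_gen rotate360 --campath_render rotate360 \
#                   --dep_value --dep_domin --dep_smooth \
#                   --seed 1 --diff_steps 50
#
# Results land in ./outputs/01_childroom_rotate360_1_<timestamp>/ together with
# a settings.json snapshot of the arguments.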
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | import numpy as np 12 | 13 | import torch 14 | from torch import nn 15 | 16 | from utils.graphics import getWorld2View2, getProjectionMatrix 17 | from utils.loss import image2canny 18 | 19 | 20 | class Camera(nn.Module): 21 | def __init__(self, colmap_id, R, T, FoVx, FoVy, image, gt_alpha_mask, 22 | image_name, uid, original_depth, 23 | trans=np.array([0.0, 0.0, 0.0]), scale=1.0, data_device = "cuda" 24 | ): 25 | super(Camera, self).__init__() 26 | 27 | self.uid = uid 28 | self.colmap_id = colmap_id 29 | self.R = R 30 | self.T = T 31 | self.FoVx = FoVx 32 | self.FoVy = FoVy 33 | self.image_name = image_name 34 | 35 | try: 36 | self.data_device = torch.device(data_device) 37 | except Exception as e: 38 | print(e) 39 | print(f"[Warning] Custom device {data_device} failed, fallback to default cuda device" ) 40 | self.data_device = torch.device("cuda") 41 | 42 | 43 | self.original_image = image.clamp(0.0, 1.0).to(self.data_device) 44 | self.canny_mask = image2canny(self.original_image.permute(1,2,0), 50, 150, isEdge1=False).detach().to(self.data_device) 45 | self.image_width = self.original_image.shape[2] 46 | self.image_height = self.original_image.shape[1] 47 | 48 | if gt_alpha_mask is not None: 49 | self.original_image *= gt_alpha_mask.to(self.data_device) 50 | else: 51 | self.original_image *= torch.ones((1, self.image_height, self.image_width), device=self.data_device) 52 | 53 | self.zfar = 100.0 54 | self.znear = 0.01 55 | 56 | self.trans = trans 57 | self.scale = scale 58 | 59 | self.world_view_transform = torch.tensor(getWorld2View2(R, T, trans, scale)).transpose(0, 1).cuda() 60 | self.projection_matrix = getProjectionMatrix(znear=self.znear, zfar=self.zfar, fovX=self.FoVx, fovY=self.FoVy).transpose(0,1).cuda() 61 | self.full_proj_transform = (self.world_view_transform.unsqueeze(0).bmm(self.projection_matrix.unsqueeze(0))).squeeze(0) 62 | self.camera_center = self.world_view_transform.inverse()[3, :3] 63 | 64 | self.original_depth = original_depth 65 | 66 | 67 | class MiniCam: 68 | def __init__(self, width, height, fovy, fovx, znear, zfar, world_view_transform, full_proj_transform): 69 | self.image_width = width 70 | self.image_height = height 71 | self.FoVy = fovy 72 | self.FoVx = fovx 73 | self.znear = znear 74 | self.zfar = zfar 75 | self.world_view_transform = world_view_transform 76 | self.full_proj_transform = full_proj_transform 77 | view_inv = torch.inverse(self.world_view_transform) 78 | self.camera_center = view_inv[3][:3] 79 | 80 | -------------------------------------------------------------------------------- /scene/dataset_readers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from PIL import Image 4 | from typing import NamedTuple 5 | 6 | from scene.gaussian_model import BasicPointCloud 7 | from scene.cameras import MiniCam, Camera 8 | from utils.graphics import getWorld2View2, focal2fov, fov2focal 9 | from utils.graphics import getProjectionMatrix 10 | from utils.trajectory import get_camerapaths 11 | 12 | 13 | class CameraInfo(NamedTuple): 14 | uid: int 15 | R: np.array 16 | T: np.array 17 | FovY: np.array 18 | FovX: np.array 19 | image: np.array 20 | image_path: str 21 | image_name: str 22 | width: int 23 | height: int 24 | 25 | 26 | class SceneInfo(NamedTuple): 27 | point_cloud: BasicPointCloud 28 | train_cameras: list 29 | test_cameras: list 30 | preset_cameras: list 31 | nerf_normalization: dict 32 | ply_path: str 
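# Illustrative helper (not referenced elsewhere in this file): how the
# per-camera transforms consumed by the rasterizer are assembled from R, T and
# the two FoVs, mirroring scene/cameras.py above but with the .cuda() calls
# omitted. It relies on the getWorld2View2 / getProjectionMatrix imports at the
# top of this file; the znear/zfar defaults match the values used in this repo.
def _example_build_camera_transforms(R, T, FovX, FovY, znear=0.01, zfar=100.0):
    world_view = torch.tensor(getWorld2View2(R, T)).transpose(0, 1)
    projection = getProjectionMatrix(znear=znear, zfar=zfar, fovX=FovX, fovY=FovY).transpose(0, 1)
    # Row-vector convention: full projection = world-to-view followed by projection.
    full_proj = world_view.unsqueeze(0).bmm(projection.unsqueeze(0)).squeeze(0)
    # The camera center in world space is the translation row of the inverse view matrix.
    camera_center = world_view.inverse()[3, :3]
    return world_view, full_proj, camera_center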
33 | 34 | 35 | def getNerfppNorm(cam_info): 36 | def get_center_and_diag(cam_centers): 37 | cam_centers = np.hstack(cam_centers) 38 | avg_cam_center = np.mean(cam_centers, axis=1, keepdims=True) 39 | center = avg_cam_center 40 | dist = np.linalg.norm(cam_centers - center, axis=0, keepdims=True) 41 | diagonal = np.max(dist) 42 | return center.flatten(), diagonal 43 | 44 | cam_centers = [] 45 | 46 | for cam in cam_info: 47 | W2C = getWorld2View2(cam.R, cam.T) 48 | C2W = np.linalg.inv(W2C) 49 | cam_centers.append(C2W[:3, 3:4]) 50 | 51 | center, diagonal = get_center_and_diag(cam_centers) 52 | radius = diagonal * 1.1 53 | 54 | translate = -center 55 | 56 | return {"translate": translate, "radius": radius} 57 | 58 | 59 | 60 | def loadCamerasFromData(traindata, white_background): 61 | cameras = [] 62 | 63 | fovx = traindata["camera_angle_x"] 64 | frames = traindata["frames"] 65 | for idx, frame in enumerate(frames): 66 | # NeRF 'transform_matrix' is a camera-to-world transform 67 | c2w = np.array(frame["transform_matrix"]) 68 | # change from OpenGL/Blender camera axes (Y up, Z back) to COLMAP (Y down, Z forward) 69 | c2w[:3, 1:3] *= -1 70 | 71 | # get the world-to-camera transform and set R, T 72 | w2c = np.linalg.inv(c2w) 73 | R = np.transpose(w2c[:3,:3]) # R is stored transposed due to 'glm' in CUDA code 74 | T = w2c[:3, 3] 75 | 76 | image = frame["image"] if "image" in frame else None 77 | im_data = np.array(image.convert("RGBA")) 78 | 79 | bg = np.array([1,1,1]) if white_background else np.array([0, 0, 0]) 80 | 81 | norm_data = im_data / 255.0 82 | arr = norm_data[:,:,:3] * norm_data[:, :, 3:4] + bg * (1 - norm_data[:, :, 3:4]) 83 | image = Image.fromarray(np.array(arr*255.0, dtype=np.byte), "RGB") 84 | loaded_mask = np.ones_like(norm_data[:, :, 3:4]) 85 | 86 | fovy = focal2fov(fov2focal(fovx, image.size[1]), image.size[0]) 87 | FovY = fovy 88 | FovX = fovx 89 | 90 | image = torch.Tensor(arr).permute(2,0,1) # [3, 512, 512] 91 | loaded_mask = None #torch.Tensor(loaded_mask).permute(2,0,1) 92 | 93 | # depth 94 | original_depth = torch.Tensor(frame["depth"]) if frame["depth"] is not None else None 95 | 96 | cameras.append(Camera(colmap_id=idx, R=R, T=T, FoVx=FovX, FoVy=FovY, image=image, original_depth=original_depth, 97 | gt_alpha_mask=loaded_mask, image_name='', uid=idx, data_device='cuda')) 98 | 99 | return cameras 100 | 101 | 102 | def loadCameraPreset(traindata, presetdata): 103 | cam_infos = {} 104 | ## camera setting (for H, W and focal) 105 | fovx = traindata["camera_angle_x"] * 0.95 106 | W, H = traindata["frames"][0]["image"].size 107 | 108 | for camkey in presetdata: 109 | cam_infos[camkey] = [] 110 | for idx, frame in enumerate(presetdata[camkey]["frames"]): 111 | # NeRF 'transform_matrix' is a camera-to-world transform 112 | c2w = np.array(frame["transform_matrix"]) 113 | # change from OpenGL/Blender camera axes (Y up, Z back) to COLMAP (Y down, Z forward) 114 | c2w[:3, 1:3] *= -1 115 | 116 | # get the world-to-camera transform and set R, T 117 | w2c = np.linalg.inv(c2w) 118 | R = np.transpose(w2c[:3,:3]) # R is stored transposed due to 'glm' in CUDA code 119 | T = w2c[:3, 3] 120 | 121 | fovy = focal2fov(fov2focal(fovx, W), H) 122 | FovY = fovy 123 | FovX = fovx 124 | 125 | znear, zfar = 0.01, 100 126 | world_view_transform = torch.tensor(getWorld2View2(R, T, np.array([0.0, 0.0, 0.0]), 1.0)).transpose(0, 1).cuda() 127 | projection_matrix = getProjectionMatrix(znear=znear, zfar=zfar, fovX=FovX, fovY=FovY).transpose(0,1).cuda() 128 | full_proj_transform = 
(world_view_transform.unsqueeze(0).bmm(projection_matrix.unsqueeze(0))).squeeze(0) 129 | 130 | cam_infos[camkey].append(MiniCam(width=W, height=H, fovy=FovY, fovx=FovX, znear=znear, zfar=zfar, 131 | world_view_transform=world_view_transform, full_proj_transform=full_proj_transform)) 132 | 133 | return cam_infos 134 | 135 | 136 | 137 | def readDataInfo(traindata, white_background, eval): 138 | print("Reading Training Transforms") 139 | 140 | train_cameras = loadCamerasFromData(traindata, white_background) 141 | preset_minicams = loadCameraPreset(traindata, presetdata=get_camerapaths()) 142 | 143 | nerf_normalization = getNerfppNorm(train_cameras) 144 | 145 | pcd = BasicPointCloud(points=traindata['pcd_points'].T, colors=traindata['pcd_colors'], normals=None) 146 | 147 | 148 | scene_info = SceneInfo(point_cloud=pcd, 149 | train_cameras=train_cameras, 150 | test_cameras=[], 151 | preset_cameras=preset_minicams, 152 | nerf_normalization=nerf_normalization, 153 | ply_path='') 154 | return scene_info -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | cmake_minimum_required(VERSION 3.20) 13 | 14 | project(DiffRast LANGUAGES CUDA CXX) 15 | 16 | set(CMAKE_CXX_STANDARD 17) 17 | set(CMAKE_CXX_EXTENSIONS OFF) 18 | set(CMAKE_CUDA_STANDARD 17) 19 | 20 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") 21 | 22 | add_library(CudaRasterizer 23 | cuda_rasterizer/backward.h 24 | cuda_rasterizer/backward.cu 25 | cuda_rasterizer/forward.h 26 | cuda_rasterizer/forward.cu 27 | cuda_rasterizer/auxiliary.h 28 | cuda_rasterizer/rasterizer_impl.cu 29 | cuda_rasterizer/rasterizer_impl.h 30 | cuda_rasterizer/rasterizer.h 31 | ) 32 | 33 | set_target_properties(CudaRasterizer PROPERTIES CUDA_ARCHITECTURES "70;75;86") 34 | 35 | target_include_directories(CudaRasterizer PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/cuda_rasterizer) 36 | target_include_directories(CudaRasterizer PRIVATE third_party/glm ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) 37 | -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization/LICENSE.md: -------------------------------------------------------------------------------- 1 | Gaussian-Splatting License 2 | =========================== 3 | 4 | **Inria** and **the Max Planck Institut for Informatik (MPII)** hold all the ownership rights on the *Software* named **gaussian-splatting**. 5 | The *Software* is in the process of being registered with the Agence pour la Protection des 6 | Programmes (APP). 7 | 8 | The *Software* is still being developed by the *Licensor*. 9 | 10 | *Licensor*'s goal is to allow the research community to use, test and evaluate 11 | the *Software*. 12 | 13 | ## 1. Definitions 14 | 15 | *Licensee* means any person or entity that uses the *Software* and distributes 16 | its *Work*. 17 | 18 | *Licensor* means the owners of the *Software*, i.e Inria and MPII 19 | 20 | *Software* means the original work of authorship made available under this 21 | License ie gaussian-splatting. 
22 | 23 | *Work* means the *Software* and any additions to or derivative works of the 24 | *Software* that are made available under this License. 25 | 26 | 27 | ## 2. Purpose 28 | This license is intended to define the rights granted to the *Licensee* by 29 | Licensors under the *Software*. 30 | 31 | ## 3. Rights granted 32 | 33 | For the above reasons Licensors have decided to distribute the *Software*. 34 | Licensors grant non-exclusive rights to use the *Software* for research purposes 35 | to research users (both academic and industrial), free of charge, without right 36 | to sublicense.. The *Software* may be used "non-commercially", i.e., for research 37 | and/or evaluation purposes only. 38 | 39 | Subject to the terms and conditions of this License, you are granted a 40 | non-exclusive, royalty-free, license to reproduce, prepare derivative works of, 41 | publicly display, publicly perform and distribute its *Work* and any resulting 42 | derivative works in any form. 43 | 44 | ## 4. Limitations 45 | 46 | **4.1 Redistribution.** You may reproduce or distribute the *Work* only if (a) you do 47 | so under this License, (b) you include a complete copy of this License with 48 | your distribution, and (c) you retain without modification any copyright, 49 | patent, trademark, or attribution notices that are present in the *Work*. 50 | 51 | **4.2 Derivative Works.** You may specify that additional or different terms apply 52 | to the use, reproduction, and distribution of your derivative works of the *Work* 53 | ("Your Terms") only if (a) Your Terms provide that the use limitation in 54 | Section 2 applies to your derivative works, and (b) you identify the specific 55 | derivative works that are subject to Your Terms. Notwithstanding Your Terms, 56 | this License (including the redistribution requirements in Section 3.1) will 57 | continue to apply to the *Work* itself. 58 | 59 | **4.3** Any other use without of prior consent of Licensors is prohibited. Research 60 | users explicitly acknowledge having received from Licensors all information 61 | allowing to appreciate the adequacy between of the *Software* and their needs and 62 | to undertake all necessary precautions for its execution and use. 63 | 64 | **4.4** The *Software* is provided both as a compiled library file and as source 65 | code. In case of using the *Software* for a publication or other results obtained 66 | through the use of the *Software*, users are strongly encouraged to cite the 67 | corresponding publications as explained in the documentation of the *Software*. 68 | 69 | ## 5. Disclaimer 70 | 71 | THE USER CANNOT USE, EXPLOIT OR DISTRIBUTE THE *SOFTWARE* FOR COMMERCIAL PURPOSES 72 | WITHOUT PRIOR AND EXPLICIT CONSENT OF LICENSORS. YOU MUST CONTACT INRIA FOR ANY 73 | UNAUTHORIZED USE: stip-sophia.transfert@inria.fr . ANY SUCH ACTION WILL 74 | CONSTITUTE A FORGERY. THIS *SOFTWARE* IS PROVIDED "AS IS" WITHOUT ANY WARRANTIES 75 | OF ANY NATURE AND ANY EXPRESS OR IMPLIED WARRANTIES, WITH REGARDS TO COMMERCIAL 76 | USE, PROFESSIONNAL USE, LEGAL OR NOT, OR OTHER, OR COMMERCIALISATION OR 77 | ADAPTATION. 
UNLESS EXPLICITLY PROVIDED BY LAW, IN NO EVENT, SHALL INRIA OR THE 78 | AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 79 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 80 | GOODS OR SERVICES, LOSS OF USE, DATA, OR PROFITS OR BUSINESS INTERRUPTION) 81 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 82 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING FROM, OUT OF OR 83 | IN CONNECTION WITH THE *SOFTWARE* OR THE USE OR OTHER DEALINGS IN THE *SOFTWARE*. 84 | -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization/README.md: -------------------------------------------------------------------------------- 1 | # Differential Gaussian Rasterization 2 | 3 | Used as the rasterization engine for the paper "3D Gaussian Splatting for Real-Time Rendering of Radiance Fields". If you can make use of it in your own research, please be so kind to cite us. 4 | 5 |
6 |
7 |

## BibTeX

8 |
@Article{kerbl3Dgaussians,
 9 |       author       = {Kerbl, Bernhard and Kopanas, Georgios and Leimk{\"u}hler, Thomas and Drettakis, George},
10 |       title        = {3D Gaussian Splatting for Real-Time Radiance Field Rendering},
11 |       journal      = {ACM Transactions on Graphics},
12 |       number       = {4},
13 |       volume       = {42},
14 |       month        = {July},
15 |       year         = {2023},
16 |       url          = {https://repo-sam.inria.fr/fungraph/3d-gaussian-splatting/}
17 | }
18 |
19 |
-------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization/cuda_rasterizer/auxiliary.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_AUXILIARY_H_INCLUDED 13 | #define CUDA_RASTERIZER_AUXILIARY_H_INCLUDED 14 | 15 | #include "config.h" 16 | #include "stdio.h" 17 | 18 | #define BLOCK_SIZE (BLOCK_X * BLOCK_Y) 19 | #define NUM_WARPS (BLOCK_SIZE/32) 20 | 21 | // Spherical harmonics coefficients 22 | __device__ const float SH_C0 = 0.28209479177387814f; 23 | __device__ const float SH_C1 = 0.4886025119029199f; 24 | __device__ const float SH_C2[] = { 25 | 1.0925484305920792f, 26 | -1.0925484305920792f, 27 | 0.31539156525252005f, 28 | -1.0925484305920792f, 29 | 0.5462742152960396f 30 | }; 31 | __device__ const float SH_C3[] = { 32 | -0.5900435899266435f, 33 | 2.890611442640554f, 34 | -0.4570457994644658f, 35 | 0.3731763325901154f, 36 | -0.4570457994644658f, 37 | 1.445305721320277f, 38 | -0.5900435899266435f 39 | }; 40 | 41 | __forceinline__ __device__ float ndc2Pix(float v, int S) 42 | { 43 | return ((v + 1.0) * S - 1.0) * 0.5; 44 | } 45 | 46 | __forceinline__ __device__ void getRect(const float2 p, int max_radius, uint2& rect_min, uint2& rect_max, dim3 grid) 47 | { 48 | rect_min = { 49 | min(grid.x, max((int)0, (int)((p.x - max_radius) / BLOCK_X))), 50 | min(grid.y, max((int)0, (int)((p.y - max_radius) / BLOCK_Y))) 51 | }; 52 | rect_max = { 53 | min(grid.x, max((int)0, (int)((p.x + max_radius + BLOCK_X - 1) / BLOCK_X))), 54 | min(grid.y, max((int)0, (int)((p.y + max_radius + BLOCK_Y - 1) / BLOCK_Y))) 55 | }; 56 | } 57 | 58 | __forceinline__ __device__ float3 transformPoint4x3(const float3& p, const float* matrix) 59 | { 60 | float3 transformed = { 61 | matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z + matrix[12], 62 | matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z + matrix[13], 63 | matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z + matrix[14], 64 | }; 65 | return transformed; 66 | } 67 | 68 | __forceinline__ __device__ float4 transformPoint4x4(const float3& p, const float* matrix) 69 | { 70 | float4 transformed = { 71 | matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z + matrix[12], 72 | matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z + matrix[13], 73 | matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z + matrix[14], 74 | matrix[3] * p.x + matrix[7] * p.y + matrix[11] * p.z + matrix[15] 75 | }; 76 | return transformed; 77 | } 78 | 79 | __forceinline__ __device__ float3 transformVec4x3(const float3& p, const float* matrix) 80 | { 81 | float3 transformed = { 82 | matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z, 83 | matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z, 84 | matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z, 85 | }; 86 | return transformed; 87 | } 88 | 89 | __forceinline__ __device__ float3 transformVec4x3Transpose(const float3& p, const float* matrix) 90 | { 91 | float3 transformed = { 92 | matrix[0] * p.x + matrix[1] * p.y + matrix[2] * p.z, 93 | matrix[4] * p.x + matrix[5] * p.y + matrix[6] * p.z, 94 | matrix[8] * p.x + matrix[9] * p.y + matrix[10] * p.z, 95 | }; 96 | return transformed; 97 | } 98 | 
99 | __forceinline__ __device__ float dnormvdz(float3 v, float3 dv) 100 | { 101 | float sum2 = v.x * v.x + v.y * v.y + v.z * v.z; 102 | float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2); 103 | float dnormvdz = (-v.x * v.z * dv.x - v.y * v.z * dv.y + (sum2 - v.z * v.z) * dv.z) * invsum32; 104 | return dnormvdz; 105 | } 106 | 107 | __forceinline__ __device__ float3 dnormvdv(float3 v, float3 dv) 108 | { 109 | float sum2 = v.x * v.x + v.y * v.y + v.z * v.z; 110 | float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2); 111 | 112 | float3 dnormvdv; 113 | dnormvdv.x = ((+sum2 - v.x * v.x) * dv.x - v.y * v.x * dv.y - v.z * v.x * dv.z) * invsum32; 114 | dnormvdv.y = (-v.x * v.y * dv.x + (sum2 - v.y * v.y) * dv.y - v.z * v.y * dv.z) * invsum32; 115 | dnormvdv.z = (-v.x * v.z * dv.x - v.y * v.z * dv.y + (sum2 - v.z * v.z) * dv.z) * invsum32; 116 | return dnormvdv; 117 | } 118 | 119 | __forceinline__ __device__ float4 dnormvdv(float4 v, float4 dv) 120 | { 121 | float sum2 = v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w; 122 | float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2); 123 | 124 | float4 vdv = { v.x * dv.x, v.y * dv.y, v.z * dv.z, v.w * dv.w }; 125 | float vdv_sum = vdv.x + vdv.y + vdv.z + vdv.w; 126 | float4 dnormvdv; 127 | dnormvdv.x = ((sum2 - v.x * v.x) * dv.x - v.x * (vdv_sum - vdv.x)) * invsum32; 128 | dnormvdv.y = ((sum2 - v.y * v.y) * dv.y - v.y * (vdv_sum - vdv.y)) * invsum32; 129 | dnormvdv.z = ((sum2 - v.z * v.z) * dv.z - v.z * (vdv_sum - vdv.z)) * invsum32; 130 | dnormvdv.w = ((sum2 - v.w * v.w) * dv.w - v.w * (vdv_sum - vdv.w)) * invsum32; 131 | return dnormvdv; 132 | } 133 | 134 | __forceinline__ __device__ float sigmoid(float x) 135 | { 136 | return 1.0f / (1.0f + expf(-x)); 137 | } 138 | 139 | __forceinline__ __device__ bool in_frustum(int idx, 140 | const float* orig_points, 141 | const float* viewmatrix, 142 | const float* projmatrix, 143 | bool prefiltered, 144 | float3& p_view) 145 | { 146 | float3 p_orig = { orig_points[3 * idx], orig_points[3 * idx + 1], orig_points[3 * idx + 2] }; 147 | 148 | // Bring points to screen space 149 | float4 p_hom = transformPoint4x4(p_orig, projmatrix); 150 | float p_w = 1.0f / (p_hom.w + 0.0000001f); 151 | float3 p_proj = { p_hom.x * p_w, p_hom.y * p_w, p_hom.z * p_w }; 152 | p_view = transformPoint4x3(p_orig, viewmatrix); 153 | 154 | if (p_view.z <= 0.2f)// || ((p_proj.x < -1.3 || p_proj.x > 1.3 || p_proj.y < -1.3 || p_proj.y > 1.3))) 155 | { 156 | if (prefiltered) 157 | { 158 | printf("Point is filtered although prefiltered is set. This shouldn't happen!"); 159 | __trap(); 160 | } 161 | return false; 162 | } 163 | return true; 164 | } 165 | 166 | #define CHECK_CUDA(A, debug) \ 167 | A; if(debug) { \ 168 | auto ret = cudaDeviceSynchronize(); \ 169 | if (ret != cudaSuccess) { \ 170 | std::cerr << "\n[CUDA ERROR] in " << __FILE__ << "\nLine " << __LINE__ << ": " << cudaGetErrorString(ret); \ 171 | throw std::runtime_error(cudaGetErrorString(ret)); \ 172 | } \ 173 | } 174 | 175 | #endif -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization/cuda_rasterizer/backward.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 
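 *
 * Note on the helpers in auxiliary.h above: dnormvdz()/dnormvdv() evaluate the
 * directional derivative of the normalization map n(v) = v / |v| along a
 * perturbation dv, i.e. dn = (dv * |v|^2 - v * (v . dv)) / |v|^3; the float4
 * overload extends the same formula to 4-vectors (quaternions). These are the
 * kind of derivatives the backward kernels declared in this header need when
 * back-propagating through quantities that are normalized in the forward pass.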
8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_BACKWARD_H_INCLUDED 13 | #define CUDA_RASTERIZER_BACKWARD_H_INCLUDED 14 | 15 | #include 16 | #include "cuda_runtime.h" 17 | #include "device_launch_parameters.h" 18 | #define GLM_FORCE_CUDA 19 | #include 20 | 21 | namespace BACKWARD 22 | { 23 | void render( 24 | const dim3 grid, dim3 block, 25 | const uint2* ranges, 26 | const uint32_t* point_list, 27 | int W, int H, 28 | const float* bg_color, 29 | const float2* means2D, 30 | const float4* conic_opacity, 31 | const float3* means3D, 32 | const float* colors, 33 | const float* depths, 34 | const float* projmatrix, 35 | const float* final_Ts, 36 | const uint32_t* n_contrib, 37 | const float* dL_dpixels, 38 | const float* dL_depths, 39 | float3* dL_dmean2D, 40 | float4* dL_dconic2D, 41 | float3* dL_dmean3D, 42 | float* dL_dopacity, 43 | float* dL_dcolors); 44 | 45 | void preprocess( 46 | int P, int D, int M, 47 | const float3* means, 48 | const int* radii, 49 | const float* shs, 50 | const bool* clamped, 51 | const glm::vec3* scales, 52 | const glm::vec4* rotations, 53 | const float scale_modifier, 54 | const float* cov3Ds, 55 | const float* view, 56 | const float* proj, 57 | const float focal_x, float focal_y, 58 | const float tan_fovx, float tan_fovy, 59 | const glm::vec3* campos, 60 | const float3* dL_dmean2D, 61 | const float* dL_dconics, 62 | glm::vec3* dL_dmeans, 63 | float* dL_dcolor, 64 | float* dL_dcov3D, 65 | float* dL_dsh, 66 | glm::vec3* dL_dscale, 67 | glm::vec4* dL_drot); 68 | } 69 | 70 | #endif 71 | -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization/cuda_rasterizer/config.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_CONFIG_H_INCLUDED 13 | #define CUDA_RASTERIZER_CONFIG_H_INCLUDED 14 | 15 | #define NUM_CHANNELS 3 // Default 3, RGB 16 | #define BLOCK_X 16 17 | #define BLOCK_Y 16 18 | 19 | #endif -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization/cuda_rasterizer/forward.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_FORWARD_H_INCLUDED 13 | #define CUDA_RASTERIZER_FORWARD_H_INCLUDED 14 | 15 | #include 16 | #include "cuda_runtime.h" 17 | #include "device_launch_parameters.h" 18 | #define GLM_FORCE_CUDA 19 | #include 20 | 21 | namespace FORWARD 22 | { 23 | // Perform initial steps for each Gaussian prior to rasterization. 
24 | void preprocess(int P, int D, int M, 25 | const float* orig_points, 26 | const glm::vec3* scales, 27 | const float scale_modifier, 28 | const glm::vec4* rotations, 29 | const float* opacities, 30 | const float* shs, 31 | bool* clamped, 32 | const float* cov3D_precomp, 33 | const float* colors_precomp, 34 | const float* viewmatrix, 35 | const float* projmatrix, 36 | const glm::vec3* cam_pos, 37 | const int W, int H, 38 | const float focal_x, float focal_y, 39 | const float tan_fovx, float tan_fovy, 40 | int* radii, 41 | float2* points_xy_image, 42 | float* depths, 43 | float* cov3Ds, 44 | float* colors, 45 | float4* conic_opacity, 46 | const dim3 grid, 47 | uint32_t* tiles_touched, 48 | bool prefiltered); 49 | 50 | // Main rasterization method. 51 | void render( 52 | const dim3 grid, dim3 block, 53 | const uint2* ranges, 54 | const uint32_t* point_list, 55 | int W, int H, 56 | const float2* points_xy_image, 57 | const float* features, 58 | const float* depths, 59 | const float4* conic_opacity, 60 | float* final_T, 61 | uint32_t* n_contrib, 62 | const float* bg_color, 63 | float* out_color, 64 | float* out_depth); 65 | 66 | void filter_preprocess(int P, int M, 67 | const float* means3D, 68 | const glm::vec3* scales, 69 | const float scale_modifier, 70 | const glm::vec4* rotations, 71 | const float* cov3D_precomp, 72 | const float* viewmatrix, 73 | const float* projmatrix, 74 | const int W, int H, 75 | const float focal_x, float focal_y, 76 | const float tan_fovx, float tan_fovy, 77 | int* radii, 78 | float* cov3Ds, 79 | const dim3 grid, 80 | bool prefiltered); 81 | } 82 | 83 | 84 | #endif 85 | -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization/cuda_rasterizer/rasterizer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 
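 *
 * The FORWARD interface declared in forward.h above is the per-frame entry
 * point of the tile-based splatter: preprocess() projects every Gaussian,
 * derives its screen-space conic/opacity and counts the BLOCK_X x BLOCK_Y
 * (16x16, see config.h) tiles it touches, and render() then alpha-blends the
 * tile-sorted Gaussians into out_color and a per-pixel out_depth buffer.
 * filter_preprocess() computes only the projected radii/covariances and is
 * presumably what backs the visible_filter entry point declared below.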
8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_H_INCLUDED 13 | #define CUDA_RASTERIZER_H_INCLUDED 14 | 15 | #include 16 | #include 17 | 18 | namespace CudaRasterizer 19 | { 20 | class Rasterizer 21 | { 22 | public: 23 | 24 | static void markVisible( 25 | int P, 26 | float* means3D, 27 | float* viewmatrix, 28 | float* projmatrix, 29 | bool* present); 30 | 31 | static int forward( 32 | std::function geometryBuffer, 33 | std::function binningBuffer, 34 | std::function imageBuffer, 35 | const int P, int D, int M, 36 | const float* background, 37 | const int width, int height, 38 | const float* means3D, 39 | const float* shs, 40 | const float* colors_precomp, 41 | const float* opacities, 42 | const float* scales, 43 | const float scale_modifier, 44 | const float* rotations, 45 | const float* cov3D_precomp, 46 | const float* viewmatrix, 47 | const float* projmatrix, 48 | const float* cam_pos, 49 | const float tan_fovx, float tan_fovy, 50 | const bool prefiltered, 51 | float* out_color, 52 | float* out_depth, 53 | int* radii = nullptr, 54 | bool debug = false); 55 | 56 | 57 | static void visible_filter( 58 | std::function geometryBuffer, 59 | std::function binningBuffer, 60 | std::function imageBuffer, 61 | const int P, int M, 62 | const int width, int height, 63 | const float* means3D, 64 | const float* scales, 65 | const float scale_modifier, 66 | const float* rotations, 67 | const float* cov3D_precomp, 68 | const float* viewmatrix, 69 | const float* projmatrix, 70 | const float tan_fovx, float tan_fovy, 71 | const bool prefiltered, 72 | int* radii, 73 | bool debug); 74 | 75 | static void backward( 76 | const int P, int D, int M, int R, 77 | const float* background, 78 | const int width, int height, 79 | const float* means3D, 80 | const float* shs, 81 | const float* colors_precomp, 82 | const float* scales, 83 | const float scale_modifier, 84 | const float* rotations, 85 | const float* cov3D_precomp, 86 | const float* viewmatrix, 87 | const float* projmatrix, 88 | const float* campos, 89 | const float tan_fovx, float tan_fovy, 90 | const int* radii, 91 | char* geom_buffer, 92 | char* binning_buffer, 93 | char* image_buffer, 94 | const float* dL_dpix, 95 | const float* dL_depths, 96 | float* dL_dmean2D, 97 | float* dL_dconic, 98 | float* dL_dopacity, 99 | float* dL_dcolor, 100 | float* dL_dmean3D, 101 | float* dL_dcov3D, 102 | float* dL_dsh, 103 | float* dL_dscale, 104 | float* dL_drot, 105 | bool debug); 106 | }; 107 | }; 108 | 109 | #endif 110 | -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization/cuda_rasterizer/rasterizer_impl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 
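 *
 * A note on the Rasterizer interface declared in rasterizer.h above (an
 * interpretation, not stated in the headers): geometryBuffer, binningBuffer
 * and imageBuffer are resize callbacks, each receiving a byte count and
 * returning a pointer to at least that much scratch memory, so the caller
 * (here the PyTorch extension in this submodule) owns the temporary
 * allocations. The GeometryState / BinningState / ImageState structs below
 * describe how that scratch memory is laid out.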
8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #pragma once 13 | 14 | #include 15 | #include 16 | #include "rasterizer.h" 17 | #include 18 | 19 | namespace CudaRasterizer 20 | { 21 | template 22 | static void obtain(char*& chunk, T*& ptr, std::size_t count, std::size_t alignment) 23 | { 24 | std::size_t offset = (reinterpret_cast(chunk) + alignment - 1) & ~(alignment - 1); 25 | ptr = reinterpret_cast(offset); 26 | chunk = reinterpret_cast(ptr + count); 27 | } 28 | 29 | struct GeometryState 30 | { 31 | size_t scan_size; 32 | float* depths; 33 | char* scanning_space; 34 | bool* clamped; 35 | int* internal_radii; 36 | float2* means2D; 37 | float* cov3D; 38 | float4* conic_opacity; 39 | float* rgb; 40 | uint32_t* point_offsets; 41 | uint32_t* tiles_touched; 42 | 43 | static GeometryState fromChunk(char*& chunk, size_t P); 44 | }; 45 | 46 | struct ImageState 47 | { 48 | uint2* ranges; 49 | uint32_t* n_contrib; 50 | float* accum_alpha; 51 | 52 | static ImageState fromChunk(char*& chunk, size_t N); 53 | }; 54 | 55 | struct BinningState 56 | { 57 | size_t sorting_size; 58 | uint64_t* point_list_keys_unsorted; 59 | uint64_t* point_list_keys; 60 | uint32_t* point_list_unsorted; 61 | uint32_t* point_list; 62 | char* list_sorting_space; 63 | 64 | static BinningState fromChunk(char*& chunk, size_t P); 65 | }; 66 | 67 | template 68 | size_t required(size_t P) 69 | { 70 | char* size = nullptr; 71 | T::fromChunk(size, P); 72 | return ((size_t)size) + 128; 73 | } 74 | }; -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization/ext.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #include 13 | #include "rasterize_points.h" 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("rasterize_gaussians", &RasterizeGaussiansCUDA); 17 | m.def("rasterize_gaussians_backward", &RasterizeGaussiansBackwardCUDA); 18 | m.def("rasterize_aussians_filter", &RasterizeGaussiansfilterCUDA); 19 | m.def("mark_visible", &markVisible); 20 | } -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization/rasterize_points.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 
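 *
 * A worked example of the obtain() helper in rasterizer_impl.h above: with
 * alignment = 128 and the chunk pointer currently at address 1000, the offset
 * becomes (1000 + 127) & ~127 = 1024, so ptr is placed on the next 128-byte
 * boundary and chunk is advanced past count elements of T. required<T>(P)
 * replays the same layout pass starting from a null pointer to measure how
 * many bytes a state buffer for P elements needs, plus 128 bytes of slack for
 * the initial alignment.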
8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #pragma once 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | std::tuple 19 | RasterizeGaussiansCUDA( 20 | const torch::Tensor& background, 21 | const torch::Tensor& means3D, 22 | const torch::Tensor& colors, 23 | const torch::Tensor& opacity, 24 | const torch::Tensor& scales, 25 | const torch::Tensor& rotations, 26 | const float scale_modifier, 27 | const torch::Tensor& cov3D_precomp, 28 | const torch::Tensor& viewmatrix, 29 | const torch::Tensor& projmatrix, 30 | const float tan_fovx, 31 | const float tan_fovy, 32 | const int image_height, 33 | const int image_width, 34 | const torch::Tensor& sh, 35 | const int degree, 36 | const torch::Tensor& campos, 37 | const bool prefiltered, 38 | const bool debug); 39 | 40 | std::tuple 41 | RasterizeGaussiansBackwardCUDA( 42 | const torch::Tensor& background, 43 | const torch::Tensor& means3D, 44 | const torch::Tensor& radii, 45 | const torch::Tensor& colors, 46 | const torch::Tensor& scales, 47 | const torch::Tensor& rotations, 48 | const float scale_modifier, 49 | const torch::Tensor& cov3D_precomp, 50 | const torch::Tensor& viewmatrix, 51 | const torch::Tensor& projmatrix, 52 | const float tan_fovx, 53 | const float tan_fovy, 54 | const torch::Tensor& dL_dout_color, 55 | const torch::Tensor& dL_dout_depth, 56 | const torch::Tensor& sh, 57 | const int degree, 58 | const torch::Tensor& campos, 59 | const torch::Tensor& geomBuffer, 60 | const int R, 61 | const torch::Tensor& binningBuffer, 62 | const torch::Tensor& imageBuffer, 63 | const bool debug); 64 | 65 | torch::Tensor markVisible( 66 | torch::Tensor& means3D, 67 | torch::Tensor& viewmatrix, 68 | torch::Tensor& projmatrix); 69 | 70 | 71 | torch::Tensor 72 | RasterizeGaussiansfilterCUDA( 73 | const torch::Tensor& means3D, 74 | const torch::Tensor& scales, 75 | const torch::Tensor& rotations, 76 | const float scale_modifier, 77 | const torch::Tensor& cov3D_precomp, 78 | const torch::Tensor& viewmatrix, 79 | const torch::Tensor& projmatrix, 80 | const float tan_fovx, 81 | const float tan_fovy, 82 | const int image_height, 83 | const int image_width, 84 | const bool prefiltered, 85 | const bool debug); -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization/setup.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
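# Build note (an assumption based on the usual 3D Gaussian Splatting setup, not
# stated in this file): the extension is typically installed into the active
# environment with
#
#     pip install ./submodules/depth-diff-gaussian-rasterization
#
# which compiles the CUDA sources listed below against the local PyTorch/CUDA
# toolchain; the -I flag below expects the glm headers under third_party/glm/.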
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | from setuptools import setup 13 | from torch.utils.cpp_extension import CUDAExtension, BuildExtension 14 | import os 15 | os.path.dirname(os.path.abspath(__file__)) 16 | 17 | setup( 18 | name="depth_diff_gaussian_rasterization", 19 | packages=['depth_diff_gaussian_rasterization'], 20 | ext_modules=[ 21 | CUDAExtension( 22 | name="depth_diff_gaussian_rasterization._C", 23 | sources=[ 24 | "cuda_rasterizer/rasterizer_impl.cu", 25 | "cuda_rasterizer/forward.cu", 26 | "cuda_rasterizer/backward.cu", 27 | "rasterize_points.cu", 28 | "ext.cpp"], 29 | extra_compile_args={"nvcc": ["-I" + os.path.join(os.path.dirname(os.path.abspath(__file__)), "third_party/glm/")]}) 30 | ], 31 | cmdclass={ 32 | 'build_ext': BuildExtension 33 | } 34 | ) 35 | -------------------------------------------------------------------------------- /submodules/gridencoder/__init__.py: -------------------------------------------------------------------------------- 1 | from .grid import GridEncoder -------------------------------------------------------------------------------- /submodules/gridencoder/backend.py: -------------------------------------------------------------------------------- 1 | import os 2 | from torch.utils.cpp_extension import load 3 | 4 | _src_path = os.path.dirname(os.path.abspath(__file__)) 5 | 6 | nvcc_flags = [ 7 | '-O3', '-std=c++14', 8 | '-U__CUDA_NO_HALF_OPERATORS__', '-U__CUDA_NO_HALF_CONVERSIONS__', '-U__CUDA_NO_HALF2_OPERATORS__', 9 | ] 10 | 11 | if os.name == "posix": 12 | c_flags = ['-O3', '-std=c++14'] 13 | elif os.name == "nt": 14 | c_flags = ['/O2', '/std:c++17'] 15 | 16 | # find cl.exe 17 | def find_cl_path(): 18 | import glob 19 | for edition in ["Enterprise", "Professional", "BuildTools", "Community"]: 20 | paths = sorted(glob.glob(r"C:\\Program Files (x86)\\Microsoft Visual Studio\\*\\%s\\VC\\Tools\\MSVC\\*\\bin\\Hostx64\\x64" % edition), reverse=True) 21 | if paths: 22 | return paths[0] 23 | 24 | # If cl.exe is not on path, try to find it. 
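# find_cl_path() above scans the standard Visual Studio install locations and returns the newest MSVC Hostx64/x64 bin directory, or None if no install is found.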
25 | if os.system("where cl.exe >nul 2>nul") != 0: 26 | cl_path = find_cl_path() 27 | if cl_path is None: 28 | raise RuntimeError("Could not locate a supported Microsoft Visual C++ installation") 29 | os.environ["PATH"] += ";" + cl_path 30 | 31 | _backend = load(name='_grid_encoder', 32 | extra_cflags=c_flags, 33 | extra_cuda_cflags=nvcc_flags, 34 | sources=[os.path.join(_src_path, 'src', f) for f in [ 35 | 'gridencoder.cu', 36 | 'bindings.cpp', 37 | ]], 38 | ) 39 | 40 | __all__ = ['_backend'] -------------------------------------------------------------------------------- /submodules/gridencoder/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 4 | 5 | _src_path = os.path.dirname(os.path.abspath(__file__)) 6 | 7 | nvcc_flags = [ 8 | '-O3', '-std=c++14', 9 | '-U__CUDA_NO_HALF_OPERATORS__', '-U__CUDA_NO_HALF_CONVERSIONS__', '-U__CUDA_NO_HALF2_OPERATORS__', 10 | ] 11 | 12 | if os.name == "posix": 13 | c_flags = ['-O3', '-std=c++14'] 14 | elif os.name == "nt": 15 | c_flags = ['/O2', '/std:c++17'] 16 | 17 | # find cl.exe 18 | def find_cl_path(): 19 | import glob 20 | for edition in ["Enterprise", "Professional", "BuildTools", "Community"]: 21 | paths = sorted(glob.glob(r"C:\\Program Files (x86)\\Microsoft Visual Studio\\*\\%s\\VC\\Tools\\MSVC\\*\\bin\\Hostx64\\x64" % edition), reverse=True) 22 | if paths: 23 | return paths[0] 24 | 25 | # If cl.exe is not on path, try to find it. 26 | if os.system("where cl.exe >nul 2>nul") != 0: 27 | cl_path = find_cl_path() 28 | if cl_path is None: 29 | raise RuntimeError("Could not locate a supported Microsoft Visual C++ installation") 30 | os.environ["PATH"] += ";" + cl_path 31 | 32 | setup( 33 | name='gridencoder', # package name, import this to use python API 34 | ext_modules=[ 35 | CUDAExtension( 36 | name='_gridencoder', # extension name, import this to use CUDA API 37 | sources=[os.path.join(_src_path, 'src', f) for f in [ 38 | 'gridencoder.cu', 39 | 'bindings.cpp', 40 | ]], 41 | extra_compile_args={ 42 | 'cxx': c_flags, 43 | 'nvcc': nvcc_flags, 44 | } 45 | ), 46 | ], 47 | cmdclass={ 48 | 'build_ext': BuildExtension, 49 | } 50 | ) -------------------------------------------------------------------------------- /submodules/gridencoder/src/bindings.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "gridencoder.h" 4 | 5 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 6 | m.def("grid_encode_forward", &grid_encode_forward, "grid_encode_forward (CUDA)"); 7 | m.def("grid_encode_backward", &grid_encode_backward, "grid_encode_backward (CUDA)"); 8 | m.def("grid_encode_mix2D_forward", &grid_encode_mix2D_forward, "grid_encode_mix2D_forward (CUDA)"); 9 | m.def("grid_encode_mix2D_backward", &grid_encode_mix2D_backward, "grid_encode_mix2D_backward (CUDA)"); 10 | m.def("avg_2D_forward", &avg_2D_forward, "avg_2D_forward (CUDA)"); 11 | m.def("avg_2D_backward", &avg_2D_backward, "avg_2D_backward (CUDA)"); 12 | m.def("cnt_np_embed", &cnt_np_embed, "cnt_np_embed (CUDA)"); 13 | m.def("cnt_np_embed_backward", &cnt_np_embed_backward, "cnt_np_embed_backward (CUDA)"); 14 | } -------------------------------------------------------------------------------- /submodules/gridencoder/src/gridencoder.h: -------------------------------------------------------------------------------- 1 | #ifndef _HASH_ENCODE_H 2 | #define _HASH_ENCODE_H 3 | 4 | #include 5 | #include 
6 | 7 | // inputs: [N, num_dim], float, in [0, 1] 8 | // embeddings: [offsets[-1], n_features], float 9 | // offsets: [n_levels + 1], uint32_t 10 | // outputs: [N, n_levels * n_features], float 11 | 12 | void grid_encode_forward( 13 | const at::Tensor inputs, 14 | const at::Tensor embeddings, 15 | const at::Tensor offsets_list, 16 | const at::Tensor resolutions_list, 17 | at::Tensor outputs, 18 | const uint32_t N, const uint32_t num_dim, const uint32_t n_features, const uint32_t n_levels, const uint32_t max_level, const uint32_t Rb, const float PV, 19 | at::optional dy_dx, 20 | const at::optional binary_vxl, 21 | const at::optional min_level_id 22 | ); 23 | 24 | void grid_encode_backward( 25 | const at::Tensor grad, 26 | const at::Tensor inputs, 27 | const at::Tensor embeddings, 28 | const at::Tensor offsets_list, 29 | const at::Tensor resolutions_list, 30 | at::Tensor grad_embeddings, 31 | const uint32_t N, const uint32_t num_dim, const uint32_t n_features, const uint32_t n_levels, const uint32_t max_level, const uint32_t Rb, 32 | const at::optional dy_dx, 33 | at::optional grad_inputs, 34 | const at::optional binary_vxl, 35 | const at::optional min_level_id 36 | ); 37 | 38 | void grid_encode_mix2D_forward( 39 | const at::Tensor inputs_xy, const at::Tensor inputs_xz, const at::Tensor inputs_yz, 40 | const at::Tensor embeddings_xy, const at::Tensor embeddings_xz, const at::Tensor embeddings_yz, 41 | const at::Tensor offsets_list, 42 | const at::Tensor resolutions_list, 43 | at::Tensor outputs, 44 | const uint32_t N, const uint32_t num_dim, const uint32_t n_features, const uint32_t n_levels, const uint32_t max_level, const uint32_t Rb, const float PV, 45 | at::optional dy_dx, 46 | const at::optional binary_vxl_2D_xy, const at::optional binary_vxl_2D_xz, const at::optional binary_vxl_2D_yz, 47 | const at::optional min_level_id, const uint32_t xy_len, const uint32_t xz_len, const uint32_t yz_len 48 | ); 49 | 50 | void grid_encode_mix2D_backward( 51 | const at::Tensor grad, 52 | const at::Tensor inputs_xy, const at::Tensor inputs_xz, const at::Tensor inputs_yz, 53 | const at::Tensor embeddings_xy, const at::Tensor embeddings_xz, const at::Tensor embeddings_yz, 54 | const at::Tensor offsets_list, 55 | const at::Tensor resolutions_list, 56 | at::Tensor grad_embeddings, 57 | const uint32_t N, const uint32_t num_dim, const uint32_t n_features, const uint32_t n_levels, const uint32_t max_level, const uint32_t Rb, 58 | const at::optional dy_dx, 59 | at::optional grad_inputs, 60 | const at::optional binary_vxl_2D_xy, const at::optional binary_vxl_2D_xz, const at::optional binary_vxl_2D_yz, 61 | const at::optional min_level_id, 62 | const uint32_t xy_len, const uint32_t xz_len, const uint32_t yz_len, 63 | const uint32_t exy_len, const uint32_t exz_len, const uint32_t eyz_len 64 | ); 65 | 66 | void avg_2D_forward( 67 | const at::Tensor inputs, 68 | const at::Tensor embeddings, 69 | const at::Tensor offsets_list, 70 | const at::Tensor resolutions_list, 71 | at::Tensor outputs, 72 | const uint32_t N, const uint32_t n_features, const uint32_t n_levels, const uint32_t Rb, const uint32_t ref_scale, 73 | const at::Tensor binary_vxl 74 | ); 75 | 76 | void avg_2D_backward( 77 | const at::Tensor grad, 78 | const at::Tensor inputs, 79 | const at::Tensor embeddings, 80 | const at::Tensor offsets_list, 81 | const at::Tensor resolutions_list, 82 | at::Tensor grad_embeddings, 83 | const uint32_t N, const uint32_t n_features, const uint32_t n_levels, const uint32_t Rb, const uint32_t ref_scale, 84 | const at::Tensor 
binary_vxl 85 | ); 86 | 87 | void cnt_np_embed( 88 | const at::Tensor inputs, // [N, 4*4*4, 3] 89 | const at::Tensor embeddings_clip, // [520000, 4] 90 | at::Tensor outputs, // [512, 512, 4, 2] 91 | const uint32_t N, const uint32_t resolution, const uint32_t n_features, const uint32_t hashmap_size, const uint32_t axis 92 | ); 93 | 94 | void cnt_np_embed_backward( 95 | const at::Tensor inputs, // [N, 4*4*4, 3] 96 | const at::Tensor embeddings_clip, // [520000, 4] 97 | const at::Tensor outputs_sum, // [512, 512, 4, 1] 98 | const at::Tensor grad, // [512, 512, 4, 2] 99 | at::Tensor grad_embeddings, // [520000, 4] 100 | const uint32_t N, const uint32_t resolution, const uint32_t n_features, const uint32_t hashmap_size, const uint32_t axis 101 | ); 102 | 103 | #endif -------------------------------------------------------------------------------- /submodules/gridencoder/src/try.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparklingH/BloomScene/93c5855428895a19216685c2f3fb8bc75805dad4/submodules/gridencoder/src/try.cu -------------------------------------------------------------------------------- /submodules/simple-knn/ext.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #include 13 | #include "spatial.h" 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("distCUDA2", &distCUDA2); 17 | } 18 | -------------------------------------------------------------------------------- /submodules/simple-knn/setup.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | from setuptools import setup 13 | from torch.utils.cpp_extension import CUDAExtension, BuildExtension 14 | import os 15 | 16 | cxx_compiler_flags = [] 17 | 18 | if os.name == 'nt': 19 | cxx_compiler_flags.append("/wd4624") 20 | 21 | setup( 22 | name="simple_knn", 23 | ext_modules=[ 24 | CUDAExtension( 25 | name="simple_knn._C", 26 | sources=[ 27 | "spatial.cu", 28 | "simple_knn.cu", 29 | "ext.cpp"], 30 | extra_compile_args={"nvcc": [], "cxx": cxx_compiler_flags}) 31 | ], 32 | cmdclass={ 33 | 'build_ext': BuildExtension 34 | } 35 | ) 36 | -------------------------------------------------------------------------------- /submodules/simple-knn/simple_knn.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 
8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef SIMPLEKNN_H_INCLUDED 13 | #define SIMPLEKNN_H_INCLUDED 14 | 15 | class SimpleKNN 16 | { 17 | public: 18 | static void knn(int P, float3* points, float* meanDists); 19 | }; 20 | 21 | #endif -------------------------------------------------------------------------------- /submodules/simple-knn/simple_knn/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparklingH/BloomScene/93c5855428895a19216685c2f3fb8bc75805dad4/submodules/simple-knn/simple_knn/.gitkeep -------------------------------------------------------------------------------- /submodules/simple-knn/spatial.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #include "spatial.h" 13 | #include "simple_knn.h" 14 | 15 | torch::Tensor 16 | distCUDA2(const torch::Tensor& points) 17 | { 18 | const int P = points.size(0); 19 | 20 | auto float_opts = points.options().dtype(torch::kFloat32); 21 | torch::Tensor means = torch::full({P}, 0.0, float_opts); 22 | 23 | SimpleKNN::knn(P, (float3*)points.contiguous().data<float>(), means.contiguous().data<float>()); 24 | 25 | return means; 26 | } -------------------------------------------------------------------------------- /submodules/simple-knn/spatial.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #include <torch/extension.h> 13 | 14 | torch::Tensor distCUDA2(const torch::Tensor& points); -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparklingH/BloomScene/93c5855428895a19216685c2f3fb8bc75805dad4/utils/__init__.py -------------------------------------------------------------------------------- /utils/camera.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file.
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | import json 12 | import numpy as np 13 | import torch 14 | 15 | from scene.cameras import Camera, MiniCam 16 | from utils.general import PILtoTorch 17 | from utils.graphics import fov2focal, focal2fov, getWorld2View, getProjectionMatrix 18 | 19 | 20 | WARNED = False 21 | 22 | 23 | def load_json(path, H, W): 24 | cams = [] 25 | with open(path) as json_file: 26 | contents = json.load(json_file) 27 | FoVx = contents["camera_angle_x"] 28 | FoVy = focal2fov(fov2focal(FoVx, W), H) 29 | zfar = 100.0 30 | znear = 0.01 31 | 32 | frames = contents["frames"] 33 | for idx, frame in enumerate(frames): 34 | # NeRF 'transform_matrix' is a camera-to-world transform 35 | c2w = np.array(frame["transform_matrix"]) 36 | # change from OpenGL/Blender camera axes (Y up, Z back) to COLMAP (Y down, Z forward) 37 | c2w[:3, 1:3] *= -1 38 | if c2w.shape[0] == 3: 39 | one = np.zeros((1, 4)) 40 | one[0, -1] = 1 41 | c2w = np.concatenate((c2w, one), axis=0) 42 | 43 | # get the world-to-camera transform and set R, T 44 | w2c = np.linalg.inv(c2w) 45 | R = np.transpose(w2c[:3, :3]) # R is stored transposed due to 'glm' in CUDA code 46 | T = w2c[:3, 3] 47 | 48 | w2c = torch.as_tensor(getWorld2View(R, T)).T.cuda() 49 | proj = getProjectionMatrix(znear, zfar, FoVx, FoVy).T.cuda() 50 | cams.append(MiniCam(W, H, FoVx, FoVy, znear, zfar, w2c, w2c @ proj)) 51 | return cams 52 | 53 | 54 | def loadCam(args, id, cam_info, resolution_scale): 55 | orig_w, orig_h = cam_info.image.size 56 | 57 | if args.resolution in [1, 2, 4, 8]: 58 | resolution = round(orig_w/(resolution_scale * args.resolution)), round(orig_h/(resolution_scale * args.resolution)) 59 | else: # should be a type that converts to float 60 | if args.resolution == -1: 61 | if orig_w > 1600: 62 | global WARNED 63 | if not WARNED: 64 | print("[ INFO ] Encountered quite large input images (>1.6K pixels width), rescaling to 1.6K.\n " 65 | "If this is not desired, please explicitly specify '--resolution/-r' as 1") 66 | WARNED = True 67 | global_down = orig_w / 1600 68 | else: 69 | global_down = 1 70 | else: 71 | global_down = orig_w / args.resolution 72 | 73 | scale = float(global_down) * float(resolution_scale) 74 | resolution = (int(orig_w / scale), int(orig_h / scale)) 75 | 76 | resized_image_rgb = PILtoTorch(cam_info.image, resolution) 77 | 78 | gt_image = resized_image_rgb[:3, ...] 79 | loaded_mask = None 80 | 81 | if resized_image_rgb.shape[1] == 4: 82 | loaded_mask = resized_image_rgb[3:4, ...] 
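# For RGBA inputs, the alpha channel is kept as the gt_alpha_mask passed to Camera below; RGB inputs leave loaded_mask as None.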
83 | 84 | return Camera(colmap_id=cam_info.uid, R=cam_info.R, T=cam_info.T, 85 | FoVx=cam_info.FovX, FoVy=cam_info.FovY, 86 | image=gt_image, gt_alpha_mask=loaded_mask, 87 | image_name=cam_info.image_name, uid=id, data_device=args.data_device) 88 | 89 | 90 | def cameraList_from_camInfos(cam_infos, resolution_scale, args): 91 | camera_list = [] 92 | 93 | for id, c in enumerate(cam_infos): 94 | camera_list.append(loadCam(args, id, c, resolution_scale)) 95 | 96 | return camera_list 97 | 98 | 99 | def camera_to_JSON(id, camera : Camera): 100 | Rt = np.zeros((4, 4)) 101 | Rt[:3, :3] = camera.R.transpose() 102 | Rt[:3, 3] = camera.T 103 | Rt[3, 3] = 1.0 104 | 105 | W2C = np.linalg.inv(Rt) 106 | pos = W2C[:3, 3] 107 | rot = W2C[:3, :3] 108 | serializable_array_2d = [x.tolist() for x in rot] 109 | camera_entry = { 110 | 'id' : id, 111 | 'img_name' : camera.image_name, 112 | 'width' : camera.width, 113 | 'height' : camera.height, 114 | 'position': pos.tolist(), 115 | 'rotation': serializable_array_2d, 116 | 'fy' : fov2focal(camera.FovY, camera.height), 117 | 'fx' : fov2focal(camera.FovX, camera.width) 118 | } 119 | return camera_entry 120 | -------------------------------------------------------------------------------- /utils/depth.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | import matplotlib.cm 3 | import numpy as np 4 | import torch 5 | 6 | 7 | def colorize(value, vmin=None, vmax=None, cmap='jet', invalid_val=-99, invalid_mask=None, background_color=(128, 128, 128, 255), gamma_corrected=False, value_transform=None): 8 | """Converts a depth map to a color image. 9 | 10 | Args: 11 | value (torch.Tensor, numpy.ndarry): Input depth map. Shape: (H, W) or (1, H, W) or (1, 1, H, W). All singular dimensions are squeezed 12 | vmin (float, optional): vmin-valued entries are mapped to start color of cmap. If None, value.min() is used. Defaults to None. 13 | vmax (float, optional): vmax-valued entries are mapped to end color of cmap. If None, value.max() is used. Defaults to None. 14 | cmap (str, optional): matplotlib colormap to use. Defaults to 'magma_r'. 15 | invalid_val (int, optional): Specifies value of invalid pixels that should be colored as 'background_color'. Defaults to -99. 16 | invalid_mask (numpy.ndarray, optional): Boolean mask for invalid regions. Defaults to None. 17 | background_color (tuple[int], optional): 4-tuple RGB color to give to invalid pixels. Defaults to (128, 128, 128, 255). 18 | gamma_corrected (bool, optional): Apply gamma correction to colored image. Defaults to False. 19 | value_transform (Callable, optional): Apply transform function to valid pixels before coloring. Defaults to None. 20 | 21 | Returns: 22 | numpy.ndarray, dtype - uint8: Colored depth map. Shape: (H, W, 4) 23 | """ 24 | if isinstance(value, torch.Tensor): 25 | value = value.detach().cpu().numpy() 26 | 27 | value = value.squeeze() 28 | if invalid_mask is None: 29 | invalid_mask = value == invalid_val 30 | mask = np.logical_not(invalid_mask) 31 | 32 | # normalize 33 | vmin = np.percentile(value[mask],2) if vmin is None else vmin 34 | vmax = np.percentile(value[mask],98) if vmax is None else vmax 35 | if vmin != vmax: 36 | value = (value - vmin) / (vmax - vmin) # vmin..vmax 37 | else: 38 | # Avoid 0-division 39 | value = value * 0. 
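# vmin == vmax: the depth map is constant, so every valid pixel maps to the low end of the colormap.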
40 | 41 | # squeeze last dim if it exists 42 | # grey out the invalid values 43 | 44 | value[invalid_mask] = np.nan 45 | cmapper = matplotlib.cm.get_cmap(cmap) 46 | if value_transform: 47 | value = value_transform(value) 48 | # value = value / value.max() 49 | value = cmapper(value, bytes=True) # (nxmx4) 50 | 51 | # img = value[:, :, :] 52 | img = value[...] 53 | img[invalid_mask] = background_color 54 | 55 | # return img.transpose((2, 0, 1)) 56 | if gamma_corrected: 57 | # gamma correction 58 | img = img / 255 59 | img = np.power(img, 2.2) 60 | img = img * 255 61 | img = img.astype(np.uint8) 62 | return img -------------------------------------------------------------------------------- /utils/entropy_models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as nnf 4 | import numpy as np 5 | from torch.distributions.uniform import Uniform 6 | from utils.encodings import use_clamp 7 | 8 | 9 | 10 | class Entropy_gaussian(nn.Module): 11 | def __init__(self, Q=1): 12 | super(Entropy_gaussian, self).__init__() 13 | self.Q = Q 14 | def forward(self, x, mean, scale, Q=None, x_mean=None): 15 | if Q is None: # Q=1 16 | Q = self.Q 17 | if use_clamp: # True 18 | if x_mean is None: 19 | x_mean = x.mean() 20 | x_min = x_mean - 15_000 * Q 21 | x_max = x_mean + 15_000 * Q 22 | x = torch.clamp(x, min=x_min.detach(), max=x_max.detach()) 23 | scale = torch.clamp(scale, min=1e-9) 24 | m1 = torch.distributions.normal.Normal(mean, scale) 25 | lower = m1.cdf(x - 0.5*Q) 26 | upper = m1.cdf(x + 0.5*Q) 27 | likelihood = torch.abs(upper - lower) 28 | likelihood = Low_bound.apply(likelihood) 29 | 30 | bits = -torch.log2(likelihood) 31 | return bits 32 | 33 | 34 | 35 | class Low_bound(torch.autograd.Function): 36 | @staticmethod 37 | def forward(ctx, x): 38 | ctx.save_for_backward(x) 39 | x = torch.clamp(x, min=1e-6) 40 | return x 41 | 42 | @staticmethod 43 | def backward(ctx, g): 44 | x, = ctx.saved_tensors 45 | grad1 = g.clone() 46 | grad1[x < 1e-6] = 0 47 | pass_through_if = np.logical_or( 48 | x.cpu().numpy() >= 1e-6, g.cpu().numpy() < 0.0) 49 | t = torch.Tensor(pass_through_if+0.0).cuda() 50 | return grad1 * t 51 | 52 | 53 | -------------------------------------------------------------------------------- /utils/general.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | import sys 12 | import random 13 | from datetime import datetime 14 | import numpy as np 15 | import torch 16 | 17 | 18 | def inverse_sigmoid(x): 19 | return torch.log(x/(1-x)) 20 | 21 | 22 | def PILtoTorch(pil_image, resolution): 23 | resized_image_PIL = pil_image.resize(resolution) 24 | resized_image = torch.from_numpy(np.array(resized_image_PIL)) / 255.0 25 | if len(resized_image.shape) == 3: 26 | return resized_image.permute(2, 0, 1) 27 | else: 28 | return resized_image.unsqueeze(dim=-1).permute(2, 0, 1) 29 | 30 | 31 | 32 | 33 | def get_expon_lr_func( 34 | lr_init, lr_final, lr_delay_steps=0, lr_delay_mult=1.0, max_steps=1000000, step_sub=0, 35 | ): 36 | """ 37 | Copied from Plenoxels 38 | 39 | Continuous learning rate decay function. 
Adapted from JaxNeRF 40 | The returned rate is lr_init when step=0 and lr_final when step=max_steps, and 41 | is log-linearly interpolated elsewhere (equivalent to exponential decay). 42 | If lr_delay_steps>0 then the learning rate will be scaled by some smooth 43 | function of lr_delay_mult, such that the initial learning rate is 44 | lr_init*lr_delay_mult at the beginning of optimization but will be eased back 45 | to the normal learning rate when steps>lr_delay_steps. 46 | :param conf: config subtree 'lr' or similar 47 | :param max_steps: int, the number of steps during optimization. 48 | :return HoF which takes step as input 49 | """ 50 | 51 | def helper(step): 52 | if step < 0 or (lr_init == 0.0 and lr_final == 0.0): 53 | # Disable this parameter 54 | return 0.0 55 | if lr_delay_steps > 0: 56 | # A kind of reverse cosine decay. 57 | delay_rate = lr_delay_mult + (1 - lr_delay_mult) * np.sin( 58 | 0.5 * np.pi * np.clip(step / lr_delay_steps, 0, 1) 59 | ) 60 | else: 61 | delay_rate = 1.0 62 | t = np.clip((step-step_sub) / (max_steps-step_sub), 0, 1) 63 | log_lerp = np.exp(np.log(lr_init) * (1 - t) + np.log(lr_final) * t) 64 | return delay_rate * log_lerp 65 | 66 | return helper 67 | 68 | def strip_lowerdiag(L): 69 | uncertainty = torch.zeros((L.shape[0], 6), dtype=torch.float, device="cuda") 70 | 71 | uncertainty[:, 0] = L[:, 0, 0] 72 | uncertainty[:, 1] = L[:, 0, 1] 73 | uncertainty[:, 2] = L[:, 0, 2] 74 | uncertainty[:, 3] = L[:, 1, 1] 75 | uncertainty[:, 4] = L[:, 1, 2] 76 | uncertainty[:, 5] = L[:, 2, 2] 77 | return uncertainty 78 | 79 | 80 | def strip_symmetric(sym): 81 | return strip_lowerdiag(sym) 82 | 83 | 84 | 85 | def build_rotation(r): 86 | norm = torch.sqrt(r[:,0]*r[:,0] + r[:,1]*r[:,1] + r[:,2]*r[:,2] + r[:,3]*r[:,3]) 87 | 88 | q = r / norm[:, None] 89 | 90 | R = torch.zeros((q.size(0), 3, 3), device='cuda') 91 | 92 | r = q[:, 0] 93 | x = q[:, 1] 94 | y = q[:, 2] 95 | z = q[:, 3] 96 | 97 | R[:, 0, 0] = 1 - 2 * (y*y + z*z) 98 | R[:, 0, 1] = 2 * (x*y - r*z) 99 | R[:, 0, 2] = 2 * (x*z + r*y) 100 | R[:, 1, 0] = 2 * (x*y + r*z) 101 | R[:, 1, 1] = 1 - 2 * (x*x + z*z) 102 | R[:, 1, 2] = 2 * (y*z - r*x) 103 | R[:, 2, 0] = 2 * (x*z - r*y) 104 | R[:, 2, 1] = 2 * (y*z + r*x) 105 | R[:, 2, 2] = 1 - 2 * (x*x + y*y) 106 | return R 107 | 108 | 109 | 110 | def build_scaling_rotation(s, r): 111 | L = torch.zeros((s.shape[0], 3, 3), dtype=torch.float, device="cuda") 112 | R = build_rotation(r) 113 | 114 | L[:,0,0] = s[:,0] 115 | L[:,1,1] = s[:,1] 116 | L[:,2,2] = s[:,2] 117 | 118 | L = R @ L 119 | return L 120 | 121 | 122 | def safe_state(silent): 123 | old_f = sys.stdout 124 | class F: 125 | def __init__(self, silent): 126 | self.silent = silent 127 | 128 | def write(self, x): 129 | if not self.silent: 130 | if x.endswith("\n"): 131 | old_f.write(x.replace("\n", " [{}]\n".format(str(datetime.now().strftime("%d/%m %H:%M:%S"))))) 132 | else: 133 | old_f.write(x) 134 | 135 | def flush(self): 136 | old_f.flush() 137 | 138 | sys.stdout = F(silent) 139 | 140 | random.seed(0) 141 | np.random.seed(0) 142 | torch.manual_seed(0) 143 | torch.cuda.set_device(torch.device("cuda:0")) 144 | 145 | 146 | class LatentStorer: 147 | def __init__(self): 148 | self.latent = None 149 | 150 | def __call__(self, i, t, latent): 151 | self.latent = latent -------------------------------------------------------------------------------- /utils/graphics.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, 
https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | import math 12 | from typing import NamedTuple 13 | import numpy as np 14 | import torch 15 | import torch.nn.functional as F 16 | import cv2 17 | 18 | 19 | class BasicPointCloud(NamedTuple): 20 | points : np.array 21 | colors : np.array 22 | normals : np.array 23 | 24 | 25 | def geom_transform_points(points, transf_matrix): 26 | P, _ = points.shape 27 | ones = torch.ones(P, 1, dtype=points.dtype, device=points.device) 28 | points_hom = torch.cat([points, ones], dim=1) 29 | points_out = torch.matmul(points_hom, transf_matrix.unsqueeze(0)) 30 | 31 | denom = points_out[..., 3:] + 0.0000001 32 | return (points_out[..., :3] / denom).squeeze(dim=0) 33 | 34 | 35 | def getWorld2View(R, t): 36 | Rt = np.zeros((4, 4)) 37 | Rt[:3, :3] = R.transpose() 38 | Rt[:3, 3] = t 39 | Rt[3, 3] = 1.0 40 | return np.float32(Rt) 41 | 42 | 43 | def getWorld2View2(R, t, translate=np.array([.0, .0, .0]), scale=1.0): 44 | Rt = np.zeros((4, 4)) 45 | Rt[:3, :3] = R.transpose() 46 | Rt[:3, 3] = t 47 | Rt[3, 3] = 1.0 48 | 49 | C2W = np.linalg.inv(Rt) 50 | cam_center = C2W[:3, 3] 51 | cam_center = (cam_center + translate) * scale 52 | C2W[:3, 3] = cam_center 53 | Rt = np.linalg.inv(C2W) 54 | return np.float32(Rt) 55 | 56 | 57 | def getProjectionMatrix(znear, zfar, fovX, fovY): 58 | tanHalfFovY = math.tan((fovY / 2)) 59 | tanHalfFovX = math.tan((fovX / 2)) 60 | 61 | top = tanHalfFovY * znear 62 | bottom = -top 63 | right = tanHalfFovX * znear 64 | left = -right 65 | 66 | P = torch.zeros(4, 4) 67 | 68 | z_sign = 1.0 69 | 70 | P[0, 0] = 2.0 * znear / (right - left) 71 | P[1, 1] = 2.0 * znear / (top - bottom) 72 | P[0, 2] = (right + left) / (right - left) 73 | P[1, 2] = (top + bottom) / (top - bottom) 74 | P[3, 2] = z_sign 75 | P[2, 2] = z_sign * zfar / (zfar - znear) 76 | P[2, 3] = -(zfar * znear) / (zfar - znear) 77 | return P 78 | 79 | 80 | def fov2focal(fov, pixels): 81 | return pixels / (2 * math.tan(fov / 2)) 82 | 83 | 84 | def focal2fov(focal, pixels): 85 | return 2*math.atan(pixels/(2*focal)) 86 | -------------------------------------------------------------------------------- /utils/metrics.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import torch 4 | import numpy as np 5 | import pyiqa 6 | import wandb 7 | 8 | from tqdm.auto import tqdm 9 | from PIL import Image 10 | from transformers import CLIPProcessor, CLIPModel 11 | from torchmetrics.multimodal import CLIPImageQualityAssessment 12 | 13 | 14 | def pil_to_torch(img, device, normalize=True): 15 | img = torch.tensor(np.array(img), device=device).permute(2, 0, 1) 16 | if normalize: 17 | img = img / 255.0 18 | return img 19 | 20 | 21 | def clip_score_and_iqa(image_folder, text, out_path): 22 | model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16") 23 | processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16") 24 | 25 | prompt_metric = ("quality", "colorfullness", "sharpness") 26 | clipiqa_model = CLIPImageQualityAssessment(model_name_or_path="openai/clip-vit-base-patch16", prompts=prompt_metric, data_range=1.0) 27 | 28 | images = [Image.open(os.path.join(image_folder, f)) for f in os.listdir(image_folder) if "png" in f or "jpg" in f] 29 | if text.endswith('.txt'): 30 | with open(text, 'r') as f: 31 | prompt = 
f.readline() 32 | else: 33 | prompt = text 34 | 35 | scores = torch.zeros((len(prompt_metric), len(images)), device=clipiqa_model.device) 36 | clip_scores = torch.zeros(len(images), device=model.device) 37 | 38 | pbar = tqdm(images, desc="Calc CLIP Score and CLIP IQA") 39 | for idx, image in enumerate(pbar): 40 | img_torch = pil_to_torch(image, model.device, normalize=False) 41 | inputs = processor(text=[prompt], images=img_torch, return_tensors="pt", padding=True) 42 | outputs = model(**inputs) 43 | clip_scores[idx] = outputs.logits_per_image.detach() 44 | for prompt_idx in range(len(prompt_metric)): 45 | scores[prompt_idx][idx] = clipiqa_model(img_torch.unsqueeze(dim=0))[prompt_metric[prompt_idx]].detach() 46 | 47 | wandb.log({ 48 | 'clip_score': clip_scores.mean().cpu().numpy().item(), 49 | 'clipiqa-quality': scores[0].mean().cpu().numpy().item(), 50 | 'clipiqa-colorful': scores[1].mean().cpu().numpy().item(), 51 | 'clipiqa-sharp': scores[2].mean().cpu().numpy().item() 52 | }) 53 | 54 | print("CLIP Score", clip_scores.mean().cpu().numpy()) 55 | print("CLIP IQA") 56 | print("quality", scores[0].mean().cpu().numpy()) 57 | print("colorful", scores[1].mean().cpu().numpy()) 58 | print("sharp", scores[2].mean().cpu().numpy()) 59 | 60 | 61 | def brisque_and_niqe_score(image_folder, out_path): # pyiqa 62 | images = [Image.open(os.path.join(image_folder, f)) for f in os.listdir(image_folder) if "png" in f or "jpg" in f] 63 | images_tensor = [] 64 | for image in images: 65 | image_t = pil_to_torch(image, "cpu", normalize=True) 66 | images_tensor.append(image_t) 67 | stack_images_tensor = torch.stack(images_tensor, dim=0) 68 | 69 | 70 | brisque_metric = pyiqa.create_metric('brisque') 71 | brisque_scores = brisque_metric(stack_images_tensor).tolist() 72 | print("BRISQUE", np.mean(brisque_scores)) 73 | 74 | niqe_metric = pyiqa.create_metric('niqe') 75 | niqe_scores = niqe_metric(stack_images_tensor).tolist() 76 | print("NIQE", np.mean(niqe_scores)) 77 | 78 | wandb.log({ 79 | 'brisque': np.mean(brisque_scores), 80 | 'niqe': np.mean(niqe_scores) 81 | }) 82 | -------------------------------------------------------------------------------- /utils/pose_noise_util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from scene.cameras import Camera 4 | 5 | 6 | def sample_noise(n, r_max, t_max): 7 | nr = np.random.normal(0, scale=r_max/2.0, size=(n,3)) 8 | nr = np.clip(nr, a_min=-r_max, a_max=r_max) 9 | 10 | nt = np.random.normal(0, scale=t_max/2.0, size=(n,3)) 11 | nt = np.clip(nt, a_min=-t_max, a_max=t_max) 12 | 13 | return nr, nt 14 | 15 | 16 | def interpolate_noise(n, steps): 17 | last = np.linspace(n[-1], n[-1], num=steps) 18 | n = [np.linspace(n[i], n[i + 1], num=steps) for i in range(n.shape[0] - 1)] 19 | n.append(last) 20 | n = np.concatenate(n, axis=0) 21 | return n 22 | 23 | 24 | def to_degrees(x): 25 | return x * 180.0 / np.pi 26 | 27 | 28 | def to_radians(x): 29 | return x * np.pi / 180.0 30 | 31 | 32 | # Checks if a matrix is a valid rotation matrix. 33 | # https://www.learnopencv.com/rotation-matrix-to-euler-angles/ 34 | def isRotationMatrix(R): 35 | Rt = np.transpose(R) 36 | shouldBeIdentity = np.dot(Rt, R) 37 | I = np.identity(3, dtype=R.dtype) 38 | n = np.linalg.norm(I - shouldBeIdentity) 39 | return n < 1e-5 40 | 41 | 42 | # Calculates rotation matrix to euler angles 43 | # The result is the same as MATLAB except the order 44 | # of the euler angles ( x and z are swapped ). 
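# Angles are returned in radians, following the R = Rz(z) @ Ry(y) @ Rx(x) convention used by eulerAnglesToRotationMatrix below.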
45 | # https://www.learnopencv.com/rotation-matrix-to-euler-angles/ 46 | def rotationMatrixToEulerAngles(R): 47 | assert (isRotationMatrix(R)) 48 | 49 | sy = np.sqrt(R[0, 0] * R[0, 0] + R[1, 0] * R[1, 0]) 50 | 51 | singular = sy < 1e-6 52 | 53 | if not singular: 54 | x = np.arctan2(R[2, 1], R[2, 2]) 55 | y = np.arctan2(-R[2, 0], sy) 56 | z = np.arctan2(R[1, 0], R[0, 0]) 57 | else: 58 | x = np.arctan2(-R[1, 2], R[1, 1]) 59 | y = np.arctan2(-R[2, 0], sy) 60 | z = 0 61 | 62 | return np.array([x, y, z]) 63 | 64 | 65 | # Calculates Rotation Matrix given euler angles. 66 | # https://www.learnopencv.com/rotation-matrix-to-euler-angles/ 67 | def eulerAnglesToRotationMatrix(theta): 68 | R_x = np.array([[1, 0, 0], 69 | [0, np.cos(theta[0]), -np.sin(theta[0])], 70 | [0, np.sin(theta[0]), np.cos(theta[0])] 71 | ]) 72 | 73 | R_y = np.array([[np.cos(theta[1]), 0, np.sin(theta[1])], 74 | [0, 1, 0], 75 | [-np.sin(theta[1]), 0, np.cos(theta[1])] 76 | ]) 77 | 78 | R_z = np.array([[np.cos(theta[2]), -np.sin(theta[2]), 0], 79 | [np.sin(theta[2]), np.cos(theta[2]), 0], 80 | [0, 0, 1] 81 | ]) 82 | 83 | R = np.dot(R_z, np.dot(R_y, R_x)) 84 | 85 | return R 86 | 87 | 88 | # poses = self.train_cameras 89 | def apply_noise_bloomscene(poses, chunk_size=10, r_max=2.0, t_max=0.05): 90 | noisy_poses = [] 91 | 92 | # create noise vectors 93 | n = len(poses) // chunk_size + (len(poses) % chunk_size != 0) 94 | nr, nt = sample_noise(n, r_max, t_max) 95 | nr = interpolate_noise(nr, chunk_size) 96 | nt = interpolate_noise(nt, chunk_size) 97 | 98 | for idx in range(len(poses)): 99 | pose = poses[idx] 100 | if isinstance(pose.R, torch.Tensor): 101 | r = pose.R.numpy() 102 | # pose_numpy = p.numpy() 103 | elif isinstance(pose.T, torch.Tensor): 104 | t = pose.T.numpy() 105 | else: 106 | # pose_numpy = p 107 | r = pose.R 108 | t = pose.T 109 | 110 | # extract r, t 111 | # r = pose_numpy[:3, :3] 112 | r = rotationMatrixToEulerAngles(r) 113 | r = to_degrees(r) 114 | 115 | 116 | # get noise 117 | nr_i = nr[idx // chunk_size] # (3, ) 118 | nt_i = nt[idx // chunk_size] # (3, ) 119 | 120 | # apply noise 121 | r += nr_i 122 | t += nt_i 123 | 124 | # create pose noise 125 | r = to_radians(r) 126 | r = eulerAnglesToRotationMatrix(r) 127 | 128 | # p_noise[:3, :3] = r 129 | # p_noise[:3, 3] = t 130 | #pose.R = r 131 | #pose.T = t 132 | 133 | if isinstance(pose.R, torch.Tensor): 134 | pose.R = torch.from_numpy(pose.R) 135 | elif isinstance(pose.T, torch.Tensor): 136 | pose.T = torch.from_numpy(pose.T) 137 | 138 | 139 | # p_noise = torch.from_numpy(p_noise).to(p) 140 | 141 | # noisy_poses.append(p_noise) 142 | noisy_poses.append(Camera(colmap_id=pose.colmap_id, R=r, T=t, FoVx=pose.FoVx, FoVy=pose.FoVy, image=pose.original_image, original_depth=pose.original_depth, 143 | gt_alpha_mask=None, image_name='', uid=pose.uid, data_device='cuda')) 144 | 145 | return noisy_poses 146 | -------------------------------------------------------------------------------- /utils/system.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | from errno import EEXIST 12 | from os import makedirs, path 13 | import os 14 | 15 | 16 | def mkdir_p(folder_path): 17 | # Creates a directory. 
equivalent to using mkdir -p on the command line 18 | try: 19 | makedirs(folder_path) 20 | except OSError as exc: # Python >2.5 21 | if exc.errno == EEXIST and path.isdir(folder_path): 22 | pass 23 | else: 24 | raise 25 | 26 | 27 | def searchForMaxIteration(folder): 28 | saved_iters = [int(fname.split("_")[-1]) for fname in os.listdir(folder)] 29 | return max(saved_iters) 30 | -------------------------------------------------------------------------------- /utils/trajectory.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023, Computer Vision Lab, Seoul National University, https://cv.snu.ac.kr 2 | # 3 | # Copyright 2023 LucidDreamer Authors 4 | # 5 | # Computer Vision Lab, SNU, its affiliates and licensors retain all intellectual 6 | # property and proprietary rights in and to this material, related 7 | # documentation and any modifications thereto. Any use, reproduction, 8 | # disclosure or distribution of this material and related documentation 9 | # without an express license agreement from the Computer Vision Lab, SNU or 10 | # its affiliates is strictly prohibited. 11 | # 12 | # For permission requests, please contact robot0321@snu.ac.kr, esw0116@snu.ac.kr, namhj28@gmail.com, jarin.lee@gmail.com. 13 | import numpy as np 14 | 15 | 16 | def generate_seed_360(viewangle, n_views): 17 | N = n_views 18 | render_poses = np.zeros((N, 3, 4)) 19 | for i in range(N): 20 | th = (viewangle/N)*i/180*np.pi 21 | render_poses[i,:3,:3] = np.array([[np.cos(th), 0, np.sin(th)], [0, 1, 0], [-np.sin(th), 0, np.cos(th)]]) 22 | render_poses[i,:3,3:4] = np.random.randn(3,1)*0.0 # Transition vector 23 | 24 | return render_poses 25 | 26 | def my_generate_seed_360(viewangle, n_views): 27 | N = n_views 28 | render_poses = np.zeros((N, 3, 4)) 29 | th_list = [0, 1, 9, 2, 8, 3, 7, 4, 6, 5] 30 | for i in range(N): 31 | th = (viewangle/N)*th_list[i]/180*np.pi 32 | render_poses[i,:3,:3] = np.array([[np.cos(th), 0, np.sin(th)], [0, 1, 0], [-np.sin(th), 0, np.cos(th)]]) 33 | render_poses[i,:3,3:4] = np.random.randn(3,1)*0.0 # Transition vector 34 | 35 | return render_poses 36 | 37 | 38 | def generate_seed_360_half(viewangle, n_views): 39 | N = n_views // 2 40 | halfangle = viewangle / 2 41 | render_poses = np.zeros((N*2, 3, 4)) 42 | for i in range(N): 43 | th = (halfangle/N)*i/180*np.pi 44 | render_poses[i,:3,:3] = np.array([[np.cos(th), 0, np.sin(th)], [0, 1, 0], [-np.sin(th), 0, np.cos(th)]]) 45 | render_poses[i,:3,3:4] = np.random.randn(3,1)*0.0 # Transition vector 46 | for i in range(N): 47 | th = -(halfangle/N)*i/180*np.pi 48 | render_poses[i+N,:3,:3] = np.array([[np.cos(th), 0, np.sin(th)], [0, 1, 0], [-np.sin(th), 0, np.cos(th)]]) 49 | render_poses[i+N,:3,3:4] = np.random.randn(3,1)*0.0 # Transition vector 50 | return render_poses 51 | 52 | 53 | def generate_seed_hemisphere(center_depth, degree=5): 54 | degree = 5 55 | thlist = np.array([degree, 0, 0, 0, -degree]) 56 | philist = np.array([0, -degree, 0, degree, 0]) 57 | assert len(thlist) == len(philist) 58 | 59 | render_poses = np.zeros((len(thlist), 3, 4)) 60 | for i in range(len(thlist)): 61 | th = thlist[i] 62 | phi = philist[i] 63 | d = center_depth 64 | 65 | render_poses[i,:3,:3] = np.matmul(np.array([[np.cos(th/180*np.pi), 0, -np.sin(th/180*np.pi)], [0, 1, 0], [np.sin(th/180*np.pi), 0, np.cos(th/180*np.pi)]]), np.array([[1, 0, 0], [0, np.cos(phi/180*np.pi), -np.sin(phi/180*np.pi)], [0, np.sin(phi/180*np.pi), np.cos(phi/180*np.pi)]])) 66 | render_poses[i,:3,3:4] = np.array([d*np.sin(th/180*np.pi), 0, 
d-d*np.cos(th/180*np.pi)]).reshape(3,1) + np.array([0, d*np.sin(phi/180*np.pi), d-d*np.cos(phi/180*np.pi)]).reshape(3,1) 67 | 68 | return render_poses 69 | 70 | 71 | def my_generate_seed_hemisphere(center_depth, degree=5): 72 | degree = 5 73 | thlist = np.array([degree, 0, 0, 0, -degree]) 74 | philist = np.array([0, -degree, 0, degree, 0]) 75 | assert len(thlist) == len(philist) 76 | 77 | render_poses = np.zeros((len(thlist) * len(center_depth), 3, 4)) 78 | for j in range(len(center_depth)): 79 | per_center_depth = center_depth[j] 80 | for i in range(len(thlist)): 81 | th = thlist[i] 82 | phi = philist[i] 83 | d = per_center_depth 84 | 85 | idx = j * len(thlist) + i 86 | render_poses[idx,:3,:3] = np.matmul(np.array([[np.cos(th/180*np.pi), 0, -np.sin(th/180*np.pi)], [0, 1, 0], [np.sin(th/180*np.pi), 0, np.cos(th/180*np.pi)]]), np.array([[1, 0, 0], [0, np.cos(phi/180*np.pi), -np.sin(phi/180*np.pi)], [0, np.sin(phi/180*np.pi), np.cos(phi/180*np.pi)]])) 87 | render_poses[idx,:3,3:4] = np.array([d*np.sin(th/180*np.pi), 0, d-d*np.cos(th/180*np.pi)]).reshape(3,1) + np.array([0, d*np.sin(phi/180*np.pi), d-d*np.cos(phi/180*np.pi)]).reshape(3,1) 88 | 89 | return render_poses 90 | 91 | 92 | def get_pcdGenPoses(pcdgenpath, argdict={}): 93 | if pcdgenpath == 'rotate360': 94 | render_poses = my_generate_seed_360(360, 10) 95 | elif pcdgenpath == 'hemisphere': 96 | render_poses = my_generate_seed_hemisphere(argdict['center_depth']) 97 | else: 98 | raise("Invalid pcdgenpath") 99 | return render_poses 100 | 101 | 102 | def get_camerapaths(): 103 | preset_json = {} 104 | for cam_path in ['rotate360']: 105 | if cam_path == 'rotate360': 106 | render_poses = generate_seed_360(360, 180) 107 | else: 108 | raise("Unknown pass") 109 | 110 | yz_reverse = np.array([[1,0,0], [0,-1,0], [0,0,-1]]) 111 | blender_train_json = {"frames": []} 112 | for render_pose in render_poses: 113 | curr_frame = {} 114 | Rw2i = render_pose[:3,:3] 115 | Tw2i = render_pose[:3,3:4] 116 | Ri2w = np.matmul(yz_reverse, Rw2i).T 117 | Ti2w = -np.matmul(Ri2w, np.matmul(yz_reverse, Tw2i)) 118 | Pc2w = np.concatenate((Ri2w, Ti2w), axis=1) 119 | Pc2w = np.concatenate((Pc2w, np.array([0,0,0,1]).reshape((1,4))), axis=0) 120 | 121 | curr_frame["transform_matrix"] = Pc2w.tolist() 122 | blender_train_json["frames"].append(curr_frame) 123 | 124 | preset_json[cam_path] = blender_train_json 125 | 126 | return preset_json --------------------------------------------------------------------------------
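
A minimal usage sketch for the trajectory presets above (not part of the repository source; the output filename and the shape checks are illustrative assumptions, and only functions defined in utils/trajectory.py are used):

import json
import numpy as np
from utils.trajectory import get_pcdGenPoses, get_camerapaths

# 10 seed poses sweeping 360 degrees about the y-axis: shape (10, 3, 4),
# rotation in [:, :3, :3] and an all-zero translation in [:, :3, 3:4].
render_poses = get_pcdGenPoses('rotate360')
assert render_poses.shape == (10, 3, 4)

# 180-frame preset path; each frame carries a 4x4 camera-to-world
# 'transform_matrix', the field that load_json in utils/camera.py reads
# (load_json additionally expects a top-level 'camera_angle_x').
presets = get_camerapaths()
frames = presets['rotate360']['frames']
assert len(frames) == 180
assert np.array(frames[0]['transform_matrix']).shape == (4, 4)

# Illustrative dump path; the repository ships its preset as cameras/rotate360.json.
with open('rotate360_preset_example.json', 'w') as f:
    json.dump(presets['rotate360'], f, indent=2)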