├── .gitignore ├── LICENSE ├── README.md ├── ZoeDepth ├── .gitignore ├── LICENSE ├── README.md ├── environment.yml ├── evaluate.py ├── hubconf.py ├── sanity.py ├── sanity_hub.py ├── train_mix.py ├── train_mono.py ├── train_test_inputs │ ├── kitti_eigen_test_files_with_gt.txt │ ├── kitti_eigen_train_files_with_gt.txt │ ├── nyudepthv2_test_files_with_gt.txt │ └── nyudepthv2_train_files_with_gt.txt ├── ui │ ├── app.py │ ├── gradio_depth_pred.py │ ├── gradio_im_to_3d.py │ ├── gradio_pano_to_3d.py │ └── ui_requirements.txt └── zoedepth │ ├── data │ ├── __init__.py │ ├── data_mono.py │ ├── ddad.py │ ├── diml_indoor_test.py │ ├── diml_outdoor_test.py │ ├── diode.py │ ├── hypersim.py │ ├── ibims.py │ ├── preprocess.py │ ├── sun_rgbd_loader.py │ ├── transforms.py │ ├── vkitti.py │ └── vkitti2.py │ ├── models │ ├── __init__.py │ ├── base_models │ │ ├── __init__.py │ │ └── midas.py │ ├── builder.py │ ├── depth_model.py │ ├── layers │ │ ├── attractor.py │ │ ├── dist_layers.py │ │ ├── localbins_layers.py │ │ └── patch_transformer.py │ ├── model_io.py │ ├── zoedepth │ │ ├── __init__.py │ │ ├── config_zoedepth.json │ │ ├── config_zoedepth_kitti.json │ │ └── zoedepth_v1.py │ └── zoedepth_nk │ │ ├── __init__.py │ │ ├── config_zoedepth_nk.json │ │ └── zoedepth_nk_v1.py │ ├── trainers │ ├── base_trainer.py │ ├── builder.py │ ├── loss.py │ ├── zoedepth_nk_trainer.py │ └── zoedepth_trainer.py │ └── utils │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── easydict │ └── __init__.py │ ├── geometry.py │ └── misc.py ├── app.py ├── app_mini.py ├── arguments.py ├── assets ├── animestreet2_back_rgb.mp4 ├── demo.gif ├── demo.mp4 ├── fig5ours_360_rgb.mp4 ├── logo_color.png ├── logo_cvlab.png └── waterfall_back_rgb.mp4 ├── cameras ├── 1440.json ├── 360.json ├── 360_fov1.2.json ├── back.json ├── back_and_forth.json ├── headbanging.json ├── headbanging_circle.json ├── headbanging_r2.json ├── headbanging_r3.json ├── llff.json ├── llff_d0.25.json ├── llff_d0.5.json ├── llff_d1.json ├── llff_d2.json ├── llff_d4.json ├── llff_d6.json ├── llff_d8.json ├── lookaround.json ├── lookdown.json ├── rotate1440.json ├── rotate360.json └── rotate360_fov1.2.json ├── examples ├── Image002_modernvilla.jpg ├── Image002_modernvilla.txt ├── Image002_modernvilla_negative.txt ├── Image003_fantasy.jpg ├── Image003_fantasy.txt ├── Image003_fantasy_negative.txt ├── Image005_fruitmarket.jpg ├── Image005_fruitmarket.txt ├── Image005_fruitmarket_negative.txt ├── Image008_waterfall.jpg ├── Image008_waterfall.txt ├── Image009_spacestation.jpg ├── Image009_spacestation.txt ├── Image009_spacestation_negative.txt ├── Image011_lego.jpg ├── Image011_lego.txt ├── Image011_lego_negative.txt ├── Image012_whitecat.jpg ├── Image012_whitecat.txt ├── Image012_whitecat_2nd.txt ├── Image012_whitecat_negative.txt ├── Image014_animestreet.jpg ├── Image014_animestreet.txt ├── Image014_animestreet_negative.txt ├── Image015_animelakehouse.jpg ├── Image015_animelakehouse.txt ├── Image015_animelakehouse_negative.txt ├── Image018_animesummerhome.jpg ├── Image018_animesummerhome.txt ├── Image018_animesummerhome_negative.txt ├── Image031_fruit.jpg ├── Image031_fruit.txt ├── Image031_fruit_negative.txt ├── animelake_Back_and_forth_60fps.mp4 ├── animelake_Headbanging_60fps.mp4 ├── animelake_LLFF_60fps.mp4 ├── cabin.png ├── cabin.txt ├── christmas.png ├── christmas.txt ├── doge.jpg ├── doge.png ├── doge.txt ├── elf.jpg ├── elf.png ├── elf.txt ├── fantasy_Back_and_forth_60fps.mp4 ├── fantasy_Headbanging_60fps.mp4 ├── fantasy_LLFF_60fps.mp4 ├── girl.jpg ├── girl.txt ├── 
image020.png ├── image020.txt ├── island.png ├── island.txt ├── ruin.png └── ruin.txt ├── gaussian_renderer ├── __init__.py └── network_gui.py ├── luciddreamer.py ├── packages.txt ├── requirements.txt ├── run.py ├── scene ├── __init__.py ├── cameras.py ├── colmap_loader.py ├── dataset_readers.py └── gaussian_model.py ├── submodules ├── depth-diff-gaussian-rasterization-min │ ├── CMakeLists.txt │ ├── LICENSE.md │ ├── README.md │ ├── cuda_rasterizer │ │ ├── auxiliary.h │ │ ├── backward.cu │ │ ├── backward.h │ │ ├── config.h │ │ ├── forward.cu │ │ ├── forward.h │ │ ├── rasterizer.h │ │ ├── rasterizer_impl.cu │ │ └── rasterizer_impl.h │ ├── depth_diff_gaussian_rasterization_min │ │ └── __init__.py │ ├── ext.cpp │ ├── rasterize_points.cu │ ├── rasterize_points.h │ ├── setup.py │ └── third_party │ │ └── stbi_image_write.h └── simple-knn │ ├── ext.cpp │ ├── setup.py │ ├── simple_knn.cu │ ├── simple_knn.h │ ├── simple_knn │ └── .gitkeep │ ├── spatial.cu │ └── spatial.h └── utils ├── __init__.py ├── camera.py ├── depth.py ├── general.py ├── graphics.py ├── image.py ├── lama.py ├── loss.py ├── sh.py ├── system.py └── trajectory.py /.gitignore: -------------------------------------------------------------------------------- 1 | examples/*.mp4 2 | examples/*.ply 3 | examples/.gitattributes 4 | examples/README.md 5 | stablediffusion 6 | 7 | 8 | # Created by https://www.toptal.com/developers/gitignore/api/python,visualstudiocode 9 | # Edit at https://www.toptal.com/developers/gitignore?templates=python,visualstudiocode 10 | 11 | ### Python ### 12 | # Byte-compiled / optimized / DLL files 13 | __pycache__/ 14 | *.py[cod] 15 | *$py.class 16 | 17 | # C extensions 18 | *.so 19 | 20 | # Distribution / packaging 21 | .Python 22 | build/ 23 | develop-eggs/ 24 | dist/ 25 | downloads/ 26 | eggs/ 27 | .eggs/ 28 | lib/ 29 | lib64/ 30 | parts/ 31 | sdist/ 32 | var/ 33 | wheels/ 34 | share/python-wheels/ 35 | *.egg-info/ 36 | .installed.cfg 37 | *.egg 38 | MANIFEST 39 | video*/ 40 | video/ 41 | result/ 42 | 43 | # PyInstaller 44 | # Usually these files are written by a python script from a template 45 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 46 | *.manifest 47 | *.spec 48 | 49 | # Installer logs 50 | pip-log.txt 51 | pip-delete-this-directory.txt 52 | 53 | # Unit test / coverage reports 54 | htmlcov/ 55 | .tox/ 56 | .nox/ 57 | .coverage 58 | .coverage.* 59 | .cache 60 | nosetests.xml 61 | coverage.xml 62 | *.cover 63 | *.py,cover 64 | .hypothesis/ 65 | .pytest_cache/ 66 | cover/ 67 | 68 | # Translations 69 | *.mo 70 | *.pot 71 | 72 | # Django stuff: 73 | *.log 74 | local_settings.py 75 | db.sqlite3 76 | db.sqlite3-journal 77 | 78 | # Flask stuff: 79 | instance/ 80 | .webassets-cache 81 | 82 | # Scrapy stuff: 83 | .scrapy 84 | 85 | # Sphinx documentation 86 | docs/_build/ 87 | 88 | # PyBuilder 89 | .pybuilder/ 90 | target/ 91 | 92 | # Jupyter Notebook 93 | .ipynb_checkpoints 94 | 95 | # IPython 96 | profile_default/ 97 | ipython_config.py 98 | 99 | # pyenv 100 | # For a library or package, you might want to ignore these files since the code is 101 | # intended to run in multiple environments; otherwise, check them in: 102 | # .python-version 103 | 104 | # pipenv 105 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
106 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 107 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 108 | # install all needed dependencies. 109 | #Pipfile.lock 110 | 111 | # poetry 112 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 113 | # This is especially recommended for binary packages to ensure reproducibility, and is more 114 | # commonly ignored for libraries. 115 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 116 | #poetry.lock 117 | 118 | # pdm 119 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 120 | #pdm.lock 121 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 122 | # in version control. 123 | # https://pdm.fming.dev/#use-with-ide 124 | .pdm.toml 125 | 126 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 127 | __pypackages__/ 128 | 129 | # Celery stuff 130 | celerybeat-schedule 131 | celerybeat.pid 132 | 133 | # SageMath parsed files 134 | *.sage.py 135 | 136 | # Environments 137 | .env 138 | .venv 139 | env/ 140 | venv/ 141 | ENV/ 142 | env.bak/ 143 | venv.bak/ 144 | 145 | # Spyder project settings 146 | .spyderproject 147 | .spyproject 148 | 149 | # Rope project settings 150 | .ropeproject 151 | 152 | # mkdocs documentation 153 | /site 154 | 155 | # mypy 156 | .mypy_cache/ 157 | .dmypy.json 158 | dmypy.json 159 | 160 | # Pyre type checker 161 | .pyre/ 162 | 163 | # pytype static type analyzer 164 | .pytype/ 165 | 166 | # Cython debug symbols 167 | cython_debug/ 168 | 169 | # PyCharm 170 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 171 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 172 | # and can be added to the global gitignore or merged into this file. For a more nuclear 173 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
174 | #.idea/ 175 | 176 | ### Python Patch ### 177 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration 178 | poetry.toml 179 | 180 | # ruff 181 | .ruff_cache/ 182 | 183 | # LSP config files 184 | pyrightconfig.json 185 | 186 | ### VisualStudioCode ### 187 | .vscode/* 188 | !.vscode/settings.json 189 | !.vscode/tasks.json 190 | !.vscode/launch.json 191 | !.vscode/extensions.json 192 | !.vscode/*.code-snippets 193 | 194 | # Local History for Visual Studio Code 195 | .history/ 196 | 197 | # Built Visual Studio Code Extensions 198 | *.vsix 199 | 200 | ### VisualStudioCode Patch ### 201 | # Ignore all local history of files 202 | .history 203 | .ionide 204 | 205 | # End of https://www.toptal.com/developers/gitignore/api/python,visualstudiocode 206 | 207 | *.ply 208 | *.safetensors 209 | results 210 | outputs 211 | gradio_cached_examples 212 | submodules/depth-diff-gaussian-rasterization-min/third_party/glm 213 | -------------------------------------------------------------------------------- /ZoeDepth/.gitignore: -------------------------------------------------------------------------------- 1 | *.png 2 | **.gif 3 | .vscode/ 4 | *.rdb 5 | **.xml 6 | wandb/ 7 | slurm/ 8 | tmp/ 9 | .logs/ 10 | checkpoints/ 11 | external_jobs/ 12 | # Byte-compiled / optimized / DLL files 13 | __pycache__/ 14 | *.py[cod] 15 | *$py.class 16 | ptlflow_logs/ 17 | output/ 18 | log/ 19 | .idea/ 20 | # C extensions 21 | *.so 22 | results/ 23 | **.DS_Store 24 | **.pt 25 | demo/ 26 | # Distribution / packaging 27 | .Python 28 | build/ 29 | develop-eggs/ 30 | dist/ 31 | downloads/ 32 | eggs/ 33 | .eggs/ 34 | lib/ 35 | lib64/ 36 | parts/ 37 | sdist/ 38 | var/ 39 | wheels/ 40 | pip-wheel-metadata/ 41 | share/python-wheels/ 42 | *.egg-info/ 43 | .installed.cfg 44 | *.egg 45 | MANIFEST 46 | ~shortcuts/ 47 | **/wandb_logs/ 48 | **.db 49 | # PyInstaller 50 | # Usually these files are written by a python script from a template 51 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 52 | *.manifest 53 | *.spec 54 | 55 | # Installer logs 56 | pip-log.txt 57 | pip-delete-this-directory.txt 58 | 59 | # Unit test / coverage reports 60 | htmlcov/ 61 | .tox/ 62 | .nox/ 63 | .coverage 64 | .coverage.* 65 | .cache 66 | nosetests.xml 67 | coverage.xml 68 | *.cover 69 | *.py,cover 70 | .hypothesis/ 71 | .pytest_cache/ 72 | 73 | # Translations 74 | *.mo 75 | *.pot 76 | 77 | # Django stuff: 78 | *.log 79 | local_settings.py 80 | db.sqlite3 81 | db.sqlite3-journal 82 | 83 | # Flask stuff: 84 | instance/ 85 | .webassets-cache 86 | 87 | # Scrapy stuff: 88 | .scrapy 89 | 90 | # Sphinx documentation 91 | docs/_build/ 92 | 93 | # PyBuilder 94 | target/ 95 | 96 | # Jupyter Notebook 97 | .ipynb_checkpoints 98 | 99 | # IPython 100 | profile_default/ 101 | ipython_config.py 102 | 103 | # pyenv 104 | .python-version 105 | 106 | # pipenv 107 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 108 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 109 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 110 | # install all needed dependencies. 111 | #Pipfile.lock 112 | 113 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 114 | __pypackages__/ 115 | 116 | # Celery stuff 117 | celerybeat-schedule 118 | celerybeat.pid 119 | 120 | # SageMath parsed files 121 | *.sage.py 122 | 123 | # Environments 124 | .env 125 | .venv 126 | env/ 127 | venv/ 128 | ENV/ 129 | env.bak/ 130 | venv.bak/ 131 | 132 | # Spyder project settings 133 | .spyderproject 134 | .spyproject 135 | 136 | # Rope project settings 137 | .ropeproject 138 | 139 | # mkdocs documentation 140 | /site 141 | 142 | # mypy 143 | .mypy_cache/ 144 | .dmypy.json 145 | dmypy.json 146 | 147 | # Pyre type checker 148 | .pyre/ 149 | -------------------------------------------------------------------------------- /ZoeDepth/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /ZoeDepth/environment.yml: -------------------------------------------------------------------------------- 1 | name: zoe 2 | channels: 3 | - pytorch 4 | - nvidia 5 | - conda-forge 6 | dependencies: 7 | - cuda=11.7.1 8 | - h5py=3.7.0 9 | - hdf5=1.12.2 10 | - matplotlib=3.6.2 11 | - matplotlib-base=3.6.2 12 | - numpy=1.24.1 13 | - opencv=4.6.0 14 | - pip=22.3.1 15 | - python=3.9.7 16 | - pytorch=1.13.1 17 | - pytorch-cuda=11.7 18 | - pytorch-mutex=1.0 19 | - scipy=1.10.0 20 | - torchaudio=0.13.1 21 | - torchvision=0.14.1 22 | - pip: 23 | - huggingface-hub==0.11.1 24 | - timm==0.6.12 25 | - tqdm==4.64.1 26 | - wandb==0.13.9 27 | -------------------------------------------------------------------------------- /ZoeDepth/sanity.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 
14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import numpy as np 26 | from torchvision.transforms import ToTensor 27 | from PIL import Image 28 | from zoedepth.utils.misc import get_image_from_url, colorize 29 | import torch 30 | 31 | from zoedepth.models.builder import build_model 32 | from zoedepth.utils.config import get_config 33 | from pprint import pprint 34 | 35 | 36 | torch.hub.help("intel-isl/MiDaS", "DPT_BEiT_L_384", force_reload=True) 37 | 38 | DEVICE = "cuda" if torch.cuda.is_available() else "cpu" 39 | if DEVICE == "cpu": 40 | print("WARNING: Running on CPU. This will be slow. Check your CUDA installation.") 41 | 42 | print("*" * 20 + " Testing zoedepth " + "*" * 20) 43 | conf = get_config("zoedepth", "infer") 44 | 45 | 46 | print("Config:") 47 | pprint(conf) 48 | 49 | model = build_model(conf).to(DEVICE) 50 | model.eval() 51 | x = torch.rand(1, 3, 384, 512).to(DEVICE) 52 | 53 | print("-"*20 + "Testing on a random input" + "-"*20) 54 | 55 | with torch.no_grad(): 56 | out = model(x) 57 | 58 | if isinstance(out, dict): 59 | # print shapes of all outputs 60 | for k, v in out.items(): 61 | if v is not None: 62 | print(k, v.shape) 63 | else: 64 | print([o.shape for o in out if o is not None]) 65 | 66 | print("\n\n") 67 | print("-"*20 + " Testing on an indoor scene from url " + "-"*20) 68 | 69 | # Test img 70 | url = "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcS4W8H_Nxk_rs3Vje_zj6mglPOH7bnPhQitBH8WkqjlqQVotdtDEG37BsnGofME3_u6lDk&usqp=CAU" 71 | img = get_image_from_url(url) 72 | orig_size = img.size 73 | X = ToTensor()(img) 74 | X = X.unsqueeze(0).to(DEVICE) 75 | 76 | print("X.shape", X.shape) 77 | print("predicting") 78 | 79 | with torch.no_grad(): 80 | out = model.infer(X).cpu() 81 | 82 | # or just, 83 | # out = model.infer_pil(img) 84 | 85 | 86 | print("output.shape", out.shape) 87 | pred = Image.fromarray(colorize(out)) 88 | # Stack img and pred side by side for comparison and save 89 | pred = pred.resize(orig_size, Image.ANTIALIAS) 90 | stacked = Image.new("RGB", (orig_size[0]*2, orig_size[1])) 91 | stacked.paste(img, (0, 0)) 92 | stacked.paste(pred, (orig_size[0], 0)) 93 | 94 | stacked.save("pred.png") 95 | print("saved pred.png") 96 | 97 | 98 | model.infer_pil(img, output_type="pil").save("pred_raw.png") -------------------------------------------------------------------------------- /ZoeDepth/sanity_hub.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above 
copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | import numpy as np 27 | from torchvision.transforms import ToTensor 28 | from PIL import Image 29 | from zoedepth.utils.misc import get_image_from_url, colorize 30 | 31 | from zoedepth.models.builder import build_model 32 | from zoedepth.utils.config import get_config 33 | from pprint import pprint 34 | 35 | 36 | 37 | # Trigger reload of MiDaS 38 | torch.hub.help("intel-isl/MiDaS", "DPT_BEiT_L_384", force_reload=True) 39 | 40 | 41 | model = torch.hub.load(".", "ZoeD_K", source="local", pretrained=True) 42 | model = torch.hub.load(".", "ZoeD_NK", source="local", pretrained=True) 43 | model = torch.hub.load(".", "ZoeD_N", source="local", pretrained=True) 44 | -------------------------------------------------------------------------------- /ZoeDepth/train_mix.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
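# train_mix.py is the mixed-dataset (NYU + KITTI) training entry point: it builds the model from the
# parsed config, optionally resumes from a checkpoint matching `ckpt_pattern`, and trains with the
# MixedNYUKITTI loaders on a single GPU or via DDP (mp.spawn), with optional multi-node setup read
# from SLURM_JOB_NODELIST / SLURM_PROCID.
# Illustrative invocation (model name assumed): python train_mix.py -m zoedepth_nk -d mix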
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from zoedepth.utils.misc import count_parameters, parallelize 26 | from zoedepth.utils.config import get_config 27 | from zoedepth.utils.arg_utils import parse_unknown 28 | from zoedepth.trainers.builder import get_trainer 29 | from zoedepth.models.builder import build_model 30 | from zoedepth.data.data_mono import MixedNYUKITTI 31 | import torch.utils.data.distributed 32 | import torch.multiprocessing as mp 33 | import torch 34 | import numpy as np 35 | from pprint import pprint 36 | import argparse 37 | import os 38 | 39 | os.environ["PYOPENGL_PLATFORM"] = "egl" 40 | os.environ["WANDB_START_METHOD"] = "thread" 41 | 42 | 43 | def fix_random_seed(seed: int): 44 | """ 45 | Fix random seed for reproducibility 46 | 47 | Args: 48 | seed (int): random seed 49 | """ 50 | import random 51 | 52 | import numpy 53 | import torch 54 | 55 | random.seed(seed) 56 | numpy.random.seed(seed) 57 | torch.manual_seed(seed) 58 | torch.cuda.manual_seed(seed) 59 | torch.cuda.manual_seed_all(seed) 60 | 61 | torch.backends.cudnn.deterministic = True 62 | torch.backends.cudnn.benchmark = False 63 | 64 | 65 | def load_ckpt(config, model, checkpoint_dir="./checkpoints", ckpt_type="best"): 66 | import glob 67 | import os 68 | 69 | from zoedepth.models.model_io import load_wts 70 | 71 | if hasattr(config, "checkpoint"): 72 | checkpoint = config.checkpoint 73 | elif hasattr(config, "ckpt_pattern"): 74 | pattern = config.ckpt_pattern 75 | matches = glob.glob(os.path.join( 76 | checkpoint_dir, f"*{pattern}*{ckpt_type}*")) 77 | if not (len(matches) > 0): 78 | raise ValueError(f"No matches found for the pattern {pattern}") 79 | 80 | checkpoint = matches[0] 81 | 82 | else: 83 | return model 84 | model = load_wts(model, checkpoint) 85 | print("Loaded weights from {0}".format(checkpoint)) 86 | return model 87 | 88 | 89 | def main_worker(gpu, ngpus_per_node, config): 90 | try: 91 | fix_random_seed(43) 92 | 93 | config.gpu = gpu 94 | 95 | model = build_model(config) 96 | model = load_ckpt(config, model) 97 | model = parallelize(config, model) 98 | 99 | total_params = f"{round(count_parameters(model)/1e6,2)}M" 100 | config.total_params = total_params 101 | print(f"Total parameters : {total_params}") 102 | 103 | train_loader = MixedNYUKITTI(config, "train").data 104 | test_loader = MixedNYUKITTI(config, "online_eval").data 105 | 106 | trainer = get_trainer(config)( 107 | config, model, train_loader, test_loader, device=config.gpu) 108 | 109 | trainer.train() 110 | finally: 111 | import wandb 112 | wandb.finish() 113 | 114 | 115 | if __name__ == '__main__': 116 | mp.set_start_method('forkserver') 117 | 118 | parser = argparse.ArgumentParser() 119 | parser.add_argument("-m", "--model", type=str, default="synunet") 120 | parser.add_argument("-d", "--dataset", type=str, default='mix') 121 | parser.add_argument("--trainer", type=str, default=None) 122 | 123 | args, unknown_args = parser.parse_known_args() 124 | overwrite_kwargs = parse_unknown(unknown_args) 125 | 126 | overwrite_kwargs["model"] = args.model 127 | if args.trainer is not None: 128 | overwrite_kwargs["trainer"] = args.trainer 129 | 130 | config = get_config(args.model, "train", args.dataset, **overwrite_kwargs) 131 | # git_commit() 132 | if config.use_shared_dict: 133 | shared_dict = mp.Manager().dict() 134 | else: 135 | shared_dict = None 136 | config.shared_dict = shared_dict 137 | 138 | config.batch_size = config.bs 139 | config.mode = 'train' 140 | if config.root != "." 
and not os.path.isdir(config.root): 141 | os.makedirs(config.root) 142 | 143 | try: 144 | node_str = os.environ['SLURM_JOB_NODELIST'].replace( 145 | '[', '').replace(']', '') 146 | nodes = node_str.split(',') 147 | 148 | config.world_size = len(nodes) 149 | config.rank = int(os.environ['SLURM_PROCID']) 150 | # config.save_dir = "/ibex/scratch/bhatsf/videodepth/checkpoints" 151 | 152 | except KeyError as e: 153 | # We are NOT using SLURM 154 | config.world_size = 1 155 | config.rank = 0 156 | nodes = ["127.0.0.1"] 157 | 158 | if config.distributed: 159 | 160 | print(config.rank) 161 | port = np.random.randint(15000, 15025) 162 | config.dist_url = 'tcp://{}:{}'.format(nodes[0], port) 163 | print(config.dist_url) 164 | config.dist_backend = 'nccl' 165 | config.gpu = None 166 | 167 | ngpus_per_node = torch.cuda.device_count() 168 | config.num_workers = config.workers 169 | config.ngpus_per_node = ngpus_per_node 170 | print("Config:") 171 | pprint(config) 172 | if config.distributed: 173 | config.world_size = ngpus_per_node * config.world_size 174 | mp.spawn(main_worker, nprocs=ngpus_per_node, 175 | args=(ngpus_per_node, config)) 176 | else: 177 | if ngpus_per_node == 1: 178 | config.gpu = 0 179 | main_worker(config.gpu, ngpus_per_node, config) 180 | -------------------------------------------------------------------------------- /ZoeDepth/train_mono.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
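# train_mono.py is the single-dataset counterpart of train_mix.py: it uses DepthDataLoader for the
# chosen dataset (default "nyu"), seeds RNGs from config.seed when set (43 otherwise), and shares the
# same checkpoint-resume and distributed/SLURM launch logic.
# Illustrative invocation (model name assumed): python train_mono.py -m zoedepth -d nyu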
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from zoedepth.utils.misc import count_parameters, parallelize 26 | from zoedepth.utils.config import get_config 27 | from zoedepth.utils.arg_utils import parse_unknown 28 | from zoedepth.trainers.builder import get_trainer 29 | from zoedepth.models.builder import build_model 30 | from zoedepth.data.data_mono import DepthDataLoader 31 | import torch.utils.data.distributed 32 | import torch.multiprocessing as mp 33 | import torch 34 | import numpy as np 35 | from pprint import pprint 36 | import argparse 37 | import os 38 | 39 | os.environ["PYOPENGL_PLATFORM"] = "egl" 40 | os.environ["WANDB_START_METHOD"] = "thread" 41 | 42 | 43 | def fix_random_seed(seed: int): 44 | import random 45 | 46 | import numpy 47 | import torch 48 | 49 | random.seed(seed) 50 | numpy.random.seed(seed) 51 | torch.manual_seed(seed) 52 | torch.cuda.manual_seed(seed) 53 | torch.cuda.manual_seed_all(seed) 54 | 55 | torch.backends.cudnn.deterministic = True 56 | torch.backends.cudnn.benchmark = True 57 | 58 | 59 | def load_ckpt(config, model, checkpoint_dir="./checkpoints", ckpt_type="best"): 60 | import glob 61 | import os 62 | 63 | from zoedepth.models.model_io import load_wts 64 | 65 | if hasattr(config, "checkpoint"): 66 | checkpoint = config.checkpoint 67 | elif hasattr(config, "ckpt_pattern"): 68 | pattern = config.ckpt_pattern 69 | matches = glob.glob(os.path.join( 70 | checkpoint_dir, f"*{pattern}*{ckpt_type}*")) 71 | if not (len(matches) > 0): 72 | raise ValueError(f"No matches found for the pattern {pattern}") 73 | 74 | checkpoint = matches[0] 75 | 76 | else: 77 | return model 78 | model = load_wts(model, checkpoint) 79 | print("Loaded weights from {0}".format(checkpoint)) 80 | return model 81 | 82 | 83 | def main_worker(gpu, ngpus_per_node, config): 84 | try: 85 | seed = config.seed if 'seed' in config and config.seed else 43 86 | fix_random_seed(seed) 87 | 88 | config.gpu = gpu 89 | 90 | model = build_model(config) 91 | model = load_ckpt(config, model) 92 | model = parallelize(config, model) 93 | 94 | total_params = f"{round(count_parameters(model)/1e6,2)}M" 95 | config.total_params = total_params 96 | print(f"Total parameters : {total_params}") 97 | 98 | train_loader = DepthDataLoader(config, "train").data 99 | test_loader = DepthDataLoader(config, "online_eval").data 100 | 101 | trainer = get_trainer(config)( 102 | config, model, train_loader, test_loader, device=config.gpu) 103 | 104 | trainer.train() 105 | finally: 106 | import wandb 107 | wandb.finish() 108 | 109 | 110 | if __name__ == '__main__': 111 | mp.set_start_method('forkserver') 112 | 113 | parser = argparse.ArgumentParser() 114 | parser.add_argument("-m", "--model", type=str, default="synunet") 115 | parser.add_argument("-d", "--dataset", type=str, default='nyu') 116 | parser.add_argument("--trainer", type=str, default=None) 117 | 118 | args, unknown_args = parser.parse_known_args() 119 | overwrite_kwargs = parse_unknown(unknown_args) 120 | 121 | overwrite_kwargs["model"] = args.model 122 | if args.trainer is not None: 123 | overwrite_kwargs["trainer"] = args.trainer 124 | 125 | config = get_config(args.model, "train", args.dataset, **overwrite_kwargs) 126 | # git_commit() 127 | if config.use_shared_dict: 128 | shared_dict = mp.Manager().dict() 129 | else: 130 | shared_dict = None 131 | config.shared_dict = shared_dict 132 | 133 | config.batch_size = config.bs 134 | config.mode = 'train' 135 | if config.root != "." 
and not os.path.isdir(config.root): 136 | os.makedirs(config.root) 137 | 138 | try: 139 | node_str = os.environ['SLURM_JOB_NODELIST'].replace( 140 | '[', '').replace(']', '') 141 | nodes = node_str.split(',') 142 | 143 | config.world_size = len(nodes) 144 | config.rank = int(os.environ['SLURM_PROCID']) 145 | # config.save_dir = "/ibex/scratch/bhatsf/videodepth/checkpoints" 146 | 147 | except KeyError as e: 148 | # We are NOT using SLURM 149 | config.world_size = 1 150 | config.rank = 0 151 | nodes = ["127.0.0.1"] 152 | 153 | if config.distributed: 154 | 155 | print(config.rank) 156 | port = np.random.randint(15000, 15025) 157 | config.dist_url = 'tcp://{}:{}'.format(nodes[0], port) 158 | print(config.dist_url) 159 | config.dist_backend = 'nccl' 160 | config.gpu = None 161 | 162 | ngpus_per_node = torch.cuda.device_count() 163 | config.num_workers = config.workers 164 | config.ngpus_per_node = ngpus_per_node 165 | print("Config:") 166 | pprint(config) 167 | if config.distributed: 168 | config.world_size = ngpus_per_node * config.world_size 169 | mp.spawn(main_worker, nprocs=ngpus_per_node, 170 | args=(ngpus_per_node, config)) 171 | else: 172 | if ngpus_per_node == 1: 173 | config.gpu = 0 174 | main_worker(config.gpu, ngpus_per_node, config) 175 | -------------------------------------------------------------------------------- /ZoeDepth/ui/app.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import gradio as gr 26 | import torch 27 | 28 | from .gradio_depth_pred import create_demo as create_depth_pred_demo 29 | from .gradio_im_to_3d import create_demo as create_im_to_3d_demo 30 | from .gradio_pano_to_3d import create_demo as create_pano_to_3d_demo 31 | 32 | 33 | css = """ 34 | #img-display-container { 35 | max-height: 50vh; 36 | } 37 | #img-display-input { 38 | max-height: 40vh; 39 | } 40 | #img-display-output { 41 | max-height: 40vh; 42 | } 43 | 44 | """ 45 | DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' 46 | model = torch.hub.load('isl-org/ZoeDepth', "ZoeD_N", pretrained=True).to(DEVICE).eval() 47 | 48 | title = "# ZoeDepth" 49 | description = """Official demo for **ZoeDepth: Zero-shot Transfer by Combining Relative and Metric Depth**. 
50 | 51 | ZoeDepth is a deep learning model for metric depth estimation from a single image. 52 | 53 | Please refer to our [paper](https://arxiv.org/abs/2302.12288) or [github](https://github.com/isl-org/ZoeDepth) for more details.""" 54 | 55 | with gr.Blocks(css=css) as demo: 56 | gr.Markdown(title) 57 | gr.Markdown(description) 58 | with gr.Tab("Depth Prediction"): 59 | create_depth_pred_demo(model) 60 | with gr.Tab("Image to 3D"): 61 | create_im_to_3d_demo(model) 62 | with gr.Tab("360 Panorama to 3D"): 63 | create_pano_to_3d_demo(model) 64 | 65 | if __name__ == '__main__': 66 | demo.queue().launch() -------------------------------------------------------------------------------- /ZoeDepth/ui/gradio_depth_pred.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
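# This tab takes a PIL image, predicts metric depth with model.infer_pil, and returns a colorized
# depth map (cmap "gray_r") plus a downloadable 16-bit PNG in which depth_in_meters is roughly
# pixel_value / 256 (hence the "multiplier:256" label below).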
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import gradio as gr 26 | from zoedepth.utils.misc import colorize 27 | from PIL import Image 28 | import tempfile 29 | 30 | def predict_depth(model, image): 31 | depth = model.infer_pil(image) 32 | return depth 33 | 34 | def create_demo(model): 35 | gr.Markdown("### Depth Prediction demo") 36 | with gr.Row(): 37 | input_image = gr.Image(label="Input Image", type='pil', elem_id='img-display-input').style(height="auto") 38 | depth_image = gr.Image(label="Depth Map", elem_id='img-display-output') 39 | raw_file = gr.File(label="16-bit raw depth, multiplier:256") 40 | submit = gr.Button("Submit") 41 | 42 | def on_submit(image): 43 | depth = predict_depth(model, image) 44 | colored_depth = colorize(depth, cmap='gray_r') 45 | tmp = tempfile.NamedTemporaryFile(suffix='.png', delete=False) 46 | raw_depth = Image.fromarray((depth*256).astype('uint16')) 47 | raw_depth.save(tmp.name) 48 | return [colored_depth, tmp.name] 49 | 50 | submit.click(on_submit, inputs=[input_image], outputs=[depth_image, raw_file]) 51 | # examples = gr.Examples(examples=["examples/person_1.jpeg", "examples/person_2.jpeg", "examples/person-leaves.png", "examples/living-room.jpeg"], 52 | # inputs=[input_image]) -------------------------------------------------------------------------------- /ZoeDepth/ui/gradio_im_to_3d.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import gradio as gr 26 | import numpy as np 27 | import trimesh 28 | from zoedepth.utils.geometry import depth_to_points, create_triangles 29 | from functools import partial 30 | import tempfile 31 | 32 | 33 | def depth_edges_mask(depth): 34 | """Returns a mask of edges in the depth map. 35 | Args: 36 | depth: 2D numpy array of shape (H, W) with dtype float32. 37 | Returns: 38 | mask: 2D numpy array of shape (H, W) with dtype bool. 39 | """ 40 | # Compute the x and y gradients of the depth map. 41 | depth_dx, depth_dy = np.gradient(depth) 42 | # Compute the gradient magnitude. 43 | depth_grad = np.sqrt(depth_dx ** 2 + depth_dy ** 2) 44 | # Compute the edge mask. 
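# A fixed gradient-magnitude threshold of 0.05 (below) marks depth discontinuities; get_mesh later
# drops triangles that cross these pixels (mask=~depth_edges_mask(depth)) unless the
# "Keep occlusion edges" checkbox is enabled.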
45 | mask = depth_grad > 0.05 46 | return mask 47 | 48 | 49 | def predict_depth(model, image): 50 | depth = model.infer_pil(image) 51 | return depth 52 | 53 | def get_mesh(model, image, keep_edges=False): 54 | image.thumbnail((1024,1024)) # limit the size of the input image 55 | depth = predict_depth(model, image) 56 | pts3d = depth_to_points(depth[None]) 57 | pts3d = pts3d.reshape(-1, 3) 58 | 59 | # Create a trimesh mesh from the points 60 | # Each pixel is connected to its 4 neighbors 61 | # colors are the RGB values of the image 62 | 63 | verts = pts3d.reshape(-1, 3) 64 | image = np.array(image) 65 | if keep_edges: 66 | triangles = create_triangles(image.shape[0], image.shape[1]) 67 | else: 68 | triangles = create_triangles(image.shape[0], image.shape[1], mask=~depth_edges_mask(depth)) 69 | colors = image.reshape(-1, 3) 70 | mesh = trimesh.Trimesh(vertices=verts, faces=triangles, vertex_colors=colors) 71 | 72 | # Save as glb 73 | glb_file = tempfile.NamedTemporaryFile(suffix='.glb', delete=False) 74 | glb_path = glb_file.name 75 | mesh.export(glb_path) 76 | return glb_path 77 | 78 | def create_demo(model): 79 | 80 | gr.Markdown("### Image to 3D mesh") 81 | gr.Markdown("Convert a single 2D image to a 3D mesh") 82 | 83 | with gr.Row(): 84 | image = gr.Image(label="Input Image", type='pil') 85 | result = gr.Model3D(label="3d mesh reconstruction", clear_color=[ 86 | 1.0, 1.0, 1.0, 1.0]) 87 | 88 | checkbox = gr.Checkbox(label="Keep occlusion edges", value=False) 89 | submit = gr.Button("Submit") 90 | submit.click(partial(get_mesh, model), inputs=[image, checkbox], outputs=[result]) 91 | # examples = gr.Examples(examples=["examples/aerial_beach.jpeg", "examples/mountains.jpeg", "examples/person_1.jpeg", "examples/ancient-carved.jpeg"], 92 | # inputs=[image]) 93 | 94 | -------------------------------------------------------------------------------- /ZoeDepth/ui/gradio_pano_to_3d.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import gradio as gr 26 | import numpy as np 27 | import trimesh 28 | from zoedepth.utils.geometry import create_triangles 29 | from functools import partial 30 | import tempfile 31 | 32 | def depth_edges_mask(depth): 33 | """Returns a mask of edges in the depth map. 
34 | Args: 35 | depth: 2D numpy array of shape (H, W) with dtype float32. 36 | Returns: 37 | mask: 2D numpy array of shape (H, W) with dtype bool. 38 | """ 39 | # Compute the x and y gradients of the depth map. 40 | depth_dx, depth_dy = np.gradient(depth) 41 | # Compute the gradient magnitude. 42 | depth_grad = np.sqrt(depth_dx ** 2 + depth_dy ** 2) 43 | # Compute the edge mask. 44 | mask = depth_grad > 0.05 45 | return mask 46 | 47 | 48 | def pano_depth_to_world_points(depth): 49 | """ 50 | 360 depth to world points 51 | given 2D depth is an equirectangular projection of a spherical image 52 | Treat depth as radius 53 | 54 | longitude : -pi to pi 55 | latitude : -pi/2 to pi/2 56 | """ 57 | 58 | # Convert depth to radius 59 | radius = depth.flatten() 60 | 61 | lon = np.linspace(-np.pi, np.pi, depth.shape[1]) 62 | lat = np.linspace(-np.pi/2, np.pi/2, depth.shape[0]) 63 | 64 | lon, lat = np.meshgrid(lon, lat) 65 | lon = lon.flatten() 66 | lat = lat.flatten() 67 | 68 | # Convert to cartesian coordinates 69 | x = radius * np.cos(lat) * np.cos(lon) 70 | y = radius * np.cos(lat) * np.sin(lon) 71 | z = radius * np.sin(lat) 72 | 73 | pts3d = np.stack([x, y, z], axis=1) 74 | 75 | return pts3d 76 | 77 | 78 | def predict_depth(model, image): 79 | depth = model.infer_pil(image) 80 | return depth 81 | 82 | def get_mesh(model, image, keep_edges=False): 83 | image.thumbnail((1024,1024)) # limit the size of the image 84 | depth = predict_depth(model, image) 85 | pts3d = pano_depth_to_world_points(depth) 86 | 87 | # Create a trimesh mesh from the points 88 | # Each pixel is connected to its 4 neighbors 89 | # colors are the RGB values of the image 90 | 91 | verts = pts3d.reshape(-1, 3) 92 | image = np.array(image) 93 | if keep_edges: 94 | triangles = create_triangles(image.shape[0], image.shape[1]) 95 | else: 96 | triangles = create_triangles(image.shape[0], image.shape[1], mask=~depth_edges_mask(depth)) 97 | colors = image.reshape(-1, 3) 98 | mesh = trimesh.Trimesh(vertices=verts, faces=triangles, vertex_colors=colors) 99 | 100 | # Save as glb 101 | glb_file = tempfile.NamedTemporaryFile(suffix='.glb', delete=False) 102 | glb_path = glb_file.name 103 | mesh.export(glb_path) 104 | return glb_path 105 | 106 | def create_demo(model): 107 | gr.Markdown("### Panorama to 3D mesh") 108 | gr.Markdown("Convert a 360 spherical panorama to a 3D mesh") 109 | gr.Markdown("ZoeDepth was not trained on panoramic images. It doesn't know anything about panoramas or spherical projection. Here, we just treat the estimated depth as radius and some projection errors are expected. 
Nonetheless, ZoeDepth still works surprisingly well on 360 reconstruction.") 110 | 111 | with gr.Row(): 112 | input_image = gr.Image(label="Input Image", type='pil') 113 | result = gr.Model3D(label="3d mesh reconstruction", clear_color=[ 114 | 1.0, 1.0, 1.0, 1.0]) 115 | 116 | checkbox = gr.Checkbox(label="Keep occlusion edges", value=True) 117 | submit = gr.Button("Submit") 118 | submit.click(partial(get_mesh, model), inputs=[input_image, checkbox], outputs=[result]) 119 | # examples = gr.Examples(examples=["examples/pano_1.jpeg", "examples/pano_2.jpeg", "examples/pano_3.jpeg"], 120 | # inputs=[input_image]) -------------------------------------------------------------------------------- /ZoeDepth/ui/ui_requirements.txt: -------------------------------------------------------------------------------- 1 | gradio 2 | trimesh==3.9.42 -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/ddad.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self, resize_shape): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | self.resize = transforms.Resize(resize_shape) 40 | 41 | def __call__(self, sample): 42 | image, depth = sample['image'], sample['depth'] 43 | image = self.to_tensor(image) 44 | image = self.normalize(image) 45 | depth = self.to_tensor(depth) 46 | 47 | image = self.resize(image) 48 | 49 | return {'image': image, 'depth': depth, 'dataset': "ddad"} 50 | 51 | def to_tensor(self, pic): 52 | 53 | if isinstance(pic, np.ndarray): 54 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 55 | return img 56 | 57 | # # handle PIL Image 58 | if pic.mode == 'I': 59 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 60 | elif pic.mode == 'I;16': 61 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 62 | else: 63 | img = torch.ByteTensor( 64 | torch.ByteStorage.from_buffer(pic.tobytes())) 65 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 66 | if pic.mode == 'YCbCr': 67 | nchannel = 3 68 | elif pic.mode == 'I;16': 69 | nchannel = 1 70 | else: 71 | nchannel = len(pic.mode) 72 | img = img.view(pic.size[1], pic.size[0], nchannel) 73 | 74 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 75 | 76 | if isinstance(img, torch.ByteTensor): 77 | return img.float() 78 | else: 79 | return img 80 | 81 | 82 | class DDAD(Dataset): 83 | def __init__(self, data_dir_root, resize_shape): 84 | import glob 85 | 86 | # image paths are of the form /{outleft, depthmap}/*.png 87 | self.image_files = glob.glob(os.path.join(data_dir_root, '*.png')) 88 | self.depth_files = [r.replace("_rgb.png", "_depth.npy") 89 | for r in self.image_files] 90 | self.transform = ToTensor(resize_shape) 91 | 92 | def __getitem__(self, idx): 93 | 94 | image_path = self.image_files[idx] 95 | depth_path = self.depth_files[idx] 96 | 97 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 98 | depth = np.load(depth_path) # meters 99 | 100 | # depth[depth > 8] = -1 101 | depth = depth[..., None] 102 | 103 | sample = dict(image=image, depth=depth) 104 | sample = self.transform(sample) 105 | 106 | if idx == 0: 107 | print(sample["image"].shape) 108 | 109 | return sample 110 | 111 | def __len__(self): 112 | return len(self.image_files) 113 | 114 | 115 | def get_ddad_loader(data_dir_root, resize_shape, batch_size=1, **kwargs): 116 | dataset = DDAD(data_dir_root, resize_shape) 117 | return DataLoader(dataset, batch_size, **kwargs) 118 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/diml_indoor_test.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to 
deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | self.resize = transforms.Resize((480, 640)) 40 | 41 | def __call__(self, sample): 42 | image, depth = sample['image'], sample['depth'] 43 | image = self.to_tensor(image) 44 | image = self.normalize(image) 45 | depth = self.to_tensor(depth) 46 | 47 | image = self.resize(image) 48 | 49 | return {'image': image, 'depth': depth, 'dataset': "diml_indoor"} 50 | 51 | def to_tensor(self, pic): 52 | 53 | if isinstance(pic, np.ndarray): 54 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 55 | return img 56 | 57 | # # handle PIL Image 58 | if pic.mode == 'I': 59 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 60 | elif pic.mode == 'I;16': 61 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 62 | else: 63 | img = torch.ByteTensor( 64 | torch.ByteStorage.from_buffer(pic.tobytes())) 65 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 66 | if pic.mode == 'YCbCr': 67 | nchannel = 3 68 | elif pic.mode == 'I;16': 69 | nchannel = 1 70 | else: 71 | nchannel = len(pic.mode) 72 | img = img.view(pic.size[1], pic.size[0], nchannel) 73 | 74 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 75 | if isinstance(img, torch.ByteTensor): 76 | return img.float() 77 | else: 78 | return img 79 | 80 | 81 | class DIML_Indoor(Dataset): 82 | def __init__(self, data_dir_root): 83 | import glob 84 | 85 | # image paths are of the form /{HR, LR}//{color, depth_filled}/*.png 86 | self.image_files = glob.glob(os.path.join( 87 | data_dir_root, "LR", '*', 'color', '*.png')) 88 | self.depth_files = [r.replace("color", "depth_filled").replace( 89 | "_c.png", "_depth_filled.png") for r in self.image_files] 90 | self.transform = ToTensor() 91 | 92 | def __getitem__(self, idx): 93 | image_path = self.image_files[idx] 94 | depth_path = self.depth_files[idx] 95 | 96 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 97 | depth = np.asarray(Image.open(depth_path), 98 | dtype='uint16') / 1000.0 # mm to meters 99 | 100 | # print(np.shape(image)) 101 | # print(np.shape(depth)) 102 | 103 | # depth[depth > 8] = -1 104 | depth = depth[..., None] 105 | 106 | sample = dict(image=image, depth=depth) 107 
| 108 | # return sample 109 | sample = self.transform(sample) 110 | 111 | if idx == 0: 112 | print(sample["image"].shape) 113 | 114 | return sample 115 | 116 | def __len__(self): 117 | return len(self.image_files) 118 | 119 | 120 | def get_diml_indoor_loader(data_dir_root, batch_size=1, **kwargs): 121 | dataset = DIML_Indoor(data_dir_root) 122 | return DataLoader(dataset, batch_size, **kwargs) 123 | 124 | # get_diml_indoor_loader(data_dir_root="datasets/diml/indoor/test/HR") 125 | # get_diml_indoor_loader(data_dir_root="datasets/diml/indoor/test/LR") 126 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/diml_outdoor_test.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | 40 | def __call__(self, sample): 41 | image, depth = sample['image'], sample['depth'] 42 | image = self.to_tensor(image) 43 | image = self.normalize(image) 44 | depth = self.to_tensor(depth) 45 | 46 | return {'image': image, 'depth': depth, 'dataset': "diml_outdoor"} 47 | 48 | def to_tensor(self, pic): 49 | 50 | if isinstance(pic, np.ndarray): 51 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 52 | return img 53 | 54 | # # handle PIL Image 55 | if pic.mode == 'I': 56 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 57 | elif pic.mode == 'I;16': 58 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 59 | else: 60 | img = torch.ByteTensor( 61 | torch.ByteStorage.from_buffer(pic.tobytes())) 62 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 63 | if pic.mode == 'YCbCr': 64 | nchannel = 3 65 | elif pic.mode == 'I;16': 66 | nchannel = 1 67 | else: 68 | nchannel = len(pic.mode) 69 | img = img.view(pic.size[1], pic.size[0], nchannel) 70 | 71 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 72 | if isinstance(img, torch.ByteTensor): 73 | return img.float() 74 | else: 75 | return img 76 | 77 | 78 | class DIML_Outdoor(Dataset): 79 | def __init__(self, data_dir_root): 80 | import glob 81 | 82 | # image paths are of the form /{outleft, depthmap}/*.png 83 | self.image_files = glob.glob(os.path.join( 84 | data_dir_root, "*", 'outleft', '*.png')) 85 | self.depth_files = [r.replace("outleft", "depthmap") 86 | for r in self.image_files] 87 | self.transform = ToTensor() 88 | 89 | def __getitem__(self, idx): 90 | image_path = self.image_files[idx] 91 | depth_path = self.depth_files[idx] 92 | 93 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 94 | depth = np.asarray(Image.open(depth_path), 95 | dtype='uint16') / 1000.0 # mm to meters 96 | 97 | # depth[depth > 8] = -1 98 | depth = depth[..., None] 99 | 100 | sample = dict(image=image, depth=depth, dataset="diml_outdoor") 101 | 102 | # return sample 103 | return self.transform(sample) 104 | 105 | def __len__(self): 106 | return len(self.image_files) 107 | 108 | 109 | def get_diml_outdoor_loader(data_dir_root, batch_size=1, **kwargs): 110 | dataset = DIML_Outdoor(data_dir_root) 111 | return DataLoader(dataset, batch_size, **kwargs) 112 | 113 | # get_diml_outdoor_loader(data_dir_root="datasets/diml/outdoor/test/HR") 114 | # get_diml_outdoor_loader(data_dir_root="datasets/diml/outdoor/test/LR") 115 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/diode.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the 
Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | self.resize = transforms.Resize(480) 40 | 41 | def __call__(self, sample): 42 | image, depth = sample['image'], sample['depth'] 43 | image = self.to_tensor(image) 44 | image = self.normalize(image) 45 | depth = self.to_tensor(depth) 46 | 47 | image = self.resize(image) 48 | 49 | return {'image': image, 'depth': depth, 'dataset': "diode"} 50 | 51 | def to_tensor(self, pic): 52 | 53 | if isinstance(pic, np.ndarray): 54 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 55 | return img 56 | 57 | # # handle PIL Image 58 | if pic.mode == 'I': 59 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 60 | elif pic.mode == 'I;16': 61 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 62 | else: 63 | img = torch.ByteTensor( 64 | torch.ByteStorage.from_buffer(pic.tobytes())) 65 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 66 | if pic.mode == 'YCbCr': 67 | nchannel = 3 68 | elif pic.mode == 'I;16': 69 | nchannel = 1 70 | else: 71 | nchannel = len(pic.mode) 72 | img = img.view(pic.size[1], pic.size[0], nchannel) 73 | 74 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 75 | 76 | if isinstance(img, torch.ByteTensor): 77 | return img.float() 78 | else: 79 | return img 80 | 81 | 82 | class DIODE(Dataset): 83 | def __init__(self, data_dir_root): 84 | import glob 85 | 86 | # image paths are of the form /scene_#/scan_#/*.png 87 | self.image_files = glob.glob( 88 | os.path.join(data_dir_root, '*', '*', '*.png')) 89 | self.depth_files = [r.replace(".png", "_depth.npy") 90 | for r in self.image_files] 91 | self.depth_mask_files = [ 92 | r.replace(".png", "_depth_mask.npy") for r in self.image_files] 93 | self.transform = ToTensor() 94 | 95 | def __getitem__(self, idx): 96 | image_path = self.image_files[idx] 97 | depth_path = self.depth_files[idx] 98 | depth_mask_path = self.depth_mask_files[idx] 99 | 100 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 101 | depth = np.load(depth_path) # in meters 102 | valid = np.load(depth_mask_path) # binary 103 | 104 | # depth[depth > 8] = -1 105 | # depth = depth[..., None] 106 | 107 | sample = dict(image=image, depth=depth, valid=valid) 108 | 109 | # return sample 110 | sample = self.transform(sample) 111 | 112 | if idx == 0: 113 | print(sample["image"].shape) 114 | 115 | return sample 116 | 117 | def __len__(self): 118 | return len(self.image_files) 
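# (Editor's note — hedged usage sketch, not part of the original file.) The helper
# defined just below can be exercised along the lines of the commented call at the
# bottom of this file; the dataset path is illustrative only:
#
#     loader = get_diode_loader("datasets/diode/val/outdoor", batch_size=1)
#     batch = next(iter(loader))
#     # batch["image"] is an RGB tensor whose shorter side is resized to 480,
#     # batch["depth"] holds metric depth loaded from the *_depth.npy files,
#     # and batch["dataset"][0] == "diode". Note the "valid" mask is dropped by
#     # ToTensor.__call__ above.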
119 | 120 | 121 | def get_diode_loader(data_dir_root, batch_size=1, **kwargs): 122 | dataset = DIODE(data_dir_root) 123 | return DataLoader(dataset, batch_size, **kwargs) 124 | 125 | # get_diode_loader(data_dir_root="datasets/diode/val/outdoor") 126 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/hypersim.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import glob 26 | import os 27 | 28 | import h5py 29 | import numpy as np 30 | import torch 31 | from PIL import Image 32 | from torch.utils.data import DataLoader, Dataset 33 | from torchvision import transforms 34 | 35 | 36 | def hypersim_distance_to_depth(npyDistance): 37 | intWidth, intHeight, fltFocal = 1024, 768, 886.81 38 | 39 | npyImageplaneX = np.linspace((-0.5 * intWidth) + 0.5, (0.5 * intWidth) - 0.5, intWidth).reshape( 40 | 1, intWidth).repeat(intHeight, 0).astype(np.float32)[:, :, None] 41 | npyImageplaneY = np.linspace((-0.5 * intHeight) + 0.5, (0.5 * intHeight) - 0.5, 42 | intHeight).reshape(intHeight, 1).repeat(intWidth, 1).astype(np.float32)[:, :, None] 43 | npyImageplaneZ = np.full([intHeight, intWidth, 1], fltFocal, np.float32) 44 | npyImageplane = np.concatenate( 45 | [npyImageplaneX, npyImageplaneY, npyImageplaneZ], 2) 46 | 47 | npyDepth = npyDistance / np.linalg.norm(npyImageplane, 2, 2) * fltFocal 48 | return npyDepth 49 | 50 | 51 | class ToTensor(object): 52 | def __init__(self): 53 | # self.normalize = transforms.Normalize( 54 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 55 | self.normalize = lambda x: x 56 | self.resize = transforms.Resize((480, 640)) 57 | 58 | def __call__(self, sample): 59 | image, depth = sample['image'], sample['depth'] 60 | image = self.to_tensor(image) 61 | image = self.normalize(image) 62 | depth = self.to_tensor(depth) 63 | 64 | image = self.resize(image) 65 | 66 | return {'image': image, 'depth': depth, 'dataset': "hypersim"} 67 | 68 | def to_tensor(self, pic): 69 | 70 | if isinstance(pic, np.ndarray): 71 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 72 | return img 73 | 74 | # # handle PIL Image 75 | if pic.mode == 'I': 76 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 77 | elif pic.mode == 'I;16': 78 | img = 
torch.from_numpy(np.array(pic, np.int16, copy=False)) 79 | else: 80 | img = torch.ByteTensor( 81 | torch.ByteStorage.from_buffer(pic.tobytes())) 82 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 83 | if pic.mode == 'YCbCr': 84 | nchannel = 3 85 | elif pic.mode == 'I;16': 86 | nchannel = 1 87 | else: 88 | nchannel = len(pic.mode) 89 | img = img.view(pic.size[1], pic.size[0], nchannel) 90 | 91 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 92 | if isinstance(img, torch.ByteTensor): 93 | return img.float() 94 | else: 95 | return img 96 | 97 | 98 | class HyperSim(Dataset): 99 | def __init__(self, data_dir_root): 100 | # image paths are of the form //images/scene_cam_#_final_preview/*.tonemap.jpg 101 | # depth paths are of the form //images/scene_cam_#_final_preview/*.depth_meters.hdf5 102 | self.image_files = glob.glob(os.path.join( 103 | data_dir_root, '*', 'images', 'scene_cam_*_final_preview', '*.tonemap.jpg')) 104 | self.depth_files = [r.replace("_final_preview", "_geometry_hdf5").replace( 105 | ".tonemap.jpg", ".depth_meters.hdf5") for r in self.image_files] 106 | self.transform = ToTensor() 107 | 108 | def __getitem__(self, idx): 109 | image_path = self.image_files[idx] 110 | depth_path = self.depth_files[idx] 111 | 112 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 113 | 114 | # depth from hdf5 115 | depth_fd = h5py.File(depth_path, "r") 116 | # in meters (Euclidean distance) 117 | distance_meters = np.array(depth_fd['dataset']) 118 | depth = hypersim_distance_to_depth( 119 | distance_meters) # in meters (planar depth) 120 | 121 | # depth[depth > 8] = -1 122 | depth = depth[..., None] 123 | 124 | sample = dict(image=image, depth=depth) 125 | sample = self.transform(sample) 126 | 127 | if idx == 0: 128 | print(sample["image"].shape) 129 | 130 | return sample 131 | 132 | def __len__(self): 133 | return len(self.image_files) 134 | 135 | 136 | def get_hypersim_loader(data_dir_root, batch_size=1, **kwargs): 137 | dataset = HyperSim(data_dir_root) 138 | return DataLoader(dataset, batch_size, **kwargs) 139 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/ibims.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
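# (Editor's note — hedged usage sketch, not part of the original file.) The loader
# defined below only needs an object exposing an `ibims_root` attribute; the path
# used here is illustrative:
#
#     from types import SimpleNamespace
#     loader = get_ibims_loader(SimpleNamespace(ibims_root="datasets/ibims"), batch_size=1)
#
# The root directory is expected to contain imagelist.txt plus the rgb/, depth/,
# mask_invalid/ and mask_transp/ subfolders read in iBims.__init__ below.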
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms as T 32 | 33 | 34 | class iBims(Dataset): 35 | def __init__(self, config): 36 | root_folder = config.ibims_root 37 | with open(os.path.join(root_folder, "imagelist.txt"), 'r') as f: 38 | imglist = f.read().split() 39 | 40 | samples = [] 41 | for basename in imglist: 42 | img_path = os.path.join(root_folder, 'rgb', basename + ".png") 43 | depth_path = os.path.join(root_folder, 'depth', basename + ".png") 44 | valid_mask_path = os.path.join( 45 | root_folder, 'mask_invalid', basename+".png") 46 | transp_mask_path = os.path.join( 47 | root_folder, 'mask_transp', basename+".png") 48 | 49 | samples.append( 50 | (img_path, depth_path, valid_mask_path, transp_mask_path)) 51 | 52 | self.samples = samples 53 | # self.normalize = T.Normalize( 54 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 55 | self.normalize = lambda x : x 56 | 57 | def __getitem__(self, idx): 58 | img_path, depth_path, valid_mask_path, transp_mask_path = self.samples[idx] 59 | 60 | img = np.asarray(Image.open(img_path), dtype=np.float32) / 255.0 61 | depth = np.asarray(Image.open(depth_path), 62 | dtype=np.uint16).astype('float')*50.0/65535 63 | 64 | mask_valid = np.asarray(Image.open(valid_mask_path)) 65 | mask_transp = np.asarray(Image.open(transp_mask_path)) 66 | 67 | # depth = depth * mask_valid * mask_transp 68 | depth = np.where(mask_valid * mask_transp, depth, -1) 69 | 70 | img = torch.from_numpy(img).permute(2, 0, 1) 71 | img = self.normalize(img) 72 | depth = torch.from_numpy(depth).unsqueeze(0) 73 | return dict(image=img, depth=depth, image_path=img_path, depth_path=depth_path, dataset='ibims') 74 | 75 | def __len__(self): 76 | return len(self.samples) 77 | 78 | 79 | def get_ibims_loader(config, batch_size=1, **kwargs): 80 | dataloader = DataLoader(iBims(config), batch_size=batch_size, **kwargs) 81 | return dataloader 82 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/preprocess.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
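# (Editor's note — hedged usage sketch, not part of the original file.) The border
# helpers defined below are typically applied jointly to an RGB frame and any
# aligned maps; the file names are illustrative and the snippet assumes
# `from PIL import Image` and `import numpy as np`, as used elsewhere in this package:
#
#     rgb = np.asarray(Image.open("frame.png"), dtype=np.float32)   # (H, W, 3)
#     depth = np.load("frame_depth.npy")                            # (H, W)
#     rgb_c, depth_c = crop_black_or_white_border(rgb, depth)
#
# Only the RGB image determines the CropParams; every additional array is cropped
# with the same parameters via crop_images.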
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import numpy as np 26 | from dataclasses import dataclass 27 | from typing import Tuple, List 28 | 29 | # dataclass to store the crop parameters 30 | @dataclass 31 | class CropParams: 32 | top: int 33 | bottom: int 34 | left: int 35 | right: int 36 | 37 | 38 | 39 | def get_border_params(rgb_image, tolerance=0.1, cut_off=20, value=0, level_diff_threshold=5, channel_axis=-1, min_border=5) -> CropParams: 40 | gray_image = np.mean(rgb_image, axis=channel_axis) 41 | h, w = gray_image.shape 42 | 43 | 44 | def num_value_pixels(arr): 45 | return np.sum(np.abs(arr - value) < level_diff_threshold) 46 | 47 | def is_above_tolerance(arr, total_pixels): 48 | return (num_value_pixels(arr) / total_pixels) > tolerance 49 | 50 | # Crop top border until number of value pixels become below tolerance 51 | top = min_border 52 | while is_above_tolerance(gray_image[top, :], w) and top < h-1: 53 | top += 1 54 | if top > cut_off: 55 | break 56 | 57 | # Crop bottom border until number of value pixels become below tolerance 58 | bottom = h - min_border 59 | while is_above_tolerance(gray_image[bottom, :], w) and bottom > 0: 60 | bottom -= 1 61 | if h - bottom > cut_off: 62 | break 63 | 64 | # Crop left border until number of value pixels become below tolerance 65 | left = min_border 66 | while is_above_tolerance(gray_image[:, left], h) and left < w-1: 67 | left += 1 68 | if left > cut_off: 69 | break 70 | 71 | # Crop right border until number of value pixels become below tolerance 72 | right = w - min_border 73 | while is_above_tolerance(gray_image[:, right], h) and right > 0: 74 | right -= 1 75 | if w - right > cut_off: 76 | break 77 | 78 | 79 | return CropParams(top, bottom, left, right) 80 | 81 | 82 | def get_white_border(rgb_image, value=255, **kwargs) -> CropParams: 83 | """Crops the white border of the RGB. 84 | 85 | Args: 86 | rgb: RGB image, shape (H, W, 3). 87 | Returns: 88 | Crop parameters. 89 | """ 90 | if value == 255: 91 | # assert range of values in rgb image is [0, 255] 92 | assert np.max(rgb_image) <= 255 and np.min(rgb_image) >= 0, "RGB image values are not in range [0, 255]." 93 | assert rgb_image.max() > 1, "RGB image values are not in range [0, 255]." 94 | elif value == 1: 95 | # assert range of values in rgb image is [0, 1] 96 | assert np.max(rgb_image) <= 1 and np.min(rgb_image) >= 0, "RGB image values are not in range [0, 1]." 97 | 98 | return get_border_params(rgb_image, value=value, **kwargs) 99 | 100 | def get_black_border(rgb_image, **kwargs) -> CropParams: 101 | """Crops the black border of the RGB. 102 | 103 | Args: 104 | rgb: RGB image, shape (H, W, 3). 105 | 106 | Returns: 107 | Crop parameters. 108 | """ 109 | 110 | return get_border_params(rgb_image, value=0, **kwargs) 111 | 112 | def crop_image(image: np.ndarray, crop_params: CropParams) -> np.ndarray: 113 | """Crops the image according to the crop parameters. 114 | 115 | Args: 116 | image: RGB or depth image, shape (H, W, 3) or (H, W). 117 | crop_params: Crop parameters. 118 | 119 | Returns: 120 | Cropped image. 121 | """ 122 | return image[crop_params.top:crop_params.bottom, crop_params.left:crop_params.right] 123 | 124 | def crop_images(*images: np.ndarray, crop_params: CropParams) -> Tuple[np.ndarray]: 125 | """Crops the images according to the crop parameters. 126 | 127 | Args: 128 | images: RGB or depth images, shape (H, W, 3) or (H, W). 129 | crop_params: Crop parameters. 130 | 131 | Returns: 132 | Cropped images. 
133 | """ 134 | return tuple(crop_image(image, crop_params) for image in images) 135 | 136 | def crop_black_or_white_border(rgb_image, *other_images: np.ndarray, tolerance=0.1, cut_off=20, level_diff_threshold=5) -> Tuple[np.ndarray]: 137 | """Crops the white and black border of the RGB and depth images. 138 | 139 | Args: 140 | rgb: RGB image, shape (H, W, 3). This image is used to determine the border. 141 | other_images: The other images to crop according to the border of the RGB image. 142 | Returns: 143 | Cropped RGB and other images. 144 | """ 145 | # crop black border 146 | crop_params = get_black_border(rgb_image, tolerance=tolerance, cut_off=cut_off, level_diff_threshold=level_diff_threshold) 147 | cropped_images = crop_images(rgb_image, *other_images, crop_params=crop_params) 148 | 149 | # crop white border 150 | crop_params = get_white_border(cropped_images[0], tolerance=tolerance, cut_off=cut_off, level_diff_threshold=level_diff_threshold) 151 | cropped_images = crop_images(*cropped_images, crop_params=crop_params) 152 | 153 | return cropped_images 154 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/sun_rgbd_loader.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | 40 | def __call__(self, sample): 41 | image, depth = sample['image'], sample['depth'] 42 | image = self.to_tensor(image) 43 | image = self.normalize(image) 44 | depth = self.to_tensor(depth) 45 | 46 | return {'image': image, 'depth': depth, 'dataset': "sunrgbd"} 47 | 48 | def to_tensor(self, pic): 49 | 50 | if isinstance(pic, np.ndarray): 51 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 52 | return img 53 | 54 | # # handle PIL Image 55 | if pic.mode == 'I': 56 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 57 | elif pic.mode == 'I;16': 58 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 59 | else: 60 | img = torch.ByteTensor( 61 | torch.ByteStorage.from_buffer(pic.tobytes())) 62 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 63 | if pic.mode == 'YCbCr': 64 | nchannel = 3 65 | elif pic.mode == 'I;16': 66 | nchannel = 1 67 | else: 68 | nchannel = len(pic.mode) 69 | img = img.view(pic.size[1], pic.size[0], nchannel) 70 | 71 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 72 | if isinstance(img, torch.ByteTensor): 73 | return img.float() 74 | else: 75 | return img 76 | 77 | 78 | class SunRGBD(Dataset): 79 | def __init__(self, data_dir_root): 80 | # test_file_dirs = loadmat(train_test_file)['alltest'].squeeze() 81 | # all_test = [t[0].replace("/n/fs/sun3d/data/", "") for t in test_file_dirs] 82 | # self.all_test = [os.path.join(data_dir_root, t) for t in all_test] 83 | import glob 84 | self.image_files = glob.glob( 85 | os.path.join(data_dir_root, 'rgb', 'rgb', '*')) 86 | self.depth_files = [ 87 | r.replace("rgb/rgb", "gt/gt").replace("jpg", "png") for r in self.image_files] 88 | self.transform = ToTensor() 89 | 90 | def __getitem__(self, idx): 91 | image_path = self.image_files[idx] 92 | depth_path = self.depth_files[idx] 93 | 94 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 95 | depth = np.asarray(Image.open(depth_path), dtype='uint16') / 1000.0 96 | depth[depth > 8] = -1 97 | depth = depth[..., None] 98 | return self.transform(dict(image=image, depth=depth)) 99 | 100 | def __len__(self): 101 | return len(self.image_files) 102 | 103 | 104 | def get_sunrgbd_loader(data_dir_root, batch_size=1, **kwargs): 105 | dataset = SunRGBD(data_dir_root) 106 | return DataLoader(dataset, batch_size, **kwargs) 107 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/vkitti.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this 
permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | from torch.utils.data import Dataset, DataLoader 27 | from torchvision import transforms 28 | import os 29 | 30 | from PIL import Image 31 | import numpy as np 32 | import cv2 33 | 34 | 35 | class ToTensor(object): 36 | def __init__(self): 37 | self.normalize = transforms.Normalize( 38 | mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 39 | # self.resize = transforms.Resize((375, 1242)) 40 | 41 | def __call__(self, sample): 42 | image, depth = sample['image'], sample['depth'] 43 | 44 | image = self.to_tensor(image) 45 | image = self.normalize(image) 46 | depth = self.to_tensor(depth) 47 | 48 | # image = self.resize(image) 49 | 50 | return {'image': image, 'depth': depth, 'dataset': "vkitti"} 51 | 52 | def to_tensor(self, pic): 53 | 54 | if isinstance(pic, np.ndarray): 55 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 56 | return img 57 | 58 | # # handle PIL Image 59 | if pic.mode == 'I': 60 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 61 | elif pic.mode == 'I;16': 62 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 63 | else: 64 | img = torch.ByteTensor( 65 | torch.ByteStorage.from_buffer(pic.tobytes())) 66 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 67 | if pic.mode == 'YCbCr': 68 | nchannel = 3 69 | elif pic.mode == 'I;16': 70 | nchannel = 1 71 | else: 72 | nchannel = len(pic.mode) 73 | img = img.view(pic.size[1], pic.size[0], nchannel) 74 | 75 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 76 | if isinstance(img, torch.ByteTensor): 77 | return img.float() 78 | else: 79 | return img 80 | 81 | 82 | class VKITTI(Dataset): 83 | def __init__(self, data_dir_root, do_kb_crop=True): 84 | import glob 85 | # image paths are of the form <data_dir_root>/test_color/*.png, with depth maps in <data_dir_root>/test_depth/*.png 86 | self.image_files = glob.glob(os.path.join( 87 | data_dir_root, "test_color", '*.png')) 88 | self.depth_files = [r.replace("test_color", "test_depth") 89 | for r in self.image_files] 90 | self.do_kb_crop = do_kb_crop 91 | self.transform = ToTensor() 92 | 93 | def __getitem__(self, idx): 94 | image_path = self.image_files[idx] 95 | depth_path = self.depth_files[idx] 96 | 97 | image = Image.open(image_path) 98 | depth = Image.open(depth_path) 99 | depth = cv2.imread(depth_path, cv2.IMREAD_ANYCOLOR | 100 | cv2.IMREAD_ANYDEPTH) 101 | print("depth min max", depth.min(), depth.max()) 102 | 103 | # print(np.shape(image)) 104 | # print(np.shape(depth)) 105 | 106 | # depth[depth > 8] = -1 107 | 108 | if self.do_kb_crop and False: 109 | height = image.height 110 | width = image.width 111 | top_margin = int(height - 352) 112 | left_margin = int((width - 1216) / 2) 113 | depth = depth.crop( 114 | (left_margin, top_margin, left_margin + 1216, top_margin + 352)) 115 | image = image.crop( 116 | (left_margin, top_margin, left_margin + 1216, top_margin + 352)) 117 | # uv = uv[:, top_margin:top_margin + 352,
left_margin:left_margin + 1216] 118 | 119 | image = np.asarray(image, dtype=np.float32) / 255.0 120 | # depth = np.asarray(depth, dtype=np.uint16) /1. 121 | depth = depth[..., None] 122 | sample = dict(image=image, depth=depth) 123 | 124 | # return sample 125 | sample = self.transform(sample) 126 | 127 | if idx == 0: 128 | print(sample["image"].shape) 129 | 130 | return sample 131 | 132 | def __len__(self): 133 | return len(self.image_files) 134 | 135 | 136 | def get_vkitti_loader(data_dir_root, batch_size=1, **kwargs): 137 | dataset = VKITTI(data_dir_root) 138 | return DataLoader(dataset, batch_size, **kwargs) 139 | 140 | 141 | if __name__ == "__main__": 142 | loader = get_vkitti_loader( 143 | data_dir_root="/home/bhatsf/shortcuts/datasets/vkitti_test") 144 | print("Total files", len(loader.dataset)) 145 | for i, sample in enumerate(loader): 146 | print(sample["image"].shape) 147 | print(sample["depth"].shape) 148 | print(sample["dataset"]) 149 | print(sample['depth'].min(), sample['depth'].max()) 150 | if i > 5: 151 | break 152 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/base_models/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 
14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/builder.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from importlib import import_module 26 | from zoedepth.models.depth_model import DepthModel 27 | 28 | def build_model(config) -> DepthModel: 29 | """Builds a model from a config. The model is specified by the model name and version in the config. The model is then constructed using the build_from_config function of the model interface. 30 | This function should be used to construct models for training and evaluation. 31 | 32 | Args: 33 | config (dict): Config dict. Config is constructed in utils/config.py. Each model has its own config file(s) saved in its root model folder. 34 | 35 | Returns: 36 | torch.nn.Module: Model corresponding to name and version as specified in config 37 | """ 38 | module_name = f"zoedepth.models.{config.model}" 39 | try: 40 | module = import_module(module_name) 41 | except ModuleNotFoundError as e: 42 | # print the original error message 43 | print(e) 44 | raise ValueError( 45 | f"Model {config.model} not found. 
Refer above error for details.") from e 46 | try: 47 | get_version = getattr(module, "get_version") 48 | except AttributeError as e: 49 | raise ValueError( 50 | f"Model {config.model} has no get_version function.") from e 51 | return get_version(config.version_name).build_from_config(config) 52 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/layers/dist_layers.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | import torch.nn as nn 27 | 28 | 29 | def log_binom(n, k, eps=1e-7): 30 | """ log(nCk) using stirling approximation """ 31 | n = n + eps 32 | k = k + eps 33 | return n * torch.log(n) - k * torch.log(k) - (n-k) * torch.log(n-k+eps) 34 | 35 | 36 | class LogBinomial(nn.Module): 37 | def __init__(self, n_classes=256, act=torch.softmax): 38 | """Compute log binomial distribution for n_classes 39 | 40 | Args: 41 | n_classes (int, optional): number of output classes. Defaults to 256. 42 | """ 43 | super().__init__() 44 | self.K = n_classes 45 | self.act = act 46 | self.register_buffer('k_idx', torch.arange( 47 | 0, n_classes).view(1, -1, 1, 1)) 48 | self.register_buffer('K_minus_1', torch.Tensor( 49 | [self.K-1]).view(1, -1, 1, 1)) 50 | 51 | def forward(self, x, t=1., eps=1e-4): 52 | """Compute log binomial distribution for x 53 | 54 | Args: 55 | x (torch.Tensor - NCHW): probabilities 56 | t (float, torch.Tensor - NCHW, optional): Temperature of distribution. Defaults to 1.. 57 | eps (float, optional): Small number for numerical stability. Defaults to 1e-4. 
58 | 59 | Returns: 60 | torch.Tensor -NCHW: log binomial distribution logbinomial(p;t) 61 | """ 62 | if x.ndim == 3: 63 | x = x.unsqueeze(1) # make it nchw 64 | 65 | one_minus_x = torch.clamp(1 - x, eps, 1) 66 | x = torch.clamp(x, eps, 1) 67 | y = log_binom(self.K_minus_1, self.k_idx) + self.k_idx * \ 68 | torch.log(x) + (self.K - 1 - self.k_idx) * torch.log(one_minus_x) 69 | return self.act(y/t, dim=1) 70 | 71 | 72 | class ConditionalLogBinomial(nn.Module): 73 | def __init__(self, in_features, condition_dim, n_classes=256, bottleneck_factor=2, p_eps=1e-4, max_temp=50, min_temp=1e-7, act=torch.softmax): 74 | """Conditional Log Binomial distribution 75 | 76 | Args: 77 | in_features (int): number of input channels in main feature 78 | condition_dim (int): number of input channels in condition feature 79 | n_classes (int, optional): Number of classes. Defaults to 256. 80 | bottleneck_factor (int, optional): Hidden dim factor. Defaults to 2. 81 | p_eps (float, optional): small eps value. Defaults to 1e-4. 82 | max_temp (float, optional): Maximum temperature of output distribution. Defaults to 50. 83 | min_temp (float, optional): Minimum temperature of output distribution. Defaults to 1e-7. 84 | """ 85 | super().__init__() 86 | self.p_eps = p_eps 87 | self.max_temp = max_temp 88 | self.min_temp = min_temp 89 | self.log_binomial_transform = LogBinomial(n_classes, act=act) 90 | bottleneck = (in_features + condition_dim) // bottleneck_factor 91 | self.mlp = nn.Sequential( 92 | nn.Conv2d(in_features + condition_dim, bottleneck, 93 | kernel_size=1, stride=1, padding=0), 94 | nn.GELU(), 95 | # 2 for p linear norm, 2 for t linear norm 96 | nn.Conv2d(bottleneck, 2+2, kernel_size=1, stride=1, padding=0), 97 | nn.Softplus() 98 | ) 99 | 100 | def forward(self, x, cond): 101 | """Forward pass 102 | 103 | Args: 104 | x (torch.Tensor - NCHW): Main feature 105 | cond (torch.Tensor - NCHW): condition feature 106 | 107 | Returns: 108 | torch.Tensor: Output log binomial distribution 109 | """ 110 | pt = self.mlp(torch.concat((x, cond), dim=1)) 111 | p, t = pt[:, :2, ...], pt[:, 2:, ...] 112 | 113 | p = p + self.p_eps 114 | p = p[:, 0, ...] / (p[:, 0, ...] + p[:, 1, ...]) 115 | 116 | t = t + self.p_eps 117 | t = t[:, 0, ...] / (t[:, 0, ...] + t[:, 1, ...]) 118 | t = t.unsqueeze(1) 119 | t = (self.max_temp - self.min_temp) * t + self.min_temp 120 | 121 | return self.log_binomial_transform(p, t) 122 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/layers/patch_transformer.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 
14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | import torch.nn as nn 27 | 28 | 29 | class PatchTransformerEncoder(nn.Module): 30 | def __init__(self, in_channels, patch_size=10, embedding_dim=128, num_heads=4, use_class_token=False): 31 | """ViT-like transformer block 32 | 33 | Args: 34 | in_channels (int): Input channels 35 | patch_size (int, optional): patch size. Defaults to 10. 36 | embedding_dim (int, optional): Embedding dimension in transformer model. Defaults to 128. 37 | num_heads (int, optional): number of attention heads. Defaults to 4. 38 | use_class_token (bool, optional): Whether to use extra token at the start for global accumulation (called as "class token"). Defaults to False. 39 | """ 40 | super(PatchTransformerEncoder, self).__init__() 41 | self.use_class_token = use_class_token 42 | encoder_layers = nn.TransformerEncoderLayer( 43 | embedding_dim, num_heads, dim_feedforward=1024) 44 | self.transformer_encoder = nn.TransformerEncoder( 45 | encoder_layers, num_layers=4) # takes shape S,N,E 46 | 47 | self.embedding_convPxP = nn.Conv2d(in_channels, embedding_dim, 48 | kernel_size=patch_size, stride=patch_size, padding=0) 49 | 50 | def positional_encoding_1d(self, sequence_length, batch_size, embedding_dim, device='cpu'): 51 | """Generate positional encodings 52 | 53 | Args: 54 | sequence_length (int): Sequence length 55 | embedding_dim (int): Embedding dimension 56 | 57 | Returns: 58 | torch.Tensor SBE: Positional encodings 59 | """ 60 | position = torch.arange( 61 | 0, sequence_length, dtype=torch.float32, device=device).unsqueeze(1) 62 | index = torch.arange( 63 | 0, embedding_dim, 2, dtype=torch.float32, device=device).unsqueeze(0) 64 | div_term = torch.exp(index * (-torch.log(torch.tensor(10000.0, device=device)) / embedding_dim)) 65 | pos_encoding = position * div_term 66 | pos_encoding = torch.cat([torch.sin(pos_encoding), torch.cos(pos_encoding)], dim=1) 67 | pos_encoding = pos_encoding.unsqueeze(1).repeat(1, batch_size, 1) 68 | return pos_encoding 69 | 70 | 71 | def forward(self, x): 72 | """Forward pass 73 | 74 | Args: 75 | x (torch.Tensor - NCHW): Input feature tensor 76 | 77 | Returns: 78 | torch.Tensor - SNE: Transformer output embeddings. S - sequence length (=HW/patch_size^2), N - batch size, E - embedding dim 79 | """ 80 | embeddings = self.embedding_convPxP(x).flatten( 81 | 2) # .shape = n,c,s = n, embedding_dim, s 82 | if self.use_class_token: 83 | # extra special token at start ? 
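# (Editor's note) At this point `embeddings` has shape (N, E, S) from
# embedding_convPxP(x).flatten(2); pad=(1, 0) below pads the last (sequence)
# axis on the left, prepending one all-zero column that serves as the extra
# "class token" slot mentioned above.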
84 | embeddings = nn.functional.pad(embeddings, (1, 0)) 85 | 86 | # change to S,N,E format required by transformer 87 | embeddings = embeddings.permute(2, 0, 1) 88 | S, N, E = embeddings.shape 89 | embeddings = embeddings + self.positional_encoding_1d(S, N, E, device=embeddings.device) 90 | x = self.transformer_encoder(embeddings) # .shape = S, N, E 91 | return x 92 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/model_io.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | 27 | def load_state_dict(model, state_dict): 28 | """Load state_dict into model, handling DataParallel and DistributedDataParallel. Also checks for "model" key in state_dict. 29 | 30 | DataParallel prefixes state_dict keys with 'module.' when saving. 31 | If the model is not a DataParallel model but the state_dict is, then prefixes are removed. 32 | If the model is a DataParallel model but the state_dict is not, then prefixes are added. 33 | """ 34 | state_dict = state_dict.get('model', state_dict) 35 | # if model is a DataParallel model, then state_dict keys are prefixed with 'module.' 36 | 37 | do_prefix = isinstance( 38 | model, (torch.nn.DataParallel, torch.nn.parallel.DistributedDataParallel)) 39 | state = {} 40 | for k, v in state_dict.items(): 41 | if k.startswith('module.') and not do_prefix: 42 | k = k[7:] 43 | 44 | if not k.startswith('module.') and do_prefix: 45 | k = 'module.' + k 46 | 47 | state[k] = v 48 | 49 | model.load_state_dict(state) 50 | print("Loaded successfully") 51 | return model 52 | 53 | 54 | def load_wts(model, checkpoint_path): 55 | ckpt = torch.load(checkpoint_path, map_location='cpu') 56 | return load_state_dict(model, ckpt) 57 | 58 | 59 | def load_state_dict_from_url(model, url, **kwargs): 60 | state_dict = torch.hub.load_state_dict_from_url(url, map_location='cpu', **kwargs) 61 | return load_state_dict(model, state_dict) 62 | 63 | 64 | def load_state_from_resource(model, resource: str): 65 | """Loads weights to the model from a given resource. A resource can be of following types: 66 | 1. URL. Prefixed with "url::" 67 | e.g. url::http(s)://url.resource.com/ckpt.pt 68 | 69 | 2. Local path. Prefixed with "local::" 70 | e.g. 
local::/path/to/ckpt.pt 71 | 72 | 73 | Args: 74 | model (torch.nn.Module): Model 75 | resource (str): resource string 76 | 77 | Returns: 78 | torch.nn.Module: Model with loaded weights 79 | """ 80 | print(f"Using pretrained resource {resource}") 81 | 82 | if resource.startswith('url::'): 83 | url = resource.split('url::')[1] 84 | return load_state_dict_from_url(model, url, progress=True) 85 | 86 | elif resource.startswith('local::'): 87 | path = resource.split('local::')[1] 88 | return load_wts(model, path) 89 | 90 | else: 91 | raise ValueError("Invalid resource type, only url:: and local:: are supported") 92 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/zoedepth/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
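# (Editor's note — hedged sketch, not part of the original file.) The `get_version`
# callable defined at the end of this module is what
# zoedepth.models.builder.build_model resolves via
# getattr(import_module(f"zoedepth.models.{config.model}"), "get_version"). Used
# directly it would look like:
#
#     from zoedepth.models.zoedepth import get_version
#     ZoeDepthCls = get_version("v1")   # only "v1" is registered in all_versions below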
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from .zoedepth_v1 import ZoeDepth 26 | 27 | all_versions = { 28 | "v1": ZoeDepth, 29 | } 30 | 31 | get_version = lambda v : all_versions[v] -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/zoedepth/config_zoedepth.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "ZoeDepth", 4 | "version_name": "v1", 5 | "n_bins": 64, 6 | "bin_embedding_dim": 128, 7 | "bin_centers_type": "softplus", 8 | "n_attractors":[16, 8, 4, 1], 9 | "attractor_alpha": 1000, 10 | "attractor_gamma": 2, 11 | "attractor_kind" : "mean", 12 | "attractor_type" : "inv", 13 | "midas_model_type" : "DPT_BEiT_L_384", 14 | "min_temp": 0.0212, 15 | "max_temp": 50.0, 16 | "output_distribution": "logbinomial", 17 | "memory_efficient": true, 18 | "inverse_midas": false, 19 | "img_size": [384, 512] 20 | }, 21 | 22 | "train": { 23 | "train_midas": true, 24 | "use_pretrained_midas": true, 25 | "trainer": "zoedepth", 26 | "epochs": 5, 27 | "bs": 16, 28 | "optim_kwargs": {"lr": 0.000161, "wd": 0.01}, 29 | "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true}, 30 | "same_lr": false, 31 | "w_si": 1, 32 | "w_domain": 0.2, 33 | "w_reg": 0, 34 | "w_grad": 0, 35 | "avoid_boundary": false, 36 | "random_crop": false, 37 | "input_width": 640, 38 | "input_height": 480, 39 | "midas_lr_factor": 1, 40 | "encoder_lr_factor":10, 41 | "pos_enc_lr_factor":10, 42 | "freeze_midas_bn": true 43 | 44 | }, 45 | 46 | "infer":{ 47 | "train_midas": false, 48 | "use_pretrained_midas": false, 49 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_N.pt", 50 | "force_keep_ar": true 51 | }, 52 | 53 | "eval":{ 54 | "train_midas": false, 55 | "use_pretrained_midas": false, 56 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_N.pt" 57 | } 58 | } -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/zoedepth/config_zoedepth_kitti.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "bin_centers_type": "normed", 4 | "img_size": [384, 768] 5 | }, 6 | 7 | "train": { 8 | }, 9 | 10 | "infer":{ 11 | "train_midas": false, 12 | "use_pretrained_midas": false, 13 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt", 14 | "force_keep_ar": true 15 | }, 16 | 17 | "eval":{ 18 | "train_midas": false, 19 | "use_pretrained_midas": false, 20 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt" 21 | } 22 | } -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/zoedepth_nk/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the 
following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from .zoedepth_nk_v1 import ZoeDepthNK 26 | 27 | all_versions = { 28 | "v1": ZoeDepthNK, 29 | } 30 | 31 | get_version = lambda v : all_versions[v] -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/zoedepth_nk/config_zoedepth_nk.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "ZoeDepthNK", 4 | "version_name": "v1", 5 | "bin_conf" : [ 6 | { 7 | "name": "nyu", 8 | "n_bins": 64, 9 | "min_depth": 1e-3, 10 | "max_depth": 10.0 11 | }, 12 | { 13 | "name": "kitti", 14 | "n_bins": 64, 15 | "min_depth": 1e-3, 16 | "max_depth": 80.0 17 | } 18 | ], 19 | "bin_embedding_dim": 128, 20 | "bin_centers_type": "softplus", 21 | "n_attractors":[16, 8, 4, 1], 22 | "attractor_alpha": 1000, 23 | "attractor_gamma": 2, 24 | "attractor_kind" : "mean", 25 | "attractor_type" : "inv", 26 | "min_temp": 0.0212, 27 | "max_temp": 50.0, 28 | "memory_efficient": true, 29 | "midas_model_type" : "DPT_BEiT_L_384", 30 | "img_size": [384, 512] 31 | }, 32 | 33 | "train": { 34 | "train_midas": true, 35 | "use_pretrained_midas": true, 36 | "trainer": "zoedepth_nk", 37 | "epochs": 5, 38 | "bs": 16, 39 | "optim_kwargs": {"lr": 0.0002512, "wd": 0.01}, 40 | "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true}, 41 | "same_lr": false, 42 | "w_si": 1, 43 | "w_domain": 100, 44 | "avoid_boundary": false, 45 | "random_crop": false, 46 | "input_width": 640, 47 | "input_height": 480, 48 | "w_grad": 0, 49 | "w_reg": 0, 50 | "midas_lr_factor": 10, 51 | "encoder_lr_factor":10, 52 | "pos_enc_lr_factor":10 53 | }, 54 | 55 | "infer": { 56 | "train_midas": false, 57 | "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt", 58 | "use_pretrained_midas": false, 59 | "force_keep_ar": true 60 | }, 61 | 62 | "eval": { 63 | "train_midas": false, 64 | "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt", 65 | "use_pretrained_midas": false 66 | } 67 | } -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/trainers/builder.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished 
to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from importlib import import_module 26 | 27 | 28 | def get_trainer(config): 29 | """Builds and returns a trainer based on the config. 30 | 31 | Args: 32 | config (dict): the config dict (typically constructed using utils.config.get_config) 33 | config.trainer (str): the name of the trainer to use. The module named "{config.trainer}_trainer" must exist in trainers root module 34 | 35 | Raises: 36 | ValueError: If the specified trainer does not exist under trainers/ folder 37 | 38 | Returns: 39 | Trainer (inherited from zoedepth.trainers.BaseTrainer): The Trainer object 40 | """ 41 | assert "trainer" in config and config.trainer is not None and config.trainer != '', "Trainer not specified. Config: {0}".format( 42 | config) 43 | try: 44 | Trainer = getattr(import_module( 45 | f"zoedepth.trainers.{config.trainer}_trainer"), 'Trainer') 46 | except ModuleNotFoundError as e: 47 | raise ValueError(f"Trainer {config.trainer}_trainer not found.") from e 48 | return Trainer 49 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/utils/arg_utils.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def infer_type(x): # hacky way to infer type from string args 4 | if not isinstance(x, str): 5 | return x 6 | 7 | try: 8 | x = int(x) 9 | return x 10 | except ValueError: 11 | pass 12 | 13 | try: 14 | x = float(x) 15 | return x 16 | except ValueError: 17 | pass 18 | 19 | return x 20 | 21 | 22 | def parse_unknown(unknown_args): 23 | clean = [] 24 | for a in unknown_args: 25 | if "=" in a: 26 | k, v = a.split("=") 27 | clean.extend([k, v]) 28 | else: 29 | clean.append(a) 30 | 31 | keys = clean[::2] 32 | values = clean[1::2] 33 | return {k.replace("--", ""): infer_type(v) for k, v in zip(keys, values)} 34 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/utils/easydict/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | EasyDict 3 | Copy/pasted from https://github.com/makinacorpus/easydict 4 | Original author: Mathieu Leplatre 5 | """ 6 | 7 | class EasyDict(dict): 8 | """ 9 | Get attributes 10 | 11 | >>> d = EasyDict({'foo':3}) 12 | >>> d['foo'] 13 | 3 14 | >>> d.foo 15 | 3 16 | >>> d.bar 17 | Traceback (most recent call last): 18 | ... 19 | AttributeError: 'EasyDict' object has no attribute 'bar' 20 | 21 | Works recursively 22 | 23 | >>> d = EasyDict({'foo':3, 'bar':{'x':1, 'y':2}}) 24 | >>> isinstance(d.bar, dict) 25 | True 26 | >>> d.bar.x 27 | 1 28 | 29 | Bullet-proof 30 | 31 | >>> EasyDict({}) 32 | {} 33 | >>> EasyDict(d={}) 34 | {} 35 | >>> EasyDict(None) 36 | {} 37 | >>> d = {'a': 1} 38 | >>> EasyDict(**d) 39 | {'a': 1} 40 | >>> EasyDict((('a', 1), ('b', 2))) 41 | {'a': 1, 'b': 2} 42 | 43 | Set attributes 44 | 45 | >>> d = EasyDict() 46 | >>> d.foo = 3 47 | >>> d.foo 48 | 3 49 | >>> d.bar = {'prop': 'value'} 50 | >>> d.bar.prop 51 | 'value' 52 | >>> d 53 | {'foo': 3, 'bar': {'prop': 'value'}} 54 | >>> d.bar.prop = 'newer' 55 | >>> d.bar.prop 56 | 'newer' 57 | 58 | 59 | Values extraction 60 | 61 | >>> d = EasyDict({'foo':0, 'bar':[{'x':1, 'y':2}, {'x':3, 'y':4}]}) 62 | >>> isinstance(d.bar, list) 63 | True 64 | >>> from operator import attrgetter 65 | >>> list(map(attrgetter('x'), d.bar)) 66 | [1, 3] 67 | >>> list(map(attrgetter('y'), d.bar)) 68 | [2, 4] 69 | >>> d = EasyDict() 70 | >>> list(d.keys()) 71 | [] 72 | >>> d = EasyDict(foo=3, bar=dict(x=1, y=2)) 73 | >>> d.foo 74 | 3 75 | >>> d.bar.x 76 | 1 77 | 78 | Still like a dict though 79 | 80 | >>> o = EasyDict({'clean':True}) 81 | >>> list(o.items()) 82 | [('clean', True)] 83 | 84 | And like a class 85 | 86 | >>> class Flower(EasyDict): 87 | ... power = 1 88 | ... 89 | >>> f = Flower() 90 | >>> f.power 91 | 1 92 | >>> f = Flower({'height': 12}) 93 | >>> f.height 94 | 12 95 | >>> f['power'] 96 | 1 97 | >>> sorted(f.keys()) 98 | ['height', 'power'] 99 | 100 | update and pop items 101 | >>> d = EasyDict(a=1, b='2') 102 | >>> e = EasyDict(c=3.0, a=9.0) 103 | >>> d.update(e) 104 | >>> d.c 105 | 3.0 106 | >>> d['c'] 107 | 3.0 108 | >>> d.get('c') 109 | 3.0 110 | >>> d.update(a=4, b=4) 111 | >>> d.b 112 | 4 113 | >>> d.pop('a') 114 | 4 115 | >>> d.a 116 | Traceback (most recent call last): 117 | ... 
118 | AttributeError: 'EasyDict' object has no attribute 'a' 119 | """ 120 | def __init__(self, d=None, **kwargs): 121 | if d is None: 122 | d = {} 123 | else: 124 | d = dict(d) 125 | if kwargs: 126 | d.update(**kwargs) 127 | for k, v in d.items(): 128 | setattr(self, k, v) 129 | # Class attributes 130 | for k in self.__class__.__dict__.keys(): 131 | if not (k.startswith('__') and k.endswith('__')) and not k in ('update', 'pop'): 132 | setattr(self, k, getattr(self, k)) 133 | 134 | def __setattr__(self, name, value): 135 | if isinstance(value, (list, tuple)): 136 | value = [self.__class__(x) 137 | if isinstance(x, dict) else x for x in value] 138 | elif isinstance(value, dict) and not isinstance(value, self.__class__): 139 | value = self.__class__(value) 140 | super(EasyDict, self).__setattr__(name, value) 141 | super(EasyDict, self).__setitem__(name, value) 142 | 143 | __setitem__ = __setattr__ 144 | 145 | def update(self, e=None, **f): 146 | d = e or dict() 147 | d.update(f) 148 | for k in d: 149 | setattr(self, k, d[k]) 150 | 151 | def pop(self, k, d=None): 152 | delattr(self, k) 153 | return super(EasyDict, self).pop(k, d) 154 | 155 | 156 | if __name__ == "__main__": 157 | import doctest 158 | doctest.testmod() -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/utils/geometry.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import numpy as np 26 | 27 | def get_intrinsics(H,W): 28 | """ 29 | Intrinsics for a pinhole camera model. 30 | Assume fov of 55 degrees and central principal point. 
31 | """ 32 | f = 0.5 * W / np.tan(0.5 * 55 * np.pi / 180.0) 33 | cx = 0.5 * W 34 | cy = 0.5 * H 35 | return np.array([[f, 0, cx], 36 | [0, f, cy], 37 | [0, 0, 1]]) 38 | 39 | def depth_to_points(depth, R=None, t=None): 40 | 41 | K = get_intrinsics(depth.shape[1], depth.shape[2]) 42 | Kinv = np.linalg.inv(K) 43 | if R is None: 44 | R = np.eye(3) 45 | if t is None: 46 | t = np.zeros(3) 47 | 48 | # M converts from your coordinate to PyTorch3D's coordinate system 49 | M = np.eye(3) 50 | M[0, 0] = -1.0 51 | M[1, 1] = -1.0 52 | 53 | height, width = depth.shape[1:3] 54 | 55 | x = np.arange(width) 56 | y = np.arange(height) 57 | coord = np.stack(np.meshgrid(x, y), -1) 58 | coord = np.concatenate((coord, np.ones_like(coord)[:, :, [0]]), -1) # z=1 59 | coord = coord.astype(np.float32) 60 | # coord = torch.as_tensor(coord, dtype=torch.float32, device=device) 61 | coord = coord[None] # bs, h, w, 3 62 | 63 | D = depth[:, :, :, None, None] 64 | # print(D.shape, Kinv[None, None, None, ...].shape, coord[:, :, :, :, None].shape ) 65 | pts3D_1 = D * Kinv[None, None, None, ...] @ coord[:, :, :, :, None] 66 | # pts3D_1 live in your coordinate system. Convert them to Py3D's 67 | pts3D_1 = M[None, None, None, ...] @ pts3D_1 68 | # from reference to target viewpoint 69 | pts3D_2 = R[None, None, None, ...] @ pts3D_1 + t[None, None, None, :, None] 70 | # pts3D_2 = pts3D_1 71 | # depth_2 = pts3D_2[:, :, :, 2, :] # b,1,h,w 72 | return pts3D_2[:, :, :, :3, 0][0] 73 | 74 | 75 | def create_triangles(h, w, mask=None): 76 | """ 77 | Reference: https://github.com/google-research/google-research/blob/e96197de06613f1b027d20328e06d69829fa5a89/infinite_nature/render_utils.py#L68 78 | Creates mesh triangle indices from a given pixel grid size. 79 | This function is not and need not be differentiable as triangle indices are 80 | fixed. 81 | Args: 82 | h: (int) denoting the height of the image. 83 | w: (int) denoting the width of the image. 84 | Returns: 85 | triangles: 2D numpy array of indices (int) with shape (2(W-1)(H-1) x 3) 86 | """ 87 | x, y = np.meshgrid(range(w - 1), range(h - 1)) 88 | tl = y * w + x 89 | tr = y * w + x + 1 90 | bl = (y + 1) * w + x 91 | br = (y + 1) * w + x + 1 92 | triangles = np.array([tl, bl, tr, br, tr, bl]) 93 | triangles = np.transpose(triangles, (1, 2, 0)).reshape( 94 | ((w - 1) * (h - 1) * 2, 3)) 95 | if mask is not None: 96 | mask = mask.reshape(-1) 97 | triangles = triangles[mask[triangles].all(1)] 98 | return triangles 99 | -------------------------------------------------------------------------------- /arguments.py: -------------------------------------------------------------------------------- 1 | ### 2 | # Copyright (C) 2023, Computer Vision Lab, Seoul National University, https://cv.snu.ac.kr 3 | # For permission requests, please contact robot0321@snu.ac.kr, esw0116@snu.ac.kr, namhj28@gmail.com, jarin.lee@gmail.com. 4 | # All rights reserved.
5 | ### 6 | import numpy as np 7 | 8 | 9 | class GSParams: 10 | def __init__(self): 11 | self.sh_degree = 3 12 | self.images = "images" 13 | self.resolution = -1 14 | self.white_background = False 15 | self.data_device = "cuda" 16 | self.eval = False 17 | self.use_depth = False 18 | 19 | self.iterations = 2990#3_000 20 | self.position_lr_init = 0.00016 21 | self.position_lr_final = 0.0000016 22 | self.position_lr_delay_mult = 0.01 23 | self.position_lr_max_steps = 2990#3_000 24 | self.feature_lr = 0.0025 25 | self.opacity_lr = 0.05 26 | self.scaling_lr = 0.005 27 | self.rotation_lr = 0.001 28 | self.percent_dense = 0.01 29 | self.lambda_dssim = 0.2 30 | self.densification_interval = 100 31 | self.opacity_reset_interval = 3000 32 | self.densify_from_iter = 500 33 | self.densify_until_iter = 15_000 34 | self.densify_grad_threshold = 0.0002 35 | 36 | self.convert_SHs_python = False 37 | self.compute_cov3D_python = False 38 | self.debug = False 39 | 40 | 41 | class CameraParams: 42 | def __init__(self, H: int = 512, W: int = 512): 43 | self.H = H 44 | self.W = W 45 | self.focal = (5.8269e+02, 5.8269e+02) 46 | self.fov = (2*np.arctan(self.W / (2*self.focal[0])), 2*np.arctan(self.H / (2*self.focal[1]))) 47 | self.K = np.array([ 48 | [self.focal[0], 0., self.W/2], 49 | [0., self.focal[1], self.H/2], 50 | [0., 0., 1.], 51 | ]).astype(np.float32) -------------------------------------------------------------------------------- /assets/animestreet2_back_rgb.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/assets/animestreet2_back_rgb.mp4 -------------------------------------------------------------------------------- /assets/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/assets/demo.gif -------------------------------------------------------------------------------- /assets/demo.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/assets/demo.mp4 -------------------------------------------------------------------------------- /assets/fig5ours_360_rgb.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/assets/fig5ours_360_rgb.mp4 -------------------------------------------------------------------------------- /assets/logo_color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/assets/logo_color.png -------------------------------------------------------------------------------- /assets/logo_cvlab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/assets/logo_cvlab.png -------------------------------------------------------------------------------- /assets/waterfall_back_rgb.mp4: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/assets/waterfall_back_rgb.mp4 -------------------------------------------------------------------------------- /examples/Image002_modernvilla.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/Image002_modernvilla.jpg -------------------------------------------------------------------------------- /examples/Image002_modernvilla.txt: -------------------------------------------------------------------------------- 1 | ultra-modern mega villa by the sea with swimming pool and green space with beautiful open space and tropical paradise green space and guest annex, bright and sunny weather 2 | -------------------------------------------------------------------------------- /examples/Image002_modernvilla_negative.txt: -------------------------------------------------------------------------------- 1 | trees, front objects, (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale), (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale) 2 | -------------------------------------------------------------------------------- /examples/Image003_fantasy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/Image003_fantasy.jpg -------------------------------------------------------------------------------- /examples/Image003_fantasy.txt: -------------------------------------------------------------------------------- 1 | A vibrant, colorful floating community city, clouds above a beautiful, enchanted landscape filled with whimsical flora, enchanted forest landscape, Magical and dreamy woodland with vibrant green foliage and sparkling flowers, Landscape with twisted trees and vines, natural lighting and dark shadows, unique fantastical elements like floating islands and floating orbs, Highly detailed vegetation and foliage, deep contrast and color vibrancy, texture and intricate details in a floating element 2 | -------------------------------------------------------------------------------- /examples/Image003_fantasy_negative.txt: -------------------------------------------------------------------------------- 1 | (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale), (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale) 2 | -------------------------------------------------------------------------------- /examples/Image005_fruitmarket.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/Image005_fruitmarket.jpg -------------------------------------------------------------------------------- /examples/Image005_fruitmarket.txt: -------------------------------------------------------------------------------- 1 | 4k, best quality, grocery store with big brick roads and wooden hard rooftop 2 | 
-------------------------------------------------------------------------------- /examples/Image005_fruitmarket_negative.txt: -------------------------------------------------------------------------------- 1 | photo frame, frame, boarder, simple color, (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale), (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale) 2 | -------------------------------------------------------------------------------- /examples/Image008_waterfall.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/Image008_waterfall.jpg -------------------------------------------------------------------------------- /examples/Image008_waterfall.txt: -------------------------------------------------------------------------------- 1 | A dense forest with hardwood trees on the stones. a lake is surrounded by wet stones and pebbles. realistic and intricate details, highly detailed outdoor photo. 2 | -------------------------------------------------------------------------------- /examples/Image009_spacestation.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/Image009_spacestation.jpg -------------------------------------------------------------------------------- /examples/Image009_spacestation.txt: -------------------------------------------------------------------------------- 1 | inside the space station, space control machines with many electric lines, 4k, best quality 2 | -------------------------------------------------------------------------------- /examples/Image009_spacestation_negative.txt: -------------------------------------------------------------------------------- 1 | astronaut, people, scientist, person, photo frame, frame, boarder, simple color, (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale), (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale) 2 | -------------------------------------------------------------------------------- /examples/Image011_lego.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/Image011_lego.jpg -------------------------------------------------------------------------------- /examples/Image011_lego.txt: -------------------------------------------------------------------------------- 1 | (Brick studs in a certain pattern:3), (best lego man face:1.4), best quality, lego city with lego shops, lego road with street lamp, cars and lego mans on the street, lego trees and lake at a park 2 | -------------------------------------------------------------------------------- /examples/Image011_lego_negative.txt: -------------------------------------------------------------------------------- 1 | shining effect, shining light, (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale), (worst quality:2), (low 
quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale) 2 | -------------------------------------------------------------------------------- /examples/Image012_whitecat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/Image012_whitecat.jpg -------------------------------------------------------------------------------- /examples/Image012_whitecat.txt: -------------------------------------------------------------------------------- 1 | a cat sitting on the street, white house with white stairs, trees near the house, gray blocks on the street, 2 | -------------------------------------------------------------------------------- /examples/Image012_whitecat_2nd.txt: -------------------------------------------------------------------------------- 1 | A number of flower buckets, small bushes and many pebbles on the ground 2 | -------------------------------------------------------------------------------- /examples/Image012_whitecat_negative.txt: -------------------------------------------------------------------------------- 1 | shining effect, shining light, (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale), (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale) 2 | -------------------------------------------------------------------------------- /examples/Image014_animestreet.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/Image014_animestreet.jpg -------------------------------------------------------------------------------- /examples/Image014_animestreet.txt: -------------------------------------------------------------------------------- 1 | best quality, 4k, anime-style, anime, manga style, a long anime-style road with anime-blocks and little anime-grass, anime-houses and anime-tree on the side of the anime-style road, wide anime-style bright blue sky, shiny and beautiful day, bright scene 2 | -------------------------------------------------------------------------------- /examples/Image014_animestreet_negative.txt: -------------------------------------------------------------------------------- 1 | defocus, blurry, shadow, character, person, people, photo frame, frame, boarder, simple color, dark sky, dark scene, (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale), (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale) 2 | -------------------------------------------------------------------------------- /examples/Image015_animelakehouse.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/Image015_animelakehouse.jpg -------------------------------------------------------------------------------- /examples/Image015_animelakehouse.txt: -------------------------------------------------------------------------------- 1 | anime style, animation, best quality, a boat on lake, trees 
and rocks near the lake. a house and port in front of a house 2 | -------------------------------------------------------------------------------- /examples/Image015_animelakehouse_negative.txt: -------------------------------------------------------------------------------- 1 | (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale), (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale) 2 | -------------------------------------------------------------------------------- /examples/Image018_animesummerhome.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/Image018_animesummerhome.jpg -------------------------------------------------------------------------------- /examples/Image018_animesummerhome.txt: -------------------------------------------------------------------------------- 1 | Anime-style, Japanese-style anime house overlooking the anime sea with anime tatami mats, anime curtains blowing in the wind, anme clouds visible in the anime sky, anime livingroom with anime flowers 2 | -------------------------------------------------------------------------------- /examples/Image018_animesummerhome_negative.txt: -------------------------------------------------------------------------------- 1 | photo frame, frame, boarder, simple color, (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale), (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale) 2 | -------------------------------------------------------------------------------- /examples/Image031_fruit.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/Image031_fruit.jpg -------------------------------------------------------------------------------- /examples/Image031_fruit.txt: -------------------------------------------------------------------------------- 1 | 4k, best quality, grocery store with block tiles, wooden hard rooftop, various fruits upon the wooden tables, artificial trees on the tables. 
2 | -------------------------------------------------------------------------------- /examples/Image031_fruit_negative.txt: -------------------------------------------------------------------------------- 1 | photo frame, frame, boarder, simple color, (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale), (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale) 2 | -------------------------------------------------------------------------------- /examples/animelake_Back_and_forth_60fps.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/animelake_Back_and_forth_60fps.mp4 -------------------------------------------------------------------------------- /examples/animelake_Headbanging_60fps.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/animelake_Headbanging_60fps.mp4 -------------------------------------------------------------------------------- /examples/animelake_LLFF_60fps.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/animelake_LLFF_60fps.mp4 -------------------------------------------------------------------------------- /examples/cabin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/cabin.png -------------------------------------------------------------------------------- /examples/cabin.txt: -------------------------------------------------------------------------------- 1 | Magician's magical cabin alone in a serene forest -------------------------------------------------------------------------------- /examples/christmas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/christmas.png -------------------------------------------------------------------------------- /examples/christmas.txt: -------------------------------------------------------------------------------- 1 | Cozy livingroom in christmas 2 | -------------------------------------------------------------------------------- /examples/doge.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/doge.jpg -------------------------------------------------------------------------------- /examples/doge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/doge.png -------------------------------------------------------------------------------- /examples/doge.txt: -------------------------------------------------------------------------------- 1 | a cozy livingroom 
-------------------------------------------------------------------------------- /examples/elf.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/elf.jpg -------------------------------------------------------------------------------- /examples/elf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/elf.png -------------------------------------------------------------------------------- /examples/elf.txt: -------------------------------------------------------------------------------- 1 | serene deep forest -------------------------------------------------------------------------------- /examples/fantasy_Back_and_forth_60fps.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/fantasy_Back_and_forth_60fps.mp4 -------------------------------------------------------------------------------- /examples/fantasy_Headbanging_60fps.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/fantasy_Headbanging_60fps.mp4 -------------------------------------------------------------------------------- /examples/fantasy_LLFF_60fps.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/fantasy_LLFF_60fps.mp4 -------------------------------------------------------------------------------- /examples/girl.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/girl.jpg -------------------------------------------------------------------------------- /examples/girl.txt: -------------------------------------------------------------------------------- 1 | dark messy room, noir style, indoors, bottle, shoe soles, jacket, cup, window, blurry, black footwear, depth of field, box, couch, table, gun, chair, foreshortening -------------------------------------------------------------------------------- /examples/image020.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/image020.png -------------------------------------------------------------------------------- /examples/image020.txt: -------------------------------------------------------------------------------- 1 | High-resolution photography kitchen design, wooden floor, small windows opening onto the garden, Bauhaus furniture and decoration, high ceiling, beige blue salmon pastel palette, interior design magazine, cozy atmosphere; 8k, intricate detail, photorealistic, realistic light, wide angle, kinfolk photography, A+D architecture, Kitchen Sink, Basket of fruits and vegetables, a bottle of drinking water, walls painted magazine style photo, looking towards a sink under a window, with a door on the left of the sink with a 25 cm distance 
from the kitchen, the kitchen is an L shaped starting from the right corner, on the far right a fridge nest to it a stove, next the dishwasher then the sink, a smokey grey kitchen with modern touches, taupe walls, a taup ceiling with spotlights inside the ceiling with 90 cm distance, wooden parquet floor -------------------------------------------------------------------------------- /examples/island.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/island.png -------------------------------------------------------------------------------- /examples/island.txt: -------------------------------------------------------------------------------- 1 | Tiny island of wonder -------------------------------------------------------------------------------- /examples/ruin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/ruin.png -------------------------------------------------------------------------------- /examples/ruin.txt: -------------------------------------------------------------------------------- 1 | Postapocalyptic city in desert -------------------------------------------------------------------------------- /gaussian_renderer/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | import math 14 | from depth_diff_gaussian_rasterization_min import GaussianRasterizationSettings, GaussianRasterizer 15 | from scene.gaussian_model import GaussianModel 16 | from utils.sh import eval_sh 17 | 18 | def render(viewpoint_camera, pc: GaussianModel, opt, bg_color: torch.Tensor, scaling_modifier=1.0, override_color=None, render_only=False): 19 | """ 20 | Render the scene. 21 | 22 | Background tensor (bg_color) must be on GPU! 23 | """ 24 | 25 | # Create zero tensor. We will use it to make pytorch return gradients of the 2D (screen-space) means 26 | screenspace_points = torch.zeros_like(pc.get_xyz, dtype=pc.get_xyz.dtype, requires_grad=True, device="cuda") + 0 27 | try: 28 | screenspace_points.retain_grad() 29 | except: 30 | pass 31 | 32 | # Set up rasterization configuration 33 | tanfovx = math.tan(viewpoint_camera.FoVx * 0.5) 34 | tanfovy = math.tan(viewpoint_camera.FoVy * 0.5) 35 | 36 | raster_settings = GaussianRasterizationSettings( 37 | image_height=int(viewpoint_camera.image_height), 38 | image_width=int(viewpoint_camera.image_width), 39 | tanfovx=tanfovx, 40 | tanfovy=tanfovy, 41 | bg=bg_color, 42 | scale_modifier=scaling_modifier, 43 | viewmatrix=viewpoint_camera.world_view_transform, 44 | projmatrix=viewpoint_camera.full_proj_transform, 45 | sh_degree=pc.active_sh_degree, 46 | campos=viewpoint_camera.camera_center, 47 | prefiltered=False, 48 | debug=opt.debug 49 | ) 50 | 51 | rasterizer = GaussianRasterizer(raster_settings=raster_settings) 52 | 53 | means3D = pc.get_xyz 54 | means2D = screenspace_points 55 | opacity = pc.get_opacity 56 | 57 | # If precomputed 3d covariance is provided, use it. 
If not, then it will be computed from 58 | # scaling / rotation by the rasterizer. 59 | scales = None 60 | rotations = None 61 | cov3D_precomp = None 62 | if opt.compute_cov3D_python: 63 | cov3D_precomp = pc.get_covariance(scaling_modifier) 64 | else: 65 | scales = pc.get_scaling 66 | rotations = pc.get_rotation 67 | 68 | # If precomputed colors are provided, use them. Otherwise, if it is desired to precompute colors 69 | # from SHs in Python, do it. If not, then SH -> RGB conversion will be done by rasterizer. 70 | shs = None 71 | colors_precomp = None 72 | if override_color is None: 73 | if opt.convert_SHs_python: 74 | shs_view = pc.get_features.transpose(1, 2).view(-1, 3, (pc.max_sh_degree+1)**2) 75 | dir_pp = (pc.get_xyz - viewpoint_camera.camera_center.repeat(pc.get_features.shape[0], 1)) 76 | dir_pp_normalized = dir_pp/dir_pp.norm(dim=1, keepdim=True) 77 | sh2rgb = eval_sh(pc.active_sh_degree, shs_view, dir_pp_normalized) 78 | colors_precomp = torch.clamp_min(sh2rgb + 0.5, 0.0) 79 | else: 80 | shs = pc.get_features 81 | else: 82 | colors_precomp = override_color 83 | 84 | # Rasterize visible Gaussians to image, obtain their radii (on screen). 85 | rendered_image, radii, depth = rasterizer( 86 | means3D = means3D, 87 | means2D = means2D, 88 | shs = shs, 89 | colors_precomp = colors_precomp, 90 | opacities = opacity, 91 | scales = scales, 92 | rotations = rotations, 93 | cov3D_precomp = cov3D_precomp) 94 | 95 | # Those Gaussians that were frustum culled or had a radius of 0 were not visible. 96 | # They will be excluded from value updates used in the splitting criteria. 97 | if render_only: 98 | return {"render": rendered_image, "depth": depth} 99 | else: 100 | return {"render": rendered_image, 101 | "viewspace_points": screenspace_points, 102 | "visibility_filter" : radii > 0, 103 | "radii": radii, 104 | "depth": depth} 105 | -------------------------------------------------------------------------------- /gaussian_renderer/network_gui.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | import traceback 14 | import socket 15 | import json 16 | from scene.cameras import MiniCam 17 | 18 | host = "127.0.0.1" 19 | port = 6009 20 | 21 | conn = None 22 | addr = None 23 | 24 | listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 25 | 26 | def init(wish_host, wish_port): 27 | global host, port, listener 28 | host = wish_host 29 | port = wish_port 30 | listener.bind((host, port)) 31 | listener.listen() 32 | listener.settimeout(0) 33 | 34 | def try_connect(): 35 | global conn, addr, listener 36 | try: 37 | conn, addr = listener.accept() 38 | print(f"\nConnected by {addr}") 39 | conn.settimeout(None) 40 | except Exception as inst: 41 | pass 42 | 43 | def read(): 44 | global conn 45 | messageLength = conn.recv(4) 46 | messageLength = int.from_bytes(messageLength, 'little') 47 | message = conn.recv(messageLength) 48 | return json.loads(message.decode("utf-8")) 49 | 50 | def send(message_bytes, verify): 51 | global conn 52 | if message_bytes != None: 53 | conn.sendall(message_bytes) 54 | conn.sendall(len(verify).to_bytes(4, 'little')) 55 | conn.sendall(bytes(verify, 'ascii')) 56 | 57 | def receive(): 58 | message = read() 59 | 60 | width = message["resolution_x"] 61 | height = message["resolution_y"] 62 | 63 | if width != 0 and height != 0: 64 | try: 65 | do_training = bool(message["train"]) 66 | fovy = message["fov_y"] 67 | fovx = message["fov_x"] 68 | znear = message["z_near"] 69 | zfar = message["z_far"] 70 | do_shs_python = bool(message["shs_python"]) 71 | do_rot_scale_python = bool(message["rot_scale_python"]) 72 | keep_alive = bool(message["keep_alive"]) 73 | scaling_modifier = message["scaling_modifier"] 74 | world_view_transform = torch.reshape(torch.tensor(message["view_matrix"]), (4, 4)).cuda() 75 | world_view_transform[:,1] = -world_view_transform[:,1] 76 | world_view_transform[:,2] = -world_view_transform[:,2] 77 | full_proj_transform = torch.reshape(torch.tensor(message["view_projection_matrix"]), (4, 4)).cuda() 78 | full_proj_transform[:,1] = -full_proj_transform[:,1] 79 | custom_cam = MiniCam(width, height, fovy, fovx, znear, zfar, world_view_transform, full_proj_transform) 80 | except Exception as e: 81 | print("") 82 | traceback.print_exc() 83 | raise e 84 | return custom_cam, do_training, do_shs_python, do_rot_scale_python, keep_alive, scaling_modifier 85 | else: 86 | return None, None, None, None, None, None -------------------------------------------------------------------------------- /packages.txt: -------------------------------------------------------------------------------- 1 | libglm-dev 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | timm==0.6.7 2 | torch==2.0.1 3 | plyfile==0.8.1 4 | diffusers==0.23.1 5 | peft==0.6.2 6 | torchvision 7 | scipy 8 | numpy 9 | imageio[ffmpeg] 10 | opencv-python 11 | Pillow 12 | open3d 13 | gradio 14 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | from PIL import Image 4 | 5 | from luciddreamer import LucidDreamer 6 | 7 | 8 | if __name__ == "__main__": 9 | ### option 10 | parser = argparse.ArgumentParser(description='Arguments for LucidDreamer') 11 | # Input options 12 | parser.add_argument('--image', '-img', type=str, 
default='examples/Image015_animelakehouse.jpg', help='Input image for scene generation') 13 | parser.add_argument('--text', '-t', type=str, default='examples/Image015_animelakehouse.txt', help='Text prompt for scene generation') 14 | parser.add_argument('--neg_text', '-nt', type=str, default='', help='Negative text prompt for scene generation') 15 | 16 | # Camera options 17 | parser.add_argument('--campath_gen', '-cg', type=str, default='lookdown', choices=['lookdown', 'lookaround', 'rotate360'], help='Camera extrinsic trajectories for scene generation') 18 | parser.add_argument('--campath_render', '-cr', type=str, default='llff', choices=['back_and_forth', 'llff', 'headbanging'], help='Camera extrinsic trajectories for video rendering') 19 | 20 | # Inpainting options 21 | parser.add_argument('--model_name', type=str, default=None, help='Model name for inpainting(dreaming)') 22 | parser.add_argument('--seed', type=int, default=1, help='Manual seed for running Stable Diffusion inpainting') 23 | parser.add_argument('--diff_steps', type=int, default=50, help='Number of inference steps for running Stable Diffusion inpainting') 24 | 25 | # Save options 26 | parser.add_argument('--save_dir', '-s', type=str, default='', help='Save directory') 27 | 28 | args = parser.parse_args() 29 | 30 | 31 | ### input (example) 32 | rgb_cond = Image.open(args.image) 33 | 34 | if args.text.endswith('.txt'): 35 | with open(args.text, 'r') as f: 36 | txt_cond = f.readline() 37 | else: 38 | txt_cond = args.text 39 | 40 | if args.neg_text.endswith('.txt'): 41 | with open(args.neg_text, 'r') as f: 42 | neg_txt_cond = f.readline() 43 | else: 44 | neg_txt_cond = args.neg_text 45 | 46 | # Make default save directory if blank 47 | if args.save_dir == '': 48 | img_name = os.path.splitext(os.path.basename(args.image))[0] 49 | args.save_dir = f'./outputs/{img_name}_{args.campath_gen}_{args.seed}' 50 | if not os.path.exists(args.save_dir): 51 | os.makedirs(args.save_dir, exist_ok=True) 52 | 53 | if args.model_name is not None and args.model_name.endswith('safetensors'): 54 | print('Your model is saved in safetensor form. Converting to HF models...') 55 | from diffusers.pipelines.stable_diffusion.convert_from_ckpt import download_from_original_stable_diffusion_ckpt 56 | 57 | pipe = download_from_original_stable_diffusion_ckpt( 58 | checkpoint_path_or_dict=args.model_name, 59 | from_safetensors=True, 60 | device='cuda', 61 | ) 62 | pipe.save_pretrained('stablediffusion/', safe_serialization=False) 63 | args.model_name = f'stablediffusion/{args.model_name}' 64 | 65 | ld = LucidDreamer(for_gradio=False, save_dir=args.save_dir) 66 | ld.create(rgb_cond, txt_cond, neg_txt_cond, args.campath_gen, args.seed, args.diff_steps, model_name=args.model_name) 67 | ld.render_video(args.campath_render) 68 | -------------------------------------------------------------------------------- /scene/__init__.py: -------------------------------------------------------------------------------- 1 | ### 2 | # Copyright (C) 2023, Computer Vision Lab, Seoul National University, https://cv.snu.ac.kr 3 | # For permission requests, please contact robot0321@snu.ac.kr, esw0116@snu.ac.kr, namhj28@gmail.com, jarin.lee@gmail.com. 4 | # All rights reserved. 
5 | ### 6 | import os 7 | import random 8 | 9 | from arguments import GSParams 10 | from utils.system import searchForMaxIteration 11 | from scene.dataset_readers import readDataInfo 12 | from scene.gaussian_model import GaussianModel 13 | 14 | 15 | class Scene: 16 | gaussians: GaussianModel 17 | 18 | def __init__(self, traindata, gaussians: GaussianModel, opt: GSParams): 19 | self.traindata = traindata 20 | self.gaussians = gaussians 21 | 22 | info = readDataInfo(traindata, opt.white_background) 23 | random.shuffle(info.train_cameras) # Multi-res consistent random shuffling 24 | self.cameras_extent = info.nerf_normalization["radius"] 25 | 26 | print("Loading Training Cameras") 27 | self.train_cameras = info.train_cameras 28 | print("Loading Preset Cameras") 29 | self.preset_cameras = {} 30 | for campath in info.preset_cameras.keys(): 31 | self.preset_cameras[campath] = info.preset_cameras[campath] 32 | 33 | self.gaussians.create_from_pcd(info.point_cloud, self.cameras_extent) 34 | self.gaussians.training_setup(opt) 35 | 36 | def getTrainCameras(self): 37 | return self.train_cameras 38 | 39 | def getPresetCameras(self, preset): 40 | assert preset in self.preset_cameras 41 | return self.preset_cameras[preset] -------------------------------------------------------------------------------- /scene/cameras.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | import numpy as np 12 | 13 | import torch 14 | from torch import nn 15 | 16 | from utils.graphics import getWorld2View2, getProjectionMatrix 17 | from utils.loss import image2canny 18 | 19 | 20 | class Camera(nn.Module): 21 | def __init__(self, colmap_id, R, T, FoVx, FoVy, image, gt_alpha_mask, 22 | image_name, uid, 23 | trans=np.array([0.0, 0.0, 0.0]), scale=1.0, data_device = "cuda" 24 | ): 25 | super(Camera, self).__init__() 26 | 27 | self.uid = uid 28 | self.colmap_id = colmap_id 29 | self.R = R 30 | self.T = T 31 | self.FoVx = FoVx 32 | self.FoVy = FoVy 33 | self.image_name = image_name 34 | 35 | try: 36 | self.data_device = torch.device(data_device) 37 | except Exception as e: 38 | print(e) 39 | print(f"[Warning] Custom device {data_device} failed, fallback to default cuda device" ) 40 | self.data_device = torch.device("cuda") 41 | 42 | self.original_image = image.clamp(0.0, 1.0).to(self.data_device) 43 | self.canny_mask = image2canny(self.original_image.permute(1,2,0), 50, 150, isEdge1=False).detach().to(self.data_device) 44 | self.image_width = self.original_image.shape[2] 45 | self.image_height = self.original_image.shape[1] 46 | 47 | if gt_alpha_mask is not None: 48 | self.original_image *= gt_alpha_mask.to(self.data_device) 49 | else: 50 | self.original_image *= torch.ones((1, self.image_height, self.image_width), device=self.data_device) 51 | 52 | self.zfar = 100.0 53 | self.znear = 0.01 54 | 55 | self.trans = trans 56 | self.scale = scale 57 | 58 | self.world_view_transform = torch.tensor(getWorld2View2(R, T, trans, scale)).transpose(0, 1).cuda() 59 | self.projection_matrix = getProjectionMatrix(znear=self.znear, zfar=self.zfar, fovX=self.FoVx, fovY=self.FoVy).transpose(0,1).cuda() 60 | self.full_proj_transform = 
(self.world_view_transform.unsqueeze(0).bmm(self.projection_matrix.unsqueeze(0))).squeeze(0) 61 | self.camera_center = self.world_view_transform.inverse()[3, :3] 62 | 63 | 64 | class MiniCam: 65 | def __init__(self, width, height, fovy, fovx, znear, zfar, world_view_transform, full_proj_transform): 66 | self.image_width = width 67 | self.image_height = height 68 | self.FoVy = fovy 69 | self.FoVx = fovx 70 | self.znear = znear 71 | self.zfar = zfar 72 | self.world_view_transform = world_view_transform 73 | self.full_proj_transform = full_proj_transform 74 | view_inv = torch.inverse(self.world_view_transform) 75 | self.camera_center = view_inv[3][:3] 76 | 77 | -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization-min/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | cmake_minimum_required(VERSION 3.20) 13 | 14 | project(DiffRast LANGUAGES CUDA CXX) 15 | 16 | set(CMAKE_CXX_STANDARD 17) 17 | set(CMAKE_CXX_EXTENSIONS OFF) 18 | set(CMAKE_CUDA_STANDARD 17) 19 | 20 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") 21 | 22 | add_library(CudaRasterizer 23 | cuda_rasterizer/backward.h 24 | cuda_rasterizer/backward.cu 25 | cuda_rasterizer/forward.h 26 | cuda_rasterizer/forward.cu 27 | cuda_rasterizer/auxiliary.h 28 | cuda_rasterizer/rasterizer_impl.cu 29 | cuda_rasterizer/rasterizer_impl.h 30 | cuda_rasterizer/rasterizer.h 31 | ) 32 | 33 | set_target_properties(CudaRasterizer PROPERTIES CUDA_ARCHITECTURES "70;75;86") 34 | 35 | target_include_directories(CudaRasterizer PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/cuda_rasterizer) 36 | target_include_directories(CudaRasterizer PRIVATE third_party/glm ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) 37 | -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization-min/LICENSE.md: -------------------------------------------------------------------------------- 1 | Gaussian-Splatting License 2 | =========================== 3 | 4 | **Inria** and **the Max Planck Institut for Informatik (MPII)** hold all the ownership rights on the *Software* named **gaussian-splatting**. 5 | The *Software* is in the process of being registered with the Agence pour la Protection des 6 | Programmes (APP). 7 | 8 | The *Software* is still being developed by the *Licensor*. 9 | 10 | *Licensor*'s goal is to allow the research community to use, test and evaluate 11 | the *Software*. 12 | 13 | ## 1. Definitions 14 | 15 | *Licensee* means any person or entity that uses the *Software* and distributes 16 | its *Work*. 17 | 18 | *Licensor* means the owners of the *Software*, i.e Inria and MPII 19 | 20 | *Software* means the original work of authorship made available under this 21 | License ie gaussian-splatting. 22 | 23 | *Work* means the *Software* and any additions to or derivative works of the 24 | *Software* that are made available under this License. 25 | 26 | 27 | ## 2. Purpose 28 | This license is intended to define the rights granted to the *Licensee* by 29 | Licensors under the *Software*. 30 | 31 | ## 3. 
Rights granted 32 | 33 | For the above reasons Licensors have decided to distribute the *Software*. 34 | Licensors grant non-exclusive rights to use the *Software* for research purposes 35 | to research users (both academic and industrial), free of charge, without right 36 | to sublicense.. The *Software* may be used "non-commercially", i.e., for research 37 | and/or evaluation purposes only. 38 | 39 | Subject to the terms and conditions of this License, you are granted a 40 | non-exclusive, royalty-free, license to reproduce, prepare derivative works of, 41 | publicly display, publicly perform and distribute its *Work* and any resulting 42 | derivative works in any form. 43 | 44 | ## 4. Limitations 45 | 46 | **4.1 Redistribution.** You may reproduce or distribute the *Work* only if (a) you do 47 | so under this License, (b) you include a complete copy of this License with 48 | your distribution, and (c) you retain without modification any copyright, 49 | patent, trademark, or attribution notices that are present in the *Work*. 50 | 51 | **4.2 Derivative Works.** You may specify that additional or different terms apply 52 | to the use, reproduction, and distribution of your derivative works of the *Work* 53 | ("Your Terms") only if (a) Your Terms provide that the use limitation in 54 | Section 2 applies to your derivative works, and (b) you identify the specific 55 | derivative works that are subject to Your Terms. Notwithstanding Your Terms, 56 | this License (including the redistribution requirements in Section 3.1) will 57 | continue to apply to the *Work* itself. 58 | 59 | **4.3** Any other use without of prior consent of Licensors is prohibited. Research 60 | users explicitly acknowledge having received from Licensors all information 61 | allowing to appreciate the adequacy between of the *Software* and their needs and 62 | to undertake all necessary precautions for its execution and use. 63 | 64 | **4.4** The *Software* is provided both as a compiled library file and as source 65 | code. In case of using the *Software* for a publication or other results obtained 66 | through the use of the *Software*, users are strongly encouraged to cite the 67 | corresponding publications as explained in the documentation of the *Software*. 68 | 69 | ## 5. Disclaimer 70 | 71 | THE USER CANNOT USE, EXPLOIT OR DISTRIBUTE THE *SOFTWARE* FOR COMMERCIAL PURPOSES 72 | WITHOUT PRIOR AND EXPLICIT CONSENT OF LICENSORS. YOU MUST CONTACT INRIA FOR ANY 73 | UNAUTHORIZED USE: stip-sophia.transfert@inria.fr . ANY SUCH ACTION WILL 74 | CONSTITUTE A FORGERY. THIS *SOFTWARE* IS PROVIDED "AS IS" WITHOUT ANY WARRANTIES 75 | OF ANY NATURE AND ANY EXPRESS OR IMPLIED WARRANTIES, WITH REGARDS TO COMMERCIAL 76 | USE, PROFESSIONNAL USE, LEGAL OR NOT, OR OTHER, OR COMMERCIALISATION OR 77 | ADAPTATION. UNLESS EXPLICITLY PROVIDED BY LAW, IN NO EVENT, SHALL INRIA OR THE 78 | AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 79 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 80 | GOODS OR SERVICES, LOSS OF USE, DATA, OR PROFITS OR BUSINESS INTERRUPTION) 81 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 82 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING FROM, OUT OF OR 83 | IN CONNECTION WITH THE *SOFTWARE* OR THE USE OR OTHER DEALINGS IN THE *SOFTWARE*. 
84 | -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization-min/README.md: -------------------------------------------------------------------------------- 1 | # Differential Gaussian Rasterization 2 | 3 | Used as the rasterization engine for the paper "3D Gaussian Splatting for Real-Time Rendering of Radiance Fields". If you can make use of it in your own research, please be so kind to cite us. 4 | 5 |
6 | 
7 | ## BibTeX
8 | @Article{kerbl3Dgaussians,
 9 |       author       = {Kerbl, Bernhard and Kopanas, Georgios and Leimk{\"u}hler, Thomas and Drettakis, George},
10 |       title        = {3D Gaussian Splatting for Real-Time Radiance Field Rendering},
11 |       journal      = {ACM Transactions on Graphics},
12 |       number       = {4},
13 |       volume       = {42},
14 |       month        = {July},
15 |       year         = {2023},
16 |       url          = {https://repo-sam.inria.fr/fungraph/3d-gaussian-splatting/}
17 | }
18 |
19 |
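
For orientation, below is a minimal sketch of how this depth-aware variant is typically driven from Python. It assumes the `depth_diff_gaussian_rasterization_min` package exposes the same `GaussianRasterizationSettings` / `GaussianRasterizer` wrapper as the original diff-gaussian-rasterization, extended with the `out_depth` buffer declared in `rasterizer.h`; the camera and Gaussian attribute names, and the order of the returned tensors, are illustrative assumptions rather than code from this repository.

```python
import math
import torch

# Assumed import path; mirrors the original diff-gaussian-rasterization wrapper.
from depth_diff_gaussian_rasterization_min import (
    GaussianRasterizationSettings, GaussianRasterizer)


def render_sketch(cam, gaussians, bg_color):
    # Screen-space means tensor that receives the 2D position gradients.
    means2D = torch.zeros_like(gaussians.get_xyz, requires_grad=True, device="cuda")

    settings = GaussianRasterizationSettings(
        image_height=int(cam.image_height),
        image_width=int(cam.image_width),
        tanfovx=math.tan(cam.FoVx * 0.5),
        tanfovy=math.tan(cam.FoVy * 0.5),
        bg=bg_color,                          # (3,) CUDA tensor, background color
        scale_modifier=1.0,
        viewmatrix=cam.world_view_transform,  # stored transposed, as in the Camera class
        projmatrix=cam.full_proj_transform,
        sh_degree=gaussians.active_sh_degree,
        campos=cam.camera_center,
        prefiltered=False,
        debug=False)
    rasterizer = GaussianRasterizer(raster_settings=settings)

    # Assumed return order for the depth variant: color image, per-Gaussian radii, depth map.
    color, radii, depth = rasterizer(
        means3D=gaussians.get_xyz,
        means2D=means2D,
        shs=gaussians.get_features,
        colors_precomp=None,
        opacities=gaussians.get_opacity,
        scales=gaussians.get_scaling,
        rotations=gaussians.get_rotation,
        cov3D_precomp=None)
    return color, depth, radii
```

The settings fields correspond one-to-one to the arguments of `RasterizeGaussiansCUDA` in `rasterize_points.h` further below.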
-------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization-min/cuda_rasterizer/auxiliary.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_AUXILIARY_H_INCLUDED 13 | #define CUDA_RASTERIZER_AUXILIARY_H_INCLUDED 14 | 15 | #include "config.h" 16 | #include "stdio.h" 17 | 18 | #define BLOCK_SIZE (BLOCK_X * BLOCK_Y) 19 | #define NUM_WARPS (BLOCK_SIZE/32) 20 | 21 | // Spherical harmonics coefficients 22 | __device__ const float SH_C0 = 0.28209479177387814f; 23 | __device__ const float SH_C1 = 0.4886025119029199f; 24 | __device__ const float SH_C2[] = { 25 | 1.0925484305920792f, 26 | -1.0925484305920792f, 27 | 0.31539156525252005f, 28 | -1.0925484305920792f, 29 | 0.5462742152960396f 30 | }; 31 | __device__ const float SH_C3[] = { 32 | -0.5900435899266435f, 33 | 2.890611442640554f, 34 | -0.4570457994644658f, 35 | 0.3731763325901154f, 36 | -0.4570457994644658f, 37 | 1.445305721320277f, 38 | -0.5900435899266435f 39 | }; 40 | 41 | __forceinline__ __device__ float ndc2Pix(float v, int S) 42 | { 43 | return ((v + 1.0) * S - 1.0) * 0.5; 44 | } 45 | 46 | __forceinline__ __device__ void getRect(const float2 p, int max_radius, uint2& rect_min, uint2& rect_max, dim3 grid) 47 | { 48 | rect_min = { 49 | min(grid.x, max((int)0, (int)((p.x - max_radius) / BLOCK_X))), 50 | min(grid.y, max((int)0, (int)((p.y - max_radius) / BLOCK_Y))) 51 | }; 52 | rect_max = { 53 | min(grid.x, max((int)0, (int)((p.x + max_radius + BLOCK_X - 1) / BLOCK_X))), 54 | min(grid.y, max((int)0, (int)((p.y + max_radius + BLOCK_Y - 1) / BLOCK_Y))) 55 | }; 56 | } 57 | 58 | __forceinline__ __device__ float3 transformPoint4x3(const float3& p, const float* matrix) 59 | { 60 | float3 transformed = { 61 | matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z + matrix[12], 62 | matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z + matrix[13], 63 | matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z + matrix[14], 64 | }; 65 | return transformed; 66 | } 67 | 68 | __forceinline__ __device__ float4 transformPoint4x4(const float3& p, const float* matrix) 69 | { 70 | float4 transformed = { 71 | matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z + matrix[12], 72 | matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z + matrix[13], 73 | matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z + matrix[14], 74 | matrix[3] * p.x + matrix[7] * p.y + matrix[11] * p.z + matrix[15] 75 | }; 76 | return transformed; 77 | } 78 | 79 | __forceinline__ __device__ float3 transformVec4x3(const float3& p, const float* matrix) 80 | { 81 | float3 transformed = { 82 | matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z, 83 | matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z, 84 | matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z, 85 | }; 86 | return transformed; 87 | } 88 | 89 | __forceinline__ __device__ float3 transformVec4x3Transpose(const float3& p, const float* matrix) 90 | { 91 | float3 transformed = { 92 | matrix[0] * p.x + matrix[1] * p.y + matrix[2] * p.z, 93 | matrix[4] * p.x + matrix[5] * p.y + matrix[6] * p.z, 94 | matrix[8] * p.x + matrix[9] * p.y + matrix[10] * p.z, 95 | }; 96 | return transformed; 97 | } 
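// The dnormvdz/dnormvdv helpers below backpropagate through vector normalization:
// given v and an upstream gradient dv with respect to n = v / ||v||, they return J * dv,
// where J = (||v||^2 * I - v v^T) / ||v||^3 is the (symmetric) Jacobian of n(v);
// dnormvdz returns only the z component of that product.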
98 | 99 | __forceinline__ __device__ float dnormvdz(float3 v, float3 dv) 100 | { 101 | float sum2 = v.x * v.x + v.y * v.y + v.z * v.z; 102 | float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2); 103 | float dnormvdz = (-v.x * v.z * dv.x - v.y * v.z * dv.y + (sum2 - v.z * v.z) * dv.z) * invsum32; 104 | return dnormvdz; 105 | } 106 | 107 | __forceinline__ __device__ float3 dnormvdv(float3 v, float3 dv) 108 | { 109 | float sum2 = v.x * v.x + v.y * v.y + v.z * v.z; 110 | float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2); 111 | 112 | float3 dnormvdv; 113 | dnormvdv.x = ((+sum2 - v.x * v.x) * dv.x - v.y * v.x * dv.y - v.z * v.x * dv.z) * invsum32; 114 | dnormvdv.y = (-v.x * v.y * dv.x + (sum2 - v.y * v.y) * dv.y - v.z * v.y * dv.z) * invsum32; 115 | dnormvdv.z = (-v.x * v.z * dv.x - v.y * v.z * dv.y + (sum2 - v.z * v.z) * dv.z) * invsum32; 116 | return dnormvdv; 117 | } 118 | 119 | __forceinline__ __device__ float4 dnormvdv(float4 v, float4 dv) 120 | { 121 | float sum2 = v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w; 122 | float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2); 123 | 124 | float4 vdv = { v.x * dv.x, v.y * dv.y, v.z * dv.z, v.w * dv.w }; 125 | float vdv_sum = vdv.x + vdv.y + vdv.z + vdv.w; 126 | float4 dnormvdv; 127 | dnormvdv.x = ((sum2 - v.x * v.x) * dv.x - v.x * (vdv_sum - vdv.x)) * invsum32; 128 | dnormvdv.y = ((sum2 - v.y * v.y) * dv.y - v.y * (vdv_sum - vdv.y)) * invsum32; 129 | dnormvdv.z = ((sum2 - v.z * v.z) * dv.z - v.z * (vdv_sum - vdv.z)) * invsum32; 130 | dnormvdv.w = ((sum2 - v.w * v.w) * dv.w - v.w * (vdv_sum - vdv.w)) * invsum32; 131 | return dnormvdv; 132 | } 133 | 134 | __forceinline__ __device__ float sigmoid(float x) 135 | { 136 | return 1.0f / (1.0f + expf(-x)); 137 | } 138 | 139 | __forceinline__ __device__ bool in_frustum(int idx, 140 | const float* orig_points, 141 | const float* viewmatrix, 142 | const float* projmatrix, 143 | bool prefiltered, 144 | float3& p_view) 145 | { 146 | float3 p_orig = { orig_points[3 * idx], orig_points[3 * idx + 1], orig_points[3 * idx + 2] }; 147 | 148 | // Bring points to screen space 149 | float4 p_hom = transformPoint4x4(p_orig, projmatrix); 150 | float p_w = 1.0f / (p_hom.w + 0.0000001f); 151 | float3 p_proj = { p_hom.x * p_w, p_hom.y * p_w, p_hom.z * p_w }; 152 | p_view = transformPoint4x3(p_orig, viewmatrix); 153 | 154 | if (p_view.z <= 0.2f)// || ((p_proj.x < -1.3 || p_proj.x > 1.3 || p_proj.y < -1.3 || p_proj.y > 1.3))) 155 | { 156 | if (prefiltered) 157 | { 158 | printf("Point is filtered although prefiltered is set. This shouldn't happen!"); 159 | __trap(); 160 | } 161 | return false; 162 | } 163 | return true; 164 | } 165 | 166 | #define CHECK_CUDA(A, debug) \ 167 | A; if(debug) { \ 168 | auto ret = cudaDeviceSynchronize(); \ 169 | if (ret != cudaSuccess) { \ 170 | std::cerr << "\n[CUDA ERROR] in " << __FILE__ << "\nLine " << __LINE__ << ": " << cudaGetErrorString(ret); \ 171 | throw std::runtime_error(cudaGetErrorString(ret)); \ 172 | } \ 173 | } 174 | 175 | #endif -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization-min/cuda_rasterizer/backward.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 
8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_BACKWARD_H_INCLUDED 13 | #define CUDA_RASTERIZER_BACKWARD_H_INCLUDED 14 | 15 | #include 16 | #include "cuda_runtime.h" 17 | #include "device_launch_parameters.h" 18 | #define GLM_FORCE_CUDA 19 | #include 20 | 21 | namespace BACKWARD 22 | { 23 | void render( 24 | const dim3 grid, dim3 block, 25 | const uint2* ranges, 26 | const uint32_t* point_list, 27 | int W, int H, 28 | const float* bg_color, 29 | const float2* means2D, 30 | const float4* conic_opacity, 31 | const float3* means3D, 32 | const float* colors, 33 | const float* depths, 34 | const float* projmatrix, 35 | const float* final_Ts, 36 | const uint32_t* n_contrib, 37 | const float* dL_dpixels, 38 | const float* dL_depths, 39 | float3* dL_dmean2D, 40 | float4* dL_dconic2D, 41 | float3* dL_dmean3D, 42 | float* dL_dopacity, 43 | float* dL_dcolors); 44 | 45 | void preprocess( 46 | int P, int D, int M, 47 | const float3* means, 48 | const int* radii, 49 | const float* shs, 50 | const bool* clamped, 51 | const glm::vec3* scales, 52 | const glm::vec4* rotations, 53 | const float scale_modifier, 54 | const float* cov3Ds, 55 | const float* view, 56 | const float* proj, 57 | const float focal_x, float focal_y, 58 | const float tan_fovx, float tan_fovy, 59 | const glm::vec3* campos, 60 | const float3* dL_dmean2D, 61 | const float* dL_dconics, 62 | glm::vec3* dL_dmeans, 63 | float* dL_dcolor, 64 | float* dL_dcov3D, 65 | float* dL_dsh, 66 | glm::vec3* dL_dscale, 67 | glm::vec4* dL_drot); 68 | } 69 | 70 | #endif 71 | -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization-min/cuda_rasterizer/config.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_CONFIG_H_INCLUDED 13 | #define CUDA_RASTERIZER_CONFIG_H_INCLUDED 14 | 15 | #define NUM_CHANNELS 3 // Default 3, RGB 16 | #define BLOCK_X 16 17 | #define BLOCK_Y 16 18 | 19 | #endif -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization-min/cuda_rasterizer/forward.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_FORWARD_H_INCLUDED 13 | #define CUDA_RASTERIZER_FORWARD_H_INCLUDED 14 | 15 | #include 16 | #include "cuda_runtime.h" 17 | #include "device_launch_parameters.h" 18 | #define GLM_FORCE_CUDA 19 | #include 20 | 21 | namespace FORWARD 22 | { 23 | // Perform initial steps for each Gaussian prior to rasterization. 
24 | void preprocess(int P, int D, int M, 25 | const float* orig_points, 26 | const glm::vec3* scales, 27 | const float scale_modifier, 28 | const glm::vec4* rotations, 29 | const float* opacities, 30 | const float* shs, 31 | bool* clamped, 32 | const float* cov3D_precomp, 33 | const float* colors_precomp, 34 | const float* viewmatrix, 35 | const float* projmatrix, 36 | const glm::vec3* cam_pos, 37 | const int W, int H, 38 | const float focal_x, float focal_y, 39 | const float tan_fovx, float tan_fovy, 40 | int* radii, 41 | float2* points_xy_image, 42 | float* depths, 43 | float* cov3Ds, 44 | float* colors, 45 | float4* conic_opacity, 46 | const dim3 grid, 47 | uint32_t* tiles_touched, 48 | bool prefiltered); 49 | 50 | // Main rasterization method. 51 | void render( 52 | const dim3 grid, dim3 block, 53 | const uint2* ranges, 54 | const uint32_t* point_list, 55 | int W, int H, 56 | const float2* points_xy_image, 57 | const float* features, 58 | const float* depths, 59 | const float4* conic_opacity, 60 | float* final_T, 61 | uint32_t* n_contrib, 62 | const float* bg_color, 63 | float* out_color, 64 | float* out_depth); 65 | } 66 | 67 | 68 | #endif 69 | -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization-min/cuda_rasterizer/rasterizer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_H_INCLUDED 13 | #define CUDA_RASTERIZER_H_INCLUDED 14 | 15 | #include 16 | #include 17 | 18 | namespace CudaRasterizer 19 | { 20 | class Rasterizer 21 | { 22 | public: 23 | 24 | static void markVisible( 25 | int P, 26 | float* means3D, 27 | float* viewmatrix, 28 | float* projmatrix, 29 | bool* present); 30 | 31 | static int forward( 32 | std::function geometryBuffer, 33 | std::function binningBuffer, 34 | std::function imageBuffer, 35 | const int P, int D, int M, 36 | const float* background, 37 | const int width, int height, 38 | const float* means3D, 39 | const float* shs, 40 | const float* colors_precomp, 41 | const float* opacities, 42 | const float* scales, 43 | const float scale_modifier, 44 | const float* rotations, 45 | const float* cov3D_precomp, 46 | const float* viewmatrix, 47 | const float* projmatrix, 48 | const float* cam_pos, 49 | const float tan_fovx, float tan_fovy, 50 | const bool prefiltered, 51 | float* out_color, 52 | float* out_depth, 53 | int* radii = nullptr, 54 | bool debug = false); 55 | 56 | static void backward( 57 | const int P, int D, int M, int R, 58 | const float* background, 59 | const int width, int height, 60 | const float* means3D, 61 | const float* shs, 62 | const float* colors_precomp, 63 | const float* scales, 64 | const float scale_modifier, 65 | const float* rotations, 66 | const float* cov3D_precomp, 67 | const float* viewmatrix, 68 | const float* projmatrix, 69 | const float* campos, 70 | const float tan_fovx, float tan_fovy, 71 | const int* radii, 72 | char* geom_buffer, 73 | char* binning_buffer, 74 | char* image_buffer, 75 | const float* dL_dpix, 76 | const float* dL_depths, 77 | float* dL_dmean2D, 78 | float* dL_dconic, 79 | float* dL_dopacity, 80 | float* dL_dcolor, 81 | float* dL_dmean3D, 82 | float* 
dL_dcov3D, 83 | float* dL_dsh, 84 | float* dL_dscale, 85 | float* dL_drot, 86 | bool debug); 87 | }; 88 | }; 89 | 90 | #endif 91 | -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization-min/cuda_rasterizer/rasterizer_impl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #pragma once 13 | 14 | #include 15 | #include 16 | #include "rasterizer.h" 17 | #include 18 | 19 | namespace CudaRasterizer 20 | { 21 | template 22 | static void obtain(char*& chunk, T*& ptr, std::size_t count, std::size_t alignment) 23 | { 24 | std::size_t offset = (reinterpret_cast(chunk) + alignment - 1) & ~(alignment - 1); 25 | ptr = reinterpret_cast(offset); 26 | chunk = reinterpret_cast(ptr + count); 27 | } 28 | 29 | struct GeometryState 30 | { 31 | size_t scan_size; 32 | float* depths; 33 | char* scanning_space; 34 | bool* clamped; 35 | int* internal_radii; 36 | float2* means2D; 37 | float* cov3D; 38 | float4* conic_opacity; 39 | float* rgb; 40 | uint32_t* point_offsets; 41 | uint32_t* tiles_touched; 42 | 43 | static GeometryState fromChunk(char*& chunk, size_t P); 44 | }; 45 | 46 | struct ImageState 47 | { 48 | uint2* ranges; 49 | uint32_t* n_contrib; 50 | float* accum_alpha; 51 | 52 | static ImageState fromChunk(char*& chunk, size_t N); 53 | }; 54 | 55 | struct BinningState 56 | { 57 | size_t sorting_size; 58 | uint64_t* point_list_keys_unsorted; 59 | uint64_t* point_list_keys; 60 | uint32_t* point_list_unsorted; 61 | uint32_t* point_list; 62 | char* list_sorting_space; 63 | 64 | static BinningState fromChunk(char*& chunk, size_t P); 65 | }; 66 | 67 | template 68 | size_t required(size_t P) 69 | { 70 | char* size = nullptr; 71 | T::fromChunk(size, P); 72 | return ((size_t)size) + 128; 73 | } 74 | }; -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization-min/ext.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #include 13 | #include "rasterize_points.h" 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("rasterize_gaussians", &RasterizeGaussiansCUDA); 17 | m.def("rasterize_gaussians_backward", &RasterizeGaussiansBackwardCUDA); 18 | m.def("mark_visible", &markVisible); 19 | } -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization-min/rasterize_points.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 
8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #pragma once 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | std::tuple 19 | RasterizeGaussiansCUDA( 20 | const torch::Tensor& background, 21 | const torch::Tensor& means3D, 22 | const torch::Tensor& colors, 23 | const torch::Tensor& opacity, 24 | const torch::Tensor& scales, 25 | const torch::Tensor& rotations, 26 | const float scale_modifier, 27 | const torch::Tensor& cov3D_precomp, 28 | const torch::Tensor& viewmatrix, 29 | const torch::Tensor& projmatrix, 30 | const float tan_fovx, 31 | const float tan_fovy, 32 | const int image_height, 33 | const int image_width, 34 | const torch::Tensor& sh, 35 | const int degree, 36 | const torch::Tensor& campos, 37 | const bool prefiltered, 38 | const bool debug); 39 | 40 | std::tuple 41 | RasterizeGaussiansBackwardCUDA( 42 | const torch::Tensor& background, 43 | const torch::Tensor& means3D, 44 | const torch::Tensor& radii, 45 | const torch::Tensor& colors, 46 | const torch::Tensor& scales, 47 | const torch::Tensor& rotations, 48 | const float scale_modifier, 49 | const torch::Tensor& cov3D_precomp, 50 | const torch::Tensor& viewmatrix, 51 | const torch::Tensor& projmatrix, 52 | const float tan_fovx, 53 | const float tan_fovy, 54 | const torch::Tensor& dL_dout_color, 55 | const torch::Tensor& dL_dout_depth, 56 | const torch::Tensor& sh, 57 | const int degree, 58 | const torch::Tensor& campos, 59 | const torch::Tensor& geomBuffer, 60 | const int R, 61 | const torch::Tensor& binningBuffer, 62 | const torch::Tensor& imageBuffer, 63 | const bool debug); 64 | 65 | torch::Tensor markVisible( 66 | torch::Tensor& means3D, 67 | torch::Tensor& viewmatrix, 68 | torch::Tensor& projmatrix); 69 | -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization-min/setup.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | from setuptools import setup 13 | from torch.utils.cpp_extension import CUDAExtension, BuildExtension 14 | import os 15 | os.path.dirname(os.path.abspath(__file__)) 16 | 17 | setup( 18 | name="depth_diff_gaussian_rasterization_min", 19 | packages=['depth_diff_gaussian_rasterization_min'], 20 | ext_modules=[ 21 | CUDAExtension( 22 | name="depth_diff_gaussian_rasterization_min._C", 23 | sources=[ 24 | "cuda_rasterizer/rasterizer_impl.cu", 25 | "cuda_rasterizer/forward.cu", 26 | "cuda_rasterizer/backward.cu", 27 | "rasterize_points.cu", 28 | "ext.cpp"], 29 | extra_compile_args={"nvcc": ["-I" + os.path.join(os.path.dirname(os.path.abspath(__file__)), "third_party/glm/")]}) 30 | ], 31 | cmdclass={ 32 | 'build_ext': BuildExtension 33 | } 34 | ) 35 | -------------------------------------------------------------------------------- /submodules/simple-knn/ext.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 
8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #include 13 | #include "spatial.h" 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("distCUDA2", &distCUDA2); 17 | } 18 | -------------------------------------------------------------------------------- /submodules/simple-knn/setup.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | from setuptools import setup 13 | from torch.utils.cpp_extension import CUDAExtension, BuildExtension 14 | import os 15 | 16 | cxx_compiler_flags = [] 17 | 18 | if os.name == 'nt': 19 | cxx_compiler_flags.append("/wd4624") 20 | 21 | setup( 22 | name="simple_knn", 23 | ext_modules=[ 24 | CUDAExtension( 25 | name="simple_knn._C", 26 | sources=[ 27 | "spatial.cu", 28 | "simple_knn.cu", 29 | "ext.cpp"], 30 | extra_compile_args={"nvcc": [], "cxx": cxx_compiler_flags}) 31 | ], 32 | cmdclass={ 33 | 'build_ext': BuildExtension 34 | } 35 | ) 36 | -------------------------------------------------------------------------------- /submodules/simple-knn/simple_knn.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef SIMPLEKNN_H_INCLUDED 13 | #define SIMPLEKNN_H_INCLUDED 14 | 15 | class SimpleKNN 16 | { 17 | public: 18 | static void knn(int P, float3* points, float* meanDists); 19 | }; 20 | 21 | #endif -------------------------------------------------------------------------------- /submodules/simple-knn/simple_knn/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/submodules/simple-knn/simple_knn/.gitkeep -------------------------------------------------------------------------------- /submodules/simple-knn/spatial.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 
8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #include "spatial.h" 13 | #include "simple_knn.h" 14 | 15 | torch::Tensor 16 | distCUDA2(const torch::Tensor& points) 17 | { 18 | const int P = points.size(0); 19 | 20 | auto float_opts = points.options().dtype(torch::kFloat32); 21 | torch::Tensor means = torch::full({P}, 0.0, float_opts); 22 | 23 | SimpleKNN::knn(P, (float3*)points.contiguous().data(), means.contiguous().data()); 24 | 25 | return means; 26 | } -------------------------------------------------------------------------------- /submodules/simple-knn/spatial.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #include 13 | 14 | torch::Tensor distCUDA2(const torch::Tensor& points); -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/utils/__init__.py -------------------------------------------------------------------------------- /utils/camera.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | import json 12 | 13 | import numpy as np 14 | import torch 15 | 16 | from scene.cameras import Camera, MiniCam 17 | from utils.general import PILtoTorch 18 | from utils.graphics import fov2focal, focal2fov, getWorld2View, getProjectionMatrix 19 | 20 | 21 | WARNED = False 22 | 23 | 24 | def load_json(path, H, W): 25 | cams = [] 26 | with open(path) as json_file: 27 | contents = json.load(json_file) 28 | FoVx = contents["camera_angle_x"] 29 | FoVy = focal2fov(fov2focal(FoVx, W), H) 30 | zfar = 100.0 31 | znear = 0.01 32 | 33 | frames = contents["frames"] 34 | for idx, frame in enumerate(frames): 35 | # NeRF 'transform_matrix' is a camera-to-world transform 36 | c2w = np.array(frame["transform_matrix"]) 37 | # change from OpenGL/Blender camera axes (Y up, Z back) to COLMAP (Y down, Z forward) 38 | c2w[:3, 1:3] *= -1 39 | if c2w.shape[0] == 3: 40 | one = np.zeros((1, 4)) 41 | one[0, -1] = 1 42 | c2w = np.concatenate((c2w, one), axis=0) 43 | 44 | # get the world-to-camera transform and set R, T 45 | w2c = np.linalg.inv(c2w) 46 | R = np.transpose(w2c[:3, :3]) # R is stored transposed due to 'glm' in CUDA code 47 | T = w2c[:3, 3] 48 | 49 | w2c = torch.as_tensor(getWorld2View(R, T)).T.cuda() 50 | proj = getProjectionMatrix(znear, zfar, FoVx, FoVy).T.cuda() 51 | cams.append(MiniCam(W, H, FoVx, FoVy, znear, zfar, w2c, w2c @ proj)) 52 | return cams 53 | 54 | 55 | def loadCam(args, id, cam_info, resolution_scale): 56 | orig_w, orig_h = cam_info.image.size 57 | 58 | if args.resolution in [1, 2, 4, 8]: 59 | resolution = round(orig_w/(resolution_scale * args.resolution)), round(orig_h/(resolution_scale * args.resolution)) 60 | else: # should be a type that converts to float 61 | if args.resolution == -1: 62 | if orig_w > 1600: 63 | global WARNED 64 | if not WARNED: 65 | print("[ INFO ] Encountered quite large input images (>1.6K pixels width), rescaling to 1.6K.\n " 66 | "If this is not desired, please explicitly specify '--resolution/-r' as 1") 67 | WARNED = True 68 | global_down = orig_w / 1600 69 | else: 70 | global_down = 1 71 | else: 72 | global_down = orig_w / args.resolution 73 | 74 | scale = float(global_down) * float(resolution_scale) 75 | resolution = (int(orig_w / scale), int(orig_h / scale)) 76 | 77 | resized_image_rgb = PILtoTorch(cam_info.image, resolution) 78 | 79 | gt_image = resized_image_rgb[:3, ...] 80 | loaded_mask = None 81 | 82 | if resized_image_rgb.shape[1] == 4: 83 | loaded_mask = resized_image_rgb[3:4, ...] 
84 | 85 | return Camera(colmap_id=cam_info.uid, R=cam_info.R, T=cam_info.T, 86 | FoVx=cam_info.FovX, FoVy=cam_info.FovY, 87 | image=gt_image, gt_alpha_mask=loaded_mask, 88 | image_name=cam_info.image_name, uid=id, data_device=args.data_device) 89 | 90 | 91 | def cameraList_from_camInfos(cam_infos, resolution_scale, args): 92 | camera_list = [] 93 | 94 | for id, c in enumerate(cam_infos): 95 | camera_list.append(loadCam(args, id, c, resolution_scale)) 96 | 97 | return camera_list 98 | 99 | 100 | def camera_to_JSON(id, camera : Camera): 101 | Rt = np.zeros((4, 4)) 102 | Rt[:3, :3] = camera.R.transpose() 103 | Rt[:3, 3] = camera.T 104 | Rt[3, 3] = 1.0 105 | 106 | W2C = np.linalg.inv(Rt) 107 | pos = W2C[:3, 3] 108 | rot = W2C[:3, :3] 109 | serializable_array_2d = [x.tolist() for x in rot] 110 | camera_entry = { 111 | 'id' : id, 112 | 'img_name' : camera.image_name, 113 | 'width' : camera.width, 114 | 'height' : camera.height, 115 | 'position': pos.tolist(), 116 | 'rotation': serializable_array_2d, 117 | 'fy' : fov2focal(camera.FovY, camera.height), 118 | 'fx' : fov2focal(camera.FovX, camera.width) 119 | } 120 | return camera_entry 121 | -------------------------------------------------------------------------------- /utils/depth.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | import matplotlib.cm 3 | import numpy as np 4 | import torch 5 | 6 | 7 | def colorize(value, vmin=None, vmax=None, cmap='jet', invalid_val=-99, invalid_mask=None, background_color=(128, 128, 128, 255), gamma_corrected=False, value_transform=None): 8 | """Converts a depth map to a color image. 9 | 10 | Args: 11 | value (torch.Tensor, numpy.ndarry): Input depth map. Shape: (H, W) or (1, H, W) or (1, 1, H, W). All singular dimensions are squeezed 12 | vmin (float, optional): vmin-valued entries are mapped to start color of cmap. If None, value.min() is used. Defaults to None. 13 | vmax (float, optional): vmax-valued entries are mapped to end color of cmap. If None, value.max() is used. Defaults to None. 14 | cmap (str, optional): matplotlib colormap to use. Defaults to 'magma_r'. 15 | invalid_val (int, optional): Specifies value of invalid pixels that should be colored as 'background_color'. Defaults to -99. 16 | invalid_mask (numpy.ndarray, optional): Boolean mask for invalid regions. Defaults to None. 17 | background_color (tuple[int], optional): 4-tuple RGB color to give to invalid pixels. Defaults to (128, 128, 128, 255). 18 | gamma_corrected (bool, optional): Apply gamma correction to colored image. Defaults to False. 19 | value_transform (Callable, optional): Apply transform function to valid pixels before coloring. Defaults to None. 20 | 21 | Returns: 22 | numpy.ndarray, dtype - uint8: Colored depth map. Shape: (H, W, 4) 23 | """ 24 | if isinstance(value, torch.Tensor): 25 | value = value.detach().cpu().numpy() 26 | 27 | value = value.squeeze() 28 | if invalid_mask is None: 29 | invalid_mask = value == invalid_val 30 | mask = np.logical_not(invalid_mask) 31 | 32 | # normalize 33 | vmin = np.percentile(value[mask],2) if vmin is None else vmin 34 | vmax = np.percentile(value[mask],98) if vmax is None else vmax 35 | if vmin != vmax: 36 | value = (value - vmin) / (vmax - vmin) # vmin..vmax 37 | else: 38 | # Avoid 0-division 39 | value = value * 0. 
40 | 41 | # squeeze last dim if it exists 42 | # grey out the invalid values 43 | 44 | value[invalid_mask] = np.nan 45 | cmapper = matplotlib.cm.get_cmap(cmap) 46 | if value_transform: 47 | value = value_transform(value) 48 | # value = value / value.max() 49 | value = cmapper(value, bytes=True) # (nxmx4) 50 | 51 | # img = value[:, :, :] 52 | img = value[...] 53 | img[invalid_mask] = background_color 54 | 55 | # return img.transpose((2, 0, 1)) 56 | if gamma_corrected: 57 | # gamma correction 58 | img = img / 255 59 | img = np.power(img, 2.2) 60 | img = img * 255 61 | img = img.astype(np.uint8) 62 | return img -------------------------------------------------------------------------------- /utils/general.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | import sys 12 | import random 13 | from datetime import datetime 14 | import numpy as np 15 | import torch 16 | 17 | 18 | def inverse_sigmoid(x): 19 | return torch.log(x/(1-x)) 20 | 21 | 22 | def PILtoTorch(pil_image, resolution): 23 | resized_image_PIL = pil_image.resize(resolution) 24 | resized_image = torch.from_numpy(np.array(resized_image_PIL)) / 255.0 25 | if len(resized_image.shape) == 3: 26 | return resized_image.permute(2, 0, 1) 27 | else: 28 | return resized_image.unsqueeze(dim=-1).permute(2, 0, 1) 29 | 30 | 31 | def get_expon_lr_func( 32 | lr_init, lr_final, lr_delay_steps=0, lr_delay_mult=1.0, max_steps=1000000 33 | ): 34 | """ 35 | Copied from Plenoxels 36 | 37 | Continuous learning rate decay function. Adapted from JaxNeRF 38 | The returned rate is lr_init when step=0 and lr_final when step=max_steps, and 39 | is log-linearly interpolated elsewhere (equivalent to exponential decay). 40 | If lr_delay_steps>0 then the learning rate will be scaled by some smooth 41 | function of lr_delay_mult, such that the initial learning rate is 42 | lr_init*lr_delay_mult at the beginning of optimization but will be eased back 43 | to the normal learning rate when steps>lr_delay_steps. 44 | :param conf: config subtree 'lr' or similar 45 | :param max_steps: int, the number of steps during optimization. 46 | :return HoF which takes step as input 47 | """ 48 | 49 | def helper(step): 50 | if step < 0 or (lr_init == 0.0 and lr_final == 0.0): 51 | # Disable this parameter 52 | return 0.0 53 | if lr_delay_steps > 0: 54 | # A kind of reverse cosine decay. 
55 | delay_rate = lr_delay_mult + (1 - lr_delay_mult) * np.sin( 56 | 0.5 * np.pi * np.clip(step / lr_delay_steps, 0, 1) 57 | ) 58 | else: 59 | delay_rate = 1.0 60 | t = np.clip(step / max_steps, 0, 1) 61 | log_lerp = np.exp(np.log(lr_init) * (1 - t) + np.log(lr_final) * t) 62 | return delay_rate * log_lerp 63 | 64 | return helper 65 | 66 | 67 | def strip_lowerdiag(L): 68 | uncertainty = torch.zeros((L.shape[0], 6), dtype=torch.float, device="cuda") 69 | 70 | uncertainty[:, 0] = L[:, 0, 0] 71 | uncertainty[:, 1] = L[:, 0, 1] 72 | uncertainty[:, 2] = L[:, 0, 2] 73 | uncertainty[:, 3] = L[:, 1, 1] 74 | uncertainty[:, 4] = L[:, 1, 2] 75 | uncertainty[:, 5] = L[:, 2, 2] 76 | return uncertainty 77 | 78 | 79 | def strip_symmetric(sym): 80 | return strip_lowerdiag(sym) 81 | 82 | 83 | def build_rotation(r): 84 | norm = torch.sqrt(r[:,0]*r[:,0] + r[:,1]*r[:,1] + r[:,2]*r[:,2] + r[:,3]*r[:,3]) 85 | 86 | q = r / norm[:, None] 87 | 88 | R = torch.zeros((q.size(0), 3, 3), device='cuda') 89 | 90 | r = q[:, 0] 91 | x = q[:, 1] 92 | y = q[:, 2] 93 | z = q[:, 3] 94 | 95 | R[:, 0, 0] = 1 - 2 * (y*y + z*z) 96 | R[:, 0, 1] = 2 * (x*y - r*z) 97 | R[:, 0, 2] = 2 * (x*z + r*y) 98 | R[:, 1, 0] = 2 * (x*y + r*z) 99 | R[:, 1, 1] = 1 - 2 * (x*x + z*z) 100 | R[:, 1, 2] = 2 * (y*z - r*x) 101 | R[:, 2, 0] = 2 * (x*z - r*y) 102 | R[:, 2, 1] = 2 * (y*z + r*x) 103 | R[:, 2, 2] = 1 - 2 * (x*x + y*y) 104 | return R 105 | 106 | 107 | def build_scaling_rotation(s, r): 108 | L = torch.zeros((s.shape[0], 3, 3), dtype=torch.float, device="cuda") 109 | R = build_rotation(r) 110 | 111 | L[:,0,0] = s[:,0] 112 | L[:,1,1] = s[:,1] 113 | L[:,2,2] = s[:,2] 114 | 115 | L = R @ L 116 | return L 117 | 118 | 119 | def safe_state(silent): 120 | old_f = sys.stdout 121 | class F: 122 | def __init__(self, silent): 123 | self.silent = silent 124 | 125 | def write(self, x): 126 | if not self.silent: 127 | if x.endswith("\n"): 128 | old_f.write(x.replace("\n", " [{}]\n".format(str(datetime.now().strftime("%d/%m %H:%M:%S"))))) 129 | else: 130 | old_f.write(x) 131 | 132 | def flush(self): 133 | old_f.flush() 134 | 135 | sys.stdout = F(silent) 136 | 137 | random.seed(0) 138 | np.random.seed(0) 139 | torch.manual_seed(0) 140 | torch.cuda.set_device(torch.device("cuda:0")) 141 | -------------------------------------------------------------------------------- /utils/graphics.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | import math 12 | from typing import NamedTuple 13 | import numpy as np 14 | import torch 15 | 16 | 17 | class BasicPointCloud(NamedTuple): 18 | points : np.array 19 | colors : np.array 20 | normals : np.array 21 | 22 | 23 | def geom_transform_points(points, transf_matrix): 24 | P, _ = points.shape 25 | ones = torch.ones(P, 1, dtype=points.dtype, device=points.device) 26 | points_hom = torch.cat([points, ones], dim=1) 27 | points_out = torch.matmul(points_hom, transf_matrix.unsqueeze(0)) 28 | 29 | denom = points_out[..., 3:] + 0.0000001 30 | return (points_out[..., :3] / denom).squeeze(dim=0) 31 | 32 | 33 | def getWorld2View(R, t): 34 | Rt = np.zeros((4, 4)) 35 | Rt[:3, :3] = R.transpose() 36 | Rt[:3, 3] = t 37 | Rt[3, 3] = 1.0 38 | return np.float32(Rt) 39 | 40 | 41 | def getWorld2View2(R, t, translate=np.array([.0, .0, .0]), scale=1.0): 42 | Rt = np.zeros((4, 4)) 43 | Rt[:3, :3] = R.transpose() 44 | Rt[:3, 3] = t 45 | Rt[3, 3] = 1.0 46 | 47 | C2W = np.linalg.inv(Rt) 48 | cam_center = C2W[:3, 3] 49 | cam_center = (cam_center + translate) * scale 50 | C2W[:3, 3] = cam_center 51 | Rt = np.linalg.inv(C2W) 52 | return np.float32(Rt) 53 | 54 | 55 | def getProjectionMatrix(znear, zfar, fovX, fovY): 56 | tanHalfFovY = math.tan((fovY / 2)) 57 | tanHalfFovX = math.tan((fovX / 2)) 58 | 59 | top = tanHalfFovY * znear 60 | bottom = -top 61 | right = tanHalfFovX * znear 62 | left = -right 63 | 64 | P = torch.zeros(4, 4) 65 | 66 | z_sign = 1.0 67 | 68 | P[0, 0] = 2.0 * znear / (right - left) 69 | P[1, 1] = 2.0 * znear / (top - bottom) 70 | P[0, 2] = (right + left) / (right - left) 71 | P[1, 2] = (top + bottom) / (top - bottom) 72 | P[3, 2] = z_sign 73 | P[2, 2] = z_sign * zfar / (zfar - znear) 74 | P[2, 3] = -(zfar * znear) / (zfar - znear) 75 | return P 76 | 77 | 78 | def fov2focal(fov, pixels): 79 | return pixels / (2 * math.tan(fov / 2)) 80 | 81 | 82 | def focal2fov(focal, pixels): 83 | return 2*math.atan(pixels/(2*focal)) -------------------------------------------------------------------------------- /utils/image.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | import torch 12 | 13 | 14 | def mse(img1, img2): 15 | return (((img1 - img2)) ** 2).view(img1.shape[0], -1).mean(1, keepdim=True) 16 | 17 | 18 | def psnr(img1, img2): 19 | mse = (((img1 - img2)) ** 2).view(img1.shape[0], -1).mean(1, keepdim=True) 20 | return 20 * torch.log10(1.0 / torch.sqrt(mse)) 21 | -------------------------------------------------------------------------------- /utils/loss.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | from math import exp 12 | 13 | import torch 14 | import torch.nn.functional as F 15 | from torch.autograd import Variable 16 | 17 | 18 | def l1_loss(network_output, gt): 19 | return torch.abs((network_output - gt)).mean() 20 | 21 | 22 | def l2_loss(network_output, gt): 23 | return ((network_output - gt) ** 2).mean() 24 | 25 | 26 | def gaussian(window_size, sigma): 27 | gauss = torch.Tensor([exp(-(x - window_size // 2) ** 2 / float(2 * sigma ** 2)) for x in range(window_size)]) 28 | return gauss / gauss.sum() 29 | 30 | 31 | def create_window(window_size, channel): 32 | _1D_window = gaussian(window_size, 1.5).unsqueeze(1) 33 | _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0) 34 | window = Variable(_2D_window.expand(channel, 1, window_size, window_size).contiguous()) 35 | return window 36 | 37 | 38 | def ssim(img1, img2, window_size=11, size_average=True): 39 | channel = img1.size(-3) 40 | window = create_window(window_size, channel) 41 | 42 | if img1.is_cuda: 43 | window = window.cuda(img1.get_device()) 44 | window = window.type_as(img1) 45 | 46 | return _ssim(img1, img2, window, window_size, channel, size_average) 47 | 48 | 49 | def _ssim(img1, img2, window, window_size, channel, size_average=True): 50 | mu1 = F.conv2d(img1, window, padding=window_size // 2, groups=channel) 51 | mu2 = F.conv2d(img2, window, padding=window_size // 2, groups=channel) 52 | 53 | mu1_sq = mu1.pow(2) 54 | mu2_sq = mu2.pow(2) 55 | mu1_mu2 = mu1 * mu2 56 | 57 | sigma1_sq = F.conv2d(img1 * img1, window, padding=window_size // 2, groups=channel) - mu1_sq 58 | sigma2_sq = F.conv2d(img2 * img2, window, padding=window_size // 2, groups=channel) - mu2_sq 59 | sigma12 = F.conv2d(img1 * img2, window, padding=window_size // 2, groups=channel) - mu1_mu2 60 | 61 | C1 = 0.01 ** 2 62 | C2 = 0.03 ** 2 63 | 64 | ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2)) 65 | 66 | if size_average: 67 | return ssim_map.mean() 68 | else: 69 | return ssim_map.mean(1).mean(1).mean(1) 70 | 71 | 72 | import numpy as np 73 | import cv2 74 | def image2canny(image, thres1, thres2, isEdge1=True): 75 | """ image: (H, W, 3)""" 76 | canny_mask = torch.from_numpy(cv2.Canny((image.detach().cpu().numpy()*255.).astype(np.uint8), thres1, thres2)/255.) 77 | if not isEdge1: 78 | canny_mask = 1. - canny_mask 79 | return canny_mask.float() 80 | 81 | with torch.no_grad(): 82 | kernelsize=3 83 | conv = torch.nn.Conv2d(1, 1, kernel_size=kernelsize, padding=(kernelsize//2)) 84 | kernel = torch.tensor([[0.,1.,0.],[1.,0.,1.],[0.,1.,0.]]).reshape(1,1,kernelsize,kernelsize) 85 | conv.weight.data = kernel #torch.ones((1,1,kernelsize,kernelsize)) 86 | conv.bias.data = torch.tensor([0.]) 87 | conv.requires_grad_(False) 88 | conv = conv.cuda() 89 | 90 | 91 | def nearMean_map(array, mask, kernelsize=3): 92 | """ array: (H,W) / mask: (H,W) """ 93 | cnt_map = torch.ones_like(array) 94 | 95 | nearMean_map = conv((array * mask)[None,None]) 96 | cnt_map = conv((cnt_map * mask)[None,None]) 97 | nearMean_map = (nearMean_map / (cnt_map+1e-8)).squeeze() 98 | 99 | return nearMean_map -------------------------------------------------------------------------------- /utils/sh.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The PlenOctree Authors. 
2 | # Redistribution and use in source and binary forms, with or without 3 | # modification, are permitted provided that the following conditions are met: 4 | # 5 | # 1. Redistributions of source code must retain the above copyright notice, 6 | # this list of conditions and the following disclaimer. 7 | # 8 | # 2. Redistributions in binary form must reproduce the above copyright notice, 9 | # this list of conditions and the following disclaimer in the documentation 10 | # and/or other materials provided with the distribution. 11 | # 12 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 13 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 14 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 15 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 16 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 17 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 18 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 19 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 20 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 21 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 22 | # POSSIBILITY OF SUCH DAMAGE. 23 | import torch 24 | 25 | 26 | C0 = 0.28209479177387814 27 | C1 = 0.4886025119029199 28 | C2 = [ 29 | 1.0925484305920792, 30 | -1.0925484305920792, 31 | 0.31539156525252005, 32 | -1.0925484305920792, 33 | 0.5462742152960396 34 | ] 35 | C3 = [ 36 | -0.5900435899266435, 37 | 2.890611442640554, 38 | -0.4570457994644658, 39 | 0.3731763325901154, 40 | -0.4570457994644658, 41 | 1.445305721320277, 42 | -0.5900435899266435 43 | ] 44 | C4 = [ 45 | 2.5033429417967046, 46 | -1.7701307697799304, 47 | 0.9461746957575601, 48 | -0.6690465435572892, 49 | 0.10578554691520431, 50 | -0.6690465435572892, 51 | 0.47308734787878004, 52 | -1.7701307697799304, 53 | 0.6258357354491761, 54 | ] 55 | 56 | 57 | def eval_sh(deg, sh, dirs): 58 | """ 59 | Evaluate spherical harmonics at unit directions 60 | using hardcoded SH polynomials. 61 | Works with torch/np/jnp. 62 | ... Can be 0 or more batch dimensions. 63 | Args: 64 | deg: int SH deg. 
Currently, 0-3 supported 65 | sh: jnp.ndarray SH coeffs [..., C, (deg + 1) ** 2] 66 | dirs: jnp.ndarray unit directions [..., 3] 67 | Returns: 68 | [..., C] 69 | """ 70 | assert deg <= 4 and deg >= 0 71 | coeff = (deg + 1) ** 2 72 | assert sh.shape[-1] >= coeff 73 | 74 | result = C0 * sh[..., 0] 75 | if deg > 0: 76 | x, y, z = dirs[..., 0:1], dirs[..., 1:2], dirs[..., 2:3] 77 | result = (result - 78 | C1 * y * sh[..., 1] + 79 | C1 * z * sh[..., 2] - 80 | C1 * x * sh[..., 3]) 81 | 82 | if deg > 1: 83 | xx, yy, zz = x * x, y * y, z * z 84 | xy, yz, xz = x * y, y * z, x * z 85 | result = (result + 86 | C2[0] * xy * sh[..., 4] + 87 | C2[1] * yz * sh[..., 5] + 88 | C2[2] * (2.0 * zz - xx - yy) * sh[..., 6] + 89 | C2[3] * xz * sh[..., 7] + 90 | C2[4] * (xx - yy) * sh[..., 8]) 91 | 92 | if deg > 2: 93 | result = (result + 94 | C3[0] * y * (3 * xx - yy) * sh[..., 9] + 95 | C3[1] * xy * z * sh[..., 10] + 96 | C3[2] * y * (4 * zz - xx - yy)* sh[..., 11] + 97 | C3[3] * z * (2 * zz - 3 * xx - 3 * yy) * sh[..., 12] + 98 | C3[4] * x * (4 * zz - xx - yy) * sh[..., 13] + 99 | C3[5] * z * (xx - yy) * sh[..., 14] + 100 | C3[6] * x * (xx - 3 * yy) * sh[..., 15]) 101 | 102 | if deg > 3: 103 | result = (result + C4[0] * xy * (xx - yy) * sh[..., 16] + 104 | C4[1] * yz * (3 * xx - yy) * sh[..., 17] + 105 | C4[2] * xy * (7 * zz - 1) * sh[..., 18] + 106 | C4[3] * yz * (7 * zz - 3) * sh[..., 19] + 107 | C4[4] * (zz * (35 * zz - 30) + 3) * sh[..., 20] + 108 | C4[5] * xz * (7 * zz - 3) * sh[..., 21] + 109 | C4[6] * (xx - yy) * (7 * zz - 1) * sh[..., 22] + 110 | C4[7] * xz * (xx - 3 * yy) * sh[..., 23] + 111 | C4[8] * (xx * (xx - 3 * yy) - yy * (3 * xx - yy)) * sh[..., 24]) 112 | return result 113 | 114 | 115 | def RGB2SH(rgb): 116 | return (rgb - 0.5) / C0 117 | 118 | 119 | def SH2RGB(sh): 120 | return sh * C0 + 0.5 -------------------------------------------------------------------------------- /utils/system.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | from errno import EEXIST 12 | from os import makedirs, path 13 | import os 14 | 15 | 16 | def mkdir_p(folder_path): 17 | # Creates a directory. equivalent to using mkdir -p on the command line 18 | try: 19 | makedirs(folder_path) 20 | except OSError as exc: # Python >2.5 21 | if exc.errno == EEXIST and path.isdir(folder_path): 22 | pass 23 | else: 24 | raise 25 | 26 | 27 | def searchForMaxIteration(folder): 28 | saved_iters = [int(fname.split("_")[-1]) for fname in os.listdir(folder)] 29 | return max(saved_iters) 30 | --------------------------------------------------------------------------------
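
As a quick orientation for `eval_sh` in `utils/sh.py` above, here is a small self-contained sketch; the shapes, the random inputs, and the `+0.5` shift are illustrative assumptions, not code from this repository. Note also that although the docstring mentions degrees 0-3, the function carries the `C4` terms and asserts `deg <= 4`, so degree 4 is handled as well.

```python
import torch

from utils.sh import eval_sh

# Hypothetical shapes: 100 points, 3 color channels, degree-3 SH -> (3 + 1) ** 2 = 16 coeffs.
sh_coeffs = torch.randn(100, 3, 16)
view_dirs = torch.nn.functional.normalize(torch.randn(100, 3), dim=-1)  # unit directions

rgb = eval_sh(3, sh_coeffs, view_dirs)  # -> shape (100, 3)
rgb = torch.clamp(rgb + 0.5, min=0.0)   # shift by 0.5 before use as color, mirroring SH2RGB's DC convention
```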