├── .gitignore ├── LICENSE ├── README.md ├── ZoeDepth ├── .gitignore ├── LICENSE ├── README.md ├── environment.yml ├── evaluate.py ├── hubconf.py ├── sanity.py ├── sanity_hub.py ├── train_mix.py ├── train_mono.py ├── train_test_inputs │ ├── kitti_eigen_test_files_with_gt.txt │ ├── kitti_eigen_train_files_with_gt.txt │ ├── nyudepthv2_test_files_with_gt.txt │ └── nyudepthv2_train_files_with_gt.txt ├── ui │ ├── app.py │ ├── gradio_depth_pred.py │ ├── gradio_im_to_3d.py │ ├── gradio_pano_to_3d.py │ └── ui_requirements.txt └── zoedepth │ ├── data │ ├── __init__.py │ ├── data_mono.py │ ├── ddad.py │ ├── diml_indoor_test.py │ ├── diml_outdoor_test.py │ ├── diode.py │ ├── hypersim.py │ ├── ibims.py │ ├── preprocess.py │ ├── sun_rgbd_loader.py │ ├── transforms.py │ ├── vkitti.py │ └── vkitti2.py │ ├── models │ ├── __init__.py │ ├── base_models │ │ ├── __init__.py │ │ └── midas.py │ ├── builder.py │ ├── depth_model.py │ ├── layers │ │ ├── attractor.py │ │ ├── dist_layers.py │ │ ├── localbins_layers.py │ │ └── patch_transformer.py │ ├── model_io.py │ ├── zoedepth │ │ ├── __init__.py │ │ ├── config_zoedepth.json │ │ ├── config_zoedepth_kitti.json │ │ └── zoedepth_v1.py │ └── zoedepth_nk │ │ ├── __init__.py │ │ ├── config_zoedepth_nk.json │ │ └── zoedepth_nk_v1.py │ ├── trainers │ ├── base_trainer.py │ ├── builder.py │ ├── loss.py │ ├── zoedepth_nk_trainer.py │ └── zoedepth_trainer.py │ └── utils │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── easydict │ └── __init__.py │ ├── geometry.py │ └── misc.py ├── app.py ├── app_mini.py ├── arguments.py ├── assets ├── animestreet2_back_rgb.mp4 ├── demo.gif ├── demo.mp4 ├── fig5ours_360_rgb.mp4 ├── logo_color.png ├── logo_cvlab.png └── waterfall_back_rgb.mp4 ├── cameras ├── 1440.json ├── 360.json ├── 360_fov1.2.json ├── back.json ├── back_and_forth.json ├── headbanging.json ├── headbanging_circle.json ├── headbanging_r2.json ├── headbanging_r3.json ├── llff.json ├── llff_d0.25.json ├── llff_d0.5.json ├── llff_d1.json ├── llff_d2.json ├── llff_d4.json ├── llff_d6.json ├── llff_d8.json ├── lookaround.json ├── lookdown.json ├── rotate1440.json ├── rotate360.json └── rotate360_fov1.2.json ├── examples ├── Image002_modernvilla.jpg ├── Image002_modernvilla.txt ├── Image002_modernvilla_negative.txt ├── Image003_fantasy.jpg ├── Image003_fantasy.txt ├── Image003_fantasy_negative.txt ├── Image005_fruitmarket.jpg ├── Image005_fruitmarket.txt ├── Image005_fruitmarket_negative.txt ├── Image008_waterfall.jpg ├── Image008_waterfall.txt ├── Image009_spacestation.jpg ├── Image009_spacestation.txt ├── Image009_spacestation_negative.txt ├── Image011_lego.jpg ├── Image011_lego.txt ├── Image011_lego_negative.txt ├── Image012_whitecat.jpg ├── Image012_whitecat.txt ├── Image012_whitecat_2nd.txt ├── Image012_whitecat_negative.txt ├── Image014_animestreet.jpg ├── Image014_animestreet.txt ├── Image014_animestreet_negative.txt ├── Image015_animelakehouse.jpg ├── Image015_animelakehouse.txt ├── Image015_animelakehouse_negative.txt ├── Image018_animesummerhome.jpg ├── Image018_animesummerhome.txt ├── Image018_animesummerhome_negative.txt ├── Image031_fruit.jpg ├── Image031_fruit.txt ├── Image031_fruit_negative.txt ├── animelake_Back_and_forth_60fps.mp4 ├── animelake_Headbanging_60fps.mp4 ├── animelake_LLFF_60fps.mp4 ├── cabin.png ├── cabin.txt ├── christmas.png ├── christmas.txt ├── doge.jpg ├── doge.png ├── doge.txt ├── elf.jpg ├── elf.png ├── elf.txt ├── fantasy_Back_and_forth_60fps.mp4 ├── fantasy_Headbanging_60fps.mp4 ├── fantasy_LLFF_60fps.mp4 ├── girl.jpg ├── girl.txt ├── 
image020.png ├── image020.txt ├── island.png ├── island.txt ├── ruin.png └── ruin.txt ├── gaussian_renderer ├── __init__.py └── network_gui.py ├── luciddreamer.py ├── packages.txt ├── requirements.txt ├── run.py ├── scene ├── __init__.py ├── cameras.py ├── colmap_loader.py ├── dataset_readers.py └── gaussian_model.py ├── submodules ├── depth-diff-gaussian-rasterization-min │ ├── CMakeLists.txt │ ├── LICENSE.md │ ├── README.md │ ├── cuda_rasterizer │ │ ├── auxiliary.h │ │ ├── backward.cu │ │ ├── backward.h │ │ ├── config.h │ │ ├── forward.cu │ │ ├── forward.h │ │ ├── rasterizer.h │ │ ├── rasterizer_impl.cu │ │ └── rasterizer_impl.h │ ├── depth_diff_gaussian_rasterization_min │ │ └── __init__.py │ ├── ext.cpp │ ├── rasterize_points.cu │ ├── rasterize_points.h │ ├── setup.py │ └── third_party │ │ └── stbi_image_write.h └── simple-knn │ ├── ext.cpp │ ├── setup.py │ ├── simple_knn.cu │ ├── simple_knn.h │ ├── simple_knn │ └── .gitkeep │ ├── spatial.cu │ └── spatial.h └── utils ├── __init__.py ├── camera.py ├── depth.py ├── general.py ├── graphics.py ├── image.py ├── lama.py ├── loss.py ├── sh.py ├── system.py └── trajectory.py /.gitignore: -------------------------------------------------------------------------------- 1 | examples/*.mp4 2 | examples/*.ply 3 | examples/.gitattributes 4 | examples/README.md 5 | stablediffusion 6 | 7 | 8 | # Created by https://www.toptal.com/developers/gitignore/api/python,visualstudiocode 9 | # Edit at https://www.toptal.com/developers/gitignore?templates=python,visualstudiocode 10 | 11 | ### Python ### 12 | # Byte-compiled / optimized / DLL files 13 | __pycache__/ 14 | *.py[cod] 15 | *$py.class 16 | 17 | # C extensions 18 | *.so 19 | 20 | # Distribution / packaging 21 | .Python 22 | build/ 23 | develop-eggs/ 24 | dist/ 25 | downloads/ 26 | eggs/ 27 | .eggs/ 28 | lib/ 29 | lib64/ 30 | parts/ 31 | sdist/ 32 | var/ 33 | wheels/ 34 | share/python-wheels/ 35 | *.egg-info/ 36 | .installed.cfg 37 | *.egg 38 | MANIFEST 39 | video*/ 40 | video/ 41 | result/ 42 | 43 | # PyInstaller 44 | # Usually these files are written by a python script from a template 45 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 46 | *.manifest 47 | *.spec 48 | 49 | # Installer logs 50 | pip-log.txt 51 | pip-delete-this-directory.txt 52 | 53 | # Unit test / coverage reports 54 | htmlcov/ 55 | .tox/ 56 | .nox/ 57 | .coverage 58 | .coverage.* 59 | .cache 60 | nosetests.xml 61 | coverage.xml 62 | *.cover 63 | *.py,cover 64 | .hypothesis/ 65 | .pytest_cache/ 66 | cover/ 67 | 68 | # Translations 69 | *.mo 70 | *.pot 71 | 72 | # Django stuff: 73 | *.log 74 | local_settings.py 75 | db.sqlite3 76 | db.sqlite3-journal 77 | 78 | # Flask stuff: 79 | instance/ 80 | .webassets-cache 81 | 82 | # Scrapy stuff: 83 | .scrapy 84 | 85 | # Sphinx documentation 86 | docs/_build/ 87 | 88 | # PyBuilder 89 | .pybuilder/ 90 | target/ 91 | 92 | # Jupyter Notebook 93 | .ipynb_checkpoints 94 | 95 | # IPython 96 | profile_default/ 97 | ipython_config.py 98 | 99 | # pyenv 100 | # For a library or package, you might want to ignore these files since the code is 101 | # intended to run in multiple environments; otherwise, check them in: 102 | # .python-version 103 | 104 | # pipenv 105 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
106 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 107 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 108 | # install all needed dependencies. 109 | #Pipfile.lock 110 | 111 | # poetry 112 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 113 | # This is especially recommended for binary packages to ensure reproducibility, and is more 114 | # commonly ignored for libraries. 115 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 116 | #poetry.lock 117 | 118 | # pdm 119 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 120 | #pdm.lock 121 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 122 | # in version control. 123 | # https://pdm.fming.dev/#use-with-ide 124 | .pdm.toml 125 | 126 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 127 | __pypackages__/ 128 | 129 | # Celery stuff 130 | celerybeat-schedule 131 | celerybeat.pid 132 | 133 | # SageMath parsed files 134 | *.sage.py 135 | 136 | # Environments 137 | .env 138 | .venv 139 | env/ 140 | venv/ 141 | ENV/ 142 | env.bak/ 143 | venv.bak/ 144 | 145 | # Spyder project settings 146 | .spyderproject 147 | .spyproject 148 | 149 | # Rope project settings 150 | .ropeproject 151 | 152 | # mkdocs documentation 153 | /site 154 | 155 | # mypy 156 | .mypy_cache/ 157 | .dmypy.json 158 | dmypy.json 159 | 160 | # Pyre type checker 161 | .pyre/ 162 | 163 | # pytype static type analyzer 164 | .pytype/ 165 | 166 | # Cython debug symbols 167 | cython_debug/ 168 | 169 | # PyCharm 170 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 171 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 172 | # and can be added to the global gitignore or merged into this file. For a more nuclear 173 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
174 | #.idea/ 175 | 176 | ### Python Patch ### 177 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration 178 | poetry.toml 179 | 180 | # ruff 181 | .ruff_cache/ 182 | 183 | # LSP config files 184 | pyrightconfig.json 185 | 186 | ### VisualStudioCode ### 187 | .vscode/* 188 | !.vscode/settings.json 189 | !.vscode/tasks.json 190 | !.vscode/launch.json 191 | !.vscode/extensions.json 192 | !.vscode/*.code-snippets 193 | 194 | # Local History for Visual Studio Code 195 | .history/ 196 | 197 | # Built Visual Studio Code Extensions 198 | *.vsix 199 | 200 | ### VisualStudioCode Patch ### 201 | # Ignore all local history of files 202 | .history 203 | .ionide 204 | 205 | # End of https://www.toptal.com/developers/gitignore/api/python,visualstudiocode 206 | 207 | *.ply 208 | *.safetensors 209 | results 210 | outputs 211 | gradio_cached_examples 212 | submodules/depth-diff-gaussian-rasterization-min/third_party/glm 213 | -------------------------------------------------------------------------------- /ZoeDepth/.gitignore: -------------------------------------------------------------------------------- 1 | *.png 2 | **.gif 3 | .vscode/ 4 | *.rdb 5 | **.xml 6 | wandb/ 7 | slurm/ 8 | tmp/ 9 | .logs/ 10 | checkpoints/ 11 | external_jobs/ 12 | # Byte-compiled / optimized / DLL files 13 | __pycache__/ 14 | *.py[cod] 15 | *$py.class 16 | ptlflow_logs/ 17 | output/ 18 | log/ 19 | .idea/ 20 | # C extensions 21 | *.so 22 | results/ 23 | **.DS_Store 24 | **.pt 25 | demo/ 26 | # Distribution / packaging 27 | .Python 28 | build/ 29 | develop-eggs/ 30 | dist/ 31 | downloads/ 32 | eggs/ 33 | .eggs/ 34 | lib/ 35 | lib64/ 36 | parts/ 37 | sdist/ 38 | var/ 39 | wheels/ 40 | pip-wheel-metadata/ 41 | share/python-wheels/ 42 | *.egg-info/ 43 | .installed.cfg 44 | *.egg 45 | MANIFEST 46 | ~shortcuts/ 47 | **/wandb_logs/ 48 | **.db 49 | # PyInstaller 50 | # Usually these files are written by a python script from a template 51 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 52 | *.manifest 53 | *.spec 54 | 55 | # Installer logs 56 | pip-log.txt 57 | pip-delete-this-directory.txt 58 | 59 | # Unit test / coverage reports 60 | htmlcov/ 61 | .tox/ 62 | .nox/ 63 | .coverage 64 | .coverage.* 65 | .cache 66 | nosetests.xml 67 | coverage.xml 68 | *.cover 69 | *.py,cover 70 | .hypothesis/ 71 | .pytest_cache/ 72 | 73 | # Translations 74 | *.mo 75 | *.pot 76 | 77 | # Django stuff: 78 | *.log 79 | local_settings.py 80 | db.sqlite3 81 | db.sqlite3-journal 82 | 83 | # Flask stuff: 84 | instance/ 85 | .webassets-cache 86 | 87 | # Scrapy stuff: 88 | .scrapy 89 | 90 | # Sphinx documentation 91 | docs/_build/ 92 | 93 | # PyBuilder 94 | target/ 95 | 96 | # Jupyter Notebook 97 | .ipynb_checkpoints 98 | 99 | # IPython 100 | profile_default/ 101 | ipython_config.py 102 | 103 | # pyenv 104 | .python-version 105 | 106 | # pipenv 107 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 108 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 109 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 110 | # install all needed dependencies. 111 | #Pipfile.lock 112 | 113 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 114 | __pypackages__/ 115 | 116 | # Celery stuff 117 | celerybeat-schedule 118 | celerybeat.pid 119 | 120 | # SageMath parsed files 121 | *.sage.py 122 | 123 | # Environments 124 | .env 125 | .venv 126 | env/ 127 | venv/ 128 | ENV/ 129 | env.bak/ 130 | venv.bak/ 131 | 132 | # Spyder project settings 133 | .spyderproject 134 | .spyproject 135 | 136 | # Rope project settings 137 | .ropeproject 138 | 139 | # mkdocs documentation 140 | /site 141 | 142 | # mypy 143 | .mypy_cache/ 144 | .dmypy.json 145 | dmypy.json 146 | 147 | # Pyre type checker 148 | .pyre/ 149 | -------------------------------------------------------------------------------- /ZoeDepth/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /ZoeDepth/environment.yml: -------------------------------------------------------------------------------- 1 | name: zoe 2 | channels: 3 | - pytorch 4 | - nvidia 5 | - conda-forge 6 | dependencies: 7 | - cuda=11.7.1 8 | - h5py=3.7.0 9 | - hdf5=1.12.2 10 | - matplotlib=3.6.2 11 | - matplotlib-base=3.6.2 12 | - numpy=1.24.1 13 | - opencv=4.6.0 14 | - pip=22.3.1 15 | - python=3.9.7 16 | - pytorch=1.13.1 17 | - pytorch-cuda=11.7 18 | - pytorch-mutex=1.0 19 | - scipy=1.10.0 20 | - torchaudio=0.13.1 21 | - torchvision=0.14.1 22 | - pip: 23 | - huggingface-hub==0.11.1 24 | - timm==0.6.12 25 | - tqdm==4.64.1 26 | - wandb==0.13.9 27 | -------------------------------------------------------------------------------- /ZoeDepth/sanity.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 
14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import numpy as np 26 | from torchvision.transforms import ToTensor 27 | from PIL import Image 28 | from zoedepth.utils.misc import get_image_from_url, colorize 29 | import torch 30 | 31 | from zoedepth.models.builder import build_model 32 | from zoedepth.utils.config import get_config 33 | from pprint import pprint 34 | 35 | 36 | torch.hub.help("intel-isl/MiDaS", "DPT_BEiT_L_384", force_reload=True) 37 | 38 | DEVICE = "cuda" if torch.cuda.is_available() else "cpu" 39 | if DEVICE == "cpu": 40 | print("WARNING: Running on CPU. This will be slow. Check your CUDA installation.") 41 | 42 | print("*" * 20 + " Testing zoedepth " + "*" * 20) 43 | conf = get_config("zoedepth", "infer") 44 | 45 | 46 | print("Config:") 47 | pprint(conf) 48 | 49 | model = build_model(conf).to(DEVICE) 50 | model.eval() 51 | x = torch.rand(1, 3, 384, 512).to(DEVICE) 52 | 53 | print("-"*20 + "Testing on a random input" + "-"*20) 54 | 55 | with torch.no_grad(): 56 | out = model(x) 57 | 58 | if isinstance(out, dict): 59 | # print shapes of all outputs 60 | for k, v in out.items(): 61 | if v is not None: 62 | print(k, v.shape) 63 | else: 64 | print([o.shape for o in out if o is not None]) 65 | 66 | print("\n\n") 67 | print("-"*20 + " Testing on an indoor scene from url " + "-"*20) 68 | 69 | # Test img 70 | url = "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcS4W8H_Nxk_rs3Vje_zj6mglPOH7bnPhQitBH8WkqjlqQVotdtDEG37BsnGofME3_u6lDk&usqp=CAU" 71 | img = get_image_from_url(url) 72 | orig_size = img.size 73 | X = ToTensor()(img) 74 | X = X.unsqueeze(0).to(DEVICE) 75 | 76 | print("X.shape", X.shape) 77 | print("predicting") 78 | 79 | with torch.no_grad(): 80 | out = model.infer(X).cpu() 81 | 82 | # or just, 83 | # out = model.infer_pil(img) 84 | 85 | 86 | print("output.shape", out.shape) 87 | pred = Image.fromarray(colorize(out)) 88 | # Stack img and pred side by side for comparison and save 89 | pred = pred.resize(orig_size, Image.ANTIALIAS) 90 | stacked = Image.new("RGB", (orig_size[0]*2, orig_size[1])) 91 | stacked.paste(img, (0, 0)) 92 | stacked.paste(pred, (orig_size[0], 0)) 93 | 94 | stacked.save("pred.png") 95 | print("saved pred.png") 96 | 97 | 98 | model.infer_pil(img, output_type="pil").save("pred_raw.png") -------------------------------------------------------------------------------- /ZoeDepth/sanity_hub.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above 
copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | import numpy as np 27 | from torchvision.transforms import ToTensor 28 | from PIL import Image 29 | from zoedepth.utils.misc import get_image_from_url, colorize 30 | 31 | from zoedepth.models.builder import build_model 32 | from zoedepth.utils.config import get_config 33 | from pprint import pprint 34 | 35 | 36 | 37 | # Trigger reload of MiDaS 38 | torch.hub.help("intel-isl/MiDaS", "DPT_BEiT_L_384", force_reload=True) 39 | 40 | 41 | model = torch.hub.load(".", "ZoeD_K", source="local", pretrained=True) 42 | model = torch.hub.load(".", "ZoeD_NK", source="local", pretrained=True) 43 | model = torch.hub.load(".", "ZoeD_N", source="local", pretrained=True) 44 | -------------------------------------------------------------------------------- /ZoeDepth/train_mix.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
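# train_mix.py is the mixed-dataset (NYU + KITTI) training entry point: it builds the model from the
# parsed config, optionally resumes from a checkpoint matching `ckpt_pattern`, and trains with the
# MixedNYUKITTI loaders on a single GPU or via DDP (mp.spawn), with optional multi-node setup read
# from SLURM_JOB_NODELIST / SLURM_PROCID.
# Illustrative invocation (model name assumed): python train_mix.py -m zoedepth_nk -d mix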
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from zoedepth.utils.misc import count_parameters, parallelize 26 | from zoedepth.utils.config import get_config 27 | from zoedepth.utils.arg_utils import parse_unknown 28 | from zoedepth.trainers.builder import get_trainer 29 | from zoedepth.models.builder import build_model 30 | from zoedepth.data.data_mono import MixedNYUKITTI 31 | import torch.utils.data.distributed 32 | import torch.multiprocessing as mp 33 | import torch 34 | import numpy as np 35 | from pprint import pprint 36 | import argparse 37 | import os 38 | 39 | os.environ["PYOPENGL_PLATFORM"] = "egl" 40 | os.environ["WANDB_START_METHOD"] = "thread" 41 | 42 | 43 | def fix_random_seed(seed: int): 44 | """ 45 | Fix random seed for reproducibility 46 | 47 | Args: 48 | seed (int): random seed 49 | """ 50 | import random 51 | 52 | import numpy 53 | import torch 54 | 55 | random.seed(seed) 56 | numpy.random.seed(seed) 57 | torch.manual_seed(seed) 58 | torch.cuda.manual_seed(seed) 59 | torch.cuda.manual_seed_all(seed) 60 | 61 | torch.backends.cudnn.deterministic = True 62 | torch.backends.cudnn.benchmark = False 63 | 64 | 65 | def load_ckpt(config, model, checkpoint_dir="./checkpoints", ckpt_type="best"): 66 | import glob 67 | import os 68 | 69 | from zoedepth.models.model_io import load_wts 70 | 71 | if hasattr(config, "checkpoint"): 72 | checkpoint = config.checkpoint 73 | elif hasattr(config, "ckpt_pattern"): 74 | pattern = config.ckpt_pattern 75 | matches = glob.glob(os.path.join( 76 | checkpoint_dir, f"*{pattern}*{ckpt_type}*")) 77 | if not (len(matches) > 0): 78 | raise ValueError(f"No matches found for the pattern {pattern}") 79 | 80 | checkpoint = matches[0] 81 | 82 | else: 83 | return model 84 | model = load_wts(model, checkpoint) 85 | print("Loaded weights from {0}".format(checkpoint)) 86 | return model 87 | 88 | 89 | def main_worker(gpu, ngpus_per_node, config): 90 | try: 91 | fix_random_seed(43) 92 | 93 | config.gpu = gpu 94 | 95 | model = build_model(config) 96 | model = load_ckpt(config, model) 97 | model = parallelize(config, model) 98 | 99 | total_params = f"{round(count_parameters(model)/1e6,2)}M" 100 | config.total_params = total_params 101 | print(f"Total parameters : {total_params}") 102 | 103 | train_loader = MixedNYUKITTI(config, "train").data 104 | test_loader = MixedNYUKITTI(config, "online_eval").data 105 | 106 | trainer = get_trainer(config)( 107 | config, model, train_loader, test_loader, device=config.gpu) 108 | 109 | trainer.train() 110 | finally: 111 | import wandb 112 | wandb.finish() 113 | 114 | 115 | if __name__ == '__main__': 116 | mp.set_start_method('forkserver') 117 | 118 | parser = argparse.ArgumentParser() 119 | parser.add_argument("-m", "--model", type=str, default="synunet") 120 | parser.add_argument("-d", "--dataset", type=str, default='mix') 121 | parser.add_argument("--trainer", type=str, default=None) 122 | 123 | args, unknown_args = parser.parse_known_args() 124 | overwrite_kwargs = parse_unknown(unknown_args) 125 | 126 | overwrite_kwargs["model"] = args.model 127 | if args.trainer is not None: 128 | overwrite_kwargs["trainer"] = args.trainer 129 | 130 | config = get_config(args.model, "train", args.dataset, **overwrite_kwargs) 131 | # git_commit() 132 | if config.use_shared_dict: 133 | shared_dict = mp.Manager().dict() 134 | else: 135 | shared_dict = None 136 | config.shared_dict = shared_dict 137 | 138 | config.batch_size = config.bs 139 | config.mode = 'train' 140 | if config.root != "." 
and not os.path.isdir(config.root): 141 | os.makedirs(config.root) 142 | 143 | try: 144 | node_str = os.environ['SLURM_JOB_NODELIST'].replace( 145 | '[', '').replace(']', '') 146 | nodes = node_str.split(',') 147 | 148 | config.world_size = len(nodes) 149 | config.rank = int(os.environ['SLURM_PROCID']) 150 | # config.save_dir = "/ibex/scratch/bhatsf/videodepth/checkpoints" 151 | 152 | except KeyError as e: 153 | # We are NOT using SLURM 154 | config.world_size = 1 155 | config.rank = 0 156 | nodes = ["127.0.0.1"] 157 | 158 | if config.distributed: 159 | 160 | print(config.rank) 161 | port = np.random.randint(15000, 15025) 162 | config.dist_url = 'tcp://{}:{}'.format(nodes[0], port) 163 | print(config.dist_url) 164 | config.dist_backend = 'nccl' 165 | config.gpu = None 166 | 167 | ngpus_per_node = torch.cuda.device_count() 168 | config.num_workers = config.workers 169 | config.ngpus_per_node = ngpus_per_node 170 | print("Config:") 171 | pprint(config) 172 | if config.distributed: 173 | config.world_size = ngpus_per_node * config.world_size 174 | mp.spawn(main_worker, nprocs=ngpus_per_node, 175 | args=(ngpus_per_node, config)) 176 | else: 177 | if ngpus_per_node == 1: 178 | config.gpu = 0 179 | main_worker(config.gpu, ngpus_per_node, config) 180 | -------------------------------------------------------------------------------- /ZoeDepth/train_mono.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
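# train_mono.py is the single-dataset counterpart of train_mix.py: it uses DepthDataLoader for the
# chosen dataset (default "nyu"), seeds RNGs from config.seed when set (43 otherwise), and shares the
# same checkpoint-resume and distributed/SLURM launch logic.
# Illustrative invocation (model name assumed): python train_mono.py -m zoedepth -d nyu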
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from zoedepth.utils.misc import count_parameters, parallelize 26 | from zoedepth.utils.config import get_config 27 | from zoedepth.utils.arg_utils import parse_unknown 28 | from zoedepth.trainers.builder import get_trainer 29 | from zoedepth.models.builder import build_model 30 | from zoedepth.data.data_mono import DepthDataLoader 31 | import torch.utils.data.distributed 32 | import torch.multiprocessing as mp 33 | import torch 34 | import numpy as np 35 | from pprint import pprint 36 | import argparse 37 | import os 38 | 39 | os.environ["PYOPENGL_PLATFORM"] = "egl" 40 | os.environ["WANDB_START_METHOD"] = "thread" 41 | 42 | 43 | def fix_random_seed(seed: int): 44 | import random 45 | 46 | import numpy 47 | import torch 48 | 49 | random.seed(seed) 50 | numpy.random.seed(seed) 51 | torch.manual_seed(seed) 52 | torch.cuda.manual_seed(seed) 53 | torch.cuda.manual_seed_all(seed) 54 | 55 | torch.backends.cudnn.deterministic = True 56 | torch.backends.cudnn.benchmark = True 57 | 58 | 59 | def load_ckpt(config, model, checkpoint_dir="./checkpoints", ckpt_type="best"): 60 | import glob 61 | import os 62 | 63 | from zoedepth.models.model_io import load_wts 64 | 65 | if hasattr(config, "checkpoint"): 66 | checkpoint = config.checkpoint 67 | elif hasattr(config, "ckpt_pattern"): 68 | pattern = config.ckpt_pattern 69 | matches = glob.glob(os.path.join( 70 | checkpoint_dir, f"*{pattern}*{ckpt_type}*")) 71 | if not (len(matches) > 0): 72 | raise ValueError(f"No matches found for the pattern {pattern}") 73 | 74 | checkpoint = matches[0] 75 | 76 | else: 77 | return model 78 | model = load_wts(model, checkpoint) 79 | print("Loaded weights from {0}".format(checkpoint)) 80 | return model 81 | 82 | 83 | def main_worker(gpu, ngpus_per_node, config): 84 | try: 85 | seed = config.seed if 'seed' in config and config.seed else 43 86 | fix_random_seed(seed) 87 | 88 | config.gpu = gpu 89 | 90 | model = build_model(config) 91 | model = load_ckpt(config, model) 92 | model = parallelize(config, model) 93 | 94 | total_params = f"{round(count_parameters(model)/1e6,2)}M" 95 | config.total_params = total_params 96 | print(f"Total parameters : {total_params}") 97 | 98 | train_loader = DepthDataLoader(config, "train").data 99 | test_loader = DepthDataLoader(config, "online_eval").data 100 | 101 | trainer = get_trainer(config)( 102 | config, model, train_loader, test_loader, device=config.gpu) 103 | 104 | trainer.train() 105 | finally: 106 | import wandb 107 | wandb.finish() 108 | 109 | 110 | if __name__ == '__main__': 111 | mp.set_start_method('forkserver') 112 | 113 | parser = argparse.ArgumentParser() 114 | parser.add_argument("-m", "--model", type=str, default="synunet") 115 | parser.add_argument("-d", "--dataset", type=str, default='nyu') 116 | parser.add_argument("--trainer", type=str, default=None) 117 | 118 | args, unknown_args = parser.parse_known_args() 119 | overwrite_kwargs = parse_unknown(unknown_args) 120 | 121 | overwrite_kwargs["model"] = args.model 122 | if args.trainer is not None: 123 | overwrite_kwargs["trainer"] = args.trainer 124 | 125 | config = get_config(args.model, "train", args.dataset, **overwrite_kwargs) 126 | # git_commit() 127 | if config.use_shared_dict: 128 | shared_dict = mp.Manager().dict() 129 | else: 130 | shared_dict = None 131 | config.shared_dict = shared_dict 132 | 133 | config.batch_size = config.bs 134 | config.mode = 'train' 135 | if config.root != "." 
and not os.path.isdir(config.root): 136 | os.makedirs(config.root) 137 | 138 | try: 139 | node_str = os.environ['SLURM_JOB_NODELIST'].replace( 140 | '[', '').replace(']', '') 141 | nodes = node_str.split(',') 142 | 143 | config.world_size = len(nodes) 144 | config.rank = int(os.environ['SLURM_PROCID']) 145 | # config.save_dir = "/ibex/scratch/bhatsf/videodepth/checkpoints" 146 | 147 | except KeyError as e: 148 | # We are NOT using SLURM 149 | config.world_size = 1 150 | config.rank = 0 151 | nodes = ["127.0.0.1"] 152 | 153 | if config.distributed: 154 | 155 | print(config.rank) 156 | port = np.random.randint(15000, 15025) 157 | config.dist_url = 'tcp://{}:{}'.format(nodes[0], port) 158 | print(config.dist_url) 159 | config.dist_backend = 'nccl' 160 | config.gpu = None 161 | 162 | ngpus_per_node = torch.cuda.device_count() 163 | config.num_workers = config.workers 164 | config.ngpus_per_node = ngpus_per_node 165 | print("Config:") 166 | pprint(config) 167 | if config.distributed: 168 | config.world_size = ngpus_per_node * config.world_size 169 | mp.spawn(main_worker, nprocs=ngpus_per_node, 170 | args=(ngpus_per_node, config)) 171 | else: 172 | if ngpus_per_node == 1: 173 | config.gpu = 0 174 | main_worker(config.gpu, ngpus_per_node, config) 175 | -------------------------------------------------------------------------------- /ZoeDepth/ui/app.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import gradio as gr 26 | import torch 27 | 28 | from .gradio_depth_pred import create_demo as create_depth_pred_demo 29 | from .gradio_im_to_3d import create_demo as create_im_to_3d_demo 30 | from .gradio_pano_to_3d import create_demo as create_pano_to_3d_demo 31 | 32 | 33 | css = """ 34 | #img-display-container { 35 | max-height: 50vh; 36 | } 37 | #img-display-input { 38 | max-height: 40vh; 39 | } 40 | #img-display-output { 41 | max-height: 40vh; 42 | } 43 | 44 | """ 45 | DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' 46 | model = torch.hub.load('isl-org/ZoeDepth', "ZoeD_N", pretrained=True).to(DEVICE).eval() 47 | 48 | title = "# ZoeDepth" 49 | description = """Official demo for **ZoeDepth: Zero-shot Transfer by Combining Relative and Metric Depth**. 
50 | 51 | ZoeDepth is a deep learning model for metric depth estimation from a single image. 52 | 53 | Please refer to our [paper](https://arxiv.org/abs/2302.12288) or [github](https://github.com/isl-org/ZoeDepth) for more details.""" 54 | 55 | with gr.Blocks(css=css) as demo: 56 | gr.Markdown(title) 57 | gr.Markdown(description) 58 | with gr.Tab("Depth Prediction"): 59 | create_depth_pred_demo(model) 60 | with gr.Tab("Image to 3D"): 61 | create_im_to_3d_demo(model) 62 | with gr.Tab("360 Panorama to 3D"): 63 | create_pano_to_3d_demo(model) 64 | 65 | if __name__ == '__main__': 66 | demo.queue().launch() -------------------------------------------------------------------------------- /ZoeDepth/ui/gradio_depth_pred.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
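# This tab takes a PIL image, predicts metric depth with model.infer_pil, and returns a colorized
# depth map (cmap "gray_r") plus a downloadable 16-bit PNG in which depth_in_meters is roughly
# pixel_value / 256 (hence the "multiplier:256" label below).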
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import gradio as gr 26 | from zoedepth.utils.misc import colorize 27 | from PIL import Image 28 | import tempfile 29 | 30 | def predict_depth(model, image): 31 | depth = model.infer_pil(image) 32 | return depth 33 | 34 | def create_demo(model): 35 | gr.Markdown("### Depth Prediction demo") 36 | with gr.Row(): 37 | input_image = gr.Image(label="Input Image", type='pil', elem_id='img-display-input').style(height="auto") 38 | depth_image = gr.Image(label="Depth Map", elem_id='img-display-output') 39 | raw_file = gr.File(label="16-bit raw depth, multiplier:256") 40 | submit = gr.Button("Submit") 41 | 42 | def on_submit(image): 43 | depth = predict_depth(model, image) 44 | colored_depth = colorize(depth, cmap='gray_r') 45 | tmp = tempfile.NamedTemporaryFile(suffix='.png', delete=False) 46 | raw_depth = Image.fromarray((depth*256).astype('uint16')) 47 | raw_depth.save(tmp.name) 48 | return [colored_depth, tmp.name] 49 | 50 | submit.click(on_submit, inputs=[input_image], outputs=[depth_image, raw_file]) 51 | # examples = gr.Examples(examples=["examples/person_1.jpeg", "examples/person_2.jpeg", "examples/person-leaves.png", "examples/living-room.jpeg"], 52 | # inputs=[input_image]) -------------------------------------------------------------------------------- /ZoeDepth/ui/gradio_im_to_3d.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import gradio as gr 26 | import numpy as np 27 | import trimesh 28 | from zoedepth.utils.geometry import depth_to_points, create_triangles 29 | from functools import partial 30 | import tempfile 31 | 32 | 33 | def depth_edges_mask(depth): 34 | """Returns a mask of edges in the depth map. 35 | Args: 36 | depth: 2D numpy array of shape (H, W) with dtype float32. 37 | Returns: 38 | mask: 2D numpy array of shape (H, W) with dtype bool. 39 | """ 40 | # Compute the x and y gradients of the depth map. 41 | depth_dx, depth_dy = np.gradient(depth) 42 | # Compute the gradient magnitude. 43 | depth_grad = np.sqrt(depth_dx ** 2 + depth_dy ** 2) 44 | # Compute the edge mask. 
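# A fixed gradient-magnitude threshold of 0.05 (below) marks depth discontinuities; get_mesh later
# drops triangles that cross these pixels (mask=~depth_edges_mask(depth)) unless the
# "Keep occlusion edges" checkbox is enabled.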
45 | mask = depth_grad > 0.05 46 | return mask 47 | 48 | 49 | def predict_depth(model, image): 50 | depth = model.infer_pil(image) 51 | return depth 52 | 53 | def get_mesh(model, image, keep_edges=False): 54 | image.thumbnail((1024,1024)) # limit the size of the input image 55 | depth = predict_depth(model, image) 56 | pts3d = depth_to_points(depth[None]) 57 | pts3d = pts3d.reshape(-1, 3) 58 | 59 | # Create a trimesh mesh from the points 60 | # Each pixel is connected to its 4 neighbors 61 | # colors are the RGB values of the image 62 | 63 | verts = pts3d.reshape(-1, 3) 64 | image = np.array(image) 65 | if keep_edges: 66 | triangles = create_triangles(image.shape[0], image.shape[1]) 67 | else: 68 | triangles = create_triangles(image.shape[0], image.shape[1], mask=~depth_edges_mask(depth)) 69 | colors = image.reshape(-1, 3) 70 | mesh = trimesh.Trimesh(vertices=verts, faces=triangles, vertex_colors=colors) 71 | 72 | # Save as glb 73 | glb_file = tempfile.NamedTemporaryFile(suffix='.glb', delete=False) 74 | glb_path = glb_file.name 75 | mesh.export(glb_path) 76 | return glb_path 77 | 78 | def create_demo(model): 79 | 80 | gr.Markdown("### Image to 3D mesh") 81 | gr.Markdown("Convert a single 2D image to a 3D mesh") 82 | 83 | with gr.Row(): 84 | image = gr.Image(label="Input Image", type='pil') 85 | result = gr.Model3D(label="3d mesh reconstruction", clear_color=[ 86 | 1.0, 1.0, 1.0, 1.0]) 87 | 88 | checkbox = gr.Checkbox(label="Keep occlusion edges", value=False) 89 | submit = gr.Button("Submit") 90 | submit.click(partial(get_mesh, model), inputs=[image, checkbox], outputs=[result]) 91 | # examples = gr.Examples(examples=["examples/aerial_beach.jpeg", "examples/mountains.jpeg", "examples/person_1.jpeg", "examples/ancient-carved.jpeg"], 92 | # inputs=[image]) 93 | 94 | -------------------------------------------------------------------------------- /ZoeDepth/ui/gradio_pano_to_3d.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import gradio as gr 26 | import numpy as np 27 | import trimesh 28 | from zoedepth.utils.geometry import create_triangles 29 | from functools import partial 30 | import tempfile 31 | 32 | def depth_edges_mask(depth): 33 | """Returns a mask of edges in the depth map. 
34 | Args: 35 | depth: 2D numpy array of shape (H, W) with dtype float32. 36 | Returns: 37 | mask: 2D numpy array of shape (H, W) with dtype bool. 38 | """ 39 | # Compute the x and y gradients of the depth map. 40 | depth_dx, depth_dy = np.gradient(depth) 41 | # Compute the gradient magnitude. 42 | depth_grad = np.sqrt(depth_dx ** 2 + depth_dy ** 2) 43 | # Compute the edge mask. 44 | mask = depth_grad > 0.05 45 | return mask 46 | 47 | 48 | def pano_depth_to_world_points(depth): 49 | """ 50 | 360 depth to world points 51 | given 2D depth is an equirectangular projection of a spherical image 52 | Treat depth as radius 53 | 54 | longitude : -pi to pi 55 | latitude : -pi/2 to pi/2 56 | """ 57 | 58 | # Convert depth to radius 59 | radius = depth.flatten() 60 | 61 | lon = np.linspace(-np.pi, np.pi, depth.shape[1]) 62 | lat = np.linspace(-np.pi/2, np.pi/2, depth.shape[0]) 63 | 64 | lon, lat = np.meshgrid(lon, lat) 65 | lon = lon.flatten() 66 | lat = lat.flatten() 67 | 68 | # Convert to cartesian coordinates 69 | x = radius * np.cos(lat) * np.cos(lon) 70 | y = radius * np.cos(lat) * np.sin(lon) 71 | z = radius * np.sin(lat) 72 | 73 | pts3d = np.stack([x, y, z], axis=1) 74 | 75 | return pts3d 76 | 77 | 78 | def predict_depth(model, image): 79 | depth = model.infer_pil(image) 80 | return depth 81 | 82 | def get_mesh(model, image, keep_edges=False): 83 | image.thumbnail((1024,1024)) # limit the size of the image 84 | depth = predict_depth(model, image) 85 | pts3d = pano_depth_to_world_points(depth) 86 | 87 | # Create a trimesh mesh from the points 88 | # Each pixel is connected to its 4 neighbors 89 | # colors are the RGB values of the image 90 | 91 | verts = pts3d.reshape(-1, 3) 92 | image = np.array(image) 93 | if keep_edges: 94 | triangles = create_triangles(image.shape[0], image.shape[1]) 95 | else: 96 | triangles = create_triangles(image.shape[0], image.shape[1], mask=~depth_edges_mask(depth)) 97 | colors = image.reshape(-1, 3) 98 | mesh = trimesh.Trimesh(vertices=verts, faces=triangles, vertex_colors=colors) 99 | 100 | # Save as glb 101 | glb_file = tempfile.NamedTemporaryFile(suffix='.glb', delete=False) 102 | glb_path = glb_file.name 103 | mesh.export(glb_path) 104 | return glb_path 105 | 106 | def create_demo(model): 107 | gr.Markdown("### Panorama to 3D mesh") 108 | gr.Markdown("Convert a 360 spherical panorama to a 3D mesh") 109 | gr.Markdown("ZoeDepth was not trained on panoramic images. It doesn't know anything about panoramas or spherical projection. Here, we just treat the estimated depth as radius and some projection errors are expected. 
Nonetheless, ZoeDepth still works surprisingly well on 360 reconstruction.") 110 | 111 | with gr.Row(): 112 | input_image = gr.Image(label="Input Image", type='pil') 113 | result = gr.Model3D(label="3d mesh reconstruction", clear_color=[ 114 | 1.0, 1.0, 1.0, 1.0]) 115 | 116 | checkbox = gr.Checkbox(label="Keep occlusion edges", value=True) 117 | submit = gr.Button("Submit") 118 | submit.click(partial(get_mesh, model), inputs=[input_image, checkbox], outputs=[result]) 119 | # examples = gr.Examples(examples=["examples/pano_1.jpeg", "examples/pano_2.jpeg", "examples/pano_3.jpeg"], 120 | # inputs=[input_image]) -------------------------------------------------------------------------------- /ZoeDepth/ui/ui_requirements.txt: -------------------------------------------------------------------------------- 1 | gradio 2 | trimesh==3.9.42 -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/ddad.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self, resize_shape): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | self.resize = transforms.Resize(resize_shape) 40 | 41 | def __call__(self, sample): 42 | image, depth = sample['image'], sample['depth'] 43 | image = self.to_tensor(image) 44 | image = self.normalize(image) 45 | depth = self.to_tensor(depth) 46 | 47 | image = self.resize(image) 48 | 49 | return {'image': image, 'depth': depth, 'dataset': "ddad"} 50 | 51 | def to_tensor(self, pic): 52 | 53 | if isinstance(pic, np.ndarray): 54 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 55 | return img 56 | 57 | # # handle PIL Image 58 | if pic.mode == 'I': 59 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 60 | elif pic.mode == 'I;16': 61 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 62 | else: 63 | img = torch.ByteTensor( 64 | torch.ByteStorage.from_buffer(pic.tobytes())) 65 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 66 | if pic.mode == 'YCbCr': 67 | nchannel = 3 68 | elif pic.mode == 'I;16': 69 | nchannel = 1 70 | else: 71 | nchannel = len(pic.mode) 72 | img = img.view(pic.size[1], pic.size[0], nchannel) 73 | 74 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 75 | 76 | if isinstance(img, torch.ByteTensor): 77 | return img.float() 78 | else: 79 | return img 80 | 81 | 82 | class DDAD(Dataset): 83 | def __init__(self, data_dir_root, resize_shape): 84 | import glob 85 | 86 | # image paths are of the form /{outleft, depthmap}/*.png 87 | self.image_files = glob.glob(os.path.join(data_dir_root, '*.png')) 88 | self.depth_files = [r.replace("_rgb.png", "_depth.npy") 89 | for r in self.image_files] 90 | self.transform = ToTensor(resize_shape) 91 | 92 | def __getitem__(self, idx): 93 | 94 | image_path = self.image_files[idx] 95 | depth_path = self.depth_files[idx] 96 | 97 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 98 | depth = np.load(depth_path) # meters 99 | 100 | # depth[depth > 8] = -1 101 | depth = depth[..., None] 102 | 103 | sample = dict(image=image, depth=depth) 104 | sample = self.transform(sample) 105 | 106 | if idx == 0: 107 | print(sample["image"].shape) 108 | 109 | return sample 110 | 111 | def __len__(self): 112 | return len(self.image_files) 113 | 114 | 115 | def get_ddad_loader(data_dir_root, resize_shape, batch_size=1, **kwargs): 116 | dataset = DDAD(data_dir_root, resize_shape) 117 | return DataLoader(dataset, batch_size, **kwargs) 118 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/diml_indoor_test.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to 
deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | self.resize = transforms.Resize((480, 640)) 40 | 41 | def __call__(self, sample): 42 | image, depth = sample['image'], sample['depth'] 43 | image = self.to_tensor(image) 44 | image = self.normalize(image) 45 | depth = self.to_tensor(depth) 46 | 47 | image = self.resize(image) 48 | 49 | return {'image': image, 'depth': depth, 'dataset': "diml_indoor"} 50 | 51 | def to_tensor(self, pic): 52 | 53 | if isinstance(pic, np.ndarray): 54 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 55 | return img 56 | 57 | # # handle PIL Image 58 | if pic.mode == 'I': 59 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 60 | elif pic.mode == 'I;16': 61 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 62 | else: 63 | img = torch.ByteTensor( 64 | torch.ByteStorage.from_buffer(pic.tobytes())) 65 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 66 | if pic.mode == 'YCbCr': 67 | nchannel = 3 68 | elif pic.mode == 'I;16': 69 | nchannel = 1 70 | else: 71 | nchannel = len(pic.mode) 72 | img = img.view(pic.size[1], pic.size[0], nchannel) 73 | 74 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 75 | if isinstance(img, torch.ByteTensor): 76 | return img.float() 77 | else: 78 | return img 79 | 80 | 81 | class DIML_Indoor(Dataset): 82 | def __init__(self, data_dir_root): 83 | import glob 84 | 85 | # image paths are of the form /{HR, LR}//{color, depth_filled}/*.png 86 | self.image_files = glob.glob(os.path.join( 87 | data_dir_root, "LR", '*', 'color', '*.png')) 88 | self.depth_files = [r.replace("color", "depth_filled").replace( 89 | "_c.png", "_depth_filled.png") for r in self.image_files] 90 | self.transform = ToTensor() 91 | 92 | def __getitem__(self, idx): 93 | image_path = self.image_files[idx] 94 | depth_path = self.depth_files[idx] 95 | 96 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 97 | depth = np.asarray(Image.open(depth_path), 98 | dtype='uint16') / 1000.0 # mm to meters 99 | 100 | # print(np.shape(image)) 101 | # print(np.shape(depth)) 102 | 103 | # depth[depth > 8] = -1 104 | depth = depth[..., None] 105 | 106 | sample = dict(image=image, depth=depth) 107 
| 108 | # return sample 109 | sample = self.transform(sample) 110 | 111 | if idx == 0: 112 | print(sample["image"].shape) 113 | 114 | return sample 115 | 116 | def __len__(self): 117 | return len(self.image_files) 118 | 119 | 120 | def get_diml_indoor_loader(data_dir_root, batch_size=1, **kwargs): 121 | dataset = DIML_Indoor(data_dir_root) 122 | return DataLoader(dataset, batch_size, **kwargs) 123 | 124 | # get_diml_indoor_loader(data_dir_root="datasets/diml/indoor/test/HR") 125 | # get_diml_indoor_loader(data_dir_root="datasets/diml/indoor/test/LR") 126 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/diml_outdoor_test.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | 40 | def __call__(self, sample): 41 | image, depth = sample['image'], sample['depth'] 42 | image = self.to_tensor(image) 43 | image = self.normalize(image) 44 | depth = self.to_tensor(depth) 45 | 46 | return {'image': image, 'depth': depth, 'dataset': "diml_outdoor"} 47 | 48 | def to_tensor(self, pic): 49 | 50 | if isinstance(pic, np.ndarray): 51 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 52 | return img 53 | 54 | # # handle PIL Image 55 | if pic.mode == 'I': 56 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 57 | elif pic.mode == 'I;16': 58 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 59 | else: 60 | img = torch.ByteTensor( 61 | torch.ByteStorage.from_buffer(pic.tobytes())) 62 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 63 | if pic.mode == 'YCbCr': 64 | nchannel = 3 65 | elif pic.mode == 'I;16': 66 | nchannel = 1 67 | else: 68 | nchannel = len(pic.mode) 69 | img = img.view(pic.size[1], pic.size[0], nchannel) 70 | 71 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 72 | if isinstance(img, torch.ByteTensor): 73 | return img.float() 74 | else: 75 | return img 76 | 77 | 78 | class DIML_Outdoor(Dataset): 79 | def __init__(self, data_dir_root): 80 | import glob 81 | 82 | # image paths are of the form /{outleft, depthmap}/*.png 83 | self.image_files = glob.glob(os.path.join( 84 | data_dir_root, "*", 'outleft', '*.png')) 85 | self.depth_files = [r.replace("outleft", "depthmap") 86 | for r in self.image_files] 87 | self.transform = ToTensor() 88 | 89 | def __getitem__(self, idx): 90 | image_path = self.image_files[idx] 91 | depth_path = self.depth_files[idx] 92 | 93 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 94 | depth = np.asarray(Image.open(depth_path), 95 | dtype='uint16') / 1000.0 # mm to meters 96 | 97 | # depth[depth > 8] = -1 98 | depth = depth[..., None] 99 | 100 | sample = dict(image=image, depth=depth, dataset="diml_outdoor") 101 | 102 | # return sample 103 | return self.transform(sample) 104 | 105 | def __len__(self): 106 | return len(self.image_files) 107 | 108 | 109 | def get_diml_outdoor_loader(data_dir_root, batch_size=1, **kwargs): 110 | dataset = DIML_Outdoor(data_dir_root) 111 | return DataLoader(dataset, batch_size, **kwargs) 112 | 113 | # get_diml_outdoor_loader(data_dir_root="datasets/diml/outdoor/test/HR") 114 | # get_diml_outdoor_loader(data_dir_root="datasets/diml/outdoor/test/LR") 115 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/diode.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the 
Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | self.resize = transforms.Resize(480) 40 | 41 | def __call__(self, sample): 42 | image, depth = sample['image'], sample['depth'] 43 | image = self.to_tensor(image) 44 | image = self.normalize(image) 45 | depth = self.to_tensor(depth) 46 | 47 | image = self.resize(image) 48 | 49 | return {'image': image, 'depth': depth, 'dataset': "diode"} 50 | 51 | def to_tensor(self, pic): 52 | 53 | if isinstance(pic, np.ndarray): 54 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 55 | return img 56 | 57 | # # handle PIL Image 58 | if pic.mode == 'I': 59 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 60 | elif pic.mode == 'I;16': 61 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 62 | else: 63 | img = torch.ByteTensor( 64 | torch.ByteStorage.from_buffer(pic.tobytes())) 65 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 66 | if pic.mode == 'YCbCr': 67 | nchannel = 3 68 | elif pic.mode == 'I;16': 69 | nchannel = 1 70 | else: 71 | nchannel = len(pic.mode) 72 | img = img.view(pic.size[1], pic.size[0], nchannel) 73 | 74 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 75 | 76 | if isinstance(img, torch.ByteTensor): 77 | return img.float() 78 | else: 79 | return img 80 | 81 | 82 | class DIODE(Dataset): 83 | def __init__(self, data_dir_root): 84 | import glob 85 | 86 | # image paths are of the form /scene_#/scan_#/*.png 87 | self.image_files = glob.glob( 88 | os.path.join(data_dir_root, '*', '*', '*.png')) 89 | self.depth_files = [r.replace(".png", "_depth.npy") 90 | for r in self.image_files] 91 | self.depth_mask_files = [ 92 | r.replace(".png", "_depth_mask.npy") for r in self.image_files] 93 | self.transform = ToTensor() 94 | 95 | def __getitem__(self, idx): 96 | image_path = self.image_files[idx] 97 | depth_path = self.depth_files[idx] 98 | depth_mask_path = self.depth_mask_files[idx] 99 | 100 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 101 | depth = np.load(depth_path) # in meters 102 | valid = np.load(depth_mask_path) # binary 103 | 104 | # depth[depth > 8] = -1 105 | # depth = depth[..., None] 106 | 107 | sample = dict(image=image, depth=depth, valid=valid) 108 | 109 | # return sample 110 | sample = self.transform(sample) 111 | 112 | if idx == 0: 113 | print(sample["image"].shape) 114 | 115 | return sample 116 | 117 | def __len__(self): 118 | return len(self.image_files) 
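# (Editor's note — hedged usage sketch, not part of the original file.) The helper
# defined just below can be exercised along the lines of the commented call at the
# bottom of this file; the dataset path is illustrative only:
#
#     loader = get_diode_loader("datasets/diode/val/outdoor", batch_size=1)
#     batch = next(iter(loader))
#     # batch["image"] is an RGB tensor whose shorter side is resized to 480,
#     # batch["depth"] holds metric depth loaded from the *_depth.npy files,
#     # and batch["dataset"][0] == "diode". Note the "valid" mask is dropped by
#     # ToTensor.__call__ above.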
119 | 120 | 121 | def get_diode_loader(data_dir_root, batch_size=1, **kwargs): 122 | dataset = DIODE(data_dir_root) 123 | return DataLoader(dataset, batch_size, **kwargs) 124 | 125 | # get_diode_loader(data_dir_root="datasets/diode/val/outdoor") 126 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/hypersim.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import glob 26 | import os 27 | 28 | import h5py 29 | import numpy as np 30 | import torch 31 | from PIL import Image 32 | from torch.utils.data import DataLoader, Dataset 33 | from torchvision import transforms 34 | 35 | 36 | def hypersim_distance_to_depth(npyDistance): 37 | intWidth, intHeight, fltFocal = 1024, 768, 886.81 38 | 39 | npyImageplaneX = np.linspace((-0.5 * intWidth) + 0.5, (0.5 * intWidth) - 0.5, intWidth).reshape( 40 | 1, intWidth).repeat(intHeight, 0).astype(np.float32)[:, :, None] 41 | npyImageplaneY = np.linspace((-0.5 * intHeight) + 0.5, (0.5 * intHeight) - 0.5, 42 | intHeight).reshape(intHeight, 1).repeat(intWidth, 1).astype(np.float32)[:, :, None] 43 | npyImageplaneZ = np.full([intHeight, intWidth, 1], fltFocal, np.float32) 44 | npyImageplane = np.concatenate( 45 | [npyImageplaneX, npyImageplaneY, npyImageplaneZ], 2) 46 | 47 | npyDepth = npyDistance / np.linalg.norm(npyImageplane, 2, 2) * fltFocal 48 | return npyDepth 49 | 50 | 51 | class ToTensor(object): 52 | def __init__(self): 53 | # self.normalize = transforms.Normalize( 54 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 55 | self.normalize = lambda x: x 56 | self.resize = transforms.Resize((480, 640)) 57 | 58 | def __call__(self, sample): 59 | image, depth = sample['image'], sample['depth'] 60 | image = self.to_tensor(image) 61 | image = self.normalize(image) 62 | depth = self.to_tensor(depth) 63 | 64 | image = self.resize(image) 65 | 66 | return {'image': image, 'depth': depth, 'dataset': "hypersim"} 67 | 68 | def to_tensor(self, pic): 69 | 70 | if isinstance(pic, np.ndarray): 71 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 72 | return img 73 | 74 | # # handle PIL Image 75 | if pic.mode == 'I': 76 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 77 | elif pic.mode == 'I;16': 78 | img = 
torch.from_numpy(np.array(pic, np.int16, copy=False)) 79 | else: 80 | img = torch.ByteTensor( 81 | torch.ByteStorage.from_buffer(pic.tobytes())) 82 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 83 | if pic.mode == 'YCbCr': 84 | nchannel = 3 85 | elif pic.mode == 'I;16': 86 | nchannel = 1 87 | else: 88 | nchannel = len(pic.mode) 89 | img = img.view(pic.size[1], pic.size[0], nchannel) 90 | 91 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 92 | if isinstance(img, torch.ByteTensor): 93 | return img.float() 94 | else: 95 | return img 96 | 97 | 98 | class HyperSim(Dataset): 99 | def __init__(self, data_dir_root): 100 | # image paths are of the form //images/scene_cam_#_final_preview/*.tonemap.jpg 101 | # depth paths are of the form //images/scene_cam_#_final_preview/*.depth_meters.hdf5 102 | self.image_files = glob.glob(os.path.join( 103 | data_dir_root, '*', 'images', 'scene_cam_*_final_preview', '*.tonemap.jpg')) 104 | self.depth_files = [r.replace("_final_preview", "_geometry_hdf5").replace( 105 | ".tonemap.jpg", ".depth_meters.hdf5") for r in self.image_files] 106 | self.transform = ToTensor() 107 | 108 | def __getitem__(self, idx): 109 | image_path = self.image_files[idx] 110 | depth_path = self.depth_files[idx] 111 | 112 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 113 | 114 | # depth from hdf5 115 | depth_fd = h5py.File(depth_path, "r") 116 | # in meters (Euclidean distance) 117 | distance_meters = np.array(depth_fd['dataset']) 118 | depth = hypersim_distance_to_depth( 119 | distance_meters) # in meters (planar depth) 120 | 121 | # depth[depth > 8] = -1 122 | depth = depth[..., None] 123 | 124 | sample = dict(image=image, depth=depth) 125 | sample = self.transform(sample) 126 | 127 | if idx == 0: 128 | print(sample["image"].shape) 129 | 130 | return sample 131 | 132 | def __len__(self): 133 | return len(self.image_files) 134 | 135 | 136 | def get_hypersim_loader(data_dir_root, batch_size=1, **kwargs): 137 | dataset = HyperSim(data_dir_root) 138 | return DataLoader(dataset, batch_size, **kwargs) 139 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/ibims.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
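# (Editor's note — hedged usage sketch, not part of the original file.) The loader
# defined below only needs an object exposing an `ibims_root` attribute; the path
# used here is illustrative:
#
#     from types import SimpleNamespace
#     loader = get_ibims_loader(SimpleNamespace(ibims_root="datasets/ibims"), batch_size=1)
#
# The root directory is expected to contain imagelist.txt plus the rgb/, depth/,
# mask_invalid/ and mask_transp/ subfolders read in iBims.__init__ below.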
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms as T 32 | 33 | 34 | class iBims(Dataset): 35 | def __init__(self, config): 36 | root_folder = config.ibims_root 37 | with open(os.path.join(root_folder, "imagelist.txt"), 'r') as f: 38 | imglist = f.read().split() 39 | 40 | samples = [] 41 | for basename in imglist: 42 | img_path = os.path.join(root_folder, 'rgb', basename + ".png") 43 | depth_path = os.path.join(root_folder, 'depth', basename + ".png") 44 | valid_mask_path = os.path.join( 45 | root_folder, 'mask_invalid', basename+".png") 46 | transp_mask_path = os.path.join( 47 | root_folder, 'mask_transp', basename+".png") 48 | 49 | samples.append( 50 | (img_path, depth_path, valid_mask_path, transp_mask_path)) 51 | 52 | self.samples = samples 53 | # self.normalize = T.Normalize( 54 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 55 | self.normalize = lambda x : x 56 | 57 | def __getitem__(self, idx): 58 | img_path, depth_path, valid_mask_path, transp_mask_path = self.samples[idx] 59 | 60 | img = np.asarray(Image.open(img_path), dtype=np.float32) / 255.0 61 | depth = np.asarray(Image.open(depth_path), 62 | dtype=np.uint16).astype('float')*50.0/65535 63 | 64 | mask_valid = np.asarray(Image.open(valid_mask_path)) 65 | mask_transp = np.asarray(Image.open(transp_mask_path)) 66 | 67 | # depth = depth * mask_valid * mask_transp 68 | depth = np.where(mask_valid * mask_transp, depth, -1) 69 | 70 | img = torch.from_numpy(img).permute(2, 0, 1) 71 | img = self.normalize(img) 72 | depth = torch.from_numpy(depth).unsqueeze(0) 73 | return dict(image=img, depth=depth, image_path=img_path, depth_path=depth_path, dataset='ibims') 74 | 75 | def __len__(self): 76 | return len(self.samples) 77 | 78 | 79 | def get_ibims_loader(config, batch_size=1, **kwargs): 80 | dataloader = DataLoader(iBims(config), batch_size=batch_size, **kwargs) 81 | return dataloader 82 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/preprocess.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
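# (Editor's note — hedged usage sketch, not part of the original file.) The border
# helpers defined below are typically applied jointly to an RGB frame and any
# aligned maps; the file names are illustrative and the snippet assumes
# `from PIL import Image` and `import numpy as np`, as used elsewhere in this package:
#
#     rgb = np.asarray(Image.open("frame.png"), dtype=np.float32)   # (H, W, 3)
#     depth = np.load("frame_depth.npy")                            # (H, W)
#     rgb_c, depth_c = crop_black_or_white_border(rgb, depth)
#
# Only the RGB image determines the CropParams; every additional array is cropped
# with the same parameters via crop_images.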
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import numpy as np 26 | from dataclasses import dataclass 27 | from typing import Tuple, List 28 | 29 | # dataclass to store the crop parameters 30 | @dataclass 31 | class CropParams: 32 | top: int 33 | bottom: int 34 | left: int 35 | right: int 36 | 37 | 38 | 39 | def get_border_params(rgb_image, tolerance=0.1, cut_off=20, value=0, level_diff_threshold=5, channel_axis=-1, min_border=5) -> CropParams: 40 | gray_image = np.mean(rgb_image, axis=channel_axis) 41 | h, w = gray_image.shape 42 | 43 | 44 | def num_value_pixels(arr): 45 | return np.sum(np.abs(arr - value) < level_diff_threshold) 46 | 47 | def is_above_tolerance(arr, total_pixels): 48 | return (num_value_pixels(arr) / total_pixels) > tolerance 49 | 50 | # Crop top border until number of value pixels become below tolerance 51 | top = min_border 52 | while is_above_tolerance(gray_image[top, :], w) and top < h-1: 53 | top += 1 54 | if top > cut_off: 55 | break 56 | 57 | # Crop bottom border until number of value pixels become below tolerance 58 | bottom = h - min_border 59 | while is_above_tolerance(gray_image[bottom, :], w) and bottom > 0: 60 | bottom -= 1 61 | if h - bottom > cut_off: 62 | break 63 | 64 | # Crop left border until number of value pixels become below tolerance 65 | left = min_border 66 | while is_above_tolerance(gray_image[:, left], h) and left < w-1: 67 | left += 1 68 | if left > cut_off: 69 | break 70 | 71 | # Crop right border until number of value pixels become below tolerance 72 | right = w - min_border 73 | while is_above_tolerance(gray_image[:, right], h) and right > 0: 74 | right -= 1 75 | if w - right > cut_off: 76 | break 77 | 78 | 79 | return CropParams(top, bottom, left, right) 80 | 81 | 82 | def get_white_border(rgb_image, value=255, **kwargs) -> CropParams: 83 | """Crops the white border of the RGB. 84 | 85 | Args: 86 | rgb: RGB image, shape (H, W, 3). 87 | Returns: 88 | Crop parameters. 89 | """ 90 | if value == 255: 91 | # assert range of values in rgb image is [0, 255] 92 | assert np.max(rgb_image) <= 255 and np.min(rgb_image) >= 0, "RGB image values are not in range [0, 255]." 93 | assert rgb_image.max() > 1, "RGB image values are not in range [0, 255]." 94 | elif value == 1: 95 | # assert range of values in rgb image is [0, 1] 96 | assert np.max(rgb_image) <= 1 and np.min(rgb_image) >= 0, "RGB image values are not in range [0, 1]." 97 | 98 | return get_border_params(rgb_image, value=value, **kwargs) 99 | 100 | def get_black_border(rgb_image, **kwargs) -> CropParams: 101 | """Crops the black border of the RGB. 102 | 103 | Args: 104 | rgb: RGB image, shape (H, W, 3). 105 | 106 | Returns: 107 | Crop parameters. 108 | """ 109 | 110 | return get_border_params(rgb_image, value=0, **kwargs) 111 | 112 | def crop_image(image: np.ndarray, crop_params: CropParams) -> np.ndarray: 113 | """Crops the image according to the crop parameters. 114 | 115 | Args: 116 | image: RGB or depth image, shape (H, W, 3) or (H, W). 117 | crop_params: Crop parameters. 118 | 119 | Returns: 120 | Cropped image. 121 | """ 122 | return image[crop_params.top:crop_params.bottom, crop_params.left:crop_params.right] 123 | 124 | def crop_images(*images: np.ndarray, crop_params: CropParams) -> Tuple[np.ndarray]: 125 | """Crops the images according to the crop parameters. 126 | 127 | Args: 128 | images: RGB or depth images, shape (H, W, 3) or (H, W). 129 | crop_params: Crop parameters. 130 | 131 | Returns: 132 | Cropped images. 
133 | """ 134 | return tuple(crop_image(image, crop_params) for image in images) 135 | 136 | def crop_black_or_white_border(rgb_image, *other_images: np.ndarray, tolerance=0.1, cut_off=20, level_diff_threshold=5) -> Tuple[np.ndarray]: 137 | """Crops the white and black border of the RGB and depth images. 138 | 139 | Args: 140 | rgb: RGB image, shape (H, W, 3). This image is used to determine the border. 141 | other_images: The other images to crop according to the border of the RGB image. 142 | Returns: 143 | Cropped RGB and other images. 144 | """ 145 | # crop black border 146 | crop_params = get_black_border(rgb_image, tolerance=tolerance, cut_off=cut_off, level_diff_threshold=level_diff_threshold) 147 | cropped_images = crop_images(rgb_image, *other_images, crop_params=crop_params) 148 | 149 | # crop white border 150 | crop_params = get_white_border(cropped_images[0], tolerance=tolerance, cut_off=cut_off, level_diff_threshold=level_diff_threshold) 151 | cropped_images = crop_images(*cropped_images, crop_params=crop_params) 152 | 153 | return cropped_images 154 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/sun_rgbd_loader.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | 40 | def __call__(self, sample): 41 | image, depth = sample['image'], sample['depth'] 42 | image = self.to_tensor(image) 43 | image = self.normalize(image) 44 | depth = self.to_tensor(depth) 45 | 46 | return {'image': image, 'depth': depth, 'dataset': "sunrgbd"} 47 | 48 | def to_tensor(self, pic): 49 | 50 | if isinstance(pic, np.ndarray): 51 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 52 | return img 53 | 54 | # # handle PIL Image 55 | if pic.mode == 'I': 56 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 57 | elif pic.mode == 'I;16': 58 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 59 | else: 60 | img = torch.ByteTensor( 61 | torch.ByteStorage.from_buffer(pic.tobytes())) 62 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 63 | if pic.mode == 'YCbCr': 64 | nchannel = 3 65 | elif pic.mode == 'I;16': 66 | nchannel = 1 67 | else: 68 | nchannel = len(pic.mode) 69 | img = img.view(pic.size[1], pic.size[0], nchannel) 70 | 71 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 72 | if isinstance(img, torch.ByteTensor): 73 | return img.float() 74 | else: 75 | return img 76 | 77 | 78 | class SunRGBD(Dataset): 79 | def __init__(self, data_dir_root): 80 | # test_file_dirs = loadmat(train_test_file)['alltest'].squeeze() 81 | # all_test = [t[0].replace("/n/fs/sun3d/data/", "") for t in test_file_dirs] 82 | # self.all_test = [os.path.join(data_dir_root, t) for t in all_test] 83 | import glob 84 | self.image_files = glob.glob( 85 | os.path.join(data_dir_root, 'rgb', 'rgb', '*')) 86 | self.depth_files = [ 87 | r.replace("rgb/rgb", "gt/gt").replace("jpg", "png") for r in self.image_files] 88 | self.transform = ToTensor() 89 | 90 | def __getitem__(self, idx): 91 | image_path = self.image_files[idx] 92 | depth_path = self.depth_files[idx] 93 | 94 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 95 | depth = np.asarray(Image.open(depth_path), dtype='uint16') / 1000.0 96 | depth[depth > 8] = -1 97 | depth = depth[..., None] 98 | return self.transform(dict(image=image, depth=depth)) 99 | 100 | def __len__(self): 101 | return len(self.image_files) 102 | 103 | 104 | def get_sunrgbd_loader(data_dir_root, batch_size=1, **kwargs): 105 | dataset = SunRGBD(data_dir_root) 106 | return DataLoader(dataset, batch_size, **kwargs) 107 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/data/vkitti.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this 
permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | from torch.utils.data import Dataset, DataLoader 27 | from torchvision import transforms 28 | import os 29 | 30 | from PIL import Image 31 | import numpy as np 32 | import cv2 33 | 34 | 35 | class ToTensor(object): 36 | def __init__(self): 37 | self.normalize = transforms.Normalize( 38 | mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 39 | # self.resize = transforms.Resize((375, 1242)) 40 | 41 | def __call__(self, sample): 42 | image, depth = sample['image'], sample['depth'] 43 | 44 | image = self.to_tensor(image) 45 | image = self.normalize(image) 46 | depth = self.to_tensor(depth) 47 | 48 | # image = self.resize(image) 49 | 50 | return {'image': image, 'depth': depth, 'dataset': "vkitti"} 51 | 52 | def to_tensor(self, pic): 53 | 54 | if isinstance(pic, np.ndarray): 55 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 56 | return img 57 | 58 | # # handle PIL Image 59 | if pic.mode == 'I': 60 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 61 | elif pic.mode == 'I;16': 62 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 63 | else: 64 | img = torch.ByteTensor( 65 | torch.ByteStorage.from_buffer(pic.tobytes())) 66 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 67 | if pic.mode == 'YCbCr': 68 | nchannel = 3 69 | elif pic.mode == 'I;16': 70 | nchannel = 1 71 | else: 72 | nchannel = len(pic.mode) 73 | img = img.view(pic.size[1], pic.size[0], nchannel) 74 | 75 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 76 | if isinstance(img, torch.ByteTensor): 77 | return img.float() 78 | else: 79 | return img 80 | 81 | 82 | class VKITTI(Dataset): 83 | def __init__(self, data_dir_root, do_kb_crop=True): 84 | import glob 85 | # image paths are of the form <data_dir_root>/test_color/*.png, with depth maps in <data_dir_root>/test_depth/*.png 86 | self.image_files = glob.glob(os.path.join( 87 | data_dir_root, "test_color", '*.png')) 88 | self.depth_files = [r.replace("test_color", "test_depth") 89 | for r in self.image_files] 90 | self.do_kb_crop = do_kb_crop 91 | self.transform = ToTensor() 92 | 93 | def __getitem__(self, idx): 94 | image_path = self.image_files[idx] 95 | depth_path = self.depth_files[idx] 96 | 97 | image = Image.open(image_path) 98 | depth = Image.open(depth_path) 99 | depth = cv2.imread(depth_path, cv2.IMREAD_ANYCOLOR | 100 | cv2.IMREAD_ANYDEPTH) 101 | print("depth min max", depth.min(), depth.max()) 102 | 103 | # print(np.shape(image)) 104 | # print(np.shape(depth)) 105 | 106 | # depth[depth > 8] = -1 107 | 108 | if self.do_kb_crop and False: 109 | height = image.height 110 | width = image.width 111 | top_margin = int(height - 352) 112 | left_margin = int((width - 1216) / 2) 113 | depth = depth.crop( 114 | (left_margin, top_margin, left_margin + 1216, top_margin + 352)) 115 | image = image.crop( 116 | (left_margin, top_margin, left_margin + 1216, top_margin + 352)) 117 | # uv = uv[:, top_margin:top_margin + 352,
left_margin:left_margin + 1216] 118 | 119 | image = np.asarray(image, dtype=np.float32) / 255.0 120 | # depth = np.asarray(depth, dtype=np.uint16) /1. 121 | depth = depth[..., None] 122 | sample = dict(image=image, depth=depth) 123 | 124 | # return sample 125 | sample = self.transform(sample) 126 | 127 | if idx == 0: 128 | print(sample["image"].shape) 129 | 130 | return sample 131 | 132 | def __len__(self): 133 | return len(self.image_files) 134 | 135 | 136 | def get_vkitti_loader(data_dir_root, batch_size=1, **kwargs): 137 | dataset = VKITTI(data_dir_root) 138 | return DataLoader(dataset, batch_size, **kwargs) 139 | 140 | 141 | if __name__ == "__main__": 142 | loader = get_vkitti_loader( 143 | data_dir_root="/home/bhatsf/shortcuts/datasets/vkitti_test") 144 | print("Total files", len(loader.dataset)) 145 | for i, sample in enumerate(loader): 146 | print(sample["image"].shape) 147 | print(sample["depth"].shape) 148 | print(sample["dataset"]) 149 | print(sample['depth'].min(), sample['depth'].max()) 150 | if i > 5: 151 | break 152 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/base_models/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 
14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/builder.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from importlib import import_module 26 | from zoedepth.models.depth_model import DepthModel 27 | 28 | def build_model(config) -> DepthModel: 29 | """Builds a model from a config. The model is specified by the model name and version in the config. The model is then constructed using the build_from_config function of the model interface. 30 | This function should be used to construct models for training and evaluation. 31 | 32 | Args: 33 | config (dict): Config dict. Config is constructed in utils/config.py. Each model has its own config file(s) saved in its root model folder. 34 | 35 | Returns: 36 | torch.nn.Module: Model corresponding to name and version as specified in config 37 | """ 38 | module_name = f"zoedepth.models.{config.model}" 39 | try: 40 | module = import_module(module_name) 41 | except ModuleNotFoundError as e: 42 | # print the original error message 43 | print(e) 44 | raise ValueError( 45 | f"Model {config.model} not found. 
Refer above error for details.") from e 46 | try: 47 | get_version = getattr(module, "get_version") 48 | except AttributeError as e: 49 | raise ValueError( 50 | f"Model {config.model} has no get_version function.") from e 51 | return get_version(config.version_name).build_from_config(config) 52 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/layers/dist_layers.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | import torch.nn as nn 27 | 28 | 29 | def log_binom(n, k, eps=1e-7): 30 | """ log(nCk) using stirling approximation """ 31 | n = n + eps 32 | k = k + eps 33 | return n * torch.log(n) - k * torch.log(k) - (n-k) * torch.log(n-k+eps) 34 | 35 | 36 | class LogBinomial(nn.Module): 37 | def __init__(self, n_classes=256, act=torch.softmax): 38 | """Compute log binomial distribution for n_classes 39 | 40 | Args: 41 | n_classes (int, optional): number of output classes. Defaults to 256. 42 | """ 43 | super().__init__() 44 | self.K = n_classes 45 | self.act = act 46 | self.register_buffer('k_idx', torch.arange( 47 | 0, n_classes).view(1, -1, 1, 1)) 48 | self.register_buffer('K_minus_1', torch.Tensor( 49 | [self.K-1]).view(1, -1, 1, 1)) 50 | 51 | def forward(self, x, t=1., eps=1e-4): 52 | """Compute log binomial distribution for x 53 | 54 | Args: 55 | x (torch.Tensor - NCHW): probabilities 56 | t (float, torch.Tensor - NCHW, optional): Temperature of distribution. Defaults to 1.. 57 | eps (float, optional): Small number for numerical stability. Defaults to 1e-4. 
58 | 59 | Returns: 60 | torch.Tensor -NCHW: log binomial distribution logbinomial(p;t) 61 | """ 62 | if x.ndim == 3: 63 | x = x.unsqueeze(1) # make it nchw 64 | 65 | one_minus_x = torch.clamp(1 - x, eps, 1) 66 | x = torch.clamp(x, eps, 1) 67 | y = log_binom(self.K_minus_1, self.k_idx) + self.k_idx * \ 68 | torch.log(x) + (self.K - 1 - self.k_idx) * torch.log(one_minus_x) 69 | return self.act(y/t, dim=1) 70 | 71 | 72 | class ConditionalLogBinomial(nn.Module): 73 | def __init__(self, in_features, condition_dim, n_classes=256, bottleneck_factor=2, p_eps=1e-4, max_temp=50, min_temp=1e-7, act=torch.softmax): 74 | """Conditional Log Binomial distribution 75 | 76 | Args: 77 | in_features (int): number of input channels in main feature 78 | condition_dim (int): number of input channels in condition feature 79 | n_classes (int, optional): Number of classes. Defaults to 256. 80 | bottleneck_factor (int, optional): Hidden dim factor. Defaults to 2. 81 | p_eps (float, optional): small eps value. Defaults to 1e-4. 82 | max_temp (float, optional): Maximum temperature of output distribution. Defaults to 50. 83 | min_temp (float, optional): Minimum temperature of output distribution. Defaults to 1e-7. 84 | """ 85 | super().__init__() 86 | self.p_eps = p_eps 87 | self.max_temp = max_temp 88 | self.min_temp = min_temp 89 | self.log_binomial_transform = LogBinomial(n_classes, act=act) 90 | bottleneck = (in_features + condition_dim) // bottleneck_factor 91 | self.mlp = nn.Sequential( 92 | nn.Conv2d(in_features + condition_dim, bottleneck, 93 | kernel_size=1, stride=1, padding=0), 94 | nn.GELU(), 95 | # 2 for p linear norm, 2 for t linear norm 96 | nn.Conv2d(bottleneck, 2+2, kernel_size=1, stride=1, padding=0), 97 | nn.Softplus() 98 | ) 99 | 100 | def forward(self, x, cond): 101 | """Forward pass 102 | 103 | Args: 104 | x (torch.Tensor - NCHW): Main feature 105 | cond (torch.Tensor - NCHW): condition feature 106 | 107 | Returns: 108 | torch.Tensor: Output log binomial distribution 109 | """ 110 | pt = self.mlp(torch.concat((x, cond), dim=1)) 111 | p, t = pt[:, :2, ...], pt[:, 2:, ...] 112 | 113 | p = p + self.p_eps 114 | p = p[:, 0, ...] / (p[:, 0, ...] + p[:, 1, ...]) 115 | 116 | t = t + self.p_eps 117 | t = t[:, 0, ...] / (t[:, 0, ...] + t[:, 1, ...]) 118 | t = t.unsqueeze(1) 119 | t = (self.max_temp - self.min_temp) * t + self.min_temp 120 | 121 | return self.log_binomial_transform(p, t) 122 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/layers/patch_transformer.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 
14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | import torch.nn as nn 27 | 28 | 29 | class PatchTransformerEncoder(nn.Module): 30 | def __init__(self, in_channels, patch_size=10, embedding_dim=128, num_heads=4, use_class_token=False): 31 | """ViT-like transformer block 32 | 33 | Args: 34 | in_channels (int): Input channels 35 | patch_size (int, optional): patch size. Defaults to 10. 36 | embedding_dim (int, optional): Embedding dimension in transformer model. Defaults to 128. 37 | num_heads (int, optional): number of attention heads. Defaults to 4. 38 | use_class_token (bool, optional): Whether to use extra token at the start for global accumulation (called as "class token"). Defaults to False. 39 | """ 40 | super(PatchTransformerEncoder, self).__init__() 41 | self.use_class_token = use_class_token 42 | encoder_layers = nn.TransformerEncoderLayer( 43 | embedding_dim, num_heads, dim_feedforward=1024) 44 | self.transformer_encoder = nn.TransformerEncoder( 45 | encoder_layers, num_layers=4) # takes shape S,N,E 46 | 47 | self.embedding_convPxP = nn.Conv2d(in_channels, embedding_dim, 48 | kernel_size=patch_size, stride=patch_size, padding=0) 49 | 50 | def positional_encoding_1d(self, sequence_length, batch_size, embedding_dim, device='cpu'): 51 | """Generate positional encodings 52 | 53 | Args: 54 | sequence_length (int): Sequence length 55 | embedding_dim (int): Embedding dimension 56 | 57 | Returns: 58 | torch.Tensor SBE: Positional encodings 59 | """ 60 | position = torch.arange( 61 | 0, sequence_length, dtype=torch.float32, device=device).unsqueeze(1) 62 | index = torch.arange( 63 | 0, embedding_dim, 2, dtype=torch.float32, device=device).unsqueeze(0) 64 | div_term = torch.exp(index * (-torch.log(torch.tensor(10000.0, device=device)) / embedding_dim)) 65 | pos_encoding = position * div_term 66 | pos_encoding = torch.cat([torch.sin(pos_encoding), torch.cos(pos_encoding)], dim=1) 67 | pos_encoding = pos_encoding.unsqueeze(1).repeat(1, batch_size, 1) 68 | return pos_encoding 69 | 70 | 71 | def forward(self, x): 72 | """Forward pass 73 | 74 | Args: 75 | x (torch.Tensor - NCHW): Input feature tensor 76 | 77 | Returns: 78 | torch.Tensor - SNE: Transformer output embeddings. S - sequence length (=HW/patch_size^2), N - batch size, E - embedding dim 79 | """ 80 | embeddings = self.embedding_convPxP(x).flatten( 81 | 2) # .shape = n,c,s = n, embedding_dim, s 82 | if self.use_class_token: 83 | # extra special token at start ? 
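# (Editor's note) At this point `embeddings` has shape (N, E, S) from
# embedding_convPxP(x).flatten(2); pad=(1, 0) below pads the last (sequence)
# axis on the left, prepending one all-zero column that serves as the extra
# "class token" slot mentioned above.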
84 | embeddings = nn.functional.pad(embeddings, (1, 0)) 85 | 86 | # change to S,N,E format required by transformer 87 | embeddings = embeddings.permute(2, 0, 1) 88 | S, N, E = embeddings.shape 89 | embeddings = embeddings + self.positional_encoding_1d(S, N, E, device=embeddings.device) 90 | x = self.transformer_encoder(embeddings) # .shape = S, N, E 91 | return x 92 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/model_io.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | 27 | def load_state_dict(model, state_dict): 28 | """Load state_dict into model, handling DataParallel and DistributedDataParallel. Also checks for "model" key in state_dict. 29 | 30 | DataParallel prefixes state_dict keys with 'module.' when saving. 31 | If the model is not a DataParallel model but the state_dict is, then prefixes are removed. 32 | If the model is a DataParallel model but the state_dict is not, then prefixes are added. 33 | """ 34 | state_dict = state_dict.get('model', state_dict) 35 | # if model is a DataParallel model, then state_dict keys are prefixed with 'module.' 36 | 37 | do_prefix = isinstance( 38 | model, (torch.nn.DataParallel, torch.nn.parallel.DistributedDataParallel)) 39 | state = {} 40 | for k, v in state_dict.items(): 41 | if k.startswith('module.') and not do_prefix: 42 | k = k[7:] 43 | 44 | if not k.startswith('module.') and do_prefix: 45 | k = 'module.' + k 46 | 47 | state[k] = v 48 | 49 | model.load_state_dict(state) 50 | print("Loaded successfully") 51 | return model 52 | 53 | 54 | def load_wts(model, checkpoint_path): 55 | ckpt = torch.load(checkpoint_path, map_location='cpu') 56 | return load_state_dict(model, ckpt) 57 | 58 | 59 | def load_state_dict_from_url(model, url, **kwargs): 60 | state_dict = torch.hub.load_state_dict_from_url(url, map_location='cpu', **kwargs) 61 | return load_state_dict(model, state_dict) 62 | 63 | 64 | def load_state_from_resource(model, resource: str): 65 | """Loads weights to the model from a given resource. A resource can be of following types: 66 | 1. URL. Prefixed with "url::" 67 | e.g. url::http(s)://url.resource.com/ckpt.pt 68 | 69 | 2. Local path. Prefixed with "local::" 70 | e.g. 
local::/path/to/ckpt.pt 71 | 72 | 73 | Args: 74 | model (torch.nn.Module): Model 75 | resource (str): resource string 76 | 77 | Returns: 78 | torch.nn.Module: Model with loaded weights 79 | """ 80 | print(f"Using pretrained resource {resource}") 81 | 82 | if resource.startswith('url::'): 83 | url = resource.split('url::')[1] 84 | return load_state_dict_from_url(model, url, progress=True) 85 | 86 | elif resource.startswith('local::'): 87 | path = resource.split('local::')[1] 88 | return load_wts(model, path) 89 | 90 | else: 91 | raise ValueError("Invalid resource type, only url:: and local:: are supported") 92 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/zoedepth/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
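# (Editor's note — hedged sketch, not part of the original file.) The `get_version`
# callable defined at the end of this module is what
# zoedepth.models.builder.build_model resolves via
# getattr(import_module(f"zoedepth.models.{config.model}"), "get_version"). Used
# directly it would look like:
#
#     from zoedepth.models.zoedepth import get_version
#     ZoeDepthCls = get_version("v1")   # only "v1" is registered in all_versions below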
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from .zoedepth_v1 import ZoeDepth 26 | 27 | all_versions = { 28 | "v1": ZoeDepth, 29 | } 30 | 31 | get_version = lambda v : all_versions[v] -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/zoedepth/config_zoedepth.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "ZoeDepth", 4 | "version_name": "v1", 5 | "n_bins": 64, 6 | "bin_embedding_dim": 128, 7 | "bin_centers_type": "softplus", 8 | "n_attractors":[16, 8, 4, 1], 9 | "attractor_alpha": 1000, 10 | "attractor_gamma": 2, 11 | "attractor_kind" : "mean", 12 | "attractor_type" : "inv", 13 | "midas_model_type" : "DPT_BEiT_L_384", 14 | "min_temp": 0.0212, 15 | "max_temp": 50.0, 16 | "output_distribution": "logbinomial", 17 | "memory_efficient": true, 18 | "inverse_midas": false, 19 | "img_size": [384, 512] 20 | }, 21 | 22 | "train": { 23 | "train_midas": true, 24 | "use_pretrained_midas": true, 25 | "trainer": "zoedepth", 26 | "epochs": 5, 27 | "bs": 16, 28 | "optim_kwargs": {"lr": 0.000161, "wd": 0.01}, 29 | "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true}, 30 | "same_lr": false, 31 | "w_si": 1, 32 | "w_domain": 0.2, 33 | "w_reg": 0, 34 | "w_grad": 0, 35 | "avoid_boundary": false, 36 | "random_crop": false, 37 | "input_width": 640, 38 | "input_height": 480, 39 | "midas_lr_factor": 1, 40 | "encoder_lr_factor":10, 41 | "pos_enc_lr_factor":10, 42 | "freeze_midas_bn": true 43 | 44 | }, 45 | 46 | "infer":{ 47 | "train_midas": false, 48 | "use_pretrained_midas": false, 49 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_N.pt", 50 | "force_keep_ar": true 51 | }, 52 | 53 | "eval":{ 54 | "train_midas": false, 55 | "use_pretrained_midas": false, 56 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_N.pt" 57 | } 58 | } -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/zoedepth/config_zoedepth_kitti.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "bin_centers_type": "normed", 4 | "img_size": [384, 768] 5 | }, 6 | 7 | "train": { 8 | }, 9 | 10 | "infer":{ 11 | "train_midas": false, 12 | "use_pretrained_midas": false, 13 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt", 14 | "force_keep_ar": true 15 | }, 16 | 17 | "eval":{ 18 | "train_midas": false, 19 | "use_pretrained_midas": false, 20 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt" 21 | } 22 | } -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/zoedepth_nk/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the 
following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from .zoedepth_nk_v1 import ZoeDepthNK 26 | 27 | all_versions = { 28 | "v1": ZoeDepthNK, 29 | } 30 | 31 | get_version = lambda v : all_versions[v] -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/models/zoedepth_nk/config_zoedepth_nk.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "ZoeDepthNK", 4 | "version_name": "v1", 5 | "bin_conf" : [ 6 | { 7 | "name": "nyu", 8 | "n_bins": 64, 9 | "min_depth": 1e-3, 10 | "max_depth": 10.0 11 | }, 12 | { 13 | "name": "kitti", 14 | "n_bins": 64, 15 | "min_depth": 1e-3, 16 | "max_depth": 80.0 17 | } 18 | ], 19 | "bin_embedding_dim": 128, 20 | "bin_centers_type": "softplus", 21 | "n_attractors":[16, 8, 4, 1], 22 | "attractor_alpha": 1000, 23 | "attractor_gamma": 2, 24 | "attractor_kind" : "mean", 25 | "attractor_type" : "inv", 26 | "min_temp": 0.0212, 27 | "max_temp": 50.0, 28 | "memory_efficient": true, 29 | "midas_model_type" : "DPT_BEiT_L_384", 30 | "img_size": [384, 512] 31 | }, 32 | 33 | "train": { 34 | "train_midas": true, 35 | "use_pretrained_midas": true, 36 | "trainer": "zoedepth_nk", 37 | "epochs": 5, 38 | "bs": 16, 39 | "optim_kwargs": {"lr": 0.0002512, "wd": 0.01}, 40 | "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true}, 41 | "same_lr": false, 42 | "w_si": 1, 43 | "w_domain": 100, 44 | "avoid_boundary": false, 45 | "random_crop": false, 46 | "input_width": 640, 47 | "input_height": 480, 48 | "w_grad": 0, 49 | "w_reg": 0, 50 | "midas_lr_factor": 10, 51 | "encoder_lr_factor":10, 52 | "pos_enc_lr_factor":10 53 | }, 54 | 55 | "infer": { 56 | "train_midas": false, 57 | "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt", 58 | "use_pretrained_midas": false, 59 | "force_keep_ar": true 60 | }, 61 | 62 | "eval": { 63 | "train_midas": false, 64 | "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt", 65 | "use_pretrained_midas": false 66 | } 67 | } -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/trainers/builder.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished 
to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from importlib import import_module 26 | 27 | 28 | def get_trainer(config): 29 | """Builds and returns a trainer based on the config. 30 | 31 | Args: 32 | config (dict): the config dict (typically constructed using utils.config.get_config) 33 | config.trainer (str): the name of the trainer to use. The module named "{config.trainer}_trainer" must exist in trainers root module 34 | 35 | Raises: 36 | ValueError: If the specified trainer does not exist under trainers/ folder 37 | 38 | Returns: 39 | Trainer (inherited from zoedepth.trainers.BaseTrainer): The Trainer object 40 | """ 41 | assert "trainer" in config and config.trainer is not None and config.trainer != '', "Trainer not specified. Config: {0}".format( 42 | config) 43 | try: 44 | Trainer = getattr(import_module( 45 | f"zoedepth.trainers.{config.trainer}_trainer"), 'Trainer') 46 | except ModuleNotFoundError as e: 47 | raise ValueError(f"Trainer {config.trainer}_trainer not found.") from e 48 | return Trainer 49 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/utils/arg_utils.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def infer_type(x): # hacky way to infer type from string args 4 | if not isinstance(x, str): 5 | return x 6 | 7 | try: 8 | x = int(x) 9 | return x 10 | except ValueError: 11 | pass 12 | 13 | try: 14 | x = float(x) 15 | return x 16 | except ValueError: 17 | pass 18 | 19 | return x 20 | 21 | 22 | def parse_unknown(unknown_args): 23 | clean = [] 24 | for a in unknown_args: 25 | if "=" in a: 26 | k, v = a.split("=") 27 | clean.extend([k, v]) 28 | else: 29 | clean.append(a) 30 | 31 | keys = clean[::2] 32 | values = clean[1::2] 33 | return {k.replace("--", ""): infer_type(v) for k, v in zip(keys, values)} 34 | -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/utils/easydict/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | EasyDict 3 | Copy/pasted from https://github.com/makinacorpus/easydict 4 | Original author: Mathieu Leplatre 5 | """ 6 | 7 | class EasyDict(dict): 8 | """ 9 | Get attributes 10 | 11 | >>> d = EasyDict({'foo':3}) 12 | >>> d['foo'] 13 | 3 14 | >>> d.foo 15 | 3 16 | >>> d.bar 17 | Traceback (most recent call last): 18 | ... 19 | AttributeError: 'EasyDict' object has no attribute 'bar' 20 | 21 | Works recursively 22 | 23 | >>> d = EasyDict({'foo':3, 'bar':{'x':1, 'y':2}}) 24 | >>> isinstance(d.bar, dict) 25 | True 26 | >>> d.bar.x 27 | 1 28 | 29 | Bullet-proof 30 | 31 | >>> EasyDict({}) 32 | {} 33 | >>> EasyDict(d={}) 34 | {} 35 | >>> EasyDict(None) 36 | {} 37 | >>> d = {'a': 1} 38 | >>> EasyDict(**d) 39 | {'a': 1} 40 | >>> EasyDict((('a', 1), ('b', 2))) 41 | {'a': 1, 'b': 2} 42 | 43 | Set attributes 44 | 45 | >>> d = EasyDict() 46 | >>> d.foo = 3 47 | >>> d.foo 48 | 3 49 | >>> d.bar = {'prop': 'value'} 50 | >>> d.bar.prop 51 | 'value' 52 | >>> d 53 | {'foo': 3, 'bar': {'prop': 'value'}} 54 | >>> d.bar.prop = 'newer' 55 | >>> d.bar.prop 56 | 'newer' 57 | 58 | 59 | Values extraction 60 | 61 | >>> d = EasyDict({'foo':0, 'bar':[{'x':1, 'y':2}, {'x':3, 'y':4}]}) 62 | >>> isinstance(d.bar, list) 63 | True 64 | >>> from operator import attrgetter 65 | >>> list(map(attrgetter('x'), d.bar)) 66 | [1, 3] 67 | >>> list(map(attrgetter('y'), d.bar)) 68 | [2, 4] 69 | >>> d = EasyDict() 70 | >>> list(d.keys()) 71 | [] 72 | >>> d = EasyDict(foo=3, bar=dict(x=1, y=2)) 73 | >>> d.foo 74 | 3 75 | >>> d.bar.x 76 | 1 77 | 78 | Still like a dict though 79 | 80 | >>> o = EasyDict({'clean':True}) 81 | >>> list(o.items()) 82 | [('clean', True)] 83 | 84 | And like a class 85 | 86 | >>> class Flower(EasyDict): 87 | ... power = 1 88 | ... 89 | >>> f = Flower() 90 | >>> f.power 91 | 1 92 | >>> f = Flower({'height': 12}) 93 | >>> f.height 94 | 12 95 | >>> f['power'] 96 | 1 97 | >>> sorted(f.keys()) 98 | ['height', 'power'] 99 | 100 | update and pop items 101 | >>> d = EasyDict(a=1, b='2') 102 | >>> e = EasyDict(c=3.0, a=9.0) 103 | >>> d.update(e) 104 | >>> d.c 105 | 3.0 106 | >>> d['c'] 107 | 3.0 108 | >>> d.get('c') 109 | 3.0 110 | >>> d.update(a=4, b=4) 111 | >>> d.b 112 | 4 113 | >>> d.pop('a') 114 | 4 115 | >>> d.a 116 | Traceback (most recent call last): 117 | ... 
118 | AttributeError: 'EasyDict' object has no attribute 'a' 119 | """ 120 | def __init__(self, d=None, **kwargs): 121 | if d is None: 122 | d = {} 123 | else: 124 | d = dict(d) 125 | if kwargs: 126 | d.update(**kwargs) 127 | for k, v in d.items(): 128 | setattr(self, k, v) 129 | # Class attributes 130 | for k in self.__class__.__dict__.keys(): 131 | if not (k.startswith('__') and k.endswith('__')) and not k in ('update', 'pop'): 132 | setattr(self, k, getattr(self, k)) 133 | 134 | def __setattr__(self, name, value): 135 | if isinstance(value, (list, tuple)): 136 | value = [self.__class__(x) 137 | if isinstance(x, dict) else x for x in value] 138 | elif isinstance(value, dict) and not isinstance(value, self.__class__): 139 | value = self.__class__(value) 140 | super(EasyDict, self).__setattr__(name, value) 141 | super(EasyDict, self).__setitem__(name, value) 142 | 143 | __setitem__ = __setattr__ 144 | 145 | def update(self, e=None, **f): 146 | d = e or dict() 147 | d.update(f) 148 | for k in d: 149 | setattr(self, k, d[k]) 150 | 151 | def pop(self, k, d=None): 152 | delattr(self, k) 153 | return super(EasyDict, self).pop(k, d) 154 | 155 | 156 | if __name__ == "__main__": 157 | import doctest 158 | doctest.testmod() -------------------------------------------------------------------------------- /ZoeDepth/zoedepth/utils/geometry.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import numpy as np 26 | 27 | def get_intrinsics(H,W): 28 | """ 29 | Intrinsics for a pinhole camera model. 30 | Assume fov of 55 degrees and central principal point. 
31 | """ 32 | f = 0.5 * W / np.tan(0.5 * 55 * np.pi / 180.0) 33 | cx = 0.5 * W 34 | cy = 0.5 * H 35 | return np.array([[f, 0, cx], 36 | [0, f, cy], 37 | [0, 0, 1]]) 38 | 39 | def depth_to_points(depth, R=None, t=None): 40 | 41 | K = get_intrinsics(depth.shape[1], depth.shape[2]) 42 | Kinv = np.linalg.inv(K) 43 | if R is None: 44 | R = np.eye(3) 45 | if t is None: 46 | t = np.zeros(3) 47 | 48 | # M converts from your coordinate to PyTorch3D's coordinate system 49 | M = np.eye(3) 50 | M[0, 0] = -1.0 51 | M[1, 1] = -1.0 52 | 53 | height, width = depth.shape[1:3] 54 | 55 | x = np.arange(width) 56 | y = np.arange(height) 57 | coord = np.stack(np.meshgrid(x, y), -1) 58 | coord = np.concatenate((coord, np.ones_like(coord)[:, :, [0]]), -1) # z=1 59 | coord = coord.astype(np.float32) 60 | # coord = torch.as_tensor(coord, dtype=torch.float32, device=device) 61 | coord = coord[None] # bs, h, w, 3 62 | 63 | D = depth[:, :, :, None, None] 64 | # print(D.shape, Kinv[None, None, None, ...].shape, coord[:, :, :, :, None].shape ) 65 | pts3D_1 = D * Kinv[None, None, None, ...] @ coord[:, :, :, :, None] 66 | # pts3D_1 live in your coordinate system. Convert them to Py3D's 67 | pts3D_1 = M[None, None, None, ...] @ pts3D_1 68 | # from reference to target viewpoint 69 | pts3D_2 = R[None, None, None, ...] @ pts3D_1 + t[None, None, None, :, None] 70 | # pts3D_2 = pts3D_1 71 | # depth_2 = pts3D_2[:, :, :, 2, :] # b,1,h,w 72 | return pts3D_2[:, :, :, :3, 0][0] 73 | 74 | 75 | def create_triangles(h, w, mask=None): 76 | """ 77 | Reference: https://github.com/google-research/google-research/blob/e96197de06613f1b027d20328e06d69829fa5a89/infinite_nature/render_utils.py#L68 78 | Creates mesh triangle indices from a given pixel grid size. 79 | This function is not and need not be differentiable as triangle indices are 80 | fixed. 81 | Args: 82 | h: (int) denoting the height of the image. 83 | w: (int) denoting the width of the image. 84 | Returns: 85 | triangles: 2D numpy array of indices (int) with shape (2(W-1)(H-1) x 3) 86 | """ 87 | x, y = np.meshgrid(range(w - 1), range(h - 1)) 88 | tl = y * w + x 89 | tr = y * w + x + 1 90 | bl = (y + 1) * w + x 91 | br = (y + 1) * w + x + 1 92 | triangles = np.array([tl, bl, tr, br, tr, bl]) 93 | triangles = np.transpose(triangles, (1, 2, 0)).reshape( 94 | ((w - 1) * (h - 1) * 2, 3)) 95 | if mask is not None: 96 | mask = mask.reshape(-1) 97 | triangles = triangles[mask[triangles].all(1)] 98 | return triangles 99 | -------------------------------------------------------------------------------- /arguments.py: -------------------------------------------------------------------------------- 1 | ### 2 | # Copyright (C) 2023, Computer Vision Lab, Seoul National University, https://cv.snu.ac.kr 3 | # For permission requests, please contact robot0321@snu.ac.kr, esw0116@snu.ac.kr, namhj28@gmail.com, jarin.lee@gmail.com. 4 | # All rights reserved.
5 | ### 6 | import numpy as np 7 | 8 | 9 | class GSParams: 10 | def __init__(self): 11 | self.sh_degree = 3 12 | self.images = "images" 13 | self.resolution = -1 14 | self.white_background = False 15 | self.data_device = "cuda" 16 | self.eval = False 17 | self.use_depth = False 18 | 19 | self.iterations = 2990#3_000 20 | self.position_lr_init = 0.00016 21 | self.position_lr_final = 0.0000016 22 | self.position_lr_delay_mult = 0.01 23 | self.position_lr_max_steps = 2990#3_000 24 | self.feature_lr = 0.0025 25 | self.opacity_lr = 0.05 26 | self.scaling_lr = 0.005 27 | self.rotation_lr = 0.001 28 | self.percent_dense = 0.01 29 | self.lambda_dssim = 0.2 30 | self.densification_interval = 100 31 | self.opacity_reset_interval = 3000 32 | self.densify_from_iter = 500 33 | self.densify_until_iter = 15_000 34 | self.densify_grad_threshold = 0.0002 35 | 36 | self.convert_SHs_python = False 37 | self.compute_cov3D_python = False 38 | self.debug = False 39 | 40 | 41 | class CameraParams: 42 | def __init__(self, H: int = 512, W: int = 512): 43 | self.H = H 44 | self.W = W 45 | self.focal = (5.8269e+02, 5.8269e+02) 46 | self.fov = (2*np.arctan(self.W / (2*self.focal[0])), 2*np.arctan(self.H / (2*self.focal[1]))) 47 | self.K = np.array([ 48 | [self.focal[0], 0., self.W/2], 49 | [0., self.focal[1], self.H/2], 50 | [0., 0., 1.], 51 | ]).astype(np.float32) -------------------------------------------------------------------------------- /assets/animestreet2_back_rgb.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/assets/animestreet2_back_rgb.mp4 -------------------------------------------------------------------------------- /assets/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/assets/demo.gif -------------------------------------------------------------------------------- /assets/demo.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/assets/demo.mp4 -------------------------------------------------------------------------------- /assets/fig5ours_360_rgb.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/assets/fig5ours_360_rgb.mp4 -------------------------------------------------------------------------------- /assets/logo_color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/assets/logo_color.png -------------------------------------------------------------------------------- /assets/logo_cvlab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/assets/logo_cvlab.png -------------------------------------------------------------------------------- /assets/waterfall_back_rgb.mp4: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/assets/waterfall_back_rgb.mp4 -------------------------------------------------------------------------------- /examples/Image002_modernvilla.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/Image002_modernvilla.jpg -------------------------------------------------------------------------------- /examples/Image002_modernvilla.txt: -------------------------------------------------------------------------------- 1 | ultra-modern mega villa by the sea with swimming pool and green space with beautiful open space and tropical paradise green space and guest annex, bright and sunny weather 2 | -------------------------------------------------------------------------------- /examples/Image002_modernvilla_negative.txt: -------------------------------------------------------------------------------- 1 | trees, front objects, (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale), (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale) 2 | -------------------------------------------------------------------------------- /examples/Image003_fantasy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/Image003_fantasy.jpg -------------------------------------------------------------------------------- /examples/Image003_fantasy.txt: -------------------------------------------------------------------------------- 1 | A vibrant, colorful floating community city, clouds above a beautiful, enchanted landscape filled with whimsical flora, enchanted forest landscape, Magical and dreamy woodland with vibrant green foliage and sparkling flowers, Landscape with twisted trees and vines, natural lighting and dark shadows, unique fantastical elements like floating islands and floating orbs, Highly detailed vegetation and foliage, deep contrast and color vibrancy, texture and intricate details in a floating element 2 | -------------------------------------------------------------------------------- /examples/Image003_fantasy_negative.txt: -------------------------------------------------------------------------------- 1 | (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale), (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale) 2 | -------------------------------------------------------------------------------- /examples/Image005_fruitmarket.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/Image005_fruitmarket.jpg -------------------------------------------------------------------------------- /examples/Image005_fruitmarket.txt: -------------------------------------------------------------------------------- 1 | 4k, best quality, grocery store with big brick roads and wooden hard rooftop 2 | 
-------------------------------------------------------------------------------- /examples/Image005_fruitmarket_negative.txt: -------------------------------------------------------------------------------- 1 | photo frame, frame, boarder, simple color, (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale), (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale) 2 | -------------------------------------------------------------------------------- /examples/Image008_waterfall.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/Image008_waterfall.jpg -------------------------------------------------------------------------------- /examples/Image008_waterfall.txt: -------------------------------------------------------------------------------- 1 | A dense forest with hardwood trees on the stones. a lake is surrounded by wet stones and pebbles. realistic and intricate details, highly detailed outdoor photo. 2 | -------------------------------------------------------------------------------- /examples/Image009_spacestation.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/Image009_spacestation.jpg -------------------------------------------------------------------------------- /examples/Image009_spacestation.txt: -------------------------------------------------------------------------------- 1 | inside the space station, space control machines with many electric lines, 4k, best quality 2 | -------------------------------------------------------------------------------- /examples/Image009_spacestation_negative.txt: -------------------------------------------------------------------------------- 1 | astronaut, people, scientist, person, photo frame, frame, boarder, simple color, (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale), (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale) 2 | -------------------------------------------------------------------------------- /examples/Image011_lego.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/Image011_lego.jpg -------------------------------------------------------------------------------- /examples/Image011_lego.txt: -------------------------------------------------------------------------------- 1 | (Brick studs in a certain pattern:3), (best lego man face:1.4), best quality, lego city with lego shops, lego road with street lamp, cars and lego mans on the street, lego trees and lake at a park 2 | -------------------------------------------------------------------------------- /examples/Image011_lego_negative.txt: -------------------------------------------------------------------------------- 1 | shining effect, shining light, (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale), (worst quality:2), (low 
quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale) 2 | -------------------------------------------------------------------------------- /examples/Image012_whitecat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/Image012_whitecat.jpg -------------------------------------------------------------------------------- /examples/Image012_whitecat.txt: -------------------------------------------------------------------------------- 1 | a cat sitting on the street, white house with white stairs, trees near the house, gray blocks on the street, 2 | -------------------------------------------------------------------------------- /examples/Image012_whitecat_2nd.txt: -------------------------------------------------------------------------------- 1 | A number of flower buckets, small bushes and many pebbles on the ground 2 | -------------------------------------------------------------------------------- /examples/Image012_whitecat_negative.txt: -------------------------------------------------------------------------------- 1 | shining effect, shining light, (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale), (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale) 2 | -------------------------------------------------------------------------------- /examples/Image014_animestreet.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/Image014_animestreet.jpg -------------------------------------------------------------------------------- /examples/Image014_animestreet.txt: -------------------------------------------------------------------------------- 1 | best quality, 4k, anime-style, anime, manga style, a long anime-style road with anime-blocks and little anime-grass, anime-houses and anime-tree on the side of the anime-style road, wide anime-style bright blue sky, shiny and beautiful day, bright scene 2 | -------------------------------------------------------------------------------- /examples/Image014_animestreet_negative.txt: -------------------------------------------------------------------------------- 1 | defocus, blurry, shadow, character, person, people, photo frame, frame, boarder, simple color, dark sky, dark scene, (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale), (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale) 2 | -------------------------------------------------------------------------------- /examples/Image015_animelakehouse.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/Image015_animelakehouse.jpg -------------------------------------------------------------------------------- /examples/Image015_animelakehouse.txt: -------------------------------------------------------------------------------- 1 | anime style, animation, best quality, a boat on lake, trees 
and rocks near the lake. a house and port in front of a house 2 | -------------------------------------------------------------------------------- /examples/Image015_animelakehouse_negative.txt: -------------------------------------------------------------------------------- 1 | (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale), (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale) 2 | -------------------------------------------------------------------------------- /examples/Image018_animesummerhome.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/Image018_animesummerhome.jpg -------------------------------------------------------------------------------- /examples/Image018_animesummerhome.txt: -------------------------------------------------------------------------------- 1 | Anime-style, Japanese-style anime house overlooking the anime sea with anime tatami mats, anime curtains blowing in the wind, anme clouds visible in the anime sky, anime livingroom with anime flowers 2 | -------------------------------------------------------------------------------- /examples/Image018_animesummerhome_negative.txt: -------------------------------------------------------------------------------- 1 | photo frame, frame, boarder, simple color, (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale), (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale) 2 | -------------------------------------------------------------------------------- /examples/Image031_fruit.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/Image031_fruit.jpg -------------------------------------------------------------------------------- /examples/Image031_fruit.txt: -------------------------------------------------------------------------------- 1 | 4k, best quality, grocery store with block tiles, wooden hard rooftop, various fruits upon the wooden tables, artificial trees on the tables. 
2 | -------------------------------------------------------------------------------- /examples/Image031_fruit_negative.txt: -------------------------------------------------------------------------------- 1 | photo frame, frame, boarder, simple color, (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale), (worst quality:2), (low quality:2), (normal quality:2), lowres, bad anatomy, bad hands, normal quality, ((monochrome)), ((grayscale) 2 | -------------------------------------------------------------------------------- /examples/animelake_Back_and_forth_60fps.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/animelake_Back_and_forth_60fps.mp4 -------------------------------------------------------------------------------- /examples/animelake_Headbanging_60fps.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/animelake_Headbanging_60fps.mp4 -------------------------------------------------------------------------------- /examples/animelake_LLFF_60fps.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/animelake_LLFF_60fps.mp4 -------------------------------------------------------------------------------- /examples/cabin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/cabin.png -------------------------------------------------------------------------------- /examples/cabin.txt: -------------------------------------------------------------------------------- 1 | Magician's magical cabin alone in a serene forest -------------------------------------------------------------------------------- /examples/christmas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/christmas.png -------------------------------------------------------------------------------- /examples/christmas.txt: -------------------------------------------------------------------------------- 1 | Cozy livingroom in christmas 2 | -------------------------------------------------------------------------------- /examples/doge.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/doge.jpg -------------------------------------------------------------------------------- /examples/doge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/doge.png -------------------------------------------------------------------------------- /examples/doge.txt: -------------------------------------------------------------------------------- 1 | a cozy livingroom 
-------------------------------------------------------------------------------- /examples/elf.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/elf.jpg -------------------------------------------------------------------------------- /examples/elf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/elf.png -------------------------------------------------------------------------------- /examples/elf.txt: -------------------------------------------------------------------------------- 1 | serene deep forest -------------------------------------------------------------------------------- /examples/fantasy_Back_and_forth_60fps.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/fantasy_Back_and_forth_60fps.mp4 -------------------------------------------------------------------------------- /examples/fantasy_Headbanging_60fps.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/fantasy_Headbanging_60fps.mp4 -------------------------------------------------------------------------------- /examples/fantasy_LLFF_60fps.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/fantasy_LLFF_60fps.mp4 -------------------------------------------------------------------------------- /examples/girl.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/girl.jpg -------------------------------------------------------------------------------- /examples/girl.txt: -------------------------------------------------------------------------------- 1 | dark messy room, noir style, indoors, bottle, shoe soles, jacket, cup, window, blurry, black footwear, depth of field, box, couch, table, gun, chair, foreshortening -------------------------------------------------------------------------------- /examples/image020.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/image020.png -------------------------------------------------------------------------------- /examples/image020.txt: -------------------------------------------------------------------------------- 1 | High-resolution photography kitchen design, wooden floor, small windows opening onto the garden, Bauhaus furniture and decoration, high ceiling, beige blue salmon pastel palette, interior design magazine, cozy atmosphere; 8k, intricate detail, photorealistic, realistic light, wide angle, kinfolk photography, A+D architecture, Kitchen Sink, Basket of fruits and vegetables, a bottle of drinking water, walls painted magazine style photo, looking towards a sink under a window, with a door on the left of the sink with a 25 cm distance 
from the kitchen, the kitchen is an L shaped starting from the right corner, on the far right a fridge nest to it a stove, next the dishwasher then the sink, a smokey grey kitchen with modern touches, taupe walls, a taup ceiling with spotlights inside the ceiling with 90 cm distance, wooden parquet floor -------------------------------------------------------------------------------- /examples/island.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/island.png -------------------------------------------------------------------------------- /examples/island.txt: -------------------------------------------------------------------------------- 1 | Tiny island of wonder -------------------------------------------------------------------------------- /examples/ruin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/examples/ruin.png -------------------------------------------------------------------------------- /examples/ruin.txt: -------------------------------------------------------------------------------- 1 | Postapocalyptic city in desert -------------------------------------------------------------------------------- /gaussian_renderer/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | import math 14 | from depth_diff_gaussian_rasterization_min import GaussianRasterizationSettings, GaussianRasterizer 15 | from scene.gaussian_model import GaussianModel 16 | from utils.sh import eval_sh 17 | 18 | def render(viewpoint_camera, pc: GaussianModel, opt, bg_color: torch.Tensor, scaling_modifier=1.0, override_color=None, render_only=False): 19 | """ 20 | Render the scene. 21 | 22 | Background tensor (bg_color) must be on GPU! 23 | """ 24 | 25 | # Create zero tensor. We will use it to make pytorch return gradients of the 2D (screen-space) means 26 | screenspace_points = torch.zeros_like(pc.get_xyz, dtype=pc.get_xyz.dtype, requires_grad=True, device="cuda") + 0 27 | try: 28 | screenspace_points.retain_grad() 29 | except: 30 | pass 31 | 32 | # Set up rasterization configuration 33 | tanfovx = math.tan(viewpoint_camera.FoVx * 0.5) 34 | tanfovy = math.tan(viewpoint_camera.FoVy * 0.5) 35 | 36 | raster_settings = GaussianRasterizationSettings( 37 | image_height=int(viewpoint_camera.image_height), 38 | image_width=int(viewpoint_camera.image_width), 39 | tanfovx=tanfovx, 40 | tanfovy=tanfovy, 41 | bg=bg_color, 42 | scale_modifier=scaling_modifier, 43 | viewmatrix=viewpoint_camera.world_view_transform, 44 | projmatrix=viewpoint_camera.full_proj_transform, 45 | sh_degree=pc.active_sh_degree, 46 | campos=viewpoint_camera.camera_center, 47 | prefiltered=False, 48 | debug=opt.debug 49 | ) 50 | 51 | rasterizer = GaussianRasterizer(raster_settings=raster_settings) 52 | 53 | means3D = pc.get_xyz 54 | means2D = screenspace_points 55 | opacity = pc.get_opacity 56 | 57 | # If precomputed 3d covariance is provided, use it. 
If not, then it will be computed from 58 | # scaling / rotation by the rasterizer. 59 | scales = None 60 | rotations = None 61 | cov3D_precomp = None 62 | if opt.compute_cov3D_python: 63 | cov3D_precomp = pc.get_covariance(scaling_modifier) 64 | else: 65 | scales = pc.get_scaling 66 | rotations = pc.get_rotation 67 | 68 | # If precomputed colors are provided, use them. Otherwise, if it is desired to precompute colors 69 | # from SHs in Python, do it. If not, then SH -> RGB conversion will be done by rasterizer. 70 | shs = None 71 | colors_precomp = None 72 | if override_color is None: 73 | if opt.convert_SHs_python: 74 | shs_view = pc.get_features.transpose(1, 2).view(-1, 3, (pc.max_sh_degree+1)**2) 75 | dir_pp = (pc.get_xyz - viewpoint_camera.camera_center.repeat(pc.get_features.shape[0], 1)) 76 | dir_pp_normalized = dir_pp/dir_pp.norm(dim=1, keepdim=True) 77 | sh2rgb = eval_sh(pc.active_sh_degree, shs_view, dir_pp_normalized) 78 | colors_precomp = torch.clamp_min(sh2rgb + 0.5, 0.0) 79 | else: 80 | shs = pc.get_features 81 | else: 82 | colors_precomp = override_color 83 | 84 | # Rasterize visible Gaussians to image, obtain their radii (on screen). 85 | rendered_image, radii, depth = rasterizer( 86 | means3D = means3D, 87 | means2D = means2D, 88 | shs = shs, 89 | colors_precomp = colors_precomp, 90 | opacities = opacity, 91 | scales = scales, 92 | rotations = rotations, 93 | cov3D_precomp = cov3D_precomp) 94 | 95 | # Those Gaussians that were frustum culled or had a radius of 0 were not visible. 96 | # They will be excluded from value updates used in the splitting criteria. 97 | if render_only: 98 | return {"render": rendered_image, "depth": depth} 99 | else: 100 | return {"render": rendered_image, 101 | "viewspace_points": screenspace_points, 102 | "visibility_filter" : radii > 0, 103 | "radii": radii, 104 | "depth": depth} 105 | -------------------------------------------------------------------------------- /gaussian_renderer/network_gui.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | import traceback 14 | import socket 15 | import json 16 | from scene.cameras import MiniCam 17 | 18 | host = "127.0.0.1" 19 | port = 6009 20 | 21 | conn = None 22 | addr = None 23 | 24 | listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 25 | 26 | def init(wish_host, wish_port): 27 | global host, port, listener 28 | host = wish_host 29 | port = wish_port 30 | listener.bind((host, port)) 31 | listener.listen() 32 | listener.settimeout(0) 33 | 34 | def try_connect(): 35 | global conn, addr, listener 36 | try: 37 | conn, addr = listener.accept() 38 | print(f"\nConnected by {addr}") 39 | conn.settimeout(None) 40 | except Exception as inst: 41 | pass 42 | 43 | def read(): 44 | global conn 45 | messageLength = conn.recv(4) 46 | messageLength = int.from_bytes(messageLength, 'little') 47 | message = conn.recv(messageLength) 48 | return json.loads(message.decode("utf-8")) 49 | 50 | def send(message_bytes, verify): 51 | global conn 52 | if message_bytes != None: 53 | conn.sendall(message_bytes) 54 | conn.sendall(len(verify).to_bytes(4, 'little')) 55 | conn.sendall(bytes(verify, 'ascii')) 56 | 57 | def receive(): 58 | message = read() 59 | 60 | width = message["resolution_x"] 61 | height = message["resolution_y"] 62 | 63 | if width != 0 and height != 0: 64 | try: 65 | do_training = bool(message["train"]) 66 | fovy = message["fov_y"] 67 | fovx = message["fov_x"] 68 | znear = message["z_near"] 69 | zfar = message["z_far"] 70 | do_shs_python = bool(message["shs_python"]) 71 | do_rot_scale_python = bool(message["rot_scale_python"]) 72 | keep_alive = bool(message["keep_alive"]) 73 | scaling_modifier = message["scaling_modifier"] 74 | world_view_transform = torch.reshape(torch.tensor(message["view_matrix"]), (4, 4)).cuda() 75 | world_view_transform[:,1] = -world_view_transform[:,1] 76 | world_view_transform[:,2] = -world_view_transform[:,2] 77 | full_proj_transform = torch.reshape(torch.tensor(message["view_projection_matrix"]), (4, 4)).cuda() 78 | full_proj_transform[:,1] = -full_proj_transform[:,1] 79 | custom_cam = MiniCam(width, height, fovy, fovx, znear, zfar, world_view_transform, full_proj_transform) 80 | except Exception as e: 81 | print("") 82 | traceback.print_exc() 83 | raise e 84 | return custom_cam, do_training, do_shs_python, do_rot_scale_python, keep_alive, scaling_modifier 85 | else: 86 | return None, None, None, None, None, None -------------------------------------------------------------------------------- /packages.txt: -------------------------------------------------------------------------------- 1 | libglm-dev 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | timm==0.6.7 2 | torch==2.0.1 3 | plyfile==0.8.1 4 | diffusers==0.23.1 5 | peft==0.6.2 6 | torchvision 7 | scipy 8 | numpy 9 | imageio[ffmpeg] 10 | opencv-python 11 | Pillow 12 | open3d 13 | gradio 14 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | from PIL import Image 4 | 5 | from luciddreamer import LucidDreamer 6 | 7 | 8 | if __name__ == "__main__": 9 | ### option 10 | parser = argparse.ArgumentParser(description='Arguments for LucidDreamer') 11 | # Input options 12 | parser.add_argument('--image', '-img', type=str, 
default='examples/Image015_animelakehouse.jpg', help='Input image for scene generation') 13 | parser.add_argument('--text', '-t', type=str, default='examples/Image015_animelakehouse.txt', help='Text prompt for scene generation') 14 | parser.add_argument('--neg_text', '-nt', type=str, default='', help='Negative text prompt for scene generation') 15 | 16 | # Camera options 17 | parser.add_argument('--campath_gen', '-cg', type=str, default='lookdown', choices=['lookdown', 'lookaround', 'rotate360'], help='Camera extrinsic trajectories for scene generation') 18 | parser.add_argument('--campath_render', '-cr', type=str, default='llff', choices=['back_and_forth', 'llff', 'headbanging'], help='Camera extrinsic trajectories for video rendering') 19 | 20 | # Inpainting options 21 | parser.add_argument('--model_name', type=str, default=None, help='Model name for inpainting(dreaming)') 22 | parser.add_argument('--seed', type=int, default=1, help='Manual seed for running Stable Diffusion inpainting') 23 | parser.add_argument('--diff_steps', type=int, default=50, help='Number of inference steps for running Stable Diffusion inpainting') 24 | 25 | # Save options 26 | parser.add_argument('--save_dir', '-s', type=str, default='', help='Save directory') 27 | 28 | args = parser.parse_args() 29 | 30 | 31 | ### input (example) 32 | rgb_cond = Image.open(args.image) 33 | 34 | if args.text.endswith('.txt'): 35 | with open(args.text, 'r') as f: 36 | txt_cond = f.readline() 37 | else: 38 | txt_cond = args.text 39 | 40 | if args.neg_text.endswith('.txt'): 41 | with open(args.neg_text, 'r') as f: 42 | neg_txt_cond = f.readline() 43 | else: 44 | neg_txt_cond = args.neg_text 45 | 46 | # Make default save directory if blank 47 | if args.save_dir == '': 48 | img_name = os.path.splitext(os.path.basename(args.image))[0] 49 | args.save_dir = f'./outputs/{img_name}_{args.campath_gen}_{args.seed}' 50 | if not os.path.exists(args.save_dir): 51 | os.makedirs(args.save_dir, exist_ok=True) 52 | 53 | if args.model_name is not None and args.model_name.endswith('safetensors'): 54 | print('Your model is saved in safetensor form. Converting to HF models...') 55 | from diffusers.pipelines.stable_diffusion.convert_from_ckpt import download_from_original_stable_diffusion_ckpt 56 | 57 | pipe = download_from_original_stable_diffusion_ckpt( 58 | checkpoint_path_or_dict=args.model_name, 59 | from_safetensors=True, 60 | device='cuda', 61 | ) 62 | pipe.save_pretrained('stablediffusion/', safe_serialization=False) 63 | args.model_name = f'stablediffusion/{args.model_name}' 64 | 65 | ld = LucidDreamer(for_gradio=False, save_dir=args.save_dir) 66 | ld.create(rgb_cond, txt_cond, neg_txt_cond, args.campath_gen, args.seed, args.diff_steps, model_name=args.model_name) 67 | ld.render_video(args.campath_render) 68 | -------------------------------------------------------------------------------- /scene/__init__.py: -------------------------------------------------------------------------------- 1 | ### 2 | # Copyright (C) 2023, Computer Vision Lab, Seoul National University, https://cv.snu.ac.kr 3 | # For permission requests, please contact robot0321@snu.ac.kr, esw0116@snu.ac.kr, namhj28@gmail.com, jarin.lee@gmail.com. 4 | # All rights reserved. 
5 | ### 6 | import os 7 | import random 8 | 9 | from arguments import GSParams 10 | from utils.system import searchForMaxIteration 11 | from scene.dataset_readers import readDataInfo 12 | from scene.gaussian_model import GaussianModel 13 | 14 | 15 | class Scene: 16 | gaussians: GaussianModel 17 | 18 | def __init__(self, traindata, gaussians: GaussianModel, opt: GSParams): 19 | self.traindata = traindata 20 | self.gaussians = gaussians 21 | 22 | info = readDataInfo(traindata, opt.white_background) 23 | random.shuffle(info.train_cameras) # Multi-res consistent random shuffling 24 | self.cameras_extent = info.nerf_normalization["radius"] 25 | 26 | print("Loading Training Cameras") 27 | self.train_cameras = info.train_cameras 28 | print("Loading Preset Cameras") 29 | self.preset_cameras = {} 30 | for campath in info.preset_cameras.keys(): 31 | self.preset_cameras[campath] = info.preset_cameras[campath] 32 | 33 | self.gaussians.create_from_pcd(info.point_cloud, self.cameras_extent) 34 | self.gaussians.training_setup(opt) 35 | 36 | def getTrainCameras(self): 37 | return self.train_cameras 38 | 39 | def getPresetCameras(self, preset): 40 | assert preset in self.preset_cameras 41 | return self.preset_cameras[preset] -------------------------------------------------------------------------------- /scene/cameras.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | import numpy as np 12 | 13 | import torch 14 | from torch import nn 15 | 16 | from utils.graphics import getWorld2View2, getProjectionMatrix 17 | from utils.loss import image2canny 18 | 19 | 20 | class Camera(nn.Module): 21 | def __init__(self, colmap_id, R, T, FoVx, FoVy, image, gt_alpha_mask, 22 | image_name, uid, 23 | trans=np.array([0.0, 0.0, 0.0]), scale=1.0, data_device = "cuda" 24 | ): 25 | super(Camera, self).__init__() 26 | 27 | self.uid = uid 28 | self.colmap_id = colmap_id 29 | self.R = R 30 | self.T = T 31 | self.FoVx = FoVx 32 | self.FoVy = FoVy 33 | self.image_name = image_name 34 | 35 | try: 36 | self.data_device = torch.device(data_device) 37 | except Exception as e: 38 | print(e) 39 | print(f"[Warning] Custom device {data_device} failed, fallback to default cuda device" ) 40 | self.data_device = torch.device("cuda") 41 | 42 | self.original_image = image.clamp(0.0, 1.0).to(self.data_device) 43 | self.canny_mask = image2canny(self.original_image.permute(1,2,0), 50, 150, isEdge1=False).detach().to(self.data_device) 44 | self.image_width = self.original_image.shape[2] 45 | self.image_height = self.original_image.shape[1] 46 | 47 | if gt_alpha_mask is not None: 48 | self.original_image *= gt_alpha_mask.to(self.data_device) 49 | else: 50 | self.original_image *= torch.ones((1, self.image_height, self.image_width), device=self.data_device) 51 | 52 | self.zfar = 100.0 53 | self.znear = 0.01 54 | 55 | self.trans = trans 56 | self.scale = scale 57 | 58 | self.world_view_transform = torch.tensor(getWorld2View2(R, T, trans, scale)).transpose(0, 1).cuda() 59 | self.projection_matrix = getProjectionMatrix(znear=self.znear, zfar=self.zfar, fovX=self.FoVx, fovY=self.FoVy).transpose(0,1).cuda() 60 | self.full_proj_transform = 
(self.world_view_transform.unsqueeze(0).bmm(self.projection_matrix.unsqueeze(0))).squeeze(0) 61 | self.camera_center = self.world_view_transform.inverse()[3, :3] 62 | 63 | 64 | class MiniCam: 65 | def __init__(self, width, height, fovy, fovx, znear, zfar, world_view_transform, full_proj_transform): 66 | self.image_width = width 67 | self.image_height = height 68 | self.FoVy = fovy 69 | self.FoVx = fovx 70 | self.znear = znear 71 | self.zfar = zfar 72 | self.world_view_transform = world_view_transform 73 | self.full_proj_transform = full_proj_transform 74 | view_inv = torch.inverse(self.world_view_transform) 75 | self.camera_center = view_inv[3][:3] 76 | 77 | -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization-min/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | cmake_minimum_required(VERSION 3.20) 13 | 14 | project(DiffRast LANGUAGES CUDA CXX) 15 | 16 | set(CMAKE_CXX_STANDARD 17) 17 | set(CMAKE_CXX_EXTENSIONS OFF) 18 | set(CMAKE_CUDA_STANDARD 17) 19 | 20 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") 21 | 22 | add_library(CudaRasterizer 23 | cuda_rasterizer/backward.h 24 | cuda_rasterizer/backward.cu 25 | cuda_rasterizer/forward.h 26 | cuda_rasterizer/forward.cu 27 | cuda_rasterizer/auxiliary.h 28 | cuda_rasterizer/rasterizer_impl.cu 29 | cuda_rasterizer/rasterizer_impl.h 30 | cuda_rasterizer/rasterizer.h 31 | ) 32 | 33 | set_target_properties(CudaRasterizer PROPERTIES CUDA_ARCHITECTURES "70;75;86") 34 | 35 | target_include_directories(CudaRasterizer PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/cuda_rasterizer) 36 | target_include_directories(CudaRasterizer PRIVATE third_party/glm ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) 37 | -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization-min/LICENSE.md: -------------------------------------------------------------------------------- 1 | Gaussian-Splatting License 2 | =========================== 3 | 4 | **Inria** and **the Max Planck Institut for Informatik (MPII)** hold all the ownership rights on the *Software* named **gaussian-splatting**. 5 | The *Software* is in the process of being registered with the Agence pour la Protection des 6 | Programmes (APP). 7 | 8 | The *Software* is still being developed by the *Licensor*. 9 | 10 | *Licensor*'s goal is to allow the research community to use, test and evaluate 11 | the *Software*. 12 | 13 | ## 1. Definitions 14 | 15 | *Licensee* means any person or entity that uses the *Software* and distributes 16 | its *Work*. 17 | 18 | *Licensor* means the owners of the *Software*, i.e Inria and MPII 19 | 20 | *Software* means the original work of authorship made available under this 21 | License ie gaussian-splatting. 22 | 23 | *Work* means the *Software* and any additions to or derivative works of the 24 | *Software* that are made available under this License. 25 | 26 | 27 | ## 2. Purpose 28 | This license is intended to define the rights granted to the *Licensee* by 29 | Licensors under the *Software*. 30 | 31 | ## 3. 
Rights granted 32 | 33 | For the above reasons Licensors have decided to distribute the *Software*. 34 | Licensors grant non-exclusive rights to use the *Software* for research purposes 35 | to research users (both academic and industrial), free of charge, without right 36 | to sublicense.. The *Software* may be used "non-commercially", i.e., for research 37 | and/or evaluation purposes only. 38 | 39 | Subject to the terms and conditions of this License, you are granted a 40 | non-exclusive, royalty-free, license to reproduce, prepare derivative works of, 41 | publicly display, publicly perform and distribute its *Work* and any resulting 42 | derivative works in any form. 43 | 44 | ## 4. Limitations 45 | 46 | **4.1 Redistribution.** You may reproduce or distribute the *Work* only if (a) you do 47 | so under this License, (b) you include a complete copy of this License with 48 | your distribution, and (c) you retain without modification any copyright, 49 | patent, trademark, or attribution notices that are present in the *Work*. 50 | 51 | **4.2 Derivative Works.** You may specify that additional or different terms apply 52 | to the use, reproduction, and distribution of your derivative works of the *Work* 53 | ("Your Terms") only if (a) Your Terms provide that the use limitation in 54 | Section 2 applies to your derivative works, and (b) you identify the specific 55 | derivative works that are subject to Your Terms. Notwithstanding Your Terms, 56 | this License (including the redistribution requirements in Section 3.1) will 57 | continue to apply to the *Work* itself. 58 | 59 | **4.3** Any other use without of prior consent of Licensors is prohibited. Research 60 | users explicitly acknowledge having received from Licensors all information 61 | allowing to appreciate the adequacy between of the *Software* and their needs and 62 | to undertake all necessary precautions for its execution and use. 63 | 64 | **4.4** The *Software* is provided both as a compiled library file and as source 65 | code. In case of using the *Software* for a publication or other results obtained 66 | through the use of the *Software*, users are strongly encouraged to cite the 67 | corresponding publications as explained in the documentation of the *Software*. 68 | 69 | ## 5. Disclaimer 70 | 71 | THE USER CANNOT USE, EXPLOIT OR DISTRIBUTE THE *SOFTWARE* FOR COMMERCIAL PURPOSES 72 | WITHOUT PRIOR AND EXPLICIT CONSENT OF LICENSORS. YOU MUST CONTACT INRIA FOR ANY 73 | UNAUTHORIZED USE: stip-sophia.transfert@inria.fr . ANY SUCH ACTION WILL 74 | CONSTITUTE A FORGERY. THIS *SOFTWARE* IS PROVIDED "AS IS" WITHOUT ANY WARRANTIES 75 | OF ANY NATURE AND ANY EXPRESS OR IMPLIED WARRANTIES, WITH REGARDS TO COMMERCIAL 76 | USE, PROFESSIONNAL USE, LEGAL OR NOT, OR OTHER, OR COMMERCIALISATION OR 77 | ADAPTATION. UNLESS EXPLICITLY PROVIDED BY LAW, IN NO EVENT, SHALL INRIA OR THE 78 | AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 79 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 80 | GOODS OR SERVICES, LOSS OF USE, DATA, OR PROFITS OR BUSINESS INTERRUPTION) 81 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 82 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING FROM, OUT OF OR 83 | IN CONNECTION WITH THE *SOFTWARE* OR THE USE OR OTHER DEALINGS IN THE *SOFTWARE*. 
84 | -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization-min/README.md: -------------------------------------------------------------------------------- 1 | # Differential Gaussian Rasterization 2 | 3 | Used as the rasterization engine for the paper "3D Gaussian Splatting for Real-Time Rendering of Radiance Fields". If you can make use of it in your own research, please be so kind to cite us. 4 | 5 |
6 | 
7 | ## BibTeX
8 | @Article{kerbl3Dgaussians,
 9 |       author       = {Kerbl, Bernhard and Kopanas, Georgios and Leimk{\"u}hler, Thomas and Drettakis, George},
10 |       title        = {3D Gaussian Splatting for Real-Time Radiance Field Rendering},
11 |       journal      = {ACM Transactions on Graphics},
12 |       number       = {4},
13 |       volume       = {42},
14 |       month        = {July},
15 |       year         = {2023},
16 |       url          = {https://repo-sam.inria.fr/fungraph/3d-gaussian-splatting/}
17 | }
18 |
19 |
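
For orientation, below is a minimal sketch of how this depth-aware variant is typically driven from Python. It assumes the `depth_diff_gaussian_rasterization_min` package exposes the same `GaussianRasterizationSettings` / `GaussianRasterizer` wrapper as the original diff-gaussian-rasterization, extended with the `out_depth` buffer declared in `rasterizer.h`; the camera and Gaussian attribute names, and the order of the returned tensors, are illustrative assumptions rather than code from this repository.

```python
import math
import torch

# Assumed import path; mirrors the original diff-gaussian-rasterization wrapper.
from depth_diff_gaussian_rasterization_min import (
    GaussianRasterizationSettings, GaussianRasterizer)


def render_sketch(cam, gaussians, bg_color):
    # Screen-space means tensor that receives the 2D position gradients.
    means2D = torch.zeros_like(gaussians.get_xyz, requires_grad=True, device="cuda")

    settings = GaussianRasterizationSettings(
        image_height=int(cam.image_height),
        image_width=int(cam.image_width),
        tanfovx=math.tan(cam.FoVx * 0.5),
        tanfovy=math.tan(cam.FoVy * 0.5),
        bg=bg_color,                          # (3,) CUDA tensor, background color
        scale_modifier=1.0,
        viewmatrix=cam.world_view_transform,  # stored transposed, as in the Camera class
        projmatrix=cam.full_proj_transform,
        sh_degree=gaussians.active_sh_degree,
        campos=cam.camera_center,
        prefiltered=False,
        debug=False)
    rasterizer = GaussianRasterizer(raster_settings=settings)

    # Assumed return order for the depth variant: color image, per-Gaussian radii, depth map.
    color, radii, depth = rasterizer(
        means3D=gaussians.get_xyz,
        means2D=means2D,
        shs=gaussians.get_features,
        colors_precomp=None,
        opacities=gaussians.get_opacity,
        scales=gaussians.get_scaling,
        rotations=gaussians.get_rotation,
        cov3D_precomp=None)
    return color, depth, radii
```

The settings fields correspond one-to-one to the arguments of `RasterizeGaussiansCUDA` in `rasterize_points.h` further below.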
-------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization-min/cuda_rasterizer/auxiliary.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_AUXILIARY_H_INCLUDED 13 | #define CUDA_RASTERIZER_AUXILIARY_H_INCLUDED 14 | 15 | #include "config.h" 16 | #include "stdio.h" 17 | 18 | #define BLOCK_SIZE (BLOCK_X * BLOCK_Y) 19 | #define NUM_WARPS (BLOCK_SIZE/32) 20 | 21 | // Spherical harmonics coefficients 22 | __device__ const float SH_C0 = 0.28209479177387814f; 23 | __device__ const float SH_C1 = 0.4886025119029199f; 24 | __device__ const float SH_C2[] = { 25 | 1.0925484305920792f, 26 | -1.0925484305920792f, 27 | 0.31539156525252005f, 28 | -1.0925484305920792f, 29 | 0.5462742152960396f 30 | }; 31 | __device__ const float SH_C3[] = { 32 | -0.5900435899266435f, 33 | 2.890611442640554f, 34 | -0.4570457994644658f, 35 | 0.3731763325901154f, 36 | -0.4570457994644658f, 37 | 1.445305721320277f, 38 | -0.5900435899266435f 39 | }; 40 | 41 | __forceinline__ __device__ float ndc2Pix(float v, int S) 42 | { 43 | return ((v + 1.0) * S - 1.0) * 0.5; 44 | } 45 | 46 | __forceinline__ __device__ void getRect(const float2 p, int max_radius, uint2& rect_min, uint2& rect_max, dim3 grid) 47 | { 48 | rect_min = { 49 | min(grid.x, max((int)0, (int)((p.x - max_radius) / BLOCK_X))), 50 | min(grid.y, max((int)0, (int)((p.y - max_radius) / BLOCK_Y))) 51 | }; 52 | rect_max = { 53 | min(grid.x, max((int)0, (int)((p.x + max_radius + BLOCK_X - 1) / BLOCK_X))), 54 | min(grid.y, max((int)0, (int)((p.y + max_radius + BLOCK_Y - 1) / BLOCK_Y))) 55 | }; 56 | } 57 | 58 | __forceinline__ __device__ float3 transformPoint4x3(const float3& p, const float* matrix) 59 | { 60 | float3 transformed = { 61 | matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z + matrix[12], 62 | matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z + matrix[13], 63 | matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z + matrix[14], 64 | }; 65 | return transformed; 66 | } 67 | 68 | __forceinline__ __device__ float4 transformPoint4x4(const float3& p, const float* matrix) 69 | { 70 | float4 transformed = { 71 | matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z + matrix[12], 72 | matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z + matrix[13], 73 | matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z + matrix[14], 74 | matrix[3] * p.x + matrix[7] * p.y + matrix[11] * p.z + matrix[15] 75 | }; 76 | return transformed; 77 | } 78 | 79 | __forceinline__ __device__ float3 transformVec4x3(const float3& p, const float* matrix) 80 | { 81 | float3 transformed = { 82 | matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z, 83 | matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z, 84 | matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z, 85 | }; 86 | return transformed; 87 | } 88 | 89 | __forceinline__ __device__ float3 transformVec4x3Transpose(const float3& p, const float* matrix) 90 | { 91 | float3 transformed = { 92 | matrix[0] * p.x + matrix[1] * p.y + matrix[2] * p.z, 93 | matrix[4] * p.x + matrix[5] * p.y + matrix[6] * p.z, 94 | matrix[8] * p.x + matrix[9] * p.y + matrix[10] * p.z, 95 | }; 96 | return transformed; 97 | } 
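// The dnormvdz/dnormvdv helpers below backpropagate through vector normalization:
// given v and an upstream gradient dv with respect to n = v / ||v||, they return J * dv,
// where J = (||v||^2 * I - v v^T) / ||v||^3 is the (symmetric) Jacobian of n(v);
// dnormvdz returns only the z component of that product.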
98 | 99 | __forceinline__ __device__ float dnormvdz(float3 v, float3 dv) 100 | { 101 | float sum2 = v.x * v.x + v.y * v.y + v.z * v.z; 102 | float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2); 103 | float dnormvdz = (-v.x * v.z * dv.x - v.y * v.z * dv.y + (sum2 - v.z * v.z) * dv.z) * invsum32; 104 | return dnormvdz; 105 | } 106 | 107 | __forceinline__ __device__ float3 dnormvdv(float3 v, float3 dv) 108 | { 109 | float sum2 = v.x * v.x + v.y * v.y + v.z * v.z; 110 | float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2); 111 | 112 | float3 dnormvdv; 113 | dnormvdv.x = ((+sum2 - v.x * v.x) * dv.x - v.y * v.x * dv.y - v.z * v.x * dv.z) * invsum32; 114 | dnormvdv.y = (-v.x * v.y * dv.x + (sum2 - v.y * v.y) * dv.y - v.z * v.y * dv.z) * invsum32; 115 | dnormvdv.z = (-v.x * v.z * dv.x - v.y * v.z * dv.y + (sum2 - v.z * v.z) * dv.z) * invsum32; 116 | return dnormvdv; 117 | } 118 | 119 | __forceinline__ __device__ float4 dnormvdv(float4 v, float4 dv) 120 | { 121 | float sum2 = v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w; 122 | float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2); 123 | 124 | float4 vdv = { v.x * dv.x, v.y * dv.y, v.z * dv.z, v.w * dv.w }; 125 | float vdv_sum = vdv.x + vdv.y + vdv.z + vdv.w; 126 | float4 dnormvdv; 127 | dnormvdv.x = ((sum2 - v.x * v.x) * dv.x - v.x * (vdv_sum - vdv.x)) * invsum32; 128 | dnormvdv.y = ((sum2 - v.y * v.y) * dv.y - v.y * (vdv_sum - vdv.y)) * invsum32; 129 | dnormvdv.z = ((sum2 - v.z * v.z) * dv.z - v.z * (vdv_sum - vdv.z)) * invsum32; 130 | dnormvdv.w = ((sum2 - v.w * v.w) * dv.w - v.w * (vdv_sum - vdv.w)) * invsum32; 131 | return dnormvdv; 132 | } 133 | 134 | __forceinline__ __device__ float sigmoid(float x) 135 | { 136 | return 1.0f / (1.0f + expf(-x)); 137 | } 138 | 139 | __forceinline__ __device__ bool in_frustum(int idx, 140 | const float* orig_points, 141 | const float* viewmatrix, 142 | const float* projmatrix, 143 | bool prefiltered, 144 | float3& p_view) 145 | { 146 | float3 p_orig = { orig_points[3 * idx], orig_points[3 * idx + 1], orig_points[3 * idx + 2] }; 147 | 148 | // Bring points to screen space 149 | float4 p_hom = transformPoint4x4(p_orig, projmatrix); 150 | float p_w = 1.0f / (p_hom.w + 0.0000001f); 151 | float3 p_proj = { p_hom.x * p_w, p_hom.y * p_w, p_hom.z * p_w }; 152 | p_view = transformPoint4x3(p_orig, viewmatrix); 153 | 154 | if (p_view.z <= 0.2f)// || ((p_proj.x < -1.3 || p_proj.x > 1.3 || p_proj.y < -1.3 || p_proj.y > 1.3))) 155 | { 156 | if (prefiltered) 157 | { 158 | printf("Point is filtered although prefiltered is set. This shouldn't happen!"); 159 | __trap(); 160 | } 161 | return false; 162 | } 163 | return true; 164 | } 165 | 166 | #define CHECK_CUDA(A, debug) \ 167 | A; if(debug) { \ 168 | auto ret = cudaDeviceSynchronize(); \ 169 | if (ret != cudaSuccess) { \ 170 | std::cerr << "\n[CUDA ERROR] in " << __FILE__ << "\nLine " << __LINE__ << ": " << cudaGetErrorString(ret); \ 171 | throw std::runtime_error(cudaGetErrorString(ret)); \ 172 | } \ 173 | } 174 | 175 | #endif -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization-min/cuda_rasterizer/backward.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 
8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_BACKWARD_H_INCLUDED 13 | #define CUDA_RASTERIZER_BACKWARD_H_INCLUDED 14 | 15 | #include 16 | #include "cuda_runtime.h" 17 | #include "device_launch_parameters.h" 18 | #define GLM_FORCE_CUDA 19 | #include 20 | 21 | namespace BACKWARD 22 | { 23 | void render( 24 | const dim3 grid, dim3 block, 25 | const uint2* ranges, 26 | const uint32_t* point_list, 27 | int W, int H, 28 | const float* bg_color, 29 | const float2* means2D, 30 | const float4* conic_opacity, 31 | const float3* means3D, 32 | const float* colors, 33 | const float* depths, 34 | const float* projmatrix, 35 | const float* final_Ts, 36 | const uint32_t* n_contrib, 37 | const float* dL_dpixels, 38 | const float* dL_depths, 39 | float3* dL_dmean2D, 40 | float4* dL_dconic2D, 41 | float3* dL_dmean3D, 42 | float* dL_dopacity, 43 | float* dL_dcolors); 44 | 45 | void preprocess( 46 | int P, int D, int M, 47 | const float3* means, 48 | const int* radii, 49 | const float* shs, 50 | const bool* clamped, 51 | const glm::vec3* scales, 52 | const glm::vec4* rotations, 53 | const float scale_modifier, 54 | const float* cov3Ds, 55 | const float* view, 56 | const float* proj, 57 | const float focal_x, float focal_y, 58 | const float tan_fovx, float tan_fovy, 59 | const glm::vec3* campos, 60 | const float3* dL_dmean2D, 61 | const float* dL_dconics, 62 | glm::vec3* dL_dmeans, 63 | float* dL_dcolor, 64 | float* dL_dcov3D, 65 | float* dL_dsh, 66 | glm::vec3* dL_dscale, 67 | glm::vec4* dL_drot); 68 | } 69 | 70 | #endif 71 | -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization-min/cuda_rasterizer/config.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_CONFIG_H_INCLUDED 13 | #define CUDA_RASTERIZER_CONFIG_H_INCLUDED 14 | 15 | #define NUM_CHANNELS 3 // Default 3, RGB 16 | #define BLOCK_X 16 17 | #define BLOCK_Y 16 18 | 19 | #endif -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization-min/cuda_rasterizer/forward.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_FORWARD_H_INCLUDED 13 | #define CUDA_RASTERIZER_FORWARD_H_INCLUDED 14 | 15 | #include 16 | #include "cuda_runtime.h" 17 | #include "device_launch_parameters.h" 18 | #define GLM_FORCE_CUDA 19 | #include 20 | 21 | namespace FORWARD 22 | { 23 | // Perform initial steps for each Gaussian prior to rasterization. 
24 | void preprocess(int P, int D, int M, 25 | const float* orig_points, 26 | const glm::vec3* scales, 27 | const float scale_modifier, 28 | const glm::vec4* rotations, 29 | const float* opacities, 30 | const float* shs, 31 | bool* clamped, 32 | const float* cov3D_precomp, 33 | const float* colors_precomp, 34 | const float* viewmatrix, 35 | const float* projmatrix, 36 | const glm::vec3* cam_pos, 37 | const int W, int H, 38 | const float focal_x, float focal_y, 39 | const float tan_fovx, float tan_fovy, 40 | int* radii, 41 | float2* points_xy_image, 42 | float* depths, 43 | float* cov3Ds, 44 | float* colors, 45 | float4* conic_opacity, 46 | const dim3 grid, 47 | uint32_t* tiles_touched, 48 | bool prefiltered); 49 | 50 | // Main rasterization method. 51 | void render( 52 | const dim3 grid, dim3 block, 53 | const uint2* ranges, 54 | const uint32_t* point_list, 55 | int W, int H, 56 | const float2* points_xy_image, 57 | const float* features, 58 | const float* depths, 59 | const float4* conic_opacity, 60 | float* final_T, 61 | uint32_t* n_contrib, 62 | const float* bg_color, 63 | float* out_color, 64 | float* out_depth); 65 | } 66 | 67 | 68 | #endif 69 | -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization-min/cuda_rasterizer/rasterizer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef CUDA_RASTERIZER_H_INCLUDED 13 | #define CUDA_RASTERIZER_H_INCLUDED 14 | 15 | #include 16 | #include 17 | 18 | namespace CudaRasterizer 19 | { 20 | class Rasterizer 21 | { 22 | public: 23 | 24 | static void markVisible( 25 | int P, 26 | float* means3D, 27 | float* viewmatrix, 28 | float* projmatrix, 29 | bool* present); 30 | 31 | static int forward( 32 | std::function geometryBuffer, 33 | std::function binningBuffer, 34 | std::function imageBuffer, 35 | const int P, int D, int M, 36 | const float* background, 37 | const int width, int height, 38 | const float* means3D, 39 | const float* shs, 40 | const float* colors_precomp, 41 | const float* opacities, 42 | const float* scales, 43 | const float scale_modifier, 44 | const float* rotations, 45 | const float* cov3D_precomp, 46 | const float* viewmatrix, 47 | const float* projmatrix, 48 | const float* cam_pos, 49 | const float tan_fovx, float tan_fovy, 50 | const bool prefiltered, 51 | float* out_color, 52 | float* out_depth, 53 | int* radii = nullptr, 54 | bool debug = false); 55 | 56 | static void backward( 57 | const int P, int D, int M, int R, 58 | const float* background, 59 | const int width, int height, 60 | const float* means3D, 61 | const float* shs, 62 | const float* colors_precomp, 63 | const float* scales, 64 | const float scale_modifier, 65 | const float* rotations, 66 | const float* cov3D_precomp, 67 | const float* viewmatrix, 68 | const float* projmatrix, 69 | const float* campos, 70 | const float tan_fovx, float tan_fovy, 71 | const int* radii, 72 | char* geom_buffer, 73 | char* binning_buffer, 74 | char* image_buffer, 75 | const float* dL_dpix, 76 | const float* dL_depths, 77 | float* dL_dmean2D, 78 | float* dL_dconic, 79 | float* dL_dopacity, 80 | float* dL_dcolor, 81 | float* dL_dmean3D, 82 | float* 
dL_dcov3D, 83 | float* dL_dsh, 84 | float* dL_dscale, 85 | float* dL_drot, 86 | bool debug); 87 | }; 88 | }; 89 | 90 | #endif 91 | -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization-min/cuda_rasterizer/rasterizer_impl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #pragma once 13 | 14 | #include 15 | #include 16 | #include "rasterizer.h" 17 | #include 18 | 19 | namespace CudaRasterizer 20 | { 21 | template 22 | static void obtain(char*& chunk, T*& ptr, std::size_t count, std::size_t alignment) 23 | { 24 | std::size_t offset = (reinterpret_cast(chunk) + alignment - 1) & ~(alignment - 1); 25 | ptr = reinterpret_cast(offset); 26 | chunk = reinterpret_cast(ptr + count); 27 | } 28 | 29 | struct GeometryState 30 | { 31 | size_t scan_size; 32 | float* depths; 33 | char* scanning_space; 34 | bool* clamped; 35 | int* internal_radii; 36 | float2* means2D; 37 | float* cov3D; 38 | float4* conic_opacity; 39 | float* rgb; 40 | uint32_t* point_offsets; 41 | uint32_t* tiles_touched; 42 | 43 | static GeometryState fromChunk(char*& chunk, size_t P); 44 | }; 45 | 46 | struct ImageState 47 | { 48 | uint2* ranges; 49 | uint32_t* n_contrib; 50 | float* accum_alpha; 51 | 52 | static ImageState fromChunk(char*& chunk, size_t N); 53 | }; 54 | 55 | struct BinningState 56 | { 57 | size_t sorting_size; 58 | uint64_t* point_list_keys_unsorted; 59 | uint64_t* point_list_keys; 60 | uint32_t* point_list_unsorted; 61 | uint32_t* point_list; 62 | char* list_sorting_space; 63 | 64 | static BinningState fromChunk(char*& chunk, size_t P); 65 | }; 66 | 67 | template 68 | size_t required(size_t P) 69 | { 70 | char* size = nullptr; 71 | T::fromChunk(size, P); 72 | return ((size_t)size) + 128; 73 | } 74 | }; -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization-min/ext.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #include 13 | #include "rasterize_points.h" 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("rasterize_gaussians", &RasterizeGaussiansCUDA); 17 | m.def("rasterize_gaussians_backward", &RasterizeGaussiansBackwardCUDA); 18 | m.def("mark_visible", &markVisible); 19 | } -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization-min/rasterize_points.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 
8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #pragma once 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | std::tuple 19 | RasterizeGaussiansCUDA( 20 | const torch::Tensor& background, 21 | const torch::Tensor& means3D, 22 | const torch::Tensor& colors, 23 | const torch::Tensor& opacity, 24 | const torch::Tensor& scales, 25 | const torch::Tensor& rotations, 26 | const float scale_modifier, 27 | const torch::Tensor& cov3D_precomp, 28 | const torch::Tensor& viewmatrix, 29 | const torch::Tensor& projmatrix, 30 | const float tan_fovx, 31 | const float tan_fovy, 32 | const int image_height, 33 | const int image_width, 34 | const torch::Tensor& sh, 35 | const int degree, 36 | const torch::Tensor& campos, 37 | const bool prefiltered, 38 | const bool debug); 39 | 40 | std::tuple 41 | RasterizeGaussiansBackwardCUDA( 42 | const torch::Tensor& background, 43 | const torch::Tensor& means3D, 44 | const torch::Tensor& radii, 45 | const torch::Tensor& colors, 46 | const torch::Tensor& scales, 47 | const torch::Tensor& rotations, 48 | const float scale_modifier, 49 | const torch::Tensor& cov3D_precomp, 50 | const torch::Tensor& viewmatrix, 51 | const torch::Tensor& projmatrix, 52 | const float tan_fovx, 53 | const float tan_fovy, 54 | const torch::Tensor& dL_dout_color, 55 | const torch::Tensor& dL_dout_depth, 56 | const torch::Tensor& sh, 57 | const int degree, 58 | const torch::Tensor& campos, 59 | const torch::Tensor& geomBuffer, 60 | const int R, 61 | const torch::Tensor& binningBuffer, 62 | const torch::Tensor& imageBuffer, 63 | const bool debug); 64 | 65 | torch::Tensor markVisible( 66 | torch::Tensor& means3D, 67 | torch::Tensor& viewmatrix, 68 | torch::Tensor& projmatrix); 69 | -------------------------------------------------------------------------------- /submodules/depth-diff-gaussian-rasterization-min/setup.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | from setuptools import setup 13 | from torch.utils.cpp_extension import CUDAExtension, BuildExtension 14 | import os 15 | os.path.dirname(os.path.abspath(__file__)) 16 | 17 | setup( 18 | name="depth_diff_gaussian_rasterization_min", 19 | packages=['depth_diff_gaussian_rasterization_min'], 20 | ext_modules=[ 21 | CUDAExtension( 22 | name="depth_diff_gaussian_rasterization_min._C", 23 | sources=[ 24 | "cuda_rasterizer/rasterizer_impl.cu", 25 | "cuda_rasterizer/forward.cu", 26 | "cuda_rasterizer/backward.cu", 27 | "rasterize_points.cu", 28 | "ext.cpp"], 29 | extra_compile_args={"nvcc": ["-I" + os.path.join(os.path.dirname(os.path.abspath(__file__)), "third_party/glm/")]}) 30 | ], 31 | cmdclass={ 32 | 'build_ext': BuildExtension 33 | } 34 | ) 35 | -------------------------------------------------------------------------------- /submodules/simple-knn/ext.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 
8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #include 13 | #include "spatial.h" 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("distCUDA2", &distCUDA2); 17 | } 18 | -------------------------------------------------------------------------------- /submodules/simple-knn/setup.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | from setuptools import setup 13 | from torch.utils.cpp_extension import CUDAExtension, BuildExtension 14 | import os 15 | 16 | cxx_compiler_flags = [] 17 | 18 | if os.name == 'nt': 19 | cxx_compiler_flags.append("/wd4624") 20 | 21 | setup( 22 | name="simple_knn", 23 | ext_modules=[ 24 | CUDAExtension( 25 | name="simple_knn._C", 26 | sources=[ 27 | "spatial.cu", 28 | "simple_knn.cu", 29 | "ext.cpp"], 30 | extra_compile_args={"nvcc": [], "cxx": cxx_compiler_flags}) 31 | ], 32 | cmdclass={ 33 | 'build_ext': BuildExtension 34 | } 35 | ) 36 | -------------------------------------------------------------------------------- /submodules/simple-knn/simple_knn.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #ifndef SIMPLEKNN_H_INCLUDED 13 | #define SIMPLEKNN_H_INCLUDED 14 | 15 | class SimpleKNN 16 | { 17 | public: 18 | static void knn(int P, float3* points, float* meanDists); 19 | }; 20 | 21 | #endif -------------------------------------------------------------------------------- /submodules/simple-knn/simple_knn/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/submodules/simple-knn/simple_knn/.gitkeep -------------------------------------------------------------------------------- /submodules/simple-knn/spatial.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 
8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #include "spatial.h" 13 | #include "simple_knn.h" 14 | 15 | torch::Tensor 16 | distCUDA2(const torch::Tensor& points) 17 | { 18 | const int P = points.size(0); 19 | 20 | auto float_opts = points.options().dtype(torch::kFloat32); 21 | torch::Tensor means = torch::full({P}, 0.0, float_opts); 22 | 23 | SimpleKNN::knn(P, (float3*)points.contiguous().data(), means.contiguous().data()); 24 | 25 | return means; 26 | } -------------------------------------------------------------------------------- /submodules/simple-knn/spatial.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | #include 13 | 14 | torch::Tensor distCUDA2(const torch::Tensor& points); -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luciddreamer-cvlab/LucidDreamer/76ed990fad840e298a3e586697cb866e89fa94e2/utils/__init__.py -------------------------------------------------------------------------------- /utils/camera.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | import json 12 | 13 | import numpy as np 14 | import torch 15 | 16 | from scene.cameras import Camera, MiniCam 17 | from utils.general import PILtoTorch 18 | from utils.graphics import fov2focal, focal2fov, getWorld2View, getProjectionMatrix 19 | 20 | 21 | WARNED = False 22 | 23 | 24 | def load_json(path, H, W): 25 | cams = [] 26 | with open(path) as json_file: 27 | contents = json.load(json_file) 28 | FoVx = contents["camera_angle_x"] 29 | FoVy = focal2fov(fov2focal(FoVx, W), H) 30 | zfar = 100.0 31 | znear = 0.01 32 | 33 | frames = contents["frames"] 34 | for idx, frame in enumerate(frames): 35 | # NeRF 'transform_matrix' is a camera-to-world transform 36 | c2w = np.array(frame["transform_matrix"]) 37 | # change from OpenGL/Blender camera axes (Y up, Z back) to COLMAP (Y down, Z forward) 38 | c2w[:3, 1:3] *= -1 39 | if c2w.shape[0] == 3: 40 | one = np.zeros((1, 4)) 41 | one[0, -1] = 1 42 | c2w = np.concatenate((c2w, one), axis=0) 43 | 44 | # get the world-to-camera transform and set R, T 45 | w2c = np.linalg.inv(c2w) 46 | R = np.transpose(w2c[:3, :3]) # R is stored transposed due to 'glm' in CUDA code 47 | T = w2c[:3, 3] 48 | 49 | w2c = torch.as_tensor(getWorld2View(R, T)).T.cuda() 50 | proj = getProjectionMatrix(znear, zfar, FoVx, FoVy).T.cuda() 51 | cams.append(MiniCam(W, H, FoVx, FoVy, znear, zfar, w2c, w2c @ proj)) 52 | return cams 53 | 54 | 55 | def loadCam(args, id, cam_info, resolution_scale): 56 | orig_w, orig_h = cam_info.image.size 57 | 58 | if args.resolution in [1, 2, 4, 8]: 59 | resolution = round(orig_w/(resolution_scale * args.resolution)), round(orig_h/(resolution_scale * args.resolution)) 60 | else: # should be a type that converts to float 61 | if args.resolution == -1: 62 | if orig_w > 1600: 63 | global WARNED 64 | if not WARNED: 65 | print("[ INFO ] Encountered quite large input images (>1.6K pixels width), rescaling to 1.6K.\n " 66 | "If this is not desired, please explicitly specify '--resolution/-r' as 1") 67 | WARNED = True 68 | global_down = orig_w / 1600 69 | else: 70 | global_down = 1 71 | else: 72 | global_down = orig_w / args.resolution 73 | 74 | scale = float(global_down) * float(resolution_scale) 75 | resolution = (int(orig_w / scale), int(orig_h / scale)) 76 | 77 | resized_image_rgb = PILtoTorch(cam_info.image, resolution) 78 | 79 | gt_image = resized_image_rgb[:3, ...] 80 | loaded_mask = None 81 | 82 | if resized_image_rgb.shape[1] == 4: 83 | loaded_mask = resized_image_rgb[3:4, ...] 
84 | 85 | return Camera(colmap_id=cam_info.uid, R=cam_info.R, T=cam_info.T, 86 | FoVx=cam_info.FovX, FoVy=cam_info.FovY, 87 | image=gt_image, gt_alpha_mask=loaded_mask, 88 | image_name=cam_info.image_name, uid=id, data_device=args.data_device) 89 | 90 | 91 | def cameraList_from_camInfos(cam_infos, resolution_scale, args): 92 | camera_list = [] 93 | 94 | for id, c in enumerate(cam_infos): 95 | camera_list.append(loadCam(args, id, c, resolution_scale)) 96 | 97 | return camera_list 98 | 99 | 100 | def camera_to_JSON(id, camera : Camera): 101 | Rt = np.zeros((4, 4)) 102 | Rt[:3, :3] = camera.R.transpose() 103 | Rt[:3, 3] = camera.T 104 | Rt[3, 3] = 1.0 105 | 106 | W2C = np.linalg.inv(Rt) 107 | pos = W2C[:3, 3] 108 | rot = W2C[:3, :3] 109 | serializable_array_2d = [x.tolist() for x in rot] 110 | camera_entry = { 111 | 'id' : id, 112 | 'img_name' : camera.image_name, 113 | 'width' : camera.width, 114 | 'height' : camera.height, 115 | 'position': pos.tolist(), 116 | 'rotation': serializable_array_2d, 117 | 'fy' : fov2focal(camera.FovY, camera.height), 118 | 'fx' : fov2focal(camera.FovX, camera.width) 119 | } 120 | return camera_entry 121 | -------------------------------------------------------------------------------- /utils/depth.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | import matplotlib.cm 3 | import numpy as np 4 | import torch 5 | 6 | 7 | def colorize(value, vmin=None, vmax=None, cmap='jet', invalid_val=-99, invalid_mask=None, background_color=(128, 128, 128, 255), gamma_corrected=False, value_transform=None): 8 | """Converts a depth map to a color image. 9 | 10 | Args: 11 | value (torch.Tensor, numpy.ndarry): Input depth map. Shape: (H, W) or (1, H, W) or (1, 1, H, W). All singular dimensions are squeezed 12 | vmin (float, optional): vmin-valued entries are mapped to start color of cmap. If None, value.min() is used. Defaults to None. 13 | vmax (float, optional): vmax-valued entries are mapped to end color of cmap. If None, value.max() is used. Defaults to None. 14 | cmap (str, optional): matplotlib colormap to use. Defaults to 'magma_r'. 15 | invalid_val (int, optional): Specifies value of invalid pixels that should be colored as 'background_color'. Defaults to -99. 16 | invalid_mask (numpy.ndarray, optional): Boolean mask for invalid regions. Defaults to None. 17 | background_color (tuple[int], optional): 4-tuple RGB color to give to invalid pixels. Defaults to (128, 128, 128, 255). 18 | gamma_corrected (bool, optional): Apply gamma correction to colored image. Defaults to False. 19 | value_transform (Callable, optional): Apply transform function to valid pixels before coloring. Defaults to None. 20 | 21 | Returns: 22 | numpy.ndarray, dtype - uint8: Colored depth map. Shape: (H, W, 4) 23 | """ 24 | if isinstance(value, torch.Tensor): 25 | value = value.detach().cpu().numpy() 26 | 27 | value = value.squeeze() 28 | if invalid_mask is None: 29 | invalid_mask = value == invalid_val 30 | mask = np.logical_not(invalid_mask) 31 | 32 | # normalize 33 | vmin = np.percentile(value[mask],2) if vmin is None else vmin 34 | vmax = np.percentile(value[mask],98) if vmax is None else vmax 35 | if vmin != vmax: 36 | value = (value - vmin) / (vmax - vmin) # vmin..vmax 37 | else: 38 | # Avoid 0-division 39 | value = value * 0. 
40 | 41 | # squeeze last dim if it exists 42 | # grey out the invalid values 43 | 44 | value[invalid_mask] = np.nan 45 | cmapper = matplotlib.cm.get_cmap(cmap) 46 | if value_transform: 47 | value = value_transform(value) 48 | # value = value / value.max() 49 | value = cmapper(value, bytes=True) # (nxmx4) 50 | 51 | # img = value[:, :, :] 52 | img = value[...] 53 | img[invalid_mask] = background_color 54 | 55 | # return img.transpose((2, 0, 1)) 56 | if gamma_corrected: 57 | # gamma correction 58 | img = img / 255 59 | img = np.power(img, 2.2) 60 | img = img * 255 61 | img = img.astype(np.uint8) 62 | return img -------------------------------------------------------------------------------- /utils/general.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | import sys 12 | import random 13 | from datetime import datetime 14 | import numpy as np 15 | import torch 16 | 17 | 18 | def inverse_sigmoid(x): 19 | return torch.log(x/(1-x)) 20 | 21 | 22 | def PILtoTorch(pil_image, resolution): 23 | resized_image_PIL = pil_image.resize(resolution) 24 | resized_image = torch.from_numpy(np.array(resized_image_PIL)) / 255.0 25 | if len(resized_image.shape) == 3: 26 | return resized_image.permute(2, 0, 1) 27 | else: 28 | return resized_image.unsqueeze(dim=-1).permute(2, 0, 1) 29 | 30 | 31 | def get_expon_lr_func( 32 | lr_init, lr_final, lr_delay_steps=0, lr_delay_mult=1.0, max_steps=1000000 33 | ): 34 | """ 35 | Copied from Plenoxels 36 | 37 | Continuous learning rate decay function. Adapted from JaxNeRF 38 | The returned rate is lr_init when step=0 and lr_final when step=max_steps, and 39 | is log-linearly interpolated elsewhere (equivalent to exponential decay). 40 | If lr_delay_steps>0 then the learning rate will be scaled by some smooth 41 | function of lr_delay_mult, such that the initial learning rate is 42 | lr_init*lr_delay_mult at the beginning of optimization but will be eased back 43 | to the normal learning rate when steps>lr_delay_steps. 44 | :param conf: config subtree 'lr' or similar 45 | :param max_steps: int, the number of steps during optimization. 46 | :return HoF which takes step as input 47 | """ 48 | 49 | def helper(step): 50 | if step < 0 or (lr_init == 0.0 and lr_final == 0.0): 51 | # Disable this parameter 52 | return 0.0 53 | if lr_delay_steps > 0: 54 | # A kind of reverse cosine decay. 
55 | delay_rate = lr_delay_mult + (1 - lr_delay_mult) * np.sin( 56 | 0.5 * np.pi * np.clip(step / lr_delay_steps, 0, 1) 57 | ) 58 | else: 59 | delay_rate = 1.0 60 | t = np.clip(step / max_steps, 0, 1) 61 | log_lerp = np.exp(np.log(lr_init) * (1 - t) + np.log(lr_final) * t) 62 | return delay_rate * log_lerp 63 | 64 | return helper 65 | 66 | 67 | def strip_lowerdiag(L): 68 | uncertainty = torch.zeros((L.shape[0], 6), dtype=torch.float, device="cuda") 69 | 70 | uncertainty[:, 0] = L[:, 0, 0] 71 | uncertainty[:, 1] = L[:, 0, 1] 72 | uncertainty[:, 2] = L[:, 0, 2] 73 | uncertainty[:, 3] = L[:, 1, 1] 74 | uncertainty[:, 4] = L[:, 1, 2] 75 | uncertainty[:, 5] = L[:, 2, 2] 76 | return uncertainty 77 | 78 | 79 | def strip_symmetric(sym): 80 | return strip_lowerdiag(sym) 81 | 82 | 83 | def build_rotation(r): 84 | norm = torch.sqrt(r[:,0]*r[:,0] + r[:,1]*r[:,1] + r[:,2]*r[:,2] + r[:,3]*r[:,3]) 85 | 86 | q = r / norm[:, None] 87 | 88 | R = torch.zeros((q.size(0), 3, 3), device='cuda') 89 | 90 | r = q[:, 0] 91 | x = q[:, 1] 92 | y = q[:, 2] 93 | z = q[:, 3] 94 | 95 | R[:, 0, 0] = 1 - 2 * (y*y + z*z) 96 | R[:, 0, 1] = 2 * (x*y - r*z) 97 | R[:, 0, 2] = 2 * (x*z + r*y) 98 | R[:, 1, 0] = 2 * (x*y + r*z) 99 | R[:, 1, 1] = 1 - 2 * (x*x + z*z) 100 | R[:, 1, 2] = 2 * (y*z - r*x) 101 | R[:, 2, 0] = 2 * (x*z - r*y) 102 | R[:, 2, 1] = 2 * (y*z + r*x) 103 | R[:, 2, 2] = 1 - 2 * (x*x + y*y) 104 | return R 105 | 106 | 107 | def build_scaling_rotation(s, r): 108 | L = torch.zeros((s.shape[0], 3, 3), dtype=torch.float, device="cuda") 109 | R = build_rotation(r) 110 | 111 | L[:,0,0] = s[:,0] 112 | L[:,1,1] = s[:,1] 113 | L[:,2,2] = s[:,2] 114 | 115 | L = R @ L 116 | return L 117 | 118 | 119 | def safe_state(silent): 120 | old_f = sys.stdout 121 | class F: 122 | def __init__(self, silent): 123 | self.silent = silent 124 | 125 | def write(self, x): 126 | if not self.silent: 127 | if x.endswith("\n"): 128 | old_f.write(x.replace("\n", " [{}]\n".format(str(datetime.now().strftime("%d/%m %H:%M:%S"))))) 129 | else: 130 | old_f.write(x) 131 | 132 | def flush(self): 133 | old_f.flush() 134 | 135 | sys.stdout = F(silent) 136 | 137 | random.seed(0) 138 | np.random.seed(0) 139 | torch.manual_seed(0) 140 | torch.cuda.set_device(torch.device("cuda:0")) 141 | -------------------------------------------------------------------------------- /utils/graphics.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | import math 12 | from typing import NamedTuple 13 | import numpy as np 14 | import torch 15 | 16 | 17 | class BasicPointCloud(NamedTuple): 18 | points : np.array 19 | colors : np.array 20 | normals : np.array 21 | 22 | 23 | def geom_transform_points(points, transf_matrix): 24 | P, _ = points.shape 25 | ones = torch.ones(P, 1, dtype=points.dtype, device=points.device) 26 | points_hom = torch.cat([points, ones], dim=1) 27 | points_out = torch.matmul(points_hom, transf_matrix.unsqueeze(0)) 28 | 29 | denom = points_out[..., 3:] + 0.0000001 30 | return (points_out[..., :3] / denom).squeeze(dim=0) 31 | 32 | 33 | def getWorld2View(R, t): 34 | Rt = np.zeros((4, 4)) 35 | Rt[:3, :3] = R.transpose() 36 | Rt[:3, 3] = t 37 | Rt[3, 3] = 1.0 38 | return np.float32(Rt) 39 | 40 | 41 | def getWorld2View2(R, t, translate=np.array([.0, .0, .0]), scale=1.0): 42 | Rt = np.zeros((4, 4)) 43 | Rt[:3, :3] = R.transpose() 44 | Rt[:3, 3] = t 45 | Rt[3, 3] = 1.0 46 | 47 | C2W = np.linalg.inv(Rt) 48 | cam_center = C2W[:3, 3] 49 | cam_center = (cam_center + translate) * scale 50 | C2W[:3, 3] = cam_center 51 | Rt = np.linalg.inv(C2W) 52 | return np.float32(Rt) 53 | 54 | 55 | def getProjectionMatrix(znear, zfar, fovX, fovY): 56 | tanHalfFovY = math.tan((fovY / 2)) 57 | tanHalfFovX = math.tan((fovX / 2)) 58 | 59 | top = tanHalfFovY * znear 60 | bottom = -top 61 | right = tanHalfFovX * znear 62 | left = -right 63 | 64 | P = torch.zeros(4, 4) 65 | 66 | z_sign = 1.0 67 | 68 | P[0, 0] = 2.0 * znear / (right - left) 69 | P[1, 1] = 2.0 * znear / (top - bottom) 70 | P[0, 2] = (right + left) / (right - left) 71 | P[1, 2] = (top + bottom) / (top - bottom) 72 | P[3, 2] = z_sign 73 | P[2, 2] = z_sign * zfar / (zfar - znear) 74 | P[2, 3] = -(zfar * znear) / (zfar - znear) 75 | return P 76 | 77 | 78 | def fov2focal(fov, pixels): 79 | return pixels / (2 * math.tan(fov / 2)) 80 | 81 | 82 | def focal2fov(focal, pixels): 83 | return 2*math.atan(pixels/(2*focal)) -------------------------------------------------------------------------------- /utils/image.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | import torch 12 | 13 | 14 | def mse(img1, img2): 15 | return (((img1 - img2)) ** 2).view(img1.shape[0], -1).mean(1, keepdim=True) 16 | 17 | 18 | def psnr(img1, img2): 19 | mse = (((img1 - img2)) ** 2).view(img1.shape[0], -1).mean(1, keepdim=True) 20 | return 20 * torch.log10(1.0 / torch.sqrt(mse)) 21 | -------------------------------------------------------------------------------- /utils/loss.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | from math import exp 12 | 13 | import torch 14 | import torch.nn.functional as F 15 | from torch.autograd import Variable 16 | 17 | 18 | def l1_loss(network_output, gt): 19 | return torch.abs((network_output - gt)).mean() 20 | 21 | 22 | def l2_loss(network_output, gt): 23 | return ((network_output - gt) ** 2).mean() 24 | 25 | 26 | def gaussian(window_size, sigma): 27 | gauss = torch.Tensor([exp(-(x - window_size // 2) ** 2 / float(2 * sigma ** 2)) for x in range(window_size)]) 28 | return gauss / gauss.sum() 29 | 30 | 31 | def create_window(window_size, channel): 32 | _1D_window = gaussian(window_size, 1.5).unsqueeze(1) 33 | _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0) 34 | window = Variable(_2D_window.expand(channel, 1, window_size, window_size).contiguous()) 35 | return window 36 | 37 | 38 | def ssim(img1, img2, window_size=11, size_average=True): 39 | channel = img1.size(-3) 40 | window = create_window(window_size, channel) 41 | 42 | if img1.is_cuda: 43 | window = window.cuda(img1.get_device()) 44 | window = window.type_as(img1) 45 | 46 | return _ssim(img1, img2, window, window_size, channel, size_average) 47 | 48 | 49 | def _ssim(img1, img2, window, window_size, channel, size_average=True): 50 | mu1 = F.conv2d(img1, window, padding=window_size // 2, groups=channel) 51 | mu2 = F.conv2d(img2, window, padding=window_size // 2, groups=channel) 52 | 53 | mu1_sq = mu1.pow(2) 54 | mu2_sq = mu2.pow(2) 55 | mu1_mu2 = mu1 * mu2 56 | 57 | sigma1_sq = F.conv2d(img1 * img1, window, padding=window_size // 2, groups=channel) - mu1_sq 58 | sigma2_sq = F.conv2d(img2 * img2, window, padding=window_size // 2, groups=channel) - mu2_sq 59 | sigma12 = F.conv2d(img1 * img2, window, padding=window_size // 2, groups=channel) - mu1_mu2 60 | 61 | C1 = 0.01 ** 2 62 | C2 = 0.03 ** 2 63 | 64 | ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2)) 65 | 66 | if size_average: 67 | return ssim_map.mean() 68 | else: 69 | return ssim_map.mean(1).mean(1).mean(1) 70 | 71 | 72 | import numpy as np 73 | import cv2 74 | def image2canny(image, thres1, thres2, isEdge1=True): 75 | """ image: (H, W, 3)""" 76 | canny_mask = torch.from_numpy(cv2.Canny((image.detach().cpu().numpy()*255.).astype(np.uint8), thres1, thres2)/255.) 77 | if not isEdge1: 78 | canny_mask = 1. - canny_mask 79 | return canny_mask.float() 80 | 81 | with torch.no_grad(): 82 | kernelsize=3 83 | conv = torch.nn.Conv2d(1, 1, kernel_size=kernelsize, padding=(kernelsize//2)) 84 | kernel = torch.tensor([[0.,1.,0.],[1.,0.,1.],[0.,1.,0.]]).reshape(1,1,kernelsize,kernelsize) 85 | conv.weight.data = kernel #torch.ones((1,1,kernelsize,kernelsize)) 86 | conv.bias.data = torch.tensor([0.]) 87 | conv.requires_grad_(False) 88 | conv = conv.cuda() 89 | 90 | 91 | def nearMean_map(array, mask, kernelsize=3): 92 | """ array: (H,W) / mask: (H,W) """ 93 | cnt_map = torch.ones_like(array) 94 | 95 | nearMean_map = conv((array * mask)[None,None]) 96 | cnt_map = conv((cnt_map * mask)[None,None]) 97 | nearMean_map = (nearMean_map / (cnt_map+1e-8)).squeeze() 98 | 99 | return nearMean_map -------------------------------------------------------------------------------- /utils/sh.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The PlenOctree Authors. 
2 | # Redistribution and use in source and binary forms, with or without 3 | # modification, are permitted provided that the following conditions are met: 4 | # 5 | # 1. Redistributions of source code must retain the above copyright notice, 6 | # this list of conditions and the following disclaimer. 7 | # 8 | # 2. Redistributions in binary form must reproduce the above copyright notice, 9 | # this list of conditions and the following disclaimer in the documentation 10 | # and/or other materials provided with the distribution. 11 | # 12 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 13 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 14 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 15 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 16 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 17 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 18 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 19 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 20 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 21 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 22 | # POSSIBILITY OF SUCH DAMAGE. 23 | import torch 24 | 25 | 26 | C0 = 0.28209479177387814 27 | C1 = 0.4886025119029199 28 | C2 = [ 29 | 1.0925484305920792, 30 | -1.0925484305920792, 31 | 0.31539156525252005, 32 | -1.0925484305920792, 33 | 0.5462742152960396 34 | ] 35 | C3 = [ 36 | -0.5900435899266435, 37 | 2.890611442640554, 38 | -0.4570457994644658, 39 | 0.3731763325901154, 40 | -0.4570457994644658, 41 | 1.445305721320277, 42 | -0.5900435899266435 43 | ] 44 | C4 = [ 45 | 2.5033429417967046, 46 | -1.7701307697799304, 47 | 0.9461746957575601, 48 | -0.6690465435572892, 49 | 0.10578554691520431, 50 | -0.6690465435572892, 51 | 0.47308734787878004, 52 | -1.7701307697799304, 53 | 0.6258357354491761, 54 | ] 55 | 56 | 57 | def eval_sh(deg, sh, dirs): 58 | """ 59 | Evaluate spherical harmonics at unit directions 60 | using hardcoded SH polynomials. 61 | Works with torch/np/jnp. 62 | ... Can be 0 or more batch dimensions. 63 | Args: 64 | deg: int SH deg. 
Currently, 0-3 supported 65 | sh: jnp.ndarray SH coeffs [..., C, (deg + 1) ** 2] 66 | dirs: jnp.ndarray unit directions [..., 3] 67 | Returns: 68 | [..., C] 69 | """ 70 | assert deg <= 4 and deg >= 0 71 | coeff = (deg + 1) ** 2 72 | assert sh.shape[-1] >= coeff 73 | 74 | result = C0 * sh[..., 0] 75 | if deg > 0: 76 | x, y, z = dirs[..., 0:1], dirs[..., 1:2], dirs[..., 2:3] 77 | result = (result - 78 | C1 * y * sh[..., 1] + 79 | C1 * z * sh[..., 2] - 80 | C1 * x * sh[..., 3]) 81 | 82 | if deg > 1: 83 | xx, yy, zz = x * x, y * y, z * z 84 | xy, yz, xz = x * y, y * z, x * z 85 | result = (result + 86 | C2[0] * xy * sh[..., 4] + 87 | C2[1] * yz * sh[..., 5] + 88 | C2[2] * (2.0 * zz - xx - yy) * sh[..., 6] + 89 | C2[3] * xz * sh[..., 7] + 90 | C2[4] * (xx - yy) * sh[..., 8]) 91 | 92 | if deg > 2: 93 | result = (result + 94 | C3[0] * y * (3 * xx - yy) * sh[..., 9] + 95 | C3[1] * xy * z * sh[..., 10] + 96 | C3[2] * y * (4 * zz - xx - yy)* sh[..., 11] + 97 | C3[3] * z * (2 * zz - 3 * xx - 3 * yy) * sh[..., 12] + 98 | C3[4] * x * (4 * zz - xx - yy) * sh[..., 13] + 99 | C3[5] * z * (xx - yy) * sh[..., 14] + 100 | C3[6] * x * (xx - 3 * yy) * sh[..., 15]) 101 | 102 | if deg > 3: 103 | result = (result + C4[0] * xy * (xx - yy) * sh[..., 16] + 104 | C4[1] * yz * (3 * xx - yy) * sh[..., 17] + 105 | C4[2] * xy * (7 * zz - 1) * sh[..., 18] + 106 | C4[3] * yz * (7 * zz - 3) * sh[..., 19] + 107 | C4[4] * (zz * (35 * zz - 30) + 3) * sh[..., 20] + 108 | C4[5] * xz * (7 * zz - 3) * sh[..., 21] + 109 | C4[6] * (xx - yy) * (7 * zz - 1) * sh[..., 22] + 110 | C4[7] * xz * (xx - 3 * yy) * sh[..., 23] + 111 | C4[8] * (xx * (xx - 3 * yy) - yy * (3 * xx - yy)) * sh[..., 24]) 112 | return result 113 | 114 | 115 | def RGB2SH(rgb): 116 | return (rgb - 0.5) / C0 117 | 118 | 119 | def SH2RGB(sh): 120 | return sh * C0 + 0.5 -------------------------------------------------------------------------------- /utils/system.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | from errno import EEXIST 12 | from os import makedirs, path 13 | import os 14 | 15 | 16 | def mkdir_p(folder_path): 17 | # Creates a directory. equivalent to using mkdir -p on the command line 18 | try: 19 | makedirs(folder_path) 20 | except OSError as exc: # Python >2.5 21 | if exc.errno == EEXIST and path.isdir(folder_path): 22 | pass 23 | else: 24 | raise 25 | 26 | 27 | def searchForMaxIteration(folder): 28 | saved_iters = [int(fname.split("_")[-1]) for fname in os.listdir(folder)] 29 | return max(saved_iters) 30 | --------------------------------------------------------------------------------
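
As a quick orientation for `eval_sh` in `utils/sh.py` above, here is a small self-contained sketch; the shapes, the random inputs, and the `+0.5` shift are illustrative assumptions, not code from this repository. Note also that although the docstring mentions degrees 0-3, the function carries the `C4` terms and asserts `deg <= 4`, so degree 4 is handled as well.

```python
import torch

from utils.sh import eval_sh

# Hypothetical shapes: 100 points, 3 color channels, degree-3 SH -> (3 + 1) ** 2 = 16 coeffs.
sh_coeffs = torch.randn(100, 3, 16)
view_dirs = torch.nn.functional.normalize(torch.randn(100, 3), dim=-1)  # unit directions

rgb = eval_sh(3, sh_coeffs, view_dirs)  # -> shape (100, 3)
rgb = torch.clamp(rgb + 0.5, min=0.0)   # shift by 0.5 before use as color, mirroring SH2RGB's DC convention
```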