├── .gitignore ├── requirements.txt ├── torchhub ├── facebookresearch_dinov2_main │ ├── requirements-dev.txt │ ├── setup.cfg │ ├── __pycache__ │ │ ├── hubconf.cpython-39.pyc │ │ └── vision_transformer.cpython-39.pyc │ ├── dinov2 │ │ ├── __pycache__ │ │ │ └── __init__.cpython-39.pyc │ │ ├── layers │ │ │ ├── __pycache__ │ │ │ │ ├── mlp.cpython-39.pyc │ │ │ │ ├── block.cpython-39.pyc │ │ │ │ ├── __init__.cpython-39.pyc │ │ │ │ ├── attention.cpython-39.pyc │ │ │ │ ├── dino_head.cpython-39.pyc │ │ │ │ ├── drop_path.cpython-39.pyc │ │ │ │ ├── layer_scale.cpython-39.pyc │ │ │ │ ├── patch_embed.cpython-39.pyc │ │ │ │ └── swiglu_ffn.cpython-39.pyc │ │ │ ├── __init__.py │ │ │ ├── layer_scale.py │ │ │ ├── drop_path.py │ │ │ ├── mlp.py │ │ │ ├── swiglu_ffn.py │ │ │ ├── dino_head.py │ │ │ ├── attention.py │ │ │ └── patch_embed.py │ │ ├── configs │ │ │ ├── train │ │ │ │ ├── vitl16_short.yaml │ │ │ │ ├── vitl14.yaml │ │ │ │ └── vitg14.yaml │ │ │ ├── eval │ │ │ │ ├── vitb14_pretrain.yaml │ │ │ │ ├── vitl14_pretrain.yaml │ │ │ │ ├── vits14_pretrain.yaml │ │ │ │ └── vitg14_pretrain.yaml │ │ │ ├── __init__.py │ │ │ └── ssl_default_config.yaml │ │ ├── run │ │ │ ├── __init__.py │ │ │ ├── eval │ │ │ │ ├── knn.py │ │ │ │ ├── linear.py │ │ │ │ └── log_regression.py │ │ │ ├── train │ │ │ │ └── train.py │ │ │ └── submit.py │ │ ├── eval │ │ │ ├── __init__.py │ │ │ ├── setup.py │ │ │ ├── metrics.py │ │ │ └── utils.py │ │ ├── utils │ │ │ ├── __init__.py │ │ │ ├── dtype.py │ │ │ ├── config.py │ │ │ ├── cluster.py │ │ │ ├── utils.py │ │ │ └── param_groups.py │ │ ├── __init__.py │ │ ├── data │ │ │ ├── datasets │ │ │ │ ├── __init__.py │ │ │ │ ├── decoders.py │ │ │ │ └── extended.py │ │ │ ├── __init__.py │ │ │ ├── adapters.py │ │ │ ├── collate.py │ │ │ ├── masking.py │ │ │ ├── transforms.py │ │ │ └── augmentations.py │ │ ├── train │ │ │ └── __init__.py │ │ ├── loss │ │ │ ├── __init__.py │ │ │ ├── koleo_loss.py │ │ │ └── dino_clstoken_loss.py │ │ ├── models │ │ │ └── __init__.py │ │ ├── logging │ │ │ └── __init__.py │ │ └── fsdp │ │ │ └── __init__.py │ ├── requirements.txt │ ├── scripts │ │ └── lint.sh │ ├── conda.yaml │ ├── pyproject.toml │ ├── CONTRIBUTING.md │ ├── utils.py │ ├── setup.py │ ├── CODE_OF_CONDUCT.md │ └── hubconf.py └── README.md ├── CalibrationMatrix_college_cpt.npz ├── zoedepth ├── models │ ├── __pycache__ │ │ ├── builder.cpython-39.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── model_io.cpython-39.pyc │ │ └── depth_model.cpython-39.pyc │ ├── base_models │ │ ├── __pycache__ │ │ │ ├── midas.cpython-39.pyc │ │ │ ├── __init__.cpython-39.pyc │ │ │ └── depth_anything.cpython-39.pyc │ │ ├── dpt_dinov2 │ │ │ ├── __pycache__ │ │ │ │ ├── dpt.cpython-39.pyc │ │ │ │ └── blocks.cpython-39.pyc │ │ │ └── blocks.py │ │ └── __init__.py │ ├── layers │ │ ├── __pycache__ │ │ │ ├── attractor.cpython-39.pyc │ │ │ ├── dist_layers.cpython-39.pyc │ │ │ ├── localbins_layers.cpython-39.pyc │ │ │ └── patch_transformer.cpython-39.pyc │ │ ├── patch_transformer.py │ │ └── dist_layers.py │ ├── zoedepth │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-39.pyc │ │ │ └── zoedepth_v1.cpython-39.pyc │ │ ├── config_zoedepth_kitti.json │ │ ├── __init__.py │ │ └── config_zoedepth.json │ ├── zoedepth_nk │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-39.pyc │ │ │ └── zoedepth_nk_v1.cpython-39.pyc │ │ ├── __init__.py │ │ └── config_zoedepth_nk.json │ ├── __init__.py │ ├── builder.py │ └── model_io.py ├── utils │ ├── __pycache__ │ │ ├── __init__.cpython-39.pyc │ │ ├── config.cpython-39.pyc │ │ └── arg_utils.cpython-39.pyc │ ├── easydict │ │ ├── 
__pycache__ │ │ │ └── __init__.cpython-39.pyc │ │ └── __init__.py │ ├── arg_utils.py │ ├── __init__.py │ └── geometry.py ├── data │ ├── __init__.py │ ├── ibims.py │ ├── diml_outdoor_test.py │ ├── diode.py │ ├── ddad.py │ ├── diml_indoor_test.py │ ├── sun_rgbd_loader.py │ ├── hypersim.py │ └── vkitti.py └── trainers │ └── builder.py ├── heic2png.py ├── calibration-camera.py ├── README.md └── depth_to_pointcloud.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | *.pt 3 | *.pth 4 | *.png 5 | *.ply -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/requirements.txt -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/requirements-dev.txt: -------------------------------------------------------------------------------- 1 | black==22.6.0 2 | flake8==5.0.4 3 | pylint==2.15.0 4 | -------------------------------------------------------------------------------- /CalibrationMatrix_college_cpt.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/CalibrationMatrix_college_cpt.npz -------------------------------------------------------------------------------- /torchhub/README.md: -------------------------------------------------------------------------------- 1 | # Local PyTorch Hub 2 | 3 | This directory is for loading the DINOv2 encoder locally in case of no Internet connection. 4 | -------------------------------------------------------------------------------- /zoedepth/models/__pycache__/builder.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/__pycache__/builder.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/utils/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/utils/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/utils/__pycache__/config.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/utils/__pycache__/config.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/__pycache__/model_io.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/__pycache__/model_io.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/utils/__pycache__/arg_utils.cpython-39.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/utils/__pycache__/arg_utils.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/__pycache__/depth_model.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/__pycache__/depth_model.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/base_models/__pycache__/midas.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/base_models/__pycache__/midas.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/layers/__pycache__/attractor.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/layers/__pycache__/attractor.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/zoedepth/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/zoedepth/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/utils/easydict/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/utils/easydict/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 120 3 | ignore = E203,E501,W503 4 | per-file-ignores = 5 | __init__.py:F401 6 | exclude = 7 | venv 8 | -------------------------------------------------------------------------------- /zoedepth/models/layers/__pycache__/dist_layers.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/layers/__pycache__/dist_layers.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/base_models/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/base_models/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/zoedepth/__pycache__/zoedepth_v1.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/zoedepth/__pycache__/zoedepth_v1.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/zoedepth_nk/__pycache__/__init__.cpython-39.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/zoedepth_nk/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/layers/__pycache__/localbins_layers.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/layers/__pycache__/localbins_layers.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/layers/__pycache__/patch_transformer.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/layers/__pycache__/patch_transformer.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/base_models/__pycache__/depth_anything.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/base_models/__pycache__/depth_anything.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/base_models/dpt_dinov2/__pycache__/dpt.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/base_models/dpt_dinov2/__pycache__/dpt.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/zoedepth_nk/__pycache__/zoedepth_nk_v1.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/zoedepth_nk/__pycache__/zoedepth_nk_v1.cpython-39.pyc -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/__pycache__/hubconf.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/torchhub/facebookresearch_dinov2_main/__pycache__/hubconf.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/base_models/dpt_dinov2/__pycache__/blocks.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/base_models/dpt_dinov2/__pycache__/blocks.cpython-39.pyc -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/torchhub/facebookresearch_dinov2_main/dinov2/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/mlp.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/mlp.cpython-39.pyc -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/__pycache__/vision_transformer.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/torchhub/facebookresearch_dinov2_main/__pycache__/vision_transformer.cpython-39.pyc -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/block.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/block.cpython-39.pyc -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/configs/train/vitl16_short.yaml: -------------------------------------------------------------------------------- 1 | # this corresponds to the default config 2 | train: 3 | dataset_path: ImageNet:split=TRAIN 4 | batch_size_per_gpu: 64 5 | student: 6 | block_chunks: 4 7 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/attention.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/attention.cpython-39.pyc -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/dino_head.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/dino_head.cpython-39.pyc -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/drop_path.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/drop_path.cpython-39.pyc -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/layer_scale.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/layer_scale.cpython-39.pyc -------------------------------------------------------------------------------- 
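The torchhub/README.md above explains that this directory exists so the DINOv2 encoder can be loaded without an Internet connection. A minimal sketch of such a local load via torch.hub, assuming the standard DINOv2 entrypoint names defined in torchhub/facebookresearch_dinov2_main/hubconf.py (the "dinov2_vitl14" entrypoint and the separate checkpoint handling are illustrative assumptions, not taken from this repository's own scripts):

import torch

# Load the vendored DINOv2 encoder from the local hub directory; source="local"
# makes torch.hub read hubconf.py from disk instead of contacting GitHub.
encoder = torch.hub.load(
    "torchhub/facebookresearch_dinov2_main",  # directory containing hubconf.py
    "dinov2_vitl14",                          # assumed entrypoint; see hubconf.py for the full list
    source="local",
    pretrained=False,                         # weights would then be loaded from a local .pth checkpoint
)
encoder.eval()

With pretrained=False no download is attempted, which is the point of keeping this copy of the hub directory inside the repository.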
/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/patch_embed.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/patch_embed.cpython-39.pyc -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/swiglu_ffn.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/swiglu_ffn.cpython-39.pyc -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/configs/eval/vitb14_pretrain.yaml: -------------------------------------------------------------------------------- 1 | student: 2 | arch: vit_base 3 | patch_size: 14 4 | crops: 5 | global_crops_size: 518 # this is to set up the position embeddings properly 6 | local_crops_size: 98 -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/configs/eval/vitl14_pretrain.yaml: -------------------------------------------------------------------------------- 1 | student: 2 | arch: vit_large 3 | patch_size: 14 4 | crops: 5 | global_crops_size: 518 # this is to set up the position embeddings properly 6 | local_crops_size: 98 -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/configs/eval/vits14_pretrain.yaml: -------------------------------------------------------------------------------- 1 | student: 2 | arch: vit_small 3 | patch_size: 14 4 | crops: 5 | global_crops_size: 518 # this is to set up the position embeddings properly 6 | local_crops_size: 98 -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/run/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/configs/eval/vitg14_pretrain.yaml: -------------------------------------------------------------------------------- 1 | student: 2 | arch: vit_giant2 3 | patch_size: 14 4 | ffn_layer: swiglufused 5 | crops: 6 | global_crops_size: 518 # this is to set up the position embeddings properly 7 | local_crops_size: 98 -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/eval/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | __version__ = "0.0.1" 8 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/requirements.txt: -------------------------------------------------------------------------------- 1 | --extra-index-url https://download.pytorch.org/whl/cu117 2 | torch==2.0.0 3 | torchvision==0.15.0 4 | omegaconf 5 | torchmetrics==0.10.3 6 | fvcore 7 | iopath 8 | xformers==0.0.18 9 | submitit 10 | --extra-index-url https://pypi.nvidia.com 11 | cuml-cu11 12 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .image_net import ImageNet 8 | from .image_net_22k import ImageNet22k 9 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/train/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .train import get_args_parser, main 8 | from .ssl_meta_arch import SSLMetaArch 9 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/loss/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | from .dino_clstoken_loss import DINOLoss 8 | from .ibot_patch_loss import iBOTPatchLoss 9 | from .koleo_loss import KoLeoLoss 10 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/scripts/lint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | if [ -n "$1" ]; then 4 | echo "linting \"$1\"" 5 | fi 6 | 7 | echo "running black" 8 | if [ -n "$1" ]; then 9 | black "$1" 10 | else 11 | black dinov2 12 | fi 13 | 14 | echo "running flake8" 15 | if [ -n "$1" ]; then 16 | flake8 "$1" 17 | else 18 | flake8 19 | fi 20 | 21 | echo "running pylint" 22 | if [ -n "$1" ]; then 23 | pylint "$1" 24 | else 25 | pylint dinov2 26 | fi 27 | 28 | exit 0 29 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .dino_head import DINOHead 8 | from .mlp import Mlp 9 | from .patch_embed import PatchEmbed 10 | from .swiglu_ffn import SwiGLUFFN, SwiGLUFFNFused 11 | from .block import NestedTensorBlock 12 | from .attention import MemEffAttention 13 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | from .adapters import DatasetWithEnumeratedTargets 8 | from .loaders import make_data_loader, make_dataset, SamplerType 9 | from .collate import collate_data_and_cast 10 | from .masking import MaskingGenerator 11 | from .augmentations import DataAugmentationDINO 12 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/conda.yaml: -------------------------------------------------------------------------------- 1 | name: dinov2 2 | channels: 3 | - defaults 4 | - pytorch 5 | - nvidia 6 | - xformers 7 | - conda-forge 8 | dependencies: 9 | - python=3.9 10 | - pytorch::pytorch=2.0.0 11 | - pytorch::pytorch-cuda=11.7.0 12 | - pytorch::torchvision=0.15.0 13 | - omegaconf 14 | - torchmetrics=0.10.3 15 | - fvcore 16 | - iopath 17 | - xformers::xformers=0.0.18 18 | - pip 19 | - pip: 20 | - git+https://github.com/facebookincubator/submitit 21 | - --extra-index-url https://pypi.nvidia.com 22 | - cuml-cu11 23 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 120 3 | 4 | [tool.pylint.master] 5 | persistent = false 6 | score = false 7 | 8 | [tool.pylint.messages_control] 9 | disable = "all" 10 | enable = [ 11 | "miscellaneous", 12 | "similarities", 13 | ] 14 | 15 | [tool.pylint.similarities] 16 | ignore-comments = true 17 | ignore-docstrings = true 18 | ignore-imports = true 19 | min-similarity-lines = 8 20 | 21 | [tool.pylint.reports] 22 | reports = false 23 | 24 | [tool.pylint.miscellaneous] 25 | notes = [ 26 | "FIXME", 27 | "XXX", 28 | "TODO", 29 | ] 30 | -------------------------------------------------------------------------------- /zoedepth/models/zoedepth/config_zoedepth_kitti.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "bin_centers_type": "normed", 4 | "img_size": [384, 768] 5 | }, 6 | 7 | "train": { 8 | }, 9 | 10 | "infer":{ 11 | "train_midas": false, 12 | "use_pretrained_midas": false, 13 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt", 14 | "force_keep_ar": true 15 | }, 16 | 17 | "eval":{ 18 | "train_midas": false, 19 | "use_pretrained_midas": false, 20 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt" 21 | } 22 | } -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/configs/train/vitl14.yaml: -------------------------------------------------------------------------------- 1 | dino: 2 | head_n_prototypes: 131072 3 | head_bottleneck_dim: 384 4 | ibot: 5 | separate_head: true 6 | head_n_prototypes: 131072 7 | train: 8 | batch_size_per_gpu: 32 9 | dataset_path: ImageNet22k 10 | centering: sinkhorn_knopp 11 | student: 12 | arch: vit_large 13 | patch_size: 14 14 | drop_path_rate: 0.4 15 | ffn_layer: swiglufused 16 | block_chunks: 4 17 | teacher: 18 | momentum_teacher: 0.994 19 | optim: 20 | epochs: 500 21 | weight_decay_end: 0.2 22 | base_lr: 2.0e-04 # learning rate for a batch size of 1024 23 | warmup_epochs: 80 24 | layerwise_decay: 1.0 25 | crops: 26 | local_crops_size: 98 -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/configs/train/vitg14.yaml: 
-------------------------------------------------------------------------------- 1 | dino: 2 | head_n_prototypes: 131072 3 | head_bottleneck_dim: 384 4 | ibot: 5 | separate_head: true 6 | head_n_prototypes: 131072 7 | train: 8 | batch_size_per_gpu: 12 9 | dataset_path: ImageNet22k 10 | centering: sinkhorn_knopp 11 | student: 12 | arch: vit_giant2 13 | patch_size: 14 14 | drop_path_rate: 0.4 15 | ffn_layer: swiglufused 16 | block_chunks: 4 17 | teacher: 18 | momentum_teacher: 0.994 19 | optim: 20 | epochs: 500 21 | weight_decay_end: 0.2 22 | base_lr: 2.0e-04 # learning rate for a batch size of 1024 23 | warmup_epochs: 80 24 | layerwise_decay: 1.0 25 | crops: 26 | local_crops_size: 98 -------------------------------------------------------------------------------- /zoedepth/utils/arg_utils.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def infer_type(x): # hacky way to infer type from string args 4 | if not isinstance(x, str): 5 | return x 6 | 7 | try: 8 | x = int(x) 9 | return x 10 | except ValueError: 11 | pass 12 | 13 | try: 14 | x = float(x) 15 | return x 16 | except ValueError: 17 | pass 18 | 19 | return x 20 | 21 | 22 | def parse_unknown(unknown_args): 23 | clean = [] 24 | for a in unknown_args: 25 | if "=" in a: 26 | k, v = a.split("=") 27 | clean.extend([k, v]) 28 | else: 29 | clean.append(a) 30 | 31 | keys = clean[::2] 32 | values = clean[1::2] 33 | return {k.replace("--", ""): infer_type(v) for k, v in zip(keys, values)} 34 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/configs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import pathlib 8 | 9 | from omegaconf import OmegaConf 10 | 11 | 12 | def load_config(config_name: str): 13 | config_filename = config_name + ".yaml" 14 | return OmegaConf.load(pathlib.Path(__file__).parent.resolve() / config_filename) 15 | 16 | 17 | dinov2_default_config = load_config("ssl_default_config") 18 | 19 | 20 | def load_and_merge_config(config_name: str): 21 | default_config = OmegaConf.create(dinov2_default_config) 22 | loaded_config = load_config(config_name) 23 | return OmegaConf.merge(default_config, loaded_config) 24 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/data/datasets/decoders.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | from io import BytesIO 8 | from typing import Any 9 | 10 | from PIL import Image 11 | 12 | 13 | class Decoder: 14 | def decode(self) -> Any: 15 | raise NotImplementedError 16 | 17 | 18 | class ImageDataDecoder(Decoder): 19 | def __init__(self, image_data: bytes) -> None: 20 | self._image_data = image_data 21 | 22 | def decode(self) -> Image: 23 | f = BytesIO(self._image_data) 24 | return Image.open(f).convert(mode="RGB") 25 | 26 | 27 | class TargetDecoder(Decoder): 28 | def __init__(self, target: Any): 29 | self._target = target 30 | 31 | def decode(self) -> Any: 32 | return self._target 33 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/layer_scale.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | # Modified from: https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/vision_transformer.py#L103-L110 8 | 9 | from typing import Union 10 | 11 | import torch 12 | from torch import Tensor 13 | from torch import nn 14 | 15 | 16 | class LayerScale(nn.Module): 17 | def __init__( 18 | self, 19 | dim: int, 20 | init_values: Union[float, Tensor] = 1e-5, 21 | inplace: bool = False, 22 | ) -> None: 23 | super().__init__() 24 | self.inplace = inplace 25 | self.gamma = nn.Parameter(init_values * torch.ones(dim)) 26 | 27 | def forward(self, x: Tensor) -> Tensor: 28 | return x.mul_(self.gamma) if self.inplace else x * self.gamma 29 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/data/adapters.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | from typing import Any, Tuple 8 | 9 | from torch.utils.data import Dataset 10 | 11 | 12 | class DatasetWithEnumeratedTargets(Dataset): 13 | def __init__(self, dataset): 14 | self._dataset = dataset 15 | 16 | def get_image_data(self, index: int) -> bytes: 17 | return self._dataset.get_image_data(index) 18 | 19 | def get_target(self, index: int) -> Tuple[Any, int]: 20 | target = self._dataset.get_target(index) 21 | return (index, target) 22 | 23 | def __getitem__(self, index: int) -> Tuple[Any, Tuple[Any, int]]: 24 | image, target = self._dataset[index] 25 | target = index if target is None else target 26 | return image, (index, target) 27 | 28 | def __len__(self) -> int: 29 | return len(self._dataset) 30 | -------------------------------------------------------------------------------- /heic2png.py: -------------------------------------------------------------------------------- 1 | import os 2 | from PIL import Image 3 | from pillow_heif import register_heif_opener 4 | 5 | # Register the HEIF opener 6 | register_heif_opener() 7 | 8 | # Path to the directory containing HEIC images 9 | directory_path = 'my_test/input/indoor' 10 | 11 | # Loop through all files in the directory 12 | for filename in os.listdir(directory_path): 13 | if filename.lower().endswith('.heic'): 14 | # Construct full file path 15 | heic_file_path = os.path.join(directory_path, filename) 16 | 17 | # Open the HEIC image 18 | image = Image.open(heic_file_path) 19 | 20 | # Create the output PNG file path 21 | png_file_path = os.path.join(directory_path, os.path.splitext(filename)[0] + '.png') 22 | 23 | # Save the image as PNG 24 | image.save(png_file_path, format='PNG') 25 | 26 | # Remove the original HEIC file 27 | os.remove(heic_file_path) 28 | 29 | print(f"Converted and removed: {filename}") 30 | 31 | print("All HEIC files have been converted to PNG and the originals have been removed.") 32 | -------------------------------------------------------------------------------- /zoedepth/data/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /zoedepth/models/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /zoedepth/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /zoedepth/models/base_models/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/utils/dtype.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | from typing import Dict, Union 9 | 10 | import numpy as np 11 | import torch 12 | 13 | 14 | TypeSpec = Union[str, np.dtype, torch.dtype] 15 | 16 | 17 | _NUMPY_TO_TORCH_DTYPE: Dict[np.dtype, torch.dtype] = { 18 | np.dtype("bool"): torch.bool, 19 | np.dtype("uint8"): torch.uint8, 20 | np.dtype("int8"): torch.int8, 21 | np.dtype("int16"): torch.int16, 22 | np.dtype("int32"): torch.int32, 23 | np.dtype("int64"): torch.int64, 24 | np.dtype("float16"): torch.float16, 25 | np.dtype("float32"): torch.float32, 26 | np.dtype("float64"): torch.float64, 27 | np.dtype("complex64"): torch.complex64, 28 | np.dtype("complex128"): torch.complex128, 29 | } 30 | 31 | 32 | def as_torch_dtype(dtype: TypeSpec) -> torch.dtype: 33 | if isinstance(dtype, torch.dtype): 34 | return dtype 35 | if isinstance(dtype, str): 36 | dtype = np.dtype(dtype) 37 | assert isinstance(dtype, np.dtype), f"Expected an instance of nunpy dtype, got {type(dtype)}" 38 | return _NUMPY_TO_TORCH_DTYPE[dtype] 39 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/drop_path.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | # References: 8 | # https://github.com/facebookresearch/dino/blob/master/vision_transformer.py 9 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/drop.py 10 | 11 | 12 | from torch import nn 13 | 14 | 15 | def drop_path(x, drop_prob: float = 0.0, training: bool = False): 16 | if drop_prob == 0.0 or not training: 17 | return x 18 | keep_prob = 1 - drop_prob 19 | shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets 20 | random_tensor = x.new_empty(shape).bernoulli_(keep_prob) 21 | if keep_prob > 0.0: 22 | random_tensor.div_(keep_prob) 23 | output = x * random_tensor 24 | return output 25 | 26 | 27 | class DropPath(nn.Module): 28 | """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).""" 29 | 30 | def __init__(self, drop_prob=None): 31 | super(DropPath, self).__init__() 32 | self.drop_prob = drop_prob 33 | 34 | def forward(self, x): 35 | return drop_path(x, self.drop_prob, self.training) 36 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to DINOv2 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Pull Requests 6 | We actively welcome your pull requests. 7 | 8 | 1. Fork the repo and create your branch from `main`. 9 | 2. If you've added code that should be tested, add tests. 10 | 3. If you've changed APIs, update the documentation. 11 | 4. Ensure the test suite passes. 12 | 5. Make sure your code lints. 13 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 14 | 15 | ## Contributor License Agreement ("CLA") 16 | In order to accept your pull request, we need you to submit a CLA. You only need 17 | to do this once to work on any of Meta's open source projects. 18 | 19 | Complete your CLA here: 20 | 21 | ## Issues 22 | We use GitHub issues to track public bugs. Please ensure your description is 23 | clear and has sufficient instructions to be able to reproduce the issue. 24 | 25 | Meta has a [bounty program](https://www.facebook.com/whitehat/) for the safe 26 | disclosure of security bugs. In those cases, please go through the process 27 | outlined on that page and do not file a public issue. 28 | 29 | ## License 30 | By contributing to DINOv2, you agree that your contributions will be licensed 31 | under the LICENSE file in the root directory of this source tree. 32 | -------------------------------------------------------------------------------- /zoedepth/models/zoedepth/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 
14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from .zoedepth_v1 import ZoeDepth 26 | 27 | all_versions = { 28 | "v1": ZoeDepth, 29 | } 30 | 31 | get_version = lambda v : all_versions[v] -------------------------------------------------------------------------------- /zoedepth/models/zoedepth_nk/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from .zoedepth_nk_v1 import ZoeDepthNK 26 | 27 | all_versions = { 28 | "v1": ZoeDepthNK, 29 | } 30 | 31 | get_version = lambda v : all_versions[v] -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/data/datasets/extended.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | from typing import Any, Tuple 8 | 9 | from torchvision.datasets import VisionDataset 10 | 11 | from .decoders import TargetDecoder, ImageDataDecoder 12 | 13 | 14 | class ExtendedVisionDataset(VisionDataset): 15 | def __init__(self, *args, **kwargs) -> None: 16 | super().__init__(*args, **kwargs) # type: ignore 17 | 18 | def get_image_data(self, index: int) -> bytes: 19 | raise NotImplementedError 20 | 21 | def get_target(self, index: int) -> Any: 22 | raise NotImplementedError 23 | 24 | def __getitem__(self, index: int) -> Tuple[Any, Any]: 25 | try: 26 | image_data = self.get_image_data(index) 27 | image = ImageDataDecoder(image_data).decode() 28 | except Exception as e: 29 | raise RuntimeError(f"can not read image for sample {index}") from e 30 | target = self.get_target(index) 31 | target = TargetDecoder(target).decode() 32 | 33 | if self.transforms is not None: 34 | image, target = self.transforms(image, target) 35 | 36 | return image, target 37 | 38 | def __len__(self) -> int: 39 | raise NotImplementedError 40 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # 3 | # This source code is licensed under the Apache License, Version 2.0 4 | # found in the LICENSE file in the root directory of this source tree. 5 | 6 | import itertools 7 | import math 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | 14 | _DINOV2_BASE_URL = "https://dl.fbaipublicfiles.com/dinov2" 15 | 16 | 17 | def _make_dinov2_model_name(arch_name: str, patch_size: int, num_register_tokens: int = 0) -> str: 18 | compact_arch_name = arch_name.replace("_", "")[:4] 19 | registers_suffix = f"_reg{num_register_tokens}" if num_register_tokens else "" 20 | return f"dinov2_{compact_arch_name}{patch_size}{registers_suffix}" 21 | 22 | 23 | class CenterPadding(nn.Module): 24 | def __init__(self, multiple): 25 | super().__init__() 26 | self.multiple = multiple 27 | 28 | def _get_pad(self, size): 29 | new_size = math.ceil(size / self.multiple) * self.multiple 30 | pad_size = new_size - size 31 | pad_size_left = pad_size // 2 32 | pad_size_right = pad_size - pad_size_left 33 | return pad_size_left, pad_size_right 34 | 35 | @torch.inference_mode() 36 | def forward(self, x): 37 | pads = list(itertools.chain.from_iterable(self._get_pad(m) for m in x.shape[:1:-1])) 38 | output = F.pad(x, pads) 39 | return output 40 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/mlp.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | # References: 8 | # https://github.com/facebookresearch/dino/blob/master/vision_transformer.py 9 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/mlp.py 10 | 11 | 12 | from typing import Callable, Optional 13 | 14 | from torch import Tensor, nn 15 | 16 | 17 | class Mlp(nn.Module): 18 | def __init__( 19 | self, 20 | in_features: int, 21 | hidden_features: Optional[int] = None, 22 | out_features: Optional[int] = None, 23 | act_layer: Callable[..., nn.Module] = nn.GELU, 24 | drop: float = 0.0, 25 | bias: bool = True, 26 | ) -> None: 27 | super().__init__() 28 | out_features = out_features or in_features 29 | hidden_features = hidden_features or in_features 30 | self.fc1 = nn.Linear(in_features, hidden_features, bias=bias) 31 | self.act = act_layer() 32 | self.fc2 = nn.Linear(hidden_features, out_features, bias=bias) 33 | self.drop = nn.Dropout(drop) 34 | 35 | def forward(self, x: Tensor) -> Tensor: 36 | x = self.fc1(x) 37 | x = self.act(x) 38 | x = self.drop(x) 39 | x = self.fc2(x) 40 | x = self.drop(x) 41 | return x 42 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import logging 8 | 9 | from . import vision_transformer as vits 10 | 11 | 12 | logger = logging.getLogger("dinov2") 13 | 14 | 15 | def build_model(args, only_teacher=False, img_size=224): 16 | args.arch = args.arch.removesuffix("_memeff") 17 | if "vit" in args.arch: 18 | vit_kwargs = dict( 19 | img_size=img_size, 20 | patch_size=args.patch_size, 21 | init_values=args.layerscale, 22 | ffn_layer=args.ffn_layer, 23 | block_chunks=args.block_chunks, 24 | qkv_bias=args.qkv_bias, 25 | proj_bias=args.proj_bias, 26 | ffn_bias=args.ffn_bias, 27 | ) 28 | teacher = vits.__dict__[args.arch](**vit_kwargs) 29 | if only_teacher: 30 | return teacher, teacher.embed_dim 31 | student = vits.__dict__[args.arch]( 32 | **vit_kwargs, 33 | drop_path_rate=args.drop_path_rate, 34 | drop_path_uniform=args.drop_path_uniform, 35 | ) 36 | embed_dim = student.embed_dim 37 | return student, teacher, embed_dim 38 | 39 | 40 | def build_model_from_cfg(cfg, only_teacher=False): 41 | return build_model(cfg.student, only_teacher=only_teacher, img_size=cfg.crops.global_crops_size) 42 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/loss/koleo_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import logging 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | # import torch.distributed as dist 14 | 15 | 16 | logger = logging.getLogger("dinov2") 17 | 18 | 19 | class KoLeoLoss(nn.Module): 20 | """Kozachenko-Leonenko entropic loss regularizer from Sablayrolles et al. 
- 2018 - Spreading vectors for similarity search""" 21 | 22 | def __init__(self): 23 | super().__init__() 24 | self.pdist = nn.PairwiseDistance(2, eps=1e-8) 25 | 26 | def pairwise_NNs_inner(self, x): 27 | """ 28 | Pairwise nearest neighbors for L2-normalized vectors. 29 | Uses Torch rather than Faiss to remain on GPU. 30 | """ 31 | # parwise dot products (= inverse distance) 32 | dots = torch.mm(x, x.t()) 33 | n = x.shape[0] 34 | dots.view(-1)[:: (n + 1)].fill_(-1) # Trick to fill diagonal with -1 35 | # max inner prod -> min distance 36 | _, I = torch.max(dots, dim=1) # noqa: E741 37 | return I 38 | 39 | def forward(self, student_output, eps=1e-8): 40 | """ 41 | Args: 42 | student_output (BxD): backbone output of student 43 | """ 44 | with torch.cuda.amp.autocast(enabled=False): 45 | student_output = F.normalize(student_output, eps=eps, p=2, dim=-1) 46 | I = self.pairwise_NNs_inner(student_output) # noqa: E741 47 | distances = self.pdist(student_output, student_output[I]) # BxD, BxD -> B 48 | loss = -torch.log(distances + eps).mean() 49 | return loss 50 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/run/eval/knn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import logging 8 | import os 9 | import sys 10 | 11 | from dinov2.eval.knn import get_args_parser as get_knn_args_parser 12 | from dinov2.logging import setup_logging 13 | from dinov2.run.submit import get_args_parser, submit_jobs 14 | 15 | 16 | logger = logging.getLogger("dinov2") 17 | 18 | 19 | class Evaluator: 20 | def __init__(self, args): 21 | self.args = args 22 | 23 | def __call__(self): 24 | from dinov2.eval.knn import main as knn_main 25 | 26 | self._setup_args() 27 | knn_main(self.args) 28 | 29 | def checkpoint(self): 30 | import submitit 31 | 32 | logger.info(f"Requeuing {self.args}") 33 | empty = type(self)(self.args) 34 | return submitit.helpers.DelayedSubmission(empty) 35 | 36 | def _setup_args(self): 37 | import submitit 38 | 39 | job_env = submitit.JobEnvironment() 40 | self.args.output_dir = self.args.output_dir.replace("%j", str(job_env.job_id)) 41 | logger.info(f"Process group: {job_env.num_tasks} tasks, rank: {job_env.global_rank}") 42 | logger.info(f"Args: {self.args}") 43 | 44 | 45 | def main(): 46 | description = "Submitit launcher for DINOv2 k-NN evaluation" 47 | knn_args_parser = get_knn_args_parser(add_help=False) 48 | parents = [knn_args_parser] 49 | args_parser = get_args_parser(description=description, parents=parents) 50 | args = args_parser.parse_args() 51 | 52 | setup_logging() 53 | 54 | assert os.path.exists(args.config_file), "Configuration file does not exist!" 55 | submit_jobs(Evaluator, args, name="dinov2:knn") 56 | return 0 57 | 58 | 59 | if __name__ == "__main__": 60 | sys.exit(main()) 61 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/run/train/train.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import logging 8 | import os 9 | import sys 10 | 11 | from dinov2.logging import setup_logging 12 | from dinov2.train import get_args_parser as get_train_args_parser 13 | from dinov2.run.submit import get_args_parser, submit_jobs 14 | 15 | 16 | logger = logging.getLogger("dinov2") 17 | 18 | 19 | class Trainer(object): 20 | def __init__(self, args): 21 | self.args = args 22 | 23 | def __call__(self): 24 | from dinov2.train import main as train_main 25 | 26 | self._setup_args() 27 | train_main(self.args) 28 | 29 | def checkpoint(self): 30 | import submitit 31 | 32 | logger.info(f"Requeuing {self.args}") 33 | empty = type(self)(self.args) 34 | return submitit.helpers.DelayedSubmission(empty) 35 | 36 | def _setup_args(self): 37 | import submitit 38 | 39 | job_env = submitit.JobEnvironment() 40 | self.args.output_dir = self.args.output_dir.replace("%j", str(job_env.job_id)) 41 | logger.info(f"Process group: {job_env.num_tasks} tasks, rank: {job_env.global_rank}") 42 | logger.info(f"Args: {self.args}") 43 | 44 | 45 | def main(): 46 | description = "Submitit launcher for DINOv2 training" 47 | train_args_parser = get_train_args_parser(add_help=False) 48 | parents = [train_args_parser] 49 | args_parser = get_args_parser(description=description, parents=parents) 50 | args = args_parser.parse_args() 51 | 52 | setup_logging() 53 | 54 | assert os.path.exists(args.config_file), "Configuration file does not exist!" 55 | submit_jobs(Trainer, args, name="dinov2:train") 56 | return 0 57 | 58 | 59 | if __name__ == "__main__": 60 | sys.exit(main()) 61 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/run/eval/linear.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import logging 8 | import os 9 | import sys 10 | 11 | from dinov2.eval.linear import get_args_parser as get_linear_args_parser 12 | from dinov2.logging import setup_logging 13 | from dinov2.run.submit import get_args_parser, submit_jobs 14 | 15 | 16 | logger = logging.getLogger("dinov2") 17 | 18 | 19 | class Evaluator: 20 | def __init__(self, args): 21 | self.args = args 22 | 23 | def __call__(self): 24 | from dinov2.eval.linear import main as linear_main 25 | 26 | self._setup_args() 27 | linear_main(self.args) 28 | 29 | def checkpoint(self): 30 | import submitit 31 | 32 | logger.info(f"Requeuing {self.args}") 33 | empty = type(self)(self.args) 34 | return submitit.helpers.DelayedSubmission(empty) 35 | 36 | def _setup_args(self): 37 | import submitit 38 | 39 | job_env = submitit.JobEnvironment() 40 | self.args.output_dir = self.args.output_dir.replace("%j", str(job_env.job_id)) 41 | logger.info(f"Process group: {job_env.num_tasks} tasks, rank: {job_env.global_rank}") 42 | logger.info(f"Args: {self.args}") 43 | 44 | 45 | def main(): 46 | description = "Submitit launcher for DINOv2 linear evaluation" 47 | linear_args_parser = get_linear_args_parser(add_help=False) 48 | parents = [linear_args_parser] 49 | args_parser = get_args_parser(description=description, parents=parents) 50 | args = args_parser.parse_args() 51 | 52 | setup_logging() 53 | 54 | assert os.path.exists(args.config_file), "Configuration file does not exist!" 
55 | submit_jobs(Evaluator, args, name="dinov2:linear") 56 | return 0 57 | 58 | 59 | if __name__ == "__main__": 60 | sys.exit(main()) 61 | -------------------------------------------------------------------------------- /zoedepth/models/zoedepth/config_zoedepth.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "ZoeDepth", 4 | "version_name": "v1", 5 | "n_bins": 64, 6 | "bin_embedding_dim": 128, 7 | "bin_centers_type": "softplus", 8 | "n_attractors":[16, 8, 4, 1], 9 | "attractor_alpha": 1000, 10 | "attractor_gamma": 2, 11 | "attractor_kind" : "mean", 12 | "attractor_type" : "inv", 13 | "midas_model_type" : "DPT_BEiT_L_384", 14 | "min_temp": 0.0212, 15 | "max_temp": 50.0, 16 | "output_distribution": "logbinomial", 17 | "memory_efficient": true, 18 | "inverse_midas": false, 19 | "img_size": [392, 518] 20 | }, 21 | 22 | "train": { 23 | "train_midas": true, 24 | "use_pretrained_midas": true, 25 | "trainer": "zoedepth", 26 | "epochs": 5, 27 | "bs": 16, 28 | "optim_kwargs": {"lr": 0.000161, "wd": 0.01}, 29 | "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true}, 30 | "same_lr": false, 31 | "w_si": 1, 32 | "w_domain": 0.2, 33 | "w_reg": 0, 34 | "w_grad": 0, 35 | "avoid_boundary": false, 36 | "random_crop": false, 37 | "input_width": 640, 38 | "input_height": 480, 39 | "midas_lr_factor": 50, 40 | "encoder_lr_factor":50, 41 | "pos_enc_lr_factor":50, 42 | "freeze_midas_bn": true 43 | 44 | }, 45 | 46 | "infer":{ 47 | "train_midas": false, 48 | "use_pretrained_midas": false, 49 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_N.pt", 50 | "force_keep_ar": true 51 | }, 52 | 53 | "eval":{ 54 | "train_midas": false, 55 | "use_pretrained_midas": false, 56 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_N.pt" 57 | } 58 | } -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/run/eval/log_regression.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import logging 8 | import os 9 | import sys 10 | 11 | from dinov2.eval.log_regression import get_args_parser as get_log_regression_args_parser 12 | from dinov2.logging import setup_logging 13 | from dinov2.run.submit import get_args_parser, submit_jobs 14 | 15 | 16 | logger = logging.getLogger("dinov2") 17 | 18 | 19 | class Evaluator: 20 | def __init__(self, args): 21 | self.args = args 22 | 23 | def __call__(self): 24 | from dinov2.eval.log_regression import main as log_regression_main 25 | 26 | self._setup_args() 27 | log_regression_main(self.args) 28 | 29 | def checkpoint(self): 30 | import submitit 31 | 32 | logger.info(f"Requeuing {self.args}") 33 | empty = type(self)(self.args) 34 | return submitit.helpers.DelayedSubmission(empty) 35 | 36 | def _setup_args(self): 37 | import submitit 38 | 39 | job_env = submitit.JobEnvironment() 40 | self.args.output_dir = self.args.output_dir.replace("%j", str(job_env.job_id)) 41 | logger.info(f"Process group: {job_env.num_tasks} tasks, rank: {job_env.global_rank}") 42 | logger.info(f"Args: {self.args}") 43 | 44 | 45 | def main(): 46 | description = "Submitit launcher for DINOv2 logistic evaluation" 47 | log_regression_args_parser = get_log_regression_args_parser(add_help=False) 48 | parents = [log_regression_args_parser] 49 | args_parser = get_args_parser(description=description, parents=parents) 50 | args = args_parser.parse_args() 51 | 52 | setup_logging() 53 | 54 | assert os.path.exists(args.config_file), "Configuration file does not exist!" 55 | submit_jobs(Evaluator, args, name="dinov2:logreg") 56 | return 0 57 | 58 | 59 | if __name__ == "__main__": 60 | sys.exit(main()) 61 | -------------------------------------------------------------------------------- /calibration-camera.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import glob 4 | 5 | # Define the dimensions of the checkerboard 6 | CHECKERBOARD = (9, 6) 7 | # Define the real-world size of the squares in meters (e.g., 20mm = 0.02 meters) 8 | SQUARE_SIZE = 0.024 9 | 10 | criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001) 11 | 12 | # Create arrays to store object points and image points from all the images 13 | objpoints = [] 14 | imgpoints = [] 15 | 16 | # Prepare the object points 17 | objp = np.zeros((CHECKERBOARD[0] * CHECKERBOARD[1], 3), np.float32) 18 | objp[:, :2] = np.mgrid[0:CHECKERBOARD[0], 0:CHECKERBOARD[1]].T.reshape(-1, 2) 19 | objp = objp * SQUARE_SIZE # Scale the object points by the real size of the squares 20 | 21 | # Get the paths of all the images 22 | images = glob.glob('chessboard_calibration/*.png') # Update with the path to your images 23 | 24 | for image_file in images: 25 | print(image_file) 26 | img = cv2.imread(image_file) 27 | gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 28 | 29 | # Find the checkerboard corners 30 | ret, corners = cv2.findChessboardCorners(gray, CHECKERBOARD, None) 31 | 32 | # If found, add object points and image points (after refining them) 33 | if ret: 34 | objpoints.append(objp) 35 | corners2 = cv2.cornerSubPix(gray, corners, (11, 11), (-1, -1), criteria) 36 | imgpoints.append(corners2) 37 | 38 | # Draw and display the corners 39 | img = cv2.drawChessboardCorners(img, CHECKERBOARD, corners2, ret) 40 | else: 41 | print(f"Checkerboard not detected in image: {image_file}") 42 | 43 | # Calibrate the camera 44 | ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(objpoints, imgpoints, gray.shape[::-1], None, None) 45 | 46 | # Save the 
calibration results 47 | np.savez( 48 | "CalibrationMatrix_college_cpt", 49 | Camera_matrix=mtx, 50 | distCoeff=dist, 51 | RotationalV=rvecs, 52 | TranslationV=tvecs 53 | ) 54 | 55 | # Extract the focal lengths 56 | fx = mtx[0, 0] 57 | fy = mtx[1, 1] 58 | 59 | print(f"Focal length in x direction (fx): {fx}") 60 | print(f"Focal length in y direction (fy): {fy}") 61 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/swiglu_ffn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from typing import Callable, Optional 8 | 9 | from torch import Tensor, nn 10 | import torch.nn.functional as F 11 | 12 | 13 | class SwiGLUFFN(nn.Module): 14 | def __init__( 15 | self, 16 | in_features: int, 17 | hidden_features: Optional[int] = None, 18 | out_features: Optional[int] = None, 19 | act_layer: Callable[..., nn.Module] = None, 20 | drop: float = 0.0, 21 | bias: bool = True, 22 | ) -> None: 23 | super().__init__() 24 | out_features = out_features or in_features 25 | hidden_features = hidden_features or in_features 26 | self.w12 = nn.Linear(in_features, 2 * hidden_features, bias=bias) 27 | self.w3 = nn.Linear(hidden_features, out_features, bias=bias) 28 | 29 | def forward(self, x: Tensor) -> Tensor: 30 | x12 = self.w12(x) 31 | x1, x2 = x12.chunk(2, dim=-1) 32 | hidden = F.silu(x1) * x2 33 | return self.w3(hidden) 34 | 35 | 36 | try: 37 | from xformers.ops import SwiGLU 38 | 39 | XFORMERS_AVAILABLE = True 40 | except ImportError: 41 | SwiGLU = SwiGLUFFN 42 | XFORMERS_AVAILABLE = False 43 | 44 | 45 | class SwiGLUFFNFused(SwiGLU): 46 | def __init__( 47 | self, 48 | in_features: int, 49 | hidden_features: Optional[int] = None, 50 | out_features: Optional[int] = None, 51 | act_layer: Callable[..., nn.Module] = None, 52 | drop: float = 0.0, 53 | bias: bool = True, 54 | ) -> None: 55 | out_features = out_features or in_features 56 | hidden_features = hidden_features or in_features 57 | hidden_features = (int(hidden_features * 2 / 3) + 7) // 8 * 8 58 | super().__init__( 59 | in_features=in_features, 60 | hidden_features=hidden_features, 61 | out_features=out_features, 62 | bias=bias, 63 | ) 64 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Metric Point Cloud Creation from Single Images using Depth Anything Model 2 | 3 | This project leverages the Depth Anything model to create metric point clouds from single images. This repository includes all necessary scripts and files to perform depth estimation and generate accurate 3D point clouds with real-world measurements. 4 | 5 | ### Requirements 6 | 7 | ------------ 8 | 9 | To install the required packages, run: 10 | `pip install -r requirements.txt` 11 | 12 | ### Installation 13 | 14 | ------------ 15 | 16 | 1. Clone this repository: 17 | `git clone https://github.com/bohdanvodianyk/image-to-pcd.git` 18 | `cd image-to-pcd` 19 | 2. Install the necessary Python packages: 20 | `pip install -r requirements.txt` 21 | 3. 
Download the model checkpoints from [Google Drive](https://drive.google.com/drive/folders/1LJRnpOhNuzZXlVE0oGzzUb7ZiXeF6f-8?usp=sharing "Google Drive") and place them in the appropriate directory within the project. 22 | 23 | ### Usage 24 | 25 | ------------ 26 | 27 | #### Calibration 28 | Before generating point clouds, calibrate your camera using the `calibration-camera.py` script. Ensure you have a chessboard pattern printed for the calibration process. 29 | 30 | #### Depth Estimation to Point Cloud 31 | To convert depth maps into metric point clouds, use the `depth_to_pointcloud.py` script. Ensure your input image is correctly formatted and accessible. 32 | 33 | #### HEIC to PNG Conversion 34 | If your input images are in HEIC format, convert them to PNG using the `heic2png.py` script. 35 | 36 | ### Model Checkpoints 37 | 38 | ------------ 39 | 40 | Model checkpoints necessary for depth estimation can be downloaded from the following Google Drive link: 41 | 42 | [Google Drive - Model Checkpoints](https://drive.google.com/drive/folders/1LJRnpOhNuzZXlVE0oGzzUb7ZiXeF6f-8?usp=sharing "Google Drive - Model Checkpoints") 43 | 44 | Download and place these checkpoints in the appropriate directory within your project to ensure the model functions correctly. 45 | 46 | ### Acknowledgements 47 | 48 | ------------ 49 | 50 | Special thanks to the developers of the **Depth Anything** model and all contributors who made this project possible. Your work in computer vision and deep learning is greatly appreciated. 51 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/data/collate.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
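# --- Illustrative sketch (editorial addition, not part of the upstream DINOv2 collate.py
# that follows). It shows, under stated assumptions, the workflow the README's
# "Depth Estimation to Point Cloud" section describes: load the intrinsics saved by
# calibration-camera.py and back-project a metric depth map into a point cloud.
# The depth-map path and the use of Open3D are assumptions; the real pipeline lives in
# depth_to_pointcloud.py, which is not reproduced in this listing.
import numpy as np
import open3d as o3d

calib = np.load("CalibrationMatrix_college_cpt.npz")
K = calib["Camera_matrix"]              # 3x3 intrinsics from cv2.calibrateCamera
fx, fy = K[0, 0], K[1, 1]
cx, cy = K[0, 2], K[1, 2]

depth = np.load("depth_metric.npy")     # placeholder: H x W depth map in meters
h, w = depth.shape
u, v = np.meshgrid(np.arange(w), np.arange(h))

# Pinhole back-projection: X = (u - cx) * Z / fx, Y = (v - cy) * Z / fy, Z = depth
z = depth
x = (u - cx) * z / fx
y = (v - cy) * z / fy
points = np.stack([x, y, z], axis=-1).reshape(-1, 3)

pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(points)
o3d.io.write_point_cloud("output.ply", pcd)
# ---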
6 | 7 | import torch 8 | import random 9 | 10 | 11 | def collate_data_and_cast(samples_list, mask_ratio_tuple, mask_probability, dtype, n_tokens=None, mask_generator=None): 12 | # dtype = torch.half # TODO: Remove 13 | 14 | n_global_crops = len(samples_list[0][0]["global_crops"]) 15 | n_local_crops = len(samples_list[0][0]["local_crops"]) 16 | 17 | collated_global_crops = torch.stack([s[0]["global_crops"][i] for i in range(n_global_crops) for s in samples_list]) 18 | 19 | collated_local_crops = torch.stack([s[0]["local_crops"][i] for i in range(n_local_crops) for s in samples_list]) 20 | 21 | B = len(collated_global_crops) 22 | N = n_tokens 23 | n_samples_masked = int(B * mask_probability) 24 | probs = torch.linspace(*mask_ratio_tuple, n_samples_masked + 1) 25 | upperbound = 0 26 | masks_list = [] 27 | for i in range(0, n_samples_masked): 28 | prob_min = probs[i] 29 | prob_max = probs[i + 1] 30 | masks_list.append(torch.BoolTensor(mask_generator(int(N * random.uniform(prob_min, prob_max))))) 31 | upperbound += int(N * prob_max) 32 | for i in range(n_samples_masked, B): 33 | masks_list.append(torch.BoolTensor(mask_generator(0))) 34 | 35 | random.shuffle(masks_list) 36 | 37 | collated_masks = torch.stack(masks_list).flatten(1) 38 | mask_indices_list = collated_masks.flatten().nonzero().flatten() 39 | 40 | masks_weight = (1 / collated_masks.sum(-1).clamp(min=1.0)).unsqueeze(-1).expand_as(collated_masks)[collated_masks] 41 | 42 | return { 43 | "collated_global_crops": collated_global_crops.to(dtype), 44 | "collated_local_crops": collated_local_crops.to(dtype), 45 | "collated_masks": collated_masks, 46 | "mask_indices_list": mask_indices_list, 47 | "masks_weight": masks_weight, 48 | "upperbound": upperbound, 49 | "n_masked_patches": torch.full((1,), fill_value=mask_indices_list.shape[0], dtype=torch.long), 50 | } 51 | -------------------------------------------------------------------------------- /zoedepth/models/zoedepth_nk/config_zoedepth_nk.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "ZoeDepthNK", 4 | "version_name": "v1", 5 | "bin_conf" : [ 6 | { 7 | "name": "nyu", 8 | "n_bins": 64, 9 | "min_depth": 1e-3, 10 | "max_depth": 10.0 11 | }, 12 | { 13 | "name": "kitti", 14 | "n_bins": 64, 15 | "min_depth": 1e-3, 16 | "max_depth": 80.0 17 | } 18 | ], 19 | "bin_embedding_dim": 128, 20 | "bin_centers_type": "softplus", 21 | "n_attractors":[16, 8, 4, 1], 22 | "attractor_alpha": 1000, 23 | "attractor_gamma": 2, 24 | "attractor_kind" : "mean", 25 | "attractor_type" : "inv", 26 | "min_temp": 0.0212, 27 | "max_temp": 50.0, 28 | "memory_efficient": true, 29 | "midas_model_type" : "DPT_BEiT_L_384", 30 | "img_size": [392, 518] 31 | }, 32 | 33 | "train": { 34 | "train_midas": true, 35 | "use_pretrained_midas": true, 36 | "trainer": "zoedepth_nk", 37 | "epochs": 10, 38 | "bs": 16, 39 | "optim_kwargs": {"lr": 0.0002512, "wd": 0.01}, 40 | "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true}, 41 | "same_lr": false, 42 | "w_si": 1, 43 | "w_domain": 100, 44 | "avoid_boundary": false, 45 | "random_crop": false, 46 | "input_width": 640, 47 | "input_height": 480, 48 | "w_grad": 0, 49 | "w_reg": 0, 50 | "midas_lr_factor": 50, 51 | "encoder_lr_factor": 50, 52 | "pos_enc_lr_factor": 50 53 | }, 54 | 55 | "infer": { 56 | "train_midas": false, 57 | "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt", 58 | 
"use_pretrained_midas": false, 59 | "force_keep_ar": true 60 | }, 61 | 62 | "eval": { 63 | "train_midas": false, 64 | "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt", 65 | "use_pretrained_midas": false 66 | } 67 | } -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/dino_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | import torch.nn as nn 9 | from torch.nn.init import trunc_normal_ 10 | from torch.nn.utils import weight_norm 11 | 12 | 13 | class DINOHead(nn.Module): 14 | def __init__( 15 | self, 16 | in_dim, 17 | out_dim, 18 | use_bn=False, 19 | nlayers=3, 20 | hidden_dim=2048, 21 | bottleneck_dim=256, 22 | mlp_bias=True, 23 | ): 24 | super().__init__() 25 | nlayers = max(nlayers, 1) 26 | self.mlp = _build_mlp(nlayers, in_dim, bottleneck_dim, hidden_dim=hidden_dim, use_bn=use_bn, bias=mlp_bias) 27 | self.apply(self._init_weights) 28 | self.last_layer = weight_norm(nn.Linear(bottleneck_dim, out_dim, bias=False)) 29 | self.last_layer.weight_g.data.fill_(1) 30 | 31 | def _init_weights(self, m): 32 | if isinstance(m, nn.Linear): 33 | trunc_normal_(m.weight, std=0.02) 34 | if isinstance(m, nn.Linear) and m.bias is not None: 35 | nn.init.constant_(m.bias, 0) 36 | 37 | def forward(self, x): 38 | x = self.mlp(x) 39 | eps = 1e-6 if x.dtype == torch.float16 else 1e-12 40 | x = nn.functional.normalize(x, dim=-1, p=2, eps=eps) 41 | x = self.last_layer(x) 42 | return x 43 | 44 | 45 | def _build_mlp(nlayers, in_dim, bottleneck_dim, hidden_dim=None, use_bn=False, bias=True): 46 | if nlayers == 1: 47 | return nn.Linear(in_dim, bottleneck_dim, bias=bias) 48 | else: 49 | layers = [nn.Linear(in_dim, hidden_dim, bias=bias)] 50 | if use_bn: 51 | layers.append(nn.BatchNorm1d(hidden_dim)) 52 | layers.append(nn.GELU()) 53 | for _ in range(nlayers - 2): 54 | layers.append(nn.Linear(hidden_dim, hidden_dim, bias=bias)) 55 | if use_bn: 56 | layers.append(nn.BatchNorm1d(hidden_dim)) 57 | layers.append(nn.GELU()) 58 | layers.append(nn.Linear(hidden_dim, bottleneck_dim, bias=bias)) 59 | return nn.Sequential(*layers) 60 | -------------------------------------------------------------------------------- /zoedepth/trainers/builder.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 
14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from importlib import import_module 26 | 27 | 28 | def get_trainer(config): 29 | """Builds and returns a trainer based on the config. 30 | 31 | Args: 32 | config (dict): the config dict (typically constructed using utils.config.get_config) 33 | config.trainer (str): the name of the trainer to use. The module named "{config.trainer}_trainer" must exist in trainers root module 34 | 35 | Raises: 36 | ValueError: If the specified trainer does not exist under trainers/ folder 37 | 38 | Returns: 39 | Trainer (inherited from zoedepth.trainers.BaseTrainer): The Trainer object 40 | """ 41 | assert "trainer" in config and config.trainer is not None and config.trainer != '', "Trainer not specified. Config: {0}".format( 42 | config) 43 | try: 44 | Trainer = getattr(import_module( 45 | f"zoedepth.trainers.{config.trainer}_trainer"), 'Trainer') 46 | except ModuleNotFoundError as e: 47 | raise ValueError(f"Trainer {config.trainer}_trainer not found.") from e 48 | return Trainer 49 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/eval/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import argparse 8 | from typing import Any, List, Optional, Tuple 9 | 10 | import torch 11 | import torch.backends.cudnn as cudnn 12 | 13 | from dinov2.models import build_model_from_cfg 14 | from dinov2.utils.config import setup 15 | import dinov2.utils.utils as dinov2_utils 16 | 17 | 18 | def get_args_parser( 19 | description: Optional[str] = None, 20 | parents: Optional[List[argparse.ArgumentParser]] = None, 21 | add_help: bool = True, 22 | ): 23 | parser = argparse.ArgumentParser( 24 | description=description, 25 | parents=parents or [], 26 | add_help=add_help, 27 | ) 28 | parser.add_argument( 29 | "--config-file", 30 | type=str, 31 | help="Model configuration file", 32 | ) 33 | parser.add_argument( 34 | "--pretrained-weights", 35 | type=str, 36 | help="Pretrained model weights", 37 | ) 38 | parser.add_argument( 39 | "--output-dir", 40 | default="", 41 | type=str, 42 | help="Output directory to write results and logs", 43 | ) 44 | parser.add_argument( 45 | "--opts", 46 | help="Extra configuration options", 47 | default=[], 48 | nargs="+", 49 | ) 50 | return parser 51 | 52 | 53 | def get_autocast_dtype(config): 54 | teacher_dtype_str = config.compute_precision.teacher.backbone.mixed_precision.param_dtype 55 | if teacher_dtype_str == "fp16": 56 | return torch.half 57 | elif teacher_dtype_str == "bf16": 58 | return torch.bfloat16 59 | else: 60 | return torch.float 61 | 62 | 63 | def build_model_for_eval(config, pretrained_weights): 64 | model, _ = build_model_from_cfg(config, only_teacher=True) 65 | dinov2_utils.load_pretrained_weights(model, pretrained_weights, "teacher") 66 | model.eval() 67 | model.cuda() 68 | return model 69 | 70 | 71 | def setup_and_build_model(args) -> Tuple[Any, torch.dtype]: 72 | cudnn.benchmark = True 73 | config = setup(args) 74 | model = build_model_for_eval(config, args.pretrained_weights) 75 | autocast_dtype = get_autocast_dtype(config) 76 | return model, autocast_dtype 77 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/utils/config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
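# --- Illustrative sketch (editorial addition, not part of the upstream DINOv2 config.py
# that follows). It shows one way to drive get_args_parser / setup_and_build_model from
# eval/setup.py above. The config path points at one of the files under dinov2/configs/eval;
# the checkpoint path is a placeholder, and running this needs a CUDA device because
# build_model_for_eval calls model.cuda().
from dinov2.eval.setup import get_args_parser, setup_and_build_model

args = get_args_parser(description="DINOv2 eval").parse_args([
    "--config-file", "dinov2/configs/eval/vitl14_pretrain.yaml",  # relative to the DINOv2 root
    "--pretrained-weights", "dinov2_vitl14_pretrain.pth",         # placeholder checkpoint path
    "--output-dir", "./eval_output",
])
model, autocast_dtype = setup_and_build_model(args)
print(type(model).__name__, autocast_dtype)
# ---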
6 | 7 | import math 8 | import logging 9 | import os 10 | 11 | from omegaconf import OmegaConf 12 | 13 | import dinov2.distributed as distributed 14 | from dinov2.logging import setup_logging 15 | from dinov2.utils import utils 16 | from dinov2.configs import dinov2_default_config 17 | 18 | 19 | logger = logging.getLogger("dinov2") 20 | 21 | 22 | def apply_scaling_rules_to_cfg(cfg): # to fix 23 | if cfg.optim.scaling_rule == "sqrt_wrt_1024": 24 | base_lr = cfg.optim.base_lr 25 | cfg.optim.lr = base_lr 26 | cfg.optim.lr *= math.sqrt(cfg.train.batch_size_per_gpu * distributed.get_global_size() / 1024.0) 27 | logger.info(f"sqrt scaling learning rate; base: {base_lr}, new: {cfg.optim.lr}") 28 | else: 29 | raise NotImplementedError 30 | return cfg 31 | 32 | 33 | def write_config(cfg, output_dir, name="config.yaml"): 34 | logger.info(OmegaConf.to_yaml(cfg)) 35 | saved_cfg_path = os.path.join(output_dir, name) 36 | with open(saved_cfg_path, "w") as f: 37 | OmegaConf.save(config=cfg, f=f) 38 | return saved_cfg_path 39 | 40 | 41 | def get_cfg_from_args(args): 42 | args.output_dir = os.path.abspath(args.output_dir) 43 | args.opts += [f"train.output_dir={args.output_dir}"] 44 | default_cfg = OmegaConf.create(dinov2_default_config) 45 | cfg = OmegaConf.load(args.config_file) 46 | cfg = OmegaConf.merge(default_cfg, cfg, OmegaConf.from_cli(args.opts)) 47 | return cfg 48 | 49 | 50 | def default_setup(args): 51 | distributed.enable(overwrite=True) 52 | seed = getattr(args, "seed", 0) 53 | rank = distributed.get_global_rank() 54 | 55 | global logger 56 | setup_logging(output=args.output_dir, level=logging.INFO) 57 | logger = logging.getLogger("dinov2") 58 | 59 | utils.fix_random_seeds(seed + rank) 60 | logger.info("git:\n {}\n".format(utils.get_sha())) 61 | logger.info("\n".join("%s: %s" % (k, str(v)) for k, v in sorted(dict(vars(args)).items()))) 62 | 63 | 64 | def setup(args): 65 | """ 66 | Create configs and perform basic setups. 67 | """ 68 | cfg = get_cfg_from_args(args) 69 | os.makedirs(args.output_dir, exist_ok=True) 70 | default_setup(args) 71 | apply_scaling_rules_to_cfg(cfg) 72 | write_config(cfg, args.output_dir) 73 | return cfg 74 | -------------------------------------------------------------------------------- /zoedepth/models/builder.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from importlib import import_module 26 | from zoedepth.models.depth_model import DepthModel 27 | 28 | def build_model(config) -> DepthModel: 29 | """Builds a model from a config. The model is specified by the model name and version in the config. The model is then constructed using the build_from_config function of the model interface. 30 | This function should be used to construct models for training and evaluation. 31 | 32 | Args: 33 | config (dict): Config dict. Config is constructed in utils/config.py. Each model has its own config file(s) saved in its root model folder. 34 | 35 | Returns: 36 | torch.nn.Module: Model corresponding to name and version as specified in config 37 | """ 38 | module_name = f"zoedepth.models.{config.model}" 39 | try: 40 | module = import_module(module_name) 41 | except ModuleNotFoundError as e: 42 | # print the original error message 43 | print(e) 44 | raise ValueError( 45 | f"Model {config.model} not found. Refer above error for details.") from e 46 | try: 47 | get_version = getattr(module, "get_version") 48 | except AttributeError as e: 49 | raise ValueError( 50 | f"Model {config.model} has no get_version function.") from e 51 | return get_version(config.version_name).build_from_config(config) 52 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/attention.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
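# --- Illustrative sketch (editorial addition, not part of the upstream DINOv2 attention.py
# that follows). It shows how build_model from zoedepth/models/builder.py above is meant to
# be driven by a config object. get_config is the helper the docstring points to
# (zoedepth/utils/config.py); its exact signature is not shown in this listing, so the call
# below is an assumption modelled on the upstream ZoeDepth repository.
from zoedepth.utils.config import get_config   # assumed signature: (model_name, mode, ...)
from zoedepth.models.builder import build_model

config = get_config("zoedepth", "infer")        # assumed: picks up config_zoedepth.json plus its "infer" overrides
model = build_model(config).eval()              # imports zoedepth.models.zoedepth, then get_version("v1").build_from_config(config)
print(type(model).__name__)                     # a DepthModel subclass (depth_model.py is not shown here)
# ---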
6 | 7 | # References: 8 | # https://github.com/facebookresearch/dino/blob/master/vision_transformer.py 9 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm/models/vision_transformer.py 10 | 11 | import logging 12 | 13 | from torch import Tensor 14 | from torch import nn 15 | 16 | 17 | logger = logging.getLogger("dinov2") 18 | 19 | 20 | try: 21 | from xformers.ops import memory_efficient_attention, unbind, fmha 22 | 23 | XFORMERS_AVAILABLE = True 24 | except ImportError: 25 | logger.warning("xFormers not available") 26 | XFORMERS_AVAILABLE = False 27 | 28 | 29 | class Attention(nn.Module): 30 | def __init__( 31 | self, 32 | dim: int, 33 | num_heads: int = 8, 34 | qkv_bias: bool = False, 35 | proj_bias: bool = True, 36 | attn_drop: float = 0.0, 37 | proj_drop: float = 0.0, 38 | ) -> None: 39 | super().__init__() 40 | self.num_heads = num_heads 41 | head_dim = dim // num_heads 42 | self.scale = head_dim**-0.5 43 | 44 | self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) 45 | self.attn_drop = nn.Dropout(attn_drop) 46 | self.proj = nn.Linear(dim, dim, bias=proj_bias) 47 | self.proj_drop = nn.Dropout(proj_drop) 48 | 49 | def forward(self, x: Tensor) -> Tensor: 50 | B, N, C = x.shape 51 | qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) 52 | 53 | q, k, v = qkv[0] * self.scale, qkv[1], qkv[2] 54 | attn = q @ k.transpose(-2, -1) 55 | 56 | attn = attn.softmax(dim=-1) 57 | attn = self.attn_drop(attn) 58 | 59 | x = (attn @ v).transpose(1, 2).reshape(B, N, C) 60 | x = self.proj(x) 61 | x = self.proj_drop(x) 62 | return x 63 | 64 | 65 | class MemEffAttention(Attention): 66 | def forward(self, x: Tensor, attn_bias=None) -> Tensor: 67 | if not XFORMERS_AVAILABLE: 68 | assert attn_bias is None, "xFormers is required for nested tensors usage" 69 | return super().forward(x) 70 | 71 | B, N, C = x.shape 72 | qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads) 73 | 74 | q, k, v = unbind(qkv, 2) 75 | 76 | x = memory_efficient_attention(q, k, v, attn_bias=attn_bias) 77 | x = x.reshape([B, N, C]) 78 | 79 | x = self.proj(x) 80 | x = self.proj_drop(x) 81 | return x 82 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from pathlib import Path 8 | import re 9 | from typing import List, Tuple 10 | 11 | from setuptools import setup, find_packages 12 | 13 | 14 | NAME = "dinov2" 15 | DESCRIPTION = "PyTorch code and models for the DINOv2 self-supervised learning method." 
16 | 17 | URL = "https://github.com/facebookresearch/dinov2" 18 | AUTHOR = "FAIR" 19 | REQUIRES_PYTHON = ">=3.9.0" 20 | HERE = Path(__file__).parent 21 | 22 | 23 | try: 24 | with open(HERE / "README.md", encoding="utf-8") as f: 25 | long_description = "\n" + f.read() 26 | except FileNotFoundError: 27 | long_description = DESCRIPTION 28 | 29 | 30 | def get_requirements(path: str = HERE / "requirements.txt") -> Tuple[List[str], List[str]]: 31 | requirements = [] 32 | extra_indices = [] 33 | with open(path) as f: 34 | for line in f.readlines(): 35 | line = line.rstrip("\r\n") 36 | if line.startswith("--extra-index-url "): 37 | extra_indices.append(line[18:]) 38 | continue 39 | requirements.append(line) 40 | return requirements, extra_indices 41 | 42 | 43 | def get_package_version() -> str: 44 | with open(HERE / "dinov2/__init__.py") as f: 45 | result = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", f.read(), re.M) 46 | if result: 47 | return result.group(1) 48 | raise RuntimeError("Can't get package version") 49 | 50 | 51 | requirements, extra_indices = get_requirements() 52 | version = get_package_version() 53 | dev_requirements, _ = get_requirements(HERE / "requirements-dev.txt") 54 | 55 | 56 | setup( 57 | name=NAME, 58 | version=version, 59 | description=DESCRIPTION, 60 | long_description=long_description, 61 | long_description_content_type="text/markdown", 62 | author=AUTHOR, 63 | python_requires=REQUIRES_PYTHON, 64 | url=URL, 65 | packages=find_packages(), 66 | package_data={ 67 | "": ["*.yaml"], 68 | }, 69 | install_requires=requirements, 70 | dependency_links=extra_indices, 71 | extras_require={ 72 | "dev": dev_requirements, 73 | }, 74 | install_package_data=True, 75 | license="CC-BY-NC", 76 | license_files=("LICENSE",), 77 | classifiers=[ 78 | # Trove classifiers: https://github.com/pypa/trove-classifiers/blob/main/src/trove_classifiers/__init__.py 79 | "Development Status :: 3 - Alpha", 80 | "Intended Audience :: Developers", 81 | "Intended Audience :: Science/Research", 82 | "License :: Other/Proprietary License", 83 | "Programming Language :: Python :: 3.9", 84 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 85 | "Topic :: Software Development :: Libraries :: Python Modules", 86 | ], 87 | ) 88 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/configs/ssl_default_config.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | WEIGHTS: '' 3 | compute_precision: 4 | grad_scaler: true 5 | teacher: 6 | backbone: 7 | sharding_strategy: SHARD_GRAD_OP 8 | mixed_precision: 9 | param_dtype: fp16 10 | reduce_dtype: fp16 11 | buffer_dtype: fp32 12 | dino_head: 13 | sharding_strategy: SHARD_GRAD_OP 14 | mixed_precision: 15 | param_dtype: fp16 16 | reduce_dtype: fp16 17 | buffer_dtype: fp32 18 | ibot_head: 19 | sharding_strategy: SHARD_GRAD_OP 20 | mixed_precision: 21 | param_dtype: fp16 22 | reduce_dtype: fp16 23 | buffer_dtype: fp32 24 | student: 25 | backbone: 26 | sharding_strategy: SHARD_GRAD_OP 27 | mixed_precision: 28 | param_dtype: fp16 29 | reduce_dtype: fp16 30 | buffer_dtype: fp32 31 | dino_head: 32 | sharding_strategy: SHARD_GRAD_OP 33 | mixed_precision: 34 | param_dtype: fp16 35 | reduce_dtype: fp32 36 | buffer_dtype: fp32 37 | ibot_head: 38 | sharding_strategy: SHARD_GRAD_OP 39 | mixed_precision: 40 | param_dtype: fp16 41 | reduce_dtype: fp32 42 | buffer_dtype: fp32 43 | dino: 44 | loss_weight: 1.0 45 | head_n_prototypes: 65536 46 | 
head_bottleneck_dim: 256 47 | head_nlayers: 3 48 | head_hidden_dim: 2048 49 | koleo_loss_weight: 0.1 50 | ibot: 51 | loss_weight: 1.0 52 | mask_sample_probability: 0.5 53 | mask_ratio_min_max: 54 | - 0.1 55 | - 0.5 56 | separate_head: false 57 | head_n_prototypes: 65536 58 | head_bottleneck_dim: 256 59 | head_nlayers: 3 60 | head_hidden_dim: 2048 61 | train: 62 | batch_size_per_gpu: 64 63 | dataset_path: ImageNet:split=TRAIN 64 | output_dir: . 65 | saveckp_freq: 20 66 | seed: 0 67 | num_workers: 10 68 | OFFICIAL_EPOCH_LENGTH: 1250 69 | cache_dataset: true 70 | centering: "centering" # or "sinkhorn_knopp" 71 | student: 72 | arch: vit_large 73 | patch_size: 16 74 | drop_path_rate: 0.3 75 | layerscale: 1.0e-05 76 | drop_path_uniform: true 77 | pretrained_weights: '' 78 | ffn_layer: "mlp" 79 | block_chunks: 0 80 | qkv_bias: true 81 | proj_bias: true 82 | ffn_bias: true 83 | teacher: 84 | momentum_teacher: 0.992 85 | final_momentum_teacher: 1 86 | warmup_teacher_temp: 0.04 87 | teacher_temp: 0.07 88 | warmup_teacher_temp_epochs: 30 89 | optim: 90 | epochs: 100 91 | weight_decay: 0.04 92 | weight_decay_end: 0.4 93 | base_lr: 0.004 # learning rate for a batch size of 1024 94 | lr: 0. # will be set after applying scaling rule 95 | warmup_epochs: 10 96 | min_lr: 1.0e-06 97 | clip_grad: 3.0 98 | freeze_last_layer_epochs: 1 99 | scaling_rule: sqrt_wrt_1024 100 | patch_embed_lr_mult: 0.2 101 | layerwise_decay: 0.9 102 | adamw_beta1: 0.9 103 | adamw_beta2: 0.999 104 | crops: 105 | global_crops_scale: 106 | - 0.32 107 | - 1.0 108 | local_crops_number: 8 109 | local_crops_scale: 110 | - 0.05 111 | - 0.32 112 | global_crops_size: 224 113 | local_crops_size: 96 114 | evaluation: 115 | eval_period_iterations: 12500 116 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/patch_embed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | # References: 8 | # https://github.com/facebookresearch/dino/blob/master/vision_transformer.py 9 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/patch_embed.py 10 | 11 | from typing import Callable, Optional, Tuple, Union 12 | 13 | from torch import Tensor 14 | import torch.nn as nn 15 | 16 | 17 | def make_2tuple(x): 18 | if isinstance(x, tuple): 19 | assert len(x) == 2 20 | return x 21 | 22 | assert isinstance(x, int) 23 | return (x, x) 24 | 25 | 26 | class PatchEmbed(nn.Module): 27 | """ 28 | 2D image to patch embedding: (B,C,H,W) -> (B,N,D) 29 | 30 | Args: 31 | img_size: Image size. 32 | patch_size: Patch token size. 33 | in_chans: Number of input image channels. 34 | embed_dim: Number of linear projection output channels. 35 | norm_layer: Normalization layer. 
36 | """ 37 | 38 | def __init__( 39 | self, 40 | img_size: Union[int, Tuple[int, int]] = 224, 41 | patch_size: Union[int, Tuple[int, int]] = 16, 42 | in_chans: int = 3, 43 | embed_dim: int = 768, 44 | norm_layer: Optional[Callable] = None, 45 | flatten_embedding: bool = True, 46 | ) -> None: 47 | super().__init__() 48 | 49 | image_HW = make_2tuple(img_size) 50 | patch_HW = make_2tuple(patch_size) 51 | patch_grid_size = ( 52 | image_HW[0] // patch_HW[0], 53 | image_HW[1] // patch_HW[1], 54 | ) 55 | 56 | self.img_size = image_HW 57 | self.patch_size = patch_HW 58 | self.patches_resolution = patch_grid_size 59 | self.num_patches = patch_grid_size[0] * patch_grid_size[1] 60 | 61 | self.in_chans = in_chans 62 | self.embed_dim = embed_dim 63 | 64 | self.flatten_embedding = flatten_embedding 65 | 66 | self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_HW, stride=patch_HW) 67 | self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity() 68 | 69 | def forward(self, x: Tensor) -> Tensor: 70 | _, _, H, W = x.shape 71 | patch_H, patch_W = self.patch_size 72 | 73 | assert H % patch_H == 0, f"Input image height {H} is not a multiple of patch height {patch_H}" 74 | assert W % patch_W == 0, f"Input image width {W} is not a multiple of patch width: {patch_W}" 75 | 76 | x = self.proj(x) # B C H W 77 | H, W = x.size(2), x.size(3) 78 | x = x.flatten(2).transpose(1, 2) # B HW C 79 | x = self.norm(x) 80 | if not self.flatten_embedding: 81 | x = x.reshape(-1, H, W, self.embed_dim) # B H W C 82 | return x 83 | 84 | def flops(self) -> float: 85 | Ho, Wo = self.patches_resolution 86 | flops = Ho * Wo * self.embed_dim * self.in_chans * (self.patch_size[0] * self.patch_size[1]) 87 | if self.norm is not None: 88 | flops += Ho * Wo * self.embed_dim 89 | return flops 90 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/data/masking.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import random 8 | import math 9 | import numpy as np 10 | 11 | 12 | class MaskingGenerator: 13 | def __init__( 14 | self, 15 | input_size, 16 | num_masking_patches=None, 17 | min_num_patches=4, 18 | max_num_patches=None, 19 | min_aspect=0.3, 20 | max_aspect=None, 21 | ): 22 | if not isinstance(input_size, tuple): 23 | input_size = (input_size,) * 2 24 | self.height, self.width = input_size 25 | 26 | self.num_patches = self.height * self.width 27 | self.num_masking_patches = num_masking_patches 28 | 29 | self.min_num_patches = min_num_patches 30 | self.max_num_patches = num_masking_patches if max_num_patches is None else max_num_patches 31 | 32 | max_aspect = max_aspect or 1 / min_aspect 33 | self.log_aspect_ratio = (math.log(min_aspect), math.log(max_aspect)) 34 | 35 | def __repr__(self): 36 | repr_str = "Generator(%d, %d -> [%d ~ %d], max = %d, %.3f ~ %.3f)" % ( 37 | self.height, 38 | self.width, 39 | self.min_num_patches, 40 | self.max_num_patches, 41 | self.num_masking_patches, 42 | self.log_aspect_ratio[0], 43 | self.log_aspect_ratio[1], 44 | ) 45 | return repr_str 46 | 47 | def get_shape(self): 48 | return self.height, self.width 49 | 50 | def _mask(self, mask, max_mask_patches): 51 | delta = 0 52 | for _ in range(10): 53 | target_area = random.uniform(self.min_num_patches, max_mask_patches) 54 | aspect_ratio = math.exp(random.uniform(*self.log_aspect_ratio)) 55 | h = int(round(math.sqrt(target_area * aspect_ratio))) 56 | w = int(round(math.sqrt(target_area / aspect_ratio))) 57 | if w < self.width and h < self.height: 58 | top = random.randint(0, self.height - h) 59 | left = random.randint(0, self.width - w) 60 | 61 | num_masked = mask[top : top + h, left : left + w].sum() 62 | # Overlap 63 | if 0 < h * w - num_masked <= max_mask_patches: 64 | for i in range(top, top + h): 65 | for j in range(left, left + w): 66 | if mask[i, j] == 0: 67 | mask[i, j] = 1 68 | delta += 1 69 | 70 | if delta > 0: 71 | break 72 | return delta 73 | 74 | def __call__(self, num_masking_patches=0): 75 | mask = np.zeros(shape=self.get_shape(), dtype=bool) 76 | mask_count = 0 77 | while mask_count < num_masking_patches: 78 | max_mask_patches = num_masking_patches - mask_count 79 | max_mask_patches = min(max_mask_patches, self.max_num_patches) 80 | 81 | delta = self._mask(mask, max_mask_patches) 82 | if delta == 0: 83 | break 84 | else: 85 | mask_count += delta 86 | 87 | return mask 88 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/utils/cluster.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
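# --- Illustrative sketch (editorial addition, not part of the upstream cluster.py that
# follows). It exercises the MaskingGenerator defined just above: on a 14x14 patch grid it
# keeps sampling rectangular blocks until roughly the requested number of patches is masked,
# returning a boolean (14, 14) numpy array. Block sampling is stochastic, so the final count
# can fall short of the target.
from dinov2.data.masking import MaskingGenerator

generator = MaskingGenerator(input_size=14, max_num_patches=98)  # 98 = 0.5 * 14 * 14
mask = generator(num_masking_patches=49)                          # ask for ~25% of the 196 patches
print(mask.shape, int(mask.sum()))
# ---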
6 | 7 | from enum import Enum 8 | import os 9 | from pathlib import Path 10 | from typing import Any, Dict, Optional 11 | 12 | 13 | class ClusterType(Enum): 14 | AWS = "aws" 15 | FAIR = "fair" 16 | RSC = "rsc" 17 | 18 | 19 | def _guess_cluster_type() -> ClusterType: 20 | uname = os.uname() 21 | if uname.sysname == "Linux": 22 | if uname.release.endswith("-aws"): 23 | # Linux kernel versions on AWS instances are of the form "5.4.0-1051-aws" 24 | return ClusterType.AWS 25 | elif uname.nodename.startswith("rsc"): 26 | # Linux kernel versions on RSC instances are standard ones but hostnames start with "rsc" 27 | return ClusterType.RSC 28 | 29 | return ClusterType.FAIR 30 | 31 | 32 | def get_cluster_type(cluster_type: Optional[ClusterType] = None) -> Optional[ClusterType]: 33 | if cluster_type is None: 34 | return _guess_cluster_type() 35 | 36 | return cluster_type 37 | 38 | 39 | def get_checkpoint_path(cluster_type: Optional[ClusterType] = None) -> Optional[Path]: 40 | cluster_type = get_cluster_type(cluster_type) 41 | if cluster_type is None: 42 | return None 43 | 44 | CHECKPOINT_DIRNAMES = { 45 | ClusterType.AWS: "checkpoints", 46 | ClusterType.FAIR: "checkpoint", 47 | ClusterType.RSC: "checkpoint/dino", 48 | } 49 | return Path("/") / CHECKPOINT_DIRNAMES[cluster_type] 50 | 51 | 52 | def get_user_checkpoint_path(cluster_type: Optional[ClusterType] = None) -> Optional[Path]: 53 | checkpoint_path = get_checkpoint_path(cluster_type) 54 | if checkpoint_path is None: 55 | return None 56 | 57 | username = os.environ.get("USER") 58 | assert username is not None 59 | return checkpoint_path / username 60 | 61 | 62 | def get_slurm_partition(cluster_type: Optional[ClusterType] = None) -> Optional[str]: 63 | cluster_type = get_cluster_type(cluster_type) 64 | if cluster_type is None: 65 | return None 66 | 67 | SLURM_PARTITIONS = { 68 | ClusterType.AWS: "learnlab", 69 | ClusterType.FAIR: "learnlab", 70 | ClusterType.RSC: "learn", 71 | } 72 | return SLURM_PARTITIONS[cluster_type] 73 | 74 | 75 | def get_slurm_executor_parameters( 76 | nodes: int, num_gpus_per_node: int, cluster_type: Optional[ClusterType] = None, **kwargs 77 | ) -> Dict[str, Any]: 78 | # create default parameters 79 | params = { 80 | "mem_gb": 0, # Requests all memory on a node, see https://slurm.schedmd.com/sbatch.html 81 | "gpus_per_node": num_gpus_per_node, 82 | "tasks_per_node": num_gpus_per_node, # one task per GPU 83 | "cpus_per_task": 10, 84 | "nodes": nodes, 85 | "slurm_partition": get_slurm_partition(cluster_type), 86 | } 87 | # apply cluster-specific adjustments 88 | cluster_type = get_cluster_type(cluster_type) 89 | if cluster_type == ClusterType.AWS: 90 | params["cpus_per_task"] = 12 91 | del params["mem_gb"] 92 | elif cluster_type == ClusterType.RSC: 93 | params["cpus_per_task"] = 12 94 | # set additional parameters / apply overrides 95 | params.update(kwargs) 96 | return params 97 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/data/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from typing import Sequence 8 | 9 | import torch 10 | from torchvision import transforms 11 | 12 | 13 | class GaussianBlur(transforms.RandomApply): 14 | """ 15 | Apply Gaussian Blur to the PIL image. 
16 | """ 17 | 18 | def __init__(self, *, p: float = 0.5, radius_min: float = 0.1, radius_max: float = 2.0): 19 | # NOTE: torchvision is applying 1 - probability to return the original image 20 | keep_p = 1 - p 21 | transform = transforms.GaussianBlur(kernel_size=9, sigma=(radius_min, radius_max)) 22 | super().__init__(transforms=[transform], p=keep_p) 23 | 24 | 25 | class MaybeToTensor(transforms.ToTensor): 26 | """ 27 | Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor, or keep as is if already a tensor. 28 | """ 29 | 30 | def __call__(self, pic): 31 | """ 32 | Args: 33 | pic (PIL Image, numpy.ndarray or torch.tensor): Image to be converted to tensor. 34 | Returns: 35 | Tensor: Converted image. 36 | """ 37 | if isinstance(pic, torch.Tensor): 38 | return pic 39 | return super().__call__(pic) 40 | 41 | 42 | # Use timm's names 43 | IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406) 44 | IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225) 45 | 46 | 47 | def make_normalize_transform( 48 | mean: Sequence[float] = IMAGENET_DEFAULT_MEAN, 49 | std: Sequence[float] = IMAGENET_DEFAULT_STD, 50 | ) -> transforms.Normalize: 51 | return transforms.Normalize(mean=mean, std=std) 52 | 53 | 54 | # This roughly matches torchvision's preset for classification training: 55 | # https://github.com/pytorch/vision/blob/main/references/classification/presets.py#L6-L44 56 | def make_classification_train_transform( 57 | *, 58 | crop_size: int = 224, 59 | interpolation=transforms.InterpolationMode.BICUBIC, 60 | hflip_prob: float = 0.5, 61 | mean: Sequence[float] = IMAGENET_DEFAULT_MEAN, 62 | std: Sequence[float] = IMAGENET_DEFAULT_STD, 63 | ): 64 | transforms_list = [transforms.RandomResizedCrop(crop_size, interpolation=interpolation)] 65 | if hflip_prob > 0.0: 66 | transforms_list.append(transforms.RandomHorizontalFlip(hflip_prob)) 67 | transforms_list.extend( 68 | [ 69 | MaybeToTensor(), 70 | make_normalize_transform(mean=mean, std=std), 71 | ] 72 | ) 73 | return transforms.Compose(transforms_list) 74 | 75 | 76 | # This matches (roughly) torchvision's preset for classification evaluation: 77 | # https://github.com/pytorch/vision/blob/main/references/classification/presets.py#L47-L69 78 | def make_classification_eval_transform( 79 | *, 80 | resize_size: int = 256, 81 | interpolation=transforms.InterpolationMode.BICUBIC, 82 | crop_size: int = 224, 83 | mean: Sequence[float] = IMAGENET_DEFAULT_MEAN, 84 | std: Sequence[float] = IMAGENET_DEFAULT_STD, 85 | ) -> transforms.Compose: 86 | transforms_list = [ 87 | transforms.Resize(resize_size, interpolation=interpolation), 88 | transforms.CenterCrop(crop_size), 89 | MaybeToTensor(), 90 | make_normalize_transform(mean=mean, std=std), 91 | ] 92 | return transforms.Compose(transforms_list) 93 | -------------------------------------------------------------------------------- /zoedepth/data/ibims.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice 
shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms as T 32 | 33 | 34 | class iBims(Dataset): 35 | def __init__(self, config): 36 | root_folder = config.ibims_root 37 | with open(os.path.join(root_folder, "imagelist.txt"), 'r') as f: 38 | imglist = f.read().split() 39 | 40 | samples = [] 41 | for basename in imglist: 42 | img_path = os.path.join(root_folder, 'rgb', basename + ".png") 43 | depth_path = os.path.join(root_folder, 'depth', basename + ".png") 44 | valid_mask_path = os.path.join( 45 | root_folder, 'mask_invalid', basename+".png") 46 | transp_mask_path = os.path.join( 47 | root_folder, 'mask_transp', basename+".png") 48 | 49 | samples.append( 50 | (img_path, depth_path, valid_mask_path, transp_mask_path)) 51 | 52 | self.samples = samples 53 | # self.normalize = T.Normalize( 54 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 55 | self.normalize = lambda x : x 56 | 57 | def __getitem__(self, idx): 58 | img_path, depth_path, valid_mask_path, transp_mask_path = self.samples[idx] 59 | 60 | img = np.asarray(Image.open(img_path), dtype=np.float32) / 255.0 61 | depth = np.asarray(Image.open(depth_path), 62 | dtype=np.uint16).astype('float')*50.0/65535 63 | 64 | mask_valid = np.asarray(Image.open(valid_mask_path)) 65 | mask_transp = np.asarray(Image.open(transp_mask_path)) 66 | 67 | # depth = depth * mask_valid * mask_transp 68 | depth = np.where(mask_valid * mask_transp, depth, -1) 69 | 70 | img = torch.from_numpy(img).permute(2, 0, 1) 71 | img = self.normalize(img) 72 | depth = torch.from_numpy(depth).unsqueeze(0) 73 | return dict(image=img, depth=depth, image_path=img_path, depth_path=depth_path, dataset='ibims') 74 | 75 | def __len__(self): 76 | return len(self.samples) 77 | 78 | 79 | def get_ibims_loader(config, batch_size=1, **kwargs): 80 | dataloader = DataLoader(iBims(config), batch_size=batch_size, **kwargs) 81 | return dataloader 82 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/utils/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
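# --- Illustrative sketch (editorial addition, not part of the upstream utils.py that
# follows). It shows the intended use of make_classification_eval_transform from
# dinov2/data/transforms.py above: resize to 256, center-crop to 224, convert to a tensor
# and normalize with the ImageNet mean/std. The image path is a placeholder.
from PIL import Image
from dinov2.data.transforms import make_classification_eval_transform

transform = make_classification_eval_transform()
image = Image.open("example.png").convert("RGB")   # placeholder input image
batch = transform(image).unsqueeze(0)              # shape: (1, 3, 224, 224)
# ---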
6 | 7 | import logging 8 | import os 9 | import random 10 | import subprocess 11 | from urllib.parse import urlparse 12 | 13 | import numpy as np 14 | import torch 15 | from torch import nn 16 | 17 | 18 | logger = logging.getLogger("dinov2") 19 | 20 | 21 | def load_pretrained_weights(model, pretrained_weights, checkpoint_key): 22 | if urlparse(pretrained_weights).scheme: # If it looks like an URL 23 | state_dict = torch.hub.load_state_dict_from_url(pretrained_weights, map_location="cpu") 24 | else: 25 | state_dict = torch.load(pretrained_weights, map_location="cpu") 26 | if checkpoint_key is not None and checkpoint_key in state_dict: 27 | logger.info(f"Take key {checkpoint_key} in provided checkpoint dict") 28 | state_dict = state_dict[checkpoint_key] 29 | # remove `module.` prefix 30 | state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()} 31 | # remove `backbone.` prefix induced by multicrop wrapper 32 | state_dict = {k.replace("backbone.", ""): v for k, v in state_dict.items()} 33 | msg = model.load_state_dict(state_dict, strict=False) 34 | logger.info("Pretrained weights found at {} and loaded with msg: {}".format(pretrained_weights, msg)) 35 | 36 | 37 | def fix_random_seeds(seed=31): 38 | """ 39 | Fix random seeds. 40 | """ 41 | torch.manual_seed(seed) 42 | torch.cuda.manual_seed_all(seed) 43 | np.random.seed(seed) 44 | random.seed(seed) 45 | 46 | 47 | def get_sha(): 48 | cwd = os.path.dirname(os.path.abspath(__file__)) 49 | 50 | def _run(command): 51 | return subprocess.check_output(command, cwd=cwd).decode("ascii").strip() 52 | 53 | sha = "N/A" 54 | diff = "clean" 55 | branch = "N/A" 56 | try: 57 | sha = _run(["git", "rev-parse", "HEAD"]) 58 | subprocess.check_output(["git", "diff"], cwd=cwd) 59 | diff = _run(["git", "diff-index", "HEAD"]) 60 | diff = "has uncommitted changes" if diff else "clean" 61 | branch = _run(["git", "rev-parse", "--abbrev-ref", "HEAD"]) 62 | except Exception: 63 | pass 64 | message = f"sha: {sha}, status: {diff}, branch: {branch}" 65 | return message 66 | 67 | 68 | class CosineScheduler(object): 69 | def __init__(self, base_value, final_value, total_iters, warmup_iters=0, start_warmup_value=0, freeze_iters=0): 70 | super().__init__() 71 | self.final_value = final_value 72 | self.total_iters = total_iters 73 | 74 | freeze_schedule = np.zeros((freeze_iters)) 75 | 76 | warmup_schedule = np.linspace(start_warmup_value, base_value, warmup_iters) 77 | 78 | iters = np.arange(total_iters - warmup_iters - freeze_iters) 79 | schedule = final_value + 0.5 * (base_value - final_value) * (1 + np.cos(np.pi * iters / len(iters))) 80 | self.schedule = np.concatenate((freeze_schedule, warmup_schedule, schedule)) 81 | 82 | assert len(self.schedule) == self.total_iters 83 | 84 | def __getitem__(self, it): 85 | if it >= self.total_iters: 86 | return self.final_value 87 | else: 88 | return self.schedule[it] 89 | 90 | 91 | def has_batchnorms(model): 92 | bn_types = (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm) 93 | for name, module in model.named_modules(): 94 | if isinstance(module, bn_types): 95 | return True 96 | return False 97 | -------------------------------------------------------------------------------- /zoedepth/models/model_io.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files 
(the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | 27 | def load_state_dict(model, state_dict): 28 | """Load state_dict into model, handling DataParallel and DistributedDataParallel. Also checks for "model" key in state_dict. 29 | 30 | DataParallel prefixes state_dict keys with 'module.' when saving. 31 | If the model is not a DataParallel model but the state_dict is, then prefixes are removed. 32 | If the model is a DataParallel model but the state_dict is not, then prefixes are added. 33 | """ 34 | state_dict = state_dict.get('model', state_dict) 35 | # if model is a DataParallel model, then state_dict keys are prefixed with 'module.' 36 | 37 | do_prefix = isinstance( 38 | model, (torch.nn.DataParallel, torch.nn.parallel.DistributedDataParallel)) 39 | state = {} 40 | for k, v in state_dict.items(): 41 | if k.startswith('module.') and not do_prefix: 42 | k = k[7:] 43 | 44 | if not k.startswith('module.') and do_prefix: 45 | k = 'module.' + k 46 | 47 | state[k] = v 48 | 49 | model.load_state_dict(state) 50 | print("Loaded successfully") 51 | return model 52 | 53 | 54 | def load_wts(model, checkpoint_path): 55 | ckpt = torch.load(checkpoint_path, map_location='cpu') 56 | return load_state_dict(model, ckpt) 57 | 58 | 59 | def load_state_dict_from_url(model, url, **kwargs): 60 | state_dict = torch.hub.load_state_dict_from_url(url, map_location='cpu', **kwargs) 61 | return load_state_dict(model, state_dict) 62 | 63 | 64 | def load_state_from_resource(model, resource: str): 65 | """Loads weights to the model from a given resource. A resource can be of following types: 66 | 1. URL. Prefixed with "url::" 67 | e.g. url::http(s)://url.resource.com/ckpt.pt 68 | 69 | 2. Local path. Prefixed with "local::" 70 | e.g. 
local::/path/to/ckpt.pt 71 | 72 | 73 | Args: 74 | model (torch.nn.Module): Model 75 | resource (str): resource string 76 | 77 | Returns: 78 | torch.nn.Module: Model with loaded weights 79 | """ 80 | print(f"Using pretrained resource {resource}") 81 | 82 | if resource.startswith('url::'): 83 | url = resource.split('url::')[1] 84 | return load_state_dict_from_url(model, url, progress=True) 85 | 86 | elif resource.startswith('local::'): 87 | path = resource.split('local::')[1] 88 | return load_wts(model, path) 89 | 90 | else: 91 | raise ValueError("Invalid resource type, only url:: and local:: are supported") 92 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/logging/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import functools 8 | import logging 9 | import os 10 | import sys 11 | from typing import Optional 12 | 13 | import dinov2.distributed as distributed 14 | from .helpers import MetricLogger, SmoothedValue 15 | 16 | 17 | # So that calling _configure_logger multiple times won't add many handlers 18 | @functools.lru_cache() 19 | def _configure_logger( 20 | name: Optional[str] = None, 21 | *, 22 | level: int = logging.DEBUG, 23 | output: Optional[str] = None, 24 | ): 25 | """ 26 | Configure a logger. 27 | 28 | Adapted from Detectron2. 29 | 30 | Args: 31 | name: The name of the logger to configure. 32 | level: The logging level to use. 33 | output: A file name or a directory to save log. If None, will not save log file. 34 | If ends with ".txt" or ".log", assumed to be a file name. 35 | Otherwise, logs will be saved to `output/log.txt`. 36 | 37 | Returns: 38 | The configured logger. 
39 | """ 40 | 41 | logger = logging.getLogger(name) 42 | logger.setLevel(level) 43 | logger.propagate = False 44 | 45 | # Loosely match Google glog format: 46 | # [IWEF]yyyymmdd hh:mm:ss.uuuuuu threadid file:line] msg 47 | # but use a shorter timestamp and include the logger name: 48 | # [IWEF]yyyymmdd hh:mm:ss logger threadid file:line] msg 49 | fmt_prefix = "%(levelname).1s%(asctime)s %(process)s %(name)s %(filename)s:%(lineno)s] " 50 | fmt_message = "%(message)s" 51 | fmt = fmt_prefix + fmt_message 52 | datefmt = "%Y%m%d %H:%M:%S" 53 | formatter = logging.Formatter(fmt=fmt, datefmt=datefmt) 54 | 55 | # stdout logging for main worker only 56 | if distributed.is_main_process(): 57 | handler = logging.StreamHandler(stream=sys.stdout) 58 | handler.setLevel(logging.DEBUG) 59 | handler.setFormatter(formatter) 60 | logger.addHandler(handler) 61 | 62 | # file logging for all workers 63 | if output: 64 | if os.path.splitext(output)[-1] in (".txt", ".log"): 65 | filename = output 66 | else: 67 | filename = os.path.join(output, "logs", "log.txt") 68 | 69 | if not distributed.is_main_process(): 70 | global_rank = distributed.get_global_rank() 71 | filename = filename + ".rank{}".format(global_rank) 72 | 73 | os.makedirs(os.path.dirname(filename), exist_ok=True) 74 | 75 | handler = logging.StreamHandler(open(filename, "a")) 76 | handler.setLevel(logging.DEBUG) 77 | handler.setFormatter(formatter) 78 | logger.addHandler(handler) 79 | 80 | return logger 81 | 82 | 83 | def setup_logging( 84 | output: Optional[str] = None, 85 | *, 86 | name: Optional[str] = None, 87 | level: int = logging.DEBUG, 88 | capture_warnings: bool = True, 89 | ) -> None: 90 | """ 91 | Setup logging. 92 | 93 | Args: 94 | output: A file name or a directory to save log files. If None, log 95 | files will not be saved. If output ends with ".txt" or ".log", it 96 | is assumed to be a file name. 97 | Otherwise, logs will be saved to `output/log.txt`. 98 | name: The name of the logger to configure, by default the root logger. 99 | level: The logging level to use. 100 | capture_warnings: Whether warnings should be captured as logs. 101 | """ 102 | logging.captureWarnings(capture_warnings) 103 | _configure_logger(name, level=level, output=output) 104 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to make participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies within all project spaces, and it also applies when 49 | an individual is representing the project or its community in public spaces. 50 | Examples of representing a project or community include using an official 51 | project e-mail address, posting via an official social media account, or acting 52 | as an appointed representative at an online or offline event. Representation of 53 | a project may be further defined and clarified by project maintainers. 54 | 55 | This Code of Conduct also applies outside the project spaces when there is a 56 | reasonable belief that an individual's behavior may have a negative impact on 57 | the project or its community. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported by contacting the project team at . All 63 | complaints will be reviewed and investigated and will result in a response that 64 | is deemed necessary and appropriate to the circumstances. The project team is 65 | obligated to maintain confidentiality with regard to the reporter of an incident. 66 | Further details of specific enforcement policies may be posted separately. 67 | 68 | Project maintainers who do not follow or enforce the Code of Conduct in good 69 | faith may face temporary or permanent repercussions as determined by other 70 | members of the project's leadership. 
71 | 72 | ## Attribution 73 | 74 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 75 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 76 | 77 | [homepage]: https://www.contributor-covenant.org 78 | 79 | For answers to common questions about this code of conduct, see 80 | https://www.contributor-covenant.org/faq 81 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/run/submit.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import argparse 8 | import logging 9 | import os 10 | from pathlib import Path 11 | from typing import List, Optional 12 | 13 | import submitit 14 | 15 | from dinov2.utils.cluster import ( 16 | get_slurm_executor_parameters, 17 | get_slurm_partition, 18 | get_user_checkpoint_path, 19 | ) 20 | 21 | 22 | logger = logging.getLogger("dinov2") 23 | 24 | 25 | def get_args_parser( 26 | description: Optional[str] = None, 27 | parents: Optional[List[argparse.ArgumentParser]] = None, 28 | add_help: bool = True, 29 | ) -> argparse.ArgumentParser: 30 | parents = parents or [] 31 | slurm_partition = get_slurm_partition() 32 | parser = argparse.ArgumentParser( 33 | description=description, 34 | parents=parents, 35 | add_help=add_help, 36 | ) 37 | parser.add_argument( 38 | "--ngpus", 39 | "--gpus", 40 | "--gpus-per-node", 41 | default=8, 42 | type=int, 43 | help="Number of GPUs to request on each node", 44 | ) 45 | parser.add_argument( 46 | "--nodes", 47 | "--nnodes", 48 | default=2, 49 | type=int, 50 | help="Number of nodes to request", 51 | ) 52 | parser.add_argument( 53 | "--timeout", 54 | default=2800, 55 | type=int, 56 | help="Duration of the job", 57 | ) 58 | parser.add_argument( 59 | "--partition", 60 | default=slurm_partition, 61 | type=str, 62 | help="Partition where to submit", 63 | ) 64 | parser.add_argument( 65 | "--use-volta32", 66 | action="store_true", 67 | help="Request V100-32GB GPUs", 68 | ) 69 | parser.add_argument( 70 | "--comment", 71 | default="", 72 | type=str, 73 | help="Comment to pass to scheduler, e.g. 
priority message", 74 | ) 75 | parser.add_argument( 76 | "--exclude", 77 | default="", 78 | type=str, 79 | help="Nodes to exclude", 80 | ) 81 | return parser 82 | 83 | 84 | def get_shared_folder() -> Path: 85 | user_checkpoint_path = get_user_checkpoint_path() 86 | if user_checkpoint_path is None: 87 | raise RuntimeError("Path to user checkpoint cannot be determined") 88 | path = user_checkpoint_path / "experiments" 89 | path.mkdir(exist_ok=True) 90 | return path 91 | 92 | 93 | def submit_jobs(task_class, args, name: str): 94 | if not args.output_dir: 95 | args.output_dir = str(get_shared_folder() / "%j") 96 | 97 | Path(args.output_dir).mkdir(parents=True, exist_ok=True) 98 | executor = submitit.AutoExecutor(folder=args.output_dir, slurm_max_num_timeout=30) 99 | 100 | kwargs = {} 101 | if args.use_volta32: 102 | kwargs["slurm_constraint"] = "volta32gb" 103 | if args.comment: 104 | kwargs["slurm_comment"] = args.comment 105 | if args.exclude: 106 | kwargs["slurm_exclude"] = args.exclude 107 | 108 | executor_params = get_slurm_executor_parameters( 109 | nodes=args.nodes, 110 | num_gpus_per_node=args.ngpus, 111 | timeout_min=args.timeout, # max is 60 * 72 112 | slurm_signal_delay_s=120, 113 | slurm_partition=args.partition, 114 | **kwargs, 115 | ) 116 | executor.update_parameters(name=name, **executor_params) 117 | 118 | task = task_class(args) 119 | job = executor.submit(task) 120 | 121 | logger.info(f"Submitted job_id: {job.job_id}") 122 | str_output_dir = os.path.abspath(args.output_dir).replace("%j", str(job.job_id)) 123 | logger.info(f"Logs and checkpoints will be saved at: {str_output_dir}") 124 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/utils/param_groups.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from collections import defaultdict 8 | import logging 9 | 10 | 11 | logger = logging.getLogger("dinov2") 12 | 13 | 14 | def get_vit_lr_decay_rate(name, lr_decay_rate=1.0, num_layers=12, force_is_backbone=False, chunked_blocks=False): 15 | """ 16 | Calculate lr decay rate for different ViT blocks. 17 | Args: 18 | name (string): parameter name. 19 | lr_decay_rate (float): base lr decay rate. 20 | num_layers (int): number of ViT blocks. 21 | Returns: 22 | lr decay rate for the given parameter. 23 | """ 24 | layer_id = num_layers + 1 25 | if name.startswith("backbone") or force_is_backbone: 26 | if ".pos_embed" in name or ".patch_embed" in name or ".mask_token" in name or ".cls_token" in name: 27 | layer_id = 0 28 | elif force_is_backbone and ( 29 | "pos_embed" in name or "patch_embed" in name or "mask_token" in name or "cls_token" in name 30 | ): 31 | layer_id = 0 32 | elif ".blocks." in name and ".residual." not in name: 33 | layer_id = int(name[name.find(".blocks.") :].split(".")[2]) + 1 34 | elif chunked_blocks and "blocks." in name and "residual." not in name: 35 | layer_id = int(name[name.find("blocks.") :].split(".")[2]) + 1 36 | elif "blocks." in name and "residual." 
not in name: 37 | layer_id = int(name[name.find("blocks.") :].split(".")[1]) + 1 38 | 39 | return lr_decay_rate ** (num_layers + 1 - layer_id) 40 | 41 | 42 | def get_params_groups_with_decay(model, lr_decay_rate=1.0, patch_embed_lr_mult=1.0): 43 | chunked_blocks = False 44 | if hasattr(model, "n_blocks"): 45 | logger.info("chunked fsdp") 46 | n_blocks = model.n_blocks 47 | chunked_blocks = model.chunked_blocks 48 | elif hasattr(model, "blocks"): 49 | logger.info("first code branch") 50 | n_blocks = len(model.blocks) 51 | elif hasattr(model, "backbone"): 52 | logger.info("second code branch") 53 | n_blocks = len(model.backbone.blocks) 54 | else: 55 | logger.info("else code branch") 56 | n_blocks = 0 57 | all_param_groups = [] 58 | 59 | for name, param in model.named_parameters(): 60 | name = name.replace("_fsdp_wrapped_module.", "") 61 | if not param.requires_grad: 62 | continue 63 | decay_rate = get_vit_lr_decay_rate( 64 | name, lr_decay_rate, num_layers=n_blocks, force_is_backbone=n_blocks > 0, chunked_blocks=chunked_blocks 65 | ) 66 | d = {"params": param, "is_last_layer": False, "lr_multiplier": decay_rate, "wd_multiplier": 1.0, "name": name} 67 | 68 | if "last_layer" in name: 69 | d.update({"is_last_layer": True}) 70 | 71 | if name.endswith(".bias") or "norm" in name or "gamma" in name: 72 | d.update({"wd_multiplier": 0.0}) 73 | 74 | if "patch_embed" in name: 75 | d.update({"lr_multiplier": d["lr_multiplier"] * patch_embed_lr_mult}) 76 | 77 | all_param_groups.append(d) 78 | logger.info(f"""{name}: lr_multiplier: {d["lr_multiplier"]}, wd_multiplier: {d["wd_multiplier"]}""") 79 | 80 | return all_param_groups 81 | 82 | 83 | def fuse_params_groups(all_params_groups, keys=("lr_multiplier", "wd_multiplier", "is_last_layer")): 84 | fused_params_groups = defaultdict(lambda: {"params": []}) 85 | for d in all_params_groups: 86 | identifier = "" 87 | for k in keys: 88 | identifier += k + str(d[k]) + "_" 89 | 90 | for k in keys: 91 | fused_params_groups[identifier][k] = d[k] 92 | fused_params_groups[identifier]["params"].append(d["params"]) 93 | 94 | return fused_params_groups.values() 95 | -------------------------------------------------------------------------------- /depth_to_pointcloud.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import glob 4 | import torch 5 | import numpy as np 6 | from PIL import Image 7 | import torchvision.transforms as transforms 8 | import open3d as o3d 9 | from tqdm import tqdm 10 | from zoedepth.models.builder import build_model 11 | from zoedepth.utils.config import get_config 12 | 13 | # Load the saved calibration parameters 14 | calibration_data = np.load("CalibrationMatrix_college_cpt.npz") 15 | 16 | # Extract the camera matrix 17 | camera_matrix = calibration_data['Camera_matrix'] 18 | 19 | # The camera matrix typically looks like this: 20 | # [[fx, 0, cx], 21 | # [ 0, fy, cy], 22 | # [ 0, 0, 1]] 23 | 24 | # Extract the focal lengths 25 | FX = camera_matrix[0, 0] # Focal length in the x direction 26 | FY = camera_matrix[1, 1] # Focal length in the y direction 27 | FL = (FX + FY) / 2 # Average focal length 28 | 29 | # Print the extracted parameters 30 | print(f"FX: {FX}") 31 | print(f"FY: {FY}") 32 | print(f"FL (average focal length): {FL}") 33 | 34 | NYU_DATA = False 35 | INPUT_DIR = './my_test/input/indoor' 36 | OUTPUT_DIR = './my_test/output/indoor/' 37 | DATASET = 'nyu' # For INDOOR 38 | # DATASET = 'kitti' # For OUTDOOR 39 | 40 | 41 | def process_images(model): 42 | if not 
os.path.exists(OUTPUT_DIR): 43 | os.makedirs(OUTPUT_DIR) 44 | 45 | image_paths = glob.glob(os.path.join(INPUT_DIR, '*.png')) + glob.glob(os.path.join(INPUT_DIR, '*.jpg')) 46 | for image_path in tqdm(image_paths, desc="Processing Images"): 47 | try: 48 | color_image = Image.open(image_path).convert('RGB') 49 | original_width, original_height = color_image.size 50 | FINAL_HEIGHT = original_height 51 | FINAL_WIDTH = original_width 52 | image_tensor = transforms.ToTensor()(color_image).unsqueeze(0).to( 53 | 'cuda' if torch.cuda.is_available() else 'cpu') 54 | 55 | pred = model(image_tensor, dataset=DATASET) 56 | if isinstance(pred, dict): 57 | pred = pred.get('metric_depth', pred.get('out')) 58 | elif isinstance(pred, (list, tuple)): 59 | pred = pred[-1] 60 | pred = pred.squeeze().detach().cpu().numpy() 61 | 62 | # Resize color image and depth to final size 63 | resized_color_image = color_image.resize((FINAL_WIDTH, FINAL_HEIGHT), Image.LANCZOS) 64 | resized_pred = Image.fromarray(pred).resize((FINAL_WIDTH, FINAL_HEIGHT), Image.NEAREST) 65 | 66 | focal_length_x, focal_length_y = (FX, FY) if not NYU_DATA else (FL, FL) 67 | x, y = np.meshgrid(np.arange(FINAL_WIDTH), np.arange(FINAL_HEIGHT)) 68 | x = (x - FINAL_WIDTH / 2) / focal_length_x 69 | y = (y - FINAL_HEIGHT / 2) / focal_length_y 70 | z = np.array(resized_pred) 71 | points = np.stack((np.multiply(x, z), np.multiply(y, z), z), axis=-1).reshape(-1, 3) 72 | colors = np.array(resized_color_image).reshape(-1, 3) / 255.0 73 | 74 | pcd = o3d.geometry.PointCloud() 75 | pcd.points = o3d.utility.Vector3dVector(points) 76 | pcd.colors = o3d.utility.Vector3dVector(colors) 77 | pcd = pcd.voxel_down_sample(voxel_size=0.01) 78 | o3d.io.write_point_cloud( 79 | os.path.join(OUTPUT_DIR, os.path.splitext(os.path.basename(image_path))[0] + ".ply"), pcd) 80 | except Exception as e: 81 | print(f"Error processing {image_path}: {e}") 82 | 83 | 84 | def main(model_name, pretrained_resource): 85 | config = get_config(model_name, "eval", DATASET) 86 | config.pretrained_resource = pretrained_resource 87 | model = build_model(config).to('cuda' if torch.cuda.is_available() else 'cpu') 88 | model.eval() 89 | process_images(model) 90 | 91 | 92 | if __name__ == '__main__': 93 | model = 'zoedepth' 94 | pretrained_resource = 'local::./checkpoints/depth_anything_metric_depth_indoor.pt' 95 | # pretrained_resource = 'local::./checkpoints/depth_anything_metric_depth_outdoor.pt' 96 | main(model, pretrained_resource) 97 | -------------------------------------------------------------------------------- /zoedepth/utils/geometry.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 
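For reference, the pinhole back-projection performed inside process_images above can be written as one small helper: a pixel (u, v) with metric depth z maps to camera coordinates X = (u - cx) * z / fx, Y = (v - cy) * z / fy, Z = z, with the principal point approximated by the image centre exactly as in the script. This is a sketch; the function name and shapes are illustrative and not part of the repository.

import numpy as np

def backproject(depth_m, fx, fy):
    """Back-project an H x W metric depth map into an (H*W, 3) array of camera-frame points."""
    h, w = depth_m.shape
    u, v = np.meshgrid(np.arange(w), np.arange(h))   # pixel coordinates, each of shape (h, w)
    x = (u - w / 2) * depth_m / fx
    y = (v - h / 2) * depth_m / fy
    return np.stack((x, y, depth_m), axis=-1).reshape(-1, 3)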
14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import numpy as np 26 | 27 | def get_intrinsics(H,W): 28 | """ 29 | Intrinsics for a pinhole camera model. 30 | Assume fov of 55 degrees and central principal point. 31 | """ 32 | f = 0.5 * W / np.tan(0.5 * 55 * np.pi / 180.0) 33 | cx = 0.5 * W 34 | cy = 0.5 * H 35 | return np.array([[f, 0, cx], 36 | [0, f, cy], 37 | [0, 0, 1]]) 38 | 39 | def depth_to_points(depth, R=None, t=None): 40 | 41 | K = get_intrinsics(depth.shape[1], depth.shape[2]) 42 | Kinv = np.linalg.inv(K) 43 | if R is None: 44 | R = np.eye(3) 45 | if t is None: 46 | t = np.zeros(3) 47 | 48 | # M converts from your coordinate to PyTorch3D's coordinate system 49 | M = np.eye(3) 50 | M[0, 0] = -1.0 51 | M[1, 1] = -1.0 52 | 53 | height, width = depth.shape[1:3] 54 | 55 | x = np.arange(width) 56 | y = np.arange(height) 57 | coord = np.stack(np.meshgrid(x, y), -1) 58 | coord = np.concatenate((coord, np.ones_like(coord)[:, :, [0]]), -1) # z=1 59 | coord = coord.astype(np.float32) 60 | # coord = torch.as_tensor(coord, dtype=torch.float32, device=device) 61 | coord = coord[None] # bs, h, w, 3 62 | 63 | D = depth[:, :, :, None, None] 64 | # print(D.shape, Kinv[None, None, None, ...].shape, coord[:, :, :, :, None].shape ) 65 | pts3D_1 = D * Kinv[None, None, None, ...] @ coord[:, :, :, :, None] 66 | # pts3D_1 live in your coordinate system. Convert them to Py3D's 67 | pts3D_1 = M[None, None, None, ...] @ pts3D_1 68 | # from reference to targe tviewpoint 69 | pts3D_2 = R[None, None, None, ...] @ pts3D_1 + t[None, None, None, :, None] 70 | # pts3D_2 = pts3D_1 71 | # depth_2 = pts3D_2[:, :, :, 2, :] # b,1,h,w 72 | return pts3D_2[:, :, :, :3, 0][0] 73 | 74 | 75 | def create_triangles(h, w, mask=None): 76 | """ 77 | Reference: https://github.com/google-research/google-research/blob/e96197de06613f1b027d20328e06d69829fa5a89/infinite_nature/render_utils.py#L68 78 | Creates mesh triangle indices from a given pixel grid size. 79 | This function is not and need not be differentiable as triangle indices are 80 | fixed. 81 | Args: 82 | h: (int) denoting the height of the image. 83 | w: (int) denoting the width of the image. 84 | Returns: 85 | triangles: 2D numpy array of indices (int) with shape (2(W-1)(H-1) x 3) 86 | """ 87 | x, y = np.meshgrid(range(w - 1), range(h - 1)) 88 | tl = y * w + x 89 | tr = y * w + x + 1 90 | bl = (y + 1) * w + x 91 | br = (y + 1) * w + x + 1 92 | triangles = np.array([tl, bl, tr, br, tr, bl]) 93 | triangles = np.transpose(triangles, (1, 2, 0)).reshape( 94 | ((w - 1) * (h - 1) * 2, 3)) 95 | if mask is not None: 96 | mask = mask.reshape(-1) 97 | triangles = triangles[mask[triangles].all(1)] 98 | return triangles 99 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/loss/dino_clstoken_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
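A small shape-level sketch of the geometry helpers above (get_intrinsics, depth_to_points, create_triangles); the 640 x 480 image size and the flat 1 m depth map are illustrative only.

import numpy as np
from zoedepth.utils.geometry import get_intrinsics, depth_to_points, create_triangles

K = get_intrinsics(480, 640)                        # f = 0.5 * 640 / tan(27.5 deg), roughly 615 px
depth = np.ones((1, 480, 640), dtype=np.float32)    # batch of one constant 1 m depth map
pts = depth_to_points(depth)                        # (480, 640, 3) camera-frame points
tris = create_triangles(480, 640)                   # (2 * 479 * 639, 3) triangle vertex indices
print(K[0, 0], pts.shape, tris.shape)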
3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | import torch.distributed as dist 9 | import torch.nn.functional as F 10 | from torch import nn 11 | 12 | 13 | class DINOLoss(nn.Module): 14 | def __init__( 15 | self, 16 | out_dim, 17 | student_temp=0.1, 18 | center_momentum=0.9, 19 | ): 20 | super().__init__() 21 | self.student_temp = student_temp 22 | self.center_momentum = center_momentum 23 | self.register_buffer("center", torch.zeros(1, out_dim)) 24 | self.updated = True 25 | self.reduce_handle = None 26 | self.len_teacher_output = None 27 | self.async_batch_center = None 28 | 29 | @torch.no_grad() 30 | def softmax_center_teacher(self, teacher_output, teacher_temp): 31 | self.apply_center_update() 32 | # teacher centering and sharpening 33 | return F.softmax((teacher_output - self.center) / teacher_temp, dim=-1) 34 | 35 | @torch.no_grad() 36 | def sinkhorn_knopp_teacher(self, teacher_output, teacher_temp, n_iterations=3): 37 | teacher_output = teacher_output.float() 38 | world_size = dist.get_world_size() if dist.is_initialized() else 1 39 | Q = torch.exp(teacher_output / teacher_temp).t() # Q is K-by-B for consistency with notations from our paper 40 | B = Q.shape[1] * world_size # number of samples to assign 41 | K = Q.shape[0] # how many prototypes 42 | 43 | # make the matrix sums to 1 44 | sum_Q = torch.sum(Q) 45 | if dist.is_initialized(): 46 | dist.all_reduce(sum_Q) 47 | Q /= sum_Q 48 | 49 | for it in range(n_iterations): 50 | # normalize each row: total weight per prototype must be 1/K 51 | sum_of_rows = torch.sum(Q, dim=1, keepdim=True) 52 | if dist.is_initialized(): 53 | dist.all_reduce(sum_of_rows) 54 | Q /= sum_of_rows 55 | Q /= K 56 | 57 | # normalize each column: total weight per sample must be 1/B 58 | Q /= torch.sum(Q, dim=0, keepdim=True) 59 | Q /= B 60 | 61 | Q *= B # the columns must sum to 1 so that Q is an assignment 62 | return Q.t() 63 | 64 | def forward(self, student_output_list, teacher_out_softmaxed_centered_list): 65 | """ 66 | Cross-entropy between softmax outputs of the teacher and student networks. 
67 | """ 68 | # TODO: Use cross_entropy_distribution here 69 | total_loss = 0 70 | for s in student_output_list: 71 | lsm = F.log_softmax(s / self.student_temp, dim=-1) 72 | for t in teacher_out_softmaxed_centered_list: 73 | loss = torch.sum(t * lsm, dim=-1) 74 | total_loss -= loss.mean() 75 | return total_loss 76 | 77 | @torch.no_grad() 78 | def update_center(self, teacher_output): 79 | self.reduce_center_update(teacher_output) 80 | 81 | @torch.no_grad() 82 | def reduce_center_update(self, teacher_output): 83 | self.updated = False 84 | self.len_teacher_output = len(teacher_output) 85 | self.async_batch_center = torch.sum(teacher_output, dim=0, keepdim=True) 86 | if dist.is_initialized(): 87 | self.reduce_handle = dist.all_reduce(self.async_batch_center, async_op=True) 88 | 89 | @torch.no_grad() 90 | def apply_center_update(self): 91 | if self.updated is False: 92 | world_size = dist.get_world_size() if dist.is_initialized() else 1 93 | 94 | if self.reduce_handle is not None: 95 | self.reduce_handle.wait() 96 | _t = self.async_batch_center / (self.len_teacher_output * world_size) 97 | 98 | self.center = self.center * self.center_momentum + _t * (1 - self.center_momentum) 99 | 100 | self.updated = True 101 | -------------------------------------------------------------------------------- /zoedepth/utils/easydict/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | EasyDict 3 | Copy/pasted from https://github.com/makinacorpus/easydict 4 | Original author: Mathieu Leplatre 5 | """ 6 | 7 | class EasyDict(dict): 8 | """ 9 | Get attributes 10 | 11 | >>> d = EasyDict({'foo':3}) 12 | >>> d['foo'] 13 | 3 14 | >>> d.foo 15 | 3 16 | >>> d.bar 17 | Traceback (most recent call last): 18 | ... 19 | AttributeError: 'EasyDict' object has no attribute 'bar' 20 | 21 | Works recursively 22 | 23 | >>> d = EasyDict({'foo':3, 'bar':{'x':1, 'y':2}}) 24 | >>> isinstance(d.bar, dict) 25 | True 26 | >>> d.bar.x 27 | 1 28 | 29 | Bullet-proof 30 | 31 | >>> EasyDict({}) 32 | {} 33 | >>> EasyDict(d={}) 34 | {} 35 | >>> EasyDict(None) 36 | {} 37 | >>> d = {'a': 1} 38 | >>> EasyDict(**d) 39 | {'a': 1} 40 | >>> EasyDict((('a', 1), ('b', 2))) 41 | {'a': 1, 'b': 2} 42 | 43 | Set attributes 44 | 45 | >>> d = EasyDict() 46 | >>> d.foo = 3 47 | >>> d.foo 48 | 3 49 | >>> d.bar = {'prop': 'value'} 50 | >>> d.bar.prop 51 | 'value' 52 | >>> d 53 | {'foo': 3, 'bar': {'prop': 'value'}} 54 | >>> d.bar.prop = 'newer' 55 | >>> d.bar.prop 56 | 'newer' 57 | 58 | 59 | Values extraction 60 | 61 | >>> d = EasyDict({'foo':0, 'bar':[{'x':1, 'y':2}, {'x':3, 'y':4}]}) 62 | >>> isinstance(d.bar, list) 63 | True 64 | >>> from operator import attrgetter 65 | >>> list(map(attrgetter('x'), d.bar)) 66 | [1, 3] 67 | >>> list(map(attrgetter('y'), d.bar)) 68 | [2, 4] 69 | >>> d = EasyDict() 70 | >>> list(d.keys()) 71 | [] 72 | >>> d = EasyDict(foo=3, bar=dict(x=1, y=2)) 73 | >>> d.foo 74 | 3 75 | >>> d.bar.x 76 | 1 77 | 78 | Still like a dict though 79 | 80 | >>> o = EasyDict({'clean':True}) 81 | >>> list(o.items()) 82 | [('clean', True)] 83 | 84 | And like a class 85 | 86 | >>> class Flower(EasyDict): 87 | ... power = 1 88 | ... 
89 | >>> f = Flower() 90 | >>> f.power 91 | 1 92 | >>> f = Flower({'height': 12}) 93 | >>> f.height 94 | 12 95 | >>> f['power'] 96 | 1 97 | >>> sorted(f.keys()) 98 | ['height', 'power'] 99 | 100 | update and pop items 101 | >>> d = EasyDict(a=1, b='2') 102 | >>> e = EasyDict(c=3.0, a=9.0) 103 | >>> d.update(e) 104 | >>> d.c 105 | 3.0 106 | >>> d['c'] 107 | 3.0 108 | >>> d.get('c') 109 | 3.0 110 | >>> d.update(a=4, b=4) 111 | >>> d.b 112 | 4 113 | >>> d.pop('a') 114 | 4 115 | >>> d.a 116 | Traceback (most recent call last): 117 | ... 118 | AttributeError: 'EasyDict' object has no attribute 'a' 119 | """ 120 | def __init__(self, d=None, **kwargs): 121 | if d is None: 122 | d = {} 123 | else: 124 | d = dict(d) 125 | if kwargs: 126 | d.update(**kwargs) 127 | for k, v in d.items(): 128 | setattr(self, k, v) 129 | # Class attributes 130 | for k in self.__class__.__dict__.keys(): 131 | if not (k.startswith('__') and k.endswith('__')) and not k in ('update', 'pop'): 132 | setattr(self, k, getattr(self, k)) 133 | 134 | def __setattr__(self, name, value): 135 | if isinstance(value, (list, tuple)): 136 | value = [self.__class__(x) 137 | if isinstance(x, dict) else x for x in value] 138 | elif isinstance(value, dict) and not isinstance(value, self.__class__): 139 | value = self.__class__(value) 140 | super(EasyDict, self).__setattr__(name, value) 141 | super(EasyDict, self).__setitem__(name, value) 142 | 143 | __setitem__ = __setattr__ 144 | 145 | def update(self, e=None, **f): 146 | d = e or dict() 147 | d.update(f) 148 | for k in d: 149 | setattr(self, k, d[k]) 150 | 151 | def pop(self, k, d=None): 152 | delattr(self, k) 153 | return super(EasyDict, self).pop(k, d) 154 | 155 | 156 | if __name__ == "__main__": 157 | import doctest 158 | doctest.testmod() -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/eval/metrics.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | from enum import Enum 8 | import logging 9 | from typing import Any, Dict, Optional 10 | 11 | import torch 12 | from torch import Tensor 13 | from torchmetrics import Metric, MetricCollection 14 | from torchmetrics.classification import MulticlassAccuracy 15 | from torchmetrics.utilities.data import dim_zero_cat, select_topk 16 | 17 | 18 | logger = logging.getLogger("dinov2") 19 | 20 | 21 | class MetricType(Enum): 22 | MEAN_ACCURACY = "mean_accuracy" 23 | MEAN_PER_CLASS_ACCURACY = "mean_per_class_accuracy" 24 | PER_CLASS_ACCURACY = "per_class_accuracy" 25 | IMAGENET_REAL_ACCURACY = "imagenet_real_accuracy" 26 | 27 | @property 28 | def accuracy_averaging(self): 29 | return getattr(AccuracyAveraging, self.name, None) 30 | 31 | def __str__(self): 32 | return self.value 33 | 34 | 35 | class AccuracyAveraging(Enum): 36 | MEAN_ACCURACY = "micro" 37 | MEAN_PER_CLASS_ACCURACY = "macro" 38 | PER_CLASS_ACCURACY = "none" 39 | 40 | def __str__(self): 41 | return self.value 42 | 43 | 44 | def build_metric(metric_type: MetricType, *, num_classes: int, ks: Optional[tuple] = None): 45 | if metric_type.accuracy_averaging is not None: 46 | return build_topk_accuracy_metric( 47 | average_type=metric_type.accuracy_averaging, 48 | num_classes=num_classes, 49 | ks=(1, 5) if ks is None else ks, 50 | ) 51 | elif metric_type == MetricType.IMAGENET_REAL_ACCURACY: 52 | return build_topk_imagenet_real_accuracy_metric( 53 | num_classes=num_classes, 54 | ks=(1, 5) if ks is None else ks, 55 | ) 56 | 57 | raise ValueError(f"Unknown metric type {metric_type}") 58 | 59 | 60 | def build_topk_accuracy_metric(average_type: AccuracyAveraging, num_classes: int, ks: tuple = (1, 5)): 61 | metrics: Dict[str, Metric] = { 62 | f"top-{k}": MulticlassAccuracy(top_k=k, num_classes=int(num_classes), average=average_type.value) for k in ks 63 | } 64 | return MetricCollection(metrics) 65 | 66 | 67 | def build_topk_imagenet_real_accuracy_metric(num_classes: int, ks: tuple = (1, 5)): 68 | metrics: Dict[str, Metric] = {f"top-{k}": ImageNetReaLAccuracy(top_k=k, num_classes=int(num_classes)) for k in ks} 69 | return MetricCollection(metrics) 70 | 71 | 72 | class ImageNetReaLAccuracy(Metric): 73 | is_differentiable: bool = False 74 | higher_is_better: Optional[bool] = None 75 | full_state_update: bool = False 76 | 77 | def __init__( 78 | self, 79 | num_classes: int, 80 | top_k: int = 1, 81 | **kwargs: Any, 82 | ) -> None: 83 | super().__init__(**kwargs) 84 | self.num_classes = num_classes 85 | self.top_k = top_k 86 | self.add_state("tp", [], dist_reduce_fx="cat") 87 | 88 | def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore 89 | # preds [B, D] 90 | # target [B, A] 91 | # preds_oh [B, D] with 0 and 1 92 | # select top K highest probabilities, use one hot representation 93 | preds_oh = select_topk(preds, self.top_k) 94 | # target_oh [B, D + 1] with 0 and 1 95 | target_oh = torch.zeros((preds_oh.shape[0], preds_oh.shape[1] + 1), device=target.device, dtype=torch.int32) 96 | target = target.long() 97 | # for undefined targets (-1) use a fake value `num_classes` 98 | target[target == -1] = self.num_classes 99 | # fill targets, use one hot representation 100 | target_oh.scatter_(1, target, 1) 101 | # target_oh [B, D] (remove the fake target at index `num_classes`) 102 | target_oh = target_oh[:, :-1] 103 | # tp [B] with 0 and 1 104 | tp = (preds_oh * target_oh == 1).sum(dim=1) 105 | # at least one match between prediction and target 106 | tp.clip_(max=1) 107 | # ignore instances where no targets are defined 108 | mask = 
target_oh.sum(dim=1) > 0 109 | tp = tp[mask] 110 | self.tp.append(tp) # type: ignore 111 | 112 | def compute(self) -> Tensor: 113 | tp = dim_zero_cat(self.tp) # type: ignore 114 | return tp.float().mean() 115 | -------------------------------------------------------------------------------- /zoedepth/models/layers/patch_transformer.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | import torch.nn as nn 27 | 28 | 29 | class PatchTransformerEncoder(nn.Module): 30 | def __init__(self, in_channels, patch_size=10, embedding_dim=128, num_heads=4, use_class_token=False): 31 | """ViT-like transformer block 32 | 33 | Args: 34 | in_channels (int): Input channels 35 | patch_size (int, optional): patch size. Defaults to 10. 36 | embedding_dim (int, optional): Embedding dimension in transformer model. Defaults to 128. 37 | num_heads (int, optional): number of attention heads. Defaults to 4. 38 | use_class_token (bool, optional): Whether to use extra token at the start for global accumulation (called as "class token"). Defaults to False. 
39 | """ 40 | super(PatchTransformerEncoder, self).__init__() 41 | self.use_class_token = use_class_token 42 | encoder_layers = nn.TransformerEncoderLayer( 43 | embedding_dim, num_heads, dim_feedforward=1024) 44 | self.transformer_encoder = nn.TransformerEncoder( 45 | encoder_layers, num_layers=4) # takes shape S,N,E 46 | 47 | self.embedding_convPxP = nn.Conv2d(in_channels, embedding_dim, 48 | kernel_size=patch_size, stride=patch_size, padding=0) 49 | 50 | def positional_encoding_1d(self, sequence_length, batch_size, embedding_dim, device='cpu'): 51 | """Generate positional encodings 52 | 53 | Args: 54 | sequence_length (int): Sequence length 55 | embedding_dim (int): Embedding dimension 56 | 57 | Returns: 58 | torch.Tensor SBE: Positional encodings 59 | """ 60 | position = torch.arange( 61 | 0, sequence_length, dtype=torch.float32, device=device).unsqueeze(1) 62 | index = torch.arange( 63 | 0, embedding_dim, 2, dtype=torch.float32, device=device).unsqueeze(0) 64 | div_term = torch.exp(index * (-torch.log(torch.tensor(10000.0, device=device)) / embedding_dim)) 65 | pos_encoding = position * div_term 66 | pos_encoding = torch.cat([torch.sin(pos_encoding), torch.cos(pos_encoding)], dim=1) 67 | pos_encoding = pos_encoding.unsqueeze(1).repeat(1, batch_size, 1) 68 | return pos_encoding 69 | 70 | 71 | def forward(self, x): 72 | """Forward pass 73 | 74 | Args: 75 | x (torch.Tensor - NCHW): Input feature tensor 76 | 77 | Returns: 78 | torch.Tensor - SNE: Transformer output embeddings. S - sequence length (=HW/patch_size^2), N - batch size, E - embedding dim 79 | """ 80 | embeddings = self.embedding_convPxP(x).flatten( 81 | 2) # .shape = n,c,s = n, embedding_dim, s 82 | if self.use_class_token: 83 | # extra special token at start ? 84 | embeddings = nn.functional.pad(embeddings, (1, 0)) 85 | 86 | # change to S,N,E format required by transformer 87 | embeddings = embeddings.permute(2, 0, 1) 88 | S, N, E = embeddings.shape 89 | embeddings = embeddings + self.positional_encoding_1d(S, N, E, device=embeddings.device) 90 | x = self.transformer_encoder(embeddings) # .shape = S, N, E 91 | return x 92 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/data/augmentations.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import logging 8 | 9 | from torchvision import transforms 10 | 11 | from .transforms import ( 12 | GaussianBlur, 13 | make_normalize_transform, 14 | ) 15 | 16 | 17 | logger = logging.getLogger("dinov2") 18 | 19 | 20 | class DataAugmentationDINO(object): 21 | def __init__( 22 | self, 23 | global_crops_scale, 24 | local_crops_scale, 25 | local_crops_number, 26 | global_crops_size=224, 27 | local_crops_size=96, 28 | ): 29 | self.global_crops_scale = global_crops_scale 30 | self.local_crops_scale = local_crops_scale 31 | self.local_crops_number = local_crops_number 32 | self.global_crops_size = global_crops_size 33 | self.local_crops_size = local_crops_size 34 | 35 | logger.info("###################################") 36 | logger.info("Using data augmentation parameters:") 37 | logger.info(f"global_crops_scale: {global_crops_scale}") 38 | logger.info(f"local_crops_scale: {local_crops_scale}") 39 | logger.info(f"local_crops_number: {local_crops_number}") 40 | logger.info(f"global_crops_size: {global_crops_size}") 41 | logger.info(f"local_crops_size: {local_crops_size}") 42 | logger.info("###################################") 43 | 44 | # random resized crop and flip 45 | self.geometric_augmentation_global = transforms.Compose( 46 | [ 47 | transforms.RandomResizedCrop( 48 | global_crops_size, scale=global_crops_scale, interpolation=transforms.InterpolationMode.BICUBIC 49 | ), 50 | transforms.RandomHorizontalFlip(p=0.5), 51 | ] 52 | ) 53 | 54 | self.geometric_augmentation_local = transforms.Compose( 55 | [ 56 | transforms.RandomResizedCrop( 57 | local_crops_size, scale=local_crops_scale, interpolation=transforms.InterpolationMode.BICUBIC 58 | ), 59 | transforms.RandomHorizontalFlip(p=0.5), 60 | ] 61 | ) 62 | 63 | # color distorsions / blurring 64 | color_jittering = transforms.Compose( 65 | [ 66 | transforms.RandomApply( 67 | [transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.2, hue=0.1)], 68 | p=0.8, 69 | ), 70 | transforms.RandomGrayscale(p=0.2), 71 | ] 72 | ) 73 | 74 | global_transfo1_extra = GaussianBlur(p=1.0) 75 | 76 | global_transfo2_extra = transforms.Compose( 77 | [ 78 | GaussianBlur(p=0.1), 79 | transforms.RandomSolarize(threshold=128, p=0.2), 80 | ] 81 | ) 82 | 83 | local_transfo_extra = GaussianBlur(p=0.5) 84 | 85 | # normalization 86 | self.normalize = transforms.Compose( 87 | [ 88 | transforms.ToTensor(), 89 | make_normalize_transform(), 90 | ] 91 | ) 92 | 93 | self.global_transfo1 = transforms.Compose([color_jittering, global_transfo1_extra, self.normalize]) 94 | self.global_transfo2 = transforms.Compose([color_jittering, global_transfo2_extra, self.normalize]) 95 | self.local_transfo = transforms.Compose([color_jittering, local_transfo_extra, self.normalize]) 96 | 97 | def __call__(self, image): 98 | output = {} 99 | 100 | # global crops: 101 | im1_base = self.geometric_augmentation_global(image) 102 | global_crop_1 = self.global_transfo1(im1_base) 103 | 104 | im2_base = self.geometric_augmentation_global(image) 105 | global_crop_2 = self.global_transfo2(im2_base) 106 | 107 | output["global_crops"] = [global_crop_1, global_crop_2] 108 | 109 | # global crops for teacher: 110 | output["global_crops_teacher"] = [global_crop_1, global_crop_2] 111 | 112 | # local crops: 113 | local_crops = [ 114 | self.local_transfo(self.geometric_augmentation_local(image)) for _ in range(self.local_crops_number) 115 | ] 116 | output["local_crops"] = local_crops 117 | output["offsets"] = () 118 | 119 | return output 120 | 
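A minimal sketch of applying the DataAugmentationDINO pipeline above to a single PIL image; the crop scales, crop count and file name are illustrative values, not prescribed by this repository.

from PIL import Image
from dinov2.data.augmentations import DataAugmentationDINO

aug = DataAugmentationDINO(
    global_crops_scale=(0.32, 1.0),
    local_crops_scale=(0.05, 0.32),
    local_crops_number=8,
)
views = aug(Image.open("example.jpg").convert("RGB"))
print(len(views["global_crops"]), len(views["local_crops"]))   # 2 global, 8 local
# each global crop is a normalized 3 x 224 x 224 tensor, each local crop 3 x 96 x 96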
-------------------------------------------------------------------------------- /zoedepth/data/diml_outdoor_test.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | 40 | def __call__(self, sample): 41 | image, depth = sample['image'], sample['depth'] 42 | image = self.to_tensor(image) 43 | image = self.normalize(image) 44 | depth = self.to_tensor(depth) 45 | 46 | return {'image': image, 'depth': depth, 'dataset': "diml_outdoor"} 47 | 48 | def to_tensor(self, pic): 49 | 50 | if isinstance(pic, np.ndarray): 51 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 52 | return img 53 | 54 | # # handle PIL Image 55 | if pic.mode == 'I': 56 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 57 | elif pic.mode == 'I;16': 58 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 59 | else: 60 | img = torch.ByteTensor( 61 | torch.ByteStorage.from_buffer(pic.tobytes())) 62 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 63 | if pic.mode == 'YCbCr': 64 | nchannel = 3 65 | elif pic.mode == 'I;16': 66 | nchannel = 1 67 | else: 68 | nchannel = len(pic.mode) 69 | img = img.view(pic.size[1], pic.size[0], nchannel) 70 | 71 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 72 | if isinstance(img, torch.ByteTensor): 73 | return img.float() 74 | else: 75 | return img 76 | 77 | 78 | class DIML_Outdoor(Dataset): 79 | def __init__(self, data_dir_root): 80 | import glob 81 | 82 | # image paths are of the form /{outleft, depthmap}/*.png 83 | self.image_files = glob.glob(os.path.join( 84 | data_dir_root, 'outleft', '*.png')) 85 | self.depth_files = [r.replace("outleft", "depthmap") 86 | for r in self.image_files] 87 | self.transform = ToTensor() 88 | 89 | def __getitem__(self, idx): 90 | image_path = self.image_files[idx] 91 | depth_path = self.depth_files[idx] 92 | 93 | image = np.asarray(Image.open(image_path), 
dtype=np.float32) / 255.0 94 | depth = np.asarray(Image.open(depth_path), 95 | dtype='uint16') / 1000.0 # mm to meters 96 | 97 | # depth[depth > 8] = -1 98 | depth = depth[..., None] 99 | 100 | sample = dict(image=image, depth=depth, dataset="diml_outdoor") 101 | 102 | # return sample 103 | return self.transform(sample) 104 | 105 | def __len__(self): 106 | return len(self.image_files) 107 | 108 | 109 | def get_diml_outdoor_loader(data_dir_root, batch_size=1, **kwargs): 110 | dataset = DIML_Outdoor(data_dir_root) 111 | return DataLoader(dataset, batch_size, **kwargs) 112 | 113 | # get_diml_outdoor_loader(data_dir_root="datasets/diml/outdoor/test/HR") 114 | # get_diml_outdoor_loader(data_dir_root="datasets/diml/outdoor/test/LR") 115 | -------------------------------------------------------------------------------- /zoedepth/models/base_models/dpt_dinov2/blocks.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | def _make_scratch(in_shape, out_shape, groups=1, expand=False): 5 | scratch = nn.Module() 6 | 7 | out_shape1 = out_shape 8 | out_shape2 = out_shape 9 | out_shape3 = out_shape 10 | if len(in_shape) >= 4: 11 | out_shape4 = out_shape 12 | 13 | if expand: 14 | out_shape1 = out_shape 15 | out_shape2 = out_shape*2 16 | out_shape3 = out_shape*4 17 | if len(in_shape) >= 4: 18 | out_shape4 = out_shape*8 19 | 20 | scratch.layer1_rn = nn.Conv2d( 21 | in_shape[0], out_shape1, kernel_size=3, stride=1, padding=1, bias=False, groups=groups 22 | ) 23 | scratch.layer2_rn = nn.Conv2d( 24 | in_shape[1], out_shape2, kernel_size=3, stride=1, padding=1, bias=False, groups=groups 25 | ) 26 | scratch.layer3_rn = nn.Conv2d( 27 | in_shape[2], out_shape3, kernel_size=3, stride=1, padding=1, bias=False, groups=groups 28 | ) 29 | if len(in_shape) >= 4: 30 | scratch.layer4_rn = nn.Conv2d( 31 | in_shape[3], out_shape4, kernel_size=3, stride=1, padding=1, bias=False, groups=groups 32 | ) 33 | 34 | return scratch 35 | 36 | 37 | class ResidualConvUnit(nn.Module): 38 | """Residual convolution module. 39 | """ 40 | 41 | def __init__(self, features, activation, bn): 42 | """Init. 43 | 44 | Args: 45 | features (int): number of features 46 | """ 47 | super().__init__() 48 | 49 | self.bn = bn 50 | 51 | self.groups=1 52 | 53 | self.conv1 = nn.Conv2d( 54 | features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups 55 | ) 56 | 57 | self.conv2 = nn.Conv2d( 58 | features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups 59 | ) 60 | 61 | if self.bn==True: 62 | self.bn1 = nn.BatchNorm2d(features) 63 | self.bn2 = nn.BatchNorm2d(features) 64 | 65 | self.activation = activation 66 | 67 | self.skip_add = nn.quantized.FloatFunctional() 68 | 69 | def forward(self, x): 70 | """Forward pass. 71 | 72 | Args: 73 | x (tensor): input 74 | 75 | Returns: 76 | tensor: output 77 | """ 78 | 79 | out = self.activation(x) 80 | out = self.conv1(out) 81 | if self.bn==True: 82 | out = self.bn1(out) 83 | 84 | out = self.activation(out) 85 | out = self.conv2(out) 86 | if self.bn==True: 87 | out = self.bn2(out) 88 | 89 | if self.groups > 1: 90 | out = self.conv_merge(out) 91 | 92 | return self.skip_add.add(out, x) 93 | 94 | 95 | class FeatureFusionBlock(nn.Module): 96 | """Feature fusion block. 97 | """ 98 | 99 | def __init__(self, features, activation, deconv=False, bn=False, expand=False, align_corners=True, size=None): 100 | """Init. 
101 | 102 | Args: 103 | features (int): number of features 104 | """ 105 | super(FeatureFusionBlock, self).__init__() 106 | 107 | self.deconv = deconv 108 | self.align_corners = align_corners 109 | 110 | self.groups=1 111 | 112 | self.expand = expand 113 | out_features = features 114 | if self.expand==True: 115 | out_features = features//2 116 | 117 | self.out_conv = nn.Conv2d(features, out_features, kernel_size=1, stride=1, padding=0, bias=True, groups=1) 118 | 119 | self.resConfUnit1 = ResidualConvUnit(features, activation, bn) 120 | self.resConfUnit2 = ResidualConvUnit(features, activation, bn) 121 | 122 | self.skip_add = nn.quantized.FloatFunctional() 123 | 124 | self.size=size 125 | 126 | def forward(self, *xs, size=None): 127 | """Forward pass. 128 | 129 | Returns: 130 | tensor: output 131 | """ 132 | output = xs[0] 133 | 134 | if len(xs) == 2: 135 | res = self.resConfUnit1(xs[1]) 136 | output = self.skip_add.add(output, res) 137 | 138 | output = self.resConfUnit2(output) 139 | 140 | if (size is None) and (self.size is None): 141 | modifier = {"scale_factor": 2} 142 | elif size is None: 143 | modifier = {"size": self.size} 144 | else: 145 | modifier = {"size": size} 146 | 147 | output = nn.functional.interpolate( 148 | output, **modifier, mode="bilinear", align_corners=self.align_corners 149 | ) 150 | 151 | output = self.out_conv(output) 152 | 153 | return output 154 | -------------------------------------------------------------------------------- /zoedepth/data/diode.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
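A shape-level sketch of the FeatureFusionBlock defined above: it fuses a coarse decoder feature with a same-resolution skip feature through the two residual units, then upsamples by 2 and projects with out_conv. The sizes below are illustrative.

import torch
import torch.nn as nn
from zoedepth.models.base_models.dpt_dinov2.blocks import FeatureFusionBlock

fuse = FeatureFusionBlock(features=256, activation=nn.ReLU(False), bn=False)
coarse = torch.randn(1, 256, 24, 32)
skip = torch.randn(1, 256, 24, 32)
out = fuse(coarse, skip)     # skip-add of residual branches, then 2x bilinear upsample
print(out.shape)             # torch.Size([1, 256, 48, 64])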
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | self.resize = transforms.Resize(480) 40 | 41 | def __call__(self, sample): 42 | image, depth = sample['image'], sample['depth'] 43 | image = self.to_tensor(image) 44 | image = self.normalize(image) 45 | depth = self.to_tensor(depth) 46 | 47 | image = self.resize(image) 48 | 49 | return {'image': image, 'depth': depth, 'dataset': "diode"} 50 | 51 | def to_tensor(self, pic): 52 | 53 | if isinstance(pic, np.ndarray): 54 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 55 | return img 56 | 57 | # # handle PIL Image 58 | if pic.mode == 'I': 59 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 60 | elif pic.mode == 'I;16': 61 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 62 | else: 63 | img = torch.ByteTensor( 64 | torch.ByteStorage.from_buffer(pic.tobytes())) 65 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 66 | if pic.mode == 'YCbCr': 67 | nchannel = 3 68 | elif pic.mode == 'I;16': 69 | nchannel = 1 70 | else: 71 | nchannel = len(pic.mode) 72 | img = img.view(pic.size[1], pic.size[0], nchannel) 73 | 74 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 75 | 76 | if isinstance(img, torch.ByteTensor): 77 | return img.float() 78 | else: 79 | return img 80 | 81 | 82 | class DIODE(Dataset): 83 | def __init__(self, data_dir_root): 84 | import glob 85 | 86 | # image paths are of the form /scene_#/scan_#/*.png 87 | self.image_files = glob.glob( 88 | os.path.join(data_dir_root, '*', '*', '*.png')) 89 | self.depth_files = [r.replace(".png", "_depth.npy") 90 | for r in self.image_files] 91 | self.depth_mask_files = [ 92 | r.replace(".png", "_depth_mask.npy") for r in self.image_files] 93 | self.transform = ToTensor() 94 | 95 | def __getitem__(self, idx): 96 | image_path = self.image_files[idx] 97 | depth_path = self.depth_files[idx] 98 | depth_mask_path = self.depth_mask_files[idx] 99 | 100 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 101 | depth = np.load(depth_path) # in meters 102 | valid = np.load(depth_mask_path) # binary 103 | 104 | # depth[depth > 8] = -1 105 | # depth = depth[..., None] 106 | 107 | sample = dict(image=image, depth=depth, valid=valid) 108 | 109 | # return sample 110 | sample = self.transform(sample) 111 | 112 | if idx == 0: 113 | print(sample["image"].shape) 114 | 115 | return sample 116 | 117 | def __len__(self): 118 | return len(self.image_files) 119 | 120 | 121 | def get_diode_loader(data_dir_root, batch_size=1, **kwargs): 122 | dataset = DIODE(data_dir_root) 123 | return DataLoader(dataset, batch_size, **kwargs) 124 | 125 | # get_diode_loader(data_dir_root="datasets/diode/val/outdoor") 126 | -------------------------------------------------------------------------------- /zoedepth/data/ddad.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including 
without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self, resize_shape): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | self.resize = transforms.Resize(resize_shape) 40 | 41 | def __call__(self, sample): 42 | image, depth = sample['image'], sample['depth'] 43 | image = self.to_tensor(image) 44 | image = self.normalize(image) 45 | depth = self.to_tensor(depth) 46 | 47 | image = self.resize(image) 48 | 49 | return {'image': image, 'depth': depth, 'dataset': "ddad"} 50 | 51 | def to_tensor(self, pic): 52 | 53 | if isinstance(pic, np.ndarray): 54 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 55 | return img 56 | 57 | # # handle PIL Image 58 | if pic.mode == 'I': 59 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 60 | elif pic.mode == 'I;16': 61 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 62 | else: 63 | img = torch.ByteTensor( 64 | torch.ByteStorage.from_buffer(pic.tobytes())) 65 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 66 | if pic.mode == 'YCbCr': 67 | nchannel = 3 68 | elif pic.mode == 'I;16': 69 | nchannel = 1 70 | else: 71 | nchannel = len(pic.mode) 72 | img = img.view(pic.size[1], pic.size[0], nchannel) 73 | 74 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 75 | 76 | if isinstance(img, torch.ByteTensor): 77 | return img.float() 78 | else: 79 | return img 80 | 81 | 82 | class DDAD(Dataset): 83 | def __init__(self, data_dir_root, resize_shape): 84 | import glob 85 | 86 | # image paths are of the form /{outleft, depthmap}/*.png 87 | 88 | # self.image_files = glob.glob(os.path.join(data_dir_root, '*.png')) 89 | # self.depth_files = [r.replace("_rgb.png", "_depth.npy") 90 | # for r in self.image_files] 91 | self.image_files, self.depth_files = [], [] 92 | with open('/mnt/bn/liheyang/MTL-SA-1B/dataset/splits/ddad/val.txt', 'r') as f: 93 | lines = f.read().splitlines() 94 | for line in lines: 95 | self.image_files.append(line.split(' ')[0]) 96 | self.depth_files.append(line.split(' ')[1]) 97 | 98 | self.transform = ToTensor(resize_shape) 99 | 100 | def __getitem__(self, idx): 101 | 102 | image_path = self.image_files[idx] 103 | depth_path = self.depth_files[idx] 104 | 105 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 106 | depth = np.load(depth_path) # meters 107 | 108 | # 
depth[depth > 8] = -1 109 | depth = depth[..., None] 110 | 111 | sample = dict(image=image, depth=depth) 112 | sample = self.transform(sample) 113 | 114 | if idx == 0: 115 | print(sample["image"].shape) 116 | 117 | return sample 118 | 119 | def __len__(self): 120 | return len(self.image_files) 121 | 122 | 123 | def get_ddad_loader(data_dir_root, resize_shape, batch_size=1, **kwargs): 124 | dataset = DDAD(data_dir_root, resize_shape) 125 | return DataLoader(dataset, batch_size, **kwargs) 126 | -------------------------------------------------------------------------------- /zoedepth/data/diml_indoor_test.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | self.resize = transforms.Resize((480, 640)) 40 | 41 | def __call__(self, sample): 42 | image, depth = sample['image'], sample['depth'] 43 | image = self.to_tensor(image) 44 | image = self.normalize(image) 45 | depth = self.to_tensor(depth) 46 | 47 | image = self.resize(image) 48 | 49 | return {'image': image, 'depth': depth, 'dataset': "diml_indoor"} 50 | 51 | def to_tensor(self, pic): 52 | 53 | if isinstance(pic, np.ndarray): 54 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 55 | return img 56 | 57 | # # handle PIL Image 58 | if pic.mode == 'I': 59 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 60 | elif pic.mode == 'I;16': 61 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 62 | else: 63 | img = torch.ByteTensor( 64 | torch.ByteStorage.from_buffer(pic.tobytes())) 65 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 66 | if pic.mode == 'YCbCr': 67 | nchannel = 3 68 | elif pic.mode == 'I;16': 69 | nchannel = 1 70 | else: 71 | nchannel = len(pic.mode) 72 | img = img.view(pic.size[1], pic.size[0], nchannel) 73 | 74 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 75 | if isinstance(img, torch.ByteTensor): 76 | return img.float() 77 | else: 78 | return img 79 | 80 | 81 | class DIML_Indoor(Dataset): 82 | def __init__(self, data_dir_root): 83 | import glob 84 | 85 | # image paths are of the form /{HR, LR}//{color, depth_filled}/*.png 86 | self.image_files = glob.glob(os.path.join( 87 | data_dir_root, "LR", '*', 'color', '*.png')) 88 | self.depth_files = [r.replace("color", "depth_filled").replace( 89 | "_c.png", "_depth_filled.png") for r in self.image_files] 90 | self.transform = ToTensor() 91 | 92 | def __getitem__(self, idx): 93 | image_path = self.image_files[idx] 94 | depth_path = self.depth_files[idx] 95 | 96 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 97 | depth = np.asarray(Image.open(depth_path), 98 | dtype='uint16') / 1000.0 # mm to meters 99 | 100 | # print(np.shape(image)) 101 | # print(np.shape(depth)) 102 | 103 | # depth[depth > 8] = -1 104 | depth = depth[..., None] 105 | 106 | sample = dict(image=image, depth=depth) 107 | 108 | # return sample 109 | sample = self.transform(sample) 110 | 111 | if idx == 0: 112 | print(sample["image"].shape) 113 | 114 | return sample 115 | 116 | def __len__(self): 117 | return len(self.image_files) 118 | 119 | 120 | def get_diml_indoor_loader(data_dir_root, batch_size=1, **kwargs): 121 | dataset = DIML_Indoor(data_dir_root) 122 | return DataLoader(dataset, batch_size, **kwargs) 123 | 124 | # get_diml_indoor_loader(data_dir_root="datasets/diml/indoor/test/HR") 125 | # get_diml_indoor_loader(data_dir_root="datasets/diml/indoor/test/LR") 126 | -------------------------------------------------------------------------------- /zoedepth/data/sun_rgbd_loader.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated 
documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | 40 | def __call__(self, sample): 41 | image, depth = sample['image'], sample['depth'] 42 | image = self.to_tensor(image) 43 | image = self.normalize(image) 44 | depth = self.to_tensor(depth) 45 | 46 | return {'image': image, 'depth': depth, 'dataset': "sunrgbd"} 47 | 48 | def to_tensor(self, pic): 49 | 50 | if isinstance(pic, np.ndarray): 51 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 52 | return img 53 | 54 | # # handle PIL Image 55 | if pic.mode == 'I': 56 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 57 | elif pic.mode == 'I;16': 58 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 59 | else: 60 | img = torch.ByteTensor( 61 | torch.ByteStorage.from_buffer(pic.tobytes())) 62 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 63 | if pic.mode == 'YCbCr': 64 | nchannel = 3 65 | elif pic.mode == 'I;16': 66 | nchannel = 1 67 | else: 68 | nchannel = len(pic.mode) 69 | img = img.view(pic.size[1], pic.size[0], nchannel) 70 | 71 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 72 | if isinstance(img, torch.ByteTensor): 73 | return img.float() 74 | else: 75 | return img 76 | 77 | 78 | class SunRGBD(Dataset): 79 | def __init__(self, data_dir_root): 80 | # test_file_dirs = loadmat(train_test_file)['alltest'].squeeze() 81 | # all_test = [t[0].replace("/n/fs/sun3d/data/", "") for t in test_file_dirs] 82 | # self.all_test = [os.path.join(data_dir_root, t) for t in all_test] 83 | import glob 84 | # self.image_files = glob.glob( 85 | # os.path.join(data_dir_root, 'rgb', 'rgb', '*')) 86 | # self.depth_files = [ 87 | # r.replace("rgb/rgb", "gt/gt").replace("jpg", "png") for r in self.image_files] 88 | 89 | self.image_files, self.depth_files = [], [] 90 | filenames = os.listdir(os.path.join(data_dir_root, 'rgb')) 91 | for i, filename in enumerate(filenames): 92 | self.image_files.append(os.path.join(data_dir_root, 'rgb', filename)) 93 | base_num = int(filename.replace('.jpg', '').replace('img-', '')) 94 | self.depth_files.append(os.path.join(data_dir_root, 'depth', str(base_num) + '.png')) 95 | 96 | self.transform = ToTensor() 97 | 
98 | def __getitem__(self, idx): 99 | image_path = self.image_files[idx] 100 | depth_path = self.depth_files[idx] 101 | 102 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 103 | depth = np.asarray(Image.open(depth_path), dtype='uint16') / 10000.0 104 | # print(depth, depth.min(), depth.max()) 105 | depth[depth > 8] = -1 106 | depth = depth[..., None] 107 | return self.transform(dict(image=image, depth=depth)) 108 | 109 | def __len__(self): 110 | return len(self.image_files) 111 | 112 | 113 | def get_sunrgbd_loader(data_dir_root, batch_size=1, **kwargs): 114 | dataset = SunRGBD(data_dir_root) 115 | return DataLoader(dataset, batch_size, **kwargs) 116 | -------------------------------------------------------------------------------- /zoedepth/models/layers/dist_layers.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | import torch.nn as nn 27 | 28 | 29 | def log_binom(n, k, eps=1e-7): 30 | """ log(nCk) using stirling approximation """ 31 | n = n + eps 32 | k = k + eps 33 | return n * torch.log(n) - k * torch.log(k) - (n-k) * torch.log(n-k+eps) 34 | 35 | 36 | class LogBinomial(nn.Module): 37 | def __init__(self, n_classes=256, act=torch.softmax): 38 | """Compute log binomial distribution for n_classes 39 | 40 | Args: 41 | n_classes (int, optional): number of output classes. Defaults to 256. 42 | """ 43 | super().__init__() 44 | self.K = n_classes 45 | self.act = act 46 | self.register_buffer('k_idx', torch.arange( 47 | 0, n_classes).view(1, -1, 1, 1)) 48 | self.register_buffer('K_minus_1', torch.Tensor( 49 | [self.K-1]).view(1, -1, 1, 1)) 50 | 51 | def forward(self, x, t=1., eps=1e-4): 52 | """Compute log binomial distribution for x 53 | 54 | Args: 55 | x (torch.Tensor - NCHW): probabilities 56 | t (float, torch.Tensor - NCHW, optional): Temperature of distribution. Defaults to 1.. 57 | eps (float, optional): Small number for numerical stability. Defaults to 1e-4. 
58 | 59 | Returns: 60 | torch.Tensor -NCHW: log binomial distribution logbinomial(p;t) 61 | """ 62 | if x.ndim == 3: 63 | x = x.unsqueeze(1) # make it nchw 64 | 65 | one_minus_x = torch.clamp(1 - x, eps, 1) 66 | x = torch.clamp(x, eps, 1) 67 | y = log_binom(self.K_minus_1, self.k_idx) + self.k_idx * \ 68 | torch.log(x) + (self.K - 1 - self.k_idx) * torch.log(one_minus_x) 69 | return self.act(y/t, dim=1) 70 | 71 | 72 | class ConditionalLogBinomial(nn.Module): 73 | def __init__(self, in_features, condition_dim, n_classes=256, bottleneck_factor=2, p_eps=1e-4, max_temp=50, min_temp=1e-7, act=torch.softmax): 74 | """Conditional Log Binomial distribution 75 | 76 | Args: 77 | in_features (int): number of input channels in main feature 78 | condition_dim (int): number of input channels in condition feature 79 | n_classes (int, optional): Number of classes. Defaults to 256. 80 | bottleneck_factor (int, optional): Hidden dim factor. Defaults to 2. 81 | p_eps (float, optional): small eps value. Defaults to 1e-4. 82 | max_temp (float, optional): Maximum temperature of output distribution. Defaults to 50. 83 | min_temp (float, optional): Minimum temperature of output distribution. Defaults to 1e-7. 84 | """ 85 | super().__init__() 86 | self.p_eps = p_eps 87 | self.max_temp = max_temp 88 | self.min_temp = min_temp 89 | self.log_binomial_transform = LogBinomial(n_classes, act=act) 90 | bottleneck = (in_features + condition_dim) // bottleneck_factor 91 | self.mlp = nn.Sequential( 92 | nn.Conv2d(in_features + condition_dim, bottleneck, 93 | kernel_size=1, stride=1, padding=0), 94 | nn.GELU(), 95 | # 2 for p linear norm, 2 for t linear norm 96 | nn.Conv2d(bottleneck, 2+2, kernel_size=1, stride=1, padding=0), 97 | nn.Softplus() 98 | ) 99 | 100 | def forward(self, x, cond): 101 | """Forward pass 102 | 103 | Args: 104 | x (torch.Tensor - NCHW): Main feature 105 | cond (torch.Tensor - NCHW): condition feature 106 | 107 | Returns: 108 | torch.Tensor: Output log binomial distribution 109 | """ 110 | pt = self.mlp(torch.concat((x, cond), dim=1)) 111 | p, t = pt[:, :2, ...], pt[:, 2:, ...] 112 | 113 | p = p + self.p_eps 114 | p = p[:, 0, ...] / (p[:, 0, ...] + p[:, 1, ...]) 115 | 116 | t = t + self.p_eps 117 | t = t[:, 0, ...] / (t[:, 0, ...] + t[:, 1, ...]) 118 | t = t.unsqueeze(1) 119 | t = (self.max_temp - self.min_temp) * t + self.min_temp 120 | 121 | return self.log_binomial_transform(p, t) 122 | -------------------------------------------------------------------------------- /zoedepth/data/hypersim.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import glob 26 | import os 27 | 28 | import h5py 29 | import numpy as np 30 | import torch 31 | from PIL import Image 32 | from torch.utils.data import DataLoader, Dataset 33 | from torchvision import transforms 34 | 35 | 36 | def hypersim_distance_to_depth(npyDistance): 37 | intWidth, intHeight, fltFocal = 1024, 768, 886.81 38 | 39 | npyImageplaneX = np.linspace((-0.5 * intWidth) + 0.5, (0.5 * intWidth) - 0.5, intWidth).reshape( 40 | 1, intWidth).repeat(intHeight, 0).astype(np.float32)[:, :, None] 41 | npyImageplaneY = np.linspace((-0.5 * intHeight) + 0.5, (0.5 * intHeight) - 0.5, 42 | intHeight).reshape(intHeight, 1).repeat(intWidth, 1).astype(np.float32)[:, :, None] 43 | npyImageplaneZ = np.full([intHeight, intWidth, 1], fltFocal, np.float32) 44 | npyImageplane = np.concatenate( 45 | [npyImageplaneX, npyImageplaneY, npyImageplaneZ], 2) 46 | 47 | npyDepth = npyDistance / np.linalg.norm(npyImageplane, 2, 2) * fltFocal 48 | return npyDepth 49 | 50 | 51 | class ToTensor(object): 52 | def __init__(self): 53 | # self.normalize = transforms.Normalize( 54 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 55 | self.normalize = lambda x: x 56 | self.resize = transforms.Resize((480, 640)) 57 | 58 | def __call__(self, sample): 59 | image, depth = sample['image'], sample['depth'] 60 | image = self.to_tensor(image) 61 | image = self.normalize(image) 62 | depth = self.to_tensor(depth) 63 | 64 | image = self.resize(image) 65 | 66 | return {'image': image, 'depth': depth, 'dataset': "hypersim"} 67 | 68 | def to_tensor(self, pic): 69 | 70 | if isinstance(pic, np.ndarray): 71 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 72 | return img 73 | 74 | # # handle PIL Image 75 | if pic.mode == 'I': 76 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 77 | elif pic.mode == 'I;16': 78 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 79 | else: 80 | img = torch.ByteTensor( 81 | torch.ByteStorage.from_buffer(pic.tobytes())) 82 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 83 | if pic.mode == 'YCbCr': 84 | nchannel = 3 85 | elif pic.mode == 'I;16': 86 | nchannel = 1 87 | else: 88 | nchannel = len(pic.mode) 89 | img = img.view(pic.size[1], pic.size[0], nchannel) 90 | 91 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 92 | if isinstance(img, torch.ByteTensor): 93 | return img.float() 94 | else: 95 | return img 96 | 97 | 98 | class HyperSim(Dataset): 99 | def __init__(self, data_dir_root): 100 | # image paths are of the form //images/scene_cam_#_final_preview/*.tonemap.jpg 101 | # depth paths are of the form //images/scene_cam_#_final_preview/*.depth_meters.hdf5 102 | self.image_files = glob.glob(os.path.join( 103 | data_dir_root, '*', 'images', 'scene_cam_*_final_preview', '*.tonemap.jpg')) 104 | self.depth_files = [r.replace("_final_preview", "_geometry_hdf5").replace( 105 | ".tonemap.jpg", ".depth_meters.hdf5") for r in self.image_files] 106 | self.transform = ToTensor() 107 | 108 | def __getitem__(self, idx): 109 | image_path = self.image_files[idx] 110 | depth_path = self.depth_files[idx] 111 | 112 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 113 | 114 | # depth from hdf5 115 | depth_fd = 
h5py.File(depth_path, "r") 116 | # in meters (Euclidean distance) 117 | distance_meters = np.array(depth_fd['dataset']) 118 | depth = hypersim_distance_to_depth( 119 | distance_meters) # in meters (planar depth) 120 | 121 | # depth[depth > 8] = -1 122 | depth = depth[..., None] 123 | 124 | sample = dict(image=image, depth=depth) 125 | sample = self.transform(sample) 126 | 127 | if idx == 0: 128 | print(sample["image"].shape) 129 | 130 | return sample 131 | 132 | def __len__(self): 133 | return len(self.image_files) 134 | 135 | 136 | def get_hypersim_loader(data_dir_root, batch_size=1, **kwargs): 137 | dataset = HyperSim(data_dir_root) 138 | return DataLoader(dataset, batch_size, **kwargs) 139 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/fsdp/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import os 8 | from typing import Any 9 | 10 | import torch 11 | import dinov2.distributed as distributed 12 | from functools import partial 13 | from fvcore.common.checkpoint import Checkpointer 14 | from torch.distributed.fsdp import FullyShardedDataParallel as FSDP 15 | from torch.distributed.fsdp import ShardingStrategy 16 | from torch.distributed.fsdp import MixedPrecision 17 | from torch.distributed.fsdp import StateDictType 18 | from torch.distributed.fsdp.sharded_grad_scaler import ShardedGradScaler 19 | from torch.distributed.fsdp.wrap import ModuleWrapPolicy 20 | from torch.distributed.fsdp._runtime_utils import _reshard 21 | 22 | 23 | def get_fsdp_wrapper(model_cfg, modules_to_wrap=set()): 24 | sharding_strategy_dict = { 25 | "NO_SHARD": ShardingStrategy.NO_SHARD, 26 | "SHARD_GRAD_OP": ShardingStrategy.SHARD_GRAD_OP, 27 | "FULL_SHARD": ShardingStrategy.FULL_SHARD, 28 | } 29 | 30 | dtype_dict = { 31 | "fp32": torch.float32, 32 | "fp16": torch.float16, 33 | "bf16": torch.bfloat16, 34 | } 35 | 36 | mixed_precision_config = MixedPrecision( 37 | param_dtype=dtype_dict[model_cfg.mixed_precision.param_dtype], 38 | reduce_dtype=dtype_dict[model_cfg.mixed_precision.reduce_dtype], 39 | buffer_dtype=dtype_dict[model_cfg.mixed_precision.buffer_dtype], 40 | ) 41 | 42 | sharding_strategy_config = sharding_strategy_dict[model_cfg.sharding_strategy] 43 | 44 | local_rank = distributed.get_local_rank() 45 | 46 | fsdp_wrapper = partial( 47 | FSDP, 48 | sharding_strategy=sharding_strategy_config, 49 | mixed_precision=mixed_precision_config, 50 | device_id=local_rank, 51 | sync_module_states=True, 52 | use_orig_params=True, 53 | auto_wrap_policy=ModuleWrapPolicy(modules_to_wrap), 54 | ) 55 | return fsdp_wrapper 56 | 57 | 58 | def is_fsdp(x): 59 | return isinstance(x, FSDP) 60 | 61 | 62 | def is_sharded_fsdp(x): 63 | return is_fsdp(x) and x.sharding_strategy is not ShardingStrategy.NO_SHARD 64 | 65 | 66 | def free_if_fsdp(x): 67 | if is_sharded_fsdp(x): 68 | handles = x._handles 69 | true_list = [True for h in handles] 70 | _reshard(x, handles, true_list) 71 | 72 | 73 | def get_fsdp_modules(x): 74 | return FSDP.fsdp_modules(x) 75 | 76 | 77 | def reshard_fsdp_model(x): 78 | for m in get_fsdp_modules(x): 79 | free_if_fsdp(m) 80 | 81 | 82 | def rankstr(): 83 | return f"rank_{distributed.get_global_rank()}" 84 | 85 | 86 | class FSDPCheckpointer(Checkpointer): 87 | def 
save(self, name: str, **kwargs: Any) -> None: 88 | """ 89 | Dump model and checkpointables to a file. 90 | 91 | Args: 92 | name (str): name of the file. 93 | kwargs (dict): extra arbitrary data to save. 94 | """ 95 | if not self.save_dir or not self.save_to_disk: 96 | return 97 | 98 | data = {} 99 | with FSDP.state_dict_type(self.model, StateDictType.LOCAL_STATE_DICT): 100 | data["model"] = self.model.state_dict() 101 | 102 | # data["model"] = self.model.state_dict() 103 | for key, obj in self.checkpointables.items(): 104 | data[key] = obj.state_dict() 105 | data.update(kwargs) 106 | 107 | basename = f"{name}.{rankstr()}.pth" 108 | save_file = os.path.join(self.save_dir, basename) 109 | assert os.path.basename(save_file) == basename, basename 110 | self.logger.info("Saving checkpoint to {}".format(save_file)) 111 | with self.path_manager.open(save_file, "wb") as f: 112 | torch.save(data, f) 113 | self.tag_last_checkpoint(basename) 114 | 115 | def load(self, *args, **kwargs): 116 | with FSDP.state_dict_type(self.model, StateDictType.LOCAL_STATE_DICT): 117 | return super().load(*args, **kwargs) 118 | 119 | def has_checkpoint(self) -> bool: 120 | """ 121 | Returns: 122 | bool: whether a checkpoint exists in the target directory. 123 | """ 124 | save_file = os.path.join(self.save_dir, f"last_checkpoint.{rankstr()}") 125 | return self.path_manager.exists(save_file) 126 | 127 | def get_checkpoint_file(self) -> str: 128 | """ 129 | Returns: 130 | str: The latest checkpoint file in target directory. 131 | """ 132 | save_file = os.path.join(self.save_dir, f"last_checkpoint.{rankstr()}") 133 | try: 134 | with self.path_manager.open(save_file, "r") as f: 135 | last_saved = f.read().strip() 136 | except IOError: 137 | # if file doesn't exist, maybe because it has just been 138 | # deleted by a separate process 139 | return "" 140 | # pyre-fixme[6]: For 2nd param expected `Union[PathLike[str], str]` but got 141 | # `Union[bytes, str]`. 142 | return os.path.join(self.save_dir, last_saved) 143 | 144 | def tag_last_checkpoint(self, last_filename_basename: str) -> None: 145 | """ 146 | Tag the last checkpoint. 147 | 148 | Args: 149 | last_filename_basename (str): the basename of the last filename. 150 | """ 151 | if distributed.is_enabled(): 152 | torch.distributed.barrier() 153 | save_file = os.path.join(self.save_dir, f"last_checkpoint.{rankstr()}") 154 | with self.path_manager.open(save_file, "w") as f: 155 | f.write(last_filename_basename) # pyre-ignore 156 | 157 | 158 | ShardedGradScaler = ShardedGradScaler 159 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/eval/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import logging 8 | from typing import Dict, Optional 9 | 10 | import torch 11 | from torch import nn 12 | from torchmetrics import MetricCollection 13 | 14 | from dinov2.data import DatasetWithEnumeratedTargets, SamplerType, make_data_loader 15 | import dinov2.distributed as distributed 16 | from dinov2.logging import MetricLogger 17 | 18 | 19 | logger = logging.getLogger("dinov2") 20 | 21 | 22 | class ModelWithNormalize(torch.nn.Module): 23 | def __init__(self, model): 24 | super().__init__() 25 | self.model = model 26 | 27 | def forward(self, samples): 28 | return nn.functional.normalize(self.model(samples), dim=1, p=2) 29 | 30 | 31 | class ModelWithIntermediateLayers(nn.Module): 32 | def __init__(self, feature_model, n_last_blocks, autocast_ctx): 33 | super().__init__() 34 | self.feature_model = feature_model 35 | self.feature_model.eval() 36 | self.n_last_blocks = n_last_blocks 37 | self.autocast_ctx = autocast_ctx 38 | 39 | def forward(self, images): 40 | with torch.inference_mode(): 41 | with self.autocast_ctx(): 42 | features = self.feature_model.get_intermediate_layers( 43 | images, self.n_last_blocks, return_class_token=True 44 | ) 45 | return features 46 | 47 | 48 | @torch.inference_mode() 49 | def evaluate( 50 | model: nn.Module, 51 | data_loader, 52 | postprocessors: Dict[str, nn.Module], 53 | metrics: Dict[str, MetricCollection], 54 | device: torch.device, 55 | criterion: Optional[nn.Module] = None, 56 | ): 57 | model.eval() 58 | if criterion is not None: 59 | criterion.eval() 60 | 61 | for metric in metrics.values(): 62 | metric = metric.to(device) 63 | 64 | metric_logger = MetricLogger(delimiter=" ") 65 | header = "Test:" 66 | 67 | for samples, targets, *_ in metric_logger.log_every(data_loader, 10, header): 68 | outputs = model(samples.to(device)) 69 | targets = targets.to(device) 70 | 71 | if criterion is not None: 72 | loss = criterion(outputs, targets) 73 | metric_logger.update(loss=loss.item()) 74 | 75 | for k, metric in metrics.items(): 76 | metric_inputs = postprocessors[k](outputs, targets) 77 | metric.update(**metric_inputs) 78 | 79 | metric_logger.synchronize_between_processes() 80 | logger.info(f"Averaged stats: {metric_logger}") 81 | 82 | stats = {k: metric.compute() for k, metric in metrics.items()} 83 | metric_logger_stats = {k: meter.global_avg for k, meter in metric_logger.meters.items()} 84 | return metric_logger_stats, stats 85 | 86 | 87 | def all_gather_and_flatten(tensor_rank): 88 | tensor_all_ranks = torch.empty( 89 | distributed.get_global_size(), 90 | *tensor_rank.shape, 91 | dtype=tensor_rank.dtype, 92 | device=tensor_rank.device, 93 | ) 94 | tensor_list = list(tensor_all_ranks.unbind(0)) 95 | torch.distributed.all_gather(tensor_list, tensor_rank.contiguous()) 96 | return tensor_all_ranks.flatten(end_dim=1) 97 | 98 | 99 | def extract_features(model, dataset, batch_size, num_workers, gather_on_cpu=False): 100 | dataset_with_enumerated_targets = DatasetWithEnumeratedTargets(dataset) 101 | sample_count = len(dataset_with_enumerated_targets) 102 | data_loader = make_data_loader( 103 | dataset=dataset_with_enumerated_targets, 104 | batch_size=batch_size, 105 | num_workers=num_workers, 106 | sampler_type=SamplerType.DISTRIBUTED, 107 | drop_last=False, 108 | shuffle=False, 109 | ) 110 | return extract_features_with_dataloader(model, data_loader, sample_count, gather_on_cpu) 111 | 112 | 113 | @torch.inference_mode() 114 | def extract_features_with_dataloader(model, data_loader, sample_count, gather_on_cpu=False): 115 | gather_device = 
torch.device("cpu") if gather_on_cpu else torch.device("cuda") 116 | metric_logger = MetricLogger(delimiter=" ") 117 | features, all_labels = None, None 118 | for samples, (index, labels_rank) in metric_logger.log_every(data_loader, 10): 119 | samples = samples.cuda(non_blocking=True) 120 | labels_rank = labels_rank.cuda(non_blocking=True) 121 | index = index.cuda(non_blocking=True) 122 | features_rank = model(samples).float() 123 | 124 | # init storage feature matrix 125 | if features is None: 126 | features = torch.zeros(sample_count, features_rank.shape[-1], device=gather_device) 127 | labels_shape = list(labels_rank.shape) 128 | labels_shape[0] = sample_count 129 | all_labels = torch.full(labels_shape, fill_value=-1, device=gather_device) 130 | logger.info(f"Storing features into tensor of shape {features.shape}") 131 | 132 | # share indexes, features and labels between processes 133 | index_all = all_gather_and_flatten(index).to(gather_device) 134 | features_all_ranks = all_gather_and_flatten(features_rank).to(gather_device) 135 | labels_all_ranks = all_gather_and_flatten(labels_rank).to(gather_device) 136 | 137 | # update storage feature matrix 138 | if len(index_all) > 0: 139 | features.index_copy_(0, index_all, features_all_ranks) 140 | all_labels.index_copy_(0, index_all, labels_all_ranks) 141 | 142 | logger.info(f"Features shape: {tuple(features.shape)}") 143 | logger.info(f"Labels shape: {tuple(all_labels.shape)}") 144 | 145 | assert torch.all(all_labels > -1) 146 | 147 | return features, all_labels 148 | -------------------------------------------------------------------------------- /zoedepth/data/vkitti.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | from torch.utils.data import Dataset, DataLoader 27 | from torchvision import transforms 28 | import os 29 | 30 | from PIL import Image 31 | import numpy as np 32 | import cv2 33 | 34 | 35 | class ToTensor(object): 36 | def __init__(self): 37 | self.normalize = transforms.Normalize( 38 | mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 39 | # self.resize = transforms.Resize((375, 1242)) 40 | 41 | def __call__(self, sample): 42 | image, depth = sample['image'], sample['depth'] 43 | 44 | image = self.to_tensor(image) 45 | image = self.normalize(image) 46 | depth = self.to_tensor(depth) 47 | 48 | # image = self.resize(image) 49 | 50 | return {'image': image, 'depth': depth, 'dataset': "vkitti"} 51 | 52 | def to_tensor(self, pic): 53 | 54 | if isinstance(pic, np.ndarray): 55 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 56 | return img 57 | 58 | # # handle PIL Image 59 | if pic.mode == 'I': 60 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 61 | elif pic.mode == 'I;16': 62 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 63 | else: 64 | img = torch.ByteTensor( 65 | torch.ByteStorage.from_buffer(pic.tobytes())) 66 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 67 | if pic.mode == 'YCbCr': 68 | nchannel = 3 69 | elif pic.mode == 'I;16': 70 | nchannel = 1 71 | else: 72 | nchannel = len(pic.mode) 73 | img = img.view(pic.size[1], pic.size[0], nchannel) 74 | 75 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 76 | if isinstance(img, torch.ByteTensor): 77 | return img.float() 78 | else: 79 | return img 80 | 81 | 82 | class VKITTI(Dataset): 83 | def __init__(self, data_dir_root, do_kb_crop=True): 84 | import glob 85 | # image paths are of the form /{HR, LR}//{color, depth_filled}/*.png 86 | self.image_files = glob.glob(os.path.join( 87 | data_dir_root, "test_color", '*.png')) 88 | self.depth_files = [r.replace("test_color", "test_depth") 89 | for r in self.image_files] 90 | self.do_kb_crop = do_kb_crop 91 | self.transform = ToTensor() 92 | 93 | def __getitem__(self, idx): 94 | image_path = self.image_files[idx] 95 | depth_path = self.depth_files[idx] 96 | 97 | image = Image.open(image_path) 98 | depth = Image.open(depth_path) 99 | depth = cv2.imread(depth_path, cv2.IMREAD_ANYCOLOR | 100 | cv2.IMREAD_ANYDEPTH) 101 | print("depth min max", depth.min(), depth.max()) 102 | 103 | # print(np.shape(image)) 104 | # print(np.shape(depth)) 105 | 106 | # depth[depth > 8] = -1 107 | 108 | if self.do_kb_crop and False: 109 | height = image.height 110 | width = image.width 111 | top_margin = int(height - 352) 112 | left_margin = int((width - 1216) / 2) 113 | depth = depth.crop( 114 | (left_margin, top_margin, left_margin + 1216, top_margin + 352)) 115 | image = image.crop( 116 | (left_margin, top_margin, left_margin + 1216, top_margin + 352)) 117 | # uv = uv[:, top_margin:top_margin + 352, left_margin:left_margin + 1216] 118 | 119 | image = np.asarray(image, dtype=np.float32) / 255.0 120 | # depth = np.asarray(depth, dtype=np.uint16) /1.
121 | depth = depth[..., None] 122 | sample = dict(image=image, depth=depth) 123 | 124 | # return sample 125 | sample = self.transform(sample) 126 | 127 | if idx == 0: 128 | print(sample["image"].shape) 129 | 130 | return sample 131 | 132 | def __len__(self): 133 | return len(self.image_files) 134 | 135 | 136 | def get_vkitti_loader(data_dir_root, batch_size=1, **kwargs): 137 | dataset = VKITTI(data_dir_root) 138 | return DataLoader(dataset, batch_size, **kwargs) 139 | 140 | 141 | if __name__ == "__main__": 142 | loader = get_vkitti_loader( 143 | data_dir_root="/home/bhatsf/shortcuts/datasets/vkitti_test") 144 | print("Total files", len(loader.dataset)) 145 | for i, sample in enumerate(loader): 146 | print(sample["image"].shape) 147 | print(sample["depth"].shape) 148 | print(sample["dataset"]) 149 | print(sample['depth'].min(), sample['depth'].max()) 150 | if i > 5: 151 | break 152 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/hubconf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # 3 | # This source code is licensed under the Apache License, Version 2.0 4 | # found in the LICENSE file in the root directory of this source tree. 5 | 6 | from enum import Enum 7 | from typing import Union 8 | 9 | import torch 10 | 11 | _DINOV2_BASE_URL = "https://dl.fbaipublicfiles.com/dinov2" 12 | 13 | 14 | def _make_dinov2_model_name(arch_name: str, patch_size: int, num_register_tokens: int = 0) -> str: 15 | compact_arch_name = arch_name.replace("_", "")[:4] 16 | registers_suffix = f"_reg{num_register_tokens}" if num_register_tokens else "" 17 | return f"dinov2_{compact_arch_name}{patch_size}{registers_suffix}" 18 | 19 | 20 | class Weights(Enum): 21 | LVD142M = "LVD142M" 22 | 23 | 24 | def _make_dinov2_model( 25 | *, 26 | arch_name: str = "vit_large", 27 | img_size: int = 518, 28 | patch_size: int = 14, 29 | init_values: float = 1.0, 30 | ffn_layer: str = "mlp", 31 | block_chunks: int = 0, 32 | num_register_tokens: int = 0, 33 | interpolate_antialias: bool = False, 34 | interpolate_offset: float = 0.1, 35 | pretrained: bool = True, 36 | weights: Union[Weights, str] = Weights.LVD142M, 37 | **kwargs, 38 | ): 39 | import vision_transformer as vits 40 | 41 | if isinstance(weights, str): 42 | try: 43 | weights = Weights[weights] 44 | except KeyError: 45 | raise AssertionError(f"Unsupported weights: {weights}") 46 | 47 | model_base_name = _make_dinov2_model_name(arch_name, patch_size) 48 | vit_kwargs = dict( 49 | img_size=img_size, 50 | patch_size=patch_size, 51 | init_values=init_values, 52 | ffn_layer=ffn_layer, 53 | block_chunks=block_chunks, 54 | num_register_tokens=num_register_tokens, 55 | interpolate_antialias=interpolate_antialias, 56 | interpolate_offset=interpolate_offset, 57 | ) 58 | vit_kwargs.update(**kwargs) 59 | model = vits.__dict__[arch_name](**vit_kwargs) 60 | 61 | if pretrained: 62 | model_full_name = _make_dinov2_model_name(arch_name, patch_size, num_register_tokens) 63 | url = _DINOV2_BASE_URL + f"/{model_base_name}/{model_full_name}_pretrain.pth" 64 | state_dict = torch.hub.load_state_dict_from_url(url, map_location="cpu") 65 | model.load_state_dict(state_dict, strict=True) 66 | 67 | return model 68 | 69 | 70 | def dinov2_vits14(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs): 71 | """ 72 | DINOv2 ViT-S/14 model (optionally) pretrained on the LVD-142M dataset. 
73 | """ 74 | return _make_dinov2_model(arch_name="vit_small", pretrained=pretrained, weights=weights, **kwargs) 75 | 76 | 77 | def dinov2_vitb14(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs): 78 | """ 79 | DINOv2 ViT-B/14 model (optionally) pretrained on the LVD-142M dataset. 80 | """ 81 | return _make_dinov2_model(arch_name="vit_base", pretrained=pretrained, weights=weights, **kwargs) 82 | 83 | 84 | def dinov2_vitl14(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs): 85 | """ 86 | DINOv2 ViT-L/14 model (optionally) pretrained on the LVD-142M dataset. 87 | """ 88 | return _make_dinov2_model(arch_name="vit_large", pretrained=pretrained, weights=weights, **kwargs) 89 | 90 | 91 | def dinov2_vitg14(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs): 92 | """ 93 | DINOv2 ViT-g/14 model (optionally) pretrained on the LVD-142M dataset. 94 | """ 95 | return _make_dinov2_model( 96 | arch_name="vit_giant2", 97 | ffn_layer="swiglufused", 98 | weights=weights, 99 | pretrained=pretrained, 100 | **kwargs, 101 | ) 102 | 103 | 104 | def dinov2_vits14_reg(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs): 105 | """ 106 | DINOv2 ViT-S/14 model with registers (optionally) pretrained on the LVD-142M dataset. 107 | """ 108 | return _make_dinov2_model( 109 | arch_name="vit_small", 110 | pretrained=pretrained, 111 | weights=weights, 112 | num_register_tokens=4, 113 | interpolate_antialias=True, 114 | interpolate_offset=0.0, 115 | **kwargs, 116 | ) 117 | 118 | 119 | def dinov2_vitb14_reg(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs): 120 | """ 121 | DINOv2 ViT-B/14 model with registers (optionally) pretrained on the LVD-142M dataset. 122 | """ 123 | return _make_dinov2_model( 124 | arch_name="vit_base", 125 | pretrained=pretrained, 126 | weights=weights, 127 | num_register_tokens=4, 128 | interpolate_antialias=True, 129 | interpolate_offset=0.0, 130 | **kwargs, 131 | ) 132 | 133 | 134 | def dinov2_vitl14_reg(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs): 135 | """ 136 | DINOv2 ViT-L/14 model with registers (optionally) pretrained on the LVD-142M dataset. 137 | """ 138 | return _make_dinov2_model( 139 | arch_name="vit_large", 140 | pretrained=pretrained, 141 | weights=weights, 142 | num_register_tokens=4, 143 | interpolate_antialias=True, 144 | interpolate_offset=0.0, 145 | **kwargs, 146 | ) 147 | 148 | 149 | def dinov2_vitg14_reg(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs): 150 | """ 151 | DINOv2 ViT-g/14 model with registers (optionally) pretrained on the LVD-142M dataset. 152 | """ 153 | return _make_dinov2_model( 154 | arch_name="vit_giant2", 155 | ffn_layer="swiglufused", 156 | weights=weights, 157 | pretrained=pretrained, 158 | num_register_tokens=4, 159 | interpolate_antialias=True, 160 | interpolate_offset=0.0, 161 | **kwargs, 162 | ) 163 | --------------------------------------------------------------------------------