├── .gitignore ├── requirements.txt ├── torchhub ├── facebookresearch_dinov2_main │ ├── requirements-dev.txt │ ├── setup.cfg │ ├── __pycache__ │ │ ├── hubconf.cpython-39.pyc │ │ └── vision_transformer.cpython-39.pyc │ ├── dinov2 │ │ ├── __pycache__ │ │ │ └── __init__.cpython-39.pyc │ │ ├── layers │ │ │ ├── __pycache__ │ │ │ │ ├── mlp.cpython-39.pyc │ │ │ │ ├── block.cpython-39.pyc │ │ │ │ ├── __init__.cpython-39.pyc │ │ │ │ ├── attention.cpython-39.pyc │ │ │ │ ├── dino_head.cpython-39.pyc │ │ │ │ ├── drop_path.cpython-39.pyc │ │ │ │ ├── layer_scale.cpython-39.pyc │ │ │ │ ├── patch_embed.cpython-39.pyc │ │ │ │ └── swiglu_ffn.cpython-39.pyc │ │ │ ├── __init__.py │ │ │ ├── layer_scale.py │ │ │ ├── drop_path.py │ │ │ ├── mlp.py │ │ │ ├── swiglu_ffn.py │ │ │ ├── dino_head.py │ │ │ ├── attention.py │ │ │ └── patch_embed.py │ │ ├── configs │ │ │ ├── train │ │ │ │ ├── vitl16_short.yaml │ │ │ │ ├── vitl14.yaml │ │ │ │ └── vitg14.yaml │ │ │ ├── eval │ │ │ │ ├── vitb14_pretrain.yaml │ │ │ │ ├── vitl14_pretrain.yaml │ │ │ │ ├── vits14_pretrain.yaml │ │ │ │ └── vitg14_pretrain.yaml │ │ │ ├── __init__.py │ │ │ └── ssl_default_config.yaml │ │ ├── run │ │ │ ├── __init__.py │ │ │ ├── eval │ │ │ │ ├── knn.py │ │ │ │ ├── linear.py │ │ │ │ └── log_regression.py │ │ │ ├── train │ │ │ │ └── train.py │ │ │ └── submit.py │ │ ├── eval │ │ │ ├── __init__.py │ │ │ ├── setup.py │ │ │ ├── metrics.py │ │ │ └── utils.py │ │ ├── utils │ │ │ ├── __init__.py │ │ │ ├── dtype.py │ │ │ ├── config.py │ │ │ ├── cluster.py │ │ │ ├── utils.py │ │ │ └── param_groups.py │ │ ├── __init__.py │ │ ├── data │ │ │ ├── datasets │ │ │ │ ├── __init__.py │ │ │ │ ├── decoders.py │ │ │ │ └── extended.py │ │ │ ├── __init__.py │ │ │ ├── adapters.py │ │ │ ├── collate.py │ │ │ ├── masking.py │ │ │ ├── transforms.py │ │ │ └── augmentations.py │ │ ├── train │ │ │ └── __init__.py │ │ ├── loss │ │ │ ├── __init__.py │ │ │ ├── koleo_loss.py │ │ │ └── dino_clstoken_loss.py │ │ ├── models │ │ │ └── __init__.py │ │ ├── logging │ │ │ └── __init__.py │ │ └── fsdp │ │ │ └── __init__.py │ ├── requirements.txt │ ├── scripts │ │ └── lint.sh │ ├── conda.yaml │ ├── pyproject.toml │ ├── CONTRIBUTING.md │ ├── utils.py │ ├── setup.py │ ├── CODE_OF_CONDUCT.md │ └── hubconf.py └── README.md ├── CalibrationMatrix_college_cpt.npz ├── zoedepth ├── models │ ├── __pycache__ │ │ ├── builder.cpython-39.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── model_io.cpython-39.pyc │ │ └── depth_model.cpython-39.pyc │ ├── base_models │ │ ├── __pycache__ │ │ │ ├── midas.cpython-39.pyc │ │ │ ├── __init__.cpython-39.pyc │ │ │ └── depth_anything.cpython-39.pyc │ │ ├── dpt_dinov2 │ │ │ ├── __pycache__ │ │ │ │ ├── dpt.cpython-39.pyc │ │ │ │ └── blocks.cpython-39.pyc │ │ │ └── blocks.py │ │ └── __init__.py │ ├── layers │ │ ├── __pycache__ │ │ │ ├── attractor.cpython-39.pyc │ │ │ ├── dist_layers.cpython-39.pyc │ │ │ ├── localbins_layers.cpython-39.pyc │ │ │ └── patch_transformer.cpython-39.pyc │ │ ├── patch_transformer.py │ │ └── dist_layers.py │ ├── zoedepth │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-39.pyc │ │ │ └── zoedepth_v1.cpython-39.pyc │ │ ├── config_zoedepth_kitti.json │ │ ├── __init__.py │ │ └── config_zoedepth.json │ ├── zoedepth_nk │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-39.pyc │ │ │ └── zoedepth_nk_v1.cpython-39.pyc │ │ ├── __init__.py │ │ └── config_zoedepth_nk.json │ ├── __init__.py │ ├── builder.py │ └── model_io.py ├── utils │ ├── __pycache__ │ │ ├── __init__.cpython-39.pyc │ │ ├── config.cpython-39.pyc │ │ └── arg_utils.cpython-39.pyc │ ├── easydict │ │ ├── 
__pycache__ │ │ │ └── __init__.cpython-39.pyc │ │ └── __init__.py │ ├── arg_utils.py │ ├── __init__.py │ └── geometry.py ├── data │ ├── __init__.py │ ├── ibims.py │ ├── diml_outdoor_test.py │ ├── diode.py │ ├── ddad.py │ ├── diml_indoor_test.py │ ├── sun_rgbd_loader.py │ ├── hypersim.py │ └── vkitti.py └── trainers │ └── builder.py ├── heic2png.py ├── calibration-camera.py ├── README.md └── depth_to_pointcloud.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | *.pt 3 | *.pth 4 | *.png 5 | *.ply -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/requirements.txt -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/requirements-dev.txt: -------------------------------------------------------------------------------- 1 | black==22.6.0 2 | flake8==5.0.4 3 | pylint==2.15.0 4 | -------------------------------------------------------------------------------- /CalibrationMatrix_college_cpt.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/CalibrationMatrix_college_cpt.npz -------------------------------------------------------------------------------- /torchhub/README.md: -------------------------------------------------------------------------------- 1 | # Local PyTorch Hub 2 | 3 | This directory is for loading the DINOv2 encoder locally in case of no Internet connection. 4 | -------------------------------------------------------------------------------- /zoedepth/models/__pycache__/builder.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/__pycache__/builder.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/utils/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/utils/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/utils/__pycache__/config.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/utils/__pycache__/config.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/__pycache__/model_io.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/__pycache__/model_io.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/utils/__pycache__/arg_utils.cpython-39.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/utils/__pycache__/arg_utils.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/__pycache__/depth_model.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/__pycache__/depth_model.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/base_models/__pycache__/midas.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/base_models/__pycache__/midas.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/layers/__pycache__/attractor.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/layers/__pycache__/attractor.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/zoedepth/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/zoedepth/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/utils/easydict/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/utils/easydict/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 120 3 | ignore = E203,E501,W503 4 | per-file-ignores = 5 | __init__.py:F401 6 | exclude = 7 | venv 8 | -------------------------------------------------------------------------------- /zoedepth/models/layers/__pycache__/dist_layers.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/layers/__pycache__/dist_layers.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/base_models/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/base_models/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/zoedepth/__pycache__/zoedepth_v1.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/zoedepth/__pycache__/zoedepth_v1.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/zoedepth_nk/__pycache__/__init__.cpython-39.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/zoedepth_nk/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/layers/__pycache__/localbins_layers.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/layers/__pycache__/localbins_layers.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/layers/__pycache__/patch_transformer.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/layers/__pycache__/patch_transformer.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/base_models/__pycache__/depth_anything.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/base_models/__pycache__/depth_anything.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/base_models/dpt_dinov2/__pycache__/dpt.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/base_models/dpt_dinov2/__pycache__/dpt.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/zoedepth_nk/__pycache__/zoedepth_nk_v1.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/zoedepth_nk/__pycache__/zoedepth_nk_v1.cpython-39.pyc -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/__pycache__/hubconf.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/torchhub/facebookresearch_dinov2_main/__pycache__/hubconf.cpython-39.pyc -------------------------------------------------------------------------------- /zoedepth/models/base_models/dpt_dinov2/__pycache__/blocks.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/zoedepth/models/base_models/dpt_dinov2/__pycache__/blocks.cpython-39.pyc -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/torchhub/facebookresearch_dinov2_main/dinov2/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/mlp.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/mlp.cpython-39.pyc -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/__pycache__/vision_transformer.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/torchhub/facebookresearch_dinov2_main/__pycache__/vision_transformer.cpython-39.pyc -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/block.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/block.cpython-39.pyc -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/configs/train/vitl16_short.yaml: -------------------------------------------------------------------------------- 1 | # this corresponds to the default config 2 | train: 3 | dataset_path: ImageNet:split=TRAIN 4 | batch_size_per_gpu: 64 5 | student: 6 | block_chunks: 4 7 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/attention.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/attention.cpython-39.pyc -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/dino_head.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/dino_head.cpython-39.pyc -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/drop_path.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/drop_path.cpython-39.pyc -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/layer_scale.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/layer_scale.cpython-39.pyc -------------------------------------------------------------------------------- 
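The torchhub/README.md above explains that this directory exists so the DINOv2 encoder can be loaded without an Internet connection. A minimal sketch of such a local load via torch.hub, assuming the standard DINOv2 entrypoint names defined in torchhub/facebookresearch_dinov2_main/hubconf.py (the "dinov2_vitl14" entrypoint and the separate checkpoint handling are illustrative assumptions, not taken from this repository's own scripts):

import torch

# Load the vendored DINOv2 encoder from the local hub directory; source="local"
# makes torch.hub read hubconf.py from disk instead of contacting GitHub.
encoder = torch.hub.load(
    "torchhub/facebookresearch_dinov2_main",  # directory containing hubconf.py
    "dinov2_vitl14",                          # assumed entrypoint; see hubconf.py for the full list
    source="local",
    pretrained=False,                         # weights would then be loaded from a local .pth checkpoint
)
encoder.eval()

With pretrained=False no download is attempted, which is the point of keeping this copy of the hub directory inside the repository.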
/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/patch_embed.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/patch_embed.cpython-39.pyc -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/swiglu_ffn.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bohdanvodianyk/image-to-pcd/HEAD/torchhub/facebookresearch_dinov2_main/dinov2/layers/__pycache__/swiglu_ffn.cpython-39.pyc -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/configs/eval/vitb14_pretrain.yaml: -------------------------------------------------------------------------------- 1 | student: 2 | arch: vit_base 3 | patch_size: 14 4 | crops: 5 | global_crops_size: 518 # this is to set up the position embeddings properly 6 | local_crops_size: 98 -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/configs/eval/vitl14_pretrain.yaml: -------------------------------------------------------------------------------- 1 | student: 2 | arch: vit_large 3 | patch_size: 14 4 | crops: 5 | global_crops_size: 518 # this is to set up the position embeddings properly 6 | local_crops_size: 98 -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/configs/eval/vits14_pretrain.yaml: -------------------------------------------------------------------------------- 1 | student: 2 | arch: vit_small 3 | patch_size: 14 4 | crops: 5 | global_crops_size: 518 # this is to set up the position embeddings properly 6 | local_crops_size: 98 -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/run/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/configs/eval/vitg14_pretrain.yaml: -------------------------------------------------------------------------------- 1 | student: 2 | arch: vit_giant2 3 | patch_size: 14 4 | ffn_layer: swiglufused 5 | crops: 6 | global_crops_size: 518 # this is to set up the position embeddings properly 7 | local_crops_size: 98 -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/eval/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | __version__ = "0.0.1" 8 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/requirements.txt: -------------------------------------------------------------------------------- 1 | --extra-index-url https://download.pytorch.org/whl/cu117 2 | torch==2.0.0 3 | torchvision==0.15.0 4 | omegaconf 5 | torchmetrics==0.10.3 6 | fvcore 7 | iopath 8 | xformers==0.0.18 9 | submitit 10 | --extra-index-url https://pypi.nvidia.com 11 | cuml-cu11 12 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .image_net import ImageNet 8 | from .image_net_22k import ImageNet22k 9 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/train/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .train import get_args_parser, main 8 | from .ssl_meta_arch import SSLMetaArch 9 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/loss/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | from .dino_clstoken_loss import DINOLoss 8 | from .ibot_patch_loss import iBOTPatchLoss 9 | from .koleo_loss import KoLeoLoss 10 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/scripts/lint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | if [ -n "$1" ]; then 4 | echo "linting \"$1\"" 5 | fi 6 | 7 | echo "running black" 8 | if [ -n "$1" ]; then 9 | black "$1" 10 | else 11 | black dinov2 12 | fi 13 | 14 | echo "running flake8" 15 | if [ -n "$1" ]; then 16 | flake8 "$1" 17 | else 18 | flake8 19 | fi 20 | 21 | echo "running pylint" 22 | if [ -n "$1" ]; then 23 | pylint "$1" 24 | else 25 | pylint dinov2 26 | fi 27 | 28 | exit 0 29 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .dino_head import DINOHead 8 | from .mlp import Mlp 9 | from .patch_embed import PatchEmbed 10 | from .swiglu_ffn import SwiGLUFFN, SwiGLUFFNFused 11 | from .block import NestedTensorBlock 12 | from .attention import MemEffAttention 13 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | from .adapters import DatasetWithEnumeratedTargets 8 | from .loaders import make_data_loader, make_dataset, SamplerType 9 | from .collate import collate_data_and_cast 10 | from .masking import MaskingGenerator 11 | from .augmentations import DataAugmentationDINO 12 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/conda.yaml: -------------------------------------------------------------------------------- 1 | name: dinov2 2 | channels: 3 | - defaults 4 | - pytorch 5 | - nvidia 6 | - xformers 7 | - conda-forge 8 | dependencies: 9 | - python=3.9 10 | - pytorch::pytorch=2.0.0 11 | - pytorch::pytorch-cuda=11.7.0 12 | - pytorch::torchvision=0.15.0 13 | - omegaconf 14 | - torchmetrics=0.10.3 15 | - fvcore 16 | - iopath 17 | - xformers::xformers=0.0.18 18 | - pip 19 | - pip: 20 | - git+https://github.com/facebookincubator/submitit 21 | - --extra-index-url https://pypi.nvidia.com 22 | - cuml-cu11 23 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 120 3 | 4 | [tool.pylint.master] 5 | persistent = false 6 | score = false 7 | 8 | [tool.pylint.messages_control] 9 | disable = "all" 10 | enable = [ 11 | "miscellaneous", 12 | "similarities", 13 | ] 14 | 15 | [tool.pylint.similarities] 16 | ignore-comments = true 17 | ignore-docstrings = true 18 | ignore-imports = true 19 | min-similarity-lines = 8 20 | 21 | [tool.pylint.reports] 22 | reports = false 23 | 24 | [tool.pylint.miscellaneous] 25 | notes = [ 26 | "FIXME", 27 | "XXX", 28 | "TODO", 29 | ] 30 | -------------------------------------------------------------------------------- /zoedepth/models/zoedepth/config_zoedepth_kitti.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "bin_centers_type": "normed", 4 | "img_size": [384, 768] 5 | }, 6 | 7 | "train": { 8 | }, 9 | 10 | "infer":{ 11 | "train_midas": false, 12 | "use_pretrained_midas": false, 13 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt", 14 | "force_keep_ar": true 15 | }, 16 | 17 | "eval":{ 18 | "train_midas": false, 19 | "use_pretrained_midas": false, 20 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt" 21 | } 22 | } -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/configs/train/vitl14.yaml: -------------------------------------------------------------------------------- 1 | dino: 2 | head_n_prototypes: 131072 3 | head_bottleneck_dim: 384 4 | ibot: 5 | separate_head: true 6 | head_n_prototypes: 131072 7 | train: 8 | batch_size_per_gpu: 32 9 | dataset_path: ImageNet22k 10 | centering: sinkhorn_knopp 11 | student: 12 | arch: vit_large 13 | patch_size: 14 14 | drop_path_rate: 0.4 15 | ffn_layer: swiglufused 16 | block_chunks: 4 17 | teacher: 18 | momentum_teacher: 0.994 19 | optim: 20 | epochs: 500 21 | weight_decay_end: 0.2 22 | base_lr: 2.0e-04 # learning rate for a batch size of 1024 23 | warmup_epochs: 80 24 | layerwise_decay: 1.0 25 | crops: 26 | local_crops_size: 98 -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/configs/train/vitg14.yaml: 
-------------------------------------------------------------------------------- 1 | dino: 2 | head_n_prototypes: 131072 3 | head_bottleneck_dim: 384 4 | ibot: 5 | separate_head: true 6 | head_n_prototypes: 131072 7 | train: 8 | batch_size_per_gpu: 12 9 | dataset_path: ImageNet22k 10 | centering: sinkhorn_knopp 11 | student: 12 | arch: vit_giant2 13 | patch_size: 14 14 | drop_path_rate: 0.4 15 | ffn_layer: swiglufused 16 | block_chunks: 4 17 | teacher: 18 | momentum_teacher: 0.994 19 | optim: 20 | epochs: 500 21 | weight_decay_end: 0.2 22 | base_lr: 2.0e-04 # learning rate for a batch size of 1024 23 | warmup_epochs: 80 24 | layerwise_decay: 1.0 25 | crops: 26 | local_crops_size: 98 -------------------------------------------------------------------------------- /zoedepth/utils/arg_utils.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def infer_type(x): # hacky way to infer type from string args 4 | if not isinstance(x, str): 5 | return x 6 | 7 | try: 8 | x = int(x) 9 | return x 10 | except ValueError: 11 | pass 12 | 13 | try: 14 | x = float(x) 15 | return x 16 | except ValueError: 17 | pass 18 | 19 | return x 20 | 21 | 22 | def parse_unknown(unknown_args): 23 | clean = [] 24 | for a in unknown_args: 25 | if "=" in a: 26 | k, v = a.split("=") 27 | clean.extend([k, v]) 28 | else: 29 | clean.append(a) 30 | 31 | keys = clean[::2] 32 | values = clean[1::2] 33 | return {k.replace("--", ""): infer_type(v) for k, v in zip(keys, values)} 34 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/configs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import pathlib 8 | 9 | from omegaconf import OmegaConf 10 | 11 | 12 | def load_config(config_name: str): 13 | config_filename = config_name + ".yaml" 14 | return OmegaConf.load(pathlib.Path(__file__).parent.resolve() / config_filename) 15 | 16 | 17 | dinov2_default_config = load_config("ssl_default_config") 18 | 19 | 20 | def load_and_merge_config(config_name: str): 21 | default_config = OmegaConf.create(dinov2_default_config) 22 | loaded_config = load_config(config_name) 23 | return OmegaConf.merge(default_config, loaded_config) 24 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/data/datasets/decoders.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | from io import BytesIO 8 | from typing import Any 9 | 10 | from PIL import Image 11 | 12 | 13 | class Decoder: 14 | def decode(self) -> Any: 15 | raise NotImplementedError 16 | 17 | 18 | class ImageDataDecoder(Decoder): 19 | def __init__(self, image_data: bytes) -> None: 20 | self._image_data = image_data 21 | 22 | def decode(self) -> Image: 23 | f = BytesIO(self._image_data) 24 | return Image.open(f).convert(mode="RGB") 25 | 26 | 27 | class TargetDecoder(Decoder): 28 | def __init__(self, target: Any): 29 | self._target = target 30 | 31 | def decode(self) -> Any: 32 | return self._target 33 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/layer_scale.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | # Modified from: https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/vision_transformer.py#L103-L110 8 | 9 | from typing import Union 10 | 11 | import torch 12 | from torch import Tensor 13 | from torch import nn 14 | 15 | 16 | class LayerScale(nn.Module): 17 | def __init__( 18 | self, 19 | dim: int, 20 | init_values: Union[float, Tensor] = 1e-5, 21 | inplace: bool = False, 22 | ) -> None: 23 | super().__init__() 24 | self.inplace = inplace 25 | self.gamma = nn.Parameter(init_values * torch.ones(dim)) 26 | 27 | def forward(self, x: Tensor) -> Tensor: 28 | return x.mul_(self.gamma) if self.inplace else x * self.gamma 29 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/data/adapters.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | from typing import Any, Tuple 8 | 9 | from torch.utils.data import Dataset 10 | 11 | 12 | class DatasetWithEnumeratedTargets(Dataset): 13 | def __init__(self, dataset): 14 | self._dataset = dataset 15 | 16 | def get_image_data(self, index: int) -> bytes: 17 | return self._dataset.get_image_data(index) 18 | 19 | def get_target(self, index: int) -> Tuple[Any, int]: 20 | target = self._dataset.get_target(index) 21 | return (index, target) 22 | 23 | def __getitem__(self, index: int) -> Tuple[Any, Tuple[Any, int]]: 24 | image, target = self._dataset[index] 25 | target = index if target is None else target 26 | return image, (index, target) 27 | 28 | def __len__(self) -> int: 29 | return len(self._dataset) 30 | -------------------------------------------------------------------------------- /heic2png.py: -------------------------------------------------------------------------------- 1 | import os 2 | from PIL import Image 3 | from pillow_heif import register_heif_opener 4 | 5 | # Register the HEIF opener 6 | register_heif_opener() 7 | 8 | # Path to the directory containing HEIC images 9 | directory_path = 'my_test/input/indoor' 10 | 11 | # Loop through all files in the directory 12 | for filename in os.listdir(directory_path): 13 | if filename.lower().endswith('.heic'): 14 | # Construct full file path 15 | heic_file_path = os.path.join(directory_path, filename) 16 | 17 | # Open the HEIC image 18 | image = Image.open(heic_file_path) 19 | 20 | # Create the output PNG file path 21 | png_file_path = os.path.join(directory_path, os.path.splitext(filename)[0] + '.png') 22 | 23 | # Save the image as PNG 24 | image.save(png_file_path, format='PNG') 25 | 26 | # Remove the original HEIC file 27 | os.remove(heic_file_path) 28 | 29 | print(f"Converted and removed: {filename}") 30 | 31 | print("All HEIC files have been converted to PNG and the originals have been removed.") 32 | -------------------------------------------------------------------------------- /zoedepth/data/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /zoedepth/models/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /zoedepth/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /zoedepth/models/base_models/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/utils/dtype.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | from typing import Dict, Union 9 | 10 | import numpy as np 11 | import torch 12 | 13 | 14 | TypeSpec = Union[str, np.dtype, torch.dtype] 15 | 16 | 17 | _NUMPY_TO_TORCH_DTYPE: Dict[np.dtype, torch.dtype] = { 18 | np.dtype("bool"): torch.bool, 19 | np.dtype("uint8"): torch.uint8, 20 | np.dtype("int8"): torch.int8, 21 | np.dtype("int16"): torch.int16, 22 | np.dtype("int32"): torch.int32, 23 | np.dtype("int64"): torch.int64, 24 | np.dtype("float16"): torch.float16, 25 | np.dtype("float32"): torch.float32, 26 | np.dtype("float64"): torch.float64, 27 | np.dtype("complex64"): torch.complex64, 28 | np.dtype("complex128"): torch.complex128, 29 | } 30 | 31 | 32 | def as_torch_dtype(dtype: TypeSpec) -> torch.dtype: 33 | if isinstance(dtype, torch.dtype): 34 | return dtype 35 | if isinstance(dtype, str): 36 | dtype = np.dtype(dtype) 37 | assert isinstance(dtype, np.dtype), f"Expected an instance of nunpy dtype, got {type(dtype)}" 38 | return _NUMPY_TO_TORCH_DTYPE[dtype] 39 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/drop_path.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | # References: 8 | # https://github.com/facebookresearch/dino/blob/master/vision_transformer.py 9 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/drop.py 10 | 11 | 12 | from torch import nn 13 | 14 | 15 | def drop_path(x, drop_prob: float = 0.0, training: bool = False): 16 | if drop_prob == 0.0 or not training: 17 | return x 18 | keep_prob = 1 - drop_prob 19 | shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets 20 | random_tensor = x.new_empty(shape).bernoulli_(keep_prob) 21 | if keep_prob > 0.0: 22 | random_tensor.div_(keep_prob) 23 | output = x * random_tensor 24 | return output 25 | 26 | 27 | class DropPath(nn.Module): 28 | """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).""" 29 | 30 | def __init__(self, drop_prob=None): 31 | super(DropPath, self).__init__() 32 | self.drop_prob = drop_prob 33 | 34 | def forward(self, x): 35 | return drop_path(x, self.drop_prob, self.training) 36 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to DINOv2 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Pull Requests 6 | We actively welcome your pull requests. 7 | 8 | 1. Fork the repo and create your branch from `main`. 9 | 2. If you've added code that should be tested, add tests. 10 | 3. If you've changed APIs, update the documentation. 11 | 4. Ensure the test suite passes. 12 | 5. Make sure your code lints. 13 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 14 | 15 | ## Contributor License Agreement ("CLA") 16 | In order to accept your pull request, we need you to submit a CLA. You only need 17 | to do this once to work on any of Meta's open source projects. 18 | 19 | Complete your CLA here: 20 | 21 | ## Issues 22 | We use GitHub issues to track public bugs. Please ensure your description is 23 | clear and has sufficient instructions to be able to reproduce the issue. 24 | 25 | Meta has a [bounty program](https://www.facebook.com/whitehat/) for the safe 26 | disclosure of security bugs. In those cases, please go through the process 27 | outlined on that page and do not file a public issue. 28 | 29 | ## License 30 | By contributing to DINOv2, you agree that your contributions will be licensed 31 | under the LICENSE file in the root directory of this source tree. 32 | -------------------------------------------------------------------------------- /zoedepth/models/zoedepth/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 
14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from .zoedepth_v1 import ZoeDepth 26 | 27 | all_versions = { 28 | "v1": ZoeDepth, 29 | } 30 | 31 | get_version = lambda v : all_versions[v] -------------------------------------------------------------------------------- /zoedepth/models/zoedepth_nk/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from .zoedepth_nk_v1 import ZoeDepthNK 26 | 27 | all_versions = { 28 | "v1": ZoeDepthNK, 29 | } 30 | 31 | get_version = lambda v : all_versions[v] -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/data/datasets/extended.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | from typing import Any, Tuple 8 | 9 | from torchvision.datasets import VisionDataset 10 | 11 | from .decoders import TargetDecoder, ImageDataDecoder 12 | 13 | 14 | class ExtendedVisionDataset(VisionDataset): 15 | def __init__(self, *args, **kwargs) -> None: 16 | super().__init__(*args, **kwargs) # type: ignore 17 | 18 | def get_image_data(self, index: int) -> bytes: 19 | raise NotImplementedError 20 | 21 | def get_target(self, index: int) -> Any: 22 | raise NotImplementedError 23 | 24 | def __getitem__(self, index: int) -> Tuple[Any, Any]: 25 | try: 26 | image_data = self.get_image_data(index) 27 | image = ImageDataDecoder(image_data).decode() 28 | except Exception as e: 29 | raise RuntimeError(f"can not read image for sample {index}") from e 30 | target = self.get_target(index) 31 | target = TargetDecoder(target).decode() 32 | 33 | if self.transforms is not None: 34 | image, target = self.transforms(image, target) 35 | 36 | return image, target 37 | 38 | def __len__(self) -> int: 39 | raise NotImplementedError 40 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # 3 | # This source code is licensed under the Apache License, Version 2.0 4 | # found in the LICENSE file in the root directory of this source tree. 5 | 6 | import itertools 7 | import math 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | 14 | _DINOV2_BASE_URL = "https://dl.fbaipublicfiles.com/dinov2" 15 | 16 | 17 | def _make_dinov2_model_name(arch_name: str, patch_size: int, num_register_tokens: int = 0) -> str: 18 | compact_arch_name = arch_name.replace("_", "")[:4] 19 | registers_suffix = f"_reg{num_register_tokens}" if num_register_tokens else "" 20 | return f"dinov2_{compact_arch_name}{patch_size}{registers_suffix}" 21 | 22 | 23 | class CenterPadding(nn.Module): 24 | def __init__(self, multiple): 25 | super().__init__() 26 | self.multiple = multiple 27 | 28 | def _get_pad(self, size): 29 | new_size = math.ceil(size / self.multiple) * self.multiple 30 | pad_size = new_size - size 31 | pad_size_left = pad_size // 2 32 | pad_size_right = pad_size - pad_size_left 33 | return pad_size_left, pad_size_right 34 | 35 | @torch.inference_mode() 36 | def forward(self, x): 37 | pads = list(itertools.chain.from_iterable(self._get_pad(m) for m in x.shape[:1:-1])) 38 | output = F.pad(x, pads) 39 | return output 40 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/mlp.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | # References: 8 | # https://github.com/facebookresearch/dino/blob/master/vision_transformer.py 9 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/mlp.py 10 | 11 | 12 | from typing import Callable, Optional 13 | 14 | from torch import Tensor, nn 15 | 16 | 17 | class Mlp(nn.Module): 18 | def __init__( 19 | self, 20 | in_features: int, 21 | hidden_features: Optional[int] = None, 22 | out_features: Optional[int] = None, 23 | act_layer: Callable[..., nn.Module] = nn.GELU, 24 | drop: float = 0.0, 25 | bias: bool = True, 26 | ) -> None: 27 | super().__init__() 28 | out_features = out_features or in_features 29 | hidden_features = hidden_features or in_features 30 | self.fc1 = nn.Linear(in_features, hidden_features, bias=bias) 31 | self.act = act_layer() 32 | self.fc2 = nn.Linear(hidden_features, out_features, bias=bias) 33 | self.drop = nn.Dropout(drop) 34 | 35 | def forward(self, x: Tensor) -> Tensor: 36 | x = self.fc1(x) 37 | x = self.act(x) 38 | x = self.drop(x) 39 | x = self.fc2(x) 40 | x = self.drop(x) 41 | return x 42 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import logging 8 | 9 | from . import vision_transformer as vits 10 | 11 | 12 | logger = logging.getLogger("dinov2") 13 | 14 | 15 | def build_model(args, only_teacher=False, img_size=224): 16 | args.arch = args.arch.removesuffix("_memeff") 17 | if "vit" in args.arch: 18 | vit_kwargs = dict( 19 | img_size=img_size, 20 | patch_size=args.patch_size, 21 | init_values=args.layerscale, 22 | ffn_layer=args.ffn_layer, 23 | block_chunks=args.block_chunks, 24 | qkv_bias=args.qkv_bias, 25 | proj_bias=args.proj_bias, 26 | ffn_bias=args.ffn_bias, 27 | ) 28 | teacher = vits.__dict__[args.arch](**vit_kwargs) 29 | if only_teacher: 30 | return teacher, teacher.embed_dim 31 | student = vits.__dict__[args.arch]( 32 | **vit_kwargs, 33 | drop_path_rate=args.drop_path_rate, 34 | drop_path_uniform=args.drop_path_uniform, 35 | ) 36 | embed_dim = student.embed_dim 37 | return student, teacher, embed_dim 38 | 39 | 40 | def build_model_from_cfg(cfg, only_teacher=False): 41 | return build_model(cfg.student, only_teacher=only_teacher, img_size=cfg.crops.global_crops_size) 42 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/loss/koleo_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import logging 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | # import torch.distributed as dist 14 | 15 | 16 | logger = logging.getLogger("dinov2") 17 | 18 | 19 | class KoLeoLoss(nn.Module): 20 | """Kozachenko-Leonenko entropic loss regularizer from Sablayrolles et al. 
- 2018 - Spreading vectors for similarity search""" 21 | 22 | def __init__(self): 23 | super().__init__() 24 | self.pdist = nn.PairwiseDistance(2, eps=1e-8) 25 | 26 | def pairwise_NNs_inner(self, x): 27 | """ 28 | Pairwise nearest neighbors for L2-normalized vectors. 29 | Uses Torch rather than Faiss to remain on GPU. 30 | """ 31 | # parwise dot products (= inverse distance) 32 | dots = torch.mm(x, x.t()) 33 | n = x.shape[0] 34 | dots.view(-1)[:: (n + 1)].fill_(-1) # Trick to fill diagonal with -1 35 | # max inner prod -> min distance 36 | _, I = torch.max(dots, dim=1) # noqa: E741 37 | return I 38 | 39 | def forward(self, student_output, eps=1e-8): 40 | """ 41 | Args: 42 | student_output (BxD): backbone output of student 43 | """ 44 | with torch.cuda.amp.autocast(enabled=False): 45 | student_output = F.normalize(student_output, eps=eps, p=2, dim=-1) 46 | I = self.pairwise_NNs_inner(student_output) # noqa: E741 47 | distances = self.pdist(student_output, student_output[I]) # BxD, BxD -> B 48 | loss = -torch.log(distances + eps).mean() 49 | return loss 50 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/run/eval/knn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import logging 8 | import os 9 | import sys 10 | 11 | from dinov2.eval.knn import get_args_parser as get_knn_args_parser 12 | from dinov2.logging import setup_logging 13 | from dinov2.run.submit import get_args_parser, submit_jobs 14 | 15 | 16 | logger = logging.getLogger("dinov2") 17 | 18 | 19 | class Evaluator: 20 | def __init__(self, args): 21 | self.args = args 22 | 23 | def __call__(self): 24 | from dinov2.eval.knn import main as knn_main 25 | 26 | self._setup_args() 27 | knn_main(self.args) 28 | 29 | def checkpoint(self): 30 | import submitit 31 | 32 | logger.info(f"Requeuing {self.args}") 33 | empty = type(self)(self.args) 34 | return submitit.helpers.DelayedSubmission(empty) 35 | 36 | def _setup_args(self): 37 | import submitit 38 | 39 | job_env = submitit.JobEnvironment() 40 | self.args.output_dir = self.args.output_dir.replace("%j", str(job_env.job_id)) 41 | logger.info(f"Process group: {job_env.num_tasks} tasks, rank: {job_env.global_rank}") 42 | logger.info(f"Args: {self.args}") 43 | 44 | 45 | def main(): 46 | description = "Submitit launcher for DINOv2 k-NN evaluation" 47 | knn_args_parser = get_knn_args_parser(add_help=False) 48 | parents = [knn_args_parser] 49 | args_parser = get_args_parser(description=description, parents=parents) 50 | args = args_parser.parse_args() 51 | 52 | setup_logging() 53 | 54 | assert os.path.exists(args.config_file), "Configuration file does not exist!" 55 | submit_jobs(Evaluator, args, name="dinov2:knn") 56 | return 0 57 | 58 | 59 | if __name__ == "__main__": 60 | sys.exit(main()) 61 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/run/train/train.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import logging 8 | import os 9 | import sys 10 | 11 | from dinov2.logging import setup_logging 12 | from dinov2.train import get_args_parser as get_train_args_parser 13 | from dinov2.run.submit import get_args_parser, submit_jobs 14 | 15 | 16 | logger = logging.getLogger("dinov2") 17 | 18 | 19 | class Trainer(object): 20 | def __init__(self, args): 21 | self.args = args 22 | 23 | def __call__(self): 24 | from dinov2.train import main as train_main 25 | 26 | self._setup_args() 27 | train_main(self.args) 28 | 29 | def checkpoint(self): 30 | import submitit 31 | 32 | logger.info(f"Requeuing {self.args}") 33 | empty = type(self)(self.args) 34 | return submitit.helpers.DelayedSubmission(empty) 35 | 36 | def _setup_args(self): 37 | import submitit 38 | 39 | job_env = submitit.JobEnvironment() 40 | self.args.output_dir = self.args.output_dir.replace("%j", str(job_env.job_id)) 41 | logger.info(f"Process group: {job_env.num_tasks} tasks, rank: {job_env.global_rank}") 42 | logger.info(f"Args: {self.args}") 43 | 44 | 45 | def main(): 46 | description = "Submitit launcher for DINOv2 training" 47 | train_args_parser = get_train_args_parser(add_help=False) 48 | parents = [train_args_parser] 49 | args_parser = get_args_parser(description=description, parents=parents) 50 | args = args_parser.parse_args() 51 | 52 | setup_logging() 53 | 54 | assert os.path.exists(args.config_file), "Configuration file does not exist!" 55 | submit_jobs(Trainer, args, name="dinov2:train") 56 | return 0 57 | 58 | 59 | if __name__ == "__main__": 60 | sys.exit(main()) 61 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/run/eval/linear.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import logging 8 | import os 9 | import sys 10 | 11 | from dinov2.eval.linear import get_args_parser as get_linear_args_parser 12 | from dinov2.logging import setup_logging 13 | from dinov2.run.submit import get_args_parser, submit_jobs 14 | 15 | 16 | logger = logging.getLogger("dinov2") 17 | 18 | 19 | class Evaluator: 20 | def __init__(self, args): 21 | self.args = args 22 | 23 | def __call__(self): 24 | from dinov2.eval.linear import main as linear_main 25 | 26 | self._setup_args() 27 | linear_main(self.args) 28 | 29 | def checkpoint(self): 30 | import submitit 31 | 32 | logger.info(f"Requeuing {self.args}") 33 | empty = type(self)(self.args) 34 | return submitit.helpers.DelayedSubmission(empty) 35 | 36 | def _setup_args(self): 37 | import submitit 38 | 39 | job_env = submitit.JobEnvironment() 40 | self.args.output_dir = self.args.output_dir.replace("%j", str(job_env.job_id)) 41 | logger.info(f"Process group: {job_env.num_tasks} tasks, rank: {job_env.global_rank}") 42 | logger.info(f"Args: {self.args}") 43 | 44 | 45 | def main(): 46 | description = "Submitit launcher for DINOv2 linear evaluation" 47 | linear_args_parser = get_linear_args_parser(add_help=False) 48 | parents = [linear_args_parser] 49 | args_parser = get_args_parser(description=description, parents=parents) 50 | args = args_parser.parse_args() 51 | 52 | setup_logging() 53 | 54 | assert os.path.exists(args.config_file), "Configuration file does not exist!" 
55 | submit_jobs(Evaluator, args, name="dinov2:linear") 56 | return 0 57 | 58 | 59 | if __name__ == "__main__": 60 | sys.exit(main()) 61 | -------------------------------------------------------------------------------- /zoedepth/models/zoedepth/config_zoedepth.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "ZoeDepth", 4 | "version_name": "v1", 5 | "n_bins": 64, 6 | "bin_embedding_dim": 128, 7 | "bin_centers_type": "softplus", 8 | "n_attractors":[16, 8, 4, 1], 9 | "attractor_alpha": 1000, 10 | "attractor_gamma": 2, 11 | "attractor_kind" : "mean", 12 | "attractor_type" : "inv", 13 | "midas_model_type" : "DPT_BEiT_L_384", 14 | "min_temp": 0.0212, 15 | "max_temp": 50.0, 16 | "output_distribution": "logbinomial", 17 | "memory_efficient": true, 18 | "inverse_midas": false, 19 | "img_size": [392, 518] 20 | }, 21 | 22 | "train": { 23 | "train_midas": true, 24 | "use_pretrained_midas": true, 25 | "trainer": "zoedepth", 26 | "epochs": 5, 27 | "bs": 16, 28 | "optim_kwargs": {"lr": 0.000161, "wd": 0.01}, 29 | "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true}, 30 | "same_lr": false, 31 | "w_si": 1, 32 | "w_domain": 0.2, 33 | "w_reg": 0, 34 | "w_grad": 0, 35 | "avoid_boundary": false, 36 | "random_crop": false, 37 | "input_width": 640, 38 | "input_height": 480, 39 | "midas_lr_factor": 50, 40 | "encoder_lr_factor":50, 41 | "pos_enc_lr_factor":50, 42 | "freeze_midas_bn": true 43 | 44 | }, 45 | 46 | "infer":{ 47 | "train_midas": false, 48 | "use_pretrained_midas": false, 49 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_N.pt", 50 | "force_keep_ar": true 51 | }, 52 | 53 | "eval":{ 54 | "train_midas": false, 55 | "use_pretrained_midas": false, 56 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_N.pt" 57 | } 58 | } -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/run/eval/log_regression.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import logging 8 | import os 9 | import sys 10 | 11 | from dinov2.eval.log_regression import get_args_parser as get_log_regression_args_parser 12 | from dinov2.logging import setup_logging 13 | from dinov2.run.submit import get_args_parser, submit_jobs 14 | 15 | 16 | logger = logging.getLogger("dinov2") 17 | 18 | 19 | class Evaluator: 20 | def __init__(self, args): 21 | self.args = args 22 | 23 | def __call__(self): 24 | from dinov2.eval.log_regression import main as log_regression_main 25 | 26 | self._setup_args() 27 | log_regression_main(self.args) 28 | 29 | def checkpoint(self): 30 | import submitit 31 | 32 | logger.info(f"Requeuing {self.args}") 33 | empty = type(self)(self.args) 34 | return submitit.helpers.DelayedSubmission(empty) 35 | 36 | def _setup_args(self): 37 | import submitit 38 | 39 | job_env = submitit.JobEnvironment() 40 | self.args.output_dir = self.args.output_dir.replace("%j", str(job_env.job_id)) 41 | logger.info(f"Process group: {job_env.num_tasks} tasks, rank: {job_env.global_rank}") 42 | logger.info(f"Args: {self.args}") 43 | 44 | 45 | def main(): 46 | description = "Submitit launcher for DINOv2 logistic evaluation" 47 | log_regression_args_parser = get_log_regression_args_parser(add_help=False) 48 | parents = [log_regression_args_parser] 49 | args_parser = get_args_parser(description=description, parents=parents) 50 | args = args_parser.parse_args() 51 | 52 | setup_logging() 53 | 54 | assert os.path.exists(args.config_file), "Configuration file does not exist!" 55 | submit_jobs(Evaluator, args, name="dinov2:logreg") 56 | return 0 57 | 58 | 59 | if __name__ == "__main__": 60 | sys.exit(main()) 61 | -------------------------------------------------------------------------------- /calibration-camera.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import glob 4 | 5 | # Define the dimensions of the checkerboard 6 | CHECKERBOARD = (9, 6) 7 | # Define the real-world size of the squares in meters (e.g., 20mm = 0.02 meters) 8 | SQUARE_SIZE = 0.024 9 | 10 | criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001) 11 | 12 | # Create arrays to store object points and image points from all the images 13 | objpoints = [] 14 | imgpoints = [] 15 | 16 | # Prepare the object points 17 | objp = np.zeros((CHECKERBOARD[0] * CHECKERBOARD[1], 3), np.float32) 18 | objp[:, :2] = np.mgrid[0:CHECKERBOARD[0], 0:CHECKERBOARD[1]].T.reshape(-1, 2) 19 | objp = objp * SQUARE_SIZE # Scale the object points by the real size of the squares 20 | 21 | # Get the paths of all the images 22 | images = glob.glob('chessboard_calibration/*.png') # Update with the path to your images 23 | 24 | for image_file in images: 25 | print(image_file) 26 | img = cv2.imread(image_file) 27 | gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 28 | 29 | # Find the checkerboard corners 30 | ret, corners = cv2.findChessboardCorners(gray, CHECKERBOARD, None) 31 | 32 | # If found, add object points and image points (after refining them) 33 | if ret: 34 | objpoints.append(objp) 35 | corners2 = cv2.cornerSubPix(gray, corners, (11, 11), (-1, -1), criteria) 36 | imgpoints.append(corners2) 37 | 38 | # Draw and display the corners 39 | img = cv2.drawChessboardCorners(img, CHECKERBOARD, corners2, ret) 40 | else: 41 | print(f"Checkerboard not detected in image: {image_file}") 42 | 43 | # Calibrate the camera 44 | ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(objpoints, imgpoints, gray.shape[::-1], None, None) 45 | 46 | # Save the 
calibration results 47 | np.savez( 48 | "CalibrationMatrix_college_cpt", 49 | Camera_matrix=mtx, 50 | distCoeff=dist, 51 | RotationalV=rvecs, 52 | TranslationV=tvecs 53 | ) 54 | 55 | # Extract the focal lengths 56 | fx = mtx[0, 0] 57 | fy = mtx[1, 1] 58 | 59 | print(f"Focal length in x direction (fx): {fx}") 60 | print(f"Focal length in y direction (fy): {fy}") 61 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/swiglu_ffn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from typing import Callable, Optional 8 | 9 | from torch import Tensor, nn 10 | import torch.nn.functional as F 11 | 12 | 13 | class SwiGLUFFN(nn.Module): 14 | def __init__( 15 | self, 16 | in_features: int, 17 | hidden_features: Optional[int] = None, 18 | out_features: Optional[int] = None, 19 | act_layer: Callable[..., nn.Module] = None, 20 | drop: float = 0.0, 21 | bias: bool = True, 22 | ) -> None: 23 | super().__init__() 24 | out_features = out_features or in_features 25 | hidden_features = hidden_features or in_features 26 | self.w12 = nn.Linear(in_features, 2 * hidden_features, bias=bias) 27 | self.w3 = nn.Linear(hidden_features, out_features, bias=bias) 28 | 29 | def forward(self, x: Tensor) -> Tensor: 30 | x12 = self.w12(x) 31 | x1, x2 = x12.chunk(2, dim=-1) 32 | hidden = F.silu(x1) * x2 33 | return self.w3(hidden) 34 | 35 | 36 | try: 37 | from xformers.ops import SwiGLU 38 | 39 | XFORMERS_AVAILABLE = True 40 | except ImportError: 41 | SwiGLU = SwiGLUFFN 42 | XFORMERS_AVAILABLE = False 43 | 44 | 45 | class SwiGLUFFNFused(SwiGLU): 46 | def __init__( 47 | self, 48 | in_features: int, 49 | hidden_features: Optional[int] = None, 50 | out_features: Optional[int] = None, 51 | act_layer: Callable[..., nn.Module] = None, 52 | drop: float = 0.0, 53 | bias: bool = True, 54 | ) -> None: 55 | out_features = out_features or in_features 56 | hidden_features = hidden_features or in_features 57 | hidden_features = (int(hidden_features * 2 / 3) + 7) // 8 * 8 58 | super().__init__( 59 | in_features=in_features, 60 | hidden_features=hidden_features, 61 | out_features=out_features, 62 | bias=bias, 63 | ) 64 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Metric Point Cloud Creation from Single Images using Depth Anything Model 2 | 3 | This project leverages the Depth Anything model to create metric point clouds from single images. This repository includes all necessary scripts and files to perform depth estimation and generate accurate 3D point clouds with real-world measurements. 4 | 5 | ### Requirements 6 | 7 | ------------ 8 | 9 | To install the required packages, run: 10 | `pip install -r requirements.txt` 11 | 12 | ### Installation 13 | 14 | ------------ 15 | 16 | 1. Clone this repository: 17 | `git clone https://github.com/bohdanvodianyk/image-to-pcd.git` 18 | `cd image-to-pcd` 19 | 2. Install the necessary Python packages: 20 | `pip install -r requirements.txt` 21 | 3. 
Download the model checkpoints from [Google Drive](https://drive.google.com/drive/folders/1LJRnpOhNuzZXlVE0oGzzUb7ZiXeF6f-8?usp=sharing "Google Drive") and place them in the appropriate directory within the project. 22 | 23 | ### Usage 24 | 25 | ------------ 26 | 27 | #### Calibration 28 | Before generating point clouds, calibrate your camera using the `calibration-camera.py` script. Ensure you have a chessboard pattern printed for the calibration process. 29 | 30 | #### Depth Estimation to Point Cloud 31 | To convert depth maps into metric point clouds, use the `depth_to_pointcloud.py` script. Ensure your input image is correctly formatted and accessible. 32 | 33 | #### HEIC to PNG Conversion 34 | If your input images are in HEIC format, convert them to PNG using the `heic2png.py` script. 35 | 36 | ### Model Checkpoints 37 | 38 | ------------ 39 | 40 | Model checkpoints necessary for depth estimation can be downloaded from the following Google Drive link: 41 | 42 | [Google Drive - Model Checkpoints](https://drive.google.com/drive/folders/1LJRnpOhNuzZXlVE0oGzzUb7ZiXeF6f-8?usp=sharing "Google Drive - Model Checkpoints") 43 | 44 | Download and place these checkpoints in the appropriate directory within your project to ensure the model functions correctly. 45 | 46 | ### Acknowledgements 47 | 48 | ------------ 49 | 50 | Special thanks to the developers of the **Depth Anything** model and all contributors who made this project possible. Your work in computer vision and deep learning is greatly appreciated. 51 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/data/collate.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
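# --- Illustrative sketch (editorial addition, not part of the upstream DINOv2 collate.py
# that follows). It shows, under stated assumptions, the workflow the README's
# "Depth Estimation to Point Cloud" section describes: load the intrinsics saved by
# calibration-camera.py and back-project a metric depth map into a point cloud.
# The depth-map path and the use of Open3D are assumptions; the real pipeline lives in
# depth_to_pointcloud.py, which is not reproduced in this listing.
import numpy as np
import open3d as o3d

calib = np.load("CalibrationMatrix_college_cpt.npz")
K = calib["Camera_matrix"]              # 3x3 intrinsics from cv2.calibrateCamera
fx, fy = K[0, 0], K[1, 1]
cx, cy = K[0, 2], K[1, 2]

depth = np.load("depth_metric.npy")     # placeholder: H x W depth map in meters
h, w = depth.shape
u, v = np.meshgrid(np.arange(w), np.arange(h))

# Pinhole back-projection: X = (u - cx) * Z / fx, Y = (v - cy) * Z / fy, Z = depth
z = depth
x = (u - cx) * z / fx
y = (v - cy) * z / fy
points = np.stack([x, y, z], axis=-1).reshape(-1, 3)

pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(points)
o3d.io.write_point_cloud("output.ply", pcd)
# ---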
6 | 7 | import torch 8 | import random 9 | 10 | 11 | def collate_data_and_cast(samples_list, mask_ratio_tuple, mask_probability, dtype, n_tokens=None, mask_generator=None): 12 | # dtype = torch.half # TODO: Remove 13 | 14 | n_global_crops = len(samples_list[0][0]["global_crops"]) 15 | n_local_crops = len(samples_list[0][0]["local_crops"]) 16 | 17 | collated_global_crops = torch.stack([s[0]["global_crops"][i] for i in range(n_global_crops) for s in samples_list]) 18 | 19 | collated_local_crops = torch.stack([s[0]["local_crops"][i] for i in range(n_local_crops) for s in samples_list]) 20 | 21 | B = len(collated_global_crops) 22 | N = n_tokens 23 | n_samples_masked = int(B * mask_probability) 24 | probs = torch.linspace(*mask_ratio_tuple, n_samples_masked + 1) 25 | upperbound = 0 26 | masks_list = [] 27 | for i in range(0, n_samples_masked): 28 | prob_min = probs[i] 29 | prob_max = probs[i + 1] 30 | masks_list.append(torch.BoolTensor(mask_generator(int(N * random.uniform(prob_min, prob_max))))) 31 | upperbound += int(N * prob_max) 32 | for i in range(n_samples_masked, B): 33 | masks_list.append(torch.BoolTensor(mask_generator(0))) 34 | 35 | random.shuffle(masks_list) 36 | 37 | collated_masks = torch.stack(masks_list).flatten(1) 38 | mask_indices_list = collated_masks.flatten().nonzero().flatten() 39 | 40 | masks_weight = (1 / collated_masks.sum(-1).clamp(min=1.0)).unsqueeze(-1).expand_as(collated_masks)[collated_masks] 41 | 42 | return { 43 | "collated_global_crops": collated_global_crops.to(dtype), 44 | "collated_local_crops": collated_local_crops.to(dtype), 45 | "collated_masks": collated_masks, 46 | "mask_indices_list": mask_indices_list, 47 | "masks_weight": masks_weight, 48 | "upperbound": upperbound, 49 | "n_masked_patches": torch.full((1,), fill_value=mask_indices_list.shape[0], dtype=torch.long), 50 | } 51 | -------------------------------------------------------------------------------- /zoedepth/models/zoedepth_nk/config_zoedepth_nk.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "ZoeDepthNK", 4 | "version_name": "v1", 5 | "bin_conf" : [ 6 | { 7 | "name": "nyu", 8 | "n_bins": 64, 9 | "min_depth": 1e-3, 10 | "max_depth": 10.0 11 | }, 12 | { 13 | "name": "kitti", 14 | "n_bins": 64, 15 | "min_depth": 1e-3, 16 | "max_depth": 80.0 17 | } 18 | ], 19 | "bin_embedding_dim": 128, 20 | "bin_centers_type": "softplus", 21 | "n_attractors":[16, 8, 4, 1], 22 | "attractor_alpha": 1000, 23 | "attractor_gamma": 2, 24 | "attractor_kind" : "mean", 25 | "attractor_type" : "inv", 26 | "min_temp": 0.0212, 27 | "max_temp": 50.0, 28 | "memory_efficient": true, 29 | "midas_model_type" : "DPT_BEiT_L_384", 30 | "img_size": [392, 518] 31 | }, 32 | 33 | "train": { 34 | "train_midas": true, 35 | "use_pretrained_midas": true, 36 | "trainer": "zoedepth_nk", 37 | "epochs": 10, 38 | "bs": 16, 39 | "optim_kwargs": {"lr": 0.0002512, "wd": 0.01}, 40 | "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true}, 41 | "same_lr": false, 42 | "w_si": 1, 43 | "w_domain": 100, 44 | "avoid_boundary": false, 45 | "random_crop": false, 46 | "input_width": 640, 47 | "input_height": 480, 48 | "w_grad": 0, 49 | "w_reg": 0, 50 | "midas_lr_factor": 50, 51 | "encoder_lr_factor": 50, 52 | "pos_enc_lr_factor": 50 53 | }, 54 | 55 | "infer": { 56 | "train_midas": false, 57 | "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt", 58 | 
"use_pretrained_midas": false, 59 | "force_keep_ar": true 60 | }, 61 | 62 | "eval": { 63 | "train_midas": false, 64 | "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt", 65 | "use_pretrained_midas": false 66 | } 67 | } -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/dino_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | import torch.nn as nn 9 | from torch.nn.init import trunc_normal_ 10 | from torch.nn.utils import weight_norm 11 | 12 | 13 | class DINOHead(nn.Module): 14 | def __init__( 15 | self, 16 | in_dim, 17 | out_dim, 18 | use_bn=False, 19 | nlayers=3, 20 | hidden_dim=2048, 21 | bottleneck_dim=256, 22 | mlp_bias=True, 23 | ): 24 | super().__init__() 25 | nlayers = max(nlayers, 1) 26 | self.mlp = _build_mlp(nlayers, in_dim, bottleneck_dim, hidden_dim=hidden_dim, use_bn=use_bn, bias=mlp_bias) 27 | self.apply(self._init_weights) 28 | self.last_layer = weight_norm(nn.Linear(bottleneck_dim, out_dim, bias=False)) 29 | self.last_layer.weight_g.data.fill_(1) 30 | 31 | def _init_weights(self, m): 32 | if isinstance(m, nn.Linear): 33 | trunc_normal_(m.weight, std=0.02) 34 | if isinstance(m, nn.Linear) and m.bias is not None: 35 | nn.init.constant_(m.bias, 0) 36 | 37 | def forward(self, x): 38 | x = self.mlp(x) 39 | eps = 1e-6 if x.dtype == torch.float16 else 1e-12 40 | x = nn.functional.normalize(x, dim=-1, p=2, eps=eps) 41 | x = self.last_layer(x) 42 | return x 43 | 44 | 45 | def _build_mlp(nlayers, in_dim, bottleneck_dim, hidden_dim=None, use_bn=False, bias=True): 46 | if nlayers == 1: 47 | return nn.Linear(in_dim, bottleneck_dim, bias=bias) 48 | else: 49 | layers = [nn.Linear(in_dim, hidden_dim, bias=bias)] 50 | if use_bn: 51 | layers.append(nn.BatchNorm1d(hidden_dim)) 52 | layers.append(nn.GELU()) 53 | for _ in range(nlayers - 2): 54 | layers.append(nn.Linear(hidden_dim, hidden_dim, bias=bias)) 55 | if use_bn: 56 | layers.append(nn.BatchNorm1d(hidden_dim)) 57 | layers.append(nn.GELU()) 58 | layers.append(nn.Linear(hidden_dim, bottleneck_dim, bias=bias)) 59 | return nn.Sequential(*layers) 60 | -------------------------------------------------------------------------------- /zoedepth/trainers/builder.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 
14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from importlib import import_module 26 | 27 | 28 | def get_trainer(config): 29 | """Builds and returns a trainer based on the config. 30 | 31 | Args: 32 | config (dict): the config dict (typically constructed using utils.config.get_config) 33 | config.trainer (str): the name of the trainer to use. The module named "{config.trainer}_trainer" must exist in trainers root module 34 | 35 | Raises: 36 | ValueError: If the specified trainer does not exist under trainers/ folder 37 | 38 | Returns: 39 | Trainer (inherited from zoedepth.trainers.BaseTrainer): The Trainer object 40 | """ 41 | assert "trainer" in config and config.trainer is not None and config.trainer != '', "Trainer not specified. Config: {0}".format( 42 | config) 43 | try: 44 | Trainer = getattr(import_module( 45 | f"zoedepth.trainers.{config.trainer}_trainer"), 'Trainer') 46 | except ModuleNotFoundError as e: 47 | raise ValueError(f"Trainer {config.trainer}_trainer not found.") from e 48 | return Trainer 49 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/eval/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import argparse 8 | from typing import Any, List, Optional, Tuple 9 | 10 | import torch 11 | import torch.backends.cudnn as cudnn 12 | 13 | from dinov2.models import build_model_from_cfg 14 | from dinov2.utils.config import setup 15 | import dinov2.utils.utils as dinov2_utils 16 | 17 | 18 | def get_args_parser( 19 | description: Optional[str] = None, 20 | parents: Optional[List[argparse.ArgumentParser]] = None, 21 | add_help: bool = True, 22 | ): 23 | parser = argparse.ArgumentParser( 24 | description=description, 25 | parents=parents or [], 26 | add_help=add_help, 27 | ) 28 | parser.add_argument( 29 | "--config-file", 30 | type=str, 31 | help="Model configuration file", 32 | ) 33 | parser.add_argument( 34 | "--pretrained-weights", 35 | type=str, 36 | help="Pretrained model weights", 37 | ) 38 | parser.add_argument( 39 | "--output-dir", 40 | default="", 41 | type=str, 42 | help="Output directory to write results and logs", 43 | ) 44 | parser.add_argument( 45 | "--opts", 46 | help="Extra configuration options", 47 | default=[], 48 | nargs="+", 49 | ) 50 | return parser 51 | 52 | 53 | def get_autocast_dtype(config): 54 | teacher_dtype_str = config.compute_precision.teacher.backbone.mixed_precision.param_dtype 55 | if teacher_dtype_str == "fp16": 56 | return torch.half 57 | elif teacher_dtype_str == "bf16": 58 | return torch.bfloat16 59 | else: 60 | return torch.float 61 | 62 | 63 | def build_model_for_eval(config, pretrained_weights): 64 | model, _ = build_model_from_cfg(config, only_teacher=True) 65 | dinov2_utils.load_pretrained_weights(model, pretrained_weights, "teacher") 66 | model.eval() 67 | model.cuda() 68 | return model 69 | 70 | 71 | def setup_and_build_model(args) -> Tuple[Any, torch.dtype]: 72 | cudnn.benchmark = True 73 | config = setup(args) 74 | model = build_model_for_eval(config, args.pretrained_weights) 75 | autocast_dtype = get_autocast_dtype(config) 76 | return model, autocast_dtype 77 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/utils/config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
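# --- Illustrative sketch (editorial addition, not part of the upstream DINOv2 config.py
# that follows). It shows one way to drive get_args_parser / setup_and_build_model from
# eval/setup.py above. The config path points at one of the files under dinov2/configs/eval;
# the checkpoint path is a placeholder, and running this needs a CUDA device because
# build_model_for_eval calls model.cuda().
from dinov2.eval.setup import get_args_parser, setup_and_build_model

args = get_args_parser(description="DINOv2 eval").parse_args([
    "--config-file", "dinov2/configs/eval/vitl14_pretrain.yaml",  # relative to the DINOv2 root
    "--pretrained-weights", "dinov2_vitl14_pretrain.pth",         # placeholder checkpoint path
    "--output-dir", "./eval_output",
])
model, autocast_dtype = setup_and_build_model(args)
print(type(model).__name__, autocast_dtype)
# ---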
6 | 7 | import math 8 | import logging 9 | import os 10 | 11 | from omegaconf import OmegaConf 12 | 13 | import dinov2.distributed as distributed 14 | from dinov2.logging import setup_logging 15 | from dinov2.utils import utils 16 | from dinov2.configs import dinov2_default_config 17 | 18 | 19 | logger = logging.getLogger("dinov2") 20 | 21 | 22 | def apply_scaling_rules_to_cfg(cfg): # to fix 23 | if cfg.optim.scaling_rule == "sqrt_wrt_1024": 24 | base_lr = cfg.optim.base_lr 25 | cfg.optim.lr = base_lr 26 | cfg.optim.lr *= math.sqrt(cfg.train.batch_size_per_gpu * distributed.get_global_size() / 1024.0) 27 | logger.info(f"sqrt scaling learning rate; base: {base_lr}, new: {cfg.optim.lr}") 28 | else: 29 | raise NotImplementedError 30 | return cfg 31 | 32 | 33 | def write_config(cfg, output_dir, name="config.yaml"): 34 | logger.info(OmegaConf.to_yaml(cfg)) 35 | saved_cfg_path = os.path.join(output_dir, name) 36 | with open(saved_cfg_path, "w") as f: 37 | OmegaConf.save(config=cfg, f=f) 38 | return saved_cfg_path 39 | 40 | 41 | def get_cfg_from_args(args): 42 | args.output_dir = os.path.abspath(args.output_dir) 43 | args.opts += [f"train.output_dir={args.output_dir}"] 44 | default_cfg = OmegaConf.create(dinov2_default_config) 45 | cfg = OmegaConf.load(args.config_file) 46 | cfg = OmegaConf.merge(default_cfg, cfg, OmegaConf.from_cli(args.opts)) 47 | return cfg 48 | 49 | 50 | def default_setup(args): 51 | distributed.enable(overwrite=True) 52 | seed = getattr(args, "seed", 0) 53 | rank = distributed.get_global_rank() 54 | 55 | global logger 56 | setup_logging(output=args.output_dir, level=logging.INFO) 57 | logger = logging.getLogger("dinov2") 58 | 59 | utils.fix_random_seeds(seed + rank) 60 | logger.info("git:\n {}\n".format(utils.get_sha())) 61 | logger.info("\n".join("%s: %s" % (k, str(v)) for k, v in sorted(dict(vars(args)).items()))) 62 | 63 | 64 | def setup(args): 65 | """ 66 | Create configs and perform basic setups. 67 | """ 68 | cfg = get_cfg_from_args(args) 69 | os.makedirs(args.output_dir, exist_ok=True) 70 | default_setup(args) 71 | apply_scaling_rules_to_cfg(cfg) 72 | write_config(cfg, args.output_dir) 73 | return cfg 74 | -------------------------------------------------------------------------------- /zoedepth/models/builder.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from importlib import import_module 26 | from zoedepth.models.depth_model import DepthModel 27 | 28 | def build_model(config) -> DepthModel: 29 | """Builds a model from a config. The model is specified by the model name and version in the config. The model is then constructed using the build_from_config function of the model interface. 30 | This function should be used to construct models for training and evaluation. 31 | 32 | Args: 33 | config (dict): Config dict. Config is constructed in utils/config.py. Each model has its own config file(s) saved in its root model folder. 34 | 35 | Returns: 36 | torch.nn.Module: Model corresponding to name and version as specified in config 37 | """ 38 | module_name = f"zoedepth.models.{config.model}" 39 | try: 40 | module = import_module(module_name) 41 | except ModuleNotFoundError as e: 42 | # print the original error message 43 | print(e) 44 | raise ValueError( 45 | f"Model {config.model} not found. Refer above error for details.") from e 46 | try: 47 | get_version = getattr(module, "get_version") 48 | except AttributeError as e: 49 | raise ValueError( 50 | f"Model {config.model} has no get_version function.") from e 51 | return get_version(config.version_name).build_from_config(config) 52 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/attention.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
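# --- Illustrative sketch (editorial addition, not part of the upstream DINOv2 attention.py
# that follows). It shows how build_model from zoedepth/models/builder.py above is meant to
# be driven by a config object. get_config is the helper the docstring points to
# (zoedepth/utils/config.py); its exact signature is not shown in this listing, so the call
# below is an assumption modelled on the upstream ZoeDepth repository.
from zoedepth.utils.config import get_config   # assumed signature: (model_name, mode, ...)
from zoedepth.models.builder import build_model

config = get_config("zoedepth", "infer")        # assumed: picks up config_zoedepth.json plus its "infer" overrides
model = build_model(config).eval()              # imports zoedepth.models.zoedepth, then get_version("v1").build_from_config(config)
print(type(model).__name__)                     # a DepthModel subclass (depth_model.py is not shown here)
# ---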
6 | 7 | # References: 8 | # https://github.com/facebookresearch/dino/blob/master/vision_transformer.py 9 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm/models/vision_transformer.py 10 | 11 | import logging 12 | 13 | from torch import Tensor 14 | from torch import nn 15 | 16 | 17 | logger = logging.getLogger("dinov2") 18 | 19 | 20 | try: 21 | from xformers.ops import memory_efficient_attention, unbind, fmha 22 | 23 | XFORMERS_AVAILABLE = True 24 | except ImportError: 25 | logger.warning("xFormers not available") 26 | XFORMERS_AVAILABLE = False 27 | 28 | 29 | class Attention(nn.Module): 30 | def __init__( 31 | self, 32 | dim: int, 33 | num_heads: int = 8, 34 | qkv_bias: bool = False, 35 | proj_bias: bool = True, 36 | attn_drop: float = 0.0, 37 | proj_drop: float = 0.0, 38 | ) -> None: 39 | super().__init__() 40 | self.num_heads = num_heads 41 | head_dim = dim // num_heads 42 | self.scale = head_dim**-0.5 43 | 44 | self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) 45 | self.attn_drop = nn.Dropout(attn_drop) 46 | self.proj = nn.Linear(dim, dim, bias=proj_bias) 47 | self.proj_drop = nn.Dropout(proj_drop) 48 | 49 | def forward(self, x: Tensor) -> Tensor: 50 | B, N, C = x.shape 51 | qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) 52 | 53 | q, k, v = qkv[0] * self.scale, qkv[1], qkv[2] 54 | attn = q @ k.transpose(-2, -1) 55 | 56 | attn = attn.softmax(dim=-1) 57 | attn = self.attn_drop(attn) 58 | 59 | x = (attn @ v).transpose(1, 2).reshape(B, N, C) 60 | x = self.proj(x) 61 | x = self.proj_drop(x) 62 | return x 63 | 64 | 65 | class MemEffAttention(Attention): 66 | def forward(self, x: Tensor, attn_bias=None) -> Tensor: 67 | if not XFORMERS_AVAILABLE: 68 | assert attn_bias is None, "xFormers is required for nested tensors usage" 69 | return super().forward(x) 70 | 71 | B, N, C = x.shape 72 | qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads) 73 | 74 | q, k, v = unbind(qkv, 2) 75 | 76 | x = memory_efficient_attention(q, k, v, attn_bias=attn_bias) 77 | x = x.reshape([B, N, C]) 78 | 79 | x = self.proj(x) 80 | x = self.proj_drop(x) 81 | return x 82 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from pathlib import Path 8 | import re 9 | from typing import List, Tuple 10 | 11 | from setuptools import setup, find_packages 12 | 13 | 14 | NAME = "dinov2" 15 | DESCRIPTION = "PyTorch code and models for the DINOv2 self-supervised learning method." 
16 | 17 | URL = "https://github.com/facebookresearch/dinov2" 18 | AUTHOR = "FAIR" 19 | REQUIRES_PYTHON = ">=3.9.0" 20 | HERE = Path(__file__).parent 21 | 22 | 23 | try: 24 | with open(HERE / "README.md", encoding="utf-8") as f: 25 | long_description = "\n" + f.read() 26 | except FileNotFoundError: 27 | long_description = DESCRIPTION 28 | 29 | 30 | def get_requirements(path: str = HERE / "requirements.txt") -> Tuple[List[str], List[str]]: 31 | requirements = [] 32 | extra_indices = [] 33 | with open(path) as f: 34 | for line in f.readlines(): 35 | line = line.rstrip("\r\n") 36 | if line.startswith("--extra-index-url "): 37 | extra_indices.append(line[18:]) 38 | continue 39 | requirements.append(line) 40 | return requirements, extra_indices 41 | 42 | 43 | def get_package_version() -> str: 44 | with open(HERE / "dinov2/__init__.py") as f: 45 | result = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", f.read(), re.M) 46 | if result: 47 | return result.group(1) 48 | raise RuntimeError("Can't get package version") 49 | 50 | 51 | requirements, extra_indices = get_requirements() 52 | version = get_package_version() 53 | dev_requirements, _ = get_requirements(HERE / "requirements-dev.txt") 54 | 55 | 56 | setup( 57 | name=NAME, 58 | version=version, 59 | description=DESCRIPTION, 60 | long_description=long_description, 61 | long_description_content_type="text/markdown", 62 | author=AUTHOR, 63 | python_requires=REQUIRES_PYTHON, 64 | url=URL, 65 | packages=find_packages(), 66 | package_data={ 67 | "": ["*.yaml"], 68 | }, 69 | install_requires=requirements, 70 | dependency_links=extra_indices, 71 | extras_require={ 72 | "dev": dev_requirements, 73 | }, 74 | install_package_data=True, 75 | license="CC-BY-NC", 76 | license_files=("LICENSE",), 77 | classifiers=[ 78 | # Trove classifiers: https://github.com/pypa/trove-classifiers/blob/main/src/trove_classifiers/__init__.py 79 | "Development Status :: 3 - Alpha", 80 | "Intended Audience :: Developers", 81 | "Intended Audience :: Science/Research", 82 | "License :: Other/Proprietary License", 83 | "Programming Language :: Python :: 3.9", 84 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 85 | "Topic :: Software Development :: Libraries :: Python Modules", 86 | ], 87 | ) 88 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/configs/ssl_default_config.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | WEIGHTS: '' 3 | compute_precision: 4 | grad_scaler: true 5 | teacher: 6 | backbone: 7 | sharding_strategy: SHARD_GRAD_OP 8 | mixed_precision: 9 | param_dtype: fp16 10 | reduce_dtype: fp16 11 | buffer_dtype: fp32 12 | dino_head: 13 | sharding_strategy: SHARD_GRAD_OP 14 | mixed_precision: 15 | param_dtype: fp16 16 | reduce_dtype: fp16 17 | buffer_dtype: fp32 18 | ibot_head: 19 | sharding_strategy: SHARD_GRAD_OP 20 | mixed_precision: 21 | param_dtype: fp16 22 | reduce_dtype: fp16 23 | buffer_dtype: fp32 24 | student: 25 | backbone: 26 | sharding_strategy: SHARD_GRAD_OP 27 | mixed_precision: 28 | param_dtype: fp16 29 | reduce_dtype: fp16 30 | buffer_dtype: fp32 31 | dino_head: 32 | sharding_strategy: SHARD_GRAD_OP 33 | mixed_precision: 34 | param_dtype: fp16 35 | reduce_dtype: fp32 36 | buffer_dtype: fp32 37 | ibot_head: 38 | sharding_strategy: SHARD_GRAD_OP 39 | mixed_precision: 40 | param_dtype: fp16 41 | reduce_dtype: fp32 42 | buffer_dtype: fp32 43 | dino: 44 | loss_weight: 1.0 45 | head_n_prototypes: 65536 46 | 
head_bottleneck_dim: 256 47 | head_nlayers: 3 48 | head_hidden_dim: 2048 49 | koleo_loss_weight: 0.1 50 | ibot: 51 | loss_weight: 1.0 52 | mask_sample_probability: 0.5 53 | mask_ratio_min_max: 54 | - 0.1 55 | - 0.5 56 | separate_head: false 57 | head_n_prototypes: 65536 58 | head_bottleneck_dim: 256 59 | head_nlayers: 3 60 | head_hidden_dim: 2048 61 | train: 62 | batch_size_per_gpu: 64 63 | dataset_path: ImageNet:split=TRAIN 64 | output_dir: . 65 | saveckp_freq: 20 66 | seed: 0 67 | num_workers: 10 68 | OFFICIAL_EPOCH_LENGTH: 1250 69 | cache_dataset: true 70 | centering: "centering" # or "sinkhorn_knopp" 71 | student: 72 | arch: vit_large 73 | patch_size: 16 74 | drop_path_rate: 0.3 75 | layerscale: 1.0e-05 76 | drop_path_uniform: true 77 | pretrained_weights: '' 78 | ffn_layer: "mlp" 79 | block_chunks: 0 80 | qkv_bias: true 81 | proj_bias: true 82 | ffn_bias: true 83 | teacher: 84 | momentum_teacher: 0.992 85 | final_momentum_teacher: 1 86 | warmup_teacher_temp: 0.04 87 | teacher_temp: 0.07 88 | warmup_teacher_temp_epochs: 30 89 | optim: 90 | epochs: 100 91 | weight_decay: 0.04 92 | weight_decay_end: 0.4 93 | base_lr: 0.004 # learning rate for a batch size of 1024 94 | lr: 0. # will be set after applying scaling rule 95 | warmup_epochs: 10 96 | min_lr: 1.0e-06 97 | clip_grad: 3.0 98 | freeze_last_layer_epochs: 1 99 | scaling_rule: sqrt_wrt_1024 100 | patch_embed_lr_mult: 0.2 101 | layerwise_decay: 0.9 102 | adamw_beta1: 0.9 103 | adamw_beta2: 0.999 104 | crops: 105 | global_crops_scale: 106 | - 0.32 107 | - 1.0 108 | local_crops_number: 8 109 | local_crops_scale: 110 | - 0.05 111 | - 0.32 112 | global_crops_size: 224 113 | local_crops_size: 96 114 | evaluation: 115 | eval_period_iterations: 12500 116 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/layers/patch_embed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | # References: 8 | # https://github.com/facebookresearch/dino/blob/master/vision_transformer.py 9 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/patch_embed.py 10 | 11 | from typing import Callable, Optional, Tuple, Union 12 | 13 | from torch import Tensor 14 | import torch.nn as nn 15 | 16 | 17 | def make_2tuple(x): 18 | if isinstance(x, tuple): 19 | assert len(x) == 2 20 | return x 21 | 22 | assert isinstance(x, int) 23 | return (x, x) 24 | 25 | 26 | class PatchEmbed(nn.Module): 27 | """ 28 | 2D image to patch embedding: (B,C,H,W) -> (B,N,D) 29 | 30 | Args: 31 | img_size: Image size. 32 | patch_size: Patch token size. 33 | in_chans: Number of input image channels. 34 | embed_dim: Number of linear projection output channels. 35 | norm_layer: Normalization layer. 
36 | """ 37 | 38 | def __init__( 39 | self, 40 | img_size: Union[int, Tuple[int, int]] = 224, 41 | patch_size: Union[int, Tuple[int, int]] = 16, 42 | in_chans: int = 3, 43 | embed_dim: int = 768, 44 | norm_layer: Optional[Callable] = None, 45 | flatten_embedding: bool = True, 46 | ) -> None: 47 | super().__init__() 48 | 49 | image_HW = make_2tuple(img_size) 50 | patch_HW = make_2tuple(patch_size) 51 | patch_grid_size = ( 52 | image_HW[0] // patch_HW[0], 53 | image_HW[1] // patch_HW[1], 54 | ) 55 | 56 | self.img_size = image_HW 57 | self.patch_size = patch_HW 58 | self.patches_resolution = patch_grid_size 59 | self.num_patches = patch_grid_size[0] * patch_grid_size[1] 60 | 61 | self.in_chans = in_chans 62 | self.embed_dim = embed_dim 63 | 64 | self.flatten_embedding = flatten_embedding 65 | 66 | self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_HW, stride=patch_HW) 67 | self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity() 68 | 69 | def forward(self, x: Tensor) -> Tensor: 70 | _, _, H, W = x.shape 71 | patch_H, patch_W = self.patch_size 72 | 73 | assert H % patch_H == 0, f"Input image height {H} is not a multiple of patch height {patch_H}" 74 | assert W % patch_W == 0, f"Input image width {W} is not a multiple of patch width: {patch_W}" 75 | 76 | x = self.proj(x) # B C H W 77 | H, W = x.size(2), x.size(3) 78 | x = x.flatten(2).transpose(1, 2) # B HW C 79 | x = self.norm(x) 80 | if not self.flatten_embedding: 81 | x = x.reshape(-1, H, W, self.embed_dim) # B H W C 82 | return x 83 | 84 | def flops(self) -> float: 85 | Ho, Wo = self.patches_resolution 86 | flops = Ho * Wo * self.embed_dim * self.in_chans * (self.patch_size[0] * self.patch_size[1]) 87 | if self.norm is not None: 88 | flops += Ho * Wo * self.embed_dim 89 | return flops 90 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/data/masking.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import random 8 | import math 9 | import numpy as np 10 | 11 | 12 | class MaskingGenerator: 13 | def __init__( 14 | self, 15 | input_size, 16 | num_masking_patches=None, 17 | min_num_patches=4, 18 | max_num_patches=None, 19 | min_aspect=0.3, 20 | max_aspect=None, 21 | ): 22 | if not isinstance(input_size, tuple): 23 | input_size = (input_size,) * 2 24 | self.height, self.width = input_size 25 | 26 | self.num_patches = self.height * self.width 27 | self.num_masking_patches = num_masking_patches 28 | 29 | self.min_num_patches = min_num_patches 30 | self.max_num_patches = num_masking_patches if max_num_patches is None else max_num_patches 31 | 32 | max_aspect = max_aspect or 1 / min_aspect 33 | self.log_aspect_ratio = (math.log(min_aspect), math.log(max_aspect)) 34 | 35 | def __repr__(self): 36 | repr_str = "Generator(%d, %d -> [%d ~ %d], max = %d, %.3f ~ %.3f)" % ( 37 | self.height, 38 | self.width, 39 | self.min_num_patches, 40 | self.max_num_patches, 41 | self.num_masking_patches, 42 | self.log_aspect_ratio[0], 43 | self.log_aspect_ratio[1], 44 | ) 45 | return repr_str 46 | 47 | def get_shape(self): 48 | return self.height, self.width 49 | 50 | def _mask(self, mask, max_mask_patches): 51 | delta = 0 52 | for _ in range(10): 53 | target_area = random.uniform(self.min_num_patches, max_mask_patches) 54 | aspect_ratio = math.exp(random.uniform(*self.log_aspect_ratio)) 55 | h = int(round(math.sqrt(target_area * aspect_ratio))) 56 | w = int(round(math.sqrt(target_area / aspect_ratio))) 57 | if w < self.width and h < self.height: 58 | top = random.randint(0, self.height - h) 59 | left = random.randint(0, self.width - w) 60 | 61 | num_masked = mask[top : top + h, left : left + w].sum() 62 | # Overlap 63 | if 0 < h * w - num_masked <= max_mask_patches: 64 | for i in range(top, top + h): 65 | for j in range(left, left + w): 66 | if mask[i, j] == 0: 67 | mask[i, j] = 1 68 | delta += 1 69 | 70 | if delta > 0: 71 | break 72 | return delta 73 | 74 | def __call__(self, num_masking_patches=0): 75 | mask = np.zeros(shape=self.get_shape(), dtype=bool) 76 | mask_count = 0 77 | while mask_count < num_masking_patches: 78 | max_mask_patches = num_masking_patches - mask_count 79 | max_mask_patches = min(max_mask_patches, self.max_num_patches) 80 | 81 | delta = self._mask(mask, max_mask_patches) 82 | if delta == 0: 83 | break 84 | else: 85 | mask_count += delta 86 | 87 | return mask 88 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/utils/cluster.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
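# --- Illustrative sketch (editorial addition, not part of the upstream cluster.py that
# follows). It exercises the MaskingGenerator defined just above: on a 14x14 patch grid it
# keeps sampling rectangular blocks until roughly the requested number of patches is masked,
# returning a boolean (14, 14) numpy array. Block sampling is stochastic, so the final count
# can fall short of the target.
from dinov2.data.masking import MaskingGenerator

generator = MaskingGenerator(input_size=14, max_num_patches=98)  # 98 = 0.5 * 14 * 14
mask = generator(num_masking_patches=49)                          # ask for ~25% of the 196 patches
print(mask.shape, int(mask.sum()))
# ---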
6 | 7 | from enum import Enum 8 | import os 9 | from pathlib import Path 10 | from typing import Any, Dict, Optional 11 | 12 | 13 | class ClusterType(Enum): 14 | AWS = "aws" 15 | FAIR = "fair" 16 | RSC = "rsc" 17 | 18 | 19 | def _guess_cluster_type() -> ClusterType: 20 | uname = os.uname() 21 | if uname.sysname == "Linux": 22 | if uname.release.endswith("-aws"): 23 | # Linux kernel versions on AWS instances are of the form "5.4.0-1051-aws" 24 | return ClusterType.AWS 25 | elif uname.nodename.startswith("rsc"): 26 | # Linux kernel versions on RSC instances are standard ones but hostnames start with "rsc" 27 | return ClusterType.RSC 28 | 29 | return ClusterType.FAIR 30 | 31 | 32 | def get_cluster_type(cluster_type: Optional[ClusterType] = None) -> Optional[ClusterType]: 33 | if cluster_type is None: 34 | return _guess_cluster_type() 35 | 36 | return cluster_type 37 | 38 | 39 | def get_checkpoint_path(cluster_type: Optional[ClusterType] = None) -> Optional[Path]: 40 | cluster_type = get_cluster_type(cluster_type) 41 | if cluster_type is None: 42 | return None 43 | 44 | CHECKPOINT_DIRNAMES = { 45 | ClusterType.AWS: "checkpoints", 46 | ClusterType.FAIR: "checkpoint", 47 | ClusterType.RSC: "checkpoint/dino", 48 | } 49 | return Path("/") / CHECKPOINT_DIRNAMES[cluster_type] 50 | 51 | 52 | def get_user_checkpoint_path(cluster_type: Optional[ClusterType] = None) -> Optional[Path]: 53 | checkpoint_path = get_checkpoint_path(cluster_type) 54 | if checkpoint_path is None: 55 | return None 56 | 57 | username = os.environ.get("USER") 58 | assert username is not None 59 | return checkpoint_path / username 60 | 61 | 62 | def get_slurm_partition(cluster_type: Optional[ClusterType] = None) -> Optional[str]: 63 | cluster_type = get_cluster_type(cluster_type) 64 | if cluster_type is None: 65 | return None 66 | 67 | SLURM_PARTITIONS = { 68 | ClusterType.AWS: "learnlab", 69 | ClusterType.FAIR: "learnlab", 70 | ClusterType.RSC: "learn", 71 | } 72 | return SLURM_PARTITIONS[cluster_type] 73 | 74 | 75 | def get_slurm_executor_parameters( 76 | nodes: int, num_gpus_per_node: int, cluster_type: Optional[ClusterType] = None, **kwargs 77 | ) -> Dict[str, Any]: 78 | # create default parameters 79 | params = { 80 | "mem_gb": 0, # Requests all memory on a node, see https://slurm.schedmd.com/sbatch.html 81 | "gpus_per_node": num_gpus_per_node, 82 | "tasks_per_node": num_gpus_per_node, # one task per GPU 83 | "cpus_per_task": 10, 84 | "nodes": nodes, 85 | "slurm_partition": get_slurm_partition(cluster_type), 86 | } 87 | # apply cluster-specific adjustments 88 | cluster_type = get_cluster_type(cluster_type) 89 | if cluster_type == ClusterType.AWS: 90 | params["cpus_per_task"] = 12 91 | del params["mem_gb"] 92 | elif cluster_type == ClusterType.RSC: 93 | params["cpus_per_task"] = 12 94 | # set additional parameters / apply overrides 95 | params.update(kwargs) 96 | return params 97 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/data/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from typing import Sequence 8 | 9 | import torch 10 | from torchvision import transforms 11 | 12 | 13 | class GaussianBlur(transforms.RandomApply): 14 | """ 15 | Apply Gaussian Blur to the PIL image. 
16 | """ 17 | 18 | def __init__(self, *, p: float = 0.5, radius_min: float = 0.1, radius_max: float = 2.0): 19 | # NOTE: torchvision is applying 1 - probability to return the original image 20 | keep_p = 1 - p 21 | transform = transforms.GaussianBlur(kernel_size=9, sigma=(radius_min, radius_max)) 22 | super().__init__(transforms=[transform], p=keep_p) 23 | 24 | 25 | class MaybeToTensor(transforms.ToTensor): 26 | """ 27 | Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor, or keep as is if already a tensor. 28 | """ 29 | 30 | def __call__(self, pic): 31 | """ 32 | Args: 33 | pic (PIL Image, numpy.ndarray or torch.tensor): Image to be converted to tensor. 34 | Returns: 35 | Tensor: Converted image. 36 | """ 37 | if isinstance(pic, torch.Tensor): 38 | return pic 39 | return super().__call__(pic) 40 | 41 | 42 | # Use timm's names 43 | IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406) 44 | IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225) 45 | 46 | 47 | def make_normalize_transform( 48 | mean: Sequence[float] = IMAGENET_DEFAULT_MEAN, 49 | std: Sequence[float] = IMAGENET_DEFAULT_STD, 50 | ) -> transforms.Normalize: 51 | return transforms.Normalize(mean=mean, std=std) 52 | 53 | 54 | # This roughly matches torchvision's preset for classification training: 55 | # https://github.com/pytorch/vision/blob/main/references/classification/presets.py#L6-L44 56 | def make_classification_train_transform( 57 | *, 58 | crop_size: int = 224, 59 | interpolation=transforms.InterpolationMode.BICUBIC, 60 | hflip_prob: float = 0.5, 61 | mean: Sequence[float] = IMAGENET_DEFAULT_MEAN, 62 | std: Sequence[float] = IMAGENET_DEFAULT_STD, 63 | ): 64 | transforms_list = [transforms.RandomResizedCrop(crop_size, interpolation=interpolation)] 65 | if hflip_prob > 0.0: 66 | transforms_list.append(transforms.RandomHorizontalFlip(hflip_prob)) 67 | transforms_list.extend( 68 | [ 69 | MaybeToTensor(), 70 | make_normalize_transform(mean=mean, std=std), 71 | ] 72 | ) 73 | return transforms.Compose(transforms_list) 74 | 75 | 76 | # This matches (roughly) torchvision's preset for classification evaluation: 77 | # https://github.com/pytorch/vision/blob/main/references/classification/presets.py#L47-L69 78 | def make_classification_eval_transform( 79 | *, 80 | resize_size: int = 256, 81 | interpolation=transforms.InterpolationMode.BICUBIC, 82 | crop_size: int = 224, 83 | mean: Sequence[float] = IMAGENET_DEFAULT_MEAN, 84 | std: Sequence[float] = IMAGENET_DEFAULT_STD, 85 | ) -> transforms.Compose: 86 | transforms_list = [ 87 | transforms.Resize(resize_size, interpolation=interpolation), 88 | transforms.CenterCrop(crop_size), 89 | MaybeToTensor(), 90 | make_normalize_transform(mean=mean, std=std), 91 | ] 92 | return transforms.Compose(transforms_list) 93 | -------------------------------------------------------------------------------- /zoedepth/data/ibims.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice 
shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms as T 32 | 33 | 34 | class iBims(Dataset): 35 | def __init__(self, config): 36 | root_folder = config.ibims_root 37 | with open(os.path.join(root_folder, "imagelist.txt"), 'r') as f: 38 | imglist = f.read().split() 39 | 40 | samples = [] 41 | for basename in imglist: 42 | img_path = os.path.join(root_folder, 'rgb', basename + ".png") 43 | depth_path = os.path.join(root_folder, 'depth', basename + ".png") 44 | valid_mask_path = os.path.join( 45 | root_folder, 'mask_invalid', basename+".png") 46 | transp_mask_path = os.path.join( 47 | root_folder, 'mask_transp', basename+".png") 48 | 49 | samples.append( 50 | (img_path, depth_path, valid_mask_path, transp_mask_path)) 51 | 52 | self.samples = samples 53 | # self.normalize = T.Normalize( 54 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 55 | self.normalize = lambda x : x 56 | 57 | def __getitem__(self, idx): 58 | img_path, depth_path, valid_mask_path, transp_mask_path = self.samples[idx] 59 | 60 | img = np.asarray(Image.open(img_path), dtype=np.float32) / 255.0 61 | depth = np.asarray(Image.open(depth_path), 62 | dtype=np.uint16).astype('float')*50.0/65535 63 | 64 | mask_valid = np.asarray(Image.open(valid_mask_path)) 65 | mask_transp = np.asarray(Image.open(transp_mask_path)) 66 | 67 | # depth = depth * mask_valid * mask_transp 68 | depth = np.where(mask_valid * mask_transp, depth, -1) 69 | 70 | img = torch.from_numpy(img).permute(2, 0, 1) 71 | img = self.normalize(img) 72 | depth = torch.from_numpy(depth).unsqueeze(0) 73 | return dict(image=img, depth=depth, image_path=img_path, depth_path=depth_path, dataset='ibims') 74 | 75 | def __len__(self): 76 | return len(self.samples) 77 | 78 | 79 | def get_ibims_loader(config, batch_size=1, **kwargs): 80 | dataloader = DataLoader(iBims(config), batch_size=batch_size, **kwargs) 81 | return dataloader 82 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/utils/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
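# --- Illustrative sketch (editorial addition, not part of the upstream utils.py that
# follows). It shows the intended use of make_classification_eval_transform from
# dinov2/data/transforms.py above: resize to 256, center-crop to 224, convert to a tensor
# and normalize with the ImageNet mean/std. The image path is a placeholder.
from PIL import Image
from dinov2.data.transforms import make_classification_eval_transform

transform = make_classification_eval_transform()
image = Image.open("example.png").convert("RGB")   # placeholder input image
batch = transform(image).unsqueeze(0)              # shape: (1, 3, 224, 224)
# ---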
6 | 7 | import logging 8 | import os 9 | import random 10 | import subprocess 11 | from urllib.parse import urlparse 12 | 13 | import numpy as np 14 | import torch 15 | from torch import nn 16 | 17 | 18 | logger = logging.getLogger("dinov2") 19 | 20 | 21 | def load_pretrained_weights(model, pretrained_weights, checkpoint_key): 22 | if urlparse(pretrained_weights).scheme: # If it looks like an URL 23 | state_dict = torch.hub.load_state_dict_from_url(pretrained_weights, map_location="cpu") 24 | else: 25 | state_dict = torch.load(pretrained_weights, map_location="cpu") 26 | if checkpoint_key is not None and checkpoint_key in state_dict: 27 | logger.info(f"Take key {checkpoint_key} in provided checkpoint dict") 28 | state_dict = state_dict[checkpoint_key] 29 | # remove `module.` prefix 30 | state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()} 31 | # remove `backbone.` prefix induced by multicrop wrapper 32 | state_dict = {k.replace("backbone.", ""): v for k, v in state_dict.items()} 33 | msg = model.load_state_dict(state_dict, strict=False) 34 | logger.info("Pretrained weights found at {} and loaded with msg: {}".format(pretrained_weights, msg)) 35 | 36 | 37 | def fix_random_seeds(seed=31): 38 | """ 39 | Fix random seeds. 40 | """ 41 | torch.manual_seed(seed) 42 | torch.cuda.manual_seed_all(seed) 43 | np.random.seed(seed) 44 | random.seed(seed) 45 | 46 | 47 | def get_sha(): 48 | cwd = os.path.dirname(os.path.abspath(__file__)) 49 | 50 | def _run(command): 51 | return subprocess.check_output(command, cwd=cwd).decode("ascii").strip() 52 | 53 | sha = "N/A" 54 | diff = "clean" 55 | branch = "N/A" 56 | try: 57 | sha = _run(["git", "rev-parse", "HEAD"]) 58 | subprocess.check_output(["git", "diff"], cwd=cwd) 59 | diff = _run(["git", "diff-index", "HEAD"]) 60 | diff = "has uncommitted changes" if diff else "clean" 61 | branch = _run(["git", "rev-parse", "--abbrev-ref", "HEAD"]) 62 | except Exception: 63 | pass 64 | message = f"sha: {sha}, status: {diff}, branch: {branch}" 65 | return message 66 | 67 | 68 | class CosineScheduler(object): 69 | def __init__(self, base_value, final_value, total_iters, warmup_iters=0, start_warmup_value=0, freeze_iters=0): 70 | super().__init__() 71 | self.final_value = final_value 72 | self.total_iters = total_iters 73 | 74 | freeze_schedule = np.zeros((freeze_iters)) 75 | 76 | warmup_schedule = np.linspace(start_warmup_value, base_value, warmup_iters) 77 | 78 | iters = np.arange(total_iters - warmup_iters - freeze_iters) 79 | schedule = final_value + 0.5 * (base_value - final_value) * (1 + np.cos(np.pi * iters / len(iters))) 80 | self.schedule = np.concatenate((freeze_schedule, warmup_schedule, schedule)) 81 | 82 | assert len(self.schedule) == self.total_iters 83 | 84 | def __getitem__(self, it): 85 | if it >= self.total_iters: 86 | return self.final_value 87 | else: 88 | return self.schedule[it] 89 | 90 | 91 | def has_batchnorms(model): 92 | bn_types = (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm) 93 | for name, module in model.named_modules(): 94 | if isinstance(module, bn_types): 95 | return True 96 | return False 97 | -------------------------------------------------------------------------------- /zoedepth/models/model_io.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files 
(the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | 27 | def load_state_dict(model, state_dict): 28 | """Load state_dict into model, handling DataParallel and DistributedDataParallel. Also checks for "model" key in state_dict. 29 | 30 | DataParallel prefixes state_dict keys with 'module.' when saving. 31 | If the model is not a DataParallel model but the state_dict is, then prefixes are removed. 32 | If the model is a DataParallel model but the state_dict is not, then prefixes are added. 33 | """ 34 | state_dict = state_dict.get('model', state_dict) 35 | # if model is a DataParallel model, then state_dict keys are prefixed with 'module.' 36 | 37 | do_prefix = isinstance( 38 | model, (torch.nn.DataParallel, torch.nn.parallel.DistributedDataParallel)) 39 | state = {} 40 | for k, v in state_dict.items(): 41 | if k.startswith('module.') and not do_prefix: 42 | k = k[7:] 43 | 44 | if not k.startswith('module.') and do_prefix: 45 | k = 'module.' + k 46 | 47 | state[k] = v 48 | 49 | model.load_state_dict(state) 50 | print("Loaded successfully") 51 | return model 52 | 53 | 54 | def load_wts(model, checkpoint_path): 55 | ckpt = torch.load(checkpoint_path, map_location='cpu') 56 | return load_state_dict(model, ckpt) 57 | 58 | 59 | def load_state_dict_from_url(model, url, **kwargs): 60 | state_dict = torch.hub.load_state_dict_from_url(url, map_location='cpu', **kwargs) 61 | return load_state_dict(model, state_dict) 62 | 63 | 64 | def load_state_from_resource(model, resource: str): 65 | """Loads weights to the model from a given resource. A resource can be of following types: 66 | 1. URL. Prefixed with "url::" 67 | e.g. url::http(s)://url.resource.com/ckpt.pt 68 | 69 | 2. Local path. Prefixed with "local::" 70 | e.g. 
local::/path/to/ckpt.pt 71 | 72 | 73 | Args: 74 | model (torch.nn.Module): Model 75 | resource (str): resource string 76 | 77 | Returns: 78 | torch.nn.Module: Model with loaded weights 79 | """ 80 | print(f"Using pretrained resource {resource}") 81 | 82 | if resource.startswith('url::'): 83 | url = resource.split('url::')[1] 84 | return load_state_dict_from_url(model, url, progress=True) 85 | 86 | elif resource.startswith('local::'): 87 | path = resource.split('local::')[1] 88 | return load_wts(model, path) 89 | 90 | else: 91 | raise ValueError("Invalid resource type, only url:: and local:: are supported") 92 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/logging/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import functools 8 | import logging 9 | import os 10 | import sys 11 | from typing import Optional 12 | 13 | import dinov2.distributed as distributed 14 | from .helpers import MetricLogger, SmoothedValue 15 | 16 | 17 | # So that calling _configure_logger multiple times won't add many handlers 18 | @functools.lru_cache() 19 | def _configure_logger( 20 | name: Optional[str] = None, 21 | *, 22 | level: int = logging.DEBUG, 23 | output: Optional[str] = None, 24 | ): 25 | """ 26 | Configure a logger. 27 | 28 | Adapted from Detectron2. 29 | 30 | Args: 31 | name: The name of the logger to configure. 32 | level: The logging level to use. 33 | output: A file name or a directory to save log. If None, will not save log file. 34 | If ends with ".txt" or ".log", assumed to be a file name. 35 | Otherwise, logs will be saved to `output/log.txt`. 36 | 37 | Returns: 38 | The configured logger. 
39 | """ 40 | 41 | logger = logging.getLogger(name) 42 | logger.setLevel(level) 43 | logger.propagate = False 44 | 45 | # Loosely match Google glog format: 46 | # [IWEF]yyyymmdd hh:mm:ss.uuuuuu threadid file:line] msg 47 | # but use a shorter timestamp and include the logger name: 48 | # [IWEF]yyyymmdd hh:mm:ss logger threadid file:line] msg 49 | fmt_prefix = "%(levelname).1s%(asctime)s %(process)s %(name)s %(filename)s:%(lineno)s] " 50 | fmt_message = "%(message)s" 51 | fmt = fmt_prefix + fmt_message 52 | datefmt = "%Y%m%d %H:%M:%S" 53 | formatter = logging.Formatter(fmt=fmt, datefmt=datefmt) 54 | 55 | # stdout logging for main worker only 56 | if distributed.is_main_process(): 57 | handler = logging.StreamHandler(stream=sys.stdout) 58 | handler.setLevel(logging.DEBUG) 59 | handler.setFormatter(formatter) 60 | logger.addHandler(handler) 61 | 62 | # file logging for all workers 63 | if output: 64 | if os.path.splitext(output)[-1] in (".txt", ".log"): 65 | filename = output 66 | else: 67 | filename = os.path.join(output, "logs", "log.txt") 68 | 69 | if not distributed.is_main_process(): 70 | global_rank = distributed.get_global_rank() 71 | filename = filename + ".rank{}".format(global_rank) 72 | 73 | os.makedirs(os.path.dirname(filename), exist_ok=True) 74 | 75 | handler = logging.StreamHandler(open(filename, "a")) 76 | handler.setLevel(logging.DEBUG) 77 | handler.setFormatter(formatter) 78 | logger.addHandler(handler) 79 | 80 | return logger 81 | 82 | 83 | def setup_logging( 84 | output: Optional[str] = None, 85 | *, 86 | name: Optional[str] = None, 87 | level: int = logging.DEBUG, 88 | capture_warnings: bool = True, 89 | ) -> None: 90 | """ 91 | Setup logging. 92 | 93 | Args: 94 | output: A file name or a directory to save log files. If None, log 95 | files will not be saved. If output ends with ".txt" or ".log", it 96 | is assumed to be a file name. 97 | Otherwise, logs will be saved to `output/log.txt`. 98 | name: The name of the logger to configure, by default the root logger. 99 | level: The logging level to use. 100 | capture_warnings: Whether warnings should be captured as logs. 101 | """ 102 | logging.captureWarnings(capture_warnings) 103 | _configure_logger(name, level=level, output=output) 104 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to make participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies within all project spaces, and it also applies when 49 | an individual is representing the project or its community in public spaces. 50 | Examples of representing a project or community include using an official 51 | project e-mail address, posting via an official social media account, or acting 52 | as an appointed representative at an online or offline event. Representation of 53 | a project may be further defined and clarified by project maintainers. 54 | 55 | This Code of Conduct also applies outside the project spaces when there is a 56 | reasonable belief that an individual's behavior may have a negative impact on 57 | the project or its community. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported by contacting the project team at . All 63 | complaints will be reviewed and investigated and will result in a response that 64 | is deemed necessary and appropriate to the circumstances. The project team is 65 | obligated to maintain confidentiality with regard to the reporter of an incident. 66 | Further details of specific enforcement policies may be posted separately. 67 | 68 | Project maintainers who do not follow or enforce the Code of Conduct in good 69 | faith may face temporary or permanent repercussions as determined by other 70 | members of the project's leadership. 
71 | 72 | ## Attribution 73 | 74 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 75 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 76 | 77 | [homepage]: https://www.contributor-covenant.org 78 | 79 | For answers to common questions about this code of conduct, see 80 | https://www.contributor-covenant.org/faq 81 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/run/submit.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import argparse 8 | import logging 9 | import os 10 | from pathlib import Path 11 | from typing import List, Optional 12 | 13 | import submitit 14 | 15 | from dinov2.utils.cluster import ( 16 | get_slurm_executor_parameters, 17 | get_slurm_partition, 18 | get_user_checkpoint_path, 19 | ) 20 | 21 | 22 | logger = logging.getLogger("dinov2") 23 | 24 | 25 | def get_args_parser( 26 | description: Optional[str] = None, 27 | parents: Optional[List[argparse.ArgumentParser]] = None, 28 | add_help: bool = True, 29 | ) -> argparse.ArgumentParser: 30 | parents = parents or [] 31 | slurm_partition = get_slurm_partition() 32 | parser = argparse.ArgumentParser( 33 | description=description, 34 | parents=parents, 35 | add_help=add_help, 36 | ) 37 | parser.add_argument( 38 | "--ngpus", 39 | "--gpus", 40 | "--gpus-per-node", 41 | default=8, 42 | type=int, 43 | help="Number of GPUs to request on each node", 44 | ) 45 | parser.add_argument( 46 | "--nodes", 47 | "--nnodes", 48 | default=2, 49 | type=int, 50 | help="Number of nodes to request", 51 | ) 52 | parser.add_argument( 53 | "--timeout", 54 | default=2800, 55 | type=int, 56 | help="Duration of the job", 57 | ) 58 | parser.add_argument( 59 | "--partition", 60 | default=slurm_partition, 61 | type=str, 62 | help="Partition where to submit", 63 | ) 64 | parser.add_argument( 65 | "--use-volta32", 66 | action="store_true", 67 | help="Request V100-32GB GPUs", 68 | ) 69 | parser.add_argument( 70 | "--comment", 71 | default="", 72 | type=str, 73 | help="Comment to pass to scheduler, e.g. 
priority message", 74 | ) 75 | parser.add_argument( 76 | "--exclude", 77 | default="", 78 | type=str, 79 | help="Nodes to exclude", 80 | ) 81 | return parser 82 | 83 | 84 | def get_shared_folder() -> Path: 85 | user_checkpoint_path = get_user_checkpoint_path() 86 | if user_checkpoint_path is None: 87 | raise RuntimeError("Path to user checkpoint cannot be determined") 88 | path = user_checkpoint_path / "experiments" 89 | path.mkdir(exist_ok=True) 90 | return path 91 | 92 | 93 | def submit_jobs(task_class, args, name: str): 94 | if not args.output_dir: 95 | args.output_dir = str(get_shared_folder() / "%j") 96 | 97 | Path(args.output_dir).mkdir(parents=True, exist_ok=True) 98 | executor = submitit.AutoExecutor(folder=args.output_dir, slurm_max_num_timeout=30) 99 | 100 | kwargs = {} 101 | if args.use_volta32: 102 | kwargs["slurm_constraint"] = "volta32gb" 103 | if args.comment: 104 | kwargs["slurm_comment"] = args.comment 105 | if args.exclude: 106 | kwargs["slurm_exclude"] = args.exclude 107 | 108 | executor_params = get_slurm_executor_parameters( 109 | nodes=args.nodes, 110 | num_gpus_per_node=args.ngpus, 111 | timeout_min=args.timeout, # max is 60 * 72 112 | slurm_signal_delay_s=120, 113 | slurm_partition=args.partition, 114 | **kwargs, 115 | ) 116 | executor.update_parameters(name=name, **executor_params) 117 | 118 | task = task_class(args) 119 | job = executor.submit(task) 120 | 121 | logger.info(f"Submitted job_id: {job.job_id}") 122 | str_output_dir = os.path.abspath(args.output_dir).replace("%j", str(job.job_id)) 123 | logger.info(f"Logs and checkpoints will be saved at: {str_output_dir}") 124 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/utils/param_groups.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from collections import defaultdict 8 | import logging 9 | 10 | 11 | logger = logging.getLogger("dinov2") 12 | 13 | 14 | def get_vit_lr_decay_rate(name, lr_decay_rate=1.0, num_layers=12, force_is_backbone=False, chunked_blocks=False): 15 | """ 16 | Calculate lr decay rate for different ViT blocks. 17 | Args: 18 | name (string): parameter name. 19 | lr_decay_rate (float): base lr decay rate. 20 | num_layers (int): number of ViT blocks. 21 | Returns: 22 | lr decay rate for the given parameter. 23 | """ 24 | layer_id = num_layers + 1 25 | if name.startswith("backbone") or force_is_backbone: 26 | if ".pos_embed" in name or ".patch_embed" in name or ".mask_token" in name or ".cls_token" in name: 27 | layer_id = 0 28 | elif force_is_backbone and ( 29 | "pos_embed" in name or "patch_embed" in name or "mask_token" in name or "cls_token" in name 30 | ): 31 | layer_id = 0 32 | elif ".blocks." in name and ".residual." not in name: 33 | layer_id = int(name[name.find(".blocks.") :].split(".")[2]) + 1 34 | elif chunked_blocks and "blocks." in name and "residual." not in name: 35 | layer_id = int(name[name.find("blocks.") :].split(".")[2]) + 1 36 | elif "blocks." in name and "residual." 
not in name: 37 | layer_id = int(name[name.find("blocks.") :].split(".")[1]) + 1 38 | 39 | return lr_decay_rate ** (num_layers + 1 - layer_id) 40 | 41 | 42 | def get_params_groups_with_decay(model, lr_decay_rate=1.0, patch_embed_lr_mult=1.0): 43 | chunked_blocks = False 44 | if hasattr(model, "n_blocks"): 45 | logger.info("chunked fsdp") 46 | n_blocks = model.n_blocks 47 | chunked_blocks = model.chunked_blocks 48 | elif hasattr(model, "blocks"): 49 | logger.info("first code branch") 50 | n_blocks = len(model.blocks) 51 | elif hasattr(model, "backbone"): 52 | logger.info("second code branch") 53 | n_blocks = len(model.backbone.blocks) 54 | else: 55 | logger.info("else code branch") 56 | n_blocks = 0 57 | all_param_groups = [] 58 | 59 | for name, param in model.named_parameters(): 60 | name = name.replace("_fsdp_wrapped_module.", "") 61 | if not param.requires_grad: 62 | continue 63 | decay_rate = get_vit_lr_decay_rate( 64 | name, lr_decay_rate, num_layers=n_blocks, force_is_backbone=n_blocks > 0, chunked_blocks=chunked_blocks 65 | ) 66 | d = {"params": param, "is_last_layer": False, "lr_multiplier": decay_rate, "wd_multiplier": 1.0, "name": name} 67 | 68 | if "last_layer" in name: 69 | d.update({"is_last_layer": True}) 70 | 71 | if name.endswith(".bias") or "norm" in name or "gamma" in name: 72 | d.update({"wd_multiplier": 0.0}) 73 | 74 | if "patch_embed" in name: 75 | d.update({"lr_multiplier": d["lr_multiplier"] * patch_embed_lr_mult}) 76 | 77 | all_param_groups.append(d) 78 | logger.info(f"""{name}: lr_multiplier: {d["lr_multiplier"]}, wd_multiplier: {d["wd_multiplier"]}""") 79 | 80 | return all_param_groups 81 | 82 | 83 | def fuse_params_groups(all_params_groups, keys=("lr_multiplier", "wd_multiplier", "is_last_layer")): 84 | fused_params_groups = defaultdict(lambda: {"params": []}) 85 | for d in all_params_groups: 86 | identifier = "" 87 | for k in keys: 88 | identifier += k + str(d[k]) + "_" 89 | 90 | for k in keys: 91 | fused_params_groups[identifier][k] = d[k] 92 | fused_params_groups[identifier]["params"].append(d["params"]) 93 | 94 | return fused_params_groups.values() 95 | -------------------------------------------------------------------------------- /depth_to_pointcloud.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import glob 4 | import torch 5 | import numpy as np 6 | from PIL import Image 7 | import torchvision.transforms as transforms 8 | import open3d as o3d 9 | from tqdm import tqdm 10 | from zoedepth.models.builder import build_model 11 | from zoedepth.utils.config import get_config 12 | 13 | # Load the saved calibration parameters 14 | calibration_data = np.load("CalibrationMatrix_college_cpt.npz") 15 | 16 | # Extract the camera matrix 17 | camera_matrix = calibration_data['Camera_matrix'] 18 | 19 | # The camera matrix typically looks like this: 20 | # [[fx, 0, cx], 21 | # [ 0, fy, cy], 22 | # [ 0, 0, 1]] 23 | 24 | # Extract the focal lengths 25 | FX = camera_matrix[0, 0] # Focal length in the x direction 26 | FY = camera_matrix[1, 1] # Focal length in the y direction 27 | FL = (FX + FY) / 2 # Average focal length 28 | 29 | # Print the extracted parameters 30 | print(f"FX: {FX}") 31 | print(f"FY: {FY}") 32 | print(f"FL (average focal length): {FL}") 33 | 34 | NYU_DATA = False 35 | INPUT_DIR = './my_test/input/indoor' 36 | OUTPUT_DIR = './my_test/output/indoor/' 37 | DATASET = 'nyu' # For INDOOR 38 | # DATASET = 'kitti' # For OUTDOOR 39 | 40 | 41 | def process_images(model): 42 | if not 
os.path.exists(OUTPUT_DIR): 43 | os.makedirs(OUTPUT_DIR) 44 | 45 | image_paths = glob.glob(os.path.join(INPUT_DIR, '*.png')) + glob.glob(os.path.join(INPUT_DIR, '*.jpg')) 46 | for image_path in tqdm(image_paths, desc="Processing Images"): 47 | try: 48 | color_image = Image.open(image_path).convert('RGB') 49 | original_width, original_height = color_image.size 50 | FINAL_HEIGHT = original_height 51 | FINAL_WIDTH = original_width 52 | image_tensor = transforms.ToTensor()(color_image).unsqueeze(0).to( 53 | 'cuda' if torch.cuda.is_available() else 'cpu') 54 | 55 | pred = model(image_tensor, dataset=DATASET) 56 | if isinstance(pred, dict): 57 | pred = pred.get('metric_depth', pred.get('out')) 58 | elif isinstance(pred, (list, tuple)): 59 | pred = pred[-1] 60 | pred = pred.squeeze().detach().cpu().numpy() 61 | 62 | # Resize color image and depth to final size 63 | resized_color_image = color_image.resize((FINAL_WIDTH, FINAL_HEIGHT), Image.LANCZOS) 64 | resized_pred = Image.fromarray(pred).resize((FINAL_WIDTH, FINAL_HEIGHT), Image.NEAREST) 65 | 66 | focal_length_x, focal_length_y = (FX, FY) if not NYU_DATA else (FL, FL) 67 | x, y = np.meshgrid(np.arange(FINAL_WIDTH), np.arange(FINAL_HEIGHT)) 68 | x = (x - FINAL_WIDTH / 2) / focal_length_x 69 | y = (y - FINAL_HEIGHT / 2) / focal_length_y 70 | z = np.array(resized_pred) 71 | points = np.stack((np.multiply(x, z), np.multiply(y, z), z), axis=-1).reshape(-1, 3) 72 | colors = np.array(resized_color_image).reshape(-1, 3) / 255.0 73 | 74 | pcd = o3d.geometry.PointCloud() 75 | pcd.points = o3d.utility.Vector3dVector(points) 76 | pcd.colors = o3d.utility.Vector3dVector(colors) 77 | pcd = pcd.voxel_down_sample(voxel_size=0.01) 78 | o3d.io.write_point_cloud( 79 | os.path.join(OUTPUT_DIR, os.path.splitext(os.path.basename(image_path))[0] + ".ply"), pcd) 80 | except Exception as e: 81 | print(f"Error processing {image_path}: {e}") 82 | 83 | 84 | def main(model_name, pretrained_resource): 85 | config = get_config(model_name, "eval", DATASET) 86 | config.pretrained_resource = pretrained_resource 87 | model = build_model(config).to('cuda' if torch.cuda.is_available() else 'cpu') 88 | model.eval() 89 | process_images(model) 90 | 91 | 92 | if __name__ == '__main__': 93 | model = 'zoedepth' 94 | pretrained_resource = 'local::./checkpoints/depth_anything_metric_depth_indoor.pt' 95 | # pretrained_resource = 'local::./checkpoints/depth_anything_metric_depth_outdoor.pt' 96 | main(model, pretrained_resource) 97 | -------------------------------------------------------------------------------- /zoedepth/utils/geometry.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 
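For reference, the pinhole back-projection performed inside process_images above can be written as one small helper: a pixel (u, v) with metric depth z maps to camera coordinates X = (u - cx) * z / fx, Y = (v - cy) * z / fy, Z = z, with the principal point approximated by the image centre exactly as in the script. This is a sketch; the function name and shapes are illustrative and not part of the repository.

import numpy as np

def backproject(depth_m, fx, fy):
    """Back-project an H x W metric depth map into an (H*W, 3) array of camera-frame points."""
    h, w = depth_m.shape
    u, v = np.meshgrid(np.arange(w), np.arange(h))   # pixel coordinates, each of shape (h, w)
    x = (u - w / 2) * depth_m / fx
    y = (v - h / 2) * depth_m / fy
    return np.stack((x, y, depth_m), axis=-1).reshape(-1, 3)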
14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import numpy as np 26 | 27 | def get_intrinsics(H,W): 28 | """ 29 | Intrinsics for a pinhole camera model. 30 | Assume fov of 55 degrees and central principal point. 31 | """ 32 | f = 0.5 * W / np.tan(0.5 * 55 * np.pi / 180.0) 33 | cx = 0.5 * W 34 | cy = 0.5 * H 35 | return np.array([[f, 0, cx], 36 | [0, f, cy], 37 | [0, 0, 1]]) 38 | 39 | def depth_to_points(depth, R=None, t=None): 40 | 41 | K = get_intrinsics(depth.shape[1], depth.shape[2]) 42 | Kinv = np.linalg.inv(K) 43 | if R is None: 44 | R = np.eye(3) 45 | if t is None: 46 | t = np.zeros(3) 47 | 48 | # M converts from your coordinate to PyTorch3D's coordinate system 49 | M = np.eye(3) 50 | M[0, 0] = -1.0 51 | M[1, 1] = -1.0 52 | 53 | height, width = depth.shape[1:3] 54 | 55 | x = np.arange(width) 56 | y = np.arange(height) 57 | coord = np.stack(np.meshgrid(x, y), -1) 58 | coord = np.concatenate((coord, np.ones_like(coord)[:, :, [0]]), -1) # z=1 59 | coord = coord.astype(np.float32) 60 | # coord = torch.as_tensor(coord, dtype=torch.float32, device=device) 61 | coord = coord[None] # bs, h, w, 3 62 | 63 | D = depth[:, :, :, None, None] 64 | # print(D.shape, Kinv[None, None, None, ...].shape, coord[:, :, :, :, None].shape ) 65 | pts3D_1 = D * Kinv[None, None, None, ...] @ coord[:, :, :, :, None] 66 | # pts3D_1 live in your coordinate system. Convert them to Py3D's 67 | pts3D_1 = M[None, None, None, ...] @ pts3D_1 68 | # from reference to targe tviewpoint 69 | pts3D_2 = R[None, None, None, ...] @ pts3D_1 + t[None, None, None, :, None] 70 | # pts3D_2 = pts3D_1 71 | # depth_2 = pts3D_2[:, :, :, 2, :] # b,1,h,w 72 | return pts3D_2[:, :, :, :3, 0][0] 73 | 74 | 75 | def create_triangles(h, w, mask=None): 76 | """ 77 | Reference: https://github.com/google-research/google-research/blob/e96197de06613f1b027d20328e06d69829fa5a89/infinite_nature/render_utils.py#L68 78 | Creates mesh triangle indices from a given pixel grid size. 79 | This function is not and need not be differentiable as triangle indices are 80 | fixed. 81 | Args: 82 | h: (int) denoting the height of the image. 83 | w: (int) denoting the width of the image. 84 | Returns: 85 | triangles: 2D numpy array of indices (int) with shape (2(W-1)(H-1) x 3) 86 | """ 87 | x, y = np.meshgrid(range(w - 1), range(h - 1)) 88 | tl = y * w + x 89 | tr = y * w + x + 1 90 | bl = (y + 1) * w + x 91 | br = (y + 1) * w + x + 1 92 | triangles = np.array([tl, bl, tr, br, tr, bl]) 93 | triangles = np.transpose(triangles, (1, 2, 0)).reshape( 94 | ((w - 1) * (h - 1) * 2, 3)) 95 | if mask is not None: 96 | mask = mask.reshape(-1) 97 | triangles = triangles[mask[triangles].all(1)] 98 | return triangles 99 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/loss/dino_clstoken_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
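A small shape-level sketch of the geometry helpers above (get_intrinsics, depth_to_points, create_triangles); the 640 x 480 image size and the flat 1 m depth map are illustrative only.

import numpy as np
from zoedepth.utils.geometry import get_intrinsics, depth_to_points, create_triangles

K = get_intrinsics(480, 640)                        # f = 0.5 * 640 / tan(27.5 deg), roughly 615 px
depth = np.ones((1, 480, 640), dtype=np.float32)    # batch of one constant 1 m depth map
pts = depth_to_points(depth)                        # (480, 640, 3) camera-frame points
tris = create_triangles(480, 640)                   # (2 * 479 * 639, 3) triangle vertex indices
print(K[0, 0], pts.shape, tris.shape)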
3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | import torch.distributed as dist 9 | import torch.nn.functional as F 10 | from torch import nn 11 | 12 | 13 | class DINOLoss(nn.Module): 14 | def __init__( 15 | self, 16 | out_dim, 17 | student_temp=0.1, 18 | center_momentum=0.9, 19 | ): 20 | super().__init__() 21 | self.student_temp = student_temp 22 | self.center_momentum = center_momentum 23 | self.register_buffer("center", torch.zeros(1, out_dim)) 24 | self.updated = True 25 | self.reduce_handle = None 26 | self.len_teacher_output = None 27 | self.async_batch_center = None 28 | 29 | @torch.no_grad() 30 | def softmax_center_teacher(self, teacher_output, teacher_temp): 31 | self.apply_center_update() 32 | # teacher centering and sharpening 33 | return F.softmax((teacher_output - self.center) / teacher_temp, dim=-1) 34 | 35 | @torch.no_grad() 36 | def sinkhorn_knopp_teacher(self, teacher_output, teacher_temp, n_iterations=3): 37 | teacher_output = teacher_output.float() 38 | world_size = dist.get_world_size() if dist.is_initialized() else 1 39 | Q = torch.exp(teacher_output / teacher_temp).t() # Q is K-by-B for consistency with notations from our paper 40 | B = Q.shape[1] * world_size # number of samples to assign 41 | K = Q.shape[0] # how many prototypes 42 | 43 | # make the matrix sums to 1 44 | sum_Q = torch.sum(Q) 45 | if dist.is_initialized(): 46 | dist.all_reduce(sum_Q) 47 | Q /= sum_Q 48 | 49 | for it in range(n_iterations): 50 | # normalize each row: total weight per prototype must be 1/K 51 | sum_of_rows = torch.sum(Q, dim=1, keepdim=True) 52 | if dist.is_initialized(): 53 | dist.all_reduce(sum_of_rows) 54 | Q /= sum_of_rows 55 | Q /= K 56 | 57 | # normalize each column: total weight per sample must be 1/B 58 | Q /= torch.sum(Q, dim=0, keepdim=True) 59 | Q /= B 60 | 61 | Q *= B # the columns must sum to 1 so that Q is an assignment 62 | return Q.t() 63 | 64 | def forward(self, student_output_list, teacher_out_softmaxed_centered_list): 65 | """ 66 | Cross-entropy between softmax outputs of the teacher and student networks. 
67 | """ 68 | # TODO: Use cross_entropy_distribution here 69 | total_loss = 0 70 | for s in student_output_list: 71 | lsm = F.log_softmax(s / self.student_temp, dim=-1) 72 | for t in teacher_out_softmaxed_centered_list: 73 | loss = torch.sum(t * lsm, dim=-1) 74 | total_loss -= loss.mean() 75 | return total_loss 76 | 77 | @torch.no_grad() 78 | def update_center(self, teacher_output): 79 | self.reduce_center_update(teacher_output) 80 | 81 | @torch.no_grad() 82 | def reduce_center_update(self, teacher_output): 83 | self.updated = False 84 | self.len_teacher_output = len(teacher_output) 85 | self.async_batch_center = torch.sum(teacher_output, dim=0, keepdim=True) 86 | if dist.is_initialized(): 87 | self.reduce_handle = dist.all_reduce(self.async_batch_center, async_op=True) 88 | 89 | @torch.no_grad() 90 | def apply_center_update(self): 91 | if self.updated is False: 92 | world_size = dist.get_world_size() if dist.is_initialized() else 1 93 | 94 | if self.reduce_handle is not None: 95 | self.reduce_handle.wait() 96 | _t = self.async_batch_center / (self.len_teacher_output * world_size) 97 | 98 | self.center = self.center * self.center_momentum + _t * (1 - self.center_momentum) 99 | 100 | self.updated = True 101 | -------------------------------------------------------------------------------- /zoedepth/utils/easydict/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | EasyDict 3 | Copy/pasted from https://github.com/makinacorpus/easydict 4 | Original author: Mathieu Leplatre 5 | """ 6 | 7 | class EasyDict(dict): 8 | """ 9 | Get attributes 10 | 11 | >>> d = EasyDict({'foo':3}) 12 | >>> d['foo'] 13 | 3 14 | >>> d.foo 15 | 3 16 | >>> d.bar 17 | Traceback (most recent call last): 18 | ... 19 | AttributeError: 'EasyDict' object has no attribute 'bar' 20 | 21 | Works recursively 22 | 23 | >>> d = EasyDict({'foo':3, 'bar':{'x':1, 'y':2}}) 24 | >>> isinstance(d.bar, dict) 25 | True 26 | >>> d.bar.x 27 | 1 28 | 29 | Bullet-proof 30 | 31 | >>> EasyDict({}) 32 | {} 33 | >>> EasyDict(d={}) 34 | {} 35 | >>> EasyDict(None) 36 | {} 37 | >>> d = {'a': 1} 38 | >>> EasyDict(**d) 39 | {'a': 1} 40 | >>> EasyDict((('a', 1), ('b', 2))) 41 | {'a': 1, 'b': 2} 42 | 43 | Set attributes 44 | 45 | >>> d = EasyDict() 46 | >>> d.foo = 3 47 | >>> d.foo 48 | 3 49 | >>> d.bar = {'prop': 'value'} 50 | >>> d.bar.prop 51 | 'value' 52 | >>> d 53 | {'foo': 3, 'bar': {'prop': 'value'}} 54 | >>> d.bar.prop = 'newer' 55 | >>> d.bar.prop 56 | 'newer' 57 | 58 | 59 | Values extraction 60 | 61 | >>> d = EasyDict({'foo':0, 'bar':[{'x':1, 'y':2}, {'x':3, 'y':4}]}) 62 | >>> isinstance(d.bar, list) 63 | True 64 | >>> from operator import attrgetter 65 | >>> list(map(attrgetter('x'), d.bar)) 66 | [1, 3] 67 | >>> list(map(attrgetter('y'), d.bar)) 68 | [2, 4] 69 | >>> d = EasyDict() 70 | >>> list(d.keys()) 71 | [] 72 | >>> d = EasyDict(foo=3, bar=dict(x=1, y=2)) 73 | >>> d.foo 74 | 3 75 | >>> d.bar.x 76 | 1 77 | 78 | Still like a dict though 79 | 80 | >>> o = EasyDict({'clean':True}) 81 | >>> list(o.items()) 82 | [('clean', True)] 83 | 84 | And like a class 85 | 86 | >>> class Flower(EasyDict): 87 | ... power = 1 88 | ... 
89 | >>> f = Flower() 90 | >>> f.power 91 | 1 92 | >>> f = Flower({'height': 12}) 93 | >>> f.height 94 | 12 95 | >>> f['power'] 96 | 1 97 | >>> sorted(f.keys()) 98 | ['height', 'power'] 99 | 100 | update and pop items 101 | >>> d = EasyDict(a=1, b='2') 102 | >>> e = EasyDict(c=3.0, a=9.0) 103 | >>> d.update(e) 104 | >>> d.c 105 | 3.0 106 | >>> d['c'] 107 | 3.0 108 | >>> d.get('c') 109 | 3.0 110 | >>> d.update(a=4, b=4) 111 | >>> d.b 112 | 4 113 | >>> d.pop('a') 114 | 4 115 | >>> d.a 116 | Traceback (most recent call last): 117 | ... 118 | AttributeError: 'EasyDict' object has no attribute 'a' 119 | """ 120 | def __init__(self, d=None, **kwargs): 121 | if d is None: 122 | d = {} 123 | else: 124 | d = dict(d) 125 | if kwargs: 126 | d.update(**kwargs) 127 | for k, v in d.items(): 128 | setattr(self, k, v) 129 | # Class attributes 130 | for k in self.__class__.__dict__.keys(): 131 | if not (k.startswith('__') and k.endswith('__')) and not k in ('update', 'pop'): 132 | setattr(self, k, getattr(self, k)) 133 | 134 | def __setattr__(self, name, value): 135 | if isinstance(value, (list, tuple)): 136 | value = [self.__class__(x) 137 | if isinstance(x, dict) else x for x in value] 138 | elif isinstance(value, dict) and not isinstance(value, self.__class__): 139 | value = self.__class__(value) 140 | super(EasyDict, self).__setattr__(name, value) 141 | super(EasyDict, self).__setitem__(name, value) 142 | 143 | __setitem__ = __setattr__ 144 | 145 | def update(self, e=None, **f): 146 | d = e or dict() 147 | d.update(f) 148 | for k in d: 149 | setattr(self, k, d[k]) 150 | 151 | def pop(self, k, d=None): 152 | delattr(self, k) 153 | return super(EasyDict, self).pop(k, d) 154 | 155 | 156 | if __name__ == "__main__": 157 | import doctest 158 | doctest.testmod() -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/eval/metrics.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | from enum import Enum 8 | import logging 9 | from typing import Any, Dict, Optional 10 | 11 | import torch 12 | from torch import Tensor 13 | from torchmetrics import Metric, MetricCollection 14 | from torchmetrics.classification import MulticlassAccuracy 15 | from torchmetrics.utilities.data import dim_zero_cat, select_topk 16 | 17 | 18 | logger = logging.getLogger("dinov2") 19 | 20 | 21 | class MetricType(Enum): 22 | MEAN_ACCURACY = "mean_accuracy" 23 | MEAN_PER_CLASS_ACCURACY = "mean_per_class_accuracy" 24 | PER_CLASS_ACCURACY = "per_class_accuracy" 25 | IMAGENET_REAL_ACCURACY = "imagenet_real_accuracy" 26 | 27 | @property 28 | def accuracy_averaging(self): 29 | return getattr(AccuracyAveraging, self.name, None) 30 | 31 | def __str__(self): 32 | return self.value 33 | 34 | 35 | class AccuracyAveraging(Enum): 36 | MEAN_ACCURACY = "micro" 37 | MEAN_PER_CLASS_ACCURACY = "macro" 38 | PER_CLASS_ACCURACY = "none" 39 | 40 | def __str__(self): 41 | return self.value 42 | 43 | 44 | def build_metric(metric_type: MetricType, *, num_classes: int, ks: Optional[tuple] = None): 45 | if metric_type.accuracy_averaging is not None: 46 | return build_topk_accuracy_metric( 47 | average_type=metric_type.accuracy_averaging, 48 | num_classes=num_classes, 49 | ks=(1, 5) if ks is None else ks, 50 | ) 51 | elif metric_type == MetricType.IMAGENET_REAL_ACCURACY: 52 | return build_topk_imagenet_real_accuracy_metric( 53 | num_classes=num_classes, 54 | ks=(1, 5) if ks is None else ks, 55 | ) 56 | 57 | raise ValueError(f"Unknown metric type {metric_type}") 58 | 59 | 60 | def build_topk_accuracy_metric(average_type: AccuracyAveraging, num_classes: int, ks: tuple = (1, 5)): 61 | metrics: Dict[str, Metric] = { 62 | f"top-{k}": MulticlassAccuracy(top_k=k, num_classes=int(num_classes), average=average_type.value) for k in ks 63 | } 64 | return MetricCollection(metrics) 65 | 66 | 67 | def build_topk_imagenet_real_accuracy_metric(num_classes: int, ks: tuple = (1, 5)): 68 | metrics: Dict[str, Metric] = {f"top-{k}": ImageNetReaLAccuracy(top_k=k, num_classes=int(num_classes)) for k in ks} 69 | return MetricCollection(metrics) 70 | 71 | 72 | class ImageNetReaLAccuracy(Metric): 73 | is_differentiable: bool = False 74 | higher_is_better: Optional[bool] = None 75 | full_state_update: bool = False 76 | 77 | def __init__( 78 | self, 79 | num_classes: int, 80 | top_k: int = 1, 81 | **kwargs: Any, 82 | ) -> None: 83 | super().__init__(**kwargs) 84 | self.num_classes = num_classes 85 | self.top_k = top_k 86 | self.add_state("tp", [], dist_reduce_fx="cat") 87 | 88 | def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore 89 | # preds [B, D] 90 | # target [B, A] 91 | # preds_oh [B, D] with 0 and 1 92 | # select top K highest probabilities, use one hot representation 93 | preds_oh = select_topk(preds, self.top_k) 94 | # target_oh [B, D + 1] with 0 and 1 95 | target_oh = torch.zeros((preds_oh.shape[0], preds_oh.shape[1] + 1), device=target.device, dtype=torch.int32) 96 | target = target.long() 97 | # for undefined targets (-1) use a fake value `num_classes` 98 | target[target == -1] = self.num_classes 99 | # fill targets, use one hot representation 100 | target_oh.scatter_(1, target, 1) 101 | # target_oh [B, D] (remove the fake target at index `num_classes`) 102 | target_oh = target_oh[:, :-1] 103 | # tp [B] with 0 and 1 104 | tp = (preds_oh * target_oh == 1).sum(dim=1) 105 | # at least one match between prediction and target 106 | tp.clip_(max=1) 107 | # ignore instances where no targets are defined 108 | mask = 
target_oh.sum(dim=1) > 0 109 | tp = tp[mask] 110 | self.tp.append(tp) # type: ignore 111 | 112 | def compute(self) -> Tensor: 113 | tp = dim_zero_cat(self.tp) # type: ignore 114 | return tp.float().mean() 115 | -------------------------------------------------------------------------------- /zoedepth/models/layers/patch_transformer.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | import torch.nn as nn 27 | 28 | 29 | class PatchTransformerEncoder(nn.Module): 30 | def __init__(self, in_channels, patch_size=10, embedding_dim=128, num_heads=4, use_class_token=False): 31 | """ViT-like transformer block 32 | 33 | Args: 34 | in_channels (int): Input channels 35 | patch_size (int, optional): patch size. Defaults to 10. 36 | embedding_dim (int, optional): Embedding dimension in transformer model. Defaults to 128. 37 | num_heads (int, optional): number of attention heads. Defaults to 4. 38 | use_class_token (bool, optional): Whether to use extra token at the start for global accumulation (called as "class token"). Defaults to False. 
39 | """ 40 | super(PatchTransformerEncoder, self).__init__() 41 | self.use_class_token = use_class_token 42 | encoder_layers = nn.TransformerEncoderLayer( 43 | embedding_dim, num_heads, dim_feedforward=1024) 44 | self.transformer_encoder = nn.TransformerEncoder( 45 | encoder_layers, num_layers=4) # takes shape S,N,E 46 | 47 | self.embedding_convPxP = nn.Conv2d(in_channels, embedding_dim, 48 | kernel_size=patch_size, stride=patch_size, padding=0) 49 | 50 | def positional_encoding_1d(self, sequence_length, batch_size, embedding_dim, device='cpu'): 51 | """Generate positional encodings 52 | 53 | Args: 54 | sequence_length (int): Sequence length 55 | embedding_dim (int): Embedding dimension 56 | 57 | Returns: 58 | torch.Tensor SBE: Positional encodings 59 | """ 60 | position = torch.arange( 61 | 0, sequence_length, dtype=torch.float32, device=device).unsqueeze(1) 62 | index = torch.arange( 63 | 0, embedding_dim, 2, dtype=torch.float32, device=device).unsqueeze(0) 64 | div_term = torch.exp(index * (-torch.log(torch.tensor(10000.0, device=device)) / embedding_dim)) 65 | pos_encoding = position * div_term 66 | pos_encoding = torch.cat([torch.sin(pos_encoding), torch.cos(pos_encoding)], dim=1) 67 | pos_encoding = pos_encoding.unsqueeze(1).repeat(1, batch_size, 1) 68 | return pos_encoding 69 | 70 | 71 | def forward(self, x): 72 | """Forward pass 73 | 74 | Args: 75 | x (torch.Tensor - NCHW): Input feature tensor 76 | 77 | Returns: 78 | torch.Tensor - SNE: Transformer output embeddings. S - sequence length (=HW/patch_size^2), N - batch size, E - embedding dim 79 | """ 80 | embeddings = self.embedding_convPxP(x).flatten( 81 | 2) # .shape = n,c,s = n, embedding_dim, s 82 | if self.use_class_token: 83 | # extra special token at start ? 84 | embeddings = nn.functional.pad(embeddings, (1, 0)) 85 | 86 | # change to S,N,E format required by transformer 87 | embeddings = embeddings.permute(2, 0, 1) 88 | S, N, E = embeddings.shape 89 | embeddings = embeddings + self.positional_encoding_1d(S, N, E, device=embeddings.device) 90 | x = self.transformer_encoder(embeddings) # .shape = S, N, E 91 | return x 92 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/data/augmentations.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import logging 8 | 9 | from torchvision import transforms 10 | 11 | from .transforms import ( 12 | GaussianBlur, 13 | make_normalize_transform, 14 | ) 15 | 16 | 17 | logger = logging.getLogger("dinov2") 18 | 19 | 20 | class DataAugmentationDINO(object): 21 | def __init__( 22 | self, 23 | global_crops_scale, 24 | local_crops_scale, 25 | local_crops_number, 26 | global_crops_size=224, 27 | local_crops_size=96, 28 | ): 29 | self.global_crops_scale = global_crops_scale 30 | self.local_crops_scale = local_crops_scale 31 | self.local_crops_number = local_crops_number 32 | self.global_crops_size = global_crops_size 33 | self.local_crops_size = local_crops_size 34 | 35 | logger.info("###################################") 36 | logger.info("Using data augmentation parameters:") 37 | logger.info(f"global_crops_scale: {global_crops_scale}") 38 | logger.info(f"local_crops_scale: {local_crops_scale}") 39 | logger.info(f"local_crops_number: {local_crops_number}") 40 | logger.info(f"global_crops_size: {global_crops_size}") 41 | logger.info(f"local_crops_size: {local_crops_size}") 42 | logger.info("###################################") 43 | 44 | # random resized crop and flip 45 | self.geometric_augmentation_global = transforms.Compose( 46 | [ 47 | transforms.RandomResizedCrop( 48 | global_crops_size, scale=global_crops_scale, interpolation=transforms.InterpolationMode.BICUBIC 49 | ), 50 | transforms.RandomHorizontalFlip(p=0.5), 51 | ] 52 | ) 53 | 54 | self.geometric_augmentation_local = transforms.Compose( 55 | [ 56 | transforms.RandomResizedCrop( 57 | local_crops_size, scale=local_crops_scale, interpolation=transforms.InterpolationMode.BICUBIC 58 | ), 59 | transforms.RandomHorizontalFlip(p=0.5), 60 | ] 61 | ) 62 | 63 | # color distorsions / blurring 64 | color_jittering = transforms.Compose( 65 | [ 66 | transforms.RandomApply( 67 | [transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.2, hue=0.1)], 68 | p=0.8, 69 | ), 70 | transforms.RandomGrayscale(p=0.2), 71 | ] 72 | ) 73 | 74 | global_transfo1_extra = GaussianBlur(p=1.0) 75 | 76 | global_transfo2_extra = transforms.Compose( 77 | [ 78 | GaussianBlur(p=0.1), 79 | transforms.RandomSolarize(threshold=128, p=0.2), 80 | ] 81 | ) 82 | 83 | local_transfo_extra = GaussianBlur(p=0.5) 84 | 85 | # normalization 86 | self.normalize = transforms.Compose( 87 | [ 88 | transforms.ToTensor(), 89 | make_normalize_transform(), 90 | ] 91 | ) 92 | 93 | self.global_transfo1 = transforms.Compose([color_jittering, global_transfo1_extra, self.normalize]) 94 | self.global_transfo2 = transforms.Compose([color_jittering, global_transfo2_extra, self.normalize]) 95 | self.local_transfo = transforms.Compose([color_jittering, local_transfo_extra, self.normalize]) 96 | 97 | def __call__(self, image): 98 | output = {} 99 | 100 | # global crops: 101 | im1_base = self.geometric_augmentation_global(image) 102 | global_crop_1 = self.global_transfo1(im1_base) 103 | 104 | im2_base = self.geometric_augmentation_global(image) 105 | global_crop_2 = self.global_transfo2(im2_base) 106 | 107 | output["global_crops"] = [global_crop_1, global_crop_2] 108 | 109 | # global crops for teacher: 110 | output["global_crops_teacher"] = [global_crop_1, global_crop_2] 111 | 112 | # local crops: 113 | local_crops = [ 114 | self.local_transfo(self.geometric_augmentation_local(image)) for _ in range(self.local_crops_number) 115 | ] 116 | output["local_crops"] = local_crops 117 | output["offsets"] = () 118 | 119 | return output 120 | 
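A minimal sketch of applying the DataAugmentationDINO pipeline above to a single PIL image; the crop scales, crop count and file name are illustrative values, not prescribed by this repository.

from PIL import Image
from dinov2.data.augmentations import DataAugmentationDINO

aug = DataAugmentationDINO(
    global_crops_scale=(0.32, 1.0),
    local_crops_scale=(0.05, 0.32),
    local_crops_number=8,
)
views = aug(Image.open("example.jpg").convert("RGB"))
print(len(views["global_crops"]), len(views["local_crops"]))   # 2 global, 8 local
# each global crop is a normalized 3 x 224 x 224 tensor, each local crop 3 x 96 x 96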
-------------------------------------------------------------------------------- /zoedepth/data/diml_outdoor_test.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | 40 | def __call__(self, sample): 41 | image, depth = sample['image'], sample['depth'] 42 | image = self.to_tensor(image) 43 | image = self.normalize(image) 44 | depth = self.to_tensor(depth) 45 | 46 | return {'image': image, 'depth': depth, 'dataset': "diml_outdoor"} 47 | 48 | def to_tensor(self, pic): 49 | 50 | if isinstance(pic, np.ndarray): 51 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 52 | return img 53 | 54 | # # handle PIL Image 55 | if pic.mode == 'I': 56 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 57 | elif pic.mode == 'I;16': 58 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 59 | else: 60 | img = torch.ByteTensor( 61 | torch.ByteStorage.from_buffer(pic.tobytes())) 62 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 63 | if pic.mode == 'YCbCr': 64 | nchannel = 3 65 | elif pic.mode == 'I;16': 66 | nchannel = 1 67 | else: 68 | nchannel = len(pic.mode) 69 | img = img.view(pic.size[1], pic.size[0], nchannel) 70 | 71 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 72 | if isinstance(img, torch.ByteTensor): 73 | return img.float() 74 | else: 75 | return img 76 | 77 | 78 | class DIML_Outdoor(Dataset): 79 | def __init__(self, data_dir_root): 80 | import glob 81 | 82 | # image paths are of the form /{outleft, depthmap}/*.png 83 | self.image_files = glob.glob(os.path.join( 84 | data_dir_root, 'outleft', '*.png')) 85 | self.depth_files = [r.replace("outleft", "depthmap") 86 | for r in self.image_files] 87 | self.transform = ToTensor() 88 | 89 | def __getitem__(self, idx): 90 | image_path = self.image_files[idx] 91 | depth_path = self.depth_files[idx] 92 | 93 | image = np.asarray(Image.open(image_path), 
dtype=np.float32) / 255.0 94 | depth = np.asarray(Image.open(depth_path), 95 | dtype='uint16') / 1000.0 # mm to meters 96 | 97 | # depth[depth > 8] = -1 98 | depth = depth[..., None] 99 | 100 | sample = dict(image=image, depth=depth, dataset="diml_outdoor") 101 | 102 | # return sample 103 | return self.transform(sample) 104 | 105 | def __len__(self): 106 | return len(self.image_files) 107 | 108 | 109 | def get_diml_outdoor_loader(data_dir_root, batch_size=1, **kwargs): 110 | dataset = DIML_Outdoor(data_dir_root) 111 | return DataLoader(dataset, batch_size, **kwargs) 112 | 113 | # get_diml_outdoor_loader(data_dir_root="datasets/diml/outdoor/test/HR") 114 | # get_diml_outdoor_loader(data_dir_root="datasets/diml/outdoor/test/LR") 115 | -------------------------------------------------------------------------------- /zoedepth/models/base_models/dpt_dinov2/blocks.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | def _make_scratch(in_shape, out_shape, groups=1, expand=False): 5 | scratch = nn.Module() 6 | 7 | out_shape1 = out_shape 8 | out_shape2 = out_shape 9 | out_shape3 = out_shape 10 | if len(in_shape) >= 4: 11 | out_shape4 = out_shape 12 | 13 | if expand: 14 | out_shape1 = out_shape 15 | out_shape2 = out_shape*2 16 | out_shape3 = out_shape*4 17 | if len(in_shape) >= 4: 18 | out_shape4 = out_shape*8 19 | 20 | scratch.layer1_rn = nn.Conv2d( 21 | in_shape[0], out_shape1, kernel_size=3, stride=1, padding=1, bias=False, groups=groups 22 | ) 23 | scratch.layer2_rn = nn.Conv2d( 24 | in_shape[1], out_shape2, kernel_size=3, stride=1, padding=1, bias=False, groups=groups 25 | ) 26 | scratch.layer3_rn = nn.Conv2d( 27 | in_shape[2], out_shape3, kernel_size=3, stride=1, padding=1, bias=False, groups=groups 28 | ) 29 | if len(in_shape) >= 4: 30 | scratch.layer4_rn = nn.Conv2d( 31 | in_shape[3], out_shape4, kernel_size=3, stride=1, padding=1, bias=False, groups=groups 32 | ) 33 | 34 | return scratch 35 | 36 | 37 | class ResidualConvUnit(nn.Module): 38 | """Residual convolution module. 39 | """ 40 | 41 | def __init__(self, features, activation, bn): 42 | """Init. 43 | 44 | Args: 45 | features (int): number of features 46 | """ 47 | super().__init__() 48 | 49 | self.bn = bn 50 | 51 | self.groups=1 52 | 53 | self.conv1 = nn.Conv2d( 54 | features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups 55 | ) 56 | 57 | self.conv2 = nn.Conv2d( 58 | features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups 59 | ) 60 | 61 | if self.bn==True: 62 | self.bn1 = nn.BatchNorm2d(features) 63 | self.bn2 = nn.BatchNorm2d(features) 64 | 65 | self.activation = activation 66 | 67 | self.skip_add = nn.quantized.FloatFunctional() 68 | 69 | def forward(self, x): 70 | """Forward pass. 71 | 72 | Args: 73 | x (tensor): input 74 | 75 | Returns: 76 | tensor: output 77 | """ 78 | 79 | out = self.activation(x) 80 | out = self.conv1(out) 81 | if self.bn==True: 82 | out = self.bn1(out) 83 | 84 | out = self.activation(out) 85 | out = self.conv2(out) 86 | if self.bn==True: 87 | out = self.bn2(out) 88 | 89 | if self.groups > 1: 90 | out = self.conv_merge(out) 91 | 92 | return self.skip_add.add(out, x) 93 | 94 | 95 | class FeatureFusionBlock(nn.Module): 96 | """Feature fusion block. 97 | """ 98 | 99 | def __init__(self, features, activation, deconv=False, bn=False, expand=False, align_corners=True, size=None): 100 | """Init. 
101 | 102 | Args: 103 | features (int): number of features 104 | """ 105 | super(FeatureFusionBlock, self).__init__() 106 | 107 | self.deconv = deconv 108 | self.align_corners = align_corners 109 | 110 | self.groups=1 111 | 112 | self.expand = expand 113 | out_features = features 114 | if self.expand==True: 115 | out_features = features//2 116 | 117 | self.out_conv = nn.Conv2d(features, out_features, kernel_size=1, stride=1, padding=0, bias=True, groups=1) 118 | 119 | self.resConfUnit1 = ResidualConvUnit(features, activation, bn) 120 | self.resConfUnit2 = ResidualConvUnit(features, activation, bn) 121 | 122 | self.skip_add = nn.quantized.FloatFunctional() 123 | 124 | self.size=size 125 | 126 | def forward(self, *xs, size=None): 127 | """Forward pass. 128 | 129 | Returns: 130 | tensor: output 131 | """ 132 | output = xs[0] 133 | 134 | if len(xs) == 2: 135 | res = self.resConfUnit1(xs[1]) 136 | output = self.skip_add.add(output, res) 137 | 138 | output = self.resConfUnit2(output) 139 | 140 | if (size is None) and (self.size is None): 141 | modifier = {"scale_factor": 2} 142 | elif size is None: 143 | modifier = {"size": self.size} 144 | else: 145 | modifier = {"size": size} 146 | 147 | output = nn.functional.interpolate( 148 | output, **modifier, mode="bilinear", align_corners=self.align_corners 149 | ) 150 | 151 | output = self.out_conv(output) 152 | 153 | return output 154 | -------------------------------------------------------------------------------- /zoedepth/data/diode.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
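A shape-level sketch of the FeatureFusionBlock defined above: it fuses a coarse decoder feature with a same-resolution skip feature through the two residual units, then upsamples by 2 and projects with out_conv. The sizes below are illustrative.

import torch
import torch.nn as nn
from zoedepth.models.base_models.dpt_dinov2.blocks import FeatureFusionBlock

fuse = FeatureFusionBlock(features=256, activation=nn.ReLU(False), bn=False)
coarse = torch.randn(1, 256, 24, 32)
skip = torch.randn(1, 256, 24, 32)
out = fuse(coarse, skip)     # skip-add of residual branches, then 2x bilinear upsample
print(out.shape)             # torch.Size([1, 256, 48, 64])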
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | self.resize = transforms.Resize(480) 40 | 41 | def __call__(self, sample): 42 | image, depth = sample['image'], sample['depth'] 43 | image = self.to_tensor(image) 44 | image = self.normalize(image) 45 | depth = self.to_tensor(depth) 46 | 47 | image = self.resize(image) 48 | 49 | return {'image': image, 'depth': depth, 'dataset': "diode"} 50 | 51 | def to_tensor(self, pic): 52 | 53 | if isinstance(pic, np.ndarray): 54 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 55 | return img 56 | 57 | # # handle PIL Image 58 | if pic.mode == 'I': 59 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 60 | elif pic.mode == 'I;16': 61 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 62 | else: 63 | img = torch.ByteTensor( 64 | torch.ByteStorage.from_buffer(pic.tobytes())) 65 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 66 | if pic.mode == 'YCbCr': 67 | nchannel = 3 68 | elif pic.mode == 'I;16': 69 | nchannel = 1 70 | else: 71 | nchannel = len(pic.mode) 72 | img = img.view(pic.size[1], pic.size[0], nchannel) 73 | 74 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 75 | 76 | if isinstance(img, torch.ByteTensor): 77 | return img.float() 78 | else: 79 | return img 80 | 81 | 82 | class DIODE(Dataset): 83 | def __init__(self, data_dir_root): 84 | import glob 85 | 86 | # image paths are of the form /scene_#/scan_#/*.png 87 | self.image_files = glob.glob( 88 | os.path.join(data_dir_root, '*', '*', '*.png')) 89 | self.depth_files = [r.replace(".png", "_depth.npy") 90 | for r in self.image_files] 91 | self.depth_mask_files = [ 92 | r.replace(".png", "_depth_mask.npy") for r in self.image_files] 93 | self.transform = ToTensor() 94 | 95 | def __getitem__(self, idx): 96 | image_path = self.image_files[idx] 97 | depth_path = self.depth_files[idx] 98 | depth_mask_path = self.depth_mask_files[idx] 99 | 100 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 101 | depth = np.load(depth_path) # in meters 102 | valid = np.load(depth_mask_path) # binary 103 | 104 | # depth[depth > 8] = -1 105 | # depth = depth[..., None] 106 | 107 | sample = dict(image=image, depth=depth, valid=valid) 108 | 109 | # return sample 110 | sample = self.transform(sample) 111 | 112 | if idx == 0: 113 | print(sample["image"].shape) 114 | 115 | return sample 116 | 117 | def __len__(self): 118 | return len(self.image_files) 119 | 120 | 121 | def get_diode_loader(data_dir_root, batch_size=1, **kwargs): 122 | dataset = DIODE(data_dir_root) 123 | return DataLoader(dataset, batch_size, **kwargs) 124 | 125 | # get_diode_loader(data_dir_root="datasets/diode/val/outdoor") 126 | -------------------------------------------------------------------------------- /zoedepth/data/ddad.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including 
without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self, resize_shape): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | self.resize = transforms.Resize(resize_shape) 40 | 41 | def __call__(self, sample): 42 | image, depth = sample['image'], sample['depth'] 43 | image = self.to_tensor(image) 44 | image = self.normalize(image) 45 | depth = self.to_tensor(depth) 46 | 47 | image = self.resize(image) 48 | 49 | return {'image': image, 'depth': depth, 'dataset': "ddad"} 50 | 51 | def to_tensor(self, pic): 52 | 53 | if isinstance(pic, np.ndarray): 54 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 55 | return img 56 | 57 | # # handle PIL Image 58 | if pic.mode == 'I': 59 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 60 | elif pic.mode == 'I;16': 61 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 62 | else: 63 | img = torch.ByteTensor( 64 | torch.ByteStorage.from_buffer(pic.tobytes())) 65 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 66 | if pic.mode == 'YCbCr': 67 | nchannel = 3 68 | elif pic.mode == 'I;16': 69 | nchannel = 1 70 | else: 71 | nchannel = len(pic.mode) 72 | img = img.view(pic.size[1], pic.size[0], nchannel) 73 | 74 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 75 | 76 | if isinstance(img, torch.ByteTensor): 77 | return img.float() 78 | else: 79 | return img 80 | 81 | 82 | class DDAD(Dataset): 83 | def __init__(self, data_dir_root, resize_shape): 84 | import glob 85 | 86 | # image paths are of the form /{outleft, depthmap}/*.png 87 | 88 | # self.image_files = glob.glob(os.path.join(data_dir_root, '*.png')) 89 | # self.depth_files = [r.replace("_rgb.png", "_depth.npy") 90 | # for r in self.image_files] 91 | self.image_files, self.depth_files = [], [] 92 | with open('/mnt/bn/liheyang/MTL-SA-1B/dataset/splits/ddad/val.txt', 'r') as f: 93 | lines = f.read().splitlines() 94 | for line in lines: 95 | self.image_files.append(line.split(' ')[0]) 96 | self.depth_files.append(line.split(' ')[1]) 97 | 98 | self.transform = ToTensor(resize_shape) 99 | 100 | def __getitem__(self, idx): 101 | 102 | image_path = self.image_files[idx] 103 | depth_path = self.depth_files[idx] 104 | 105 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 106 | depth = np.load(depth_path) # meters 107 | 108 | # 
depth[depth > 8] = -1 109 | depth = depth[..., None] 110 | 111 | sample = dict(image=image, depth=depth) 112 | sample = self.transform(sample) 113 | 114 | if idx == 0: 115 | print(sample["image"].shape) 116 | 117 | return sample 118 | 119 | def __len__(self): 120 | return len(self.image_files) 121 | 122 | 123 | def get_ddad_loader(data_dir_root, resize_shape, batch_size=1, **kwargs): 124 | dataset = DDAD(data_dir_root, resize_shape) 125 | return DataLoader(dataset, batch_size, **kwargs) 126 | -------------------------------------------------------------------------------- /zoedepth/data/diml_indoor_test.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | self.resize = transforms.Resize((480, 640)) 40 | 41 | def __call__(self, sample): 42 | image, depth = sample['image'], sample['depth'] 43 | image = self.to_tensor(image) 44 | image = self.normalize(image) 45 | depth = self.to_tensor(depth) 46 | 47 | image = self.resize(image) 48 | 49 | return {'image': image, 'depth': depth, 'dataset': "diml_indoor"} 50 | 51 | def to_tensor(self, pic): 52 | 53 | if isinstance(pic, np.ndarray): 54 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 55 | return img 56 | 57 | # # handle PIL Image 58 | if pic.mode == 'I': 59 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 60 | elif pic.mode == 'I;16': 61 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 62 | else: 63 | img = torch.ByteTensor( 64 | torch.ByteStorage.from_buffer(pic.tobytes())) 65 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 66 | if pic.mode == 'YCbCr': 67 | nchannel = 3 68 | elif pic.mode == 'I;16': 69 | nchannel = 1 70 | else: 71 | nchannel = len(pic.mode) 72 | img = img.view(pic.size[1], pic.size[0], nchannel) 73 | 74 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 75 | if isinstance(img, torch.ByteTensor): 76 | return img.float() 77 | else: 78 | return img 79 | 80 | 81 | class DIML_Indoor(Dataset): 82 | def __init__(self, data_dir_root): 83 | import glob 84 | 85 | # image paths are of the form /{HR, LR}//{color, depth_filled}/*.png 86 | self.image_files = glob.glob(os.path.join( 87 | data_dir_root, "LR", '*', 'color', '*.png')) 88 | self.depth_files = [r.replace("color", "depth_filled").replace( 89 | "_c.png", "_depth_filled.png") for r in self.image_files] 90 | self.transform = ToTensor() 91 | 92 | def __getitem__(self, idx): 93 | image_path = self.image_files[idx] 94 | depth_path = self.depth_files[idx] 95 | 96 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 97 | depth = np.asarray(Image.open(depth_path), 98 | dtype='uint16') / 1000.0 # mm to meters 99 | 100 | # print(np.shape(image)) 101 | # print(np.shape(depth)) 102 | 103 | # depth[depth > 8] = -1 104 | depth = depth[..., None] 105 | 106 | sample = dict(image=image, depth=depth) 107 | 108 | # return sample 109 | sample = self.transform(sample) 110 | 111 | if idx == 0: 112 | print(sample["image"].shape) 113 | 114 | return sample 115 | 116 | def __len__(self): 117 | return len(self.image_files) 118 | 119 | 120 | def get_diml_indoor_loader(data_dir_root, batch_size=1, **kwargs): 121 | dataset = DIML_Indoor(data_dir_root) 122 | return DataLoader(dataset, batch_size, **kwargs) 123 | 124 | # get_diml_indoor_loader(data_dir_root="datasets/diml/indoor/test/HR") 125 | # get_diml_indoor_loader(data_dir_root="datasets/diml/indoor/test/LR") 126 | -------------------------------------------------------------------------------- /zoedepth/data/sun_rgbd_loader.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated 
documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | 40 | def __call__(self, sample): 41 | image, depth = sample['image'], sample['depth'] 42 | image = self.to_tensor(image) 43 | image = self.normalize(image) 44 | depth = self.to_tensor(depth) 45 | 46 | return {'image': image, 'depth': depth, 'dataset': "sunrgbd"} 47 | 48 | def to_tensor(self, pic): 49 | 50 | if isinstance(pic, np.ndarray): 51 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 52 | return img 53 | 54 | # # handle PIL Image 55 | if pic.mode == 'I': 56 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 57 | elif pic.mode == 'I;16': 58 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 59 | else: 60 | img = torch.ByteTensor( 61 | torch.ByteStorage.from_buffer(pic.tobytes())) 62 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 63 | if pic.mode == 'YCbCr': 64 | nchannel = 3 65 | elif pic.mode == 'I;16': 66 | nchannel = 1 67 | else: 68 | nchannel = len(pic.mode) 69 | img = img.view(pic.size[1], pic.size[0], nchannel) 70 | 71 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 72 | if isinstance(img, torch.ByteTensor): 73 | return img.float() 74 | else: 75 | return img 76 | 77 | 78 | class SunRGBD(Dataset): 79 | def __init__(self, data_dir_root): 80 | # test_file_dirs = loadmat(train_test_file)['alltest'].squeeze() 81 | # all_test = [t[0].replace("/n/fs/sun3d/data/", "") for t in test_file_dirs] 82 | # self.all_test = [os.path.join(data_dir_root, t) for t in all_test] 83 | import glob 84 | # self.image_files = glob.glob( 85 | # os.path.join(data_dir_root, 'rgb', 'rgb', '*')) 86 | # self.depth_files = [ 87 | # r.replace("rgb/rgb", "gt/gt").replace("jpg", "png") for r in self.image_files] 88 | 89 | self.image_files, self.depth_files = [], [] 90 | filenames = os.listdir(os.path.join(data_dir_root, 'rgb')) 91 | for i, filename in enumerate(filenames): 92 | self.image_files.append(os.path.join(data_dir_root, 'rgb', filename)) 93 | base_num = int(filename.replace('.jpg', '').replace('img-', '')) 94 | self.depth_files.append(os.path.join(data_dir_root, 'depth', str(base_num) + '.png')) 95 | 96 | self.transform = ToTensor() 97 | 
98 | def __getitem__(self, idx): 99 | image_path = self.image_files[idx] 100 | depth_path = self.depth_files[idx] 101 | 102 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 103 | depth = np.asarray(Image.open(depth_path), dtype='uint16') / 10000.0 104 | # print(depth, depth.min(), depth.max()) 105 | depth[depth > 8] = -1 106 | depth = depth[..., None] 107 | return self.transform(dict(image=image, depth=depth)) 108 | 109 | def __len__(self): 110 | return len(self.image_files) 111 | 112 | 113 | def get_sunrgbd_loader(data_dir_root, batch_size=1, **kwargs): 114 | dataset = SunRGBD(data_dir_root) 115 | return DataLoader(dataset, batch_size, **kwargs) 116 | -------------------------------------------------------------------------------- /zoedepth/models/layers/dist_layers.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | import torch.nn as nn 27 | 28 | 29 | def log_binom(n, k, eps=1e-7): 30 | """ log(nCk) using stirling approximation """ 31 | n = n + eps 32 | k = k + eps 33 | return n * torch.log(n) - k * torch.log(k) - (n-k) * torch.log(n-k+eps) 34 | 35 | 36 | class LogBinomial(nn.Module): 37 | def __init__(self, n_classes=256, act=torch.softmax): 38 | """Compute log binomial distribution for n_classes 39 | 40 | Args: 41 | n_classes (int, optional): number of output classes. Defaults to 256. 42 | """ 43 | super().__init__() 44 | self.K = n_classes 45 | self.act = act 46 | self.register_buffer('k_idx', torch.arange( 47 | 0, n_classes).view(1, -1, 1, 1)) 48 | self.register_buffer('K_minus_1', torch.Tensor( 49 | [self.K-1]).view(1, -1, 1, 1)) 50 | 51 | def forward(self, x, t=1., eps=1e-4): 52 | """Compute log binomial distribution for x 53 | 54 | Args: 55 | x (torch.Tensor - NCHW): probabilities 56 | t (float, torch.Tensor - NCHW, optional): Temperature of distribution. Defaults to 1.. 57 | eps (float, optional): Small number for numerical stability. Defaults to 1e-4. 
58 | 59 | Returns: 60 | torch.Tensor -NCHW: log binomial distribution logbinomial(p;t) 61 | """ 62 | if x.ndim == 3: 63 | x = x.unsqueeze(1) # make it nchw 64 | 65 | one_minus_x = torch.clamp(1 - x, eps, 1) 66 | x = torch.clamp(x, eps, 1) 67 | y = log_binom(self.K_minus_1, self.k_idx) + self.k_idx * \ 68 | torch.log(x) + (self.K - 1 - self.k_idx) * torch.log(one_minus_x) 69 | return self.act(y/t, dim=1) 70 | 71 | 72 | class ConditionalLogBinomial(nn.Module): 73 | def __init__(self, in_features, condition_dim, n_classes=256, bottleneck_factor=2, p_eps=1e-4, max_temp=50, min_temp=1e-7, act=torch.softmax): 74 | """Conditional Log Binomial distribution 75 | 76 | Args: 77 | in_features (int): number of input channels in main feature 78 | condition_dim (int): number of input channels in condition feature 79 | n_classes (int, optional): Number of classes. Defaults to 256. 80 | bottleneck_factor (int, optional): Hidden dim factor. Defaults to 2. 81 | p_eps (float, optional): small eps value. Defaults to 1e-4. 82 | max_temp (float, optional): Maximum temperature of output distribution. Defaults to 50. 83 | min_temp (float, optional): Minimum temperature of output distribution. Defaults to 1e-7. 84 | """ 85 | super().__init__() 86 | self.p_eps = p_eps 87 | self.max_temp = max_temp 88 | self.min_temp = min_temp 89 | self.log_binomial_transform = LogBinomial(n_classes, act=act) 90 | bottleneck = (in_features + condition_dim) // bottleneck_factor 91 | self.mlp = nn.Sequential( 92 | nn.Conv2d(in_features + condition_dim, bottleneck, 93 | kernel_size=1, stride=1, padding=0), 94 | nn.GELU(), 95 | # 2 for p linear norm, 2 for t linear norm 96 | nn.Conv2d(bottleneck, 2+2, kernel_size=1, stride=1, padding=0), 97 | nn.Softplus() 98 | ) 99 | 100 | def forward(self, x, cond): 101 | """Forward pass 102 | 103 | Args: 104 | x (torch.Tensor - NCHW): Main feature 105 | cond (torch.Tensor - NCHW): condition feature 106 | 107 | Returns: 108 | torch.Tensor: Output log binomial distribution 109 | """ 110 | pt = self.mlp(torch.concat((x, cond), dim=1)) 111 | p, t = pt[:, :2, ...], pt[:, 2:, ...] 112 | 113 | p = p + self.p_eps 114 | p = p[:, 0, ...] / (p[:, 0, ...] + p[:, 1, ...]) 115 | 116 | t = t + self.p_eps 117 | t = t[:, 0, ...] / (t[:, 0, ...] + t[:, 1, ...]) 118 | t = t.unsqueeze(1) 119 | t = (self.max_temp - self.min_temp) * t + self.min_temp 120 | 121 | return self.log_binomial_transform(p, t) 122 | -------------------------------------------------------------------------------- /zoedepth/data/hypersim.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import glob 26 | import os 27 | 28 | import h5py 29 | import numpy as np 30 | import torch 31 | from PIL import Image 32 | from torch.utils.data import DataLoader, Dataset 33 | from torchvision import transforms 34 | 35 | 36 | def hypersim_distance_to_depth(npyDistance): 37 | intWidth, intHeight, fltFocal = 1024, 768, 886.81 38 | 39 | npyImageplaneX = np.linspace((-0.5 * intWidth) + 0.5, (0.5 * intWidth) - 0.5, intWidth).reshape( 40 | 1, intWidth).repeat(intHeight, 0).astype(np.float32)[:, :, None] 41 | npyImageplaneY = np.linspace((-0.5 * intHeight) + 0.5, (0.5 * intHeight) - 0.5, 42 | intHeight).reshape(intHeight, 1).repeat(intWidth, 1).astype(np.float32)[:, :, None] 43 | npyImageplaneZ = np.full([intHeight, intWidth, 1], fltFocal, np.float32) 44 | npyImageplane = np.concatenate( 45 | [npyImageplaneX, npyImageplaneY, npyImageplaneZ], 2) 46 | 47 | npyDepth = npyDistance / np.linalg.norm(npyImageplane, 2, 2) * fltFocal 48 | return npyDepth 49 | 50 | 51 | class ToTensor(object): 52 | def __init__(self): 53 | # self.normalize = transforms.Normalize( 54 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 55 | self.normalize = lambda x: x 56 | self.resize = transforms.Resize((480, 640)) 57 | 58 | def __call__(self, sample): 59 | image, depth = sample['image'], sample['depth'] 60 | image = self.to_tensor(image) 61 | image = self.normalize(image) 62 | depth = self.to_tensor(depth) 63 | 64 | image = self.resize(image) 65 | 66 | return {'image': image, 'depth': depth, 'dataset': "hypersim"} 67 | 68 | def to_tensor(self, pic): 69 | 70 | if isinstance(pic, np.ndarray): 71 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 72 | return img 73 | 74 | # # handle PIL Image 75 | if pic.mode == 'I': 76 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 77 | elif pic.mode == 'I;16': 78 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 79 | else: 80 | img = torch.ByteTensor( 81 | torch.ByteStorage.from_buffer(pic.tobytes())) 82 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 83 | if pic.mode == 'YCbCr': 84 | nchannel = 3 85 | elif pic.mode == 'I;16': 86 | nchannel = 1 87 | else: 88 | nchannel = len(pic.mode) 89 | img = img.view(pic.size[1], pic.size[0], nchannel) 90 | 91 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 92 | if isinstance(img, torch.ByteTensor): 93 | return img.float() 94 | else: 95 | return img 96 | 97 | 98 | class HyperSim(Dataset): 99 | def __init__(self, data_dir_root): 100 | # image paths are of the form //images/scene_cam_#_final_preview/*.tonemap.jpg 101 | # depth paths are of the form //images/scene_cam_#_final_preview/*.depth_meters.hdf5 102 | self.image_files = glob.glob(os.path.join( 103 | data_dir_root, '*', 'images', 'scene_cam_*_final_preview', '*.tonemap.jpg')) 104 | self.depth_files = [r.replace("_final_preview", "_geometry_hdf5").replace( 105 | ".tonemap.jpg", ".depth_meters.hdf5") for r in self.image_files] 106 | self.transform = ToTensor() 107 | 108 | def __getitem__(self, idx): 109 | image_path = self.image_files[idx] 110 | depth_path = self.depth_files[idx] 111 | 112 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 113 | 114 | # depth from hdf5 115 | depth_fd = 
h5py.File(depth_path, "r") 116 | # in meters (Euclidean distance) 117 | distance_meters = np.array(depth_fd['dataset']) 118 | depth = hypersim_distance_to_depth( 119 | distance_meters) # in meters (planar depth) 120 | 121 | # depth[depth > 8] = -1 122 | depth = depth[..., None] 123 | 124 | sample = dict(image=image, depth=depth) 125 | sample = self.transform(sample) 126 | 127 | if idx == 0: 128 | print(sample["image"].shape) 129 | 130 | return sample 131 | 132 | def __len__(self): 133 | return len(self.image_files) 134 | 135 | 136 | def get_hypersim_loader(data_dir_root, batch_size=1, **kwargs): 137 | dataset = HyperSim(data_dir_root) 138 | return DataLoader(dataset, batch_size, **kwargs) 139 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/fsdp/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import os 8 | from typing import Any 9 | 10 | import torch 11 | import dinov2.distributed as distributed 12 | from functools import partial 13 | from fvcore.common.checkpoint import Checkpointer 14 | from torch.distributed.fsdp import FullyShardedDataParallel as FSDP 15 | from torch.distributed.fsdp import ShardingStrategy 16 | from torch.distributed.fsdp import MixedPrecision 17 | from torch.distributed.fsdp import StateDictType 18 | from torch.distributed.fsdp.sharded_grad_scaler import ShardedGradScaler 19 | from torch.distributed.fsdp.wrap import ModuleWrapPolicy 20 | from torch.distributed.fsdp._runtime_utils import _reshard 21 | 22 | 23 | def get_fsdp_wrapper(model_cfg, modules_to_wrap=set()): 24 | sharding_strategy_dict = { 25 | "NO_SHARD": ShardingStrategy.NO_SHARD, 26 | "SHARD_GRAD_OP": ShardingStrategy.SHARD_GRAD_OP, 27 | "FULL_SHARD": ShardingStrategy.FULL_SHARD, 28 | } 29 | 30 | dtype_dict = { 31 | "fp32": torch.float32, 32 | "fp16": torch.float16, 33 | "bf16": torch.bfloat16, 34 | } 35 | 36 | mixed_precision_config = MixedPrecision( 37 | param_dtype=dtype_dict[model_cfg.mixed_precision.param_dtype], 38 | reduce_dtype=dtype_dict[model_cfg.mixed_precision.reduce_dtype], 39 | buffer_dtype=dtype_dict[model_cfg.mixed_precision.buffer_dtype], 40 | ) 41 | 42 | sharding_strategy_config = sharding_strategy_dict[model_cfg.sharding_strategy] 43 | 44 | local_rank = distributed.get_local_rank() 45 | 46 | fsdp_wrapper = partial( 47 | FSDP, 48 | sharding_strategy=sharding_strategy_config, 49 | mixed_precision=mixed_precision_config, 50 | device_id=local_rank, 51 | sync_module_states=True, 52 | use_orig_params=True, 53 | auto_wrap_policy=ModuleWrapPolicy(modules_to_wrap), 54 | ) 55 | return fsdp_wrapper 56 | 57 | 58 | def is_fsdp(x): 59 | return isinstance(x, FSDP) 60 | 61 | 62 | def is_sharded_fsdp(x): 63 | return is_fsdp(x) and x.sharding_strategy is not ShardingStrategy.NO_SHARD 64 | 65 | 66 | def free_if_fsdp(x): 67 | if is_sharded_fsdp(x): 68 | handles = x._handles 69 | true_list = [True for h in handles] 70 | _reshard(x, handles, true_list) 71 | 72 | 73 | def get_fsdp_modules(x): 74 | return FSDP.fsdp_modules(x) 75 | 76 | 77 | def reshard_fsdp_model(x): 78 | for m in get_fsdp_modules(x): 79 | free_if_fsdp(m) 80 | 81 | 82 | def rankstr(): 83 | return f"rank_{distributed.get_global_rank()}" 84 | 85 | 86 | class FSDPCheckpointer(Checkpointer): 87 | def 
save(self, name: str, **kwargs: Any) -> None: 88 | """ 89 | Dump model and checkpointables to a file. 90 | 91 | Args: 92 | name (str): name of the file. 93 | kwargs (dict): extra arbitrary data to save. 94 | """ 95 | if not self.save_dir or not self.save_to_disk: 96 | return 97 | 98 | data = {} 99 | with FSDP.state_dict_type(self.model, StateDictType.LOCAL_STATE_DICT): 100 | data["model"] = self.model.state_dict() 101 | 102 | # data["model"] = self.model.state_dict() 103 | for key, obj in self.checkpointables.items(): 104 | data[key] = obj.state_dict() 105 | data.update(kwargs) 106 | 107 | basename = f"{name}.{rankstr()}.pth" 108 | save_file = os.path.join(self.save_dir, basename) 109 | assert os.path.basename(save_file) == basename, basename 110 | self.logger.info("Saving checkpoint to {}".format(save_file)) 111 | with self.path_manager.open(save_file, "wb") as f: 112 | torch.save(data, f) 113 | self.tag_last_checkpoint(basename) 114 | 115 | def load(self, *args, **kwargs): 116 | with FSDP.state_dict_type(self.model, StateDictType.LOCAL_STATE_DICT): 117 | return super().load(*args, **kwargs) 118 | 119 | def has_checkpoint(self) -> bool: 120 | """ 121 | Returns: 122 | bool: whether a checkpoint exists in the target directory. 123 | """ 124 | save_file = os.path.join(self.save_dir, f"last_checkpoint.{rankstr()}") 125 | return self.path_manager.exists(save_file) 126 | 127 | def get_checkpoint_file(self) -> str: 128 | """ 129 | Returns: 130 | str: The latest checkpoint file in target directory. 131 | """ 132 | save_file = os.path.join(self.save_dir, f"last_checkpoint.{rankstr()}") 133 | try: 134 | with self.path_manager.open(save_file, "r") as f: 135 | last_saved = f.read().strip() 136 | except IOError: 137 | # if file doesn't exist, maybe because it has just been 138 | # deleted by a separate process 139 | return "" 140 | # pyre-fixme[6]: For 2nd param expected `Union[PathLike[str], str]` but got 141 | # `Union[bytes, str]`. 142 | return os.path.join(self.save_dir, last_saved) 143 | 144 | def tag_last_checkpoint(self, last_filename_basename: str) -> None: 145 | """ 146 | Tag the last checkpoint. 147 | 148 | Args: 149 | last_filename_basename (str): the basename of the last filename. 150 | """ 151 | if distributed.is_enabled(): 152 | torch.distributed.barrier() 153 | save_file = os.path.join(self.save_dir, f"last_checkpoint.{rankstr()}") 154 | with self.path_manager.open(save_file, "w") as f: 155 | f.write(last_filename_basename) # pyre-ignore 156 | 157 | 158 | ShardedGradScaler = ShardedGradScaler 159 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/dinov2/eval/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import logging 8 | from typing import Dict, Optional 9 | 10 | import torch 11 | from torch import nn 12 | from torchmetrics import MetricCollection 13 | 14 | from dinov2.data import DatasetWithEnumeratedTargets, SamplerType, make_data_loader 15 | import dinov2.distributed as distributed 16 | from dinov2.logging import MetricLogger 17 | 18 | 19 | logger = logging.getLogger("dinov2") 20 | 21 | 22 | class ModelWithNormalize(torch.nn.Module): 23 | def __init__(self, model): 24 | super().__init__() 25 | self.model = model 26 | 27 | def forward(self, samples): 28 | return nn.functional.normalize(self.model(samples), dim=1, p=2) 29 | 30 | 31 | class ModelWithIntermediateLayers(nn.Module): 32 | def __init__(self, feature_model, n_last_blocks, autocast_ctx): 33 | super().__init__() 34 | self.feature_model = feature_model 35 | self.feature_model.eval() 36 | self.n_last_blocks = n_last_blocks 37 | self.autocast_ctx = autocast_ctx 38 | 39 | def forward(self, images): 40 | with torch.inference_mode(): 41 | with self.autocast_ctx(): 42 | features = self.feature_model.get_intermediate_layers( 43 | images, self.n_last_blocks, return_class_token=True 44 | ) 45 | return features 46 | 47 | 48 | @torch.inference_mode() 49 | def evaluate( 50 | model: nn.Module, 51 | data_loader, 52 | postprocessors: Dict[str, nn.Module], 53 | metrics: Dict[str, MetricCollection], 54 | device: torch.device, 55 | criterion: Optional[nn.Module] = None, 56 | ): 57 | model.eval() 58 | if criterion is not None: 59 | criterion.eval() 60 | 61 | for metric in metrics.values(): 62 | metric = metric.to(device) 63 | 64 | metric_logger = MetricLogger(delimiter=" ") 65 | header = "Test:" 66 | 67 | for samples, targets, *_ in metric_logger.log_every(data_loader, 10, header): 68 | outputs = model(samples.to(device)) 69 | targets = targets.to(device) 70 | 71 | if criterion is not None: 72 | loss = criterion(outputs, targets) 73 | metric_logger.update(loss=loss.item()) 74 | 75 | for k, metric in metrics.items(): 76 | metric_inputs = postprocessors[k](outputs, targets) 77 | metric.update(**metric_inputs) 78 | 79 | metric_logger.synchronize_between_processes() 80 | logger.info(f"Averaged stats: {metric_logger}") 81 | 82 | stats = {k: metric.compute() for k, metric in metrics.items()} 83 | metric_logger_stats = {k: meter.global_avg for k, meter in metric_logger.meters.items()} 84 | return metric_logger_stats, stats 85 | 86 | 87 | def all_gather_and_flatten(tensor_rank): 88 | tensor_all_ranks = torch.empty( 89 | distributed.get_global_size(), 90 | *tensor_rank.shape, 91 | dtype=tensor_rank.dtype, 92 | device=tensor_rank.device, 93 | ) 94 | tensor_list = list(tensor_all_ranks.unbind(0)) 95 | torch.distributed.all_gather(tensor_list, tensor_rank.contiguous()) 96 | return tensor_all_ranks.flatten(end_dim=1) 97 | 98 | 99 | def extract_features(model, dataset, batch_size, num_workers, gather_on_cpu=False): 100 | dataset_with_enumerated_targets = DatasetWithEnumeratedTargets(dataset) 101 | sample_count = len(dataset_with_enumerated_targets) 102 | data_loader = make_data_loader( 103 | dataset=dataset_with_enumerated_targets, 104 | batch_size=batch_size, 105 | num_workers=num_workers, 106 | sampler_type=SamplerType.DISTRIBUTED, 107 | drop_last=False, 108 | shuffle=False, 109 | ) 110 | return extract_features_with_dataloader(model, data_loader, sample_count, gather_on_cpu) 111 | 112 | 113 | @torch.inference_mode() 114 | def extract_features_with_dataloader(model, data_loader, sample_count, gather_on_cpu=False): 115 | gather_device = 
torch.device("cpu") if gather_on_cpu else torch.device("cuda") 116 | metric_logger = MetricLogger(delimiter=" ") 117 | features, all_labels = None, None 118 | for samples, (index, labels_rank) in metric_logger.log_every(data_loader, 10): 119 | samples = samples.cuda(non_blocking=True) 120 | labels_rank = labels_rank.cuda(non_blocking=True) 121 | index = index.cuda(non_blocking=True) 122 | features_rank = model(samples).float() 123 | 124 | # init storage feature matrix 125 | if features is None: 126 | features = torch.zeros(sample_count, features_rank.shape[-1], device=gather_device) 127 | labels_shape = list(labels_rank.shape) 128 | labels_shape[0] = sample_count 129 | all_labels = torch.full(labels_shape, fill_value=-1, device=gather_device) 130 | logger.info(f"Storing features into tensor of shape {features.shape}") 131 | 132 | # share indexes, features and labels between processes 133 | index_all = all_gather_and_flatten(index).to(gather_device) 134 | features_all_ranks = all_gather_and_flatten(features_rank).to(gather_device) 135 | labels_all_ranks = all_gather_and_flatten(labels_rank).to(gather_device) 136 | 137 | # update storage feature matrix 138 | if len(index_all) > 0: 139 | features.index_copy_(0, index_all, features_all_ranks) 140 | all_labels.index_copy_(0, index_all, labels_all_ranks) 141 | 142 | logger.info(f"Features shape: {tuple(features.shape)}") 143 | logger.info(f"Labels shape: {tuple(all_labels.shape)}") 144 | 145 | assert torch.all(all_labels > -1) 146 | 147 | return features, all_labels 148 | -------------------------------------------------------------------------------- /zoedepth/data/vkitti.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | from torch.utils.data import Dataset, DataLoader 27 | from torchvision import transforms 28 | import os 29 | 30 | from PIL import Image 31 | import numpy as np 32 | import cv2 33 | 34 | 35 | class ToTensor(object): 36 | def __init__(self): 37 | self.normalize = transforms.Normalize( 38 | mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 39 | # self.resize = transforms.Resize((375, 1242)) 40 | 41 | def __call__(self, sample): 42 | image, depth = sample['image'], sample['depth'] 43 | 44 | image = self.to_tensor(image) 45 | image = self.normalize(image) 46 | depth = self.to_tensor(depth) 47 | 48 | # image = self.resize(image) 49 | 50 | return {'image': image, 'depth': depth, 'dataset': "vkitti"} 51 | 52 | def to_tensor(self, pic): 53 | 54 | if isinstance(pic, np.ndarray): 55 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 56 | return img 57 | 58 | # # handle PIL Image 59 | if pic.mode == 'I': 60 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 61 | elif pic.mode == 'I;16': 62 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 63 | else: 64 | img = torch.ByteTensor( 65 | torch.ByteStorage.from_buffer(pic.tobytes())) 66 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 67 | if pic.mode == 'YCbCr': 68 | nchannel = 3 69 | elif pic.mode == 'I;16': 70 | nchannel = 1 71 | else: 72 | nchannel = len(pic.mode) 73 | img = img.view(pic.size[1], pic.size[0], nchannel) 74 | 75 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 76 | if isinstance(img, torch.ByteTensor): 77 | return img.float() 78 | else: 79 | return img 80 | 81 | 82 | class VKITTI(Dataset): 83 | def __init__(self, data_dir_root, do_kb_crop=True): 84 | import glob 85 | # image paths are of the form /{HR, LR}//{color, depth_filled}/*.png 86 | self.image_files = glob.glob(os.path.join( 87 | data_dir_root, "test_color", '*.png')) 88 | self.depth_files = [r.replace("test_color", "test_depth") 89 | for r in self.image_files] 90 | self.do_kb_crop = do_kb_crop 91 | self.transform = ToTensor() 92 | 93 | def __getitem__(self, idx): 94 | image_path = self.image_files[idx] 95 | depth_path = self.depth_files[idx] 96 | 97 | image = Image.open(image_path) 98 | depth = Image.open(depth_path) 99 | depth = cv2.imread(depth_path, cv2.IMREAD_ANYCOLOR | 100 | cv2.IMREAD_ANYDEPTH) 101 | print("depth min max", depth.min(), depth.max()) 102 | 103 | # print(np.shape(image)) 104 | # print(np.shape(depth)) 105 | 106 | # depth[depth > 8] = -1 107 | 108 | if self.do_kb_crop and False: 109 | height = image.height 110 | width = image.width 111 | top_margin = int(height - 352) 112 | left_margin = int((width - 1216) / 2) 113 | depth = depth.crop( 114 | (left_margin, top_margin, left_margin + 1216, top_margin + 352)) 115 | image = image.crop( 116 | (left_margin, top_margin, left_margin + 1216, top_margin + 352)) 117 | # uv = uv[:, top_margin:top_margin + 352, left_margin:left_margin + 1216] 118 | 119 | image = np.asarray(image, dtype=np.float32) / 255.0 120 | # depth = np.asarray(depth, dtype=np.uint16) /1.
121 | depth = depth[..., None] 122 | sample = dict(image=image, depth=depth) 123 | 124 | # return sample 125 | sample = self.transform(sample) 126 | 127 | if idx == 0: 128 | print(sample["image"].shape) 129 | 130 | return sample 131 | 132 | def __len__(self): 133 | return len(self.image_files) 134 | 135 | 136 | def get_vkitti_loader(data_dir_root, batch_size=1, **kwargs): 137 | dataset = VKITTI(data_dir_root) 138 | return DataLoader(dataset, batch_size, **kwargs) 139 | 140 | 141 | if __name__ == "__main__": 142 | loader = get_vkitti_loader( 143 | data_dir_root="/home/bhatsf/shortcuts/datasets/vkitti_test") 144 | print("Total files", len(loader.dataset)) 145 | for i, sample in enumerate(loader): 146 | print(sample["image"].shape) 147 | print(sample["depth"].shape) 148 | print(sample["dataset"]) 149 | print(sample['depth'].min(), sample['depth'].max()) 150 | if i > 5: 151 | break 152 | -------------------------------------------------------------------------------- /torchhub/facebookresearch_dinov2_main/hubconf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # 3 | # This source code is licensed under the Apache License, Version 2.0 4 | # found in the LICENSE file in the root directory of this source tree. 5 | 6 | from enum import Enum 7 | from typing import Union 8 | 9 | import torch 10 | 11 | _DINOV2_BASE_URL = "https://dl.fbaipublicfiles.com/dinov2" 12 | 13 | 14 | def _make_dinov2_model_name(arch_name: str, patch_size: int, num_register_tokens: int = 0) -> str: 15 | compact_arch_name = arch_name.replace("_", "")[:4] 16 | registers_suffix = f"_reg{num_register_tokens}" if num_register_tokens else "" 17 | return f"dinov2_{compact_arch_name}{patch_size}{registers_suffix}" 18 | 19 | 20 | class Weights(Enum): 21 | LVD142M = "LVD142M" 22 | 23 | 24 | def _make_dinov2_model( 25 | *, 26 | arch_name: str = "vit_large", 27 | img_size: int = 518, 28 | patch_size: int = 14, 29 | init_values: float = 1.0, 30 | ffn_layer: str = "mlp", 31 | block_chunks: int = 0, 32 | num_register_tokens: int = 0, 33 | interpolate_antialias: bool = False, 34 | interpolate_offset: float = 0.1, 35 | pretrained: bool = True, 36 | weights: Union[Weights, str] = Weights.LVD142M, 37 | **kwargs, 38 | ): 39 | import vision_transformer as vits 40 | 41 | if isinstance(weights, str): 42 | try: 43 | weights = Weights[weights] 44 | except KeyError: 45 | raise AssertionError(f"Unsupported weights: {weights}") 46 | 47 | model_base_name = _make_dinov2_model_name(arch_name, patch_size) 48 | vit_kwargs = dict( 49 | img_size=img_size, 50 | patch_size=patch_size, 51 | init_values=init_values, 52 | ffn_layer=ffn_layer, 53 | block_chunks=block_chunks, 54 | num_register_tokens=num_register_tokens, 55 | interpolate_antialias=interpolate_antialias, 56 | interpolate_offset=interpolate_offset, 57 | ) 58 | vit_kwargs.update(**kwargs) 59 | model = vits.__dict__[arch_name](**vit_kwargs) 60 | 61 | if pretrained: 62 | model_full_name = _make_dinov2_model_name(arch_name, patch_size, num_register_tokens) 63 | url = _DINOV2_BASE_URL + f"/{model_base_name}/{model_full_name}_pretrain.pth" 64 | state_dict = torch.hub.load_state_dict_from_url(url, map_location="cpu") 65 | model.load_state_dict(state_dict, strict=True) 66 | 67 | return model 68 | 69 | 70 | def dinov2_vits14(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs): 71 | """ 72 | DINOv2 ViT-S/14 model (optionally) pretrained on the LVD-142M dataset. 
73 | """ 74 | return _make_dinov2_model(arch_name="vit_small", pretrained=pretrained, weights=weights, **kwargs) 75 | 76 | 77 | def dinov2_vitb14(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs): 78 | """ 79 | DINOv2 ViT-B/14 model (optionally) pretrained on the LVD-142M dataset. 80 | """ 81 | return _make_dinov2_model(arch_name="vit_base", pretrained=pretrained, weights=weights, **kwargs) 82 | 83 | 84 | def dinov2_vitl14(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs): 85 | """ 86 | DINOv2 ViT-L/14 model (optionally) pretrained on the LVD-142M dataset. 87 | """ 88 | return _make_dinov2_model(arch_name="vit_large", pretrained=pretrained, weights=weights, **kwargs) 89 | 90 | 91 | def dinov2_vitg14(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs): 92 | """ 93 | DINOv2 ViT-g/14 model (optionally) pretrained on the LVD-142M dataset. 94 | """ 95 | return _make_dinov2_model( 96 | arch_name="vit_giant2", 97 | ffn_layer="swiglufused", 98 | weights=weights, 99 | pretrained=pretrained, 100 | **kwargs, 101 | ) 102 | 103 | 104 | def dinov2_vits14_reg(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs): 105 | """ 106 | DINOv2 ViT-S/14 model with registers (optionally) pretrained on the LVD-142M dataset. 107 | """ 108 | return _make_dinov2_model( 109 | arch_name="vit_small", 110 | pretrained=pretrained, 111 | weights=weights, 112 | num_register_tokens=4, 113 | interpolate_antialias=True, 114 | interpolate_offset=0.0, 115 | **kwargs, 116 | ) 117 | 118 | 119 | def dinov2_vitb14_reg(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs): 120 | """ 121 | DINOv2 ViT-B/14 model with registers (optionally) pretrained on the LVD-142M dataset. 122 | """ 123 | return _make_dinov2_model( 124 | arch_name="vit_base", 125 | pretrained=pretrained, 126 | weights=weights, 127 | num_register_tokens=4, 128 | interpolate_antialias=True, 129 | interpolate_offset=0.0, 130 | **kwargs, 131 | ) 132 | 133 | 134 | def dinov2_vitl14_reg(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs): 135 | """ 136 | DINOv2 ViT-L/14 model with registers (optionally) pretrained on the LVD-142M dataset. 137 | """ 138 | return _make_dinov2_model( 139 | arch_name="vit_large", 140 | pretrained=pretrained, 141 | weights=weights, 142 | num_register_tokens=4, 143 | interpolate_antialias=True, 144 | interpolate_offset=0.0, 145 | **kwargs, 146 | ) 147 | 148 | 149 | def dinov2_vitg14_reg(*, pretrained: bool = True, weights: Union[Weights, str] = Weights.LVD142M, **kwargs): 150 | """ 151 | DINOv2 ViT-g/14 model with registers (optionally) pretrained on the LVD-142M dataset. 152 | """ 153 | return _make_dinov2_model( 154 | arch_name="vit_giant2", 155 | ffn_layer="swiglufused", 156 | weights=weights, 157 | pretrained=pretrained, 158 | num_register_tokens=4, 159 | interpolate_antialias=True, 160 | interpolate_offset=0.0, 161 | **kwargs, 162 | ) 163 | --------------------------------------------------------------------------------