├── .gitignore ├── data └── __init__.py ├── environment.yaml ├── .gitmodules ├── models ├── multi_cue_loss.py ├── dust3r.py ├── moge.py └── resnet_backbones.py ├── LICENSE └── configs ├── depth_diw ├── sam-b16.yaml ├── deit-b16.yaml ├── dino-b16.yaml ├── midas-l16.yaml ├── ibot-b16.yaml ├── mae-b16.yaml ├── convnext-b16.yaml ├── dust3r-l16.yaml ├── siglip-b16.yaml ├── croco-b16.yaml ├── lrm-b14.yaml ├── clip-b16.yaml ├── dinov2-b14.yaml ├── vit-b16.yaml ├── resnet18.yaml ├── resnet50.yaml ├── senet154.yaml ├── stablediffusion.yaml ├── depthanythingv2-b14.yaml └── resnext50.yaml ├── depth_nyuv2 ├── sam-b16.yaml ├── deit-b16.yaml ├── dino-b16.yaml ├── midas-l16.yaml ├── ibot-b16.yaml ├── mae-b16.yaml ├── convnext-b16.yaml ├── dust3r-l16.yaml ├── croco-b16.yaml ├── lrm-b14.yaml ├── clip-b16.yaml ├── dinov2-b14.yaml ├── vit-b16.yaml ├── resnet18.yaml ├── resnet50.yaml ├── senet154.yaml ├── stablediffusion.yaml ├── depthanythingv2-b14.yaml ├── resnext50.yaml └── siglip-b16.yaml ├── size ├── midas-l16.yaml ├── ibot-b16.yaml ├── sam-b16.yaml ├── deit-b16.yaml ├── dino-b16.yaml ├── dust3r-l16.yaml ├── mae-b16.yaml ├── convnext-b16.yaml ├── croco-b16.yaml ├── lrm-b14.yaml ├── dinov2-b14.yaml ├── vit-b16.yaml ├── resnet50.yaml ├── senet154.yaml ├── stablediffusion.yaml ├── clip-b16.yaml ├── resnext50.yaml ├── resnet18.yaml ├── depthanythingv2-b14.yaml └── siglip-b16.yaml ├── occlusion ├── sam-b16.yaml ├── midas-l16.yaml ├── ibot-b16.yaml ├── mae-b16.yaml ├── croco-b16.yaml ├── deit-b16.yaml ├── dino-b16.yaml ├── convnext-b16.yaml ├── dust3r-l16.yaml ├── lrm-b14.yaml ├── dinov2-b14.yaml ├── vit-b16.yaml ├── stablediffusion.yaml ├── depthanythingv2-b14.yaml ├── resnet50.yaml ├── senet154.yaml ├── clip-b16.yaml ├── resnext50.yaml ├── resnet18.yaml └── siglip-b16.yaml ├── light-shadow ├── sam-b16.yaml ├── midas-l16.yaml ├── ibot-b16.yaml ├── mae-b16.yaml ├── croco-b16.yaml ├── deit-b16.yaml ├── dino-b16.yaml ├── convnext-b16.yaml ├── dust3r-l16.yaml ├── lrm-b14.yaml ├── 
dinov2-b14.yaml ├── vit-b16.yaml ├── resnet50.yaml ├── senet154.yaml ├── resnext50.yaml ├── stablediffusion.yaml ├── clip-b16.yaml ├── depthanythingv2-b14.yaml ├── resnet18.yaml └── siglip-b16.yaml ├── texture-grad ├── midas-l16.yaml ├── ibot-b16.yaml ├── sam-b16.yaml ├── deit-b16.yaml ├── dino-b16.yaml ├── dust3r-l16.yaml ├── mae-b16.yaml ├── convnext-b16.yaml ├── croco-b16.yaml ├── lrm-b14.yaml ├── dinov2-b14.yaml ├── vit-b16.yaml ├── resnet50.yaml ├── stablediffusion.yaml ├── clip-b16.yaml ├── senet154.yaml ├── resnext50.yaml ├── resnet18.yaml ├── depthanythingv2-b14.yaml └── siglip-b16.yaml ├── elevation ├── midas-l16.yaml ├── ibot-b16.yaml ├── mae-b16.yaml ├── sam-b16.yaml ├── deit-b16.yaml ├── dino-b16.yaml ├── dust3r-l16.yaml ├── convnext-b16.yaml ├── croco-b16.yaml ├── lrm-b14.yaml ├── dinov2-b14.yaml ├── vit-b16.yaml ├── resnet50.yaml ├── stablediffusion.yaml ├── clip-b16.yaml ├── senet154.yaml └── resnext50.yaml └── perspective ├── midas-l16.yaml ├── sam-b16.yaml ├── deit-b16.yaml ├── dino-b16.yaml ├── ibot-b16.yaml ├── mae-b16.yaml ├── convnext-b16.yaml ├── croco-b16.yaml ├── dust3r-l16.yaml ├── lrm-b14.yaml ├── dinov2-b14.yaml ├── vit-b16.yaml └── stablediffusion.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | wandb/* 2 | logs/* 3 | logs_finetune/* 4 | *pycache* -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | from data.occlusion import * 2 | from data.lightshadow import * 3 | from data.perspective import * 4 | from data.size import * 5 | from data.texturegrad import * 6 | from data.elevation import * -------------------------------------------------------------------------------- /environment.yaml: -------------------------------------------------------------------------------- 1 | name: depthcues 2 | channels: 3 | - pytorch 4 | - nvidia 5 | - conda-forge 6 | - 
class MultiCueLoss:
    """Bundle per-cue loss functions, loss weights and index ranges.

    Every cue is given a half-open ``(left, right)`` index range. Ranges are
    laid out back to back, in the order the cues are listed. The cues named in
    ``_TWO_WIDE_CUES`` span two indices; every other cue spans one.

    Args:
        loss_config: Mapping from cue name to that cue's loss configuration.
            Each entry is passed to ``instantiate_from_config`` and is also
            queried with ``.get('weight', 1.)``. May only be ``None`` when
            ``cues`` is empty.
        cues: Iterable of cue names. It is copied, so the caller's sequence
            is never aliased and a one-shot iterable is safe to pass.

    Raises:
        TypeError: If ``loss_config`` is ``None`` while ``cues`` is non-empty.
            The original code failed with the same exception type, only with
            an opaque "'NoneType' object is not subscriptable" message.

    Attributes set by ``__init__``:
        cues: ``list`` of cue names, in order.
        cue_indices: ``dict`` cue name -> ``(left, right)`` (right exclusive).
        loss_functions: ``dict`` cue name -> object returned by
            ``instantiate_from_config(loss_config[cue])``.
        loss_weights: ``dict`` cue name -> configured weight, ``1.`` when the
            entry has no ``'weight'`` key.
    """

    # Cues that occupy two consecutive indices instead of one.
    _TWO_WIDE_CUES = ('perspective', 'elevation')

    def __init__(
        self,
        loss_config=None,
        cues=('occlusion', 'lightshadow', 'perspective', 'size', 'texturegrad', 'elevation'),
    ):
        super().__init__()

        # The default is an immutable tuple and the value is copied: the
        # previous list default was stored on the instance directly, so
        # mutating ``self.cues`` on one instance changed the default seen by
        # every later instance.
        self.cues = list(cues)

        if self.cues and loss_config is None:
            raise TypeError(
                "loss_config must provide an entry for every cue, got None"
            )

        self.cue_indices = self._build_cue_indices(self.cues)

        self.loss_functions = {
            cue: instantiate_from_config(loss_config[cue]) for cue in self.cues
        }

        self.loss_weights = {
            cue: loss_config[cue].get('weight', 1.) for cue in self.cues
        }

    @staticmethod
    def _build_cue_indices(cues):
        """Return ``{cue: (left, right)}`` with contiguous half-open ranges."""
        indices = {}
        left = 0
        for cue in cues:
            width = 2 if cue in MultiCueLoss._TWO_WIDE_CUES else 1
            indices[cue] = (left, left + width)
            left += width
        return indices
22 | -------------------------------------------------------------------------------- /configs/depth_diw/sam-b16.yaml: -------------------------------------------------------------------------------- 1 | task: depth_diw 2 | model_name: sam-b16 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 1 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.SAMBackbone 9 | params: 10 | arch: vit_b 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: dot 15 | layer: 11 16 | use_cls: false 17 | probe_config: 18 | type: linear 19 | target: models.probes.LinearProbeModel 20 | params: 21 | in_features: 768 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.diw.DIW 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/diw/ 29 | split: train 30 | batch_size: 8 31 | val: 32 | target: data.diw.DIW 33 | params: 34 | data_path: /disk/scratch_ssd/danier/data/diw/ 35 | split: val 36 | batch_size: 8 37 | test: 38 | target: data.diw.DIW 39 | params: 40 | data_path: /disk/scratch_ssd/danier/data/diw/ 41 | split: test 42 | return_path: true 43 | batch_size: 8 44 | loss_config: 45 | target: torch.nn.BCEWithLogitsLoss 46 | -------------------------------------------------------------------------------- /configs/depth_diw/deit-b16.yaml: -------------------------------------------------------------------------------- 1 | task: depth_diw 2 | model_name: deit-b16 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 1 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.DeITBackbone 9 | params: 10 | model_size: base 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: dot 15 | layer: 8 16 | use_cls: false 17 | probe_config: 18 | type: linear 19 | target: models.probes.LinearProbeModel 20 | params: 21 | in_features: 768 22 | 
num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.diw.DIW 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/diw/ 29 | split: train 30 | batch_size: 8 31 | val: 32 | target: data.diw.DIW 33 | params: 34 | data_path: /disk/scratch_ssd/danier/data/diw/ 35 | split: val 36 | batch_size: 8 37 | test: 38 | target: data.diw.DIW 39 | params: 40 | data_path: /disk/scratch_ssd/danier/data/diw/ 41 | split: test 42 | return_path: true 43 | batch_size: 8 44 | loss_config: 45 | target: torch.nn.BCEWithLogitsLoss 46 | -------------------------------------------------------------------------------- /configs/depth_diw/dino-b16.yaml: -------------------------------------------------------------------------------- 1 | task: depth_diw 2 | model_name: dino-b16 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 1 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.DINOBackbone 9 | params: 10 | dino_name: dino 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: dot 15 | layer: 8 16 | use_cls: false 17 | probe_config: 18 | type: linear 19 | target: models.probes.LinearProbeModel 20 | params: 21 | in_features: 768 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.diw.DIW 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/diw/ 29 | split: train 30 | batch_size: 8 31 | val: 32 | target: data.diw.DIW 33 | params: 34 | data_path: /disk/scratch_ssd/danier/data/diw/ 35 | split: val 36 | batch_size: 8 37 | test: 38 | target: data.diw.DIW 39 | params: 40 | data_path: /disk/scratch_ssd/danier/data/diw/ 41 | split: test 42 | return_path: true 43 | batch_size: 8 44 | loss_config: 45 | target: torch.nn.BCEWithLogitsLoss 46 | -------------------------------------------------------------------------------- /configs/depth_diw/midas-l16.yaml: 
-------------------------------------------------------------------------------- 1 | task: depth_diw 2 | model_name: midas-l16 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 1 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.MidasBackbone 9 | params: 10 | midas: true 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: dot 15 | layer: 11 16 | use_cls: false 17 | probe_config: 18 | type: linear 19 | target: models.probes.LinearProbeModel 20 | params: 21 | in_features: 1024 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.diw.DIW 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/diw/ 29 | split: train 30 | batch_size: 8 31 | val: 32 | target: data.diw.DIW 33 | params: 34 | data_path: /disk/scratch_ssd/danier/data/diw/ 35 | split: val 36 | batch_size: 8 37 | test: 38 | target: data.diw.DIW 39 | params: 40 | data_path: /disk/scratch_ssd/danier/data/diw/ 41 | split: test 42 | return_path: true 43 | batch_size: 8 44 | loss_config: 45 | target: torch.nn.BCEWithLogitsLoss 46 | -------------------------------------------------------------------------------- /configs/depth_diw/ibot-b16.yaml: -------------------------------------------------------------------------------- 1 | task: depth_diw 2 | model_name: ibot-b16 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 1 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.iBOTBackbone 9 | params: 10 | model_type: base_in22k 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: dot 15 | layer: 11 16 | use_cls: false 17 | probe_config: 18 | type: linear 19 | target: models.probes.LinearProbeModel 20 | params: 21 | in_features: 768 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.diw.DIW 27 | params: 28 | 
data_path: /disk/scratch_ssd/danier/data/diw/ 29 | split: train 30 | batch_size: 8 31 | val: 32 | target: data.diw.DIW 33 | params: 34 | data_path: /disk/scratch_ssd/danier/data/diw/ 35 | split: val 36 | batch_size: 8 37 | test: 38 | target: data.diw.DIW 39 | params: 40 | data_path: /disk/scratch_ssd/danier/data/diw/ 41 | split: test 42 | return_path: true 43 | batch_size: 8 44 | loss_config: 45 | target: torch.nn.BCEWithLogitsLoss 46 | -------------------------------------------------------------------------------- /configs/depth_diw/mae-b16.yaml: -------------------------------------------------------------------------------- 1 | task: depth_diw 2 | model_name: mae-b16 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 1 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.MAEBackbone 9 | params: 10 | checkpoint: facebook/vit-mae-base 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: dot 15 | layer: 11 16 | use_cls: false 17 | probe_config: 18 | type: linear 19 | target: models.probes.LinearProbeModel 20 | params: 21 | in_features: 768 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.diw.DIW 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/diw/ 29 | split: train 30 | batch_size: 8 31 | val: 32 | target: data.diw.DIW 33 | params: 34 | data_path: /disk/scratch_ssd/danier/data/diw/ 35 | split: val 36 | batch_size: 8 37 | test: 38 | target: data.diw.DIW 39 | params: 40 | data_path: /disk/scratch_ssd/danier/data/diw/ 41 | split: test 42 | return_path: true 43 | batch_size: 8 44 | loss_config: 45 | target: torch.nn.BCEWithLogitsLoss 46 | -------------------------------------------------------------------------------- /configs/depth_nyuv2/sam-b16.yaml: -------------------------------------------------------------------------------- 1 | task: depth_nyuv2 2 | model_name: sam-b16 3 | weight_decay: 0.001 4 | 
learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.SAMBackbone 9 | params: 10 | arch: vit_b 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: patch 15 | layer: 11 16 | probe_config: 17 | type: depth 18 | target: models.probes.DepthProbeModel 19 | params: 20 | in_features: 768 21 | data_config: 22 | num_workers: 4 23 | train: 24 | target: data.nyuv2.NYU_geonet 25 | params: 26 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 27 | split: train 28 | batch_size: 8 29 | val: 30 | target: data.nyuv2.NYU_geonet 31 | params: 32 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 33 | split: val 34 | batch_size: 8 35 | test: 36 | target: data.nyuv2.NYU_test 37 | params: 38 | data_path: /disk/scratch_ssd/danier/data/nyuv2/nyuv2_test.pkl 39 | batch_size: 8 40 | loss_config: 41 | target: submodules.probe3d.evals.utils.losses.DepthLoss 42 | metric_config: 43 | target: utils.DepthMetric 44 | -------------------------------------------------------------------------------- /configs/depth_diw/convnext-b16.yaml: -------------------------------------------------------------------------------- 1 | task: depth_diw 2 | model_name: convnext-b16 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 1 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.ConvNextBackbone 9 | params: 10 | arch: convnext_base 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: dot 15 | layer: 3 16 | use_cls: false 17 | probe_config: 18 | type: linear 19 | target: models.probes.LinearProbeModel 20 | params: 21 | in_features: __check__model__ 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.diw.DIW 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/diw/ 29 | split: train 30 | batch_size: 8 31 | val: 32 | 
target: data.diw.DIW 33 | params: 34 | data_path: /disk/scratch_ssd/danier/data/diw/ 35 | split: val 36 | batch_size: 8 37 | test: 38 | target: data.diw.DIW 39 | params: 40 | data_path: /disk/scratch_ssd/danier/data/diw/ 41 | split: test 42 | return_path: true 43 | batch_size: 8 44 | loss_config: 45 | target: torch.nn.BCEWithLogitsLoss 46 | -------------------------------------------------------------------------------- /configs/depth_nyuv2/deit-b16.yaml: -------------------------------------------------------------------------------- 1 | task: depth_nyuv2 2 | model_name: deit-b16 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.DeITBackbone 9 | params: 10 | model_size: base 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: patch 15 | layer: 8 16 | probe_config: 17 | type: depth 18 | target: models.probes.DepthProbeModel 19 | params: 20 | in_features: 768 21 | data_config: 22 | num_workers: 4 23 | train: 24 | target: data.nyuv2.NYU_geonet 25 | params: 26 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 27 | split: train 28 | batch_size: 8 29 | val: 30 | target: data.nyuv2.NYU_geonet 31 | params: 32 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 33 | split: val 34 | batch_size: 8 35 | test: 36 | target: data.nyuv2.NYU_test 37 | params: 38 | data_path: /disk/scratch_ssd/danier/data/nyuv2/nyuv2_test.pkl 39 | batch_size: 8 40 | loss_config: 41 | target: submodules.probe3d.evals.utils.losses.DepthLoss 42 | metric_config: 43 | target: utils.DepthMetric 44 | -------------------------------------------------------------------------------- /configs/depth_nyuv2/dino-b16.yaml: -------------------------------------------------------------------------------- 1 | task: depth_nyuv2 2 | model_name: dino-b16 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 
7 | create_model_func: 8 | target: models.probe3d_backbones.DINOBackbone 9 | params: 10 | dino_name: dino 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: patch 15 | layer: 8 16 | probe_config: 17 | type: depth 18 | target: models.probes.DepthProbeModel 19 | params: 20 | in_features: 768 21 | data_config: 22 | num_workers: 4 23 | train: 24 | target: data.nyuv2.NYU_geonet 25 | params: 26 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 27 | split: train 28 | batch_size: 8 29 | val: 30 | target: data.nyuv2.NYU_geonet 31 | params: 32 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 33 | split: val 34 | batch_size: 8 35 | test: 36 | target: data.nyuv2.NYU_test 37 | params: 38 | data_path: /disk/scratch_ssd/danier/data/nyuv2/nyuv2_test.pkl 39 | batch_size: 8 40 | loss_config: 41 | target: submodules.probe3d.evals.utils.losses.DepthLoss 42 | metric_config: 43 | target: utils.DepthMetric 44 | -------------------------------------------------------------------------------- /configs/depth_nyuv2/midas-l16.yaml: -------------------------------------------------------------------------------- 1 | task: depth_nyuv2 2 | model_name: midas-l16 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.MidasBackbone 9 | params: 10 | midas: true 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: patch 15 | layer: 11 16 | probe_config: 17 | type: depth 18 | target: models.probes.DepthProbeModel 19 | params: 20 | in_features: 1024 21 | data_config: 22 | num_workers: 4 23 | train: 24 | target: data.nyuv2.NYU_geonet 25 | params: 26 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 27 | split: train 28 | batch_size: 8 29 | val: 30 | target: data.nyuv2.NYU_geonet 31 | params: 32 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 33 
| split: val 34 | batch_size: 8 35 | test: 36 | target: data.nyuv2.NYU_test 37 | params: 38 | data_path: /disk/scratch_ssd/danier/data/nyuv2/nyuv2_test.pkl 39 | batch_size: 8 40 | loss_config: 41 | target: submodules.probe3d.evals.utils.losses.DepthLoss 42 | metric_config: 43 | target: utils.DepthMetric 44 | -------------------------------------------------------------------------------- /configs/depth_nyuv2/ibot-b16.yaml: -------------------------------------------------------------------------------- 1 | task: depth_nyuv2 2 | model_name: ibot-b16 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.iBOTBackbone 9 | params: 10 | model_type: base_in22k 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: patch 15 | layer: 11 16 | probe_config: 17 | type: depth 18 | target: models.probes.DepthProbeModel 19 | params: 20 | in_features: 768 21 | data_config: 22 | num_workers: 4 23 | train: 24 | target: data.nyuv2.NYU_geonet 25 | params: 26 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 27 | split: train 28 | batch_size: 8 29 | val: 30 | target: data.nyuv2.NYU_geonet 31 | params: 32 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 33 | split: val 34 | batch_size: 8 35 | test: 36 | target: data.nyuv2.NYU_test 37 | params: 38 | data_path: /disk/scratch_ssd/danier/data/nyuv2/nyuv2_test.pkl 39 | batch_size: 8 40 | loss_config: 41 | target: submodules.probe3d.evals.utils.losses.DepthLoss 42 | metric_config: 43 | target: utils.DepthMetric 44 | -------------------------------------------------------------------------------- /configs/depth_diw/dust3r-l16.yaml: -------------------------------------------------------------------------------- 1 | task: depth_diw 2 | model_name: dust3r-l16 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 1 6 | log_every_n_iter: 50 7 | create_model_func: 8 | 
target: models.dust3r.Dust3rBackbone 9 | create_data_transform_func: 10 | target: models.dust3r.get_dust3r_transform 11 | feature_extractor_config: 12 | target: models.feature_extractor.DinoViTFeatureExtractor 13 | params: 14 | feat_type: dot 15 | layer: 23 16 | use_cls: false 17 | probe_config: 18 | type: linear 19 | target: models.probes.LinearProbeModel 20 | params: 21 | in_features: __check__model__ 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.diw.DIW 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/diw/ 29 | split: train 30 | batch_size: 8 31 | val: 32 | target: data.diw.DIW 33 | params: 34 | data_path: /disk/scratch_ssd/danier/data/diw/ 35 | split: val 36 | batch_size: 8 37 | test: 38 | target: data.diw.DIW 39 | params: 40 | data_path: /disk/scratch_ssd/danier/data/diw/ 41 | split: test 42 | return_path: true 43 | batch_size: 8 44 | loss_config: 45 | target: torch.nn.BCEWithLogitsLoss 46 | -------------------------------------------------------------------------------- /configs/depth_nyuv2/mae-b16.yaml: -------------------------------------------------------------------------------- 1 | task: depth_nyuv2 2 | model_name: mae-b16 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.MAEBackbone 9 | params: 10 | checkpoint: facebook/vit-mae-base 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: patch 15 | layer: 11 16 | probe_config: 17 | type: depth 18 | target: models.probes.DepthProbeModel 19 | params: 20 | in_features: 768 21 | data_config: 22 | num_workers: 4 23 | train: 24 | target: data.nyuv2.NYU_geonet 25 | params: 26 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 27 | split: train 28 | batch_size: 8 29 | val: 30 | target: data.nyuv2.NYU_geonet 31 | params: 32 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 33 | split: 
val 34 | batch_size: 8 35 | test: 36 | target: data.nyuv2.NYU_test 37 | params: 38 | data_path: /disk/scratch_ssd/danier/data/nyuv2/nyuv2_test.pkl 39 | batch_size: 8 40 | loss_config: 41 | target: submodules.probe3d.evals.utils.losses.DepthLoss 42 | metric_config: 43 | target: utils.DepthMetric 44 | -------------------------------------------------------------------------------- /configs/depth_diw/siglip-b16.yaml: -------------------------------------------------------------------------------- 1 | task: depth_diw 2 | model_name: siglip-b16 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 1 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.SigLIPBackbone 9 | params: 10 | checkpoint: vit_base_patch16_siglip_384.webli 11 | pretrained: true 12 | feature_extractor_config: 13 | target: models.feature_extractor.Probe3DViTFeatureExtractor 14 | params: 15 | feat_type: dot 16 | layer: 8 17 | use_cls: false 18 | probe_config: 19 | type: linear 20 | target: models.probes.LinearProbeModel 21 | params: 22 | in_features: 768 23 | num_classes: 1 24 | data_config: 25 | num_workers: 4 26 | train: 27 | target: data.diw.DIW 28 | params: 29 | data_path: /disk/scratch_ssd/danier/data/diw/ 30 | split: train 31 | batch_size: 8 32 | val: 33 | target: data.diw.DIW 34 | params: 35 | data_path: /disk/scratch_ssd/danier/data/diw/ 36 | split: val 37 | batch_size: 8 38 | test: 39 | target: data.diw.DIW 40 | params: 41 | data_path: /disk/scratch_ssd/danier/data/diw/ 42 | split: test 43 | return_path: true 44 | batch_size: 8 45 | loss_config: 46 | target: torch.nn.BCEWithLogitsLoss 47 | -------------------------------------------------------------------------------- /configs/depth_nyuv2/convnext-b16.yaml: -------------------------------------------------------------------------------- 1 | task: depth_nyuv2 2 | model_name: convnext-b16 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | 
class Dust3rBackbone(torch.nn.Module):
    """Wrapper exposing intermediate features of a pretrained DUSt3R model.

    The wrapped model is ``AsymmetricCroCo3DStereo`` loaded from the
    ``naver/DUSt3R_ViTLarge_BaseDecoder_512_linear`` checkpoint.

    ``feat_dim`` lists one feature width per block: 1024 for every encoder
    block, followed by 768 for every decoder block.

    Author's note carried over from the original docstring: valid layers are
    in [0, 35], the ones tested are 17, 23, 32 and 35, and the feature width
    is 1024 for layer < 24 and 768 otherwise. This presumably corresponds to
    24 encoder and 12 decoder blocks -- confirm against the checkpoint.
    """

    def __init__(self) -> None:
        super().__init__()

        pretrained = AsymmetricCroCo3DStereo.from_pretrained('naver/DUSt3R_ViTLarge_BaseDecoder_512_linear')
        self.model = pretrained

        encoder_dims = [1024] * len(pretrained.enc_blocks)
        decoder_dims = [768] * len(pretrained.dec_blocks)
        self.feat_dim = encoder_dims + decoder_dims

    def forward_intermediates(self, x, layer):
        """Run the wrapped model's ``forward_intermediates`` on a single view.

        Args:
            x: Image batch whose shape unpacks into four values, read here as
                (batch, channels, height, width).
            layer: Forwarded unchanged to the wrapped model.

        Returns:
            Whatever ``self.model.forward_intermediates`` returns.
        """
        batch_size, _, height, width = x.shape

        # One (height, width) row per batch item, on the same device as x.
        true_shape = torch.tensor(
            [height, width], dtype=torch.int32, device=x.device
        ).repeat(batch_size, 1)

        view = {'img': x, 'true_shape': true_shape}
        return self.model.forward_intermediates(view, layer)


def get_dust3r_transform():
    """Build the image preprocessing pipeline used with ``Dust3rBackbone``.

    Steps, in order: resize to 512x512 with Lanczos interpolation
    (antialiasing enabled), convert to a tensor, then normalize each of the
    three channels with mean 0.5 and std 0.5.
    """
    steps = [
        Resize(size=(512, 512), interpolation=InterpolationMode.LANCZOS, antialias=True),
        ToTensor(),
        Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
    return Compose(steps)
models.probe3d_backbones.MidasBackbone 9 | params: 10 | midas: true 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 11 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: 1024 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.SizeV2Dataset 27 | params: 28 | data_path: /raid/danier/data/size_v2/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.SizeV2Dataset 34 | params: 35 | data_path: /raid/danier/data/size_v2/ 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.SizeV2Dataset 41 | params: 42 | data_path: /raid/danier/data/size_v2/ 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/size/ibot-b16.yaml: -------------------------------------------------------------------------------- 1 | task: size_v2 2 | model_name: ibot-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.iBOTBackbone 9 | params: 10 | model_type: base_in22k 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 11 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: 768 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.SizeV2Dataset 27 | params: 28 | data_path: /raid/danier/data/size_v2/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.SizeV2Dataset 34 | params: 35 | data_path: /raid/danier/data/size_v2/ 36 | 
split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.SizeV2Dataset 41 | params: 42 | data_path: /raid/danier/data/size_v2/ 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/depth_nyuv2/dust3r-l16.yaml: -------------------------------------------------------------------------------- 1 | task: depth_nyuv2 2 | model_name: dust3r-l16 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.dust3r.Dust3rBackbone 9 | create_data_transform_func: 10 | target: models.dust3r.get_dust3r_transform 11 | feature_extractor_config: 12 | target: models.feature_extractor.DinoViTFeatureExtractor 13 | params: 14 | feat_type: patch 15 | layer: 23 16 | probe_config: 17 | type: depth 18 | target: models.probes.DepthProbeModel 19 | params: 20 | in_features: __check__model__ 21 | data_config: 22 | num_workers: 4 23 | train: 24 | target: data.nyuv2.NYU_geonet 25 | params: 26 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 27 | split: train 28 | batch_size: 8 29 | val: 30 | target: data.nyuv2.NYU_geonet 31 | params: 32 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 33 | split: val 34 | batch_size: 8 35 | test: 36 | target: data.nyuv2.NYU_test 37 | params: 38 | data_path: /disk/scratch_ssd/danier/data/nyuv2/nyuv2_test.pkl 39 | batch_size: 8 40 | loss_config: 41 | target: submodules.probe3d.evals.utils.losses.DepthLoss 42 | metric_config: 43 | target: utils.DepthMetric 44 | -------------------------------------------------------------------------------- /configs/depth_diw/lrm-b14.yaml: -------------------------------------------------------------------------------- 1 | task: depth_diw 2 | model_name: lrm-b14 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 1 6 | log_every_n_iter: 50 7 
| create_model_func: 8 | target: models.lrm.LRMBackbone 9 | params: 10 | arch: vitb 11 | output: dense 12 | create_data_transform_func: 13 | target: models.lrm.get_lrm_transform 14 | feature_extractor_config: 15 | target: models.feature_extractor.DinoViTFeatureExtractor 16 | params: 17 | feat_type: dot 18 | layer: 8 19 | use_cls: false 20 | probe_config: 21 | type: linear 22 | target: models.probes.LinearProbeModel 23 | params: 24 | in_features: __check__model__ 25 | num_classes: 1 26 | data_config: 27 | num_workers: 4 28 | train: 29 | target: data.diw.DIW 30 | params: 31 | data_path: /disk/scratch_ssd/danier/data/diw/ 32 | split: train 33 | batch_size: 8 34 | val: 35 | target: data.diw.DIW 36 | params: 37 | data_path: /disk/scratch_ssd/danier/data/diw/ 38 | split: val 39 | batch_size: 8 40 | test: 41 | target: data.diw.DIW 42 | params: 43 | data_path: /disk/scratch_ssd/danier/data/diw/ 44 | split: test 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/depth_diw/clip-b16.yaml: -------------------------------------------------------------------------------- 1 | task: depth_diw 2 | model_name: clip-b16 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 1 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.CLIPBackbone 9 | params: 10 | arch: ViT-B-16 11 | create_data_transform_func: 12 | target: models.probe3d_backbones.get_clip_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: dot 17 | layer: 11 18 | use_cls: false 19 | probe_config: 20 | type: linear 21 | target: models.probes.LinearProbeModel 22 | params: 23 | in_features: 768 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.diw.DIW 29 | params: 30 | data_path: /disk/scratch_ssd/danier/data/diw/ 31 | split: 
train 32 | batch_size: 8 33 | val: 34 | target: data.diw.DIW 35 | params: 36 | data_path: /disk/scratch_ssd/danier/data/diw/ 37 | split: val 38 | batch_size: 8 39 | test: 40 | target: data.diw.DIW 41 | params: 42 | data_path: /disk/scratch_ssd/danier/data/diw/ 43 | split: test 44 | return_path: true 45 | batch_size: 8 46 | loss_config: 47 | target: torch.nn.BCEWithLogitsLoss 48 | -------------------------------------------------------------------------------- /configs/depth_diw/dinov2-b14.yaml: -------------------------------------------------------------------------------- 1 | task: depth_diw 2 | model_name: dinov2-b14 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 1 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.vit.TimmViTBackbone 9 | params: 10 | model_name: vit_base_patch14_dinov2.lvd142m 11 | create_data_transform_func: 12 | target: models.vit.get_dinov2_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.TimmViTFeatureExtractor 15 | params: 16 | feat_type: dot 17 | layer: 11 18 | use_cls: false 19 | probe_config: 20 | type: linear 21 | target: models.probes.LinearProbeModel 22 | params: 23 | in_features: 768 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.diw.DIW 29 | params: 30 | data_path: /disk/scratch_ssd/danier/data/diw/ 31 | split: train 32 | batch_size: 8 33 | val: 34 | target: data.diw.DIW 35 | params: 36 | data_path: /disk/scratch_ssd/danier/data/diw/ 37 | split: val 38 | batch_size: 8 39 | test: 40 | target: data.diw.DIW 41 | params: 42 | data_path: /disk/scratch_ssd/danier/data/diw/ 43 | split: test 44 | return_path: true 45 | batch_size: 8 46 | loss_config: 47 | target: torch.nn.BCEWithLogitsLoss 48 | -------------------------------------------------------------------------------- /configs/depth_diw/vit-b16.yaml: -------------------------------------------------------------------------------- 1 | task: depth_diw 2 | model_name: vit-b16 3 | 
weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 1 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.vit.TimmViTBackbone 9 | params: 10 | model_name: vit_base_patch16_224.augreg2_in21k_ft_in1k 11 | create_data_transform_func: 12 | target: models.vit.get_vit_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.TimmViTFeatureExtractor 15 | params: 16 | feat_type: dot 17 | layer: 11 18 | use_cls: false 19 | probe_config: 20 | type: linear 21 | target: models.probes.LinearProbeModel 22 | params: 23 | in_features: 768 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.diw.DIW 29 | params: 30 | data_path: /disk/scratch_ssd/danier/data/diw/ 31 | split: train 32 | batch_size: 8 33 | val: 34 | target: data.diw.DIW 35 | params: 36 | data_path: /disk/scratch_ssd/danier/data/diw/ 37 | split: val 38 | batch_size: 8 39 | test: 40 | target: data.diw.DIW 41 | params: 42 | data_path: /disk/scratch_ssd/danier/data/diw/ 43 | split: test 44 | return_path: true 45 | batch_size: 8 46 | loss_config: 47 | target: torch.nn.BCEWithLogitsLoss 48 | -------------------------------------------------------------------------------- /configs/depth_nyuv2/croco-b16.yaml: -------------------------------------------------------------------------------- 1 | task: depth_nyuv2 2 | model_name: croco-b16 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.croco.CroCoBackbone 9 | params: 10 | img_size: 448 11 | create_data_transform_func: 12 | target: models.croco.get_croco_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.DinoViTFeatureExtractor 15 | params: 16 | feat_type: patch 17 | layer: 8 18 | probe_config: 19 | type: depth 20 | target: models.probes.DepthProbeModel 21 | params: 22 | in_features: 768 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.nyuv2.NYU_geonet 27 | params: 28 | 
data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 29 | split: train 30 | batch_size: 8 31 | val: 32 | target: data.nyuv2.NYU_geonet 33 | params: 34 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 35 | split: val 36 | batch_size: 8 37 | test: 38 | target: data.nyuv2.NYU_test 39 | params: 40 | data_path: /disk/scratch_ssd/danier/data/nyuv2/nyuv2_test.pkl 41 | batch_size: 8 42 | loss_config: 43 | target: submodules.probe3d.evals.utils.losses.DepthLoss 44 | metric_config: 45 | target: utils.DepthMetric 46 | -------------------------------------------------------------------------------- /configs/occlusion/sam-b16.yaml: -------------------------------------------------------------------------------- 1 | task: occlusion_v4 2 | model_name: sam-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.SAMBackbone 9 | params: 10 | arch: vit_b 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 11 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: 768 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.OcclusionV4Dataset 27 | params: 28 | data_path: /raid/danier/data/occlusion_v4 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.OcclusionV4Dataset 34 | params: 35 | data_path: /raid/danier/data/occlusion_v4 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.OcclusionV4Dataset 41 | params: 42 | data_path: /raid/danier/data/occlusion_v4 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/size/sam-b16.yaml: 
-------------------------------------------------------------------------------- 1 | task: size_v2 2 | model_name: sam-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.SAMBackbone 9 | params: 10 | arch: vit_b 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 11 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: 768 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.SizeV2Dataset 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.SizeV2Dataset 34 | params: 35 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.SizeV2Dataset 41 | params: 42 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/size/deit-b16.yaml: -------------------------------------------------------------------------------- 1 | task: size_v2 2 | model_name: deit-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.DeITBackbone 9 | params: 10 | model_size: base 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 5 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: 768 22 | num_classes: 1 23 | data_config: 24 | 
num_workers: 4 25 | train: 26 | target: data.SizeV2Dataset 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.SizeV2Dataset 34 | params: 35 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.SizeV2Dataset 41 | params: 42 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/size/dino-b16.yaml: -------------------------------------------------------------------------------- 1 | task: size_v2 2 | model_name: dino-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.DINOBackbone 9 | params: 10 | dino_name: dino 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 5 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: 768 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.SizeV2Dataset 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.SizeV2Dataset 34 | params: 35 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.SizeV2Dataset 41 | params: 42 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | 
-------------------------------------------------------------------------------- /configs/occlusion/midas-l16.yaml: -------------------------------------------------------------------------------- 1 | task: occlusion_v4 2 | model_name: midas-l16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.MidasBackbone 9 | params: 10 | midas: true 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 11 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: 1024 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.OcclusionV4Dataset 27 | params: 28 | data_path: /raid/danier/data/occlusion_v4/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.OcclusionV4Dataset 34 | params: 35 | data_path: /raid/danier/data/occlusion_v4/ 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.OcclusionV4Dataset 41 | params: 42 | data_path: /raid/danier/data/occlusion_v4/ 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/light-shadow/sam-b16.yaml: -------------------------------------------------------------------------------- 1 | task: lightshadow_v1 2 | model_name: sam-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 50 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.SAMBackbone 9 | params: 10 | arch: vit_b 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 5 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | 
target: models.probes.MlpProbeModel 20 | params: 21 | in_features: 768 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.LightshadowV1Dataset 27 | params: 28 | data_path: /raid/danier/data/lightshadow_v1 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.LightshadowV1Dataset 34 | params: 35 | data_path: /raid/danier/data/lightshadow_v1 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.LightshadowV1Dataset 41 | params: 42 | data_path: /raid/danier/data/lightshadow_v1 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/occlusion/ibot-b16.yaml: -------------------------------------------------------------------------------- 1 | task: occlusion_v4 2 | model_name: ibot-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.iBOTBackbone 9 | params: 10 | model_type: base_in22k 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 8 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: 768 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.OcclusionV4Dataset 27 | params: 28 | data_path: /raid/danier/data/occlusion_v4/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.OcclusionV4Dataset 34 | params: 35 | data_path: /raid/danier/data/occlusion_v4/ 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.OcclusionV4Dataset 41 | params: 42 | data_path: /raid/danier/data/occlusion_v4/ 43 | split: test 44 | return_mask: true 45 | 
return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/depth_diw/resnet18.yaml: -------------------------------------------------------------------------------- 1 | task: depth_diw 2 | model_name: resnet18 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 1 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.resnet_backbones.ResNetBackbone 9 | params: 10 | model_name: resnet18.tv_in1k 11 | create_data_transform_func: 12 | target: models.resnet_backbones.get_resnet_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: dot 17 | layer: 4 18 | use_cls: false 19 | probe_config: 20 | type: linear 21 | target: models.probes.LinearProbeModel 22 | params: 23 | in_features: __check__model__ 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.diw.DIW 29 | params: 30 | data_path: /disk/scratch_ssd/danier/data/diw/ 31 | split: train 32 | batch_size: 8 33 | val: 34 | target: data.diw.DIW 35 | params: 36 | data_path: /disk/scratch_ssd/danier/data/diw/ 37 | split: val 38 | batch_size: 8 39 | test: 40 | target: data.diw.DIW 41 | params: 42 | data_path: /disk/scratch_ssd/danier/data/diw/ 43 | split: test 44 | return_path: true 45 | batch_size: 8 46 | loss_config: 47 | target: torch.nn.BCEWithLogitsLoss 48 | -------------------------------------------------------------------------------- /configs/depth_diw/resnet50.yaml: -------------------------------------------------------------------------------- 1 | task: depth_diw 2 | model_name: resnet50 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 1 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.resnet_backbones.ResNetBackbone 9 | params: 10 | model_name: resnet50.tv_in1k 11 | create_data_transform_func: 12 | target: 
models.resnet_backbones.get_resnet_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: dot 17 | layer: 4 18 | use_cls: false 19 | probe_config: 20 | type: linear 21 | target: models.probes.LinearProbeModel 22 | params: 23 | in_features: __check__model__ 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.diw.DIW 29 | params: 30 | data_path: /disk/scratch_ssd/danier/data/diw/ 31 | split: train 32 | batch_size: 8 33 | val: 34 | target: data.diw.DIW 35 | params: 36 | data_path: /disk/scratch_ssd/danier/data/diw/ 37 | split: val 38 | batch_size: 8 39 | test: 40 | target: data.diw.DIW 41 | params: 42 | data_path: /disk/scratch_ssd/danier/data/diw/ 43 | split: test 44 | return_path: true 45 | batch_size: 8 46 | loss_config: 47 | target: torch.nn.BCEWithLogitsLoss 48 | -------------------------------------------------------------------------------- /configs/depth_diw/senet154.yaml: -------------------------------------------------------------------------------- 1 | task: depth_diw 2 | model_name: senet154 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 1 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.resnet_backbones.ResNetBackbone 9 | params: 10 | model_name: senet154.gluon_in1k 11 | create_data_transform_func: 12 | target: models.resnet_backbones.get_resnet_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: dot 17 | layer: 4 18 | use_cls: false 19 | probe_config: 20 | type: linear 21 | target: models.probes.LinearProbeModel 22 | params: 23 | in_features: __check__model__ 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.diw.DIW 29 | params: 30 | data_path: /disk/scratch_ssd/danier/data/diw/ 31 | split: train 32 | batch_size: 8 33 | val: 34 | target: data.diw.DIW 35 | params: 36 | data_path: 
/disk/scratch_ssd/danier/data/diw/ 37 | split: val 38 | batch_size: 8 39 | test: 40 | target: data.diw.DIW 41 | params: 42 | data_path: /disk/scratch_ssd/danier/data/diw/ 43 | split: test 44 | return_path: true 45 | batch_size: 8 46 | loss_config: 47 | target: torch.nn.BCEWithLogitsLoss 48 | -------------------------------------------------------------------------------- /configs/depth_diw/stablediffusion.yaml: -------------------------------------------------------------------------------- 1 | task: depth_diw 2 | model_name: stablediffusion 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 1 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.StableDiffusionBackbone 9 | params: 10 | model_id: stabilityai/stable-diffusion-2-1 11 | time_step: 1 12 | return_multilayer: true 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: dot 17 | layer: 1 18 | use_cls: false 19 | probe_config: 20 | type: linear 21 | target: models.probes.LinearProbeModel 22 | params: 23 | in_features: __check__model__ 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.diw.DIW 29 | params: 30 | data_path: /disk/scratch_ssd/danier/data/diw/ 31 | split: train 32 | batch_size: 8 33 | val: 34 | target: data.diw.DIW 35 | params: 36 | data_path: /disk/scratch_ssd/danier/data/diw/ 37 | split: val 38 | batch_size: 8 39 | test: 40 | target: data.diw.DIW 41 | params: 42 | data_path: /disk/scratch_ssd/danier/data/diw/ 43 | split: test 44 | return_path: true 45 | batch_size: 8 46 | loss_config: 47 | target: torch.nn.BCEWithLogitsLoss 48 | -------------------------------------------------------------------------------- /configs/depth_nyuv2/lrm-b14.yaml: -------------------------------------------------------------------------------- 1 | task: depth_nyuv2 2 | model_name: lrm-b14 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 10 6 | 
log_every_n_iter: 50 7 | create_model_func: 8 | target: models.lrm.LRMBackbone 9 | params: 10 | arch: vitb 11 | output: dense 12 | create_data_transform_func: 13 | target: models.lrm.get_lrm_transform 14 | feature_extractor_config: 15 | target: models.feature_extractor.DinoViTFeatureExtractor 16 | params: 17 | feat_type: patch 18 | layer: 8 19 | probe_config: 20 | type: depth 21 | target: models.probes.DepthProbeModel 22 | params: 23 | in_features: __check__model__ 24 | data_config: 25 | num_workers: 4 26 | train: 27 | target: data.nyuv2.NYU_geonet 28 | params: 29 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 30 | split: train 31 | batch_size: 8 32 | val: 33 | target: data.nyuv2.NYU_geonet 34 | params: 35 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 36 | split: val 37 | batch_size: 8 38 | test: 39 | target: data.nyuv2.NYU_test 40 | params: 41 | data_path: /disk/scratch_ssd/danier/data/nyuv2/nyuv2_test.pkl 42 | batch_size: 8 43 | loss_config: 44 | target: submodules.probe3d.evals.utils.losses.DepthLoss 45 | metric_config: 46 | target: utils.DepthMetric 47 | -------------------------------------------------------------------------------- /configs/occlusion/mae-b16.yaml: -------------------------------------------------------------------------------- 1 | task: occlusion_v4 2 | model_name: mae-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.MAEBackbone 9 | params: 10 | checkpoint: facebook/vit-mae-base 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 8 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: 768 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.OcclusionV4Dataset 27 | params: 28 | data_path: /raid/danier/data/occlusion_v4 29 | split: 
train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.OcclusionV4Dataset 34 | params: 35 | data_path: /raid/danier/data/occlusion_v4 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.OcclusionV4Dataset 41 | params: 42 | data_path: /raid/danier/data/occlusion_v4 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/size/dust3r-l16.yaml: -------------------------------------------------------------------------------- 1 | task: size_v2 2 | model_name: dust3r-l16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.dust3r.Dust3rBackbone 9 | create_data_transform_func: 10 | target: models.dust3r.get_dust3r_transform 11 | feature_extractor_config: 12 | target: models.feature_extractor.DinoViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 23 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: __check__model__ 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.SizeV2Dataset 27 | params: 28 | data_path: /raid/danier/data/size_v2/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.SizeV2Dataset 34 | params: 35 | data_path: /raid/danier/data/size_v2/ 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.SizeV2Dataset 41 | params: 42 | data_path: /raid/danier/data/size_v2/ 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/size/mae-b16.yaml: 
-------------------------------------------------------------------------------- 1 | task: size_v2 2 | model_name: mae-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.MAEBackbone 9 | params: 10 | checkpoint: facebook/vit-mae-base 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 5 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: 768 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.SizeV2Dataset 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.SizeV2Dataset 34 | params: 35 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.SizeV2Dataset 41 | params: 42 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/depth_diw/depthanythingv2-b14.yaml: -------------------------------------------------------------------------------- 1 | task: depth_diw 2 | model_name: depthanythingv2-b14 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 1 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.depth_anything_v2.DepthAnythingV2Backbone 9 | params: 10 | arch: vitb 11 | create_data_transform_func: 12 | target: models.depth_anything_v2.get_depth_anything_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.DinoViTFeatureExtractor 15 | params: 16 | feat_type: dot 17 | layer: 11 18 | use_cls: false 19 | 
probe_config: 20 | type: linear 21 | target: models.probes.LinearProbeModel 22 | params: 23 | in_features: 768 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.diw.DIW 29 | params: 30 | data_path: /disk/scratch_ssd/danier/data/diw/ 31 | split: train 32 | batch_size: 8 33 | val: 34 | target: data.diw.DIW 35 | params: 36 | data_path: /disk/scratch_ssd/danier/data/diw/ 37 | split: val 38 | batch_size: 8 39 | test: 40 | target: data.diw.DIW 41 | params: 42 | data_path: /disk/scratch_ssd/danier/data/diw/ 43 | split: test 44 | return_path: true 45 | batch_size: 8 46 | loss_config: 47 | target: torch.nn.BCEWithLogitsLoss 48 | -------------------------------------------------------------------------------- /configs/depth_nyuv2/clip-b16.yaml: -------------------------------------------------------------------------------- 1 | task: depth_nyuv2 2 | model_name: clip-b16 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.CLIPBackbone 9 | params: 10 | arch: ViT-B-16 11 | create_data_transform_func: 12 | target: models.probe3d_backbones.get_clip_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: patch 17 | layer: 11 18 | probe_config: 19 | type: depth 20 | target: models.probes.DepthProbeModel 21 | params: 22 | in_features: 768 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.nyuv2.NYU_geonet 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 29 | split: train 30 | batch_size: 8 31 | val: 32 | target: data.nyuv2.NYU_geonet 33 | params: 34 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 35 | split: val 36 | batch_size: 8 37 | test: 38 | target: data.nyuv2.NYU_test 39 | params: 40 | data_path: /disk/scratch_ssd/danier/data/nyuv2/nyuv2_test.pkl 41 | batch_size: 8 42 | loss_config: 43 | target: 
submodules.probe3d.evals.utils.losses.DepthLoss 44 | metric_config: 45 | target: utils.DepthMetric 46 | -------------------------------------------------------------------------------- /configs/light-shadow/midas-l16.yaml: -------------------------------------------------------------------------------- 1 | task: lightshadow_v1 2 | model_name: midas-l16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 50 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.MidasBackbone 9 | params: 10 | midas: true 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 5 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: 1024 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.LightshadowV1Dataset 27 | params: 28 | data_path: /raid/danier/data/lightshadow_v1/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.LightshadowV1Dataset 34 | params: 35 | data_path: /raid/danier/data/lightshadow_v1/ 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.LightshadowV1Dataset 41 | params: 42 | data_path: /raid/danier/data/lightshadow_v1/ 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/texture-grad/midas-l16.yaml: -------------------------------------------------------------------------------- 1 | task: texturegrad_v1 2 | model_name: midas-l16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 60 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.MidasBackbone 9 | params: 10 | midas: true 11 | feature_extractor_config: 12 | target: 
models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 11 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: 1024 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.TexturegradV1Dataset 27 | params: 28 | data_path: /raid/danier/data/texturegrad_v1/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.TexturegradV1Dataset 34 | params: 35 | data_path: /raid/danier/data/texturegrad_v1/ 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.TexturegradV1Dataset 41 | params: 42 | data_path: /raid/danier/data/texturegrad_v1/ 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/depth_diw/resnext50.yaml: -------------------------------------------------------------------------------- 1 | task: depth_diw 2 | model_name: resnext50 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 1 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.resnet_backbones.ResNetBackbone 9 | params: 10 | model_name: resnext50_32x4d.tv_in1k 11 | create_data_transform_func: 12 | target: models.resnet_backbones.get_resnet_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: dot 17 | layer: 4 18 | use_cls: false 19 | probe_config: 20 | type: linear 21 | target: models.probes.LinearProbeModel 22 | params: 23 | in_features: __check__model__ 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.diw.DIW 29 | params: 30 | data_path: /disk/scratch_ssd/danier/data/diw/ 31 | split: train 32 | batch_size: 8 33 | val: 34 | target: data.diw.DIW 35 | params: 36 | data_path: 
/disk/scratch_ssd/danier/data/diw/ 37 | split: val 38 | batch_size: 8 39 | test: 40 | target: data.diw.DIW 41 | params: 42 | data_path: /disk/scratch_ssd/danier/data/diw/ 43 | split: test 44 | return_path: true 45 | batch_size: 8 46 | loss_config: 47 | target: torch.nn.BCEWithLogitsLoss 48 | -------------------------------------------------------------------------------- /configs/depth_nyuv2/dinov2-b14.yaml: -------------------------------------------------------------------------------- 1 | task: depth_nyuv2 2 | model_name: dinov2-b14 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.vit.TimmViTBackbone 9 | params: 10 | model_name: vit_base_patch14_dinov2.lvd142m 11 | create_data_transform_func: 12 | target: models.vit.get_dinov2_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.TimmViTFeatureExtractor 15 | params: 16 | feat_type: patch 17 | layer: 11 18 | probe_config: 19 | type: depth 20 | target: models.probes.DepthProbeModel 21 | params: 22 | in_features: 768 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.nyuv2.NYU_geonet 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 29 | split: train 30 | batch_size: 8 31 | val: 32 | target: data.nyuv2.NYU_geonet 33 | params: 34 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 35 | split: val 36 | batch_size: 8 37 | test: 38 | target: data.nyuv2.NYU_test 39 | params: 40 | data_path: /disk/scratch_ssd/danier/data/nyuv2/nyuv2_test.pkl 41 | batch_size: 8 42 | loss_config: 43 | target: submodules.probe3d.evals.utils.losses.DepthLoss 44 | metric_config: 45 | target: utils.DepthMetric 46 | -------------------------------------------------------------------------------- /configs/texture-grad/ibot-b16.yaml: -------------------------------------------------------------------------------- 1 | task: texturegrad_v1 2 | model_name: ibot-b16 3 | weight_decay: 0.01 4 | 
learning_rate: 0.001 5 | epochs: 60 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.iBOTBackbone 9 | params: 10 | model_type: base_in22k 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 8 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: 768 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.TexturegradV1Dataset 27 | params: 28 | data_path: /raid/danier/data/texturegrad_v1/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.TexturegradV1Dataset 34 | params: 35 | data_path: /raid/danier/data/texturegrad_v1/ 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.TexturegradV1Dataset 41 | params: 42 | data_path: /raid/danier/data/texturegrad_v1/ 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/depth_nyuv2/vit-b16.yaml: -------------------------------------------------------------------------------- 1 | task: depth_nyuv2 2 | model_name: vit-b16 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.vit.TimmViTBackbone 9 | params: 10 | model_name: vit_base_patch16_224.augreg2_in21k_ft_in1k 11 | create_data_transform_func: 12 | target: models.vit.get_vit_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.TimmViTFeatureExtractor 15 | params: 16 | feat_type: patch 17 | layer: 11 18 | probe_config: 19 | type: depth 20 | target: models.probes.DepthProbeModel 21 | params: 22 | in_features: 768 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.nyuv2.NYU_geonet 27 | 
params: 28 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 29 | split: train 30 | batch_size: 8 31 | val: 32 | target: data.nyuv2.NYU_geonet 33 | params: 34 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 35 | split: val 36 | batch_size: 8 37 | test: 38 | target: data.nyuv2.NYU_test 39 | params: 40 | data_path: /disk/scratch_ssd/danier/data/nyuv2/nyuv2_test.pkl 41 | batch_size: 8 42 | loss_config: 43 | target: submodules.probe3d.evals.utils.losses.DepthLoss 44 | metric_config: 45 | target: utils.DepthMetric 46 | -------------------------------------------------------------------------------- /configs/light-shadow/ibot-b16.yaml: -------------------------------------------------------------------------------- 1 | task: lightshadow_v1 2 | model_name: ibot-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 50 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.iBOTBackbone 9 | params: 10 | model_type: base_in22k 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 11 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: 768 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.LightshadowV1Dataset 27 | params: 28 | data_path: /raid/danier/data/lightshadow_v1/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.LightshadowV1Dataset 34 | params: 35 | data_path: /raid/danier/data/lightshadow_v1/ 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.LightshadowV1Dataset 41 | params: 42 | data_path: /raid/danier/data/lightshadow_v1/ 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- 
/configs/light-shadow/mae-b16.yaml: -------------------------------------------------------------------------------- 1 | task: lightshadow_v1 2 | model_name: mae-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 50 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.MAEBackbone 9 | params: 10 | checkpoint: facebook/vit-mae-base 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 8 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: 768 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.LightshadowV1Dataset 27 | params: 28 | data_path: /raid/danier/data/lightshadow_v1 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.LightshadowV1Dataset 34 | params: 35 | data_path: /raid/danier/data/lightshadow_v1 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.LightshadowV1Dataset 41 | params: 42 | data_path: /raid/danier/data/lightshadow_v1 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/occlusion/croco-b16.yaml: -------------------------------------------------------------------------------- 1 | task: occlusion_v4 2 | model_name: croco-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.croco.CroCoBackbone 9 | create_data_transform_func: 10 | target: models.croco.get_croco_transform 11 | feature_extractor_config: 12 | target: models.feature_extractor.DinoViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 8 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: 
models.probes.MlpProbeModel 20 | params: 21 | in_features: 768 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.OcclusionV4Dataset 27 | params: 28 | data_path: /raid/danier/data/occlusion_v4/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.OcclusionV4Dataset 34 | params: 35 | data_path: /raid/danier/data/occlusion_v4/ 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.OcclusionV4Dataset 41 | params: 42 | data_path: /raid/danier/data/occlusion_v4/ 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/size/convnext-b16.yaml: -------------------------------------------------------------------------------- 1 | task: size_v2 2 | model_name: convnext-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.ConvNextBackbone 9 | params: 10 | arch: convnext_base 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 3 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: __check__model__ 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.SizeV2Dataset 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.SizeV2Dataset 34 | params: 35 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.SizeV2Dataset 41 | params: 42 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 43 | split: test 44 | return_mask: true 45 | 
return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/size/croco-b16.yaml: -------------------------------------------------------------------------------- 1 | task: size_v2 2 | model_name: croco-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.croco.CroCoBackbone 9 | create_data_transform_func: 10 | target: models.croco.get_croco_transform 11 | feature_extractor_config: 12 | target: models.feature_extractor.DinoViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 8 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: 768 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.SizeV2Dataset 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.SizeV2Dataset 34 | params: 35 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.SizeV2Dataset 41 | params: 42 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/occlusion/deit-b16.yaml: -------------------------------------------------------------------------------- 1 | task: occlusion_v4 2 | model_name: deit-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.DeITBackbone 9 | params: 10 | model_size: base 11 | feature_extractor_config: 12 | target: 
models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 5 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: 768 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.OcclusionV4Dataset 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/occlusion_v4/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.OcclusionV4Dataset 34 | params: 35 | data_path: /disk/scratch_ssd/danier/data/occlusion_v4/ 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.OcclusionV4Dataset 41 | params: 42 | data_path: /disk/scratch_ssd/danier/data/occlusion_v4/ 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/occlusion/dino-b16.yaml: -------------------------------------------------------------------------------- 1 | task: occlusion_v4 2 | model_name: dino-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.DINOBackbone 9 | params: 10 | dino_name: dino 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 11 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: 768 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.OcclusionV4Dataset 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/occlusion_v4/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.OcclusionV4Dataset 34 | params: 35 | data_path: /disk/scratch_ssd/danier/data/occlusion_v4/ 36 | split: 
val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.OcclusionV4Dataset 41 | params: 42 | data_path: /disk/scratch_ssd/danier/data/occlusion_v4/ 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/depth_nyuv2/resnet18.yaml: -------------------------------------------------------------------------------- 1 | task: depth_nyuv2 2 | model_name: resnet18 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.resnet_backbones.ResNetBackbone 9 | params: 10 | model_name: resnet18.tv_in1k 11 | create_data_transform_func: 12 | target: models.resnet_backbones.get_resnet_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: patch 17 | layer: 4 18 | probe_config: 19 | type: depth 20 | target: models.probes.DepthProbeModel 21 | params: 22 | in_features: __check__model__ 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.nyuv2.NYU_geonet 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 29 | split: train 30 | batch_size: 8 31 | val: 32 | target: data.nyuv2.NYU_geonet 33 | params: 34 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 35 | split: val 36 | batch_size: 8 37 | test: 38 | target: data.nyuv2.NYU_test 39 | params: 40 | data_path: /disk/scratch_ssd/danier/data/nyuv2/nyuv2_test.pkl 41 | batch_size: 8 42 | loss_config: 43 | target: submodules.probe3d.evals.utils.losses.DepthLoss 44 | metric_config: 45 | target: utils.DepthMetric 46 | -------------------------------------------------------------------------------- /configs/depth_nyuv2/resnet50.yaml: -------------------------------------------------------------------------------- 1 | task: depth_nyuv2 2 | model_name: resnet50 3 | 
weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.resnet_backbones.ResNetBackbone 9 | params: 10 | model_name: resnet50.tv_in1k 11 | create_data_transform_func: 12 | target: models.resnet_backbones.get_resnet_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: patch 17 | layer: 4 18 | probe_config: 19 | type: depth 20 | target: models.probes.DepthProbeModel 21 | params: 22 | in_features: __check__model__ 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.nyuv2.NYU_geonet 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 29 | split: train 30 | batch_size: 8 31 | val: 32 | target: data.nyuv2.NYU_geonet 33 | params: 34 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 35 | split: val 36 | batch_size: 8 37 | test: 38 | target: data.nyuv2.NYU_test 39 | params: 40 | data_path: /disk/scratch_ssd/danier/data/nyuv2/nyuv2_test.pkl 41 | batch_size: 8 42 | loss_config: 43 | target: submodules.probe3d.evals.utils.losses.DepthLoss 44 | metric_config: 45 | target: utils.DepthMetric 46 | -------------------------------------------------------------------------------- /configs/depth_nyuv2/senet154.yaml: -------------------------------------------------------------------------------- 1 | task: depth_nyuv2 2 | model_name: senet154 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.resnet_backbones.ResNetBackbone 9 | params: 10 | model_name: senet154.gluon_in1k 11 | create_data_transform_func: 12 | target: models.resnet_backbones.get_resnet_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: patch 17 | layer: 4 18 | probe_config: 19 | type: depth 20 | target: models.probes.DepthProbeModel 21 | params: 22 | in_features: 
__check__model__ 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.nyuv2.NYU_geonet 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 29 | split: train 30 | batch_size: 8 31 | val: 32 | target: data.nyuv2.NYU_geonet 33 | params: 34 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 35 | split: val 36 | batch_size: 8 37 | test: 38 | target: data.nyuv2.NYU_test 39 | params: 40 | data_path: /disk/scratch_ssd/danier/data/nyuv2/nyuv2_test.pkl 41 | batch_size: 8 42 | loss_config: 43 | target: submodules.probe3d.evals.utils.losses.DepthLoss 44 | metric_config: 45 | target: utils.DepthMetric 46 | -------------------------------------------------------------------------------- /configs/depth_nyuv2/stablediffusion.yaml: -------------------------------------------------------------------------------- 1 | task: depth_nyuv2 2 | model_name: stablediffusion 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.StableDiffusionBackbone 9 | params: 10 | model_id: stabilityai/stable-diffusion-2-1 11 | time_step: 1 12 | return_multilayer: true 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: patch 17 | layer: 1 18 | probe_config: 19 | type: depth 20 | target: models.probes.DepthProbeModel 21 | params: 22 | in_features: __check__model__ 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.nyuv2.NYU_geonet 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 29 | split: train 30 | batch_size: 8 31 | val: 32 | target: data.nyuv2.NYU_geonet 33 | params: 34 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 35 | split: val 36 | batch_size: 8 37 | test: 38 | target: data.nyuv2.NYU_test 39 | params: 40 | data_path: /disk/scratch_ssd/danier/data/nyuv2/nyuv2_test.pkl 41 | batch_size: 8 42 | loss_config: 43 | target: 
submodules.probe3d.evals.utils.losses.DepthLoss 44 | metric_config: 45 | target: utils.DepthMetric 46 | -------------------------------------------------------------------------------- /configs/light-shadow/croco-b16.yaml: -------------------------------------------------------------------------------- 1 | task: lightshadow_v1 2 | model_name: croco-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 50 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.croco.CroCoBackbone 9 | create_data_transform_func: 10 | target: models.croco.get_croco_transform 11 | feature_extractor_config: 12 | target: models.feature_extractor.DinoViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 5 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: 768 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.LightshadowV1Dataset 27 | params: 28 | data_path: /raid/danier/data/lightshadow_v1/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.LightshadowV1Dataset 34 | params: 35 | data_path: /raid/danier/data/lightshadow_v1/ 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.LightshadowV1Dataset 41 | params: 42 | data_path: /raid/danier/data/lightshadow_v1/ 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/size/lrm-b14.yaml: -------------------------------------------------------------------------------- 1 | task: size_v2 2 | model_name: lrm-b14 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.lrm.LRMBackbone 9 | params: 10 | arch: vitb 11 | create_data_transform_func: 12 | target: models.lrm.get_lrm_transform 13 
| feature_extractor_config: 14 | target: models.feature_extractor.DinoViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 8 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: 768 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.SizeV2Dataset 29 | params: 30 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.SizeV2Dataset 36 | params: 37 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.SizeV2Dataset 43 | params: 44 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/texture-grad/sam-b16.yaml: -------------------------------------------------------------------------------- 1 | task: texturegrad_v1 2 | model_name: sam-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 60 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.SAMBackbone 9 | params: 10 | arch: vit_b 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 5 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: 768 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.TexturegradV1Dataset 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.TexturegradV1Dataset 34 | params: 35 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 
36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.TexturegradV1Dataset 41 | params: 42 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/depth_nyuv2/depthanythingv2-b14.yaml: -------------------------------------------------------------------------------- 1 | task: depth_nyuv2 2 | model_name: depthanythingv2-b14 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.depth_anything_v2.DepthAnythingV2Backbone 9 | params: 10 | arch: vitb 11 | create_data_transform_func: 12 | target: models.depth_anything_v2.get_depth_anything_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.DinoViTFeatureExtractor 15 | params: 16 | feat_type: patch 17 | layer: 11 18 | probe_config: 19 | type: depth 20 | target: models.probes.DepthProbeModel 21 | params: 22 | in_features: 768 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.nyuv2.NYU_geonet 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 29 | split: train 30 | batch_size: 8 31 | val: 32 | target: data.nyuv2.NYU_geonet 33 | params: 34 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 35 | split: val 36 | batch_size: 8 37 | test: 38 | target: data.nyuv2.NYU_test 39 | params: 40 | data_path: /disk/scratch_ssd/danier/data/nyuv2/nyuv2_test.pkl 41 | batch_size: 8 42 | loss_config: 43 | target: submodules.probe3d.evals.utils.losses.DepthLoss 44 | metric_config: 45 | target: utils.DepthMetric 46 | -------------------------------------------------------------------------------- /configs/depth_nyuv2/resnext50.yaml: -------------------------------------------------------------------------------- 1 | task: depth_nyuv2 2 | 
model_name: resnext50 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.resnet_backbones.ResNetBackbone 9 | params: 10 | model_name: resnext50_32x4d.tv_in1k 11 | create_data_transform_func: 12 | target: models.resnet_backbones.get_resnet_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: patch 17 | layer: 4 18 | probe_config: 19 | type: depth 20 | target: models.probes.DepthProbeModel 21 | params: 22 | in_features: __check__model__ 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.nyuv2.NYU_geonet 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 29 | split: train 30 | batch_size: 8 31 | val: 32 | target: data.nyuv2.NYU_geonet 33 | params: 34 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 35 | split: val 36 | batch_size: 8 37 | test: 38 | target: data.nyuv2.NYU_test 39 | params: 40 | data_path: /disk/scratch_ssd/danier/data/nyuv2/nyuv2_test.pkl 41 | batch_size: 8 42 | loss_config: 43 | target: submodules.probe3d.evals.utils.losses.DepthLoss 44 | metric_config: 45 | target: utils.DepthMetric 46 | -------------------------------------------------------------------------------- /configs/light-shadow/deit-b16.yaml: -------------------------------------------------------------------------------- 1 | task: lightshadow_v1 2 | model_name: deit-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 50 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.DeITBackbone 9 | params: 10 | model_size: base 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 5 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: 768 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 
25 | train: 26 | target: data.LightshadowV1Dataset 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/lightshadow_v1/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.LightshadowV1Dataset 34 | params: 35 | data_path: /disk/scratch_ssd/danier/data/lightshadow_v1/ 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.LightshadowV1Dataset 41 | params: 42 | data_path: /disk/scratch_ssd/danier/data/lightshadow_v1/ 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/light-shadow/dino-b16.yaml: -------------------------------------------------------------------------------- 1 | task: lightshadow_v1 2 | model_name: dino-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 50 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.DINOBackbone 9 | params: 10 | dino_name: dino 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 8 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: 768 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.LightshadowV1Dataset 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/lightshadow_v1/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.LightshadowV1Dataset 34 | params: 35 | data_path: /disk/scratch_ssd/danier/data/lightshadow_v1/ 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.LightshadowV1Dataset 41 | params: 42 | data_path: /disk/scratch_ssd/danier/data/lightshadow_v1/ 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | 
loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/texture-grad/deit-b16.yaml: -------------------------------------------------------------------------------- 1 | task: texturegrad_v1 2 | model_name: deit-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 60 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.DeITBackbone 9 | params: 10 | model_size: base 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 5 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: 768 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.TexturegradV1Dataset 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.TexturegradV1Dataset 34 | params: 35 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.TexturegradV1Dataset 41 | params: 42 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/texture-grad/dino-b16.yaml: -------------------------------------------------------------------------------- 1 | task: texturegrad_v1 2 | model_name: dino-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 60 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.DINOBackbone 9 | params: 10 | dino_name: dino 11 | feature_extractor_config: 12 | target: 
models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 8 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: 768 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.TexturegradV1Dataset 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.TexturegradV1Dataset 34 | params: 35 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.TexturegradV1Dataset 41 | params: 42 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/size/dinov2-b14.yaml: -------------------------------------------------------------------------------- 1 | task: size_v2 2 | model_name: dinov2-b14 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: timm.create_model 9 | params: 10 | model_name: vit_base_patch14_dinov2.lvd142m 11 | pretrained: true 12 | num_classes: 0 13 | feature_extractor_config: 14 | target: models.feature_extractor.TimmViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 11 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: 768 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.SizeV2Dataset 29 | params: 30 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.SizeV2Dataset 36 | params: 37 | data_path: 
/disk/scratch_ssd/danier/data/size_v2/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.SizeV2Dataset 43 | params: 44 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/size/vit-b16.yaml: -------------------------------------------------------------------------------- 1 | task: size_v2 2 | model_name: vit-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.vit.TimmViTBackbone 9 | params: 10 | model_name: vit_base_patch16_224.augreg2_in21k_ft_in1k 11 | create_data_transform_func: 12 | target: models.vit.get_vit_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.TimmViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 8 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: 768 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.SizeV2Dataset 29 | params: 30 | data_path: /raid/danier/data/size_v2/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.SizeV2Dataset 36 | params: 37 | data_path: /raid/danier/data/size_v2/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.SizeV2Dataset 43 | params: 44 | data_path: /raid/danier/data/size_v2/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/occlusion/convnext-b16.yaml: -------------------------------------------------------------------------------- 1 | task: occlusion_v4 2 | 
model_name: convnext-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.ConvNextBackbone 9 | params: 10 | arch: convnext_base 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 2 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: __check__model__ 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.OcclusionV4Dataset 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/occlusion_v4/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.OcclusionV4Dataset 34 | params: 35 | data_path: /disk/scratch_ssd/danier/data/occlusion_v4/ 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.OcclusionV4Dataset 41 | params: 42 | data_path: /disk/scratch_ssd/danier/data/occlusion_v4/ 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/texture-grad/dust3r-l16.yaml: -------------------------------------------------------------------------------- 1 | task: texturegrad_v1 2 | model_name: dust3r-l16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 60 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.dust3r.Dust3rBackbone 9 | create_data_transform_func: 10 | target: models.dust3r.get_dust3r_transform 11 | feature_extractor_config: 12 | target: models.feature_extractor.DinoViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 35 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: __check__model__ 22 | num_classes: 1 23 | 
data_config: 24 | num_workers: 4 25 | train: 26 | target: data.TexturegradV1Dataset 27 | params: 28 | data_path: /raid/danier/data/texturegrad_v1/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.TexturegradV1Dataset 34 | params: 35 | data_path: /raid/danier/data/texturegrad_v1/ 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.TexturegradV1Dataset 41 | params: 42 | data_path: /raid/danier/data/texturegrad_v1/ 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/texture-grad/mae-b16.yaml: -------------------------------------------------------------------------------- 1 | task: texturegrad_v1 2 | model_name: mae-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 60 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.MAEBackbone 9 | params: 10 | checkpoint: facebook/vit-mae-base 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 11 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: 768 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.TexturegradV1Dataset 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.TexturegradV1Dataset 34 | params: 35 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.TexturegradV1Dataset 41 | params: 42 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 
8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/depth_nyuv2/siglip-b16.yaml: -------------------------------------------------------------------------------- 1 | task: depth_nyuv2 2 | model_name: siglip-b16 3 | weight_decay: 0.001 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.SigLIPBackbone 9 | params: 10 | checkpoint: vit_base_patch16_siglip_384.webli 11 | pretrained: true 12 | feature_extractor_config: 13 | target: models.feature_extractor.Probe3DViTFeatureExtractor 14 | params: 15 | feat_type: patch 16 | layer: 8 17 | probe_config: 18 | type: depth 19 | target: models.probes.DepthProbeModel 20 | params: 21 | in_features: 768 22 | data_config: 23 | num_workers: 4 24 | train: 25 | target: data.nyuv2.NYU_geonet 26 | params: 27 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 28 | split: train 29 | batch_size: 8 30 | val: 31 | target: data.nyuv2.NYU_geonet 32 | params: 33 | data_path: /disk/scratch_ssd/danier/data/nyu_geonet/ 34 | split: val 35 | batch_size: 8 36 | test: 37 | target: data.nyuv2.NYU_test 38 | params: 39 | data_path: /disk/scratch_ssd/danier/data/nyuv2/nyuv2_test.pkl 40 | batch_size: 8 41 | loss_config: 42 | target: submodules.probe3d.evals.utils.losses.DepthLoss 43 | metric_config: 44 | target: utils.DepthMetric 45 | create_data_transform_func: 46 | target: models.probe3d_backbones.get_siglip_transform 47 | -------------------------------------------------------------------------------- /configs/light-shadow/convnext-b16.yaml: -------------------------------------------------------------------------------- 1 | task: lightshadow_v1 2 | model_name: convnext-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 50 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.ConvNextBackbone 9 | params: 10 | arch: convnext_base 11 | 
feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 2 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: __check__model__ 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.LightshadowV1Dataset 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/lightshadow_v1/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.LightshadowV1Dataset 34 | params: 35 | data_path: /disk/scratch_ssd/danier/data/lightshadow_v1/ 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.LightshadowV1Dataset 41 | params: 42 | data_path: /disk/scratch_ssd/danier/data/lightshadow_v1/ 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/size/resnet50.yaml: -------------------------------------------------------------------------------- 1 | task: size_v2 2 | model_name: resnet50 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.resnet_backbones.ResNetBackbone 9 | params: 10 | model_name: resnet50.tv_in1k 11 | create_data_transform_func: 12 | target: models.resnet_backbones.get_resnet_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 3 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: __check__model__ 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.SizeV2Dataset 29 | params: 30 | data_path: /raid/danier/data/size_v2/ 31 | split: train 32 | return_mask: true 
33 | batch_size: 8 34 | val: 35 | target: data.SizeV2Dataset 36 | params: 37 | data_path: /raid/danier/data/size_v2/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.SizeV2Dataset 43 | params: 44 | data_path: /raid/danier/data/size_v2/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/size/senet154.yaml: -------------------------------------------------------------------------------- 1 | task: size_v2 2 | model_name: senet154 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.resnet_backbones.ResNetBackbone 9 | params: 10 | model_name: senet154.gluon_in1k 11 | create_data_transform_func: 12 | target: models.resnet_backbones.get_resnet_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 3 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: __check__model__ 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.SizeV2Dataset 29 | params: 30 | data_path: /raid/danier/data/size_v2/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.SizeV2Dataset 36 | params: 37 | data_path: /raid/danier/data/size_v2/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.SizeV2Dataset 43 | params: 44 | data_path: /raid/danier/data/size_v2/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/size/stablediffusion.yaml: 
-------------------------------------------------------------------------------- 1 | task: size_v2 2 | model_name: stablediffusion 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.StableDiffusionBackbone 9 | params: 10 | model_id: stabilityai/stable-diffusion-2-1 11 | time_step: 1 12 | return_multilayer: true 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 1 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: __check__model__ 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.SizeV2Dataset 29 | params: 30 | data_path: /raid/danier/data/size_v2/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.SizeV2Dataset 36 | params: 37 | data_path: /raid/danier/data/size_v2/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.SizeV2Dataset 43 | params: 44 | data_path: /raid/danier/data/size_v2/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/texture-grad/convnext-b16.yaml: -------------------------------------------------------------------------------- 1 | task: texturegrad_v1 2 | model_name: convnext-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 60 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.ConvNextBackbone 9 | params: 10 | arch: convnext_base 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 1 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: 
models.probes.MlpProbeModel 20 | params: 21 | in_features: __check__model__ 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.TexturegradV1Dataset 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.TexturegradV1Dataset 34 | params: 35 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.TexturegradV1Dataset 41 | params: 42 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/texture-grad/croco-b16.yaml: -------------------------------------------------------------------------------- 1 | task: texturegrad_v1 2 | model_name: croco-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 60 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.croco.CroCoBackbone 9 | create_data_transform_func: 10 | target: models.croco.get_croco_transform 11 | feature_extractor_config: 12 | target: models.feature_extractor.DinoViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 11 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: 768 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.TexturegradV1Dataset 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.TexturegradV1Dataset 34 | params: 35 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.TexturegradV1Dataset 
class MoGeBackbone(nn.Module):
    """Feature-extraction wrapper around the pretrained MoGe ViT-L model.

    Only the model's ``backbone`` is queried; the wrapper exposes the same
    ``forward_intermediates`` entry point as the other backbones so it can
    be plugged into the shared feature extractors.
    """

    def __init__(self):
        super().__init__()
        # Pretrained checkpoint identifier passed to MoGeModel.from_pretrained.
        self.model = MoGeModel.from_pretrained("Ruicheng/moge-vitl")

    def forward_intermediates(self, x, *args, **kwargs):
        """Return backbone features for a batch of images.

        Args:
            x: Image tensor whose last two dimensions are height and width.
                It is normalised here with the model's own
                ``image_mean`` / ``image_std``.
            *args, **kwargs: Accepted for interface compatibility and
                ignored (e.g. a requested layer index has no effect).

        Returns:
            A one-element list ``[(features, None)]`` where ``features`` is
            the first entry returned by
            ``backbone.get_intermediate_layers(..., reshape=True)``.
        """
        moge = self.model
        height, width = x.shape[-2:]
        # Largest spatial size not exceeding the input that is a whole
        # number of 14-pixel patches (the DINOv2 patch size).
        target_size = (height // 14 * 14, width // 14 * 14)

        normalized = (x - moge.image_mean) / moge.image_std
        resized = F.interpolate(
            normalized,
            target_size,
            mode="bilinear",
            align_corners=False,
            antialias=True,
        )

        # CUDA autocast is explicitly switched off around the backbone call.
        with torch.autocast(device_type='cuda', dtype=torch.float16, enabled=False):
            features = moge.backbone.get_intermediate_layers(resized, reshape=True)[0]

        return [(features, None)]


def get_moge_transform():
    """Build the input transform for ``MoGeBackbone``.

    Resizes to 518x518 with bicubic interpolation and converts to a tensor.
    No mean/std normalisation is applied here; ``forward_intermediates``
    performs it with the model's own statistics.
    """
    resize = Resize(size=(518, 518), interpolation=InterpolationMode.BICUBIC, antialias=True)
    return Compose([resize, ToTensor()])
target: models.dust3r.Dust3rBackbone 9 | create_data_transform_func: 10 | target: models.dust3r.get_dust3r_transform 11 | feature_extractor_config: 12 | target: models.feature_extractor.DinoViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 23 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: __check__model__ 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.OcclusionV4Dataset 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/occlusion_v4/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.OcclusionV4Dataset 34 | params: 35 | data_path: /disk/scratch_ssd/danier/data/occlusion_v4/ 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.OcclusionV4Dataset 41 | params: 42 | data_path: /disk/scratch_ssd/danier/data/occlusion_v4/ 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/occlusion/lrm-b14.yaml: -------------------------------------------------------------------------------- 1 | task: occlusion_v4 2 | model_name: lrm-b14 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.lrm.LRMBackbone 9 | params: 10 | arch: vitb 11 | create_data_transform_func: 12 | target: models.lrm.get_lrm_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.DinoViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 8 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: 768 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.OcclusionV4Dataset 29 | params: 30 | data_path: 
/disk/scratch_ssd/danier/data/occlusion_v4/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.OcclusionV4Dataset 36 | params: 37 | data_path: /disk/scratch_ssd/danier/data/occlusion_v4/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.OcclusionV4Dataset 43 | params: 44 | data_path: /disk/scratch_ssd/danier/data/occlusion_v4/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/size/clip-b16.yaml: -------------------------------------------------------------------------------- 1 | task: size_v2 2 | model_name: clip-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.CLIPBackbone 9 | params: 10 | arch: ViT-B-16 11 | create_data_transform_func: 12 | target: models.probe3d_backbones.get_clip_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 2 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: 768 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.SizeV2Dataset 29 | params: 30 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.SizeV2Dataset 36 | params: 37 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.SizeV2Dataset 43 | params: 44 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | 
-------------------------------------------------------------------------------- /configs/size/resnext50.yaml: -------------------------------------------------------------------------------- 1 | task: size_v2 2 | model_name: resnext50 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.resnet_backbones.ResNetBackbone 9 | params: 10 | model_name: resnext50_32x4d.tv_in1k 11 | create_data_transform_func: 12 | target: models.resnet_backbones.get_resnet_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 3 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: __check__model__ 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.SizeV2Dataset 29 | params: 30 | data_path: /raid/danier/data/size_v2/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.SizeV2Dataset 36 | params: 37 | data_path: /raid/danier/data/size_v2/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.SizeV2Dataset 43 | params: 44 | data_path: /raid/danier/data/size_v2/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/occlusion/dinov2-b14.yaml: -------------------------------------------------------------------------------- 1 | task: occlusion_v4 2 | model_name: dinov2-b14 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: timm.create_model 9 | params: 10 | model_name: vit_base_patch14_dinov2.lvd142m 11 | pretrained: true 12 | num_classes: 0 13 | feature_extractor_config: 14 | target: 
models.feature_extractor.TimmViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 8 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: 768 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.OcclusionV4Dataset 29 | params: 30 | data_path: /disk/scratch_ssd/danier/data/occlusion_v4/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.OcclusionV4Dataset 36 | params: 37 | data_path: /disk/scratch_ssd/danier/data/occlusion_v4/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.OcclusionV4Dataset 43 | params: 44 | data_path: /disk/scratch_ssd/danier/data/occlusion_v4/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/light-shadow/dust3r-l16.yaml: -------------------------------------------------------------------------------- 1 | task: lightshadow_v1 2 | model_name: dust3r-l16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 50 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.dust3r.Dust3rBackbone 9 | create_data_transform_func: 10 | target: models.dust3r.get_dust3r_transform 11 | feature_extractor_config: 12 | target: models.feature_extractor.DinoViTFeatureExtractor 13 | params: 14 | feat_type: mask 15 | layer: 23 16 | use_cls: false 17 | probe_config: 18 | type: mlp 19 | target: models.probes.MlpProbeModel 20 | params: 21 | in_features: __check__model__ 22 | num_classes: 1 23 | data_config: 24 | num_workers: 4 25 | train: 26 | target: data.LightshadowV1Dataset 27 | params: 28 | data_path: /disk/scratch_ssd/danier/data/lightshadow_v1/ 29 | split: train 30 | return_mask: true 31 | batch_size: 8 32 | val: 33 | target: data.LightshadowV1Dataset 34 | params: 35 | 
data_path: /disk/scratch_ssd/danier/data/lightshadow_v1/ 36 | split: val 37 | return_mask: true 38 | batch_size: 8 39 | test: 40 | target: data.LightshadowV1Dataset 41 | params: 42 | data_path: /disk/scratch_ssd/danier/data/lightshadow_v1/ 43 | split: test 44 | return_mask: true 45 | return_path: true 46 | batch_size: 8 47 | loss_config: 48 | target: torch.nn.BCEWithLogitsLoss 49 | -------------------------------------------------------------------------------- /configs/occlusion/vit-b16.yaml: -------------------------------------------------------------------------------- 1 | task: occlusion_v4 2 | model_name: vit-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.vit.TimmViTBackbone 9 | params: 10 | model_name: vit_base_patch16_224.augreg2_in21k_ft_in1k 11 | create_data_transform_func: 12 | target: models.vit.get_vit_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.TimmViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 8 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: 768 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.OcclusionV4Dataset 29 | params: 30 | data_path: /raid/danier/data/occlusion_v4/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.OcclusionV4Dataset 36 | params: 37 | data_path: /raid/danier/data/occlusion_v4/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.OcclusionV4Dataset 43 | params: 44 | data_path: /raid/danier/data/occlusion_v4/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /models/resnet_backbones.py: 
class ResNetBackbone(nn.Module):
    """timm ResNet-family backbone exposing single-stage intermediate features.

    Supported checkpoints and their per-index channel widths are listed in
    ``_FEAT_DIMS``; e.g. ResNet-18 gives ``[64, 128, 256, 512]`` and
    ResNet-50 gives ``[256, 512, 1024, 2048]`` for layers ``1..4``.
    """

    # Channel width per intermediate index for each supported checkpoint.
    # Index 0 deliberately has no entry (None); layers 1-4 are the usable ones.
    _FEAT_DIMS = {
        "resnet18.tv_in1k": [None, 64, 128, 256, 512],
        "resnet50.tv_in1k": [None, 256, 512, 1024, 2048],
        "resnext50_32x4d.tv_in1k": [None, 256, 512, 1024, 2048],
        "senet154.gluon_in1k": [None, 256, 512, 1024, 2048],
    }

    def __init__(self, model_name="resnet50.tv_in1k"):
        """Create the pretrained timm model and record its feature widths.

        Args:
            model_name: timm checkpoint name; must be a key of ``_FEAT_DIMS``.

        Raises:
            KeyError: If ``model_name`` is not one of the supported names.
        """
        super().__init__()
        # Fail fast: reject unsupported names before timm builds the
        # pretrained model, and say which names are accepted.
        if model_name not in self._FEAT_DIMS:
            supported = ", ".join(sorted(self._FEAT_DIMS))
            raise KeyError(
                f"Unsupported model_name {model_name!r}; supported: {supported}"
            )
        # Copy so per-instance edits never leak into the shared class table.
        self.feat_dim = list(self._FEAT_DIMS[model_name])
        self.model = timm.create_model(model_name=model_name, pretrained=True, num_classes=0)

    def forward_intermediates(self, images, layer):
        """Return the feature map of one intermediate stage.

        Args:
            images: Batch passed straight to the timm model.
            layer: Index of the intermediate to extract (forwarded to timm
                as ``indices=[layer]`` with ``stop_early=True``).

        Returns:
            A one-element list ``[(features, None)]``.
        """
        intermediates = self.model.forward_intermediates(
            images, indices=[layer], stop_early=True, intermediates_only=True
        )
        return [(intermediates[0], None)]


def get_resnet_transform():
    """Build the input transform used with ``ResNetBackbone``.

    Resizes to 518x518 (bilinear), converts to a tensor, and normalises
    with the standard ImageNet channel mean and std.
    """
    return Compose(
        [
            Resize(size=(518, 518), interpolation=InterpolationMode.BILINEAR),
            ToTensor(),
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
    )
num_classes: 2 22 | data_config: 23 | num_workers: 4 24 | train: 25 | target: data.ElevationV1Dataset 26 | params: 27 | data_path: /raid/danier/data/elevation_v1/ 28 | split: train 29 | batch_size: 64 30 | val: 31 | target: data.ElevationV1Dataset 32 | params: 33 | data_path: /raid/danier/data/elevation_v1/ 34 | split: val 35 | batch_size: 64 36 | test: 37 | target: data.ElevationV1Dataset 38 | params: 39 | data_path: /raid/danier/data/elevation_v1/ 40 | split: test 41 | return_path: true 42 | batch_size: 64 43 | loss_config: 44 | target: utils.MultiMSEWithLogitsLoss 45 | params: 46 | target_scales: 47 | - 1 48 | - 0.625 49 | metric_config: 50 | target: utils.HorizonErrorWithLogits 51 | params: 52 | target_scales: 53 | - 1 54 | - 0.625 55 | -------------------------------------------------------------------------------- /configs/light-shadow/lrm-b14.yaml: -------------------------------------------------------------------------------- 1 | task: lightshadow_v1 2 | model_name: lrm-b14 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 50 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.lrm.LRMBackbone 9 | params: 10 | arch: vitb 11 | create_data_transform_func: 12 | target: models.lrm.get_lrm_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.DinoViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 8 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: 768 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.LightshadowV1Dataset 29 | params: 30 | data_path: /disk/scratch_ssd/danier/data/lightshadow_v1/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.LightshadowV1Dataset 36 | params: 37 | data_path: /disk/scratch_ssd/danier/data/lightshadow_v1/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.LightshadowV1Dataset 43 | 
params: 44 | data_path: /disk/scratch_ssd/danier/data/lightshadow_v1/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/texture-grad/lrm-b14.yaml: -------------------------------------------------------------------------------- 1 | task: texturegrad_v1 2 | model_name: lrm-b14 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 60 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.lrm.LRMBackbone 9 | params: 10 | arch: vitb 11 | create_data_transform_func: 12 | target: models.lrm.get_lrm_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.DinoViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 11 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: 768 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.TexturegradV1Dataset 29 | params: 30 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.TexturegradV1Dataset 36 | params: 37 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.TexturegradV1Dataset 43 | params: 44 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/elevation/ibot-b16.yaml: -------------------------------------------------------------------------------- 1 | task: elevation_v1 2 | model_name: ibot-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 16 6 | 
log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.iBOTBackbone 9 | params: 10 | model_type: base_in22k 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: patch 15 | layer: 11 16 | probe_config: 17 | type: attn 18 | target: models.probes.AttentiveProbeModel 19 | params: 20 | in_features: 768 21 | num_classes: 2 22 | data_config: 23 | num_workers: 4 24 | train: 25 | target: data.ElevationV1Dataset 26 | params: 27 | data_path: /raid/danier/data/elevation_v1/ 28 | split: train 29 | batch_size: 64 30 | val: 31 | target: data.ElevationV1Dataset 32 | params: 33 | data_path: /raid/danier/data/elevation_v1/ 34 | split: val 35 | batch_size: 8 36 | test: 37 | target: data.ElevationV1Dataset 38 | params: 39 | data_path: /raid/danier/data/elevation_v1/ 40 | split: test 41 | return_path: true 42 | batch_size: 8 43 | loss_config: 44 | target: utils.MultiMSEWithLogitsLoss 45 | params: 46 | target_scales: 47 | - 1 48 | - 0.625 49 | metric_config: 50 | target: utils.HorizonErrorWithLogits 51 | params: 52 | target_scales: 53 | - 1 54 | - 0.625 55 | -------------------------------------------------------------------------------- /configs/light-shadow/dinov2-b14.yaml: -------------------------------------------------------------------------------- 1 | task: lightshadow_v1 2 | model_name: dinov2-b14 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 50 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: timm.create_model 9 | params: 10 | model_name: vit_base_patch14_dinov2.lvd142m 11 | pretrained: true 12 | num_classes: 0 13 | feature_extractor_config: 14 | target: models.feature_extractor.TimmViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 8 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: 768 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | 
target: data.LightshadowV1Dataset 29 | params: 30 | data_path: /disk/scratch_ssd/danier/data/lightshadow_v1/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.LightshadowV1Dataset 36 | params: 37 | data_path: /disk/scratch_ssd/danier/data/lightshadow_v1/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.LightshadowV1Dataset 43 | params: 44 | data_path: /disk/scratch_ssd/danier/data/lightshadow_v1/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/texture-grad/dinov2-b14.yaml: -------------------------------------------------------------------------------- 1 | task: texturegrad_v1 2 | model_name: dinov2-b14 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 60 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: timm.create_model 9 | params: 10 | model_name: vit_base_patch14_dinov2.lvd142m 11 | pretrained: true 12 | num_classes: 0 13 | feature_extractor_config: 14 | target: models.feature_extractor.TimmViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 5 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: 768 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.TexturegradV1Dataset 29 | params: 30 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.TexturegradV1Dataset 36 | params: 37 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.TexturegradV1Dataset 43 | params: 44 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 45 | split: test 46 | return_mask: true 47 | return_path: 
true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/elevation/mae-b16.yaml: -------------------------------------------------------------------------------- 1 | task: elevation_v1 2 | model_name: mae-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 16 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.MAEBackbone 9 | params: 10 | checkpoint: facebook/vit-mae-base 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: patch 15 | layer: 8 16 | probe_config: 17 | type: attn 18 | target: models.probes.AttentiveProbeModel 19 | params: 20 | in_features: 768 21 | num_classes: 2 22 | data_config: 23 | num_workers: 4 24 | train: 25 | target: data.ElevationV1Dataset 26 | params: 27 | data_path: /raid/danier/data/elevation_v1/ 28 | split: train 29 | batch_size: 64 30 | val: 31 | target: data.ElevationV1Dataset 32 | params: 33 | data_path: /raid/danier/data/elevation_v1/ 34 | split: val 35 | batch_size: 64 36 | test: 37 | target: data.ElevationV1Dataset 38 | params: 39 | data_path: /raid/danier/data/elevation_v1/ 40 | split: test 41 | return_path: true 42 | batch_size: 64 43 | loss_config: 44 | target: utils.MultiMSEWithLogitsLoss 45 | params: 46 | target_scales: 47 | - 1 48 | - 0.625 49 | metric_config: 50 | target: utils.HorizonErrorWithLogits 51 | params: 52 | target_scales: 53 | - 1 54 | - 0.625 55 | -------------------------------------------------------------------------------- /configs/light-shadow/vit-b16.yaml: -------------------------------------------------------------------------------- 1 | task: lightshadow_v1 2 | model_name: vit-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 50 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.vit.TimmViTBackbone 9 | params: 10 | model_name: 
vit_base_patch16_224.augreg2_in21k_ft_in1k 11 | create_data_transform_func: 12 | target: models.vit.get_vit_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.TimmViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 5 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: 768 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.LightshadowV1Dataset 29 | params: 30 | data_path: /raid/danier/data/lightshadow_v1/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.LightshadowV1Dataset 36 | params: 37 | data_path: /raid/danier/data/lightshadow_v1/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.LightshadowV1Dataset 43 | params: 44 | data_path: /raid/danier/data/lightshadow_v1/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/occlusion/stablediffusion.yaml: -------------------------------------------------------------------------------- 1 | task: occlusion_v4 2 | model_name: stablediffusion 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.StableDiffusionBackbone 9 | params: 10 | model_id: stabilityai/stable-diffusion-2-1 11 | time_step: 1 12 | return_multilayer: true 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 1 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: __check__model__ 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.OcclusionV4Dataset 29 | params: 30 | 
data_path: /raid/danier/data/occlusion_v4 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.OcclusionV4Dataset 36 | params: 37 | data_path: /raid/danier/data/occlusion_v4 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.OcclusionV4Dataset 43 | params: 44 | data_path: /raid/danier/data/occlusion_v4 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/size/resnet18.yaml: -------------------------------------------------------------------------------- 1 | task: size_v2 2 | model_name: resnet18 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.resnet_backbones.ResNetBackbone 9 | params: 10 | model_name: resnet18.tv_in1k 11 | create_data_transform_func: 12 | target: models.resnet_backbones.get_resnet_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 2 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: __check__model__ 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.SizeV2Dataset 29 | params: 30 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.SizeV2Dataset 36 | params: 37 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.SizeV2Dataset 43 | params: 44 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | 
-------------------------------------------------------------------------------- /configs/texture-grad/vit-b16.yaml: -------------------------------------------------------------------------------- 1 | task: texturegrad_v1 2 | model_name: vit-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 60 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.vit.TimmViTBackbone 9 | params: 10 | model_name: vit_base_patch16_224.augreg2_in21k_ft_in1k 11 | create_data_transform_func: 12 | target: models.vit.get_vit_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.TimmViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 5 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: 768 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.TexturegradV1Dataset 29 | params: 30 | data_path: /raid/danier/data/texturegrad_v1/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.TexturegradV1Dataset 36 | params: 37 | data_path: /raid/danier/data/texturegrad_v1/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.TexturegradV1Dataset 43 | params: 44 | data_path: /raid/danier/data/texturegrad_v1/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/light-shadow/resnet50.yaml: -------------------------------------------------------------------------------- 1 | task: lightshadow_v1 2 | model_name: resnet50 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 50 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.resnet_backbones.ResNetBackbone 9 | params: 10 | model_name: resnet50.tv_in1k 11 | create_data_transform_func: 12 | target: 
models.resnet_backbones.get_resnet_transform 13 | feature_extractor_config: 14 | target: models.tmp.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 4 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: __check__model__ 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.LightshadowV1Dataset 29 | params: 30 | data_path: /raid/danier/data/lightshadow_v1/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.LightshadowV1Dataset 36 | params: 37 | data_path: /raid/danier/data/lightshadow_v1/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.LightshadowV1Dataset 43 | params: 44 | data_path: /raid/danier/data/lightshadow_v1/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/occlusion/depthanythingv2-b14.yaml: -------------------------------------------------------------------------------- 1 | task: occlusion_v4 2 | model_name: depthanythingv2-b14 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.depth_anything_v2.DepthAnythingV2Backbone 9 | params: 10 | arch: vitb 11 | create_data_transform_func: 12 | target: models.depth_anything_v2.get_depth_anything_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.DinoViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 8 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: 768 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.OcclusionV4Dataset 29 | params: 30 | data_path: /raid/danier/data/occlusion_v4 31 | split: train 
32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.OcclusionV4Dataset 36 | params: 37 | data_path: /raid/danier/data/occlusion_v4 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.OcclusionV4Dataset 43 | params: 44 | data_path: /raid/danier/data/occlusion_v4 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/occlusion/resnet50.yaml: -------------------------------------------------------------------------------- 1 | task: occlusion_v4 2 | model_name: resnet50 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.resnet_backbones.ResNetBackbone 9 | params: 10 | model_name: resnet50.tv_in1k 11 | create_data_transform_func: 12 | target: models.resnet_backbones.get_resnet_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 3 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: __check__model__ 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.OcclusionV4Dataset 29 | params: 30 | data_path: /raid/danier/data/occlusion_v4/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.OcclusionV4Dataset 36 | params: 37 | data_path: /raid/danier/data/occlusion_v4/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.OcclusionV4Dataset 43 | params: 44 | data_path: /raid/danier/data/occlusion_v4/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | 
-------------------------------------------------------------------------------- /configs/occlusion/senet154.yaml: -------------------------------------------------------------------------------- 1 | task: occlusion_v4 2 | model_name: senet154 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.resnet_backbones.ResNetBackbone 9 | params: 10 | model_name: senet154.gluon_in1k 11 | create_data_transform_func: 12 | target: models.resnet_backbones.get_resnet_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 2 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: __check__model__ 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.OcclusionV4Dataset 29 | params: 30 | data_path: /raid/danier/data/occlusion_v4/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.OcclusionV4Dataset 36 | params: 37 | data_path: /raid/danier/data/occlusion_v4/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.OcclusionV4Dataset 43 | params: 44 | data_path: /raid/danier/data/occlusion_v4/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/perspective/midas-l16.yaml: -------------------------------------------------------------------------------- 1 | task: perspective_v1 2 | model_name: midas-l16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.MidasBackbone 9 | params: 10 | midas: true 11 | feature_extractor_config: 12 | target: 
models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: patch 15 | layer: 11 16 | probe_config: 17 | type: attn 18 | target: models.probes.AttentiveProbeModel 19 | params: 20 | in_features: 1024 21 | num_classes: 2 22 | data_config: 23 | num_workers: 4 24 | train: 25 | target: data.PerspectiveV1Dataset 26 | params: 27 | data_path: /raid/danier/data/perspective_v1/ 28 | split: train 29 | batch_size: 64 30 | val: 31 | target: data.PerspectiveV1Dataset 32 | params: 33 | data_path: /raid/danier/data/perspective_v1/ 34 | split: val 35 | batch_size: 64 36 | test: 37 | target: data.PerspectiveV1Dataset 38 | params: 39 | data_path: /raid/danier/data/perspective_v1/ 40 | split: test 41 | return_path: true 42 | batch_size: 64 43 | loss_config: 44 | target: utils.MSEWithLogitsLoss 45 | params: 46 | target_range: 47 | - -1.5 48 | - 1.5 49 | metric_config: 50 | target: utils.EuclideanDistanceWithLogits 51 | params: 52 | target_range: 53 | - -1.5 54 | - 1.5 55 | -------------------------------------------------------------------------------- /configs/light-shadow/senet154.yaml: -------------------------------------------------------------------------------- 1 | task: lightshadow_v1 2 | model_name: senet154 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 50 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.resnet_backbones.ResNetBackbone 9 | params: 10 | model_name: senet154.gluon_in1k 11 | create_data_transform_func: 12 | target: models.resnet_backbones.get_resnet_transform 13 | feature_extractor_config: 14 | target: models.tmp.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 4 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: __check__model__ 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.LightshadowV1Dataset 29 | params: 30 | data_path: /raid/danier/data/lightshadow_v1/ 31 | split: 
train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.LightshadowV1Dataset 36 | params: 37 | data_path: /raid/danier/data/lightshadow_v1/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.LightshadowV1Dataset 43 | params: 44 | data_path: /raid/danier/data/lightshadow_v1/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/occlusion/clip-b16.yaml: -------------------------------------------------------------------------------- 1 | task: occlusion_v4 2 | model_name: clip-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.CLIPBackbone 9 | params: 10 | arch: ViT-B-16 11 | create_data_transform_func: 12 | target: models.probe3d_backbones.get_clip_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 2 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: 768 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.OcclusionV4Dataset 29 | params: 30 | data_path: /disk/scratch_ssd/danier/data/occlusion_v4/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.OcclusionV4Dataset 36 | params: 37 | data_path: /disk/scratch_ssd/danier/data/occlusion_v4/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.OcclusionV4Dataset 43 | params: 44 | data_path: /disk/scratch_ssd/danier/data/occlusion_v4/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | 
-------------------------------------------------------------------------------- /configs/occlusion/resnext50.yaml: -------------------------------------------------------------------------------- 1 | task: occlusion_v4 2 | model_name: resnext50 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.resnet_backbones.ResNetBackbone 9 | params: 10 | model_name: resnext50_32x4d.tv_in1k 11 | create_data_transform_func: 12 | target: models.resnet_backbones.get_resnet_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 3 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: __check__model__ 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.OcclusionV4Dataset 29 | params: 30 | data_path: /raid/danier/data/occlusion_v4/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.OcclusionV4Dataset 36 | params: 37 | data_path: /raid/danier/data/occlusion_v4/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.OcclusionV4Dataset 43 | params: 44 | data_path: /raid/danier/data/occlusion_v4/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/size/depthanythingv2-b14.yaml: -------------------------------------------------------------------------------- 1 | task: size_v2 2 | model_name: depthanythingv2-b14 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.depth_anything_v2.DepthAnythingV2Backbone 9 | params: 10 | arch: vitb 11 | create_data_transform_func: 12 | target: 
models.depth_anything_v2.get_depth_anything_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.DinoViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 11 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: 768 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.SizeV2Dataset 29 | params: 30 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.SizeV2Dataset 36 | params: 37 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.SizeV2Dataset 43 | params: 44 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/elevation/sam-b16.yaml: -------------------------------------------------------------------------------- 1 | task: elevation_v1 2 | model_name: sam-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 16 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.SAMBackbone 9 | params: 10 | arch: vit_b 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: patch 15 | layer: 11 16 | probe_config: 17 | type: attn 18 | target: models.probes.AttentiveProbeModel 19 | params: 20 | in_features: 768 21 | num_classes: 2 22 | data_config: 23 | num_workers: 4 24 | train: 25 | target: data.ElevationV1Dataset 26 | params: 27 | data_path: /disk/scratch_ssd/danier/data/elevation_v1/ 28 | split: train 29 | batch_size: 64 30 | val: 31 | target: data.ElevationV1Dataset 32 | params: 33 | data_path: 
/disk/scratch_ssd/danier/data/elevation_v1/ 34 | split: val 35 | batch_size: 64 36 | test: 37 | target: data.ElevationV1Dataset 38 | params: 39 | data_path: /disk/scratch_ssd/danier/data/elevation_v1/ 40 | split: test 41 | return_path: true 42 | batch_size: 64 43 | loss_config: 44 | target: utils.MultiMSEWithLogitsLoss 45 | params: 46 | target_scales: 47 | - 1 48 | - 0.625 49 | metric_config: 50 | target: utils.HorizonErrorWithLogits 51 | params: 52 | target_scales: 53 | - 1 54 | - 0.625 55 | -------------------------------------------------------------------------------- /configs/light-shadow/resnext50.yaml: -------------------------------------------------------------------------------- 1 | task: lightshadow_v1 2 | model_name: resnext50 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 50 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.resnet_backbones.ResNetBackbone 9 | params: 10 | model_name: resnext50_32x4d.tv_in1k 11 | create_data_transform_func: 12 | target: models.resnet_backbones.get_resnet_transform 13 | feature_extractor_config: 14 | target: models.tmp.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 4 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: __check__model__ 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.LightshadowV1Dataset 29 | params: 30 | data_path: /raid/danier/data/lightshadow_v1/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.LightshadowV1Dataset 36 | params: 37 | data_path: /raid/danier/data/lightshadow_v1/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.LightshadowV1Dataset 43 | params: 44 | data_path: /raid/danier/data/lightshadow_v1/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | 
-------------------------------------------------------------------------------- /configs/light-shadow/stablediffusion.yaml: -------------------------------------------------------------------------------- 1 | task: lightshadow_v1 2 | model_name: stablediffusion 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 50 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.StableDiffusionBackbone 9 | params: 10 | model_id: stabilityai/stable-diffusion-2-1 11 | time_step: 1 12 | return_multilayer: true 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 1 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: __check__model__ 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.LightshadowV1Dataset 29 | params: 30 | data_path: /raid/danier/data/lightshadow_v1 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.LightshadowV1Dataset 36 | params: 37 | data_path: /raid/danier/data/lightshadow_v1 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.LightshadowV1Dataset 43 | params: 44 | data_path: /raid/danier/data/lightshadow_v1 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/texture-grad/resnet50.yaml: -------------------------------------------------------------------------------- 1 | task: texturegrad_v1 2 | model_name: resnet50 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 60 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.resnet_backbones.ResNetBackbone 9 | params: 10 | model_name: resnet50.tv_in1k 11 | create_data_transform_func: 12 | target: 
models.resnet_backbones.get_resnet_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 3 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: __check__model__ 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.TexturegradV1Dataset 29 | params: 30 | data_path: /raid/danier/data/texturegrad_v1/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.TexturegradV1Dataset 36 | params: 37 | data_path: /raid/danier/data/texturegrad_v1/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.TexturegradV1Dataset 43 | params: 44 | data_path: /raid/danier/data/texturegrad_v1/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/texture-grad/stablediffusion.yaml: -------------------------------------------------------------------------------- 1 | task: texturegrad_v1 2 | model_name: stablediffusion 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 60 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.StableDiffusionBackbone 9 | params: 10 | model_id: stabilityai/stable-diffusion-2-1 11 | time_step: 1 12 | return_multilayer: true 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 1 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: __check__model__ 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.TexturegradV1Dataset 29 | params: 30 | data_path: /raid/danier/data/texturegrad_v1/ 31 | 
split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.TexturegradV1Dataset 36 | params: 37 | data_path: /raid/danier/data/texturegrad_v1/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.TexturegradV1Dataset 43 | params: 44 | data_path: /raid/danier/data/texturegrad_v1/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/elevation/deit-b16.yaml: -------------------------------------------------------------------------------- 1 | task: elevation_v1 2 | model_name: deit-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 16 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.DeITBackbone 9 | params: 10 | model_size: base 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: patch 15 | layer: 5 16 | probe_config: 17 | type: attn 18 | target: models.probes.AttentiveProbeModel 19 | params: 20 | in_features: 768 21 | num_classes: 2 22 | data_config: 23 | num_workers: 4 24 | train: 25 | target: data.ElevationV1Dataset 26 | params: 27 | data_path: /disk/scratch_ssd/danier/data/elevation_v1/ 28 | split: train 29 | batch_size: 64 30 | val: 31 | target: data.ElevationV1Dataset 32 | params: 33 | data_path: /disk/scratch_ssd/danier/data/elevation_v1/ 34 | split: val 35 | batch_size: 64 36 | test: 37 | target: data.ElevationV1Dataset 38 | params: 39 | data_path: /disk/scratch_ssd/danier/data/elevation_v1/ 40 | split: test 41 | return_path: true 42 | batch_size: 64 43 | loss_config: 44 | target: utils.MultiMSEWithLogitsLoss 45 | params: 46 | target_scales: 47 | - 1 48 | - 0.625 49 | metric_config: 50 | target: utils.HorizonErrorWithLogits 51 | params: 52 | target_scales: 53 | - 1 54 | - 0.625 55 | 
-------------------------------------------------------------------------------- /configs/elevation/dino-b16.yaml: -------------------------------------------------------------------------------- 1 | task: elevation_v1 2 | model_name: dino-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 16 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.DINOBackbone 9 | params: 10 | dino_name: dino 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: patch 15 | layer: 8 16 | probe_config: 17 | type: attn 18 | target: models.probes.AttentiveProbeModel 19 | params: 20 | in_features: 768 21 | num_classes: 2 22 | data_config: 23 | num_workers: 4 24 | train: 25 | target: data.ElevationV1Dataset 26 | params: 27 | data_path: /disk/scratch_ssd/danier/data/elevation_v1/ 28 | split: train 29 | batch_size: 64 30 | val: 31 | target: data.ElevationV1Dataset 32 | params: 33 | data_path: /disk/scratch_ssd/danier/data/elevation_v1/ 34 | split: val 35 | batch_size: 64 36 | test: 37 | target: data.ElevationV1Dataset 38 | params: 39 | data_path: /disk/scratch_ssd/danier/data/elevation_v1/ 40 | split: test 41 | return_path: true 42 | batch_size: 64 43 | loss_config: 44 | target: utils.MultiMSEWithLogitsLoss 45 | params: 46 | target_scales: 47 | - 1 48 | - 0.625 49 | metric_config: 50 | target: utils.HorizonErrorWithLogits 51 | params: 52 | target_scales: 53 | - 1 54 | - 0.625 55 | -------------------------------------------------------------------------------- /configs/light-shadow/clip-b16.yaml: -------------------------------------------------------------------------------- 1 | task: lightshadow_v1 2 | model_name: clip-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 50 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.CLIPBackbone 9 | params: 10 | arch: ViT-B-16 11 | create_data_transform_func: 12 | target: 
models.probe3d_backbones.get_clip_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 2 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: 768 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.LightshadowV1Dataset 29 | params: 30 | data_path: /disk/scratch_ssd/danier/data/lightshadow_v1/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.LightshadowV1Dataset 36 | params: 37 | data_path: /disk/scratch_ssd/danier/data/lightshadow_v1/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.LightshadowV1Dataset 43 | params: 44 | data_path: /disk/scratch_ssd/danier/data/lightshadow_v1/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/light-shadow/depthanythingv2-b14.yaml: -------------------------------------------------------------------------------- 1 | task: lightshadow_v1 2 | model_name: depthanythingv2-b14 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 50 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.depth_anything_v2.DepthAnythingV2Backbone 9 | params: 10 | arch: vitb 11 | create_data_transform_func: 12 | target: models.depth_anything_v2.get_depth_anything_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.DinoViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 8 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: 768 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.LightshadowV1Dataset 29 | params: 30 | data_path: 
/raid/danier/data/lightshadow_v1 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.LightshadowV1Dataset 36 | params: 37 | data_path: /raid/danier/data/lightshadow_v1 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.LightshadowV1Dataset 43 | params: 44 | data_path: /raid/danier/data/lightshadow_v1 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/texture-grad/clip-b16.yaml: -------------------------------------------------------------------------------- 1 | task: texturegrad_v1 2 | model_name: clip-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 60 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.CLIPBackbone 9 | params: 10 | arch: ViT-B-16 11 | create_data_transform_func: 12 | target: models.probe3d_backbones.get_clip_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 2 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: 768 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.TexturegradV1Dataset 29 | params: 30 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.TexturegradV1Dataset 36 | params: 37 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.TexturegradV1Dataset 43 | params: 44 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: 
torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/texture-grad/senet154.yaml: -------------------------------------------------------------------------------- 1 | task: texturegrad_v1 2 | model_name: senet154 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 60 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.resnet_backbones.ResNetBackbone 9 | params: 10 | model_name: senet154.gluon_in1k 11 | create_data_transform_func: 12 | target: models.resnet_backbones.get_resnet_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 2 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: __check__model__ 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.TexturegradV1Dataset 29 | params: 30 | data_path: /raid/danier/data/texturegrad_v1/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.TexturegradV1Dataset 36 | params: 37 | data_path: /raid/danier/data/texturegrad_v1/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.TexturegradV1Dataset 43 | params: 44 | data_path: /raid/danier/data/texturegrad_v1/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/texture-grad/resnext50.yaml: -------------------------------------------------------------------------------- 1 | task: texturegrad_v1 2 | model_name: resnext50 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 60 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.resnet_backbones.ResNetBackbone 9 | params: 10 | model_name: resnext50_32x4d.tv_in1k 11 
| create_data_transform_func: 12 | target: models.resnet_backbones.get_resnet_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 2 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: __check__model__ 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.TexturegradV1Dataset 29 | params: 30 | data_path: /raid/danier/data/texturegrad_v1/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.TexturegradV1Dataset 36 | params: 37 | data_path: /raid/danier/data/texturegrad_v1/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.TexturegradV1Dataset 43 | params: 44 | data_path: /raid/danier/data/texturegrad_v1/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/elevation/dust3r-l16.yaml: -------------------------------------------------------------------------------- 1 | task: elevation_v1 2 | model_name: dust3r-l16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 16 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.dust3r.Dust3rBackbone 9 | create_data_transform_func: 10 | target: models.dust3r.get_dust3r_transform 11 | feature_extractor_config: 12 | target: models.feature_extractor.DinoViTFeatureExtractor 13 | params: 14 | feat_type: patch 15 | layer: 17 16 | probe_config: 17 | type: attn 18 | target: models.probes.AttentiveProbeModel 19 | params: 20 | in_features: __check__model__ 21 | num_classes: 2 22 | data_config: 23 | num_workers: 4 24 | train: 25 | target: data.ElevationV1Dataset 26 | params: 27 | data_path: /raid/danier/data/elevation_v1 28 | split: train 29 | batch_size: 64 30 | 
val: 31 | target: data.ElevationV1Dataset 32 | params: 33 | data_path: /raid/danier/data/elevation_v1 34 | split: val 35 | batch_size: 64 36 | test: 37 | target: data.ElevationV1Dataset 38 | params: 39 | data_path: /raid/danier/data/elevation_v1 40 | split: test 41 | return_path: true 42 | batch_size: 64 43 | loss_config: 44 | target: utils.MultiMSEWithLogitsLoss 45 | params: 46 | target_scales: 47 | - 1 48 | - 0.625 49 | metric_config: 50 | target: utils.HorizonErrorWithLogits 51 | params: 52 | target_scales: 53 | - 1 54 | - 0.625 55 | -------------------------------------------------------------------------------- /configs/perspective/sam-b16.yaml: -------------------------------------------------------------------------------- 1 | task: perspective_v1 2 | model_name: sam-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.SAMBackbone 9 | params: 10 | arch: vit_b 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: patch 15 | layer: 8 16 | probe_config: 17 | type: attn 18 | target: models.probes.AttentiveProbeModel 19 | params: 20 | in_features: 768 21 | num_classes: 2 22 | data_config: 23 | num_workers: 4 24 | train: 25 | target: data.PerspectiveV1DatasetHalf 26 | params: 27 | data_path: /disk/scratch_ssd/danier/data/perspective_v1 28 | split: train 29 | batch_size: 64 30 | val: 31 | target: data.PerspectiveV1Dataset 32 | params: 33 | data_path: /disk/scratch_ssd/danier/data/perspective_v1 34 | split: val 35 | batch_size: 64 36 | test: 37 | target: data.PerspectiveV1Dataset 38 | params: 39 | data_path: /disk/scratch_ssd/danier/data/perspective_v1 40 | split: test 41 | return_path: true 42 | batch_size: 64 43 | loss_config: 44 | target: utils.MSEWithLogitsLoss 45 | params: 46 | target_range: 47 | - -1.5 48 | - 1.5 49 | metric_config: 50 | target: utils.EuclideanDistanceWithLogits 51 | params: 
52 | target_range: 53 | - -1.5 54 | - 1.5 55 | -------------------------------------------------------------------------------- /configs/size/siglip-b16.yaml: -------------------------------------------------------------------------------- 1 | task: size_v2 2 | model_name: siglip-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.SigLIPBackbone 9 | params: 10 | checkpoint: vit_base_patch16_siglip_384.webli 11 | pretrained: true 12 | create_data_transform_func: 13 | target: models.probe3d_backbones.get_siglip_transform 14 | feature_extractor_config: 15 | target: models.feature_extractor.Probe3DViTFeatureExtractor 16 | params: 17 | feat_type: mask 18 | layer: 11 19 | use_cls: false 20 | probe_config: 21 | type: mlp 22 | target: models.probes.MlpProbeModel 23 | params: 24 | in_features: 768 25 | num_classes: 1 26 | data_config: 27 | num_workers: 4 28 | train: 29 | target: data.SizeV2Dataset 30 | params: 31 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 32 | split: train 33 | return_mask: true 34 | batch_size: 8 35 | val: 36 | target: data.SizeV2Dataset 37 | params: 38 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 39 | split: val 40 | return_mask: true 41 | batch_size: 8 42 | test: 43 | target: data.SizeV2Dataset 44 | params: 45 | data_path: /disk/scratch_ssd/danier/data/size_v2/ 46 | split: test 47 | return_mask: true 48 | return_path: true 49 | batch_size: 8 50 | loss_config: 51 | target: torch.nn.BCEWithLogitsLoss 52 | -------------------------------------------------------------------------------- /configs/occlusion/resnet18.yaml: -------------------------------------------------------------------------------- 1 | task: occlusion_v4 2 | model_name: resnet18 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.resnet_backbones.ResNetBackbone 9 | params: 10 | model_name: 
resnet18.tv_in1k 11 | create_data_transform_func: 12 | target: models.resnet_backbones.get_resnet_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 3 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: __check__model__ 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.OcclusionV4Dataset 29 | params: 30 | data_path: /disk/scratch_ssd/danier/data/occlusion_v4/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.OcclusionV4Dataset 36 | params: 37 | data_path: /disk/scratch_ssd/danier/data/occlusion_v4/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.OcclusionV4Dataset 43 | params: 44 | data_path: /disk/scratch_ssd/danier/data/occlusion_v4/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/occlusion/siglip-b16.yaml: -------------------------------------------------------------------------------- 1 | task: occlusion_v4 2 | model_name: siglip-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 10 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.SigLIPBackbone 9 | params: 10 | checkpoint: vit_base_patch16_siglip_384.webli 11 | pretrained: true 12 | create_data_transform_func: 13 | target: models.probe3d_backbones.get_siglip_transform 14 | feature_extractor_config: 15 | target: models.feature_extractor.Probe3DViTFeatureExtractor 16 | params: 17 | feat_type: mask 18 | layer: 5 19 | use_cls: false 20 | probe_config: 21 | type: mlp 22 | target: models.probes.MlpProbeModel 23 | params: 24 | in_features: 768 25 | num_classes: 1 26 | data_config: 27 | num_workers: 4 
28 | train: 29 | target: data.OcclusionV4Dataset 30 | params: 31 | data_path: /raid/danier/data/occlusion_v4/ 32 | split: train 33 | return_mask: true 34 | batch_size: 8 35 | val: 36 | target: data.OcclusionV4Dataset 37 | params: 38 | data_path: /raid/danier/data/occlusion_v4/ 39 | split: val 40 | return_mask: true 41 | batch_size: 8 42 | test: 43 | target: data.OcclusionV4Dataset 44 | params: 45 | data_path: /raid/danier/data/occlusion_v4/ 46 | split: test 47 | return_mask: true 48 | return_path: true 49 | batch_size: 8 50 | loss_config: 51 | target: torch.nn.BCEWithLogitsLoss 52 | -------------------------------------------------------------------------------- /configs/perspective/deit-b16.yaml: -------------------------------------------------------------------------------- 1 | task: perspective_v1 2 | model_name: deit-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.DeITBackbone 9 | params: 10 | model_size: base 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: patch 15 | layer: 5 16 | probe_config: 17 | type: attn 18 | target: models.probes.AttentiveProbeModel 19 | params: 20 | in_features: 768 21 | num_classes: 2 22 | data_config: 23 | num_workers: 4 24 | train: 25 | target: data.PerspectiveV1Dataset 26 | params: 27 | data_path: /disk/scratch_ssd/danier/data/perspective_v1/ 28 | split: train 29 | batch_size: 64 30 | val: 31 | target: data.PerspectiveV1Dataset 32 | params: 33 | data_path: /disk/scratch_ssd/danier/data/perspective_v1/ 34 | split: val 35 | batch_size: 64 36 | test: 37 | target: data.PerspectiveV1Dataset 38 | params: 39 | data_path: /disk/scratch_ssd/danier/data/perspective_v1/ 40 | split: test 41 | return_path: true 42 | batch_size: 64 43 | loss_config: 44 | target: utils.MSEWithLogitsLoss 45 | params: 46 | target_range: 47 | - -1.5 48 | - 1.5 49 | metric_config: 50 | 
target: utils.EuclideanDistanceWithLogits 51 | params: 52 | target_range: 53 | - -1.5 54 | - 1.5 55 | -------------------------------------------------------------------------------- /configs/perspective/dino-b16.yaml: -------------------------------------------------------------------------------- 1 | task: perspective_v1 2 | model_name: dino-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.DINOBackbone 9 | params: 10 | dino_name: dino 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: patch 15 | layer: 5 16 | probe_config: 17 | type: attn 18 | target: models.probes.AttentiveProbeModel 19 | params: 20 | in_features: 768 21 | num_classes: 2 22 | data_config: 23 | num_workers: 4 24 | train: 25 | target: data.PerspectiveV1Dataset 26 | params: 27 | data_path: /disk/scratch_ssd/danier/data/perspective_v1/ 28 | split: train 29 | batch_size: 64 30 | val: 31 | target: data.PerspectiveV1Dataset 32 | params: 33 | data_path: /disk/scratch_ssd/danier/data/perspective_v1/ 34 | split: val 35 | batch_size: 64 36 | test: 37 | target: data.PerspectiveV1Dataset 38 | params: 39 | data_path: /disk/scratch_ssd/danier/data/perspective_v1/ 40 | split: test 41 | return_path: true 42 | batch_size: 64 43 | loss_config: 44 | target: utils.MSEWithLogitsLoss 45 | params: 46 | target_range: 47 | - -1.5 48 | - 1.5 49 | metric_config: 50 | target: utils.EuclideanDistanceWithLogits 51 | params: 52 | target_range: 53 | - -1.5 54 | - 1.5 55 | -------------------------------------------------------------------------------- /configs/elevation/convnext-b16.yaml: -------------------------------------------------------------------------------- 1 | task: elevation_v1 2 | model_name: convnext-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 16 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: 
models.probe3d_backbones.ConvNextBackbone 9 | params: 10 | arch: convnext_base 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: patch 15 | layer: 3 16 | probe_config: 17 | type: attn 18 | target: models.probes.AttentiveProbeModel 19 | params: 20 | in_features: __check__model__ 21 | num_classes: 2 22 | data_config: 23 | num_workers: 4 24 | train: 25 | target: data.ElevationV1Dataset 26 | params: 27 | data_path: /disk/scratch_ssd/danier/data/elevation_v1/ 28 | split: train 29 | batch_size: 64 30 | val: 31 | target: data.ElevationV1Dataset 32 | params: 33 | data_path: /disk/scratch_ssd/danier/data/elevation_v1/ 34 | split: val 35 | batch_size: 64 36 | test: 37 | target: data.ElevationV1Dataset 38 | params: 39 | data_path: /disk/scratch_ssd/danier/data/elevation_v1/ 40 | split: test 41 | return_path: true 42 | batch_size: 64 43 | loss_config: 44 | target: utils.MultiMSEWithLogitsLoss 45 | params: 46 | target_scales: 47 | - 1 48 | - 0.625 49 | metric_config: 50 | target: utils.HorizonErrorWithLogits 51 | params: 52 | target_scales: 53 | - 1 54 | - 0.625 55 | -------------------------------------------------------------------------------- /configs/elevation/croco-b16.yaml: -------------------------------------------------------------------------------- 1 | task: elevation_v1 2 | model_name: croco-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 16 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.croco.CroCoBackbone 9 | create_data_transform_func: 10 | target: models.croco.get_croco_transform 11 | feature_extractor_config: 12 | target: models.feature_extractor.DinoViTFeatureExtractor 13 | params: 14 | feat_type: patch 15 | layer: 8 16 | probe_config: 17 | type: attn 18 | target: models.probes.AttentiveProbeModel 19 | params: 20 | in_features: 768 21 | num_classes: 2 22 | data_config: 23 | num_workers: 4 24 | train: 25 | target: data.ElevationV1Dataset 26 | params: 
27 | data_path: /disk/scratch_ssd/danier/data/elevation_v1/ 28 | split: train 29 | batch_size: 64 30 | val: 31 | target: data.ElevationV1Dataset 32 | params: 33 | data_path: /disk/scratch_ssd/danier/data/elevation_v1/ 34 | split: val 35 | batch_size: 64 36 | test: 37 | target: data.ElevationV1Dataset 38 | params: 39 | data_path: /disk/scratch_ssd/danier/data/elevation_v1/ 40 | split: test 41 | return_path: true 42 | batch_size: 64 43 | loss_config: 44 | target: utils.MultiMSEWithLogitsLoss 45 | params: 46 | target_scales: 47 | - 1 48 | - 0.625 49 | metric_config: 50 | target: utils.HorizonErrorWithLogits 51 | params: 52 | target_scales: 53 | - 1 54 | - 0.625 55 | -------------------------------------------------------------------------------- /configs/perspective/ibot-b16.yaml: -------------------------------------------------------------------------------- 1 | task: perspective_v1 2 | model_name: ibot-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.iBOTBackbone 9 | params: 10 | model_type: base_in22k 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: patch 15 | layer: 8 16 | probe_config: 17 | type: attn 18 | target: models.probes.AttentiveProbeModel 19 | params: 20 | in_features: 768 21 | num_classes: 2 22 | data_config: 23 | num_workers: 4 24 | train: 25 | target: data.PerspectiveV1DatasetHalf 26 | params: 27 | data_path: /disk/scratch_ssd/danier/data/perspective_v1 28 | split: train 29 | batch_size: 64 30 | val: 31 | target: data.PerspectiveV1Dataset 32 | params: 33 | data_path: /disk/scratch_ssd/danier/data/perspective_v1 34 | split: val 35 | batch_size: 64 36 | test: 37 | target: data.PerspectiveV1Dataset 38 | params: 39 | data_path: /disk/scratch_ssd/danier/data/perspective_v1 40 | split: test 41 | return_path: true 42 | batch_size: 64 43 | loss_config: 44 | target: 
utils.MSEWithLogitsLoss 45 | params: 46 | target_range: 47 | - -1.5 48 | - 1.5 49 | metric_config: 50 | target: utils.EuclideanDistanceWithLogits 51 | params: 52 | target_range: 53 | - -1.5 54 | - 1.5 55 | -------------------------------------------------------------------------------- /configs/light-shadow/resnet18.yaml: -------------------------------------------------------------------------------- 1 | task: lightshadow_v1 2 | model_name: resnet18 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 50 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.resnet_backbones.ResNetBackbone 9 | params: 10 | model_name: resnet18.tv_in1k 11 | create_data_transform_func: 12 | target: models.resnet_backbones.get_resnet_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 3 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: __check__model__ 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.LightshadowV1Dataset 29 | params: 30 | data_path: /disk/scratch_ssd/danier/data/lightshadow_v1/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.LightshadowV1Dataset 36 | params: 37 | data_path: /disk/scratch_ssd/danier/data/lightshadow_v1/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.LightshadowV1Dataset 43 | params: 44 | data_path: /disk/scratch_ssd/danier/data/lightshadow_v1/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/light-shadow/siglip-b16.yaml: -------------------------------------------------------------------------------- 1 | task: lightshadow_v1 2 | model_name: siglip-b16 3 | 
weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 50 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.SigLIPBackbone 9 | params: 10 | checkpoint: vit_base_patch16_siglip_384.webli 11 | pretrained: true 12 | create_data_transform_func: 13 | target: models.probe3d_backbones.get_siglip_transform 14 | feature_extractor_config: 15 | target: models.feature_extractor.Probe3DViTFeatureExtractor 16 | params: 17 | feat_type: mask 18 | layer: 5 19 | use_cls: false 20 | probe_config: 21 | type: mlp 22 | target: models.probes.MlpProbeModel 23 | params: 24 | in_features: 768 25 | num_classes: 1 26 | data_config: 27 | num_workers: 4 28 | train: 29 | target: data.LightshadowV1Dataset 30 | params: 31 | data_path: /raid/danier/data/lightshadow_v1/ 32 | split: train 33 | return_mask: true 34 | batch_size: 8 35 | val: 36 | target: data.LightshadowV1Dataset 37 | params: 38 | data_path: /raid/danier/data/lightshadow_v1/ 39 | split: val 40 | return_mask: true 41 | batch_size: 8 42 | test: 43 | target: data.LightshadowV1Dataset 44 | params: 45 | data_path: /raid/danier/data/lightshadow_v1/ 46 | split: test 47 | return_mask: true 48 | return_path: true 49 | batch_size: 8 50 | loss_config: 51 | target: torch.nn.BCEWithLogitsLoss 52 | -------------------------------------------------------------------------------- /configs/perspective/mae-b16.yaml: -------------------------------------------------------------------------------- 1 | task: perspective_v1 2 | model_name: mae-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.MAEBackbone 9 | params: 10 | checkpoint: facebook/vit-mae-base 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: patch 15 | layer: 11 16 | probe_config: 17 | type: attn 18 | target: models.probes.AttentiveProbeModel 19 | params: 20 | in_features: 768 21 | 
num_classes: 2 22 | data_config: 23 | num_workers: 4 24 | train: 25 | target: data.PerspectiveV1DatasetHalf 26 | params: 27 | data_path: /disk/scratch_ssd/danier/data/perspective_v1 28 | split: train 29 | batch_size: 64 30 | val: 31 | target: data.PerspectiveV1Dataset 32 | params: 33 | data_path: /disk/scratch_ssd/danier/data/perspective_v1 34 | split: val 35 | batch_size: 64 36 | test: 37 | target: data.PerspectiveV1Dataset 38 | params: 39 | data_path: /disk/scratch_ssd/danier/data/perspective_v1 40 | split: test 41 | return_path: true 42 | batch_size: 64 43 | loss_config: 44 | target: utils.MSEWithLogitsLoss 45 | params: 46 | target_range: 47 | - -1.5 48 | - 1.5 49 | metric_config: 50 | target: utils.EuclideanDistanceWithLogits 51 | params: 52 | target_range: 53 | - -1.5 54 | - 1.5 55 | -------------------------------------------------------------------------------- /configs/texture-grad/resnet18.yaml: -------------------------------------------------------------------------------- 1 | task: texturegrad_v1 2 | model_name: resnet18 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 60 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.resnet_backbones.ResNetBackbone 9 | params: 10 | model_name: resnet18.tv_in1k 11 | create_data_transform_func: 12 | target: models.resnet_backbones.get_resnet_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 1 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: __check__model__ 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.TexturegradV1Dataset 29 | params: 30 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.TexturegradV1Dataset 36 | params: 37 | data_path: 
/disk/scratch_ssd/danier/data/texturegrad_v1/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.TexturegradV1Dataset 43 | params: 44 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- /configs/texture-grad/depthanythingv2-b14.yaml: -------------------------------------------------------------------------------- 1 | task: texturegrad_v1 2 | model_name: depthanythingv2-b14 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 60 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.depth_anything_v2.DepthAnythingV2Backbone 9 | params: 10 | arch: vitb 11 | create_data_transform_func: 12 | target: models.depth_anything_v2.get_depth_anything_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.DinoViTFeatureExtractor 15 | params: 16 | feat_type: mask 17 | layer: 11 18 | use_cls: false 19 | probe_config: 20 | type: mlp 21 | target: models.probes.MlpProbeModel 22 | params: 23 | in_features: 768 24 | num_classes: 1 25 | data_config: 26 | num_workers: 4 27 | train: 28 | target: data.TexturegradV1Dataset 29 | params: 30 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 31 | split: train 32 | return_mask: true 33 | batch_size: 8 34 | val: 35 | target: data.TexturegradV1Dataset 36 | params: 37 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 38 | split: val 39 | return_mask: true 40 | batch_size: 8 41 | test: 42 | target: data.TexturegradV1Dataset 43 | params: 44 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 45 | split: test 46 | return_mask: true 47 | return_path: true 48 | batch_size: 8 49 | loss_config: 50 | target: torch.nn.BCEWithLogitsLoss 51 | -------------------------------------------------------------------------------- 
/configs/perspective/convnext-b16.yaml: -------------------------------------------------------------------------------- 1 | task: perspective_v1 2 | model_name: convnext-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.ConvNextBackbone 9 | params: 10 | arch: convnext_base 11 | feature_extractor_config: 12 | target: models.feature_extractor.Probe3DViTFeatureExtractor 13 | params: 14 | feat_type: patch 15 | layer: 2 16 | probe_config: 17 | type: attn 18 | target: models.probes.AttentiveProbeModel 19 | params: 20 | in_features: __check__model__ 21 | num_classes: 2 22 | data_config: 23 | num_workers: 4 24 | train: 25 | target: data.PerspectiveV1Dataset 26 | params: 27 | data_path: /disk/scratch_ssd/danier/data/perspective_v1/ 28 | split: train 29 | batch_size: 64 30 | val: 31 | target: data.PerspectiveV1Dataset 32 | params: 33 | data_path: /disk/scratch_ssd/danier/data/perspective_v1/ 34 | split: val 35 | batch_size: 64 36 | test: 37 | target: data.PerspectiveV1Dataset 38 | params: 39 | data_path: /disk/scratch_ssd/danier/data/perspective_v1/ 40 | split: test 41 | return_path: true 42 | batch_size: 64 43 | loss_config: 44 | target: utils.MSEWithLogitsLoss 45 | params: 46 | target_range: 47 | - -1.5 48 | - 1.5 49 | metric_config: 50 | target: utils.EuclideanDistanceWithLogits 51 | params: 52 | target_range: 53 | - -1.5 54 | - 1.5 55 | -------------------------------------------------------------------------------- /configs/perspective/croco-b16.yaml: -------------------------------------------------------------------------------- 1 | task: perspective_v1 2 | model_name: croco-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.croco.CroCoBackbone 9 | create_data_transform_func: 10 | target: models.croco.get_croco_transform 11 | feature_extractor_config: 12 | target: 
models.feature_extractor.DinoViTFeatureExtractor 13 | params: 14 | feat_type: patch 15 | layer: 8 16 | probe_config: 17 | type: attn 18 | target: models.probes.AttentiveProbeModel 19 | params: 20 | in_features: 768 21 | num_classes: 2 22 | data_config: 23 | num_workers: 4 24 | train: 25 | target: data.PerspectiveV1DatasetHalf 26 | params: 27 | data_path: /disk/scratch_ssd/danier/data/perspective_v1 28 | split: train 29 | batch_size: 64 30 | val: 31 | target: data.PerspectiveV1Dataset 32 | params: 33 | data_path: /disk/scratch_ssd/danier/data/perspective_v1 34 | split: val 35 | batch_size: 64 36 | test: 37 | target: data.PerspectiveV1Dataset 38 | params: 39 | data_path: /disk/scratch_ssd/danier/data/perspective_v1 40 | split: test 41 | return_path: true 42 | batch_size: 64 43 | loss_config: 44 | target: utils.MSEWithLogitsLoss 45 | params: 46 | target_range: 47 | - -1.5 48 | - 1.5 49 | metric_config: 50 | target: utils.EuclideanDistanceWithLogits 51 | params: 52 | target_range: 53 | - -1.5 54 | - 1.5 55 | -------------------------------------------------------------------------------- /configs/elevation/lrm-b14.yaml: -------------------------------------------------------------------------------- 1 | task: elevation_v1 2 | model_name: lrm-b14 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 16 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.lrm.LRMBackbone 9 | params: 10 | arch: vitb 11 | create_data_transform_func: 12 | target: models.lrm.get_lrm_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.DinoViTFeatureExtractor 15 | params: 16 | feat_type: patch 17 | layer: 8 18 | probe_config: 19 | type: attn 20 | target: models.probes.AttentiveProbeModel 21 | params: 22 | in_features: 768 23 | num_classes: 2 24 | data_config: 25 | num_workers: 4 26 | train: 27 | target: data.ElevationV1Dataset 28 | params: 29 | data_path: /disk/scratch_ssd/danier/data/elevation_v1/ 30 | split: train 31 | batch_size: 64 32 | val: 
33 | target: data.ElevationV1Dataset 34 | params: 35 | data_path: /disk/scratch_ssd/danier/data/elevation_v1/ 36 | split: val 37 | batch_size: 8 38 | test: 39 | target: data.ElevationV1Dataset 40 | params: 41 | data_path: /disk/scratch_ssd/danier/data/elevation_v1/ 42 | split: test 43 | return_path: true 44 | batch_size: 8 45 | loss_config: 46 | target: utils.MultiMSEWithLogitsLoss 47 | params: 48 | target_scales: 49 | - 1 50 | - 0.625 51 | metric_config: 52 | target: utils.HorizonErrorWithLogits 53 | params: 54 | target_scales: 55 | - 1 56 | - 0.625 57 | -------------------------------------------------------------------------------- /configs/elevation/dinov2-b14.yaml: -------------------------------------------------------------------------------- 1 | task: elevation_v1 2 | model_name: dinov2-b14 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 16 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: timm.create_model 9 | params: 10 | model_name: vit_base_patch14_dinov2.lvd142m 11 | pretrained: true 12 | num_classes: 0 13 | feature_extractor_config: 14 | target: models.feature_extractor.TimmViTFeatureExtractor 15 | params: 16 | feat_type: patch 17 | layer: 11 18 | probe_config: 19 | type: attn 20 | target: models.probes.AttentiveProbeModel 21 | params: 22 | in_features: 768 23 | num_classes: 2 24 | data_config: 25 | num_workers: 4 26 | train: 27 | target: data.ElevationV1Dataset 28 | params: 29 | data_path: /disk/scratch_ssd/danier/data/elevation_v1/ 30 | split: train 31 | batch_size: 64 32 | val: 33 | target: data.ElevationV1Dataset 34 | params: 35 | data_path: /disk/scratch_ssd/danier/data/elevation_v1/ 36 | split: val 37 | batch_size: 64 38 | test: 39 | target: data.ElevationV1Dataset 40 | params: 41 | data_path: /disk/scratch_ssd/danier/data/elevation_v1/ 42 | split: test 43 | return_path: true 44 | batch_size: 64 45 | loss_config: 46 | target: utils.MultiMSEWithLogitsLoss 47 | params: 48 | target_scales: 49 | - 1 50 | - 0.625 51 | 
metric_config: 52 | target: utils.HorizonErrorWithLogits 53 | params: 54 | target_scales: 55 | - 1 56 | - 0.625 57 | -------------------------------------------------------------------------------- /configs/elevation/vit-b16.yaml: -------------------------------------------------------------------------------- 1 | task: elevation_v1 2 | model_name: vit-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 16 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.vit.TimmViTBackbone 9 | params: 10 | model_name: vit_base_patch16_224.augreg2_in21k_ft_in1k 11 | create_data_transform_func: 12 | target: models.vit.get_vit_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.TimmViTFeatureExtractor 15 | params: 16 | feat_type: patch 17 | layer: 5 18 | probe_config: 19 | type: attn 20 | target: models.probes.AttentiveProbeModel 21 | params: 22 | in_features: 768 23 | num_classes: 2 24 | data_config: 25 | num_workers: 4 26 | train: 27 | target: data.ElevationV1Dataset 28 | params: 29 | data_path: /raid/danier/data/elevation_v1/ 30 | split: train 31 | batch_size: 64 32 | val: 33 | target: data.ElevationV1Dataset 34 | params: 35 | data_path: /raid/danier/data/elevation_v1/ 36 | split: val 37 | batch_size: 64 38 | test: 39 | target: data.ElevationV1Dataset 40 | params: 41 | data_path: /raid/danier/data/elevation_v1/ 42 | split: test 43 | return_path: true 44 | batch_size: 64 45 | loss_config: 46 | target: utils.MultiMSEWithLogitsLoss 47 | params: 48 | target_scales: 49 | - 1 50 | - 0.625 51 | metric_config: 52 | target: utils.HorizonErrorWithLogits 53 | params: 54 | target_scales: 55 | - 1 56 | - 0.625 57 | -------------------------------------------------------------------------------- /configs/perspective/dust3r-l16.yaml: -------------------------------------------------------------------------------- 1 | task: perspective_v1 2 | model_name: dust3r-l16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | 
log_every_n_iter: 50 7 | create_model_func: 8 | target: models.dust3r.Dust3rBackbone 9 | create_data_transform_func: 10 | target: models.dust3r.get_dust3r_transform 11 | feature_extractor_config: 12 | target: models.feature_extractor.DinoViTFeatureExtractor 13 | params: 14 | feat_type: patch 15 | layer: 17 16 | probe_config: 17 | type: attn 18 | target: models.probes.AttentiveProbeModel 19 | params: 20 | in_features: __check__model__ 21 | num_classes: 2 22 | data_config: 23 | num_workers: 4 24 | train: 25 | target: data.PerspectiveV1Dataset 26 | params: 27 | data_path: /disk/scratch_ssd/danier/data/perspective_v1/ 28 | split: train 29 | batch_size: 64 30 | val: 31 | target: data.PerspectiveV1Dataset 32 | params: 33 | data_path: /disk/scratch_ssd/danier/data/perspective_v1/ 34 | split: val 35 | batch_size: 64 36 | test: 37 | target: data.PerspectiveV1Dataset 38 | params: 39 | data_path: /disk/scratch_ssd/danier/data/perspective_v1/ 40 | split: test 41 | return_path: true 42 | batch_size: 64 43 | loss_config: 44 | target: utils.MSEWithLogitsLoss 45 | params: 46 | target_range: 47 | - -1.5 48 | - 1.5 49 | metric_config: 50 | target: utils.EuclideanDistanceWithLogits 51 | params: 52 | target_range: 53 | - -1.5 54 | - 1.5 55 | -------------------------------------------------------------------------------- /configs/texture-grad/siglip-b16.yaml: -------------------------------------------------------------------------------- 1 | task: texturegrad_v1 2 | model_name: siglip-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 60 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.SigLIPBackbone 9 | params: 10 | checkpoint: vit_base_patch16_siglip_384.webli 11 | pretrained: true 12 | create_data_transform_func: 13 | target: models.probe3d_backbones.get_siglip_transform 14 | feature_extractor_config: 15 | target: models.feature_extractor.Probe3DViTFeatureExtractor 16 | params: 17 | feat_type: mask 18 | layer: 5 19 | use_cls: false 
20 | probe_config: 21 | type: mlp 22 | target: models.probes.MlpProbeModel 23 | params: 24 | in_features: 768 25 | num_classes: 1 26 | data_config: 27 | num_workers: 4 28 | train: 29 | target: data.TexturegradV1Dataset 30 | params: 31 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 32 | split: train 33 | return_mask: true 34 | batch_size: 8 35 | val: 36 | target: data.TexturegradV1Dataset 37 | params: 38 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 39 | split: val 40 | return_mask: true 41 | batch_size: 8 42 | test: 43 | target: data.TexturegradV1Dataset 44 | params: 45 | data_path: /disk/scratch_ssd/danier/data/texturegrad_v1/ 46 | split: test 47 | return_mask: true 48 | return_path: true 49 | batch_size: 8 50 | loss_config: 51 | target: torch.nn.BCEWithLogitsLoss 52 | -------------------------------------------------------------------------------- /configs/perspective/lrm-b14.yaml: -------------------------------------------------------------------------------- 1 | task: perspective_v1 2 | model_name: lrm-b14 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.lrm.LRMBackbone 9 | params: 10 | arch: vitb 11 | create_data_transform_func: 12 | target: models.lrm.get_lrm_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.DinoViTFeatureExtractor 15 | params: 16 | feat_type: patch 17 | layer: 11 18 | probe_config: 19 | type: attn 20 | target: models.probes.AttentiveProbeModel 21 | params: 22 | in_features: 768 23 | num_classes: 2 24 | data_config: 25 | num_workers: 4 26 | train: 27 | target: data.PerspectiveV1Dataset 28 | params: 29 | data_path: /disk/scratch_ssd/danier/data/perspective_v1/ 30 | split: train 31 | batch_size: 64 32 | val: 33 | target: data.PerspectiveV1Dataset 34 | params: 35 | data_path: /disk/scratch_ssd/danier/data/perspective_v1/ 36 | split: val 37 | batch_size: 64 38 | test: 39 | target: data.PerspectiveV1Dataset 40 | 
params: 41 | data_path: /disk/scratch_ssd/danier/data/perspective_v1/ 42 | split: test 43 | return_path: true 44 | batch_size: 64 45 | loss_config: 46 | target: utils.MSEWithLogitsLoss 47 | params: 48 | target_range: 49 | - -1.5 50 | - 1.5 51 | metric_config: 52 | target: utils.EuclideanDistanceWithLogits 53 | params: 54 | target_range: 55 | - -1.5 56 | - 1.5 57 | -------------------------------------------------------------------------------- /configs/perspective/dinov2-b14.yaml: -------------------------------------------------------------------------------- 1 | task: perspective_v1 2 | model_name: dinov2-b14 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: timm.create_model 9 | params: 10 | model_name: vit_base_patch14_dinov2.lvd142m 11 | pretrained: true 12 | num_classes: 0 13 | feature_extractor_config: 14 | target: models.feature_extractor.TimmViTFeatureExtractor 15 | params: 16 | feat_type: patch 17 | layer: 11 18 | probe_config: 19 | type: attn 20 | target: models.probes.AttentiveProbeModel 21 | params: 22 | in_features: 768 23 | num_classes: 2 24 | data_config: 25 | num_workers: 4 26 | train: 27 | target: data.PerspectiveV1DatasetHalf 28 | params: 29 | data_path: /disk/scratch_ssd/danier/data/perspective_v1 30 | split: train 31 | batch_size: 64 32 | val: 33 | target: data.PerspectiveV1Dataset 34 | params: 35 | data_path: /disk/scratch_ssd/danier/data/perspective_v1 36 | split: val 37 | batch_size: 64 38 | test: 39 | target: data.PerspectiveV1Dataset 40 | params: 41 | data_path: /disk/scratch_ssd/danier/data/perspective_v1 42 | split: test 43 | return_path: true 44 | batch_size: 64 45 | loss_config: 46 | target: utils.MSEWithLogitsLoss 47 | params: 48 | target_range: 49 | - -1.5 50 | - 1.5 51 | metric_config: 52 | target: utils.EuclideanDistanceWithLogits 53 | params: 54 | target_range: 55 | - -1.5 56 | - 1.5 57 | 
-------------------------------------------------------------------------------- /configs/elevation/resnet50.yaml: -------------------------------------------------------------------------------- 1 | task: elevation_v1 2 | model_name: resnet50 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 16 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.resnet_backbones.ResNetBackbone 9 | params: 10 | model_name: resnet50.tv_in1k 11 | create_data_transform_func: 12 | target: models.resnet_backbones.get_resnet_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: patch 17 | layer: 3 18 | probe_config: 19 | type: attn 20 | target: models.probes.AttentiveProbeModel 21 | params: 22 | in_features: __check__model__ 23 | num_classes: 2 24 | data_config: 25 | num_workers: 4 26 | train: 27 | target: data.ElevationV1Dataset 28 | params: 29 | data_path: /raid/danier/data/elevation_v1/ 30 | split: train 31 | batch_size: 64 32 | val: 33 | target: data.ElevationV1Dataset 34 | params: 35 | data_path: /raid/danier/data/elevation_v1/ 36 | split: val 37 | batch_size: 64 38 | test: 39 | target: data.ElevationV1Dataset 40 | params: 41 | data_path: /raid/danier/data/elevation_v1/ 42 | split: test 43 | return_path: true 44 | batch_size: 64 45 | loss_config: 46 | target: utils.MultiMSEWithLogitsLoss 47 | params: 48 | target_scales: 49 | - 1 50 | - 0.625 51 | metric_config: 52 | target: utils.HorizonErrorWithLogits 53 | params: 54 | target_scales: 55 | - 1 56 | - 0.625 57 | -------------------------------------------------------------------------------- /configs/elevation/stablediffusion.yaml: -------------------------------------------------------------------------------- 1 | task: elevation_v1 2 | model_name: stablediffusion 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 16 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: 
models.probe3d_backbones.StableDiffusionBackbone 9 | params: 10 | model_id: stabilityai/stable-diffusion-2-1 11 | time_step: 1 12 | return_multilayer: true 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: patch 17 | layer: 1 18 | probe_config: 19 | type: attn 20 | target: models.probes.AttentiveProbeModel 21 | params: 22 | in_features: __check__model__ 23 | num_classes: 2 24 | data_config: 25 | num_workers: 4 26 | train: 27 | target: data.ElevationV1Dataset 28 | params: 29 | data_path: /raid/danier/data/elevation_v1/ 30 | split: train 31 | batch_size: 12 32 | val: 33 | target: data.ElevationV1Dataset 34 | params: 35 | data_path: /raid/danier/data/elevation_v1/ 36 | split: val 37 | batch_size: 12 38 | test: 39 | target: data.ElevationV1Dataset 40 | params: 41 | data_path: /raid/danier/data/elevation_v1/ 42 | split: test 43 | return_path: true 44 | batch_size: 12 45 | loss_config: 46 | target: utils.MultiMSEWithLogitsLoss 47 | params: 48 | target_scales: 49 | - 1 50 | - 0.625 51 | metric_config: 52 | target: utils.HorizonErrorWithLogits 53 | params: 54 | target_scales: 55 | - 1 56 | - 0.625 57 | -------------------------------------------------------------------------------- /configs/perspective/vit-b16.yaml: -------------------------------------------------------------------------------- 1 | task: perspective_v1 2 | model_name: vit-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.vit.TimmViTBackbone 9 | params: 10 | model_name: vit_base_patch16_224.augreg2_in21k_ft_in1k 11 | create_data_transform_func: 12 | target: models.vit.get_vit_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.TimmViTFeatureExtractor 15 | params: 16 | feat_type: patch 17 | layer: 8 18 | probe_config: 19 | type: attn 20 | target: models.probes.AttentiveProbeModel 21 | params: 22 | in_features: 768 23 | 
num_classes: 2 24 | data_config: 25 | num_workers: 4 26 | train: 27 | target: data.PerspectiveV1Dataset 28 | params: 29 | data_path: /raid/danier/data/perspective_v1/ 30 | split: train 31 | batch_size: 64 32 | val: 33 | target: data.PerspectiveV1Dataset 34 | params: 35 | data_path: /raid/danier/data/perspective_v1/ 36 | split: val 37 | batch_size: 64 38 | test: 39 | target: data.PerspectiveV1Dataset 40 | params: 41 | data_path: /raid/danier/data/perspective_v1/ 42 | split: test 43 | return_path: true 44 | batch_size: 64 45 | loss_config: 46 | target: utils.MSEWithLogitsLoss 47 | params: 48 | target_range: 49 | - -1.5 50 | - 1.5 51 | metric_config: 52 | target: utils.EuclideanDistanceWithLogits 53 | params: 54 | target_range: 55 | - -1.5 56 | - 1.5 57 | -------------------------------------------------------------------------------- /configs/elevation/clip-b16.yaml: -------------------------------------------------------------------------------- 1 | task: elevation_v1 2 | model_name: clip-b16 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 16 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.probe3d_backbones.CLIPBackbone 9 | params: 10 | arch: ViT-B-16 11 | create_data_transform_func: 12 | target: models.probe3d_backbones.get_clip_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: patch 17 | layer: 2 18 | probe_config: 19 | type: attn 20 | target: models.probes.AttentiveProbeModel 21 | params: 22 | in_features: 768 23 | num_classes: 2 24 | data_config: 25 | num_workers: 4 26 | train: 27 | target: data.ElevationV1Dataset 28 | params: 29 | data_path: /disk/scratch_ssd/danier/data/elevation_v1/ 30 | split: train 31 | batch_size: 64 32 | val: 33 | target: data.ElevationV1Dataset 34 | params: 35 | data_path: /disk/scratch_ssd/danier/data/elevation_v1/ 36 | split: val 37 | batch_size: 8 38 | test: 39 | target: data.ElevationV1Dataset 40 | params: 41 | 
data_path: /disk/scratch_ssd/danier/data/elevation_v1/ 42 | split: test 43 | return_path: true 44 | batch_size: 8 45 | loss_config: 46 | target: utils.MultiMSEWithLogitsLoss 47 | params: 48 | target_scales: 49 | - 1 50 | - 0.625 51 | metric_config: 52 | target: utils.HorizonErrorWithLogits 53 | params: 54 | target_scales: 55 | - 1 56 | - 0.625 57 | -------------------------------------------------------------------------------- /configs/elevation/senet154.yaml: -------------------------------------------------------------------------------- 1 | task: elevation_v1 2 | model_name: senet154 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 16 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.resnet_backbones.ResNetBackbone 9 | params: 10 | model_name: senet154.gluon_in1k 11 | create_data_transform_func: 12 | target: models.resnet_backbones.get_resnet_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: patch 17 | layer: 3 18 | probe_config: 19 | type: attn 20 | target: models.probes.AttentiveProbeModel 21 | params: 22 | in_features: __check__model__ 23 | num_classes: 2 24 | data_config: 25 | num_workers: 4 26 | train: 27 | target: data.ElevationV1Dataset 28 | params: 29 | data_path: /raid/danier/data/elevation_v1/ 30 | split: train 31 | batch_size: 64 32 | val: 33 | target: data.ElevationV1Dataset 34 | params: 35 | data_path: /raid/danier/data/elevation_v1/ 36 | split: val 37 | batch_size: 64 38 | test: 39 | target: data.ElevationV1Dataset 40 | params: 41 | data_path: /raid/danier/data/elevation_v1/ 42 | split: test 43 | return_path: true 44 | batch_size: 64 45 | loss_config: 46 | target: utils.MultiMSEWithLogitsLoss 47 | params: 48 | target_scales: 49 | - 1 50 | - 0.625 51 | metric_config: 52 | target: utils.HorizonErrorWithLogits 53 | params: 54 | target_scales: 55 | - 1 56 | - 0.625 57 | 
-------------------------------------------------------------------------------- /configs/elevation/resnext50.yaml: -------------------------------------------------------------------------------- 1 | task: elevation_v1 2 | model_name: resnext50 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 16 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: models.resnet_backbones.ResNetBackbone 9 | params: 10 | model_name: resnext50_32x4d.tv_in1k 11 | create_data_transform_func: 12 | target: models.resnet_backbones.get_resnet_transform 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: patch 17 | layer: 3 18 | probe_config: 19 | type: attn 20 | target: models.probes.AttentiveProbeModel 21 | params: 22 | in_features: __check__model__ 23 | num_classes: 2 24 | data_config: 25 | num_workers: 4 26 | train: 27 | target: data.ElevationV1Dataset 28 | params: 29 | data_path: /raid/danier/data/elevation_v1/ 30 | split: train 31 | batch_size: 64 32 | val: 33 | target: data.ElevationV1Dataset 34 | params: 35 | data_path: /raid/danier/data/elevation_v1/ 36 | split: val 37 | batch_size: 64 38 | test: 39 | target: data.ElevationV1Dataset 40 | params: 41 | data_path: /raid/danier/data/elevation_v1/ 42 | split: test 43 | return_path: true 44 | batch_size: 64 45 | loss_config: 46 | target: utils.MultiMSEWithLogitsLoss 47 | params: 48 | target_scales: 49 | - 1 50 | - 0.625 51 | metric_config: 52 | target: utils.HorizonErrorWithLogits 53 | params: 54 | target_scales: 55 | - 1 56 | - 0.625 57 | -------------------------------------------------------------------------------- /configs/perspective/stablediffusion.yaml: -------------------------------------------------------------------------------- 1 | task: perspective_v1 2 | model_name: stablediffusion 3 | weight_decay: 0.01 4 | learning_rate: 0.001 5 | epochs: 120 6 | log_every_n_iter: 50 7 | create_model_func: 8 | target: 
models.probe3d_backbones.StableDiffusionBackbone 9 | params: 10 | model_id: stabilityai/stable-diffusion-2-1 11 | time_step: 1 12 | return_multilayer: true 13 | feature_extractor_config: 14 | target: models.feature_extractor.Probe3DViTFeatureExtractor 15 | params: 16 | feat_type: patch 17 | layer: 0 18 | probe_config: 19 | type: attn 20 | target: models.probes.AttentiveProbeModel 21 | params: 22 | in_features: __check__model__ 23 | num_classes: 2 24 | data_config: 25 | num_workers: 4 26 | train: 27 | target: data.PerspectiveV1DatasetHalf 28 | params: 29 | data_path: /raid/danier/data/perspective_v1 30 | split: train 31 | batch_size: 8 32 | val: 33 | target: data.PerspectiveV1Dataset 34 | params: 35 | data_path: /raid/danier/data/perspective_v1 36 | split: val 37 | batch_size: 8 38 | test: 39 | target: data.PerspectiveV1Dataset 40 | params: 41 | data_path: /raid/danier/data/perspective_v1 42 | split: test 43 | return_path: true 44 | batch_size: 8 45 | loss_config: 46 | target: utils.MSEWithLogitsLoss 47 | params: 48 | target_range: 49 | - -1.5 50 | - 1.5 51 | metric_config: 52 | target: utils.EuclideanDistanceWithLogits 53 | params: 54 | target_range: 55 | - -1.5 56 | - 1.5 57 | --------------------------------------------------------------------------------