├── .gitignore ├── LICENSE ├── README.md ├── assets ├── paca-seghead.png ├── paca-vit-onsite.png ├── paca-vit-teacher.png ├── paca_scheme.png └── paca_teaser.png ├── classification ├── benchmark.sh ├── benchmark_timm.py ├── configs │ ├── imagenet_conv_adamw.yml │ └── imagenet_vit_adamw.yml ├── timm_custom │ ├── __init__.py │ ├── data │ │ ├── __init__.py │ │ ├── loader.py │ │ └── transform.py │ ├── optim │ │ ├── __init__.py │ │ └── layerwise_lr_decay.py │ └── utils │ │ ├── __init__.py │ │ ├── cuda.py │ │ └── summary.py ├── train_timm.py ├── train_timm.sh ├── validate.sh └── validate_timm.py ├── detection ├── configs │ ├── _base_ │ │ ├── default_runtime.py │ │ ├── models │ │ │ ├── cascade-mask-rcnn_r50_fpn.py │ │ │ ├── cascade-rcnn_r50_fpn.py │ │ │ ├── fast-rcnn_r50_fpn.py │ │ │ ├── faster-rcnn_r50-caffe-c4.py │ │ │ ├── faster-rcnn_r50-caffe-dc5.py │ │ │ ├── faster-rcnn_r50_fpn.py │ │ │ ├── mask-rcnn_r50-caffe-c4.py │ │ │ ├── mask-rcnn_r50_fpn.py │ │ │ ├── retinanet_r50_fpn.py │ │ │ ├── rpn_r50-caffe-c4.py │ │ │ ├── rpn_r50_fpn.py │ │ │ └── ssd300.py │ │ └── schedules │ │ │ ├── schedule_1x.py │ │ │ ├── schedule_20e.py │ │ │ └── schedule_2x.py │ └── paca_vit │ │ └── mask_rcnn_1x │ │ ├── mask_rcnn_pacavit_base_p2cconv_100_0_mstrain_480_800_1x_coco.py │ │ ├── mask_rcnn_pacavit_convmixer_base_100_mstrain_480_800_1x_coco.py │ │ ├── mask_rcnn_pacavit_convmixer_small_100_mstrain_480_800_1x_coco.py │ │ ├── mask_rcnn_pacavit_convmixer_tiny_100_mstrain_480_800_1x_coco.py │ │ ├── mask_rcnn_pacavit_small_p2cconv_100_0_mstrain_480_800_1x_coco.py │ │ ├── mask_rcnn_pacavit_small_p2cconv_100_49_mstrain_480_800_1x_coco.py │ │ ├── mask_rcnn_pacavit_small_p2cconv_100_blockwise_mstrain_480_800_1x_coco.py │ │ ├── mask_rcnn_pacavit_small_p2cconv_100_mstrain_480_800_1x_coco.py │ │ ├── mask_rcnn_pacavit_small_p2cconv_2_0_mstrain_480_800_1x_coco.py │ │ ├── mask_rcnn_pacavit_small_p2cconv_49_0_mstrain_480_800_1x_coco.py │ │ ├── mask_rcnn_pacavit_small_p2cconv_49_100_mstrain_480_800_1x_coco.py 
│ │ ├── mask_rcnn_pacavit_small_p2cmlp_100_0_mstrain_480_800_1x_coco.py │ │ └── mask_rcnn_pacavit_tiny_p2cconv_100_0_mstrain_480_800_1x_coco.py ├── get_flops.py ├── test_mmdet.py ├── test_mmdet.sh ├── train_mmdet.py └── train_mmdet.sh ├── environment.yaml ├── install.sh ├── models ├── __init__.py ├── layers │ ├── __init__.py │ ├── blur_pool.py │ └── downsample.py └── paca_vit.py └── segmentation ├── configs ├── _base_ │ ├── default_runtime.py │ ├── models │ │ ├── paca_head.py │ │ └── upernet_swin.py │ └── schedules │ │ ├── schedule_160k.py │ │ ├── schedule_20k.py │ │ ├── schedule_240k.py │ │ ├── schedule_320k.py │ │ ├── schedule_40k.py │ │ └── schedule_80k.py └── paca_vit │ ├── paca_head │ ├── pacahead_pacavit_base_p2cconv_100_0_512x512_160k_ade20k.py │ ├── pacahead_pacavit_convmixer_base_100_512x512_160k_ade20k.py │ ├── pacahead_pacavit_convmixer_small_100_512x512_160k_ade20k.py │ ├── pacahead_pacavit_convmixer_tiny_100_512x512_160k_ade20k.py │ ├── pacahead_pacavit_small_p2cconv_100_0_512x512_160k_ade20k.py │ ├── pacahead_pacavit_small_p2cconv_100_49_512x512_160k_ade20k.py │ ├── pacahead_pacavit_small_p2cconv_100_512x512_160k_ade20k.py │ ├── pacahead_pacavit_small_p2cconv_100_blockwise_512x512_160k_ade20k.py │ ├── pacahead_pacavit_small_p2cconv_2_0_512x512_160k_ade20k.py │ ├── pacahead_pacavit_small_p2cconv_49_0_512x512_160k_ade20k.py │ ├── pacahead_pacavit_small_p2cconv_49_100_512x512_160k_ade20k.py │ ├── pacahead_pacavit_small_p2cmlp_100_0_512x512_160k_ade20k.py │ └── pacahead_pacavit_tiny_p2cconv_100_0_512x512_160k_ade20k.py │ └── upernet │ ├── upernet_pacavit_base_p2cconv_100_0_512x512_160k_ade20k.py │ ├── upernet_pacavit_small_p2cconv_100_0_512x512_160k_ade20k.py │ └── upernet_pacavit_tiny_p2cconv_100_0_512x512_160k_ade20k.py ├── get_flops.py ├── mmseg_custom └── models │ ├── __init__.py │ ├── decode_heads │ ├── __init__.py │ └── paca_head.py │ └── segmentors │ ├── __init__.py │ └── encoder_decoder_paca.py ├── test_mmseg.py ├── test_mmseg.sh ├── 
train_mmseg.py └── train_mmseg.sh /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/en/_build/ 68 | docs/zh_cn/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule 81 | 82 | # SageMath parsed files 83 | *.sage.py 84 | 85 | # Environments 86 | .env 87 | .venv 88 | env/ 89 | venv/ 90 | ENV/ 91 | env.bak/ 92 | venv.bak/ 93 | 94 | # Spyder project settings 95 | .spyderproject 96 | .spyproject 97 | 98 | # Rope project settings 99 | .ropeproject 100 | 101 | # mkdocs documentation 102 | /site 103 | 104 | # mypy 105 | .mypy_cache/ 106 | 107 | # data/ 108 | # data 109 | .vscode 110 | .idea 111 | .DS_Store 112 | 113 | # custom 114 | *.pkl 115 | *.pkl.json 116 | *.log.json 117 
| work_dirs/ 118 | private/ 119 | external/ 120 | datasets/ 121 | outputs/ 122 | pretrained-checkpoints/ 123 | __pycache__/ 124 | 125 | # Pytorch 126 | *.pth 127 | *.py~ 128 | *.sh~ 129 | 130 | # my experimental stuff 131 | my_*.* 132 | 133 | 134 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | ======================================================================= 3 | 4 | Copyright (c) Meta Platforms, Inc. and affiliates. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
#!/usr/bin/env bash
# Run benchmark_timm.py in inference mode for one model on one GPU.
#
# Usage: benchmark.sh model_name img_size num_classes gpu [others]
# Every argument after the fourth is forwarded verbatim to benchmark_timm.py.
# Set PYTHON in the environment to choose the interpreter (default: python).

if [ "$#" -lt 4 ]; then
    echo "Usage: me model_name img_size num_classes gpu [others]"
    # Non-zero status so callers can detect the usage error.
    exit 1
fi

MODEL_NAME=$1
IMAGE_SIZE=$2
NUM_CLASSES=$3
GPU=$4

PYTHON=${PYTHON:-"python"}

# Absolute directory of this script, independent of the caller's cwd.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

RESULT_FILE="$DIR/../work_dirs/classification/all_benchmark_results.csv"

# $PYTHON is intentionally left unquoted so a value such as "python -u"
# still splits into a command plus its options. Everything else is quoted
# so paths and forwarded arguments containing whitespace survive intact.
OMP_NUM_THREADS=1 CUDA_VISIBLE_DEVICES="$GPU" $PYTHON \
    "$DIR/benchmark_timm.py" --results-file "$RESULT_FILE" \
    --model "$MODEL_NAME" --bench inference \
    --num-bench-iter 100 \
    --batch-size 128 --img-size "$IMAGE_SIZE" --num-classes "$NUM_CLASSES" \
    --opt adamw --opt-eps 1e-8 --momentum 0.9 --weight-decay 0.05 \
    --smoothing 0.1 --drop-path 0.1 \
    --amp --channels-last \
    "${@:5}"
    # --clip-grad 1.0 --clip-mode norm
'bicubic' 3 | epochs: 300 4 | opt: 'adamw' 5 | opt_eps: 1e-8 6 | opt_betas: 7 | - 0.9 8 | - 0.999 9 | momentum: 0.9 10 | weight_decay: 0.05 11 | lr_base: 5e-4 # base lr, which will be auto-scaled: lr * batch size * nb_gpus / lr_base_size 12 | lr_base_size: 512 13 | lr_base_scale: 'linear' # linear or sqrt 14 | auto_scale_warmup_min_lr: True 15 | min_lr: 5e-6 # which will be auto-scaled: min_lr * batch size * nb_gpus / lr_base_size 16 | sched: 'cosine' 17 | warmup_epochs: 5 18 | warmup_lr: 5e-7 # which will be auto-scaled: warmup_lr * batch size * nb_gpus / lr_base_size 19 | cooldown_epochs: 0 20 | amp: True 21 | # clip_grad: 5.0 22 | color_jitter: 0.4 23 | smoothing: 0.1 24 | reprob: 0.25 25 | remode: 'pixel' 26 | recount: 1 27 | aa: 'rand-m9-mstd0.5-inc1' 28 | mixup: 0.8 29 | cutmix: 1.0 30 | mixup_prob: 1.0 31 | mixup_switch_prob: 0.5 32 | mixup_mode: 'batch' 33 | dist_bn: '' #'reduce' 34 | # model_ema: True 35 | # model_ema_decay: 0.9999 -------------------------------------------------------------------------------- /classification/timm_custom/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iVMCL/PaCaViT/4307018d4925786d9b9ecd75d7d2a011587c5e9f/classification/timm_custom/__init__.py -------------------------------------------------------------------------------- /classification/timm_custom/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .loader import create_loader_v2 2 | from .transform import create_transform_v2 3 | 4 | __all__ = ['create_loader_v2', 'create_transform_v2'] 5 | -------------------------------------------------------------------------------- /classification/timm_custom/data/loader.py: -------------------------------------------------------------------------------- 1 | # Modifications 2 | # handle CIFAR 3 | """ Loader Factory, Fast Collate, CUDA Prefetcher 4 | 5 | Prefetcher and Fast Collate inspired by 
NVIDIA APEX example at 6 | https://github.com/NVIDIA/apex/commit/d5e2bb4bdeedd27b1dfaf5bb2b24d6c000dee9be#diff-cf86c282ff7fba81fad27a559379d5bf 7 | 8 | Hacked together by / Copyright 2020 Ross Wightman 9 | """ 10 | import random 11 | from functools import partial 12 | from typing import Callable 13 | 14 | import torch.utils.data 15 | from torchvision import transforms 16 | import numpy as np 17 | 18 | from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD 19 | from timm.data.dataset import IterableImageDataset 20 | from timm.data.distributed_sampler import OrderedDistributedSampler, RepeatAugSampler 21 | from timm.data.random_erasing import RandomErasing 22 | from timm.data.mixup import FastCollateMixup 23 | from timm.data.transforms_factory import create_transform 24 | 25 | from timm.data.loader import (fast_collate, PrefetchLoader, 26 | MultiEpochsDataLoader, _worker_init, 27 | _RepeatSampler) 28 | 29 | from .transform import create_transform_v2 30 | 31 | 32 | def create_loader_v2( 33 | dataset, 34 | input_size, 35 | batch_size, 36 | is_training=False, 37 | use_prefetcher=True, 38 | no_aug=False, 39 | re_prob=0., 40 | re_mode='const', 41 | re_count=1, 42 | re_split=False, 43 | scale=None, 44 | ratio=None, 45 | hflip=0.5, 46 | vflip=0., 47 | color_jitter=0.4, 48 | auto_augment=None, 49 | num_aug_repeats=0, 50 | num_aug_splits=0, 51 | interpolation='bilinear', 52 | mean=IMAGENET_DEFAULT_MEAN, 53 | std=IMAGENET_DEFAULT_STD, 54 | num_workers=1, 55 | distributed=False, 56 | crop_pct=None, 57 | crop_mode=None, 58 | collate_fn=None, 59 | pin_memory=False, 60 | fp16=False, # deprecated, use img_dtype 61 | img_dtype=torch.float32, 62 | device=torch.device('cuda'), 63 | tf_preprocessing=False, 64 | use_multi_epochs_loader=False, 65 | persistent_workers=True, 66 | worker_seeding='all', 67 | use_simple_random_crop=False, 68 | use_three_augment_ssl=False, 69 | ): 70 | re_num_splits = 0 71 | if re_split: 72 | # apply RE to second half of batch if no aug 
split otherwise line up with aug split 73 | re_num_splits = num_aug_splits or 2 74 | dataset.transform = create_transform_v2( 75 | input_size, 76 | is_training=is_training, 77 | use_prefetcher=use_prefetcher, 78 | no_aug=no_aug, 79 | scale=scale, 80 | ratio=ratio, 81 | hflip=hflip, 82 | vflip=vflip, 83 | color_jitter=color_jitter, 84 | auto_augment=auto_augment, 85 | interpolation=interpolation, 86 | mean=mean, 87 | std=std, 88 | crop_pct=crop_pct, 89 | crop_mode=crop_mode, 90 | tf_preprocessing=tf_preprocessing, 91 | re_prob=re_prob, 92 | re_mode=re_mode, 93 | re_count=re_count, 94 | re_num_splits=re_num_splits, 95 | separate=num_aug_splits > 0, 96 | use_simple_random_crop=use_simple_random_crop, 97 | use_three_augment_ssl=use_three_augment_ssl, 98 | ) 99 | 100 | assert input_size is not None 101 | if isinstance(input_size, (tuple, list)): 102 | img_size = min(input_size[-2:]) 103 | else: 104 | img_size = input_size 105 | 106 | if img_size <= 32 and is_training and not no_aug: # CIFAR 107 | dataset.transform.transforms[0] = transforms.RandomCrop(img_size, 108 | padding=4) 109 | 110 | if isinstance(dataset, IterableImageDataset): 111 | # give Iterable datasets early knowledge of num_workers so that sample estimates 112 | # are correct before worker processes are launched 113 | dataset.set_loader_cfg(num_workers=num_workers) 114 | 115 | sampler = None 116 | if distributed and not isinstance(dataset, torch.utils.data.IterableDataset): 117 | if is_training: 118 | if num_aug_repeats: 119 | sampler = RepeatAugSampler(dataset, num_repeats=num_aug_repeats) 120 | else: 121 | sampler = torch.utils.data.distributed.DistributedSampler(dataset) 122 | else: 123 | # This will add extra duplicate entries to result in equal num 124 | # of samples per-process, will slightly alter validation results 125 | sampler = OrderedDistributedSampler(dataset) 126 | else: 127 | assert num_aug_repeats == 0, "RepeatAugment not currently supported in non-distributed or IterableDataset use" 128 | 
129 | if collate_fn is None: 130 | collate_fn = fast_collate if use_prefetcher else torch.utils.data.dataloader.default_collate 131 | 132 | loader_class = torch.utils.data.DataLoader 133 | if use_multi_epochs_loader: 134 | loader_class = MultiEpochsDataLoader 135 | 136 | loader_args = dict( 137 | batch_size=batch_size, 138 | shuffle=not isinstance(dataset, torch.utils.data.IterableDataset) and sampler is None and is_training, 139 | num_workers=num_workers, 140 | sampler=sampler, 141 | collate_fn=collate_fn, 142 | pin_memory=pin_memory, 143 | drop_last=is_training, 144 | worker_init_fn=partial(_worker_init, worker_seeding=worker_seeding), 145 | persistent_workers=persistent_workers 146 | ) 147 | try: 148 | loader = loader_class(dataset, **loader_args) 149 | except TypeError as e: 150 | loader_args.pop('persistent_workers') # only in Pytorch 1.7+ 151 | loader = loader_class(dataset, **loader_args) 152 | if use_prefetcher: 153 | prefetch_re_prob = re_prob if is_training and not no_aug else 0. 
# modified from timm.optim.optim_factory.param_groups_layer_decay
def layerwise_lr_decay(
    model: nn.Module,
    num_groups=12,
    weight_decay: float = 0.05,
    no_weight_decay_list: Tuple[str] = (),
    layer_decay: float = 0.75,
    _logger=None,
):
    """Build optimizer parameter groups with layer-wise lr decay and weight decay.

    Based on BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L58

    Args:
        model: Module whose trainable parameters are grouped. Parameters with
            ``requires_grad == False`` are skipped.
        num_groups: Forwarded to ``_layer_map`` as ``num_groups``.
        weight_decay: Weight decay assigned to the "decay" groups.
        no_weight_decay_list: Parameter names that always get a weight decay
            of 0.0. All 1-D parameters get 0.0 as well.
        layer_decay: Base of the per-layer scale. Layer ``i`` receives
            ``lr_scale = layer_decay ** (layer_max - i)``, so the last layer
            has scale 1.0.
        _logger: Optional logger. When given, the resulting group layout
            (parameter names per group) is reported through ``_logger.info``.

    Returns:
        A list of dicts, one per non-empty ``(layer, decay / no_decay)``
        combination, each with the keys ``lr_scale``, ``weight_decay`` and
        ``params``.
    """
    no_weight_decay_list = set(no_weight_decay_list)
    param_group_names = {}  # NOTE for debugging
    param_groups = {}

    layer_map = _layer_map(model, num_groups=num_groups)

    num_layers = max(layer_map.values()) + 1
    layer_max = num_layers - 1
    layer_scales = [layer_decay ** (layer_max - i) for i in range(num_layers)]

    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue

        # no decay: all 1D parameters and model specific ones
        if param.ndim == 1 or name in no_weight_decay_list:
            g_decay = "no_decay"
            this_decay = 0.0
        else:
            g_decay = "decay"
            this_decay = weight_decay

        # Parameters missing from the layer map are treated as last-layer.
        layer_id = layer_map.get(name, layer_max)
        group_name = "layer_%d_%s" % (layer_id, g_decay)

        if group_name not in param_groups:
            this_scale = layer_scales[layer_id]
            param_group_names[group_name] = {
                "lr_scale": this_scale,
                "weight_decay": this_decay,
                "param_names": [],
            }
            param_groups[group_name] = {
                "lr_scale": this_scale,
                "weight_decay": this_decay,
                "params": [],
            }

        param_group_names[group_name]["param_names"].append(name)
        param_groups[group_name]["params"].append(param)

    if _logger is not None:
        _logger.info(f"\n{json.dumps(param_group_names, indent=2)}\n")

    return list(param_groups.values())
def update_summary_v2(
    epoch,
    train_metrics,
    eval_metrics,
    filename,
    ema_eval_metrics=None,
    lr=None,
    write_header=False,
    log_wandb=False,
):
    """Append one epoch's metrics as a single row to the CSV file *filename*.

    Args:
        epoch: Value stored in the ``epoch`` column.
        train_metrics: Mapping of metric name to value; each key is written
            with a ``train_`` prefix.
        eval_metrics: Mapping of metric name to value; each key is written
            with an ``eval_`` prefix.
        filename: Path of the CSV file, opened in append mode.
        ema_eval_metrics: Optional mapping written with an ``ema_eval_``
            prefix. The columns are omitted when this is ``None``.
        lr: Optional value for an ``lr`` column, omitted when ``None``.
        write_header: Emit the header row before the data row. The caller
            decides when (typically on the first call for a fresh file).
        log_wandb: Also send the row to ``wandb.log``. ``wandb`` is imported
            optionally at module level, so this requires it to be installed.
    """
    rowd = OrderedDict(epoch=epoch)
    rowd.update(("train_" + k, v) for k, v in train_metrics.items())
    rowd.update(("eval_" + k, v) for k, v in eval_metrics.items())
    if ema_eval_metrics is not None:
        rowd.update(("ema_eval_" + k, v) for k, v in ema_eval_metrics.items())
    if lr is not None:
        rowd["lr"] = lr
    if log_wandb:
        wandb.log(rowd)
    # newline="" lets the csv writer control line endings itself; without it
    # platforms that translate "\n" would turn each "\r\n" into "\r\r\n".
    with open(filename, mode="a", newline="") as cf:
        dw = csv.DictWriter(cf, fieldnames=rowd.keys())
        if write_header:  # first iteration (epoch == 1 can't be used)
            dw.writeheader()
        dw.writerow(rowd)
#!/usr/bin/env bash
# Validate a checkpoint with validate_timm.py on a known dataset.
#
# Usage: validate.sh model_name checkpoint_file dataset_name img_size gpus num_gpus [others]
# Every argument after the sixth is forwarded verbatim to validate_timm.py.
# Set PYTHON in the environment to choose the interpreter (default: python).

if [ "$#" -lt 6 ]; then
    echo "Usage: me model_name checkpoint_file dataset_name img_size gpus num_gpus [others]"
    # Non-zero status so callers can detect the usage error.
    exit 1
fi

MODEL=$1
CHECKPOINT_FILE=$2
DATASET=$3
IMAGE_SIZE=$4
GPUS=$5
NUM_GPUS=$6

PYTHON=${PYTHON:-"python"}

# Absolute directory of this script, independent of the caller's cwd.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

# datasets
# NOTE: NUM_CLASSES is set here but is not passed to validate_timm.py below.
NUM_CLASSES=0
if [ "$DATASET" = "IMNET" ]; then
    DATA_DIR="$DIR/../datasets/IMNET/"
    if [ ! -d "$DATA_DIR" ]; then
        echo "not found $DATA_DIR"
        exit 1
    fi
    NUM_CLASSES=1000
else
    echo "Unknown $DATASET"
    exit 1
fi

# $PYTHON is intentionally left unquoted so a value such as "python -u"
# still splits into a command plus its options. Everything else is quoted
# so paths and forwarded arguments containing whitespace survive intact.
CUDA_VISIBLE_DEVICES="$GPUS" $PYTHON \
    "$DIR/validate_timm.py" "$DATA_DIR" --dataset "$DATASET" \
    --img-size "$IMAGE_SIZE" --workers 8 --num-gpu "$NUM_GPUS" \
    --model "$MODEL" --checkpoint "$CHECKPOINT_FILE" --pin-mem --channels-last --amp \
    "${@:7}"
type='ResNet', 13 | depth=50, 14 | num_stages=4, 15 | out_indices=(0, 1, 2, 3), 16 | frozen_stages=1, 17 | norm_cfg=dict(type='BN', requires_grad=True), 18 | norm_eval=True, 19 | style='pytorch', 20 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 21 | neck=dict( 22 | type='FPN', 23 | in_channels=[256, 512, 1024, 2048], 24 | out_channels=256, 25 | num_outs=5), 26 | rpn_head=dict( 27 | type='RPNHead', 28 | in_channels=256, 29 | feat_channels=256, 30 | anchor_generator=dict( 31 | type='AnchorGenerator', 32 | scales=[8], 33 | ratios=[0.5, 1.0, 2.0], 34 | strides=[4, 8, 16, 32, 64]), 35 | bbox_coder=dict( 36 | type='DeltaXYWHBBoxCoder', 37 | target_means=[.0, .0, .0, .0], 38 | target_stds=[1.0, 1.0, 1.0, 1.0]), 39 | loss_cls=dict( 40 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 41 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), 42 | roi_head=dict( 43 | type='CascadeRoIHead', 44 | num_stages=3, 45 | stage_loss_weights=[1, 0.5, 0.25], 46 | bbox_roi_extractor=dict( 47 | type='SingleRoIExtractor', 48 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 49 | out_channels=256, 50 | featmap_strides=[4, 8, 16, 32]), 51 | bbox_head=[ 52 | dict( 53 | type='Shared2FCBBoxHead', 54 | in_channels=256, 55 | fc_out_channels=1024, 56 | roi_feat_size=7, 57 | num_classes=80, 58 | bbox_coder=dict( 59 | type='DeltaXYWHBBoxCoder', 60 | target_means=[0., 0., 0., 0.], 61 | target_stds=[0.1, 0.1, 0.2, 0.2]), 62 | reg_class_agnostic=True, 63 | loss_cls=dict( 64 | type='CrossEntropyLoss', 65 | use_sigmoid=False, 66 | loss_weight=1.0), 67 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 68 | loss_weight=1.0)), 69 | dict( 70 | type='Shared2FCBBoxHead', 71 | in_channels=256, 72 | fc_out_channels=1024, 73 | roi_feat_size=7, 74 | num_classes=80, 75 | bbox_coder=dict( 76 | type='DeltaXYWHBBoxCoder', 77 | target_means=[0., 0., 0., 0.], 78 | target_stds=[0.05, 0.05, 0.1, 0.1]), 79 | reg_class_agnostic=True, 80 | 
loss_cls=dict( 81 | type='CrossEntropyLoss', 82 | use_sigmoid=False, 83 | loss_weight=1.0), 84 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 85 | loss_weight=1.0)), 86 | dict( 87 | type='Shared2FCBBoxHead', 88 | in_channels=256, 89 | fc_out_channels=1024, 90 | roi_feat_size=7, 91 | num_classes=80, 92 | bbox_coder=dict( 93 | type='DeltaXYWHBBoxCoder', 94 | target_means=[0., 0., 0., 0.], 95 | target_stds=[0.033, 0.033, 0.067, 0.067]), 96 | reg_class_agnostic=True, 97 | loss_cls=dict( 98 | type='CrossEntropyLoss', 99 | use_sigmoid=False, 100 | loss_weight=1.0), 101 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) 102 | ], 103 | mask_roi_extractor=dict( 104 | type='SingleRoIExtractor', 105 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 106 | out_channels=256, 107 | featmap_strides=[4, 8, 16, 32]), 108 | mask_head=dict( 109 | type='FCNMaskHead', 110 | num_convs=4, 111 | in_channels=256, 112 | conv_out_channels=256, 113 | num_classes=80, 114 | loss_mask=dict( 115 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 116 | # model training and testing settings 117 | train_cfg=dict( 118 | rpn=dict( 119 | assigner=dict( 120 | type='MaxIoUAssigner', 121 | pos_iou_thr=0.7, 122 | neg_iou_thr=0.3, 123 | min_pos_iou=0.3, 124 | match_low_quality=True, 125 | ignore_iof_thr=-1), 126 | sampler=dict( 127 | type='RandomSampler', 128 | num=256, 129 | pos_fraction=0.5, 130 | neg_pos_ub=-1, 131 | add_gt_as_proposals=False), 132 | allowed_border=0, 133 | pos_weight=-1, 134 | debug=False), 135 | rpn_proposal=dict( 136 | nms_pre=2000, 137 | max_per_img=2000, 138 | nms=dict(type='nms', iou_threshold=0.7), 139 | min_bbox_size=0), 140 | rcnn=[ 141 | dict( 142 | assigner=dict( 143 | type='MaxIoUAssigner', 144 | pos_iou_thr=0.5, 145 | neg_iou_thr=0.5, 146 | min_pos_iou=0.5, 147 | match_low_quality=False, 148 | ignore_iof_thr=-1), 149 | sampler=dict( 150 | type='RandomSampler', 151 | num=512, 152 | pos_fraction=0.25, 153 | neg_pos_ub=-1, 154 | 
add_gt_as_proposals=True), 155 | mask_size=28, 156 | pos_weight=-1, 157 | debug=False), 158 | dict( 159 | assigner=dict( 160 | type='MaxIoUAssigner', 161 | pos_iou_thr=0.6, 162 | neg_iou_thr=0.6, 163 | min_pos_iou=0.6, 164 | match_low_quality=False, 165 | ignore_iof_thr=-1), 166 | sampler=dict( 167 | type='RandomSampler', 168 | num=512, 169 | pos_fraction=0.25, 170 | neg_pos_ub=-1, 171 | add_gt_as_proposals=True), 172 | mask_size=28, 173 | pos_weight=-1, 174 | debug=False), 175 | dict( 176 | assigner=dict( 177 | type='MaxIoUAssigner', 178 | pos_iou_thr=0.7, 179 | neg_iou_thr=0.7, 180 | min_pos_iou=0.7, 181 | match_low_quality=False, 182 | ignore_iof_thr=-1), 183 | sampler=dict( 184 | type='RandomSampler', 185 | num=512, 186 | pos_fraction=0.25, 187 | neg_pos_ub=-1, 188 | add_gt_as_proposals=True), 189 | mask_size=28, 190 | pos_weight=-1, 191 | debug=False) 192 | ]), 193 | test_cfg=dict( 194 | rpn=dict( 195 | nms_pre=1000, 196 | max_per_img=1000, 197 | nms=dict(type='nms', iou_threshold=0.7), 198 | min_bbox_size=0), 199 | rcnn=dict( 200 | score_thr=0.05, 201 | nms=dict(type='nms', iou_threshold=0.5), 202 | max_per_img=100, 203 | mask_thr_binary=0.5))) 204 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/cascade-rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='CascadeRCNN', 4 | data_preprocessor=dict( 5 | type='DetDataPreprocessor', 6 | mean=[123.675, 116.28, 103.53], 7 | std=[58.395, 57.12, 57.375], 8 | bgr_to_rgb=True, 9 | pad_size_divisor=32), 10 | backbone=dict( 11 | type='ResNet', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | frozen_stages=1, 16 | norm_cfg=dict(type='BN', requires_grad=True), 17 | norm_eval=True, 18 | style='pytorch', 19 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 20 | neck=dict( 21 | type='FPN', 22 | in_channels=[256, 512, 1024, 
2048], 23 | out_channels=256, 24 | num_outs=5), 25 | rpn_head=dict( 26 | type='RPNHead', 27 | in_channels=256, 28 | feat_channels=256, 29 | anchor_generator=dict( 30 | type='AnchorGenerator', 31 | scales=[8], 32 | ratios=[0.5, 1.0, 2.0], 33 | strides=[4, 8, 16, 32, 64]), 34 | bbox_coder=dict( 35 | type='DeltaXYWHBBoxCoder', 36 | target_means=[.0, .0, .0, .0], 37 | target_stds=[1.0, 1.0, 1.0, 1.0]), 38 | loss_cls=dict( 39 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 40 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), 41 | roi_head=dict( 42 | type='CascadeRoIHead', 43 | num_stages=3, 44 | stage_loss_weights=[1, 0.5, 0.25], 45 | bbox_roi_extractor=dict( 46 | type='SingleRoIExtractor', 47 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 48 | out_channels=256, 49 | featmap_strides=[4, 8, 16, 32]), 50 | bbox_head=[ 51 | dict( 52 | type='Shared2FCBBoxHead', 53 | in_channels=256, 54 | fc_out_channels=1024, 55 | roi_feat_size=7, 56 | num_classes=80, 57 | bbox_coder=dict( 58 | type='DeltaXYWHBBoxCoder', 59 | target_means=[0., 0., 0., 0.], 60 | target_stds=[0.1, 0.1, 0.2, 0.2]), 61 | reg_class_agnostic=True, 62 | loss_cls=dict( 63 | type='CrossEntropyLoss', 64 | use_sigmoid=False, 65 | loss_weight=1.0), 66 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 67 | loss_weight=1.0)), 68 | dict( 69 | type='Shared2FCBBoxHead', 70 | in_channels=256, 71 | fc_out_channels=1024, 72 | roi_feat_size=7, 73 | num_classes=80, 74 | bbox_coder=dict( 75 | type='DeltaXYWHBBoxCoder', 76 | target_means=[0., 0., 0., 0.], 77 | target_stds=[0.05, 0.05, 0.1, 0.1]), 78 | reg_class_agnostic=True, 79 | loss_cls=dict( 80 | type='CrossEntropyLoss', 81 | use_sigmoid=False, 82 | loss_weight=1.0), 83 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 84 | loss_weight=1.0)), 85 | dict( 86 | type='Shared2FCBBoxHead', 87 | in_channels=256, 88 | fc_out_channels=1024, 89 | roi_feat_size=7, 90 | num_classes=80, 91 | bbox_coder=dict( 92 | 
type='DeltaXYWHBBoxCoder', 93 | target_means=[0., 0., 0., 0.], 94 | target_stds=[0.033, 0.033, 0.067, 0.067]), 95 | reg_class_agnostic=True, 96 | loss_cls=dict( 97 | type='CrossEntropyLoss', 98 | use_sigmoid=False, 99 | loss_weight=1.0), 100 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) 101 | ]), 102 | # model training and testing settings 103 | train_cfg=dict( 104 | rpn=dict( 105 | assigner=dict( 106 | type='MaxIoUAssigner', 107 | pos_iou_thr=0.7, 108 | neg_iou_thr=0.3, 109 | min_pos_iou=0.3, 110 | match_low_quality=True, 111 | ignore_iof_thr=-1), 112 | sampler=dict( 113 | type='RandomSampler', 114 | num=256, 115 | pos_fraction=0.5, 116 | neg_pos_ub=-1, 117 | add_gt_as_proposals=False), 118 | allowed_border=0, 119 | pos_weight=-1, 120 | debug=False), 121 | rpn_proposal=dict( 122 | nms_pre=2000, 123 | max_per_img=2000, 124 | nms=dict(type='nms', iou_threshold=0.7), 125 | min_bbox_size=0), 126 | rcnn=[ 127 | dict( 128 | assigner=dict( 129 | type='MaxIoUAssigner', 130 | pos_iou_thr=0.5, 131 | neg_iou_thr=0.5, 132 | min_pos_iou=0.5, 133 | match_low_quality=False, 134 | ignore_iof_thr=-1), 135 | sampler=dict( 136 | type='RandomSampler', 137 | num=512, 138 | pos_fraction=0.25, 139 | neg_pos_ub=-1, 140 | add_gt_as_proposals=True), 141 | pos_weight=-1, 142 | debug=False), 143 | dict( 144 | assigner=dict( 145 | type='MaxIoUAssigner', 146 | pos_iou_thr=0.6, 147 | neg_iou_thr=0.6, 148 | min_pos_iou=0.6, 149 | match_low_quality=False, 150 | ignore_iof_thr=-1), 151 | sampler=dict( 152 | type='RandomSampler', 153 | num=512, 154 | pos_fraction=0.25, 155 | neg_pos_ub=-1, 156 | add_gt_as_proposals=True), 157 | pos_weight=-1, 158 | debug=False), 159 | dict( 160 | assigner=dict( 161 | type='MaxIoUAssigner', 162 | pos_iou_thr=0.7, 163 | neg_iou_thr=0.7, 164 | min_pos_iou=0.7, 165 | match_low_quality=False, 166 | ignore_iof_thr=-1), 167 | sampler=dict( 168 | type='RandomSampler', 169 | num=512, 170 | pos_fraction=0.25, 171 | neg_pos_ub=-1, 172 | 
add_gt_as_proposals=True), 173 | pos_weight=-1, 174 | debug=False) 175 | ]), 176 | test_cfg=dict( 177 | rpn=dict( 178 | nms_pre=1000, 179 | max_per_img=1000, 180 | nms=dict(type='nms', iou_threshold=0.7), 181 | min_bbox_size=0), 182 | rcnn=dict( 183 | score_thr=0.05, 184 | nms=dict(type='nms', iou_threshold=0.5), 185 | max_per_img=100))) 186 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/fast-rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FastRCNN', 4 | data_preprocessor=dict( 5 | type='DetDataPreprocessor', 6 | mean=[123.675, 116.28, 103.53], 7 | std=[58.395, 57.12, 57.375], 8 | bgr_to_rgb=True, 9 | pad_size_divisor=32), 10 | backbone=dict( 11 | type='ResNet', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | frozen_stages=1, 16 | norm_cfg=dict(type='BN', requires_grad=True), 17 | norm_eval=True, 18 | style='pytorch', 19 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 20 | neck=dict( 21 | type='FPN', 22 | in_channels=[256, 512, 1024, 2048], 23 | out_channels=256, 24 | num_outs=5), 25 | roi_head=dict( 26 | type='StandardRoIHead', 27 | bbox_roi_extractor=dict( 28 | type='SingleRoIExtractor', 29 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 30 | out_channels=256, 31 | featmap_strides=[4, 8, 16, 32]), 32 | bbox_head=dict( 33 | type='Shared2FCBBoxHead', 34 | in_channels=256, 35 | fc_out_channels=1024, 36 | roi_feat_size=7, 37 | num_classes=80, 38 | bbox_coder=dict( 39 | type='DeltaXYWHBBoxCoder', 40 | target_means=[0., 0., 0., 0.], 41 | target_stds=[0.1, 0.1, 0.2, 0.2]), 42 | reg_class_agnostic=False, 43 | loss_cls=dict( 44 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 45 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 46 | # model training and testing settings 47 | train_cfg=dict( 48 | rcnn=dict( 49 | 
assigner=dict( 50 | type='MaxIoUAssigner', 51 | pos_iou_thr=0.5, 52 | neg_iou_thr=0.5, 53 | min_pos_iou=0.5, 54 | match_low_quality=False, 55 | ignore_iof_thr=-1), 56 | sampler=dict( 57 | type='RandomSampler', 58 | num=512, 59 | pos_fraction=0.25, 60 | neg_pos_ub=-1, 61 | add_gt_as_proposals=True), 62 | pos_weight=-1, 63 | debug=False)), 64 | test_cfg=dict( 65 | rcnn=dict( 66 | score_thr=0.05, 67 | nms=dict(type='nms', iou_threshold=0.5), 68 | max_per_img=100))) 69 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/faster-rcnn_r50-caffe-c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='FasterRCNN', 5 | data_preprocessor=dict( 6 | type='DetDataPreprocessor', 7 | mean=[103.530, 116.280, 123.675], 8 | std=[1.0, 1.0, 1.0], 9 | bgr_to_rgb=False, 10 | pad_size_divisor=32), 11 | backbone=dict( 12 | type='ResNet', 13 | depth=50, 14 | num_stages=3, 15 | strides=(1, 2, 2), 16 | dilations=(1, 1, 1), 17 | out_indices=(2, ), 18 | frozen_stages=1, 19 | norm_cfg=norm_cfg, 20 | norm_eval=True, 21 | style='caffe', 22 | init_cfg=dict( 23 | type='Pretrained', 24 | checkpoint='open-mmlab://detectron2/resnet50_caffe')), 25 | rpn_head=dict( 26 | type='RPNHead', 27 | in_channels=1024, 28 | feat_channels=1024, 29 | anchor_generator=dict( 30 | type='AnchorGenerator', 31 | scales=[2, 4, 8, 16, 32], 32 | ratios=[0.5, 1.0, 2.0], 33 | strides=[16]), 34 | bbox_coder=dict( 35 | type='DeltaXYWHBBoxCoder', 36 | target_means=[.0, .0, .0, .0], 37 | target_stds=[1.0, 1.0, 1.0, 1.0]), 38 | loss_cls=dict( 39 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 40 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 41 | roi_head=dict( 42 | type='StandardRoIHead', 43 | shared_head=dict( 44 | type='ResLayer', 45 | depth=50, 46 | stage=3, 47 | stride=2, 48 | dilation=1, 49 | style='caffe', 50 | 
norm_cfg=norm_cfg, 51 | norm_eval=True, 52 | init_cfg=dict( 53 | type='Pretrained', 54 | checkpoint='open-mmlab://detectron2/resnet50_caffe')), 55 | bbox_roi_extractor=dict( 56 | type='SingleRoIExtractor', 57 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 58 | out_channels=1024, 59 | featmap_strides=[16]), 60 | bbox_head=dict( 61 | type='BBoxHead', 62 | with_avg_pool=True, 63 | roi_feat_size=7, 64 | in_channels=2048, 65 | num_classes=80, 66 | bbox_coder=dict( 67 | type='DeltaXYWHBBoxCoder', 68 | target_means=[0., 0., 0., 0.], 69 | target_stds=[0.1, 0.1, 0.2, 0.2]), 70 | reg_class_agnostic=False, 71 | loss_cls=dict( 72 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 73 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 74 | # model training and testing settings 75 | train_cfg=dict( 76 | rpn=dict( 77 | assigner=dict( 78 | type='MaxIoUAssigner', 79 | pos_iou_thr=0.7, 80 | neg_iou_thr=0.3, 81 | min_pos_iou=0.3, 82 | match_low_quality=True, 83 | ignore_iof_thr=-1), 84 | sampler=dict( 85 | type='RandomSampler', 86 | num=256, 87 | pos_fraction=0.5, 88 | neg_pos_ub=-1, 89 | add_gt_as_proposals=False), 90 | allowed_border=-1, 91 | pos_weight=-1, 92 | debug=False), 93 | rpn_proposal=dict( 94 | nms_pre=12000, 95 | max_per_img=2000, 96 | nms=dict(type='nms', iou_threshold=0.7), 97 | min_bbox_size=0), 98 | rcnn=dict( 99 | assigner=dict( 100 | type='MaxIoUAssigner', 101 | pos_iou_thr=0.5, 102 | neg_iou_thr=0.5, 103 | min_pos_iou=0.5, 104 | match_low_quality=False, 105 | ignore_iof_thr=-1), 106 | sampler=dict( 107 | type='RandomSampler', 108 | num=512, 109 | pos_fraction=0.25, 110 | neg_pos_ub=-1, 111 | add_gt_as_proposals=True), 112 | pos_weight=-1, 113 | debug=False)), 114 | test_cfg=dict( 115 | rpn=dict( 116 | nms_pre=6000, 117 | max_per_img=1000, 118 | nms=dict(type='nms', iou_threshold=0.7), 119 | min_bbox_size=0), 120 | rcnn=dict( 121 | score_thr=0.05, 122 | nms=dict(type='nms', iou_threshold=0.5), 123 | max_per_img=100))) 124 | 
-------------------------------------------------------------------------------- /detection/configs/_base_/models/faster-rcnn_r50-caffe-dc5.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='FasterRCNN', 5 | data_preprocessor=dict( 6 | type='DetDataPreprocessor', 7 | mean=[103.530, 116.280, 123.675], 8 | std=[1.0, 1.0, 1.0], 9 | bgr_to_rgb=False, 10 | pad_size_divisor=32), 11 | backbone=dict( 12 | type='ResNet', 13 | depth=50, 14 | num_stages=4, 15 | strides=(1, 2, 2, 1), 16 | dilations=(1, 1, 1, 2), 17 | out_indices=(3, ), 18 | frozen_stages=1, 19 | norm_cfg=norm_cfg, 20 | norm_eval=True, 21 | style='caffe', 22 | init_cfg=dict( 23 | type='Pretrained', 24 | checkpoint='open-mmlab://detectron2/resnet50_caffe')), 25 | rpn_head=dict( 26 | type='RPNHead', 27 | in_channels=2048, 28 | feat_channels=2048, 29 | anchor_generator=dict( 30 | type='AnchorGenerator', 31 | scales=[2, 4, 8, 16, 32], 32 | ratios=[0.5, 1.0, 2.0], 33 | strides=[16]), 34 | bbox_coder=dict( 35 | type='DeltaXYWHBBoxCoder', 36 | target_means=[.0, .0, .0, .0], 37 | target_stds=[1.0, 1.0, 1.0, 1.0]), 38 | loss_cls=dict( 39 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 40 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 41 | roi_head=dict( 42 | type='StandardRoIHead', 43 | bbox_roi_extractor=dict( 44 | type='SingleRoIExtractor', 45 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 46 | out_channels=2048, 47 | featmap_strides=[16]), 48 | bbox_head=dict( 49 | type='Shared2FCBBoxHead', 50 | in_channels=2048, 51 | fc_out_channels=1024, 52 | roi_feat_size=7, 53 | num_classes=80, 54 | bbox_coder=dict( 55 | type='DeltaXYWHBBoxCoder', 56 | target_means=[0., 0., 0., 0.], 57 | target_stds=[0.1, 0.1, 0.2, 0.2]), 58 | reg_class_agnostic=False, 59 | loss_cls=dict( 60 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 61 | 
loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 62 | # model training and testing settings 63 | train_cfg=dict( 64 | rpn=dict( 65 | assigner=dict( 66 | type='MaxIoUAssigner', 67 | pos_iou_thr=0.7, 68 | neg_iou_thr=0.3, 69 | min_pos_iou=0.3, 70 | match_low_quality=True, 71 | ignore_iof_thr=-1), 72 | sampler=dict( 73 | type='RandomSampler', 74 | num=256, 75 | pos_fraction=0.5, 76 | neg_pos_ub=-1, 77 | add_gt_as_proposals=False), 78 | allowed_border=0, 79 | pos_weight=-1, 80 | debug=False), 81 | rpn_proposal=dict( 82 | nms_pre=12000, 83 | max_per_img=2000, 84 | nms=dict(type='nms', iou_threshold=0.7), 85 | min_bbox_size=0), 86 | rcnn=dict( 87 | assigner=dict( 88 | type='MaxIoUAssigner', 89 | pos_iou_thr=0.5, 90 | neg_iou_thr=0.5, 91 | min_pos_iou=0.5, 92 | match_low_quality=False, 93 | ignore_iof_thr=-1), 94 | sampler=dict( 95 | type='RandomSampler', 96 | num=512, 97 | pos_fraction=0.25, 98 | neg_pos_ub=-1, 99 | add_gt_as_proposals=True), 100 | pos_weight=-1, 101 | debug=False)), 102 | test_cfg=dict( 103 | rpn=dict( 104 | nms=dict(type='nms', iou_threshold=0.7), 105 | nms_pre=6000, 106 | max_per_img=1000, 107 | min_bbox_size=0), 108 | rcnn=dict( 109 | score_thr=0.05, 110 | nms=dict(type='nms', iou_threshold=0.5), 111 | max_per_img=100))) 112 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/faster-rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FasterRCNN', 4 | data_preprocessor=dict( 5 | type='DetDataPreprocessor', 6 | mean=[123.675, 116.28, 103.53], 7 | std=[58.395, 57.12, 57.375], 8 | bgr_to_rgb=True, 9 | pad_size_divisor=32), 10 | backbone=dict( 11 | type='ResNet', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | frozen_stages=1, 16 | norm_cfg=dict(type='BN', requires_grad=True), 17 | norm_eval=True, 18 | style='pytorch', 19 | init_cfg=dict(type='Pretrained', 
checkpoint='torchvision://resnet50')), 20 | neck=dict( 21 | type='FPN', 22 | in_channels=[256, 512, 1024, 2048], 23 | out_channels=256, 24 | num_outs=5), 25 | rpn_head=dict( 26 | type='RPNHead', 27 | in_channels=256, 28 | feat_channels=256, 29 | anchor_generator=dict( 30 | type='AnchorGenerator', 31 | scales=[8], 32 | ratios=[0.5, 1.0, 2.0], 33 | strides=[4, 8, 16, 32, 64]), 34 | bbox_coder=dict( 35 | type='DeltaXYWHBBoxCoder', 36 | target_means=[.0, .0, .0, .0], 37 | target_stds=[1.0, 1.0, 1.0, 1.0]), 38 | loss_cls=dict( 39 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 40 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 41 | roi_head=dict( 42 | type='StandardRoIHead', 43 | bbox_roi_extractor=dict( 44 | type='SingleRoIExtractor', 45 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 46 | out_channels=256, 47 | featmap_strides=[4, 8, 16, 32]), 48 | bbox_head=dict( 49 | type='Shared2FCBBoxHead', 50 | in_channels=256, 51 | fc_out_channels=1024, 52 | roi_feat_size=7, 53 | num_classes=80, 54 | bbox_coder=dict( 55 | type='DeltaXYWHBBoxCoder', 56 | target_means=[0., 0., 0., 0.], 57 | target_stds=[0.1, 0.1, 0.2, 0.2]), 58 | reg_class_agnostic=False, 59 | loss_cls=dict( 60 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 61 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 62 | # model training and testing settings 63 | train_cfg=dict( 64 | rpn=dict( 65 | assigner=dict( 66 | type='MaxIoUAssigner', 67 | pos_iou_thr=0.7, 68 | neg_iou_thr=0.3, 69 | min_pos_iou=0.3, 70 | match_low_quality=True, 71 | ignore_iof_thr=-1), 72 | sampler=dict( 73 | type='RandomSampler', 74 | num=256, 75 | pos_fraction=0.5, 76 | neg_pos_ub=-1, 77 | add_gt_as_proposals=False), 78 | allowed_border=-1, 79 | pos_weight=-1, 80 | debug=False), 81 | rpn_proposal=dict( 82 | nms_pre=2000, 83 | max_per_img=1000, 84 | nms=dict(type='nms', iou_threshold=0.7), 85 | min_bbox_size=0), 86 | rcnn=dict( 87 | assigner=dict( 88 | type='MaxIoUAssigner', 89 | 
pos_iou_thr=0.5, 90 | neg_iou_thr=0.5, 91 | min_pos_iou=0.5, 92 | match_low_quality=False, 93 | ignore_iof_thr=-1), 94 | sampler=dict( 95 | type='RandomSampler', 96 | num=512, 97 | pos_fraction=0.25, 98 | neg_pos_ub=-1, 99 | add_gt_as_proposals=True), 100 | pos_weight=-1, 101 | debug=False)), 102 | test_cfg=dict( 103 | rpn=dict( 104 | nms_pre=1000, 105 | max_per_img=1000, 106 | nms=dict(type='nms', iou_threshold=0.7), 107 | min_bbox_size=0), 108 | rcnn=dict( 109 | score_thr=0.05, 110 | nms=dict(type='nms', iou_threshold=0.5), 111 | max_per_img=100) 112 | # soft-nms is also supported for rcnn testing 113 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) 114 | )) 115 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/mask-rcnn_r50-caffe-c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='MaskRCNN', 5 | data_preprocessor=dict( 6 | type='DetDataPreprocessor', 7 | mean=[103.530, 116.280, 123.675], 8 | std=[1.0, 1.0, 1.0], 9 | bgr_to_rgb=False, 10 | pad_mask=True, 11 | pad_size_divisor=32), 12 | backbone=dict( 13 | type='ResNet', 14 | depth=50, 15 | num_stages=3, 16 | strides=(1, 2, 2), 17 | dilations=(1, 1, 1), 18 | out_indices=(2, ), 19 | frozen_stages=1, 20 | norm_cfg=norm_cfg, 21 | norm_eval=True, 22 | style='caffe', 23 | init_cfg=dict( 24 | type='Pretrained', 25 | checkpoint='open-mmlab://detectron2/resnet50_caffe')), 26 | rpn_head=dict( 27 | type='RPNHead', 28 | in_channels=1024, 29 | feat_channels=1024, 30 | anchor_generator=dict( 31 | type='AnchorGenerator', 32 | scales=[2, 4, 8, 16, 32], 33 | ratios=[0.5, 1.0, 2.0], 34 | strides=[16]), 35 | bbox_coder=dict( 36 | type='DeltaXYWHBBoxCoder', 37 | target_means=[.0, .0, .0, .0], 38 | target_stds=[1.0, 1.0, 1.0, 1.0]), 39 | loss_cls=dict( 40 | type='CrossEntropyLoss', use_sigmoid=True, 
loss_weight=1.0), 41 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 42 | roi_head=dict( 43 | type='StandardRoIHead', 44 | shared_head=dict( 45 | type='ResLayer', 46 | depth=50, 47 | stage=3, 48 | stride=2, 49 | dilation=1, 50 | style='caffe', 51 | norm_cfg=norm_cfg, 52 | norm_eval=True), 53 | bbox_roi_extractor=dict( 54 | type='SingleRoIExtractor', 55 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 56 | out_channels=1024, 57 | featmap_strides=[16]), 58 | bbox_head=dict( 59 | type='BBoxHead', 60 | with_avg_pool=True, 61 | roi_feat_size=7, 62 | in_channels=2048, 63 | num_classes=80, 64 | bbox_coder=dict( 65 | type='DeltaXYWHBBoxCoder', 66 | target_means=[0., 0., 0., 0.], 67 | target_stds=[0.1, 0.1, 0.2, 0.2]), 68 | reg_class_agnostic=False, 69 | loss_cls=dict( 70 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 71 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 72 | mask_roi_extractor=None, 73 | mask_head=dict( 74 | type='FCNMaskHead', 75 | num_convs=0, 76 | in_channels=2048, 77 | conv_out_channels=256, 78 | num_classes=80, 79 | loss_mask=dict( 80 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 81 | # model training and testing settings 82 | train_cfg=dict( 83 | rpn=dict( 84 | assigner=dict( 85 | type='MaxIoUAssigner', 86 | pos_iou_thr=0.7, 87 | neg_iou_thr=0.3, 88 | min_pos_iou=0.3, 89 | match_low_quality=True, 90 | ignore_iof_thr=-1), 91 | sampler=dict( 92 | type='RandomSampler', 93 | num=256, 94 | pos_fraction=0.5, 95 | neg_pos_ub=-1, 96 | add_gt_as_proposals=False), 97 | allowed_border=0, 98 | pos_weight=-1, 99 | debug=False), 100 | rpn_proposal=dict( 101 | nms_pre=12000, 102 | max_per_img=2000, 103 | nms=dict(type='nms', iou_threshold=0.7), 104 | min_bbox_size=0), 105 | rcnn=dict( 106 | assigner=dict( 107 | type='MaxIoUAssigner', 108 | pos_iou_thr=0.5, 109 | neg_iou_thr=0.5, 110 | min_pos_iou=0.5, 111 | match_low_quality=False, 112 | ignore_iof_thr=-1), 113 | sampler=dict( 114 | type='RandomSampler', 115 | 
num=512, 116 | pos_fraction=0.25, 117 | neg_pos_ub=-1, 118 | add_gt_as_proposals=True), 119 | mask_size=14, 120 | pos_weight=-1, 121 | debug=False)), 122 | test_cfg=dict( 123 | rpn=dict( 124 | nms_pre=6000, 125 | nms=dict(type='nms', iou_threshold=0.7), 126 | max_per_img=1000, 127 | min_bbox_size=0), 128 | rcnn=dict( 129 | score_thr=0.05, 130 | nms=dict(type='nms', iou_threshold=0.5), 131 | max_per_img=100, 132 | mask_thr_binary=0.5))) 133 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/mask-rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='MaskRCNN', 4 | data_preprocessor=dict( 5 | type='DetDataPreprocessor', 6 | mean=[123.675, 116.28, 103.53], 7 | std=[58.395, 57.12, 57.375], 8 | bgr_to_rgb=True, 9 | pad_mask=True, 10 | pad_size_divisor=32), 11 | backbone=dict( 12 | type='ResNet', 13 | depth=50, 14 | num_stages=4, 15 | out_indices=(0, 1, 2, 3), 16 | frozen_stages=1, 17 | norm_cfg=dict(type='BN', requires_grad=True), 18 | norm_eval=True, 19 | style='pytorch', 20 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 21 | neck=dict( 22 | type='FPN', 23 | in_channels=[256, 512, 1024, 2048], 24 | out_channels=256, 25 | num_outs=5), 26 | rpn_head=dict( 27 | type='RPNHead', 28 | in_channels=256, 29 | feat_channels=256, 30 | anchor_generator=dict( 31 | type='AnchorGenerator', 32 | scales=[8], 33 | ratios=[0.5, 1.0, 2.0], 34 | strides=[4, 8, 16, 32, 64]), 35 | bbox_coder=dict( 36 | type='DeltaXYWHBBoxCoder', 37 | target_means=[.0, .0, .0, .0], 38 | target_stds=[1.0, 1.0, 1.0, 1.0]), 39 | loss_cls=dict( 40 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 41 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 42 | roi_head=dict( 43 | type='StandardRoIHead', 44 | bbox_roi_extractor=dict( 45 | type='SingleRoIExtractor', 46 | roi_layer=dict(type='RoIAlign', output_size=7, 
sampling_ratio=0), 47 | out_channels=256, 48 | featmap_strides=[4, 8, 16, 32]), 49 | bbox_head=dict( 50 | type='Shared2FCBBoxHead', 51 | in_channels=256, 52 | fc_out_channels=1024, 53 | roi_feat_size=7, 54 | num_classes=80, 55 | bbox_coder=dict( 56 | type='DeltaXYWHBBoxCoder', 57 | target_means=[0., 0., 0., 0.], 58 | target_stds=[0.1, 0.1, 0.2, 0.2]), 59 | reg_class_agnostic=False, 60 | loss_cls=dict( 61 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 62 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 63 | mask_roi_extractor=dict( 64 | type='SingleRoIExtractor', 65 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 66 | out_channels=256, 67 | featmap_strides=[4, 8, 16, 32]), 68 | mask_head=dict( 69 | type='FCNMaskHead', 70 | num_convs=4, 71 | in_channels=256, 72 | conv_out_channels=256, 73 | num_classes=80, 74 | loss_mask=dict( 75 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 76 | # model training and testing settings 77 | train_cfg=dict( 78 | rpn=dict( 79 | assigner=dict( 80 | type='MaxIoUAssigner', 81 | pos_iou_thr=0.7, 82 | neg_iou_thr=0.3, 83 | min_pos_iou=0.3, 84 | match_low_quality=True, 85 | ignore_iof_thr=-1), 86 | sampler=dict( 87 | type='RandomSampler', 88 | num=256, 89 | pos_fraction=0.5, 90 | neg_pos_ub=-1, 91 | add_gt_as_proposals=False), 92 | allowed_border=-1, 93 | pos_weight=-1, 94 | debug=False), 95 | rpn_proposal=dict( 96 | nms_pre=2000, 97 | max_per_img=1000, 98 | nms=dict(type='nms', iou_threshold=0.7), 99 | min_bbox_size=0), 100 | rcnn=dict( 101 | assigner=dict( 102 | type='MaxIoUAssigner', 103 | pos_iou_thr=0.5, 104 | neg_iou_thr=0.5, 105 | min_pos_iou=0.5, 106 | match_low_quality=True, 107 | ignore_iof_thr=-1), 108 | sampler=dict( 109 | type='RandomSampler', 110 | num=512, 111 | pos_fraction=0.25, 112 | neg_pos_ub=-1, 113 | add_gt_as_proposals=True), 114 | mask_size=28, 115 | pos_weight=-1, 116 | debug=False)), 117 | test_cfg=dict( 118 | rpn=dict( 119 | nms_pre=1000, 120 | max_per_img=1000, 
121 | nms=dict(type='nms', iou_threshold=0.7), 122 | min_bbox_size=0), 123 | rcnn=dict( 124 | score_thr=0.05, 125 | nms=dict(type='nms', iou_threshold=0.5), 126 | max_per_img=100, 127 | mask_thr_binary=0.5))) 128 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/retinanet_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | data_preprocessor=dict( 5 | type='DetDataPreprocessor', 6 | mean=[123.675, 116.28, 103.53], 7 | std=[58.395, 57.12, 57.375], 8 | bgr_to_rgb=True, 9 | pad_size_divisor=32), 10 | backbone=dict( 11 | type='ResNet', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | frozen_stages=1, 16 | norm_cfg=dict(type='BN', requires_grad=True), 17 | norm_eval=True, 18 | style='pytorch', 19 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 20 | neck=dict( 21 | type='FPN', 22 | in_channels=[256, 512, 1024, 2048], 23 | out_channels=256, 24 | start_level=1, 25 | add_extra_convs='on_input', 26 | num_outs=5), 27 | bbox_head=dict( 28 | type='RetinaHead', 29 | num_classes=80, 30 | in_channels=256, 31 | stacked_convs=4, 32 | feat_channels=256, 33 | anchor_generator=dict( 34 | type='AnchorGenerator', 35 | octave_base_scale=4, 36 | scales_per_octave=3, 37 | ratios=[0.5, 1.0, 2.0], 38 | strides=[8, 16, 32, 64, 128]), 39 | bbox_coder=dict( 40 | type='DeltaXYWHBBoxCoder', 41 | target_means=[.0, .0, .0, .0], 42 | target_stds=[1.0, 1.0, 1.0, 1.0]), 43 | loss_cls=dict( 44 | type='FocalLoss', 45 | use_sigmoid=True, 46 | gamma=2.0, 47 | alpha=0.25, 48 | loss_weight=1.0), 49 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 50 | # model training and testing settings 51 | train_cfg=dict( 52 | assigner=dict( 53 | type='MaxIoUAssigner', 54 | pos_iou_thr=0.5, 55 | neg_iou_thr=0.4, 56 | min_pos_iou=0, 57 | ignore_iof_thr=-1), 58 | sampler=dict( 59 | type='PseudoSampler'), # 
Focal loss should use PseudoSampler 60 | allowed_border=-1, 61 | pos_weight=-1, 62 | debug=False), 63 | test_cfg=dict( 64 | nms_pre=1000, 65 | min_bbox_size=0, 66 | score_thr=0.05, 67 | nms=dict(type='nms', iou_threshold=0.5), 68 | max_per_img=100)) 69 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/rpn_r50-caffe-c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | data_preprocessor=dict( 5 | type='DetDataPreprocessor', 6 | mean=[103.530, 116.280, 123.675], 7 | std=[1.0, 1.0, 1.0], 8 | bgr_to_rgb=False, 9 | pad_size_divisor=32), 10 | backbone=dict( 11 | type='ResNet', 12 | depth=50, 13 | num_stages=3, 14 | strides=(1, 2, 2), 15 | dilations=(1, 1, 1), 16 | out_indices=(2, ), 17 | frozen_stages=1, 18 | norm_cfg=dict(type='BN', requires_grad=False), 19 | norm_eval=True, 20 | style='caffe', 21 | init_cfg=dict( 22 | type='Pretrained', 23 | checkpoint='open-mmlab://detectron2/resnet50_caffe')), 24 | neck=None, 25 | rpn_head=dict( 26 | type='RPNHead', 27 | in_channels=1024, 28 | feat_channels=1024, 29 | anchor_generator=dict( 30 | type='AnchorGenerator', 31 | scales=[2, 4, 8, 16, 32], 32 | ratios=[0.5, 1.0, 2.0], 33 | strides=[16]), 34 | bbox_coder=dict( 35 | type='DeltaXYWHBBoxCoder', 36 | target_means=[.0, .0, .0, .0], 37 | target_stds=[1.0, 1.0, 1.0, 1.0]), 38 | loss_cls=dict( 39 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 40 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 41 | # model training and testing settings 42 | train_cfg=dict( 43 | rpn=dict( 44 | assigner=dict( 45 | type='MaxIoUAssigner', 46 | pos_iou_thr=0.7, 47 | neg_iou_thr=0.3, 48 | min_pos_iou=0.3, 49 | ignore_iof_thr=-1), 50 | sampler=dict( 51 | type='RandomSampler', 52 | num=256, 53 | pos_fraction=0.5, 54 | neg_pos_ub=-1, 55 | add_gt_as_proposals=False), 56 | allowed_border=-1, 57 | pos_weight=-1, 58 | debug=False)), 59 | 
test_cfg=dict( 60 | rpn=dict( 61 | nms_pre=12000, 62 | max_per_img=2000, 63 | nms=dict(type='nms', iou_threshold=0.7), 64 | min_bbox_size=0))) 65 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/rpn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | data_preprocessor=dict( 5 | type='DetDataPreprocessor', 6 | mean=[123.675, 116.28, 103.53], 7 | std=[58.395, 57.12, 57.375], 8 | bgr_to_rgb=True, 9 | pad_size_divisor=32), 10 | backbone=dict( 11 | type='ResNet', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | frozen_stages=1, 16 | norm_cfg=dict(type='BN', requires_grad=True), 17 | norm_eval=True, 18 | style='pytorch', 19 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 20 | neck=dict( 21 | type='FPN', 22 | in_channels=[256, 512, 1024, 2048], 23 | out_channels=256, 24 | num_outs=5), 25 | rpn_head=dict( 26 | type='RPNHead', 27 | in_channels=256, 28 | feat_channels=256, 29 | anchor_generator=dict( 30 | type='AnchorGenerator', 31 | scales=[8], 32 | ratios=[0.5, 1.0, 2.0], 33 | strides=[4, 8, 16, 32, 64]), 34 | bbox_coder=dict( 35 | type='DeltaXYWHBBoxCoder', 36 | target_means=[.0, .0, .0, .0], 37 | target_stds=[1.0, 1.0, 1.0, 1.0]), 38 | loss_cls=dict( 39 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 40 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 41 | # model training and testing settings 42 | train_cfg=dict( 43 | rpn=dict( 44 | assigner=dict( 45 | type='MaxIoUAssigner', 46 | pos_iou_thr=0.7, 47 | neg_iou_thr=0.3, 48 | min_pos_iou=0.3, 49 | ignore_iof_thr=-1), 50 | sampler=dict( 51 | type='RandomSampler', 52 | num=256, 53 | pos_fraction=0.5, 54 | neg_pos_ub=-1, 55 | add_gt_as_proposals=False), 56 | allowed_border=-1, 57 | pos_weight=-1, 58 | debug=False)), 59 | test_cfg=dict( 60 | rpn=dict( 61 | nms_pre=2000, 62 | max_per_img=1000, 63 | 
nms=dict(type='nms', iou_threshold=0.7), 64 | min_bbox_size=0))) 65 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/ssd300.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | input_size = 300 3 | model = dict( 4 | type='SingleStageDetector', 5 | data_preprocessor=dict( 6 | type='DetDataPreprocessor', 7 | mean=[123.675, 116.28, 103.53], 8 | std=[1, 1, 1], 9 | bgr_to_rgb=True, 10 | pad_size_divisor=1), 11 | backbone=dict( 12 | type='SSDVGG', 13 | depth=16, 14 | with_last_pool=False, 15 | ceil_mode=True, 16 | out_indices=(3, 4), 17 | out_feature_indices=(22, 34), 18 | init_cfg=dict( 19 | type='Pretrained', checkpoint='open-mmlab://vgg16_caffe')), 20 | neck=dict( 21 | type='SSDNeck', 22 | in_channels=(512, 1024), 23 | out_channels=(512, 1024, 512, 256, 256, 256), 24 | level_strides=(2, 2, 1, 1), 25 | level_paddings=(1, 1, 0, 0), 26 | l2_norm_scale=20), 27 | bbox_head=dict( 28 | type='SSDHead', 29 | in_channels=(512, 1024, 512, 256, 256, 256), 30 | num_classes=80, 31 | anchor_generator=dict( 32 | type='SSDAnchorGenerator', 33 | scale_major=False, 34 | input_size=input_size, 35 | basesize_ratio_range=(0.15, 0.9), 36 | strides=[8, 16, 32, 64, 100, 300], 37 | ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]), 38 | bbox_coder=dict( 39 | type='DeltaXYWHBBoxCoder', 40 | target_means=[.0, .0, .0, .0], 41 | target_stds=[0.1, 0.1, 0.2, 0.2])), 42 | # model training and testing settings 43 | train_cfg=dict( 44 | assigner=dict( 45 | type='MaxIoUAssigner', 46 | pos_iou_thr=0.5, 47 | neg_iou_thr=0.5, 48 | min_pos_iou=0., 49 | ignore_iof_thr=-1, 50 | gt_max_assign_all=False), 51 | sampler=dict(type='PseudoSampler'), 52 | smoothl1_beta=1., 53 | allowed_border=-1, 54 | pos_weight=-1, 55 | neg_pos_ratio=3, 56 | debug=False), 57 | test_cfg=dict( 58 | nms_pre=1000, 59 | nms=dict(type='nms', iou_threshold=0.45), 60 | min_bbox_size=0, 61 | score_thr=0.02, 62 | 
max_per_img=200)) 63 | cudnn_benchmark = True 64 | -------------------------------------------------------------------------------- /detection/configs/_base_/schedules/schedule_1x.py: -------------------------------------------------------------------------------- 1 | # training schedule for 1x 2 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=1) 3 | val_cfg = dict(type='ValLoop') 4 | test_cfg = dict(type='TestLoop') 5 | 6 | # learning rate 7 | param_scheduler = [ 8 | dict( 9 | type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), 10 | dict( 11 | type='MultiStepLR', 12 | begin=0, 13 | end=12, 14 | by_epoch=True, 15 | milestones=[8, 11], 16 | gamma=0.1) 17 | ] 18 | 19 | # optimizer 20 | optim_wrapper = dict( 21 | type='OptimWrapper', 22 | optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)) 23 | 24 | # Default setting for scaling LR automatically 25 | # - `enable` means enable scaling LR automatically 26 | # or not by default. 27 | # - `base_batch_size` = (8 GPUs) x (2 samples per GPU). 
28 | auto_scale_lr = dict(enable=False, base_batch_size=16) 29 | -------------------------------------------------------------------------------- /detection/configs/_base_/schedules/schedule_20e.py: -------------------------------------------------------------------------------- 1 | # training schedule for 20e 2 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=20, val_interval=1) 3 | val_cfg = dict(type='ValLoop') 4 | test_cfg = dict(type='TestLoop') 5 | 6 | # learning rate 7 | param_scheduler = [ 8 | dict( 9 | type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), 10 | dict( 11 | type='MultiStepLR', 12 | begin=0, 13 | end=20, 14 | by_epoch=True, 15 | milestones=[16, 19], 16 | gamma=0.1) 17 | ] 18 | 19 | # optimizer 20 | optim_wrapper = dict( 21 | type='OptimWrapper', 22 | optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)) 23 | 24 | # Default setting for scaling LR automatically 25 | # - `enable` means enable scaling LR automatically 26 | # or not by default. 27 | # - `base_batch_size` = (8 GPUs) x (2 samples per GPU). 
28 | auto_scale_lr = dict(enable=False, base_batch_size=16) 29 | -------------------------------------------------------------------------------- /detection/configs/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # training schedule for 2x 2 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=24, val_interval=1) 3 | val_cfg = dict(type='ValLoop') 4 | test_cfg = dict(type='TestLoop') 5 | 6 | # learning rate 7 | param_scheduler = [ 8 | dict( 9 | type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), 10 | dict( 11 | type='MultiStepLR', 12 | begin=0, 13 | end=24, 14 | by_epoch=True, 15 | milestones=[16, 22], 16 | gamma=0.1) 17 | ] 18 | 19 | # optimizer 20 | optim_wrapper = dict( 21 | type='OptimWrapper', 22 | optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)) 23 | 24 | # Default setting for scaling LR automatically 25 | # - `enable` means enable scaling LR automatically 26 | # or not by default. 27 | # - `base_batch_size` = (8 GPUs) x (2 samples per GPU). 
28 | auto_scale_lr = dict(enable=False, base_batch_size=16) 29 | -------------------------------------------------------------------------------- /detection/configs/paca_vit/mask_rcnn_1x/mask_rcnn_pacavit_base_p2cconv_100_0_mstrain_480_800_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | "../../_base_/models/mask-rcnn_r50_fpn.py", 3 | "../../_base_/datasets/coco_instance.py", 4 | "../../_base_/schedules/schedule_1x.py", 5 | "../../_base_/default_runtime.py", 6 | ] 7 | 8 | 9 | model = dict( 10 | backbone=dict( 11 | _delete_=True, 12 | type="pacavit_base_p2cconv_100_0_downstream", 13 | drop_path_rate=0.1, 14 | layer_scale=None, 15 | pretrained=( 16 | "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_base_p2cconv_100_0.pth" 17 | ), 18 | ), 19 | neck=dict(in_channels=[96, 192, 384, 512]), 20 | ) 21 | 22 | img_norm_cfg = dict( 23 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True 24 | ) 25 | 26 | # augmentation strategy originates from DETR / Sparse RCNN 27 | train_pipeline = [ 28 | dict(type="LoadImageFromFile", file_client_args={{_base_.file_client_args}}), 29 | dict(type="LoadAnnotations", with_bbox=True, with_mask=True), 30 | dict(type="RandomFlip", prob=0.5), 31 | dict( 32 | type="RandomChoice", 33 | transforms=[ 34 | [ 35 | dict( 36 | type="RandomChoiceResize", 37 | scales=[ 38 | (480, 1333), 39 | (512, 1333), 40 | (544, 1333), 41 | (576, 1333), 42 | (608, 1333), 43 | (640, 1333), 44 | (672, 1333), 45 | (704, 1333), 46 | (736, 1333), 47 | (768, 1333), 48 | (800, 1333), 49 | ], 50 | keep_ratio=True, 51 | ) 52 | ], 53 | [ 54 | dict( 55 | type="RandomChoiceResize", 56 | scales=[(400, 1333), (500, 1333), (600, 1333)], 57 | keep_ratio=True, 58 | ), 59 | dict( 60 | type="RandomCrop", 61 | crop_type="absolute_range", 62 | crop_size=(384, 600), 63 | allow_negative_crop=True, 64 | ), 65 | dict( 66 | type="RandomChoiceResize", 67 | scales=[ 68 | (480, 1333), 69 | (512, 1333), 70 
| (544, 1333), 71 | (576, 1333), 72 | (608, 1333), 73 | (640, 1333), 74 | (672, 1333), 75 | (704, 1333), 76 | (736, 1333), 77 | (768, 1333), 78 | (800, 1333), 79 | ], 80 | keep_ratio=True, 81 | ), 82 | ], 83 | ], 84 | ), 85 | dict(type="PackDetInputs"), 86 | ] 87 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 88 | 89 | # optimizer 90 | optim_wrapper = dict( 91 | type="OptimWrapper", 92 | paramwise_cfg=dict( 93 | custom_keys={ 94 | "norm": dict(decay_mult=0.0), 95 | } 96 | ), 97 | optimizer=dict( 98 | _delete_=True, type="AdamW", lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05 99 | ), 100 | ) 101 | -------------------------------------------------------------------------------- /detection/configs/paca_vit/mask_rcnn_1x/mask_rcnn_pacavit_convmixer_base_100_mstrain_480_800_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | "../../_base_/models/mask-rcnn_r50_fpn.py", 3 | "../../_base_/datasets/coco_instance.py", 4 | "../../_base_/schedules/schedule_1x.py", 5 | "../../_base_/default_runtime.py", 6 | ] 7 | 8 | 9 | model = dict( 10 | backbone=dict( 11 | _delete_=True, 12 | type="pacavit_convmixer_base_100_downstream", 13 | drop_path_rate=0.1, 14 | layer_scale=None, 15 | pretrained=( 16 | "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_convmixer_base_100.pth" 17 | ), 18 | ), 19 | neck=dict(in_channels=[96, 192, 384, 512]), 20 | ) 21 | 22 | img_norm_cfg = dict( 23 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True 24 | ) 25 | 26 | # augmentation strategy originates from DETR / Sparse RCNN 27 | train_pipeline = [ 28 | dict(type="LoadImageFromFile", file_client_args={{_base_.file_client_args}}), 29 | dict(type="LoadAnnotations", with_bbox=True, with_mask=True), 30 | dict(type="RandomFlip", prob=0.5), 31 | dict( 32 | type="RandomChoice", 33 | transforms=[ 34 | [ 35 | dict( 36 | type="RandomChoiceResize", 37 | scales=[ 38 | (480, 1333), 39 | (512, 1333), 40 | (544, 
1333), 41 | (576, 1333), 42 | (608, 1333), 43 | (640, 1333), 44 | (672, 1333), 45 | (704, 1333), 46 | (736, 1333), 47 | (768, 1333), 48 | (800, 1333), 49 | ], 50 | keep_ratio=True, 51 | ) 52 | ], 53 | [ 54 | dict( 55 | type="RandomChoiceResize", 56 | scales=[(400, 1333), (500, 1333), (600, 1333)], 57 | keep_ratio=True, 58 | ), 59 | dict( 60 | type="RandomCrop", 61 | crop_type="absolute_range", 62 | crop_size=(384, 600), 63 | allow_negative_crop=True, 64 | ), 65 | dict( 66 | type="RandomChoiceResize", 67 | scales=[ 68 | (480, 1333), 69 | (512, 1333), 70 | (544, 1333), 71 | (576, 1333), 72 | (608, 1333), 73 | (640, 1333), 74 | (672, 1333), 75 | (704, 1333), 76 | (736, 1333), 77 | (768, 1333), 78 | (800, 1333), 79 | ], 80 | keep_ratio=True, 81 | ), 82 | ], 83 | ], 84 | ), 85 | dict(type="PackDetInputs"), 86 | ] 87 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 88 | 89 | # optimizer 90 | optim_wrapper = dict( 91 | type="OptimWrapper", 92 | paramwise_cfg=dict( 93 | custom_keys={ 94 | "norm": dict(decay_mult=0.0), 95 | } 96 | ), 97 | optimizer=dict( 98 | _delete_=True, type="AdamW", lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05 99 | ), 100 | ) 101 | -------------------------------------------------------------------------------- /detection/configs/paca_vit/mask_rcnn_1x/mask_rcnn_pacavit_convmixer_small_100_mstrain_480_800_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | "../../_base_/models/mask-rcnn_r50_fpn.py", 3 | "../../_base_/datasets/coco_instance.py", 4 | "../../_base_/schedules/schedule_1x.py", 5 | "../../_base_/default_runtime.py", 6 | ] 7 | 8 | 9 | model = dict( 10 | backbone=dict( 11 | _delete_=True, 12 | type="pacavit_convmixer_small_100_downstream", 13 | drop_path_rate=0.1, 14 | layer_scale=None, 15 | pretrained=( 16 | "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_convmixer_small_100.pth" 17 | ), 18 | ), 19 | neck=dict(in_channels=[96, 192, 320, 384]), 20 | ) 21 
| 22 | img_norm_cfg = dict( 23 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True 24 | ) 25 | 26 | # augmentation strategy originates from DETR / Sparse RCNN 27 | train_pipeline = [ 28 | dict(type="LoadImageFromFile", file_client_args={{_base_.file_client_args}}), 29 | dict(type="LoadAnnotations", with_bbox=True, with_mask=True), 30 | dict(type="RandomFlip", prob=0.5), 31 | dict( 32 | type="RandomChoice", 33 | transforms=[ 34 | [ 35 | dict( 36 | type="RandomChoiceResize", 37 | scales=[ 38 | (480, 1333), 39 | (512, 1333), 40 | (544, 1333), 41 | (576, 1333), 42 | (608, 1333), 43 | (640, 1333), 44 | (672, 1333), 45 | (704, 1333), 46 | (736, 1333), 47 | (768, 1333), 48 | (800, 1333), 49 | ], 50 | keep_ratio=True, 51 | ) 52 | ], 53 | [ 54 | dict( 55 | type="RandomChoiceResize", 56 | scales=[(400, 1333), (500, 1333), (600, 1333)], 57 | keep_ratio=True, 58 | ), 59 | dict( 60 | type="RandomCrop", 61 | crop_type="absolute_range", 62 | crop_size=(384, 600), 63 | allow_negative_crop=True, 64 | ), 65 | dict( 66 | type="RandomChoiceResize", 67 | scales=[ 68 | (480, 1333), 69 | (512, 1333), 70 | (544, 1333), 71 | (576, 1333), 72 | (608, 1333), 73 | (640, 1333), 74 | (672, 1333), 75 | (704, 1333), 76 | (736, 1333), 77 | (768, 1333), 78 | (800, 1333), 79 | ], 80 | keep_ratio=True, 81 | ), 82 | ], 83 | ], 84 | ), 85 | dict(type="PackDetInputs"), 86 | ] 87 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 88 | 89 | # optimizer 90 | optim_wrapper = dict( 91 | type="OptimWrapper", 92 | paramwise_cfg=dict( 93 | custom_keys={ 94 | "norm": dict(decay_mult=0.0), 95 | } 96 | ), 97 | optimizer=dict( 98 | _delete_=True, type="AdamW", lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05 99 | ), 100 | ) 101 | -------------------------------------------------------------------------------- /detection/configs/paca_vit/mask_rcnn_1x/mask_rcnn_pacavit_convmixer_tiny_100_mstrain_480_800_1x_coco.py: 
-------------------------------------------------------------------------------- 1 | _base_ = [ 2 | "../../_base_/models/mask-rcnn_r50_fpn.py", 3 | "../../_base_/datasets/coco_instance.py", 4 | "../../_base_/schedules/schedule_1x.py", 5 | "../../_base_/default_runtime.py", 6 | ] 7 | 8 | 9 | model = dict( 10 | backbone=dict( 11 | _delete_=True, 12 | type="pacavit_convmixer_tiny_100_downstream", 13 | drop_path_rate=0.1, 14 | layer_scale=None, 15 | pretrained=( 16 | "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_convmixer_tiny_100.pth" 17 | ), 18 | ), 19 | neck=dict(in_channels=[96, 192, 320, 384]), 20 | ) 21 | 22 | img_norm_cfg = dict( 23 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True 24 | ) 25 | 26 | # augmentation strategy originates from DETR / Sparse RCNN 27 | train_pipeline = [ 28 | dict(type="LoadImageFromFile", file_client_args={{_base_.file_client_args}}), 29 | dict(type="LoadAnnotations", with_bbox=True, with_mask=True), 30 | dict(type="RandomFlip", prob=0.5), 31 | dict( 32 | type="RandomChoice", 33 | transforms=[ 34 | [ 35 | dict( 36 | type="RandomChoiceResize", 37 | scales=[ 38 | (480, 1333), 39 | (512, 1333), 40 | (544, 1333), 41 | (576, 1333), 42 | (608, 1333), 43 | (640, 1333), 44 | (672, 1333), 45 | (704, 1333), 46 | (736, 1333), 47 | (768, 1333), 48 | (800, 1333), 49 | ], 50 | keep_ratio=True, 51 | ) 52 | ], 53 | [ 54 | dict( 55 | type="RandomChoiceResize", 56 | scales=[(400, 1333), (500, 1333), (600, 1333)], 57 | keep_ratio=True, 58 | ), 59 | dict( 60 | type="RandomCrop", 61 | crop_type="absolute_range", 62 | crop_size=(384, 600), 63 | allow_negative_crop=True, 64 | ), 65 | dict( 66 | type="RandomChoiceResize", 67 | scales=[ 68 | (480, 1333), 69 | (512, 1333), 70 | (544, 1333), 71 | (576, 1333), 72 | (608, 1333), 73 | (640, 1333), 74 | (672, 1333), 75 | (704, 1333), 76 | (736, 1333), 77 | (768, 1333), 78 | (800, 1333), 79 | ], 80 | keep_ratio=True, 81 | ), 82 | ], 83 | ], 84 | ), 85 | 
dict(type="PackDetInputs"), 86 | ] 87 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 88 | 89 | # optimizer 90 | optim_wrapper = dict( 91 | type="OptimWrapper", 92 | paramwise_cfg=dict( 93 | custom_keys={ 94 | "norm": dict(decay_mult=0.0), 95 | } 96 | ), 97 | optimizer=dict( 98 | _delete_=True, type="AdamW", lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05 99 | ), 100 | ) 101 | -------------------------------------------------------------------------------- /detection/configs/paca_vit/mask_rcnn_1x/mask_rcnn_pacavit_small_p2cconv_100_0_mstrain_480_800_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | "../../_base_/models/mask-rcnn_r50_fpn.py", 3 | "../../_base_/datasets/coco_instance.py", 4 | "../../_base_/schedules/schedule_1x.py", 5 | "../../_base_/default_runtime.py", 6 | ] 7 | 8 | 9 | model = dict( 10 | backbone=dict( 11 | _delete_=True, 12 | type="pacavit_small_p2cconv_100_0_downstream", 13 | drop_path_rate=0.1, 14 | layer_scale=None, 15 | pretrained=( 16 | "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_100_0.pth" 17 | ), 18 | ), 19 | neck=dict(in_channels=[96, 192, 320, 384]), 20 | ) 21 | 22 | img_norm_cfg = dict( 23 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True 24 | ) 25 | 26 | # augmentation strategy originates from DETR / Sparse RCNN 27 | train_pipeline = [ 28 | dict(type="LoadImageFromFile", file_client_args={{_base_.file_client_args}}), 29 | dict(type="LoadAnnotations", with_bbox=True, with_mask=True), 30 | dict(type="RandomFlip", prob=0.5), 31 | dict( 32 | type="RandomChoice", 33 | transforms=[ 34 | [ 35 | dict( 36 | type="RandomChoiceResize", 37 | scales=[ 38 | (480, 1333), 39 | (512, 1333), 40 | (544, 1333), 41 | (576, 1333), 42 | (608, 1333), 43 | (640, 1333), 44 | (672, 1333), 45 | (704, 1333), 46 | (736, 1333), 47 | (768, 1333), 48 | (800, 1333), 49 | ], 50 | keep_ratio=True, 51 | ) 52 | ], 53 | [ 54 | dict( 55 | 
type="RandomChoiceResize", 56 | scales=[(400, 1333), (500, 1333), (600, 1333)], 57 | keep_ratio=True, 58 | ), 59 | dict( 60 | type="RandomCrop", 61 | crop_type="absolute_range", 62 | crop_size=(384, 600), 63 | allow_negative_crop=True, 64 | ), 65 | dict( 66 | type="RandomChoiceResize", 67 | scales=[ 68 | (480, 1333), 69 | (512, 1333), 70 | (544, 1333), 71 | (576, 1333), 72 | (608, 1333), 73 | (640, 1333), 74 | (672, 1333), 75 | (704, 1333), 76 | (736, 1333), 77 | (768, 1333), 78 | (800, 1333), 79 | ], 80 | keep_ratio=True, 81 | ), 82 | ], 83 | ], 84 | ), 85 | dict(type="PackDetInputs"), 86 | ] 87 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 88 | 89 | # optimizer 90 | optim_wrapper = dict( 91 | type="OptimWrapper", 92 | paramwise_cfg=dict( 93 | custom_keys={ 94 | "norm": dict(decay_mult=0.0), 95 | } 96 | ), 97 | optimizer=dict( 98 | _delete_=True, type="AdamW", lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05 99 | ), 100 | ) 101 | -------------------------------------------------------------------------------- /detection/configs/paca_vit/mask_rcnn_1x/mask_rcnn_pacavit_small_p2cconv_100_49_mstrain_480_800_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | "../../_base_/models/mask-rcnn_r50_fpn.py", 3 | "../../_base_/datasets/coco_instance.py", 4 | "../../_base_/schedules/schedule_1x.py", 5 | "../../_base_/default_runtime.py", 6 | ] 7 | 8 | 9 | model = dict( 10 | backbone=dict( 11 | _delete_=True, 12 | type="pacavit_small_p2cconv_100_49_downstream", 13 | drop_path_rate=0.1, 14 | layer_scale=None, 15 | pretrained=( 16 | "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_100_49.pth" 17 | ), 18 | ), 19 | neck=dict(in_channels=[96, 192, 320, 384]), 20 | ) 21 | 22 | img_norm_cfg = dict( 23 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True 24 | ) 25 | 26 | # augmentation strategy originates from DETR / Sparse RCNN 27 | train_pipeline = [ 28 | 
dict(type="LoadImageFromFile", file_client_args={{_base_.file_client_args}}), 29 | dict(type="LoadAnnotations", with_bbox=True, with_mask=True), 30 | dict(type="RandomFlip", prob=0.5), 31 | dict( 32 | type="RandomChoice", 33 | transforms=[ 34 | [ 35 | dict( 36 | type="RandomChoiceResize", 37 | scales=[ 38 | (480, 1333), 39 | (512, 1333), 40 | (544, 1333), 41 | (576, 1333), 42 | (608, 1333), 43 | (640, 1333), 44 | (672, 1333), 45 | (704, 1333), 46 | (736, 1333), 47 | (768, 1333), 48 | (800, 1333), 49 | ], 50 | keep_ratio=True, 51 | ) 52 | ], 53 | [ 54 | dict( 55 | type="RandomChoiceResize", 56 | scales=[(400, 1333), (500, 1333), (600, 1333)], 57 | keep_ratio=True, 58 | ), 59 | dict( 60 | type="RandomCrop", 61 | crop_type="absolute_range", 62 | crop_size=(384, 600), 63 | allow_negative_crop=True, 64 | ), 65 | dict( 66 | type="RandomChoiceResize", 67 | scales=[ 68 | (480, 1333), 69 | (512, 1333), 70 | (544, 1333), 71 | (576, 1333), 72 | (608, 1333), 73 | (640, 1333), 74 | (672, 1333), 75 | (704, 1333), 76 | (736, 1333), 77 | (768, 1333), 78 | (800, 1333), 79 | ], 80 | keep_ratio=True, 81 | ), 82 | ], 83 | ], 84 | ), 85 | dict(type="PackDetInputs"), 86 | ] 87 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 88 | 89 | # optimizer 90 | optim_wrapper = dict( 91 | type="OptimWrapper", 92 | paramwise_cfg=dict( 93 | custom_keys={ 94 | "norm": dict(decay_mult=0.0), 95 | } 96 | ), 97 | optimizer=dict( 98 | _delete_=True, type="AdamW", lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05 99 | ), 100 | ) 101 | -------------------------------------------------------------------------------- /detection/configs/paca_vit/mask_rcnn_1x/mask_rcnn_pacavit_small_p2cconv_100_blockwise_mstrain_480_800_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | "../../_base_/models/mask-rcnn_r50_fpn.py", 3 | "../../_base_/datasets/coco_instance.py", 4 | "../../_base_/schedules/schedule_1x.py", 5 | 
"../../_base_/default_runtime.py", 6 | ] 7 | 8 | 9 | model = dict( 10 | backbone=dict( 11 | _delete_=True, 12 | type="pacavit_small_p2cconv_100_blockwise_downstream", 13 | drop_path_rate=0.1, 14 | layer_scale=None, 15 | pretrained=( 16 | "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_100_blockwise.pth" 17 | ), 18 | ), 19 | neck=dict(in_channels=[96, 192, 320, 384]), 20 | ) 21 | 22 | img_norm_cfg = dict( 23 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True 24 | ) 25 | 26 | # augmentation strategy originates from DETR / Sparse RCNN 27 | train_pipeline = [ 28 | dict(type="LoadImageFromFile", file_client_args={{_base_.file_client_args}}), 29 | dict(type="LoadAnnotations", with_bbox=True, with_mask=True), 30 | dict(type="RandomFlip", prob=0.5), 31 | dict( 32 | type="RandomChoice", 33 | transforms=[ 34 | [ 35 | dict( 36 | type="RandomChoiceResize", 37 | scales=[ 38 | (480, 1333), 39 | (512, 1333), 40 | (544, 1333), 41 | (576, 1333), 42 | (608, 1333), 43 | (640, 1333), 44 | (672, 1333), 45 | (704, 1333), 46 | (736, 1333), 47 | (768, 1333), 48 | (800, 1333), 49 | ], 50 | keep_ratio=True, 51 | ) 52 | ], 53 | [ 54 | dict( 55 | type="RandomChoiceResize", 56 | scales=[(400, 1333), (500, 1333), (600, 1333)], 57 | keep_ratio=True, 58 | ), 59 | dict( 60 | type="RandomCrop", 61 | crop_type="absolute_range", 62 | crop_size=(384, 600), 63 | allow_negative_crop=True, 64 | ), 65 | dict( 66 | type="RandomChoiceResize", 67 | scales=[ 68 | (480, 1333), 69 | (512, 1333), 70 | (544, 1333), 71 | (576, 1333), 72 | (608, 1333), 73 | (640, 1333), 74 | (672, 1333), 75 | (704, 1333), 76 | (736, 1333), 77 | (768, 1333), 78 | (800, 1333), 79 | ], 80 | keep_ratio=True, 81 | ), 82 | ], 83 | ], 84 | ), 85 | dict(type="PackDetInputs"), 86 | ] 87 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 88 | 89 | # optimizer 90 | optim_wrapper = dict( 91 | type="OptimWrapper", 92 | paramwise_cfg=dict( 93 | custom_keys={ 94 | "norm": 
dict(decay_mult=0.0), 95 | } 96 | ), 97 | optimizer=dict( 98 | _delete_=True, type="AdamW", lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05 99 | ), 100 | ) 101 | -------------------------------------------------------------------------------- /detection/configs/paca_vit/mask_rcnn_1x/mask_rcnn_pacavit_small_p2cconv_100_mstrain_480_800_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | "../../_base_/models/mask-rcnn_r50_fpn.py", 3 | "../../_base_/datasets/coco_instance.py", 4 | "../../_base_/schedules/schedule_1x.py", 5 | "../../_base_/default_runtime.py", 6 | ] 7 | 8 | 9 | model = dict( 10 | backbone=dict( 11 | _delete_=True, 12 | type="pacavit_small_p2cconv_100_downstream", 13 | drop_path_rate=0.1, 14 | layer_scale=None, 15 | pretrained=( 16 | "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_100.pth" 17 | ), 18 | ), 19 | neck=dict(in_channels=[96, 192, 320, 384]), 20 | ) 21 | 22 | img_norm_cfg = dict( 23 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True 24 | ) 25 | 26 | # augmentation strategy originates from DETR / Sparse RCNN 27 | train_pipeline = [ 28 | dict(type="LoadImageFromFile", file_client_args={{_base_.file_client_args}}), 29 | dict(type="LoadAnnotations", with_bbox=True, with_mask=True), 30 | dict(type="RandomFlip", prob=0.5), 31 | dict( 32 | type="RandomChoice", 33 | transforms=[ 34 | [ 35 | dict( 36 | type="RandomChoiceResize", 37 | scales=[ 38 | (480, 1333), 39 | (512, 1333), 40 | (544, 1333), 41 | (576, 1333), 42 | (608, 1333), 43 | (640, 1333), 44 | (672, 1333), 45 | (704, 1333), 46 | (736, 1333), 47 | (768, 1333), 48 | (800, 1333), 49 | ], 50 | keep_ratio=True, 51 | ) 52 | ], 53 | [ 54 | dict( 55 | type="RandomChoiceResize", 56 | scales=[(400, 1333), (500, 1333), (600, 1333)], 57 | keep_ratio=True, 58 | ), 59 | dict( 60 | type="RandomCrop", 61 | crop_type="absolute_range", 62 | crop_size=(384, 600), 63 | allow_negative_crop=True, 64 | ), 
65 | dict( 66 | type="RandomChoiceResize", 67 | scales=[ 68 | (480, 1333), 69 | (512, 1333), 70 | (544, 1333), 71 | (576, 1333), 72 | (608, 1333), 73 | (640, 1333), 74 | (672, 1333), 75 | (704, 1333), 76 | (736, 1333), 77 | (768, 1333), 78 | (800, 1333), 79 | ], 80 | keep_ratio=True, 81 | ), 82 | ], 83 | ], 84 | ), 85 | dict(type="PackDetInputs"), 86 | ] 87 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 88 | 89 | # optimizer 90 | optim_wrapper = dict( 91 | type="OptimWrapper", 92 | paramwise_cfg=dict( 93 | custom_keys={ 94 | "norm": dict(decay_mult=0.0), 95 | } 96 | ), 97 | optimizer=dict( 98 | _delete_=True, type="AdamW", lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05 99 | ), 100 | ) 101 | -------------------------------------------------------------------------------- /detection/configs/paca_vit/mask_rcnn_1x/mask_rcnn_pacavit_small_p2cconv_2_0_mstrain_480_800_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | "../../_base_/models/mask-rcnn_r50_fpn.py", 3 | "../../_base_/datasets/coco_instance.py", 4 | "../../_base_/schedules/schedule_1x.py", 5 | "../../_base_/default_runtime.py", 6 | ] 7 | 8 | 9 | model = dict( 10 | backbone=dict( 11 | _delete_=True, 12 | type="pacavit_small_p2cconv_2_0_downstream", 13 | drop_path_rate=0.1, 14 | layer_scale=None, 15 | pretrained=( 16 | "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_2_0.pth" 17 | ), 18 | ), 19 | neck=dict(in_channels=[96, 192, 320, 384]), 20 | ) 21 | 22 | img_norm_cfg = dict( 23 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True 24 | ) 25 | 26 | # augmentation strategy originates from DETR / Sparse RCNN 27 | train_pipeline = [ 28 | dict(type="LoadImageFromFile", file_client_args={{_base_.file_client_args}}), 29 | dict(type="LoadAnnotations", with_bbox=True, with_mask=True), 30 | dict(type="RandomFlip", prob=0.5), 31 | dict( 32 | type="RandomChoice", 33 | transforms=[ 34 | [ 35 | dict( 
36 | type="RandomChoiceResize", 37 | scales=[ 38 | (480, 1333), 39 | (512, 1333), 40 | (544, 1333), 41 | (576, 1333), 42 | (608, 1333), 43 | (640, 1333), 44 | (672, 1333), 45 | (704, 1333), 46 | (736, 1333), 47 | (768, 1333), 48 | (800, 1333), 49 | ], 50 | keep_ratio=True, 51 | ) 52 | ], 53 | [ 54 | dict( 55 | type="RandomChoiceResize", 56 | scales=[(400, 1333), (500, 1333), (600, 1333)], 57 | keep_ratio=True, 58 | ), 59 | dict( 60 | type="RandomCrop", 61 | crop_type="absolute_range", 62 | crop_size=(384, 600), 63 | allow_negative_crop=True, 64 | ), 65 | dict( 66 | type="RandomChoiceResize", 67 | scales=[ 68 | (480, 1333), 69 | (512, 1333), 70 | (544, 1333), 71 | (576, 1333), 72 | (608, 1333), 73 | (640, 1333), 74 | (672, 1333), 75 | (704, 1333), 76 | (736, 1333), 77 | (768, 1333), 78 | (800, 1333), 79 | ], 80 | keep_ratio=True, 81 | ), 82 | ], 83 | ], 84 | ), 85 | dict(type="PackDetInputs"), 86 | ] 87 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 88 | 89 | # optimizer 90 | optim_wrapper = dict( 91 | type="OptimWrapper", 92 | paramwise_cfg=dict( 93 | custom_keys={ 94 | "norm": dict(decay_mult=0.0), 95 | } 96 | ), 97 | optimizer=dict( 98 | _delete_=True, type="AdamW", lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05 99 | ), 100 | ) 101 | -------------------------------------------------------------------------------- /detection/configs/paca_vit/mask_rcnn_1x/mask_rcnn_pacavit_small_p2cconv_49_0_mstrain_480_800_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | "../../_base_/models/mask-rcnn_r50_fpn.py", 3 | "../../_base_/datasets/coco_instance.py", 4 | "../../_base_/schedules/schedule_1x.py", 5 | "../../_base_/default_runtime.py", 6 | ] 7 | 8 | 9 | model = dict( 10 | backbone=dict( 11 | _delete_=True, 12 | type="pacavit_small_p2cconv_49_0_downstream", 13 | drop_path_rate=0.1, 14 | layer_scale=None, 15 | pretrained=( 16 | 
"../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_49_0.pth" 17 | ), 18 | ), 19 | neck=dict(in_channels=[96, 192, 320, 384]), 20 | ) 21 | 22 | img_norm_cfg = dict( 23 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True 24 | ) 25 | 26 | # augmentation strategy originates from DETR / Sparse RCNN 27 | train_pipeline = [ 28 | dict(type="LoadImageFromFile", file_client_args={{_base_.file_client_args}}), 29 | dict(type="LoadAnnotations", with_bbox=True, with_mask=True), 30 | dict(type="RandomFlip", prob=0.5), 31 | dict( 32 | type="RandomChoice", 33 | transforms=[ 34 | [ 35 | dict( 36 | type="RandomChoiceResize", 37 | scales=[ 38 | (480, 1333), 39 | (512, 1333), 40 | (544, 1333), 41 | (576, 1333), 42 | (608, 1333), 43 | (640, 1333), 44 | (672, 1333), 45 | (704, 1333), 46 | (736, 1333), 47 | (768, 1333), 48 | (800, 1333), 49 | ], 50 | keep_ratio=True, 51 | ) 52 | ], 53 | [ 54 | dict( 55 | type="RandomChoiceResize", 56 | scales=[(400, 1333), (500, 1333), (600, 1333)], 57 | keep_ratio=True, 58 | ), 59 | dict( 60 | type="RandomCrop", 61 | crop_type="absolute_range", 62 | crop_size=(384, 600), 63 | allow_negative_crop=True, 64 | ), 65 | dict( 66 | type="RandomChoiceResize", 67 | scales=[ 68 | (480, 1333), 69 | (512, 1333), 70 | (544, 1333), 71 | (576, 1333), 72 | (608, 1333), 73 | (640, 1333), 74 | (672, 1333), 75 | (704, 1333), 76 | (736, 1333), 77 | (768, 1333), 78 | (800, 1333), 79 | ], 80 | keep_ratio=True, 81 | ), 82 | ], 83 | ], 84 | ), 85 | dict(type="PackDetInputs"), 86 | ] 87 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 88 | 89 | # optimizer 90 | optim_wrapper = dict( 91 | type="OptimWrapper", 92 | paramwise_cfg=dict( 93 | custom_keys={ 94 | "norm": dict(decay_mult=0.0), 95 | } 96 | ), 97 | optimizer=dict( 98 | _delete_=True, type="AdamW", lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05 99 | ), 100 | ) 101 | -------------------------------------------------------------------------------- 
/detection/configs/paca_vit/mask_rcnn_1x/mask_rcnn_pacavit_small_p2cconv_49_100_mstrain_480_800_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | "../../_base_/models/mask-rcnn_r50_fpn.py", 3 | "../../_base_/datasets/coco_instance.py", 4 | "../../_base_/schedules/schedule_1x.py", 5 | "../../_base_/default_runtime.py", 6 | ] 7 | 8 | 9 | model = dict( 10 | backbone=dict( 11 | _delete_=True, 12 | type="pacavit_small_p2cconv_49_100_downstream", 13 | drop_path_rate=0.1, 14 | layer_scale=None, 15 | pretrained=( 16 | "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_49_100.pth" 17 | ), 18 | ), 19 | neck=dict(in_channels=[96, 192, 320, 384]), 20 | ) 21 | 22 | img_norm_cfg = dict( 23 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True 24 | ) 25 | 26 | # augmentation strategy originates from DETR / Sparse RCNN 27 | train_pipeline = [ 28 | dict(type="LoadImageFromFile", file_client_args={{_base_.file_client_args}}), 29 | dict(type="LoadAnnotations", with_bbox=True, with_mask=True), 30 | dict(type="RandomFlip", prob=0.5), 31 | dict( 32 | type="RandomChoice", 33 | transforms=[ 34 | [ 35 | dict( 36 | type="RandomChoiceResize", 37 | scales=[ 38 | (480, 1333), 39 | (512, 1333), 40 | (544, 1333), 41 | (576, 1333), 42 | (608, 1333), 43 | (640, 1333), 44 | (672, 1333), 45 | (704, 1333), 46 | (736, 1333), 47 | (768, 1333), 48 | (800, 1333), 49 | ], 50 | keep_ratio=True, 51 | ) 52 | ], 53 | [ 54 | dict( 55 | type="RandomChoiceResize", 56 | scales=[(400, 1333), (500, 1333), (600, 1333)], 57 | keep_ratio=True, 58 | ), 59 | dict( 60 | type="RandomCrop", 61 | crop_type="absolute_range", 62 | crop_size=(384, 600), 63 | allow_negative_crop=True, 64 | ), 65 | dict( 66 | type="RandomChoiceResize", 67 | scales=[ 68 | (480, 1333), 69 | (512, 1333), 70 | (544, 1333), 71 | (576, 1333), 72 | (608, 1333), 73 | (640, 1333), 74 | (672, 1333), 75 | (704, 1333), 76 | (736, 1333), 77 | (768, 1333), 
78 | (800, 1333), 79 | ], 80 | keep_ratio=True, 81 | ), 82 | ], 83 | ], 84 | ), 85 | dict(type="PackDetInputs"), 86 | ] 87 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 88 | 89 | # optimizer 90 | optim_wrapper = dict( 91 | type="OptimWrapper", 92 | paramwise_cfg=dict( 93 | custom_keys={ 94 | "norm": dict(decay_mult=0.0), 95 | } 96 | ), 97 | optimizer=dict( 98 | _delete_=True, type="AdamW", lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05 99 | ), 100 | ) 101 | -------------------------------------------------------------------------------- /detection/configs/paca_vit/mask_rcnn_1x/mask_rcnn_pacavit_small_p2cmlp_100_0_mstrain_480_800_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | "../../_base_/models/mask-rcnn_r50_fpn.py", 3 | "../../_base_/datasets/coco_instance.py", 4 | "../../_base_/schedules/schedule_1x.py", 5 | "../../_base_/default_runtime.py", 6 | ] 7 | 8 | 9 | model = dict( 10 | backbone=dict( 11 | _delete_=True, 12 | type="pacavit_small_p2cmlp_100_0_downstream", 13 | drop_path_rate=0.1, 14 | layer_scale=None, 15 | pretrained=( 16 | "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cmlp_100_0.pth" 17 | ), 18 | ), 19 | neck=dict(in_channels=[96, 192, 320, 384]), 20 | ) 21 | 22 | img_norm_cfg = dict( 23 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True 24 | ) 25 | 26 | # augmentation strategy originates from DETR / Sparse RCNN 27 | train_pipeline = [ 28 | dict(type="LoadImageFromFile", file_client_args={{_base_.file_client_args}}), 29 | dict(type="LoadAnnotations", with_bbox=True, with_mask=True), 30 | dict(type="RandomFlip", prob=0.5), 31 | dict( 32 | type="RandomChoice", 33 | transforms=[ 34 | [ 35 | dict( 36 | type="RandomChoiceResize", 37 | scales=[ 38 | (480, 1333), 39 | (512, 1333), 40 | (544, 1333), 41 | (576, 1333), 42 | (608, 1333), 43 | (640, 1333), 44 | (672, 1333), 45 | (704, 1333), 46 | (736, 1333), 47 | (768, 1333), 48 | 
(800, 1333), 49 | ], 50 | keep_ratio=True, 51 | ) 52 | ], 53 | [ 54 | dict( 55 | type="RandomChoiceResize", 56 | scales=[(400, 1333), (500, 1333), (600, 1333)], 57 | keep_ratio=True, 58 | ), 59 | dict( 60 | type="RandomCrop", 61 | crop_type="absolute_range", 62 | crop_size=(384, 600), 63 | allow_negative_crop=True, 64 | ), 65 | dict( 66 | type="RandomChoiceResize", 67 | scales=[ 68 | (480, 1333), 69 | (512, 1333), 70 | (544, 1333), 71 | (576, 1333), 72 | (608, 1333), 73 | (640, 1333), 74 | (672, 1333), 75 | (704, 1333), 76 | (736, 1333), 77 | (768, 1333), 78 | (800, 1333), 79 | ], 80 | keep_ratio=True, 81 | ), 82 | ], 83 | ], 84 | ), 85 | dict(type="PackDetInputs"), 86 | ] 87 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 88 | 89 | # optimizer 90 | optim_wrapper = dict( 91 | type="OptimWrapper", 92 | paramwise_cfg=dict( 93 | custom_keys={ 94 | "norm": dict(decay_mult=0.0), 95 | } 96 | ), 97 | optimizer=dict( 98 | _delete_=True, type="AdamW", lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05 99 | ), 100 | ) 101 | -------------------------------------------------------------------------------- /detection/configs/paca_vit/mask_rcnn_1x/mask_rcnn_pacavit_tiny_p2cconv_100_0_mstrain_480_800_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | "../../_base_/models/mask-rcnn_r50_fpn.py", 3 | "../../_base_/datasets/coco_instance.py", 4 | "../../_base_/schedules/schedule_1x.py", 5 | "../../_base_/default_runtime.py", 6 | ] 7 | 8 | 9 | model = dict( 10 | backbone=dict( 11 | _delete_=True, 12 | type="pacavit_tiny_p2cconv_100_0_downstream", 13 | drop_path_rate=0.1, 14 | layer_scale=None, 15 | pretrained=( 16 | "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_tiny_p2cconv_100_0.pth" 17 | ), 18 | ), 19 | neck=dict(in_channels=[96, 192, 320, 384]), 20 | ) 21 | 22 | img_norm_cfg = dict( 23 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True 24 | ) 25 | 26 | # augmentation 
strategy originates from DETR / Sparse RCNN 27 | train_pipeline = [ 28 | dict(type="LoadImageFromFile", file_client_args={{_base_.file_client_args}}), 29 | dict(type="LoadAnnotations", with_bbox=True, with_mask=True), 30 | dict(type="RandomFlip", prob=0.5), 31 | dict( 32 | type="RandomChoice", 33 | transforms=[ 34 | [ 35 | dict( 36 | type="RandomChoiceResize", 37 | scales=[ 38 | (480, 1333), 39 | (512, 1333), 40 | (544, 1333), 41 | (576, 1333), 42 | (608, 1333), 43 | (640, 1333), 44 | (672, 1333), 45 | (704, 1333), 46 | (736, 1333), 47 | (768, 1333), 48 | (800, 1333), 49 | ], 50 | keep_ratio=True, 51 | ) 52 | ], 53 | [ 54 | dict( 55 | type="RandomChoiceResize", 56 | scales=[(400, 1333), (500, 1333), (600, 1333)], 57 | keep_ratio=True, 58 | ), 59 | dict( 60 | type="RandomCrop", 61 | crop_type="absolute_range", 62 | crop_size=(384, 600), 63 | allow_negative_crop=True, 64 | ), 65 | dict( 66 | type="RandomChoiceResize", 67 | scales=[ 68 | (480, 1333), 69 | (512, 1333), 70 | (544, 1333), 71 | (576, 1333), 72 | (608, 1333), 73 | (640, 1333), 74 | (672, 1333), 75 | (704, 1333), 76 | (736, 1333), 77 | (768, 1333), 78 | (800, 1333), 79 | ], 80 | keep_ratio=True, 81 | ), 82 | ], 83 | ], 84 | ), 85 | dict(type="PackDetInputs"), 86 | ] 87 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 88 | 89 | # optimizer 90 | optim_wrapper = dict( 91 | type="OptimWrapper", 92 | paramwise_cfg=dict( 93 | custom_keys={ 94 | "norm": dict(decay_mult=0.0), 95 | } 96 | ), 97 | optimizer=dict( 98 | _delete_=True, type="AdamW", lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05 99 | ), 100 | ) 101 | -------------------------------------------------------------------------------- /detection/get_flops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
import argparse
import re
import tempfile
from functools import partial
from pathlib import Path

import torch
from mmengine.config import Config, DictAction
from mmengine.logging import MMLogger
from mmengine.model import revert_sync_batchnorm
from mmengine.registry import init_default_scope
from mmengine.runner import Runner

from mmdet.registry import MODELS

try:
    from mmengine.analysis import get_model_complexity_info
    from mmengine.analysis.print_helper import _format_size
except ImportError:
    raise ImportError("Please upgrade mmengine >= 0.6.0")

# Star import kept from the original script.
# NOTE(review): presumably imported for its side effect of registering the
# project's backbones with the mm registries -- confirm against models/.
from models import *
from torchprofile import profile_macs


def parse_args():
    """Parse the command line of the complexity-counting script.

    Returns:
        argparse.Namespace: with ``config`` (path), ``shape`` (one or two
        ints, default ``[1280, 800]``) and ``cfg_options`` (dict or None).
    """
    parser = argparse.ArgumentParser(description="Get a detector flops")
    parser.add_argument("config", help="train config file path")
    parser.add_argument(
        "--shape", type=int, nargs="+", default=[1280, 800], help="input image size"
    )
    parser.add_argument(
        "--cfg-options",
        nargs="+",
        action=DictAction,
        help="override some settings in the used config, the key-value pair "
        "in xxx=yyy format will be merged into config file. If the value to "
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        "Note that the quotation marks are necessary and that no white space "
        "is allowed.",
    )
    args = parser.parse_args()
    return args


def _torch_older_than(version, major, minor):
    """Return True when ``version`` precedes ``major.minor`` numerically.

    A plain string comparison is wrong here: ``"1.9" < "1.12"`` is False
    because strings compare character by character. Only the first two
    numeric components are compared; an unparsable version yields False so
    that no spurious warning is emitted.
    """
    numbers = re.findall(r"\d+", str(version))
    if len(numbers) < 2:
        return False
    return (int(numbers[0]), int(numbers[1])) < (major, minor)


def _build_model(cfg):
    """Build ``cfg.model``, move it to CUDA if available, revert SyncBN."""
    model = MODELS.build(cfg.model)
    if torch.cuda.is_available():
        model = model.cuda()
    return revert_sync_batchnorm(model)


def _measure(model, inputs):
    """Return ``(flops, params, torchprofile_count)`` for ``model`` on ``inputs``."""
    outputs = get_model_complexity_info(
        model, None, inputs=inputs, show_table=False, show_arch=False
    )
    # NOTE(review): torchprofile's ``profile_macs`` is named after MACs, while
    # the value is reported below under a "flops" label -- confirm the unit
    # before quoting the number.
    return outputs["flops"], outputs["params"], profile_macs(model, inputs)


def inference(args, logger):
    """Build the detector described by ``args.config`` and measure it.

    The model is first profiled on a random ``3 x h x w`` tensor. If that
    raises ``TypeError``, one batch is drawn from ``cfg.val_dataloader`` and
    its ``data_samples`` are bound to ``model.forward`` before profiling.

    Args:
        args: namespace produced by :func:`parse_args`.
        logger: object with ``warning`` and ``error`` methods.

    Returns:
        dict: keys ``ori_shape``, ``pad_shape``, ``compute_type``, ``flops``,
        ``torchprofile_flops`` and ``params``; the last three are passed
        through ``_format_size``.

    Raises:
        ValueError: if ``args.shape`` has more than two entries.
    """
    # Numeric comparison; the previous string comparison never warned for
    # torch 1.2 - 1.9.
    if _torch_older_than(torch.__version__, 1, 12):
        logger.warning(
            "Some config files, such as configs/yolact and configs/detectors,"
            "may have compatibility issues with torch.jit when torch<1.12. "
            "If you want to calculate flops for these models, "
            "please make sure your pytorch version is >=1.12."
        )

    config_name = Path(args.config)
    if not config_name.exists():
        logger.error(f"{config_name} not found.")

    cfg = Config.fromfile(args.config)
    # Only the path string is kept; the TemporaryDirectory object itself is
    # discarded immediately.
    cfg.work_dir = tempfile.TemporaryDirectory().name
    cfg.log_level = "WARN"
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    init_default_scope(cfg.get("default_scope", "mmdet"))

    # TODO: The following usage is temporary and not safe
    # use hard code to convert mmSyncBN to SyncBN. This is a known
    # bug in mmengine, mmSyncBN requires a distributed environment,
    # this question involves models like configs/strong_baselines
    if hasattr(cfg, "head_norm_cfg"):
        cfg["head_norm_cfg"] = dict(type="SyncBN", requires_grad=True)
        cfg["model"]["roi_head"]["bbox_head"]["norm_cfg"] = dict(
            type="SyncBN", requires_grad=True
        )
        cfg["model"]["roi_head"]["mask_head"]["norm_cfg"] = dict(
            type="SyncBN", requires_grad=True
        )

    # One value means a square input; two values are unpacked as (h, w).
    if len(args.shape) == 1:
        h = w = args.shape[0]
    elif len(args.shape) == 2:
        h, w = args.shape
    else:
        raise ValueError("invalid input shape")
    result = {}

    # Supports two ways to calculate flops,
    # 1. randomly generate a picture
    # 2. load a picture from the dataset
    # In two stage detectors, _forward need batch_samples to get
    # rpn_results_list, then use rpn_results_list to compute flops,
    # so only the second way is supported
    try:
        model = _build_model(cfg)
        data_batch = {"inputs": [torch.rand(3, h, w)], "batch_samples": [None]}
        data = model.data_preprocessor(data_batch)
        result["ori_shape"] = (h, w)
        result["pad_shape"] = data["inputs"].shape[-2:]
        model.eval()
        flops, params, tp_flops = _measure(model, data["inputs"])
        result["compute_type"] = "direct: randomly generate a picture"
    except TypeError:
        logger.warning("Failed to directly get FLOPs, try to get flops with real data")
        data_loader = Runner.build_dataloader(cfg.val_dataloader)
        data_batch = next(iter(data_loader))
        model = _build_model(cfg)
        model.eval()
        _forward = model.forward
        data = model.data_preprocessor(data_batch)
        result["ori_shape"] = data["data_samples"][0].ori_shape
        result["pad_shape"] = data["data_samples"][0].pad_shape

        del data_loader
        # Bind the samples so the profilers can call forward with inputs only.
        model.forward = partial(_forward, data_samples=data["data_samples"])
        flops, params, tp_flops = _measure(model, data["inputs"])
        result["compute_type"] = "dataloader: load a picture from the dataset"

    result["flops"] = _format_size(flops)
    result["torchprofile_flops"] = _format_size(tp_flops)
    result["params"] = _format_size(params)

    return result


def main():
    """Run :func:`inference` and print a human-readable summary."""
    args = parse_args()
    logger = MMLogger.get_instance(name="MMLogger")
    result = inference(args, logger)
    split_line = "=" * 30
    ori_shape = result["ori_shape"]
    pad_shape = result["pad_shape"]
    flops = result["flops"]
    torchprofile_flops = result["torchprofile_flops"]
    params = result["params"]
    compute_type = result["compute_type"]

    if pad_shape != ori_shape:
        print(
            f"{split_line}\nUse size divisor set input shape "
            f"from {ori_shape} to {pad_shape}"
        )
    print(
        f"{split_line}\nCompute type: {compute_type}\n"
        f"Input shape: {pad_shape}\nFlops: {flops}\nFlops (torchprofile): {torchprofile_flops}\n"
        f"Params: {params}\n{split_line}"
    )
    print(
        "!!!Please be cautious if you use the results in papers. "
        "You may need to check if all ops are supported and verify "
        "that the flops computation is correct."
    )


if __name__ == "__main__":
    main()
import argparse
import os
import os.path as osp
import warnings
from copy import deepcopy

from mmengine import ConfigDict
from mmengine.config import Config, DictAction
from mmengine.runner import Runner

from mmdet.engine.hooks.utils import trigger_visualization_hook
from mmdet.evaluation import DumpDetResults
from mmdet.registry import RUNNERS

# Star import kept from the original script.
# NOTE(review): presumably imported for its side effect of registering the
# project's backbones with the mm registries -- confirm against models/.
from models import *


# TODO: support fuse_conv_bn and format_only
def parse_args():
    """Parse the command line of the test/evaluation script.

    Also exports ``LOCAL_RANK`` into the environment (from ``--local_rank``)
    when the variable is not already set.

    Returns:
        argparse.Namespace: the parsed arguments.
    """
    parser = argparse.ArgumentParser(description="MMDet test (and eval) a model")
    parser.add_argument("config", help="test config file path")
    parser.add_argument("checkpoint", help="checkpoint file")
    parser.add_argument(
        "--work-dir",
        help="the directory to save the file containing evaluation metrics",
    )
    parser.add_argument(
        "--out",
        type=str,
        help="dump predictions to a pickle file for offline evaluation",
    )
    parser.add_argument("--show", action="store_true", help="show prediction results")
    parser.add_argument(
        "--show-dir",
        help="directory where painted images will be saved. "
        "If specified, it will be automatically saved "
        "to the work_dir/timestamp/show_dir",
    )
    parser.add_argument(
        "--wait-time", type=float, default=2, help="the interval of show (s)"
    )
    parser.add_argument(
        "--cfg-options",
        nargs="+",
        action=DictAction,
        help="override some settings in the used config, the key-value pair "
        "in xxx=yyy format will be merged into config file. If the value to "
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        "Note that the quotation marks are necessary and that no white space "
        "is allowed.",
    )
    parser.add_argument(
        "--launcher",
        choices=["none", "pytorch", "slurm", "mpi"],
        default="none",
        help="job launcher",
    )
    parser.add_argument("--tta", action="store_true")
    parser.add_argument("--local_rank", type=int, default=0)
    args = parser.parse_args()
    if "LOCAL_RANK" not in os.environ:
        os.environ["LOCAL_RANK"] = str(args.local_rank)
    return args


def main():
    """Load the config, optionally enable TTA, build the runner and test.

    Raises:
        ValueError: if ``--out`` is given without a ``.pkl``/``.pickle``
            suffix. The check runs before the runner is built so that a bad
            path fails fast instead of after model/dataset construction.
    """
    args = parse_args()

    # Validate the dump path up front; an explicit raise (rather than an
    # ``assert``) keeps the check active under ``python -O``.
    if args.out is not None and not args.out.endswith((".pkl", ".pickle")):
        raise ValueError("The dump file must be a pkl file.")

    # load config
    cfg = Config.fromfile(args.config)
    cfg.launcher = args.launcher
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get("work_dir", None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join(
            "./work_dirs", osp.splitext(osp.basename(args.config))[0]
        )

    cfg.load_from = args.checkpoint

    if args.show or args.show_dir:
        cfg = trigger_visualization_hook(cfg, args)

    if args.tta:
        if "tta_model" not in cfg:
            warnings.warn(
                "Cannot find ``tta_model`` in config, " "we will set it as default."
            )
            cfg.tta_model = dict(
                type="DetTTAModel",
                tta_cfg=dict(nms=dict(type="nms", iou_threshold=0.5), max_per_img=100),
            )
        if "tta_pipeline" not in cfg:
            warnings.warn(
                "Cannot find ``tta_pipeline`` in config, " "we will set it as default."
            )
            # Walk through nested dataset wrappers to reach the cfg that
            # carries the pipeline.
            test_data_cfg = cfg.test_dataloader.dataset
            while "dataset" in test_data_cfg:
                test_data_cfg = test_data_cfg["dataset"]
            cfg.tta_pipeline = deepcopy(test_data_cfg.pipeline)
            flip_tta = dict(
                type="TestTimeAug",
                transforms=[
                    [
                        dict(type="RandomFlip", prob=1.0),
                        dict(type="RandomFlip", prob=0.0),
                    ],
                    [
                        dict(
                            type="PackDetInputs",
                            meta_keys=(
                                "img_id",
                                "img_path",
                                "ori_shape",
                                "img_shape",
                                "scale_factor",
                                "flip",
                                "flip_direction",
                            ),
                        )
                    ],
                ],
            )
            # The last pipeline step is replaced by the flip TTA wrapper.
            cfg.tta_pipeline[-1] = flip_tta
        cfg.model = ConfigDict(**cfg.tta_model, module=cfg.model)
        # NOTE(review): the pipeline is read from the innermost wrapped
        # dataset above but written to the outermost one here -- confirm
        # this is intended when the test dataset is wrapped.
        cfg.test_dataloader.dataset.pipeline = cfg.tta_pipeline

    # build the runner from config
    if "runner_type" not in cfg:
        # build the default runner
        runner = Runner.from_cfg(cfg)
    else:
        # build customized runner from the registry
        # if 'runner_type' is set in the cfg
        runner = RUNNERS.build(cfg)

    # add `DumpResults` dummy metric
    if args.out is not None:
        runner.test_evaluator.metrics.append(DumpDetResults(out_file_path=args.out))

    # start testing
    runner.test()


if __name__ == "__main__":
    main()
import argparse
import logging
import os
import os.path as osp

from mmengine.config import Config, DictAction
from mmengine.logging import print_log
from mmengine.registry import RUNNERS
from mmengine.runner import Runner

# Star import kept from the original script.
# NOTE(review): presumably imported for its side effect of registering the
# project's backbones with the mm registries -- confirm against models/.
from models import *


def parse_args():
    """Parse the training command line.

    ``LOCAL_RANK`` is exported from ``--local_rank`` when the environment
    does not already define it.

    Returns:
        argparse.Namespace: the parsed arguments.
    """
    parser = argparse.ArgumentParser(description="Train a detector")
    parser.add_argument("config", help="train config file path")
    parser.add_argument("--work-dir", help="the dir to save logs and models")
    parser.add_argument(
        "--amp",
        action="store_true",
        default=False,
        help="enable automatic-mixed-precision training",
    )
    parser.add_argument(
        "--auto-scale-lr", action="store_true", help="enable automatically scaling LR."
    )
    parser.add_argument(
        "--resume",
        nargs="?",
        type=str,
        const="auto",
        help="If specify checkpoint path, resume from it, while if not "
        "specify, try to auto resume from the latest checkpoint "
        "in the work directory.",
    )
    parser.add_argument(
        "--cfg-options",
        nargs="+",
        action=DictAction,
        help="override some settings in the used config, the key-value pair "
        "in xxx=yyy format will be merged into config file. If the value to "
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        "Note that the quotation marks are necessary and that no white space "
        "is allowed.",
    )
    parser.add_argument(
        "--launcher",
        choices=["none", "pytorch", "slurm", "mpi"],
        default="none",
        help="job launcher",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    args = parser.parse_args()
    os.environ.setdefault("LOCAL_RANK", str(args.local_rank))

    return args


def _set_work_dir(cfg, args):
    """Pick the work dir: CLI value, else the config's, else the config name."""
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
        return
    if cfg.get("work_dir", None) is None:
        stem = osp.splitext(osp.basename(args.config))[0]
        cfg.work_dir = osp.join("./work_dirs", stem)


def _enable_amp(cfg):
    """Switch the optimizer wrapper to ``AmpOptimWrapper`` with dynamic scaling."""
    wrapper_type = cfg.optim_wrapper.type
    if wrapper_type == "AmpOptimWrapper":
        print_log(
            "AMP training is already enabled in your config.",
            logger="current",
            level=logging.WARNING,
        )
        return
    assert wrapper_type == "OptimWrapper", (
        "`--amp` is only supported when the optimizer wrapper type is "
        f"`OptimWrapper` but got {wrapper_type}."
    )
    cfg.optim_wrapper.type = "AmpOptimWrapper"
    cfg.optim_wrapper.loss_scale = "dynamic"


def _enable_auto_scale_lr(cfg):
    """Turn on ``auto_scale_lr``; the config must already declare it fully."""
    declared = (
        "auto_scale_lr" in cfg
        and "enable" in cfg.auto_scale_lr
        and "base_batch_size" in cfg.auto_scale_lr
    )
    if not declared:
        raise RuntimeError(
            'Can not find "auto_scale_lr" or '
            '"auto_scale_lr.enable" or '
            '"auto_scale_lr.base_batch_size" in your'
            " configuration file."
        )
    cfg.auto_scale_lr.enable = True


def _set_resume(cfg, resume):
    """Apply ``--resume``: ``"auto"`` resumes the latest, a path resumes from it."""
    if resume is None:
        return
    cfg.resume = True
    cfg.load_from = None if resume == "auto" else resume


def main():
    """Assemble the config from the CLI arguments and launch training."""
    args = parse_args()

    # Load the config file and fold in command-line overrides.
    cfg = Config.fromfile(args.config)
    cfg.launcher = args.launcher
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    _set_work_dir(cfg, args)

    if args.amp is True:
        _enable_amp(cfg)

    if args.auto_scale_lr:
        _enable_auto_scale_lr(cfg)

    _set_resume(cfg, args.resume)

    # A config carrying ``runner_type`` is built through the registry;
    # otherwise the default Runner is used.
    if "runner_type" in cfg:
        runner = RUNNERS.build(cfg)
    else:
        runner = Runner.from_cfg(cfg)

    runner.train()


if __name__ == "__main__":
    main()
if [ -d $WORK_DIR ]; then 27 | echo "$WORK_DIR --- Already exists" 28 | if [ $2 -gt 0 ]; then 29 | while true; do 30 | read -p "Are you sure to delete this result directory? " yn 31 | case $yn in 32 | [Yy]* ) rm -r $WORK_DIR; mkdir -p $WORK_DIR; break;; 33 | [Nn]* ) exit;; 34 | * ) echo "Please answer yes or no.";; 35 | esac 36 | done 37 | else 38 | echo "Resume" 39 | fi 40 | else 41 | mkdir -p $WORK_DIR 42 | fi 43 | 44 | # export NCCL_DEBUG=INFO 45 | 46 | TORCH_DISTRIBUTED_DEBUG=INFO OMP_NUM_THREADS=1 MKL_NUM_THREADS=1 CUDA_VISIBLE_DEVICES=$GPUS \ 47 | torchrun \ 48 | --rdzv_backend c10d \ 49 | --rdzv_endpoint localhost:$PORT \ 50 | --nnodes 1 \ 51 | --nproc_per_node $NUM_GPUS \ 52 | $DIR/train_mmdet.py $CONFIG_FILE \ 53 | --amp \ 54 | --resume "auto" \ 55 | --launcher pytorch \ 56 | --work-dir $WORK_DIR \ 57 | --auto-scale-lr \ 58 | ${@:8} 59 | -------------------------------------------------------------------------------- /environment.yaml: -------------------------------------------------------------------------------- 1 | # name: pacavit 2 | channels: 3 | - defaults 4 | - pytorch 5 | - nvidia 6 | - xformers 7 | - conda-forge 8 | dependencies: 9 | - python=3.9 10 | - pip 11 | # - cudatoolkit=11.6 12 | # - pytorch=1.12.1 13 | # - torchvision=0.13.1 14 | - pytorch::pytorch=2.0.0 15 | - pytorch::pytorch-cuda=11.7.0 16 | - pytorch::torchvision=0.15.0 17 | - numpy>=1.23.1 18 | - fvcore 19 | - xformers 20 | - jupyterlab 21 | - ipywidgets 22 | - pip: 23 | # image classification 24 | - git+https://github.com/rwightman/pytorch-image-models.git 25 | # formatter 26 | - black 27 | # my 28 | - einops>=0.3.0 29 | - torchinfo # print model summary 30 | - torchprofile # FLOPs -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if [ "$#" -lt 1 ]; then 4 | echo "Usage: install virtual_env_name" 5 | exit 6 | fi 7 | 8 | if [[ 
$OSTYPE != 'linux-gnu'* ]]; then 9 | echo "LINUX GNU OS needed (e.g., Ubuntu 20.04)." 10 | exit 11 | fi 12 | 13 | VENV_NAME=$1 14 | UPDATE_PILLOW=1 15 | 16 | # path of this script 17 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 18 | echo "---------------------- Work in $DIR ---------------------- " 19 | 20 | # check if the env exists 21 | find_conda_env(){ 22 | conda env list | grep -w ${VENV_NAME} >/dev/null 2>/dev/null 23 | } 24 | # remove: conda env remove -n env_name 25 | 26 | # update conda 27 | # conda update -n base -c defaults conda 28 | 29 | # check env before install 30 | if ! find_conda_env; then 31 | echo "---------------------- Createing the conda env ${VENV_NAME}..." 32 | conda env create -n ${VENV_NAME} -f "$DIR"/environment.yaml 33 | # else 34 | # echo "Updating the conda env ${VENV_NAME}..." 35 | # conda env update -n ${VENV_NAME} -f "$DIR"/environment.yaml 36 | fi 37 | 38 | 39 | function find_conda_package { 40 | conda list | grep -w "$1" >/dev/null 2>/dev/null 41 | } 42 | 43 | # mmengine, mmcv, mmdet, mmseg, mmpretrain 44 | if [ $CONDA_DEFAULT_ENV != ${VENV_NAME} ] ; then 45 | echo "---------------------- Not inside the virtual env $VENV_NAME ---------------------- " 46 | echo "---------------------- Please manually run: conda activate $VENV_NAME ------------- " 47 | echo "---------------------- and then re-run this installation script ------------------ " 48 | exit 49 | else 50 | if find_conda_package mmengine; then 51 | echo "mm packages installed already" 52 | else 53 | pip install -U openmim 54 | mim install mmengine 55 | mim install "mmcv>=2.0.0rc4" 56 | mim install "mmdet>=3.0.0rc0" 57 | mim install "mmsegmentation>=1.0.0" 58 | mim install "mmpretrain>=1.0.0rc7" 59 | python -c 'from mmengine.utils.dl_utils import collect_env;print(collect_env())' 60 | fi 61 | fi 62 | 63 | # update pillow, https://fastai1.fast.ai/performance.html#faster-image-processing 64 | if [ $UPDATE_PILLOW == 1 ]; then 65 | if 
find_conda_package pillow-simd; then 66 | echo "PILLOW-SIMD installed already" 67 | else 68 | # check env before install 69 | if [ $CONDA_DEFAULT_ENV != ${VENV_NAME} ] ; then 70 | echo "---------------------- Not inside the virtual env $VENV_NAME ---------------------- " 71 | echo "---------------------- Please manually run: conda activate $VENV_NAME ------------- " 72 | echo "---------------------- and then re-run this installation script ------------------ " 73 | exit 74 | fi 75 | 76 | echo "---------------------- Install Pillow-SIMD for Faster Image Processing ---------------------- " 77 | echo " If errors occur, please contact your admin to install prerequistes for pillow https://pillow.readthedocs.io/en/stable/installation.html#building-on-linux" 78 | ## prerequistes for pillow https://pillow.readthedocs.io/en/stable/installation.html#building-on-linux 79 | ## which are needed to be installed if some errors occur in installing the pillow-simd 80 | ## check with the system admin for the installation 81 | 82 | # sudo apt-get install libtiff5-dev libjpeg8-dev libopenjp2-7-dev zlib1g-dev libfreetype6-dev \ 83 | # liblcms2-dev libwebp-dev tcl8.6-dev tk8.6-dev python3-tk libharfbuzz-dev libfribidi-dev \ 84 | # libxcb1-dev 85 | # sudo apt-get install gcc-multilib 86 | 87 | conda uninstall -y --force pillow pil jpeg libtiff libjpeg-turbo 88 | pip uninstall -y pillow pil jpeg libtiff libjpeg-turbo 89 | conda install -yc conda-forge libjpeg-turbo 90 | CFLAGS="${CFLAGS} -mavx2" pip install --upgrade --no-cache-dir --force-reinstall --no-binary :all: --compile pillow-simd 91 | conda install -y -c zegami libtiff-libjpeg-turbo 92 | conda install -y jpeg libtiff 93 | fi 94 | fi 95 | 96 | 97 | 98 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | from .paca_vit import PaCaViT 2 | 
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F

from mmengine.registry import MODELS
from mmcv.cnn import ConvModule, build_norm_layer

from timm.models.layers import to_2tuple, LayerNorm2d

from .blur_pool import apply_blurpool


# Make timm's LayerNorm2d available to config-driven norm building.
if "LayerNorm2d" not in MODELS:
    MODELS.register_module("LayerNorm2d", module=LayerNorm2d)


class DownsampleV1(nn.Module):
    """Strided-convolution downsampling followed by an optional norm layer.

    Args:
        in_channels: channels of the input feature map.
        out_channels: channels of the output feature map.
        patch_size: overall stride; must be 2 or 4.
        kernel_size: kernel size of every convolution.
        norm_cfg: config passed to ``build_norm_layer``; a falsy value
            disables normalization.
        img_size: nominal input size, used only to fill ``grid_size``.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        patch_size,
        kernel_size,
        norm_cfg=dict(type="LayerNorm2d", eps=1e-6),
        img_size=224,
    ):
        super().__init__()

        assert patch_size in (2, 4)
        height, width = to_2tuple(img_size)
        self.grid_size = (height // patch_size, width // patch_size)

        pad = (kernel_size - 1) // 2
        if patch_size > kernel_size:
            # Two stacked convolutions: stride 2, then stride patch_size // 2,
            # with half of the output channels in between.
            dim = out_channels // 2
            first = nn.Conv2d(
                in_channels, dim, kernel_size=kernel_size, stride=2, padding=pad
            )
            second = nn.Conv2d(
                dim,
                out_channels,
                kernel_size=kernel_size,
                stride=patch_size // 2,
                padding=pad,
            )
            self.proj = nn.Sequential(first, second)
        else:
            # A single convolution already spans the whole stride.
            self.proj = nn.Conv2d(
                in_channels,
                out_channels,
                kernel_size=kernel_size,
                stride=patch_size,
                padding=pad,
            )

        if norm_cfg:
            self.norm = build_norm_layer(norm_cfg, out_channels)[1]
        else:
            self.norm = nn.Identity()

    def forward(self, x):
        # x: B C H W
        return self.norm(self.proj(x))


class DownsampleV2(nn.Module):
    """Downsampling built from one or two stride-2 ``ConvModule`` layers.

    With ``patch_size == 4`` a first stride-2 ConvModule maps to
    ``int(out_chs * ratio)`` channels before the final stride-2 ConvModule;
    with ``patch_size == 2`` the first stage is an identity. The final
    ConvModule has no activation. Extra keyword arguments are accepted and
    ignored.
    """

    def __init__(
        self,
        in_chs,
        out_chs,
        img_size=224,
        kernel_size=3,
        patch_size=4,
        ratio=0.5,
        conv_cfg=None,
        conv_bias=True,
        norm_cfg=dict(type="LayerNorm2d"),
        act_cfg=dict(type="GELU"),
        with_blurpool=False,
        order=("conv", "norm", "act"),
        **kwargs
    ):
        super().__init__()
        assert patch_size in (2, 4)

        height, width = to_2tuple(img_size)
        self.grid_size = (height // patch_size, width // patch_size)

        # Settings common to both ConvModules.
        shared = dict(
            kernel_size=kernel_size,
            stride=2,
            padding=(kernel_size - 1) // 2,
            bias=conv_bias,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            order=order,
        )

        if patch_size == 4:
            mid_chs = int(out_chs * ratio)
            self.conv1 = ConvModule(in_chs, mid_chs, act_cfg=act_cfg, **shared)
        else:
            mid_chs = in_chs
            self.conv1 = nn.Identity()

        self.conv2 = ConvModule(mid_chs, out_chs, act_cfg=None, **shared)

        # NOTE(review): blur pooling is applied to conv1 only -- confirm
        # that leaving conv2 untouched is intended.
        if with_blurpool:
            apply_blurpool(self.conv1)

    def forward(self, x):
        return self.conv2(self.conv1(x))


downsampler_cfg = {
    # layer type name: module class
    "DownsampleV1": DownsampleV1,
    "DownsampleV2": DownsampleV2,
}


def build_downsample_layer(cfg):
    """Build a downsample (stem or transition) layer from a config dict.

    Args:
        cfg (dict): must contain ``type`` (str), a key of
            ``downsampler_cfg``; every other entry is forwarded as a keyword
            argument to the selected class. ``cfg`` itself is not modified.

    Returns:
        layer (nn.Module): the instantiated downsample layer.

    Raises:
        KeyError: if ``type`` is not a known downsample layer.
        NotImplementedError: if the registry entry for ``type`` is ``None``.
    """
    assert isinstance(cfg, dict) and "type" in cfg
    kwargs = dict(cfg)
    layer_type = kwargs.pop("type")

    if layer_type not in downsampler_cfg:
        raise KeyError("Unrecognized stem type {}".format(layer_type))

    layer_cls = downsampler_cfg[layer_type]
    if layer_cls is None:
        raise NotImplementedError

    return layer_cls(**kwargs)
align_corners=False, 24 | loss_decode=dict( 25 | type="CrossEntropyLoss", 26 | avg_non_ignore=True, 27 | use_sigmoid=False, 28 | loss_weight=1.0, 29 | ), 30 | aux_loss_decode=dict( 31 | type="CrossEntropyLoss", 32 | avg_non_ignore=True, 33 | use_sigmoid=False, 34 | loss_weight=0.4, 35 | ), 36 | ), 37 | # model training and testing settings 38 | train_cfg=dict(), 39 | test_cfg=dict(mode="whole"), 40 | ) 41 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/upernet_swin.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | backbone_norm_cfg = dict(type='LN', requires_grad=True) 4 | data_preprocessor = dict( 5 | type='SegDataPreProcessor', 6 | mean=[123.675, 116.28, 103.53], 7 | std=[58.395, 57.12, 57.375], 8 | bgr_to_rgb=True, 9 | pad_val=0, 10 | seg_pad_val=255) 11 | model = dict( 12 | type='EncoderDecoder', 13 | data_preprocessor=data_preprocessor, 14 | pretrained=None, 15 | backbone=dict( 16 | type='SwinTransformer', 17 | pretrain_img_size=224, 18 | embed_dims=96, 19 | patch_size=4, 20 | window_size=7, 21 | mlp_ratio=4, 22 | depths=[2, 2, 6, 2], 23 | num_heads=[3, 6, 12, 24], 24 | strides=(4, 2, 2, 2), 25 | out_indices=(0, 1, 2, 3), 26 | qkv_bias=True, 27 | qk_scale=None, 28 | patch_norm=True, 29 | drop_rate=0., 30 | attn_drop_rate=0., 31 | drop_path_rate=0.3, 32 | use_abs_pos_embed=False, 33 | act_cfg=dict(type='GELU'), 34 | norm_cfg=backbone_norm_cfg), 35 | decode_head=dict( 36 | type='UPerHead', 37 | in_channels=[96, 192, 384, 768], 38 | in_index=[0, 1, 2, 3], 39 | pool_scales=(1, 2, 3, 6), 40 | channels=512, 41 | dropout_ratio=0.1, 42 | num_classes=19, 43 | norm_cfg=norm_cfg, 44 | align_corners=False, 45 | loss_decode=dict( 46 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 47 | auxiliary_head=dict( 48 | type='FCNHead', 49 | in_channels=384, 50 | in_index=2, 51 | 
channels=256, 52 | num_convs=1, 53 | concat_input=False, 54 | dropout_ratio=0.1, 55 | num_classes=19, 56 | norm_cfg=norm_cfg, 57 | align_corners=False, 58 | loss_decode=dict( 59 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 60 | # model training and testing settings 61 | train_cfg=dict(), 62 | test_cfg=dict(mode='whole')) 63 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_160k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 10 | begin=0, 11 | end=160000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 160k 15 | train_cfg = dict( 16 | type='IterBasedTrainLoop', max_iters=160000, val_interval=16000) 17 | val_cfg = dict(type='ValLoop') 18 | test_cfg = dict(type='TestLoop') 19 | default_hooks = dict( 20 | timer=dict(type='IterTimerHook'), 21 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 22 | param_scheduler=dict(type='ParamSchedulerHook'), 23 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=16000), 24 | sampler_seed=dict(type='DistSamplerSeedHook'), 25 | visualization=dict(type='SegVisualizationHook')) 26 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_20k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 
10 | begin=0, 11 | end=20000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 20k 15 | train_cfg = dict(type='IterBasedTrainLoop', max_iters=20000, val_interval=2000) 16 | val_cfg = dict(type='ValLoop') 17 | test_cfg = dict(type='TestLoop') 18 | default_hooks = dict( 19 | timer=dict(type='IterTimerHook'), 20 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 21 | param_scheduler=dict(type='ParamSchedulerHook'), 22 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=2000), 23 | sampler_seed=dict(type='DistSamplerSeedHook'), 24 | visualization=dict(type='SegVisualizationHook')) 25 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_240k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 10 | begin=0, 11 | end=240000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 240k 15 | train_cfg = dict( 16 | type='IterBasedTrainLoop', max_iters=240000, val_interval=24000) 17 | val_cfg = dict(type='ValLoop') 18 | test_cfg = dict(type='TestLoop') 19 | default_hooks = dict( 20 | timer=dict(type='IterTimerHook'), 21 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 22 | param_scheduler=dict(type='ParamSchedulerHook'), 23 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=24000), 24 | sampler_seed=dict(type='DistSamplerSeedHook'), 25 | visualization=dict(type='SegVisualizationHook')) 26 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_320k.py: 
-------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 10 | begin=0, 11 | end=320000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 320k 15 | train_cfg = dict( 16 | type='IterBasedTrainLoop', max_iters=320000, val_interval=32000) 17 | val_cfg = dict(type='ValLoop') 18 | test_cfg = dict(type='TestLoop') 19 | default_hooks = dict( 20 | timer=dict(type='IterTimerHook'), 21 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 22 | param_scheduler=dict(type='ParamSchedulerHook'), 23 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=32000), 24 | sampler_seed=dict(type='DistSamplerSeedHook'), 25 | visualization=dict(type='SegVisualizationHook')) 26 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_40k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 10 | begin=0, 11 | end=40000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 40k 15 | train_cfg = dict(type='IterBasedTrainLoop', max_iters=40000, val_interval=4000) 16 | val_cfg = dict(type='ValLoop') 17 | test_cfg = dict(type='TestLoop') 18 | default_hooks = dict( 19 | timer=dict(type='IterTimerHook'), 20 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 21 | param_scheduler=dict(type='ParamSchedulerHook'), 22 | 
checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=4000), 23 | sampler_seed=dict(type='DistSamplerSeedHook'), 24 | visualization=dict(type='SegVisualizationHook')) 25 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_80k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) 4 | # learning policy 5 | param_scheduler = [ 6 | dict( 7 | type='PolyLR', 8 | eta_min=1e-4, 9 | power=0.9, 10 | begin=0, 11 | end=80000, 12 | by_epoch=False) 13 | ] 14 | # training schedule for 80k 15 | train_cfg = dict(type='IterBasedTrainLoop', max_iters=80000, val_interval=8000) 16 | val_cfg = dict(type='ValLoop') 17 | test_cfg = dict(type='TestLoop') 18 | default_hooks = dict( 19 | timer=dict(type='IterTimerHook'), 20 | logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), 21 | param_scheduler=dict(type='ParamSchedulerHook'), 22 | checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=8000), 23 | sampler_seed=dict(type='DistSamplerSeedHook'), 24 | visualization=dict(type='SegVisualizationHook')) 25 | -------------------------------------------------------------------------------- /segmentation/configs/paca_vit/paca_head/pacahead_pacavit_base_p2cconv_100_0_512x512_160k_ade20k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | "../../_base_/models/paca_head.py", 3 | "../../_base_/datasets/ade20k.py", 4 | "../../_base_/default_runtime.py", 5 | "../../_base_/schedules/schedule_160k.py", 6 | ] 7 | 8 | crop_size = (512, 512) 9 | data_preprocessor = dict(size=crop_size) 10 | model = dict( 11 | data_preprocessor=data_preprocessor, 12 | backbone=dict( 13 | _delete_=True, 14 | type="pacavit_base_p2cconv_100_0_downstream", 15 | 
drop_path_rate=0.5, 16 | layer_scale=None, 17 | pretrained=( 18 | "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_base_p2cconv_100_0.pth" 19 | ), 20 | downstream_cluster_num=[200, 200, 200, 0], 21 | ), 22 | decode_head=dict(in_channels=[96, 192, 384, 512], num_classes=150), 23 | ) 24 | 25 | 26 | # AdamW optimizer 27 | optim_wrapper = dict( 28 | _delete_=True, 29 | type="OptimWrapper", 30 | optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 31 | paramwise_cfg=dict( 32 | custom_keys={ 33 | "norm": dict(decay_mult=0.0), 34 | "clustering.4": dict(lr_mult=10.0), # .4 for p2cconv 35 | "head": dict(lr_mult=10.0), 36 | } 37 | ), 38 | ) 39 | 40 | param_scheduler = [ 41 | dict(type="LinearLR", start_factor=1e-6, by_epoch=False, begin=0, end=1500), 42 | dict( 43 | type="PolyLR", 44 | eta_min=0.0, 45 | power=1.0, 46 | begin=1500, 47 | end=160000, 48 | by_epoch=False, 49 | ), 50 | ] 51 | 52 | # By default, models are trained on 8 GPUs with 2 images per GPU 53 | train_dataloader = dict(batch_size=2) 54 | val_dataloader = dict(batch_size=1) 55 | test_dataloader = val_dataloader 56 | -------------------------------------------------------------------------------- /segmentation/configs/paca_vit/paca_head/pacahead_pacavit_convmixer_base_100_512x512_160k_ade20k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | "../../_base_/models/paca_head.py", 3 | "../../_base_/datasets/ade20k.py", 4 | "../../_base_/default_runtime.py", 5 | "../../_base_/schedules/schedule_160k.py", 6 | ] 7 | 8 | crop_size = (512, 512) 9 | data_preprocessor = dict(size=crop_size) 10 | model = dict( 11 | data_preprocessor=data_preprocessor, 12 | backbone=dict( 13 | _delete_=True, 14 | type="pacavit_convmixer_base_100_downstream", 15 | drop_path_rate=0.5, 16 | layer_scale=None, 17 | pretrained=( 18 | "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_convmixer_base_100.pth" 19 | ), 20 | downstream_cluster_num=[200, 
200, 200, 200], 21 | ), 22 | decode_head=dict(in_channels=[96, 192, 384, 512], num_classes=150), 23 | ) 24 | 25 | 26 | # AdamW optimizer 27 | optim_wrapper = dict( 28 | _delete_=True, 29 | type="OptimWrapper", 30 | optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 31 | paramwise_cfg=dict( 32 | custom_keys={ 33 | "norm": dict(decay_mult=0.0), 34 | "clustering.4": dict(lr_mult=10.0), # .4 for p2cconv 35 | "head": dict(lr_mult=10.0), 36 | } 37 | ), 38 | ) 39 | 40 | param_scheduler = [ 41 | dict(type="LinearLR", start_factor=1e-6, by_epoch=False, begin=0, end=1500), 42 | dict( 43 | type="PolyLR", 44 | eta_min=0.0, 45 | power=1.0, 46 | begin=1500, 47 | end=160000, 48 | by_epoch=False, 49 | ), 50 | ] 51 | 52 | # By default, models are trained on 8 GPUs with 2 images per GPU 53 | train_dataloader = dict(batch_size=2) 54 | val_dataloader = dict(batch_size=1) 55 | test_dataloader = val_dataloader 56 | -------------------------------------------------------------------------------- /segmentation/configs/paca_vit/paca_head/pacahead_pacavit_convmixer_small_100_512x512_160k_ade20k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | "../../_base_/models/paca_head.py", 3 | "../../_base_/datasets/ade20k.py", 4 | "../../_base_/default_runtime.py", 5 | "../../_base_/schedules/schedule_160k.py", 6 | ] 7 | 8 | crop_size = (512, 512) 9 | data_preprocessor = dict(size=crop_size) 10 | model = dict( 11 | data_preprocessor=data_preprocessor, 12 | backbone=dict( 13 | _delete_=True, 14 | type="pacavit_convmixer_small_100_downstream", 15 | drop_path_rate=0.1, 16 | layer_scale=None, 17 | pretrained=( 18 | "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_convmixer_small_100.pth" 19 | ), 20 | downstream_cluster_num=[200, 200, 200, 200], 21 | ), 22 | decode_head=dict(in_channels=[96, 192, 320, 384], num_classes=150), 23 | ) 24 | 25 | 26 | # AdamW optimizer 27 | optim_wrapper = dict( 28 | _delete_=True, 29 
| type="OptimWrapper", 30 | optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 31 | paramwise_cfg=dict( 32 | custom_keys={ 33 | "norm": dict(decay_mult=0.0), 34 | "clustering.4": dict(lr_mult=10.0), # .4 for p2cconv 35 | "head": dict(lr_mult=10.0), 36 | } 37 | ), 38 | ) 39 | 40 | param_scheduler = [ 41 | dict(type="LinearLR", start_factor=1e-6, by_epoch=False, begin=0, end=1500), 42 | dict( 43 | type="PolyLR", 44 | eta_min=0.0, 45 | power=1.0, 46 | begin=1500, 47 | end=160000, 48 | by_epoch=False, 49 | ), 50 | ] 51 | 52 | # By default, models are trained on 8 GPUs with 2 images per GPU 53 | train_dataloader = dict(batch_size=2) 54 | val_dataloader = dict(batch_size=1) 55 | test_dataloader = val_dataloader 56 | -------------------------------------------------------------------------------- /segmentation/configs/paca_vit/paca_head/pacahead_pacavit_convmixer_tiny_100_512x512_160k_ade20k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | "../../_base_/models/paca_head.py", 3 | "../../_base_/datasets/ade20k.py", 4 | "../../_base_/default_runtime.py", 5 | "../../_base_/schedules/schedule_160k.py", 6 | ] 7 | 8 | crop_size = (512, 512) 9 | data_preprocessor = dict(size=crop_size) 10 | model = dict( 11 | data_preprocessor=data_preprocessor, 12 | backbone=dict( 13 | _delete_=True, 14 | type="pacavit_convmixer_tiny_100_downstream", 15 | drop_path_rate=0.1, 16 | layer_scale=None, 17 | pretrained=( 18 | "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_convmixer_tiny_100.pth" 19 | ), 20 | downstream_cluster_num=[200, 200, 200, 200], 21 | ), 22 | decode_head=dict(in_channels=[96, 192, 320, 384], num_classes=150), 23 | ) 24 | 25 | 26 | # AdamW optimizer 27 | optim_wrapper = dict( 28 | _delete_=True, 29 | type="OptimWrapper", 30 | optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 31 | paramwise_cfg=dict( 32 | custom_keys={ 33 | "norm": dict(decay_mult=0.0), 
34 | "clustering.4": dict(lr_mult=10.0), # .4 for p2cconv 35 | "head": dict(lr_mult=10.0), 36 | } 37 | ), 38 | ) 39 | 40 | param_scheduler = [ 41 | dict(type="LinearLR", start_factor=1e-6, by_epoch=False, begin=0, end=1500), 42 | dict( 43 | type="PolyLR", 44 | eta_min=0.0, 45 | power=1.0, 46 | begin=1500, 47 | end=160000, 48 | by_epoch=False, 49 | ), 50 | ] 51 | 52 | # By default, models are trained on 8 GPUs with 2 images per GPU 53 | train_dataloader = dict(batch_size=2) 54 | val_dataloader = dict(batch_size=1) 55 | test_dataloader = val_dataloader 56 | -------------------------------------------------------------------------------- /segmentation/configs/paca_vit/paca_head/pacahead_pacavit_small_p2cconv_100_0_512x512_160k_ade20k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | "../../_base_/models/paca_head.py", 3 | "../../_base_/datasets/ade20k.py", 4 | "../../_base_/default_runtime.py", 5 | "../../_base_/schedules/schedule_160k.py", 6 | ] 7 | 8 | crop_size = (512, 512) 9 | data_preprocessor = dict(size=crop_size) 10 | model = dict( 11 | data_preprocessor=data_preprocessor, 12 | backbone=dict( 13 | _delete_=True, 14 | type="pacavit_small_p2cconv_100_0_downstream", 15 | drop_path_rate=0.1, 16 | layer_scale=None, 17 | pretrained=( 18 | "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_100_0.pth" 19 | ), 20 | downstream_cluster_num=[200, 200, 200, 0], 21 | ), 22 | decode_head=dict(in_channels=[96, 192, 320, 384], num_classes=150), 23 | ) 24 | 25 | 26 | # AdamW optimizer 27 | optim_wrapper = dict( 28 | _delete_=True, 29 | type="OptimWrapper", 30 | optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 31 | paramwise_cfg=dict( 32 | custom_keys={ 33 | "norm": dict(decay_mult=0.0), 34 | "clustering.4": dict(lr_mult=10.0), # .4 for p2cconv 35 | "head": dict(lr_mult=10.0), 36 | } 37 | ), 38 | ) 39 | 40 | param_scheduler = [ 41 | dict(type="LinearLR", start_factor=1e-6, 
by_epoch=False, begin=0, end=1500), 42 | dict( 43 | type="PolyLR", 44 | eta_min=0.0, 45 | power=1.0, 46 | begin=1500, 47 | end=160000, 48 | by_epoch=False, 49 | ), 50 | ] 51 | 52 | # By default, models are trained on 8 GPUs with 2 images per GPU 53 | train_dataloader = dict(batch_size=2) 54 | val_dataloader = dict(batch_size=1) 55 | test_dataloader = val_dataloader 56 | -------------------------------------------------------------------------------- /segmentation/configs/paca_vit/paca_head/pacahead_pacavit_small_p2cconv_100_49_512x512_160k_ade20k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | "../../_base_/models/paca_head.py", 3 | "../../_base_/datasets/ade20k.py", 4 | "../../_base_/default_runtime.py", 5 | "../../_base_/schedules/schedule_160k.py", 6 | ] 7 | 8 | crop_size = (512, 512) 9 | data_preprocessor = dict(size=crop_size) 10 | model = dict( 11 | data_preprocessor=data_preprocessor, 12 | backbone=dict( 13 | _delete_=True, 14 | type="pacavit_small_p2cconv_100_49_downstream", 15 | drop_path_rate=0.1, 16 | layer_scale=None, 17 | pretrained=( 18 | "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_100_49.pth" 19 | ), 20 | downstream_cluster_num=[200, 200, 200, 200], 21 | ), 22 | decode_head=dict(in_channels=[96, 192, 320, 384], num_classes=150), 23 | ) 24 | 25 | 26 | # AdamW optimizer 27 | optim_wrapper = dict( 28 | _delete_=True, 29 | type="OptimWrapper", 30 | optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 31 | paramwise_cfg=dict( 32 | custom_keys={ 33 | "norm": dict(decay_mult=0.0), 34 | "clustering.4": dict(lr_mult=10.0), # .4 for p2cconv 35 | "head": dict(lr_mult=10.0), 36 | } 37 | ), 38 | ) 39 | 40 | param_scheduler = [ 41 | dict(type="LinearLR", start_factor=1e-6, by_epoch=False, begin=0, end=1500), 42 | dict( 43 | type="PolyLR", 44 | eta_min=0.0, 45 | power=1.0, 46 | begin=1500, 47 | end=160000, 48 | by_epoch=False, 49 | ), 50 | ] 51 | 52 | # 
By default, models are trained on 8 GPUs with 2 images per GPU 53 | train_dataloader = dict(batch_size=2) 54 | val_dataloader = dict(batch_size=1) 55 | test_dataloader = val_dataloader 56 | -------------------------------------------------------------------------------- /segmentation/configs/paca_vit/paca_head/pacahead_pacavit_small_p2cconv_100_512x512_160k_ade20k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | "../../_base_/models/paca_head.py", 3 | "../../_base_/datasets/ade20k.py", 4 | "../../_base_/default_runtime.py", 5 | "../../_base_/schedules/schedule_160k.py", 6 | ] 7 | 8 | crop_size = (512, 512) 9 | data_preprocessor = dict(size=crop_size) 10 | model = dict( 11 | data_preprocessor=data_preprocessor, 12 | backbone=dict( 13 | _delete_=True, 14 | type="pacavit_small_p2cconv_100_downstream", 15 | drop_path_rate=0.1, 16 | layer_scale=None, 17 | pretrained=( 18 | "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_100.pth" 19 | ), 20 | downstream_cluster_num=[200, 200, 200, 200], 21 | ), 22 | decode_head=dict(in_channels=[96, 192, 320, 384], num_classes=150), 23 | ) 24 | 25 | 26 | # AdamW optimizer 27 | optim_wrapper = dict( 28 | _delete_=True, 29 | type="OptimWrapper", 30 | optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 31 | paramwise_cfg=dict( 32 | custom_keys={ 33 | "norm": dict(decay_mult=0.0), 34 | "clustering.4": dict(lr_mult=10.0), # .4 for p2cconv 35 | "head": dict(lr_mult=10.0), 36 | } 37 | ), 38 | ) 39 | 40 | param_scheduler = [ 41 | dict(type="LinearLR", start_factor=1e-6, by_epoch=False, begin=0, end=1500), 42 | dict( 43 | type="PolyLR", 44 | eta_min=0.0, 45 | power=1.0, 46 | begin=1500, 47 | end=160000, 48 | by_epoch=False, 49 | ), 50 | ] 51 | 52 | # By default, models are trained on 8 GPUs with 2 images per GPU 53 | train_dataloader = dict(batch_size=2) 54 | val_dataloader = dict(batch_size=1) 55 | test_dataloader = val_dataloader 56 | 
-------------------------------------------------------------------------------- /segmentation/configs/paca_vit/paca_head/pacahead_pacavit_small_p2cconv_100_blockwise_512x512_160k_ade20k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | "../../_base_/models/paca_head.py", 3 | "../../_base_/datasets/ade20k.py", 4 | "../../_base_/default_runtime.py", 5 | "../../_base_/schedules/schedule_160k.py", 6 | ] 7 | 8 | crop_size = (512, 512) 9 | data_preprocessor = dict(size=crop_size) 10 | model = dict( 11 | data_preprocessor=data_preprocessor, 12 | backbone=dict( 13 | _delete_=True, 14 | type="pacavit_small_p2cconv_100_blockwise_downstream", 15 | drop_path_rate=0.1, 16 | layer_scale=None, 17 | pretrained=( 18 | "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_100_blockwise.pth" 19 | ), 20 | downstream_cluster_num=[200, 200, 200, 200], 21 | ), 22 | decode_head=dict(in_channels=[96, 192, 320, 384], num_classes=150), 23 | ) 24 | 25 | 26 | # AdamW optimizer 27 | optim_wrapper = dict( 28 | _delete_=True, 29 | type="OptimWrapper", 30 | optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 31 | paramwise_cfg=dict( 32 | custom_keys={ 33 | "norm": dict(decay_mult=0.0), 34 | "clustering.4": dict(lr_mult=10.0), # .4 for p2cconv 35 | "head": dict(lr_mult=10.0), 36 | } 37 | ), 38 | ) 39 | 40 | param_scheduler = [ 41 | dict(type="LinearLR", start_factor=1e-6, by_epoch=False, begin=0, end=1500), 42 | dict( 43 | type="PolyLR", 44 | eta_min=0.0, 45 | power=1.0, 46 | begin=1500, 47 | end=160000, 48 | by_epoch=False, 49 | ), 50 | ] 51 | 52 | # By default, models are trained on 8 GPUs with 2 images per GPU 53 | train_dataloader = dict(batch_size=2) 54 | val_dataloader = dict(batch_size=1) 55 | test_dataloader = val_dataloader 56 | -------------------------------------------------------------------------------- 
/segmentation/configs/paca_vit/paca_head/pacahead_pacavit_small_p2cconv_2_0_512x512_160k_ade20k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | "../../_base_/models/paca_head.py", 3 | "../../_base_/datasets/ade20k.py", 4 | "../../_base_/default_runtime.py", 5 | "../../_base_/schedules/schedule_160k.py", 6 | ] 7 | 8 | crop_size = (512, 512) 9 | data_preprocessor = dict(size=crop_size) 10 | model = dict( 11 | data_preprocessor=data_preprocessor, 12 | backbone=dict( 13 | _delete_=True, 14 | type="pacavit_small_p2cconv_2_0_downstream", 15 | drop_path_rate=0.1, 16 | layer_scale=None, 17 | pretrained=( 18 | "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_2_0.pth" 19 | ), 20 | downstream_cluster_num=[200, 200, 200, 0], 21 | ), 22 | decode_head=dict(in_channels=[96, 192, 320, 384], num_classes=150), 23 | ) 24 | 25 | 26 | # AdamW optimizer 27 | optim_wrapper = dict( 28 | _delete_=True, 29 | type="OptimWrapper", 30 | optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 31 | paramwise_cfg=dict( 32 | custom_keys={ 33 | "norm": dict(decay_mult=0.0), 34 | "clustering.4": dict(lr_mult=10.0), # .4 for p2cconv 35 | "head": dict(lr_mult=10.0), 36 | } 37 | ), 38 | ) 39 | 40 | param_scheduler = [ 41 | dict(type="LinearLR", start_factor=1e-6, by_epoch=False, begin=0, end=1500), 42 | dict( 43 | type="PolyLR", 44 | eta_min=0.0, 45 | power=1.0, 46 | begin=1500, 47 | end=160000, 48 | by_epoch=False, 49 | ), 50 | ] 51 | 52 | # By default, models are trained on 8 GPUs with 2 images per GPU 53 | train_dataloader = dict(batch_size=2) 54 | val_dataloader = dict(batch_size=1) 55 | test_dataloader = val_dataloader 56 | -------------------------------------------------------------------------------- /segmentation/configs/paca_vit/paca_head/pacahead_pacavit_small_p2cconv_49_0_512x512_160k_ade20k.py: -------------------------------------------------------------------------------- 1 | _base_ = 
[ 2 | "../../_base_/models/paca_head.py", 3 | "../../_base_/datasets/ade20k.py", 4 | "../../_base_/default_runtime.py", 5 | "../../_base_/schedules/schedule_160k.py", 6 | ] 7 | 8 | crop_size = (512, 512) 9 | data_preprocessor = dict(size=crop_size) 10 | model = dict( 11 | data_preprocessor=data_preprocessor, 12 | backbone=dict( 13 | _delete_=True, 14 | type="pacavit_small_p2cconv_49_0_downstream", 15 | drop_path_rate=0.1, 16 | layer_scale=None, 17 | pretrained=( 18 | "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_49_0.pth" 19 | ), 20 | downstream_cluster_num=[200, 200, 200, 0], 21 | ), 22 | decode_head=dict(in_channels=[96, 192, 320, 384], num_classes=150), 23 | ) 24 | 25 | 26 | # AdamW optimizer 27 | optim_wrapper = dict( 28 | _delete_=True, 29 | type="OptimWrapper", 30 | optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 31 | paramwise_cfg=dict( 32 | custom_keys={ 33 | "norm": dict(decay_mult=0.0), 34 | "clustering.4": dict(lr_mult=10.0), # .4 for p2cconv 35 | "head": dict(lr_mult=10.0), 36 | } 37 | ), 38 | ) 39 | 40 | param_scheduler = [ 41 | dict(type="LinearLR", start_factor=1e-6, by_epoch=False, begin=0, end=1500), 42 | dict( 43 | type="PolyLR", 44 | eta_min=0.0, 45 | power=1.0, 46 | begin=1500, 47 | end=160000, 48 | by_epoch=False, 49 | ), 50 | ] 51 | 52 | # By default, models are trained on 8 GPUs with 2 images per GPU 53 | train_dataloader = dict(batch_size=2) 54 | val_dataloader = dict(batch_size=1) 55 | test_dataloader = val_dataloader 56 | -------------------------------------------------------------------------------- /segmentation/configs/paca_vit/paca_head/pacahead_pacavit_small_p2cconv_49_100_512x512_160k_ade20k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | "../../_base_/models/paca_head.py", 3 | "../../_base_/datasets/ade20k.py", 4 | "../../_base_/default_runtime.py", 5 | "../../_base_/schedules/schedule_160k.py", 6 | ] 7 | 8 | 
crop_size = (512, 512) 9 | data_preprocessor = dict(size=crop_size) 10 | model = dict( 11 | data_preprocessor=data_preprocessor, 12 | backbone=dict( 13 | _delete_=True, 14 | type="pacavit_small_p2cconv_49_100_downstream", 15 | drop_path_rate=0.1, 16 | layer_scale=None, 17 | pretrained=( 18 | "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_49_100.pth" 19 | ), 20 | downstream_cluster_num=[200, 200, 200, 200], 21 | ), 22 | decode_head=dict(in_channels=[96, 192, 320, 384], num_classes=150), 23 | ) 24 | 25 | 26 | # AdamW optimizer 27 | optim_wrapper = dict( 28 | _delete_=True, 29 | type="OptimWrapper", 30 | optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 31 | paramwise_cfg=dict( 32 | custom_keys={ 33 | "norm": dict(decay_mult=0.0), 34 | "clustering.4": dict(lr_mult=10.0), # .4 for p2cconv 35 | "head": dict(lr_mult=10.0), 36 | } 37 | ), 38 | ) 39 | 40 | param_scheduler = [ 41 | dict(type="LinearLR", start_factor=1e-6, by_epoch=False, begin=0, end=1500), 42 | dict( 43 | type="PolyLR", 44 | eta_min=0.0, 45 | power=1.0, 46 | begin=1500, 47 | end=160000, 48 | by_epoch=False, 49 | ), 50 | ] 51 | 52 | # By default, models are trained on 8 GPUs with 2 images per GPU 53 | train_dataloader = dict(batch_size=2) 54 | val_dataloader = dict(batch_size=1) 55 | test_dataloader = val_dataloader 56 | -------------------------------------------------------------------------------- /segmentation/configs/paca_vit/paca_head/pacahead_pacavit_small_p2cmlp_100_0_512x512_160k_ade20k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | "../../_base_/models/paca_head.py", 3 | "../../_base_/datasets/ade20k.py", 4 | "../../_base_/default_runtime.py", 5 | "../../_base_/schedules/schedule_160k.py", 6 | ] 7 | 8 | crop_size = (512, 512) 9 | data_preprocessor = dict(size=crop_size) 10 | model = dict( 11 | data_preprocessor=data_preprocessor, 12 | backbone=dict( 13 | _delete_=True, 14 | 
type="pacavit_small_p2cmlp_100_0_downstream", 15 | drop_path_rate=0.1, 16 | layer_scale=None, 17 | pretrained=( 18 | "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cmlp_100_0.pth" 19 | ), 20 | downstream_cluster_num=[200, 200, 200, 0], 21 | ), 22 | decode_head=dict(in_channels=[96, 192, 320, 384], num_classes=150), 23 | ) 24 | 25 | 26 | # AdamW optimizer 27 | optim_wrapper = dict( 28 | _delete_=True, 29 | type="OptimWrapper", 30 | optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 31 | paramwise_cfg=dict( 32 | custom_keys={ 33 | "norm": dict(decay_mult=0.0), 34 | "clustering.4": dict(lr_mult=10.0), # .4 for p2cconv 35 | "head": dict(lr_mult=10.0), 36 | } 37 | ), 38 | ) 39 | 40 | param_scheduler = [ 41 | dict(type="LinearLR", start_factor=1e-6, by_epoch=False, begin=0, end=1500), 42 | dict( 43 | type="PolyLR", 44 | eta_min=0.0, 45 | power=1.0, 46 | begin=1500, 47 | end=160000, 48 | by_epoch=False, 49 | ), 50 | ] 51 | 52 | # By default, models are trained on 8 GPUs with 2 images per GPU 53 | train_dataloader = dict(batch_size=2) 54 | val_dataloader = dict(batch_size=1) 55 | test_dataloader = val_dataloader 56 | -------------------------------------------------------------------------------- /segmentation/configs/paca_vit/paca_head/pacahead_pacavit_tiny_p2cconv_100_0_512x512_160k_ade20k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | "../../_base_/models/paca_head.py", 3 | "../../_base_/datasets/ade20k.py", 4 | "../../_base_/default_runtime.py", 5 | "../../_base_/schedules/schedule_160k.py", 6 | ] 7 | 8 | crop_size = (512, 512) 9 | data_preprocessor = dict(size=crop_size) 10 | model = dict( 11 | data_preprocessor=data_preprocessor, 12 | backbone=dict( 13 | _delete_=True, 14 | type="pacavit_tiny_p2cconv_100_0_downstream", 15 | drop_path_rate=0.1, 16 | layer_scale=None, 17 | pretrained=( 18 | 
"../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_tiny_p2cconv_100_0.pth" 19 | ), 20 | downstream_cluster_num=[200, 200, 200, 0], 21 | ), 22 | decode_head=dict(in_channels=[96, 192, 320, 384], num_classes=150), 23 | ) 24 | 25 | 26 | # AdamW optimizer 27 | optim_wrapper = dict( 28 | _delete_=True, 29 | type="OptimWrapper", 30 | optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01), 31 | paramwise_cfg=dict( 32 | custom_keys={ 33 | "norm": dict(decay_mult=0.0), 34 | "clustering.4": dict(lr_mult=10.0), # .4 for p2cconv 35 | "head": dict(lr_mult=10.0), 36 | } 37 | ), 38 | ) 39 | 40 | param_scheduler = [ 41 | dict(type="LinearLR", start_factor=1e-6, by_epoch=False, begin=0, end=1500), 42 | dict( 43 | type="PolyLR", 44 | eta_min=0.0, 45 | power=1.0, 46 | begin=1500, 47 | end=160000, 48 | by_epoch=False, 49 | ), 50 | ] 51 | 52 | # By default, models are trained on 8 GPUs with 2 images per GPU 53 | train_dataloader = dict(batch_size=2) 54 | val_dataloader = dict(batch_size=1) 55 | test_dataloader = val_dataloader 56 | -------------------------------------------------------------------------------- /segmentation/configs/paca_vit/upernet/upernet_pacavit_base_p2cconv_100_0_512x512_160k_ade20k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | "../../_base_/models/upernet_swin.py", 3 | "../../_base_/datasets/ade20k.py", 4 | "../../_base_/default_runtime.py", 5 | "../../_base_/schedules/schedule_160k.py", 6 | ] 7 | 8 | crop_size = (512, 512) 9 | data_preprocessor = dict(size=crop_size) 10 | model = dict( 11 | data_preprocessor=data_preprocessor, 12 | backbone=dict( 13 | _delete_=True, 14 | type="pacavit_base_p2cconv_100_0_downstream", 15 | drop_path_rate=0.1, 16 | layer_scale=None, 17 | pretrained=( 18 | "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_base_p2cconv_100_0.pth" 19 | ), 20 | downstream_cluster_num=[200, 200, 200, 0], 21 | ), 22 | decode_head=dict(in_channels=[96, 192, 
# UPerNet decode head on a PaCa-ViT "small" backbone (p2cconv_100_0 variant),
# composed from the shared base configs listed below.
_base_ = [
    "../../_base_/models/upernet_swin.py",
    "../../_base_/datasets/ade20k.py",
    "../../_base_/default_runtime.py",
    "../../_base_/schedules/schedule_160k.py",
]

# The crop size is handed to the data preprocessor via its `size` argument.
crop_size = (512, 512)
data_preprocessor = dict(size=crop_size)
model = dict(
    data_preprocessor=data_preprocessor,
    backbone=dict(
        # `_delete_=True` is the mmengine config marker for "replace the
        # inherited dict instead of merging into it" (here: drop the backbone
        # settings inherited from the upernet_swin base).
        _delete_=True,
        type="pacavit_small_p2cconv_100_0_downstream",
        drop_path_rate=0.1,
        layer_scale=None,
        # Relative checkpoint path; presumably resolved against the directory
        # the job is launched from -- TODO confirm.
        pretrained=(
            "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_100_0.pth"
        ),
        # Presumably one cluster count per backbone stage, with 0 for the
        # last stage -- verify against the backbone implementation.
        downstream_cluster_num=[200, 200, 200, 0],
    ),
    # The auxiliary head's in_channels (320) equals the third entry of the
    # decode head's in_channels list.
    decode_head=dict(in_channels=[96, 192, 320, 384], num_classes=150),
    auxiliary_head=dict(in_channels=320, num_classes=150),
)

# AdamW optimizer. The only parameter-wise rule configured here is
# decay_mult=0.0 for the "norm" key.
# NOTE(review): the original comment also mentioned position embeddings, but
# no such key is present in `custom_keys` -- confirm whether one is needed.
optim_wrapper = dict(
    _delete_=True,
    type="OptimWrapper",
    optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01),
    paramwise_cfg=dict(custom_keys={"norm": dict(decay_mult=0.0)}),
)

# Iteration-based schedule (by_epoch=False): linear warm-up starting at
# 1e-6 x lr over iterations 0-1500, then polynomial decay with power 1.0
# down to eta_min=0.0 at iteration 160000.
param_scheduler = [
    dict(type="LinearLR", start_factor=1e-6, by_epoch=False, begin=0, end=1500),
    dict(
        type="PolyLR",
        eta_min=0.0,
        power=1.0,
        begin=1500,
        end=160000,
        by_epoch=False,
    ),
]

# By default, models are trained on 8 GPUs with 2 images per GPU
# (only the per-loader batch size is set here; the GPU count comes from the
# launch command, not from this file).
train_dataloader = dict(batch_size=2)
val_dataloader = dict(batch_size=1)
test_dataloader = val_dataloader
def parse_args():
    """Parse the command line of the FLOPs-counting script.

    Returns:
        argparse.Namespace: with ``config`` (path), ``shape`` (one or two
        ints, default ``[2048, 1024]``) and ``cfg_options`` (dict or None).
    """
    parser = argparse.ArgumentParser(description="Get the FLOPs of a segmentor")
    parser.add_argument("config", help="train config file path")
    parser.add_argument(
        "--shape", type=int, nargs="+", default=[2048, 1024], help="input image size"
    )
    parser.add_argument(
        "--cfg-options",
        nargs="+",
        action=DictAction,
        help="override some settings in the used config, the key-value pair "
        "in xxx=yyy format will be merged into config file. If the value to "
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        "Note that the quotation marks are necessary and that no white space "
        "is allowed.",
    )
    args = parser.parse_args()
    return args


def inference(args: argparse.Namespace, logger: MMLogger) -> dict:
    """Build the segmentor described by ``args.config`` and measure its cost.

    Args:
        args: Parsed command line; reads ``config``, ``cfg_options`` and
            ``shape``.
        logger: Logger used to report a missing config file.

    Returns:
        dict: ``ori_shape`` and ``pad_shape`` (both the spatial input size),
        ``flops`` and ``params`` (formatted strings from mmengine's
        complexity analysis), ``compute_type`` and ``torchprofile_flops``.

    Raises:
        FileNotFoundError: if ``args.config`` does not exist.
        ValueError: if ``args.shape`` has neither one nor two entries.
        NotImplementedError: for MaskFormer / Mask2Former decode heads.
    """
    config_name = Path(args.config)

    if not config_name.exists():
        # Previously this was only logged and execution carried on into the
        # config loader; fail here so the reported cause is unambiguous.
        message = f"Config file {config_name} does not exist"
        logger.error(message)
        raise FileNotFoundError(message)

    cfg: Config = Config.fromfile(config_name)
    # Only the generated path is kept; the TemporaryDirectory object itself
    # is not retained.
    cfg.work_dir = tempfile.TemporaryDirectory().name
    cfg.log_level = "WARN"
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    init_default_scope(cfg.get("scope", "mmseg"))

    # A single value means a square input; two values are used as given.
    if len(args.shape) == 1:
        input_shape = (3, args.shape[0], args.shape[0])
    elif len(args.shape) == 2:
        input_shape = (3,) + tuple(args.shape)
    else:
        raise ValueError("invalid input shape")
    result = {}

    model: BaseSegmentor = MODELS.build(cfg.model)
    # The auxiliary head is dropped so it is not included in the measurement.
    if hasattr(model, "auxiliary_head"):
        model.auxiliary_head = None
    if torch.cuda.is_available():
        model.cuda()
    model = revert_sync_batchnorm(model)
    result["ori_shape"] = input_shape[-2:]
    result["pad_shape"] = input_shape[-2:]
    data_batch = {
        "inputs": [torch.rand(input_shape)],
        "data_samples": [SegDataSample(metainfo=result)],
    }
    data = model.data_preprocessor(data_batch)
    model.eval()
    if cfg.model.decode_head.type in ["MaskFormerHead", "Mask2FormerHead"]:
        # TODO: Support MaskFormer and Mask2Former
        raise NotImplementedError(
            "MaskFormer and Mask2Former are not " "supported yet."
        )
    outputs = get_model_complexity_info(
        model, input_shape, inputs=data["inputs"], show_table=False, show_arch=False
    )
    result["flops"] = _format_size(outputs["flops"])
    result["params"] = _format_size(outputs["params"])
    result["compute_type"] = "direct: randomly generate a picture"

    # Second opinion from torchprofile.
    # NOTE(review): the helper is named `profile_macs`, so this value is
    # presumably a multiply-accumulate count even though it is stored under
    # a "flops" key -- confirm before comparing the two numbers.
    tp_flops = profile_macs(model, data["inputs"])
    result["torchprofile_flops"] = _format_size(tp_flops)

    return result


def main():
    """Run the measurement and print a human-readable report."""
    args = parse_args()
    logger = MMLogger.get_instance(name="MMLogger")

    result = inference(args, logger)
    split_line = "=" * 30
    ori_shape = result["ori_shape"]
    pad_shape = result["pad_shape"]
    flops = result["flops"]
    torchprofile_flops = result["torchprofile_flops"]
    params = result["params"]
    compute_type = result["compute_type"]

    if pad_shape != ori_shape:
        print(
            f"{split_line}\nUse size divisor set input shape "
            f"from {ori_shape} to {pad_shape}"
        )
    print(
        f"{split_line}\nCompute type: {compute_type}\n"
        f"Input shape: {pad_shape}\nFlops: {flops}\nFlops (torchprofile): {torchprofile_flops}\n"
        f"Params: {params}\n{split_line}"
    )
    print(
        "!!!Please be cautious if you use the results in papers. "
        "You may need to check if all ops are supported and verify "
        "that the flops computation is correct."
    )
@MODELS.register_module()
class PaCaSegHead(BaseDecodeHead):
    """The Patch-to-Cluster Attention head for semantic segmentation.

    Every selected backbone feature map is projected to ``channels`` with a
    1x1 conv, resized to the resolution of the first selected map and
    concatenated. A clustering branch predicts ``num_classes`` maps from the
    concatenated features; their spatial softmax pools the features into
    ``num_classes`` cluster tokens, which serve as keys/values for attention
    from every pixel (query). The raw clustering maps are additionally
    supervised with ``aux_loss_decode`` during training.

    Args:
        interpolate_mode (str): Interpolation mode used when resizing the
            per-level features. Default: 'bilinear'.
        aux_loss_decode (dict): Config of the loss applied to the raw
            clustering maps. Default: cross entropy with weight 0.4.
        **kwargs: Forwarded to ``BaseDecodeHead``; ``input_transform`` is
            fixed to ``"multiple_select"`` by this class.

    Raises:
        TypeError: if ``aux_loss_decode`` is not a dict.
    """

    def __init__(
        self,
        interpolate_mode="bilinear",
        aux_loss_decode=dict(
            type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4
        ),
        **kwargs,
    ):
        super().__init__(input_transform="multiple_select", **kwargs)

        self.interpolate_mode = interpolate_mode
        num_inputs = len(self.in_channels)

        assert num_inputs == len(self.in_index)

        fused_channels = self.channels * num_inputs

        # One 1x1 projection per selected input level.
        self.convs = nn.ModuleList(
            [
                ConvModule(
                    in_channels=level_channels,
                    out_channels=self.channels,
                    kernel_size=1,
                    stride=1,
                    norm_cfg=self.norm_cfg,
                    act_cfg=self.act_cfg,
                )
                for level_channels in self.in_channels
            ]
        )

        # Query branch: fused features -> (B, H*W, channels).
        self.q = nn.Sequential(
            ConvModule(
                in_channels=fused_channels,
                out_channels=self.channels,
                kernel_size=1,
                norm_cfg=self.norm_cfg,
                act_cfg=self.act_cfg,
            ),
            Rearrange("B C H W -> B (H W) C"),
        )

        # Clustering branch: one output map per class.
        self.clustering = nn.Sequential(
            ConvModule(
                in_channels=fused_channels,
                out_channels=self.channels,
                kernel_size=1,
                norm_cfg=self.norm_cfg,
                act_cfg=self.act_cfg,
            ),
            nn.Conv2d(
                self.channels, self.num_classes, kernel_size=1
            ),  # TODO: bias=False
        )

        # Key and value branches share the same layout but not weights.
        self.k = self._build_cluster_proj(fused_channels)
        self.v = self._build_cluster_proj(fused_channels)

        self.proj = ConvModule(
            in_channels=self.channels,
            out_channels=self.channels,
            kernel_size=1,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg,
        )

        if not isinstance(aux_loss_decode, dict):
            # The message used to contain a run of indentation spaces caused
            # by a backslash continuation inside the string literal.
            raise TypeError(
                f"aux_loss_decode must be a dict, but got {type(aux_loss_decode)}"
            )
        self.aux_loss_decode = MODELS.build(aux_loss_decode)

    def _build_cluster_proj(self, in_dim):
        """Build Linear -> SyncBatchNorm -> activation for (B, M, C) tokens.

        The normalisation layer is hard-coded to ``nn.SyncBatchNorm`` (it is
        not built from ``norm_cfg``); the tokens are transposed around it so
        that the channel axis is the one being normalised.
        """
        return nn.Sequential(
            nn.Linear(in_dim, self.channels),
            Rearrange("B M C -> B C M"),
            nn.SyncBatchNorm(self.channels),
            Rearrange("B C M -> B M C"),
            build_activation_layer(self.act_cfg),
        )

    def forward(self, inputs):
        """Compute segmentation logits.

        Args:
            inputs: Multi-level backbone features; the levels to use are
                picked by the inherited ``_transform_inputs``.

        Returns:
            In training mode a tuple ``(seg_logits, c_raw)`` where ``c_raw``
            holds the raw clustering maps (B, num_classes, H, W); otherwise
            only ``seg_logits``.
        """
        inputs = self._transform_inputs(inputs)

        # Bring every level to the resolution of the first selected level.
        target_size = inputs[0].shape[2:]
        outs = [
            resize(
                input=conv(feat),
                size=target_size,
                mode=self.interpolate_mode,
                align_corners=self.align_corners,
            )
            for feat, conv in zip(inputs, self.convs)
        ]

        x = torch.cat(outs, dim=1)
        H, W = x.shape[2:]

        q = self.q(x)  # B N C

        c_raw = self.clustering(x)  # B M H W
        c = rearrange(c_raw, "B M H W -> B M (H W)")
        # Each cluster's weights are normalised over the spatial positions.
        c = c.softmax(dim=-1)

        x_ = rearrange(x, "B C H W -> B (H W) C")
        z = c @ x_  # B M C
        k = self.k(z)
        v = self.v(z)

        # Pixel-to-cluster attention; the scores are not rescaled before the
        # softmax over clusters.
        attn = q @ k.transpose(-2, -1)
        attn = attn.softmax(dim=-1)

        out = attn @ v  # B N C
        out = rearrange(out, "B (H W) C -> B C H W", H=H, W=W).contiguous()
        out = self.proj(out)

        out = self.cls_seg(out)

        if self.training:
            return out, c_raw
        return out

    def loss(
        self,
        inputs: Tuple[Tensor],
        batch_data_samples: SampleList,
        train_cfg: ConfigType,
    ) -> Tuple[dict, dict]:
        """Forward function for training.

        Args:
            inputs (Tuple[Tensor]): List of multi-level img features.
            batch_data_samples (list[:obj:`SegDataSample`]): The seg
                data samples. It usually includes information such
                as `img_metas` or `gt_semantic_seg`.
            train_cfg (dict): The training config. Not used by this head.

        Returns:
            Tuple[dict, dict]: ``(losses, aux_losses)`` -- the losses on the
            segmentation logits and the losses on the raw clustering maps.
        """
        seg_logits, c_raw = self.forward(inputs)
        aux_losses = self.aux_loss_by_paca(c_raw, batch_data_samples)
        losses = self.loss_by_feat(seg_logits, batch_data_samples)
        return losses, aux_losses

    def aux_loss_by_paca(
        self, seg_logits: Tensor, batch_data_samples: SampleList
    ) -> dict:
        """Compute the auxiliary loss on the raw clustering maps.

        Args:
            seg_logits (Tensor): Raw clustering maps, treated as per-class
                logits and resized bilinearly to the label resolution.
            batch_data_samples (SampleList): Samples providing the labels.

        Returns:
            dict: one entry per ``loss_name`` of ``aux_loss_decode`` (values
            of losses sharing a name are summed) plus ``"acc_seg"``.
        """
        seg_label = self._stack_batch_gt(batch_data_samples)
        loss = dict()
        seg_logits = resize(
            input=seg_logits,
            size=seg_label.shape[2:],
            mode="bilinear",
            align_corners=self.align_corners,
        )
        if self.sampler is not None:
            seg_weight = self.sampler.sample(seg_logits, seg_label)
        else:
            seg_weight = None
        seg_label = seg_label.squeeze(1)

        if isinstance(self.aux_loss_decode, nn.ModuleList):
            aux_losses_decode = self.aux_loss_decode
        else:
            aux_losses_decode = [self.aux_loss_decode]

        for loss_decode in aux_losses_decode:
            value = loss_decode(
                seg_logits,
                seg_label,
                weight=seg_weight,
                ignore_index=self.ignore_index,
            )
            if loss_decode.loss_name not in loss:
                loss[loss_decode.loss_name] = value
            else:
                loss[loss_decode.loss_name] += value

        loss["acc_seg"] = accuracy(
            seg_logits, seg_label, ignore_index=self.ignore_index
        )
        return loss
@MODELS.register_module()
class PaCaEncoderDecoder(EncoderDecoder):
    """Encoder-decoder segmentor whose decode head returns two loss dicts.

    The decode head's ``loss`` is expected to return an indexable pair: the
    first entry is reported under the ``decode`` prefix and the second under
    the ``paca`` prefix.
    """

    def __init__(self, **kwargs) -> None:
        super().__init__(**kwargs)

    def _decode_head_forward_train(
        self, inputs: List[Tensor], data_samples: SampleList
    ) -> dict:
        """Run the decode head in training and collect its prefixed losses."""
        head_losses = self.decode_head.loss(inputs, data_samples, self.train_cfg)

        merged = {}
        for position, prefix in enumerate(("decode", "paca")):
            merged.update(add_prefix(head_losses[position], prefix))
        return merged
# TODO: support fuse_conv_bn, visualization, and format_only
def parse_args():
    """Parse the command line of the test script.

    Besides returning the namespace, this exports ``LOCAL_RANK`` into the
    environment (from ``--local_rank``/``--local-rank``) when the launcher
    has not already set it.

    Returns:
        argparse.Namespace: the parsed arguments.
    """
    parser = argparse.ArgumentParser(description="MMSeg test (and eval) a model")
    parser.add_argument("config", help="train config file path")
    parser.add_argument("checkpoint", help="checkpoint file")
    parser.add_argument(
        "--work-dir",
        help=(
            # A separating space was missing between the two literals.
            "if specified, the evaluation metric results will be dumped "
            "into the directory as json"
        ),
    )
    parser.add_argument(
        "--out",
        type=str,
        help="The directory to save output prediction for offline evaluation",
    )
    parser.add_argument("--show", action="store_true", help="show prediction results")
    parser.add_argument(
        "--show-dir",
        help="directory where painted images will be saved. "
        "If specified, it will be automatically saved "
        "to the work_dir/timestamp/show_dir",
    )
    parser.add_argument(
        "--wait-time", type=float, default=2, help="the interval of show (s)"
    )
    parser.add_argument(
        "--cfg-options",
        nargs="+",
        action=DictAction,
        help="override some settings in the used config, the key-value pair "
        "in xxx=yyy format will be merged into config file. If the value to "
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        "Note that the quotation marks are necessary and that no white space "
        "is allowed.",
    )
    parser.add_argument(
        "--launcher",
        choices=["none", "pytorch", "slurm", "mpi"],
        default="none",
        help="job launcher",
    )
    parser.add_argument("--tta", action="store_true", help="Test time augmentation")
    # Both spellings are accepted: newer `torch.distributed.launch` passes
    # `--local-rank`, older versions pass `--local_rank`. The destination
    # attribute stays `local_rank`.
    parser.add_argument("--local_rank", "--local-rank", type=int, default=0)
    args = parser.parse_args()
    os.environ.setdefault("LOCAL_RANK", str(args.local_rank))

    return args


def trigger_visualization_hook(cfg, args):
    """Switch on drawing in the config's visualization hook.

    Args:
        cfg: Config object exposing ``default_hooks`` (a mapping) and, when
            ``args.show_dir`` is set, ``visualizer`` (a mapping).
        args: Namespace providing ``show``, ``wait_time`` and ``show_dir``.

    Returns:
        The same ``cfg`` object, modified in place.

    Raises:
        RuntimeError: if ``default_hooks`` has no ``"visualization"`` entry.
    """
    default_hooks = cfg.default_hooks
    if "visualization" not in default_hooks:
        raise RuntimeError(
            # A separating space was missing after the first sentence.
            "VisualizationHook must be included in default_hooks. "
            "refer to usage "
            "\"visualization=dict(type='VisualizationHook')\""
        )

    visualization_hook = default_hooks["visualization"]
    # Turn on visualization
    visualization_hook["draw"] = True
    if args.show:
        visualization_hook["show"] = True
        visualization_hook["wait_time"] = args.wait_time
    if args.show_dir:
        visualizer = cfg.visualizer
        visualizer["save_dir"] = args.show_dir

    return cfg


def main():
    """Load the config, apply the CLI overrides and run testing."""
    args = parse_args()

    # load config
    cfg = Config.fromfile(args.config)
    cfg.launcher = args.launcher
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    # work_dir is determined in this priority: CLI > setting in the config
    # file > config filename
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    elif cfg.get("work_dir", None) is None:
        config_stem = osp.splitext(osp.basename(args.config))[0]
        cfg.work_dir = osp.join("./work_dirs", config_stem)

    cfg.load_from = args.checkpoint

    if args.show or args.show_dir:
        cfg = trigger_visualization_hook(cfg, args)

    if args.tta:
        # Use the TTA pipeline and wrap the model config in the TTA model.
        cfg.test_dataloader.dataset.pipeline = cfg.tta_pipeline
        cfg.tta_model.module = cfg.model
        cfg.model = cfg.tta_model

    # add output_dir in metric
    if args.out is not None:
        cfg.test_evaluator["output_dir"] = args.out
        cfg.test_evaluator["keep_results"] = True

    # build the runner from config
    runner = Runner.from_cfg(cfg)

    # start testing
    runner.test()
def parse_args():
    """Parse the command line of the training script.

    Besides returning the namespace, this exports ``LOCAL_RANK`` into the
    environment (from ``--local_rank``/``--local-rank``) when the launcher
    has not already set it.

    Returns:
        argparse.Namespace: the parsed arguments.
    """
    parser = argparse.ArgumentParser(description="Train a segmentor")
    parser.add_argument("config", help="train config file path")
    parser.add_argument("--work-dir", help="the dir to save logs and models")
    parser.add_argument(
        "--resume",
        action="store_true",
        default=False,
        help="resume from the latest checkpoint in the work_dir automatically",
    )
    parser.add_argument(
        "--amp",
        action="store_true",
        default=False,
        help="enable automatic-mixed-precision training",
    )
    parser.add_argument(
        "--cfg-options",
        nargs="+",
        action=DictAction,
        help="override some settings in the used config, the key-value pair "
        "in xxx=yyy format will be merged into config file. If the value to "
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        "Note that the quotation marks are necessary and that no white space "
        "is allowed.",
    )
    parser.add_argument(
        "--launcher",
        choices=["none", "pytorch", "slurm", "mpi"],
        default="none",
        help="job launcher",
    )
    # Both spellings are accepted: newer `torch.distributed.launch` passes
    # `--local-rank`, older versions pass `--local_rank`. The destination
    # attribute stays `local_rank`.
    parser.add_argument("--local_rank", "--local-rank", type=int, default=0)
    args = parser.parse_args()
    os.environ.setdefault("LOCAL_RANK", str(args.local_rank))

    return args


def main():
    """Load the config, apply the CLI overrides and run training."""
    args = parse_args()

    # load config
    cfg = Config.fromfile(args.config)
    cfg.launcher = args.launcher
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    # work_dir is determined in this priority: CLI > setting in the config
    # file > config filename
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    elif cfg.get("work_dir", None) is None:
        config_stem = osp.splitext(osp.basename(args.config))[0]
        cfg.work_dir = osp.join("./work_dirs", config_stem)

    # enable automatic-mixed-precision training
    if args.amp:
        optim_wrapper = cfg.optim_wrapper.type
        if optim_wrapper == "AmpOptimWrapper":
            print_log(
                "AMP training is already enabled in your config.",
                logger="current",
                level=logging.WARNING,
            )
        else:
            assert optim_wrapper == "OptimWrapper", (
                "`--amp` is only supported when the optimizer wrapper type is "
                f"`OptimWrapper` but got {optim_wrapper}."
            )
            # Swap in the AMP wrapper with dynamic loss scaling.
            cfg.optim_wrapper.type = "AmpOptimWrapper"
            cfg.optim_wrapper.loss_scale = "dynamic"

    # resume training
    cfg.resume = args.resume

    # build the runner from config
    if "runner_type" not in cfg:
        # build the default runner
        runner = Runner.from_cfg(cfg)
    else:
        # build customized runner from the registry
        # if 'runner_type' is set in the cfg
        runner = RUNNERS.build(cfg)

    # start training
    runner.train()
" yn 31 | case $yn in 32 | [Yy]* ) rm -r $WORK_DIR; mkdir -p $WORK_DIR; break;; 33 | [Nn]* ) exit;; 34 | * ) echo "Please answer yes or no.";; 35 | esac 36 | done 37 | else 38 | echo "Resume" 39 | fi 40 | else 41 | mkdir -p $WORK_DIR 42 | fi 43 | 44 | # export NCCL_DEBUG=INFO 45 | 46 | TORCH_DISTRIBUTED_DEBUG=INFO OMP_NUM_THREADS=1 MKL_NUM_THREADS=1 CUDA_VISIBLE_DEVICES=$GPUS \ 47 | torchrun \ 48 | --rdzv_backend c10d \ 49 | --rdzv_endpoint localhost:$PORT \ 50 | --nnodes 1 \ 51 | --nproc_per_node $NUM_GPUS \ 52 | $DIR/train_mmseg.py $CONFIG_FILE \ 53 | --amp \ 54 | --resume \ 55 | --launcher pytorch \ 56 | --work-dir $WORK_DIR \ 57 | ${@:8} 58 | --------------------------------------------------------------------------------