├── .gitignore
├── LICENSE
├── README.md
├── assets
    ├── paca-seghead.png
    ├── paca-vit-onsite.png
    ├── paca-vit-teacher.png
    ├── paca_scheme.png
    └── paca_teaser.png
├── classification
    ├── benchmark.sh
    ├── benchmark_timm.py
    ├── configs
    │   ├── imagenet_conv_adamw.yml
    │   └── imagenet_vit_adamw.yml
    ├── timm_custom
    │   ├── __init__.py
    │   ├── data
    │   │   ├── __init__.py
    │   │   ├── loader.py
    │   │   └── transform.py
    │   ├── optim
    │   │   ├── __init__.py
    │   │   └── layerwise_lr_decay.py
    │   └── utils
    │   │   ├── __init__.py
    │   │   ├── cuda.py
    │   │   └── summary.py
    ├── train_timm.py
    ├── train_timm.sh
    ├── validate.sh
    └── validate_timm.py
├── detection
    ├── configs
    │   ├── _base_
    │   │   ├── default_runtime.py
    │   │   ├── models
    │   │   │   ├── cascade-mask-rcnn_r50_fpn.py
    │   │   │   ├── cascade-rcnn_r50_fpn.py
    │   │   │   ├── fast-rcnn_r50_fpn.py
    │   │   │   ├── faster-rcnn_r50-caffe-c4.py
    │   │   │   ├── faster-rcnn_r50-caffe-dc5.py
    │   │   │   ├── faster-rcnn_r50_fpn.py
    │   │   │   ├── mask-rcnn_r50-caffe-c4.py
    │   │   │   ├── mask-rcnn_r50_fpn.py
    │   │   │   ├── retinanet_r50_fpn.py
    │   │   │   ├── rpn_r50-caffe-c4.py
    │   │   │   ├── rpn_r50_fpn.py
    │   │   │   └── ssd300.py
    │   │   └── schedules
    │   │   │   ├── schedule_1x.py
    │   │   │   ├── schedule_20e.py
    │   │   │   └── schedule_2x.py
    │   └── paca_vit
    │   │   └── mask_rcnn_1x
    │   │       ├── mask_rcnn_pacavit_base_p2cconv_100_0_mstrain_480_800_1x_coco.py
    │   │       ├── mask_rcnn_pacavit_convmixer_base_100_mstrain_480_800_1x_coco.py
    │   │       ├── mask_rcnn_pacavit_convmixer_small_100_mstrain_480_800_1x_coco.py
    │   │       ├── mask_rcnn_pacavit_convmixer_tiny_100_mstrain_480_800_1x_coco.py
    │   │       ├── mask_rcnn_pacavit_small_p2cconv_100_0_mstrain_480_800_1x_coco.py
    │   │       ├── mask_rcnn_pacavit_small_p2cconv_100_49_mstrain_480_800_1x_coco.py
    │   │       ├── mask_rcnn_pacavit_small_p2cconv_100_blockwise_mstrain_480_800_1x_coco.py
    │   │       ├── mask_rcnn_pacavit_small_p2cconv_100_mstrain_480_800_1x_coco.py
    │   │       ├── mask_rcnn_pacavit_small_p2cconv_2_0_mstrain_480_800_1x_coco.py
    │   │       ├── mask_rcnn_pacavit_small_p2cconv_49_0_mstrain_480_800_1x_coco.py
    │   │       ├── mask_rcnn_pacavit_small_p2cconv_49_100_mstrain_480_800_1x_coco.py
    │   │       ├── mask_rcnn_pacavit_small_p2cmlp_100_0_mstrain_480_800_1x_coco.py
    │   │       └── mask_rcnn_pacavit_tiny_p2cconv_100_0_mstrain_480_800_1x_coco.py
    ├── get_flops.py
    ├── test_mmdet.py
    ├── test_mmdet.sh
    ├── train_mmdet.py
    └── train_mmdet.sh
├── environment.yaml
├── install.sh
├── models
    ├── __init__.py
    ├── layers
    │   ├── __init__.py
    │   ├── blur_pool.py
    │   └── downsample.py
    └── paca_vit.py
└── segmentation
    ├── configs
        ├── _base_
        │   ├── default_runtime.py
        │   ├── models
        │   │   ├── paca_head.py
        │   │   └── upernet_swin.py
        │   └── schedules
        │   │   ├── schedule_160k.py
        │   │   ├── schedule_20k.py
        │   │   ├── schedule_240k.py
        │   │   ├── schedule_320k.py
        │   │   ├── schedule_40k.py
        │   │   └── schedule_80k.py
        └── paca_vit
        │   ├── paca_head
        │       ├── pacahead_pacavit_base_p2cconv_100_0_512x512_160k_ade20k.py
        │       ├── pacahead_pacavit_convmixer_base_100_512x512_160k_ade20k.py
        │       ├── pacahead_pacavit_convmixer_small_100_512x512_160k_ade20k.py
        │       ├── pacahead_pacavit_convmixer_tiny_100_512x512_160k_ade20k.py
        │       ├── pacahead_pacavit_small_p2cconv_100_0_512x512_160k_ade20k.py
        │       ├── pacahead_pacavit_small_p2cconv_100_49_512x512_160k_ade20k.py
        │       ├── pacahead_pacavit_small_p2cconv_100_512x512_160k_ade20k.py
        │       ├── pacahead_pacavit_small_p2cconv_100_blockwise_512x512_160k_ade20k.py
        │       ├── pacahead_pacavit_small_p2cconv_2_0_512x512_160k_ade20k.py
        │       ├── pacahead_pacavit_small_p2cconv_49_0_512x512_160k_ade20k.py
        │       ├── pacahead_pacavit_small_p2cconv_49_100_512x512_160k_ade20k.py
        │       ├── pacahead_pacavit_small_p2cmlp_100_0_512x512_160k_ade20k.py
        │       └── pacahead_pacavit_tiny_p2cconv_100_0_512x512_160k_ade20k.py
        │   └── upernet
        │       ├── upernet_pacavit_base_p2cconv_100_0_512x512_160k_ade20k.py
        │       ├── upernet_pacavit_small_p2cconv_100_0_512x512_160k_ade20k.py
        │       └── upernet_pacavit_tiny_p2cconv_100_0_512x512_160k_ade20k.py
    ├── get_flops.py
    ├── mmseg_custom
        └── models
        │   ├── __init__.py
        │   ├── decode_heads
        │       ├── __init__.py
        │       └── paca_head.py
        │   └── segmentors
        │       ├── __init__.py
        │       └── encoder_decoder_paca.py
    ├── test_mmseg.py
    ├── test_mmseg.sh
    ├── train_mmseg.py
    └── train_mmseg.sh


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/en/_build/
 68 | docs/zh_cn/_build/
 69 | 
 70 | # PyBuilder
 71 | target/
 72 | 
 73 | # Jupyter Notebook
 74 | .ipynb_checkpoints
 75 | 
 76 | # pyenv
 77 | .python-version
 78 | 
 79 | # celery beat schedule file
 80 | celerybeat-schedule
 81 | 
 82 | # SageMath parsed files
 83 | *.sage.py
 84 | 
 85 | # Environments
 86 | .env
 87 | .venv
 88 | env/
 89 | venv/
 90 | ENV/
 91 | env.bak/
 92 | venv.bak/
 93 | 
 94 | # Spyder project settings
 95 | .spyderproject
 96 | .spyproject
 97 | 
 98 | # Rope project settings
 99 | .ropeproject
100 | 
101 | # mkdocs documentation
102 | /site
103 | 
104 | # mypy
105 | .mypy_cache/
106 | 
107 | # data/
108 | # data
109 | .vscode
110 | .idea
111 | .DS_Store
112 | 
113 | # custom
114 | *.pkl
115 | *.pkl.json
116 | *.log.json
117 | work_dirs/
118 | private/
119 | external/
120 | datasets/
121 | outputs/
122 | pretrained-checkpoints/
123 | __pycache__/
124 | 
125 | # Pytorch
126 | *.pth
127 | *.py~
128 | *.sh~
129 | 
130 | # my experimental stuff
131 | my_*.*
132 | 
133 | 
134 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | =======================================================================
 3 | 
 4 | Copyright (c) Meta Platforms, Inc. and affiliates.
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 
24 | 


--------------------------------------------------------------------------------
/assets/paca-seghead.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iVMCL/PaCaViT/4307018d4925786d9b9ecd75d7d2a011587c5e9f/assets/paca-seghead.png


--------------------------------------------------------------------------------
/assets/paca-vit-onsite.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iVMCL/PaCaViT/4307018d4925786d9b9ecd75d7d2a011587c5e9f/assets/paca-vit-onsite.png


--------------------------------------------------------------------------------
/assets/paca-vit-teacher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iVMCL/PaCaViT/4307018d4925786d9b9ecd75d7d2a011587c5e9f/assets/paca-vit-teacher.png


--------------------------------------------------------------------------------
/assets/paca_scheme.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iVMCL/PaCaViT/4307018d4925786d9b9ecd75d7d2a011587c5e9f/assets/paca_scheme.png


--------------------------------------------------------------------------------
/assets/paca_teaser.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iVMCL/PaCaViT/4307018d4925786d9b9ecd75d7d2a011587c5e9f/assets/paca_teaser.png


--------------------------------------------------------------------------------
/classification/benchmark.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | if [ "$#" -lt 4 ]; then
 4 |     echo "Usage: me model_name img_size num_classes gpu [others]"
 5 |     exit
 6 | fi
 7 | 
 8 | MODEL_NAME=$1
 9 | IMAGE_SIZE=$2
10 | NUM_CLASSES=$3
11 | GPU=$4
12 | 
13 | 
14 | PYTHON=${PYTHON:-"python"}
15 | 
16 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
17 | 
18 | RESULT_FILE=$DIR/../work_dirs/classification/all_benchmark_results.csv
19 | 
20 | OMP_NUM_THREADS=1 CUDA_VISIBLE_DEVICES=$GPU $PYTHON \
21 |   $DIR/benchmark_timm.py  --results-file $RESULT_FILE \
22 |   --model $MODEL_NAME --bench inference \
23 |   --num-bench-iter 100 \
24 |   --batch-size 128 --img-size $IMAGE_SIZE --num-classes $NUM_CLASSES \
25 |   --opt adamw --opt-eps 1e-8 --momentum 0.9 --weight-decay 0.05 \
26 |   --smoothing 0.1 --drop-path 0.1 \
27 |   --amp --channels-last \
28 |   ${@:5}
29 | #   --clip-grad 1.0 --clip-mode norm
30 | 


--------------------------------------------------------------------------------
/classification/configs/imagenet_conv_adamw.yml:
--------------------------------------------------------------------------------
 1 | # based on convnext 
 2 | batch_size: 256
 3 | train_interpolation: 'bicubic'
 4 | epochs: 300
 5 | opt: 'adamw'
 6 | opt_eps: 1e-8
 7 | opt_betas:
 8 |   - 0.9
 9 |   - 0.999
10 | momentum: 0.9
11 | weight_decay: 0.05
12 | lr_base: 4e-3 # base lr,  which will be auto-scaled: lr * batch size * nb_gpus / lr_base_size
13 | lr_base_size: 4096 # from convnext
14 | lr_base_scale: 'linear' # linear or sqrt
15 | auto_scale_warmup_min_lr: False
16 | grad_accumulation_steps: 1
17 | min_lr: 1e-6 
18 | sched: 'cosine'
19 | warmup_epochs: 20
20 | warmup_lr: 1e-6  
21 | cooldown_epochs: 0
22 | amp: True
23 | # clip_grad: 5.0
24 | color_jitter: 0.4
25 | smoothing: 0.1
26 | reprob: 0.25
27 | remode: 'pixel'
28 | recount: 1
29 | aa: 'rand-m9-mstd0.5-inc1'
30 | mixup: 0.8
31 | cutmix: 1.0
32 | mixup_prob: 1.0
33 | mixup_switch_prob: 0.5
34 | mixup_mode: 'batch'
35 | dist_bn: 'reduce' 
36 | # model_ema: True 
37 | # model_ema_decay: 0.9999
38 | 


--------------------------------------------------------------------------------
/classification/configs/imagenet_vit_adamw.yml:
--------------------------------------------------------------------------------
 1 | batch_size: 128
 2 | train_interpolation: 'bicubic'
 3 | epochs: 300
 4 | opt: 'adamw'
 5 | opt_eps: 1e-8
 6 | opt_betas:
 7 |   - 0.9
 8 |   - 0.999
 9 | momentum: 0.9
10 | weight_decay: 0.05
11 | lr_base: 5e-4 # base lr,  which will be auto-scaled: lr * batch size * nb_gpus / lr_base_size
12 | lr_base_size: 512
13 | lr_base_scale: 'linear' # linear or sqrt
14 | auto_scale_warmup_min_lr: True
15 | min_lr: 5e-6 #  which will be auto-scaled: min_lr * batch size * nb_gpus / lr_base_size
16 | sched: 'cosine'
17 | warmup_epochs: 5
18 | warmup_lr: 5e-7  # which will be auto-scaled: warmup_lr * batch size * nb_gpus / lr_base_size
19 | cooldown_epochs: 0
20 | amp: True
21 | # clip_grad: 5.0
22 | color_jitter: 0.4
23 | smoothing: 0.1
24 | reprob: 0.25
25 | remode: 'pixel'
26 | recount: 1
27 | aa: 'rand-m9-mstd0.5-inc1'
28 | mixup: 0.8
29 | cutmix: 1.0
30 | mixup_prob: 1.0
31 | mixup_switch_prob: 0.5
32 | mixup_mode: 'batch'
33 | dist_bn: '' #'reduce'
34 | # model_ema: True
35 | # model_ema_decay: 0.9999


--------------------------------------------------------------------------------
/classification/timm_custom/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iVMCL/PaCaViT/4307018d4925786d9b9ecd75d7d2a011587c5e9f/classification/timm_custom/__init__.py


--------------------------------------------------------------------------------
/classification/timm_custom/data/__init__.py:
--------------------------------------------------------------------------------
1 | from .loader import create_loader_v2
2 | from .transform import create_transform_v2
3 | 
4 | __all__ = ['create_loader_v2', 'create_transform_v2']
5 | 


--------------------------------------------------------------------------------
/classification/timm_custom/data/loader.py:
--------------------------------------------------------------------------------
  1 | # Modifications
  2 | #    handle CIFAR
  3 | """ Loader Factory, Fast Collate, CUDA Prefetcher
  4 | 
  5 | Prefetcher and Fast Collate inspired by NVIDIA APEX example at
  6 | https://github.com/NVIDIA/apex/commit/d5e2bb4bdeedd27b1dfaf5bb2b24d6c000dee9be#diff-cf86c282ff7fba81fad27a559379d5bf
  7 | 
  8 | Hacked together by / Copyright 2020 Ross Wightman
  9 | """
 10 | import random
 11 | from functools import partial
 12 | from typing import Callable
 13 | 
 14 | import torch.utils.data
 15 | from torchvision import transforms
 16 | import numpy as np
 17 | 
 18 | from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
 19 | from timm.data.dataset import IterableImageDataset
 20 | from timm.data.distributed_sampler import OrderedDistributedSampler, RepeatAugSampler
 21 | from timm.data.random_erasing import RandomErasing
 22 | from timm.data.mixup import FastCollateMixup
 23 | from timm.data.transforms_factory import create_transform
 24 | 
 25 | from timm.data.loader import (fast_collate, PrefetchLoader,
 26 |                               MultiEpochsDataLoader, _worker_init,
 27 |                               _RepeatSampler)
 28 | 
 29 | from .transform import create_transform_v2
 30 | 
 31 | 
 32 | def create_loader_v2(
 33 |         dataset,
 34 |         input_size,
 35 |         batch_size,
 36 |         is_training=False,
 37 |         use_prefetcher=True,
 38 |         no_aug=False,
 39 |         re_prob=0.,
 40 |         re_mode='const',
 41 |         re_count=1,
 42 |         re_split=False,
 43 |         scale=None,
 44 |         ratio=None,
 45 |         hflip=0.5,
 46 |         vflip=0.,
 47 |         color_jitter=0.4,
 48 |         auto_augment=None,
 49 |         num_aug_repeats=0,
 50 |         num_aug_splits=0,
 51 |         interpolation='bilinear',
 52 |         mean=IMAGENET_DEFAULT_MEAN,
 53 |         std=IMAGENET_DEFAULT_STD,
 54 |         num_workers=1,
 55 |         distributed=False,
 56 |         crop_pct=None,
 57 |         crop_mode=None,
 58 |         collate_fn=None,
 59 |         pin_memory=False,
 60 |         fp16=False,  # deprecated, use img_dtype
 61 |         img_dtype=torch.float32,
 62 |         device=torch.device('cuda'),
 63 |         tf_preprocessing=False,
 64 |         use_multi_epochs_loader=False,
 65 |         persistent_workers=True,
 66 |         worker_seeding='all',
 67 |         use_simple_random_crop=False,
 68 |         use_three_augment_ssl=False,
 69 | ):
 70 |     re_num_splits = 0
 71 |     if re_split:
 72 |         # apply RE to second half of batch if no aug split otherwise line up with aug split
 73 |         re_num_splits = num_aug_splits or 2
 74 |     dataset.transform = create_transform_v2(
 75 |         input_size,
 76 |         is_training=is_training,
 77 |         use_prefetcher=use_prefetcher,
 78 |         no_aug=no_aug,
 79 |         scale=scale,
 80 |         ratio=ratio,
 81 |         hflip=hflip,
 82 |         vflip=vflip,
 83 |         color_jitter=color_jitter,
 84 |         auto_augment=auto_augment,
 85 |         interpolation=interpolation,
 86 |         mean=mean,
 87 |         std=std,
 88 |         crop_pct=crop_pct,
 89 |         crop_mode=crop_mode,
 90 |         tf_preprocessing=tf_preprocessing,
 91 |         re_prob=re_prob,
 92 |         re_mode=re_mode,
 93 |         re_count=re_count,
 94 |         re_num_splits=re_num_splits,
 95 |         separate=num_aug_splits > 0,
 96 |         use_simple_random_crop=use_simple_random_crop,
 97 |         use_three_augment_ssl=use_three_augment_ssl,
 98 |     )
 99 | 
100 |     assert input_size is not None
101 |     if isinstance(input_size, (tuple, list)):
102 |         img_size = min(input_size[-2:])
103 |     else:
104 |         img_size = input_size
105 | 
106 |     if img_size <= 32 and is_training and not no_aug:  # CIFAR
107 |         dataset.transform.transforms[0] = transforms.RandomCrop(img_size,
108 |                                                                 padding=4)
109 | 
110 |     if isinstance(dataset, IterableImageDataset):
111 |         # give Iterable datasets early knowledge of num_workers so that sample estimates
112 |         # are correct before worker processes are launched
113 |         dataset.set_loader_cfg(num_workers=num_workers)
114 | 
115 |     sampler = None
116 |     if distributed and not isinstance(dataset, torch.utils.data.IterableDataset):
117 |         if is_training:
118 |             if num_aug_repeats:
119 |                 sampler = RepeatAugSampler(dataset, num_repeats=num_aug_repeats)
120 |             else:
121 |                 sampler = torch.utils.data.distributed.DistributedSampler(dataset)
122 |         else:
123 |             # This will add extra duplicate entries to result in equal num
124 |             # of samples per-process, will slightly alter validation results
125 |             sampler = OrderedDistributedSampler(dataset)
126 |     else:
127 |         assert num_aug_repeats == 0, "RepeatAugment not currently supported in non-distributed or IterableDataset use"
128 | 
129 |     if collate_fn is None:
130 |         collate_fn = fast_collate if use_prefetcher else torch.utils.data.dataloader.default_collate
131 | 
132 |     loader_class = torch.utils.data.DataLoader
133 |     if use_multi_epochs_loader:
134 |         loader_class = MultiEpochsDataLoader
135 | 
136 |     loader_args = dict(
137 |         batch_size=batch_size,
138 |         shuffle=not isinstance(dataset, torch.utils.data.IterableDataset) and sampler is None and is_training,
139 |         num_workers=num_workers,
140 |         sampler=sampler,
141 |         collate_fn=collate_fn,
142 |         pin_memory=pin_memory,
143 |         drop_last=is_training,
144 |         worker_init_fn=partial(_worker_init, worker_seeding=worker_seeding),
145 |         persistent_workers=persistent_workers
146 |     )
147 |     try:
148 |         loader = loader_class(dataset, **loader_args)
149 |     except TypeError as e:
150 |         loader_args.pop('persistent_workers')  # only in Pytorch 1.7+
151 |         loader = loader_class(dataset, **loader_args)
152 |     if use_prefetcher:
153 |         prefetch_re_prob = re_prob if is_training and not no_aug else 0.
154 |         loader = PrefetchLoader(
155 |             loader,
156 |             mean=mean,
157 |             std=std,
158 |             channels=input_size[0],
159 |             device=device,
160 |             fp16=fp16,  # deprecated, use img_dtype
161 |             img_dtype=img_dtype,
162 |             re_prob=prefetch_re_prob,
163 |             re_mode=re_mode,
164 |             re_count=re_count,
165 |             re_num_splits=re_num_splits
166 |         )
167 | 
168 |     return loader
169 | 
170 | 


--------------------------------------------------------------------------------
/classification/timm_custom/optim/__init__.py:
--------------------------------------------------------------------------------
1 | from .layerwise_lr_decay import layerwise_lr_decay
2 | 


--------------------------------------------------------------------------------
/classification/timm_custom/optim/layerwise_lr_decay.py:
--------------------------------------------------------------------------------
 1 | from itertools import islice
 2 | from typing import Optional, Callable, Tuple
 3 | import json
 4 | 
 5 | import torch
 6 | import torch.nn as nn
 7 | 
 8 | from timm.optim.optim_factory import _layer_map
 9 | 
10 | 
11 | # modified from timm.optim.optim_factory.param_groups_layer_decay
def layerwise_lr_decay(
    model: nn.Module,
    num_groups=12,
    weight_decay: float = 0.05,
    no_weight_decay_list: Tuple[str] = (),
    layer_decay: float = 0.75,
    _logger=None,
):
    """
    Parameter groups for layer-wise lr decay & weight decay
    Based on BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L58

    Args:
        model: module whose trainable parameters are grouped.
        num_groups: forwarded to ``_layer_map`` together with ``model``.
        weight_decay: weight decay assigned to the "decay" groups.
        no_weight_decay_list: parameter names that get a weight decay of 0.0
            in addition to all 1-D parameters.
        layer_decay: per-layer multiplier; layer ``i`` gets the scale
            ``layer_decay ** (layer_max - i)``, so the last layer has scale 1.
        _logger: optional logger; when given, the group layout (names, scales,
            decay values) is emitted through ``_logger.info`` as JSON.

    Returns:
        A list of dicts, one per (layer, decay/no_decay) combination that has
        at least one parameter, each with the keys ``"lr_scale"``,
        ``"weight_decay"`` and ``"params"``.
    """
    no_weight_decay_list = set(no_weight_decay_list)
    param_group_names = {}  # NOTE for debugging
    param_groups = {}

    # Mapping from parameter name to integer layer id.
    layer_map = _layer_map(model, num_groups=num_groups)

    num_layers = max(layer_map.values()) + 1
    layer_max = num_layers - 1
    layer_scales = [layer_decay ** (layer_max - i) for i in range(num_layers)]

    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue

        # no decay: all 1D parameters and model specific ones
        if param.ndim == 1 or name in no_weight_decay_list:
            g_decay = "no_decay"
            this_decay = 0.0
        else:
            g_decay = "decay"
            this_decay = weight_decay

        # Parameters missing from the map are treated as the last layer.
        layer_id = layer_map.get(name, layer_max)
        group_name = "layer_%d_%s" % (layer_id, g_decay)

        if group_name not in param_groups:
            this_scale = layer_scales[layer_id]
            param_group_names[group_name] = {
                "lr_scale": this_scale,
                "weight_decay": this_decay,
                "param_names": [],
            }
            param_groups[group_name] = {
                "lr_scale": this_scale,
                "weight_decay": this_decay,
                "params": [],
            }

        param_group_names[group_name]["param_names"].append(name)
        param_groups[group_name]["params"].append(param)

    # The parameter is spelled `_logger`; the previous `_loger` spelling
    # raised NameError on every call.
    if _logger is not None:
        _logger.info(f"\n{json.dumps(param_group_names, indent=2)}\n")

    return list(param_groups.values())
69 | 


--------------------------------------------------------------------------------
/classification/timm_custom/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .cuda import ApexScalerV2, NativeScalerV2
2 | from .summary import update_summary_v2
3 | 


--------------------------------------------------------------------------------
/classification/timm_custom/utils/cuda.py:
--------------------------------------------------------------------------------
 1 | """ CUDA / AMP utils
 2 | 
 3 | Hacked together by / Copyright 2020 Ross Wightman
 4 | """
 5 | import torch
 6 | 
 7 | try:
 8 |     from apex import amp
 9 |     has_apex = True
10 | except ImportError:
11 |     amp = None
12 |     has_apex = False
13 | 
14 | from timm.utils.clip_grad import dispatch_clip_grad
15 | 
16 | 
class ApexScalerV2:
    """Loss-scaling helper built on the module-level apex ``amp`` object.

    Calling an instance runs the backward pass on the apex-scaled loss and,
    unless ``update_grad`` is false, optionally clips gradients and steps the
    optimizer. ``parameters`` is accepted but not used by this class.
    """
    state_dict_key = "amp"

    def __call__(self, loss, optimizer, clip_grad=None, clip_mode='norm', parameters=None, create_graph=False, update_grad=True):
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward(create_graph=create_graph)
        if not update_grad:
            # Backward only: leave the optimizer untouched.
            return
        if clip_grad is not None:
            master_params = amp.master_params(optimizer)
            dispatch_clip_grad(master_params, clip_grad, mode=clip_mode)
        optimizer.step()

    def state_dict(self):
        """Return ``amp.state_dict()`` if amp defines it, otherwise None."""
        if 'state_dict' not in amp.__dict__:
            return None
        return amp.state_dict()

    def load_state_dict(self, state_dict):
        """Pass ``state_dict`` to ``amp.load_state_dict`` if amp defines it."""
        if 'load_state_dict' not in amp.__dict__:
            return
        amp.load_state_dict(state_dict)
36 | 
37 | 
class NativeScalerV2:
    """Loss-scaling helper built on ``torch.cuda.amp.GradScaler``.

    Calling an instance runs the backward pass on the scaled loss. Unless
    ``update_grad`` is false it then optionally unscales and clips gradients,
    steps the optimizer through the scaler and updates the scaler.
    """
    state_dict_key = "amp_scaler"

    def __init__(self):
        self._scaler = torch.cuda.amp.GradScaler()

    def __call__(self, loss, optimizer, clip_grad=None, clip_mode='norm', parameters=None, create_graph=False, update_grad=True):
        scaled_loss = self._scaler.scale(loss)
        scaled_loss.backward(create_graph=create_graph)
        if not update_grad:
            # Backward only: no unscale, step or scaler update.
            return
        if clip_grad is not None:
            assert parameters is not None
            # Gradients must be unscaled in place before they can be clipped.
            self._scaler.unscale_(optimizer)
            dispatch_clip_grad(parameters, clip_grad, mode=clip_mode)
        self._scaler.step(optimizer)
        self._scaler.update()

    def state_dict(self):
        """Return the wrapped scaler's state dict."""
        return self._scaler.state_dict()

    def load_state_dict(self, state_dict):
        """Load ``state_dict`` into the wrapped scaler."""
        self._scaler.load_state_dict(state_dict)
60 | 


--------------------------------------------------------------------------------
/classification/timm_custom/utils/summary.py:
--------------------------------------------------------------------------------
 1 | """ Summary utilities
 2 | 
 3 | Hacked together by / Copyright 2020 Ross Wightman
 4 | """
 5 | import csv
 6 | import os
 7 | from collections import OrderedDict
 8 | 
 9 | try:
10 |     import wandb
11 | except ImportError:
12 |     pass
13 | 
14 | 
def update_summary_v2(
    epoch,
    train_metrics,
    eval_metrics,
    filename,
    ema_eval_metrics=None,
    lr=None,
    write_header=False,
    log_wandb=False,
):
    """Append one epoch's metrics as a row to a CSV summary file.

    Args:
        epoch: value stored in the ``epoch`` column.
        train_metrics: mapping; each key is written as ``train_<key>``.
        eval_metrics: mapping; each key is written as ``eval_<key>``.
        filename: path of the CSV file, opened in append mode.
        ema_eval_metrics: optional mapping; each key is written as
            ``ema_eval_<key>``.
        lr: optional value stored in the ``lr`` column.
        write_header: write the CSV header row before the data row.
        log_wandb: also send the row to ``wandb.log``; this needs the
            module-level ``wandb`` import to have succeeded.
    """
    rowd = OrderedDict(epoch=epoch)
    rowd.update([("train_" + k, v) for k, v in train_metrics.items()])
    rowd.update([("eval_" + k, v) for k, v in eval_metrics.items()])
    if ema_eval_metrics is not None:
        rowd.update([("ema_eval_" + k, v) for k, v in ema_eval_metrics.items()])
    if lr is not None:
        rowd["lr"] = lr
    if log_wandb:
        wandb.log(rowd)
    # newline="" lets the csv writer control line endings itself; without it,
    # platforms that translate "\n" emit "\r\r\n" and show blank rows.
    with open(filename, mode="a", newline="") as cf:
        dw = csv.DictWriter(cf, fieldnames=rowd.keys())
        if write_header:  # first iteration (epoch == 1 can't be used)
            dw.writeheader()
        dw.writerow(rowd)
39 | 


--------------------------------------------------------------------------------
/classification/train_timm.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | if [ "$#" -lt 12 ]; then
 4 |     echo "Usage: me Config_file Model_name Dataset_name Img_size Remove_old_if_exist_0_or_1 Resume_or_not_if_exist Exp_name Tag Gpus Nb_gpus Workers Port [others]"
 5 |     exit
 6 | fi
 7 | 
 8 | CONFIG_FILE=$1
 9 | MODEL=$2
10 | DATASET=$3
11 | DATA_SIZE=$4
12 | RM_OLD_IF_EXIST=$5
13 | RESUM_OLD_IF_EXIST=$6
14 | EXP_NAME=$7
15 | TAG=$8
16 | GPUS=$9
17 | NUM_GPUS=${10}
18 | WORKERS=${11}
19 | PORT=${12}
20 | 
21 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
22 | 
23 | # datasets 
24 | NUM_CLASSES=0
25 | if [ "$DATASET" = "IMNET" ]; then
26 |   DATA_DIR=$DIR/../datasets/IMNET/
27 |   if [ ! -d $DATA_DIR ]; then
28 |     echo "not found $DATA_DIR"
29 |     exit
30 |   fi
31 |   NUM_CLASSES=1000
32 | else
33 |   echo "Unknown $DATASET"
34 |   exit 
35 | fi
36 | 
37 | # dirs 
38 | WORK_DIR=$DIR/../work_dirs/classification/$EXP_NAME
39 | 
40 | EXPERIMET="$DATASET"_"$DATA_SIZE"_"$MODEL"_"$TAG"
41 | 
42 | # training has completed?
43 | EXPERIMENT_DIR=$WORK_DIR/TrainingFinished/$EXPERIMET
44 | if [ -d $EXPERIMENT_DIR ]; then
45 |   echo "$EXPERIMENT_DIR --- Training Finished!!!!"
46 |   exit 
47 | fi
48 | 
49 | EXPERIMENT_DIR=$WORK_DIR/$EXPERIMET
50 | if [ -d $EXPERIMENT_DIR ]; then
51 |   echo "$EXPERIMENT_DIR --- Already exists"
52 |   if [ $RM_OLD_IF_EXIST -gt 0 ]; then
53 |     while true; do
54 |         read -p "Are you sure to delete this result directory? " yn
55 |         case $yn in
56 |             [Yy]* ) rm -r $EXPERIMENT_DIR; mkdir -p $EXPERIMENT_DIR; break;;
57 |             [Nn]* ) exit;;
58 |             * ) echo "Please answer yes or no.";;
59 |         esac
60 |     done
61 |   else
62 |     if [ $RESUM_OLD_IF_EXIST -gt 0 ]; then
63 |       echo "Auto-resume"
64 |     else
65 |       echo "Skip"
66 |       exit
67 |     fi
68 |   fi
69 | fi
70 | 
71 | # TORCH_DISTRIBUTED_DEBUG=INFO \
72 | OMP_NUM_THREADS=1 CUDA_VISIBLE_DEVICES=$GPUS \
73 |   torchrun \
74 |     --rdzv_backend c10d \
75 |     --rdzv_endpoint localhost:$PORT \
76 |     --nnodes 1 \
77 |     --nproc_per_node $NUM_GPUS \
78 |     $DIR/train_timm.py  \
79 |     --data-dir $DATA_DIR \
80 |     --img-size $DATA_SIZE \
81 |     --num-classes $NUM_CLASSES \
82 |     --config $CONFIG_FILE \
83 |     --model $MODEL \
84 |     --workers $WORKERS \
85 |     --channels-last \
86 |     --pin-mem \
87 |     --use-multi-epochs-loader \
88 |     --output $WORK_DIR \
89 |     --experiment $EXPERIMET \
90 |     ${@:13}
91 | 
92 |     
93 | 
94 | 
95 | 
96 | 


--------------------------------------------------------------------------------
/classification/validate.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Evaluate a checkpoint with validate_timm.py; arguments after the 6th are forwarded unchanged. Exits 1 on usage/dataset errors.
 3 | if [ "$#" -lt 6 ]; then
 4 |     echo "Usage: me model_name checkpoint_file dataset_name img_size gpus num_gpus [others]" >&2
 5 |     exit 1
 6 | fi
 7 | 
 8 | MODEL=$1
 9 | CHECKPOINT_FILE=$2
10 | DATASET=$3
11 | IMAGE_SIZE=$4
12 | GPUS=$5
13 | NUM_GPUS=$6
14 | 
15 | PYTHON=${PYTHON:-"python"}
16 | 
17 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
18 | 
19 | # datasets: only IMNET is recognised; anything else is rejected with a non-zero status
20 | NUM_CLASSES=0  # set per dataset below; not passed to validate_timm.py in this script
21 | if [ "$DATASET" = "IMNET" ]; then
22 |   DATA_DIR=$DIR/../datasets/IMNET/
23 |   if [ ! -d "$DATA_DIR" ]; then
24 |     echo "not found $DATA_DIR" >&2
25 |     exit 1
26 |   fi
27 |   NUM_CLASSES=1000
28 | else
29 |   echo "Unknown $DATASET" >&2
30 |   exit 1
31 | fi
32 | 
33 | # $PYTHON is left unquoted on purpose so an override such as PYTHON="python -u" still splits into words.
34 | CUDA_VISIBLE_DEVICES="$GPUS" $PYTHON \
35 |   "$DIR/validate_timm.py"  "$DATA_DIR" --dataset "$DATASET" \
36 |   --img-size "$IMAGE_SIZE" --workers 8 --num-gpu "$NUM_GPUS" \
37 |   --model "$MODEL" --checkpoint "$CHECKPOINT_FILE" --pin-mem --channels-last --amp \
38 |   "${@:7}"
39 | 
40 | 
41 | 
42 | 
43 | 
44 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/default_runtime.py:
--------------------------------------------------------------------------------
 1 | default_scope = 'mmdet'
 2 | # Default hooks: iteration timer, logger (interval=50), param scheduler, checkpoint (interval=1), sampler seed, visualization.
 3 | default_hooks = dict(
 4 |     timer=dict(type='IterTimerHook'),
 5 |     logger=dict(type='LoggerHook', interval=50),
 6 |     param_scheduler=dict(type='ParamSchedulerHook'),
 7 |     checkpoint=dict(type='CheckpointHook', interval=1),
 8 |     sampler_seed=dict(type='DistSamplerSeedHook'),
 9 |     visualization=dict(type='DetVisualizationHook'))
10 | # Environment: cuDNN benchmark off, 'fork' multiprocessing start method, NCCL distributed backend.
11 | env_cfg = dict(
12 |     cudnn_benchmark=False,
13 |     mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
14 |     dist_cfg=dict(backend='nccl'),
15 | )
16 | # Visualization goes through a single local backend; the log processor uses window_size=50 with by_epoch=True.
17 | vis_backends = [dict(type='LocalVisBackend')]
18 | visualizer = dict(
19 |     type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
20 | log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
21 | # Log level plus load/resume defaults (None / False here; presumably overridden per experiment — confirm).
22 | log_level = 'INFO'
23 | load_from = None
24 | resume = False
25 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/models/cascade-mask-rcnn_r50_fpn.py:
--------------------------------------------------------------------------------
  1 | # model settings: CascadeRCNN detector (ResNet-50 backbone, FPN neck, RPN head) with a 3-stage CascadeRoIHead and an FCN mask head
  2 | model = dict(
  3 |     type='CascadeRCNN',
  4 |     data_preprocessor=dict(
  5 |         type='DetDataPreprocessor',
  6 |         mean=[123.675, 116.28, 103.53],
  7 |         std=[58.395, 57.12, 57.375],
  8 |         bgr_to_rgb=True,
  9 |         pad_mask=True,
 10 |         pad_size_divisor=32),
 11 |     backbone=dict(
 12 |         type='ResNet',
 13 |         depth=50,
 14 |         num_stages=4,
 15 |         out_indices=(0, 1, 2, 3),
 16 |         frozen_stages=1,
 17 |         norm_cfg=dict(type='BN', requires_grad=True),
 18 |         norm_eval=True,
 19 |         style='pytorch',
 20 |         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
 21 |     neck=dict(
 22 |         type='FPN',
 23 |         in_channels=[256, 512, 1024, 2048],
 24 |         out_channels=256,
 25 |         num_outs=5),
 26 |     rpn_head=dict(
 27 |         type='RPNHead',
 28 |         in_channels=256,
 29 |         feat_channels=256,
 30 |         anchor_generator=dict(
 31 |             type='AnchorGenerator',
 32 |             scales=[8],
 33 |             ratios=[0.5, 1.0, 2.0],
 34 |             strides=[4, 8, 16, 32, 64]),
 35 |         bbox_coder=dict(
 36 |             type='DeltaXYWHBBoxCoder',
 37 |             target_means=[.0, .0, .0, .0],
 38 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
 39 |         loss_cls=dict(
 40 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
 41 |         loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
 42 |     roi_head=dict(
 43 |         type='CascadeRoIHead',
 44 |         num_stages=3,
 45 |         stage_loss_weights=[1, 0.5, 0.25],  # three weights, matching num_stages=3
 46 |         bbox_roi_extractor=dict(
 47 |             type='SingleRoIExtractor',
 48 |             roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
 49 |             out_channels=256,
 50 |             featmap_strides=[4, 8, 16, 32]),
 51 |         bbox_head=[  # three heads; target_stds get smaller from one entry to the next
 52 |             dict(
 53 |                 type='Shared2FCBBoxHead',
 54 |                 in_channels=256,
 55 |                 fc_out_channels=1024,
 56 |                 roi_feat_size=7,
 57 |                 num_classes=80,
 58 |                 bbox_coder=dict(
 59 |                     type='DeltaXYWHBBoxCoder',
 60 |                     target_means=[0., 0., 0., 0.],
 61 |                     target_stds=[0.1, 0.1, 0.2, 0.2]),
 62 |                 reg_class_agnostic=True,
 63 |                 loss_cls=dict(
 64 |                     type='CrossEntropyLoss',
 65 |                     use_sigmoid=False,
 66 |                     loss_weight=1.0),
 67 |                 loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
 68 |                                loss_weight=1.0)),
 69 |             dict(
 70 |                 type='Shared2FCBBoxHead',
 71 |                 in_channels=256,
 72 |                 fc_out_channels=1024,
 73 |                 roi_feat_size=7,
 74 |                 num_classes=80,
 75 |                 bbox_coder=dict(
 76 |                     type='DeltaXYWHBBoxCoder',
 77 |                     target_means=[0., 0., 0., 0.],
 78 |                     target_stds=[0.05, 0.05, 0.1, 0.1]),
 79 |                 reg_class_agnostic=True,
 80 |                 loss_cls=dict(
 81 |                     type='CrossEntropyLoss',
 82 |                     use_sigmoid=False,
 83 |                     loss_weight=1.0),
 84 |                 loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
 85 |                                loss_weight=1.0)),
 86 |             dict(
 87 |                 type='Shared2FCBBoxHead',
 88 |                 in_channels=256,
 89 |                 fc_out_channels=1024,
 90 |                 roi_feat_size=7,
 91 |                 num_classes=80,
 92 |                 bbox_coder=dict(
 93 |                     type='DeltaXYWHBBoxCoder',
 94 |                     target_means=[0., 0., 0., 0.],
 95 |                     target_stds=[0.033, 0.033, 0.067, 0.067]),
 96 |                 reg_class_agnostic=True,
 97 |                 loss_cls=dict(
 98 |                     type='CrossEntropyLoss',
 99 |                     use_sigmoid=False,
100 |                     loss_weight=1.0),
101 |                 loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
102 |         ],
103 |         mask_roi_extractor=dict(
104 |             type='SingleRoIExtractor',
105 |             roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
106 |             out_channels=256,
107 |             featmap_strides=[4, 8, 16, 32]),
108 |         mask_head=dict(
109 |             type='FCNMaskHead',
110 |             num_convs=4,
111 |             in_channels=256,
112 |             conv_out_channels=256,
113 |             num_classes=80,
114 |             loss_mask=dict(
115 |                 type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
116 |     # model training and testing settings (train_cfg: rpn, rpn_proposal, per-stage rcnn list; test_cfg: rpn, rcnn)
117 |     train_cfg=dict(
118 |         rpn=dict(
119 |             assigner=dict(
120 |                 type='MaxIoUAssigner',
121 |                 pos_iou_thr=0.7,
122 |                 neg_iou_thr=0.3,
123 |                 min_pos_iou=0.3,
124 |                 match_low_quality=True,
125 |                 ignore_iof_thr=-1),
126 |             sampler=dict(
127 |                 type='RandomSampler',
128 |                 num=256,
129 |                 pos_fraction=0.5,
130 |                 neg_pos_ub=-1,
131 |                 add_gt_as_proposals=False),
132 |             allowed_border=0,
133 |             pos_weight=-1,
134 |             debug=False),
135 |         rpn_proposal=dict(
136 |             nms_pre=2000,
137 |             max_per_img=2000,
138 |             nms=dict(type='nms', iou_threshold=0.7),
139 |             min_bbox_size=0),
140 |         rcnn=[  # three entries whose assigner IoU thresholds are 0.5, 0.6 and 0.7
141 |             dict(
142 |                 assigner=dict(
143 |                     type='MaxIoUAssigner',
144 |                     pos_iou_thr=0.5,
145 |                     neg_iou_thr=0.5,
146 |                     min_pos_iou=0.5,
147 |                     match_low_quality=False,
148 |                     ignore_iof_thr=-1),
149 |                 sampler=dict(
150 |                     type='RandomSampler',
151 |                     num=512,
152 |                     pos_fraction=0.25,
153 |                     neg_pos_ub=-1,
154 |                     add_gt_as_proposals=True),
155 |                 mask_size=28,
156 |                 pos_weight=-1,
157 |                 debug=False),
158 |             dict(
159 |                 assigner=dict(
160 |                     type='MaxIoUAssigner',
161 |                     pos_iou_thr=0.6,
162 |                     neg_iou_thr=0.6,
163 |                     min_pos_iou=0.6,
164 |                     match_low_quality=False,
165 |                     ignore_iof_thr=-1),
166 |                 sampler=dict(
167 |                     type='RandomSampler',
168 |                     num=512,
169 |                     pos_fraction=0.25,
170 |                     neg_pos_ub=-1,
171 |                     add_gt_as_proposals=True),
172 |                 mask_size=28,
173 |                 pos_weight=-1,
174 |                 debug=False),
175 |             dict(
176 |                 assigner=dict(
177 |                     type='MaxIoUAssigner',
178 |                     pos_iou_thr=0.7,
179 |                     neg_iou_thr=0.7,
180 |                     min_pos_iou=0.7,
181 |                     match_low_quality=False,
182 |                     ignore_iof_thr=-1),
183 |                 sampler=dict(
184 |                     type='RandomSampler',
185 |                     num=512,
186 |                     pos_fraction=0.25,
187 |                     neg_pos_ub=-1,
188 |                     add_gt_as_proposals=True),
189 |                 mask_size=28,
190 |                 pos_weight=-1,
191 |                 debug=False)
192 |         ]),
193 |     test_cfg=dict(
194 |         rpn=dict(
195 |             nms_pre=1000,
196 |             max_per_img=1000,
197 |             nms=dict(type='nms', iou_threshold=0.7),
198 |             min_bbox_size=0),
199 |         rcnn=dict(
200 |             score_thr=0.05,
201 |             nms=dict(type='nms', iou_threshold=0.5),
202 |             max_per_img=100,
203 |             mask_thr_binary=0.5)))
204 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/models/cascade-rcnn_r50_fpn.py:
--------------------------------------------------------------------------------
  1 | # model settings: CascadeRCNN detector (ResNet-50 backbone, FPN neck, RPN head) with a 3-stage CascadeRoIHead; box heads only, no mask head
  2 | model = dict(
  3 |     type='CascadeRCNN',
  4 |     data_preprocessor=dict(
  5 |         type='DetDataPreprocessor',
  6 |         mean=[123.675, 116.28, 103.53],
  7 |         std=[58.395, 57.12, 57.375],
  8 |         bgr_to_rgb=True,
  9 |         pad_size_divisor=32),
 10 |     backbone=dict(
 11 |         type='ResNet',
 12 |         depth=50,
 13 |         num_stages=4,
 14 |         out_indices=(0, 1, 2, 3),
 15 |         frozen_stages=1,
 16 |         norm_cfg=dict(type='BN', requires_grad=True),
 17 |         norm_eval=True,
 18 |         style='pytorch',
 19 |         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
 20 |     neck=dict(
 21 |         type='FPN',
 22 |         in_channels=[256, 512, 1024, 2048],
 23 |         out_channels=256,
 24 |         num_outs=5),
 25 |     rpn_head=dict(
 26 |         type='RPNHead',
 27 |         in_channels=256,
 28 |         feat_channels=256,
 29 |         anchor_generator=dict(
 30 |             type='AnchorGenerator',
 31 |             scales=[8],
 32 |             ratios=[0.5, 1.0, 2.0],
 33 |             strides=[4, 8, 16, 32, 64]),
 34 |         bbox_coder=dict(
 35 |             type='DeltaXYWHBBoxCoder',
 36 |             target_means=[.0, .0, .0, .0],
 37 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
 38 |         loss_cls=dict(
 39 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
 40 |         loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
 41 |     roi_head=dict(
 42 |         type='CascadeRoIHead',
 43 |         num_stages=3,
 44 |         stage_loss_weights=[1, 0.5, 0.25],  # three weights, matching num_stages=3
 45 |         bbox_roi_extractor=dict(
 46 |             type='SingleRoIExtractor',
 47 |             roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
 48 |             out_channels=256,
 49 |             featmap_strides=[4, 8, 16, 32]),
 50 |         bbox_head=[  # three heads; target_stds get smaller from one entry to the next
 51 |             dict(
 52 |                 type='Shared2FCBBoxHead',
 53 |                 in_channels=256,
 54 |                 fc_out_channels=1024,
 55 |                 roi_feat_size=7,
 56 |                 num_classes=80,
 57 |                 bbox_coder=dict(
 58 |                     type='DeltaXYWHBBoxCoder',
 59 |                     target_means=[0., 0., 0., 0.],
 60 |                     target_stds=[0.1, 0.1, 0.2, 0.2]),
 61 |                 reg_class_agnostic=True,
 62 |                 loss_cls=dict(
 63 |                     type='CrossEntropyLoss',
 64 |                     use_sigmoid=False,
 65 |                     loss_weight=1.0),
 66 |                 loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
 67 |                                loss_weight=1.0)),
 68 |             dict(
 69 |                 type='Shared2FCBBoxHead',
 70 |                 in_channels=256,
 71 |                 fc_out_channels=1024,
 72 |                 roi_feat_size=7,
 73 |                 num_classes=80,
 74 |                 bbox_coder=dict(
 75 |                     type='DeltaXYWHBBoxCoder',
 76 |                     target_means=[0., 0., 0., 0.],
 77 |                     target_stds=[0.05, 0.05, 0.1, 0.1]),
 78 |                 reg_class_agnostic=True,
 79 |                 loss_cls=dict(
 80 |                     type='CrossEntropyLoss',
 81 |                     use_sigmoid=False,
 82 |                     loss_weight=1.0),
 83 |                 loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
 84 |                                loss_weight=1.0)),
 85 |             dict(
 86 |                 type='Shared2FCBBoxHead',
 87 |                 in_channels=256,
 88 |                 fc_out_channels=1024,
 89 |                 roi_feat_size=7,
 90 |                 num_classes=80,
 91 |                 bbox_coder=dict(
 92 |                     type='DeltaXYWHBBoxCoder',
 93 |                     target_means=[0., 0., 0., 0.],
 94 |                     target_stds=[0.033, 0.033, 0.067, 0.067]),
 95 |                 reg_class_agnostic=True,
 96 |                 loss_cls=dict(
 97 |                     type='CrossEntropyLoss',
 98 |                     use_sigmoid=False,
 99 |                     loss_weight=1.0),
100 |                 loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
101 |         ]),
102 |     # model training and testing settings (train_cfg: rpn, rpn_proposal, per-stage rcnn list; test_cfg: rpn, rcnn)
103 |     train_cfg=dict(
104 |         rpn=dict(
105 |             assigner=dict(
106 |                 type='MaxIoUAssigner',
107 |                 pos_iou_thr=0.7,
108 |                 neg_iou_thr=0.3,
109 |                 min_pos_iou=0.3,
110 |                 match_low_quality=True,
111 |                 ignore_iof_thr=-1),
112 |             sampler=dict(
113 |                 type='RandomSampler',
114 |                 num=256,
115 |                 pos_fraction=0.5,
116 |                 neg_pos_ub=-1,
117 |                 add_gt_as_proposals=False),
118 |             allowed_border=0,
119 |             pos_weight=-1,
120 |             debug=False),
121 |         rpn_proposal=dict(
122 |             nms_pre=2000,
123 |             max_per_img=2000,
124 |             nms=dict(type='nms', iou_threshold=0.7),
125 |             min_bbox_size=0),
126 |         rcnn=[  # three entries whose assigner IoU thresholds are 0.5, 0.6 and 0.7
127 |             dict(
128 |                 assigner=dict(
129 |                     type='MaxIoUAssigner',
130 |                     pos_iou_thr=0.5,
131 |                     neg_iou_thr=0.5,
132 |                     min_pos_iou=0.5,
133 |                     match_low_quality=False,
134 |                     ignore_iof_thr=-1),
135 |                 sampler=dict(
136 |                     type='RandomSampler',
137 |                     num=512,
138 |                     pos_fraction=0.25,
139 |                     neg_pos_ub=-1,
140 |                     add_gt_as_proposals=True),
141 |                 pos_weight=-1,
142 |                 debug=False),
143 |             dict(
144 |                 assigner=dict(
145 |                     type='MaxIoUAssigner',
146 |                     pos_iou_thr=0.6,
147 |                     neg_iou_thr=0.6,
148 |                     min_pos_iou=0.6,
149 |                     match_low_quality=False,
150 |                     ignore_iof_thr=-1),
151 |                 sampler=dict(
152 |                     type='RandomSampler',
153 |                     num=512,
154 |                     pos_fraction=0.25,
155 |                     neg_pos_ub=-1,
156 |                     add_gt_as_proposals=True),
157 |                 pos_weight=-1,
158 |                 debug=False),
159 |             dict(
160 |                 assigner=dict(
161 |                     type='MaxIoUAssigner',
162 |                     pos_iou_thr=0.7,
163 |                     neg_iou_thr=0.7,
164 |                     min_pos_iou=0.7,
165 |                     match_low_quality=False,
166 |                     ignore_iof_thr=-1),
167 |                 sampler=dict(
168 |                     type='RandomSampler',
169 |                     num=512,
170 |                     pos_fraction=0.25,
171 |                     neg_pos_ub=-1,
172 |                     add_gt_as_proposals=True),
173 |                 pos_weight=-1,
174 |                 debug=False)
175 |         ]),
176 |     test_cfg=dict(
177 |         rpn=dict(
178 |             nms_pre=1000,
179 |             max_per_img=1000,
180 |             nms=dict(type='nms', iou_threshold=0.7),
181 |             min_bbox_size=0),
182 |         rcnn=dict(
183 |             score_thr=0.05,
184 |             nms=dict(type='nms', iou_threshold=0.5),
185 |             max_per_img=100)))
186 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/models/fast-rcnn_r50_fpn.py:
--------------------------------------------------------------------------------
 1 | # model settings: FastRCNN detector with a ResNet-50 backbone, FPN neck and a StandardRoIHead; no rpn_head is defined in this config
 2 | model = dict(
 3 |     type='FastRCNN',
 4 |     data_preprocessor=dict(
 5 |         type='DetDataPreprocessor',
 6 |         mean=[123.675, 116.28, 103.53],
 7 |         std=[58.395, 57.12, 57.375],
 8 |         bgr_to_rgb=True,
 9 |         pad_size_divisor=32),
10 |     backbone=dict(
11 |         type='ResNet',
12 |         depth=50,
13 |         num_stages=4,
14 |         out_indices=(0, 1, 2, 3),
15 |         frozen_stages=1,
16 |         norm_cfg=dict(type='BN', requires_grad=True),
17 |         norm_eval=True,
18 |         style='pytorch',
19 |         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
20 |     neck=dict(
21 |         type='FPN',
22 |         in_channels=[256, 512, 1024, 2048],
23 |         out_channels=256,
24 |         num_outs=5),
25 |     roi_head=dict(
26 |         type='StandardRoIHead',
27 |         bbox_roi_extractor=dict(
28 |             type='SingleRoIExtractor',
29 |             roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
30 |             out_channels=256,
31 |             featmap_strides=[4, 8, 16, 32]),
32 |         bbox_head=dict(
33 |             type='Shared2FCBBoxHead',
34 |             in_channels=256,
35 |             fc_out_channels=1024,
36 |             roi_feat_size=7,
37 |             num_classes=80,
38 |             bbox_coder=dict(
39 |                 type='DeltaXYWHBBoxCoder',
40 |                 target_means=[0., 0., 0., 0.],
41 |                 target_stds=[0.1, 0.1, 0.2, 0.2]),
42 |             reg_class_agnostic=False,
43 |             loss_cls=dict(
44 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
45 |             loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
46 |     # model training and testing settings (only an rcnn section in both train_cfg and test_cfg)
47 |     train_cfg=dict(
48 |         rcnn=dict(
49 |             assigner=dict(
50 |                 type='MaxIoUAssigner',
51 |                 pos_iou_thr=0.5,
52 |                 neg_iou_thr=0.5,
53 |                 min_pos_iou=0.5,
54 |                 match_low_quality=False,
55 |                 ignore_iof_thr=-1),
56 |             sampler=dict(
57 |                 type='RandomSampler',
58 |                 num=512,
59 |                 pos_fraction=0.25,
60 |                 neg_pos_ub=-1,
61 |                 add_gt_as_proposals=True),
62 |             pos_weight=-1,
63 |             debug=False)),
64 |     test_cfg=dict(
65 |         rcnn=dict(
66 |             score_thr=0.05,
67 |             nms=dict(type='nms', iou_threshold=0.5),
68 |             max_per_img=100)))
69 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/models/faster-rcnn_r50-caffe-c4.py:
--------------------------------------------------------------------------------
  1 | # model settings: FasterRCNN on a caffe-style ResNet-50 limited to 3 stages (single output, out_indices=(2, )), with a ResLayer (stage=3) as the RoI shared head
  2 | norm_cfg = dict(type='BN', requires_grad=False)  # reused by the backbone and by roi_head.shared_head
  3 | model = dict(
  4 |     type='FasterRCNN',
  5 |     data_preprocessor=dict(
  6 |         type='DetDataPreprocessor',
  7 |         mean=[103.530, 116.280, 123.675],
  8 |         std=[1.0, 1.0, 1.0],
  9 |         bgr_to_rgb=False,
 10 |         pad_size_divisor=32),
 11 |     backbone=dict(
 12 |         type='ResNet',
 13 |         depth=50,
 14 |         num_stages=3,
 15 |         strides=(1, 2, 2),
 16 |         dilations=(1, 1, 1),
 17 |         out_indices=(2, ),
 18 |         frozen_stages=1,
 19 |         norm_cfg=norm_cfg,
 20 |         norm_eval=True,
 21 |         style='caffe',
 22 |         init_cfg=dict(
 23 |             type='Pretrained',
 24 |             checkpoint='open-mmlab://detectron2/resnet50_caffe')),
 25 |     rpn_head=dict(
 26 |         type='RPNHead',
 27 |         in_channels=1024,
 28 |         feat_channels=1024,
 29 |         anchor_generator=dict(
 30 |             type='AnchorGenerator',
 31 |             scales=[2, 4, 8, 16, 32],
 32 |             ratios=[0.5, 1.0, 2.0],
 33 |             strides=[16]),
 34 |         bbox_coder=dict(
 35 |             type='DeltaXYWHBBoxCoder',
 36 |             target_means=[.0, .0, .0, .0],
 37 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
 38 |         loss_cls=dict(
 39 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
 40 |         loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
 41 |     roi_head=dict(
 42 |         type='StandardRoIHead',
 43 |         shared_head=dict(
 44 |             type='ResLayer',
 45 |             depth=50,
 46 |             stage=3,
 47 |             stride=2,
 48 |             dilation=1,
 49 |             style='caffe',
 50 |             norm_cfg=norm_cfg,
 51 |             norm_eval=True,
 52 |             init_cfg=dict(
 53 |                 type='Pretrained',
 54 |                 checkpoint='open-mmlab://detectron2/resnet50_caffe')),
 55 |         bbox_roi_extractor=dict(
 56 |             type='SingleRoIExtractor',
 57 |             roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
 58 |             out_channels=1024,
 59 |             featmap_strides=[16]),
 60 |         bbox_head=dict(
 61 |             type='BBoxHead',
 62 |             with_avg_pool=True,
 63 |             roi_feat_size=7,
 64 |             in_channels=2048,  # NOTE(review): differs from the extractor's out_channels=1024; presumably the shared_head output width — confirm
 65 |             num_classes=80,
 66 |             bbox_coder=dict(
 67 |                 type='DeltaXYWHBBoxCoder',
 68 |                 target_means=[0., 0., 0., 0.],
 69 |                 target_stds=[0.1, 0.1, 0.2, 0.2]),
 70 |             reg_class_agnostic=False,
 71 |             loss_cls=dict(
 72 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
 73 |             loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
 74 |     # model training and testing settings (train_cfg: rpn, rpn_proposal, rcnn; test_cfg: rpn, rcnn)
 75 |     train_cfg=dict(
 76 |         rpn=dict(
 77 |             assigner=dict(
 78 |                 type='MaxIoUAssigner',
 79 |                 pos_iou_thr=0.7,
 80 |                 neg_iou_thr=0.3,
 81 |                 min_pos_iou=0.3,
 82 |                 match_low_quality=True,
 83 |                 ignore_iof_thr=-1),
 84 |             sampler=dict(
 85 |                 type='RandomSampler',
 86 |                 num=256,
 87 |                 pos_fraction=0.5,
 88 |                 neg_pos_ub=-1,
 89 |                 add_gt_as_proposals=False),
 90 |             allowed_border=-1,
 91 |             pos_weight=-1,
 92 |             debug=False),
 93 |         rpn_proposal=dict(
 94 |             nms_pre=12000,
 95 |             max_per_img=2000,
 96 |             nms=dict(type='nms', iou_threshold=0.7),
 97 |             min_bbox_size=0),
 98 |         rcnn=dict(
 99 |             assigner=dict(
100 |                 type='MaxIoUAssigner',
101 |                 pos_iou_thr=0.5,
102 |                 neg_iou_thr=0.5,
103 |                 min_pos_iou=0.5,
104 |                 match_low_quality=False,
105 |                 ignore_iof_thr=-1),
106 |             sampler=dict(
107 |                 type='RandomSampler',
108 |                 num=512,
109 |                 pos_fraction=0.25,
110 |                 neg_pos_ub=-1,
111 |                 add_gt_as_proposals=True),
112 |             pos_weight=-1,
113 |             debug=False)),
114 |     test_cfg=dict(
115 |         rpn=dict(
116 |             nms_pre=6000,
117 |             max_per_img=1000,
118 |             nms=dict(type='nms', iou_threshold=0.7),
119 |             min_bbox_size=0),
120 |         rcnn=dict(
121 |             score_thr=0.05,
122 |             nms=dict(type='nms', iou_threshold=0.5),
123 |             max_per_img=100)))
124 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/models/faster-rcnn_r50-caffe-dc5.py:
--------------------------------------------------------------------------------
  1 | # model settings: FasterRCNN on a caffe-style 4-stage ResNet-50 with a dilated last stage; a single output (out_indices=(3, )) feeds the RPN and RoI heads
  2 | norm_cfg = dict(type='BN', requires_grad=False)  # used by the backbone below
  3 | model = dict(
  4 |     type='FasterRCNN',
  5 |     data_preprocessor=dict(
  6 |         type='DetDataPreprocessor',
  7 |         mean=[103.530, 116.280, 123.675],
  8 |         std=[1.0, 1.0, 1.0],
  9 |         bgr_to_rgb=False,
 10 |         pad_size_divisor=32),
 11 |     backbone=dict(
 12 |         type='ResNet',
 13 |         depth=50,
 14 |         num_stages=4,
 15 |         strides=(1, 2, 2, 1),  # last stage keeps stride 1 ...
 16 |         dilations=(1, 1, 1, 2),  # ... and uses dilation 2
 17 |         out_indices=(3, ),
 18 |         frozen_stages=1,
 19 |         norm_cfg=norm_cfg,
 20 |         norm_eval=True,
 21 |         style='caffe',
 22 |         init_cfg=dict(
 23 |             type='Pretrained',
 24 |             checkpoint='open-mmlab://detectron2/resnet50_caffe')),
 25 |     rpn_head=dict(
 26 |         type='RPNHead',
 27 |         in_channels=2048,
 28 |         feat_channels=2048,
 29 |         anchor_generator=dict(
 30 |             type='AnchorGenerator',
 31 |             scales=[2, 4, 8, 16, 32],
 32 |             ratios=[0.5, 1.0, 2.0],
 33 |             strides=[16]),
 34 |         bbox_coder=dict(
 35 |             type='DeltaXYWHBBoxCoder',
 36 |             target_means=[.0, .0, .0, .0],
 37 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
 38 |         loss_cls=dict(
 39 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
 40 |         loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
 41 |     roi_head=dict(
 42 |         type='StandardRoIHead',
 43 |         bbox_roi_extractor=dict(
 44 |             type='SingleRoIExtractor',
 45 |             roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
 46 |             out_channels=2048,
 47 |             featmap_strides=[16]),
 48 |         bbox_head=dict(
 49 |             type='Shared2FCBBoxHead',
 50 |             in_channels=2048,
 51 |             fc_out_channels=1024,
 52 |             roi_feat_size=7,
 53 |             num_classes=80,
 54 |             bbox_coder=dict(
 55 |                 type='DeltaXYWHBBoxCoder',
 56 |                 target_means=[0., 0., 0., 0.],
 57 |                 target_stds=[0.1, 0.1, 0.2, 0.2]),
 58 |             reg_class_agnostic=False,
 59 |             loss_cls=dict(
 60 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
 61 |             loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
 62 |     # model training and testing settings (train_cfg: rpn, rpn_proposal, rcnn; test_cfg: rpn, rcnn)
 63 |     train_cfg=dict(
 64 |         rpn=dict(
 65 |             assigner=dict(
 66 |                 type='MaxIoUAssigner',
 67 |                 pos_iou_thr=0.7,
 68 |                 neg_iou_thr=0.3,
 69 |                 min_pos_iou=0.3,
 70 |                 match_low_quality=True,
 71 |                 ignore_iof_thr=-1),
 72 |             sampler=dict(
 73 |                 type='RandomSampler',
 74 |                 num=256,
 75 |                 pos_fraction=0.5,
 76 |                 neg_pos_ub=-1,
 77 |                 add_gt_as_proposals=False),
 78 |             allowed_border=0,
 79 |             pos_weight=-1,
 80 |             debug=False),
 81 |         rpn_proposal=dict(
 82 |             nms_pre=12000,
 83 |             max_per_img=2000,
 84 |             nms=dict(type='nms', iou_threshold=0.7),
 85 |             min_bbox_size=0),
 86 |         rcnn=dict(
 87 |             assigner=dict(
 88 |                 type='MaxIoUAssigner',
 89 |                 pos_iou_thr=0.5,
 90 |                 neg_iou_thr=0.5,
 91 |                 min_pos_iou=0.5,
 92 |                 match_low_quality=False,
 93 |                 ignore_iof_thr=-1),
 94 |             sampler=dict(
 95 |                 type='RandomSampler',
 96 |                 num=512,
 97 |                 pos_fraction=0.25,
 98 |                 neg_pos_ub=-1,
 99 |                 add_gt_as_proposals=True),
100 |             pos_weight=-1,
101 |             debug=False)),
102 |     test_cfg=dict(
103 |         rpn=dict(
104 |             nms=dict(type='nms', iou_threshold=0.7),
105 |             nms_pre=6000,
106 |             max_per_img=1000,
107 |             min_bbox_size=0),
108 |         rcnn=dict(
109 |             score_thr=0.05,
110 |             nms=dict(type='nms', iou_threshold=0.5),
111 |             max_per_img=100)))
112 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/models/faster-rcnn_r50_fpn.py:
--------------------------------------------------------------------------------
  1 | # model settings: FasterRCNN detector with a ResNet-50 backbone and an FPN neck
  2 | model = dict(
  3 |     type='FasterRCNN',
  4 |     data_preprocessor=dict(  # input normalisation (mean/std) and padding options
  5 |         type='DetDataPreprocessor',
  6 |         mean=[123.675, 116.28, 103.53],
  7 |         std=[58.395, 57.12, 57.375],
  8 |         bgr_to_rgb=True,
  9 |         pad_size_divisor=32),
 10 |     backbone=dict(
 11 |         type='ResNet',
 12 |         depth=50,
 13 |         num_stages=4,
 14 |         out_indices=(0, 1, 2, 3),  # one index per stage (num_stages=4)
 15 |         frozen_stages=1,
 16 |         norm_cfg=dict(type='BN', requires_grad=True),
 17 |         norm_eval=True,
 18 |         style='pytorch',
 19 |         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
 20 |     neck=dict(
 21 |         type='FPN',
 22 |         in_channels=[256, 512, 1024, 2048],  # four entries, matching the four backbone out_indices
 23 |         out_channels=256,
 24 |         num_outs=5),
 25 |     rpn_head=dict(
 26 |         type='RPNHead',
 27 |         in_channels=256,  # equals the neck's out_channels
 28 |         feat_channels=256,
 29 |         anchor_generator=dict(
 30 |             type='AnchorGenerator',
 31 |             scales=[8],
 32 |             ratios=[0.5, 1.0, 2.0],
 33 |             strides=[4, 8, 16, 32, 64]),  # five strides, same count as the neck's num_outs
 34 |         bbox_coder=dict(
 35 |             type='DeltaXYWHBBoxCoder',
 36 |             target_means=[.0, .0, .0, .0],
 37 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
 38 |         loss_cls=dict(
 39 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
 40 |         loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
 41 |     roi_head=dict(  # RoI head: a box RoI extractor plus a box head
 42 |         type='StandardRoIHead',
 43 |         bbox_roi_extractor=dict(
 44 |             type='SingleRoIExtractor',
 45 |             roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
 46 |             out_channels=256,
 47 |             featmap_strides=[4, 8, 16, 32]),  # first four of the anchor strides above (64 is not listed)
 48 |         bbox_head=dict(
 49 |             type='Shared2FCBBoxHead',
 50 |             in_channels=256,
 51 |             fc_out_channels=1024,
 52 |             roi_feat_size=7,  # same value as the RoIAlign output_size above
 53 |             num_classes=80,
 54 |             bbox_coder=dict(
 55 |                 type='DeltaXYWHBBoxCoder',
 56 |                 target_means=[0., 0., 0., 0.],
 57 |                 target_stds=[0.1, 0.1, 0.2, 0.2]),
 58 |             reg_class_agnostic=False,
 59 |             loss_cls=dict(
 60 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
 61 |             loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
 62 |     # model training and testing settings
 63 |     train_cfg=dict(
 64 |         rpn=dict(  # training-time assigner/sampler settings under the `rpn` key
 65 |             assigner=dict(
 66 |                 type='MaxIoUAssigner',
 67 |                 pos_iou_thr=0.7,
 68 |                 neg_iou_thr=0.3,
 69 |                 min_pos_iou=0.3,
 70 |                 match_low_quality=True,
 71 |                 ignore_iof_thr=-1),
 72 |             sampler=dict(
 73 |                 type='RandomSampler',
 74 |                 num=256,
 75 |                 pos_fraction=0.5,
 76 |                 neg_pos_ub=-1,
 77 |                 add_gt_as_proposals=False),
 78 |             allowed_border=-1,
 79 |             pos_weight=-1,
 80 |             debug=False),
 81 |         rpn_proposal=dict(  # training-time proposal filtering (NMS and per-image caps)
 82 |             nms_pre=2000,
 83 |             max_per_img=1000,
 84 |             nms=dict(type='nms', iou_threshold=0.7),
 85 |             min_bbox_size=0),
 86 |         rcnn=dict(  # training-time assigner/sampler settings under the `rcnn` key
 87 |             assigner=dict(
 88 |                 type='MaxIoUAssigner',
 89 |                 pos_iou_thr=0.5,
 90 |                 neg_iou_thr=0.5,
 91 |                 min_pos_iou=0.5,
 92 |                 match_low_quality=False,
 93 |                 ignore_iof_thr=-1),
 94 |             sampler=dict(
 95 |                 type='RandomSampler',
 96 |                 num=512,
 97 |                 pos_fraction=0.25,
 98 |                 neg_pos_ub=-1,
 99 |                 add_gt_as_proposals=True),
100 |             pos_weight=-1,
101 |             debug=False)),
102 |     test_cfg=dict(
103 |         rpn=dict(  # test-time proposal filtering; nms_pre is 1000 here vs 2000 in train_cfg.rpn_proposal
104 |             nms_pre=1000,
105 |             max_per_img=1000,
106 |             nms=dict(type='nms', iou_threshold=0.7),
107 |             min_bbox_size=0),
108 |         rcnn=dict(
109 |             score_thr=0.05,
110 |             nms=dict(type='nms', iou_threshold=0.5),
111 |             max_per_img=100)
112 |         # soft-nms is also supported for rcnn testing
113 |         # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
114 |     ))
115 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/models/mask-rcnn_r50-caffe-c4.py:
--------------------------------------------------------------------------------
  1 | # model settings: MaskRCNN detector with a caffe-style ResNet-50 backbone and no neck (single feature map)
  2 | norm_cfg = dict(type='BN', requires_grad=False)  # shared by the backbone and the RoI shared_head below
  3 | model = dict(
  4 |     type='MaskRCNN',
  5 |     data_preprocessor=dict(  # input normalisation (mean/std) and padding options
  6 |         type='DetDataPreprocessor',
  7 |         mean=[103.530, 116.280, 123.675],
  8 |         std=[1.0, 1.0, 1.0],
  9 |         bgr_to_rgb=False,
 10 |         pad_mask=True,
 11 |         pad_size_divisor=32),
 12 |     backbone=dict(
 13 |         type='ResNet',
 14 |         depth=50,
 15 |         num_stages=3,
 16 |         strides=(1, 2, 2),  # one entry per stage (num_stages=3)
 17 |         dilations=(1, 1, 1),
 18 |         out_indices=(2, ),  # a single output index
 19 |         frozen_stages=1,
 20 |         norm_cfg=norm_cfg,
 21 |         norm_eval=True,
 22 |         style='caffe',
 23 |         init_cfg=dict(
 24 |             type='Pretrained',
 25 |             checkpoint='open-mmlab://detectron2/resnet50_caffe')),
 26 |     rpn_head=dict(
 27 |         type='RPNHead',
 28 |         in_channels=1024,
 29 |         feat_channels=1024,
 30 |         anchor_generator=dict(
 31 |             type='AnchorGenerator',
 32 |             scales=[2, 4, 8, 16, 32],
 33 |             ratios=[0.5, 1.0, 2.0],
 34 |             strides=[16]),  # single stride, matching the single backbone output
 35 |         bbox_coder=dict(
 36 |             type='DeltaXYWHBBoxCoder',
 37 |             target_means=[.0, .0, .0, .0],
 38 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
 39 |         loss_cls=dict(
 40 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
 41 |         loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
 42 |     roi_head=dict(  # RoI head: shared ResLayer, box RoI extractor, box head and mask head
 43 |         type='StandardRoIHead',
 44 |         shared_head=dict(
 45 |             type='ResLayer',
 46 |             depth=50,
 47 |             stage=3,
 48 |             stride=2,
 49 |             dilation=1,
 50 |             style='caffe',
 51 |             norm_cfg=norm_cfg,
 52 |             norm_eval=True),
 53 |         bbox_roi_extractor=dict(
 54 |             type='SingleRoIExtractor',
 55 |             roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),  # NOTE(review): 14 here vs roi_feat_size=7 below; presumably halved by the stride-2 shared_head, confirm
 56 |             out_channels=1024,
 57 |             featmap_strides=[16]),
 58 |         bbox_head=dict(
 59 |             type='BBoxHead',
 60 |             with_avg_pool=True,
 61 |             roi_feat_size=7,
 62 |             in_channels=2048,
 63 |             num_classes=80,
 64 |             bbox_coder=dict(
 65 |                 type='DeltaXYWHBBoxCoder',
 66 |                 target_means=[0., 0., 0., 0.],
 67 |                 target_stds=[0.1, 0.1, 0.2, 0.2]),
 68 |             reg_class_agnostic=False,
 69 |             loss_cls=dict(
 70 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
 71 |             loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
 72 |         mask_roi_extractor=None,  # no separate mask RoI extractor is configured
 73 |         mask_head=dict(
 74 |             type='FCNMaskHead',
 75 |             num_convs=0,
 76 |             in_channels=2048,  # same in_channels as the bbox_head above
 77 |             conv_out_channels=256,
 78 |             num_classes=80,
 79 |             loss_mask=dict(
 80 |                 type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
 81 |     # model training and testing settings
 82 |     train_cfg=dict(
 83 |         rpn=dict(  # training-time assigner/sampler settings under the `rpn` key
 84 |             assigner=dict(
 85 |                 type='MaxIoUAssigner',
 86 |                 pos_iou_thr=0.7,
 87 |                 neg_iou_thr=0.3,
 88 |                 min_pos_iou=0.3,
 89 |                 match_low_quality=True,
 90 |                 ignore_iof_thr=-1),
 91 |             sampler=dict(
 92 |                 type='RandomSampler',
 93 |                 num=256,
 94 |                 pos_fraction=0.5,
 95 |                 neg_pos_ub=-1,
 96 |                 add_gt_as_proposals=False),
 97 |             allowed_border=0,
 98 |             pos_weight=-1,
 99 |             debug=False),
100 |         rpn_proposal=dict(  # training-time proposal filtering (NMS and per-image caps)
101 |             nms_pre=12000,
102 |             max_per_img=2000,
103 |             nms=dict(type='nms', iou_threshold=0.7),
104 |             min_bbox_size=0),
105 |         rcnn=dict(  # training-time assigner/sampler settings under the `rcnn` key
106 |             assigner=dict(
107 |                 type='MaxIoUAssigner',
108 |                 pos_iou_thr=0.5,
109 |                 neg_iou_thr=0.5,
110 |                 min_pos_iou=0.5,
111 |                 match_low_quality=False,
112 |                 ignore_iof_thr=-1),
113 |             sampler=dict(
114 |                 type='RandomSampler',
115 |                 num=512,
116 |                 pos_fraction=0.25,
117 |                 neg_pos_ub=-1,
118 |                 add_gt_as_proposals=True),
119 |             mask_size=14,  # same value as the RoIAlign output_size above
120 |             pos_weight=-1,
121 |             debug=False)),
122 |     test_cfg=dict(
123 |         rpn=dict(  # test-time proposal filtering; smaller caps than train_cfg.rpn_proposal
124 |             nms_pre=6000,
125 |             nms=dict(type='nms', iou_threshold=0.7),
126 |             max_per_img=1000,
127 |             min_bbox_size=0),
128 |         rcnn=dict(
129 |             score_thr=0.05,
130 |             nms=dict(type='nms', iou_threshold=0.5),
131 |             max_per_img=100,
132 |             mask_thr_binary=0.5)))
133 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/models/mask-rcnn_r50_fpn.py:
--------------------------------------------------------------------------------
  1 | # model settings: MaskRCNN detector with a ResNet-50 backbone and an FPN neck
  2 | model = dict(
  3 |     type='MaskRCNN',
  4 |     data_preprocessor=dict(  # input normalisation (mean/std) and padding options
  5 |         type='DetDataPreprocessor',
  6 |         mean=[123.675, 116.28, 103.53],
  7 |         std=[58.395, 57.12, 57.375],
  8 |         bgr_to_rgb=True,
  9 |         pad_mask=True,
 10 |         pad_size_divisor=32),
 11 |     backbone=dict(
 12 |         type='ResNet',
 13 |         depth=50,
 14 |         num_stages=4,
 15 |         out_indices=(0, 1, 2, 3),  # one index per stage (num_stages=4)
 16 |         frozen_stages=1,
 17 |         norm_cfg=dict(type='BN', requires_grad=True),
 18 |         norm_eval=True,
 19 |         style='pytorch',
 20 |         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
 21 |     neck=dict(
 22 |         type='FPN',
 23 |         in_channels=[256, 512, 1024, 2048],  # four entries, matching the four backbone out_indices
 24 |         out_channels=256,
 25 |         num_outs=5),
 26 |     rpn_head=dict(
 27 |         type='RPNHead',
 28 |         in_channels=256,  # equals the neck's out_channels
 29 |         feat_channels=256,
 30 |         anchor_generator=dict(
 31 |             type='AnchorGenerator',
 32 |             scales=[8],
 33 |             ratios=[0.5, 1.0, 2.0],
 34 |             strides=[4, 8, 16, 32, 64]),  # five strides, same count as the neck's num_outs
 35 |         bbox_coder=dict(
 36 |             type='DeltaXYWHBBoxCoder',
 37 |             target_means=[.0, .0, .0, .0],
 38 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
 39 |         loss_cls=dict(
 40 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
 41 |         loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
 42 |     roi_head=dict(  # RoI head: box extractor + box head, mask extractor + mask head
 43 |         type='StandardRoIHead',
 44 |         bbox_roi_extractor=dict(
 45 |             type='SingleRoIExtractor',
 46 |             roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
 47 |             out_channels=256,
 48 |             featmap_strides=[4, 8, 16, 32]),  # first four of the anchor strides above (64 is not listed)
 49 |         bbox_head=dict(
 50 |             type='Shared2FCBBoxHead',
 51 |             in_channels=256,
 52 |             fc_out_channels=1024,
 53 |             roi_feat_size=7,  # same value as the bbox RoIAlign output_size above
 54 |             num_classes=80,
 55 |             bbox_coder=dict(
 56 |                 type='DeltaXYWHBBoxCoder',
 57 |                 target_means=[0., 0., 0., 0.],
 58 |                 target_stds=[0.1, 0.1, 0.2, 0.2]),
 59 |             reg_class_agnostic=False,
 60 |             loss_cls=dict(
 61 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
 62 |             loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
 63 |         mask_roi_extractor=dict(
 64 |             type='SingleRoIExtractor',
 65 |             roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),  # 14 for masks vs 7 for boxes
 66 |             out_channels=256,
 67 |             featmap_strides=[4, 8, 16, 32]),
 68 |         mask_head=dict(
 69 |             type='FCNMaskHead',
 70 |             num_convs=4,
 71 |             in_channels=256,
 72 |             conv_out_channels=256,
 73 |             num_classes=80,  # same class count as the bbox_head
 74 |             loss_mask=dict(
 75 |                 type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
 76 |     # model training and testing settings
 77 |     train_cfg=dict(
 78 |         rpn=dict(  # training-time assigner/sampler settings under the `rpn` key
 79 |             assigner=dict(
 80 |                 type='MaxIoUAssigner',
 81 |                 pos_iou_thr=0.7,
 82 |                 neg_iou_thr=0.3,
 83 |                 min_pos_iou=0.3,
 84 |                 match_low_quality=True,
 85 |                 ignore_iof_thr=-1),
 86 |             sampler=dict(
 87 |                 type='RandomSampler',
 88 |                 num=256,
 89 |                 pos_fraction=0.5,
 90 |                 neg_pos_ub=-1,
 91 |                 add_gt_as_proposals=False),
 92 |             allowed_border=-1,
 93 |             pos_weight=-1,
 94 |             debug=False),
 95 |         rpn_proposal=dict(  # training-time proposal filtering (NMS and per-image caps)
 96 |             nms_pre=2000,
 97 |             max_per_img=1000,
 98 |             nms=dict(type='nms', iou_threshold=0.7),
 99 |             min_bbox_size=0),
100 |         rcnn=dict(  # training-time assigner/sampler settings under the `rcnn` key
101 |             assigner=dict(
102 |                 type='MaxIoUAssigner',
103 |                 pos_iou_thr=0.5,
104 |                 neg_iou_thr=0.5,
105 |                 min_pos_iou=0.5,
106 |                 match_low_quality=True,  # NOTE(review): the faster-rcnn_r50_fpn base config sets False for its rcnn assigner; confirm the difference is intended
107 |                 ignore_iof_thr=-1),
108 |             sampler=dict(
109 |                 type='RandomSampler',
110 |                 num=512,
111 |                 pos_fraction=0.25,
112 |                 neg_pos_ub=-1,
113 |                 add_gt_as_proposals=True),
114 |             mask_size=28,  # twice the mask RoIAlign output_size (14)
115 |             pos_weight=-1,
116 |             debug=False)),
117 |     test_cfg=dict(
118 |         rpn=dict(  # test-time proposal filtering; nms_pre is 1000 here vs 2000 in train_cfg.rpn_proposal
119 |             nms_pre=1000,
120 |             max_per_img=1000,
121 |             nms=dict(type='nms', iou_threshold=0.7),
122 |             min_bbox_size=0),
123 |         rcnn=dict(
124 |             score_thr=0.05,
125 |             nms=dict(type='nms', iou_threshold=0.5),
126 |             max_per_img=100,
127 |             mask_thr_binary=0.5)))
128 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/models/retinanet_r50_fpn.py:
--------------------------------------------------------------------------------
 1 | # model settings: RetinaNet detector with a ResNet-50 backbone and an FPN neck
 2 | model = dict(
 3 |     type='RetinaNet',
 4 |     data_preprocessor=dict(  # input normalisation (mean/std) and padding options
 5 |         type='DetDataPreprocessor',
 6 |         mean=[123.675, 116.28, 103.53],
 7 |         std=[58.395, 57.12, 57.375],
 8 |         bgr_to_rgb=True,
 9 |         pad_size_divisor=32),
10 |     backbone=dict(
11 |         type='ResNet',
12 |         depth=50,
13 |         num_stages=4,
14 |         out_indices=(0, 1, 2, 3),  # one index per stage (num_stages=4)
15 |         frozen_stages=1,
16 |         norm_cfg=dict(type='BN', requires_grad=True),
17 |         norm_eval=True,
18 |         style='pytorch',
19 |         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
20 |     neck=dict(
21 |         type='FPN',
22 |         in_channels=[256, 512, 1024, 2048],  # four entries, matching the four backbone out_indices
23 |         out_channels=256,
24 |         start_level=1,  # presumably skips the first backbone output; confirm against the FPN implementation
25 |         add_extra_convs='on_input',
26 |         num_outs=5),
27 |     bbox_head=dict(
28 |         type='RetinaHead',
29 |         num_classes=80,
30 |         in_channels=256,  # equals the neck's out_channels
31 |         stacked_convs=4,
32 |         feat_channels=256,
33 |         anchor_generator=dict(
34 |             type='AnchorGenerator',
35 |             octave_base_scale=4,
36 |             scales_per_octave=3,
37 |             ratios=[0.5, 1.0, 2.0],
38 |             strides=[8, 16, 32, 64, 128]),  # five strides, same count as the neck's num_outs
39 |         bbox_coder=dict(
40 |             type='DeltaXYWHBBoxCoder',
41 |             target_means=[.0, .0, .0, .0],
42 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
43 |         loss_cls=dict(
44 |             type='FocalLoss',
45 |             use_sigmoid=True,
46 |             gamma=2.0,
47 |             alpha=0.25,
48 |             loss_weight=1.0),
49 |         loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
50 |     # model training and testing settings
51 |     train_cfg=dict(  # flat layout: assigner/sampler sit directly under train_cfg (single head)
52 |         assigner=dict(
53 |             type='MaxIoUAssigner',
54 |             pos_iou_thr=0.5,
55 |             neg_iou_thr=0.4,
56 |             min_pos_iou=0,
57 |             ignore_iof_thr=-1),
58 |         sampler=dict(
59 |             type='PseudoSampler'),  # Focal loss should use PseudoSampler
60 |         allowed_border=-1,
61 |         pos_weight=-1,
62 |         debug=False),
63 |     test_cfg=dict(  # test-time score threshold, NMS and per-image detection cap
64 |         nms_pre=1000,
65 |         min_bbox_size=0,
66 |         score_thr=0.05,
67 |         nms=dict(type='nms', iou_threshold=0.5),
68 |         max_per_img=100))
69 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/models/rpn_r50-caffe-c4.py:
--------------------------------------------------------------------------------
 1 | # model settings: stand-alone RPN with a caffe-style ResNet-50 backbone and no neck
 2 | model = dict(
 3 |     type='RPN',
 4 |     data_preprocessor=dict(  # input normalisation (mean/std) and padding options
 5 |         type='DetDataPreprocessor',
 6 |         mean=[103.530, 116.280, 123.675],
 7 |         std=[1.0, 1.0, 1.0],
 8 |         bgr_to_rgb=False,
 9 |         pad_size_divisor=32),
10 |     backbone=dict(
11 |         type='ResNet',
12 |         depth=50,
13 |         num_stages=3,
14 |         strides=(1, 2, 2),  # one entry per stage (num_stages=3)
15 |         dilations=(1, 1, 1),
16 |         out_indices=(2, ),  # a single output index
17 |         frozen_stages=1,
18 |         norm_cfg=dict(type='BN', requires_grad=False),
19 |         norm_eval=True,
20 |         style='caffe',
21 |         init_cfg=dict(
22 |             type='Pretrained',
23 |             checkpoint='open-mmlab://detectron2/resnet50_caffe')),
24 |     neck=None,  # no neck is configured
25 |     rpn_head=dict(
26 |         type='RPNHead',
27 |         in_channels=1024,
28 |         feat_channels=1024,
29 |         anchor_generator=dict(
30 |             type='AnchorGenerator',
31 |             scales=[2, 4, 8, 16, 32],
32 |             ratios=[0.5, 1.0, 2.0],
33 |             strides=[16]),  # single stride, matching the single backbone output
34 |         bbox_coder=dict(
35 |             type='DeltaXYWHBBoxCoder',
36 |             target_means=[.0, .0, .0, .0],
37 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
38 |         loss_cls=dict(
39 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
40 |         loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
41 |     # model training and testing settings
42 |     train_cfg=dict(
43 |         rpn=dict(  # training-time assigner/sampler settings under the `rpn` key
44 |             assigner=dict(
45 |                 type='MaxIoUAssigner',
46 |                 pos_iou_thr=0.7,
47 |                 neg_iou_thr=0.3,
48 |                 min_pos_iou=0.3,
49 |                 ignore_iof_thr=-1),
50 |             sampler=dict(
51 |                 type='RandomSampler',
52 |                 num=256,
53 |                 pos_fraction=0.5,
54 |                 neg_pos_ub=-1,
55 |                 add_gt_as_proposals=False),
56 |             allowed_border=-1,
57 |             pos_weight=-1,
58 |             debug=False)),
59 |     test_cfg=dict(
60 |         rpn=dict(  # test-time proposal filtering (NMS and per-image caps)
61 |             nms_pre=12000,
62 |             max_per_img=2000,
63 |             nms=dict(type='nms', iou_threshold=0.7),
64 |             min_bbox_size=0)))
65 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/models/rpn_r50_fpn.py:
--------------------------------------------------------------------------------
 1 | # model settings: stand-alone RPN with a ResNet-50 backbone and an FPN neck
 2 | model = dict(
 3 |     type='RPN',
 4 |     data_preprocessor=dict(  # input normalisation (mean/std) and padding options
 5 |         type='DetDataPreprocessor',
 6 |         mean=[123.675, 116.28, 103.53],
 7 |         std=[58.395, 57.12, 57.375],
 8 |         bgr_to_rgb=True,
 9 |         pad_size_divisor=32),
10 |     backbone=dict(
11 |         type='ResNet',
12 |         depth=50,
13 |         num_stages=4,
14 |         out_indices=(0, 1, 2, 3),  # one index per stage (num_stages=4)
15 |         frozen_stages=1,
16 |         norm_cfg=dict(type='BN', requires_grad=True),
17 |         norm_eval=True,
18 |         style='pytorch',
19 |         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
20 |     neck=dict(
21 |         type='FPN',
22 |         in_channels=[256, 512, 1024, 2048],  # four entries, matching the four backbone out_indices
23 |         out_channels=256,
24 |         num_outs=5),
25 |     rpn_head=dict(
26 |         type='RPNHead',
27 |         in_channels=256,  # equals the neck's out_channels
28 |         feat_channels=256,
29 |         anchor_generator=dict(
30 |             type='AnchorGenerator',
31 |             scales=[8],
32 |             ratios=[0.5, 1.0, 2.0],
33 |             strides=[4, 8, 16, 32, 64]),  # five strides, same count as the neck's num_outs
34 |         bbox_coder=dict(
35 |             type='DeltaXYWHBBoxCoder',
36 |             target_means=[.0, .0, .0, .0],
37 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
38 |         loss_cls=dict(
39 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
40 |         loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
41 |     # model training and testing settings
42 |     train_cfg=dict(
43 |         rpn=dict(  # training-time assigner/sampler settings under the `rpn` key
44 |             assigner=dict(
45 |                 type='MaxIoUAssigner',
46 |                 pos_iou_thr=0.7,
47 |                 neg_iou_thr=0.3,
48 |                 min_pos_iou=0.3,
49 |                 ignore_iof_thr=-1),
50 |             sampler=dict(
51 |                 type='RandomSampler',
52 |                 num=256,
53 |                 pos_fraction=0.5,
54 |                 neg_pos_ub=-1,
55 |                 add_gt_as_proposals=False),
56 |             allowed_border=-1,
57 |             pos_weight=-1,
58 |             debug=False)),
59 |     test_cfg=dict(
60 |         rpn=dict(  # test-time proposal filtering (NMS and per-image caps)
61 |             nms_pre=2000,
62 |             max_per_img=1000,
63 |             nms=dict(type='nms', iou_threshold=0.7),
64 |             min_bbox_size=0)))
65 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/models/ssd300.py:
--------------------------------------------------------------------------------
 1 | # model settings: SingleStageDetector with an SSDVGG backbone, SSDNeck and SSDHead
 2 | input_size = 300  # reused below by the anchor generator
 3 | model = dict(
 4 |     type='SingleStageDetector',
 5 |     data_preprocessor=dict(  # input normalisation (mean/std) and padding options
 6 |         type='DetDataPreprocessor',
 7 |         mean=[123.675, 116.28, 103.53],
 8 |         std=[1, 1, 1],
 9 |         bgr_to_rgb=True,
10 |         pad_size_divisor=1),
11 |     backbone=dict(
12 |         type='SSDVGG',
13 |         depth=16,
14 |         with_last_pool=False,
15 |         ceil_mode=True,
16 |         out_indices=(3, 4),
17 |         out_feature_indices=(22, 34),  # two entries, matching the neck's two in_channels
18 |         init_cfg=dict(
19 |             type='Pretrained', checkpoint='open-mmlab://vgg16_caffe')),
20 |     neck=dict(
21 |         type='SSDNeck',
22 |         in_channels=(512, 1024),
23 |         out_channels=(512, 1024, 512, 256, 256, 256),  # six outputs: the two inputs plus four more
24 |         level_strides=(2, 2, 1, 1),  # four entries, one per output beyond the two inputs
25 |         level_paddings=(1, 1, 0, 0),
26 |         l2_norm_scale=20),
27 |     bbox_head=dict(
28 |         type='SSDHead',
29 |         in_channels=(512, 1024, 512, 256, 256, 256),  # identical to the neck's out_channels
30 |         num_classes=80,
31 |         anchor_generator=dict(
32 |             type='SSDAnchorGenerator',
33 |             scale_major=False,
34 |             input_size=input_size,
35 |             basesize_ratio_range=(0.15, 0.9),
36 |             strides=[8, 16, 32, 64, 100, 300],  # six strides, one per head input
37 |             ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]),  # six ratio lists, one per stride
38 |         bbox_coder=dict(
39 |             type='DeltaXYWHBBoxCoder',
40 |             target_means=[.0, .0, .0, .0],
41 |             target_stds=[0.1, 0.1, 0.2, 0.2])),
42 |     # model training and testing settings
43 |     train_cfg=dict(  # flat layout: assigner/sampler sit directly under train_cfg (single head)
44 |         assigner=dict(
45 |             type='MaxIoUAssigner',
46 |             pos_iou_thr=0.5,
47 |             neg_iou_thr=0.5,
48 |             min_pos_iou=0.,
49 |             ignore_iof_thr=-1,
50 |             gt_max_assign_all=False),
51 |         sampler=dict(type='PseudoSampler'),
52 |         smoothl1_beta=1.,
53 |         allowed_border=-1,
54 |         pos_weight=-1,
55 |         neg_pos_ratio=3,
56 |         debug=False),
57 |     test_cfg=dict(  # test-time score threshold, NMS and per-image detection cap
58 |         nms_pre=1000,
59 |         nms=dict(type='nms', iou_threshold=0.45),
60 |         min_bbox_size=0,
61 |         score_thr=0.02,
62 |         max_per_img=200))
63 | cudnn_benchmark = True  # top-level flag defined outside the model dict
64 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/schedules/schedule_1x.py:
--------------------------------------------------------------------------------
 1 | # training schedule for 1x (12 epochs, validating every epoch)
 2 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=1)
 3 | val_cfg = dict(type='ValLoop')
 4 | test_cfg = dict(type='TestLoop')
 5 | 
 6 | # learning rate: two schedulers, a LinearLR followed in the list by a MultiStepLR
 7 | param_scheduler = [
 8 |     dict(
 9 |         type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),  # by_epoch=False: begin/end are presumably iteration counts
10 |     dict(
11 |         type='MultiStepLR',
12 |         begin=0,
13 |         end=12,  # same value as max_epochs above
14 |         by_epoch=True,
15 |         milestones=[8, 11],
16 |         gamma=0.1)
17 | ]
18 | 
19 | # optimizer: SGD with momentum and weight decay
20 | optim_wrapper = dict(
21 |     type='OptimWrapper',
22 |     optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))
23 | 
24 | # Default settings for automatic LR scaling
25 | #   - `enable`: whether automatic LR scaling is turned on
26 | #       by default (it is off here).
27 | #   - `base_batch_size` = (8 GPUs) x (2 samples per GPU) = 16.
28 | auto_scale_lr = dict(enable=False, base_batch_size=16)
29 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/schedules/schedule_20e.py:
--------------------------------------------------------------------------------
 1 | # training schedule for 20e (20 epochs, validating every epoch)
 2 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=20, val_interval=1)
 3 | val_cfg = dict(type='ValLoop')
 4 | test_cfg = dict(type='TestLoop')
 5 | 
 6 | # learning rate: two schedulers, a LinearLR followed in the list by a MultiStepLR
 7 | param_scheduler = [
 8 |     dict(
 9 |         type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),  # by_epoch=False: begin/end are presumably iteration counts
10 |     dict(
11 |         type='MultiStepLR',
12 |         begin=0,
13 |         end=20,  # same value as max_epochs above
14 |         by_epoch=True,
15 |         milestones=[16, 19],
16 |         gamma=0.1)
17 | ]
18 | 
19 | # optimizer: SGD with momentum and weight decay
20 | optim_wrapper = dict(
21 |     type='OptimWrapper',
22 |     optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))
23 | 
24 | # Default settings for automatic LR scaling
25 | #   - `enable`: whether automatic LR scaling is turned on
26 | #       by default (it is off here).
27 | #   - `base_batch_size` = (8 GPUs) x (2 samples per GPU) = 16.
28 | auto_scale_lr = dict(enable=False, base_batch_size=16)
29 | 


--------------------------------------------------------------------------------
/detection/configs/_base_/schedules/schedule_2x.py:
--------------------------------------------------------------------------------
 1 | # training schedule for 2x (24 epochs, validating every epoch)
 2 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=24, val_interval=1)
 3 | val_cfg = dict(type='ValLoop')
 4 | test_cfg = dict(type='TestLoop')
 5 | 
 6 | # learning rate: two schedulers, a LinearLR followed in the list by a MultiStepLR
 7 | param_scheduler = [
 8 |     dict(
 9 |         type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),  # by_epoch=False: begin/end are presumably iteration counts
10 |     dict(
11 |         type='MultiStepLR',
12 |         begin=0,
13 |         end=24,  # same value as max_epochs above
14 |         by_epoch=True,
15 |         milestones=[16, 22],
16 |         gamma=0.1)
17 | ]
18 | 
19 | # optimizer: SGD with momentum and weight decay
20 | optim_wrapper = dict(
21 |     type='OptimWrapper',
22 |     optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))
23 | 
24 | # Default settings for automatic LR scaling
25 | #   - `enable`: whether automatic LR scaling is turned on
26 | #       by default (it is off here).
27 | #   - `base_batch_size` = (8 GPUs) x (2 samples per GPU) = 16.
28 | auto_scale_lr = dict(enable=False, base_batch_size=16)
29 | 


--------------------------------------------------------------------------------
/detection/configs/paca_vit/mask_rcnn_1x/mask_rcnn_pacavit_base_p2cconv_100_0_mstrain_480_800_1x_coco.py:
--------------------------------------------------------------------------------
  1 | _base_ = [  # inherited configs: Mask R-CNN model, COCO instance dataset, 1x schedule, default runtime
  2 |     "../../_base_/models/mask-rcnn_r50_fpn.py",
  3 |     "../../_base_/datasets/coco_instance.py",
  4 |     "../../_base_/schedules/schedule_1x.py",
  5 |     "../../_base_/default_runtime.py",
  6 | ]
  7 | 
  8 | 
  9 | model = dict(  # overrides applied on top of the inherited Mask R-CNN model
 10 |     backbone=dict(
 11 |         _delete_=True,  # presumably replaces the inherited backbone dict instead of merging into it; see the config loader docs
 12 |         type="pacavit_base_p2cconv_100_0_downstream",
 13 |         drop_path_rate=0.1,
 14 |         layer_scale=None,
 15 |         pretrained=(  # relative checkpoint path; it must resolve from the directory the job is launched in
 16 |             "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_base_p2cconv_100_0.pth"
 17 |         ),
 18 |     ),
 19 |     neck=dict(in_channels=[96, 192, 384, 512]),  # overrides the base FPN in_channels; assumes these match the backbone's stage widths, TODO confirm
 20 | )
 21 | 
 22 | img_norm_cfg = dict(  # NOTE(review): not referenced again in this file; same mean/std as the base model's data_preprocessor
 23 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True
 24 | )
 25 | 
 26 | # augmentation strategy originates from DETR / Sparse RCNN
 27 | train_pipeline = [
 28 |     dict(type="LoadImageFromFile", file_client_args={{_base_.file_client_args}}),  # double-brace placeholder: value comes from a variable in the base configs
 29 |     dict(type="LoadAnnotations", with_bbox=True, with_mask=True),
 30 |     dict(type="RandomFlip", prob=0.5),
 31 |     dict(
 32 |         type="RandomChoice",  # two alternative branches: resize only, or resize + crop + resize
 33 |         transforms=[
 34 |             [  # branch 1: a single multi-scale resize
 35 |                 dict(
 36 |                     type="RandomChoiceResize",
 37 |                     scales=[  # eleven scales, first value 480..800 in steps of 32
 38 |                         (480, 1333),
 39 |                         (512, 1333),
 40 |                         (544, 1333),
 41 |                         (576, 1333),
 42 |                         (608, 1333),
 43 |                         (640, 1333),
 44 |                         (672, 1333),
 45 |                         (704, 1333),
 46 |                         (736, 1333),
 47 |                         (768, 1333),
 48 |                         (800, 1333),
 49 |                     ],
 50 |                     keep_ratio=True,
 51 |                 )
 52 |             ],
 53 |             [  # branch 2: coarse resize, random crop, then the same multi-scale resize as branch 1
 54 |                 dict(
 55 |                     type="RandomChoiceResize",
 56 |                     scales=[(400, 1333), (500, 1333), (600, 1333)],
 57 |                     keep_ratio=True,
 58 |                 ),
 59 |                 dict(
 60 |                     type="RandomCrop",
 61 |                     crop_type="absolute_range",
 62 |                     crop_size=(384, 600),
 63 |                     allow_negative_crop=True,
 64 |                 ),
 65 |                 dict(
 66 |                     type="RandomChoiceResize",
 67 |                     scales=[  # same eleven scales as branch 1
 68 |                         (480, 1333),
 69 |                         (512, 1333),
 70 |                         (544, 1333),
 71 |                         (576, 1333),
 72 |                         (608, 1333),
 73 |                         (640, 1333),
 74 |                         (672, 1333),
 75 |                         (704, 1333),
 76 |                         (736, 1333),
 77 |                         (768, 1333),
 78 |                         (800, 1333),
 79 |                     ],
 80 |                     keep_ratio=True,
 81 |                 ),
 82 |             ],
 83 |         ],
 84 |     ),
 85 |     dict(type="PackDetInputs"),
 86 | ]
 87 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))  # only the training pipeline is overridden in this file
 88 | 
 89 | # optimizer: AdamW in place of the SGD optimizer from schedule_1x
 90 | optim_wrapper = dict(
 91 |     type="OptimWrapper",
 92 |     paramwise_cfg=dict(
 93 |         custom_keys={
 94 |             "norm": dict(decay_mult=0.0),  # presumably disables weight decay for parameters whose name contains "norm"
 95 |         }
 96 |     ),
 97 |     optimizer=dict(
 98 |         _delete_=True, type="AdamW", lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05
 99 |     ),
100 | )
101 | 


--------------------------------------------------------------------------------
/detection/configs/paca_vit/mask_rcnn_1x/mask_rcnn_pacavit_convmixer_base_100_mstrain_480_800_1x_coco.py:
--------------------------------------------------------------------------------
  1 | _base_ = [
  2 |     "../../_base_/models/mask-rcnn_r50_fpn.py",
  3 |     "../../_base_/datasets/coco_instance.py",
  4 |     "../../_base_/schedules/schedule_1x.py",
  5 |     "../../_base_/default_runtime.py",
  6 | ]
  7 | 
  8 | 
  9 | model = dict(  # overrides for the model defined by the mask-rcnn_r50_fpn.py base config
 10 |     backbone=dict(
 11 |         _delete_=True,  # presumably discards the base backbone's keys instead of merging with them — confirm
 12 |         type="pacavit_convmixer_base_100_downstream",  # backbone name; it is registered outside this file
 13 |         drop_path_rate=0.1,
 14 |         layer_scale=None,
 15 |         pretrained=(
 16 |             "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_convmixer_base_100.pth"  # relative checkpoint path; NOTE(review): resolution presumably depends on the launch directory — confirm
 17 |         ),
 18 |     ),
 19 |     neck=dict(in_channels=[96, 192, 384, 512]),  # presumably must equal the backbone's per-stage output channels — verify against the backbone definition
 20 | )
 21 | 
 22 | img_norm_cfg = dict(  # per-channel mean/std and a to_rgb flag; NOTE(review): nothing in this file's model overrides, pipeline or dataloader references this dict — confirm it is still needed
 23 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True
 24 | )
 25 | 
 26 | # augmentation strategy originates from DETR / Sparse RCNN
 27 | train_pipeline = [  # training transforms in order: load image, load boxes+masks, flip, resize/crop choice, pack
 28 |     dict(type="LoadImageFromFile", file_client_args={{_base_.file_client_args}}),  # {{...}} is presumably substituted from the base configs at load time — confirm
 29 |     dict(type="LoadAnnotations", with_bbox=True, with_mask=True),
 30 |     dict(type="RandomFlip", prob=0.5),
 31 |     dict(
 32 |         type="RandomChoice",  # presumably applies exactly one of the two branches below per sample — confirm
 33 |         transforms=[
 34 |             [  # branch 1: a single multi-scale resize
 35 |                 dict(
 36 |                     type="RandomChoiceResize",
 37 |                     scales=[  # first value steps 480..800 by 32; second value is fixed at 1333
 38 |                         (480, 1333),
 39 |                         (512, 1333),
 40 |                         (544, 1333),
 41 |                         (576, 1333),
 42 |                         (608, 1333),
 43 |                         (640, 1333),
 44 |                         (672, 1333),
 45 |                         (704, 1333),
 46 |                         (736, 1333),
 47 |                         (768, 1333),
 48 |                         (800, 1333),
 49 |                     ],
 50 |                     keep_ratio=True,
 51 |                 )
 52 |             ],
 53 |             [  # branch 2: coarse resize, random crop, then the same scale list as branch 1
 54 |                 dict(
 55 |                     type="RandomChoiceResize",
 56 |                     scales=[(400, 1333), (500, 1333), (600, 1333)],
 57 |                     keep_ratio=True,
 58 |                 ),
 59 |                 dict(
 60 |                     type="RandomCrop",
 61 |                     crop_type="absolute_range",
 62 |                     crop_size=(384, 600),
 63 |                     allow_negative_crop=True,
 64 |                 ),
 65 |                 dict(
 66 |                     type="RandomChoiceResize",
 67 |                     scales=[
 68 |                         (480, 1333),
 69 |                         (512, 1333),
 70 |                         (544, 1333),
 71 |                         (576, 1333),
 72 |                         (608, 1333),
 73 |                         (640, 1333),
 74 |                         (672, 1333),
 75 |                         (704, 1333),
 76 |                         (736, 1333),
 77 |                         (768, 1333),
 78 |                         (800, 1333),
 79 |                     ],
 80 |                     keep_ratio=True,
 81 |                 ),
 82 |             ],
 83 |         ],
 84 |     ),
 85 |     dict(type="PackDetInputs"),
 86 | ]
 87 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))  # set the training dataset's pipeline to the train_pipeline list above
 88 | 
 89 | # optimizer: AdamW (lr 1e-4, weight decay 0.05) inside an OptimWrapper
 90 | optim_wrapper = dict(
 91 |     type="OptimWrapper",
 92 |     paramwise_cfg=dict(
 93 |         custom_keys={
 94 |             "norm": dict(decay_mult=0.0),  # weight-decay multiplier 0 for the "norm" key; presumably matched against parameter names — confirm
 95 |         }
 96 |     ),
 97 |     optimizer=dict(
 98 |         _delete_=True, type="AdamW", lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05  # _delete_ presumably replaces the base schedule's optimizer dict instead of merging — confirm
 99 |     ),
100 | )
101 | 


--------------------------------------------------------------------------------
/detection/configs/paca_vit/mask_rcnn_1x/mask_rcnn_pacavit_convmixer_small_100_mstrain_480_800_1x_coco.py:
--------------------------------------------------------------------------------
  1 | _base_ = [  # base configs this file inherits from; paths point into detection/configs/_base_
  2 |     "../../_base_/models/mask-rcnn_r50_fpn.py",
  3 |     "../../_base_/datasets/coco_instance.py",  # NOTE(review): the repo tree lists no _base_/datasets/ directory — confirm this file exists
  4 |     "../../_base_/schedules/schedule_1x.py",
  5 |     "../../_base_/default_runtime.py",
  6 | ]
  7 | 
  8 | 
  9 | model = dict(  # overrides for the model defined by the mask-rcnn_r50_fpn.py base config
 10 |     backbone=dict(
 11 |         _delete_=True,  # presumably discards the base backbone's keys instead of merging with them — confirm
 12 |         type="pacavit_convmixer_small_100_downstream",  # backbone name; it is registered outside this file
 13 |         drop_path_rate=0.1,
 14 |         layer_scale=None,
 15 |         pretrained=(
 16 |             "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_convmixer_small_100.pth"  # relative checkpoint path; NOTE(review): resolution presumably depends on the launch directory — confirm
 17 |         ),
 18 |     ),
 19 |     neck=dict(in_channels=[96, 192, 320, 384]),  # presumably must equal the backbone's per-stage output channels — verify against the backbone definition
 20 | )
 21 | 
 22 | img_norm_cfg = dict(  # per-channel mean/std and a to_rgb flag; NOTE(review): nothing in this file's model overrides, pipeline or dataloader references this dict — confirm it is still needed
 23 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True
 24 | )
 25 | 
 26 | # augmentation strategy originates from DETR / Sparse RCNN
 27 | train_pipeline = [  # training transforms in order: load image, load boxes+masks, flip, resize/crop choice, pack
 28 |     dict(type="LoadImageFromFile", file_client_args={{_base_.file_client_args}}),  # {{...}} is presumably substituted from the base configs at load time — confirm
 29 |     dict(type="LoadAnnotations", with_bbox=True, with_mask=True),
 30 |     dict(type="RandomFlip", prob=0.5),
 31 |     dict(
 32 |         type="RandomChoice",  # presumably applies exactly one of the two branches below per sample — confirm
 33 |         transforms=[
 34 |             [  # branch 1: a single multi-scale resize
 35 |                 dict(
 36 |                     type="RandomChoiceResize",
 37 |                     scales=[  # first value steps 480..800 by 32; second value is fixed at 1333
 38 |                         (480, 1333),
 39 |                         (512, 1333),
 40 |                         (544, 1333),
 41 |                         (576, 1333),
 42 |                         (608, 1333),
 43 |                         (640, 1333),
 44 |                         (672, 1333),
 45 |                         (704, 1333),
 46 |                         (736, 1333),
 47 |                         (768, 1333),
 48 |                         (800, 1333),
 49 |                     ],
 50 |                     keep_ratio=True,
 51 |                 )
 52 |             ],
 53 |             [  # branch 2: coarse resize, random crop, then the same scale list as branch 1
 54 |                 dict(
 55 |                     type="RandomChoiceResize",
 56 |                     scales=[(400, 1333), (500, 1333), (600, 1333)],
 57 |                     keep_ratio=True,
 58 |                 ),
 59 |                 dict(
 60 |                     type="RandomCrop",
 61 |                     crop_type="absolute_range",
 62 |                     crop_size=(384, 600),
 63 |                     allow_negative_crop=True,
 64 |                 ),
 65 |                 dict(
 66 |                     type="RandomChoiceResize",
 67 |                     scales=[
 68 |                         (480, 1333),
 69 |                         (512, 1333),
 70 |                         (544, 1333),
 71 |                         (576, 1333),
 72 |                         (608, 1333),
 73 |                         (640, 1333),
 74 |                         (672, 1333),
 75 |                         (704, 1333),
 76 |                         (736, 1333),
 77 |                         (768, 1333),
 78 |                         (800, 1333),
 79 |                     ],
 80 |                     keep_ratio=True,
 81 |                 ),
 82 |             ],
 83 |         ],
 84 |     ),
 85 |     dict(type="PackDetInputs"),
 86 | ]
 87 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))  # set the training dataset's pipeline to the train_pipeline list above
 88 | 
 89 | # optimizer: AdamW (lr 1e-4, weight decay 0.05) inside an OptimWrapper
 90 | optim_wrapper = dict(
 91 |     type="OptimWrapper",
 92 |     paramwise_cfg=dict(
 93 |         custom_keys={
 94 |             "norm": dict(decay_mult=0.0),  # weight-decay multiplier 0 for the "norm" key; presumably matched against parameter names — confirm
 95 |         }
 96 |     ),
 97 |     optimizer=dict(
 98 |         _delete_=True, type="AdamW", lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05  # _delete_ presumably replaces the base schedule's optimizer dict instead of merging — confirm
 99 |     ),
100 | )
101 | 


--------------------------------------------------------------------------------
/detection/configs/paca_vit/mask_rcnn_1x/mask_rcnn_pacavit_convmixer_tiny_100_mstrain_480_800_1x_coco.py:
--------------------------------------------------------------------------------
  1 | _base_ = [  # base configs this file inherits from; paths point into detection/configs/_base_
  2 |     "../../_base_/models/mask-rcnn_r50_fpn.py",
  3 |     "../../_base_/datasets/coco_instance.py",  # NOTE(review): the repo tree lists no _base_/datasets/ directory — confirm this file exists
  4 |     "../../_base_/schedules/schedule_1x.py",
  5 |     "../../_base_/default_runtime.py",
  6 | ]
  7 | 
  8 | 
  9 | model = dict(  # overrides for the model defined by the mask-rcnn_r50_fpn.py base config
 10 |     backbone=dict(
 11 |         _delete_=True,  # presumably discards the base backbone's keys instead of merging with them — confirm
 12 |         type="pacavit_convmixer_tiny_100_downstream",  # backbone name; it is registered outside this file
 13 |         drop_path_rate=0.1,
 14 |         layer_scale=None,
 15 |         pretrained=(
 16 |             "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_convmixer_tiny_100.pth"  # relative checkpoint path; NOTE(review): resolution presumably depends on the launch directory — confirm
 17 |         ),
 18 |     ),
 19 |     neck=dict(in_channels=[96, 192, 320, 384]),  # presumably must equal the backbone's per-stage output channels — verify against the backbone definition
 20 | )
 21 | 
 22 | img_norm_cfg = dict(  # per-channel mean/std and a to_rgb flag; NOTE(review): nothing in this file's model overrides, pipeline or dataloader references this dict — confirm it is still needed
 23 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True
 24 | )
 25 | 
 26 | # augmentation strategy originates from DETR / Sparse RCNN
 27 | train_pipeline = [  # training transforms in order: load image, load boxes+masks, flip, resize/crop choice, pack
 28 |     dict(type="LoadImageFromFile", file_client_args={{_base_.file_client_args}}),  # {{...}} is presumably substituted from the base configs at load time — confirm
 29 |     dict(type="LoadAnnotations", with_bbox=True, with_mask=True),
 30 |     dict(type="RandomFlip", prob=0.5),
 31 |     dict(
 32 |         type="RandomChoice",  # presumably applies exactly one of the two branches below per sample — confirm
 33 |         transforms=[
 34 |             [  # branch 1: a single multi-scale resize
 35 |                 dict(
 36 |                     type="RandomChoiceResize",
 37 |                     scales=[  # first value steps 480..800 by 32; second value is fixed at 1333
 38 |                         (480, 1333),
 39 |                         (512, 1333),
 40 |                         (544, 1333),
 41 |                         (576, 1333),
 42 |                         (608, 1333),
 43 |                         (640, 1333),
 44 |                         (672, 1333),
 45 |                         (704, 1333),
 46 |                         (736, 1333),
 47 |                         (768, 1333),
 48 |                         (800, 1333),
 49 |                     ],
 50 |                     keep_ratio=True,
 51 |                 )
 52 |             ],
 53 |             [  # branch 2: coarse resize, random crop, then the same scale list as branch 1
 54 |                 dict(
 55 |                     type="RandomChoiceResize",
 56 |                     scales=[(400, 1333), (500, 1333), (600, 1333)],
 57 |                     keep_ratio=True,
 58 |                 ),
 59 |                 dict(
 60 |                     type="RandomCrop",
 61 |                     crop_type="absolute_range",
 62 |                     crop_size=(384, 600),
 63 |                     allow_negative_crop=True,
 64 |                 ),
 65 |                 dict(
 66 |                     type="RandomChoiceResize",
 67 |                     scales=[
 68 |                         (480, 1333),
 69 |                         (512, 1333),
 70 |                         (544, 1333),
 71 |                         (576, 1333),
 72 |                         (608, 1333),
 73 |                         (640, 1333),
 74 |                         (672, 1333),
 75 |                         (704, 1333),
 76 |                         (736, 1333),
 77 |                         (768, 1333),
 78 |                         (800, 1333),
 79 |                     ],
 80 |                     keep_ratio=True,
 81 |                 ),
 82 |             ],
 83 |         ],
 84 |     ),
 85 |     dict(type="PackDetInputs"),
 86 | ]
 87 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))  # set the training dataset's pipeline to the train_pipeline list above
 88 | 
 89 | # optimizer: AdamW (lr 1e-4, weight decay 0.05) inside an OptimWrapper
 90 | optim_wrapper = dict(
 91 |     type="OptimWrapper",
 92 |     paramwise_cfg=dict(
 93 |         custom_keys={
 94 |             "norm": dict(decay_mult=0.0),  # weight-decay multiplier 0 for the "norm" key; presumably matched against parameter names — confirm
 95 |         }
 96 |     ),
 97 |     optimizer=dict(
 98 |         _delete_=True, type="AdamW", lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05  # _delete_ presumably replaces the base schedule's optimizer dict instead of merging — confirm
 99 |     ),
100 | )
101 | 


--------------------------------------------------------------------------------
/detection/configs/paca_vit/mask_rcnn_1x/mask_rcnn_pacavit_small_p2cconv_100_0_mstrain_480_800_1x_coco.py:
--------------------------------------------------------------------------------
  1 | _base_ = [  # base configs this file inherits from; paths point into detection/configs/_base_
  2 |     "../../_base_/models/mask-rcnn_r50_fpn.py",
  3 |     "../../_base_/datasets/coco_instance.py",  # NOTE(review): the repo tree lists no _base_/datasets/ directory — confirm this file exists
  4 |     "../../_base_/schedules/schedule_1x.py",
  5 |     "../../_base_/default_runtime.py",
  6 | ]
  7 | 
  8 | 
  9 | model = dict(  # overrides for the model defined by the mask-rcnn_r50_fpn.py base config
 10 |     backbone=dict(
 11 |         _delete_=True,  # presumably discards the base backbone's keys instead of merging with them — confirm
 12 |         type="pacavit_small_p2cconv_100_0_downstream",  # backbone name; it is registered outside this file
 13 |         drop_path_rate=0.1,
 14 |         layer_scale=None,
 15 |         pretrained=(
 16 |             "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_100_0.pth"  # relative checkpoint path; NOTE(review): resolution presumably depends on the launch directory — confirm
 17 |         ),
 18 |     ),
 19 |     neck=dict(in_channels=[96, 192, 320, 384]),  # presumably must equal the backbone's per-stage output channels — verify against the backbone definition
 20 | )
 21 | 
 22 | img_norm_cfg = dict(  # per-channel mean/std and a to_rgb flag; NOTE(review): nothing in this file's model overrides, pipeline or dataloader references this dict — confirm it is still needed
 23 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True
 24 | )
 25 | 
 26 | # augmentation strategy originates from DETR / Sparse RCNN
 27 | train_pipeline = [  # training transforms in order: load image, load boxes+masks, flip, resize/crop choice, pack
 28 |     dict(type="LoadImageFromFile", file_client_args={{_base_.file_client_args}}),  # {{...}} is presumably substituted from the base configs at load time — confirm
 29 |     dict(type="LoadAnnotations", with_bbox=True, with_mask=True),
 30 |     dict(type="RandomFlip", prob=0.5),
 31 |     dict(
 32 |         type="RandomChoice",  # presumably applies exactly one of the two branches below per sample — confirm
 33 |         transforms=[
 34 |             [  # branch 1: a single multi-scale resize
 35 |                 dict(
 36 |                     type="RandomChoiceResize",
 37 |                     scales=[  # first value steps 480..800 by 32; second value is fixed at 1333
 38 |                         (480, 1333),
 39 |                         (512, 1333),
 40 |                         (544, 1333),
 41 |                         (576, 1333),
 42 |                         (608, 1333),
 43 |                         (640, 1333),
 44 |                         (672, 1333),
 45 |                         (704, 1333),
 46 |                         (736, 1333),
 47 |                         (768, 1333),
 48 |                         (800, 1333),
 49 |                     ],
 50 |                     keep_ratio=True,
 51 |                 )
 52 |             ],
 53 |             [  # branch 2: coarse resize, random crop, then the same scale list as branch 1
 54 |                 dict(
 55 |                     type="RandomChoiceResize",
 56 |                     scales=[(400, 1333), (500, 1333), (600, 1333)],
 57 |                     keep_ratio=True,
 58 |                 ),
 59 |                 dict(
 60 |                     type="RandomCrop",
 61 |                     crop_type="absolute_range",
 62 |                     crop_size=(384, 600),
 63 |                     allow_negative_crop=True,
 64 |                 ),
 65 |                 dict(
 66 |                     type="RandomChoiceResize",
 67 |                     scales=[
 68 |                         (480, 1333),
 69 |                         (512, 1333),
 70 |                         (544, 1333),
 71 |                         (576, 1333),
 72 |                         (608, 1333),
 73 |                         (640, 1333),
 74 |                         (672, 1333),
 75 |                         (704, 1333),
 76 |                         (736, 1333),
 77 |                         (768, 1333),
 78 |                         (800, 1333),
 79 |                     ],
 80 |                     keep_ratio=True,
 81 |                 ),
 82 |             ],
 83 |         ],
 84 |     ),
 85 |     dict(type="PackDetInputs"),
 86 | ]
 87 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))  # set the training dataset's pipeline to the train_pipeline list above
 88 | 
 89 | # optimizer: AdamW (lr 1e-4, weight decay 0.05) inside an OptimWrapper
 90 | optim_wrapper = dict(
 91 |     type="OptimWrapper",
 92 |     paramwise_cfg=dict(
 93 |         custom_keys={
 94 |             "norm": dict(decay_mult=0.0),  # weight-decay multiplier 0 for the "norm" key; presumably matched against parameter names — confirm
 95 |         }
 96 |     ),
 97 |     optimizer=dict(
 98 |         _delete_=True, type="AdamW", lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05  # _delete_ presumably replaces the base schedule's optimizer dict instead of merging — confirm
 99 |     ),
100 | )
101 | 


--------------------------------------------------------------------------------
/detection/configs/paca_vit/mask_rcnn_1x/mask_rcnn_pacavit_small_p2cconv_100_49_mstrain_480_800_1x_coco.py:
--------------------------------------------------------------------------------
  1 | _base_ = [  # base configs this file inherits from; paths point into detection/configs/_base_
  2 |     "../../_base_/models/mask-rcnn_r50_fpn.py",
  3 |     "../../_base_/datasets/coco_instance.py",  # NOTE(review): the repo tree lists no _base_/datasets/ directory — confirm this file exists
  4 |     "../../_base_/schedules/schedule_1x.py",
  5 |     "../../_base_/default_runtime.py",
  6 | ]
  7 | 
  8 | 
  9 | model = dict(  # overrides for the model defined by the mask-rcnn_r50_fpn.py base config
 10 |     backbone=dict(
 11 |         _delete_=True,  # presumably discards the base backbone's keys instead of merging with them — confirm
 12 |         type="pacavit_small_p2cconv_100_49_downstream",  # backbone name; it is registered outside this file
 13 |         drop_path_rate=0.1,
 14 |         layer_scale=None,
 15 |         pretrained=(
 16 |             "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_100_49.pth"  # relative checkpoint path; NOTE(review): resolution presumably depends on the launch directory — confirm
 17 |         ),
 18 |     ),
 19 |     neck=dict(in_channels=[96, 192, 320, 384]),  # presumably must equal the backbone's per-stage output channels — verify against the backbone definition
 20 | )
 21 | 
 22 | img_norm_cfg = dict(  # per-channel mean/std and a to_rgb flag; NOTE(review): nothing in this file's model overrides, pipeline or dataloader references this dict — confirm it is still needed
 23 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True
 24 | )
 25 | 
 26 | # augmentation strategy originates from DETR / Sparse RCNN
 27 | train_pipeline = [  # training transforms in order: load image, load boxes+masks, flip, resize/crop choice, pack
 28 |     dict(type="LoadImageFromFile", file_client_args={{_base_.file_client_args}}),  # {{...}} is presumably substituted from the base configs at load time — confirm
 29 |     dict(type="LoadAnnotations", with_bbox=True, with_mask=True),
 30 |     dict(type="RandomFlip", prob=0.5),
 31 |     dict(
 32 |         type="RandomChoice",  # presumably applies exactly one of the two branches below per sample — confirm
 33 |         transforms=[
 34 |             [  # branch 1: a single multi-scale resize
 35 |                 dict(
 36 |                     type="RandomChoiceResize",
 37 |                     scales=[  # first value steps 480..800 by 32; second value is fixed at 1333
 38 |                         (480, 1333),
 39 |                         (512, 1333),
 40 |                         (544, 1333),
 41 |                         (576, 1333),
 42 |                         (608, 1333),
 43 |                         (640, 1333),
 44 |                         (672, 1333),
 45 |                         (704, 1333),
 46 |                         (736, 1333),
 47 |                         (768, 1333),
 48 |                         (800, 1333),
 49 |                     ],
 50 |                     keep_ratio=True,
 51 |                 )
 52 |             ],
 53 |             [  # branch 2: coarse resize, random crop, then the same scale list as branch 1
 54 |                 dict(
 55 |                     type="RandomChoiceResize",
 56 |                     scales=[(400, 1333), (500, 1333), (600, 1333)],
 57 |                     keep_ratio=True,
 58 |                 ),
 59 |                 dict(
 60 |                     type="RandomCrop",
 61 |                     crop_type="absolute_range",
 62 |                     crop_size=(384, 600),
 63 |                     allow_negative_crop=True,
 64 |                 ),
 65 |                 dict(
 66 |                     type="RandomChoiceResize",
 67 |                     scales=[
 68 |                         (480, 1333),
 69 |                         (512, 1333),
 70 |                         (544, 1333),
 71 |                         (576, 1333),
 72 |                         (608, 1333),
 73 |                         (640, 1333),
 74 |                         (672, 1333),
 75 |                         (704, 1333),
 76 |                         (736, 1333),
 77 |                         (768, 1333),
 78 |                         (800, 1333),
 79 |                     ],
 80 |                     keep_ratio=True,
 81 |                 ),
 82 |             ],
 83 |         ],
 84 |     ),
 85 |     dict(type="PackDetInputs"),
 86 | ]
 87 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))  # set the training dataset's pipeline to the train_pipeline list above
 88 | 
 89 | # optimizer: AdamW (lr 1e-4, weight decay 0.05) inside an OptimWrapper
 90 | optim_wrapper = dict(
 91 |     type="OptimWrapper",
 92 |     paramwise_cfg=dict(
 93 |         custom_keys={
 94 |             "norm": dict(decay_mult=0.0),  # weight-decay multiplier 0 for the "norm" key; presumably matched against parameter names — confirm
 95 |         }
 96 |     ),
 97 |     optimizer=dict(
 98 |         _delete_=True, type="AdamW", lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05  # _delete_ presumably replaces the base schedule's optimizer dict instead of merging — confirm
 99 |     ),
100 | )
101 | 


--------------------------------------------------------------------------------
/detection/configs/paca_vit/mask_rcnn_1x/mask_rcnn_pacavit_small_p2cconv_100_blockwise_mstrain_480_800_1x_coco.py:
--------------------------------------------------------------------------------
  1 | _base_ = [  # base configs this file inherits from; paths point into detection/configs/_base_
  2 |     "../../_base_/models/mask-rcnn_r50_fpn.py",
  3 |     "../../_base_/datasets/coco_instance.py",  # NOTE(review): the repo tree lists no _base_/datasets/ directory — confirm this file exists
  4 |     "../../_base_/schedules/schedule_1x.py",
  5 |     "../../_base_/default_runtime.py",
  6 | ]
  7 | 
  8 | 
  9 | model = dict(  # overrides for the model defined by the mask-rcnn_r50_fpn.py base config
 10 |     backbone=dict(
 11 |         _delete_=True,  # presumably discards the base backbone's keys instead of merging with them — confirm
 12 |         type="pacavit_small_p2cconv_100_blockwise_downstream",  # backbone name; it is registered outside this file
 13 |         drop_path_rate=0.1,
 14 |         layer_scale=None,
 15 |         pretrained=(
 16 |             "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_100_blockwise.pth"  # relative checkpoint path; NOTE(review): resolution presumably depends on the launch directory — confirm
 17 |         ),
 18 |     ),
 19 |     neck=dict(in_channels=[96, 192, 320, 384]),  # presumably must equal the backbone's per-stage output channels — verify against the backbone definition
 20 | )
 21 | 
 22 | img_norm_cfg = dict(  # per-channel mean/std and a to_rgb flag; NOTE(review): nothing in this file's model overrides, pipeline or dataloader references this dict — confirm it is still needed
 23 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True
 24 | )
 25 | 
 26 | # augmentation strategy originates from DETR / Sparse RCNN
 27 | train_pipeline = [  # training transforms in order: load image, load boxes+masks, flip, resize/crop choice, pack
 28 |     dict(type="LoadImageFromFile", file_client_args={{_base_.file_client_args}}),  # {{...}} is presumably substituted from the base configs at load time — confirm
 29 |     dict(type="LoadAnnotations", with_bbox=True, with_mask=True),
 30 |     dict(type="RandomFlip", prob=0.5),
 31 |     dict(
 32 |         type="RandomChoice",  # presumably applies exactly one of the two branches below per sample — confirm
 33 |         transforms=[
 34 |             [  # branch 1: a single multi-scale resize
 35 |                 dict(
 36 |                     type="RandomChoiceResize",
 37 |                     scales=[  # first value steps 480..800 by 32; second value is fixed at 1333
 38 |                         (480, 1333),
 39 |                         (512, 1333),
 40 |                         (544, 1333),
 41 |                         (576, 1333),
 42 |                         (608, 1333),
 43 |                         (640, 1333),
 44 |                         (672, 1333),
 45 |                         (704, 1333),
 46 |                         (736, 1333),
 47 |                         (768, 1333),
 48 |                         (800, 1333),
 49 |                     ],
 50 |                     keep_ratio=True,
 51 |                 )
 52 |             ],
 53 |             [  # branch 2: coarse resize, random crop, then the same scale list as branch 1
 54 |                 dict(
 55 |                     type="RandomChoiceResize",
 56 |                     scales=[(400, 1333), (500, 1333), (600, 1333)],
 57 |                     keep_ratio=True,
 58 |                 ),
 59 |                 dict(
 60 |                     type="RandomCrop",
 61 |                     crop_type="absolute_range",
 62 |                     crop_size=(384, 600),
 63 |                     allow_negative_crop=True,
 64 |                 ),
 65 |                 dict(
 66 |                     type="RandomChoiceResize",
 67 |                     scales=[
 68 |                         (480, 1333),
 69 |                         (512, 1333),
 70 |                         (544, 1333),
 71 |                         (576, 1333),
 72 |                         (608, 1333),
 73 |                         (640, 1333),
 74 |                         (672, 1333),
 75 |                         (704, 1333),
 76 |                         (736, 1333),
 77 |                         (768, 1333),
 78 |                         (800, 1333),
 79 |                     ],
 80 |                     keep_ratio=True,
 81 |                 ),
 82 |             ],
 83 |         ],
 84 |     ),
 85 |     dict(type="PackDetInputs"),
 86 | ]
 87 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))  # set the training dataset's pipeline to the train_pipeline list above
 88 | 
 89 | # optimizer: AdamW (lr 1e-4, weight decay 0.05) inside an OptimWrapper
 90 | optim_wrapper = dict(
 91 |     type="OptimWrapper",
 92 |     paramwise_cfg=dict(
 93 |         custom_keys={
 94 |             "norm": dict(decay_mult=0.0),  # weight-decay multiplier 0 for the "norm" key; presumably matched against parameter names — confirm
 95 |         }
 96 |     ),
 97 |     optimizer=dict(
 98 |         _delete_=True, type="AdamW", lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05  # _delete_ presumably replaces the base schedule's optimizer dict instead of merging — confirm
 99 |     ),
100 | )
101 | 


--------------------------------------------------------------------------------
/detection/configs/paca_vit/mask_rcnn_1x/mask_rcnn_pacavit_small_p2cconv_100_mstrain_480_800_1x_coco.py:
--------------------------------------------------------------------------------
  1 | _base_ = [  # base configs this file inherits from; paths point into detection/configs/_base_
  2 |     "../../_base_/models/mask-rcnn_r50_fpn.py",
  3 |     "../../_base_/datasets/coco_instance.py",  # NOTE(review): the repo tree lists no _base_/datasets/ directory — confirm this file exists
  4 |     "../../_base_/schedules/schedule_1x.py",
  5 |     "../../_base_/default_runtime.py",
  6 | ]
  7 | 
  8 | 
  9 | model = dict(  # overrides for the model defined by the mask-rcnn_r50_fpn.py base config
 10 |     backbone=dict(
 11 |         _delete_=True,  # presumably discards the base backbone's keys instead of merging with them — confirm
 12 |         type="pacavit_small_p2cconv_100_downstream",  # backbone name; it is registered outside this file
 13 |         drop_path_rate=0.1,
 14 |         layer_scale=None,
 15 |         pretrained=(
 16 |             "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_100.pth"  # relative checkpoint path; NOTE(review): resolution presumably depends on the launch directory — confirm
 17 |         ),
 18 |     ),
 19 |     neck=dict(in_channels=[96, 192, 320, 384]),  # presumably must equal the backbone's per-stage output channels — verify against the backbone definition
 20 | )
 21 | 
 22 | img_norm_cfg = dict(  # per-channel mean/std and a to_rgb flag; NOTE(review): nothing in this file's model overrides, pipeline or dataloader references this dict — confirm it is still needed
 23 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True
 24 | )
 25 | 
 26 | # augmentation strategy originates from DETR / Sparse RCNN
 27 | train_pipeline = [  # training transforms in order: load image, load boxes+masks, flip, resize/crop choice, pack
 28 |     dict(type="LoadImageFromFile", file_client_args={{_base_.file_client_args}}),  # {{...}} is presumably substituted from the base configs at load time — confirm
 29 |     dict(type="LoadAnnotations", with_bbox=True, with_mask=True),
 30 |     dict(type="RandomFlip", prob=0.5),
 31 |     dict(
 32 |         type="RandomChoice",  # presumably applies exactly one of the two branches below per sample — confirm
 33 |         transforms=[
 34 |             [  # branch 1: a single multi-scale resize
 35 |                 dict(
 36 |                     type="RandomChoiceResize",
 37 |                     scales=[  # first value steps 480..800 by 32; second value is fixed at 1333
 38 |                         (480, 1333),
 39 |                         (512, 1333),
 40 |                         (544, 1333),
 41 |                         (576, 1333),
 42 |                         (608, 1333),
 43 |                         (640, 1333),
 44 |                         (672, 1333),
 45 |                         (704, 1333),
 46 |                         (736, 1333),
 47 |                         (768, 1333),
 48 |                         (800, 1333),
 49 |                     ],
 50 |                     keep_ratio=True,
 51 |                 )
 52 |             ],
 53 |             [  # branch 2: coarse resize, random crop, then the same scale list as branch 1
 54 |                 dict(
 55 |                     type="RandomChoiceResize",
 56 |                     scales=[(400, 1333), (500, 1333), (600, 1333)],
 57 |                     keep_ratio=True,
 58 |                 ),
 59 |                 dict(
 60 |                     type="RandomCrop",
 61 |                     crop_type="absolute_range",
 62 |                     crop_size=(384, 600),
 63 |                     allow_negative_crop=True,
 64 |                 ),
 65 |                 dict(
 66 |                     type="RandomChoiceResize",
 67 |                     scales=[
 68 |                         (480, 1333),
 69 |                         (512, 1333),
 70 |                         (544, 1333),
 71 |                         (576, 1333),
 72 |                         (608, 1333),
 73 |                         (640, 1333),
 74 |                         (672, 1333),
 75 |                         (704, 1333),
 76 |                         (736, 1333),
 77 |                         (768, 1333),
 78 |                         (800, 1333),
 79 |                     ],
 80 |                     keep_ratio=True,
 81 |                 ),
 82 |             ],
 83 |         ],
 84 |     ),
 85 |     dict(type="PackDetInputs"),
 86 | ]
 87 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))  # set the training dataset's pipeline to the train_pipeline list above
 88 | 
 89 | # optimizer: AdamW (lr 1e-4, weight decay 0.05) inside an OptimWrapper
 90 | optim_wrapper = dict(
 91 |     type="OptimWrapper",
 92 |     paramwise_cfg=dict(
 93 |         custom_keys={
 94 |             "norm": dict(decay_mult=0.0),  # weight-decay multiplier 0 for the "norm" key; presumably matched against parameter names — confirm
 95 |         }
 96 |     ),
 97 |     optimizer=dict(
 98 |         _delete_=True, type="AdamW", lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05  # _delete_ presumably replaces the base schedule's optimizer dict instead of merging — confirm
 99 |     ),
100 | )
101 | 


--------------------------------------------------------------------------------
/detection/configs/paca_vit/mask_rcnn_1x/mask_rcnn_pacavit_small_p2cconv_2_0_mstrain_480_800_1x_coco.py:
--------------------------------------------------------------------------------
  1 | _base_ = [  # base configs this file inherits from; paths point into detection/configs/_base_
  2 |     "../../_base_/models/mask-rcnn_r50_fpn.py",
  3 |     "../../_base_/datasets/coco_instance.py",  # NOTE(review): the repo tree lists no _base_/datasets/ directory — confirm this file exists
  4 |     "../../_base_/schedules/schedule_1x.py",
  5 |     "../../_base_/default_runtime.py",
  6 | ]
  7 | 
  8 | 
  9 | model = dict(  # overrides for the model defined by the mask-rcnn_r50_fpn.py base config
 10 |     backbone=dict(
 11 |         _delete_=True,  # presumably discards the base backbone's keys instead of merging with them — confirm
 12 |         type="pacavit_small_p2cconv_2_0_downstream",  # backbone name; it is registered outside this file
 13 |         drop_path_rate=0.1,
 14 |         layer_scale=None,
 15 |         pretrained=(
 16 |             "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_2_0.pth"  # relative checkpoint path; NOTE(review): resolution presumably depends on the launch directory — confirm
 17 |         ),
 18 |     ),
 19 |     neck=dict(in_channels=[96, 192, 320, 384]),  # presumably must equal the backbone's per-stage output channels — verify against the backbone definition
 20 | )
 21 | 
 22 | img_norm_cfg = dict(  # per-channel mean/std and a to_rgb flag; NOTE(review): nothing in this file's model overrides, pipeline or dataloader references this dict — confirm it is still needed
 23 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True
 24 | )
 25 | 
 26 | # augmentation strategy originates from DETR / Sparse RCNN
 27 | train_pipeline = [  # training transforms in order: load image, load boxes+masks, flip, resize/crop choice, pack
 28 |     dict(type="LoadImageFromFile", file_client_args={{_base_.file_client_args}}),  # {{...}} is presumably substituted from the base configs at load time — confirm
 29 |     dict(type="LoadAnnotations", with_bbox=True, with_mask=True),
 30 |     dict(type="RandomFlip", prob=0.5),
 31 |     dict(
 32 |         type="RandomChoice",  # presumably applies exactly one of the two branches below per sample — confirm
 33 |         transforms=[
 34 |             [  # branch 1: a single multi-scale resize
 35 |                 dict(
 36 |                     type="RandomChoiceResize",
 37 |                     scales=[  # first value steps 480..800 by 32; second value is fixed at 1333
 38 |                         (480, 1333),
 39 |                         (512, 1333),
 40 |                         (544, 1333),
 41 |                         (576, 1333),
 42 |                         (608, 1333),
 43 |                         (640, 1333),
 44 |                         (672, 1333),
 45 |                         (704, 1333),
 46 |                         (736, 1333),
 47 |                         (768, 1333),
 48 |                         (800, 1333),
 49 |                     ],
 50 |                     keep_ratio=True,
 51 |                 )
 52 |             ],
 53 |             [  # branch 2: coarse resize, random crop, then the same scale list as branch 1
 54 |                 dict(
 55 |                     type="RandomChoiceResize",
 56 |                     scales=[(400, 1333), (500, 1333), (600, 1333)],
 57 |                     keep_ratio=True,
 58 |                 ),
 59 |                 dict(
 60 |                     type="RandomCrop",
 61 |                     crop_type="absolute_range",
 62 |                     crop_size=(384, 600),
 63 |                     allow_negative_crop=True,
 64 |                 ),
 65 |                 dict(
 66 |                     type="RandomChoiceResize",
 67 |                     scales=[
 68 |                         (480, 1333),
 69 |                         (512, 1333),
 70 |                         (544, 1333),
 71 |                         (576, 1333),
 72 |                         (608, 1333),
 73 |                         (640, 1333),
 74 |                         (672, 1333),
 75 |                         (704, 1333),
 76 |                         (736, 1333),
 77 |                         (768, 1333),
 78 |                         (800, 1333),
 79 |                     ],
 80 |                     keep_ratio=True,
 81 |                 ),
 82 |             ],
 83 |         ],
 84 |     ),
 85 |     dict(type="PackDetInputs"),
 86 | ]
 87 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))  # set the training dataset's pipeline to the train_pipeline list above
 88 | 
 89 | # optimizer: AdamW (lr 1e-4, weight decay 0.05) inside an OptimWrapper
 90 | optim_wrapper = dict(
 91 |     type="OptimWrapper",
 92 |     paramwise_cfg=dict(
 93 |         custom_keys={
 94 |             "norm": dict(decay_mult=0.0),  # weight-decay multiplier 0 for the "norm" key; presumably matched against parameter names — confirm
 95 |         }
 96 |     ),
 97 |     optimizer=dict(
 98 |         _delete_=True, type="AdamW", lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05  # _delete_ presumably replaces the base schedule's optimizer dict instead of merging — confirm
 99 |     ),
100 | )
101 | 


--------------------------------------------------------------------------------
/detection/configs/paca_vit/mask_rcnn_1x/mask_rcnn_pacavit_small_p2cconv_49_0_mstrain_480_800_1x_coco.py:
--------------------------------------------------------------------------------
  1 | _base_ = [  # base configs this file inherits from; paths point into detection/configs/_base_
  2 |     "../../_base_/models/mask-rcnn_r50_fpn.py",
  3 |     "../../_base_/datasets/coco_instance.py",  # NOTE(review): the repo tree lists no _base_/datasets/ directory — confirm this file exists
  4 |     "../../_base_/schedules/schedule_1x.py",
  5 |     "../../_base_/default_runtime.py",
  6 | ]
  7 | 
  8 | 
  9 | model = dict(  # overrides for the model defined by the mask-rcnn_r50_fpn.py base config
 10 |     backbone=dict(
 11 |         _delete_=True,  # presumably discards the base backbone's keys instead of merging with them — confirm
 12 |         type="pacavit_small_p2cconv_49_0_downstream",  # backbone name; it is registered outside this file
 13 |         drop_path_rate=0.1,
 14 |         layer_scale=None,
 15 |         pretrained=(
 16 |             "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_49_0.pth"  # relative checkpoint path; NOTE(review): resolution presumably depends on the launch directory — confirm
 17 |         ),
 18 |     ),
 19 |     neck=dict(in_channels=[96, 192, 320, 384]),  # presumably must equal the backbone's per-stage output channels — verify against the backbone definition
 20 | )
 21 | 
 22 | img_norm_cfg = dict(  # per-channel mean/std and a to_rgb flag; NOTE(review): nothing in this file's model overrides, pipeline or dataloader references this dict — confirm it is still needed
 23 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True
 24 | )
 25 | 
 26 | # augmentation strategy originates from DETR / Sparse RCNN
 27 | train_pipeline = [  # training transforms in order: load image, load boxes+masks, flip, resize/crop choice, pack
 28 |     dict(type="LoadImageFromFile", file_client_args={{_base_.file_client_args}}),  # {{...}} is presumably substituted from the base configs at load time — confirm
 29 |     dict(type="LoadAnnotations", with_bbox=True, with_mask=True),
 30 |     dict(type="RandomFlip", prob=0.5),
 31 |     dict(
 32 |         type="RandomChoice",  # presumably applies exactly one of the two branches below per sample — confirm
 33 |         transforms=[
 34 |             [  # branch 1: a single multi-scale resize
 35 |                 dict(
 36 |                     type="RandomChoiceResize",
 37 |                     scales=[  # first value steps 480..800 by 32; second value is fixed at 1333
 38 |                         (480, 1333),
 39 |                         (512, 1333),
 40 |                         (544, 1333),
 41 |                         (576, 1333),
 42 |                         (608, 1333),
 43 |                         (640, 1333),
 44 |                         (672, 1333),
 45 |                         (704, 1333),
 46 |                         (736, 1333),
 47 |                         (768, 1333),
 48 |                         (800, 1333),
 49 |                     ],
 50 |                     keep_ratio=True,
 51 |                 )
 52 |             ],
 53 |             [  # branch 2: coarse resize, random crop, then the same scale list as branch 1
 54 |                 dict(
 55 |                     type="RandomChoiceResize",
 56 |                     scales=[(400, 1333), (500, 1333), (600, 1333)],
 57 |                     keep_ratio=True,
 58 |                 ),
 59 |                 dict(
 60 |                     type="RandomCrop",
 61 |                     crop_type="absolute_range",
 62 |                     crop_size=(384, 600),
 63 |                     allow_negative_crop=True,
 64 |                 ),
 65 |                 dict(
 66 |                     type="RandomChoiceResize",
 67 |                     scales=[
 68 |                         (480, 1333),
 69 |                         (512, 1333),
 70 |                         (544, 1333),
 71 |                         (576, 1333),
 72 |                         (608, 1333),
 73 |                         (640, 1333),
 74 |                         (672, 1333),
 75 |                         (704, 1333),
 76 |                         (736, 1333),
 77 |                         (768, 1333),
 78 |                         (800, 1333),
 79 |                     ],
 80 |                     keep_ratio=True,
 81 |                 ),
 82 |             ],
 83 |         ],
 84 |     ),
 85 |     dict(type="PackDetInputs"),
 86 | ]
 87 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
 88 | 
 89 | # optimizer
 90 | optim_wrapper = dict(
 91 |     type="OptimWrapper",
 92 |     paramwise_cfg=dict(
 93 |         custom_keys={
 94 |             "norm": dict(decay_mult=0.0),
 95 |         }
 96 |     ),
 97 |     optimizer=dict(
 98 |         _delete_=True, type="AdamW", lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05
 99 |     ),
100 | )
101 | 


--------------------------------------------------------------------------------
/detection/configs/paca_vit/mask_rcnn_1x/mask_rcnn_pacavit_small_p2cconv_49_100_mstrain_480_800_1x_coco.py:
--------------------------------------------------------------------------------
  1 | _base_ = [  # base configs this file builds on: model, dataset, schedule, runtime
  2 |     "../../_base_/models/mask-rcnn_r50_fpn.py",
  3 |     "../../_base_/datasets/coco_instance.py",  # NOTE(review): no datasets/ folder is listed under _base_ in the repository tree - confirm this file exists
  4 |     "../../_base_/schedules/schedule_1x.py",
  5 |     "../../_base_/default_runtime.py",
  6 | ]
  7 | 
  8 | 
  9 | model = dict(  # overrides applied on top of the base Mask R-CNN model
 10 |     backbone=dict(
 11 |         _delete_=True,  # NOTE(review): presumably replaces the inherited backbone dict instead of merging - verify in mmengine docs
 12 |         type="pacavit_small_p2cconv_49_100_downstream",
 13 |         drop_path_rate=0.1,
 14 |         layer_scale=None,
 15 |         pretrained=(  # relative checkpoint path; NOTE(review): confirm which directory it is resolved against
 16 |             "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_49_100.pth"
 17 |         ),
 18 |     ),
 19 |     neck=dict(in_channels=[96, 192, 320, 384]),  # NOTE(review): presumably the backbone's per-stage output widths - confirm
 20 | )
 21 | 
 22 | img_norm_cfg = dict(  # not referenced again anywhere in this file
 23 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True
 24 | )
 25 | 
 26 | # augmentation strategy originates from DETR / Sparse RCNN
 27 | train_pipeline = [  # load -> flip -> one of two resize branches -> pack; file_client_args comes from the base config
 28 |     dict(type="LoadImageFromFile", file_client_args={{_base_.file_client_args}}),
 29 |     dict(type="LoadAnnotations", with_bbox=True, with_mask=True),
 30 |     dict(type="RandomFlip", prob=0.5),
 31 |     dict(
 32 |         type="RandomChoice",  # presumably selects one of the two transform lists below per sample - verify
 33 |         transforms=[
 34 |             [  # branch 1: a single multi-scale resize
 35 |                 dict(
 36 |                     type="RandomChoiceResize",
 37 |                     scales=[
 38 |                         (480, 1333),
 39 |                         (512, 1333),
 40 |                         (544, 1333),
 41 |                         (576, 1333),
 42 |                         (608, 1333),
 43 |                         (640, 1333),
 44 |                         (672, 1333),
 45 |                         (704, 1333),
 46 |                         (736, 1333),
 47 |                         (768, 1333),
 48 |                         (800, 1333),
 49 |                     ],
 50 |                     keep_ratio=True,
 51 |                 )
 52 |             ],
 53 |             [  # branch 2: coarse resize, then random crop, then the same multi-scale resize as branch 1
 54 |                 dict(
 55 |                     type="RandomChoiceResize",
 56 |                     scales=[(400, 1333), (500, 1333), (600, 1333)],
 57 |                     keep_ratio=True,
 58 |                 ),
 59 |                 dict(
 60 |                     type="RandomCrop",
 61 |                     crop_type="absolute_range",
 62 |                     crop_size=(384, 600),
 63 |                     allow_negative_crop=True,
 64 |                 ),
 65 |                 dict(
 66 |                     type="RandomChoiceResize",
 67 |                     scales=[
 68 |                         (480, 1333),
 69 |                         (512, 1333),
 70 |                         (544, 1333),
 71 |                         (576, 1333),
 72 |                         (608, 1333),
 73 |                         (640, 1333),
 74 |                         (672, 1333),
 75 |                         (704, 1333),
 76 |                         (736, 1333),
 77 |                         (768, 1333),
 78 |                         (800, 1333),
 79 |                     ],
 80 |                     keep_ratio=True,
 81 |                 ),
 82 |             ],
 83 |         ],
 84 |     ),
 85 |     dict(type="PackDetInputs"),
 86 | ]
 87 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))  # plug the pipeline above into the training dataset
 88 | 
 89 | # optimizer: AdamW wrapped in an OptimWrapper, with a per-key override for "norm"
 90 | optim_wrapper = dict(
 91 |     type="OptimWrapper",
 92 |     paramwise_cfg=dict(
 93 |         custom_keys={
 94 |             "norm": dict(decay_mult=0.0),  # decay multiplier 0 for entries keyed "norm"
 95 |         }
 96 |     ),
 97 |     optimizer=dict(  # NOTE(review): _delete_=True presumably discards the inherited optimizer instead of merging - verify in mmengine docs
 98 |         _delete_=True, type="AdamW", lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05
 99 |     ),
100 | )
101 | 


--------------------------------------------------------------------------------
/detection/configs/paca_vit/mask_rcnn_1x/mask_rcnn_pacavit_small_p2cmlp_100_0_mstrain_480_800_1x_coco.py:
--------------------------------------------------------------------------------
  1 | _base_ = [  # base configs this file builds on: model, dataset, schedule, runtime
  2 |     "../../_base_/models/mask-rcnn_r50_fpn.py",
  3 |     "../../_base_/datasets/coco_instance.py",  # NOTE(review): no datasets/ folder is listed under _base_ in the repository tree - confirm this file exists
  4 |     "../../_base_/schedules/schedule_1x.py",
  5 |     "../../_base_/default_runtime.py",
  6 | ]
  7 | 
  8 | 
  9 | model = dict(  # overrides applied on top of the base Mask R-CNN model
 10 |     backbone=dict(
 11 |         _delete_=True,  # NOTE(review): presumably replaces the inherited backbone dict instead of merging - verify in mmengine docs
 12 |         type="pacavit_small_p2cmlp_100_0_downstream",
 13 |         drop_path_rate=0.1,
 14 |         layer_scale=None,
 15 |         pretrained=(  # relative checkpoint path; NOTE(review): confirm which directory it is resolved against
 16 |             "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cmlp_100_0.pth"
 17 |         ),
 18 |     ),
 19 |     neck=dict(in_channels=[96, 192, 320, 384]),  # NOTE(review): presumably the backbone's per-stage output widths - confirm
 20 | )
 21 | 
 22 | img_norm_cfg = dict(  # not referenced again anywhere in this file
 23 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True
 24 | )
 25 | 
 26 | # augmentation strategy originates from DETR / Sparse RCNN
 27 | train_pipeline = [  # load -> flip -> one of two resize branches -> pack; file_client_args comes from the base config
 28 |     dict(type="LoadImageFromFile", file_client_args={{_base_.file_client_args}}),
 29 |     dict(type="LoadAnnotations", with_bbox=True, with_mask=True),
 30 |     dict(type="RandomFlip", prob=0.5),
 31 |     dict(
 32 |         type="RandomChoice",  # presumably selects one of the two transform lists below per sample - verify
 33 |         transforms=[
 34 |             [  # branch 1: a single multi-scale resize
 35 |                 dict(
 36 |                     type="RandomChoiceResize",
 37 |                     scales=[
 38 |                         (480, 1333),
 39 |                         (512, 1333),
 40 |                         (544, 1333),
 41 |                         (576, 1333),
 42 |                         (608, 1333),
 43 |                         (640, 1333),
 44 |                         (672, 1333),
 45 |                         (704, 1333),
 46 |                         (736, 1333),
 47 |                         (768, 1333),
 48 |                         (800, 1333),
 49 |                     ],
 50 |                     keep_ratio=True,
 51 |                 )
 52 |             ],
 53 |             [  # branch 2: coarse resize, then random crop, then the same multi-scale resize as branch 1
 54 |                 dict(
 55 |                     type="RandomChoiceResize",
 56 |                     scales=[(400, 1333), (500, 1333), (600, 1333)],
 57 |                     keep_ratio=True,
 58 |                 ),
 59 |                 dict(
 60 |                     type="RandomCrop",
 61 |                     crop_type="absolute_range",
 62 |                     crop_size=(384, 600),
 63 |                     allow_negative_crop=True,
 64 |                 ),
 65 |                 dict(
 66 |                     type="RandomChoiceResize",
 67 |                     scales=[
 68 |                         (480, 1333),
 69 |                         (512, 1333),
 70 |                         (544, 1333),
 71 |                         (576, 1333),
 72 |                         (608, 1333),
 73 |                         (640, 1333),
 74 |                         (672, 1333),
 75 |                         (704, 1333),
 76 |                         (736, 1333),
 77 |                         (768, 1333),
 78 |                         (800, 1333),
 79 |                     ],
 80 |                     keep_ratio=True,
 81 |                 ),
 82 |             ],
 83 |         ],
 84 |     ),
 85 |     dict(type="PackDetInputs"),
 86 | ]
 87 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))  # plug the pipeline above into the training dataset
 88 | 
 89 | # optimizer: AdamW wrapped in an OptimWrapper, with a per-key override for "norm"
 90 | optim_wrapper = dict(
 91 |     type="OptimWrapper",
 92 |     paramwise_cfg=dict(
 93 |         custom_keys={
 94 |             "norm": dict(decay_mult=0.0),  # decay multiplier 0 for entries keyed "norm"
 95 |         }
 96 |     ),
 97 |     optimizer=dict(  # NOTE(review): _delete_=True presumably discards the inherited optimizer instead of merging - verify in mmengine docs
 98 |         _delete_=True, type="AdamW", lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05
 99 |     ),
100 | )
101 | 


--------------------------------------------------------------------------------
/detection/configs/paca_vit/mask_rcnn_1x/mask_rcnn_pacavit_tiny_p2cconv_100_0_mstrain_480_800_1x_coco.py:
--------------------------------------------------------------------------------
  1 | _base_ = [  # base configs this file builds on: model, dataset, schedule, runtime
  2 |     "../../_base_/models/mask-rcnn_r50_fpn.py",
  3 |     "../../_base_/datasets/coco_instance.py",  # NOTE(review): no datasets/ folder is listed under _base_ in the repository tree - confirm this file exists
  4 |     "../../_base_/schedules/schedule_1x.py",
  5 |     "../../_base_/default_runtime.py",
  6 | ]
  7 | 
  8 | 
  9 | model = dict(  # overrides applied on top of the base Mask R-CNN model
 10 |     backbone=dict(
 11 |         _delete_=True,  # NOTE(review): presumably replaces the inherited backbone dict instead of merging - verify in mmengine docs
 12 |         type="pacavit_tiny_p2cconv_100_0_downstream",
 13 |         drop_path_rate=0.1,
 14 |         layer_scale=None,
 15 |         pretrained=(  # relative checkpoint path; NOTE(review): confirm which directory it is resolved against
 16 |             "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_tiny_p2cconv_100_0.pth"
 17 |         ),
 18 |     ),
 19 |     neck=dict(in_channels=[96, 192, 320, 384]),  # NOTE(review): same widths as the "small" configs - confirm they match the tiny backbone
 20 | )
 21 | 
 22 | img_norm_cfg = dict(  # not referenced again anywhere in this file
 23 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True
 24 | )
 25 | 
 26 | # augmentation strategy originates from DETR / Sparse RCNN
 27 | train_pipeline = [  # load -> flip -> one of two resize branches -> pack; file_client_args comes from the base config
 28 |     dict(type="LoadImageFromFile", file_client_args={{_base_.file_client_args}}),
 29 |     dict(type="LoadAnnotations", with_bbox=True, with_mask=True),
 30 |     dict(type="RandomFlip", prob=0.5),
 31 |     dict(
 32 |         type="RandomChoice",  # presumably selects one of the two transform lists below per sample - verify
 33 |         transforms=[
 34 |             [  # branch 1: a single multi-scale resize
 35 |                 dict(
 36 |                     type="RandomChoiceResize",
 37 |                     scales=[
 38 |                         (480, 1333),
 39 |                         (512, 1333),
 40 |                         (544, 1333),
 41 |                         (576, 1333),
 42 |                         (608, 1333),
 43 |                         (640, 1333),
 44 |                         (672, 1333),
 45 |                         (704, 1333),
 46 |                         (736, 1333),
 47 |                         (768, 1333),
 48 |                         (800, 1333),
 49 |                     ],
 50 |                     keep_ratio=True,
 51 |                 )
 52 |             ],
 53 |             [  # branch 2: coarse resize, then random crop, then the same multi-scale resize as branch 1
 54 |                 dict(
 55 |                     type="RandomChoiceResize",
 56 |                     scales=[(400, 1333), (500, 1333), (600, 1333)],
 57 |                     keep_ratio=True,
 58 |                 ),
 59 |                 dict(
 60 |                     type="RandomCrop",
 61 |                     crop_type="absolute_range",
 62 |                     crop_size=(384, 600),
 63 |                     allow_negative_crop=True,
 64 |                 ),
 65 |                 dict(
 66 |                     type="RandomChoiceResize",
 67 |                     scales=[
 68 |                         (480, 1333),
 69 |                         (512, 1333),
 70 |                         (544, 1333),
 71 |                         (576, 1333),
 72 |                         (608, 1333),
 73 |                         (640, 1333),
 74 |                         (672, 1333),
 75 |                         (704, 1333),
 76 |                         (736, 1333),
 77 |                         (768, 1333),
 78 |                         (800, 1333),
 79 |                     ],
 80 |                     keep_ratio=True,
 81 |                 ),
 82 |             ],
 83 |         ],
 84 |     ),
 85 |     dict(type="PackDetInputs"),
 86 | ]
 87 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))  # plug the pipeline above into the training dataset
 88 | 
 89 | # optimizer: AdamW wrapped in an OptimWrapper, with a per-key override for "norm"
 90 | optim_wrapper = dict(
 91 |     type="OptimWrapper",
 92 |     paramwise_cfg=dict(
 93 |         custom_keys={
 94 |             "norm": dict(decay_mult=0.0),  # decay multiplier 0 for entries keyed "norm"
 95 |         }
 96 |     ),
 97 |     optimizer=dict(  # NOTE(review): _delete_=True presumably discards the inherited optimizer instead of merging - verify in mmengine docs
 98 |         _delete_=True, type="AdamW", lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05
 99 |     ),
100 | )
101 | 


--------------------------------------------------------------------------------
/detection/get_flops.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) OpenMMLab. All rights reserved.
  2 | import argparse
  3 | import tempfile
  4 | from functools import partial
  5 | from pathlib import Path
  6 | 
  7 | import torch
  8 | from mmengine.config import Config, DictAction
  9 | from mmengine.logging import MMLogger
 10 | from mmengine.model import revert_sync_batchnorm
 11 | from mmengine.registry import init_default_scope
 12 | from mmengine.runner import Runner
 13 | 
 14 | from mmdet.registry import MODELS
 15 | 
 16 | try:
 17 |     from mmengine.analysis import get_model_complexity_info
 18 |     from mmengine.analysis.print_helper import _format_size
 19 | except ImportError:
 20 |     raise ImportError("Please upgrade mmengine >= 0.6.0")
 21 | 
 22 | from models import *
 23 | from torchprofile import profile_macs
 24 | 
 25 | 
 26 | def parse_args():  # build the CLI of the FLOPs tool and return the parsed argparse namespace
 27 |     parser = argparse.ArgumentParser(description="Get a detector flops")
 28 |     parser.add_argument("config", help="train config file path")
 29 |     parser.add_argument(  # one value means a square input; two values are unpacked as (h, w) by inference()
 30 |         "--shape", type=int, nargs="+", default=[1280, 800], help="input image size"
 31 |     )
 32 |     parser.add_argument(  # key=value overrides, merged into the loaded config by inference()
 33 |         "--cfg-options",
 34 |         nargs="+",
 35 |         action=DictAction,
 36 |         help="override some settings in the used config, the key-value pair "
 37 |         "in xxx=yyy format will be merged into config file. If the value to "
 38 |         'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
 39 |         'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
 40 |         "Note that the quotation marks are necessary and that no white space "
 41 |         "is allowed.",
 42 |     )
 43 |     args = parser.parse_args()
 44 |     return args
 45 | 
 46 | 
 47 | def inference(args, logger):
 48 |     if str(torch.__version__) < "1.12":
 49 |         logger.warning(
 50 |             "Some config files, such as configs/yolact and configs/detectors,"
 51 |             "may have compatibility issues with torch.jit when torch<1.12. "
 52 |             "If you want to calculate flops for these models, "
 53 |             "please make sure your pytorch version is >=1.12."
 54 |         )
 55 | 
 56 |     config_name = Path(args.config)
 57 |     if not config_name.exists():
 58 |         logger.error(f"{config_name} not found.")
 59 | 
 60 |     cfg = Config.fromfile(args.config)
 61 |     cfg.work_dir = tempfile.TemporaryDirectory().name
 62 |     cfg.log_level = "WARN"
 63 |     if args.cfg_options is not None:
 64 |         cfg.merge_from_dict(args.cfg_options)
 65 | 
 66 |     init_default_scope(cfg.get("default_scope", "mmdet"))
 67 | 
 68 |     # TODO: The following usage is temporary and not safe
 69 |     # use hard code to convert mmSyncBN to SyncBN. This is a known
 70 |     # bug in mmengine, mmSyncBN requires a distributed environment，
 71 |     # this question involves models like configs/strong_baselines
 72 |     if hasattr(cfg, "head_norm_cfg"):
 73 |         cfg["head_norm_cfg"] = dict(type="SyncBN", requires_grad=True)
 74 |         cfg["model"]["roi_head"]["bbox_head"]["norm_cfg"] = dict(
 75 |             type="SyncBN", requires_grad=True
 76 |         )
 77 |         cfg["model"]["roi_head"]["mask_head"]["norm_cfg"] = dict(
 78 |             type="SyncBN", requires_grad=True
 79 |         )
 80 | 
 81 |     if len(args.shape) == 1:
 82 |         h = w = args.shape[0]
 83 |     elif len(args.shape) == 2:
 84 |         h, w = args.shape
 85 |     else:
 86 |         raise ValueError("invalid input shape")
 87 |     result = {}
 88 | 
 89 |     # Supports two ways to calculate flops,
 90 |     # 1. randomly generate a picture
 91 |     # 2. load a picture from the dataset
 92 |     # In two stage detectors, _forward need batch_samples to get
 93 |     # rpn_results_list, then use rpn_results_list to compute flops,
 94 |     # so only the second way is supported
 95 |     try:
 96 |         model = MODELS.build(cfg.model)
 97 |         if torch.cuda.is_available():
 98 |             model.cuda()
 99 |         model = revert_sync_batchnorm(model)
100 |         data_batch = {"inputs": [torch.rand(3, h, w)], "batch_samples": [None]}
101 |         data = model.data_preprocessor(data_batch)
102 |         result["ori_shape"] = (h, w)
103 |         result["pad_shape"] = data["inputs"].shape[-2:]
104 |         model.eval()
105 |         outputs = get_model_complexity_info(
106 |             model, None, inputs=data["inputs"], show_table=False, show_arch=False
107 |         )
108 |         flops = outputs["flops"]
109 |         params = outputs["params"]
110 |         result["compute_type"] = "direct: randomly generate a picture"
111 | 
112 |         # torchprofile
113 |         tp_flops = profile_macs(model, data["inputs"])
114 | 
115 |     except TypeError:
116 |         logger.warning("Failed to directly get FLOPs, try to get flops with real data")
117 |         data_loader = Runner.build_dataloader(cfg.val_dataloader)
118 |         data_batch = next(iter(data_loader))
119 |         model = MODELS.build(cfg.model)
120 |         if torch.cuda.is_available():
121 |             model = model.cuda()
122 |         model = revert_sync_batchnorm(model)
123 |         model.eval()
124 |         _forward = model.forward
125 |         data = model.data_preprocessor(data_batch)
126 |         result["ori_shape"] = data["data_samples"][0].ori_shape
127 |         result["pad_shape"] = data["data_samples"][0].pad_shape
128 | 
129 |         del data_loader
130 |         model.forward = partial(_forward, data_samples=data["data_samples"])
131 |         outputs = get_model_complexity_info(
132 |             model, None, inputs=data["inputs"], show_table=False, show_arch=False
133 |         )
134 |         flops = outputs["flops"]
135 |         params = outputs["params"]
136 |         result["compute_type"] = "dataloader: load a picture from the dataset"
137 | 
138 |         # torchprofile
139 |         tp_flops = profile_macs(model, data["inputs"])
140 | 
141 |     flops = _format_size(flops)
142 |     tp_flops = _format_size(tp_flops)
143 |     params = _format_size(params)
144 |     result["flops"] = flops
145 |     result["torchprofile_flops"] = tp_flops
146 |     result["params"] = params
147 | 
148 |     return result
149 | 
150 | 
151 | def main():  # CLI entry point: parse arguments, run inference(), print the resulting report
152 |     args = parse_args()
153 |     logger = MMLogger.get_instance(name="MMLogger")
154 |     result = inference(args, logger)
155 |     split_line = "=" * 30
156 |     ori_shape = result["ori_shape"]
157 |     pad_shape = result["pad_shape"]
158 |     flops = result["flops"]
159 |     torchprofile_flops = result["torchprofile_flops"]
160 |     params = result["params"]
161 |     compute_type = result["compute_type"]
162 | 
163 |     if pad_shape != ori_shape:  # tell the user when the measured shape differs from the requested one
164 |         print(
165 |             f"{split_line}\nUse size divisor set input shape "
166 |             f"from {ori_shape} to {pad_shape}"
167 |         )
168 |     print(
169 |         f"{split_line}\nCompute type: {compute_type}\n"
170 |         f"Input shape: {pad_shape}\nFlops: {flops}\nFlops (torchprofile): {torchprofile_flops}\n"
171 |         f"Params: {params}\n{split_line}"
172 |     )
173 |     print(
174 |         "!!!Please be cautious if you use the results in papers. "
175 |         "You may need to check if all ops are supported and verify "
176 |         "that the flops computation is correct."
177 |     )
178 | 
179 | 
180 | if __name__ == "__main__":
181 |     main()
182 | 


--------------------------------------------------------------------------------
/detection/test_mmdet.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) OpenMMLab. All rights reserved.
  2 | import argparse
  3 | import os
  4 | import os.path as osp
  5 | import warnings
  6 | from copy import deepcopy
  7 | 
  8 | from mmengine import ConfigDict
  9 | from mmengine.config import Config, DictAction
 10 | from mmengine.runner import Runner
 11 | 
 12 | from mmdet.engine.hooks.utils import trigger_visualization_hook
 13 | from mmdet.evaluation import DumpDetResults
 14 | from mmdet.registry import RUNNERS
 15 | 
 16 | from models import *
 17 | 
 18 | 
 19 | # TODO: support fuse_conv_bn and format_only
 20 | def parse_args():  # parse the test CLI and make sure LOCAL_RANK is present in the environment
 21 |     parser = argparse.ArgumentParser(description="MMDet test (and eval) a model")
 22 |     parser.add_argument("config", help="test config file path")
 23 |     parser.add_argument("checkpoint", help="checkpoint file")
 24 |     parser.add_argument(
 25 |         "--work-dir",
 26 |         help="the directory to save the file containing evaluation metrics",
 27 |     )
 28 |     parser.add_argument(
 29 |         "--out",
 30 |         type=str,
 31 |         help="dump predictions to a pickle file for offline evaluation",
 32 |     )
 33 |     parser.add_argument("--show", action="store_true", help="show prediction results")
 34 |     parser.add_argument(
 35 |         "--show-dir",
 36 |         help="directory where painted images will be saved. "
 37 |         "If specified, it will be automatically saved "
 38 |         "to the work_dir/timestamp/show_dir",
 39 |     )
 40 |     parser.add_argument(
 41 |         "--wait-time", type=float, default=2, help="the interval of show (s)"
 42 |     )
 43 |     parser.add_argument(  # key=value overrides, merged into the loaded config by main()
 44 |         "--cfg-options",
 45 |         nargs="+",
 46 |         action=DictAction,
 47 |         help="override some settings in the used config, the key-value pair "
 48 |         "in xxx=yyy format will be merged into config file. If the value to "
 49 |         'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
 50 |         'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
 51 |         "Note that the quotation marks are necessary and that no white space "
 52 |         "is allowed.",
 53 |     )
 54 |     parser.add_argument(
 55 |         "--launcher",
 56 |         choices=["none", "pytorch", "slurm", "mpi"],
 57 |         default="none",
 58 |         help="job launcher",
 59 |     )
 60 |     parser.add_argument("--tta", action="store_true")  # enables the test-time-augmentation branch in main()
 61 |     parser.add_argument("--local_rank", type=int, default=0)
 62 |     args = parser.parse_args()
 63 |     if "LOCAL_RANK" not in os.environ:  # an existing LOCAL_RANK wins; otherwise the CLI value is exported
 64 |         os.environ["LOCAL_RANK"] = str(args.local_rank)
 65 |     return args
 66 | 
 67 | 
 68 | def main():  # load the config, apply CLI overrides and optional TTA, build a runner and run its test loop
 69 |     args = parse_args()
 70 | 
 71 |     # load config
 72 |     cfg = Config.fromfile(args.config)
 73 |     cfg.launcher = args.launcher
 74 |     if args.cfg_options is not None:
 75 |         cfg.merge_from_dict(args.cfg_options)
 76 | 
 77 |     # work_dir is determined in this priority: CLI > segment in file > filename
 78 |     if args.work_dir is not None:
 79 |         # update configs according to CLI args if args.work_dir is not None
 80 |         cfg.work_dir = args.work_dir
 81 |     elif cfg.get("work_dir", None) is None:
 82 |         # use config filename as default work_dir if cfg.work_dir is None
 83 |         cfg.work_dir = osp.join(
 84 |             "./work_dirs", osp.splitext(osp.basename(args.config))[0]
 85 |         )
 86 | 
 87 |     cfg.load_from = args.checkpoint  # checkpoint path given on the command line
 88 | 
 89 |     if args.show or args.show_dir:
 90 |         cfg = trigger_visualization_hook(cfg, args)
 91 | 
 92 |     if args.tta:  # wrap model and pipeline for test-time augmentation, filling in defaults the config lacks
 93 |         if "tta_model" not in cfg:
 94 |             warnings.warn(
 95 |                 "Cannot find ``tta_model`` in config, " "we will set it as default."
 96 |             )
 97 |             cfg.tta_model = dict(
 98 |                 type="DetTTAModel",
 99 |                 tta_cfg=dict(nms=dict(type="nms", iou_threshold=0.5), max_per_img=100),
100 |             )
101 |         if "tta_pipeline" not in cfg:
102 |             warnings.warn(
103 |                 "Cannot find ``tta_pipeline`` in config, " "we will set it as default."
104 |             )
105 |             test_data_cfg = cfg.test_dataloader.dataset
106 |             while "dataset" in test_data_cfg:  # descend through nested dataset configs to the innermost one
107 |                 test_data_cfg = test_data_cfg["dataset"]
108 |             cfg.tta_pipeline = deepcopy(test_data_cfg.pipeline)  # copy so the original test pipeline is left untouched
109 |             flip_tta = dict(
110 |                 type="TestTimeAug",
111 |                 transforms=[
112 |                     [
113 |                         dict(type="RandomFlip", prob=1.0),
114 |                         dict(type="RandomFlip", prob=0.0),
115 |                     ],
116 |                     [
117 |                         dict(
118 |                             type="PackDetInputs",
119 |                             meta_keys=(
120 |                                 "img_id",
121 |                                 "img_path",
122 |                                 "ori_shape",
123 |                                 "img_shape",
124 |                                 "scale_factor",
125 |                                 "flip",
126 |                                 "flip_direction",
127 |                             ),
128 |                         )
129 |                     ],
130 |                 ],
131 |             )
132 |             cfg.tta_pipeline[-1] = flip_tta  # the last step of the copied pipeline is replaced by the flip TTA step
133 |         cfg.model = ConfigDict(**cfg.tta_model, module=cfg.model)  # the original model config becomes the "module" of the TTA model
134 |         cfg.test_dataloader.dataset.pipeline = cfg.tta_pipeline  # NOTE(review): set on the outer dataset config, while the default above is read from the innermost one - confirm for wrapped datasets
135 | 
136 |     # build the runner from config
137 |     if "runner_type" not in cfg:
138 |         # build the default runner
139 |         runner = Runner.from_cfg(cfg)
140 |     else:
141 |         # build customized runner from the registry
142 |         # if 'runner_type' is set in the cfg
143 |         runner = RUNNERS.build(cfg)
144 | 
145 |     # add `DumpResults` dummy metric
146 |     if args.out is not None:
147 |         assert args.out.endswith(
148 |             (".pkl", ".pickle")
149 |         ), "The dump file must be a pkl file."
150 |         runner.test_evaluator.metrics.append(DumpDetResults(out_file_path=args.out))
151 | 
152 |     # start testing
153 |     runner.test()
154 | 
155 | 
156 | if __name__ == "__main__":
157 |     main()
158 | 


--------------------------------------------------------------------------------
/detection/test_mmdet.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | #
 3 | # Evaluate a detector checkpoint with test_mmdet.py under torchrun (one node).
 4 | #
 5 | # Positional arguments:
 6 | #   $1  config file passed to test_mmdet.py
 7 | #   $2  checkpoint file; the work dir is <checkpoint dir>/<config basename>
 8 | #   $3  value used for CUDA_VISIBLE_DEVICES
 9 | #   $4  number of processes started by torchrun
10 | #   $5  rendezvous port (an already-set PORT environment variable wins)
11 | #   $6+ forwarded unchanged to test_mmdet.py
12 | 
13 | if [ "$#" -lt 5 ]; then
14 |     # Usage errors go to stderr and return a non-zero status.
15 |     echo "Usage: me.sh Relative_config_filename Checkpoint_filename gpus nb_gpus port [others]" >&2
16 |     exit 1
17 | fi
18 | 
19 | # Kept for compatibility; not referenced by the command below.
20 | PYTHON=${PYTHON:-"python"}
21 | 
22 | CONFIG_FILE=$1
23 | CHK_FILE=$2
24 | GPUS=$3
25 | NUM_GPUS=$4
26 | PORT=${PORT:-$5}
27 | 
28 | # Directory containing this script, so test_mmdet.py is found from any cwd.
29 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
30 | 
31 | CONFIG_FILENAME=${CONFIG_FILE##*/}
32 | CONFIG_BASE="${CONFIG_FILENAME%.*}"
33 | 
34 | # Resolve the checkpoint directory explicitly: if it cannot be entered the
35 | # substitution is empty and the work dir would silently become "/<config>".
36 | if ! CHK_DIR="$( cd "$( dirname "${CHK_FILE}" )" >/dev/null 2>&1 && pwd )"; then
37 |     echo "Cannot resolve the directory of checkpoint: ${CHK_FILE}" >&2
38 |     exit 1
39 | fi
40 | WORK_DIR="${CHK_DIR}/${CONFIG_BASE}"
41 | 
42 | # An existing work dir is treated as an already finished evaluation.
43 | if [ -d "$WORK_DIR" ]; then
44 |   echo "... Done already!"
45 |   exit 0
46 | fi
47 | 
48 | # export NCCL_DEBUG=INFO
49 | 
50 | # All expansions are quoted so paths with spaces reach torchrun intact.
51 | TORCH_DISTRIBUTED_DEBUG=INFO OMP_NUM_THREADS=1 MKL_NUM_THREADS=1 CUDA_VISIBLE_DEVICES="$GPUS" \
52 |   torchrun \
53 |     --rdzv_backend c10d \
54 |     --rdzv_endpoint "localhost:$PORT" \
55 |     --nnodes 1 \
56 |     --nproc_per_node "$NUM_GPUS" \
57 |     "$DIR/test_mmdet.py" \
58 |     "$CONFIG_FILE" \
59 |     "$CHK_FILE" \
60 |     --launcher pytorch \
61 |     --work-dir "$WORK_DIR" \
62 |     "${@:6}"
63 | 


--------------------------------------------------------------------------------
/detection/train_mmdet.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) OpenMMLab. All rights reserved.
  2 | import argparse
  3 | import logging
  4 | import os
  5 | import os.path as osp
  6 | 
  7 | from mmengine.config import Config, DictAction
  8 | from mmengine.logging import print_log
  9 | from mmengine.registry import RUNNERS
 10 | from mmengine.runner import Runner
 11 | 
 12 | from models import *
 13 | 
 14 | 
 15 | def parse_args():  # parse the training CLI and make sure LOCAL_RANK is present in the environment
 16 |     parser = argparse.ArgumentParser(description="Train a detector")
 17 |     parser.add_argument("config", help="train config file path")
 18 |     parser.add_argument("--work-dir", help="the dir to save logs and models")
 19 |     parser.add_argument(
 20 |         "--amp",
 21 |         action="store_true",
 22 |         default=False,
 23 |         help="enable automatic-mixed-precision training",
 24 |     )
 25 |     parser.add_argument(
 26 |         "--auto-scale-lr", action="store_true", help="enable automatically scaling LR."
 27 |     )
 28 |     parser.add_argument(  # absent -> None, bare --resume -> "auto", --resume PATH -> PATH
 29 |         "--resume",
 30 |         nargs="?",
 31 |         type=str,
 32 |         const="auto",
 33 |         help="If specify checkpoint path, resume from it, while if not "
 34 |         "specify, try to auto resume from the latest checkpoint "
 35 |         "in the work directory.",
 36 |     )
 37 |     parser.add_argument(  # key=value overrides, merged into the loaded config by main()
 38 |         "--cfg-options",
 39 |         nargs="+",
 40 |         action=DictAction,
 41 |         help="override some settings in the used config, the key-value pair "
 42 |         "in xxx=yyy format will be merged into config file. If the value to "
 43 |         'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
 44 |         'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
 45 |         "Note that the quotation marks are necessary and that no white space "
 46 |         "is allowed.",
 47 |     )
 48 |     parser.add_argument(
 49 |         "--launcher",
 50 |         choices=["none", "pytorch", "slurm", "mpi"],
 51 |         default="none",
 52 |         help="job launcher",
 53 |     )
 54 |     parser.add_argument("--local_rank", type=int, default=0)
 55 |     args = parser.parse_args()
 56 |     if "LOCAL_RANK" not in os.environ:  # an existing LOCAL_RANK wins; otherwise the CLI value is exported
 57 |         os.environ["LOCAL_RANK"] = str(args.local_rank)
 58 | 
 59 |     return args
 60 | 
 61 | 
 62 | def main():
 63 |     args = parse_args()
 64 | 
 65 |     # load config
 66 |     cfg = Config.fromfile(args.config)
 67 |     cfg.launcher = args.launcher
 68 |     if args.cfg_options is not None:
 69 |         cfg.merge_from_dict(args.cfg_options)
 70 | 
 71 |     # work_dir is determined in this priority: CLI > segment in file > filename
 72 |     if args.work_dir is not None:
 73 |         # update configs according to CLI args if args.work_dir is not None
 74 |         cfg.work_dir = args.work_dir
 75 |     elif cfg.get("work_dir", None) is None:
 76 |         # use config filename as default work_dir if cfg.work_dir is None
 77 |         cfg.work_dir = osp.join(
 78 |             "./work_dirs", osp.splitext(osp.basename(args.config))[0]
 79 |         )
 80 | 
 81 |     # enable automatic-mixed-precision training
 82 |     if args.amp is True:
 83 |         optim_wrapper = cfg.optim_wrapper.type
 84 |         if optim_wrapper == "AmpOptimWrapper":
 85 |             print_log(
 86 |                 "AMP training is already enabled in your config.",
 87 |                 logger="current",
 88 |                 level=logging.WARNING,
 89 |             )
 90 |         else:
 91 |             assert optim_wrapper == "OptimWrapper", (
 92 |                 "`--amp` is only supported when the optimizer wrapper type is "
 93 |                 f"`OptimWrapper` but got {optim_wrapper}."
 94 |             )
 95 |             cfg.optim_wrapper.type = "AmpOptimWrapper"
 96 |             cfg.optim_wrapper.loss_scale = "dynamic"
 97 | 
 98 |     # enable automatically scaling LR
 99 |     if args.auto_scale_lr:
100 |         if (
101 |             "auto_scale_lr" in cfg
102 |             and "enable" in cfg.auto_scale_lr
103 |             and "base_batch_size" in cfg.auto_scale_lr
104 |         ):
105 |             cfg.auto_scale_lr.enable = True
106 |         else:
107 |             raise RuntimeError(
108 |                 'Can not find "auto_scale_lr" or '
109 |                 '"auto_scale_lr.enable" or '
110 |                 '"auto_scale_lr.base_batch_size" in your'
111 |                 " configuration file."
112 |             )
113 | 
114 |     # resume is determined in this priority: resume from > auto_resume
115 |     if args.resume == "auto":
116 |         cfg.resume = True
117 |         cfg.load_from = None
118 |     elif args.resume is not None:
119 |         cfg.resume = True
120 |         cfg.load_from = args.resume
121 | 
122 |     # build the runner from config
123 |     if "runner_type" not in cfg:
124 |         # build the default runner
125 |         runner = Runner.from_cfg(cfg)
126 |     else:
127 |         # build customized runner from the registry
128 |         # if 'runner_type' is set in the cfg
129 |         runner = RUNNERS.build(cfg)
130 | 
131 |     # start training
132 |     runner.train()
133 | 
134 | 
135 | if __name__ == "__main__":
136 |     main()
137 | 


--------------------------------------------------------------------------------
/detection/train_mmdet.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | if [ "$#" -lt 7 ]; then
 4 |     echo "Usage: me.sh Relative_config_filename Remove_old_if_exist_0_or_1 Exp_name Tag gpus nb_gpus port [others]"
 5 |     exit
 6 | fi
 7 | 
 8 | PYTHON=${PYTHON:-"python"}
 9 | 
10 | CONFIG_FILE=$1
11 | RM_OLD=$2
12 | EXP_NAME=$3
13 | TAG=$4
14 | GPUS=$5
15 | NUM_GPUS=$6
16 | PORT=${PORT:-$7}
17 | 
18 | 
19 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
20 | 
21 | CONFIG_FILENAME=${CONFIG_FILE##*/}
22 | CONFIG_BASE="${CONFIG_FILENAME%.*}"
23 | 
24 | WORK_DIR=${DIR}/../work_dirs/detection/${EXP_NAME}/${CONFIG_BASE}_$TAG
25 | 
26 | if [ -d $WORK_DIR ]; then
27 |   echo "$WORK_DIR --- Already exists"
28 |   if [ $2 -gt 0 ]; then
29 |     while true; do
30 |         read -p "Are you sure to delete this result directory? " yn
31 |         case $yn in
32 |             [Yy]* ) rm -r $WORK_DIR; mkdir -p $WORK_DIR; break;;
33 |             [Nn]* ) exit;;
34 |             * ) echo "Please answer yes or no.";;
35 |         esac
36 |     done
37 |   else
38 |     echo "Resume"
39 |   fi
40 | else
41 |     mkdir -p $WORK_DIR
42 | fi
43 | 
44 | # export NCCL_DEBUG=INFO
45 | 
46 | TORCH_DISTRIBUTED_DEBUG=INFO OMP_NUM_THREADS=1 MKL_NUM_THREADS=1 CUDA_VISIBLE_DEVICES=$GPUS \
47 |   torchrun \
48 |     --rdzv_backend c10d \
49 |     --rdzv_endpoint localhost:$PORT \
50 |     --nnodes 1 \
51 |     --nproc_per_node $NUM_GPUS \
52 |     $DIR/train_mmdet.py $CONFIG_FILE \
53 |     --amp \
54 |     --resume "auto" \
55 |     --launcher pytorch \
56 |     --work-dir $WORK_DIR \
57 |     --auto-scale-lr \
58 |     ${@:8}
59 | 


--------------------------------------------------------------------------------
/environment.yaml:
--------------------------------------------------------------------------------
# Conda environment specification. install.sh creates the environment with
# `conda env create -n <env_name> -f environment.yaml`, so the name below
# stays commented out.
# name: pacavit
channels:
  - defaults
  - pytorch
  - nvidia
  - xformers
  - conda-forge
dependencies:
  - python=3.9
  - pip
  # Earlier pins, kept for reference:
  # - cudatoolkit=11.6
  # - pytorch=1.12.1
  # - torchvision=0.13.1
  - pytorch::pytorch=2.0.0
  - pytorch::pytorch-cuda=11.7.0
  - pytorch::torchvision=0.15.0
  - numpy>=1.23.1
  - fvcore
  - xformers
  - jupyterlab
  - ipywidgets
  - pip:
    # image classification
    - git+https://github.com/rwightman/pytorch-image-models.git
    # formatter
    - black
    # additional utilities
    - einops>=0.3.0
    - torchinfo # print model summary
    - torchprofile # FLOPs


--------------------------------------------------------------------------------
/install.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | if [ "$#" -lt 1 ]; then
 4 |     echo "Usage: install virtual_env_name"
 5 |     exit
 6 | fi
 7 | 
 8 | if [[ $OSTYPE != 'linux-gnu'* ]]; then
 9 |     echo "LINUX GNU OS needed (e.g., Ubuntu 20.04)."
10 |     exit  
11 | fi
12 | 
13 | VENV_NAME=$1
14 | UPDATE_PILLOW=1
15 | 
16 | # path of this script
17 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
18 | echo "---------------------- Work in $DIR ---------------------- "
19 | 
20 | # check if the env exists
# Succeed when `conda env list` contains an environment named exactly
# $VENV_NAME. Only the first column (the env name) is compared, so the name
# cannot match by accident inside another environment's path or in a
# comment line. Prints nothing.
# Globals:  VENV_NAME (read)
# Returns:  0 if the env exists, 1 otherwise
find_conda_env(){
    conda env list \
        | awk -v name="${VENV_NAME}" \
            'NF > 0 && $1 == name { found = 1 } END { exit (found ? 0 : 1) }'
}
24 | # remove: conda env remove -n env_name
25 | 
26 | # update conda 
27 | # conda update -n base -c defaults conda
28 | 
29 | # check env before install
# Create the environment from environment.yaml when it does not exist yet.
if ! find_conda_env; then
    echo "---------------------- Createing the conda env ${VENV_NAME}..."
    # Quote the name, and stop if creation fails rather than continuing
    # with a missing environment.
    conda env create -n "${VENV_NAME}" -f "$DIR"/environment.yaml || exit 1
# else
#     echo "Updating the conda env ${VENV_NAME}..."
#     conda env update -n ${VENV_NAME} -f "$DIR"/environment.yaml
fi
37 | 
38 | 
# Succeed when `conda list` (for the currently active environment) has a
# package whose name is exactly $1. Only the first column is compared, so
# "mmengine" does not match "mmengine-lite", and text in the header or in
# the version/build/channel columns cannot match. Prints nothing.
# Arguments: $1 - package name
# Returns:   0 if the package is listed, 1 otherwise
find_conda_package() {
    conda list \
        | awk -v pkg="$1" \
            'NF > 0 && $1 == pkg { found = 1 } END { exit (found ? 0 : 1) }'
}
42 | 
43 | # mmengine, mmcv, mmdet, mmseg, mmpretrain
# Install the OpenMMLab packages, but only from inside the target env.
# Both operands are quoted and CONDA_DEFAULT_ENV gets an empty default:
# with no env active, the unquoted test was a `[` syntax error and the
# script fell through to the install branch.
if [ "${CONDA_DEFAULT_ENV:-}" != "${VENV_NAME}" ] ; then
    echo "---------------------- Not inside the virtual env $VENV_NAME ---------------------- "
    echo "---------------------- Please manually run: conda activate $VENV_NAME ------------- "
    echo "----------------------  and then re-run this installation script ------------------ "
    exit 1
else
    # mmengine being listed is used as the marker for "already installed".
    if find_conda_package mmengine; then
        echo "mm packages installed already"
    else
        pip install -U openmim
        mim install mmengine
        mim install "mmcv>=2.0.0rc4"
        mim install "mmdet>=3.0.0rc0"
        mim install "mmsegmentation>=1.0.0"
        mim install "mmpretrain>=1.0.0rc7"
        python -c 'from mmengine.utils.dl_utils import collect_env;print(collect_env())'
    fi
fi
62 | 
63 | # update pillow, https://fastai1.fast.ai/performance.html#faster-image-processing
# Optionally replace Pillow with Pillow-SIMD (controlled by UPDATE_PILLOW).
if [ "${UPDATE_PILLOW}" == 1 ]; then
    if find_conda_package pillow-simd; then
        echo "PILLOW-SIMD installed already"
    else
        # check env before install
        # Quoted with an empty default so an unset CONDA_DEFAULT_ENV is
        # treated as "not inside the env" instead of breaking the test.
        if [ "${CONDA_DEFAULT_ENV:-}" != "${VENV_NAME}" ] ; then
            echo "---------------------- Not inside the virtual env $VENV_NAME ---------------------- "
            echo "---------------------- Please manually run: conda activate $VENV_NAME ------------- "
            echo "----------------------  and then re-run this installation script ------------------ "
            exit 1
        fi

        echo "---------------------- Install Pillow-SIMD for Faster Image Processing ---------------------- "
        echo "    If errors occur, please contact your admin to install prerequistes for pillow https://pillow.readthedocs.io/en/stable/installation.html#building-on-linux"
        ## Prerequisites for pillow: https://pillow.readthedocs.io/en/stable/installation.html#building-on-linux
        ##   They must be installed if building pillow-simd fails;
        ##   check with the system admin for the installation.

        # sudo apt-get install libtiff5-dev libjpeg8-dev libopenjp2-7-dev zlib1g-dev libfreetype6-dev \
        #   liblcms2-dev libwebp-dev tcl8.6-dev tk8.6-dev python3-tk libharfbuzz-dev libfribidi-dev \
        #   libxcb1-dev
        # sudo apt-get install gcc-multilib

        # Remove the stock imaging packages, build pillow-simd from source
        # with AVX2 enabled, then reinstall the jpeg/tiff libraries.
        conda uninstall -y --force pillow pil jpeg libtiff libjpeg-turbo
        pip   uninstall -y         pillow pil jpeg libtiff libjpeg-turbo
        conda install -yc conda-forge libjpeg-turbo
        CFLAGS="${CFLAGS} -mavx2" pip install --upgrade --no-cache-dir --force-reinstall --no-binary :all: --compile pillow-simd
        conda install -y -c zegami libtiff-libjpeg-turbo
        conda install -y jpeg libtiff
    fi
fi
95 | 
96 | 
97 | 
98 | 


--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .paca_vit import PaCaViT
2 | 


--------------------------------------------------------------------------------
/models/layers/__init__.py:
--------------------------------------------------------------------------------
1 | from .downsample import build_downsample_layer
2 | from .blur_pool import (
3 |     BlurConv2d,
4 |     BlurPoolConv2d,
5 |     BlurMaxPool2d,
6 |     apply_blurpool,
7 |     restore_blurpool,
8 | )
9 | 


--------------------------------------------------------------------------------
/models/layers/downsample.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | import torch
  4 | import torch.nn as nn
  5 | import torch.nn.functional as F
  6 | 
  7 | from mmengine.registry import MODELS
  8 | from mmcv.cnn import ConvModule, build_norm_layer
  9 | 
 10 | from timm.models.layers import to_2tuple, LayerNorm2d
 11 | 
 12 | from .blur_pool import apply_blurpool
 13 | 
 14 | 
# Register timm's LayerNorm2d in the MODELS registry under the name
# "LayerNorm2d" (the default `norm_cfg` type used below), unless an entry
# with that name is already present.
if "LayerNorm2d" not in MODELS:
    MODELS.register_module("LayerNorm2d", module=LayerNorm2d)
 17 | 
 18 | 
 19 | class DownsampleV1(nn.Module):
 20 |     def __init__(
 21 |         self,
 22 |         in_channels,
 23 |         out_channels,
 24 |         patch_size,
 25 |         kernel_size,
 26 |         norm_cfg=dict(type="LayerNorm2d", eps=1e-6),
 27 |         img_size=224,
 28 |     ):
 29 |         super().__init__()
 30 | 
 31 |         assert patch_size in (2, 4)
 32 |         img_size = to_2tuple(img_size)
 33 |         self.grid_size = (img_size[0] // patch_size, img_size[1] // patch_size)
 34 | 
 35 |         if patch_size <= kernel_size:
 36 |             self.proj = nn.Conv2d(
 37 |                 in_channels,
 38 |                 out_channels,
 39 |                 kernel_size=kernel_size,
 40 |                 stride=patch_size,
 41 |                 padding=(kernel_size - 1) // 2,
 42 |             )
 43 |         else:
 44 |             dim = out_channels // 2
 45 |             self.proj = nn.Sequential(
 46 |                 nn.Conv2d(
 47 |                     in_channels,
 48 |                     dim,
 49 |                     kernel_size=kernel_size,
 50 |                     stride=2,
 51 |                     padding=(kernel_size - 1) // 2,
 52 |                 ),
 53 |                 nn.Conv2d(
 54 |                     out_channels // 2,
 55 |                     out_channels,
 56 |                     kernel_size=kernel_size,
 57 |                     stride=patch_size // 2,
 58 |                     padding=(kernel_size - 1) // 2,
 59 |                 ),
 60 |             )
 61 | 
 62 |         self.norm = (
 63 |             build_norm_layer(norm_cfg, out_channels)[1] if norm_cfg else nn.Identity()
 64 |         )
 65 | 
 66 |     def forward(self, x):
 67 |         # x: B C H W
 68 |         x = self.proj(x)
 69 |         x = self.norm(x)
 70 |         return x
 71 | 
 72 | 
 73 | class DownsampleV2(nn.Module):
 74 |     def __init__(
 75 |         self,
 76 |         in_chs,
 77 |         out_chs,
 78 |         img_size=224,
 79 |         kernel_size=3,
 80 |         patch_size=4,
 81 |         ratio=0.5,
 82 |         conv_cfg=None,
 83 |         conv_bias=True,
 84 |         norm_cfg=dict(type="LayerNorm2d"),
 85 |         act_cfg=dict(type="GELU"),
 86 |         with_blurpool=False,
 87 |         order=("conv", "norm", "act"),
 88 |         **kwargs
 89 |     ):
 90 |         super().__init__()
 91 |         assert patch_size in (2, 4)
 92 | 
 93 |         img_size = to_2tuple(img_size)
 94 |         self.grid_size = (img_size[0] // patch_size, img_size[1] // patch_size)
 95 | 
 96 |         if patch_size == 4:
 97 |             mid_chs = int(out_chs * ratio)
 98 |             self.conv1 = ConvModule(
 99 |                 in_chs,
100 |                 mid_chs,
101 |                 kernel_size=kernel_size,
102 |                 stride=2,
103 |                 padding=(kernel_size - 1) // 2,
104 |                 bias=conv_bias,
105 |                 conv_cfg=conv_cfg,
106 |                 norm_cfg=norm_cfg,
107 |                 act_cfg=act_cfg,
108 |                 order=order,
109 |             )
110 |         else:
111 |             mid_chs = in_chs
112 |             self.conv1 = nn.Identity()
113 | 
114 |         self.conv2 = ConvModule(
115 |             mid_chs,
116 |             out_chs,
117 |             kernel_size=kernel_size,
118 |             stride=2,
119 |             padding=(kernel_size - 1) // 2,
120 |             bias=conv_bias,
121 |             conv_cfg=conv_cfg,
122 |             norm_cfg=norm_cfg,
123 |             act_cfg=None,
124 |             order=order,
125 |         )
126 |         if with_blurpool:
127 |             apply_blurpool(self.conv1)
128 | 
129 |     def forward(self, x):
130 |         out = self.conv1(x)
131 |         out = self.conv2(out)
132 |         return out
133 | 
134 | 
# Lookup table used by `build_downsample_layer`: maps the `type` string of a
# layer config to the class that implements it.
downsampler_cfg = {
    # layer type name: module class
    "DownsampleV1": DownsampleV1,
    "DownsampleV2": DownsampleV2,
}
140 | 
141 | 
def build_downsample_layer(cfg):
    """Build a downsample (stem or transition) layer from a config dict.

    Args:
        cfg (dict): cfg should contain:
            type (str): key of ``downsampler_cfg`` selecting the layer class.
            layer args: keyword arguments used to instantiate that class.

    Returns:
        layer (nn.Module): the created downsample layer.

    Raises:
        AssertionError: if ``cfg`` is not a dict or has no ``"type"`` key.
        KeyError: if the type is not present in ``downsampler_cfg``.
        NotImplementedError: if the type is registered with ``None``.
    """
    assert isinstance(cfg, dict) and "type" in cfg

    # Pop from a copy so the caller's config keeps its "type" entry.
    layer_kwargs = cfg.copy()
    type_name = layer_kwargs.pop("type")

    if type_name not in downsampler_cfg:
        raise KeyError(f"Unrecognized stem type {type_name}")

    layer_cls = downsampler_cfg[type_name]
    if layer_cls is None:
        raise NotImplementedError

    return layer_cls(**layer_kwargs)
166 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/default_runtime.py:
--------------------------------------------------------------------------------
# Shared runtime settings for the segmentation configs; the experiment
# configs pull this file in through their `_base_` list.
default_scope = "mmseg"
# Environment settings: cuDNN benchmark flag, multiprocessing options and
# the distributed backend.
env_cfg = dict(
    cudnn_benchmark=True,
    mp_cfg=dict(mp_start_method="fork", opencv_num_threads=0),
    dist_cfg=dict(backend="nccl"),
)
# Visualization: a local backend wrapped by the segmentation visualizer.
vis_backends = [dict(type="LocalVisBackend")]
visualizer = dict(
    type="SegLocalVisualizer", vis_backends=vis_backends, name="visualizer"
)
log_processor = dict(by_epoch=False)
log_level = "INFO"
# No checkpoint is loaded or resumed by default.
load_from = None
resume = False

# Test-time-augmentation wrapper.
tta_model = dict(type="SegTTAModel")
17 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/paca_head.py:
--------------------------------------------------------------------------------
# Base model settings for PaCaEncoderDecoder with a PaCaSegHead decode head.
# No backbone is defined here; the experiment configs that inherit this
# file add one and override `in_channels` / `num_classes`.
norm_cfg = dict(type="SyncBN", requires_grad=True)
# Input normalization and padding values.
data_preprocessor = dict(
    type="SegDataPreProcessor",
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255,
)
model = dict(
    type="PaCaEncoderDecoder",
    data_preprocessor=data_preprocessor,
    pretrained=None,
    decode_head=dict(
        type="PaCaSegHead",
        # Channel widths of the four input feature levels selected by in_index.
        in_channels=[32, 64, 160, 256],
        in_index=[0, 1, 2, 3],
        channels=256,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        # Main loss (weight 1.0) and auxiliary loss (weight 0.4).
        loss_decode=dict(
            type="CrossEntropyLoss",
            avg_non_ignore=True,
            use_sigmoid=False,
            loss_weight=1.0,
        ),
        aux_loss_decode=dict(
            type="CrossEntropyLoss",
            avg_non_ignore=True,
            use_sigmoid=False,
            loss_weight=0.4,
        ),
    ),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode="whole"),
)
41 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/models/upernet_swin.py:
--------------------------------------------------------------------------------
# Base model settings: EncoderDecoder with a SwinTransformer backbone, a
# UPerHead decode head and an FCNHead auxiliary head.
norm_cfg = dict(type='SyncBN', requires_grad=True)
# The backbone uses its own norm config (LN) instead of SyncBN.
backbone_norm_cfg = dict(type='LN', requires_grad=True)
# Input normalization and padding values.
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255)
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    pretrained=None,
    backbone=dict(
        type='SwinTransformer',
        pretrain_img_size=224,
        embed_dims=96,
        patch_size=4,
        window_size=7,
        mlp_ratio=4,
        depths=[2, 2, 6, 2],
        num_heads=[3, 6, 12, 24],
        strides=(4, 2, 2, 2),
        out_indices=(0, 1, 2, 3),
        qkv_bias=True,
        qk_scale=None,
        patch_norm=True,
        drop_rate=0.,
        attn_drop_rate=0.,
        drop_path_rate=0.3,
        use_abs_pos_embed=False,
        act_cfg=dict(type='GELU'),
        norm_cfg=backbone_norm_cfg),
    # Decode head consuming all four backbone outputs.
    decode_head=dict(
        type='UPerHead',
        in_channels=[96, 192, 384, 768],
        in_index=[0, 1, 2, 3],
        pool_scales=(1, 2, 3, 6),
        channels=512,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    # Auxiliary head on backbone output index 2, with loss weight 0.4.
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=384,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
63 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/schedules/schedule_160k.py:
--------------------------------------------------------------------------------
# 160k-iteration schedule: SGD with a polynomial LR policy; validation and
# checkpointing every 16000 iterations.
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
# learning policy (iteration-based, spanning the whole run)
param_scheduler = [
    dict(
        type='PolyLR',
        eta_min=1e-4,
        power=0.9,
        begin=0,
        end=160000,
        by_epoch=False)
]
# training schedule for 160k
train_cfg = dict(
    type='IterBasedTrainLoop', max_iters=160000, val_interval=16000)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
# Hooks: the logger interval is 50 and the checkpoint interval matches
# val_interval.
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=16000),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='SegVisualizationHook'))
26 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/schedules/schedule_20k.py:
--------------------------------------------------------------------------------
# 20k-iteration schedule: SGD with a polynomial LR policy; validation and
# checkpointing every 2000 iterations.
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
# learning policy (iteration-based, spanning the whole run)
param_scheduler = [
    dict(
        type='PolyLR',
        eta_min=1e-4,
        power=0.9,
        begin=0,
        end=20000,
        by_epoch=False)
]
# training schedule for 20k
train_cfg = dict(type='IterBasedTrainLoop', max_iters=20000, val_interval=2000)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
# Hooks: the logger interval is 50 and the checkpoint interval matches
# val_interval.
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=2000),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='SegVisualizationHook'))
25 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/schedules/schedule_240k.py:
--------------------------------------------------------------------------------
# 240k-iteration schedule: SGD with a polynomial LR policy; validation and
# checkpointing every 24000 iterations.
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
# learning policy (iteration-based, spanning the whole run)
param_scheduler = [
    dict(
        type='PolyLR',
        eta_min=1e-4,
        power=0.9,
        begin=0,
        end=240000,
        by_epoch=False)
]
# training schedule for 240k
train_cfg = dict(
    type='IterBasedTrainLoop', max_iters=240000, val_interval=24000)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
# Hooks: the logger interval is 50 and the checkpoint interval matches
# val_interval.
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=24000),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='SegVisualizationHook'))
26 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/schedules/schedule_320k.py:
--------------------------------------------------------------------------------
# 320k-iteration schedule: SGD with a polynomial LR policy; validation and
# checkpointing every 32000 iterations.
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
# learning policy (iteration-based, spanning the whole run)
param_scheduler = [
    dict(
        type='PolyLR',
        eta_min=1e-4,
        power=0.9,
        begin=0,
        end=320000,
        by_epoch=False)
]
# training schedule for 320k
train_cfg = dict(
    type='IterBasedTrainLoop', max_iters=320000, val_interval=32000)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
# Hooks: the logger interval is 50 and the checkpoint interval matches
# val_interval.
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=32000),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='SegVisualizationHook'))
26 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/schedules/schedule_40k.py:
--------------------------------------------------------------------------------
# 40k-iteration schedule: SGD with a polynomial LR policy; validation and
# checkpointing every 4000 iterations.
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
# learning policy (iteration-based, spanning the whole run)
param_scheduler = [
    dict(
        type='PolyLR',
        eta_min=1e-4,
        power=0.9,
        begin=0,
        end=40000,
        by_epoch=False)
]
# training schedule for 40k
train_cfg = dict(type='IterBasedTrainLoop', max_iters=40000, val_interval=4000)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
# Hooks: the logger interval is 50 and the checkpoint interval matches
# val_interval.
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=4000),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='SegVisualizationHook'))
25 | 


--------------------------------------------------------------------------------
/segmentation/configs/_base_/schedules/schedule_80k.py:
--------------------------------------------------------------------------------
# 80k-iteration schedule: SGD with a polynomial LR policy; validation and
# checkpointing every 8000 iterations.
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
# learning policy (iteration-based, spanning the whole run)
param_scheduler = [
    dict(
        type='PolyLR',
        eta_min=1e-4,
        power=0.9,
        begin=0,
        end=80000,
        by_epoch=False)
]
# training schedule for 80k
train_cfg = dict(type='IterBasedTrainLoop', max_iters=80000, val_interval=8000)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
# Hooks: the logger interval is 50 and the checkpoint interval matches
# val_interval.
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=8000),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='SegVisualizationHook'))
25 | 


--------------------------------------------------------------------------------
/segmentation/configs/paca_vit/paca_head/pacahead_pacavit_base_p2cconv_100_0_512x512_160k_ade20k.py:
--------------------------------------------------------------------------------
# PaCa head + pacavit_base_p2cconv_100_0 backbone, 512x512 crops, 160k
# iterations on ADE20K. Inherits the model, dataset, runtime and schedule
# base configs listed below and overrides parts of them.
_base_ = [
    "../../_base_/models/paca_head.py",
    "../../_base_/datasets/ade20k.py",
    "../../_base_/default_runtime.py",
    "../../_base_/schedules/schedule_160k.py",
]

crop_size = (512, 512)
data_preprocessor = dict(size=crop_size)
model = dict(
    data_preprocessor=data_preprocessor,
    backbone=dict(
        # `_delete_=True` is the config-inheritance marker for replacing the
        # inherited entry instead of merging into it.
        _delete_=True,
        type="pacavit_base_p2cconv_100_0_downstream",
        drop_path_rate=0.5,
        layer_scale=None,
        # Path to the ImageNet classification checkpoint (relative path).
        pretrained=(
            "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_base_p2cconv_100_0.pth"
        ),
        downstream_cluster_num=[200, 200, 200, 0],
    ),
    # Head input widths for this backbone, and 150 classes instead of the
    # base config's 19.
    decode_head=dict(in_channels=[96, 192, 384, 512], num_classes=150),
)


# AdamW optimizer (replaces the SGD wrapper inherited from the schedule)
optim_wrapper = dict(
    _delete_=True,
    type="OptimWrapper",
    optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01),
    # Per-parameter overrides selected by key: no weight decay for "norm",
    # 10x learning rate for "clustering.4" and "head".
    paramwise_cfg=dict(
        custom_keys={
            "norm": dict(decay_mult=0.0),
            "clustering.4": dict(lr_mult=10.0),  # .4 for p2cconv
            "head": dict(lr_mult=10.0),
        }
    ),
)

# Linear warm-up over the first 1500 iterations, then PolyLR (power 1.0)
# down to 0 at iteration 160000.
param_scheduler = [
    dict(type="LinearLR", start_factor=1e-6, by_epoch=False, begin=0, end=1500),
    dict(
        type="PolyLR",
        eta_min=0.0,
        power=1.0,
        begin=1500,
        end=160000,
        by_epoch=False,
    ),
]

# By default, models are trained on 8 GPUs with 2 images per GPU
train_dataloader = dict(batch_size=2)
val_dataloader = dict(batch_size=1)
test_dataloader = val_dataloader
56 | 


--------------------------------------------------------------------------------
/segmentation/configs/paca_vit/paca_head/pacahead_pacavit_convmixer_base_100_512x512_160k_ade20k.py:
--------------------------------------------------------------------------------
# PaCa head + pacavit_convmixer_base_100 backbone, 512x512 crops, 160k
# iterations on ADE20K. Inherits the model, dataset, runtime and schedule
# base configs listed below and overrides parts of them.
_base_ = [
    "../../_base_/models/paca_head.py",
    "../../_base_/datasets/ade20k.py",
    "../../_base_/default_runtime.py",
    "../../_base_/schedules/schedule_160k.py",
]

crop_size = (512, 512)
data_preprocessor = dict(size=crop_size)
model = dict(
    data_preprocessor=data_preprocessor,
    backbone=dict(
        # `_delete_=True` is the config-inheritance marker for replacing the
        # inherited entry instead of merging into it.
        _delete_=True,
        type="pacavit_convmixer_base_100_downstream",
        drop_path_rate=0.5,
        layer_scale=None,
        # Path to the ImageNet classification checkpoint (relative path).
        pretrained=(
            "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_convmixer_base_100.pth"
        ),
        downstream_cluster_num=[200, 200, 200, 200],
    ),
    # Head input widths for this backbone, and 150 classes instead of the
    # base config's 19.
    decode_head=dict(in_channels=[96, 192, 384, 512], num_classes=150),
)


# AdamW optimizer (replaces the SGD wrapper inherited from the schedule)
optim_wrapper = dict(
    _delete_=True,
    type="OptimWrapper",
    optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01),
    # Per-parameter overrides selected by key: no weight decay for "norm",
    # 10x learning rate for "clustering.4" and "head".
    # NOTE(review): the ".4 for p2cconv" remark is identical to the p2cconv
    # config; confirm that the "clustering.4" key also applies to this
    # convmixer backbone.
    paramwise_cfg=dict(
        custom_keys={
            "norm": dict(decay_mult=0.0),
            "clustering.4": dict(lr_mult=10.0),  # .4 for p2cconv
            "head": dict(lr_mult=10.0),
        }
    ),
)

# Linear warm-up over the first 1500 iterations, then PolyLR (power 1.0)
# down to 0 at iteration 160000.
param_scheduler = [
    dict(type="LinearLR", start_factor=1e-6, by_epoch=False, begin=0, end=1500),
    dict(
        type="PolyLR",
        eta_min=0.0,
        power=1.0,
        begin=1500,
        end=160000,
        by_epoch=False,
    ),
]

# By default, models are trained on 8 GPUs with 2 images per GPU
train_dataloader = dict(batch_size=2)
val_dataloader = dict(batch_size=1)
test_dataloader = val_dataloader
56 | 


--------------------------------------------------------------------------------
/segmentation/configs/paca_vit/paca_head/pacahead_pacavit_convmixer_small_100_512x512_160k_ade20k.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # inherit model, dataset, runtime and schedule defaults; this file only overrides them
 2 |     "../../_base_/models/paca_head.py",
 3 |     "../../_base_/datasets/ade20k.py",
 4 |     "../../_base_/default_runtime.py",
 5 |     "../../_base_/schedules/schedule_160k.py",
 6 | ]
 7 | # PaCa head with a "pacavit_convmixer_small_100_downstream" backbone: ADE20K, 512x512 crops, 160k iterations.
 8 | crop_size = (512, 512)  # also passed to the data preprocessor as `size`
 9 | data_preprocessor = dict(size=crop_size)
10 | model = dict(
11 |     data_preprocessor=data_preprocessor,
12 |     backbone=dict(
13 |         _delete_=True,  # replace the inherited backbone dict instead of merging into it
14 |         type="pacavit_convmixer_small_100_downstream",
15 |         drop_path_rate=0.1,
16 |         layer_scale=None,
17 |         pretrained=(  # relative checkpoint path -- NOTE(review): confirm the directory it is resolved from
18 |             "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_convmixer_small_100.pth"
19 |         ),
20 |         downstream_cluster_num=[200, 200, 200, 200],  # NOTE(review): presumably one entry per backbone stage -- confirm
21 |     ),
22 |     decode_head=dict(in_channels=[96, 192, 320, 384], num_classes=150),  # NOTE(review): in_channels presumably mirror the backbone stage widths
23 | )
24 | 
25 | 
26 | # AdamW optimizer (_delete_=True discards the inherited optim_wrapper instead of merging)
27 | optim_wrapper = dict(
28 |     _delete_=True,
29 |     type="OptimWrapper",
30 |     optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01),
31 |     paramwise_cfg=dict(
32 |         custom_keys={  # overrides applied to parameters whose name contains the key
33 |             "norm": dict(decay_mult=0.0),  # no weight decay
34 |             "clustering.4": dict(lr_mult=10.0),  # .4 for p2cconv -- NOTE(review): this backbone is convmixer; confirm the key matches
35 |             "head": dict(lr_mult=10.0),  # 10x the base learning rate
36 |         }
37 |     ),
38 | )
39 | 
40 | param_scheduler = [  # per-iteration schedule: 1500-iter linear warm-up, then PolyLR to 0 at iter 160000
41 |     dict(type="LinearLR", start_factor=1e-6, by_epoch=False, begin=0, end=1500),
42 |     dict(
43 |         type="PolyLR",
44 |         eta_min=0.0,
45 |         power=1.0,
46 |         begin=1500,
47 |         end=160000,
48 |         by_epoch=False,
49 |     ),
50 | ]
51 | 
52 | # By default, models are trained on 8 GPUs with 2 images per GPU
53 | train_dataloader = dict(batch_size=2)
54 | val_dataloader = dict(batch_size=1)
55 | test_dataloader = val_dataloader  # testing reuses the validation loader settings
56 | 


--------------------------------------------------------------------------------
/segmentation/configs/paca_vit/paca_head/pacahead_pacavit_convmixer_tiny_100_512x512_160k_ade20k.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # inherit model, dataset, runtime and schedule defaults; this file only overrides them
 2 |     "../../_base_/models/paca_head.py",
 3 |     "../../_base_/datasets/ade20k.py",
 4 |     "../../_base_/default_runtime.py",
 5 |     "../../_base_/schedules/schedule_160k.py",
 6 | ]
 7 | # PaCa head with a "pacavit_convmixer_tiny_100_downstream" backbone: ADE20K, 512x512 crops, 160k iterations.
 8 | crop_size = (512, 512)  # also passed to the data preprocessor as `size`
 9 | data_preprocessor = dict(size=crop_size)
10 | model = dict(
11 |     data_preprocessor=data_preprocessor,
12 |     backbone=dict(
13 |         _delete_=True,  # replace the inherited backbone dict instead of merging into it
14 |         type="pacavit_convmixer_tiny_100_downstream",
15 |         drop_path_rate=0.1,
16 |         layer_scale=None,
17 |         pretrained=(  # relative checkpoint path -- NOTE(review): confirm the directory it is resolved from
18 |             "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_convmixer_tiny_100.pth"
19 |         ),
20 |         downstream_cluster_num=[200, 200, 200, 200],  # NOTE(review): presumably one entry per backbone stage -- confirm
21 |     ),
22 |     decode_head=dict(in_channels=[96, 192, 320, 384], num_classes=150),  # NOTE(review): in_channels presumably mirror the backbone stage widths
23 | )
24 | 
25 | 
26 | # AdamW optimizer (_delete_=True discards the inherited optim_wrapper instead of merging)
27 | optim_wrapper = dict(
28 |     _delete_=True,
29 |     type="OptimWrapper",
30 |     optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01),
31 |     paramwise_cfg=dict(
32 |         custom_keys={  # overrides applied to parameters whose name contains the key
33 |             "norm": dict(decay_mult=0.0),  # no weight decay
34 |             "clustering.4": dict(lr_mult=10.0),  # .4 for p2cconv -- NOTE(review): this backbone is convmixer; confirm the key matches
35 |             "head": dict(lr_mult=10.0),  # 10x the base learning rate
36 |         }
37 |     ),
38 | )
39 | 
40 | param_scheduler = [  # per-iteration schedule: 1500-iter linear warm-up, then PolyLR to 0 at iter 160000
41 |     dict(type="LinearLR", start_factor=1e-6, by_epoch=False, begin=0, end=1500),
42 |     dict(
43 |         type="PolyLR",
44 |         eta_min=0.0,
45 |         power=1.0,
46 |         begin=1500,
47 |         end=160000,
48 |         by_epoch=False,
49 |     ),
50 | ]
51 | 
52 | # By default, models are trained on 8 GPUs with 2 images per GPU
53 | train_dataloader = dict(batch_size=2)
54 | val_dataloader = dict(batch_size=1)
55 | test_dataloader = val_dataloader  # testing reuses the validation loader settings
56 | 


--------------------------------------------------------------------------------
/segmentation/configs/paca_vit/paca_head/pacahead_pacavit_small_p2cconv_100_0_512x512_160k_ade20k.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # inherit model, dataset, runtime and schedule defaults; this file only overrides them
 2 |     "../../_base_/models/paca_head.py",
 3 |     "../../_base_/datasets/ade20k.py",
 4 |     "../../_base_/default_runtime.py",
 5 |     "../../_base_/schedules/schedule_160k.py",
 6 | ]
 7 | # PaCa head with a "pacavit_small_p2cconv_100_0_downstream" backbone: ADE20K, 512x512 crops, 160k iterations.
 8 | crop_size = (512, 512)  # also passed to the data preprocessor as `size`
 9 | data_preprocessor = dict(size=crop_size)
10 | model = dict(
11 |     data_preprocessor=data_preprocessor,
12 |     backbone=dict(
13 |         _delete_=True,  # replace the inherited backbone dict instead of merging into it
14 |         type="pacavit_small_p2cconv_100_0_downstream",
15 |         drop_path_rate=0.1,
16 |         layer_scale=None,
17 |         pretrained=(  # relative checkpoint path -- NOTE(review): confirm the directory it is resolved from
18 |             "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_100_0.pth"
19 |         ),
20 |         downstream_cluster_num=[200, 200, 200, 0],  # NOTE(review): presumably one entry per backbone stage -- confirm
21 |     ),
22 |     decode_head=dict(in_channels=[96, 192, 320, 384], num_classes=150),  # NOTE(review): in_channels presumably mirror the backbone stage widths
23 | )
24 | 
25 | 
26 | # AdamW optimizer (_delete_=True discards the inherited optim_wrapper instead of merging)
27 | optim_wrapper = dict(
28 |     _delete_=True,
29 |     type="OptimWrapper",
30 |     optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01),
31 |     paramwise_cfg=dict(
32 |         custom_keys={  # overrides applied to parameters whose name contains the key
33 |             "norm": dict(decay_mult=0.0),  # no weight decay
34 |             "clustering.4": dict(lr_mult=10.0),  # 10x LR; the ".4" suffix is specific to p2cconv (per the original note)
35 |             "head": dict(lr_mult=10.0),  # 10x the base learning rate
36 |         }
37 |     ),
38 | )
39 | 
40 | param_scheduler = [  # per-iteration schedule: 1500-iter linear warm-up, then PolyLR to 0 at iter 160000
41 |     dict(type="LinearLR", start_factor=1e-6, by_epoch=False, begin=0, end=1500),
42 |     dict(
43 |         type="PolyLR",
44 |         eta_min=0.0,
45 |         power=1.0,
46 |         begin=1500,
47 |         end=160000,
48 |         by_epoch=False,
49 |     ),
50 | ]
51 | 
52 | # By default, models are trained on 8 GPUs with 2 images per GPU
53 | train_dataloader = dict(batch_size=2)
54 | val_dataloader = dict(batch_size=1)
55 | test_dataloader = val_dataloader  # testing reuses the validation loader settings
56 | 


--------------------------------------------------------------------------------
/segmentation/configs/paca_vit/paca_head/pacahead_pacavit_small_p2cconv_100_49_512x512_160k_ade20k.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # inherit model, dataset, runtime and schedule defaults; this file only overrides them
 2 |     "../../_base_/models/paca_head.py",
 3 |     "../../_base_/datasets/ade20k.py",
 4 |     "../../_base_/default_runtime.py",
 5 |     "../../_base_/schedules/schedule_160k.py",
 6 | ]
 7 | # PaCa head with a "pacavit_small_p2cconv_100_49_downstream" backbone: ADE20K, 512x512 crops, 160k iterations.
 8 | crop_size = (512, 512)  # also passed to the data preprocessor as `size`
 9 | data_preprocessor = dict(size=crop_size)
10 | model = dict(
11 |     data_preprocessor=data_preprocessor,
12 |     backbone=dict(
13 |         _delete_=True,  # replace the inherited backbone dict instead of merging into it
14 |         type="pacavit_small_p2cconv_100_49_downstream",
15 |         drop_path_rate=0.1,
16 |         layer_scale=None,
17 |         pretrained=(  # relative checkpoint path -- NOTE(review): confirm the directory it is resolved from
18 |             "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_100_49.pth"
19 |         ),
20 |         downstream_cluster_num=[200, 200, 200, 200],  # NOTE(review): presumably one entry per backbone stage -- confirm
21 |     ),
22 |     decode_head=dict(in_channels=[96, 192, 320, 384], num_classes=150),  # NOTE(review): in_channels presumably mirror the backbone stage widths
23 | )
24 | 
25 | 
26 | # AdamW optimizer (_delete_=True discards the inherited optim_wrapper instead of merging)
27 | optim_wrapper = dict(
28 |     _delete_=True,
29 |     type="OptimWrapper",
30 |     optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01),
31 |     paramwise_cfg=dict(
32 |         custom_keys={  # overrides applied to parameters whose name contains the key
33 |             "norm": dict(decay_mult=0.0),  # no weight decay
34 |             "clustering.4": dict(lr_mult=10.0),  # 10x LR; the ".4" suffix is specific to p2cconv (per the original note)
35 |             "head": dict(lr_mult=10.0),  # 10x the base learning rate
36 |         }
37 |     ),
38 | )
39 | 
40 | param_scheduler = [  # per-iteration schedule: 1500-iter linear warm-up, then PolyLR to 0 at iter 160000
41 |     dict(type="LinearLR", start_factor=1e-6, by_epoch=False, begin=0, end=1500),
42 |     dict(
43 |         type="PolyLR",
44 |         eta_min=0.0,
45 |         power=1.0,
46 |         begin=1500,
47 |         end=160000,
48 |         by_epoch=False,
49 |     ),
50 | ]
51 | 
52 | # By default, models are trained on 8 GPUs with 2 images per GPU
53 | train_dataloader = dict(batch_size=2)
54 | val_dataloader = dict(batch_size=1)
55 | test_dataloader = val_dataloader  # testing reuses the validation loader settings
56 | 


--------------------------------------------------------------------------------
/segmentation/configs/paca_vit/paca_head/pacahead_pacavit_small_p2cconv_100_512x512_160k_ade20k.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # inherit model, dataset, runtime and schedule defaults; this file only overrides them
 2 |     "../../_base_/models/paca_head.py",
 3 |     "../../_base_/datasets/ade20k.py",
 4 |     "../../_base_/default_runtime.py",
 5 |     "../../_base_/schedules/schedule_160k.py",
 6 | ]
 7 | # PaCa head with a "pacavit_small_p2cconv_100_downstream" backbone: ADE20K, 512x512 crops, 160k iterations.
 8 | crop_size = (512, 512)  # also passed to the data preprocessor as `size`
 9 | data_preprocessor = dict(size=crop_size)
10 | model = dict(
11 |     data_preprocessor=data_preprocessor,
12 |     backbone=dict(
13 |         _delete_=True,  # replace the inherited backbone dict instead of merging into it
14 |         type="pacavit_small_p2cconv_100_downstream",
15 |         drop_path_rate=0.1,
16 |         layer_scale=None,
17 |         pretrained=(  # relative checkpoint path -- NOTE(review): confirm the directory it is resolved from
18 |             "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_100.pth"
19 |         ),
20 |         downstream_cluster_num=[200, 200, 200, 200],  # NOTE(review): presumably one entry per backbone stage -- confirm
21 |     ),
22 |     decode_head=dict(in_channels=[96, 192, 320, 384], num_classes=150),  # NOTE(review): in_channels presumably mirror the backbone stage widths
23 | )
24 | 
25 | 
26 | # AdamW optimizer (_delete_=True discards the inherited optim_wrapper instead of merging)
27 | optim_wrapper = dict(
28 |     _delete_=True,
29 |     type="OptimWrapper",
30 |     optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01),
31 |     paramwise_cfg=dict(
32 |         custom_keys={  # overrides applied to parameters whose name contains the key
33 |             "norm": dict(decay_mult=0.0),  # no weight decay
34 |             "clustering.4": dict(lr_mult=10.0),  # 10x LR; the ".4" suffix is specific to p2cconv (per the original note)
35 |             "head": dict(lr_mult=10.0),  # 10x the base learning rate
36 |         }
37 |     ),
38 | )
39 | 
40 | param_scheduler = [  # per-iteration schedule: 1500-iter linear warm-up, then PolyLR to 0 at iter 160000
41 |     dict(type="LinearLR", start_factor=1e-6, by_epoch=False, begin=0, end=1500),
42 |     dict(
43 |         type="PolyLR",
44 |         eta_min=0.0,
45 |         power=1.0,
46 |         begin=1500,
47 |         end=160000,
48 |         by_epoch=False,
49 |     ),
50 | ]
51 | 
52 | # By default, models are trained on 8 GPUs with 2 images per GPU
53 | train_dataloader = dict(batch_size=2)
54 | val_dataloader = dict(batch_size=1)
55 | test_dataloader = val_dataloader  # testing reuses the validation loader settings
56 | 


--------------------------------------------------------------------------------
/segmentation/configs/paca_vit/paca_head/pacahead_pacavit_small_p2cconv_100_blockwise_512x512_160k_ade20k.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # inherit model, dataset, runtime and schedule defaults; this file only overrides them
 2 |     "../../_base_/models/paca_head.py",
 3 |     "../../_base_/datasets/ade20k.py",
 4 |     "../../_base_/default_runtime.py",
 5 |     "../../_base_/schedules/schedule_160k.py",
 6 | ]
 7 | # PaCa head with a "pacavit_small_p2cconv_100_blockwise_downstream" backbone: ADE20K, 512x512 crops, 160k iterations.
 8 | crop_size = (512, 512)  # also passed to the data preprocessor as `size`
 9 | data_preprocessor = dict(size=crop_size)
10 | model = dict(
11 |     data_preprocessor=data_preprocessor,
12 |     backbone=dict(
13 |         _delete_=True,  # replace the inherited backbone dict instead of merging into it
14 |         type="pacavit_small_p2cconv_100_blockwise_downstream",
15 |         drop_path_rate=0.1,
16 |         layer_scale=None,
17 |         pretrained=(  # relative checkpoint path -- NOTE(review): confirm the directory it is resolved from
18 |             "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_100_blockwise.pth"
19 |         ),
20 |         downstream_cluster_num=[200, 200, 200, 200],  # NOTE(review): presumably one entry per backbone stage -- confirm
21 |     ),
22 |     decode_head=dict(in_channels=[96, 192, 320, 384], num_classes=150),  # NOTE(review): in_channels presumably mirror the backbone stage widths
23 | )
24 | 
25 | 
26 | # AdamW optimizer (_delete_=True discards the inherited optim_wrapper instead of merging)
27 | optim_wrapper = dict(
28 |     _delete_=True,
29 |     type="OptimWrapper",
30 |     optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01),
31 |     paramwise_cfg=dict(
32 |         custom_keys={  # overrides applied to parameters whose name contains the key
33 |             "norm": dict(decay_mult=0.0),  # no weight decay
34 |             "clustering.4": dict(lr_mult=10.0),  # 10x LR; the ".4" suffix is specific to p2cconv (per the original note)
35 |             "head": dict(lr_mult=10.0),  # 10x the base learning rate
36 |         }
37 |     ),
38 | )
39 | 
40 | param_scheduler = [  # per-iteration schedule: 1500-iter linear warm-up, then PolyLR to 0 at iter 160000
41 |     dict(type="LinearLR", start_factor=1e-6, by_epoch=False, begin=0, end=1500),
42 |     dict(
43 |         type="PolyLR",
44 |         eta_min=0.0,
45 |         power=1.0,
46 |         begin=1500,
47 |         end=160000,
48 |         by_epoch=False,
49 |     ),
50 | ]
51 | 
52 | # By default, models are trained on 8 GPUs with 2 images per GPU
53 | train_dataloader = dict(batch_size=2)
54 | val_dataloader = dict(batch_size=1)
55 | test_dataloader = val_dataloader  # testing reuses the validation loader settings
56 | 


--------------------------------------------------------------------------------
/segmentation/configs/paca_vit/paca_head/pacahead_pacavit_small_p2cconv_2_0_512x512_160k_ade20k.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # inherit model, dataset, runtime and schedule defaults; this file only overrides them
 2 |     "../../_base_/models/paca_head.py",
 3 |     "../../_base_/datasets/ade20k.py",
 4 |     "../../_base_/default_runtime.py",
 5 |     "../../_base_/schedules/schedule_160k.py",
 6 | ]
 7 | # PaCa head with a "pacavit_small_p2cconv_2_0_downstream" backbone: ADE20K, 512x512 crops, 160k iterations.
 8 | crop_size = (512, 512)  # also passed to the data preprocessor as `size`
 9 | data_preprocessor = dict(size=crop_size)
10 | model = dict(
11 |     data_preprocessor=data_preprocessor,
12 |     backbone=dict(
13 |         _delete_=True,  # replace the inherited backbone dict instead of merging into it
14 |         type="pacavit_small_p2cconv_2_0_downstream",
15 |         drop_path_rate=0.1,
16 |         layer_scale=None,
17 |         pretrained=(  # relative checkpoint path -- NOTE(review): confirm the directory it is resolved from
18 |             "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_2_0.pth"
19 |         ),
20 |         downstream_cluster_num=[200, 200, 200, 0],  # NOTE(review): presumably one entry per backbone stage -- confirm
21 |     ),
22 |     decode_head=dict(in_channels=[96, 192, 320, 384], num_classes=150),  # NOTE(review): in_channels presumably mirror the backbone stage widths
23 | )
24 | 
25 | 
26 | # AdamW optimizer (_delete_=True discards the inherited optim_wrapper instead of merging)
27 | optim_wrapper = dict(
28 |     _delete_=True,
29 |     type="OptimWrapper",
30 |     optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01),
31 |     paramwise_cfg=dict(
32 |         custom_keys={  # overrides applied to parameters whose name contains the key
33 |             "norm": dict(decay_mult=0.0),  # no weight decay
34 |             "clustering.4": dict(lr_mult=10.0),  # 10x LR; the ".4" suffix is specific to p2cconv (per the original note)
35 |             "head": dict(lr_mult=10.0),  # 10x the base learning rate
36 |         }
37 |     ),
38 | )
39 | 
40 | param_scheduler = [  # per-iteration schedule: 1500-iter linear warm-up, then PolyLR to 0 at iter 160000
41 |     dict(type="LinearLR", start_factor=1e-6, by_epoch=False, begin=0, end=1500),
42 |     dict(
43 |         type="PolyLR",
44 |         eta_min=0.0,
45 |         power=1.0,
46 |         begin=1500,
47 |         end=160000,
48 |         by_epoch=False,
49 |     ),
50 | ]
51 | 
52 | # By default, models are trained on 8 GPUs with 2 images per GPU
53 | train_dataloader = dict(batch_size=2)
54 | val_dataloader = dict(batch_size=1)
55 | test_dataloader = val_dataloader  # testing reuses the validation loader settings
56 | 


--------------------------------------------------------------------------------
/segmentation/configs/paca_vit/paca_head/pacahead_pacavit_small_p2cconv_49_0_512x512_160k_ade20k.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # inherit model, dataset, runtime and schedule defaults; this file only overrides them
 2 |     "../../_base_/models/paca_head.py",
 3 |     "../../_base_/datasets/ade20k.py",
 4 |     "../../_base_/default_runtime.py",
 5 |     "../../_base_/schedules/schedule_160k.py",
 6 | ]
 7 | # PaCa head with a "pacavit_small_p2cconv_49_0_downstream" backbone: ADE20K, 512x512 crops, 160k iterations.
 8 | crop_size = (512, 512)  # also passed to the data preprocessor as `size`
 9 | data_preprocessor = dict(size=crop_size)
10 | model = dict(
11 |     data_preprocessor=data_preprocessor,
12 |     backbone=dict(
13 |         _delete_=True,  # replace the inherited backbone dict instead of merging into it
14 |         type="pacavit_small_p2cconv_49_0_downstream",
15 |         drop_path_rate=0.1,
16 |         layer_scale=None,
17 |         pretrained=(  # relative checkpoint path -- NOTE(review): confirm the directory it is resolved from
18 |             "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_49_0.pth"
19 |         ),
20 |         downstream_cluster_num=[200, 200, 200, 0],  # NOTE(review): presumably one entry per backbone stage -- confirm
21 |     ),
22 |     decode_head=dict(in_channels=[96, 192, 320, 384], num_classes=150),  # NOTE(review): in_channels presumably mirror the backbone stage widths
23 | )
24 | 
25 | 
26 | # AdamW optimizer (_delete_=True discards the inherited optim_wrapper instead of merging)
27 | optim_wrapper = dict(
28 |     _delete_=True,
29 |     type="OptimWrapper",
30 |     optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01),
31 |     paramwise_cfg=dict(
32 |         custom_keys={  # overrides applied to parameters whose name contains the key
33 |             "norm": dict(decay_mult=0.0),  # no weight decay
34 |             "clustering.4": dict(lr_mult=10.0),  # 10x LR; the ".4" suffix is specific to p2cconv (per the original note)
35 |             "head": dict(lr_mult=10.0),  # 10x the base learning rate
36 |         }
37 |     ),
38 | )
39 | 
40 | param_scheduler = [  # per-iteration schedule: 1500-iter linear warm-up, then PolyLR to 0 at iter 160000
41 |     dict(type="LinearLR", start_factor=1e-6, by_epoch=False, begin=0, end=1500),
42 |     dict(
43 |         type="PolyLR",
44 |         eta_min=0.0,
45 |         power=1.0,
46 |         begin=1500,
47 |         end=160000,
48 |         by_epoch=False,
49 |     ),
50 | ]
51 | 
52 | # By default, models are trained on 8 GPUs with 2 images per GPU
53 | train_dataloader = dict(batch_size=2)
54 | val_dataloader = dict(batch_size=1)
55 | test_dataloader = val_dataloader  # testing reuses the validation loader settings
56 | 


--------------------------------------------------------------------------------
/segmentation/configs/paca_vit/paca_head/pacahead_pacavit_small_p2cconv_49_100_512x512_160k_ade20k.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # inherit model, dataset, runtime and schedule defaults; this file only overrides them
 2 |     "../../_base_/models/paca_head.py",
 3 |     "../../_base_/datasets/ade20k.py",
 4 |     "../../_base_/default_runtime.py",
 5 |     "../../_base_/schedules/schedule_160k.py",
 6 | ]
 7 | # PaCa head with a "pacavit_small_p2cconv_49_100_downstream" backbone: ADE20K, 512x512 crops, 160k iterations.
 8 | crop_size = (512, 512)  # also passed to the data preprocessor as `size`
 9 | data_preprocessor = dict(size=crop_size)
10 | model = dict(
11 |     data_preprocessor=data_preprocessor,
12 |     backbone=dict(
13 |         _delete_=True,  # replace the inherited backbone dict instead of merging into it
14 |         type="pacavit_small_p2cconv_49_100_downstream",
15 |         drop_path_rate=0.1,
16 |         layer_scale=None,
17 |         pretrained=(  # relative checkpoint path -- NOTE(review): confirm the directory it is resolved from
18 |             "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_49_100.pth"
19 |         ),
20 |         downstream_cluster_num=[200, 200, 200, 200],  # NOTE(review): presumably one entry per backbone stage -- confirm
21 |     ),
22 |     decode_head=dict(in_channels=[96, 192, 320, 384], num_classes=150),  # NOTE(review): in_channels presumably mirror the backbone stage widths
23 | )
24 | 
25 | 
26 | # AdamW optimizer (_delete_=True discards the inherited optim_wrapper instead of merging)
27 | optim_wrapper = dict(
28 |     _delete_=True,
29 |     type="OptimWrapper",
30 |     optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01),
31 |     paramwise_cfg=dict(
32 |         custom_keys={  # overrides applied to parameters whose name contains the key
33 |             "norm": dict(decay_mult=0.0),  # no weight decay
34 |             "clustering.4": dict(lr_mult=10.0),  # 10x LR; the ".4" suffix is specific to p2cconv (per the original note)
35 |             "head": dict(lr_mult=10.0),  # 10x the base learning rate
36 |         }
37 |     ),
38 | )
39 | 
40 | param_scheduler = [  # per-iteration schedule: 1500-iter linear warm-up, then PolyLR to 0 at iter 160000
41 |     dict(type="LinearLR", start_factor=1e-6, by_epoch=False, begin=0, end=1500),
42 |     dict(
43 |         type="PolyLR",
44 |         eta_min=0.0,
45 |         power=1.0,
46 |         begin=1500,
47 |         end=160000,
48 |         by_epoch=False,
49 |     ),
50 | ]
51 | 
52 | # By default, models are trained on 8 GPUs with 2 images per GPU
53 | train_dataloader = dict(batch_size=2)
54 | val_dataloader = dict(batch_size=1)
55 | test_dataloader = val_dataloader  # testing reuses the validation loader settings
56 | 


--------------------------------------------------------------------------------
/segmentation/configs/paca_vit/paca_head/pacahead_pacavit_small_p2cmlp_100_0_512x512_160k_ade20k.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # inherit model, dataset, runtime and schedule defaults; this file only overrides them
 2 |     "../../_base_/models/paca_head.py",
 3 |     "../../_base_/datasets/ade20k.py",
 4 |     "../../_base_/default_runtime.py",
 5 |     "../../_base_/schedules/schedule_160k.py",
 6 | ]
 7 | # PaCa head with a "pacavit_small_p2cmlp_100_0_downstream" backbone: ADE20K, 512x512 crops, 160k iterations.
 8 | crop_size = (512, 512)  # also passed to the data preprocessor as `size`
 9 | data_preprocessor = dict(size=crop_size)
10 | model = dict(
11 |     data_preprocessor=data_preprocessor,
12 |     backbone=dict(
13 |         _delete_=True,  # replace the inherited backbone dict instead of merging into it
14 |         type="pacavit_small_p2cmlp_100_0_downstream",
15 |         drop_path_rate=0.1,
16 |         layer_scale=None,
17 |         pretrained=(  # relative checkpoint path -- NOTE(review): confirm the directory it is resolved from
18 |             "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cmlp_100_0.pth"
19 |         ),
20 |         downstream_cluster_num=[200, 200, 200, 0],  # NOTE(review): presumably one entry per backbone stage -- confirm
21 |     ),
22 |     decode_head=dict(in_channels=[96, 192, 320, 384], num_classes=150),  # NOTE(review): in_channels presumably mirror the backbone stage widths
23 | )
24 | 
25 | 
26 | # AdamW optimizer (_delete_=True discards the inherited optim_wrapper instead of merging)
27 | optim_wrapper = dict(
28 |     _delete_=True,
29 |     type="OptimWrapper",
30 |     optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01),
31 |     paramwise_cfg=dict(
32 |         custom_keys={  # overrides applied to parameters whose name contains the key
33 |             "norm": dict(decay_mult=0.0),  # no weight decay
34 |             "clustering.4": dict(lr_mult=10.0),  # .4 for p2cconv -- NOTE(review): this backbone is p2cmlp; confirm the key matches
35 |             "head": dict(lr_mult=10.0),  # 10x the base learning rate
36 |         }
37 |     ),
38 | )
39 | 
40 | param_scheduler = [  # per-iteration schedule: 1500-iter linear warm-up, then PolyLR to 0 at iter 160000
41 |     dict(type="LinearLR", start_factor=1e-6, by_epoch=False, begin=0, end=1500),
42 |     dict(
43 |         type="PolyLR",
44 |         eta_min=0.0,
45 |         power=1.0,
46 |         begin=1500,
47 |         end=160000,
48 |         by_epoch=False,
49 |     ),
50 | ]
51 | 
52 | # By default, models are trained on 8 GPUs with 2 images per GPU
53 | train_dataloader = dict(batch_size=2)
54 | val_dataloader = dict(batch_size=1)
55 | test_dataloader = val_dataloader  # testing reuses the validation loader settings
56 | 


--------------------------------------------------------------------------------
/segmentation/configs/paca_vit/paca_head/pacahead_pacavit_tiny_p2cconv_100_0_512x512_160k_ade20k.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # inherit model, dataset, runtime and schedule defaults; this file only overrides them
 2 |     "../../_base_/models/paca_head.py",
 3 |     "../../_base_/datasets/ade20k.py",
 4 |     "../../_base_/default_runtime.py",
 5 |     "../../_base_/schedules/schedule_160k.py",
 6 | ]
 7 | # PaCa head with a "pacavit_tiny_p2cconv_100_0_downstream" backbone: ADE20K, 512x512 crops, 160k iterations.
 8 | crop_size = (512, 512)  # also passed to the data preprocessor as `size`
 9 | data_preprocessor = dict(size=crop_size)
10 | model = dict(
11 |     data_preprocessor=data_preprocessor,
12 |     backbone=dict(
13 |         _delete_=True,  # replace the inherited backbone dict instead of merging into it
14 |         type="pacavit_tiny_p2cconv_100_0_downstream",
15 |         drop_path_rate=0.1,
16 |         layer_scale=None,
17 |         pretrained=(  # relative checkpoint path -- NOTE(review): confirm the directory it is resolved from
18 |             "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_tiny_p2cconv_100_0.pth"
19 |         ),
20 |         downstream_cluster_num=[200, 200, 200, 0],  # NOTE(review): presumably one entry per backbone stage -- confirm
21 |     ),
22 |     decode_head=dict(in_channels=[96, 192, 320, 384], num_classes=150),  # NOTE(review): in_channels presumably mirror the backbone stage widths
23 | )
24 | 
25 | 
26 | # AdamW optimizer (_delete_=True discards the inherited optim_wrapper instead of merging)
27 | optim_wrapper = dict(
28 |     _delete_=True,
29 |     type="OptimWrapper",
30 |     optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01),
31 |     paramwise_cfg=dict(
32 |         custom_keys={  # overrides applied to parameters whose name contains the key
33 |             "norm": dict(decay_mult=0.0),  # no weight decay
34 |             "clustering.4": dict(lr_mult=10.0),  # 10x LR; the ".4" suffix is specific to p2cconv (per the original note)
35 |             "head": dict(lr_mult=10.0),  # 10x the base learning rate
36 |         }
37 |     ),
38 | )
39 | 
40 | param_scheduler = [  # per-iteration schedule: 1500-iter linear warm-up, then PolyLR to 0 at iter 160000
41 |     dict(type="LinearLR", start_factor=1e-6, by_epoch=False, begin=0, end=1500),
42 |     dict(
43 |         type="PolyLR",
44 |         eta_min=0.0,
45 |         power=1.0,
46 |         begin=1500,
47 |         end=160000,
48 |         by_epoch=False,
49 |     ),
50 | ]
51 | 
52 | # By default, models are trained on 8 GPUs with 2 images per GPU
53 | train_dataloader = dict(batch_size=2)
54 | val_dataloader = dict(batch_size=1)
55 | test_dataloader = val_dataloader  # testing reuses the validation loader settings
56 | 


--------------------------------------------------------------------------------
/segmentation/configs/paca_vit/upernet/upernet_pacavit_base_p2cconv_100_0_512x512_160k_ade20k.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # inherit model, dataset, runtime and schedule defaults; this file only overrides them
 2 |     "../../_base_/models/upernet_swin.py",
 3 |     "../../_base_/datasets/ade20k.py",
 4 |     "../../_base_/default_runtime.py",
 5 |     "../../_base_/schedules/schedule_160k.py",
 6 | ]
 7 | # upernet_swin base model with its backbone swapped for "pacavit_base_p2cconv_100_0_downstream": ADE20K, 512x512, 160k iters.
 8 | crop_size = (512, 512)  # also passed to the data preprocessor as `size`
 9 | data_preprocessor = dict(size=crop_size)
10 | model = dict(
11 |     data_preprocessor=data_preprocessor,
12 |     backbone=dict(
13 |         _delete_=True,  # replace the inherited backbone dict instead of merging into it
14 |         type="pacavit_base_p2cconv_100_0_downstream",
15 |         drop_path_rate=0.1,
16 |         layer_scale=None,
17 |         pretrained=(  # relative checkpoint path -- NOTE(review): confirm the directory it is resolved from
18 |             "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_base_p2cconv_100_0.pth"
19 |         ),
20 |         downstream_cluster_num=[200, 200, 200, 0],  # NOTE(review): presumably one entry per backbone stage -- confirm
21 |     ),
22 |     decode_head=dict(in_channels=[96, 192, 384, 512], num_classes=150),  # NOTE(review): in_channels presumably mirror the backbone stage widths
23 |     auxiliary_head=dict(in_channels=384, num_classes=150),  # same width as the third decode_head input
24 | )
25 | 
26 | # AdamW optimizer; the only per-parameter override is zero weight decay for
27 | # parameters matched by the "norm" key (no position-embedding key is listed)
28 | optim_wrapper = dict(
29 |     _delete_=True,  # discard the inherited optim_wrapper instead of merging into it
30 |     type="OptimWrapper",
31 |     optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01),
32 |     paramwise_cfg=dict(custom_keys={"norm": dict(decay_mult=0.0)}),
33 | )
34 | 
35 | param_scheduler = [  # per-iteration schedule: 1500-iter linear warm-up, then PolyLR to 0 at iter 160000
36 |     dict(type="LinearLR", start_factor=1e-6, by_epoch=False, begin=0, end=1500),
37 |     dict(
38 |         type="PolyLR",
39 |         eta_min=0.0,
40 |         power=1.0,
41 |         begin=1500,
42 |         end=160000,
43 |         by_epoch=False,
44 |     ),
45 | ]
46 | 
47 | # By default, models are trained on 8 GPUs with 2 images per GPU
48 | train_dataloader = dict(batch_size=2)
49 | val_dataloader = dict(batch_size=1)
50 | test_dataloader = val_dataloader  # testing reuses the validation loader settings
51 | 


--------------------------------------------------------------------------------
/segmentation/configs/paca_vit/upernet/upernet_pacavit_small_p2cconv_100_0_512x512_160k_ade20k.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # inherit model, dataset, runtime and schedule defaults; this file only overrides them
 2 |     "../../_base_/models/upernet_swin.py",
 3 |     "../../_base_/datasets/ade20k.py",
 4 |     "../../_base_/default_runtime.py",
 5 |     "../../_base_/schedules/schedule_160k.py",
 6 | ]
 7 | # upernet_swin base model with its backbone swapped for "pacavit_small_p2cconv_100_0_downstream": ADE20K, 512x512, 160k iters.
 8 | crop_size = (512, 512)  # also passed to the data preprocessor as `size`
 9 | data_preprocessor = dict(size=crop_size)
10 | model = dict(
11 |     data_preprocessor=data_preprocessor,
12 |     backbone=dict(
13 |         _delete_=True,  # replace the inherited backbone dict instead of merging into it
14 |         type="pacavit_small_p2cconv_100_0_downstream",
15 |         drop_path_rate=0.1,
16 |         layer_scale=None,
17 |         pretrained=(  # relative checkpoint path -- NOTE(review): confirm the directory it is resolved from
18 |             "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_small_p2cconv_100_0.pth"
19 |         ),
20 |         downstream_cluster_num=[200, 200, 200, 0],  # NOTE(review): presumably one entry per backbone stage -- confirm
21 |     ),
22 |     decode_head=dict(in_channels=[96, 192, 320, 384], num_classes=150),  # NOTE(review): in_channels presumably mirror the backbone stage widths
23 |     auxiliary_head=dict(in_channels=320, num_classes=150),  # same width as the third decode_head input
24 | )
25 | 
26 | # AdamW optimizer; the only per-parameter override is zero weight decay for
27 | # parameters matched by the "norm" key (no position-embedding key is listed)
28 | optim_wrapper = dict(
29 |     _delete_=True,  # discard the inherited optim_wrapper instead of merging into it
30 |     type="OptimWrapper",
31 |     optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01),
32 |     paramwise_cfg=dict(custom_keys={"norm": dict(decay_mult=0.0)}),
33 | )
34 | 
35 | param_scheduler = [  # per-iteration schedule: 1500-iter linear warm-up, then PolyLR to 0 at iter 160000
36 |     dict(type="LinearLR", start_factor=1e-6, by_epoch=False, begin=0, end=1500),
37 |     dict(
38 |         type="PolyLR",
39 |         eta_min=0.0,
40 |         power=1.0,
41 |         begin=1500,
42 |         end=160000,
43 |         by_epoch=False,
44 |     ),
45 | ]
46 | 
47 | # By default, models are trained on 8 GPUs with 2 images per GPU
48 | train_dataloader = dict(batch_size=2)
49 | val_dataloader = dict(batch_size=1)
50 | test_dataloader = val_dataloader  # testing reuses the validation loader settings
51 | 


--------------------------------------------------------------------------------
/segmentation/configs/paca_vit/upernet/upernet_pacavit_tiny_p2cconv_100_0_512x512_160k_ade20k.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # inherit model, dataset, runtime and schedule defaults; this file only overrides them
 2 |     "../../_base_/models/upernet_swin.py",
 3 |     "../../_base_/datasets/ade20k.py",
 4 |     "../../_base_/default_runtime.py",
 5 |     "../../_base_/schedules/schedule_160k.py",
 6 | ]
 7 | # upernet_swin base model with its backbone swapped for "pacavit_tiny_p2cconv_100_0_downstream": ADE20K, 512x512, 160k iters.
 8 | crop_size = (512, 512)  # also passed to the data preprocessor as `size`
 9 | data_preprocessor = dict(size=crop_size)
10 | model = dict(
11 |     data_preprocessor=data_preprocessor,
12 |     backbone=dict(
13 |         _delete_=True,  # replace the inherited backbone dict instead of merging into it
14 |         type="pacavit_tiny_p2cconv_100_0_downstream",
15 |         drop_path_rate=0.1,
16 |         layer_scale=None,
17 |         pretrained=(  # relative checkpoint path -- NOTE(review): confirm the directory it is resolved from
18 |             "../work_dirs/classification/cvpr23_paca/IMNET_224_pacavit_tiny_p2cconv_100_0.pth"
19 |         ),
20 |         downstream_cluster_num=[200, 200, 200, 0],  # NOTE(review): presumably one entry per backbone stage -- confirm
21 |     ),
22 |     decode_head=dict(in_channels=[96, 192, 320, 384], num_classes=150),  # NOTE(review): in_channels presumably mirror the backbone stage widths
23 |     auxiliary_head=dict(in_channels=320, num_classes=150),  # same width as the third decode_head input
24 | )
25 | 
26 | # AdamW optimizer; the only per-parameter override is zero weight decay for
27 | # parameters matched by the "norm" key (no position-embedding key is listed)
28 | optim_wrapper = dict(
29 |     _delete_=True,  # discard the inherited optim_wrapper instead of merging into it
30 |     type="OptimWrapper",
31 |     optimizer=dict(type="AdamW", lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01),
32 |     paramwise_cfg=dict(custom_keys={"norm": dict(decay_mult=0.0)}),
33 | )
34 | 
35 | param_scheduler = [  # per-iteration schedule: 1500-iter linear warm-up, then PolyLR to 0 at iter 160000
36 |     dict(type="LinearLR", start_factor=1e-6, by_epoch=False, begin=0, end=1500),
37 |     dict(
38 |         type="PolyLR",
39 |         eta_min=0.0,
40 |         power=1.0,
41 |         begin=1500,
42 |         end=160000,
43 |         by_epoch=False,
44 |     ),
45 | ]
46 | 
47 | # By default, models are trained on 8 GPUs with 2 images per GPU
48 | train_dataloader = dict(batch_size=2)
49 | val_dataloader = dict(batch_size=1)
50 | test_dataloader = val_dataloader  # testing reuses the validation loader settings
51 | 


--------------------------------------------------------------------------------
/segmentation/get_flops.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) OpenMMLab. All rights reserved.
  2 | import argparse
  3 | import tempfile
  4 | from pathlib import Path
  5 | 
  6 | import torch
  7 | from mmengine import Config, DictAction
  8 | from mmengine.logging import MMLogger
  9 | from mmengine.model import revert_sync_batchnorm
 10 | from mmengine.registry import init_default_scope
 11 | 
 12 | from mmseg.models import BaseSegmentor
 13 | from mmseg.registry import MODELS
 14 | from mmseg.structures import SegDataSample
 15 | 
 16 | try:
 17 |     from mmengine.analysis import get_model_complexity_info
 18 |     from mmengine.analysis.print_helper import _format_size
 19 | except ImportError:
 20 |     raise ImportError("Please upgrade mmengine >= 0.6.0 to use this script.")
 21 | 
 22 | from models import *
 23 | from mmseg_custom.models import *
 24 | from torchprofile import profile_macs
 25 | 
 26 | 
 27 | def parse_args():
 28 |     parser = argparse.ArgumentParser(description="Get the FLOPs of a segmentor")
 29 |     parser.add_argument("config", help="train config file path")
 30 |     parser.add_argument(
 31 |         "--shape", type=int, nargs="+", default=[2048, 1024], help="input image size"
 32 |     )
 33 |     parser.add_argument(
 34 |         "--cfg-options",
 35 |         nargs="+",
 36 |         action=DictAction,
 37 |         help="override some settings in the used config, the key-value pair "
 38 |         "in xxx=yyy format will be merged into config file. If the value to "
 39 |         'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
 40 |         'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
 41 |         "Note that the quotation marks are necessary and that no white space "
 42 |         "is allowed.",
 43 |     )
 44 |     args = parser.parse_args()
 45 |     return args
 46 | 
 47 | 
 48 | def inference(args: argparse.Namespace, logger: MMLogger) -> dict:
 49 |     config_name = Path(args.config)
 50 | 
 51 |     if not config_name.exists():
 52 |         logger.error(f"Config file {config_name} does not exist")
 53 | 
 54 |     cfg: Config = Config.fromfile(config_name)
 55 |     cfg.work_dir = tempfile.TemporaryDirectory().name
 56 |     cfg.log_level = "WARN"
 57 |     if args.cfg_options is not None:
 58 |         cfg.merge_from_dict(args.cfg_options)
 59 | 
 60 |     init_default_scope(cfg.get("scope", "mmseg"))
 61 | 
 62 |     if len(args.shape) == 1:
 63 |         input_shape = (3, args.shape[0], args.shape[0])
 64 |     elif len(args.shape) == 2:
 65 |         input_shape = (3,) + tuple(args.shape)
 66 |     else:
 67 |         raise ValueError("invalid input shape")
 68 |     result = {}
 69 | 
 70 |     model: BaseSegmentor = MODELS.build(cfg.model)
 71 |     if hasattr(model, "auxiliary_head"):
 72 |         model.auxiliary_head = None
 73 |     if torch.cuda.is_available():
 74 |         model.cuda()
 75 |     model = revert_sync_batchnorm(model)
 76 |     result["ori_shape"] = input_shape[-2:]
 77 |     result["pad_shape"] = input_shape[-2:]
 78 |     data_batch = {
 79 |         "inputs": [torch.rand(input_shape)],
 80 |         "data_samples": [SegDataSample(metainfo=result)],
 81 |     }
 82 |     data = model.data_preprocessor(data_batch)
 83 |     model.eval()
 84 |     if cfg.model.decode_head.type in ["MaskFormerHead", "Mask2FormerHead"]:
 85 |         # TODO: Support MaskFormer and Mask2Former
 86 |         raise NotImplementedError(
 87 |             "MaskFormer and Mask2Former are not " "supported yet."
 88 |         )
 89 |     outputs = get_model_complexity_info(
 90 |         model, input_shape, inputs=data["inputs"], show_table=False, show_arch=False
 91 |     )
 92 |     result["flops"] = _format_size(outputs["flops"])
 93 |     result["params"] = _format_size(outputs["params"])
 94 |     result["compute_type"] = "direct: randomly generate a picture"
 95 | 
 96 |     # torchprofile
 97 |     tp_flops = profile_macs(model, data["inputs"])
 98 |     result["torchprofile_flops"] = _format_size(tp_flops)
 99 | 
100 |     return result
101 | 
102 | 
def main():
    """Run the complexity measurement and print a short report to stdout."""
    args = parse_args()
    logger = MMLogger.get_instance(name="MMLogger")
    result = inference(args, logger)

    rule = "=" * 30
    # Pull every field out first so a missing key fails before any output.
    ori_shape, pad_shape = result["ori_shape"], result["pad_shape"]
    flops = result["flops"]
    torchprofile_flops = result["torchprofile_flops"]
    params = result["params"]
    compute_type = result["compute_type"]

    if pad_shape != ori_shape:
        print(
            f"{rule}\nUse size divisor set input shape "
            f"from {ori_shape} to {pad_shape}"
        )

    report_lines = [
        rule,
        f"Compute type: {compute_type}",
        f"Input shape: {pad_shape}",
        f"Flops: {flops}",
        f"Flops (torchprofile): {torchprofile_flops}",
        f"Params: {params}",
        rule,
    ]
    print("\n".join(report_lines))

    print(
        "!!!Please be cautious if you use the results in papers. "
        "You may need to check if all ops are supported and verify "
        "that the flops computation is correct."
    )
131 | 
132 | 
133 | if __name__ == "__main__":
134 |     main()
135 | 


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .decode_heads import *
2 | from .segmentors import *
3 | 


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/models/decode_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .paca_head import PaCaSegHead
2 | 


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/models/decode_heads/paca_head.py:
--------------------------------------------------------------------------------
  1 | from typing import List, Tuple
  2 | 
  3 | import torch
  4 | from torch import Tensor
  5 | import torch.nn as nn
  6 | import torch.nn.functional as F
  7 | 
  8 | from mmcv.cnn import ConvModule, build_activation_layer, build_norm_layer
  9 | 
 10 | from mmseg.registry import MODELS
 11 | from mmseg.models.builder import build_loss
 12 | from mmseg.models.losses import accuracy
 13 | from mmseg.models.decode_heads.decode_head import BaseDecodeHead
 14 | from mmseg.models.utils import resize
 15 | from mmseg.utils import ConfigType, SampleList
 16 | 
 17 | from einops import rearrange, repeat
 18 | from einops.layers.torch import Rearrange
 19 | 
 20 | 
 21 | @MODELS.register_module()
 22 | class PaCaSegHead(BaseDecodeHead):
 23 |     """The Patch-to-Cluster Attention head for semantic segmentation
 24 | 
 25 |     Args:
 26 |         interpolate_mode: The interpolate mode of MLP head upsample operation.
 27 |             Default: 'bilinear'.
 28 |     """
 29 | 
 30 |     def __init__(
 31 |         self,
 32 |         interpolate_mode="bilinear",
 33 |         aux_loss_decode=dict(
 34 |             type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4
 35 |         ),
 36 |         **kwargs,
 37 |     ):
 38 |         super().__init__(input_transform="multiple_select", **kwargs)
 39 | 
 40 |         self.interpolate_mode = interpolate_mode
 41 |         num_inputs = len(self.in_channels)
 42 | 
 43 |         assert num_inputs == len(self.in_index)
 44 | 
 45 |         self.convs = nn.ModuleList()
 46 |         for i in range(num_inputs):
 47 |             self.convs.append(
 48 |                 ConvModule(
 49 |                     in_channels=self.in_channels[i],
 50 |                     out_channels=self.channels,
 51 |                     kernel_size=1,
 52 |                     stride=1,
 53 |                     norm_cfg=self.norm_cfg,
 54 |                     act_cfg=self.act_cfg,
 55 |                 )
 56 |             )
 57 | 
 58 |         self.q = nn.Sequential(
 59 |             ConvModule(
 60 |                 in_channels=self.channels * num_inputs,
 61 |                 out_channels=self.channels,
 62 |                 kernel_size=1,
 63 |                 norm_cfg=self.norm_cfg,
 64 |                 act_cfg=self.act_cfg,
 65 |             ),
 66 |             Rearrange("B C H W -> B (H W) C"),
 67 |         )
 68 | 
 69 |         self.clustering = nn.Sequential(
 70 |             ConvModule(
 71 |                 in_channels=self.channels * num_inputs,
 72 |                 out_channels=self.channels,
 73 |                 kernel_size=1,
 74 |                 norm_cfg=self.norm_cfg,
 75 |                 act_cfg=self.act_cfg,
 76 |             ),
 77 |             nn.Conv2d(
 78 |                 self.channels, self.num_classes, kernel_size=1
 79 |             ),  # TODO: bias=False
 80 |         )
 81 | 
 82 |         self.k = nn.Sequential(
 83 |             nn.Linear(self.channels * num_inputs, self.channels),
 84 |             Rearrange("B M C -> B C M"),
 85 |             nn.SyncBatchNorm(self.channels),
 86 |             Rearrange("B C M -> B M C"),
 87 |             build_activation_layer(self.act_cfg),
 88 |         )
 89 |         self.v = nn.Sequential(
 90 |             nn.Linear(self.channels * num_inputs, self.channels),
 91 |             Rearrange("B M C -> B C M"),
 92 |             nn.SyncBatchNorm(self.channels),
 93 |             Rearrange("B C M -> B M C"),
 94 |             build_activation_layer(self.act_cfg),
 95 |         )
 96 |         self.proj = ConvModule(
 97 |             in_channels=self.channels,
 98 |             out_channels=self.channels,
 99 |             kernel_size=1,
100 |             norm_cfg=self.norm_cfg,
101 |             act_cfg=self.act_cfg,
102 |         )
103 | 
104 |         if isinstance(aux_loss_decode, dict):
105 |             self.aux_loss_decode = MODELS.build(
106 |                 aux_loss_decode
107 |             )  # build_loss(aux_loss_decode)
108 |         else:
109 |             raise TypeError(
110 |                 f"aux_loss_decode must be a dict,\
111 |                 but got {type(aux_loss_decode)}"
112 |             )
113 | 
114 |     def forward(self, inputs):
115 |         inputs = self._transform_inputs(inputs)
116 | 
117 |         outs = []
118 |         for idx in range(len(inputs)):
119 |             x = inputs[idx]
120 |             conv = self.convs[idx]
121 |             outs.append(
122 |                 resize(
123 |                     input=conv(x),
124 |                     size=inputs[0].shape[2:],
125 |                     mode=self.interpolate_mode,
126 |                     align_corners=self.align_corners,
127 |                 )
128 |             )
129 | 
130 |         x = torch.cat(outs, dim=1)
131 |         H, W = x.shape[2:]
132 | 
133 |         q = self.q(x)  # B N C
134 | 
135 |         c_raw = self.clustering(x)  # B M H W
136 |         c = rearrange(c_raw, "B M H W -> B M (H W)")
137 |         c = c.softmax(dim=-1)
138 | 
139 |         x_ = rearrange(x, "B C H W -> B (H W) C")
140 |         z = c @ x_  # B M C
141 |         k = self.k(z)
142 |         v = self.v(z)
143 | 
144 |         attn = q @ k.transpose(-2, -1)
145 |         attn = attn.softmax(dim=-1)
146 | 
147 |         out = attn @ v  # B N C
148 |         out = rearrange(out, "B (H W) C -> B C H W", H=H, W=W).contiguous()
149 |         out = self.proj(out)
150 | 
151 |         out = self.cls_seg(out)
152 | 
153 |         if self.training:
154 |             return out, c_raw
155 |         else:
156 |             return out
157 | 
158 |     def loss(
159 |         self,
160 |         inputs: Tuple[Tensor],
161 |         batch_data_samples: SampleList,
162 |         train_cfg: ConfigType,
163 |     ) -> Tuple[dict]:
164 |         """Forward function for training.
165 | 
166 |         Args:
167 |             inputs (Tuple[Tensor]): List of multi-level img features.
168 |             batch_data_samples (list[:obj:`SegDataSample`]): The seg
169 |                 data samples. It usually includes information such
170 |                 as `img_metas` or `gt_semantic_seg`.
171 |             train_cfg (dict): The training config.
172 | 
173 |         Returns:
174 |             Tuple[dict[str, Tensor]]: a tuple of dictionary of loss components
175 |         """
176 |         seg_logits, c_raw = self.forward(inputs)
177 |         aux_losses = self.aux_loss_by_paca(c_raw, batch_data_samples)
178 |         losses = self.loss_by_feat(seg_logits, batch_data_samples)
179 |         return losses, aux_losses
180 | 
181 |     def aux_loss_by_paca(
182 |         self, seg_logits: Tensor, batch_data_samples: SampleList
183 |     ) -> dict:
184 |         """Compute segmentation loss."""
185 |         seg_label = self._stack_batch_gt(batch_data_samples)
186 |         loss = dict()
187 |         seg_logits = resize(
188 |             input=seg_logits,
189 |             size=seg_label.shape[2:],
190 |             mode="bilinear",
191 |             align_corners=self.align_corners,
192 |         )
193 |         if self.sampler is not None:
194 |             seg_weight = self.sampler.sample(seg_logits, seg_label)
195 |         else:
196 |             seg_weight = None
197 |         seg_label = seg_label.squeeze(1)
198 | 
199 |         if not isinstance(self.aux_loss_decode, nn.ModuleList):
200 |             aux_losses_decode = [self.aux_loss_decode]
201 |         else:
202 |             aux_losses_decode = self.aux_loss_decode
203 |         for loss_decode in aux_losses_decode:
204 |             if loss_decode.loss_name not in loss:
205 |                 loss[loss_decode.loss_name] = loss_decode(
206 |                     seg_logits,
207 |                     seg_label,
208 |                     weight=seg_weight,
209 |                     ignore_index=self.ignore_index,
210 |                 )
211 |             else:
212 |                 loss[loss_decode.loss_name] += loss_decode(
213 |                     seg_logits,
214 |                     seg_label,
215 |                     weight=seg_weight,
216 |                     ignore_index=self.ignore_index,
217 |                 )
218 | 
219 |         loss["acc_seg"] = accuracy(
220 |             seg_logits, seg_label, ignore_index=self.ignore_index
221 |         )
222 |         return loss
223 | 


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/models/segmentors/__init__.py:
--------------------------------------------------------------------------------
1 | from .encoder_decoder_paca import PaCaEncoderDecoder
2 | 


--------------------------------------------------------------------------------
/segmentation/mmseg_custom/models/segmentors/encoder_decoder_paca.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) OpenMMLab. All rights reserved.
 2 | from typing import List, Optional
 3 | 
 4 | import torch.nn as nn
 5 | import torch.nn.functional as F
 6 | from torch import Tensor
 7 | 
 8 | from mmseg.registry import MODELS
 9 | from mmseg.utils import (
10 |     ConfigType,
11 |     OptConfigType,
12 |     OptMultiConfig,
13 |     OptSampleList,
14 |     SampleList,
15 |     add_prefix,
16 | )
17 | 
18 | from mmseg.models.segmentors import EncoderDecoder
19 | 
20 | 
@MODELS.register_module()
class PaCaEncoderDecoder(EncoderDecoder):
    """Encoder-decoder segmentor for decode heads whose ``loss`` returns two
    loss dicts instead of one."""

    def __init__(self, **kwargs) -> None:
        super().__init__(**kwargs)

    def _decode_head_forward_train(
        self, inputs: List[Tensor], data_samples: SampleList
    ) -> dict:
        """Run the decode head in training and collect its losses.

        The first dict returned by the head is stored under the ``decode``
        prefix, the second one under the ``paca`` prefix.
        """
        head_losses = self.decode_head.loss(inputs, data_samples, self.train_cfg)
        return {
            **add_prefix(head_losses[0], "decode"),
            **add_prefix(head_losses[1], "paca"),
        }
37 | 


--------------------------------------------------------------------------------
/segmentation/test_mmseg.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) OpenMMLab. All rights reserved.
  2 | import argparse
  3 | import os
  4 | import os.path as osp
  5 | 
  6 | from mmengine.config import Config, DictAction
  7 | from mmengine.runner import Runner
  8 | 
  9 | from models import *
 10 | from mmseg_custom.models import *
 11 | 
 12 | 
 13 | # TODO: support fuse_conv_bn, visualization, and format_only
 14 | def parse_args():
 15 |     parser = argparse.ArgumentParser(description="MMSeg test (and eval) a model")
 16 |     parser.add_argument("config", help="train config file path")
 17 |     parser.add_argument("checkpoint", help="checkpoint file")
 18 |     parser.add_argument(
 19 |         "--work-dir",
 20 |         help=(
 21 |             "if specified, the evaluation metric results will be dumped"
 22 |             "into the directory as json"
 23 |         ),
 24 |     )
 25 |     parser.add_argument(
 26 |         "--out",
 27 |         type=str,
 28 |         help="The directory to save output prediction for offline evaluation",
 29 |     )
 30 |     parser.add_argument("--show", action="store_true", help="show prediction results")
 31 |     parser.add_argument(
 32 |         "--show-dir",
 33 |         help="directory where painted images will be saved. "
 34 |         "If specified, it will be automatically saved "
 35 |         "to the work_dir/timestamp/show_dir",
 36 |     )
 37 |     parser.add_argument(
 38 |         "--wait-time", type=float, default=2, help="the interval of show (s)"
 39 |     )
 40 |     parser.add_argument(
 41 |         "--cfg-options",
 42 |         nargs="+",
 43 |         action=DictAction,
 44 |         help="override some settings in the used config, the key-value pair "
 45 |         "in xxx=yyy format will be merged into config file. If the value to "
 46 |         'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
 47 |         'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
 48 |         "Note that the quotation marks are necessary and that no white space "
 49 |         "is allowed.",
 50 |     )
 51 |     parser.add_argument(
 52 |         "--launcher",
 53 |         choices=["none", "pytorch", "slurm", "mpi"],
 54 |         default="none",
 55 |         help="job launcher",
 56 |     )
 57 |     parser.add_argument("--tta", action="store_true", help="Test time augmentation")
 58 |     parser.add_argument("--local_rank", type=int, default=0)
 59 |     args = parser.parse_args()
 60 |     if "LOCAL_RANK" not in os.environ:
 61 |         os.environ["LOCAL_RANK"] = str(args.local_rank)
 62 | 
 63 |     return args
 64 | 
 65 | 
 66 | def trigger_visualization_hook(cfg, args):
 67 |     default_hooks = cfg.default_hooks
 68 |     if "visualization" in default_hooks:
 69 |         visualization_hook = default_hooks["visualization"]
 70 |         # Turn on visualization
 71 |         visualization_hook["draw"] = True
 72 |         if args.show:
 73 |             visualization_hook["show"] = True
 74 |             visualization_hook["wait_time"] = args.wait_time
 75 |         if args.show_dir:
 76 |             visulizer = cfg.visualizer
 77 |             visulizer["save_dir"] = args.show_dir
 78 |     else:
 79 |         raise RuntimeError(
 80 |             "VisualizationHook must be included in default_hooks."
 81 |             "refer to usage "
 82 |             "\"visualization=dict(type='VisualizationHook')\""
 83 |         )
 84 | 
 85 |     return cfg
 86 | 
 87 | 
 88 | def main():
 89 |     args = parse_args()
 90 | 
 91 |     # load config
 92 |     cfg = Config.fromfile(args.config)
 93 |     cfg.launcher = args.launcher
 94 |     if args.cfg_options is not None:
 95 |         cfg.merge_from_dict(args.cfg_options)
 96 | 
 97 |     # work_dir is determined in this priority: CLI > segment in file > filename
 98 |     if args.work_dir is not None:
 99 |         # update configs according to CLI args if args.work_dir is not None
100 |         cfg.work_dir = args.work_dir
101 |     elif cfg.get("work_dir", None) is None:
102 |         # use config filename as default work_dir if cfg.work_dir is None
103 |         cfg.work_dir = osp.join(
104 |             "./work_dirs", osp.splitext(osp.basename(args.config))[0]
105 |         )
106 | 
107 |     cfg.load_from = args.checkpoint
108 | 
109 |     if args.show or args.show_dir:
110 |         cfg = trigger_visualization_hook(cfg, args)
111 | 
112 |     if args.tta:
113 |         cfg.test_dataloader.dataset.pipeline = cfg.tta_pipeline
114 |         cfg.tta_model.module = cfg.model
115 |         cfg.model = cfg.tta_model
116 | 
117 |     # add output_dir in metric
118 |     if args.out is not None:
119 |         cfg.test_evaluator["output_dir"] = args.out
120 |         cfg.test_evaluator["keep_results"] = True
121 | 
122 |     # build the runner from config
123 |     runner = Runner.from_cfg(cfg)
124 | 
125 |     # start testing
126 |     runner.test()
127 | 
128 | 
129 | if __name__ == "__main__":
130 |     main()
131 | 


--------------------------------------------------------------------------------
/segmentation/test_mmseg.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | if [ "$#" -lt 5 ]; then
 4 |     echo "Usage: me.sh Relative_config_filename Checkpoint_filename gpus nb_gpus port [others]"
 5 |     exit
 6 | fi
 7 | 
 8 | PYTHON=${PYTHON:-"python"}
 9 | 
10 | CONFIG_FILE=$1
11 | CHK_FILE=$2
12 | GPUS=$3
13 | NUM_GPUS=$4
14 | PORT=${PORT:-$5}
15 | 
16 | 
17 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
18 | 
19 | CONFIG_FILENAME=${CONFIG_FILE##*/}
20 | CONFIG_BASE="${CONFIG_FILENAME%.*}"
21 | 
22 | WORK_DIR="$( cd "$( dirname "${CHK_FILE}" )" >/dev/null 2>&1 && pwd )"/$CONFIG_BASE
23 | 
24 | if [ -d $WORK_DIR ]; then 
25 |   echo "... Done already!"
26 |   exit 
27 | fi 
28 | 
29 | # export NCCL_DEBUG=INFO
30 | 
31 | TORCH_DISTRIBUTED_DEBUG=INFO OMP_NUM_THREADS=1 MKL_NUM_THREADS=1 CUDA_VISIBLE_DEVICES=$GPUS \
32 |   torchrun \
33 |     --rdzv_backend c10d \
34 |     --rdzv_endpoint localhost:$PORT \
35 |     --nnodes 1 \
36 |     --nproc_per_node $NUM_GPUS \
37 |     $DIR/test_mmseg.py \
38 |     $CONFIG_FILE \
39 |     $CHK_FILE \
40 |     --launcher pytorch \
41 |     --work-dir $WORK_DIR \
42 |     ${@:6}
43 | 


--------------------------------------------------------------------------------
/segmentation/train_mmseg.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) OpenMMLab. All rights reserved.
  2 | import argparse
  3 | import logging
  4 | import os
  5 | import os.path as osp
  6 | 
  7 | from mmengine.config import Config, DictAction
  8 | from mmengine.logging import print_log
  9 | from mmengine.runner import Runner
 10 | 
 11 | from mmseg.registry import RUNNERS
 12 | 
 13 | from models import *
 14 | from mmseg_custom.models import *
 15 | 
 16 | 
 17 | def parse_args():
 18 |     parser = argparse.ArgumentParser(description="Train a segmentor")
 19 |     parser.add_argument("config", help="train config file path")
 20 |     parser.add_argument("--work-dir", help="the dir to save logs and models")
 21 |     parser.add_argument(
 22 |         "--resume",
 23 |         action="store_true",
 24 |         default=False,
 25 |         help="resume from the latest checkpoint in the work_dir automatically",
 26 |     )
 27 |     parser.add_argument(
 28 |         "--amp",
 29 |         action="store_true",
 30 |         default=False,
 31 |         help="enable automatic-mixed-precision training",
 32 |     )
 33 |     parser.add_argument(
 34 |         "--cfg-options",
 35 |         nargs="+",
 36 |         action=DictAction,
 37 |         help="override some settings in the used config, the key-value pair "
 38 |         "in xxx=yyy format will be merged into config file. If the value to "
 39 |         'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
 40 |         'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
 41 |         "Note that the quotation marks are necessary and that no white space "
 42 |         "is allowed.",
 43 |     )
 44 |     parser.add_argument(
 45 |         "--launcher",
 46 |         choices=["none", "pytorch", "slurm", "mpi"],
 47 |         default="none",
 48 |         help="job launcher",
 49 |     )
 50 |     parser.add_argument("--local_rank", type=int, default=0)
 51 |     args = parser.parse_args()
 52 |     if "LOCAL_RANK" not in os.environ:
 53 |         os.environ["LOCAL_RANK"] = str(args.local_rank)
 54 | 
 55 |     return args
 56 | 
 57 | 
 58 | def main():
 59 |     args = parse_args()
 60 | 
 61 |     # load config
 62 |     cfg = Config.fromfile(args.config)
 63 |     cfg.launcher = args.launcher
 64 |     if args.cfg_options is not None:
 65 |         cfg.merge_from_dict(args.cfg_options)
 66 | 
 67 |     # work_dir is determined in this priority: CLI > segment in file > filename
 68 |     if args.work_dir is not None:
 69 |         # update configs according to CLI args if args.work_dir is not None
 70 |         cfg.work_dir = args.work_dir
 71 |     elif cfg.get("work_dir", None) is None:
 72 |         # use config filename as default work_dir if cfg.work_dir is None
 73 |         cfg.work_dir = osp.join(
 74 |             "./work_dirs", osp.splitext(osp.basename(args.config))[0]
 75 |         )
 76 | 
 77 |     # enable automatic-mixed-precision training
 78 |     if args.amp is True:
 79 |         optim_wrapper = cfg.optim_wrapper.type
 80 |         if optim_wrapper == "AmpOptimWrapper":
 81 |             print_log(
 82 |                 "AMP training is already enabled in your config.",
 83 |                 logger="current",
 84 |                 level=logging.WARNING,
 85 |             )
 86 |         else:
 87 |             assert optim_wrapper == "OptimWrapper", (
 88 |                 "`--amp` is only supported when the optimizer wrapper type is "
 89 |                 f"`OptimWrapper` but got {optim_wrapper}."
 90 |             )
 91 |             cfg.optim_wrapper.type = "AmpOptimWrapper"
 92 |             cfg.optim_wrapper.loss_scale = "dynamic"
 93 | 
 94 |     # resume training
 95 |     cfg.resume = args.resume
 96 | 
 97 |     # build the runner from config
 98 |     if "runner_type" not in cfg:
 99 |         # build the default runner
100 |         runner = Runner.from_cfg(cfg)
101 |     else:
102 |         # build customized runner from the registry
103 |         # if 'runner_type' is set in the cfg
104 |         runner = RUNNERS.build(cfg)
105 | 
106 |     # start training
107 |     runner.train()
108 | 
109 | 
110 | if __name__ == "__main__":
111 |     main()
112 | 


--------------------------------------------------------------------------------
/segmentation/train_mmseg.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | if [ "$#" -lt 7 ]; then
 4 |     echo "Usage: me.sh Relative_config_filename Remove_old_if_exist_0_or_1 Exp_name Tag gpus nb_gpus port [others]"
 5 |     exit
 6 | fi
 7 | 
 8 | PYTHON=${PYTHON:-"python"}
 9 | 
10 | CONFIG_FILE=$1
11 | RM_OLD=$2
12 | EXP_NAME=$3
13 | TAG=$4
14 | GPUS=$5
15 | NUM_GPUS=$6
16 | PORT=${PORT:-$7}
17 | 
18 | 
19 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
20 | 
21 | CONFIG_FILENAME=${CONFIG_FILE##*/}
22 | CONFIG_BASE="${CONFIG_FILENAME%.*}"
23 | 
24 | WORK_DIR=${DIR}/../work_dirs/segmentation/${EXP_NAME}/${CONFIG_BASE}_$TAG
25 | 
26 | if [ -d $WORK_DIR ]; then
27 |   echo "$WORK_DIR --- Already exists"
28 |   if [ $2 -gt 0 ]; then
29 |     while true; do
30 |         read -p "Are you sure to delete this result directory? " yn
31 |         case $yn in
32 |             [Yy]* ) rm -r $WORK_DIR; mkdir -p $WORK_DIR; break;;
33 |             [Nn]* ) exit;;
34 |             * ) echo "Please answer yes or no.";;
35 |         esac
36 |     done
37 |   else
38 |     echo "Resume"
39 |   fi
40 | else
41 |     mkdir -p $WORK_DIR
42 | fi
43 | 
44 | # export NCCL_DEBUG=INFO
45 | 
46 | TORCH_DISTRIBUTED_DEBUG=INFO OMP_NUM_THREADS=1 MKL_NUM_THREADS=1 CUDA_VISIBLE_DEVICES=$GPUS \
47 |   torchrun \
48 |     --rdzv_backend c10d \
49 |     --rdzv_endpoint localhost:$PORT \
50 |     --nnodes 1 \
51 |     --nproc_per_node $NUM_GPUS \
52 |     $DIR/train_mmseg.py $CONFIG_FILE \
53 |     --amp \
54 |     --resume \
55 |     --launcher pytorch \
56 |     --work-dir $WORK_DIR \
57 |     ${@:8}
58 | 


--------------------------------------------------------------------------------