├── .gitignore ├── README.md ├── classification ├── configs │ ├── _base_ │ │ ├── datasets │ │ │ ├── cifar100_bs16.py │ │ │ ├── cifar10_bs16.py │ │ │ ├── imagenet_bs32.py │ │ │ ├── imagenet_bs32_pil_resize.py │ │ │ ├── imagenet_bs64.py │ │ │ ├── imagenet_bs64_autoaug.py │ │ │ ├── imagenet_bs64_pil_resize.py │ │ │ ├── imagenet_bs64_swin_224.py │ │ │ ├── imagenet_bs64_swin_384.py │ │ │ ├── pipelines │ │ │ │ ├── auto_aug.py │ │ │ │ └── rand_aug.py │ │ │ └── voc_bs16.py │ │ ├── default_runtime.py │ │ ├── models │ │ │ ├── mobilenet_v2_1x.py │ │ │ ├── mobilenet_v3_large_imagenet.py │ │ │ ├── mobilenet_v3_small_cifar.py │ │ │ ├── mobilenet_v3_small_imagenet.py │ │ │ ├── regnet │ │ │ │ ├── regnetx_1.6gf.py │ │ │ │ ├── regnetx_12gf.py │ │ │ │ ├── regnetx_3.2gf.py │ │ │ │ ├── regnetx_4.0gf.py │ │ │ │ ├── regnetx_400mf.py │ │ │ │ ├── regnetx_6.4gf.py │ │ │ │ ├── regnetx_8.0gf.py │ │ │ │ └── regnetx_800mf.py │ │ │ ├── repvgg-A0_in1k.py │ │ │ ├── repvgg-B3_lbs-mixup_in1k.py │ │ │ ├── resnest101.py │ │ │ ├── resnest200.py │ │ │ ├── resnest269.py │ │ │ ├── resnest50.py │ │ │ ├── resnet101.py │ │ │ ├── resnet101_cifar.py │ │ │ ├── resnet152.py │ │ │ ├── resnet152_cifar.py │ │ │ ├── resnet18.py │ │ │ ├── resnet18_cifar.py │ │ │ ├── resnet34.py │ │ │ ├── resnet34_cifar.py │ │ │ ├── resnet50.py │ │ │ ├── resnet50_cifar.py │ │ │ ├── resnet50_cifar_cutmix.py │ │ │ ├── resnet50_cifar_mixup.py │ │ │ ├── resnet50_cutmix.py │ │ │ ├── resnet50_label_smooth.py │ │ │ ├── resnet50_mixup.py │ │ │ ├── resnetv1d101.py │ │ │ ├── resnetv1d152.py │ │ │ ├── resnetv1d50.py │ │ │ ├── resnext101_32x4d.py │ │ │ ├── resnext101_32x8d.py │ │ │ ├── resnext152_32x4d.py │ │ │ ├── resnext50_32x4d.py │ │ │ ├── seresnet101.py │ │ │ ├── seresnet50.py │ │ │ ├── seresnext101_32x4d.py │ │ │ ├── seresnext50_32x4d.py │ │ │ ├── shufflenet_v1_1x.py │ │ │ ├── shufflenet_v2_1x.py │ │ │ ├── swin_transformer │ │ │ │ ├── base_224.py │ │ │ │ ├── base_384.py │ │ │ │ ├── large_224.py │ │ │ │ ├── large_384.py │ │ │ │ ├── small_224.py │ │ │ │ └── tiny_224.py │ │ │ ├── tnt_s_patch16_224.py │ │ │ ├── vgg11.py │ │ │ ├── vgg11bn.py │ │ │ ├── vgg13.py │ │ │ ├── vgg13bn.py │ │ │ ├── vgg16.py │ │ │ ├── vgg16bn.py │ │ │ ├── vgg19.py │ │ │ ├── vgg19bn.py │ │ │ ├── vit_base_patch16_224_finetune.py │ │ │ ├── vit_base_patch16_224_pretrain.py │ │ │ ├── vit_base_patch16_384_finetune.py │ │ │ ├── vit_base_patch32_384_finetune.py │ │ │ ├── vit_large_patch16_224_finetune.py │ │ │ ├── vit_large_patch16_384_finetune.py │ │ │ └── vit_large_patch32_384_finetune.py │ │ └── schedules │ │ │ ├── cifar10_bs128.py │ │ │ ├── imagenet_bs1024_adamw_swin.py │ │ │ ├── imagenet_bs1024_linearlr_bn_nowd.py │ │ │ ├── imagenet_bs2048.py │ │ │ ├── imagenet_bs2048_AdamW.py │ │ │ ├── imagenet_bs2048_coslr.py │ │ │ ├── imagenet_bs256.py │ │ │ ├── imagenet_bs256_140e.py │ │ │ ├── imagenet_bs256_200e_coslr_warmup.py │ │ │ ├── imagenet_bs256_coslr.py │ │ │ ├── imagenet_bs256_epochstep.py │ │ │ └── imagenet_bs4096_AdamW.py │ ├── regnet │ │ ├── regnet_0.4G_origin.py │ │ ├── regnet_0.4G_pruning.py │ │ ├── regnet_0.8G_origin.py │ │ ├── regnet_0.8G_pruning.py │ │ ├── regnet_1.6G_origin.py │ │ ├── regnet_1.6G_pruning.py │ │ ├── regnet_3.2G_finetune.py │ │ ├── regnet_3.2G_origin.py │ │ ├── regnet_3.2G_pruning.py │ │ └── regnet_6.4G_origin.py │ ├── resnet50 │ │ ├── resnet50_finetune.py │ │ └── resnet50_pruning.py │ └── resnext │ │ ├── resnext50_finetune.py │ │ └── resnext50_pruning.py └── tools │ ├── dist_test.sh │ ├── dist_train.sh │ ├── fisher_pruning_hook │ ├── model_eval.py │ ├── slurm_test.sh │ ├── 
slurm_train.sh │ ├── test.py │ └── train.py ├── detection ├── configs │ ├── _base_ │ │ ├── datasets │ │ │ ├── cityscapes_detection.py │ │ │ ├── cityscapes_instance.py │ │ │ ├── coco_detection.py │ │ │ ├── coco_instance.py │ │ │ ├── coco_instance_semantic.py │ │ │ ├── deepfashion.py │ │ │ ├── lvis_v0.5_instance.py │ │ │ ├── lvis_v1_instance.py │ │ │ ├── voc0712.py │ │ │ └── wider_face.py │ │ ├── default_runtime.py │ │ ├── models │ │ │ ├── cascade_mask_rcnn_r50_fpn.py │ │ │ ├── cascade_rcnn_r50_fpn.py │ │ │ ├── fast_rcnn_r50_fpn.py │ │ │ ├── faster_rcnn_r50_caffe_c4.py │ │ │ ├── faster_rcnn_r50_caffe_dc5.py │ │ │ ├── faster_rcnn_r50_fpn.py │ │ │ ├── mask_rcnn_r50_caffe_c4.py │ │ │ ├── mask_rcnn_r50_fpn.py │ │ │ ├── retinanet_r50_fpn.py │ │ │ ├── rpn_r50_caffe_c4.py │ │ │ ├── rpn_r50_fpn.py │ │ │ └── ssd300.py │ │ └── schedules │ │ │ ├── schedule_1x.py │ │ │ ├── schedule_20e.py │ │ │ └── schedule_2x.py │ ├── atss │ │ ├── atss_finetune.py │ │ └── atss_pruning.py │ ├── faster_rcnn │ │ ├── faster_finetune.py │ │ └── faster_pruning.py │ ├── fsaf │ │ ├── fsaf_finetune.py │ │ └── fsaf_pruning.py │ ├── paa │ │ ├── paa_finetune.py │ │ └── paa_pruning.py │ └── retina │ │ ├── retina_finetune.py │ │ └── retina_pruning.py └── tools │ ├── dist_test.sh │ ├── dist_train.sh │ ├── fisher_pruning_hook │ ├── slurm_test.sh │ ├── slurm_train.sh │ ├── test.py │ └── train.py └── fisher_pruning_hook ├── __init__.py ├── fisher_pruning.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | __pycache__ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FisherPruning-Pytorch 2 | An implementation of "Group Fisher Pruning for Practical Network Compression" based on PyTorch and MMCV 3 | 4 | --- 5 | 6 | 7 | ### Main Functions 8 | 9 | - Pruning for fully-convolutional structures, 10 | such as one-stage detectors (copied from the [official code](https://github.com/jshilong/FisherPruning)); 11 | 12 | - Pruning for networks combining convolutional layers and fully-connected layers, such as Faster R-CNN and ResNet; 13 | 14 | - Pruning for networks that involve group convolutions, such as ResNeXt and RegNet. 15 | 16 | ### Usage 17 | 18 | #### Requirements 19 | 20 | ```text 21 | torch 22 | torchvision 23 | mmcv / mmcv-full 24 | mmcls 25 | mmdet 26 | ``` 27 | #### Compatibility 28 | This code is tested with 29 | 30 | ```text 31 | pytorch=1.3 32 | torchvision=0.4 33 | cudatoolkit=10.0 34 | mmcv-full==1.3.14 35 | mmcls==0.16 36 | mmdet==2.17 37 | ``` 38 | 39 | and 40 | 41 | ```text 42 | pytorch=1.8 43 | torchvision=0.9 44 | cudatoolkit=11.1 45 | mmcv==1.3.16 46 | mmcls==0.16 47 | mmdet==2.17 48 | ``` 49 | 50 | #### Data 51 | 52 | Download [ImageNet](https://image-net.org/download.php) and [COCO](https://cocodataset.org/), 53 | then extract them and organize the folders as 54 | 55 | ``` 56 | - detection 57 | |- tools 58 | |- configs 59 | |- data 60 | | |- coco 61 | | | |- train2017 62 | | | |- val2017 63 | | | |- test2017 64 | | | |- annotations 65 | | 66 | - classification 67 | |- tools 68 | |- configs 69 | |- data 70 | | |- imagenet 71 | | | |- train 72 | | | |- val 73 | | | |- test 74 | | | |- meta 75 | | 76 | - ... 77 | ``` 78 | 79 | #### Commands 80 | 81 | e.g. Classification 82 | ```bash 83 | cd classification 84 | ``` 85 | 1.
Pruning 86 | ```bash 87 | # single GPU 88 | python tools/train.py configs/xxx_pruning.py --gpus=1 89 | # multiple GPUs (e.g. 4 GPUs) 90 | python -m torch.distributed.launch --nproc_per_node=4 tools/train.py configs/xxx_pruning.py --launcher pytorch 91 | ``` 92 | 93 | 2. Fine-tune 94 | 95 | In the config file, set `deploy_from` to the path of the pruned model, and set `samples_per_gpu` to 256/#GPUs. Then 96 | ```bash 97 | # single GPU 98 | python tools/train.py configs/xxx_finetune.py --gpus=1 99 | # multiple GPUs (e.g. 4 GPUs) 100 | python -m torch.distributed.launch --nproc_per_node=4 tools/train.py configs/xxx_finetune.py --launcher pytorch 101 | ``` 102 | 103 | 3. Test 104 | 105 | In the config file, set `load_from` to the path of the fine-tuned model. Then 106 | ```bash 107 | python tools/test.py configs/xxx_finetune.py --metrics=accuracy 108 | ``` 109 | 110 | The commands for pruning and fine-tuning detection models are similar to those for classification models. 111 | Instructions will be added soon. 112 | 113 | ## Acknowledgments 114 | 115 | This project builds on the official code [FisherPruning](https://github.com/jshilong/FisherPruning). -------------------------------------------------------------------------------- /classification/configs/_base_/datasets/cifar100_bs16.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CIFAR100' 3 | img_norm_cfg = dict( 4 | mean=[129.304, 124.070, 112.434], 5 | std=[68.170, 65.392, 70.418], 6 | to_rgb=False) 7 | train_pipeline = [ 8 | dict(type='RandomCrop', size=32, padding=4), 9 | dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), 10 | dict(type='Normalize', **img_norm_cfg), 11 | dict(type='ImageToTensor', keys=['img']), 12 | dict(type='ToTensor', keys=['gt_label']), 13 | dict(type='Collect', keys=['img', 'gt_label']) 14 | ] 15 | test_pipeline = [ 16 | dict(type='Normalize', **img_norm_cfg), 17 | dict(type='ImageToTensor', keys=['img']), 18 | dict(type='Collect', keys=['img']) 19 | ] 20 | data = dict( 21 | samples_per_gpu=16, 22 | workers_per_gpu=2, 23 | train=dict( 24 | type=dataset_type, 25 | data_prefix='data/cifar100', 26 | pipeline=train_pipeline), 27 | val=dict( 28 | type=dataset_type, 29 | data_prefix='data/cifar100', 30 | pipeline=test_pipeline, 31 | test_mode=True), 32 | test=dict( 33 | type=dataset_type, 34 | data_prefix='data/cifar100', 35 | pipeline=test_pipeline, 36 | test_mode=True)) 37 | -------------------------------------------------------------------------------- /classification/configs/_base_/datasets/cifar10_bs16.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CIFAR10' 3 | img_norm_cfg = dict( 4 | mean=[125.307, 122.961, 113.8575], 5 | std=[51.5865, 50.847, 51.255], 6 | to_rgb=False) 7 | train_pipeline = [ 8 | dict(type='RandomCrop', size=32, padding=4), 9 | dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), 10 | dict(type='Normalize', **img_norm_cfg), 11 | dict(type='ImageToTensor', keys=['img']), 12 | dict(type='ToTensor', keys=['gt_label']), 13 | dict(type='Collect', keys=['img', 'gt_label']) 14 | ] 15 | test_pipeline = [ 16 | dict(type='Normalize', **img_norm_cfg), 17 | dict(type='ImageToTensor', keys=['img']), 18 | dict(type='Collect', keys=['img']) 19 | ] 20 | data = dict( 21 | samples_per_gpu=16, 22 | workers_per_gpu=2, 23 | train=dict( 24 | type=dataset_type, data_prefix='data/cifar10', 25 | pipeline=train_pipeline), 26 | val=dict( 27 |
type=dataset_type, 28 | data_prefix='data/cifar10', 29 | pipeline=test_pipeline, 30 | test_mode=True), 31 | test=dict( 32 | type=dataset_type, 33 | data_prefix='data/cifar10', 34 | pipeline=test_pipeline, 35 | test_mode=True)) 36 | -------------------------------------------------------------------------------- /classification/configs/_base_/datasets/imagenet_bs32.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ImageNet' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='RandomResizedCrop', size=224), 8 | dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), 9 | dict(type='Normalize', **img_norm_cfg), 10 | dict(type='ImageToTensor', keys=['img']), 11 | dict(type='ToTensor', keys=['gt_label']), 12 | dict(type='Collect', keys=['img', 'gt_label']) 13 | ] 14 | test_pipeline = [ 15 | dict(type='LoadImageFromFile'), 16 | dict(type='Resize', size=(256, -1)), 17 | dict(type='CenterCrop', crop_size=224), 18 | dict(type='Normalize', **img_norm_cfg), 19 | dict(type='ImageToTensor', keys=['img']), 20 | dict(type='Collect', keys=['img']) 21 | ] 22 | data = dict( 23 | samples_per_gpu=32, 24 | workers_per_gpu=2, 25 | train=dict( 26 | type=dataset_type, 27 | data_prefix='data/imagenet/train', 28 | pipeline=train_pipeline), 29 | val=dict( 30 | type=dataset_type, 31 | data_prefix='data/imagenet/val', 32 | ann_file='data/imagenet/meta/val.txt', 33 | pipeline=test_pipeline), 34 | test=dict( 35 | # replace `data/val` with `data/test` for standard test 36 | type=dataset_type, 37 | data_prefix='data/imagenet/val', 38 | ann_file='data/imagenet/meta/val.txt', 39 | pipeline=test_pipeline)) 40 | evaluation = dict(interval=1, metric='accuracy') 41 | -------------------------------------------------------------------------------- /classification/configs/_base_/datasets/imagenet_bs32_pil_resize.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ImageNet' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='RandomResizedCrop', size=224, backend='pillow'), 8 | dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), 9 | dict(type='Normalize', **img_norm_cfg), 10 | dict(type='ImageToTensor', keys=['img']), 11 | dict(type='ToTensor', keys=['gt_label']), 12 | dict(type='Collect', keys=['img', 'gt_label']) 13 | ] 14 | test_pipeline = [ 15 | dict(type='LoadImageFromFile'), 16 | dict(type='Resize', size=(256, -1), backend='pillow'), 17 | dict(type='CenterCrop', crop_size=224), 18 | dict(type='Normalize', **img_norm_cfg), 19 | dict(type='ImageToTensor', keys=['img']), 20 | dict(type='Collect', keys=['img']) 21 | ] 22 | data = dict( 23 | samples_per_gpu=32, 24 | workers_per_gpu=2, 25 | train=dict( 26 | type=dataset_type, 27 | data_prefix='data/imagenet/train', 28 | pipeline=train_pipeline), 29 | val=dict( 30 | type=dataset_type, 31 | data_prefix='data/imagenet/val', 32 | ann_file='data/imagenet/meta/val.txt', 33 | pipeline=test_pipeline), 34 | test=dict( 35 | # replace `data/val` with `data/test` for standard test 36 | type=dataset_type, 37 | data_prefix='data/imagenet/val', 38 | ann_file='data/imagenet/meta/val.txt', 39 | pipeline=test_pipeline)) 40 | evaluation = dict(interval=1, metric='accuracy') 41 | 
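Note: the `_base_` dataset fragments above are not run directly; the top-level configs of this repo (e.g. under `configs/resnet50/`) compose them through MMCV's config inheritance. A minimal sketch of that pattern follows — the exact file list is illustrative, not copied from this repo:

```python
# Sketch of a top-level config composing _base_ fragments via MMCV
# config inheritance; keys defined here override the inherited values.
_base_ = [
    '../_base_/models/resnet50.py',
    '../_base_/datasets/imagenet_bs32.py',
    '../_base_/schedules/imagenet_bs256.py',
    '../_base_/default_runtime.py',
]
# Example override: a larger per-GPU batch than the inherited 32.
data = dict(samples_per_gpu=64)
```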
-------------------------------------------------------------------------------- /classification/configs/_base_/datasets/imagenet_bs64.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ImageNet' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='RandomResizedCrop', size=224), 8 | dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), 9 | dict(type='Normalize', **img_norm_cfg), 10 | dict(type='ImageToTensor', keys=['img']), 11 | dict(type='ToTensor', keys=['gt_label']), 12 | dict(type='Collect', keys=['img', 'gt_label']) 13 | ] 14 | test_pipeline = [ 15 | dict(type='LoadImageFromFile'), 16 | dict(type='Resize', size=(256, -1)), 17 | dict(type='CenterCrop', crop_size=224), 18 | dict(type='Normalize', **img_norm_cfg), 19 | dict(type='ImageToTensor', keys=['img']), 20 | dict(type='Collect', keys=['img']) 21 | ] 22 | data = dict( 23 | samples_per_gpu=64, 24 | workers_per_gpu=2, 25 | train=dict( 26 | type=dataset_type, 27 | data_prefix='data/imagenet/train', 28 | pipeline=train_pipeline), 29 | val=dict( 30 | type=dataset_type, 31 | data_prefix='data/imagenet/val', 32 | ann_file='data/imagenet/meta/val.txt', 33 | pipeline=test_pipeline), 34 | test=dict( 35 | # replace `data/val` with `data/test` for standard test 36 | type=dataset_type, 37 | data_prefix='data/imagenet/val', 38 | ann_file='data/imagenet/meta/val.txt', 39 | pipeline=test_pipeline)) 40 | evaluation = dict(interval=1, metric='accuracy') 41 | -------------------------------------------------------------------------------- /classification/configs/_base_/datasets/imagenet_bs64_autoaug.py: -------------------------------------------------------------------------------- 1 | _base_ = ['./pipelines/auto_aug.py'] 2 | 3 | # dataset settings 4 | dataset_type = 'ImageNet' 5 | img_norm_cfg = dict( 6 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='RandomResizedCrop', size=224), 10 | dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), 11 | dict(type='AutoAugment', policies={{_base_.auto_increasing_policies}}), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='ImageToTensor', keys=['img']), 14 | dict(type='ToTensor', keys=['gt_label']), 15 | dict(type='Collect', keys=['img', 'gt_label']) 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict(type='Resize', size=(256, -1)), 20 | dict(type='CenterCrop', crop_size=224), 21 | dict(type='Normalize', **img_norm_cfg), 22 | dict(type='ImageToTensor', keys=['img']), 23 | dict(type='Collect', keys=['img']) 24 | ] 25 | data = dict( 26 | samples_per_gpu=64, 27 | workers_per_gpu=2, 28 | train=dict( 29 | type=dataset_type, 30 | data_prefix='data/imagenet/train', 31 | pipeline=train_pipeline), 32 | val=dict( 33 | type=dataset_type, 34 | data_prefix='data/imagenet/val', 35 | ann_file='data/imagenet/meta/val.txt', 36 | pipeline=test_pipeline), 37 | test=dict( 38 | # replace `data/val` with `data/test` for standard test 39 | type=dataset_type, 40 | data_prefix='data/imagenet/val', 41 | ann_file='data/imagenet/meta/val.txt', 42 | pipeline=test_pipeline)) 43 | evaluation = dict(interval=1, metric='accuracy') 44 | -------------------------------------------------------------------------------- /classification/configs/_base_/datasets/imagenet_bs64_pil_resize.py: 
-------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ImageNet' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='RandomResizedCrop', size=224, backend='pillow'), 8 | dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), 9 | dict(type='Normalize', **img_norm_cfg), 10 | dict(type='ImageToTensor', keys=['img']), 11 | dict(type='ToTensor', keys=['gt_label']), 12 | dict(type='Collect', keys=['img', 'gt_label']) 13 | ] 14 | test_pipeline = [ 15 | dict(type='LoadImageFromFile'), 16 | dict(type='Resize', size=(256, -1), backend='pillow'), 17 | dict(type='CenterCrop', crop_size=224), 18 | dict(type='Normalize', **img_norm_cfg), 19 | dict(type='ImageToTensor', keys=['img']), 20 | dict(type='Collect', keys=['img']) 21 | ] 22 | data = dict( 23 | samples_per_gpu=64, 24 | workers_per_gpu=2, 25 | train=dict( 26 | type=dataset_type, 27 | data_prefix='data/imagenet/train', 28 | pipeline=train_pipeline), 29 | val=dict( 30 | type=dataset_type, 31 | data_prefix='data/imagenet/val', 32 | ann_file='data/imagenet/meta/val.txt', 33 | pipeline=test_pipeline), 34 | test=dict( 35 | # replace `data/val` with `data/test` for standard test 36 | type=dataset_type, 37 | data_prefix='data/imagenet/val', 38 | ann_file='data/imagenet/meta/val.txt', 39 | pipeline=test_pipeline)) 40 | evaluation = dict(interval=1, metric='accuracy') 41 | -------------------------------------------------------------------------------- /classification/configs/_base_/datasets/imagenet_bs64_swin_224.py: -------------------------------------------------------------------------------- 1 | _base_ = ['./pipelines/rand_aug.py'] 2 | 3 | # dataset settings 4 | dataset_type = 'ImageNet' 5 | img_norm_cfg = dict( 6 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 7 | 8 | train_pipeline = [ 9 | dict(type='LoadImageFromFile'), 10 | dict( 11 | type='RandomResizedCrop', 12 | size=224, 13 | backend='pillow', 14 | interpolation='bicubic'), 15 | dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), 16 | dict( 17 | type='RandAugment', 18 | policies={{_base_.rand_increasing_policies}}, 19 | num_policies=2, 20 | total_level=10, 21 | magnitude_level=9, 22 | magnitude_std=0.5, 23 | hparams=dict( 24 | pad_val=[round(x) for x in img_norm_cfg['mean'][::-1]], 25 | interpolation='bicubic')), 26 | dict( 27 | type='RandomErasing', 28 | erase_prob=0.25, 29 | mode='rand', 30 | min_area_ratio=0.02, 31 | max_area_ratio=1 / 3, 32 | fill_color=img_norm_cfg['mean'][::-1], 33 | fill_std=img_norm_cfg['std'][::-1]), 34 | dict(type='Normalize', **img_norm_cfg), 35 | dict(type='ImageToTensor', keys=['img']), 36 | dict(type='ToTensor', keys=['gt_label']), 37 | dict(type='Collect', keys=['img', 'gt_label']) 38 | ] 39 | 40 | test_pipeline = [ 41 | dict(type='LoadImageFromFile'), 42 | dict( 43 | type='Resize', 44 | size=(256, -1), 45 | backend='pillow', 46 | interpolation='bicubic'), 47 | dict(type='CenterCrop', crop_size=224), 48 | dict(type='Normalize', **img_norm_cfg), 49 | dict(type='ImageToTensor', keys=['img']), 50 | dict(type='Collect', keys=['img']) 51 | ] 52 | data = dict( 53 | samples_per_gpu=64, 54 | workers_per_gpu=8, 55 | train=dict( 56 | type=dataset_type, 57 | data_prefix='data/imagenet/train', 58 | pipeline=train_pipeline), 59 | val=dict( 60 | type=dataset_type, 61 | data_prefix='data/imagenet/val', 62 | 
ann_file='data/imagenet/meta/val.txt', 63 | pipeline=test_pipeline), 64 | test=dict( 65 | # replace `data/val` with `data/test` for standard test 66 | type=dataset_type, 67 | data_prefix='data/imagenet/val', 68 | ann_file='data/imagenet/meta/val.txt', 69 | pipeline=test_pipeline)) 70 | 71 | evaluation = dict(interval=10, metric='accuracy') 72 | -------------------------------------------------------------------------------- /classification/configs/_base_/datasets/imagenet_bs64_swin_384.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ImageNet' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict( 8 | type='RandomResizedCrop', 9 | size=384, 10 | backend='pillow', 11 | interpolation='bicubic'), 12 | dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), 13 | dict(type='Normalize', **img_norm_cfg), 14 | dict(type='ImageToTensor', keys=['img']), 15 | dict(type='ToTensor', keys=['gt_label']), 16 | dict(type='Collect', keys=['img', 'gt_label']) 17 | ] 18 | test_pipeline = [ 19 | dict(type='LoadImageFromFile'), 20 | dict(type='Resize', size=384, backend='pillow', interpolation='bicubic'), 21 | dict(type='Normalize', **img_norm_cfg), 22 | dict(type='ImageToTensor', keys=['img']), 23 | dict(type='Collect', keys=['img']) 24 | ] 25 | data = dict( 26 | samples_per_gpu=64, 27 | workers_per_gpu=8, 28 | train=dict( 29 | type=dataset_type, 30 | data_prefix='data/imagenet/train', 31 | pipeline=train_pipeline), 32 | val=dict( 33 | type=dataset_type, 34 | data_prefix='data/imagenet/val', 35 | ann_file='data/imagenet/meta/val.txt', 36 | pipeline=test_pipeline), 37 | test=dict( 38 | # replace `data/val` with `data/test` for standard test 39 | type=dataset_type, 40 | data_prefix='data/imagenet/val', 41 | ann_file='data/imagenet/meta/val.txt', 42 | pipeline=test_pipeline)) 43 | evaluation = dict(interval=10, metric='accuracy') 44 | -------------------------------------------------------------------------------- /classification/configs/_base_/datasets/pipelines/auto_aug.py: -------------------------------------------------------------------------------- 1 | # Policy for ImageNet, refers to 2 | # https://github.com/DeepVoltaire/AutoAugment/blame/master/autoaugment.py 3 | policy_imagenet = [ 4 | [ 5 | dict(type='Posterize', bits=4, prob=0.4), 6 | dict(type='Rotate', angle=30., prob=0.6) 7 | ], 8 | [ 9 | dict(type='Solarize', thr=256 / 9 * 4, prob=0.6), 10 | dict(type='AutoContrast', prob=0.6) 11 | ], 12 | [dict(type='Equalize', prob=0.8), 13 | dict(type='Equalize', prob=0.6)], 14 | [ 15 | dict(type='Posterize', bits=5, prob=0.6), 16 | dict(type='Posterize', bits=5, prob=0.6) 17 | ], 18 | [ 19 | dict(type='Equalize', prob=0.4), 20 | dict(type='Solarize', thr=256 / 9 * 5, prob=0.2) 21 | ], 22 | [ 23 | dict(type='Equalize', prob=0.4), 24 | dict(type='Rotate', angle=30 / 9 * 8, prob=0.8) 25 | ], 26 | [ 27 | dict(type='Solarize', thr=256 / 9 * 6, prob=0.6), 28 | dict(type='Equalize', prob=0.6) 29 | ], 30 | [dict(type='Posterize', bits=6, prob=0.8), 31 | dict(type='Equalize', prob=1.)], 32 | [ 33 | dict(type='Rotate', angle=10., prob=0.2), 34 | dict(type='Solarize', thr=256 / 9, prob=0.6) 35 | ], 36 | [ 37 | dict(type='Equalize', prob=0.6), 38 | dict(type='Posterize', bits=5, prob=0.4) 39 | ], 40 | [ 41 | dict(type='Rotate', angle=30 / 9 * 8, prob=0.8), 42 | dict(type='ColorTransform', magnitude=0., prob=0.4) 43 | ], 
44 | [ 45 | dict(type='Rotate', angle=30., prob=0.4), 46 | dict(type='Equalize', prob=0.6) 47 | ], 48 | [dict(type='Equalize', prob=0.0), 49 | dict(type='Equalize', prob=0.8)], 50 | [dict(type='Invert', prob=0.6), 51 | dict(type='Equalize', prob=1.)], 52 | [ 53 | dict(type='ColorTransform', magnitude=0.4, prob=0.6), 54 | dict(type='Contrast', magnitude=0.8, prob=1.) 55 | ], 56 | [ 57 | dict(type='Rotate', angle=30 / 9 * 8, prob=0.8), 58 | dict(type='ColorTransform', magnitude=0.2, prob=1.) 59 | ], 60 | [ 61 | dict(type='ColorTransform', magnitude=0.8, prob=0.8), 62 | dict(type='Solarize', thr=256 / 9 * 2, prob=0.8) 63 | ], 64 | [ 65 | dict(type='Sharpness', magnitude=0.7, prob=0.4), 66 | dict(type='Invert', prob=0.6) 67 | ], 68 | [ 69 | dict( 70 | type='Shear', 71 | magnitude=0.3 / 9 * 5, 72 | prob=0.6, 73 | direction='horizontal'), 74 | dict(type='Equalize', prob=1.) 75 | ], 76 | [ 77 | dict(type='ColorTransform', magnitude=0., prob=0.4), 78 | dict(type='Equalize', prob=0.6) 79 | ], 80 | [ 81 | dict(type='Equalize', prob=0.4), 82 | dict(type='Solarize', thr=256 / 9 * 5, prob=0.2) 83 | ], 84 | [ 85 | dict(type='Solarize', thr=256 / 9 * 4, prob=0.6), 86 | dict(type='AutoContrast', prob=0.6) 87 | ], 88 | [dict(type='Invert', prob=0.6), 89 | dict(type='Equalize', prob=1.)], 90 | [ 91 | dict(type='ColorTransform', magnitude=0.4, prob=0.6), 92 | dict(type='Contrast', magnitude=0.8, prob=1.) 93 | ], 94 | [dict(type='Equalize', prob=0.8), 95 | dict(type='Equalize', prob=0.6)], 96 | ] 97 | -------------------------------------------------------------------------------- /classification/configs/_base_/datasets/pipelines/rand_aug.py: -------------------------------------------------------------------------------- 1 | # Refers to `_RAND_INCREASING_TRANSFORMS` in pytorch-image-models 2 | rand_increasing_policies = [ 3 | dict(type='AutoContrast'), 4 | dict(type='Equalize'), 5 | dict(type='Invert'), 6 | dict(type='Rotate', magnitude_key='angle', magnitude_range=(0, 30)), 7 | dict(type='Posterize', magnitude_key='bits', magnitude_range=(4, 0)), 8 | dict(type='Solarize', magnitude_key='thr', magnitude_range=(256, 0)), 9 | dict( 10 | type='SolarizeAdd', 11 | magnitude_key='magnitude', 12 | magnitude_range=(0, 110)), 13 | dict( 14 | type='ColorTransform', 15 | magnitude_key='magnitude', 16 | magnitude_range=(0, 0.9)), 17 | dict(type='Contrast', magnitude_key='magnitude', magnitude_range=(0, 0.9)), 18 | dict( 19 | type='Brightness', magnitude_key='magnitude', 20 | magnitude_range=(0, 0.9)), 21 | dict( 22 | type='Sharpness', magnitude_key='magnitude', magnitude_range=(0, 0.9)), 23 | dict( 24 | type='Shear', 25 | magnitude_key='magnitude', 26 | magnitude_range=(0, 0.3), 27 | direction='horizontal'), 28 | dict( 29 | type='Shear', 30 | magnitude_key='magnitude', 31 | magnitude_range=(0, 0.3), 32 | direction='vertical'), 33 | dict( 34 | type='Translate', 35 | magnitude_key='magnitude', 36 | magnitude_range=(0, 0.45), 37 | direction='horizontal'), 38 | dict( 39 | type='Translate', 40 | magnitude_key='magnitude', 41 | magnitude_range=(0, 0.45), 42 | direction='vertical') 43 | ] 44 | -------------------------------------------------------------------------------- /classification/configs/_base_/datasets/voc_bs16.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'VOC' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | 
dict(type='RandomResizedCrop', size=224), 8 | dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), 9 | dict(type='Normalize', **img_norm_cfg), 10 | dict(type='ImageToTensor', keys=['img']), 11 | dict(type='ToTensor', keys=['gt_label']), 12 | dict(type='Collect', keys=['img', 'gt_label']) 13 | ] 14 | test_pipeline = [ 15 | dict(type='LoadImageFromFile'), 16 | dict(type='Resize', size=(256, -1)), 17 | dict(type='CenterCrop', crop_size=224), 18 | dict(type='Normalize', **img_norm_cfg), 19 | dict(type='ImageToTensor', keys=['img']), 20 | dict(type='Collect', keys=['img']) 21 | ] 22 | data = dict( 23 | samples_per_gpu=16, 24 | workers_per_gpu=2, 25 | train=dict( 26 | type=dataset_type, 27 | data_prefix='data/VOCdevkit/VOC2007/', 28 | ann_file='data/VOCdevkit/VOC2007/ImageSets/Main/trainval.txt', 29 | pipeline=train_pipeline), 30 | val=dict( 31 | type=dataset_type, 32 | data_prefix='data/VOCdevkit/VOC2007/', 33 | ann_file='data/VOCdevkit/VOC2007/ImageSets/Main/test.txt', 34 | pipeline=test_pipeline), 35 | test=dict( 36 | type=dataset_type, 37 | data_prefix='data/VOCdevkit/VOC2007/', 38 | ann_file='data/VOCdevkit/VOC2007/ImageSets/Main/test.txt', 39 | pipeline=test_pipeline)) 40 | evaluation = dict( 41 | interval=1, metric=['mAP', 'CP', 'OP', 'CR', 'OR', 'CF1', 'OF1']) 42 | -------------------------------------------------------------------------------- /classification/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | # checkpoint saving 2 | checkpoint_config = dict(interval=1) 3 | # yapf:disable 4 | log_config = dict( 5 | interval=100, 6 | hooks=[ 7 | dict(type='TextLoggerHook'), 8 | # dict(type='TensorboardLoggerHook') 9 | ]) 10 | # yapf:enable 11 | 12 | dist_params = dict(backend='nccl') 13 | log_level = 'INFO' 14 | load_from = None 15 | resume_from = None 16 | workflow = [('train', 1)] 17 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/mobilenet_v2_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='MobileNetV2', widen_factor=1.0), 5 | neck=dict(type='GlobalAveragePooling'), 6 | head=dict( 7 | type='LinearClsHead', 8 | num_classes=1000, 9 | in_channels=1280, 10 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 11 | topk=(1, 5), 12 | )) 13 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/mobilenet_v3_large_imagenet.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='MobileNetV3', arch='large'), 5 | neck=dict(type='GlobalAveragePooling'), 6 | head=dict( 7 | type='StackedLinearClsHead', 8 | num_classes=1000, 9 | in_channels=960, 10 | mid_channels=[1280], 11 | dropout_rate=0.2, 12 | act_cfg=dict(type='HSwish'), 13 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 14 | topk=(1, 5))) 15 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/mobilenet_v3_small_cifar.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='MobileNetV3', arch='small'), 5 | neck=dict(type='GlobalAveragePooling'), 6 | head=dict( 7 | type='StackedLinearClsHead', 
8 | num_classes=10, 9 | in_channels=576, 10 | mid_channels=[1280], 11 | act_cfg=dict(type='HSwish'), 12 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 13 | topk=(1, 5))) 14 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/mobilenet_v3_small_imagenet.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='MobileNetV3', arch='small'), 5 | neck=dict(type='GlobalAveragePooling'), 6 | head=dict( 7 | type='StackedLinearClsHead', 8 | num_classes=1000, 9 | in_channels=576, 10 | mid_channels=[1024], 11 | dropout_rate=0.2, 12 | act_cfg=dict(type='HSwish'), 13 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 14 | topk=(1, 5))) 15 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/regnet/regnetx_1.6gf.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='RegNet', arch='regnetx_1.6gf'), 5 | neck=dict(type='GlobalAveragePooling'), 6 | head=dict( 7 | type='LinearClsHead', 8 | num_classes=1000, 9 | in_channels=912, 10 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 11 | topk=(1, 5), 12 | )) 13 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/regnet/regnetx_12gf.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='RegNet', arch='regnetx_12gf'), 5 | neck=dict(type='GlobalAveragePooling'), 6 | head=dict( 7 | type='LinearClsHead', 8 | num_classes=1000, 9 | in_channels=2240, 10 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 11 | topk=(1, 5), 12 | )) 13 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/regnet/regnetx_3.2gf.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='RegNet', arch='regnetx_3.2gf'), 5 | neck=dict(type='GlobalAveragePooling'), 6 | head=dict( 7 | type='LinearClsHead', 8 | num_classes=1000, 9 | in_channels=1008, 10 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 11 | topk=(1, 5), 12 | )) 13 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/regnet/regnetx_4.0gf.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='RegNet', arch='regnetx_4.0gf'), 5 | neck=dict(type='GlobalAveragePooling'), 6 | head=dict( 7 | type='LinearClsHead', 8 | num_classes=1000, 9 | in_channels=1360, 10 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 11 | topk=(1, 5), 12 | )) 13 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/regnet/regnetx_400mf.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='RegNet', arch='regnetx_400mf'), 5 | neck=dict(type='GlobalAveragePooling'), 6 | head=dict( 7 | type='LinearClsHead', 8 | num_classes=1000, 9 | 
in_channels=384, 10 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 11 | topk=(1, 5), 12 | )) 13 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/regnet/regnetx_6.4gf.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='RegNet', arch='regnetx_6.4gf'), 5 | neck=dict(type='GlobalAveragePooling'), 6 | head=dict( 7 | type='LinearClsHead', 8 | num_classes=1000, 9 | in_channels=1624, 10 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 11 | topk=(1, 5), 12 | )) 13 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/regnet/regnetx_8.0gf.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='RegNet', arch='regnetx_8.0gf'), 5 | neck=dict(type='GlobalAveragePooling'), 6 | head=dict( 7 | type='LinearClsHead', 8 | num_classes=1000, 9 | in_channels=1920, 10 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 11 | topk=(1, 5), 12 | )) 13 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/regnet/regnetx_800mf.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='RegNet', arch='regnetx_800mf'), 5 | neck=dict(type='GlobalAveragePooling'), 6 | head=dict( 7 | type='LinearClsHead', 8 | num_classes=1000, 9 | in_channels=672, 10 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 11 | topk=(1, 5), 12 | )) 13 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/repvgg-A0_in1k.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='ImageClassifier', 3 | backbone=dict( 4 | type='RepVGG', 5 | arch='A0', 6 | out_indices=(3, ), 7 | ), 8 | neck=dict(type='GlobalAveragePooling'), 9 | head=dict( 10 | type='LinearClsHead', 11 | num_classes=1000, 12 | in_channels=1280, 13 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 14 | topk=(1, 5), 15 | )) 16 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/repvgg-B3_lbs-mixup_in1k.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='ImageClassifier', 3 | backbone=dict( 4 | type='RepVGG', 5 | arch='B3', 6 | out_indices=(3, ), 7 | ), 8 | neck=dict(type='GlobalAveragePooling'), 9 | head=dict( 10 | type='LinearClsHead', 11 | num_classes=1000, 12 | in_channels=2560, 13 | loss=dict( 14 | type='LabelSmoothLoss', 15 | loss_weight=1.0, 16 | label_smooth_val=0.1, 17 | mode='classy_vision', 18 | num_classes=1000), 19 | topk=(1, 5), 20 | ), 21 | train_cfg=dict( 22 | augments=dict(type='BatchMixup', alpha=0.2, num_classes=1000, 23 | prob=1.))) 24 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnest101.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNeSt', 6 | depth=101, 7 | num_stages=4, 8 | stem_channels=128, 9 | out_indices=(3, ), 10 | 
style='pytorch'), 11 | neck=dict(type='GlobalAveragePooling'), 12 | head=dict( 13 | type='LinearClsHead', 14 | num_classes=1000, 15 | in_channels=2048, 16 | loss=dict( 17 | type='LabelSmoothLoss', 18 | label_smooth_val=0.1, 19 | num_classes=1000, 20 | reduction='mean', 21 | loss_weight=1.0), 22 | topk=(1, 5), 23 | cal_acc=False)) 24 | train_cfg = dict(mixup=dict(alpha=0.2, num_classes=1000)) 25 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnest200.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNeSt', 6 | depth=200, 7 | num_stages=4, 8 | stem_channels=128, 9 | out_indices=(3, ), 10 | style='pytorch'), 11 | neck=dict(type='GlobalAveragePooling'), 12 | head=dict( 13 | type='LinearClsHead', 14 | num_classes=1000, 15 | in_channels=2048, 16 | loss=dict( 17 | type='LabelSmoothLoss', 18 | label_smooth_val=0.1, 19 | num_classes=1000, 20 | reduction='mean', 21 | loss_weight=1.0), 22 | topk=(1, 5), 23 | cal_acc=False)) 24 | train_cfg = dict(mixup=dict(alpha=0.2, num_classes=1000)) 25 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnest269.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNeSt', 6 | depth=269, 7 | num_stages=4, 8 | stem_channels=128, 9 | out_indices=(3, ), 10 | style='pytorch'), 11 | neck=dict(type='GlobalAveragePooling'), 12 | head=dict( 13 | type='LinearClsHead', 14 | num_classes=1000, 15 | in_channels=2048, 16 | loss=dict( 17 | type='LabelSmoothLoss', 18 | label_smooth_val=0.1, 19 | num_classes=1000, 20 | reduction='mean', 21 | loss_weight=1.0), 22 | topk=(1, 5), 23 | cal_acc=False)) 24 | train_cfg = dict(mixup=dict(alpha=0.2, num_classes=1000)) 25 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnest50.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNeSt', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=2048, 15 | loss=dict( 16 | type='LabelSmoothLoss', 17 | label_smooth_val=0.1, 18 | num_classes=1000, 19 | reduction='mean', 20 | loss_weight=1.0), 21 | topk=(1, 5), 22 | cal_acc=False)) 23 | train_cfg = dict(mixup=dict(alpha=0.2, num_classes=1000)) 24 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet101.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=101, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | topk=(1, 5), 17 | )) 18 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet101_cifar.py: 
-------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet_CIFAR', 6 | depth=101, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=10, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | )) 17 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet152.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=152, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | topk=(1, 5), 17 | )) 18 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet152_cifar.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet_CIFAR', 6 | depth=152, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=10, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | )) 17 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet18.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=18, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=512, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | topk=(1, 5), 17 | )) 18 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet18_cifar.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet_CIFAR', 6 | depth=18, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=10, 14 | in_channels=512, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | )) 17 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet34.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=34, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=512, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | topk=(1, 5), 17 | )) 18 | 
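Note: each model file in this section only declares a plain `model` dict; mmcls builds the actual `nn.Module` from it at runtime. A minimal sketch using the mmcls builder, assuming mmcls 0.16 as pinned in the README (the config path is illustrative):

```python
from mmcv import Config
from mmcls.models import build_classifier

# Parse one of the _base_ model configs and instantiate the classifier
# it describes.
cfg = Config.fromfile('configs/_base_/models/resnet50.py')
model = build_classifier(cfg.model)
model.init_weights()
# Roughly 25.6M parameters for the ImageNet ResNet-50 classifier.
print(sum(p.numel() for p in model.parameters()))
```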
-------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet34_cifar.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet_CIFAR', 6 | depth=34, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=10, 14 | in_channels=512, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | )) 17 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet50.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | topk=(1, 5), 17 | )) 18 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet50_cifar.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet_CIFAR', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=10, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | )) 17 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet50_cifar_cutmix.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet_CIFAR', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='MultiLabelLinearClsHead', 13 | num_classes=10, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0, use_soft=True)), 16 | train_cfg=dict( 17 | augments=dict(type='BatchCutMix', alpha=1.0, num_classes=10, 18 | prob=1.0))) 19 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet50_cifar_mixup.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet_CIFAR', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='MultiLabelLinearClsHead', 13 | num_classes=10, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0, use_soft=True)), 16 | train_cfg=dict( 17 | augments=dict(type='BatchMixup', alpha=1., num_classes=10, prob=1.))) 18 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet50_cutmix.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | 
model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='MultiLabelLinearClsHead', 13 | num_classes=1000, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0, use_soft=True)), 16 | train_cfg=dict( 17 | augments=dict( 18 | type='BatchCutMix', alpha=1.0, num_classes=1000, prob=1.0))) 19 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet50_label_smooth.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=2048, 15 | loss=dict( 16 | type='LabelSmoothLoss', label_smooth_val=0.1, loss_weight=1.0), 17 | topk=(1, 5), 18 | )) 19 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet50_mixup.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='MultiLabelLinearClsHead', 13 | num_classes=1000, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0, use_soft=True)), 16 | train_cfg=dict( 17 | augments=dict(type='BatchMixup', alpha=0.2, num_classes=1000, 18 | prob=1.))) 19 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnetv1d101.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNetV1d', 6 | depth=101, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | topk=(1, 5), 17 | )) 18 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnetv1d152.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNetV1d', 6 | depth=152, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | topk=(1, 5), 17 | )) 18 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnetv1d50.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNetV1d', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | 
neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | topk=(1, 5), 17 | )) 18 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnext101_32x4d.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=101, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | groups=32, 10 | width_per_group=4, 11 | style='pytorch'), 12 | neck=dict(type='GlobalAveragePooling'), 13 | head=dict( 14 | type='LinearClsHead', 15 | num_classes=1000, 16 | in_channels=2048, 17 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 18 | topk=(1, 5), 19 | )) 20 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnext101_32x8d.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=101, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | groups=32, 10 | width_per_group=8, 11 | style='pytorch'), 12 | neck=dict(type='GlobalAveragePooling'), 13 | head=dict( 14 | type='LinearClsHead', 15 | num_classes=1000, 16 | in_channels=2048, 17 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 18 | topk=(1, 5), 19 | )) 20 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnext152_32x4d.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=152, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | groups=32, 10 | width_per_group=4, 11 | style='pytorch'), 12 | neck=dict(type='GlobalAveragePooling'), 13 | head=dict( 14 | type='LinearClsHead', 15 | num_classes=1000, 16 | in_channels=2048, 17 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 18 | topk=(1, 5), 19 | )) 20 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnext50_32x4d.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | groups=32, 10 | width_per_group=4, 11 | style='pytorch'), 12 | neck=dict(type='GlobalAveragePooling'), 13 | head=dict( 14 | type='LinearClsHead', 15 | num_classes=1000, 16 | in_channels=2048, 17 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 18 | topk=(1, 5), 19 | )) 20 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/seresnet101.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='SEResNet', 6 | depth=101, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | topk=(1, 5), 17 | )) 18 | 
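Note: the `*_pruning.py` / `*_finetune.py` configs under `configs/resnet50`, `configs/resnext` and `configs/regnet` layer the pruning-specific settings on top of these `_base_` files. A hypothetical sketch of the fine-tune overrides the README describes — the base file and checkpoint path are placeholders, not taken from this repo:

```python
# Hypothetical fine-tune config: reuse the pruning config, point
# `deploy_from` at the pruned checkpoint (as the README instructs), and
# choose samples_per_gpu so that samples_per_gpu * #GPUs == 256.
_base_ = ['resnet50_pruning.py']
deploy_from = 'work_dirs/resnet50_pruning/pruned.pth'  # placeholder path
data = dict(samples_per_gpu=64)  # e.g. 256 / 4 GPUs
```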
-------------------------------------------------------------------------------- /classification/configs/_base_/models/seresnet50.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='SEResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | topk=(1, 5), 17 | )) 18 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/seresnext101_32x4d.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='SEResNeXt', 6 | depth=101, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | groups=32, 10 | width_per_group=4, 11 | se_ratio=16, 12 | style='pytorch'), 13 | neck=dict(type='GlobalAveragePooling'), 14 | head=dict( 15 | type='LinearClsHead', 16 | num_classes=1000, 17 | in_channels=2048, 18 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 19 | topk=(1, 5), 20 | )) 21 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/seresnext50_32x4d.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='SEResNeXt', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | groups=32, 10 | width_per_group=4, 11 | se_ratio=16, 12 | style='pytorch'), 13 | neck=dict(type='GlobalAveragePooling'), 14 | head=dict( 15 | type='LinearClsHead', 16 | num_classes=1000, 17 | in_channels=2048, 18 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 19 | topk=(1, 5), 20 | )) 21 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/shufflenet_v1_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='ShuffleNetV1', groups=3), 5 | neck=dict(type='GlobalAveragePooling'), 6 | head=dict( 7 | type='LinearClsHead', 8 | num_classes=1000, 9 | in_channels=960, 10 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 11 | topk=(1, 5), 12 | )) 13 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/shufflenet_v2_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='ShuffleNetV2', widen_factor=1.0), 5 | neck=dict(type='GlobalAveragePooling'), 6 | head=dict( 7 | type='LinearClsHead', 8 | num_classes=1000, 9 | in_channels=1024, 10 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 11 | topk=(1, 5), 12 | )) 13 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/swin_transformer/base_224.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='SwinTransformer', arch='base', img_size=224, drop_path_rate=0.5), 6 | neck=dict(type='GlobalAveragePooling'), 
7 | head=dict( 8 | type='LinearClsHead', 9 | num_classes=1000, 10 | in_channels=1024, 11 | init_cfg=None, # suppress the default init_cfg of LinearClsHead. 12 | loss=dict( 13 | type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), 14 | cal_acc=False), 15 | init_cfg=[ 16 | dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.), 17 | dict(type='Constant', layer='LayerNorm', val=1., bias=0.) 18 | ], 19 | train_cfg=dict(augments=[ 20 | dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5), 21 | dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5) 22 | ])) 23 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/swin_transformer/base_384.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Only for evaluation 3 | model = dict( 4 | type='ImageClassifier', 5 | backbone=dict( 6 | type='SwinTransformer', 7 | arch='base', 8 | img_size=384, 9 | stage_cfgs=dict(block_cfgs=dict(window_size=12))), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=1024, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | topk=(1, 5))) 17 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/swin_transformer/large_224.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Only for evaluation 3 | model = dict( 4 | type='ImageClassifier', 5 | backbone=dict(type='SwinTransformer', arch='large', img_size=224), 6 | neck=dict(type='GlobalAveragePooling'), 7 | head=dict( 8 | type='LinearClsHead', 9 | num_classes=1000, 10 | in_channels=1536, 11 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 12 | topk=(1, 5))) 13 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/swin_transformer/large_384.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Only for evaluation 3 | model = dict( 4 | type='ImageClassifier', 5 | backbone=dict( 6 | type='SwinTransformer', 7 | arch='large', 8 | img_size=384, 9 | stage_cfgs=dict(block_cfgs=dict(window_size=12))), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=1536, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | topk=(1, 5))) 17 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/swin_transformer/small_224.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='SwinTransformer', arch='small', img_size=224, 6 | drop_path_rate=0.3), 7 | neck=dict(type='GlobalAveragePooling'), 8 | head=dict( 9 | type='LinearClsHead', 10 | num_classes=1000, 11 | in_channels=768, 12 | init_cfg=None, # suppress the default init_cfg of LinearClsHead. 13 | loss=dict( 14 | type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), 15 | cal_acc=False), 16 | init_cfg=[ 17 | dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.), 18 | dict(type='Constant', layer='LayerNorm', val=1., bias=0.) 
19 | ], 20 | train_cfg=dict(augments=[ 21 | dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5), 22 | dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5) 23 | ])) 24 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/swin_transformer/tiny_224.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='SwinTransformer', arch='tiny', img_size=224, drop_path_rate=0.2), 6 | neck=dict(type='GlobalAveragePooling'), 7 | head=dict( 8 | type='LinearClsHead', 9 | num_classes=1000, 10 | in_channels=768, 11 | init_cfg=None, # suppress the default init_cfg of LinearClsHead. 12 | loss=dict( 13 | type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), 14 | cal_acc=False), 15 | init_cfg=[ 16 | dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.), 17 | dict(type='Constant', layer='LayerNorm', val=1., bias=0.) 18 | ], 19 | train_cfg=dict(augments=[ 20 | dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5), 21 | dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5) 22 | ])) 23 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/tnt_s_patch16_224.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='TNT', 6 | arch='s', 7 | img_size=224, 8 | patch_size=16, 9 | in_channels=3, 10 | ffn_ratio=4, 11 | qkv_bias=False, 12 | drop_rate=0., 13 | attn_drop_rate=0., 14 | drop_path_rate=0.1, 15 | first_stride=4, 16 | num_fcs=2, 17 | init_cfg=[ 18 | dict(type='TruncNormal', layer='Linear', std=.02), 19 | dict(type='Constant', layer='LayerNorm', val=1., bias=0.) 
20 | ]), 21 | neck=None, 22 | head=dict( 23 | type='LinearClsHead', 24 | num_classes=1000, 25 | in_channels=384, 26 | loss=dict( 27 | type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), 28 | topk=(1, 5), 29 | init_cfg=dict(type='TruncNormal', layer='Linear', std=.02))) 30 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vgg11.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='VGG', depth=11, num_classes=1000), 5 | neck=None, 6 | head=dict( 7 | type='ClsHead', 8 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 9 | topk=(1, 5), 10 | )) 11 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vgg11bn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='VGG', depth=11, norm_cfg=dict(type='BN'), num_classes=1000), 6 | neck=None, 7 | head=dict( 8 | type='ClsHead', 9 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 10 | topk=(1, 5), 11 | )) 12 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vgg13.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='VGG', depth=13, num_classes=1000), 5 | neck=None, 6 | head=dict( 7 | type='ClsHead', 8 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 9 | topk=(1, 5), 10 | )) 11 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vgg13bn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='VGG', depth=13, norm_cfg=dict(type='BN'), num_classes=1000), 6 | neck=None, 7 | head=dict( 8 | type='ClsHead', 9 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 10 | topk=(1, 5), 11 | )) 12 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vgg16.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='VGG', depth=16, num_classes=1000), 5 | neck=None, 6 | head=dict( 7 | type='ClsHead', 8 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 9 | topk=(1, 5), 10 | )) 11 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vgg16bn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='VGG', depth=16, norm_cfg=dict(type='BN'), num_classes=1000), 6 | neck=None, 7 | head=dict( 8 | type='ClsHead', 9 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 10 | topk=(1, 5), 11 | )) 12 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vgg19.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='VGG', depth=19, 
num_classes=1000), 5 | neck=None, 6 | head=dict( 7 | type='ClsHead', 8 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 9 | topk=(1, 5), 10 | )) 11 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vgg19bn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='VGG', depth=19, norm_cfg=dict(type='BN'), num_classes=1000), 6 | neck=None, 7 | head=dict( 8 | type='ClsHead', 9 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 10 | topk=(1, 5), 11 | )) 12 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vit_base_patch16_224_finetune.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='VisionTransformer', 6 | num_layers=12, 7 | embed_dim=768, 8 | num_heads=12, 9 | img_size=224, 10 | patch_size=16, 11 | in_channels=3, 12 | feedforward_channels=3072, 13 | drop_rate=0.1), 14 | neck=None, 15 | head=dict( 16 | type='VisionTransformerClsHead', 17 | num_classes=1000, 18 | in_channels=768, 19 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 20 | topk=(1, 5), 21 | )) 22 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vit_base_patch16_224_pretrain.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='VisionTransformer', 6 | num_layers=12, 7 | embed_dim=768, 8 | num_heads=12, 9 | img_size=224, 10 | patch_size=16, 11 | in_channels=3, 12 | feedforward_channels=3072, 13 | drop_rate=0.1, 14 | attn_drop_rate=0.), 15 | neck=None, 16 | head=dict( 17 | type='VisionTransformerClsHead', 18 | num_classes=1000, 19 | in_channels=768, 20 | hidden_dim=3072, 21 | loss=dict(type='LabelSmoothLoss', label_smooth_val=0.1), 22 | topk=(1, 5), 23 | ), 24 | train_cfg=dict( 25 | augments=dict(type='BatchMixup', alpha=0.2, num_classes=1000, 26 | prob=1.))) 27 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vit_base_patch16_384_finetune.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='VisionTransformer', 6 | num_layers=12, 7 | embed_dim=768, 8 | num_heads=12, 9 | img_size=384, 10 | patch_size=16, 11 | in_channels=3, 12 | feedforward_channels=3072, 13 | drop_rate=0.1), 14 | neck=None, 15 | head=dict( 16 | type='VisionTransformerClsHead', 17 | num_classes=1000, 18 | in_channels=768, 19 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 20 | topk=(1, 5), 21 | )) 22 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vit_base_patch32_384_finetune.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='VisionTransformer', 6 | num_layers=12, 7 | embed_dim=768, 8 | num_heads=12, 9 | img_size=384, 10 | patch_size=32, 11 | in_channels=3, 12 | feedforward_channels=3072, 13 | drop_rate=0.1), 14 | neck=None, 15 | head=dict( 16 | 
type='VisionTransformerClsHead', 17 | num_classes=1000, 18 | in_channels=768, 19 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 20 | topk=(1, 5), 21 | )) 22 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vit_large_patch16_224_finetune.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='VisionTransformer', 6 | num_layers=24, 7 | embed_dim=1024, 8 | num_heads=16, 9 | img_size=224, 10 | patch_size=16, 11 | in_channels=3, 12 | feedforward_channels=4096, 13 | drop_rate=0.1), 14 | neck=None, 15 | head=dict( 16 | type='VisionTransformerClsHead', 17 | num_classes=1000, 18 | in_channels=1024, 19 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 20 | topk=(1, 5), 21 | )) 22 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vit_large_patch16_384_finetune.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='VisionTransformer', 6 | num_layers=24, 7 | embed_dim=1024, 8 | num_heads=16, 9 | img_size=384, 10 | patch_size=16, 11 | in_channels=3, 12 | feedforward_channels=4096, 13 | drop_rate=0.1), 14 | neck=None, 15 | head=dict( 16 | type='VisionTransformerClsHead', 17 | num_classes=1000, 18 | in_channels=1024, 19 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 20 | topk=(1, 5), 21 | )) 22 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vit_large_patch32_384_finetune.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='VisionTransformer', 6 | num_layers=24, 7 | embed_dim=1024, 8 | num_heads=16, 9 | img_size=384, 10 | patch_size=32, 11 | in_channels=3, 12 | feedforward_channels=4096, 13 | drop_rate=0.1), 14 | neck=None, 15 | head=dict( 16 | type='VisionTransformerClsHead', 17 | num_classes=1000, 18 | in_channels=1024, 19 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 20 | topk=(1, 5), 21 | )) 22 | -------------------------------------------------------------------------------- /classification/configs/_base_/schedules/cifar10_bs128.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict(policy='step', step=[100, 150]) 6 | runner = dict(type='EpochBasedRunner', max_epochs=200) 7 | -------------------------------------------------------------------------------- /classification/configs/_base_/schedules/imagenet_bs1024_adamw_swin.py: -------------------------------------------------------------------------------- 1 | paramwise_cfg = dict( 2 | norm_decay_mult=0.0, 3 | bias_decay_mult=0.0, 4 | custom_keys={ 5 | '.absolute_pos_embed': dict(decay_mult=0.0), 6 | '.relative_position_bias_table': dict(decay_mult=0.0) 7 | }) 8 | 9 | # the batch size per GPU is 128, with 8 GPUs in total 10 | # lr = 5e-4 * 128 * 8 / 512 = 0.001 11 | optimizer = dict( 12 | type='AdamW', 13 | lr=5e-4 * 128 * 8 / 512, 14 | weight_decay=0.05, 15 | eps=1e-8, 16 | betas=(0.9, 0.999), 17 | paramwise_cfg=paramwise_cfg) 18 | optimizer_config
= dict(grad_clip=dict(max_norm=5.0)) 19 | 20 | # learning policy 21 | lr_config = dict( 22 | policy='CosineAnnealing', 23 | by_epoch=False, 24 | min_lr_ratio=1e-2, 25 | warmup='linear', 26 | warmup_ratio=1e-3, 27 | warmup_iters=20 * 1252, 28 | warmup_by_epoch=False) 29 | 30 | runner = dict(type='EpochBasedRunner', max_epochs=300) 31 | -------------------------------------------------------------------------------- /classification/configs/_base_/schedules/imagenet_bs1024_linearlr_bn_nowd.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict( 3 | type='SGD', 4 | lr=0.5, 5 | momentum=0.9, 6 | weight_decay=0.00004, 7 | paramwise_cfg=dict(norm_decay_mult=0)) 8 | optimizer_config = dict(grad_clip=None) 9 | # learning policy 10 | lr_config = dict( 11 | policy='poly', 12 | min_lr=0, 13 | by_epoch=False, 14 | warmup='constant', 15 | warmup_iters=5000, 16 | ) 17 | runner = dict(type='EpochBasedRunner', max_epochs=300) 18 | -------------------------------------------------------------------------------- /classification/configs/_base_/schedules/imagenet_bs2048.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict( 3 | type='SGD', lr=0.8, momentum=0.9, weight_decay=0.0001, nesterov=True) 4 | optimizer_config = dict(grad_clip=None) 5 | # learning policy 6 | lr_config = dict( 7 | policy='step', 8 | warmup='linear', 9 | warmup_iters=2500, 10 | warmup_ratio=0.25, 11 | step=[30, 60, 90]) 12 | runner = dict(type='EpochBasedRunner', max_epochs=100) 13 | -------------------------------------------------------------------------------- /classification/configs/_base_/schedules/imagenet_bs2048_AdamW.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # In ClassyVision, the lr is set to 0.003 for bs4096. 
3 | # In this implementation (bs2048), lr = 0.003 / 4096 * (32 imgs/gpu * 64 gpus) = 0.0015 4 | optimizer = dict(type='AdamW', lr=0.0015, weight_decay=0.3) 5 | optimizer_config = dict(grad_clip=dict(max_norm=1.0)) 6 | 7 | # specific to vit pretrain 8 | paramwise_cfg = dict( 9 | custom_keys={ 10 | '.backbone.cls_token': dict(decay_mult=0.0), 11 | '.backbone.pos_embed': dict(decay_mult=0.0) 12 | }) 13 | # learning policy 14 | lr_config = dict( 15 | policy='CosineAnnealing', 16 | min_lr=0, 17 | warmup='linear', 18 | warmup_iters=10000, 19 | warmup_ratio=1e-4) 20 | runner = dict(type='EpochBasedRunner', max_epochs=300) 21 | -------------------------------------------------------------------------------- /classification/configs/_base_/schedules/imagenet_bs2048_coslr.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict( 3 | type='SGD', lr=0.8, momentum=0.9, weight_decay=0.0001, nesterov=True) 4 | optimizer_config = dict(grad_clip=None) 5 | # learning policy 6 | lr_config = dict( 7 | policy='CosineAnnealing', 8 | min_lr=0, 9 | warmup='linear', 10 | warmup_iters=2500, 11 | warmup_ratio=0.25) 12 | runner = dict(type='EpochBasedRunner', max_epochs=100) 13 | -------------------------------------------------------------------------------- /classification/configs/_base_/schedules/imagenet_bs256.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict(policy='step', step=[30, 60, 90]) 6 | runner = dict(type='EpochBasedRunner', max_epochs=100) 7 | -------------------------------------------------------------------------------- /classification/configs/_base_/schedules/imagenet_bs256_140e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict(policy='step', step=[40, 80, 120]) 6 | runner = dict(type='EpochBasedRunner', max_epochs=140) 7 | -------------------------------------------------------------------------------- /classification/configs/_base_/schedules/imagenet_bs256_200e_coslr_warmup.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='CosineAnnealing', 7 | min_lr=0, 8 | warmup='linear', 9 | warmup_iters=25025, 10 | warmup_ratio=0.25) 11 | runner = dict(type='EpochBasedRunner', max_epochs=200) 12 | -------------------------------------------------------------------------------- /classification/configs/_base_/schedules/imagenet_bs256_coslr.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict(policy='CosineAnnealing', min_lr=0) 6 | runner = dict(type='EpochBasedRunner', max_epochs=100) 7 | -------------------------------------------------------------------------------- /classification/configs/_base_/schedules/imagenet_bs256_epochstep.py:
-------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.045, momentum=0.9, weight_decay=0.00004) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict(policy='step', gamma=0.98, step=1) 6 | runner = dict(type='EpochBasedRunner', max_epochs=300) 7 | -------------------------------------------------------------------------------- /classification/configs/_base_/schedules/imagenet_bs4096_AdamW.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='AdamW', lr=0.003, weight_decay=0.3) 3 | optimizer_config = dict(grad_clip=dict(max_norm=1.0)) 4 | 5 | # specific to vit pretrain 6 | paramwise_cfg = dict( 7 | custom_keys={ 8 | '.backbone.cls_token': dict(decay_mult=0.0), 9 | '.backbone.pos_embed': dict(decay_mult=0.0) 10 | }) 11 | # learning policy 12 | lr_config = dict( 13 | policy='CosineAnnealing', 14 | min_lr=0, 15 | warmup='linear', 16 | warmup_iters=10000, 17 | warmup_ratio=1e-4) 18 | runner = dict(type='EpochBasedRunner', max_epochs=300) 19 | -------------------------------------------------------------------------------- /classification/configs/regnet/regnet_0.4G_origin.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/regnet/regnetx_400mf.py', 3 | '../_base_/datasets/imagenet_bs32.py', 4 | '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' 5 | ] 6 | 7 | work_dir = "work_dirs/regnet_0.4G_origin" 8 | load_from = "https://download.openmmlab.com/mmclassification/v0/regnet/convert/RegNetX-400MF-0db9f35c.pth" 9 | 10 | # dataset settings 11 | dataset_type = 'ImageNet' 12 | 13 | img_norm_cfg = dict( 14 | # The mean and std are used in PyCls when training RegNets 15 | mean=[103.53, 116.28, 123.675], 16 | std=[57.375, 57.12, 58.395], 17 | to_rgb=False) 18 | 19 | train_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict(type='RandomResizedCrop', size=224), 22 | dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), 23 | dict(type='Normalize', **img_norm_cfg), 24 | dict(type='ImageToTensor', keys=['img']), 25 | dict(type='ToTensor', keys=['gt_label']), 26 | dict(type='Collect', keys=['img', 'gt_label']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='LoadImageFromFile'), 30 | dict(type='Resize', size=(256, -1)), 31 | dict(type='CenterCrop', crop_size=224), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='ImageToTensor', keys=['img']), 34 | dict(type='Collect', keys=['img']) 35 | ] 36 | data = dict( 37 | samples_per_gpu=256, 38 | workers_per_gpu=16, 39 | train=dict( 40 | type=dataset_type, 41 | data_prefix='data/imagenet/train', 42 | pipeline=train_pipeline), 43 | val=dict( 44 | type=dataset_type, 45 | data_prefix='data/imagenet/val', 46 | ann_file='data/imagenet/meta/val.txt', 47 | pipeline=test_pipeline), 48 | test=dict( 49 | # replace `data/val` with `data/test` for standard test 50 | type=dataset_type, 51 | data_prefix='data/imagenet/val', 52 | ann_file='data/imagenet/meta/val.txt', 53 | pipeline=test_pipeline)) 54 | evaluation = dict(interval=1, metric='accuracy') -------------------------------------------------------------------------------- /classification/configs/regnet/regnet_0.4G_pruning.py: -------------------------------------------------------------------------------- 1 | _base_ = ["./regnet_0.8G_origin.py"] 2 | 3 | work_dir = "work_dirs/regnet_0.4G_pruning" 4 | optimizer = dict(lr=0.004) 5 
| 6 | custom_hooks = [ 7 | dict( 8 | type='FisherPruningHook', 9 | # In the pruning process, the priority must be 10 | # 'LOWEST' to ensure the pruning hook runs 11 | # after the optimizer hook; in the finetune 12 | # process, it must be 'HIGHEST' so that it runs 13 | # before the checkpoint hook 14 | pruning=True, 15 | batch_size=32, 16 | interval=25, 17 | priority='LOWEST', 18 | ) 19 | ] 20 | 21 | data = dict(samples_per_gpu=32, workers_per_gpu=2) -------------------------------------------------------------------------------- /classification/configs/regnet/regnet_0.8G_origin.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/regnet/regnetx_800mf.py', 3 | '../_base_/datasets/imagenet_bs32.py', 4 | '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' 5 | ] 6 | 7 | work_dir = "work_dirs/regnet_0.8G_origin" 8 | load_from = "https://download.openmmlab.com/mmclassification/v0/regnet/convert/RegNetX-800MF-4f9d1e8a.pth" 9 | 10 | # dataset settings 11 | dataset_type = 'ImageNet' 12 | 13 | img_norm_cfg = dict( 14 | # The mean and std are used in PyCls when training RegNets 15 | mean=[103.53, 116.28, 123.675], 16 | std=[57.375, 57.12, 58.395], 17 | to_rgb=False) 18 | 19 | train_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict(type='RandomResizedCrop', size=224), 22 | dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), 23 | dict(type='Normalize', **img_norm_cfg), 24 | dict(type='ImageToTensor', keys=['img']), 25 | dict(type='ToTensor', keys=['gt_label']), 26 | dict(type='Collect', keys=['img', 'gt_label']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='LoadImageFromFile'), 30 | dict(type='Resize', size=(256, -1)), 31 | dict(type='CenterCrop', crop_size=224), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='ImageToTensor', keys=['img']), 34 | dict(type='Collect', keys=['img']) 35 | ] 36 | data = dict( 37 | samples_per_gpu=256, 38 | workers_per_gpu=16, 39 | train=dict( 40 | type=dataset_type, 41 | data_prefix='data/imagenet/train', 42 | pipeline=train_pipeline), 43 | val=dict( 44 | type=dataset_type, 45 | data_prefix='data/imagenet/val', 46 | ann_file='data/imagenet/meta/val.txt', 47 | pipeline=test_pipeline), 48 | test=dict( 49 | # replace `data/val` with `data/test` for standard test 50 | type=dataset_type, 51 | data_prefix='data/imagenet/val', 52 | ann_file='data/imagenet/meta/val.txt', 53 | pipeline=test_pipeline)) 54 | evaluation = dict(interval=1, metric='accuracy') -------------------------------------------------------------------------------- /classification/configs/regnet/regnet_0.8G_pruning.py: -------------------------------------------------------------------------------- 1 | _base_ = ["./regnet_1.6G_origin.py"] 2 | 3 | work_dir = "work_dirs/regnet_0.8G_pruning" 4 | optimizer = dict(lr=0.004) 5 | 6 | custom_hooks = [ 7 | dict( 8 | type='FisherPruningHook', 9 | # In the pruning process, the priority must be 10 | # 'LOWEST' to ensure the pruning hook runs 11 | # after the optimizer hook; in the finetune 12 | # process, it must be 'HIGHEST' so that it runs 13 | # before the checkpoint hook 14 | pruning=True, 15 | batch_size=32, 16 | interval=25, 17 | priority='LOWEST', 18 | ) 19 | ] 20 | 21 | data = dict(samples_per_gpu=32, workers_per_gpu=2) -------------------------------------------------------------------------------- /classification/configs/regnet/regnet_1.6G_origin.py:
-------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/regnet/regnetx_1.6gf.py', 3 | '../_base_/datasets/imagenet_bs32.py', 4 | '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' 5 | ] 6 | 7 | work_dir = "work_dirs/regnet_1.6G_origin" 8 | load_from = "https://download.openmmlab.com/mmclassification/v0/regnet/convert/RegNetX-1.6GF-cfb32375.pth" 9 | 10 | # dataset settings 11 | dataset_type = 'ImageNet' 12 | 13 | img_norm_cfg = dict( 14 | # The mean and std are used in PyCls when training RegNets 15 | mean=[103.53, 116.28, 123.675], 16 | std=[57.375, 57.12, 58.395], 17 | to_rgb=False) 18 | 19 | train_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict(type='RandomResizedCrop', size=224), 22 | dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), 23 | dict(type='Normalize', **img_norm_cfg), 24 | dict(type='ImageToTensor', keys=['img']), 25 | dict(type='ToTensor', keys=['gt_label']), 26 | dict(type='Collect', keys=['img', 'gt_label']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='LoadImageFromFile'), 30 | dict(type='Resize', size=(256, -1)), 31 | dict(type='CenterCrop', crop_size=224), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='ImageToTensor', keys=['img']), 34 | dict(type='Collect', keys=['img']) 35 | ] 36 | data = dict( 37 | samples_per_gpu=256, 38 | workers_per_gpu=16, 39 | train=dict( 40 | type=dataset_type, 41 | data_prefix='data/imagenet/train', 42 | pipeline=train_pipeline), 43 | val=dict( 44 | type=dataset_type, 45 | data_prefix='data/imagenet/val', 46 | ann_file='data/imagenet/meta/val.txt', 47 | pipeline=test_pipeline), 48 | test=dict( 49 | # replace `data/val` with `data/test` for standard test 50 | type=dataset_type, 51 | data_prefix='data/imagenet/val', 52 | ann_file='data/imagenet/meta/val.txt', 53 | pipeline=test_pipeline)) 54 | evaluation = dict(interval=1, metric='accuracy') -------------------------------------------------------------------------------- /classification/configs/regnet/regnet_1.6G_pruning.py: -------------------------------------------------------------------------------- 1 | _base_ = ["./regnet_3.2G_origin.py"] 2 | 3 | work_dir = "work_dirs/regnet_1.6G_pruning" 4 | optimizer = dict(lr=0.004) 5 | 6 | custom_hooks = [ 7 | dict( 8 | type='FisherPruningHook', 9 | # In the pruning process, the priority must be 10 | # 'LOWEST' to ensure the pruning hook runs 11 | # after the optimizer hook; in the finetune 12 | # process, it must be 'HIGHEST' so that it runs 13 | # before the checkpoint hook 14 | pruning=True, 15 | batch_size=32, 16 | interval=25, 17 | priority='LOWEST', 18 | ) 19 | ] 20 | 21 | data = dict(samples_per_gpu=32, workers_per_gpu=2) -------------------------------------------------------------------------------- /classification/configs/regnet/regnet_3.2G_finetune.py: -------------------------------------------------------------------------------- 1 | _base_ = ["./regnet_6.4G_origin.py"] 2 | 3 | work_dir = "work_dirs/regnet_3.2G" 4 | 5 | custom_hooks = [ 6 | dict(type='FisherPruningHook', 7 | pruning=False, 8 | deploy_from='path to the pruned model') 9 | ] 10 | 11 | optimizer = dict(lr=0.1) 12 | data = dict(samples_per_gpu=256, workers_per_gpu=16) -------------------------------------------------------------------------------- /classification/configs/regnet/regnet_3.2G_origin.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/regnet/regnetx_3.2gf.py', 3 |
'../_base_/datasets/imagenet_bs32.py', 4 | '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' 5 | ] 6 | 7 | work_dir = "work_dirs/regnet_3.2G_origin" 8 | load_from = "https://download.openmmlab.com/mmclassification/v0/regnet/convert/RegNetX-3.2GF-82c43fd5.pth" 9 | 10 | # dataset settings 11 | dataset_type = 'ImageNet' 12 | 13 | img_norm_cfg = dict( 14 | # The mean and std are used in PyCls when training RegNets 15 | mean=[103.53, 116.28, 123.675], 16 | std=[57.375, 57.12, 58.395], 17 | to_rgb=False) 18 | 19 | train_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict(type='RandomResizedCrop', size=224), 22 | dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), 23 | dict(type='Normalize', **img_norm_cfg), 24 | dict(type='ImageToTensor', keys=['img']), 25 | dict(type='ToTensor', keys=['gt_label']), 26 | dict(type='Collect', keys=['img', 'gt_label']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='LoadImageFromFile'), 30 | dict(type='Resize', size=(256, -1)), 31 | dict(type='CenterCrop', crop_size=224), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='ImageToTensor', keys=['img']), 34 | dict(type='Collect', keys=['img']) 35 | ] 36 | data = dict( 37 | samples_per_gpu=256, 38 | workers_per_gpu=16, 39 | train=dict( 40 | type=dataset_type, 41 | data_prefix='data/imagenet/train', 42 | pipeline=train_pipeline), 43 | val=dict( 44 | type=dataset_type, 45 | data_prefix='data/imagenet/val', 46 | ann_file='data/imagenet/meta/val.txt', 47 | pipeline=test_pipeline), 48 | test=dict( 49 | # replace `data/val` with `data/test` for standard test 50 | type=dataset_type, 51 | data_prefix='data/imagenet/val', 52 | ann_file='data/imagenet/meta/val.txt', 53 | pipeline=test_pipeline)) 54 | evaluation = dict(interval=1, metric='accuracy') -------------------------------------------------------------------------------- /classification/configs/regnet/regnet_3.2G_pruning.py: -------------------------------------------------------------------------------- 1 | _base_ = ["./regnet_6.4G_origin.py"] 2 | 3 | work_dir = "work_dirs/regnet_3.2G_pruning" 4 | optimizer = dict(lr=0.004) 5 | 6 | custom_hooks = [ 7 | dict( 8 | type='FisherPruningHook', 9 | # In the pruning process, the priority must be 10 | # 'LOWEST' to ensure the pruning hook runs 11 | # after the optimizer hook; in the finetune 12 | # process, it must be 'HIGHEST' so that it runs 13 | # before the checkpoint hook 14 | pruning=True, 15 | batch_size=32, 16 | interval=25, 17 | priority='LOWEST', 18 | ) 19 | ] 20 | 21 | data = dict(samples_per_gpu=32, workers_per_gpu=2) -------------------------------------------------------------------------------- /classification/configs/regnet/regnet_6.4G_origin.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/regnet/regnetx_6.4gf.py', 3 | '../_base_/datasets/imagenet_bs32.py', 4 | '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' 5 | ] 6 | 7 | work_dir = "work_dirs/regnet_6.4G_origin" 8 | load_from = "https://download.openmmlab.com/mmclassification/v0/regnet/convert/RegNetX-6.4GF-6888c0ea.pth" 9 | 10 | # dataset settings 11 | dataset_type = 'ImageNet' 12 | 13 | img_norm_cfg = dict( 14 | # The mean and std are used in PyCls when training RegNets 15 | mean=[103.53, 116.28, 123.675], 16 | std=[57.375, 57.12, 58.395], 17 | to_rgb=False) 18 | 19 | train_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict(type='RandomResizedCrop', size=224), 22 | dict(type='RandomFlip',
flip_prob=0.5, direction='horizontal'), 23 | dict(type='Normalize', **img_norm_cfg), 24 | dict(type='ImageToTensor', keys=['img']), 25 | dict(type='ToTensor', keys=['gt_label']), 26 | dict(type='Collect', keys=['img', 'gt_label']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='LoadImageFromFile'), 30 | dict(type='Resize', size=(256, -1)), 31 | dict(type='CenterCrop', crop_size=224), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='ImageToTensor', keys=['img']), 34 | dict(type='Collect', keys=['img']) 35 | ] 36 | data = dict( 37 | samples_per_gpu=32, 38 | workers_per_gpu=2, 39 | train=dict( 40 | type=dataset_type, 41 | data_prefix='data/imagenet/train', 42 | pipeline=train_pipeline), 43 | val=dict( 44 | type=dataset_type, 45 | data_prefix='data/imagenet/val', 46 | ann_file='data/imagenet/meta/val.txt', 47 | pipeline=test_pipeline), 48 | test=dict( 49 | # replace `data/val` with `data/test` for standard test 50 | type=dataset_type, 51 | data_prefix='data/imagenet/val', 52 | ann_file='data/imagenet/meta/val.txt', 53 | pipeline=test_pipeline)) 54 | evaluation = dict(interval=1, metric='accuracy') -------------------------------------------------------------------------------- /classification/configs/resnet50/resnet50_finetune.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/resnet50.py', '../_base_/datasets/imagenet_bs32.py', 3 | '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' 4 | ] 5 | 6 | 7 | custom_hooks = [ 8 | dict(type='FisherPruningHook', 9 | pruning=False, 10 | deploy_from='path to the pruned model') 11 | ] 12 | 13 | work_dir = "work_dirs/resnet50" 14 | optimizer = dict(lr=0.1) 15 | data = dict(samples_per_gpu=256, workers_per_gpu=16) # for single GPU 16 | -------------------------------------------------------------------------------- /classification/configs/resnet50/resnet50_pruning.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/resnet50.py', '../_base_/datasets/imagenet_bs32.py', 3 | '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' 4 | ] 5 | 6 | optimizer = dict(lr=0.004) 7 | 8 | custom_hooks = [ 9 | dict( 10 | type='FisherPruningHook', 11 | # In the pruning process, the priority must be 12 | # 'LOWEST' to ensure the pruning hook runs 13 | # after the optimizer hook; in the finetune 14 | # process, it must be 'HIGHEST' so that it runs 15 | # before the checkpoint hook 16 | pruning=True, 17 | batch_size=32, 18 | interval=25, 19 | priority='LOWEST', 20 | ) 21 | ] 22 | 23 | work_dir = "work_dirs/resnet50" 24 | load_from = "https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth" 25 | -------------------------------------------------------------------------------- /classification/configs/resnext/resnext50_finetune.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/resnext50_32x4d.py', 3 | '../_base_/datasets/imagenet_bs32_pil_resize.py', 4 | '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' 5 | ] 6 | 7 | custom_hooks = [ 8 | dict(type='FisherPruningHook', 9 | pruning=False, 10 | deploy_from='path to the pruned model') 11 | ] 12 | 13 | work_dir = "work_dirs/resnext50" 14 | optimizer = dict(lr=0.1) 15 | data = dict(samples_per_gpu=256, workers_per_gpu=16) # for single GPU 16 |
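The `FisherPruningHook` entries in the configs above come in exactly two flavors. The sketch below (not a file in the repo) restates them side by side for quick reference; every value is copied from the surrounding configs (`interval` is 25 in most pruning configs and 10 for resnext50), and `deploy_from` keeps the same placeholder string the original files use.

```python
# Pruning stage: priority='LOWEST' makes the hook run after OptimizerHook.
pruning_hook = dict(
    type='FisherPruningHook',
    pruning=True,        # accumulate Fisher information and prune channels
    batch_size=32,       # matches samples_per_gpu in the pruning configs
    interval=25,         # pruning interval in iterations (10 for resnext50)
    priority='LOWEST')

# Finetune stage: the hook only rebuilds the pruned structure from a checkpoint.
finetune_hook = dict(
    type='FisherPruningHook',
    pruning=False,
    deploy_from='path to the pruned model')  # placeholder, as in the configs
```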
-------------------------------------------------------------------------------- /classification/configs/resnext/resnext50_pruning.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/resnext50_32x4d.py', 3 | '../_base_/datasets/imagenet_bs32_pil_resize.py', 4 | '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' 5 | ] 6 | 7 | optimizer = dict(lr=0.004) 8 | 9 | custom_hooks = [ 10 | dict( 11 | type='FisherPruningHook', 12 | # In the pruning process, the priority must be 13 | # 'LOWEST' to ensure the pruning hook runs 14 | # after the optimizer hook; in the finetune 15 | # process, it must be 'HIGHEST' so that it runs 16 | # before the checkpoint hook 17 | pruning=True, 18 | batch_size=32, 19 | interval=10, 20 | priority='LOWEST', 21 | ) 22 | ] 23 | work_dir = "work_dirs/resnext50" 24 | load_from = "https://download.openmmlab.com/mmclassification/v0/resnext/resnext50_32x4d_b32x8_imagenet_20210429-56066e27.pth" 25 | -------------------------------------------------------------------------------- /classification/tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29500} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 11 | -------------------------------------------------------------------------------- /classification/tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-29500} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} 10 | -------------------------------------------------------------------------------- /classification/tools/fisher_pruning_hook: -------------------------------------------------------------------------------- 1 | ../../fisher_pruning_hook -------------------------------------------------------------------------------- /classification/tools/model_eval.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import mmcv 3 | import numpy as np 4 | import torch 5 | from mmcls.models import build_classifier 6 | import time 7 | from fisher_pruning_hook import FisherPruningHook 8 | from torch.nn import Conv2d, Linear 9 | from torch.nn.modules.batchnorm import _BatchNorm 10 | from torch.nn.modules.activation import ReLU 11 | from functools import partial 12 | 13 | def parse_args(): 14 | parser = argparse.ArgumentParser(description='mmcls test model') 15 | parser.add_argument('config', help='test config file path') 16 | 17 | parser.add_argument( 18 | '--device', 19 | choices=['cpu', 'cuda'], 20 | default='cuda', 21 | help='device used for testing') 22 | args = parser.parse_args() 23 | 24 | return args 25 | 26 | def speed_test(model, device, batchsize, iterations): 27 | x = torch.randn(batchsize, 3, 224, 224).to(device) 28 | model = model.to(device) 29 | model.eval() 30 | with torch.no_grad(): 31 | start = time.time() 32 | for _ in range(iterations): 33 | _ = model(x) 34 | mid = time.time() 35 | for _ in range(iterations): 36 | _ = model(x) 37 | end = time.time() 38 | return
start, mid, end 39 | 40 | def compute_parameters(model): 41 | params = sum(p.numel() for p in model.parameters()) 42 | return params 43 | 44 | class FlopsActsHook: 45 | def __init__(self, model): 46 | self.flops = {} 47 | self.acts = {} 48 | self.non_registered = [] 49 | for n, m in model.named_modules(): 50 | self.flops[n] = 0 51 | self.acts[n] = 0 52 | 53 | if isinstance(m, Conv2d): m.register_forward_hook(self.forward_hook_conv) 54 | elif isinstance(m, Linear): m.register_forward_hook(self.forward_hook_fc) 55 | elif isinstance(m, _BatchNorm): m.register_forward_hook(self.forward_hook_bn) 56 | elif isinstance(m, ReLU): m.register_forward_hook(self.forward_hook_relu) 57 | else: 58 | # print(n, type(m)) 59 | self.non_registered.append([n, type(m)]) 60 | 61 | def forward_hook_conv(self, module, inputs, outputs): 62 | ic = module.in_channels // module.groups 63 | kh, kw = module.kernel_size 64 | self.flops[module.name] += np.prod([ic, kh, kw, *outputs.shape]) 65 | if module.bias is not None: 66 | self.flops[module.name] += np.prod(outputs.shape) 67 | self.acts[module.name] += np.prod(outputs.shape) 68 | 69 | def forward_hook_fc(self, module, inputs, outputs): 70 | ic = module.in_features 71 | self.flops[module.name] += np.prod([ic, *outputs.shape]) 72 | if module.bias is not None: 73 | self.flops[module.name] += np.prod(outputs.shape) 74 | self.acts[module.name] += np.prod(outputs.shape) 75 | 76 | def forward_hook_bn(self, module, inputs, outputs): 77 | self.flops[module.name] += np.prod(outputs.shape) * (4 if module.affine else 2) 78 | self.acts[module.name] += np.prod(outputs.shape) 79 | 80 | def forward_hook_relu(self, module, inputs, outputs): 81 | self.flops[module.name] += np.prod(outputs.shape) 82 | self.acts[module.name] += 0 if module.inplace else np.prod(outputs.shape) 83 | 84 | def init(self): 85 | for n in self.flops: 86 | self.flops[n] = 0 87 | self.acts[n] = 0 88 | 89 | def summarize(self): 90 | flops, acts = 0, 0 91 | for n in self.flops: 92 | flops += self.flops[n] 93 | acts += self.acts[n] 94 | return flops, acts 95 | 96 | def compute_flops_acts(model, device): 97 | model.eval() 98 | model.to(device) 99 | hook = FlopsActsHook(model) 100 | hook.init() 101 | x = torch.randn(32, 3, 224, 224).to(device) 102 | _ = model(x) 103 | flops, acts = hook.summarize() 104 | return flops / x.size(0), acts / x.size(0) 105 | 106 | def compute_flops_params_thop(model, device): 107 | from thop import profile 108 | model.eval() 109 | model.to(device) 110 | x = torch.randn(32, 3, 224, 224).to(device) 111 | flops, params = profile(model, inputs=(x,)) 112 | return flops / x.size(0), params 113 | 114 | def main(): 115 | args = parse_args() 116 | cfg = mmcv.Config.fromfile(args.config) 117 | # set cudnn_benchmark 118 | if cfg.get('cudnn_benchmark', False): 119 | torch.backends.cudnn.benchmark = True 120 | cfg.device= args.device 121 | 122 | # build the model 123 | model = build_classifier(cfg.model) 124 | model.forward = partial(model.forward, return_loss=False, img_metas=None) 125 | for n, m in model.named_modules(): 126 | m.name = n 127 | 128 | if 'custom_hooks' in cfg: 129 | for hook in cfg.custom_hooks: 130 | if hook.type.startswith('FisherPruningHook'): 131 | hook_cfg = hook.copy() 132 | hook_cfg.pop('priority', None) 133 | from mmcv.runner.hooks import HOOKS 134 | hook_cls = HOOKS.get(hook_cfg['type']) 135 | if hasattr(hook_cls, 'after_build_model'): 136 | pruning_hook = mmcv.build_from_cfg(hook_cfg, HOOKS) 137 | pruning_hook.after_build_model(model, cfg.work_dir) 138 | 139 | # test speed 140 
| batchsize, iterations = (16, 50) if cfg.device == "cpu" else (64, 100) 141 | start, mid, end = speed_test(model, cfg.device, batchsize, iterations) 142 | print(f"time elapsed per iteration with batch size {batchsize}:") 143 | print(f"first {iterations} iterations: {(mid - start) * 1000 / iterations:.3f}ms") 144 | print(f"last {iterations} iterations: {(end - mid) * 1000 / iterations:.3f}ms") 145 | 146 | # flops and acts 147 | flops, acts = compute_flops_acts(model, cfg.device) 148 | print(f"flops: {flops / (10 ** 9):.3f}G") 149 | print(f"memory: {acts / (10 ** 6):.3f}M") 150 | params = compute_parameters(model) 151 | print(f"params: {params / (10 ** 6):.3f}M") 152 | 153 | 154 | if __name__ == "__main__": 155 | main() -------------------------------------------------------------------------------- /classification/tools/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /classification/tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | WORK_DIR=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | PY_ARGS=${@:5} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /classification/tools/train.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse 3 | import copy 4 | import os 5 | import os.path as osp 6 | import time 7 | import warnings 8 | 9 | import mmcv 10 | import torch 11 | from mmcv import Config, DictAction 12 | from mmcv.runner import get_dist_info, init_dist 13 | 14 | from mmcls import __version__ 15 | from mmcls.apis import set_random_seed, train_model 16 | from mmcls.datasets import build_dataset 17 | from mmcls.models import build_classifier 18 | from mmcls.utils import collect_env, get_root_logger 19 | from fisher_pruning_hook import FisherPruningHook 20 | 21 | def parse_args(): 22 | parser = argparse.ArgumentParser(description='Train a model') 23 | parser.add_argument('config', help='train config file path') 24 | parser.add_argument('--work-dir', help='the dir to save logs and models') 25 | parser.add_argument( 26 | '--resume-from', help='the checkpoint file to resume from') 27 | parser.add_argument( 28 | '--no-validate', 29 | action='store_true', 30 | help='whether not to evaluate the checkpoint during training') 31 | group_gpus = parser.add_mutually_exclusive_group() 32 | group_gpus.add_argument('--device', help='device used for training') 33 | group_gpus.add_argument( 34 | '--gpus', 35 | type=int, 36 | help='number of gpus to use ' 37 | '(only applicable to non-distributed training)') 38 | group_gpus.add_argument( 39 | '--gpu-ids', 40 | type=int, 41 | nargs='+', 42 | help='ids of gpus to use ' 43 | '(only applicable to non-distributed training)') 44 | parser.add_argument('--seed', type=int, default=None, help='random seed') 45 | parser.add_argument( 46 | '--deterministic', 47 | action='store_true', 48 | help='whether to set deterministic options for CUDNN backend.') 49 | parser.add_argument( 50 | '--options', 51 | nargs='+', 52 | action=DictAction, 53 | help='override some settings in the used config, the key-value pair ' 54 | 'in xxx=yyy format will be merged into config file (deprecated), ' 55 | 'change to --cfg-options instead.') 56 | parser.add_argument( 57 | '--cfg-options', 58 | nargs='+', 59 | action=DictAction, 60 | help='override some settings in the used config, the key-value pair ' 61 | 'in xxx=yyy format will be merged into config file. If the value to ' 62 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 63 | 'It also allows nested list/tuple values, e.g.
key="[(a,b),(c,d)]" ' 64 | 'Note that the quotation marks are necessary and that no white space ' 65 | 'is allowed.') 66 | parser.add_argument( 67 | '--launcher', 68 | choices=['none', 'pytorch', 'slurm', 'mpi'], 69 | default='none', 70 | help='job launcher') 71 | parser.add_argument('--local_rank', type=int, default=0) 72 | args = parser.parse_args() 73 | if 'LOCAL_RANK' not in os.environ: 74 | os.environ['LOCAL_RANK'] = str(args.local_rank) 75 | 76 | if args.options and args.cfg_options: 77 | raise ValueError( 78 | '--options and --cfg-options cannot be both ' 79 | 'specified, --options is deprecated in favor of --cfg-options') 80 | if args.options: 81 | warnings.warn('--options is deprecated in favor of --cfg-options') 82 | args.cfg_options = args.options 83 | 84 | return args 85 | 86 | 87 | def main(): 88 | args = parse_args() 89 | 90 | cfg = Config.fromfile(args.config) 91 | if args.cfg_options is not None: 92 | cfg.merge_from_dict(args.cfg_options) 93 | # set cudnn_benchmark 94 | if cfg.get('cudnn_benchmark', False): 95 | torch.backends.cudnn.benchmark = True 96 | 97 | # work_dir is determined in this priority: CLI > segment in file > filename 98 | if args.work_dir is not None: 99 | # update configs according to CLI args if args.work_dir is not None 100 | cfg.work_dir = args.work_dir 101 | elif cfg.get('work_dir', None) is None: 102 | # use config filename as default work_dir if cfg.work_dir is None 103 | cfg.work_dir = osp.join('./work_dirs', 104 | osp.splitext(osp.basename(args.config))[0]) 105 | if args.resume_from is not None: 106 | cfg.resume_from = args.resume_from 107 | if args.gpu_ids is not None: 108 | cfg.gpu_ids = args.gpu_ids 109 | else: 110 | cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus) 111 | 112 | # init distributed env first, since logger depends on the dist info. 
113 | if args.launcher == 'none': 114 | distributed = False 115 | else: 116 | distributed = True 117 | init_dist(args.launcher, **cfg.dist_params) 118 | _, world_size = get_dist_info() 119 | cfg.gpu_ids = range(world_size) 120 | 121 | # create work_dir 122 | mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir)) 123 | # dump config 124 | cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config))) 125 | # init the logger before other steps 126 | timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) 127 | log_file = osp.join(cfg.work_dir, f'{timestamp}.log') 128 | logger = get_root_logger(log_file=log_file, log_level=cfg.log_level) 129 | 130 | # init the meta dict to record some important information such as 131 | # environment info and seed, which will be logged 132 | meta = dict() 133 | # log env info 134 | env_info_dict = collect_env() 135 | env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()]) 136 | dash_line = '-' * 60 + '\n' 137 | logger.info('Environment info:\n' + dash_line + env_info + '\n' + 138 | dash_line) 139 | meta['env_info'] = env_info 140 | 141 | # log some basic info 142 | logger.info(f'Distributed training: {distributed}') 143 | logger.info(f'Config:\n{cfg.pretty_text}') 144 | 145 | # set random seeds 146 | if args.seed is not None: 147 | logger.info(f'Set random seed to {args.seed}, ' 148 | f'deterministic: {args.deterministic}') 149 | set_random_seed(args.seed, deterministic=args.deterministic) 150 | cfg.seed = args.seed 151 | meta['seed'] = args.seed 152 | 153 | model = build_classifier(cfg.model) 154 | model.init_weights() 155 | 156 | if 'custom_hooks' in cfg: 157 | for hook in cfg.custom_hooks: 158 | if hook.type.startswith('FisherPruningHook'): 159 | hook_cfg = hook.copy() 160 | hook_cfg.pop('priority', None) 161 | from mmcv.runner.hooks import HOOKS 162 | hook_cls = HOOKS.get(hook_cfg['type']) 163 | if hasattr(hook_cls, 'after_build_model'): 164 | pruning_hook = mmcv.build_from_cfg(hook_cfg, HOOKS) 165 | pruning_hook.after_build_model(model, cfg.work_dir) 166 | 167 | datasets = [build_dataset(cfg.data.train)] 168 | if len(cfg.workflow) == 2: 169 | val_dataset = copy.deepcopy(cfg.data.val) 170 | val_dataset.pipeline = cfg.data.train.pipeline 171 | datasets.append(build_dataset(val_dataset)) 172 | if cfg.checkpoint_config is not None: 173 | # save mmcls version, config file content and class names in 174 | # checkpoints as meta data 175 | cfg.checkpoint_config.meta = dict( 176 | mmcls_version=__version__, 177 | config=cfg.pretty_text, 178 | CLASSES=datasets[0].CLASSES) 179 | # add an attribute for visualization convenience 180 | train_model( 181 | model, 182 | datasets, 183 | cfg, 184 | distributed=distributed, 185 | validate=(not args.no_validate), 186 | timestamp=timestamp, 187 | device='cpu' if args.device == 'cpu' else 'cuda', 188 | meta=meta) 189 | 190 | 191 | if __name__ == '__main__': 192 | main() 193 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/cityscapes_detection.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CityscapesDataset' 3 | data_root = 'data/cityscapes/' 4 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | to_rgb=True) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', with_bbox=True), 10 | dict(type='Resize', img_scale=[(2048, 800), (2048, 1024)], 11 | keep_ratio=True), 12 | dict(type='RandomFlip', 
flip_ratio=0.5), 13 | dict(type='Normalize', **img_norm_cfg), 14 | dict(type='Pad', size_divisor=32), 15 | dict(type='DefaultFormatBundle'), 16 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 17 | ] 18 | test_pipeline = [ 19 | dict(type='LoadImageFromFile'), 20 | dict(type='MultiScaleFlipAug', 21 | img_scale=(2048, 1024), 22 | flip=False, 23 | transforms=[ 24 | dict(type='Resize', keep_ratio=True), 25 | dict(type='RandomFlip'), 26 | dict(type='Normalize', **img_norm_cfg), 27 | dict(type='Pad', size_divisor=32), 28 | dict(type='ImageToTensor', keys=['img']), 29 | dict(type='Collect', keys=['img']), 30 | ]) 31 | ] 32 | data = dict( 33 | samples_per_gpu=1, 34 | workers_per_gpu=2, 35 | train=dict( 36 | type='RepeatDataset', 37 | times=8, 38 | dataset=dict(type=dataset_type, 39 | ann_file=data_root + 40 | 'annotations/instancesonly_filtered_gtFine_train.json', 41 | img_prefix=data_root + 'leftImg8bit/train/', 42 | pipeline=train_pipeline)), 43 | val=dict(type=dataset_type, 44 | ann_file=data_root + 45 | 'annotations/instancesonly_filtered_gtFine_val.json', 46 | img_prefix=data_root + 'leftImg8bit/val/', 47 | pipeline=test_pipeline), 48 | test=dict(type=dataset_type, 49 | ann_file=data_root + 50 | 'annotations/instancesonly_filtered_gtFine_test.json', 51 | img_prefix=data_root + 'leftImg8bit/test/', 52 | pipeline=test_pipeline)) 53 | evaluation = dict(interval=1, metric='bbox') 54 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/cityscapes_instance.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CityscapesDataset' 3 | data_root = 'data/cityscapes/' 4 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | to_rgb=True) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 10 | dict(type='Resize', img_scale=[(2048, 800), (2048, 1024)], 11 | keep_ratio=True), 12 | dict(type='RandomFlip', flip_ratio=0.5), 13 | dict(type='Normalize', **img_norm_cfg), 14 | dict(type='Pad', size_divisor=32), 15 | dict(type='DefaultFormatBundle'), 16 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 17 | ] 18 | test_pipeline = [ 19 | dict(type='LoadImageFromFile'), 20 | dict(type='MultiScaleFlipAug', 21 | img_scale=(2048, 1024), 22 | flip=False, 23 | transforms=[ 24 | dict(type='Resize', keep_ratio=True), 25 | dict(type='RandomFlip'), 26 | dict(type='Normalize', **img_norm_cfg), 27 | dict(type='Pad', size_divisor=32), 28 | dict(type='ImageToTensor', keys=['img']), 29 | dict(type='Collect', keys=['img']), 30 | ]) 31 | ] 32 | data = dict( 33 | samples_per_gpu=1, 34 | workers_per_gpu=2, 35 | train=dict( 36 | type='RepeatDataset', 37 | times=8, 38 | dataset=dict(type=dataset_type, 39 | ann_file=data_root + 40 | 'annotations/instancesonly_filtered_gtFine_train.json', 41 | img_prefix=data_root + 'leftImg8bit/train/', 42 | pipeline=train_pipeline)), 43 | val=dict(type=dataset_type, 44 | ann_file=data_root + 45 | 'annotations/instancesonly_filtered_gtFine_val.json', 46 | img_prefix=data_root + 'leftImg8bit/val/', 47 | pipeline=test_pipeline), 48 | test=dict(type=dataset_type, 49 | ann_file=data_root + 50 | 'annotations/instancesonly_filtered_gtFine_test.json', 51 | img_prefix=data_root + 'leftImg8bit/test/', 52 | pipeline=test_pipeline)) 53 | evaluation = dict(metric=['bbox', 'segm']) 54 | 
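None of these `_base_` dataset files is meant to be run on its own; the trainable configs compose them through mmcv's `_base_` inheritance and override only the keys they name. A minimal sketch of that merge behavior, assuming it is run from the `detection/` directory:

```python
from mmcv import Config

# Load a base dataset config, then override a single nested key; all other
# settings (pipelines, annotation paths, ...) are inherited unchanged.
cfg = Config.fromfile('configs/_base_/datasets/coco_detection.py')
cfg.merge_from_dict({'data.samples_per_gpu': 4})
print(cfg.data.samples_per_gpu)  # -> 4
```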
-------------------------------------------------------------------------------- /detection/configs/_base_/datasets/coco_detection.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CocoDataset' 3 | data_root = 'data/coco/' 4 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | to_rgb=True) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', with_bbox=True), 10 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 11 | dict(type='RandomFlip', flip_ratio=0.5), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size_divisor=32), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict(type='MultiScaleFlipAug', 20 | img_scale=(1333, 800), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=2, 33 | workers_per_gpu=2, 34 | train=dict(type=dataset_type, 35 | ann_file=data_root + 'annotations/instances_train2017.json', 36 | img_prefix=data_root + 'train2017/', 37 | pipeline=train_pipeline), 38 | val=dict(type=dataset_type, 39 | ann_file=data_root + 'annotations/instances_val2017.json', 40 | img_prefix=data_root + 'val2017/', 41 | pipeline=test_pipeline), 42 | test=dict(type=dataset_type, 43 | ann_file=data_root + 'annotations/instances_val2017.json', 44 | img_prefix=data_root + 'val2017/', 45 | pipeline=test_pipeline)) 46 | evaluation = dict(interval=1, metric='bbox') 47 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/coco_instance.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CocoDataset' 3 | data_root = 'data/coco/' 4 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | to_rgb=True) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 10 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 11 | dict(type='RandomFlip', flip_ratio=0.5), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size_divisor=32), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict(type='MultiScaleFlipAug', 20 | img_scale=(1333, 800), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=2, 33 | workers_per_gpu=2, 34 | train=dict(type=dataset_type, 35 | ann_file=data_root + 'annotations/instances_train2017.json', 36 | img_prefix=data_root + 'train2017/', 37 | pipeline=train_pipeline), 38 | val=dict(type=dataset_type, 39 | ann_file=data_root + 'annotations/instances_val2017.json', 40 | img_prefix=data_root + 'val2017/', 41 | 
pipeline=test_pipeline), 42 | test=dict(type=dataset_type, 43 | ann_file=data_root + 'annotations/instances_val2017.json', 44 | img_prefix=data_root + 'val2017/', 45 | pipeline=test_pipeline)) 46 | evaluation = dict(metric=['bbox', 'segm']) 47 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/coco_instance_semantic.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CocoDataset' 3 | data_root = 'data/coco/' 4 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | to_rgb=True) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True, 10 | with_seg=True), 11 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 12 | dict(type='RandomFlip', flip_ratio=0.5), 13 | dict(type='Normalize', **img_norm_cfg), 14 | dict(type='Pad', size_divisor=32), 15 | dict(type='SegRescale', scale_factor=1 / 8), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', 18 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 19 | 'gt_semantic_seg']), 20 | ] 21 | test_pipeline = [ 22 | dict(type='LoadImageFromFile'), 23 | dict(type='MultiScaleFlipAug', 24 | img_scale=(1333, 800), 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip', flip_ratio=0.5), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='Pad', size_divisor=32), 31 | dict(type='ImageToTensor', keys=['img']), 32 | dict(type='Collect', keys=['img']), 33 | ]) 34 | ] 35 | data = dict( 36 | samples_per_gpu=2, 37 | workers_per_gpu=2, 38 | train=dict(type=dataset_type, 39 | ann_file=data_root + 'annotations/instances_train2017.json', 40 | img_prefix=data_root + 'train2017/', 41 | seg_prefix=data_root + 'stuffthingmaps/train2017/', 42 | pipeline=train_pipeline), 43 | val=dict(type=dataset_type, 44 | ann_file=data_root + 'annotations/instances_val2017.json', 45 | img_prefix=data_root + 'val2017/', 46 | pipeline=test_pipeline), 47 | test=dict(type=dataset_type, 48 | ann_file=data_root + 'annotations/instances_val2017.json', 49 | img_prefix=data_root + 'val2017/', 50 | pipeline=test_pipeline)) 51 | evaluation = dict(metric=['bbox', 'segm']) 52 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/deepfashion.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'DeepFashionDataset' 3 | data_root = 'data/DeepFashion/In-shop/' 4 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | to_rgb=True) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 10 | dict(type='Resize', img_scale=(750, 1101), keep_ratio=True), 11 | dict(type='RandomFlip', flip_ratio=0.5), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size_divisor=32), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict(type='MultiScaleFlipAug', 20 | img_scale=(750, 1101), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', 
keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict(imgs_per_gpu=2, 32 | workers_per_gpu=1, 33 | train=dict(type=dataset_type, 34 | ann_file=data_root + 35 | 'annotations/DeepFashion_segmentation_query.json', 36 | img_prefix=data_root + 'Img/', 37 | pipeline=train_pipeline, 38 | data_root=data_root), 39 | val=dict(type=dataset_type, 40 | ann_file=data_root + 41 | 'annotations/DeepFashion_segmentation_query.json', 42 | img_prefix=data_root + 'Img/', 43 | pipeline=test_pipeline, 44 | data_root=data_root), 45 | test=dict(type=dataset_type, 46 | ann_file=data_root + 47 | 'annotations/DeepFashion_segmentation_gallery.json', 48 | img_prefix=data_root + 'Img/', 49 | pipeline=test_pipeline, 50 | data_root=data_root)) 51 | evaluation = dict(interval=5, metric=['bbox', 'segm']) 52 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/lvis_v0.5_instance.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | _base_ = 'coco_instance.py' 3 | dataset_type = 'LVISV05Dataset' 4 | data_root = 'data/lvis_v0.5/' 5 | data = dict(samples_per_gpu=2, 6 | workers_per_gpu=2, 7 | train=dict(_delete_=True, 8 | type='ClassBalancedDataset', 9 | oversample_thr=1e-3, 10 | dataset=dict(type=dataset_type, 11 | ann_file=data_root + 12 | 'annotations/lvis_v0.5_train.json', 13 | img_prefix=data_root + 'train2017/')), 14 | val=dict(type=dataset_type, 15 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 16 | img_prefix=data_root + 'val2017/'), 17 | test=dict(type=dataset_type, 18 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 19 | img_prefix=data_root + 'val2017/')) 20 | evaluation = dict(metric=['bbox', 'segm']) 21 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/lvis_v1_instance.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | _base_ = 'coco_instance.py' 3 | dataset_type = 'LVISV1Dataset' 4 | data_root = 'data/lvis_v1/' 5 | data = dict(samples_per_gpu=2, 6 | workers_per_gpu=2, 7 | train=dict(_delete_=True, 8 | type='ClassBalancedDataset', 9 | oversample_thr=1e-3, 10 | dataset=dict(type=dataset_type, 11 | ann_file=data_root + 12 | 'annotations/lvis_v1_train.json', 13 | img_prefix=data_root)), 14 | val=dict(type=dataset_type, 15 | ann_file=data_root + 'annotations/lvis_v1_val.json', 16 | img_prefix=data_root), 17 | test=dict(type=dataset_type, 18 | ann_file=data_root + 'annotations/lvis_v1_val.json', 19 | img_prefix=data_root)) 20 | evaluation = dict(metric=['bbox', 'segm']) 21 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/voc0712.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'VOCDataset' 3 | data_root = 'data/VOCdevkit/' 4 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | to_rgb=True) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', with_bbox=True), 10 | dict(type='Resize', img_scale=(1000, 600), keep_ratio=True), 11 | dict(type='RandomFlip', flip_ratio=0.5), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size_divisor=32), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 16 | ] 17 | test_pipeline = [ 18 | 
dict(type='LoadImageFromFile'), 19 | dict(type='MultiScaleFlipAug', 20 | img_scale=(1000, 600), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=2, 33 | workers_per_gpu=2, 34 | train=dict(type='RepeatDataset', 35 | times=3, 36 | dataset=dict( 37 | type=dataset_type, 38 | ann_file=[ 39 | data_root + 'VOC2007/ImageSets/Main/trainval.txt', 40 | data_root + 'VOC2012/ImageSets/Main/trainval.txt' 41 | ], 42 | img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'], 43 | pipeline=train_pipeline)), 44 | val=dict(type=dataset_type, 45 | ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', 46 | img_prefix=data_root + 'VOC2007/', 47 | pipeline=test_pipeline), 48 | test=dict(type=dataset_type, 49 | ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', 50 | img_prefix=data_root + 'VOC2007/', 51 | pipeline=test_pipeline)) 52 | evaluation = dict(interval=1, metric='mAP') 53 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/wider_face.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'WIDERFaceDataset' 3 | data_root = 'data/WIDERFace/' 4 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile', to_float32=True), 7 | dict(type='LoadAnnotations', with_bbox=True), 8 | dict(type='PhotoMetricDistortion', 9 | brightness_delta=32, 10 | contrast_range=(0.5, 1.5), 11 | saturation_range=(0.5, 1.5), 12 | hue_delta=18), 13 | dict(type='Expand', 14 | mean=img_norm_cfg['mean'], 15 | to_rgb=img_norm_cfg['to_rgb'], 16 | ratio_range=(1, 4)), 17 | dict(type='MinIoURandomCrop', 18 | min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), 19 | min_crop_size=0.3), 20 | dict(type='Resize', img_scale=(300, 300), keep_ratio=False), 21 | dict(type='Normalize', **img_norm_cfg), 22 | dict(type='RandomFlip', flip_ratio=0.5), 23 | dict(type='DefaultFormatBundle'), 24 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 25 | ] 26 | test_pipeline = [ 27 | dict(type='LoadImageFromFile'), 28 | dict(type='MultiScaleFlipAug', 29 | img_scale=(300, 300), 30 | flip=False, 31 | transforms=[ 32 | dict(type='Resize', keep_ratio=False), 33 | dict(type='Normalize', **img_norm_cfg), 34 | dict(type='ImageToTensor', keys=['img']), 35 | dict(type='Collect', keys=['img']), 36 | ]) 37 | ] 38 | data = dict(samples_per_gpu=60, 39 | workers_per_gpu=2, 40 | train=dict(type='RepeatDataset', 41 | times=2, 42 | dataset=dict(type=dataset_type, 43 | ann_file=data_root + 'train.txt', 44 | img_prefix=data_root + 'WIDER_train/', 45 | min_size=17, 46 | pipeline=train_pipeline)), 47 | val=dict(type=dataset_type, 48 | ann_file=data_root + 'val.txt', 49 | img_prefix=data_root + 'WIDER_val/', 50 | pipeline=test_pipeline), 51 | test=dict(type=dataset_type, 52 | ann_file=data_root + 'val.txt', 53 | img_prefix=data_root + 'WIDER_val/', 54 | pipeline=test_pipeline)) 55 | -------------------------------------------------------------------------------- /detection/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable 3 | log_config = dict( 4 | 
interval=50, 5 | hooks=[ 6 | dict(type='TextLoggerHook'), 7 | # dict(type='TensorboardLoggerHook') 8 | ]) 9 | # yapf:enable 10 | custom_hooks = [dict(type='NumClassCheckHook')] 11 | 12 | dist_params = dict(backend='nccl') 13 | log_level = 'INFO' 14 | load_from = None 15 | resume_from = None 16 | workflow = [('train', 1)] 17 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/cascade_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='CascadeRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict(type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=True), 11 | norm_eval=True, 12 | style='pytorch'), 13 | neck=dict(type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | rpn_head=dict(type='RPNHead', 18 | in_channels=256, 19 | feat_channels=256, 20 | anchor_generator=dict(type='AnchorGenerator', 21 | scales=[8], 22 | ratios=[0.5, 1.0, 2.0], 23 | strides=[4, 8, 16, 32, 64]), 24 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 25 | target_means=[.0, .0, .0, .0], 26 | target_stds=[1.0, 1.0, 1.0, 1.0]), 27 | loss_cls=dict(type='CrossEntropyLoss', 28 | use_sigmoid=True, 29 | loss_weight=1.0), 30 | loss_bbox=dict(type='SmoothL1Loss', 31 | beta=1.0 / 9.0, 32 | loss_weight=1.0)), 33 | roi_head=dict(type='CascadeRoIHead', 34 | num_stages=3, 35 | stage_loss_weights=[1, 0.5, 0.25], 36 | bbox_roi_extractor=dict(type='SingleRoIExtractor', 37 | roi_layer=dict(type='RoIAlign', 38 | output_size=7, 39 | sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=[ 43 | dict(type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 49 | target_means=[0., 0., 0., 0.], 50 | target_stds=[0.1, 0.1, 0.2, 0.2]), 51 | reg_class_agnostic=True, 52 | loss_cls=dict(type='CrossEntropyLoss', 53 | use_sigmoid=False, 54 | loss_weight=1.0), 55 | loss_bbox=dict(type='SmoothL1Loss', 56 | beta=1.0, 57 | loss_weight=1.0)), 58 | dict(type='Shared2FCBBoxHead', 59 | in_channels=256, 60 | fc_out_channels=1024, 61 | roi_feat_size=7, 62 | num_classes=80, 63 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 64 | target_means=[0., 0., 0., 0.], 65 | target_stds=[0.05, 0.05, 0.1, 0.1]), 66 | reg_class_agnostic=True, 67 | loss_cls=dict(type='CrossEntropyLoss', 68 | use_sigmoid=False, 69 | loss_weight=1.0), 70 | loss_bbox=dict(type='SmoothL1Loss', 71 | beta=1.0, 72 | loss_weight=1.0)), 73 | dict(type='Shared2FCBBoxHead', 74 | in_channels=256, 75 | fc_out_channels=1024, 76 | roi_feat_size=7, 77 | num_classes=80, 78 | bbox_coder=dict( 79 | type='DeltaXYWHBBoxCoder', 80 | target_means=[0., 0., 0., 0.], 81 | target_stds=[0.033, 0.033, 0.067, 0.067]), 82 | reg_class_agnostic=True, 83 | loss_cls=dict(type='CrossEntropyLoss', 84 | use_sigmoid=False, 85 | loss_weight=1.0), 86 | loss_bbox=dict(type='SmoothL1Loss', 87 | beta=1.0, 88 | loss_weight=1.0)) 89 | ]), 90 | # model training and testing settings 91 | train_cfg=dict(rpn=dict(assigner=dict(type='MaxIoUAssigner', 92 | pos_iou_thr=0.7, 93 | neg_iou_thr=0.3, 94 | min_pos_iou=0.3, 95 | match_low_quality=True, 96 | ignore_iof_thr=-1), 97 | sampler=dict(type='RandomSampler', 98 | num=256, 99 | pos_fraction=0.5, 100 | neg_pos_ub=-1, 101 | add_gt_as_proposals=False), 102 | 
allowed_border=0, 103 | pos_weight=-1, 104 | debug=False), 105 | rpn_proposal=dict(nms_pre=2000, 106 | max_per_img=2000, 107 | nms=dict(type='nms', iou_threshold=0.7), 108 | min_bbox_size=0), 109 | rcnn=[ 110 | dict(assigner=dict(type='MaxIoUAssigner', 111 | pos_iou_thr=0.5, 112 | neg_iou_thr=0.5, 113 | min_pos_iou=0.5, 114 | match_low_quality=False, 115 | ignore_iof_thr=-1), 116 | sampler=dict(type='RandomSampler', 117 | num=512, 118 | pos_fraction=0.25, 119 | neg_pos_ub=-1, 120 | add_gt_as_proposals=True), 121 | pos_weight=-1, 122 | debug=False), 123 | dict(assigner=dict(type='MaxIoUAssigner', 124 | pos_iou_thr=0.6, 125 | neg_iou_thr=0.6, 126 | min_pos_iou=0.6, 127 | match_low_quality=False, 128 | ignore_iof_thr=-1), 129 | sampler=dict(type='RandomSampler', 130 | num=512, 131 | pos_fraction=0.25, 132 | neg_pos_ub=-1, 133 | add_gt_as_proposals=True), 134 | pos_weight=-1, 135 | debug=False), 136 | dict(assigner=dict(type='MaxIoUAssigner', 137 | pos_iou_thr=0.7, 138 | neg_iou_thr=0.7, 139 | min_pos_iou=0.7, 140 | match_low_quality=False, 141 | ignore_iof_thr=-1), 142 | sampler=dict(type='RandomSampler', 143 | num=512, 144 | pos_fraction=0.25, 145 | neg_pos_ub=-1, 146 | add_gt_as_proposals=True), 147 | pos_weight=-1, 148 | debug=False) 149 | ]), 150 | test_cfg=dict(rpn=dict(nms_pre=1000, 151 | max_per_img=1000, 152 | nms=dict(type='nms', iou_threshold=0.7), 153 | min_bbox_size=0), 154 | rcnn=dict(score_thr=0.05, 155 | nms=dict(type='nms', iou_threshold=0.5), 156 | max_per_img=100))) 157 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/fast_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FastRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict(type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=True), 11 | norm_eval=True, 12 | style='pytorch'), 13 | neck=dict(type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | roi_head=dict(type='StandardRoIHead', 18 | bbox_roi_extractor=dict(type='SingleRoIExtractor', 19 | roi_layer=dict(type='RoIAlign', 20 | output_size=7, 21 | sampling_ratio=0), 22 | out_channels=256, 23 | featmap_strides=[4, 8, 16, 32]), 24 | bbox_head=dict( 25 | type='Shared2FCBBoxHead', 26 | in_channels=256, 27 | fc_out_channels=1024, 28 | roi_feat_size=7, 29 | num_classes=80, 30 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 31 | target_means=[0., 0., 0., 0.], 32 | target_stds=[0.1, 0.1, 0.2, 0.2]), 33 | reg_class_agnostic=False, 34 | loss_cls=dict(type='CrossEntropyLoss', 35 | use_sigmoid=False, 36 | loss_weight=1.0), 37 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 38 | # model training and testing settings 39 | train_cfg=dict(rcnn=dict(assigner=dict(type='MaxIoUAssigner', 40 | pos_iou_thr=0.5, 41 | neg_iou_thr=0.5, 42 | min_pos_iou=0.5, 43 | match_low_quality=False, 44 | ignore_iof_thr=-1), 45 | sampler=dict(type='RandomSampler', 46 | num=512, 47 | pos_fraction=0.25, 48 | neg_pos_ub=-1, 49 | add_gt_as_proposals=True), 50 | pos_weight=-1, 51 | debug=False)), 52 | test_cfg=dict(rcnn=dict(score_thr=0.05, 53 | nms=dict(type='nms', iou_threshold=0.5), 54 | max_per_img=100))) 55 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/faster_rcnn_r50_caffe_c4.py: 
-------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='FasterRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict(type='ResNet', 7 | depth=50, 8 | num_stages=3, 9 | strides=(1, 2, 2), 10 | dilations=(1, 1, 1), 11 | out_indices=(2, ), 12 | frozen_stages=1, 13 | norm_cfg=norm_cfg, 14 | norm_eval=True, 15 | style='caffe'), 16 | rpn_head=dict(type='RPNHead', 17 | in_channels=1024, 18 | feat_channels=1024, 19 | anchor_generator=dict(type='AnchorGenerator', 20 | scales=[2, 4, 8, 16, 32], 21 | ratios=[0.5, 1.0, 2.0], 22 | strides=[16]), 23 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 24 | target_means=[.0, .0, .0, .0], 25 | target_stds=[1.0, 1.0, 1.0, 1.0]), 26 | loss_cls=dict(type='CrossEntropyLoss', 27 | use_sigmoid=True, 28 | loss_weight=1.0), 29 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 30 | roi_head=dict(type='StandardRoIHead', 31 | shared_head=dict(type='ResLayer', 32 | depth=50, 33 | stage=3, 34 | stride=2, 35 | dilation=1, 36 | style='caffe', 37 | norm_cfg=norm_cfg, 38 | norm_eval=True), 39 | bbox_roi_extractor=dict(type='SingleRoIExtractor', 40 | roi_layer=dict(type='RoIAlign', 41 | output_size=14, 42 | sampling_ratio=0), 43 | out_channels=1024, 44 | featmap_strides=[16]), 45 | bbox_head=dict( 46 | type='BBoxHead', 47 | with_avg_pool=True, 48 | roi_feat_size=7, 49 | in_channels=2048, 50 | num_classes=80, 51 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 52 | target_means=[0., 0., 0., 0.], 53 | target_stds=[0.1, 0.1, 0.2, 0.2]), 54 | reg_class_agnostic=False, 55 | loss_cls=dict(type='CrossEntropyLoss', 56 | use_sigmoid=False, 57 | loss_weight=1.0), 58 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 59 | # model training and testing settings 60 | train_cfg=dict(rpn=dict(assigner=dict(type='MaxIoUAssigner', 61 | pos_iou_thr=0.7, 62 | neg_iou_thr=0.3, 63 | min_pos_iou=0.3, 64 | match_low_quality=True, 65 | ignore_iof_thr=-1), 66 | sampler=dict(type='RandomSampler', 67 | num=256, 68 | pos_fraction=0.5, 69 | neg_pos_ub=-1, 70 | add_gt_as_proposals=False), 71 | allowed_border=0, 72 | pos_weight=-1, 73 | debug=False), 74 | rpn_proposal=dict(nms_pre=12000, 75 | max_per_img=2000, 76 | nms=dict(type='nms', iou_threshold=0.7), 77 | min_bbox_size=0), 78 | rcnn=dict(assigner=dict(type='MaxIoUAssigner', 79 | pos_iou_thr=0.5, 80 | neg_iou_thr=0.5, 81 | min_pos_iou=0.5, 82 | match_low_quality=False, 83 | ignore_iof_thr=-1), 84 | sampler=dict(type='RandomSampler', 85 | num=512, 86 | pos_fraction=0.25, 87 | neg_pos_ub=-1, 88 | add_gt_as_proposals=True), 89 | pos_weight=-1, 90 | debug=False)), 91 | test_cfg=dict(rpn=dict(nms_pre=6000, 92 | max_per_img=1000, 93 | nms=dict(type='nms', iou_threshold=0.7), 94 | min_bbox_size=0), 95 | rcnn=dict(score_thr=0.05, 96 | nms=dict(type='nms', iou_threshold=0.5), 97 | max_per_img=100))) 98 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/faster_rcnn_r50_caffe_dc5.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='FasterRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict(type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | strides=(1, 2, 2, 1), 10 | dilations=(1, 1, 1, 2), 11 | out_indices=(3, ), 12 | frozen_stages=1, 13 | norm_cfg=norm_cfg, 14 | norm_eval=True, 15 | 
style='caffe'), 16 | rpn_head=dict(type='RPNHead', 17 | in_channels=2048, 18 | feat_channels=2048, 19 | anchor_generator=dict(type='AnchorGenerator', 20 | scales=[2, 4, 8, 16, 32], 21 | ratios=[0.5, 1.0, 2.0], 22 | strides=[16]), 23 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 24 | target_means=[.0, .0, .0, .0], 25 | target_stds=[1.0, 1.0, 1.0, 1.0]), 26 | loss_cls=dict(type='CrossEntropyLoss', 27 | use_sigmoid=True, 28 | loss_weight=1.0), 29 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 30 | roi_head=dict(type='StandardRoIHead', 31 | bbox_roi_extractor=dict(type='SingleRoIExtractor', 32 | roi_layer=dict(type='RoIAlign', 33 | output_size=7, 34 | sampling_ratio=0), 35 | out_channels=2048, 36 | featmap_strides=[16]), 37 | bbox_head=dict( 38 | type='Shared2FCBBoxHead', 39 | in_channels=2048, 40 | fc_out_channels=1024, 41 | roi_feat_size=7, 42 | num_classes=80, 43 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 44 | target_means=[0., 0., 0., 0.], 45 | target_stds=[0.1, 0.1, 0.2, 0.2]), 46 | reg_class_agnostic=False, 47 | loss_cls=dict(type='CrossEntropyLoss', 48 | use_sigmoid=False, 49 | loss_weight=1.0), 50 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 51 | # model training and testing settings 52 | train_cfg=dict(rpn=dict(assigner=dict(type='MaxIoUAssigner', 53 | pos_iou_thr=0.7, 54 | neg_iou_thr=0.3, 55 | min_pos_iou=0.3, 56 | match_low_quality=True, 57 | ignore_iof_thr=-1), 58 | sampler=dict(type='RandomSampler', 59 | num=256, 60 | pos_fraction=0.5, 61 | neg_pos_ub=-1, 62 | add_gt_as_proposals=False), 63 | allowed_border=0, 64 | pos_weight=-1, 65 | debug=False), 66 | rpn_proposal=dict(nms_pre=12000, 67 | max_per_img=2000, 68 | nms=dict(type='nms', iou_threshold=0.7), 69 | min_bbox_size=0), 70 | rcnn=dict(assigner=dict(type='MaxIoUAssigner', 71 | pos_iou_thr=0.5, 72 | neg_iou_thr=0.5, 73 | min_pos_iou=0.5, 74 | match_low_quality=False, 75 | ignore_iof_thr=-1), 76 | sampler=dict(type='RandomSampler', 77 | num=512, 78 | pos_fraction=0.25, 79 | neg_pos_ub=-1, 80 | add_gt_as_proposals=True), 81 | pos_weight=-1, 82 | debug=False)), 83 | test_cfg=dict(rpn=dict(nms=dict(type='nms', iou_threshold=0.7), 84 | nms_pre=6000, 85 | max_per_img=1000, 86 | min_bbox_size=0), 87 | rcnn=dict(score_thr=0.05, 88 | nms=dict(type='nms', iou_threshold=0.5), 89 | max_per_img=100))) 90 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/faster_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FasterRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict(type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=True), 11 | norm_eval=True, 12 | style='pytorch'), 13 | neck=dict(type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | rpn_head=dict(type='RPNHead', 18 | in_channels=256, 19 | feat_channels=256, 20 | anchor_generator=dict(type='AnchorGenerator', 21 | scales=[8], 22 | ratios=[0.5, 1.0, 2.0], 23 | strides=[4, 8, 16, 32, 64]), 24 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 25 | target_means=[.0, .0, .0, .0], 26 | target_stds=[1.0, 1.0, 1.0, 1.0]), 27 | loss_cls=dict(type='CrossEntropyLoss', 28 | use_sigmoid=True, 29 | loss_weight=1.0), 30 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 31 | roi_head=dict(type='StandardRoIHead', 32 | bbox_roi_extractor=dict(type='SingleRoIExtractor', 33 | 
roi_layer=dict(type='RoIAlign', 34 | output_size=7, 35 | sampling_ratio=0), 36 | out_channels=256, 37 | featmap_strides=[4, 8, 16, 32]), 38 | bbox_head=dict( 39 | type='Shared2FCBBoxHead', 40 | in_channels=256, 41 | fc_out_channels=1024, 42 | roi_feat_size=7, 43 | num_classes=80, 44 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 45 | target_means=[0., 0., 0., 0.], 46 | target_stds=[0.1, 0.1, 0.2, 0.2]), 47 | reg_class_agnostic=False, 48 | loss_cls=dict(type='CrossEntropyLoss', 49 | use_sigmoid=False, 50 | loss_weight=1.0), 51 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 52 | # model training and testing settings 53 | train_cfg=dict(rpn=dict(assigner=dict(type='MaxIoUAssigner', 54 | pos_iou_thr=0.7, 55 | neg_iou_thr=0.3, 56 | min_pos_iou=0.3, 57 | match_low_quality=True, 58 | ignore_iof_thr=-1), 59 | sampler=dict(type='RandomSampler', 60 | num=256, 61 | pos_fraction=0.5, 62 | neg_pos_ub=-1, 63 | add_gt_as_proposals=False), 64 | allowed_border=-1, 65 | pos_weight=-1, 66 | debug=False), 67 | rpn_proposal=dict(nms_pre=2000, 68 | max_per_img=1000, 69 | nms=dict(type='nms', iou_threshold=0.7), 70 | min_bbox_size=0), 71 | rcnn=dict(assigner=dict(type='MaxIoUAssigner', 72 | pos_iou_thr=0.5, 73 | neg_iou_thr=0.5, 74 | min_pos_iou=0.5, 75 | match_low_quality=False, 76 | ignore_iof_thr=-1), 77 | sampler=dict(type='RandomSampler', 78 | num=512, 79 | pos_fraction=0.25, 80 | neg_pos_ub=-1, 81 | add_gt_as_proposals=True), 82 | pos_weight=-1, 83 | debug=False)), 84 | test_cfg=dict( 85 | rpn=dict(nms_pre=1000, 86 | max_per_img=1000, 87 | nms=dict(type='nms', iou_threshold=0.7), 88 | min_bbox_size=0), 89 | rcnn=dict(score_thr=0.05, 90 | nms=dict(type='nms', iou_threshold=0.5), 91 | max_per_img=100) 92 | # soft-nms is also supported for rcnn testing 93 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) 94 | )) 95 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/mask_rcnn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='MaskRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict(type='ResNet', 7 | depth=50, 8 | num_stages=3, 9 | strides=(1, 2, 2), 10 | dilations=(1, 1, 1), 11 | out_indices=(2, ), 12 | frozen_stages=1, 13 | norm_cfg=norm_cfg, 14 | norm_eval=True, 15 | style='caffe'), 16 | rpn_head=dict(type='RPNHead', 17 | in_channels=1024, 18 | feat_channels=1024, 19 | anchor_generator=dict(type='AnchorGenerator', 20 | scales=[2, 4, 8, 16, 32], 21 | ratios=[0.5, 1.0, 2.0], 22 | strides=[16]), 23 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 24 | target_means=[.0, .0, .0, .0], 25 | target_stds=[1.0, 1.0, 1.0, 1.0]), 26 | loss_cls=dict(type='CrossEntropyLoss', 27 | use_sigmoid=True, 28 | loss_weight=1.0), 29 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 30 | roi_head=dict(type='StandardRoIHead', 31 | shared_head=dict(type='ResLayer', 32 | depth=50, 33 | stage=3, 34 | stride=2, 35 | dilation=1, 36 | style='caffe', 37 | norm_cfg=norm_cfg, 38 | norm_eval=True), 39 | bbox_roi_extractor=dict(type='SingleRoIExtractor', 40 | roi_layer=dict(type='RoIAlign', 41 | output_size=14, 42 | sampling_ratio=0), 43 | out_channels=1024, 44 | featmap_strides=[16]), 45 | bbox_head=dict( 46 | type='BBoxHead', 47 | with_avg_pool=True, 48 | roi_feat_size=7, 49 | in_channels=2048, 50 | num_classes=80, 51 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 52 | 
target_means=[0., 0., 0., 0.], 53 | target_stds=[0.1, 0.1, 0.2, 0.2]), 54 | reg_class_agnostic=False, 55 | loss_cls=dict(type='CrossEntropyLoss', 56 | use_sigmoid=False, 57 | loss_weight=1.0), 58 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 59 | mask_roi_extractor=None, 60 | mask_head=dict(type='FCNMaskHead', 61 | num_convs=0, 62 | in_channels=2048, 63 | conv_out_channels=256, 64 | num_classes=80, 65 | loss_mask=dict(type='CrossEntropyLoss', 66 | use_mask=True, 67 | loss_weight=1.0))), 68 | # model training and testing settings 69 | train_cfg=dict(rpn=dict(assigner=dict(type='MaxIoUAssigner', 70 | pos_iou_thr=0.7, 71 | neg_iou_thr=0.3, 72 | min_pos_iou=0.3, 73 | match_low_quality=True, 74 | ignore_iof_thr=-1), 75 | sampler=dict(type='RandomSampler', 76 | num=256, 77 | pos_fraction=0.5, 78 | neg_pos_ub=-1, 79 | add_gt_as_proposals=False), 80 | allowed_border=0, 81 | pos_weight=-1, 82 | debug=False), 83 | rpn_proposal=dict(nms_pre=12000, 84 | max_per_img=2000, 85 | nms=dict(type='nms', iou_threshold=0.7), 86 | min_bbox_size=0), 87 | rcnn=dict(assigner=dict(type='MaxIoUAssigner', 88 | pos_iou_thr=0.5, 89 | neg_iou_thr=0.5, 90 | min_pos_iou=0.5, 91 | match_low_quality=False, 92 | ignore_iof_thr=-1), 93 | sampler=dict(type='RandomSampler', 94 | num=512, 95 | pos_fraction=0.25, 96 | neg_pos_ub=-1, 97 | add_gt_as_proposals=True), 98 | mask_size=14, 99 | pos_weight=-1, 100 | debug=False)), 101 | test_cfg=dict(rpn=dict(nms_pre=6000, 102 | nms=dict(type='nms', iou_threshold=0.7), 103 | max_per_img=1000, 104 | min_bbox_size=0), 105 | rcnn=dict(score_thr=0.05, 106 | nms=dict(type='nms', iou_threshold=0.5), 107 | max_per_img=100, 108 | mask_thr_binary=0.5))) 109 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/mask_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='MaskRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict(type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=True), 11 | norm_eval=True, 12 | style='pytorch'), 13 | neck=dict(type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | rpn_head=dict(type='RPNHead', 18 | in_channels=256, 19 | feat_channels=256, 20 | anchor_generator=dict(type='AnchorGenerator', 21 | scales=[8], 22 | ratios=[0.5, 1.0, 2.0], 23 | strides=[4, 8, 16, 32, 64]), 24 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 25 | target_means=[.0, .0, .0, .0], 26 | target_stds=[1.0, 1.0, 1.0, 1.0]), 27 | loss_cls=dict(type='CrossEntropyLoss', 28 | use_sigmoid=True, 29 | loss_weight=1.0), 30 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 31 | roi_head=dict(type='StandardRoIHead', 32 | bbox_roi_extractor=dict(type='SingleRoIExtractor', 33 | roi_layer=dict(type='RoIAlign', 34 | output_size=7, 35 | sampling_ratio=0), 36 | out_channels=256, 37 | featmap_strides=[4, 8, 16, 32]), 38 | bbox_head=dict( 39 | type='Shared2FCBBoxHead', 40 | in_channels=256, 41 | fc_out_channels=1024, 42 | roi_feat_size=7, 43 | num_classes=80, 44 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 45 | target_means=[0., 0., 0., 0.], 46 | target_stds=[0.1, 0.1, 0.2, 0.2]), 47 | reg_class_agnostic=False, 48 | loss_cls=dict(type='CrossEntropyLoss', 49 | use_sigmoid=False, 50 | loss_weight=1.0), 51 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 52 | mask_roi_extractor=dict(type='SingleRoIExtractor', 53 | 
roi_layer=dict(type='RoIAlign', 54 | output_size=14, 55 | sampling_ratio=0), 56 | out_channels=256, 57 | featmap_strides=[4, 8, 16, 32]), 58 | mask_head=dict(type='FCNMaskHead', 59 | num_convs=4, 60 | in_channels=256, 61 | conv_out_channels=256, 62 | num_classes=80, 63 | loss_mask=dict(type='CrossEntropyLoss', 64 | use_mask=True, 65 | loss_weight=1.0))), 66 | # model training and testing settings 67 | train_cfg=dict(rpn=dict(assigner=dict(type='MaxIoUAssigner', 68 | pos_iou_thr=0.7, 69 | neg_iou_thr=0.3, 70 | min_pos_iou=0.3, 71 | match_low_quality=True, 72 | ignore_iof_thr=-1), 73 | sampler=dict(type='RandomSampler', 74 | num=256, 75 | pos_fraction=0.5, 76 | neg_pos_ub=-1, 77 | add_gt_as_proposals=False), 78 | allowed_border=-1, 79 | pos_weight=-1, 80 | debug=False), 81 | rpn_proposal=dict(nms_pre=2000, 82 | max_per_img=1000, 83 | nms=dict(type='nms', iou_threshold=0.7), 84 | min_bbox_size=0), 85 | rcnn=dict(assigner=dict(type='MaxIoUAssigner', 86 | pos_iou_thr=0.5, 87 | neg_iou_thr=0.5, 88 | min_pos_iou=0.5, 89 | match_low_quality=True, 90 | ignore_iof_thr=-1), 91 | sampler=dict(type='RandomSampler', 92 | num=512, 93 | pos_fraction=0.25, 94 | neg_pos_ub=-1, 95 | add_gt_as_proposals=True), 96 | mask_size=28, 97 | pos_weight=-1, 98 | debug=False)), 99 | test_cfg=dict(rpn=dict(nms_pre=1000, 100 | max_per_img=1000, 101 | nms=dict(type='nms', iou_threshold=0.7), 102 | min_bbox_size=0), 103 | rcnn=dict(score_thr=0.05, 104 | nms=dict(type='nms', iou_threshold=0.5), 105 | max_per_img=100, 106 | mask_thr_binary=0.5))) 107 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/retinanet_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict(type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=True), 11 | norm_eval=True, 12 | style='pytorch'), 13 | neck=dict(type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | start_level=1, 17 | add_extra_convs='on_input', 18 | num_outs=5), 19 | bbox_head=dict(type='RetinaHead', 20 | num_classes=80, 21 | in_channels=256, 22 | stacked_convs=4, 23 | feat_channels=256, 24 | anchor_generator=dict(type='AnchorGenerator', 25 | octave_base_scale=4, 26 | scales_per_octave=3, 27 | ratios=[0.5, 1.0, 2.0], 28 | strides=[8, 16, 32, 64, 128]), 29 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict(type='FocalLoss', 33 | use_sigmoid=True, 34 | gamma=2.0, 35 | alpha=0.25, 36 | loss_weight=1.0), 37 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 38 | # model training and testing settings 39 | train_cfg=dict(assigner=dict(type='MaxIoUAssigner', 40 | pos_iou_thr=0.5, 41 | neg_iou_thr=0.4, 42 | min_pos_iou=0, 43 | ignore_iof_thr=-1), 44 | allowed_border=-1, 45 | pos_weight=-1, 46 | debug=False), 47 | test_cfg=dict(nms_pre=1000, 48 | min_bbox_size=0, 49 | score_thr=0.05, 50 | nms=dict(type='nms', iou_threshold=0.5), 51 | max_per_img=100)) 52 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/rpn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='open-mmlab://detectron2/resnet50_caffe', 
5 | backbone=dict(type='ResNet', 6 | depth=50, 7 | num_stages=3, 8 | strides=(1, 2, 2), 9 | dilations=(1, 1, 1), 10 | out_indices=(2, ), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=False), 13 | norm_eval=True, 14 | style='caffe'), 15 | neck=None, 16 | rpn_head=dict(type='RPNHead', 17 | in_channels=1024, 18 | feat_channels=1024, 19 | anchor_generator=dict(type='AnchorGenerator', 20 | scales=[2, 4, 8, 16, 32], 21 | ratios=[0.5, 1.0, 2.0], 22 | strides=[16]), 23 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 24 | target_means=[.0, .0, .0, .0], 25 | target_stds=[1.0, 1.0, 1.0, 1.0]), 26 | loss_cls=dict(type='CrossEntropyLoss', 27 | use_sigmoid=True, 28 | loss_weight=1.0), 29 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 30 | # model training and testing settings 31 | train_cfg=dict(rpn=dict(assigner=dict(type='MaxIoUAssigner', 32 | pos_iou_thr=0.7, 33 | neg_iou_thr=0.3, 34 | min_pos_iou=0.3, 35 | ignore_iof_thr=-1), 36 | sampler=dict(type='RandomSampler', 37 | num=256, 38 | pos_fraction=0.5, 39 | neg_pos_ub=-1, 40 | add_gt_as_proposals=False), 41 | allowed_border=0, 42 | pos_weight=-1, 43 | debug=False)), 44 | test_cfg=dict(rpn=dict(nms_pre=12000, 45 | max_per_img=2000, 46 | nms=dict(type='nms', iou_threshold=0.7), 47 | min_bbox_size=0))) 48 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/rpn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict(type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=True), 11 | norm_eval=True, 12 | style='pytorch'), 13 | neck=dict(type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | rpn_head=dict(type='RPNHead', 18 | in_channels=256, 19 | feat_channels=256, 20 | anchor_generator=dict(type='AnchorGenerator', 21 | scales=[8], 22 | ratios=[0.5, 1.0, 2.0], 23 | strides=[4, 8, 16, 32, 64]), 24 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 25 | target_means=[.0, .0, .0, .0], 26 | target_stds=[1.0, 1.0, 1.0, 1.0]), 27 | loss_cls=dict(type='CrossEntropyLoss', 28 | use_sigmoid=True, 29 | loss_weight=1.0), 30 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 31 | # model training and testing settings 32 | train_cfg=dict(rpn=dict(assigner=dict(type='MaxIoUAssigner', 33 | pos_iou_thr=0.7, 34 | neg_iou_thr=0.3, 35 | min_pos_iou=0.3, 36 | ignore_iof_thr=-1), 37 | sampler=dict(type='RandomSampler', 38 | num=256, 39 | pos_fraction=0.5, 40 | neg_pos_ub=-1, 41 | add_gt_as_proposals=False), 42 | allowed_border=0, 43 | pos_weight=-1, 44 | debug=False)), 45 | test_cfg=dict(rpn=dict(nms_pre=2000, 46 | max_per_img=1000, 47 | nms=dict(type='nms', iou_threshold=0.7), 48 | min_bbox_size=0))) 49 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/ssd300.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | input_size = 300 3 | model = dict( 4 | type='SingleStageDetector', 5 | pretrained='open-mmlab://vgg16_caffe', 6 | backbone=dict(type='SSDVGG', 7 | input_size=input_size, 8 | depth=16, 9 | with_last_pool=False, 10 | ceil_mode=True, 11 | out_indices=(3, 4), 12 | out_feature_indices=(22, 34), 13 | l2_norm_scale=20), 14 | neck=None, 15 | bbox_head=dict(type='SSDHead', 16 | in_channels=(512, 1024, 
512, 256, 256, 256), 17 | num_classes=80, 18 | anchor_generator=dict(type='SSDAnchorGenerator', 19 | scale_major=False, 20 | input_size=input_size, 21 | basesize_ratio_range=(0.15, 0.9), 22 | strides=[8, 16, 32, 64, 100, 300], 23 | ratios=[[2], [2, 3], [2, 3], [2, 3], 24 | [2], [2]]), 25 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 26 | target_means=[.0, .0, .0, .0], 27 | target_stds=[0.1, 0.1, 0.2, 0.2])), 28 | # model training and testing settings 29 | train_cfg=dict(assigner=dict(type='MaxIoUAssigner', 30 | pos_iou_thr=0.5, 31 | neg_iou_thr=0.5, 32 | min_pos_iou=0., 33 | ignore_iof_thr=-1, 34 | gt_max_assign_all=False), 35 | smoothl1_beta=1., 36 | allowed_border=-1, 37 | pos_weight=-1, 38 | neg_pos_ratio=3, 39 | debug=False), 40 | test_cfg=dict(nms_pre=1000, 41 | nms=dict(type='nms', iou_threshold=0.45), 42 | min_bbox_size=0, 43 | score_thr=0.02, 44 | max_per_img=200)) 45 | cudnn_benchmark = True 46 | -------------------------------------------------------------------------------- /detection/configs/_base_/schedules/schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict(policy='step', 6 | warmup='linear', 7 | warmup_iters=500, 8 | warmup_ratio=0.001, 9 | step=[8, 11]) 10 | runner = dict(type='EpochBasedRunner', max_epochs=12) 11 | -------------------------------------------------------------------------------- /detection/configs/_base_/schedules/schedule_20e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict(policy='step', 6 | warmup='linear', 7 | warmup_iters=500, 8 | warmup_ratio=0.001, 9 | step=[16, 19]) 10 | runner = dict(type='EpochBasedRunner', max_epochs=20) 11 | -------------------------------------------------------------------------------- /detection/configs/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict(policy='step', 6 | warmup='linear', 7 | warmup_iters=500, 8 | warmup_ratio=0.001, 9 | step=[16, 22]) 10 | runner = dict(type='EpochBasedRunner', max_epochs=24) 11 | -------------------------------------------------------------------------------- /detection/configs/atss/atss_finetune.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/coco_detection.py', 3 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 4 | ] 5 | model = dict( 6 | type='ATSS', 7 | pretrained='torchvision://resnet50', 8 | backbone=dict(type='ResNet', 9 | depth=50, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=-1, 13 | norm_cfg=dict(type='BN', requires_grad=True), 14 | norm_eval=True, 15 | style='pytorch'), 16 | neck=dict(type='FPN', 17 | in_channels=[256, 512, 1024, 2048], 18 | out_channels=256, 19 | start_level=1, 20 | add_extra_convs='on_output', 21 | num_outs=5), 22 | bbox_head=dict(type='ATSSHead', 23 | num_classes=80, 24 | in_channels=256, 25 | norm_cfg=None, 26 | stacked_convs=4, 27 | feat_channels=256, 28 | 
anchor_generator=dict(type='AnchorGenerator', 29 | ratios=[1.0], 30 | octave_base_scale=8, 31 | scales_per_octave=1, 32 | strides=[8, 16, 32, 64, 128]), 33 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 34 | target_means=[.0, .0, .0, .0], 35 | target_stds=[0.1, 0.1, 0.2, 0.2]), 36 | loss_cls=dict(type='FocalLoss', 37 | use_sigmoid=True, 38 | gamma=2.0, 39 | alpha=0.25, 40 | loss_weight=1.0), 41 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 42 | loss_centerness=dict(type='CrossEntropyLoss', 43 | use_sigmoid=True, 44 | loss_weight=1.0)), 45 | # training and testing settings 46 | train_cfg=dict(assigner=dict(type='ATSSAssigner', topk=9), 47 | allowed_border=-1, 48 | pos_weight=-1, 49 | debug=False), 50 | test_cfg=dict(nms_pre=1000, 51 | min_bbox_size=0, 52 | score_thr=0.05, 53 | nms=dict(type='nms', iou_threshold=0.6), 54 | max_per_img=100)) 55 | # optimizer 56 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) 57 | 58 | custom_hooks = [ 59 | dict(type='FisherPruningHook', 60 | pruning=False, 61 | deploy_from='path to the pruned model') 62 | ] 63 | -------------------------------------------------------------------------------- /detection/configs/atss/atss_pruning.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/coco_detection.py', 3 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 4 | ] 5 | model = dict( 6 | type='ATSS', 7 | pretrained='torchvision://resnet50', 8 | backbone=dict(type='ResNet', 9 | depth=50, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=-1, 13 | norm_cfg=dict(type='BN', requires_grad=True), 14 | norm_eval=True, 15 | style='pytorch'), 16 | neck=dict(type='FPN', 17 | in_channels=[256, 512, 1024, 2048], 18 | out_channels=256, 19 | start_level=1, 20 | add_extra_convs='on_output', 21 | num_outs=5), 22 | bbox_head=dict(type='ATSSHead', 23 | num_classes=80, 24 | in_channels=256, 25 | norm_cfg=None, 26 | stacked_convs=4, 27 | feat_channels=256, 28 | anchor_generator=dict(type='AnchorGenerator', 29 | ratios=[1.0], 30 | octave_base_scale=8, 31 | scales_per_octave=1, 32 | strides=[8, 16, 32, 64, 128]), 33 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 34 | target_means=[.0, .0, .0, .0], 35 | target_stds=[0.1, 0.1, 0.2, 0.2]), 36 | loss_cls=dict(type='FocalLoss', 37 | use_sigmoid=True, 38 | gamma=2.0, 39 | alpha=0.25, 40 | loss_weight=1.0), 41 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 42 | loss_centerness=dict(type='CrossEntropyLoss', 43 | use_sigmoid=True, 44 | loss_weight=1.0)), 45 | # training and testing settings 46 | train_cfg=dict(assigner=dict(type='ATSSAssigner', topk=9), 47 | allowed_border=-1, 48 | pos_weight=-1, 49 | debug=False), 50 | test_cfg=dict(nms_pre=1000, 51 | min_bbox_size=0, 52 | score_thr=0.05, 53 | nms=dict(type='nms', iou_threshold=0.6), 54 | max_per_img=100)) 55 | # optimizer 56 | optimizer = dict(type='SGD', lr=0.002, momentum=0.9, weight_decay=0.0001) 57 | 58 | custom_hooks = [ 59 | dict( 60 | type='FisherPruningHook', 61 | # In the pruning process, you need to set the priority 62 | # to 'LOWEST' to ensure the pruning hook is executed 63 | # after the optimizer hook; in the finetune process, you 64 | # should set it to 'HIGHEST' to ensure it is executed 65 | # before the checkpoint hook. 66 | pruning=True, 67 | batch_size=2, 68 | interval=10, 69 | priority='LOWEST', 70 | ) 71 | ] 72 | load_from = 'path to the baseline' # noqa: E501 73 | --------------------------------------------------------------------------------
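Every pruning/finetune config pair in this repo follows the hook pattern shown above: during pruning the hook carries `priority='LOWEST'` so it runs after the optimizer hook, and during finetuning the config comments recommend `'HIGHEST'` so the pruned structure is rebuilt before the checkpoint hook saves weights. A condensed, hedged sketch of the two phases — paths are placeholders, and the parameter comments paraphrase the config comments rather than `fisher_pruning.py` itself:

```python
# Phase 1 (pruning): accumulate Fisher information and prune periodically.
# Must run after each optimizer step, hence priority='LOWEST'.
pruning_hook = dict(
    type='FisherPruningHook',
    pruning=True,
    batch_size=2,   # samples per GPU assumed when normalizing the estimate
    interval=10,    # prune every 10 iterations
    priority='LOWEST')

# Phase 2 (finetune): restore the slimmed architecture from the pruned
# checkpoint before training resumes; the finetune configs in this repo
# omit `priority` and rely on the hook default, while the comment above
# recommends priority='HIGHEST'.
finetune_hook = dict(
    type='FisherPruningHook',
    pruning=False,
    deploy_from='path to the pruned model')  # placeholder, as in the configs

custom_hooks = [pruning_hook]    # used by the *_pruning.py configs
# custom_hooks = [finetune_hook]  # used by the *_finetune.py configs
```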
/detection/configs/faster_rcnn/faster_finetune.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/faster_rcnn_r50_fpn.py', 3 | '../_base_/datasets/coco_detection.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | # optimizer 7 | optimizer = dict(lr=0.01) 8 | custom_hooks = [ 9 | dict(type='FisherPruningHook', 10 | pruning=False, 11 | deploy_from='path to the pruned model') 12 | ] 13 | # 14 | model = dict(backbone=dict(frozen_stages=-1, )) 15 | work_dir = "work_dirs/faster_rcnn" 16 | -------------------------------------------------------------------------------- /detection/configs/faster_rcnn/faster_pruning.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/faster_rcnn_r50_fpn.py', 3 | '../_base_/datasets/coco_detection.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | # optimizer 7 | optimizer = dict(lr=0.002) 8 | custom_hooks = [ 9 | dict( 10 | type='FisherPruningHook', 11 | # In the pruning process, you need to set the priority 12 | # to 'LOWEST' to ensure the pruning hook is executed 13 | # after the optimizer hook; in the finetune process, you 14 | # should set it to 'HIGHEST' to ensure it is executed 15 | # before the checkpoint hook. 16 | pruning=True, 17 | batch_size=2, 18 | interval=10, 19 | priority='LOWEST', 20 | ) 21 | ] 22 | # 23 | work_dir = "work_dirs/faster_rcnn" 24 | model = dict(backbone=dict(frozen_stages=-1, )) 25 | load_from = 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth' # noqa: E501 26 | -------------------------------------------------------------------------------- /detection/configs/fsaf/fsaf_finetune.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/retinanet_r50_fpn.py', 3 | '../_base_/datasets/coco_detection.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | # model settings 7 | model = dict( 8 | type='FSAF', 9 | backbone=dict(frozen_stages=-1, ), 10 | bbox_head=dict( 11 | type='FSAFHead', 12 | num_classes=80, 13 | in_channels=256, 14 | stacked_convs=4, 15 | feat_channels=256, 16 | reg_decoded_bbox=True, 17 | # Only the anchor-free branch is implemented. The anchor generator only 18 | # generates 1 anchor at each feature point, as a substitute for the 19 | # grid of features.
20 | anchor_generator=dict(type='AnchorGenerator', 21 | octave_base_scale=1, 22 | scales_per_octave=1, 23 | ratios=[1.0], 24 | strides=[8, 16, 32, 64, 128]), 25 | bbox_coder=dict(_delete_=True, type='TBLRBBoxCoder', normalizer=4.0), 26 | loss_cls=dict(type='FocalLoss', 27 | use_sigmoid=True, 28 | gamma=2.0, 29 | alpha=0.25, 30 | loss_weight=1.0, 31 | reduction='none'), 32 | loss_bbox=dict(_delete_=True, 33 | type='IoULoss', 34 | eps=1e-6, 35 | loss_weight=1.0, 36 | reduction='none')), 37 | # training and testing settings 38 | train_cfg=dict(assigner=dict(_delete_=True, 39 | type='CenterRegionAssigner', 40 | pos_scale=0.2, 41 | neg_scale=0.2, 42 | min_pos_iof=0.01), 43 | allowed_border=-1, 44 | pos_weight=-1, 45 | debug=False)) 46 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) 47 | optimizer_config = dict(_delete_=True, 48 | grad_clip=dict(max_norm=10, norm_type=2)) 49 | 50 | custom_hooks = [ 51 | dict(type='FisherPruningHook', 52 | pruning=False, 53 | deploy_from='path to the pruned model') 54 | ] 55 | work_dir = "work_dirs/fsaf" 56 | 57 | -------------------------------------------------------------------------------- /detection/configs/fsaf/fsaf_pruning.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/retinanet_r50_fpn.py', 3 | '../_base_/datasets/coco_detection.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | # model settings 7 | model = dict( 8 | type='FSAF', 9 | backbone=dict(frozen_stages=-1, ), 10 | bbox_head=dict( 11 | type='FSAFHead', 12 | num_classes=80, 13 | in_channels=256, 14 | stacked_convs=4, 15 | feat_channels=256, 16 | reg_decoded_bbox=True, 17 | # Only the anchor-free branch is implemented. The anchor generator only 18 | # generates 1 anchor at each feature point, as a substitute for the 19 | # grid of features.
20 | anchor_generator=dict(type='AnchorGenerator', 21 | octave_base_scale=1, 22 | scales_per_octave=1, 23 | ratios=[1.0], 24 | strides=[8, 16, 32, 64, 128]), 25 | bbox_coder=dict(_delete_=True, type='TBLRBBoxCoder', normalizer=4.0), 26 | loss_cls=dict(type='FocalLoss', 27 | use_sigmoid=True, 28 | gamma=2.0, 29 | alpha=0.25, 30 | loss_weight=1.0, 31 | reduction='none'), 32 | loss_bbox=dict(_delete_=True, 33 | type='IoULoss', 34 | eps=1e-6, 35 | loss_weight=1.0, 36 | reduction='none')), 37 | # training and testing settings 38 | train_cfg=dict(assigner=dict(_delete_=True, 39 | type='CenterRegionAssigner', 40 | pos_scale=0.2, 41 | neg_scale=0.2, 42 | min_pos_iof=0.01), 43 | allowed_border=-1, 44 | pos_weight=-1, 45 | debug=False)) 46 | optimizer = dict(type='SGD', lr=0.002, momentum=0.9, weight_decay=0.0001) 47 | optimizer_config = dict(_delete_=True, 48 | grad_clip=dict(max_norm=10, norm_type=2)) 49 | 50 | custom_hooks = [ 51 | dict( 52 | type='FisherPruningHook', 53 | # In the pruning process, you need to set the priority 54 | # to 'LOWEST' to ensure the pruning hook is executed 55 | # after the optimizer hook; in the finetune process, you 56 | # should set it to 'HIGHEST' to ensure it is executed 57 | # before the checkpoint hook. 58 | pruning=True, 59 | batch_size=2, 60 | interval=10, 61 | priority='LOWEST', 62 | ) 63 | ] 64 | load_from = 'https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r50_fpn_1x_coco/fsaf_r50_fpn_1x_coco-94ccc51f.pth' # noqa: E501 65 | work_dir = "work_dirs/fsaf" 66 | -------------------------------------------------------------------------------- /detection/configs/paa/paa_finetune.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/coco_detection.py', 3 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 4 | ] 5 | model = dict( 6 | type='PAA', 7 | pretrained='torchvision://resnet50', 8 | backbone=dict(type='ResNet', 9 | depth=50, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=-1, 13 | norm_cfg=dict(type='BN', requires_grad=True), 14 | norm_eval=True, 15 | style='pytorch'), 16 | neck=dict(type='FPN', 17 | in_channels=[256, 512, 1024, 2048], 18 | out_channels=256, 19 | start_level=1, 20 | add_extra_convs='on_output', 21 | num_outs=5), 22 | bbox_head=dict(type='PAAHead', 23 | norm_cfg=None, 24 | reg_decoded_bbox=True, 25 | score_voting=True, 26 | topk=9, 27 | num_classes=80, 28 | in_channels=256, 29 | stacked_convs=4, 30 | feat_channels=256, 31 | anchor_generator=dict(type='AnchorGenerator', 32 | ratios=[1.0], 33 | octave_base_scale=8, 34 | scales_per_octave=1, 35 | strides=[8, 16, 32, 64, 128]), 36 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 37 | target_means=[.0, .0, .0, .0], 38 | target_stds=[0.1, 0.1, 0.2, 0.2]), 39 | loss_cls=dict(type='FocalLoss', 40 | use_sigmoid=True, 41 | gamma=2.0, 42 | alpha=0.25, 43 | loss_weight=1.0), 44 | loss_bbox=dict(type='GIoULoss', loss_weight=1.3), 45 | loss_centerness=dict(type='CrossEntropyLoss', 46 | use_sigmoid=True, 47 | loss_weight=0.5)), 48 | # training and testing settings 49 | train_cfg=dict(assigner=dict(type='MaxIoUAssigner', 50 | pos_iou_thr=0.1, 51 | neg_iou_thr=0.1, 52 | min_pos_iou=0, 53 | ignore_iof_thr=-1), 54 | allowed_border=-1, 55 | pos_weight=-1, 56 | debug=False), 57 | test_cfg=dict(nms_pre=1000, 58 | min_bbox_size=0, 59 | score_thr=0.05, 60 | nms=dict(type='nms', iou_threshold=0.6), 61 | max_per_img=100)) 62 | # optimizer 63 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) 64 | 65 | custom_hooks
= [ 66 | dict(type='FisherPruningHook', 67 | pruning=False, 68 | deploy_from='path to the pruned model') 69 | ] 70 | 71 | work_dir = "work_dirs/paa" 72 | -------------------------------------------------------------------------------- /detection/configs/paa/paa_pruning.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/coco_detection.py', 3 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 4 | ] 5 | model = dict( 6 | type='PAA', 7 | pretrained='torchvision://resnet50', 8 | backbone=dict(type='ResNet', 9 | depth=50, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=-1, 13 | norm_cfg=dict(type='BN', requires_grad=True), 14 | norm_eval=True, 15 | style='pytorch'), 16 | neck=dict(type='FPN', 17 | in_channels=[256, 512, 1024, 2048], 18 | out_channels=256, 19 | start_level=1, 20 | add_extra_convs='on_output', 21 | num_outs=5), 22 | bbox_head=dict(type='PAAHead', 23 | norm_cfg=None, 24 | reg_decoded_bbox=True, 25 | score_voting=True, 26 | topk=9, 27 | num_classes=80, 28 | in_channels=256, 29 | stacked_convs=4, 30 | feat_channels=256, 31 | anchor_generator=dict(type='AnchorGenerator', 32 | ratios=[1.0], 33 | octave_base_scale=8, 34 | scales_per_octave=1, 35 | strides=[8, 16, 32, 64, 128]), 36 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 37 | target_means=[.0, .0, .0, .0], 38 | target_stds=[0.1, 0.1, 0.2, 0.2]), 39 | loss_cls=dict(type='FocalLoss', 40 | use_sigmoid=True, 41 | gamma=2.0, 42 | alpha=0.25, 43 | loss_weight=1.0), 44 | loss_bbox=dict(type='GIoULoss', loss_weight=1.3), 45 | loss_centerness=dict(type='CrossEntropyLoss', 46 | use_sigmoid=True, 47 | loss_weight=0.5)), 48 | # training and testing settings 49 | train_cfg=dict(assigner=dict(type='MaxIoUAssigner', 50 | pos_iou_thr=0.1, 51 | neg_iou_thr=0.1, 52 | min_pos_iou=0, 53 | ignore_iof_thr=-1), 54 | allowed_border=-1, 55 | pos_weight=-1, 56 | debug=False), 57 | test_cfg=dict(nms_pre=1000, 58 | min_bbox_size=0, 59 | score_thr=0.05, 60 | nms=dict(type='nms', iou_threshold=0.6), 61 | max_per_img=100)) 62 | # optimizer 63 | optimizer = dict(type='SGD', lr=0.002, momentum=0.9, weight_decay=0.0001) 64 | 65 | custom_hooks = [ 66 | dict( 67 | type='FisherPruningHook', 68 | # In the pruning process, set the priority to 69 | # 'LOWEST' to ensure the pruning hook executes 70 | # after the optimizer hook; in the finetune 71 | # process, set it to 'HIGHEST' so it executes 72 | # before the checkpoint hook 73 | pruning=True, 74 | batch_size=2, 75 | interval=10, 76 | priority='LOWEST', 77 | ) 78 | ] 79 | load_from = 'path to the baseline' 80 | work_dir = "work_dirs/paa" 81 | 82 | -------------------------------------------------------------------------------- /detection/configs/retina/retina_finetune.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/retinanet_r50_fpn.py', 3 | '../_base_/datasets/coco_detection.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | # optimizer 7 | optimizer = dict(lr=0.01) 8 | custom_hooks = [ 9 | dict(type='FisherPruningHook', 10 | pruning=False, 11 | deploy_from='path to the pruned model') 12 | ] 13 | # 14 | model = dict(backbone=dict(frozen_stages=-1, )) 15 | work_dir = "work_dirs/retina" 16 | 17 | -------------------------------------------------------------------------------- /detection/configs/retina/retina_pruning.py:
-------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/retinanet_r50_fpn.py', 3 | '../_base_/datasets/coco_detection.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | # optimizer 7 | optimizer = dict(lr=0.002) 8 | custom_hooks = [ 9 | dict( 10 | type='FisherPruningHook', 11 | # In the pruning process, set the priority to 12 | # 'LOWEST' to ensure the pruning hook executes 13 | # after the optimizer hook; in the finetune 14 | # process, set it to 'HIGHEST' so it executes 15 | # before the checkpoint hook 16 | pruning=True, 17 | batch_size=2, 18 | interval=10, 19 | priority='LOWEST', 20 | ) 21 | ] 22 | # 23 | work_dir = "work_dirs/retina" 24 | model = dict(backbone=dict(frozen_stages=-1, )) 25 | load_from = 'http://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn_1x_coco/retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth' # noqa: E501 26 | -------------------------------------------------------------------------------- /detection/tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29500} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 11 | -------------------------------------------------------------------------------- /detection/tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-29500} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} 10 | -------------------------------------------------------------------------------- /detection/tools/fisher_pruning_hook: -------------------------------------------------------------------------------- 1 | ../../fisher_pruning_hook -------------------------------------------------------------------------------- /detection/tools/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /detection/tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | WORK_DIR=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | PY_ARGS=${@:5} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p
${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /detection/tools/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import copy 3 | import os 4 | import os.path as osp 5 | import time 6 | import warnings 7 | 8 | import mmcv 9 | import torch 10 | # register the FisherPruningHook to the `Registry('hook')` 11 | from fisher_pruning_hook import FisherPruningHook # noqa F401 12 | from mmcv import Config, DictAction 13 | from mmcv.runner import get_dist_info, init_dist 14 | from mmcv.utils import get_git_hash 15 | from mmdet import __version__ 16 | from mmdet.apis import set_random_seed, train_detector 17 | from mmdet.datasets import build_dataset 18 | from mmdet.models import build_detector 19 | from mmdet.utils import collect_env, get_root_logger 20 | 21 | 22 | def parse_args(): 23 | parser = argparse.ArgumentParser(description='Train a detector') 24 | parser.add_argument('config', help='train config file path') 25 | parser.add_argument('--work-dir', help='the dir to save logs and models') 26 | parser.add_argument('--resume-from', 27 | help='the checkpoint file to resume from') 28 | parser.add_argument( 29 | '--no-validate', 30 | action='store_true', 31 | help='whether not to evaluate the checkpoint during training') 32 | group_gpus = parser.add_mutually_exclusive_group() 33 | group_gpus.add_argument('--gpus', 34 | type=int, 35 | help='number of gpus to use ' 36 | '(only applicable to non-distributed training)') 37 | group_gpus.add_argument('--gpu-ids', 38 | type=int, 39 | nargs='+', 40 | help='ids of gpus to use ' 41 | '(only applicable to non-distributed training)') 42 | parser.add_argument('--seed', type=int, default=None, help='random seed') 43 | parser.add_argument( 44 | '--deterministic', 45 | action='store_true', 46 | help='whether to set deterministic options for CUDNN backend.') 47 | parser.add_argument( 48 | '--options', 49 | nargs='+', 50 | action=DictAction, 51 | help='override some settings in the used config, the key-value pair ' 52 | 'in xxx=yyy format will be merged into config file (deprecated), ' 53 | 'change to --cfg-options instead.') 54 | parser.add_argument( 55 | '--cfg-options', 56 | nargs='+', 57 | action=DictAction, 58 | help='override some settings in the used config, the key-value pair ' 59 | 'in xxx=yyy format will be merged into config file. If the value to ' 60 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 61 | 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' 62 | 'Note that the quotation marks are necessary and that no white space ' 63 | 'is allowed.') 64 | parser.add_argument('--launcher', 65 | choices=['none', 'pytorch', 'slurm', 'mpi'], 66 | default='none', 67 | help='job launcher') 68 | parser.add_argument('--local_rank', type=int, default=0) 69 | args = parser.parse_args() 70 | if 'LOCAL_RANK' not in os.environ: 71 | os.environ['LOCAL_RANK'] = str(args.local_rank) 72 | 73 | if args.options and args.cfg_options: 74 | raise ValueError( 75 | '--options and --cfg-options cannot be both ' 76 | 'specified, --options is deprecated in favor of --cfg-options') 77 | if args.options: 78 | warnings.warn('--options is deprecated in favor of --cfg-options') 79 | args.cfg_options = args.options 80 | 81 | return args 82 | 83 | 84 | def main(): 85 | args = parse_args() 86 | 87 | cfg = Config.fromfile(args.config) 88 | if args.cfg_options is not None: 89 | cfg.merge_from_dict(args.cfg_options) 90 | # import modules from string list. 91 | if cfg.get('custom_imports', None): 92 | from mmcv.utils import import_modules_from_strings 93 | import_modules_from_strings(**cfg['custom_imports']) 94 | # set cudnn_benchmark 95 | if cfg.get('cudnn_benchmark', False): 96 | torch.backends.cudnn.benchmark = True 97 | 98 | # work_dir is determined in this priority: CLI > segment in file > filename 99 | if args.work_dir is not None: 100 | # update configs according to CLI args if args.work_dir is not None 101 | cfg.work_dir = args.work_dir 102 | elif cfg.get('work_dir', None) is None: 103 | # use config filename as default work_dir if cfg.work_dir is None 104 | cfg.work_dir = osp.join('./work_dirs', 105 | osp.splitext(osp.basename(args.config))[0]) 106 | if args.resume_from is not None: 107 | cfg.resume_from = args.resume_from 108 | if args.gpu_ids is not None: 109 | cfg.gpu_ids = args.gpu_ids 110 | else: 111 | cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus) 112 | 113 | # init distributed env first, since logger depends on the dist info. 
114 | if args.launcher == 'none': 115 | distributed = False 116 | else: 117 | distributed = True 118 | init_dist(args.launcher, **cfg.dist_params) 119 | # re-set gpu_ids with distributed training mode 120 | _, world_size = get_dist_info() 121 | cfg.gpu_ids = range(world_size) 122 | 123 | # create work_dir 124 | mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir)) 125 | # dump config 126 | cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config))) 127 | # init the logger before other steps 128 | timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) 129 | log_file = osp.join(cfg.work_dir, f'{timestamp}.log') 130 | logger = get_root_logger(log_file=log_file, log_level=cfg.log_level) 131 | 132 | # init the meta dict to record some important information such as 133 | # environment info and seed, which will be logged 134 | meta = dict() 135 | # log env info 136 | env_info_dict = collect_env() 137 | env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()]) 138 | dash_line = '-' * 60 + '\n' 139 | logger.info('Environment info:\n' + dash_line + env_info + '\n' + 140 | dash_line) 141 | meta['env_info'] = env_info 142 | meta['config'] = cfg.pretty_text 143 | # log some basic info 144 | logger.info(f'Distributed training: {distributed}') 145 | logger.info(f'Config:\n{cfg.pretty_text}') 146 | 147 | # set random seeds 148 | if args.seed is not None: 149 | logger.info(f'Set random seed to {args.seed}, ' 150 | f'deterministic: {args.deterministic}') 151 | set_random_seed(args.seed, deterministic=args.deterministic) 152 | cfg.seed = args.seed 153 | meta['seed'] = args.seed 154 | meta['exp_name'] = osp.basename(args.config) 155 | 156 | model = build_detector(cfg.model, 157 | train_cfg=cfg.get('train_cfg'), 158 | test_cfg=cfg.get('test_cfg')) 159 | model.init_weights() 160 | 161 | if 'custom_hooks' in cfg: 162 | for hook in cfg.custom_hooks: 163 | if hook.type == 'FisherPruningHook': 164 | hook_cfg = hook.copy() 165 | hook_cfg.pop('priority', None) 166 | from mmcv.runner.hooks import HOOKS 167 | hook_cls = HOOKS.get(hook_cfg['type']) 168 | if hasattr(hook_cls, 'after_build_model'): 169 | pruning_hook = mmcv.build_from_cfg(hook_cfg, HOOKS) 170 | pruning_hook.after_build_model(model, cfg.work_dir) 171 | 172 | datasets = [build_dataset(cfg.data.train)] 173 | if len(cfg.workflow) == 2: 174 | val_dataset = copy.deepcopy(cfg.data.val) 175 | val_dataset.pipeline = cfg.data.train.pipeline 176 | datasets.append(build_dataset(val_dataset)) 177 | if cfg.checkpoint_config is not None: 178 | # save mmdet version, config file content and class names in 179 | # checkpoints as meta data 180 | cfg.checkpoint_config.meta = dict(mmdet_version=__version__ + 181 | get_git_hash()[:7], 182 | CLASSES=datasets[0].CLASSES) 183 | # add an attribute for visualization convenience 184 | model.CLASSES = datasets[0].CLASSES 185 | train_detector(model, 186 | datasets, 187 | cfg, 188 | distributed=distributed, 189 | validate=(not args.no_validate), 190 | timestamp=timestamp, 191 | meta=meta) 192 | 193 | 194 | if __name__ == '__main__': 195 | main() 196 | -------------------------------------------------------------------------------- /fisher_pruning_hook/__init__.py: -------------------------------------------------------------------------------- 1 | from .fisher_pruning import FisherPruningHook -------------------------------------------------------------------------------- /fisher_pruning_hook/utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | import numpy as np 3 | 
import torch 4 | import torch.nn.functional as F 5 | import random 6 | 7 | 8 | # Each of these grad_fn name patterns identifies a specific kind of nn.Module 9 | CONV = ('ThnnConv2DBackward', 'CudnnConvolutionBackward') 10 | FC = ('ThAddmmBackward', 'AddmmBackward', 'MmBackward') 11 | BN = ('ThnnBatchNormBackward', 'CudnnBatchNormBackward') 12 | # modules whose grad_fn matches NON_PASS need their parameter sizes changed 13 | # according to the channels that remain after pruning 14 | NON_PASS = CONV + FC 15 | 16 | def feed_forward_once(model): 17 | inputs = torch.zeros(1, 3, 256, 256).cuda() 18 | inputs_meta = [{"img_shape": (256, 256, 3), "scale_factor": np.zeros(4, dtype=np.float32)}] 19 | neck_out = model.module.neck(model.module.backbone(inputs)) 20 | 21 | if hasattr(model.module, "head"): 22 | # for classification models 23 | return model.module.head.fc(neck_out[-1]).sum() 24 | elif hasattr(model.module, "bbox_head"): 25 | # for one-stage detectors 26 | bbox_out = model.module.bbox_head(neck_out) 27 | return sum([sum([level.sum() for level in levels]) for levels in bbox_out]) 28 | elif hasattr(model.module, "rpn_head") and hasattr(model.module, "roi_head"): 29 | # for two-stage detectors 30 | from mmdet.core import bbox2roi 31 | rpn_out = model.module.rpn_head(neck_out) 32 | proposals = model.module.rpn_head.get_bboxes(*rpn_out, inputs_meta) 33 | rois = bbox2roi(proposals) 34 | roi_out = model.module.roi_head._bbox_forward(neck_out, rois) 35 | loss = sum([sum([level.sum() for level in levels]) for levels in rpn_out]) 36 | loss += roi_out['cls_score'].sum() + roi_out['bbox_pred'].sum() 37 | return loss 38 | else: 39 | raise NotImplementedError("This kind of model is not supported yet.") 40 | 41 | 42 | 43 | def traverse(op, op2parents, pattern=NON_PASS, max_pattern_layer=-1): 44 | """Build a dict that describes the compute graph. 45 | 46 | Args: 47 | op (grad_fn): the root of the DFS 48 | op2parents (dict): maps each grad_fn matching `pattern` to the 49 | first grad_fns matching NON_PASS found by DFS from that key 50 | pattern (Tuple[str]): the grad_fn name patterns to match 51 | """ 52 | 53 | if op is not None: 54 | parents = op.next_functions 55 | if parents is not None: 56 | if match(op, pattern): 57 | if pattern is FC: 58 | op2parents[op] = dfs(parents[1][0], []) 59 | else: 60 | op2parents[op] = dfs(parents[0][0], []) 61 | if len(op2parents.keys()) == max_pattern_layer: 62 | return 63 | for parent in parents: 64 | parent = parent[0] 65 | if parent not in op2parents: 66 | traverse(parent, op2parents, pattern, max_pattern_layer) 67 | 68 | 69 | def dfs(op, visited): 70 | """DFS from an op; collect every op that matches the pattern 71 | NON_PASS. 72 | 73 | Args: 74 | op (grad_fn): the root of the DFS 75 | visited (list[grad_fn]): all the ops visited so far 76 | 77 | Returns: 78 | list: all the ops that match the pattern NON_PASS 79 | """ 80 | 81 | ret = [] 82 | if op is not None: 83 | visited.append(op) 84 | if match(op, NON_PASS): 85 | return [op] 86 | parents = op.next_functions 87 | if parents is not None: 88 | for parent in parents: 89 | parent = parent[0] 90 | if parent not in visited: 91 | ret.extend(dfs(parent, visited)) 92 | return ret 93 | 94 | 95 | def match(op, op_to_match): 96 | """Match an operation against a group of operation-name patterns; in the 97 | PyTorch graph there may be a trailing '0' or '1' (e.g. AddBackward1) 98 | after the names listed above.
99 | 100 | Args: 101 | op (grad_fn): the grad_fn whose name is matched 102 | op_to_match (list[str]): the patterns to match against 103 | 104 | Returns: 105 | bool: True when the op matches one of the patterns, else False 106 | """ 107 | 108 | for to_match in op_to_match: 109 | if re.match(to_match + '[0-1]?$', type(op).__name__): 110 | return True 111 | return False 112 | 113 | 114 | def get_channel_num(module, flag="in"): 115 | if type(module).__name__ == 'Conv2d': 116 | return getattr(module, f"{flag}_channels") 117 | elif type(module).__name__ == 'Linear': 118 | return getattr(module, f"{flag}_features") 119 | else: 120 | for attr in dir(module):  # fall back to the first in_*/out_* attribute 121 | if attr.startswith(f"{flag}_"): 122 | return getattr(module, attr) 123 | raise NotImplementedError(f"The module {type(module).__name__} is not supported yet.") 124 | 125 | 126 | def modified_forward_conv(self, feature): 127 | if not self.finetune and hasattr(self, "in_mask"):  # during pruning, zero out masked input channels 128 | in_mask = self.in_mask.unsqueeze(1).expand(-1, feature.size(1) // self.in_mask.size(0))  # one mask entry per group of consecutive channels 129 | feature = feature * in_mask.reshape(1, -1, 1, 1) 130 | return F.conv2d(feature, self.weight, self.bias, self.stride, 131 | self.padding, self.dilation, self.groups) 132 | 133 | 134 | def modified_forward_linear(self, feature): 135 | if not self.finetune and hasattr(self, "in_mask"):  # during pruning, zero out masked input features 136 | in_mask = self.in_mask.unsqueeze(1).expand(-1, feature.size(1) // self.in_mask.size(0)) 137 | feature = feature * in_mask.reshape(1, -1) 138 | return F.linear(feature, self.weight, self.bias) --------------------------------------------------------------------------------
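
The `utils.py` file above is the heart of the hook: it finds prunable layers by matching `grad_fn` class names while walking the autograd graph. As a quick illustration (not part of the repository), the hedged sketch below reimplements that pattern-matching DFS on a toy model; the extra pattern names `ConvolutionBackward` and `MkldnnConvolutionBackward` are assumptions added here because `grad_fn` class names vary across PyTorch versions.

```python
import re
import torch
import torch.nn as nn

# grad_fn class names differ between PyTorch versions; the last two
# entries are assumptions added to cover recent/CPU builds.
CONV = ('ThnnConv2DBackward', 'CudnnConvolutionBackward',
        'ConvolutionBackward', 'MkldnnConvolutionBackward')
FC = ('ThAddmmBackward', 'AddmmBackward', 'MmBackward')


def match(op, patterns):
    # a trailing digit may be appended to the class name, e.g. AddmmBackward0
    name = type(op).__name__
    return any(re.match(p + r'[0-1]?$', name) for p in patterns)


def collect(op, found, visited):
    # walk .next_functions depth-first, recording conv/linear backward ops
    if op is None or op in visited:
        return found
    visited.add(op)
    if match(op, CONV + FC):
        found.append(type(op).__name__)
    for parent, _ in op.next_functions:
        collect(parent, found, visited)
    return found


model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.Flatten(), nn.Linear(8 * 6 * 6, 10))
loss = model(torch.zeros(1, 3, 8, 8)).sum()
print(collect(loss.grad_fn, [], set()))
# e.g. ['AddmmBackward0', 'ConvolutionBackward0'] on recent PyTorch
```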
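Similarly, a minimal sketch of the input masking performed by `modified_forward_conv`: `in_mask` holds one entry per channel group, and the `unsqueeze`/`expand`/`reshape` sequence broadcasts each entry over `C // len(in_mask)` consecutive channels before the convolution runs. The tensor sizes here are illustrative, not taken from the hook.

```python
import torch

feature = torch.randn(1, 8, 4, 4)         # N, C, H, W with C = 8
in_mask = torch.tensor([1., 0., 1., 0.])  # one entry per channel group

# (4,) -> (4, 1) -> (4, 2) -> reshaped to (1, 8, 1, 1): each mask entry
# is repeated over C // len(in_mask) = 2 consecutive channels
expanded = in_mask.unsqueeze(1).expand(-1, feature.size(1) // in_mask.size(0))
masked = feature * expanded.reshape(1, -1, 1, 1)

print(masked[0, :, 0, 0])  # channels 2, 3, 6 and 7 are zeroed
```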