├── .gitignore ├── README.md ├── classification ├── configs │ ├── _base_ │ │ ├── datasets │ │ │ ├── cifar100_bs16.py │ │ │ ├── cifar10_bs16.py │ │ │ ├── imagenet_bs32.py │ │ │ ├── imagenet_bs32_pil_resize.py │ │ │ ├── imagenet_bs64.py │ │ │ ├── imagenet_bs64_autoaug.py │ │ │ ├── imagenet_bs64_pil_resize.py │ │ │ ├── imagenet_bs64_swin_224.py │ │ │ ├── imagenet_bs64_swin_384.py │ │ │ ├── pipelines │ │ │ │ ├── auto_aug.py │ │ │ │ └── rand_aug.py │ │ │ └── voc_bs16.py │ │ ├── default_runtime.py │ │ ├── models │ │ │ ├── mobilenet_v2_1x.py │ │ │ ├── mobilenet_v3_large_imagenet.py │ │ │ ├── mobilenet_v3_small_cifar.py │ │ │ ├── mobilenet_v3_small_imagenet.py │ │ │ ├── regnet │ │ │ │ ├── regnetx_1.6gf.py │ │ │ │ ├── regnetx_12gf.py │ │ │ │ ├── regnetx_3.2gf.py │ │ │ │ ├── regnetx_4.0gf.py │ │ │ │ ├── regnetx_400mf.py │ │ │ │ ├── regnetx_6.4gf.py │ │ │ │ ├── regnetx_8.0gf.py │ │ │ │ └── regnetx_800mf.py │ │ │ ├── repvgg-A0_in1k.py │ │ │ ├── repvgg-B3_lbs-mixup_in1k.py │ │ │ ├── resnest101.py │ │ │ ├── resnest200.py │ │ │ ├── resnest269.py │ │ │ ├── resnest50.py │ │ │ ├── resnet101.py │ │ │ ├── resnet101_cifar.py │ │ │ ├── resnet152.py │ │ │ ├── resnet152_cifar.py │ │ │ ├── resnet18.py │ │ │ ├── resnet18_cifar.py │ │ │ ├── resnet34.py │ │ │ ├── resnet34_cifar.py │ │ │ ├── resnet50.py │ │ │ ├── resnet50_cifar.py │ │ │ ├── resnet50_cifar_cutmix.py │ │ │ ├── resnet50_cifar_mixup.py │ │ │ ├── resnet50_cutmix.py │ │ │ ├── resnet50_label_smooth.py │ │ │ ├── resnet50_mixup.py │ │ │ ├── resnetv1d101.py │ │ │ ├── resnetv1d152.py │ │ │ ├── resnetv1d50.py │ │ │ ├── resnext101_32x4d.py │ │ │ ├── resnext101_32x8d.py │ │ │ ├── resnext152_32x4d.py │ │ │ ├── resnext50_32x4d.py │ │ │ ├── seresnet101.py │ │ │ ├── seresnet50.py │ │ │ ├── seresnext101_32x4d.py │ │ │ ├── seresnext50_32x4d.py │ │ │ ├── shufflenet_v1_1x.py │ │ │ ├── shufflenet_v2_1x.py │ │ │ ├── swin_transformer │ │ │ │ ├── base_224.py │ │ │ │ ├── base_384.py │ │ │ │ ├── large_224.py │ │ │ │ ├── large_384.py │ │ │ │ ├── small_224.py │ │ │ │ └── tiny_224.py │ │ │ ├── tnt_s_patch16_224.py │ │ │ ├── vgg11.py │ │ │ ├── vgg11bn.py │ │ │ ├── vgg13.py │ │ │ ├── vgg13bn.py │ │ │ ├── vgg16.py │ │ │ ├── vgg16bn.py │ │ │ ├── vgg19.py │ │ │ ├── vgg19bn.py │ │ │ ├── vit_base_patch16_224_finetune.py │ │ │ ├── vit_base_patch16_224_pretrain.py │ │ │ ├── vit_base_patch16_384_finetune.py │ │ │ ├── vit_base_patch32_384_finetune.py │ │ │ ├── vit_large_patch16_224_finetune.py │ │ │ ├── vit_large_patch16_384_finetune.py │ │ │ └── vit_large_patch32_384_finetune.py │ │ └── schedules │ │ │ ├── cifar10_bs128.py │ │ │ ├── imagenet_bs1024_adamw_swin.py │ │ │ ├── imagenet_bs1024_linearlr_bn_nowd.py │ │ │ ├── imagenet_bs2048.py │ │ │ ├── imagenet_bs2048_AdamW.py │ │ │ ├── imagenet_bs2048_coslr.py │ │ │ ├── imagenet_bs256.py │ │ │ ├── imagenet_bs256_140e.py │ │ │ ├── imagenet_bs256_200e_coslr_warmup.py │ │ │ ├── imagenet_bs256_coslr.py │ │ │ ├── imagenet_bs256_epochstep.py │ │ │ └── imagenet_bs4096_AdamW.py │ ├── regnet │ │ ├── regnet_0.4G_origin.py │ │ ├── regnet_0.4G_pruning.py │ │ ├── regnet_0.8G_origin.py │ │ ├── regnet_0.8G_pruning.py │ │ ├── regnet_1.6G_origin.py │ │ ├── regnet_1.6G_pruning.py │ │ ├── regnet_3.2G_finetune.py │ │ ├── regnet_3.2G_origin.py │ │ ├── regnet_3.2G_pruning.py │ │ └── regnet_6.4G_origin.py │ ├── resnet50 │ │ ├── resnet50_finetune.py │ │ └── resnet50_pruning.py │ └── resnext │ │ ├── resnext50_finetune.py │ │ └── resnext50_pruning.py └── tools │ ├── dist_test.sh │ ├── dist_train.sh │ ├── fisher_pruning_hook │ ├── model_eval.py │ ├── slurm_test.sh │ ├── 
slurm_train.sh │ ├── test.py │ └── train.py ├── detection ├── configs │ ├── _base_ │ │ ├── datasets │ │ │ ├── cityscapes_detection.py │ │ │ ├── cityscapes_instance.py │ │ │ ├── coco_detection.py │ │ │ ├── coco_instance.py │ │ │ ├── coco_instance_semantic.py │ │ │ ├── deepfashion.py │ │ │ ├── lvis_v0.5_instance.py │ │ │ ├── lvis_v1_instance.py │ │ │ ├── voc0712.py │ │ │ └── wider_face.py │ │ ├── default_runtime.py │ │ ├── models │ │ │ ├── cascade_mask_rcnn_r50_fpn.py │ │ │ ├── cascade_rcnn_r50_fpn.py │ │ │ ├── fast_rcnn_r50_fpn.py │ │ │ ├── faster_rcnn_r50_caffe_c4.py │ │ │ ├── faster_rcnn_r50_caffe_dc5.py │ │ │ ├── faster_rcnn_r50_fpn.py │ │ │ ├── mask_rcnn_r50_caffe_c4.py │ │ │ ├── mask_rcnn_r50_fpn.py │ │ │ ├── retinanet_r50_fpn.py │ │ │ ├── rpn_r50_caffe_c4.py │ │ │ ├── rpn_r50_fpn.py │ │ │ └── ssd300.py │ │ └── schedules │ │ │ ├── schedule_1x.py │ │ │ ├── schedule_20e.py │ │ │ └── schedule_2x.py │ ├── atss │ │ ├── atss_finetune.py │ │ └── atss_pruning.py │ ├── faster_rcnn │ │ ├── faster_finetune.py │ │ └── faster_pruning.py │ ├── fsaf │ │ ├── fsaf_finetune.py │ │ └── fsaf_pruning.py │ ├── paa │ │ ├── paa_finetune.py │ │ └── paa_pruning.py │ └── retina │ │ ├── retina_finetune.py │ │ └── retina_pruning.py └── tools │ ├── dist_test.sh │ ├── dist_train.sh │ ├── fisher_pruning_hook │ ├── slurm_test.sh │ ├── slurm_train.sh │ ├── test.py │ └── train.py └── fisher_pruning_hook ├── __init__.py ├── fisher_pruning.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | __pycache__ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FisherPruning-Pytorch 2 | An implementation of "Group Fisher Pruning for Practical Network Compression" based on PyTorch and MMCV 3 | 4 | --- 5 | 6 | 7 | ### Main Functions 8 | 9 | - Pruning for fully-convolutional structures, 10 | such as one-stage detectors (copied from the [official code](https://github.com/jshilong/FisherPruning)); 11 | 12 | - Pruning for networks combining convolutional layers and fully-connected layers, such as Faster R-CNN and ResNet; 13 | 14 | - Pruning for networks that involve group convolutions, such as ResNeXt and RegNet. 15 | 16 | ### Usage 17 | 18 | #### Requirements 19 | 20 | ```text 21 | torch 22 | torchvision 23 | mmcv / mmcv-full 24 | mmcls 25 | mmdet 26 | ``` 27 | #### Compatibility 28 | This code is tested with 29 | 30 | ```text 31 | pytorch=1.3 32 | torchvision=0.4 33 | cudatoolkit=10.0 34 | mmcv-full==1.3.14 35 | mmcls==0.16 36 | mmdet==2.17 37 | ``` 38 | 39 | and 40 | 41 | ```text 42 | pytorch=1.8 43 | torchvision=0.9 44 | cudatoolkit=11.1 45 | mmcv==1.3.16 46 | mmcls==0.16 47 | mmdet==2.17 48 | ``` 49 | 50 | #### Data 51 | 52 | Download [ImageNet](https://image-net.org/download.php) and [COCO](https://cocodataset.org/), 53 | then extract them and organize the folders as 54 | 55 | ``` 56 | - detection 57 | |- tools 58 | |- configs 59 | |- data 60 | | |- coco 61 | | | |- train2017 62 | | | |- val2017 63 | | | |- test2017 64 | | | |- annotations 65 | | 66 | - classification 67 | |- tools 68 | |- configs 69 | |- data 70 | | |- imagenet 71 | | | |- train 72 | | | |- val 73 | | | |- test 74 | | | |- meta 75 | | 76 | - ... 77 | ``` 78 | 79 | #### Commands 80 | 81 | e.g. Classification 82 | ```bash 83 | cd classification 84 | ``` 85 | 1.
Pruning 86 | ```bash 87 | # single GPU 88 | python tools/train.py configs/xxx_pruning.py --gpus=1 89 | # multiple GPUs (e.g. 4 GPUs) 90 | python -m torch.distributed.launch --nproc_per_node=4 tools/train.py configs/xxx_pruning.py --launcher pytorch 91 | ``` 92 | 93 | 2. Fine-tune 94 | 95 | In the config file, set `deploy_from` to the path of the pruned model, and set `samples_per_gpu` to 256/#GPUs. Then 96 | ```bash 97 | # single GPU 98 | python tools/train.py configs/xxx_finetune.py --gpus=1 99 | # multiple GPUs (e.g. 4 GPUs) 100 | python -m torch.distributed.launch --nproc_per_node=4 tools/train.py configs/xxx_finetune.py --launcher pytorch 101 | ``` 102 | 103 | 3. Test 104 | 105 | In the config file, set `load_from` to the path of the fine-tuned model. Then 106 | ```bash 107 | python tools/test.py configs/xxx_finetune.py --metrics=accuracy 108 | ``` 109 | 110 | The commands for pruning and fine-tuning detection models are similar to those for classification models. 111 | Instructions will be added soon. 112 | 113 | ## Acknowledgments 114 | 115 | This project builds on the official code [FisherPruning](https://github.com/jshilong/FisherPruning). -------------------------------------------------------------------------------- /classification/configs/_base_/datasets/cifar100_bs16.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CIFAR100' 3 | img_norm_cfg = dict( 4 | mean=[129.304, 124.070, 112.434], 5 | std=[68.170, 65.392, 70.418], 6 | to_rgb=False) 7 | train_pipeline = [ 8 | dict(type='RandomCrop', size=32, padding=4), 9 | dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), 10 | dict(type='Normalize', **img_norm_cfg), 11 | dict(type='ImageToTensor', keys=['img']), 12 | dict(type='ToTensor', keys=['gt_label']), 13 | dict(type='Collect', keys=['img', 'gt_label']) 14 | ] 15 | test_pipeline = [ 16 | dict(type='Normalize', **img_norm_cfg), 17 | dict(type='ImageToTensor', keys=['img']), 18 | dict(type='Collect', keys=['img']) 19 | ] 20 | data = dict( 21 | samples_per_gpu=16, 22 | workers_per_gpu=2, 23 | train=dict( 24 | type=dataset_type, 25 | data_prefix='data/cifar100', 26 | pipeline=train_pipeline), 27 | val=dict( 28 | type=dataset_type, 29 | data_prefix='data/cifar100', 30 | pipeline=test_pipeline, 31 | test_mode=True), 32 | test=dict( 33 | type=dataset_type, 34 | data_prefix='data/cifar100', 35 | pipeline=test_pipeline, 36 | test_mode=True)) 37 | -------------------------------------------------------------------------------- /classification/configs/_base_/datasets/cifar10_bs16.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CIFAR10' 3 | img_norm_cfg = dict( 4 | mean=[125.307, 122.961, 113.8575], 5 | std=[51.5865, 50.847, 51.255], 6 | to_rgb=False) 7 | train_pipeline = [ 8 | dict(type='RandomCrop', size=32, padding=4), 9 | dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), 10 | dict(type='Normalize', **img_norm_cfg), 11 | dict(type='ImageToTensor', keys=['img']), 12 | dict(type='ToTensor', keys=['gt_label']), 13 | dict(type='Collect', keys=['img', 'gt_label']) 14 | ] 15 | test_pipeline = [ 16 | dict(type='Normalize', **img_norm_cfg), 17 | dict(type='ImageToTensor', keys=['img']), 18 | dict(type='Collect', keys=['img']) 19 | ] 20 | data = dict( 21 | samples_per_gpu=16, 22 | workers_per_gpu=2, 23 | train=dict( 24 | type=dataset_type, data_prefix='data/cifar10', 25 | pipeline=train_pipeline), 26 | val=dict( 27 |
type=dataset_type, 28 | data_prefix='data/cifar10', 29 | pipeline=test_pipeline, 30 | test_mode=True), 31 | test=dict( 32 | type=dataset_type, 33 | data_prefix='data/cifar10', 34 | pipeline=test_pipeline, 35 | test_mode=True)) 36 | -------------------------------------------------------------------------------- /classification/configs/_base_/datasets/imagenet_bs32.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ImageNet' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='RandomResizedCrop', size=224), 8 | dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), 9 | dict(type='Normalize', **img_norm_cfg), 10 | dict(type='ImageToTensor', keys=['img']), 11 | dict(type='ToTensor', keys=['gt_label']), 12 | dict(type='Collect', keys=['img', 'gt_label']) 13 | ] 14 | test_pipeline = [ 15 | dict(type='LoadImageFromFile'), 16 | dict(type='Resize', size=(256, -1)), 17 | dict(type='CenterCrop', crop_size=224), 18 | dict(type='Normalize', **img_norm_cfg), 19 | dict(type='ImageToTensor', keys=['img']), 20 | dict(type='Collect', keys=['img']) 21 | ] 22 | data = dict( 23 | samples_per_gpu=32, 24 | workers_per_gpu=2, 25 | train=dict( 26 | type=dataset_type, 27 | data_prefix='data/imagenet/train', 28 | pipeline=train_pipeline), 29 | val=dict( 30 | type=dataset_type, 31 | data_prefix='data/imagenet/val', 32 | ann_file='data/imagenet/meta/val.txt', 33 | pipeline=test_pipeline), 34 | test=dict( 35 | # replace `data/val` with `data/test` for standard test 36 | type=dataset_type, 37 | data_prefix='data/imagenet/val', 38 | ann_file='data/imagenet/meta/val.txt', 39 | pipeline=test_pipeline)) 40 | evaluation = dict(interval=1, metric='accuracy') 41 | -------------------------------------------------------------------------------- /classification/configs/_base_/datasets/imagenet_bs32_pil_resize.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ImageNet' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='RandomResizedCrop', size=224, backend='pillow'), 8 | dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), 9 | dict(type='Normalize', **img_norm_cfg), 10 | dict(type='ImageToTensor', keys=['img']), 11 | dict(type='ToTensor', keys=['gt_label']), 12 | dict(type='Collect', keys=['img', 'gt_label']) 13 | ] 14 | test_pipeline = [ 15 | dict(type='LoadImageFromFile'), 16 | dict(type='Resize', size=(256, -1), backend='pillow'), 17 | dict(type='CenterCrop', crop_size=224), 18 | dict(type='Normalize', **img_norm_cfg), 19 | dict(type='ImageToTensor', keys=['img']), 20 | dict(type='Collect', keys=['img']) 21 | ] 22 | data = dict( 23 | samples_per_gpu=32, 24 | workers_per_gpu=2, 25 | train=dict( 26 | type=dataset_type, 27 | data_prefix='data/imagenet/train', 28 | pipeline=train_pipeline), 29 | val=dict( 30 | type=dataset_type, 31 | data_prefix='data/imagenet/val', 32 | ann_file='data/imagenet/meta/val.txt', 33 | pipeline=test_pipeline), 34 | test=dict( 35 | # replace `data/val` with `data/test` for standard test 36 | type=dataset_type, 37 | data_prefix='data/imagenet/val', 38 | ann_file='data/imagenet/meta/val.txt', 39 | pipeline=test_pipeline)) 40 | evaluation = dict(interval=1, metric='accuracy') 41 | 
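Note: the `_base_` dataset fragments above are not run directly; the top-level configs of this repo (e.g. under `configs/resnet50/`) compose them through MMCV's config inheritance. A minimal sketch of that pattern follows — the exact file list is illustrative, not copied from this repo:

```python
# Sketch of a top-level config composing _base_ fragments via MMCV
# config inheritance; keys defined here override the inherited values.
_base_ = [
    '../_base_/models/resnet50.py',
    '../_base_/datasets/imagenet_bs32.py',
    '../_base_/schedules/imagenet_bs256.py',
    '../_base_/default_runtime.py',
]
# Example override: a larger per-GPU batch than the inherited 32.
data = dict(samples_per_gpu=64)
```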
-------------------------------------------------------------------------------- /classification/configs/_base_/datasets/imagenet_bs64.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ImageNet' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='RandomResizedCrop', size=224), 8 | dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), 9 | dict(type='Normalize', **img_norm_cfg), 10 | dict(type='ImageToTensor', keys=['img']), 11 | dict(type='ToTensor', keys=['gt_label']), 12 | dict(type='Collect', keys=['img', 'gt_label']) 13 | ] 14 | test_pipeline = [ 15 | dict(type='LoadImageFromFile'), 16 | dict(type='Resize', size=(256, -1)), 17 | dict(type='CenterCrop', crop_size=224), 18 | dict(type='Normalize', **img_norm_cfg), 19 | dict(type='ImageToTensor', keys=['img']), 20 | dict(type='Collect', keys=['img']) 21 | ] 22 | data = dict( 23 | samples_per_gpu=64, 24 | workers_per_gpu=2, 25 | train=dict( 26 | type=dataset_type, 27 | data_prefix='data/imagenet/train', 28 | pipeline=train_pipeline), 29 | val=dict( 30 | type=dataset_type, 31 | data_prefix='data/imagenet/val', 32 | ann_file='data/imagenet/meta/val.txt', 33 | pipeline=test_pipeline), 34 | test=dict( 35 | # replace `data/val` with `data/test` for standard test 36 | type=dataset_type, 37 | data_prefix='data/imagenet/val', 38 | ann_file='data/imagenet/meta/val.txt', 39 | pipeline=test_pipeline)) 40 | evaluation = dict(interval=1, metric='accuracy') 41 | -------------------------------------------------------------------------------- /classification/configs/_base_/datasets/imagenet_bs64_autoaug.py: -------------------------------------------------------------------------------- 1 | _base_ = ['./pipelines/auto_aug.py'] 2 | 3 | # dataset settings 4 | dataset_type = 'ImageNet' 5 | img_norm_cfg = dict( 6 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='RandomResizedCrop', size=224), 10 | dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), 11 | dict(type='AutoAugment', policies={{_base_.auto_increasing_policies}}), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='ImageToTensor', keys=['img']), 14 | dict(type='ToTensor', keys=['gt_label']), 15 | dict(type='Collect', keys=['img', 'gt_label']) 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict(type='Resize', size=(256, -1)), 20 | dict(type='CenterCrop', crop_size=224), 21 | dict(type='Normalize', **img_norm_cfg), 22 | dict(type='ImageToTensor', keys=['img']), 23 | dict(type='Collect', keys=['img']) 24 | ] 25 | data = dict( 26 | samples_per_gpu=64, 27 | workers_per_gpu=2, 28 | train=dict( 29 | type=dataset_type, 30 | data_prefix='data/imagenet/train', 31 | pipeline=train_pipeline), 32 | val=dict( 33 | type=dataset_type, 34 | data_prefix='data/imagenet/val', 35 | ann_file='data/imagenet/meta/val.txt', 36 | pipeline=test_pipeline), 37 | test=dict( 38 | # replace `data/val` with `data/test` for standard test 39 | type=dataset_type, 40 | data_prefix='data/imagenet/val', 41 | ann_file='data/imagenet/meta/val.txt', 42 | pipeline=test_pipeline)) 43 | evaluation = dict(interval=1, metric='accuracy') 44 | -------------------------------------------------------------------------------- /classification/configs/_base_/datasets/imagenet_bs64_pil_resize.py: 
-------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ImageNet' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='RandomResizedCrop', size=224, backend='pillow'), 8 | dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), 9 | dict(type='Normalize', **img_norm_cfg), 10 | dict(type='ImageToTensor', keys=['img']), 11 | dict(type='ToTensor', keys=['gt_label']), 12 | dict(type='Collect', keys=['img', 'gt_label']) 13 | ] 14 | test_pipeline = [ 15 | dict(type='LoadImageFromFile'), 16 | dict(type='Resize', size=(256, -1), backend='pillow'), 17 | dict(type='CenterCrop', crop_size=224), 18 | dict(type='Normalize', **img_norm_cfg), 19 | dict(type='ImageToTensor', keys=['img']), 20 | dict(type='Collect', keys=['img']) 21 | ] 22 | data = dict( 23 | samples_per_gpu=64, 24 | workers_per_gpu=2, 25 | train=dict( 26 | type=dataset_type, 27 | data_prefix='data/imagenet/train', 28 | pipeline=train_pipeline), 29 | val=dict( 30 | type=dataset_type, 31 | data_prefix='data/imagenet/val', 32 | ann_file='data/imagenet/meta/val.txt', 33 | pipeline=test_pipeline), 34 | test=dict( 35 | # replace `data/val` with `data/test` for standard test 36 | type=dataset_type, 37 | data_prefix='data/imagenet/val', 38 | ann_file='data/imagenet/meta/val.txt', 39 | pipeline=test_pipeline)) 40 | evaluation = dict(interval=1, metric='accuracy') 41 | -------------------------------------------------------------------------------- /classification/configs/_base_/datasets/imagenet_bs64_swin_224.py: -------------------------------------------------------------------------------- 1 | _base_ = ['./pipelines/rand_aug.py'] 2 | 3 | # dataset settings 4 | dataset_type = 'ImageNet' 5 | img_norm_cfg = dict( 6 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 7 | 8 | train_pipeline = [ 9 | dict(type='LoadImageFromFile'), 10 | dict( 11 | type='RandomResizedCrop', 12 | size=224, 13 | backend='pillow', 14 | interpolation='bicubic'), 15 | dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), 16 | dict( 17 | type='RandAugment', 18 | policies={{_base_.rand_increasing_policies}}, 19 | num_policies=2, 20 | total_level=10, 21 | magnitude_level=9, 22 | magnitude_std=0.5, 23 | hparams=dict( 24 | pad_val=[round(x) for x in img_norm_cfg['mean'][::-1]], 25 | interpolation='bicubic')), 26 | dict( 27 | type='RandomErasing', 28 | erase_prob=0.25, 29 | mode='rand', 30 | min_area_ratio=0.02, 31 | max_area_ratio=1 / 3, 32 | fill_color=img_norm_cfg['mean'][::-1], 33 | fill_std=img_norm_cfg['std'][::-1]), 34 | dict(type='Normalize', **img_norm_cfg), 35 | dict(type='ImageToTensor', keys=['img']), 36 | dict(type='ToTensor', keys=['gt_label']), 37 | dict(type='Collect', keys=['img', 'gt_label']) 38 | ] 39 | 40 | test_pipeline = [ 41 | dict(type='LoadImageFromFile'), 42 | dict( 43 | type='Resize', 44 | size=(256, -1), 45 | backend='pillow', 46 | interpolation='bicubic'), 47 | dict(type='CenterCrop', crop_size=224), 48 | dict(type='Normalize', **img_norm_cfg), 49 | dict(type='ImageToTensor', keys=['img']), 50 | dict(type='Collect', keys=['img']) 51 | ] 52 | data = dict( 53 | samples_per_gpu=64, 54 | workers_per_gpu=8, 55 | train=dict( 56 | type=dataset_type, 57 | data_prefix='data/imagenet/train', 58 | pipeline=train_pipeline), 59 | val=dict( 60 | type=dataset_type, 61 | data_prefix='data/imagenet/val', 62 | 
ann_file='data/imagenet/meta/val.txt', 63 | pipeline=test_pipeline), 64 | test=dict( 65 | # replace `data/val` with `data/test` for standard test 66 | type=dataset_type, 67 | data_prefix='data/imagenet/val', 68 | ann_file='data/imagenet/meta/val.txt', 69 | pipeline=test_pipeline)) 70 | 71 | evaluation = dict(interval=10, metric='accuracy') 72 | -------------------------------------------------------------------------------- /classification/configs/_base_/datasets/imagenet_bs64_swin_384.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ImageNet' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict( 8 | type='RandomResizedCrop', 9 | size=384, 10 | backend='pillow', 11 | interpolation='bicubic'), 12 | dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), 13 | dict(type='Normalize', **img_norm_cfg), 14 | dict(type='ImageToTensor', keys=['img']), 15 | dict(type='ToTensor', keys=['gt_label']), 16 | dict(type='Collect', keys=['img', 'gt_label']) 17 | ] 18 | test_pipeline = [ 19 | dict(type='LoadImageFromFile'), 20 | dict(type='Resize', size=384, backend='pillow', interpolation='bicubic'), 21 | dict(type='Normalize', **img_norm_cfg), 22 | dict(type='ImageToTensor', keys=['img']), 23 | dict(type='Collect', keys=['img']) 24 | ] 25 | data = dict( 26 | samples_per_gpu=64, 27 | workers_per_gpu=8, 28 | train=dict( 29 | type=dataset_type, 30 | data_prefix='data/imagenet/train', 31 | pipeline=train_pipeline), 32 | val=dict( 33 | type=dataset_type, 34 | data_prefix='data/imagenet/val', 35 | ann_file='data/imagenet/meta/val.txt', 36 | pipeline=test_pipeline), 37 | test=dict( 38 | # replace `data/val` with `data/test` for standard test 39 | type=dataset_type, 40 | data_prefix='data/imagenet/val', 41 | ann_file='data/imagenet/meta/val.txt', 42 | pipeline=test_pipeline)) 43 | evaluation = dict(interval=10, metric='accuracy') 44 | -------------------------------------------------------------------------------- /classification/configs/_base_/datasets/pipelines/auto_aug.py: -------------------------------------------------------------------------------- 1 | # Policy for ImageNet, refers to 2 | # https://github.com/DeepVoltaire/AutoAugment/blame/master/autoaugment.py 3 | policy_imagenet = [ 4 | [ 5 | dict(type='Posterize', bits=4, prob=0.4), 6 | dict(type='Rotate', angle=30., prob=0.6) 7 | ], 8 | [ 9 | dict(type='Solarize', thr=256 / 9 * 4, prob=0.6), 10 | dict(type='AutoContrast', prob=0.6) 11 | ], 12 | [dict(type='Equalize', prob=0.8), 13 | dict(type='Equalize', prob=0.6)], 14 | [ 15 | dict(type='Posterize', bits=5, prob=0.6), 16 | dict(type='Posterize', bits=5, prob=0.6) 17 | ], 18 | [ 19 | dict(type='Equalize', prob=0.4), 20 | dict(type='Solarize', thr=256 / 9 * 5, prob=0.2) 21 | ], 22 | [ 23 | dict(type='Equalize', prob=0.4), 24 | dict(type='Rotate', angle=30 / 9 * 8, prob=0.8) 25 | ], 26 | [ 27 | dict(type='Solarize', thr=256 / 9 * 6, prob=0.6), 28 | dict(type='Equalize', prob=0.6) 29 | ], 30 | [dict(type='Posterize', bits=6, prob=0.8), 31 | dict(type='Equalize', prob=1.)], 32 | [ 33 | dict(type='Rotate', angle=10., prob=0.2), 34 | dict(type='Solarize', thr=256 / 9, prob=0.6) 35 | ], 36 | [ 37 | dict(type='Equalize', prob=0.6), 38 | dict(type='Posterize', bits=5, prob=0.4) 39 | ], 40 | [ 41 | dict(type='Rotate', angle=30 / 9 * 8, prob=0.8), 42 | dict(type='ColorTransform', magnitude=0., prob=0.4) 43 | ], 
44 | [ 45 | dict(type='Rotate', angle=30., prob=0.4), 46 | dict(type='Equalize', prob=0.6) 47 | ], 48 | [dict(type='Equalize', prob=0.0), 49 | dict(type='Equalize', prob=0.8)], 50 | [dict(type='Invert', prob=0.6), 51 | dict(type='Equalize', prob=1.)], 52 | [ 53 | dict(type='ColorTransform', magnitude=0.4, prob=0.6), 54 | dict(type='Contrast', magnitude=0.8, prob=1.) 55 | ], 56 | [ 57 | dict(type='Rotate', angle=30 / 9 * 8, prob=0.8), 58 | dict(type='ColorTransform', magnitude=0.2, prob=1.) 59 | ], 60 | [ 61 | dict(type='ColorTransform', magnitude=0.8, prob=0.8), 62 | dict(type='Solarize', thr=256 / 9 * 2, prob=0.8) 63 | ], 64 | [ 65 | dict(type='Sharpness', magnitude=0.7, prob=0.4), 66 | dict(type='Invert', prob=0.6) 67 | ], 68 | [ 69 | dict( 70 | type='Shear', 71 | magnitude=0.3 / 9 * 5, 72 | prob=0.6, 73 | direction='horizontal'), 74 | dict(type='Equalize', prob=1.) 75 | ], 76 | [ 77 | dict(type='ColorTransform', magnitude=0., prob=0.4), 78 | dict(type='Equalize', prob=0.6) 79 | ], 80 | [ 81 | dict(type='Equalize', prob=0.4), 82 | dict(type='Solarize', thr=256 / 9 * 5, prob=0.2) 83 | ], 84 | [ 85 | dict(type='Solarize', thr=256 / 9 * 4, prob=0.6), 86 | dict(type='AutoContrast', prob=0.6) 87 | ], 88 | [dict(type='Invert', prob=0.6), 89 | dict(type='Equalize', prob=1.)], 90 | [ 91 | dict(type='ColorTransform', magnitude=0.4, prob=0.6), 92 | dict(type='Contrast', magnitude=0.8, prob=1.) 93 | ], 94 | [dict(type='Equalize', prob=0.8), 95 | dict(type='Equalize', prob=0.6)], 96 | ] 97 | -------------------------------------------------------------------------------- /classification/configs/_base_/datasets/pipelines/rand_aug.py: -------------------------------------------------------------------------------- 1 | # Refers to `_RAND_INCREASING_TRANSFORMS` in pytorch-image-models 2 | rand_increasing_policies = [ 3 | dict(type='AutoContrast'), 4 | dict(type='Equalize'), 5 | dict(type='Invert'), 6 | dict(type='Rotate', magnitude_key='angle', magnitude_range=(0, 30)), 7 | dict(type='Posterize', magnitude_key='bits', magnitude_range=(4, 0)), 8 | dict(type='Solarize', magnitude_key='thr', magnitude_range=(256, 0)), 9 | dict( 10 | type='SolarizeAdd', 11 | magnitude_key='magnitude', 12 | magnitude_range=(0, 110)), 13 | dict( 14 | type='ColorTransform', 15 | magnitude_key='magnitude', 16 | magnitude_range=(0, 0.9)), 17 | dict(type='Contrast', magnitude_key='magnitude', magnitude_range=(0, 0.9)), 18 | dict( 19 | type='Brightness', magnitude_key='magnitude', 20 | magnitude_range=(0, 0.9)), 21 | dict( 22 | type='Sharpness', magnitude_key='magnitude', magnitude_range=(0, 0.9)), 23 | dict( 24 | type='Shear', 25 | magnitude_key='magnitude', 26 | magnitude_range=(0, 0.3), 27 | direction='horizontal'), 28 | dict( 29 | type='Shear', 30 | magnitude_key='magnitude', 31 | magnitude_range=(0, 0.3), 32 | direction='vertical'), 33 | dict( 34 | type='Translate', 35 | magnitude_key='magnitude', 36 | magnitude_range=(0, 0.45), 37 | direction='horizontal'), 38 | dict( 39 | type='Translate', 40 | magnitude_key='magnitude', 41 | magnitude_range=(0, 0.45), 42 | direction='vertical') 43 | ] 44 | -------------------------------------------------------------------------------- /classification/configs/_base_/datasets/voc_bs16.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'VOC' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | 
dict(type='RandomResizedCrop', size=224), 8 | dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), 9 | dict(type='Normalize', **img_norm_cfg), 10 | dict(type='ImageToTensor', keys=['img']), 11 | dict(type='ToTensor', keys=['gt_label']), 12 | dict(type='Collect', keys=['img', 'gt_label']) 13 | ] 14 | test_pipeline = [ 15 | dict(type='LoadImageFromFile'), 16 | dict(type='Resize', size=(256, -1)), 17 | dict(type='CenterCrop', crop_size=224), 18 | dict(type='Normalize', **img_norm_cfg), 19 | dict(type='ImageToTensor', keys=['img']), 20 | dict(type='Collect', keys=['img']) 21 | ] 22 | data = dict( 23 | samples_per_gpu=16, 24 | workers_per_gpu=2, 25 | train=dict( 26 | type=dataset_type, 27 | data_prefix='data/VOCdevkit/VOC2007/', 28 | ann_file='data/VOCdevkit/VOC2007/ImageSets/Main/trainval.txt', 29 | pipeline=train_pipeline), 30 | val=dict( 31 | type=dataset_type, 32 | data_prefix='data/VOCdevkit/VOC2007/', 33 | ann_file='data/VOCdevkit/VOC2007/ImageSets/Main/test.txt', 34 | pipeline=test_pipeline), 35 | test=dict( 36 | type=dataset_type, 37 | data_prefix='data/VOCdevkit/VOC2007/', 38 | ann_file='data/VOCdevkit/VOC2007/ImageSets/Main/test.txt', 39 | pipeline=test_pipeline)) 40 | evaluation = dict( 41 | interval=1, metric=['mAP', 'CP', 'OP', 'CR', 'OR', 'CF1', 'OF1']) 42 | -------------------------------------------------------------------------------- /classification/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | # checkpoint saving 2 | checkpoint_config = dict(interval=1) 3 | # yapf:disable 4 | log_config = dict( 5 | interval=100, 6 | hooks=[ 7 | dict(type='TextLoggerHook'), 8 | # dict(type='TensorboardLoggerHook') 9 | ]) 10 | # yapf:enable 11 | 12 | dist_params = dict(backend='nccl') 13 | log_level = 'INFO' 14 | load_from = None 15 | resume_from = None 16 | workflow = [('train', 1)] 17 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/mobilenet_v2_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='MobileNetV2', widen_factor=1.0), 5 | neck=dict(type='GlobalAveragePooling'), 6 | head=dict( 7 | type='LinearClsHead', 8 | num_classes=1000, 9 | in_channels=1280, 10 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 11 | topk=(1, 5), 12 | )) 13 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/mobilenet_v3_large_imagenet.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='MobileNetV3', arch='large'), 5 | neck=dict(type='GlobalAveragePooling'), 6 | head=dict( 7 | type='StackedLinearClsHead', 8 | num_classes=1000, 9 | in_channels=960, 10 | mid_channels=[1280], 11 | dropout_rate=0.2, 12 | act_cfg=dict(type='HSwish'), 13 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 14 | topk=(1, 5))) 15 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/mobilenet_v3_small_cifar.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='MobileNetV3', arch='small'), 5 | neck=dict(type='GlobalAveragePooling'), 6 | head=dict( 7 | type='StackedLinearClsHead', 
8 | num_classes=10, 9 | in_channels=576, 10 | mid_channels=[1280], 11 | act_cfg=dict(type='HSwish'), 12 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 13 | topk=(1, 5))) 14 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/mobilenet_v3_small_imagenet.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='MobileNetV3', arch='small'), 5 | neck=dict(type='GlobalAveragePooling'), 6 | head=dict( 7 | type='StackedLinearClsHead', 8 | num_classes=1000, 9 | in_channels=576, 10 | mid_channels=[1024], 11 | dropout_rate=0.2, 12 | act_cfg=dict(type='HSwish'), 13 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 14 | topk=(1, 5))) 15 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/regnet/regnetx_1.6gf.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='RegNet', arch='regnetx_1.6gf'), 5 | neck=dict(type='GlobalAveragePooling'), 6 | head=dict( 7 | type='LinearClsHead', 8 | num_classes=1000, 9 | in_channels=912, 10 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 11 | topk=(1, 5), 12 | )) 13 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/regnet/regnetx_12gf.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='RegNet', arch='regnetx_12gf'), 5 | neck=dict(type='GlobalAveragePooling'), 6 | head=dict( 7 | type='LinearClsHead', 8 | num_classes=1000, 9 | in_channels=2240, 10 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 11 | topk=(1, 5), 12 | )) 13 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/regnet/regnetx_3.2gf.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='RegNet', arch='regnetx_3.2gf'), 5 | neck=dict(type='GlobalAveragePooling'), 6 | head=dict( 7 | type='LinearClsHead', 8 | num_classes=1000, 9 | in_channels=1008, 10 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 11 | topk=(1, 5), 12 | )) 13 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/regnet/regnetx_4.0gf.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='RegNet', arch='regnetx_4.0gf'), 5 | neck=dict(type='GlobalAveragePooling'), 6 | head=dict( 7 | type='LinearClsHead', 8 | num_classes=1000, 9 | in_channels=1360, 10 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 11 | topk=(1, 5), 12 | )) 13 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/regnet/regnetx_400mf.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='RegNet', arch='regnetx_400mf'), 5 | neck=dict(type='GlobalAveragePooling'), 6 | head=dict( 7 | type='LinearClsHead', 8 | num_classes=1000, 9 | 
in_channels=384, 10 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 11 | topk=(1, 5), 12 | )) 13 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/regnet/regnetx_6.4gf.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='RegNet', arch='regnetx_6.4gf'), 5 | neck=dict(type='GlobalAveragePooling'), 6 | head=dict( 7 | type='LinearClsHead', 8 | num_classes=1000, 9 | in_channels=1624, 10 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 11 | topk=(1, 5), 12 | )) 13 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/regnet/regnetx_8.0gf.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='RegNet', arch='regnetx_8.0gf'), 5 | neck=dict(type='GlobalAveragePooling'), 6 | head=dict( 7 | type='LinearClsHead', 8 | num_classes=1000, 9 | in_channels=1920, 10 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 11 | topk=(1, 5), 12 | )) 13 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/regnet/regnetx_800mf.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='RegNet', arch='regnetx_800mf'), 5 | neck=dict(type='GlobalAveragePooling'), 6 | head=dict( 7 | type='LinearClsHead', 8 | num_classes=1000, 9 | in_channels=672, 10 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 11 | topk=(1, 5), 12 | )) 13 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/repvgg-A0_in1k.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='ImageClassifier', 3 | backbone=dict( 4 | type='RepVGG', 5 | arch='A0', 6 | out_indices=(3, ), 7 | ), 8 | neck=dict(type='GlobalAveragePooling'), 9 | head=dict( 10 | type='LinearClsHead', 11 | num_classes=1000, 12 | in_channels=1280, 13 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 14 | topk=(1, 5), 15 | )) 16 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/repvgg-B3_lbs-mixup_in1k.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='ImageClassifier', 3 | backbone=dict( 4 | type='RepVGG', 5 | arch='B3', 6 | out_indices=(3, ), 7 | ), 8 | neck=dict(type='GlobalAveragePooling'), 9 | head=dict( 10 | type='LinearClsHead', 11 | num_classes=1000, 12 | in_channels=2560, 13 | loss=dict( 14 | type='LabelSmoothLoss', 15 | loss_weight=1.0, 16 | label_smooth_val=0.1, 17 | mode='classy_vision', 18 | num_classes=1000), 19 | topk=(1, 5), 20 | ), 21 | train_cfg=dict( 22 | augments=dict(type='BatchMixup', alpha=0.2, num_classes=1000, 23 | prob=1.))) 24 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnest101.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNeSt', 6 | depth=101, 7 | num_stages=4, 8 | stem_channels=128, 9 | out_indices=(3, ), 10 | 
style='pytorch'), 11 | neck=dict(type='GlobalAveragePooling'), 12 | head=dict( 13 | type='LinearClsHead', 14 | num_classes=1000, 15 | in_channels=2048, 16 | loss=dict( 17 | type='LabelSmoothLoss', 18 | label_smooth_val=0.1, 19 | num_classes=1000, 20 | reduction='mean', 21 | loss_weight=1.0), 22 | topk=(1, 5), 23 | cal_acc=False)) 24 | train_cfg = dict(mixup=dict(alpha=0.2, num_classes=1000)) 25 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnest200.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNeSt', 6 | depth=200, 7 | num_stages=4, 8 | stem_channels=128, 9 | out_indices=(3, ), 10 | style='pytorch'), 11 | neck=dict(type='GlobalAveragePooling'), 12 | head=dict( 13 | type='LinearClsHead', 14 | num_classes=1000, 15 | in_channels=2048, 16 | loss=dict( 17 | type='LabelSmoothLoss', 18 | label_smooth_val=0.1, 19 | num_classes=1000, 20 | reduction='mean', 21 | loss_weight=1.0), 22 | topk=(1, 5), 23 | cal_acc=False)) 24 | train_cfg = dict(mixup=dict(alpha=0.2, num_classes=1000)) 25 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnest269.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNeSt', 6 | depth=269, 7 | num_stages=4, 8 | stem_channels=128, 9 | out_indices=(3, ), 10 | style='pytorch'), 11 | neck=dict(type='GlobalAveragePooling'), 12 | head=dict( 13 | type='LinearClsHead', 14 | num_classes=1000, 15 | in_channels=2048, 16 | loss=dict( 17 | type='LabelSmoothLoss', 18 | label_smooth_val=0.1, 19 | num_classes=1000, 20 | reduction='mean', 21 | loss_weight=1.0), 22 | topk=(1, 5), 23 | cal_acc=False)) 24 | train_cfg = dict(mixup=dict(alpha=0.2, num_classes=1000)) 25 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnest50.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNeSt', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=2048, 15 | loss=dict( 16 | type='LabelSmoothLoss', 17 | label_smooth_val=0.1, 18 | num_classes=1000, 19 | reduction='mean', 20 | loss_weight=1.0), 21 | topk=(1, 5), 22 | cal_acc=False)) 23 | train_cfg = dict(mixup=dict(alpha=0.2, num_classes=1000)) 24 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet101.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=101, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | topk=(1, 5), 17 | )) 18 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet101_cifar.py: 
-------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet_CIFAR', 6 | depth=101, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=10, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | )) 17 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet152.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=152, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | topk=(1, 5), 17 | )) 18 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet152_cifar.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet_CIFAR', 6 | depth=152, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=10, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | )) 17 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet18.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=18, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=512, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | topk=(1, 5), 17 | )) 18 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet18_cifar.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet_CIFAR', 6 | depth=18, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=10, 14 | in_channels=512, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | )) 17 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet34.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=34, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=512, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | topk=(1, 5), 17 | )) 18 | 
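Note: each model file in this section only declares a plain `model` dict; mmcls builds the actual `nn.Module` from it at runtime. A minimal sketch using the mmcls builder, assuming mmcls 0.16 as pinned in the README (the config path is illustrative):

```python
from mmcv import Config
from mmcls.models import build_classifier

# Parse one of the _base_ model configs and instantiate the classifier
# it describes.
cfg = Config.fromfile('configs/_base_/models/resnet50.py')
model = build_classifier(cfg.model)
model.init_weights()
# Roughly 25.6M parameters for the ImageNet ResNet-50 classifier.
print(sum(p.numel() for p in model.parameters()))
```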
-------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet34_cifar.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet_CIFAR', 6 | depth=34, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=10, 14 | in_channels=512, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | )) 17 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet50.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | topk=(1, 5), 17 | )) 18 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet50_cifar.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet_CIFAR', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=10, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | )) 17 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet50_cifar_cutmix.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet_CIFAR', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='MultiLabelLinearClsHead', 13 | num_classes=10, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0, use_soft=True)), 16 | train_cfg=dict( 17 | augments=dict(type='BatchCutMix', alpha=1.0, num_classes=10, 18 | prob=1.0))) 19 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet50_cifar_mixup.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet_CIFAR', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='MultiLabelLinearClsHead', 13 | num_classes=10, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0, use_soft=True)), 16 | train_cfg=dict( 17 | augments=dict(type='BatchMixup', alpha=1., num_classes=10, prob=1.))) 18 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet50_cutmix.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | 
model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='MultiLabelLinearClsHead', 13 | num_classes=1000, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0, use_soft=True)), 16 | train_cfg=dict( 17 | augments=dict( 18 | type='BatchCutMix', alpha=1.0, num_classes=1000, prob=1.0))) 19 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet50_label_smooth.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=2048, 15 | loss=dict( 16 | type='LabelSmoothLoss', label_smooth_val=0.1, loss_weight=1.0), 17 | topk=(1, 5), 18 | )) 19 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnet50_mixup.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='MultiLabelLinearClsHead', 13 | num_classes=1000, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0, use_soft=True)), 16 | train_cfg=dict( 17 | augments=dict(type='BatchMixup', alpha=0.2, num_classes=1000, 18 | prob=1.))) 19 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnetv1d101.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNetV1d', 6 | depth=101, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | topk=(1, 5), 17 | )) 18 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnetv1d152.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNetV1d', 6 | depth=152, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | topk=(1, 5), 17 | )) 18 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnetv1d50.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNetV1d', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | 
neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | topk=(1, 5), 17 | )) 18 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnext101_32x4d.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=101, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | groups=32, 10 | width_per_group=4, 11 | style='pytorch'), 12 | neck=dict(type='GlobalAveragePooling'), 13 | head=dict( 14 | type='LinearClsHead', 15 | num_classes=1000, 16 | in_channels=2048, 17 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 18 | topk=(1, 5), 19 | )) 20 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnext101_32x8d.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=101, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | groups=32, 10 | width_per_group=8, 11 | style='pytorch'), 12 | neck=dict(type='GlobalAveragePooling'), 13 | head=dict( 14 | type='LinearClsHead', 15 | num_classes=1000, 16 | in_channels=2048, 17 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 18 | topk=(1, 5), 19 | )) 20 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnext152_32x4d.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=152, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | groups=32, 10 | width_per_group=4, 11 | style='pytorch'), 12 | neck=dict(type='GlobalAveragePooling'), 13 | head=dict( 14 | type='LinearClsHead', 15 | num_classes=1000, 16 | in_channels=2048, 17 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 18 | topk=(1, 5), 19 | )) 20 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/resnext50_32x4d.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | groups=32, 10 | width_per_group=4, 11 | style='pytorch'), 12 | neck=dict(type='GlobalAveragePooling'), 13 | head=dict( 14 | type='LinearClsHead', 15 | num_classes=1000, 16 | in_channels=2048, 17 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 18 | topk=(1, 5), 19 | )) 20 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/seresnet101.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='SEResNet', 6 | depth=101, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | topk=(1, 5), 17 | )) 18 | 
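Note: the `*_pruning.py` / `*_finetune.py` configs under `configs/resnet50`, `configs/resnext` and `configs/regnet` layer the pruning-specific settings on top of these `_base_` files. A hypothetical sketch of the fine-tune overrides the README describes — the base file and checkpoint path are placeholders, not taken from this repo:

```python
# Hypothetical fine-tune config: reuse the pruning config, point
# `deploy_from` at the pruned checkpoint (as the README instructs), and
# choose samples_per_gpu so that samples_per_gpu * #GPUs == 256.
_base_ = ['resnet50_pruning.py']
deploy_from = 'work_dirs/resnet50_pruning/pruned.pth'  # placeholder path
data = dict(samples_per_gpu=64)  # e.g. 256 / 4 GPUs
```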
-------------------------------------------------------------------------------- /classification/configs/_base_/models/seresnet50.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='SEResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=2048, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | topk=(1, 5), 17 | )) 18 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/seresnext101_32x4d.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='SEResNeXt', 6 | depth=101, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | groups=32, 10 | width_per_group=4, 11 | se_ratio=16, 12 | style='pytorch'), 13 | neck=dict(type='GlobalAveragePooling'), 14 | head=dict( 15 | type='LinearClsHead', 16 | num_classes=1000, 17 | in_channels=2048, 18 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 19 | topk=(1, 5), 20 | )) 21 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/seresnext50_32x4d.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='SEResNeXt', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | groups=32, 10 | width_per_group=4, 11 | se_ratio=16, 12 | style='pytorch'), 13 | neck=dict(type='GlobalAveragePooling'), 14 | head=dict( 15 | type='LinearClsHead', 16 | num_classes=1000, 17 | in_channels=2048, 18 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 19 | topk=(1, 5), 20 | )) 21 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/shufflenet_v1_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='ShuffleNetV1', groups=3), 5 | neck=dict(type='GlobalAveragePooling'), 6 | head=dict( 7 | type='LinearClsHead', 8 | num_classes=1000, 9 | in_channels=960, 10 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 11 | topk=(1, 5), 12 | )) 13 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/shufflenet_v2_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='ShuffleNetV2', widen_factor=1.0), 5 | neck=dict(type='GlobalAveragePooling'), 6 | head=dict( 7 | type='LinearClsHead', 8 | num_classes=1000, 9 | in_channels=1024, 10 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 11 | topk=(1, 5), 12 | )) 13 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/swin_transformer/base_224.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='SwinTransformer', arch='base', img_size=224, drop_path_rate=0.5), 6 | neck=dict(type='GlobalAveragePooling'), 
7 | head=dict( 8 | type='LinearClsHead', 9 | num_classes=1000, 10 | in_channels=1024, 11 | init_cfg=None, # suppress the default init_cfg of LinearClsHead. 12 | loss=dict( 13 | type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), 14 | cal_acc=False), 15 | init_cfg=[ 16 | dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.), 17 | dict(type='Constant', layer='LayerNorm', val=1., bias=0.) 18 | ], 19 | train_cfg=dict(augments=[ 20 | dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5), 21 | dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5) 22 | ])) 23 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/swin_transformer/base_384.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Only for evaluation 3 | model = dict( 4 | type='ImageClassifier', 5 | backbone=dict( 6 | type='SwinTransformer', 7 | arch='base', 8 | img_size=384, 9 | stage_cfgs=dict(block_cfgs=dict(window_size=12))), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=1024, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | topk=(1, 5))) 17 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/swin_transformer/large_224.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Only for evaluation 3 | model = dict( 4 | type='ImageClassifier', 5 | backbone=dict(type='SwinTransformer', arch='large', img_size=224), 6 | neck=dict(type='GlobalAveragePooling'), 7 | head=dict( 8 | type='LinearClsHead', 9 | num_classes=1000, 10 | in_channels=1536, 11 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 12 | topk=(1, 5))) 13 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/swin_transformer/large_384.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Only for evaluation 3 | model = dict( 4 | type='ImageClassifier', 5 | backbone=dict( 6 | type='SwinTransformer', 7 | arch='large', 8 | img_size=384, 9 | stage_cfgs=dict(block_cfgs=dict(window_size=12))), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=1536, 15 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 16 | topk=(1, 5))) 17 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/swin_transformer/small_224.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='SwinTransformer', arch='small', img_size=224, 6 | drop_path_rate=0.3), 7 | neck=dict(type='GlobalAveragePooling'), 8 | head=dict( 9 | type='LinearClsHead', 10 | num_classes=1000, 11 | in_channels=768, 12 | init_cfg=None, # suppress the default init_cfg of LinearClsHead. 13 | loss=dict( 14 | type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), 15 | cal_acc=False), 16 | init_cfg=[ 17 | dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.), 18 | dict(type='Constant', layer='LayerNorm', val=1., bias=0.) 
19 | ], 20 | train_cfg=dict(augments=[ 21 | dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5), 22 | dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5) 23 | ])) 24 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/swin_transformer/tiny_224.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='SwinTransformer', arch='tiny', img_size=224, drop_path_rate=0.2), 6 | neck=dict(type='GlobalAveragePooling'), 7 | head=dict( 8 | type='LinearClsHead', 9 | num_classes=1000, 10 | in_channels=768, 11 | init_cfg=None, # suppress the default init_cfg of LinearClsHead. 12 | loss=dict( 13 | type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), 14 | cal_acc=False), 15 | init_cfg=[ 16 | dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.), 17 | dict(type='Constant', layer='LayerNorm', val=1., bias=0.) 18 | ], 19 | train_cfg=dict(augments=[ 20 | dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5), 21 | dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5) 22 | ])) 23 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/tnt_s_patch16_224.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='TNT', 6 | arch='s', 7 | img_size=224, 8 | patch_size=16, 9 | in_channels=3, 10 | ffn_ratio=4, 11 | qkv_bias=False, 12 | drop_rate=0., 13 | attn_drop_rate=0., 14 | drop_path_rate=0.1, 15 | first_stride=4, 16 | num_fcs=2, 17 | init_cfg=[ 18 | dict(type='TruncNormal', layer='Linear', std=.02), 19 | dict(type='Constant', layer='LayerNorm', val=1., bias=0.) 
20 | ]), 21 | neck=None, 22 | head=dict( 23 | type='LinearClsHead', 24 | num_classes=1000, 25 | in_channels=384, 26 | loss=dict( 27 | type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'), 28 | topk=(1, 5), 29 | init_cfg=dict(type='TruncNormal', layer='Linear', std=.02))) 30 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vgg11.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='VGG', depth=11, num_classes=1000), 5 | neck=None, 6 | head=dict( 7 | type='ClsHead', 8 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 9 | topk=(1, 5), 10 | )) 11 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vgg11bn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='VGG', depth=11, norm_cfg=dict(type='BN'), num_classes=1000), 6 | neck=None, 7 | head=dict( 8 | type='ClsHead', 9 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 10 | topk=(1, 5), 11 | )) 12 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vgg13.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='VGG', depth=13, num_classes=1000), 5 | neck=None, 6 | head=dict( 7 | type='ClsHead', 8 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 9 | topk=(1, 5), 10 | )) 11 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vgg13bn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='VGG', depth=13, norm_cfg=dict(type='BN'), num_classes=1000), 6 | neck=None, 7 | head=dict( 8 | type='ClsHead', 9 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 10 | topk=(1, 5), 11 | )) 12 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vgg16.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='VGG', depth=16, num_classes=1000), 5 | neck=None, 6 | head=dict( 7 | type='ClsHead', 8 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 9 | topk=(1, 5), 10 | )) 11 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vgg16bn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='VGG', depth=16, norm_cfg=dict(type='BN'), num_classes=1000), 6 | neck=None, 7 | head=dict( 8 | type='ClsHead', 9 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 10 | topk=(1, 5), 11 | )) 12 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vgg19.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict(type='VGG', depth=19, 
num_classes=1000), 5 | neck=None, 6 | head=dict( 7 | type='ClsHead', 8 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 9 | topk=(1, 5), 10 | )) 11 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vgg19bn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='VGG', depth=19, norm_cfg=dict(type='BN'), num_classes=1000), 6 | neck=None, 7 | head=dict( 8 | type='ClsHead', 9 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 10 | topk=(1, 5), 11 | )) 12 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vit_base_patch16_224_finetune.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='VisionTransformer', 6 | num_layers=12, 7 | embed_dim=768, 8 | num_heads=12, 9 | img_size=224, 10 | patch_size=16, 11 | in_channels=3, 12 | feedforward_channels=3072, 13 | drop_rate=0.1), 14 | neck=None, 15 | head=dict( 16 | type='VisionTransformerClsHead', 17 | num_classes=1000, 18 | in_channels=768, 19 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 20 | topk=(1, 5), 21 | )) 22 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vit_base_patch16_224_pretrain.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='VisionTransformer', 6 | num_layers=12, 7 | embed_dim=768, 8 | num_heads=12, 9 | img_size=224, 10 | patch_size=16, 11 | in_channels=3, 12 | feedforward_channels=3072, 13 | drop_rate=0.1, 14 | attn_drop_rate=0.), 15 | neck=None, 16 | head=dict( 17 | type='VisionTransformerClsHead', 18 | num_classes=1000, 19 | in_channels=768, 20 | hidden_dim=3072, 21 | loss=dict(type='LabelSmoothLoss', label_smooth_val=0.1), 22 | topk=(1, 5), 23 | ), 24 | train_cfg=dict( 25 | augments=dict(type='BatchMixup', alpha=0.2, num_classes=1000, 26 | prob=1.))) 27 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vit_base_patch16_384_finetune.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='VisionTransformer', 6 | num_layers=12, 7 | embed_dim=768, 8 | num_heads=12, 9 | img_size=384, 10 | patch_size=16, 11 | in_channels=3, 12 | feedforward_channels=3072, 13 | drop_rate=0.1), 14 | neck=None, 15 | head=dict( 16 | type='VisionTransformerClsHead', 17 | num_classes=1000, 18 | in_channels=768, 19 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 20 | topk=(1, 5), 21 | )) 22 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vit_base_patch32_384_finetune.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='VisionTransformer', 6 | num_layers=12, 7 | embed_dim=768, 8 | num_heads=12, 9 | img_size=384, 10 | patch_size=32, 11 | in_channels=3, 12 | feedforward_channels=3072, 13 | drop_rate=0.1), 14 | neck=None, 15 | head=dict( 16 | 
type='VisionTransformerClsHead', 17 | num_classes=1000, 18 | in_channels=768, 19 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 20 | topk=(1, 5), 21 | )) 22 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vit_large_patch16_224_finetune.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='VisionTransformer', 6 | num_layers=24, 7 | embed_dim=1024, 8 | num_heads=16, 9 | img_size=224, 10 | patch_size=16, 11 | in_channels=3, 12 | feedforward_channels=4096, 13 | drop_rate=0.1), 14 | neck=None, 15 | head=dict( 16 | type='VisionTransformerClsHead', 17 | num_classes=1000, 18 | in_channels=1024, 19 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 20 | topk=(1, 5), 21 | )) 22 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vit_large_patch16_384_finetune.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='VisionTransformer', 6 | num_layers=24, 7 | embed_dim=1024, 8 | num_heads=16, 9 | img_size=384, 10 | patch_size=16, 11 | in_channels=3, 12 | feedforward_channels=4096, 13 | drop_rate=0.1), 14 | neck=None, 15 | head=dict( 16 | type='VisionTransformerClsHead', 17 | num_classes=1000, 18 | in_channels=1024, 19 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 20 | topk=(1, 5), 21 | )) 22 | -------------------------------------------------------------------------------- /classification/configs/_base_/models/vit_large_patch32_384_finetune.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='VisionTransformer', 6 | num_layers=24, 7 | embed_dim=1024, 8 | num_heads=16, 9 | img_size=384, 10 | patch_size=32, 11 | in_channels=3, 12 | feedforward_channels=4096, 13 | drop_rate=0.1), 14 | neck=None, 15 | head=dict( 16 | type='VisionTransformerClsHead', 17 | num_classes=1000, 18 | in_channels=1024, 19 | loss=dict(type='CrossEntropyLoss', loss_weight=1.0), 20 | topk=(1, 5), 21 | )) 22 | -------------------------------------------------------------------------------- /classification/configs/_base_/schedules/cifar10_bs128.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict(policy='step', step=[100, 150]) 6 | runner = dict(type='EpochBasedRunner', max_epochs=200) 7 | -------------------------------------------------------------------------------- /classification/configs/_base_/schedules/imagenet_bs1024_adamw_swin.py: -------------------------------------------------------------------------------- 1 | paramwise_cfg = dict( 2 | norm_decay_mult=0.0, 3 | bias_decay_mult=0.0, 4 | custom_keys={ 5 | '.absolute_pos_embed': dict(decay_mult=0.0), 6 | '.relative_position_bias_table': dict(decay_mult=0.0) 7 | }) 8 | 9 | # the batch size per GPU is 128, with 8 GPUs in total 10 | # lr = 5e-4 * 128 * 8 / 512 = 0.001 11 | optimizer = dict( 12 | type='AdamW', 13 | lr=5e-4 * 128 * 8 / 512, 14 | weight_decay=0.05, 15 | eps=1e-8, 16 | betas=(0.9, 0.999), 17 | paramwise_cfg=paramwise_cfg) 18 | optimizer_config
= dict(grad_clip=dict(max_norm=5.0)) 19 | 20 | # learning policy 21 | lr_config = dict( 22 | policy='CosineAnnealing', 23 | by_epoch=False, 24 | min_lr_ratio=1e-2, 25 | warmup='linear', 26 | warmup_ratio=1e-3, 27 | warmup_iters=20 * 1252, 28 | warmup_by_epoch=False) 29 | 30 | runner = dict(type='EpochBasedRunner', max_epochs=300) 31 | -------------------------------------------------------------------------------- /classification/configs/_base_/schedules/imagenet_bs1024_linearlr_bn_nowd.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict( 3 | type='SGD', 4 | lr=0.5, 5 | momentum=0.9, 6 | weight_decay=0.00004, 7 | paramwise_cfg=dict(norm_decay_mult=0)) 8 | optimizer_config = dict(grad_clip=None) 9 | # learning policy 10 | lr_config = dict( 11 | policy='poly', 12 | min_lr=0, 13 | by_epoch=False, 14 | warmup='constant', 15 | warmup_iters=5000, 16 | ) 17 | runner = dict(type='EpochBasedRunner', max_epochs=300) 18 | -------------------------------------------------------------------------------- /classification/configs/_base_/schedules/imagenet_bs2048.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict( 3 | type='SGD', lr=0.8, momentum=0.9, weight_decay=0.0001, nesterov=True) 4 | optimizer_config = dict(grad_clip=None) 5 | # learning policy 6 | lr_config = dict( 7 | policy='step', 8 | warmup='linear', 9 | warmup_iters=2500, 10 | warmup_ratio=0.25, 11 | step=[30, 60, 90]) 12 | runner = dict(type='EpochBasedRunner', max_epochs=100) 13 | -------------------------------------------------------------------------------- /classification/configs/_base_/schedules/imagenet_bs2048_AdamW.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # In ClassyVision, the lr is set to 0.003 for bs4096. 
3 | # In this implementation (bs2048), lr = 0.003 / 4096 * (32 imgs/gpu * 64 gpus) = 0.0015 4 | optimizer = dict(type='AdamW', lr=0.0015, weight_decay=0.3) 5 | optimizer_config = dict(grad_clip=dict(max_norm=1.0)) 6 | 7 | # specific to vit pretrain 8 | paramwise_cfg = dict( 9 | custom_keys={ 10 | '.backbone.cls_token': dict(decay_mult=0.0), 11 | '.backbone.pos_embed': dict(decay_mult=0.0) 12 | }) 13 | # learning policy 14 | lr_config = dict( 15 | policy='CosineAnnealing', 16 | min_lr=0, 17 | warmup='linear', 18 | warmup_iters=10000, 19 | warmup_ratio=1e-4) 20 | runner = dict(type='EpochBasedRunner', max_epochs=300) 21 | -------------------------------------------------------------------------------- /classification/configs/_base_/schedules/imagenet_bs2048_coslr.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict( 3 | type='SGD', lr=0.8, momentum=0.9, weight_decay=0.0001, nesterov=True) 4 | optimizer_config = dict(grad_clip=None) 5 | # learning policy 6 | lr_config = dict( 7 | policy='CosineAnnealing', 8 | min_lr=0, 9 | warmup='linear', 10 | warmup_iters=2500, 11 | warmup_ratio=0.25) 12 | runner = dict(type='EpochBasedRunner', max_epochs=100) 13 | -------------------------------------------------------------------------------- /classification/configs/_base_/schedules/imagenet_bs256.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict(policy='step', step=[30, 60, 90]) 6 | runner = dict(type='EpochBasedRunner', max_epochs=100) 7 | -------------------------------------------------------------------------------- /classification/configs/_base_/schedules/imagenet_bs256_140e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict(policy='step', step=[40, 80, 120]) 6 | runner = dict(type='EpochBasedRunner', max_epochs=140) 7 | -------------------------------------------------------------------------------- /classification/configs/_base_/schedules/imagenet_bs256_200e_coslr_warmup.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='CosineAnnealing', 7 | min_lr=0, 8 | warmup='linear', 9 | warmup_iters=25025, 10 | warmup_ratio=0.25) 11 | runner = dict(type='EpochBasedRunner', max_epochs=200) 12 | -------------------------------------------------------------------------------- /classification/configs/_base_/schedules/imagenet_bs256_coslr.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict(policy='CosineAnnealing', min_lr=0) 6 | runner = dict(type='EpochBasedRunner', max_epochs=100) 7 | -------------------------------------------------------------------------------- /classification/configs/_base_/schedules/imagenet_bs256_epochstep.py:
-------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.045, momentum=0.9, weight_decay=0.00004) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict(policy='step', gamma=0.98, step=1) 6 | runner = dict(type='EpochBasedRunner', max_epochs=300) 7 | -------------------------------------------------------------------------------- /classification/configs/_base_/schedules/imagenet_bs4096_AdamW.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='AdamW', lr=0.003, weight_decay=0.3) 3 | optimizer_config = dict(grad_clip=dict(max_norm=1.0)) 4 | 5 | # specific to vit pretrain 6 | paramwise_cfg = dict( 7 | custom_keys={ 8 | '.backbone.cls_token': dict(decay_mult=0.0), 9 | '.backbone.pos_embed': dict(decay_mult=0.0) 10 | }) 11 | # learning policy 12 | lr_config = dict( 13 | policy='CosineAnnealing', 14 | min_lr=0, 15 | warmup='linear', 16 | warmup_iters=10000, 17 | warmup_ratio=1e-4) 18 | runner = dict(type='EpochBasedRunner', max_epochs=300) 19 | -------------------------------------------------------------------------------- /classification/configs/regnet/regnet_0.4G_origin.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/regnet/regnetx_400mf.py', 3 | '../_base_/datasets/imagenet_bs32.py', 4 | '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' 5 | ] 6 | 7 | work_dir = "work_dirs/regnet_0.4G_origin" 8 | load_from = "https://download.openmmlab.com/mmclassification/v0/regnet/convert/RegNetX-400MF-0db9f35c.pth" 9 | 10 | # dataset settings 11 | dataset_type = 'ImageNet' 12 | 13 | img_norm_cfg = dict( 14 | # The mean and std are used in PyCls when training RegNets 15 | mean=[103.53, 116.28, 123.675], 16 | std=[57.375, 57.12, 58.395], 17 | to_rgb=False) 18 | 19 | train_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict(type='RandomResizedCrop', size=224), 22 | dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), 23 | dict(type='Normalize', **img_norm_cfg), 24 | dict(type='ImageToTensor', keys=['img']), 25 | dict(type='ToTensor', keys=['gt_label']), 26 | dict(type='Collect', keys=['img', 'gt_label']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='LoadImageFromFile'), 30 | dict(type='Resize', size=(256, -1)), 31 | dict(type='CenterCrop', crop_size=224), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='ImageToTensor', keys=['img']), 34 | dict(type='Collect', keys=['img']) 35 | ] 36 | data = dict( 37 | samples_per_gpu=256, 38 | workers_per_gpu=16, 39 | train=dict( 40 | type=dataset_type, 41 | data_prefix='data/imagenet/train', 42 | pipeline=train_pipeline), 43 | val=dict( 44 | type=dataset_type, 45 | data_prefix='data/imagenet/val', 46 | ann_file='data/imagenet/meta/val.txt', 47 | pipeline=test_pipeline), 48 | test=dict( 49 | # replace `data/val` with `data/test` for standard test 50 | type=dataset_type, 51 | data_prefix='data/imagenet/val', 52 | ann_file='data/imagenet/meta/val.txt', 53 | pipeline=test_pipeline)) 54 | evaluation = dict(interval=1, metric='accuracy') -------------------------------------------------------------------------------- /classification/configs/regnet/regnet_0.4G_pruning.py: -------------------------------------------------------------------------------- 1 | _base_ = ["./regnet_0.8G_origin.py"] 2 | 3 | work_dir = "work_dirs/regnet_0.4G_pruning" 4 | optimizer = dict(lr=0.004) 5 
| 6 | custom_hooks = [ 7 | dict( 8 | type='FisherPruningHook', 9 | # In the pruning process, the priority must be 10 | # 'LOWEST' to ensure the pruning hook runs 11 | # after the optimizer hook; in the finetune 12 | # process, it must be 'HIGHEST' so that it runs 13 | # before the checkpoint hook 14 | pruning=True, 15 | batch_size=32, 16 | interval=25, 17 | priority='LOWEST', 18 | ) 19 | ] 20 | 21 | data = dict(samples_per_gpu=32, workers_per_gpu=2) -------------------------------------------------------------------------------- /classification/configs/regnet/regnet_0.8G_origin.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/regnet/regnetx_800mf.py', 3 | '../_base_/datasets/imagenet_bs32.py', 4 | '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' 5 | ] 6 | 7 | work_dir = "work_dirs/regnet_0.8G_origin" 8 | load_from = "https://download.openmmlab.com/mmclassification/v0/regnet/convert/RegNetX-800MF-4f9d1e8a.pth" 9 | 10 | # dataset settings 11 | dataset_type = 'ImageNet' 12 | 13 | img_norm_cfg = dict( 14 | # The mean and std are used in PyCls when training RegNets 15 | mean=[103.53, 116.28, 123.675], 16 | std=[57.375, 57.12, 58.395], 17 | to_rgb=False) 18 | 19 | train_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict(type='RandomResizedCrop', size=224), 22 | dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), 23 | dict(type='Normalize', **img_norm_cfg), 24 | dict(type='ImageToTensor', keys=['img']), 25 | dict(type='ToTensor', keys=['gt_label']), 26 | dict(type='Collect', keys=['img', 'gt_label']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='LoadImageFromFile'), 30 | dict(type='Resize', size=(256, -1)), 31 | dict(type='CenterCrop', crop_size=224), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='ImageToTensor', keys=['img']), 34 | dict(type='Collect', keys=['img']) 35 | ] 36 | data = dict( 37 | samples_per_gpu=256, 38 | workers_per_gpu=16, 39 | train=dict( 40 | type=dataset_type, 41 | data_prefix='data/imagenet/train', 42 | pipeline=train_pipeline), 43 | val=dict( 44 | type=dataset_type, 45 | data_prefix='data/imagenet/val', 46 | ann_file='data/imagenet/meta/val.txt', 47 | pipeline=test_pipeline), 48 | test=dict( 49 | # replace `data/val` with `data/test` for standard test 50 | type=dataset_type, 51 | data_prefix='data/imagenet/val', 52 | ann_file='data/imagenet/meta/val.txt', 53 | pipeline=test_pipeline)) 54 | evaluation = dict(interval=1, metric='accuracy') -------------------------------------------------------------------------------- /classification/configs/regnet/regnet_0.8G_pruning.py: -------------------------------------------------------------------------------- 1 | _base_ = ["./regnet_1.6G_origin.py"] 2 | 3 | work_dir = "work_dirs/regnet_0.8G_pruning" 4 | optimizer = dict(lr=0.004) 5 | 6 | custom_hooks = [ 7 | dict( 8 | type='FisherPruningHook', 9 | # In the pruning process, the priority must be 10 | # 'LOWEST' to ensure the pruning hook runs 11 | # after the optimizer hook; in the finetune 12 | # process, it must be 'HIGHEST' so that it runs 13 | # before the checkpoint hook 14 | pruning=True, 15 | batch_size=32, 16 | interval=25, 17 | priority='LOWEST', 18 | ) 19 | ] 20 | 21 | data = dict(samples_per_gpu=32, workers_per_gpu=2) -------------------------------------------------------------------------------- /classification/configs/regnet/regnet_1.6G_origin.py:
-------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/regnet/regnetx_1.6gf.py', 3 | '../_base_/datasets/imagenet_bs32.py', 4 | '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' 5 | ] 6 | 7 | work_dir = "work_dirs/regnet_1.6G_origin" 8 | load_from = "https://download.openmmlab.com/mmclassification/v0/regnet/convert/RegNetX-1.6GF-cfb32375.pth" 9 | 10 | # dataset settings 11 | dataset_type = 'ImageNet' 12 | 13 | img_norm_cfg = dict( 14 | # The mean and std are used in PyCls when training RegNets 15 | mean=[103.53, 116.28, 123.675], 16 | std=[57.375, 57.12, 58.395], 17 | to_rgb=False) 18 | 19 | train_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict(type='RandomResizedCrop', size=224), 22 | dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), 23 | dict(type='Normalize', **img_norm_cfg), 24 | dict(type='ImageToTensor', keys=['img']), 25 | dict(type='ToTensor', keys=['gt_label']), 26 | dict(type='Collect', keys=['img', 'gt_label']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='LoadImageFromFile'), 30 | dict(type='Resize', size=(256, -1)), 31 | dict(type='CenterCrop', crop_size=224), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='ImageToTensor', keys=['img']), 34 | dict(type='Collect', keys=['img']) 35 | ] 36 | data = dict( 37 | samples_per_gpu=256, 38 | workers_per_gpu=16, 39 | train=dict( 40 | type=dataset_type, 41 | data_prefix='data/imagenet/train', 42 | pipeline=train_pipeline), 43 | val=dict( 44 | type=dataset_type, 45 | data_prefix='data/imagenet/val', 46 | ann_file='data/imagenet/meta/val.txt', 47 | pipeline=test_pipeline), 48 | test=dict( 49 | # replace `data/val` with `data/test` for standard test 50 | type=dataset_type, 51 | data_prefix='data/imagenet/val', 52 | ann_file='data/imagenet/meta/val.txt', 53 | pipeline=test_pipeline)) 54 | evaluation = dict(interval=1, metric='accuracy') -------------------------------------------------------------------------------- /classification/configs/regnet/regnet_1.6G_pruning.py: -------------------------------------------------------------------------------- 1 | _base_ = ["./regnet_3.2G_origin.py"] 2 | 3 | work_dir = "work_dirs/regnet_1.6G_pruning" 4 | optimizer = dict(lr=0.004) 5 | 6 | custom_hooks = [ 7 | dict( 8 | type='FisherPruningHook', 9 | # In the pruning process, the priority must be 10 | # 'LOWEST' to ensure the pruning hook runs 11 | # after the optimizer hook; in the finetune 12 | # process, it must be 'HIGHEST' so that it runs 13 | # before the checkpoint hook 14 | pruning=True, 15 | batch_size=32, 16 | interval=25, 17 | priority='LOWEST', 18 | ) 19 | ] 20 | 21 | data = dict(samples_per_gpu=32, workers_per_gpu=2) -------------------------------------------------------------------------------- /classification/configs/regnet/regnet_3.2G_finetune.py: -------------------------------------------------------------------------------- 1 | _base_ = ["./regnet_6.4G_origin.py"] 2 | 3 | work_dir = "work_dirs/regnet_3.2G" 4 | 5 | custom_hooks = [ 6 | dict(type='FisherPruningHook', 7 | pruning=False, 8 | deploy_from='path to the pruned model') 9 | ] 10 | 11 | optimizer = dict(lr=0.1) 12 | data = dict(samples_per_gpu=256, workers_per_gpu=16) -------------------------------------------------------------------------------- /classification/configs/regnet/regnet_3.2G_origin.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/regnet/regnetx_3.2gf.py', 3 |
'../_base_/datasets/imagenet_bs32.py', 4 | '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' 5 | ] 6 | 7 | work_dir = "work_dirs/regnet_3.2G_origin" 8 | load_from = "https://download.openmmlab.com/mmclassification/v0/regnet/convert/RegNetX-3.2GF-82c43fd5.pth" 9 | 10 | # dataset settings 11 | dataset_type = 'ImageNet' 12 | 13 | img_norm_cfg = dict( 14 | # The mean and std are used in PyCls when training RegNets 15 | mean=[103.53, 116.28, 123.675], 16 | std=[57.375, 57.12, 58.395], 17 | to_rgb=False) 18 | 19 | train_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict(type='RandomResizedCrop', size=224), 22 | dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'), 23 | dict(type='Normalize', **img_norm_cfg), 24 | dict(type='ImageToTensor', keys=['img']), 25 | dict(type='ToTensor', keys=['gt_label']), 26 | dict(type='Collect', keys=['img', 'gt_label']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='LoadImageFromFile'), 30 | dict(type='Resize', size=(256, -1)), 31 | dict(type='CenterCrop', crop_size=224), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='ImageToTensor', keys=['img']), 34 | dict(type='Collect', keys=['img']) 35 | ] 36 | data = dict( 37 | samples_per_gpu=256, 38 | workers_per_gpu=16, 39 | train=dict( 40 | type=dataset_type, 41 | data_prefix='data/imagenet/train', 42 | pipeline=train_pipeline), 43 | val=dict( 44 | type=dataset_type, 45 | data_prefix='data/imagenet/val', 46 | ann_file='data/imagenet/meta/val.txt', 47 | pipeline=test_pipeline), 48 | test=dict( 49 | # replace `data/val` with `data/test` for standard test 50 | type=dataset_type, 51 | data_prefix='data/imagenet/val', 52 | ann_file='data/imagenet/meta/val.txt', 53 | pipeline=test_pipeline)) 54 | evaluation = dict(interval=1, metric='accuracy') -------------------------------------------------------------------------------- /classification/configs/regnet/regnet_3.2G_pruning.py: -------------------------------------------------------------------------------- 1 | _base_ = ["./regnet_6.4G_origin.py"] 2 | 3 | work_dir = "work_dirs/regnet_3.2G_pruning" 4 | optimizer = dict(lr=0.004) 5 | 6 | custom_hooks = [ 7 | dict( 8 | type='FisherPruningHook', 9 | # In the pruning process, the priority must be 10 | # 'LOWEST' to ensure the pruning hook runs 11 | # after the optimizer hook; in the finetune 12 | # process, it must be 'HIGHEST' so that it runs 13 | # before the checkpoint hook 14 | pruning=True, 15 | batch_size=32, 16 | interval=25, 17 | priority='LOWEST', 18 | ) 19 | ] 20 | 21 | data = dict(samples_per_gpu=32, workers_per_gpu=2) -------------------------------------------------------------------------------- /classification/configs/regnet/regnet_6.4G_origin.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/regnet/regnetx_6.4gf.py', 3 | '../_base_/datasets/imagenet_bs32.py', 4 | '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' 5 | ] 6 | 7 | work_dir = "work_dirs/regnet_6.4G_origin" 8 | load_from = "https://download.openmmlab.com/mmclassification/v0/regnet/convert/RegNetX-6.4GF-6888c0ea.pth" 9 | 10 | # dataset settings 11 | dataset_type = 'ImageNet' 12 | 13 | img_norm_cfg = dict( 14 | # The mean and std are used in PyCls when training RegNets 15 | mean=[103.53, 116.28, 123.675], 16 | std=[57.375, 57.12, 58.395], 17 | to_rgb=False) 18 | 19 | train_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict(type='RandomResizedCrop', size=224), 22 | dict(type='RandomFlip',
flip_prob=0.5, direction='horizontal'), 23 | dict(type='Normalize', **img_norm_cfg), 24 | dict(type='ImageToTensor', keys=['img']), 25 | dict(type='ToTensor', keys=['gt_label']), 26 | dict(type='Collect', keys=['img', 'gt_label']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='LoadImageFromFile'), 30 | dict(type='Resize', size=(256, -1)), 31 | dict(type='CenterCrop', crop_size=224), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='ImageToTensor', keys=['img']), 34 | dict(type='Collect', keys=['img']) 35 | ] 36 | data = dict( 37 | samples_per_gpu=32, 38 | workers_per_gpu=2, 39 | train=dict( 40 | type=dataset_type, 41 | data_prefix='data/imagenet/train', 42 | pipeline=train_pipeline), 43 | val=dict( 44 | type=dataset_type, 45 | data_prefix='data/imagenet/val', 46 | ann_file='data/imagenet/meta/val.txt', 47 | pipeline=test_pipeline), 48 | test=dict( 49 | # replace `data/val` with `data/test` for standard test 50 | type=dataset_type, 51 | data_prefix='data/imagenet/val', 52 | ann_file='data/imagenet/meta/val.txt', 53 | pipeline=test_pipeline)) 54 | evaluation = dict(interval=1, metric='accuracy') -------------------------------------------------------------------------------- /classification/configs/resnet50/resnet50_finetune.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/resnet50.py', '../_base_/datasets/imagenet_bs32.py', 3 | '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' 4 | ] 5 | 6 | 7 | custom_hooks = [ 8 | dict(type='FisherPruningHook', 9 | pruning=False, 10 | deploy_from='path to the pruned model') 11 | ] 12 | 13 | work_dir = "work_dirs/resnet50" 14 | optimizer = dict(lr=0.1) 15 | data = dict(samples_per_gpu=256, workers_per_gpu=16) # for single GPU 16 | -------------------------------------------------------------------------------- /classification/configs/resnet50/resnet50_pruning.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/resnet50.py', '../_base_/datasets/imagenet_bs32.py', 3 | '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' 4 | ] 5 | 6 | optimizer = dict(lr=0.004) 7 | 8 | custom_hooks = [ 9 | dict( 10 | type='FisherPruningHook', 11 | # In the pruning process, the priority must be 12 | # 'LOWEST' to ensure the pruning hook runs 13 | # after the optimizer hook; in the finetune 14 | # process, it must be 'HIGHEST' so that it runs 15 | # before the checkpoint hook 16 | pruning=True, 17 | batch_size=32, 18 | interval=25, 19 | priority='LOWEST', 20 | ) 21 | ] 22 | 23 | work_dir = "work_dirs/resnet50" 24 | load_from = "https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth" 25 | -------------------------------------------------------------------------------- /classification/configs/resnext/resnext50_finetune.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/resnext50_32x4d.py', 3 | '../_base_/datasets/imagenet_bs32_pil_resize.py', 4 | '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' 5 | ] 6 | 7 | custom_hooks = [ 8 | dict(type='FisherPruningHook', 9 | pruning=False, 10 | deploy_from='path to the pruned model') 11 | ] 12 | 13 | work_dir = "work_dirs/resnext50" 14 | optimizer = dict(lr=0.1) 15 | data = dict(samples_per_gpu=256, workers_per_gpu=16) # for single GPU 16 |
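The `FisherPruningHook` entries in the configs above come in exactly two flavors. The sketch below (not a file in the repo) restates them side by side for quick reference; every value is copied from the surrounding configs (`interval` is 25 in most pruning configs and 10 for resnext50), and `deploy_from` keeps the same placeholder string the original files use.

```python
# Pruning stage: priority='LOWEST' makes the hook run after OptimizerHook.
pruning_hook = dict(
    type='FisherPruningHook',
    pruning=True,        # accumulate Fisher information and prune channels
    batch_size=32,       # matches samples_per_gpu in the pruning configs
    interval=25,         # pruning interval in iterations (10 for resnext50)
    priority='LOWEST')

# Finetune stage: the hook only rebuilds the pruned structure from a checkpoint.
finetune_hook = dict(
    type='FisherPruningHook',
    pruning=False,
    deploy_from='path to the pruned model')  # placeholder, as in the configs
```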
-------------------------------------------------------------------------------- /classification/configs/resnext/resnext50_pruning.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/resnext50_32x4d.py', 3 | '../_base_/datasets/imagenet_bs32_pil_resize.py', 4 | '../_base_/schedules/imagenet_bs256.py', '../_base_/default_runtime.py' 5 | ] 6 | 7 | optimizer = dict(lr=0.004) 8 | 9 | custom_hooks = [ 10 | dict( 11 | type='FisherPruningHook', 12 | # In the pruning process, the priority must be 13 | # 'LOWEST' to ensure the pruning hook runs 14 | # after the optimizer hook; in the finetune 15 | # process, it must be 'HIGHEST' so that it runs 16 | # before the checkpoint hook 17 | pruning=True, 18 | batch_size=32, 19 | interval=10, 20 | priority='LOWEST', 21 | ) 22 | ] 23 | work_dir = "work_dirs/resnext50" 24 | load_from = "https://download.openmmlab.com/mmclassification/v0/resnext/resnext50_32x4d_b32x8_imagenet_20210429-56066e27.pth" 25 | -------------------------------------------------------------------------------- /classification/tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29500} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 11 | -------------------------------------------------------------------------------- /classification/tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-29500} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} 10 | -------------------------------------------------------------------------------- /classification/tools/fisher_pruning_hook: -------------------------------------------------------------------------------- 1 | ../../fisher_pruning_hook -------------------------------------------------------------------------------- /classification/tools/model_eval.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import mmcv 3 | import numpy as np 4 | import torch 5 | from mmcls.models import build_classifier 6 | import time 7 | from fisher_pruning_hook import FisherPruningHook 8 | from torch.nn import Conv2d, Linear 9 | from torch.nn.modules.batchnorm import _BatchNorm 10 | from torch.nn.modules.activation import ReLU 11 | from functools import partial 12 | 13 | def parse_args(): 14 | parser = argparse.ArgumentParser(description='mmcls test model') 15 | parser.add_argument('config', help='test config file path') 16 | 17 | parser.add_argument( 18 | '--device', 19 | choices=['cpu', 'cuda'], 20 | default='cuda', 21 | help='device used for testing') 22 | args = parser.parse_args() 23 | 24 | return args 25 | 26 | def speed_test(model, device, batchsize, iterations): 27 | x = torch.randn(batchsize, 3, 224, 224).to(device) 28 | model = model.to(device) 29 | model.eval() 30 | with torch.no_grad(): 31 | start = time.time() 32 | for _ in range(iterations): 33 | _ = model(x) 34 | mid = time.time() 35 | for _ in range(iterations): 36 | _ = model(x) 37 | end = time.time() 38 | return
start, mid, end 39 | 40 | def compute_parameters(model): 41 | params = sum(p.numel() for p in model.parameters()) 42 | return params 43 | 44 | class FlopsActsHook: 45 | def __init__(self, model): 46 | self.flops = {} 47 | self.acts = {} 48 | self.non_registered = [] 49 | for n, m in model.named_modules(): 50 | self.flops[n] = 0 51 | self.acts[n] = 0 52 | 53 | if isinstance(m, Conv2d): m.register_forward_hook(self.forward_hook_conv) 54 | elif isinstance(m, Linear): m.register_forward_hook(self.forward_hook_fc) 55 | elif isinstance(m, _BatchNorm): m.register_forward_hook(self.forward_hook_bn) 56 | elif isinstance(m, ReLU): m.register_forward_hook(self.forward_hook_relu) 57 | else: 58 | # print(n, type(m)) 59 | self.non_registered.append([n, type(m)]) 60 | 61 | def forward_hook_conv(self, module, inputs, outputs): 62 | ic = module.in_channels // module.groups 63 | kh, kw = module.kernel_size 64 | self.flops[module.name] += np.prod([ic, kh, kw, *outputs.shape]) 65 | if module.bias is not None: 66 | self.flops[module.name] += np.prod(outputs.shape) 67 | self.acts[module.name] += np.prod(outputs.shape) 68 | 69 | def forward_hook_fc(self, module, inputs, outputs): 70 | ic = module.in_features 71 | self.flops[module.name] += np.prod([ic, *outputs.shape]) 72 | if module.bias is not None: 73 | self.flops[module.name] += np.prod(outputs.shape) 74 | self.acts[module.name] += np.prod(outputs.shape) 75 | 76 | def forward_hook_bn(self, module, inputs, outputs): 77 | self.flops[module.name] += np.prod(outputs.shape) * (4 if module.affine else 2) 78 | self.acts[module.name] += np.prod(outputs.shape) 79 | 80 | def forward_hook_relu(self, module, inputs, outputs): 81 | self.flops[module.name] += np.prod(outputs.shape) 82 | self.acts[module.name] += 0 if module.inplace else np.prod(outputs.shape) 83 | 84 | def init(self): 85 | for n in self.flops: 86 | self.flops[n] = 0 87 | self.acts[n] = 0 88 | 89 | def summarize(self): 90 | flops, acts = 0, 0 91 | for n in self.flops: 92 | flops += self.flops[n] 93 | acts += self.acts[n] 94 | return flops, acts 95 | 96 | def compute_flops_acts(model, device): 97 | model.eval() 98 | model.to(device) 99 | hook = FlopsActsHook(model) 100 | hook.init() 101 | x = torch.randn(32, 3, 224, 224).to(device) 102 | _ = model(x) 103 | flops, acts = hook.summarize() 104 | return flops / x.size(0), acts / x.size(0) 105 | 106 | def compute_flops_params_thop(model, device): 107 | from thop import profile 108 | model.eval() 109 | model.to(device) 110 | x = torch.randn(32, 3, 224, 224).to(device) 111 | flops, params = profile(model, inputs=(x,)) 112 | return flops / x.size(0), params 113 | 114 | def main(): 115 | args = parse_args() 116 | cfg = mmcv.Config.fromfile(args.config) 117 | # set cudnn_benchmark 118 | if cfg.get('cudnn_benchmark', False): 119 | torch.backends.cudnn.benchmark = True 120 | cfg.device= args.device 121 | 122 | # build the model 123 | model = build_classifier(cfg.model) 124 | model.forward = partial(model.forward, return_loss=False, img_metas=None) 125 | for n, m in model.named_modules(): 126 | m.name = n 127 | 128 | if 'custom_hooks' in cfg: 129 | for hook in cfg.custom_hooks: 130 | if hook.type.startswith('FisherPruningHook'): 131 | hook_cfg = hook.copy() 132 | hook_cfg.pop('priority', None) 133 | from mmcv.runner.hooks import HOOKS 134 | hook_cls = HOOKS.get(hook_cfg['type']) 135 | if hasattr(hook_cls, 'after_build_model'): 136 | pruning_hook = mmcv.build_from_cfg(hook_cfg, HOOKS) 137 | pruning_hook.after_build_model(model, cfg.work_dir) 138 | 139 | # test speed 140 
| batchsize, iterations = (16, 50) if cfg.device == "cpu" else (64, 100) 141 | start, mid, end = speed_test(model, cfg.device, batchsize, iterations) 142 | print(f"time elapsed per iteration with batch size {batchsize}:") 143 | print(f"first {iterations} iterations: {(mid - start) * 1000 / iterations:.3f}ms") 144 | print(f"last {iterations} iterations: {(end - mid) * 1000 / iterations:.3f}ms") 145 | 146 | # flops and acts 147 | flops, acts = compute_flops_acts(model, cfg.device) 148 | print(f"flops: {flops / (10 ** 9):.3f}G") 149 | print(f"memory: {acts / (10 ** 6):.3f}M") 150 | params = compute_parameters(model) 151 | print(f"params: {params / (10 ** 6):.3f}M") 152 | 153 | 154 | if __name__ == "__main__": 155 | main() -------------------------------------------------------------------------------- /classification/tools/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /classification/tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | WORK_DIR=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | PY_ARGS=${@:5} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /classification/tools/train.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse 3 | import copy 4 | import os 5 | import os.path as osp 6 | import time 7 | import warnings 8 | 9 | import mmcv 10 | import torch 11 | from mmcv import Config, DictAction 12 | from mmcv.runner import get_dist_info, init_dist 13 | 14 | from mmcls import __version__ 15 | from mmcls.apis import set_random_seed, train_model 16 | from mmcls.datasets import build_dataset 17 | from mmcls.models import build_classifier 18 | from mmcls.utils import collect_env, get_root_logger 19 | from fisher_pruning_hook import FisherPruningHook 20 | 21 | def parse_args(): 22 | parser = argparse.ArgumentParser(description='Train a model') 23 | parser.add_argument('config', help='train config file path') 24 | parser.add_argument('--work-dir', help='the dir to save logs and models') 25 | parser.add_argument( 26 | '--resume-from', help='the checkpoint file to resume from') 27 | parser.add_argument( 28 | '--no-validate', 29 | action='store_true', 30 | help='whether not to evaluate the checkpoint during training') 31 | group_gpus = parser.add_mutually_exclusive_group() 32 | group_gpus.add_argument('--device', help='device used for training') 33 | group_gpus.add_argument( 34 | '--gpus', 35 | type=int, 36 | help='number of gpus to use ' 37 | '(only applicable to non-distributed training)') 38 | group_gpus.add_argument( 39 | '--gpu-ids', 40 | type=int, 41 | nargs='+', 42 | help='ids of gpus to use ' 43 | '(only applicable to non-distributed training)') 44 | parser.add_argument('--seed', type=int, default=None, help='random seed') 45 | parser.add_argument( 46 | '--deterministic', 47 | action='store_true', 48 | help='whether to set deterministic options for CUDNN backend.') 49 | parser.add_argument( 50 | '--options', 51 | nargs='+', 52 | action=DictAction, 53 | help='override some settings in the used config, the key-value pair ' 54 | 'in xxx=yyy format will be merged into config file (deprecated), ' 55 | 'change to --cfg-options instead.') 56 | parser.add_argument( 57 | '--cfg-options', 58 | nargs='+', 59 | action=DictAction, 60 | help='override some settings in the used config, the key-value pair ' 61 | 'in xxx=yyy format will be merged into config file. If the value to ' 62 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 63 | 'It also allows nested list/tuple values, e.g.
key="[(a,b),(c,d)]" ' 64 | 'Note that the quotation marks are necessary and that no white space ' 65 | 'is allowed.') 66 | parser.add_argument( 67 | '--launcher', 68 | choices=['none', 'pytorch', 'slurm', 'mpi'], 69 | default='none', 70 | help='job launcher') 71 | parser.add_argument('--local_rank', type=int, default=0) 72 | args = parser.parse_args() 73 | if 'LOCAL_RANK' not in os.environ: 74 | os.environ['LOCAL_RANK'] = str(args.local_rank) 75 | 76 | if args.options and args.cfg_options: 77 | raise ValueError( 78 | '--options and --cfg-options cannot be both ' 79 | 'specified, --options is deprecated in favor of --cfg-options') 80 | if args.options: 81 | warnings.warn('--options is deprecated in favor of --cfg-options') 82 | args.cfg_options = args.options 83 | 84 | return args 85 | 86 | 87 | def main(): 88 | args = parse_args() 89 | 90 | cfg = Config.fromfile(args.config) 91 | if args.cfg_options is not None: 92 | cfg.merge_from_dict(args.cfg_options) 93 | # set cudnn_benchmark 94 | if cfg.get('cudnn_benchmark', False): 95 | torch.backends.cudnn.benchmark = True 96 | 97 | # work_dir is determined in this priority: CLI > segment in file > filename 98 | if args.work_dir is not None: 99 | # update configs according to CLI args if args.work_dir is not None 100 | cfg.work_dir = args.work_dir 101 | elif cfg.get('work_dir', None) is None: 102 | # use config filename as default work_dir if cfg.work_dir is None 103 | cfg.work_dir = osp.join('./work_dirs', 104 | osp.splitext(osp.basename(args.config))[0]) 105 | if args.resume_from is not None: 106 | cfg.resume_from = args.resume_from 107 | if args.gpu_ids is not None: 108 | cfg.gpu_ids = args.gpu_ids 109 | else: 110 | cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus) 111 | 112 | # init distributed env first, since logger depends on the dist info. 
113 | if args.launcher == 'none': 114 | distributed = False 115 | else: 116 | distributed = True 117 | init_dist(args.launcher, **cfg.dist_params) 118 | _, world_size = get_dist_info() 119 | cfg.gpu_ids = range(world_size) 120 | 121 | # create work_dir 122 | mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir)) 123 | # dump config 124 | cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config))) 125 | # init the logger before other steps 126 | timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) 127 | log_file = osp.join(cfg.work_dir, f'{timestamp}.log') 128 | logger = get_root_logger(log_file=log_file, log_level=cfg.log_level) 129 | 130 | # init the meta dict to record some important information such as 131 | # environment info and seed, which will be logged 132 | meta = dict() 133 | # log env info 134 | env_info_dict = collect_env() 135 | env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()]) 136 | dash_line = '-' * 60 + '\n' 137 | logger.info('Environment info:\n' + dash_line + env_info + '\n' + 138 | dash_line) 139 | meta['env_info'] = env_info 140 | 141 | # log some basic info 142 | logger.info(f'Distributed training: {distributed}') 143 | logger.info(f'Config:\n{cfg.pretty_text}') 144 | 145 | # set random seeds 146 | if args.seed is not None: 147 | logger.info(f'Set random seed to {args.seed}, ' 148 | f'deterministic: {args.deterministic}') 149 | set_random_seed(args.seed, deterministic=args.deterministic) 150 | cfg.seed = args.seed 151 | meta['seed'] = args.seed 152 | 153 | model = build_classifier(cfg.model) 154 | model.init_weights() 155 | 156 | if 'custom_hooks' in cfg: 157 | for hook in cfg.custom_hooks: 158 | if hook.type.startswith('FisherPruningHook'): 159 | hook_cfg = hook.copy() 160 | hook_cfg.pop('priority', None) 161 | from mmcv.runner.hooks import HOOKS 162 | hook_cls = HOOKS.get(hook_cfg['type']) 163 | if hasattr(hook_cls, 'after_build_model'): 164 | pruning_hook = mmcv.build_from_cfg(hook_cfg, HOOKS) 165 | pruning_hook.after_build_model(model, cfg.work_dir) 166 | 167 | datasets = [build_dataset(cfg.data.train)] 168 | if len(cfg.workflow) == 2: 169 | val_dataset = copy.deepcopy(cfg.data.val) 170 | val_dataset.pipeline = cfg.data.train.pipeline 171 | datasets.append(build_dataset(val_dataset)) 172 | if cfg.checkpoint_config is not None: 173 | # save mmcls version, config file content and class names in 174 | # checkpoints as meta data 175 | cfg.checkpoint_config.meta = dict( 176 | mmcls_version=__version__, 177 | config=cfg.pretty_text, 178 | CLASSES=datasets[0].CLASSES) 179 | # add an attribute for visualization convenience 180 | train_model( 181 | model, 182 | datasets, 183 | cfg, 184 | distributed=distributed, 185 | validate=(not args.no_validate), 186 | timestamp=timestamp, 187 | device='cpu' if args.device == 'cpu' else 'cuda', 188 | meta=meta) 189 | 190 | 191 | if __name__ == '__main__': 192 | main() 193 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/cityscapes_detection.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CityscapesDataset' 3 | data_root = 'data/cityscapes/' 4 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | to_rgb=True) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', with_bbox=True), 10 | dict(type='Resize', img_scale=[(2048, 800), (2048, 1024)], 11 | keep_ratio=True), 12 | dict(type='RandomFlip', 
flip_ratio=0.5), 13 | dict(type='Normalize', **img_norm_cfg), 14 | dict(type='Pad', size_divisor=32), 15 | dict(type='DefaultFormatBundle'), 16 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 17 | ] 18 | test_pipeline = [ 19 | dict(type='LoadImageFromFile'), 20 | dict(type='MultiScaleFlipAug', 21 | img_scale=(2048, 1024), 22 | flip=False, 23 | transforms=[ 24 | dict(type='Resize', keep_ratio=True), 25 | dict(type='RandomFlip'), 26 | dict(type='Normalize', **img_norm_cfg), 27 | dict(type='Pad', size_divisor=32), 28 | dict(type='ImageToTensor', keys=['img']), 29 | dict(type='Collect', keys=['img']), 30 | ]) 31 | ] 32 | data = dict( 33 | samples_per_gpu=1, 34 | workers_per_gpu=2, 35 | train=dict( 36 | type='RepeatDataset', 37 | times=8, 38 | dataset=dict(type=dataset_type, 39 | ann_file=data_root + 40 | 'annotations/instancesonly_filtered_gtFine_train.json', 41 | img_prefix=data_root + 'leftImg8bit/train/', 42 | pipeline=train_pipeline)), 43 | val=dict(type=dataset_type, 44 | ann_file=data_root + 45 | 'annotations/instancesonly_filtered_gtFine_val.json', 46 | img_prefix=data_root + 'leftImg8bit/val/', 47 | pipeline=test_pipeline), 48 | test=dict(type=dataset_type, 49 | ann_file=data_root + 50 | 'annotations/instancesonly_filtered_gtFine_test.json', 51 | img_prefix=data_root + 'leftImg8bit/test/', 52 | pipeline=test_pipeline)) 53 | evaluation = dict(interval=1, metric='bbox') 54 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/cityscapes_instance.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CityscapesDataset' 3 | data_root = 'data/cityscapes/' 4 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | to_rgb=True) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 10 | dict(type='Resize', img_scale=[(2048, 800), (2048, 1024)], 11 | keep_ratio=True), 12 | dict(type='RandomFlip', flip_ratio=0.5), 13 | dict(type='Normalize', **img_norm_cfg), 14 | dict(type='Pad', size_divisor=32), 15 | dict(type='DefaultFormatBundle'), 16 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 17 | ] 18 | test_pipeline = [ 19 | dict(type='LoadImageFromFile'), 20 | dict(type='MultiScaleFlipAug', 21 | img_scale=(2048, 1024), 22 | flip=False, 23 | transforms=[ 24 | dict(type='Resize', keep_ratio=True), 25 | dict(type='RandomFlip'), 26 | dict(type='Normalize', **img_norm_cfg), 27 | dict(type='Pad', size_divisor=32), 28 | dict(type='ImageToTensor', keys=['img']), 29 | dict(type='Collect', keys=['img']), 30 | ]) 31 | ] 32 | data = dict( 33 | samples_per_gpu=1, 34 | workers_per_gpu=2, 35 | train=dict( 36 | type='RepeatDataset', 37 | times=8, 38 | dataset=dict(type=dataset_type, 39 | ann_file=data_root + 40 | 'annotations/instancesonly_filtered_gtFine_train.json', 41 | img_prefix=data_root + 'leftImg8bit/train/', 42 | pipeline=train_pipeline)), 43 | val=dict(type=dataset_type, 44 | ann_file=data_root + 45 | 'annotations/instancesonly_filtered_gtFine_val.json', 46 | img_prefix=data_root + 'leftImg8bit/val/', 47 | pipeline=test_pipeline), 48 | test=dict(type=dataset_type, 49 | ann_file=data_root + 50 | 'annotations/instancesonly_filtered_gtFine_test.json', 51 | img_prefix=data_root + 'leftImg8bit/test/', 52 | pipeline=test_pipeline)) 53 | evaluation = dict(metric=['bbox', 'segm']) 54 | 
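None of these `_base_` dataset files is meant to be run on its own; the trainable configs compose them through mmcv's `_base_` inheritance and override only the keys they name. A minimal sketch of that merge behavior, assuming it is run from the `detection/` directory:

```python
from mmcv import Config

# Load a base dataset config, then override a single nested key; all other
# settings (pipelines, annotation paths, ...) are inherited unchanged.
cfg = Config.fromfile('configs/_base_/datasets/coco_detection.py')
cfg.merge_from_dict({'data.samples_per_gpu': 4})
print(cfg.data.samples_per_gpu)  # -> 4
```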
-------------------------------------------------------------------------------- /detection/configs/_base_/datasets/coco_detection.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CocoDataset' 3 | data_root = 'data/coco/' 4 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | to_rgb=True) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', with_bbox=True), 10 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 11 | dict(type='RandomFlip', flip_ratio=0.5), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size_divisor=32), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict(type='MultiScaleFlipAug', 20 | img_scale=(1333, 800), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=2, 33 | workers_per_gpu=2, 34 | train=dict(type=dataset_type, 35 | ann_file=data_root + 'annotations/instances_train2017.json', 36 | img_prefix=data_root + 'train2017/', 37 | pipeline=train_pipeline), 38 | val=dict(type=dataset_type, 39 | ann_file=data_root + 'annotations/instances_val2017.json', 40 | img_prefix=data_root + 'val2017/', 41 | pipeline=test_pipeline), 42 | test=dict(type=dataset_type, 43 | ann_file=data_root + 'annotations/instances_val2017.json', 44 | img_prefix=data_root + 'val2017/', 45 | pipeline=test_pipeline)) 46 | evaluation = dict(interval=1, metric='bbox') 47 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/coco_instance.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CocoDataset' 3 | data_root = 'data/coco/' 4 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | to_rgb=True) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 10 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 11 | dict(type='RandomFlip', flip_ratio=0.5), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size_divisor=32), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict(type='MultiScaleFlipAug', 20 | img_scale=(1333, 800), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=2, 33 | workers_per_gpu=2, 34 | train=dict(type=dataset_type, 35 | ann_file=data_root + 'annotations/instances_train2017.json', 36 | img_prefix=data_root + 'train2017/', 37 | pipeline=train_pipeline), 38 | val=dict(type=dataset_type, 39 | ann_file=data_root + 'annotations/instances_val2017.json', 40 | img_prefix=data_root + 'val2017/', 41 | 
pipeline=test_pipeline), 42 | test=dict(type=dataset_type, 43 | ann_file=data_root + 'annotations/instances_val2017.json', 44 | img_prefix=data_root + 'val2017/', 45 | pipeline=test_pipeline)) 46 | evaluation = dict(metric=['bbox', 'segm']) 47 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/coco_instance_semantic.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CocoDataset' 3 | data_root = 'data/coco/' 4 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | to_rgb=True) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True, 10 | with_seg=True), 11 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 12 | dict(type='RandomFlip', flip_ratio=0.5), 13 | dict(type='Normalize', **img_norm_cfg), 14 | dict(type='Pad', size_divisor=32), 15 | dict(type='SegRescale', scale_factor=1 / 8), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', 18 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 19 | 'gt_semantic_seg']), 20 | ] 21 | test_pipeline = [ 22 | dict(type='LoadImageFromFile'), 23 | dict(type='MultiScaleFlipAug', 24 | img_scale=(1333, 800), 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip', flip_ratio=0.5), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='Pad', size_divisor=32), 31 | dict(type='ImageToTensor', keys=['img']), 32 | dict(type='Collect', keys=['img']), 33 | ]) 34 | ] 35 | data = dict( 36 | samples_per_gpu=2, 37 | workers_per_gpu=2, 38 | train=dict(type=dataset_type, 39 | ann_file=data_root + 'annotations/instances_train2017.json', 40 | img_prefix=data_root + 'train2017/', 41 | seg_prefix=data_root + 'stuffthingmaps/train2017/', 42 | pipeline=train_pipeline), 43 | val=dict(type=dataset_type, 44 | ann_file=data_root + 'annotations/instances_val2017.json', 45 | img_prefix=data_root + 'val2017/', 46 | pipeline=test_pipeline), 47 | test=dict(type=dataset_type, 48 | ann_file=data_root + 'annotations/instances_val2017.json', 49 | img_prefix=data_root + 'val2017/', 50 | pipeline=test_pipeline)) 51 | evaluation = dict(metric=['bbox', 'segm']) 52 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/deepfashion.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'DeepFashionDataset' 3 | data_root = 'data/DeepFashion/In-shop/' 4 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | to_rgb=True) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 10 | dict(type='Resize', img_scale=(750, 1101), keep_ratio=True), 11 | dict(type='RandomFlip', flip_ratio=0.5), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size_divisor=32), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict(type='MultiScaleFlipAug', 20 | img_scale=(750, 1101), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', 
keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict(imgs_per_gpu=2, 32 | workers_per_gpu=1, 33 | train=dict(type=dataset_type, 34 | ann_file=data_root + 35 | 'annotations/DeepFashion_segmentation_query.json', 36 | img_prefix=data_root + 'Img/', 37 | pipeline=train_pipeline, 38 | data_root=data_root), 39 | val=dict(type=dataset_type, 40 | ann_file=data_root + 41 | 'annotations/DeepFashion_segmentation_query.json', 42 | img_prefix=data_root + 'Img/', 43 | pipeline=test_pipeline, 44 | data_root=data_root), 45 | test=dict(type=dataset_type, 46 | ann_file=data_root + 47 | 'annotations/DeepFashion_segmentation_gallery.json', 48 | img_prefix=data_root + 'Img/', 49 | pipeline=test_pipeline, 50 | data_root=data_root)) 51 | evaluation = dict(interval=5, metric=['bbox', 'segm']) 52 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/lvis_v0.5_instance.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | _base_ = 'coco_instance.py' 3 | dataset_type = 'LVISV05Dataset' 4 | data_root = 'data/lvis_v0.5/' 5 | data = dict(samples_per_gpu=2, 6 | workers_per_gpu=2, 7 | train=dict(_delete_=True, 8 | type='ClassBalancedDataset', 9 | oversample_thr=1e-3, 10 | dataset=dict(type=dataset_type, 11 | ann_file=data_root + 12 | 'annotations/lvis_v0.5_train.json', 13 | img_prefix=data_root + 'train2017/')), 14 | val=dict(type=dataset_type, 15 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 16 | img_prefix=data_root + 'val2017/'), 17 | test=dict(type=dataset_type, 18 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 19 | img_prefix=data_root + 'val2017/')) 20 | evaluation = dict(metric=['bbox', 'segm']) 21 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/lvis_v1_instance.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | _base_ = 'coco_instance.py' 3 | dataset_type = 'LVISV1Dataset' 4 | data_root = 'data/lvis_v1/' 5 | data = dict(samples_per_gpu=2, 6 | workers_per_gpu=2, 7 | train=dict(_delete_=True, 8 | type='ClassBalancedDataset', 9 | oversample_thr=1e-3, 10 | dataset=dict(type=dataset_type, 11 | ann_file=data_root + 12 | 'annotations/lvis_v1_train.json', 13 | img_prefix=data_root)), 14 | val=dict(type=dataset_type, 15 | ann_file=data_root + 'annotations/lvis_v1_val.json', 16 | img_prefix=data_root), 17 | test=dict(type=dataset_type, 18 | ann_file=data_root + 'annotations/lvis_v1_val.json', 19 | img_prefix=data_root)) 20 | evaluation = dict(metric=['bbox', 'segm']) 21 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/voc0712.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'VOCDataset' 3 | data_root = 'data/VOCdevkit/' 4 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | to_rgb=True) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', with_bbox=True), 10 | dict(type='Resize', img_scale=(1000, 600), keep_ratio=True), 11 | dict(type='RandomFlip', flip_ratio=0.5), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size_divisor=32), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 16 | ] 17 | test_pipeline = [ 18 | 
dict(type='LoadImageFromFile'), 19 | dict(type='MultiScaleFlipAug', 20 | img_scale=(1000, 600), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=2, 33 | workers_per_gpu=2, 34 | train=dict(type='RepeatDataset', 35 | times=3, 36 | dataset=dict( 37 | type=dataset_type, 38 | ann_file=[ 39 | data_root + 'VOC2007/ImageSets/Main/trainval.txt', 40 | data_root + 'VOC2012/ImageSets/Main/trainval.txt' 41 | ], 42 | img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'], 43 | pipeline=train_pipeline)), 44 | val=dict(type=dataset_type, 45 | ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', 46 | img_prefix=data_root + 'VOC2007/', 47 | pipeline=test_pipeline), 48 | test=dict(type=dataset_type, 49 | ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', 50 | img_prefix=data_root + 'VOC2007/', 51 | pipeline=test_pipeline)) 52 | evaluation = dict(interval=1, metric='mAP') 53 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/wider_face.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'WIDERFaceDataset' 3 | data_root = 'data/WIDERFace/' 4 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile', to_float32=True), 7 | dict(type='LoadAnnotations', with_bbox=True), 8 | dict(type='PhotoMetricDistortion', 9 | brightness_delta=32, 10 | contrast_range=(0.5, 1.5), 11 | saturation_range=(0.5, 1.5), 12 | hue_delta=18), 13 | dict(type='Expand', 14 | mean=img_norm_cfg['mean'], 15 | to_rgb=img_norm_cfg['to_rgb'], 16 | ratio_range=(1, 4)), 17 | dict(type='MinIoURandomCrop', 18 | min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), 19 | min_crop_size=0.3), 20 | dict(type='Resize', img_scale=(300, 300), keep_ratio=False), 21 | dict(type='Normalize', **img_norm_cfg), 22 | dict(type='RandomFlip', flip_ratio=0.5), 23 | dict(type='DefaultFormatBundle'), 24 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 25 | ] 26 | test_pipeline = [ 27 | dict(type='LoadImageFromFile'), 28 | dict(type='MultiScaleFlipAug', 29 | img_scale=(300, 300), 30 | flip=False, 31 | transforms=[ 32 | dict(type='Resize', keep_ratio=False), 33 | dict(type='Normalize', **img_norm_cfg), 34 | dict(type='ImageToTensor', keys=['img']), 35 | dict(type='Collect', keys=['img']), 36 | ]) 37 | ] 38 | data = dict(samples_per_gpu=60, 39 | workers_per_gpu=2, 40 | train=dict(type='RepeatDataset', 41 | times=2, 42 | dataset=dict(type=dataset_type, 43 | ann_file=data_root + 'train.txt', 44 | img_prefix=data_root + 'WIDER_train/', 45 | min_size=17, 46 | pipeline=train_pipeline)), 47 | val=dict(type=dataset_type, 48 | ann_file=data_root + 'val.txt', 49 | img_prefix=data_root + 'WIDER_val/', 50 | pipeline=test_pipeline), 51 | test=dict(type=dataset_type, 52 | ann_file=data_root + 'val.txt', 53 | img_prefix=data_root + 'WIDER_val/', 54 | pipeline=test_pipeline)) 55 | -------------------------------------------------------------------------------- /detection/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable 3 | log_config = dict( 4 | 
interval=50, 5 | hooks=[ 6 | dict(type='TextLoggerHook'), 7 | # dict(type='TensorboardLoggerHook') 8 | ]) 9 | # yapf:enable 10 | custom_hooks = [dict(type='NumClassCheckHook')] 11 | 12 | dist_params = dict(backend='nccl') 13 | log_level = 'INFO' 14 | load_from = None 15 | resume_from = None 16 | workflow = [('train', 1)] 17 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/cascade_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='CascadeRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict(type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=True), 11 | norm_eval=True, 12 | style='pytorch'), 13 | neck=dict(type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | rpn_head=dict(type='RPNHead', 18 | in_channels=256, 19 | feat_channels=256, 20 | anchor_generator=dict(type='AnchorGenerator', 21 | scales=[8], 22 | ratios=[0.5, 1.0, 2.0], 23 | strides=[4, 8, 16, 32, 64]), 24 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 25 | target_means=[.0, .0, .0, .0], 26 | target_stds=[1.0, 1.0, 1.0, 1.0]), 27 | loss_cls=dict(type='CrossEntropyLoss', 28 | use_sigmoid=True, 29 | loss_weight=1.0), 30 | loss_bbox=dict(type='SmoothL1Loss', 31 | beta=1.0 / 9.0, 32 | loss_weight=1.0)), 33 | roi_head=dict(type='CascadeRoIHead', 34 | num_stages=3, 35 | stage_loss_weights=[1, 0.5, 0.25], 36 | bbox_roi_extractor=dict(type='SingleRoIExtractor', 37 | roi_layer=dict(type='RoIAlign', 38 | output_size=7, 39 | sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=[ 43 | dict(type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 49 | target_means=[0., 0., 0., 0.], 50 | target_stds=[0.1, 0.1, 0.2, 0.2]), 51 | reg_class_agnostic=True, 52 | loss_cls=dict(type='CrossEntropyLoss', 53 | use_sigmoid=False, 54 | loss_weight=1.0), 55 | loss_bbox=dict(type='SmoothL1Loss', 56 | beta=1.0, 57 | loss_weight=1.0)), 58 | dict(type='Shared2FCBBoxHead', 59 | in_channels=256, 60 | fc_out_channels=1024, 61 | roi_feat_size=7, 62 | num_classes=80, 63 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 64 | target_means=[0., 0., 0., 0.], 65 | target_stds=[0.05, 0.05, 0.1, 0.1]), 66 | reg_class_agnostic=True, 67 | loss_cls=dict(type='CrossEntropyLoss', 68 | use_sigmoid=False, 69 | loss_weight=1.0), 70 | loss_bbox=dict(type='SmoothL1Loss', 71 | beta=1.0, 72 | loss_weight=1.0)), 73 | dict(type='Shared2FCBBoxHead', 74 | in_channels=256, 75 | fc_out_channels=1024, 76 | roi_feat_size=7, 77 | num_classes=80, 78 | bbox_coder=dict( 79 | type='DeltaXYWHBBoxCoder', 80 | target_means=[0., 0., 0., 0.], 81 | target_stds=[0.033, 0.033, 0.067, 0.067]), 82 | reg_class_agnostic=True, 83 | loss_cls=dict(type='CrossEntropyLoss', 84 | use_sigmoid=False, 85 | loss_weight=1.0), 86 | loss_bbox=dict(type='SmoothL1Loss', 87 | beta=1.0, 88 | loss_weight=1.0)) 89 | ]), 90 | # model training and testing settings 91 | train_cfg=dict(rpn=dict(assigner=dict(type='MaxIoUAssigner', 92 | pos_iou_thr=0.7, 93 | neg_iou_thr=0.3, 94 | min_pos_iou=0.3, 95 | match_low_quality=True, 96 | ignore_iof_thr=-1), 97 | sampler=dict(type='RandomSampler', 98 | num=256, 99 | pos_fraction=0.5, 100 | neg_pos_ub=-1, 101 | add_gt_as_proposals=False), 102 | 
allowed_border=0, 103 | pos_weight=-1, 104 | debug=False), 105 | rpn_proposal=dict(nms_pre=2000, 106 | max_per_img=2000, 107 | nms=dict(type='nms', iou_threshold=0.7), 108 | min_bbox_size=0), 109 | rcnn=[ 110 | dict(assigner=dict(type='MaxIoUAssigner', 111 | pos_iou_thr=0.5, 112 | neg_iou_thr=0.5, 113 | min_pos_iou=0.5, 114 | match_low_quality=False, 115 | ignore_iof_thr=-1), 116 | sampler=dict(type='RandomSampler', 117 | num=512, 118 | pos_fraction=0.25, 119 | neg_pos_ub=-1, 120 | add_gt_as_proposals=True), 121 | pos_weight=-1, 122 | debug=False), 123 | dict(assigner=dict(type='MaxIoUAssigner', 124 | pos_iou_thr=0.6, 125 | neg_iou_thr=0.6, 126 | min_pos_iou=0.6, 127 | match_low_quality=False, 128 | ignore_iof_thr=-1), 129 | sampler=dict(type='RandomSampler', 130 | num=512, 131 | pos_fraction=0.25, 132 | neg_pos_ub=-1, 133 | add_gt_as_proposals=True), 134 | pos_weight=-1, 135 | debug=False), 136 | dict(assigner=dict(type='MaxIoUAssigner', 137 | pos_iou_thr=0.7, 138 | neg_iou_thr=0.7, 139 | min_pos_iou=0.7, 140 | match_low_quality=False, 141 | ignore_iof_thr=-1), 142 | sampler=dict(type='RandomSampler', 143 | num=512, 144 | pos_fraction=0.25, 145 | neg_pos_ub=-1, 146 | add_gt_as_proposals=True), 147 | pos_weight=-1, 148 | debug=False) 149 | ]), 150 | test_cfg=dict(rpn=dict(nms_pre=1000, 151 | max_per_img=1000, 152 | nms=dict(type='nms', iou_threshold=0.7), 153 | min_bbox_size=0), 154 | rcnn=dict(score_thr=0.05, 155 | nms=dict(type='nms', iou_threshold=0.5), 156 | max_per_img=100))) 157 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/fast_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FastRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict(type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=True), 11 | norm_eval=True, 12 | style='pytorch'), 13 | neck=dict(type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | roi_head=dict(type='StandardRoIHead', 18 | bbox_roi_extractor=dict(type='SingleRoIExtractor', 19 | roi_layer=dict(type='RoIAlign', 20 | output_size=7, 21 | sampling_ratio=0), 22 | out_channels=256, 23 | featmap_strides=[4, 8, 16, 32]), 24 | bbox_head=dict( 25 | type='Shared2FCBBoxHead', 26 | in_channels=256, 27 | fc_out_channels=1024, 28 | roi_feat_size=7, 29 | num_classes=80, 30 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 31 | target_means=[0., 0., 0., 0.], 32 | target_stds=[0.1, 0.1, 0.2, 0.2]), 33 | reg_class_agnostic=False, 34 | loss_cls=dict(type='CrossEntropyLoss', 35 | use_sigmoid=False, 36 | loss_weight=1.0), 37 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 38 | # model training and testing settings 39 | train_cfg=dict(rcnn=dict(assigner=dict(type='MaxIoUAssigner', 40 | pos_iou_thr=0.5, 41 | neg_iou_thr=0.5, 42 | min_pos_iou=0.5, 43 | match_low_quality=False, 44 | ignore_iof_thr=-1), 45 | sampler=dict(type='RandomSampler', 46 | num=512, 47 | pos_fraction=0.25, 48 | neg_pos_ub=-1, 49 | add_gt_as_proposals=True), 50 | pos_weight=-1, 51 | debug=False)), 52 | test_cfg=dict(rcnn=dict(score_thr=0.05, 53 | nms=dict(type='nms', iou_threshold=0.5), 54 | max_per_img=100))) 55 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/faster_rcnn_r50_caffe_c4.py: 
-------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='FasterRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict(type='ResNet', 7 | depth=50, 8 | num_stages=3, 9 | strides=(1, 2, 2), 10 | dilations=(1, 1, 1), 11 | out_indices=(2, ), 12 | frozen_stages=1, 13 | norm_cfg=norm_cfg, 14 | norm_eval=True, 15 | style='caffe'), 16 | rpn_head=dict(type='RPNHead', 17 | in_channels=1024, 18 | feat_channels=1024, 19 | anchor_generator=dict(type='AnchorGenerator', 20 | scales=[2, 4, 8, 16, 32], 21 | ratios=[0.5, 1.0, 2.0], 22 | strides=[16]), 23 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 24 | target_means=[.0, .0, .0, .0], 25 | target_stds=[1.0, 1.0, 1.0, 1.0]), 26 | loss_cls=dict(type='CrossEntropyLoss', 27 | use_sigmoid=True, 28 | loss_weight=1.0), 29 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 30 | roi_head=dict(type='StandardRoIHead', 31 | shared_head=dict(type='ResLayer', 32 | depth=50, 33 | stage=3, 34 | stride=2, 35 | dilation=1, 36 | style='caffe', 37 | norm_cfg=norm_cfg, 38 | norm_eval=True), 39 | bbox_roi_extractor=dict(type='SingleRoIExtractor', 40 | roi_layer=dict(type='RoIAlign', 41 | output_size=14, 42 | sampling_ratio=0), 43 | out_channels=1024, 44 | featmap_strides=[16]), 45 | bbox_head=dict( 46 | type='BBoxHead', 47 | with_avg_pool=True, 48 | roi_feat_size=7, 49 | in_channels=2048, 50 | num_classes=80, 51 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 52 | target_means=[0., 0., 0., 0.], 53 | target_stds=[0.1, 0.1, 0.2, 0.2]), 54 | reg_class_agnostic=False, 55 | loss_cls=dict(type='CrossEntropyLoss', 56 | use_sigmoid=False, 57 | loss_weight=1.0), 58 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 59 | # model training and testing settings 60 | train_cfg=dict(rpn=dict(assigner=dict(type='MaxIoUAssigner', 61 | pos_iou_thr=0.7, 62 | neg_iou_thr=0.3, 63 | min_pos_iou=0.3, 64 | match_low_quality=True, 65 | ignore_iof_thr=-1), 66 | sampler=dict(type='RandomSampler', 67 | num=256, 68 | pos_fraction=0.5, 69 | neg_pos_ub=-1, 70 | add_gt_as_proposals=False), 71 | allowed_border=0, 72 | pos_weight=-1, 73 | debug=False), 74 | rpn_proposal=dict(nms_pre=12000, 75 | max_per_img=2000, 76 | nms=dict(type='nms', iou_threshold=0.7), 77 | min_bbox_size=0), 78 | rcnn=dict(assigner=dict(type='MaxIoUAssigner', 79 | pos_iou_thr=0.5, 80 | neg_iou_thr=0.5, 81 | min_pos_iou=0.5, 82 | match_low_quality=False, 83 | ignore_iof_thr=-1), 84 | sampler=dict(type='RandomSampler', 85 | num=512, 86 | pos_fraction=0.25, 87 | neg_pos_ub=-1, 88 | add_gt_as_proposals=True), 89 | pos_weight=-1, 90 | debug=False)), 91 | test_cfg=dict(rpn=dict(nms_pre=6000, 92 | max_per_img=1000, 93 | nms=dict(type='nms', iou_threshold=0.7), 94 | min_bbox_size=0), 95 | rcnn=dict(score_thr=0.05, 96 | nms=dict(type='nms', iou_threshold=0.5), 97 | max_per_img=100))) 98 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/faster_rcnn_r50_caffe_dc5.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='FasterRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict(type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | strides=(1, 2, 2, 1), 10 | dilations=(1, 1, 1, 2), 11 | out_indices=(3, ), 12 | frozen_stages=1, 13 | norm_cfg=norm_cfg, 14 | norm_eval=True, 15 | 
style='caffe'), 16 | rpn_head=dict(type='RPNHead', 17 | in_channels=2048, 18 | feat_channels=2048, 19 | anchor_generator=dict(type='AnchorGenerator', 20 | scales=[2, 4, 8, 16, 32], 21 | ratios=[0.5, 1.0, 2.0], 22 | strides=[16]), 23 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 24 | target_means=[.0, .0, .0, .0], 25 | target_stds=[1.0, 1.0, 1.0, 1.0]), 26 | loss_cls=dict(type='CrossEntropyLoss', 27 | use_sigmoid=True, 28 | loss_weight=1.0), 29 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 30 | roi_head=dict(type='StandardRoIHead', 31 | bbox_roi_extractor=dict(type='SingleRoIExtractor', 32 | roi_layer=dict(type='RoIAlign', 33 | output_size=7, 34 | sampling_ratio=0), 35 | out_channels=2048, 36 | featmap_strides=[16]), 37 | bbox_head=dict( 38 | type='Shared2FCBBoxHead', 39 | in_channels=2048, 40 | fc_out_channels=1024, 41 | roi_feat_size=7, 42 | num_classes=80, 43 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 44 | target_means=[0., 0., 0., 0.], 45 | target_stds=[0.1, 0.1, 0.2, 0.2]), 46 | reg_class_agnostic=False, 47 | loss_cls=dict(type='CrossEntropyLoss', 48 | use_sigmoid=False, 49 | loss_weight=1.0), 50 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 51 | # model training and testing settings 52 | train_cfg=dict(rpn=dict(assigner=dict(type='MaxIoUAssigner', 53 | pos_iou_thr=0.7, 54 | neg_iou_thr=0.3, 55 | min_pos_iou=0.3, 56 | match_low_quality=True, 57 | ignore_iof_thr=-1), 58 | sampler=dict(type='RandomSampler', 59 | num=256, 60 | pos_fraction=0.5, 61 | neg_pos_ub=-1, 62 | add_gt_as_proposals=False), 63 | allowed_border=0, 64 | pos_weight=-1, 65 | debug=False), 66 | rpn_proposal=dict(nms_pre=12000, 67 | max_per_img=2000, 68 | nms=dict(type='nms', iou_threshold=0.7), 69 | min_bbox_size=0), 70 | rcnn=dict(assigner=dict(type='MaxIoUAssigner', 71 | pos_iou_thr=0.5, 72 | neg_iou_thr=0.5, 73 | min_pos_iou=0.5, 74 | match_low_quality=False, 75 | ignore_iof_thr=-1), 76 | sampler=dict(type='RandomSampler', 77 | num=512, 78 | pos_fraction=0.25, 79 | neg_pos_ub=-1, 80 | add_gt_as_proposals=True), 81 | pos_weight=-1, 82 | debug=False)), 83 | test_cfg=dict(rpn=dict(nms=dict(type='nms', iou_threshold=0.7), 84 | nms_pre=6000, 85 | max_per_img=1000, 86 | min_bbox_size=0), 87 | rcnn=dict(score_thr=0.05, 88 | nms=dict(type='nms', iou_threshold=0.5), 89 | max_per_img=100))) 90 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/faster_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FasterRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict(type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=True), 11 | norm_eval=True, 12 | style='pytorch'), 13 | neck=dict(type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | rpn_head=dict(type='RPNHead', 18 | in_channels=256, 19 | feat_channels=256, 20 | anchor_generator=dict(type='AnchorGenerator', 21 | scales=[8], 22 | ratios=[0.5, 1.0, 2.0], 23 | strides=[4, 8, 16, 32, 64]), 24 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 25 | target_means=[.0, .0, .0, .0], 26 | target_stds=[1.0, 1.0, 1.0, 1.0]), 27 | loss_cls=dict(type='CrossEntropyLoss', 28 | use_sigmoid=True, 29 | loss_weight=1.0), 30 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 31 | roi_head=dict(type='StandardRoIHead', 32 | bbox_roi_extractor=dict(type='SingleRoIExtractor', 33 | 
roi_layer=dict(type='RoIAlign', 34 | output_size=7, 35 | sampling_ratio=0), 36 | out_channels=256, 37 | featmap_strides=[4, 8, 16, 32]), 38 | bbox_head=dict( 39 | type='Shared2FCBBoxHead', 40 | in_channels=256, 41 | fc_out_channels=1024, 42 | roi_feat_size=7, 43 | num_classes=80, 44 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 45 | target_means=[0., 0., 0., 0.], 46 | target_stds=[0.1, 0.1, 0.2, 0.2]), 47 | reg_class_agnostic=False, 48 | loss_cls=dict(type='CrossEntropyLoss', 49 | use_sigmoid=False, 50 | loss_weight=1.0), 51 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 52 | # model training and testing settings 53 | train_cfg=dict(rpn=dict(assigner=dict(type='MaxIoUAssigner', 54 | pos_iou_thr=0.7, 55 | neg_iou_thr=0.3, 56 | min_pos_iou=0.3, 57 | match_low_quality=True, 58 | ignore_iof_thr=-1), 59 | sampler=dict(type='RandomSampler', 60 | num=256, 61 | pos_fraction=0.5, 62 | neg_pos_ub=-1, 63 | add_gt_as_proposals=False), 64 | allowed_border=-1, 65 | pos_weight=-1, 66 | debug=False), 67 | rpn_proposal=dict(nms_pre=2000, 68 | max_per_img=1000, 69 | nms=dict(type='nms', iou_threshold=0.7), 70 | min_bbox_size=0), 71 | rcnn=dict(assigner=dict(type='MaxIoUAssigner', 72 | pos_iou_thr=0.5, 73 | neg_iou_thr=0.5, 74 | min_pos_iou=0.5, 75 | match_low_quality=False, 76 | ignore_iof_thr=-1), 77 | sampler=dict(type='RandomSampler', 78 | num=512, 79 | pos_fraction=0.25, 80 | neg_pos_ub=-1, 81 | add_gt_as_proposals=True), 82 | pos_weight=-1, 83 | debug=False)), 84 | test_cfg=dict( 85 | rpn=dict(nms_pre=1000, 86 | max_per_img=1000, 87 | nms=dict(type='nms', iou_threshold=0.7), 88 | min_bbox_size=0), 89 | rcnn=dict(score_thr=0.05, 90 | nms=dict(type='nms', iou_threshold=0.5), 91 | max_per_img=100) 92 | # soft-nms is also supported for rcnn testing 93 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) 94 | )) 95 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/mask_rcnn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='MaskRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict(type='ResNet', 7 | depth=50, 8 | num_stages=3, 9 | strides=(1, 2, 2), 10 | dilations=(1, 1, 1), 11 | out_indices=(2, ), 12 | frozen_stages=1, 13 | norm_cfg=norm_cfg, 14 | norm_eval=True, 15 | style='caffe'), 16 | rpn_head=dict(type='RPNHead', 17 | in_channels=1024, 18 | feat_channels=1024, 19 | anchor_generator=dict(type='AnchorGenerator', 20 | scales=[2, 4, 8, 16, 32], 21 | ratios=[0.5, 1.0, 2.0], 22 | strides=[16]), 23 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 24 | target_means=[.0, .0, .0, .0], 25 | target_stds=[1.0, 1.0, 1.0, 1.0]), 26 | loss_cls=dict(type='CrossEntropyLoss', 27 | use_sigmoid=True, 28 | loss_weight=1.0), 29 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 30 | roi_head=dict(type='StandardRoIHead', 31 | shared_head=dict(type='ResLayer', 32 | depth=50, 33 | stage=3, 34 | stride=2, 35 | dilation=1, 36 | style='caffe', 37 | norm_cfg=norm_cfg, 38 | norm_eval=True), 39 | bbox_roi_extractor=dict(type='SingleRoIExtractor', 40 | roi_layer=dict(type='RoIAlign', 41 | output_size=14, 42 | sampling_ratio=0), 43 | out_channels=1024, 44 | featmap_strides=[16]), 45 | bbox_head=dict( 46 | type='BBoxHead', 47 | with_avg_pool=True, 48 | roi_feat_size=7, 49 | in_channels=2048, 50 | num_classes=80, 51 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 52 | 
target_means=[0., 0., 0., 0.], 53 | target_stds=[0.1, 0.1, 0.2, 0.2]), 54 | reg_class_agnostic=False, 55 | loss_cls=dict(type='CrossEntropyLoss', 56 | use_sigmoid=False, 57 | loss_weight=1.0), 58 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 59 | mask_roi_extractor=None, 60 | mask_head=dict(type='FCNMaskHead', 61 | num_convs=0, 62 | in_channels=2048, 63 | conv_out_channels=256, 64 | num_classes=80, 65 | loss_mask=dict(type='CrossEntropyLoss', 66 | use_mask=True, 67 | loss_weight=1.0))), 68 | # model training and testing settings 69 | train_cfg=dict(rpn=dict(assigner=dict(type='MaxIoUAssigner', 70 | pos_iou_thr=0.7, 71 | neg_iou_thr=0.3, 72 | min_pos_iou=0.3, 73 | match_low_quality=True, 74 | ignore_iof_thr=-1), 75 | sampler=dict(type='RandomSampler', 76 | num=256, 77 | pos_fraction=0.5, 78 | neg_pos_ub=-1, 79 | add_gt_as_proposals=False), 80 | allowed_border=0, 81 | pos_weight=-1, 82 | debug=False), 83 | rpn_proposal=dict(nms_pre=12000, 84 | max_per_img=2000, 85 | nms=dict(type='nms', iou_threshold=0.7), 86 | min_bbox_size=0), 87 | rcnn=dict(assigner=dict(type='MaxIoUAssigner', 88 | pos_iou_thr=0.5, 89 | neg_iou_thr=0.5, 90 | min_pos_iou=0.5, 91 | match_low_quality=False, 92 | ignore_iof_thr=-1), 93 | sampler=dict(type='RandomSampler', 94 | num=512, 95 | pos_fraction=0.25, 96 | neg_pos_ub=-1, 97 | add_gt_as_proposals=True), 98 | mask_size=14, 99 | pos_weight=-1, 100 | debug=False)), 101 | test_cfg=dict(rpn=dict(nms_pre=6000, 102 | nms=dict(type='nms', iou_threshold=0.7), 103 | max_per_img=1000, 104 | min_bbox_size=0), 105 | rcnn=dict(score_thr=0.05, 106 | nms=dict(type='nms', iou_threshold=0.5), 107 | max_per_img=100, 108 | mask_thr_binary=0.5))) 109 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/mask_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='MaskRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict(type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=True), 11 | norm_eval=True, 12 | style='pytorch'), 13 | neck=dict(type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | rpn_head=dict(type='RPNHead', 18 | in_channels=256, 19 | feat_channels=256, 20 | anchor_generator=dict(type='AnchorGenerator', 21 | scales=[8], 22 | ratios=[0.5, 1.0, 2.0], 23 | strides=[4, 8, 16, 32, 64]), 24 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 25 | target_means=[.0, .0, .0, .0], 26 | target_stds=[1.0, 1.0, 1.0, 1.0]), 27 | loss_cls=dict(type='CrossEntropyLoss', 28 | use_sigmoid=True, 29 | loss_weight=1.0), 30 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 31 | roi_head=dict(type='StandardRoIHead', 32 | bbox_roi_extractor=dict(type='SingleRoIExtractor', 33 | roi_layer=dict(type='RoIAlign', 34 | output_size=7, 35 | sampling_ratio=0), 36 | out_channels=256, 37 | featmap_strides=[4, 8, 16, 32]), 38 | bbox_head=dict( 39 | type='Shared2FCBBoxHead', 40 | in_channels=256, 41 | fc_out_channels=1024, 42 | roi_feat_size=7, 43 | num_classes=80, 44 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 45 | target_means=[0., 0., 0., 0.], 46 | target_stds=[0.1, 0.1, 0.2, 0.2]), 47 | reg_class_agnostic=False, 48 | loss_cls=dict(type='CrossEntropyLoss', 49 | use_sigmoid=False, 50 | loss_weight=1.0), 51 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 52 | mask_roi_extractor=dict(type='SingleRoIExtractor', 53 | 
roi_layer=dict(type='RoIAlign', 54 | output_size=14, 55 | sampling_ratio=0), 56 | out_channels=256, 57 | featmap_strides=[4, 8, 16, 32]), 58 | mask_head=dict(type='FCNMaskHead', 59 | num_convs=4, 60 | in_channels=256, 61 | conv_out_channels=256, 62 | num_classes=80, 63 | loss_mask=dict(type='CrossEntropyLoss', 64 | use_mask=True, 65 | loss_weight=1.0))), 66 | # model training and testing settings 67 | train_cfg=dict(rpn=dict(assigner=dict(type='MaxIoUAssigner', 68 | pos_iou_thr=0.7, 69 | neg_iou_thr=0.3, 70 | min_pos_iou=0.3, 71 | match_low_quality=True, 72 | ignore_iof_thr=-1), 73 | sampler=dict(type='RandomSampler', 74 | num=256, 75 | pos_fraction=0.5, 76 | neg_pos_ub=-1, 77 | add_gt_as_proposals=False), 78 | allowed_border=-1, 79 | pos_weight=-1, 80 | debug=False), 81 | rpn_proposal=dict(nms_pre=2000, 82 | max_per_img=1000, 83 | nms=dict(type='nms', iou_threshold=0.7), 84 | min_bbox_size=0), 85 | rcnn=dict(assigner=dict(type='MaxIoUAssigner', 86 | pos_iou_thr=0.5, 87 | neg_iou_thr=0.5, 88 | min_pos_iou=0.5, 89 | match_low_quality=True, 90 | ignore_iof_thr=-1), 91 | sampler=dict(type='RandomSampler', 92 | num=512, 93 | pos_fraction=0.25, 94 | neg_pos_ub=-1, 95 | add_gt_as_proposals=True), 96 | mask_size=28, 97 | pos_weight=-1, 98 | debug=False)), 99 | test_cfg=dict(rpn=dict(nms_pre=1000, 100 | max_per_img=1000, 101 | nms=dict(type='nms', iou_threshold=0.7), 102 | min_bbox_size=0), 103 | rcnn=dict(score_thr=0.05, 104 | nms=dict(type='nms', iou_threshold=0.5), 105 | max_per_img=100, 106 | mask_thr_binary=0.5))) 107 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/retinanet_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict(type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=True), 11 | norm_eval=True, 12 | style='pytorch'), 13 | neck=dict(type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | start_level=1, 17 | add_extra_convs='on_input', 18 | num_outs=5), 19 | bbox_head=dict(type='RetinaHead', 20 | num_classes=80, 21 | in_channels=256, 22 | stacked_convs=4, 23 | feat_channels=256, 24 | anchor_generator=dict(type='AnchorGenerator', 25 | octave_base_scale=4, 26 | scales_per_octave=3, 27 | ratios=[0.5, 1.0, 2.0], 28 | strides=[8, 16, 32, 64, 128]), 29 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict(type='FocalLoss', 33 | use_sigmoid=True, 34 | gamma=2.0, 35 | alpha=0.25, 36 | loss_weight=1.0), 37 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 38 | # model training and testing settings 39 | train_cfg=dict(assigner=dict(type='MaxIoUAssigner', 40 | pos_iou_thr=0.5, 41 | neg_iou_thr=0.4, 42 | min_pos_iou=0, 43 | ignore_iof_thr=-1), 44 | allowed_border=-1, 45 | pos_weight=-1, 46 | debug=False), 47 | test_cfg=dict(nms_pre=1000, 48 | min_bbox_size=0, 49 | score_thr=0.05, 50 | nms=dict(type='nms', iou_threshold=0.5), 51 | max_per_img=100)) 52 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/rpn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='open-mmlab://detectron2/resnet50_caffe', 
5 | backbone=dict(type='ResNet', 6 | depth=50, 7 | num_stages=3, 8 | strides=(1, 2, 2), 9 | dilations=(1, 1, 1), 10 | out_indices=(2, ), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=False), 13 | norm_eval=True, 14 | style='caffe'), 15 | neck=None, 16 | rpn_head=dict(type='RPNHead', 17 | in_channels=1024, 18 | feat_channels=1024, 19 | anchor_generator=dict(type='AnchorGenerator', 20 | scales=[2, 4, 8, 16, 32], 21 | ratios=[0.5, 1.0, 2.0], 22 | strides=[16]), 23 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 24 | target_means=[.0, .0, .0, .0], 25 | target_stds=[1.0, 1.0, 1.0, 1.0]), 26 | loss_cls=dict(type='CrossEntropyLoss', 27 | use_sigmoid=True, 28 | loss_weight=1.0), 29 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 30 | # model training and testing settings 31 | train_cfg=dict(rpn=dict(assigner=dict(type='MaxIoUAssigner', 32 | pos_iou_thr=0.7, 33 | neg_iou_thr=0.3, 34 | min_pos_iou=0.3, 35 | ignore_iof_thr=-1), 36 | sampler=dict(type='RandomSampler', 37 | num=256, 38 | pos_fraction=0.5, 39 | neg_pos_ub=-1, 40 | add_gt_as_proposals=False), 41 | allowed_border=0, 42 | pos_weight=-1, 43 | debug=False)), 44 | test_cfg=dict(rpn=dict(nms_pre=12000, 45 | max_per_img=2000, 46 | nms=dict(type='nms', iou_threshold=0.7), 47 | min_bbox_size=0))) 48 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/rpn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict(type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=True), 11 | norm_eval=True, 12 | style='pytorch'), 13 | neck=dict(type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | rpn_head=dict(type='RPNHead', 18 | in_channels=256, 19 | feat_channels=256, 20 | anchor_generator=dict(type='AnchorGenerator', 21 | scales=[8], 22 | ratios=[0.5, 1.0, 2.0], 23 | strides=[4, 8, 16, 32, 64]), 24 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 25 | target_means=[.0, .0, .0, .0], 26 | target_stds=[1.0, 1.0, 1.0, 1.0]), 27 | loss_cls=dict(type='CrossEntropyLoss', 28 | use_sigmoid=True, 29 | loss_weight=1.0), 30 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 31 | # model training and testing settings 32 | train_cfg=dict(rpn=dict(assigner=dict(type='MaxIoUAssigner', 33 | pos_iou_thr=0.7, 34 | neg_iou_thr=0.3, 35 | min_pos_iou=0.3, 36 | ignore_iof_thr=-1), 37 | sampler=dict(type='RandomSampler', 38 | num=256, 39 | pos_fraction=0.5, 40 | neg_pos_ub=-1, 41 | add_gt_as_proposals=False), 42 | allowed_border=0, 43 | pos_weight=-1, 44 | debug=False)), 45 | test_cfg=dict(rpn=dict(nms_pre=2000, 46 | max_per_img=1000, 47 | nms=dict(type='nms', iou_threshold=0.7), 48 | min_bbox_size=0))) 49 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/ssd300.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | input_size = 300 3 | model = dict( 4 | type='SingleStageDetector', 5 | pretrained='open-mmlab://vgg16_caffe', 6 | backbone=dict(type='SSDVGG', 7 | input_size=input_size, 8 | depth=16, 9 | with_last_pool=False, 10 | ceil_mode=True, 11 | out_indices=(3, 4), 12 | out_feature_indices=(22, 34), 13 | l2_norm_scale=20), 14 | neck=None, 15 | bbox_head=dict(type='SSDHead', 16 | in_channels=(512, 1024, 
512, 256, 256, 256), 17 | num_classes=80, 18 | anchor_generator=dict(type='SSDAnchorGenerator', 19 | scale_major=False, 20 | input_size=input_size, 21 | basesize_ratio_range=(0.15, 0.9), 22 | strides=[8, 16, 32, 64, 100, 300], 23 | ratios=[[2], [2, 3], [2, 3], [2, 3], 24 | [2], [2]]), 25 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 26 | target_means=[.0, .0, .0, .0], 27 | target_stds=[0.1, 0.1, 0.2, 0.2])), 28 | # model training and testing settings 29 | train_cfg=dict(assigner=dict(type='MaxIoUAssigner', 30 | pos_iou_thr=0.5, 31 | neg_iou_thr=0.5, 32 | min_pos_iou=0., 33 | ignore_iof_thr=-1, 34 | gt_max_assign_all=False), 35 | smoothl1_beta=1., 36 | allowed_border=-1, 37 | pos_weight=-1, 38 | neg_pos_ratio=3, 39 | debug=False), 40 | test_cfg=dict(nms_pre=1000, 41 | nms=dict(type='nms', iou_threshold=0.45), 42 | min_bbox_size=0, 43 | score_thr=0.02, 44 | max_per_img=200)) 45 | cudnn_benchmark = True 46 | -------------------------------------------------------------------------------- /detection/configs/_base_/schedules/schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict(policy='step', 6 | warmup='linear', 7 | warmup_iters=500, 8 | warmup_ratio=0.001, 9 | step=[8, 11]) 10 | runner = dict(type='EpochBasedRunner', max_epochs=12) 11 | -------------------------------------------------------------------------------- /detection/configs/_base_/schedules/schedule_20e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict(policy='step', 6 | warmup='linear', 7 | warmup_iters=500, 8 | warmup_ratio=0.001, 9 | step=[16, 19]) 10 | runner = dict(type='EpochBasedRunner', max_epochs=20) 11 | -------------------------------------------------------------------------------- /detection/configs/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict(policy='step', 6 | warmup='linear', 7 | warmup_iters=500, 8 | warmup_ratio=0.001, 9 | step=[16, 22]) 10 | runner = dict(type='EpochBasedRunner', max_epochs=24) 11 | -------------------------------------------------------------------------------- /detection/configs/atss/atss_finetune.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/coco_detection.py', 3 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 4 | ] 5 | model = dict( 6 | type='ATSS', 7 | pretrained='torchvision://resnet50', 8 | backbone=dict(type='ResNet', 9 | depth=50, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=-1, 13 | norm_cfg=dict(type='BN', requires_grad=True), 14 | norm_eval=True, 15 | style='pytorch'), 16 | neck=dict(type='FPN', 17 | in_channels=[256, 512, 1024, 2048], 18 | out_channels=256, 19 | start_level=1, 20 | add_extra_convs='on_output', 21 | num_outs=5), 22 | bbox_head=dict(type='ATSSHead', 23 | num_classes=80, 24 | in_channels=256, 25 | norm_cfg=None, 26 | stacked_convs=4, 27 | feat_channels=256, 28 | 
anchor_generator=dict(type='AnchorGenerator', 29 | ratios=[1.0], 30 | octave_base_scale=8, 31 | scales_per_octave=1, 32 | strides=[8, 16, 32, 64, 128]), 33 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 34 | target_means=[.0, .0, .0, .0], 35 | target_stds=[0.1, 0.1, 0.2, 0.2]), 36 | loss_cls=dict(type='FocalLoss', 37 | use_sigmoid=True, 38 | gamma=2.0, 39 | alpha=0.25, 40 | loss_weight=1.0), 41 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 42 | loss_centerness=dict(type='CrossEntropyLoss', 43 | use_sigmoid=True, 44 | loss_weight=1.0)), 45 | # training and testing settings 46 | train_cfg=dict(assigner=dict(type='ATSSAssigner', topk=9), 47 | allowed_border=-1, 48 | pos_weight=-1, 49 | debug=False), 50 | test_cfg=dict(nms_pre=1000, 51 | min_bbox_size=0, 52 | score_thr=0.05, 53 | nms=dict(type='nms', iou_threshold=0.6), 54 | max_per_img=100)) 55 | # optimizer 56 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) 57 | 58 | custom_hooks = [ 59 | dict(type='FisherPruningHook', 60 | pruning=False, 61 | deploy_from='path to the pruned model') 62 | ] 63 | -------------------------------------------------------------------------------- /detection/configs/atss/atss_pruning.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/coco_detection.py', 3 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 4 | ] 5 | model = dict( 6 | type='ATSS', 7 | pretrained='torchvision://resnet50', 8 | backbone=dict(type='ResNet', 9 | depth=50, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=-1, 13 | norm_cfg=dict(type='BN', requires_grad=True), 14 | norm_eval=True, 15 | style='pytorch'), 16 | neck=dict(type='FPN', 17 | in_channels=[256, 512, 1024, 2048], 18 | out_channels=256, 19 | start_level=1, 20 | add_extra_convs='on_output', 21 | num_outs=5), 22 | bbox_head=dict(type='ATSSHead', 23 | num_classes=80, 24 | in_channels=256, 25 | norm_cfg=None, 26 | stacked_convs=4, 27 | feat_channels=256, 28 | anchor_generator=dict(type='AnchorGenerator', 29 | ratios=[1.0], 30 | octave_base_scale=8, 31 | scales_per_octave=1, 32 | strides=[8, 16, 32, 64, 128]), 33 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 34 | target_means=[.0, .0, .0, .0], 35 | target_stds=[0.1, 0.1, 0.2, 0.2]), 36 | loss_cls=dict(type='FocalLoss', 37 | use_sigmoid=True, 38 | gamma=2.0, 39 | alpha=0.25, 40 | loss_weight=1.0), 41 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 42 | loss_centerness=dict(type='CrossEntropyLoss', 43 | use_sigmoid=True, 44 | loss_weight=1.0)), 45 | # training and testing settings 46 | train_cfg=dict(assigner=dict(type='ATSSAssigner', topk=9), 47 | allowed_border=-1, 48 | pos_weight=-1, 49 | debug=False), 50 | test_cfg=dict(nms_pre=1000, 51 | min_bbox_size=0, 52 | score_thr=0.05, 53 | nms=dict(type='nms', iou_threshold=0.6), 54 | max_per_img=100)) 55 | # optimizer 56 | optimizer = dict(type='SGD', lr=0.002, momentum=0.9, weight_decay=0.0001) 57 | 58 | custom_hooks = [ 59 | dict( 60 | type='FisherPruningHook', 61 | # In the pruning process, you need to set the priority 62 | # to 'LOWEST' to ensure the pruning hook is executed 63 | # after the optimizer hook; in the finetune process, you 64 | # should set it to 'HIGHEST' to ensure it is executed 65 | # before the checkpoint hook. 66 | pruning=True, 67 | batch_size=2, 68 | interval=10, 69 | priority='LOWEST', 70 | ) 71 | ] 72 | load_from = 'path to the baseline' # noqa: E501 73 | --------------------------------------------------------------------------------
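Every pruning/finetune config pair in this repo follows the hook pattern shown above: during pruning the hook carries `priority='LOWEST'` so it runs after the optimizer hook, and during finetuning the config comments recommend `'HIGHEST'` so the pruned structure is rebuilt before the checkpoint hook saves weights. A condensed, hedged sketch of the two phases — paths are placeholders, and the parameter comments paraphrase the config comments rather than `fisher_pruning.py` itself:

```python
# Phase 1 (pruning): accumulate Fisher information and prune periodically.
# Must run after each optimizer step, hence priority='LOWEST'.
pruning_hook = dict(
    type='FisherPruningHook',
    pruning=True,
    batch_size=2,   # samples per GPU assumed when normalizing the estimate
    interval=10,    # prune every 10 iterations
    priority='LOWEST')

# Phase 2 (finetune): restore the slimmed architecture from the pruned
# checkpoint before training resumes; the finetune configs in this repo
# omit `priority` and rely on the hook default, while the comment above
# recommends priority='HIGHEST'.
finetune_hook = dict(
    type='FisherPruningHook',
    pruning=False,
    deploy_from='path to the pruned model')  # placeholder, as in the configs

custom_hooks = [pruning_hook]    # used by the *_pruning.py configs
# custom_hooks = [finetune_hook]  # used by the *_finetune.py configs
```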
/detection/configs/faster_rcnn/faster_finetune.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/faster_rcnn_r50_fpn.py', 3 | '../_base_/datasets/coco_detection.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | # optimizer 7 | optimizer = dict(lr=0.01) 8 | custom_hooks = [ 9 | dict(type='FisherPruningHook', 10 | pruning=False, 11 | deploy_from='path to the pruned model') 12 | ] 13 | # 14 | model = dict(backbone=dict(frozen_stages=-1, )) 15 | work_dir = "work_dirs/faster_rcnn" 16 | -------------------------------------------------------------------------------- /detection/configs/faster_rcnn/faster_pruning.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/faster_rcnn_r50_fpn.py', 3 | '../_base_/datasets/coco_detection.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | # optimizer 7 | optimizer = dict(lr=0.002) 8 | custom_hooks = [ 9 | dict( 10 | type='FisherPruningHook', 11 | # In the pruning process, you need to set the priority 12 | # to 'LOWEST' to ensure the pruning hook is executed 13 | # after the optimizer hook; in the finetune process, you 14 | # should set it to 'HIGHEST' to ensure it is executed 15 | # before the checkpoint hook. 16 | pruning=True, 17 | batch_size=2, 18 | interval=10, 19 | priority='LOWEST', 20 | ) 21 | ] 22 | # 23 | work_dir = "work_dirs/faster_rcnn" 24 | model = dict(backbone=dict(frozen_stages=-1, )) 25 | load_from = 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth' # noqa: E501 26 | -------------------------------------------------------------------------------- /detection/configs/fsaf/fsaf_finetune.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/retinanet_r50_fpn.py', 3 | '../_base_/datasets/coco_detection.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | # model settings 7 | model = dict( 8 | type='FSAF', 9 | backbone=dict(frozen_stages=-1, ), 10 | bbox_head=dict( 11 | type='FSAFHead', 12 | num_classes=80, 13 | in_channels=256, 14 | stacked_convs=4, 15 | feat_channels=256, 16 | reg_decoded_bbox=True, 17 | # Only the anchor-free branch is implemented. The anchor generator only 18 | # generates 1 anchor at each feature point, as a substitute for the 19 | # grid of features.
20 | anchor_generator=dict(type='AnchorGenerator', 21 | octave_base_scale=1, 22 | scales_per_octave=1, 23 | ratios=[1.0], 24 | strides=[8, 16, 32, 64, 128]), 25 | bbox_coder=dict(_delete_=True, type='TBLRBBoxCoder', normalizer=4.0), 26 | loss_cls=dict(type='FocalLoss', 27 | use_sigmoid=True, 28 | gamma=2.0, 29 | alpha=0.25, 30 | loss_weight=1.0, 31 | reduction='none'), 32 | loss_bbox=dict(_delete_=True, 33 | type='IoULoss', 34 | eps=1e-6, 35 | loss_weight=1.0, 36 | reduction='none')), 37 | # training and testing settings 38 | train_cfg=dict(assigner=dict(_delete_=True, 39 | type='CenterRegionAssigner', 40 | pos_scale=0.2, 41 | neg_scale=0.2, 42 | min_pos_iof=0.01), 43 | allowed_border=-1, 44 | pos_weight=-1, 45 | debug=False)) 46 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) 47 | optimizer_config = dict(_delete_=True, 48 | grad_clip=dict(max_norm=10, norm_type=2)) 49 | 50 | custom_hooks = [ 51 | dict(type='FisherPruningHook', 52 | pruning=False, 53 | deploy_from='path to the pruned model') 54 | ] 55 | work_dir = "work_dirs/fsaf" 56 | 57 | -------------------------------------------------------------------------------- /detection/configs/fsaf/fsaf_pruning.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/retinanet_r50_fpn.py', 3 | '../_base_/datasets/coco_detection.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | # model settings 7 | model = dict( 8 | type='FSAF', 9 | backbone=dict(frozen_stages=-1, ), 10 | bbox_head=dict( 11 | type='FSAFHead', 12 | num_classes=80, 13 | in_channels=256, 14 | stacked_convs=4, 15 | feat_channels=256, 16 | reg_decoded_bbox=True, 17 | # Only the anchor-free branch is implemented. The anchor generator only 18 | # generates 1 anchor at each feature point, as a substitute for the 19 | # grid of features.
20 | anchor_generator=dict(type='AnchorGenerator', 21 | octave_base_scale=1, 22 | scales_per_octave=1, 23 | ratios=[1.0], 24 | strides=[8, 16, 32, 64, 128]), 25 | bbox_coder=dict(_delete_=True, type='TBLRBBoxCoder', normalizer=4.0), 26 | loss_cls=dict(type='FocalLoss', 27 | use_sigmoid=True, 28 | gamma=2.0, 29 | alpha=0.25, 30 | loss_weight=1.0, 31 | reduction='none'), 32 | loss_bbox=dict(_delete_=True, 33 | type='IoULoss', 34 | eps=1e-6, 35 | loss_weight=1.0, 36 | reduction='none')), 37 | # training and testing settings 38 | train_cfg=dict(assigner=dict(_delete_=True, 39 | type='CenterRegionAssigner', 40 | pos_scale=0.2, 41 | neg_scale=0.2, 42 | min_pos_iof=0.01), 43 | allowed_border=-1, 44 | pos_weight=-1, 45 | debug=False)) 46 | optimizer = dict(type='SGD', lr=0.002, momentum=0.9, weight_decay=0.0001) 47 | optimizer_config = dict(_delete_=True, 48 | grad_clip=dict(max_norm=10, norm_type=2)) 49 | 50 | custom_hooks = [ 51 | dict( 52 | type='FisherPruningHook', 53 | # In the pruning process, you need to set the priority 54 | # to 'LOWEST' to ensure the pruning hook is executed 55 | # after the optimizer hook; in the finetune process, you 56 | # should set it to 'HIGHEST' to ensure it is executed 57 | # before the checkpoint hook. 58 | pruning=True, 59 | batch_size=2, 60 | interval=10, 61 | priority='LOWEST', 62 | ) 63 | ] 64 | load_from = 'https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r50_fpn_1x_coco/fsaf_r50_fpn_1x_coco-94ccc51f.pth' # noqa: E501 65 | work_dir = "work_dirs/fsaf" 66 | -------------------------------------------------------------------------------- /detection/configs/paa/paa_finetune.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/coco_detection.py', 3 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 4 | ] 5 | model = dict( 6 | type='PAA', 7 | pretrained='torchvision://resnet50', 8 | backbone=dict(type='ResNet', 9 | depth=50, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=-1, 13 | norm_cfg=dict(type='BN', requires_grad=True), 14 | norm_eval=True, 15 | style='pytorch'), 16 | neck=dict(type='FPN', 17 | in_channels=[256, 512, 1024, 2048], 18 | out_channels=256, 19 | start_level=1, 20 | add_extra_convs='on_output', 21 | num_outs=5), 22 | bbox_head=dict(type='PAAHead', 23 | norm_cfg=None, 24 | reg_decoded_bbox=True, 25 | score_voting=True, 26 | topk=9, 27 | num_classes=80, 28 | in_channels=256, 29 | stacked_convs=4, 30 | feat_channels=256, 31 | anchor_generator=dict(type='AnchorGenerator', 32 | ratios=[1.0], 33 | octave_base_scale=8, 34 | scales_per_octave=1, 35 | strides=[8, 16, 32, 64, 128]), 36 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 37 | target_means=[.0, .0, .0, .0], 38 | target_stds=[0.1, 0.1, 0.2, 0.2]), 39 | loss_cls=dict(type='FocalLoss', 40 | use_sigmoid=True, 41 | gamma=2.0, 42 | alpha=0.25, 43 | loss_weight=1.0), 44 | loss_bbox=dict(type='GIoULoss', loss_weight=1.3), 45 | loss_centerness=dict(type='CrossEntropyLoss', 46 | use_sigmoid=True, 47 | loss_weight=0.5)), 48 | # training and testing settings 49 | train_cfg=dict(assigner=dict(type='MaxIoUAssigner', 50 | pos_iou_thr=0.1, 51 | neg_iou_thr=0.1, 52 | min_pos_iou=0, 53 | ignore_iof_thr=-1), 54 | allowed_border=-1, 55 | pos_weight=-1, 56 | debug=False), 57 | test_cfg=dict(nms_pre=1000, 58 | min_bbox_size=0, 59 | score_thr=0.05, 60 | nms=dict(type='nms', iou_threshold=0.6), 61 | max_per_img=100)) 62 | # optimizer 63 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) 64 | 65 | custom_hooks
= [ 66 | dict(type='FisherPruningHook', 67 | pruning=False, 68 | deploy_from='path to the pruned model') 69 | ] 70 | 71 | work_dir = "work_dirs/paa" 72 | -------------------------------------------------------------------------------- /detection/configs/paa/paa_pruning.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/coco_detection.py', 3 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 4 | ] 5 | model = dict( 6 | type='PAA', 7 | pretrained='torchvision://resnet50', 8 | backbone=dict(type='ResNet', 9 | depth=50, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=-1, 13 | norm_cfg=dict(type='BN', requires_grad=True), 14 | norm_eval=True, 15 | style='pytorch'), 16 | neck=dict(type='FPN', 17 | in_channels=[256, 512, 1024, 2048], 18 | out_channels=256, 19 | start_level=1, 20 | add_extra_convs='on_output', 21 | num_outs=5), 22 | bbox_head=dict(type='PAAHead', 23 | norm_cfg=None, 24 | reg_decoded_bbox=True, 25 | score_voting=True, 26 | topk=9, 27 | num_classes=80, 28 | in_channels=256, 29 | stacked_convs=4, 30 | feat_channels=256, 31 | anchor_generator=dict(type='AnchorGenerator', 32 | ratios=[1.0], 33 | octave_base_scale=8, 34 | scales_per_octave=1, 35 | strides=[8, 16, 32, 64, 128]), 36 | bbox_coder=dict(type='DeltaXYWHBBoxCoder', 37 | target_means=[.0, .0, .0, .0], 38 | target_stds=[0.1, 0.1, 0.2, 0.2]), 39 | loss_cls=dict(type='FocalLoss', 40 | use_sigmoid=True, 41 | gamma=2.0, 42 | alpha=0.25, 43 | loss_weight=1.0), 44 | loss_bbox=dict(type='GIoULoss', loss_weight=1.3), 45 | loss_centerness=dict(type='CrossEntropyLoss', 46 | use_sigmoid=True, 47 | loss_weight=0.5)), 48 | # training and testing settings 49 | train_cfg=dict(assigner=dict(type='MaxIoUAssigner', 50 | pos_iou_thr=0.1, 51 | neg_iou_thr=0.1, 52 | min_pos_iou=0, 53 | ignore_iof_thr=-1), 54 | allowed_border=-1, 55 | pos_weight=-1, 56 | debug=False), 57 | test_cfg=dict(nms_pre=1000, 58 | min_bbox_size=0, 59 | score_thr=0.05, 60 | nms=dict(type='nms', iou_threshold=0.6), 61 | max_per_img=100)) 62 | # optimizer 63 | optimizer = dict(type='SGD', lr=0.002, momentum=0.9, weight_decay=0.0001) 64 | 65 | custom_hooks = [ 66 | dict( 67 | type='FisherPruningHook', 68 | # In the pruning process, set the priority to 69 | # 'LOWEST' to ensure the pruning hook executes 70 | # after the optimizer hook; in the finetune 71 | # process, set it to 'HIGHEST' so it executes 72 | # before the checkpoint hook 73 | pruning=True, 74 | batch_size=2, 75 | interval=10, 76 | priority='LOWEST', 77 | ) 78 | ] 79 | load_from = 'path to the baseline' 80 | work_dir = "work_dirs/paa" 81 | 82 | -------------------------------------------------------------------------------- /detection/configs/retina/retina_finetune.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/retinanet_r50_fpn.py', 3 | '../_base_/datasets/coco_detection.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | # optimizer 7 | optimizer = dict(lr=0.01) 8 | custom_hooks = [ 9 | dict(type='FisherPruningHook', 10 | pruning=False, 11 | deploy_from='path to the pruned model') 12 | ] 13 | # 14 | model = dict(backbone=dict(frozen_stages=-1, )) 15 | work_dir = "work_dirs/retina" 16 | 17 | -------------------------------------------------------------------------------- /detection/configs/retina/retina_pruning.py:
-------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/retinanet_r50_fpn.py', 3 | '../_base_/datasets/coco_detection.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | # optimizer 7 | optimizer = dict(lr=0.002) 8 | custom_hooks = [ 9 | dict( 10 | type='FisherPruningHook', 11 | # In the pruning process, set the priority to 12 | # 'LOWEST' to ensure the pruning hook executes 13 | # after the optimizer hook; in the finetune 14 | # process, set it to 'HIGHEST' so it executes 15 | # before the checkpoint hook 16 | pruning=True, 17 | batch_size=2, 18 | interval=10, 19 | priority='LOWEST', 20 | ) 21 | ] 22 | # 23 | work_dir = "work_dirs/retina" 24 | model = dict(backbone=dict(frozen_stages=-1, )) 25 | load_from = 'http://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn_1x_coco/retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth' # noqa: E501 26 | -------------------------------------------------------------------------------- /detection/tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29500} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 11 | -------------------------------------------------------------------------------- /detection/tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-29500} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} 10 | -------------------------------------------------------------------------------- /detection/tools/fisher_pruning_hook: -------------------------------------------------------------------------------- 1 | ../../fisher_pruning_hook -------------------------------------------------------------------------------- /detection/tools/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /detection/tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | WORK_DIR=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | PY_ARGS=${@:5} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p
${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /detection/tools/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import copy 3 | import os 4 | import os.path as osp 5 | import time 6 | import warnings 7 | 8 | import mmcv 9 | import torch 10 | # register the FisherPruningHook to the `Registry('hook')` 11 | from fisher_pruning_hook import FisherPruningHook # noqa F401 12 | from mmcv import Config, DictAction 13 | from mmcv.runner import get_dist_info, init_dist 14 | from mmcv.utils import get_git_hash 15 | from mmdet import __version__ 16 | from mmdet.apis import set_random_seed, train_detector 17 | from mmdet.datasets import build_dataset 18 | from mmdet.models import build_detector 19 | from mmdet.utils import collect_env, get_root_logger 20 | 21 | 22 | def parse_args(): 23 | parser = argparse.ArgumentParser(description='Train a detector') 24 | parser.add_argument('config', help='train config file path') 25 | parser.add_argument('--work-dir', help='the dir to save logs and models') 26 | parser.add_argument('--resume-from', 27 | help='the checkpoint file to resume from') 28 | parser.add_argument( 29 | '--no-validate', 30 | action='store_true', 31 | help='whether not to evaluate the checkpoint during training') 32 | group_gpus = parser.add_mutually_exclusive_group() 33 | group_gpus.add_argument('--gpus', 34 | type=int, 35 | help='number of gpus to use ' 36 | '(only applicable to non-distributed training)') 37 | group_gpus.add_argument('--gpu-ids', 38 | type=int, 39 | nargs='+', 40 | help='ids of gpus to use ' 41 | '(only applicable to non-distributed training)') 42 | parser.add_argument('--seed', type=int, default=None, help='random seed') 43 | parser.add_argument( 44 | '--deterministic', 45 | action='store_true', 46 | help='whether to set deterministic options for CUDNN backend.') 47 | parser.add_argument( 48 | '--options', 49 | nargs='+', 50 | action=DictAction, 51 | help='override some settings in the used config, the key-value pair ' 52 | 'in xxx=yyy format will be merged into config file (deprecated), ' 53 | 'change to --cfg-options instead.') 54 | parser.add_argument( 55 | '--cfg-options', 56 | nargs='+', 57 | action=DictAction, 58 | help='override some settings in the used config, the key-value pair ' 59 | 'in xxx=yyy format will be merged into config file. If the value to ' 60 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 61 | 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' 62 | 'Note that the quotation marks are necessary and that no white space ' 63 | 'is allowed.') 64 | parser.add_argument('--launcher', 65 | choices=['none', 'pytorch', 'slurm', 'mpi'], 66 | default='none', 67 | help='job launcher') 68 | parser.add_argument('--local_rank', type=int, default=0) 69 | args = parser.parse_args() 70 | if 'LOCAL_RANK' not in os.environ: 71 | os.environ['LOCAL_RANK'] = str(args.local_rank) 72 | 73 | if args.options and args.cfg_options: 74 | raise ValueError( 75 | '--options and --cfg-options cannot be both ' 76 | 'specified, --options is deprecated in favor of --cfg-options') 77 | if args.options: 78 | warnings.warn('--options is deprecated in favor of --cfg-options') 79 | args.cfg_options = args.options 80 | 81 | return args 82 | 83 | 84 | def main(): 85 | args = parse_args() 86 | 87 | cfg = Config.fromfile(args.config) 88 | if args.cfg_options is not None: 89 | cfg.merge_from_dict(args.cfg_options) 90 | # import modules from string list. 91 | if cfg.get('custom_imports', None): 92 | from mmcv.utils import import_modules_from_strings 93 | import_modules_from_strings(**cfg['custom_imports']) 94 | # set cudnn_benchmark 95 | if cfg.get('cudnn_benchmark', False): 96 | torch.backends.cudnn.benchmark = True 97 | 98 | # work_dir is determined in this priority: CLI > segment in file > filename 99 | if args.work_dir is not None: 100 | # update configs according to CLI args if args.work_dir is not None 101 | cfg.work_dir = args.work_dir 102 | elif cfg.get('work_dir', None) is None: 103 | # use config filename as default work_dir if cfg.work_dir is None 104 | cfg.work_dir = osp.join('./work_dirs', 105 | osp.splitext(osp.basename(args.config))[0]) 106 | if args.resume_from is not None: 107 | cfg.resume_from = args.resume_from 108 | if args.gpu_ids is not None: 109 | cfg.gpu_ids = args.gpu_ids 110 | else: 111 | cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus) 112 | 113 | # init distributed env first, since logger depends on the dist info. 
114 | if args.launcher == 'none': 115 | distributed = False 116 | else: 117 | distributed = True 118 | init_dist(args.launcher, **cfg.dist_params) 119 | # re-set gpu_ids with distributed training mode 120 | _, world_size = get_dist_info() 121 | cfg.gpu_ids = range(world_size) 122 | 123 | # create work_dir 124 | mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir)) 125 | # dump config 126 | cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config))) 127 | # init the logger before other steps 128 | timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) 129 | log_file = osp.join(cfg.work_dir, f'{timestamp}.log') 130 | logger = get_root_logger(log_file=log_file, log_level=cfg.log_level) 131 | 132 | # init the meta dict to record some important information such as 133 | # environment info and seed, which will be logged 134 | meta = dict() 135 | # log env info 136 | env_info_dict = collect_env() 137 | env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()]) 138 | dash_line = '-' * 60 + '\n' 139 | logger.info('Environment info:\n' + dash_line + env_info + '\n' + 140 | dash_line) 141 | meta['env_info'] = env_info 142 | meta['config'] = cfg.pretty_text 143 | # log some basic info 144 | logger.info(f'Distributed training: {distributed}') 145 | logger.info(f'Config:\n{cfg.pretty_text}') 146 | 147 | # set random seeds 148 | if args.seed is not None: 149 | logger.info(f'Set random seed to {args.seed}, ' 150 | f'deterministic: {args.deterministic}') 151 | set_random_seed(args.seed, deterministic=args.deterministic) 152 | cfg.seed = args.seed 153 | meta['seed'] = args.seed 154 | meta['exp_name'] = osp.basename(args.config) 155 | 156 | model = build_detector(cfg.model, 157 | train_cfg=cfg.get('train_cfg'), 158 | test_cfg=cfg.get('test_cfg')) 159 | model.init_weights() 160 | 161 | if 'custom_hooks' in cfg: 162 | for hook in cfg.custom_hooks: 163 | if hook.type == 'FisherPruningHook': 164 | hook_cfg = hook.copy() 165 | hook_cfg.pop('priority', None) 166 | from mmcv.runner.hooks import HOOKS 167 | hook_cls = HOOKS.get(hook_cfg['type']) 168 | if hasattr(hook_cls, 'after_build_model'): 169 | pruning_hook = mmcv.build_from_cfg(hook_cfg, HOOKS) 170 | pruning_hook.after_build_model(model, cfg.work_dir) 171 | 172 | datasets = [build_dataset(cfg.data.train)] 173 | if len(cfg.workflow) == 2: 174 | val_dataset = copy.deepcopy(cfg.data.val) 175 | val_dataset.pipeline = cfg.data.train.pipeline 176 | datasets.append(build_dataset(val_dataset)) 177 | if cfg.checkpoint_config is not None: 178 | # save mmdet version, config file content and class names in 179 | # checkpoints as meta data 180 | cfg.checkpoint_config.meta = dict(mmdet_version=__version__ + 181 | get_git_hash()[:7], 182 | CLASSES=datasets[0].CLASSES) 183 | # add an attribute for visualization convenience 184 | model.CLASSES = datasets[0].CLASSES 185 | train_detector(model, 186 | datasets, 187 | cfg, 188 | distributed=distributed, 189 | validate=(not args.no_validate), 190 | timestamp=timestamp, 191 | meta=meta) 192 | 193 | 194 | if __name__ == '__main__': 195 | main() 196 | -------------------------------------------------------------------------------- /fisher_pruning_hook/__init__.py: -------------------------------------------------------------------------------- 1 | from .fisher_pruning import FisherPruningHook -------------------------------------------------------------------------------- /fisher_pruning_hook/utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | import numpy as np 3 | 
import torch 4 | import torch.nn.functional as F 5 | import random 6 | 7 | 8 | # Each of these grad_fn name patterns identifies a specific kind of nn.Module 9 | CONV = ('ThnnConv2DBackward', 'CudnnConvolutionBackward') 10 | FC = ('ThAddmmBackward', 'AddmmBackward', 'MmBackward') 11 | BN = ('ThnnBatchNormBackward', 'CudnnBatchNormBackward') 12 | # modules whose grad_fn matches NON_PASS need their parameter sizes changed 13 | # according to the channels that remain after pruning 14 | NON_PASS = CONV + FC 15 | 16 | def feed_forward_once(model): 17 | inputs = torch.zeros(1, 3, 256, 256).cuda() 18 | inputs_meta = [{"img_shape": (256, 256, 3), "scale_factor": np.zeros(4, dtype=np.float32)}] 19 | neck_out = model.module.neck(model.module.backbone(inputs)) 20 | 21 | if hasattr(model.module, "head"): 22 | # for classification models 23 | return model.module.head.fc(neck_out[-1]).sum() 24 | elif hasattr(model.module, "bbox_head"): 25 | # for one-stage detectors 26 | bbox_out = model.module.bbox_head(neck_out) 27 | return sum([sum([level.sum() for level in levels]) for levels in bbox_out]) 28 | elif hasattr(model.module, "rpn_head") and hasattr(model.module, "roi_head"): 29 | # for two-stage detectors 30 | from mmdet.core import bbox2roi 31 | rpn_out = model.module.rpn_head(neck_out) 32 | proposals = model.module.rpn_head.get_bboxes(*rpn_out, inputs_meta) 33 | rois = bbox2roi(proposals) 34 | roi_out = model.module.roi_head._bbox_forward(neck_out, rois) 35 | loss = sum([sum([level.sum() for level in levels]) for levels in rpn_out]) 36 | loss += roi_out['cls_score'].sum() + roi_out['bbox_pred'].sum() 37 | return loss 38 | else: 39 | raise NotImplementedError("This kind of model is not supported yet.") 40 | 41 | 42 | 43 | def traverse(op, op2parents, pattern=NON_PASS, max_pattern_layer=-1): 44 | """Build a dict that describes the compute graph. 45 | 46 | Args: 47 | op (grad_fn): the root of the DFS 48 | op2parents (dict): maps each grad_fn matching `pattern` to the 49 | first grad_fns matching NON_PASS found by DFS from that key 50 | pattern (Tuple[str]): the grad_fn name patterns to match 51 | """ 52 | 53 | if op is not None: 54 | parents = op.next_functions 55 | if parents is not None: 56 | if match(op, pattern): 57 | if pattern is FC: 58 | op2parents[op] = dfs(parents[1][0], []) 59 | else: 60 | op2parents[op] = dfs(parents[0][0], []) 61 | if len(op2parents.keys()) == max_pattern_layer: 62 | return 63 | for parent in parents: 64 | parent = parent[0] 65 | if parent not in op2parents: 66 | traverse(parent, op2parents, pattern, max_pattern_layer) 67 | 68 | 69 | def dfs(op, visited): 70 | """DFS from an op; collect every op that matches the pattern 71 | NON_PASS. 72 | 73 | Args: 74 | op (grad_fn): the root of the DFS 75 | visited (list[grad_fn]): all the ops visited so far 76 | 77 | Returns: 78 | list: all the ops that match the pattern NON_PASS 79 | """ 80 | 81 | ret = [] 82 | if op is not None: 83 | visited.append(op) 84 | if match(op, NON_PASS): 85 | return [op] 86 | parents = op.next_functions 87 | if parents is not None: 88 | for parent in parents: 89 | parent = parent[0] 90 | if parent not in visited: 91 | ret.extend(dfs(parent, visited)) 92 | return ret 93 | 94 | 95 | def match(op, op_to_match): 96 | """Match an operation against a group of operation-name patterns; in the 97 | PyTorch graph there may be a trailing '0' or '1' (e.g. AddBackward1) 98 | after the names listed above.
99 | 100 | Args: 101 | op (grad_fn): the grad_fn whose name is matched 102 | op_to_match (list[str]): the patterns to match against 103 | 104 | Returns: 105 | bool: True when the op matches one of the patterns, else False 106 | """ 107 | 108 | for to_match in op_to_match: 109 | if re.match(to_match + '[0-1]?$', type(op).__name__): 110 | return True 111 | return False 112 | 113 | 114 | def get_channel_num(module, flag="in"): 115 | if type(module).__name__ == 'Conv2d': 116 | return getattr(module, f"{flag}_channels") 117 | elif type(module).__name__ == 'Linear': 118 | return getattr(module, f"{flag}_features") 119 | else: 120 | for attr in dir(module):  # fall back to the first in_*/out_* attribute 121 | if attr.startswith(f"{flag}_"): 122 | return getattr(module, attr) 123 | raise NotImplementedError(f"The module {type(module).__name__} is not supported yet.") 124 | 125 | 126 | def modified_forward_conv(self, feature): 127 | if not self.finetune and hasattr(self, "in_mask"):  # during pruning, zero out masked input channels 128 | in_mask = self.in_mask.unsqueeze(1).expand(-1, feature.size(1) // self.in_mask.size(0))  # one mask entry per group of consecutive channels 129 | feature = feature * in_mask.reshape(1, -1, 1, 1) 130 | return F.conv2d(feature, self.weight, self.bias, self.stride, 131 | self.padding, self.dilation, self.groups) 132 | 133 | 134 | def modified_forward_linear(self, feature): 135 | if not self.finetune and hasattr(self, "in_mask"):  # during pruning, zero out masked input features 136 | in_mask = self.in_mask.unsqueeze(1).expand(-1, feature.size(1) // self.in_mask.size(0)) 137 | feature = feature * in_mask.reshape(1, -1) 138 | return F.linear(feature, self.weight, self.bias) --------------------------------------------------------------------------------
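
The `utils.py` file above is the heart of the hook: it finds prunable layers by matching `grad_fn` class names while walking the autograd graph. As a quick illustration (not part of the repository), the hedged sketch below reimplements that pattern-matching DFS on a toy model; the extra pattern names `ConvolutionBackward` and `MkldnnConvolutionBackward` are assumptions added here because `grad_fn` class names vary across PyTorch versions.

```python
import re
import torch
import torch.nn as nn

# grad_fn class names differ between PyTorch versions; the last two
# entries are assumptions added to cover recent/CPU builds.
CONV = ('ThnnConv2DBackward', 'CudnnConvolutionBackward',
        'ConvolutionBackward', 'MkldnnConvolutionBackward')
FC = ('ThAddmmBackward', 'AddmmBackward', 'MmBackward')


def match(op, patterns):
    # a trailing digit may be appended to the class name, e.g. AddmmBackward0
    name = type(op).__name__
    return any(re.match(p + r'[0-1]?$', name) for p in patterns)


def collect(op, found, visited):
    # walk .next_functions depth-first, recording conv/linear backward ops
    if op is None or op in visited:
        return found
    visited.add(op)
    if match(op, CONV + FC):
        found.append(type(op).__name__)
    for parent, _ in op.next_functions:
        collect(parent, found, visited)
    return found


model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.Flatten(), nn.Linear(8 * 6 * 6, 10))
loss = model(torch.zeros(1, 3, 8, 8)).sum()
print(collect(loss.grad_fn, [], set()))
# e.g. ['AddmmBackward0', 'ConvolutionBackward0'] on recent PyTorch
```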
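Similarly, a minimal sketch of the input masking performed by `modified_forward_conv`: `in_mask` holds one entry per channel group, and the `unsqueeze`/`expand`/`reshape` sequence broadcasts each entry over `C // len(in_mask)` consecutive channels before the convolution runs. The tensor sizes here are illustrative, not taken from the hook.

```python
import torch

feature = torch.randn(1, 8, 4, 4)         # N, C, H, W with C = 8
in_mask = torch.tensor([1., 0., 1., 0.])  # one entry per channel group

# (4,) -> (4, 1) -> (4, 2) -> reshaped to (1, 8, 1, 1): each mask entry
# is repeated over C // len(in_mask) = 2 consecutive channels
expanded = in_mask.unsqueeze(1).expand(-1, feature.size(1) // in_mask.size(0))
masked = feature * expanded.reshape(1, -1, 1, 1)

print(masked[0, :, 0, 0])  # channels 2, 3, 6 and 7 are zeroed
```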