├── .gitignore ├── LICENSE.txt ├── README.md ├── assets ├── Arch.png └── Core.png ├── configs ├── _base_ │ ├── custom_imports.py │ ├── datasets │ │ ├── cityscapes_detection.py │ │ ├── cityscapes_instance.py │ │ ├── coco_det_1x.py │ │ ├── coco_det_3x.py │ │ ├── coco_detection.py │ │ ├── coco_instance.py │ │ ├── coco_instance_semantic.py │ │ ├── coco_panoptic.py │ │ ├── crowdhuman.py │ │ ├── deepfashion.py │ │ ├── lvis_v0.5_instance.py │ │ ├── lvis_v1_instance.py │ │ ├── openimages_detection.py │ │ ├── voc0712.py │ │ └── wider_face.py │ ├── default_runtime.py │ ├── models │ │ ├── fast_rcnn_r50_fpn.py │ │ ├── faster_rcnn_r50_caffe_c4.py │ │ ├── faster_rcnn_r50_caffe_dc5.py │ │ ├── faster_rcnn_r50_fpn.py │ │ ├── mask_rcnn_r50_caffe_c4.py │ │ ├── mask_rcnn_r50_fpn.py │ │ ├── retinanet_r50_fpn.py │ │ ├── rpn_r50_caffe_c4.py │ │ ├── rpn_r50_fpn.py │ │ └── ssd300.py │ └── schedules │ │ ├── adamw_1x.py │ │ ├── adamw_2x.py │ │ ├── adamw_30k.py │ │ ├── adamw_3x.py │ │ ├── adamw_50k.py │ │ ├── schedule_1x.py │ │ ├── schedule_20e.py │ │ └── schedule_2x.py └── date │ ├── date.py │ ├── date_r101_36e_8x2_fcos_poto_coco.py │ ├── date_r50_12e_8x2.py │ ├── date_r50_12e_8x2_fcos_poto_3dmf_coco.py │ ├── date_r50_12e_8x2_fcos_poto_coco.py │ ├── date_r50_12e_8x2_retina_poto_coco.py │ ├── date_r50_30k_8x2_fcos_poto_crowdhuman.py │ ├── date_r50_30k_8x2_retina_poto_crowdhuman.py │ ├── date_r50_36e_8x2.py │ ├── date_r50_36e_8x2_fcos_poto_3dmf_coco.py │ └── date_r50_36e_8x2_fcos_poto_coco.py ├── date ├── datasets │ ├── crowdhuman.py │ └── utils.py ├── models │ ├── heads │ │ └── date.py │ ├── modules │ │ ├── assigner.py │ │ ├── conv.py │ │ └── identity.py │ └── predictors │ │ ├── base_predictor.py │ │ ├── defcn_predictor.py │ │ ├── fcos_predictor.py │ │ ├── one2one_predictor.py │ │ └── retina_predictor.py └── utils │ ├── grid.py │ └── utils.py ├── install.sh ├── project.toml ├── setup.cfg ├── setup.py └── tools ├── analysis_tools ├── analyze_logs.py ├── analyze_results.py ├── benchmark.py ├── coco_error_analysis.py ├── confusion_matrix.py ├── eval_metric.py ├── get_flops.py ├── optimize_anchors.py ├── robustness_eval.py └── test_robustness.py ├── dataset_converters ├── cityscapes.py ├── crowdhuman.py ├── images2coco.py └── pascal_voc.py ├── deployment ├── mmdet2torchserve.py ├── mmdet_handler.py ├── onnx2tensorrt.py ├── pytorch2onnx.py ├── test.py └── test_torchserver.py ├── dist_test.sh ├── dist_train.sh ├── infer.py ├── misc ├── browse_dataset.py ├── download_dataset.py ├── gen_coco_panoptic_test_info.py ├── get_image_metas.py ├── print_config.py └── split_coco.py ├── model_converters ├── detectron2pytorch.py ├── publish_model.py ├── regnet2mmdet.py ├── selfsup2mmdet.py ├── upgrade_model_version.py └── upgrade_ssd_version.py ├── slurm_test.sh ├── slurm_train.sh ├── test.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | .cache 2 | build 3 | data 4 | logs* 5 | *.pt* 6 | *tf_logs* 7 | *.egg-info* 8 | *__pycache__* 9 | *.pytest_cache* 10 | *.so -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # DATE: Dual Assignment for End-to-End Fully Convolutional Object Detection 3 | 4 | Code of our paper [DATE: Dual Assignment for End-to-End Fully Convolutional Object Detection](https://arxiv.org/abs/2211.13859). 
5 | 6 | ![Core](./assets/Core.png) ![Architecture](./assets/Arch.png) 7 | 8 | Fully convolutional detectors discard the one-to-many assignment and adopt a one-to-one assignment strategy to achieve end-to-end detection, but they suffer from slow convergence. In this paper, we revisit these two assignment methods and find that bringing one-to-many assignment back to end-to-end fully convolutional detectors helps with model convergence. Based on this observation, we propose **D**ual **A**ssignment for end-to-end fully convolutional de**TE**ction (**DATE**). Our method constructs two branches with one-to-many and one-to-one assignment during training and speeds up the convergence of the one-to-one assignment branch by providing more supervision signals. DATE uses only the branch with the one-to-one matching strategy for model inference, so it introduces no extra inference overhead. 9 | 10 | ## Performance 11 | 12 | ### Performance on COCO 13 | 14 | | Model | epoch | AP | AP50 | AP75 | APs | APm | APl | Download | 15 | | ----------- | ----- | -- | ---- | ---- | --- | --- | --- | ---------- | 16 | | DATE-R50-F | 12 |37.3| 55.3 | 40.7 | 21.2| 40.3| 48.8| [Weights](https://mailsdueducn-my.sharepoint.com/:u:/g/personal/201700181055_mail_sdu_edu_cn/EZ3CzL3SVgtEkccSjsz0-CcBUbZasnvOXRYKnCMu6qfRYA?e=vPzh0n) | 17 | | DATE-R50-R | 12 |37.0| 54.9 | 40.4 | 20.5| 39.8| 49.0| [Weights](https://mailsdueducn-my.sharepoint.com/:u:/g/personal/201700181055_mail_sdu_edu_cn/EQiJyIZ5m4tCjgz_ORscvxQBX5PAvAWds0UdZxNk-zCDJQ?e=ax9Ty1) | 18 | | DATE-R50-F | 36 |40.6| 58.9 | 44.4 | 25.6| 44.1| 50.9| [Weights](https://mailsdueducn-my.sharepoint.com/:u:/g/personal/201700181055_mail_sdu_edu_cn/EfpK-HGolPBCv9Zv0j0739gB5ifDEViNJ4HBZTlu7-6a9w?e=mTfdzm) | 19 | | DATE-R101-F | 36 |42.2| 60.6 | 46.3 | 26.6| 45.8| 54.1| [Weights](https://mailsdueducn-my.sharepoint.com/:u:/g/personal/201700181055_mail_sdu_edu_cn/EVDHUZemYPhOkcdkXvJD67cBtHdnO0941t4_h5M9OGUcjw?e=Clpo0e) | 20 | | DATE-R50-F-3DMF| 12 |38.9| 57.1 | 42.9 | 22.5| 42.1| 51.3| [Weights](https://mailsdueducn-my.sharepoint.com/:u:/g/personal/201700181055_mail_sdu_edu_cn/EVAXXEcwhpdMghEzhaQTaqsBYqKfcRdnadPuM4xZiAqiWw?e=C7EokO) | 21 | | DATE-R50-F-3DMF| 36 |42.0| 60.3 | 46.2 | 27.3| 45.5| 53.0| [Weights](https://mailsdueducn-my.sharepoint.com/:u:/g/personal/201700181055_mail_sdu_edu_cn/Ef-J9gQPR3ZOn0UMoci5m2kBHo9S0H-5aM6jNzqj4bWhDw?e=PT18i7) | 22 | 23 | **NOTE:** The provided weights of DATE-R50-F produce slightly better results than those reported in the paper. 24 | 25 | ### Performance on CrowdHuman 26 | 27 | | Model | iters | AP50 $\uparrow$ | mMR $\downarrow$ | Recall $\uparrow$ | Download | 28 | | ----------- | ----- | ---- | ---- | ------ | ---------- | 29 | | DATE-R50-F | 30k | 90.5 | 49.0 | 97.9 | [Weights](https://mailsdueducn-my.sharepoint.com/:u:/g/personal/201700181055_mail_sdu_edu_cn/EUZdioib2OVAg4TlSlMcDOABrMaP5r6ndA-O4zYrVPZt1Q?e=ig4nbW) | 30 | | DATE-R50-R | 30k | 90.6 | 48.4 | 97.9 | [Weights](https://mailsdueducn-my.sharepoint.com/:u:/g/personal/201700181055_mail_sdu_edu_cn/EV7Nxs5KuX1Cr24GshiqJtkB1pmTf3MTbLx5PkVBBtTHvQ?e=ak1EJ6) | 31 | 32 | ## Installation 33 | 34 | Our project is based on [PyTorch](https://pytorch.org/) and [mmdetection](https://github.com/open-mmlab/mmdetection/). The code is tested with Python 3.10, PyTorch>=1.12.0, and mmdetection==2.25. Other versions might also work.
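The quick install below relies on `install.sh`. If you prefer to set up the environment manually, a sketch that roughly matches the tested versions is shown next — this is only an assumption about what a working environment looks like, not what `install.sh` actually runs, and the required `mmcv-full` build depends on your CUDA/PyTorch combination:

```bash
# Hypothetical manual setup; install.sh in this repository is the supported path.
conda create -n date python=3.10 -y
conda activate date
pip install "torch>=1.12.0" torchvision            # choose the wheel matching your CUDA version
pip install -U openmim && mim install mmcv-full    # mmdetection 2.x needs a compatible mmcv-full
pip install mmdet==2.25.0
pip install -e .                                   # install this project (uses the provided setup.py)
```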
35 | 36 | Quick install: 37 | ```bash 38 | git clone https://github.com/yiqunchen1999/date.git && cd date && bash -i ./install.sh 39 | ``` 40 | 41 | ## Dataset 42 | 43 | The datasets should be organized as follows: 44 | ``` 45 | date 46 | |_ configs 47 | |_ data 48 | |_ coco 49 | |_ annotations 50 | |_ ... 51 | |_ train2017 52 | |_ ... 53 | |_ val2017 54 | |_ ... 55 | |_ ... 56 | |_ CrowdHuman 57 | |_ annotations 58 | |_ ... 59 | |_ Images 60 | |_ ... 61 | ``` 62 | 63 | ### COCO dataset 64 | 65 | Please follow the [tutorial of mmdetection](https://mmdetection.readthedocs.io/en/stable/1_exist_data_model.html#prepare-datasets). 66 | 67 | ### CrowdHuman 68 | 69 | 1. Download [CrowdHuman](https://www.crowdhuman.org/) to your machine; 70 | 2. Unzip it and link the folder where CrowdHuman is stored to `date/data/`, i.e., 71 | ``` 72 | date 73 | |_ configs 74 | |_ data 75 | |_ coco 76 | |_ CrowdHuman 77 | |_ Images 78 | |_ ... 79 | |_ annotation_train.odgt 80 | |_ annotation_val.odgt 81 | |_ ... 82 | ``` 83 | 3. Run the dataset converter to convert the annotation format: 84 | ```bash 85 | python tools/dataset_converters/crowdhuman.py 86 | ``` 87 | 88 | ## Training and Evaluation 89 | 90 | Here are simple examples to train and evaluate DATE-R50-F. More details can be found in the [tutorial of mmdetection](https://mmdetection.readthedocs.io/en/stable/1_exist_data_model.html#). 91 | 92 | To train DATE, e.g., DATE-R50-F, on a machine with 8 GPUs, please run: 93 | ```bash 94 | ./tools/dist_train.sh configs/date/date_r50_12e_8x2_fcos_poto_coco.py 8 95 | ``` 96 | 97 | Evaluation with 8 GPUs: 98 | ```bash 99 | bash ./tools/dist_test.sh \ 100 | configs/date/date_r50_12e_8x2_fcos_poto_coco.py \ 101 | work_dirs/date_r50_12e_8x2_fcos_poto_coco/latest.pth 8 \ 102 | --eval bbox 103 | ``` 104 | 105 | **NOTE:** Due to randomness, we don't guarantee that the code will reproduce exactly the same numbers. 106 | 107 | ## Citing DATE 108 | 109 | If you find this work helpful, please consider citing our paper: 110 | 111 | ``` 112 | @misc{chen2022date, 113 | title={DATE: Dual Assignment for End-to-End Fully Convolutional Object Detection}, 114 | author={Yiqun Chen and Qiang Chen and Qinghao Hu and Jian Cheng}, 115 | year={2022}, 116 | eprint={2211.13859}, 117 | archivePrefix={arXiv}, 118 | primaryClass={cs.CV} 119 | } 120 | ``` 121 | 122 | ## Acknowledgement 123 | 124 | We thank the authors of [OneNet](https://github.com/PeizeSun/OneNet) and [DeFCN](https://github.com/Megvii-BaseDetection/DeFCN) for their open-sourced code. 125 | 126 | ## LICENSE 127 | 128 | This project is open-sourced under the Apache License 2.0; see [LICENSE](./LICENSE.txt).
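For readers who want a concrete picture of the dual-assignment scheme summarized at the top of this README: during training DATE supervises two branches (a one-to-many branch, e.g. FCOS- or RetinaNet-style, and a one-to-one branch with POTO-style matching, cf. the predictors under `date/models/predictors/`), while only the one-to-one branch is used at inference. The snippet below is a purely illustrative sketch under those assumptions — all names are made up, and it is **not** the actual implementation in `date/models/heads/date.py`:

```python
# Illustrative sketch of dual assignment, not the actual DATE head.
import torch
import torch.nn as nn


class DualAssignmentHead(nn.Module):
    """Two prediction branches share the same FPN features.

    * one2many: supervised with a classical one-to-many assignment
      (FCOS/RetinaNet style); used only as an auxiliary training signal.
    * one2one:  supervised with one-to-one (POTO-style) matching; the only
      branch evaluated at inference, so no NMS and no extra inference cost.
    """

    def __init__(self, in_channels: int = 256, num_outputs: int = 84):
        # num_outputs is illustrative, e.g. 80 class scores + 4 box offsets.
        super().__init__()
        self.one2many = nn.Conv2d(in_channels, num_outputs, 3, padding=1)
        self.one2one = nn.Conv2d(in_channels, num_outputs, 3, padding=1)

    def forward(self, feats: torch.Tensor, targets=None):
        if self.training:
            # Both branches see the same features; the one-to-many branch adds
            # extra supervision that speeds up convergence of the one-to-one branch.
            loss_o2m = self._loss(self.one2many(feats), targets, one_to_one=False)
            loss_o2o = self._loss(self.one2one(feats), targets, one_to_one=True)
            return loss_o2m + loss_o2o
        # Inference uses only the one-to-one branch (end-to-end, NMS-free).
        return self.one2one(feats)

    def _loss(self, preds, targets, one_to_one: bool) -> torch.Tensor:
        # Placeholder: a real implementation runs label assignment
        # (one-to-many, or bipartite one-to-one matching) and computes
        # classification + box regression losses on the matched pairs.
        return preds.sum() * 0.0
```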
129 | -------------------------------------------------------------------------------- /assets/Arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YiqunChen1999/date/5daf092c74a88b666db8956c5772df55dafb018a/assets/Arch.png -------------------------------------------------------------------------------- /assets/Core.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YiqunChen1999/date/5daf092c74a88b666db8956c5772df55dafb018a/assets/Core.png -------------------------------------------------------------------------------- /configs/_base_/custom_imports.py: -------------------------------------------------------------------------------- 1 | custom_imports = dict( 2 | imports=[ 3 | 'date.models.heads.date', 4 | 'date.models.modules.conv', 5 | 'date.models.modules.identity', 6 | 'date.models.modules.assigner', 7 | 'date.models.predictors.base_predictor', 8 | 'date.models.predictors.defcn_predictor', 9 | 'date.models.predictors.fcos_predictor', 10 | 'date.models.predictors.one2one_predictor', 11 | 'date.models.predictors.retina_predictor', 12 | 'date.datasets.crowdhuman', 13 | ], 14 | allow_failed_imports=False) 15 | -------------------------------------------------------------------------------- /configs/_base_/datasets/cityscapes_detection.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CityscapesDataset' 3 | data_root = 'data/cityscapes/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True), 9 | dict( 10 | type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True), 11 | dict(type='RandomFlip', flip_ratio=0.5), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size_divisor=32), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict( 20 | type='MultiScaleFlipAug', 21 | img_scale=(2048, 1024), 22 | flip=False, 23 | transforms=[ 24 | dict(type='Resize', keep_ratio=True), 25 | dict(type='RandomFlip'), 26 | dict(type='Normalize', **img_norm_cfg), 27 | dict(type='Pad', size_divisor=32), 28 | dict(type='ImageToTensor', keys=['img']), 29 | dict(type='Collect', keys=['img']), 30 | ]) 31 | ] 32 | data = dict( 33 | samples_per_gpu=1, 34 | workers_per_gpu=2, 35 | train=dict( 36 | type='RepeatDataset', 37 | times=8, 38 | dataset=dict( 39 | type=dataset_type, 40 | ann_file=data_root + 41 | 'annotations/instancesonly_filtered_gtFine_train.json', 42 | img_prefix=data_root + 'leftImg8bit/train/', 43 | pipeline=train_pipeline)), 44 | val=dict( 45 | type=dataset_type, 46 | ann_file=data_root + 47 | 'annotations/instancesonly_filtered_gtFine_val.json', 48 | img_prefix=data_root + 'leftImg8bit/val/', 49 | pipeline=test_pipeline), 50 | test=dict( 51 | type=dataset_type, 52 | ann_file=data_root + 53 | 'annotations/instancesonly_filtered_gtFine_test.json', 54 | img_prefix=data_root + 'leftImg8bit/test/', 55 | pipeline=test_pipeline)) 56 | evaluation = dict(interval=1, metric='bbox') 57 | -------------------------------------------------------------------------------- /configs/_base_/datasets/cityscapes_instance.py: 
-------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CityscapesDataset' 3 | data_root = 'data/cityscapes/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 9 | dict( 10 | type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True), 11 | dict(type='RandomFlip', flip_ratio=0.5), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size_divisor=32), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict( 20 | type='MultiScaleFlipAug', 21 | img_scale=(2048, 1024), 22 | flip=False, 23 | transforms=[ 24 | dict(type='Resize', keep_ratio=True), 25 | dict(type='RandomFlip'), 26 | dict(type='Normalize', **img_norm_cfg), 27 | dict(type='Pad', size_divisor=32), 28 | dict(type='ImageToTensor', keys=['img']), 29 | dict(type='Collect', keys=['img']), 30 | ]) 31 | ] 32 | data = dict( 33 | samples_per_gpu=1, 34 | workers_per_gpu=2, 35 | train=dict( 36 | type='RepeatDataset', 37 | times=8, 38 | dataset=dict( 39 | type=dataset_type, 40 | ann_file=data_root + 41 | 'annotations/instancesonly_filtered_gtFine_train.json', 42 | img_prefix=data_root + 'leftImg8bit/train/', 43 | pipeline=train_pipeline)), 44 | val=dict( 45 | type=dataset_type, 46 | ann_file=data_root + 47 | 'annotations/instancesonly_filtered_gtFine_val.json', 48 | img_prefix=data_root + 'leftImg8bit/val/', 49 | pipeline=test_pipeline), 50 | test=dict( 51 | type=dataset_type, 52 | ann_file=data_root + 53 | 'annotations/instancesonly_filtered_gtFine_test.json', 54 | img_prefix=data_root + 'leftImg8bit/test/', 55 | pipeline=test_pipeline)) 56 | evaluation = dict(metric=['bbox', 'segm']) 57 | -------------------------------------------------------------------------------- /configs/_base_/datasets/coco_det_1x.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CocoDataset' 3 | data_root = 'data/coco/' 4 | 5 | img_norm_cfg = dict( 6 | mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', with_bbox=True), 10 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 11 | dict(type='RandomFlip', flip_ratio=0.5), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size_divisor=32), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict( 20 | type='MultiScaleFlipAug', 21 | img_scale=(1333, 800), 22 | flip=False, 23 | transforms=[ 24 | dict(type='Resize', keep_ratio=True), 25 | dict(type='RandomFlip'), 26 | dict(type='Normalize', **img_norm_cfg), 27 | dict(type='Pad', size_divisor=32), 28 | dict(type='ImageToTensor', keys=['img']), 29 | dict(type='Collect', keys=['img']), 30 | ]) 31 | ] 32 | data = dict( 33 | samples_per_gpu=2, 34 | workers_per_gpu=2, 35 | train=dict( 36 | type=dataset_type, 37 | ann_file=data_root + 'annotations/instances_train2017.json', 38 | img_prefix=data_root + 'train2017/', 39 | pipeline=train_pipeline), 40 | val=dict( 41 | type=dataset_type, 42 | ann_file=data_root + 
'annotations/instances_val2017.json', 43 | img_prefix=data_root + 'val2017/', 44 | pipeline=test_pipeline), 45 | test=dict( 46 | type=dataset_type, 47 | ann_file=data_root + 'annotations/instances_val2017.json', 48 | img_prefix=data_root + 'val2017/', 49 | pipeline=test_pipeline)) 50 | evaluation = dict(interval=1, metric='bbox') 51 | -------------------------------------------------------------------------------- /configs/_base_/datasets/coco_det_3x.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CocoDataset' 3 | data_root = 'data/coco/' 4 | 5 | img_norm_cfg = dict( 6 | mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) 7 | # Use DETR strategy. 8 | train_pipeline = [ 9 | dict(type='LoadImageFromFile'), 10 | dict(type='LoadAnnotations', with_bbox=True), 11 | dict(type='RandomFlip', flip_ratio=0.5), 12 | dict( 13 | type='AutoAugment', 14 | policies=[[ 15 | dict( 16 | type='Resize', 17 | img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), 18 | (608, 1333), (640, 1333), (672, 1333), (704, 1333), 19 | (736, 1333), (768, 1333), (800, 1333)], 20 | multiscale_mode='value', 21 | keep_ratio=True) 22 | ], 23 | [ 24 | dict( 25 | type='Resize', 26 | img_scale=[(400, 1333), (500, 1333), (600, 1333)], 27 | multiscale_mode='value', 28 | keep_ratio=True), 29 | dict( 30 | type='RandomCrop', 31 | crop_type='absolute_range', 32 | crop_size=(384, 600), 33 | allow_negative_crop=True), 34 | dict( 35 | type='Resize', 36 | img_scale=[(480, 1333), (512, 1333), (544, 1333), 37 | (576, 1333), (608, 1333), (640, 1333), 38 | (672, 1333), (704, 1333), (736, 1333), 39 | (768, 1333), (800, 1333)], 40 | multiscale_mode='value', 41 | override=True, 42 | keep_ratio=True) 43 | ]]), 44 | dict(type='Normalize', **img_norm_cfg), 45 | dict(type='Pad', size_divisor=32), 46 | dict(type='DefaultFormatBundle'), 47 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 48 | ] 49 | test_pipeline = [ 50 | dict(type='LoadImageFromFile'), 51 | dict( 52 | type='MultiScaleFlipAug', 53 | img_scale=(1333, 800), 54 | flip=False, 55 | transforms=[ 56 | dict(type='Resize', keep_ratio=True), 57 | dict(type='RandomFlip'), 58 | dict(type='Normalize', **img_norm_cfg), 59 | dict(type='Pad', size_divisor=32), 60 | dict(type='ImageToTensor', keys=['img']), 61 | dict(type='Collect', keys=['img']), 62 | ]) 63 | ] 64 | data = dict( 65 | samples_per_gpu=2, 66 | workers_per_gpu=2, 67 | train=dict( 68 | type=dataset_type, 69 | ann_file=data_root + 'annotations/instances_train2017.json', 70 | img_prefix=data_root + 'train2017/', 71 | pipeline=train_pipeline), 72 | val=dict( 73 | type=dataset_type, 74 | ann_file=data_root + 'annotations/instances_val2017.json', 75 | img_prefix=data_root + 'val2017/', 76 | pipeline=test_pipeline), 77 | test=dict( 78 | type=dataset_type, 79 | ann_file=data_root + 'annotations/instances_val2017.json', 80 | img_prefix=data_root + 'val2017/', 81 | pipeline=test_pipeline)) 82 | evaluation = dict(interval=1, metric='bbox') 83 | -------------------------------------------------------------------------------- /configs/_base_/datasets/coco_detection.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CocoDataset' 3 | data_root = 'data/coco/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', 
with_bbox=True), 9 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(1333, 800), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=2, 33 | workers_per_gpu=2, 34 | train=dict( 35 | type=dataset_type, 36 | ann_file=data_root + 'annotations/instances_train2017.json', 37 | img_prefix=data_root + 'train2017/', 38 | pipeline=train_pipeline), 39 | val=dict( 40 | type=dataset_type, 41 | ann_file=data_root + 'annotations/instances_val2017.json', 42 | img_prefix=data_root + 'val2017/', 43 | pipeline=test_pipeline), 44 | test=dict( 45 | type=dataset_type, 46 | ann_file=data_root + 'annotations/instances_val2017.json', 47 | img_prefix=data_root + 'val2017/', 48 | pipeline=test_pipeline)) 49 | evaluation = dict(interval=1, metric='bbox') 50 | -------------------------------------------------------------------------------- /configs/_base_/datasets/coco_instance.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CocoDataset' 3 | data_root = 'data/coco/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 9 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(1333, 800), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=2, 33 | workers_per_gpu=2, 34 | train=dict( 35 | type=dataset_type, 36 | ann_file=data_root + 'annotations/instances_train2017.json', 37 | img_prefix=data_root + 'train2017/', 38 | pipeline=train_pipeline), 39 | val=dict( 40 | type=dataset_type, 41 | ann_file=data_root + 'annotations/instances_val2017.json', 42 | img_prefix=data_root + 'val2017/', 43 | pipeline=test_pipeline), 44 | test=dict( 45 | type=dataset_type, 46 | ann_file=data_root + 'annotations/instances_val2017.json', 47 | img_prefix=data_root + 'val2017/', 48 | pipeline=test_pipeline)) 49 | evaluation = dict(metric=['bbox', 'segm']) 50 | -------------------------------------------------------------------------------- /configs/_base_/datasets/coco_instance_semantic.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | 
dataset_type = 'CocoDataset' 3 | data_root = 'data/coco/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict( 9 | type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), 10 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 11 | dict(type='RandomFlip', flip_ratio=0.5), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size_divisor=32), 14 | dict(type='SegRescale', scale_factor=1 / 8), 15 | dict(type='DefaultFormatBundle'), 16 | dict( 17 | type='Collect', 18 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), 19 | ] 20 | test_pipeline = [ 21 | dict(type='LoadImageFromFile'), 22 | dict( 23 | type='MultiScaleFlipAug', 24 | img_scale=(1333, 800), 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip', flip_ratio=0.5), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='Pad', size_divisor=32), 31 | dict(type='ImageToTensor', keys=['img']), 32 | dict(type='Collect', keys=['img']), 33 | ]) 34 | ] 35 | data = dict( 36 | samples_per_gpu=2, 37 | workers_per_gpu=2, 38 | train=dict( 39 | type=dataset_type, 40 | ann_file=data_root + 'annotations/instances_train2017.json', 41 | img_prefix=data_root + 'train2017/', 42 | seg_prefix=data_root + 'stuffthingmaps/train2017/', 43 | pipeline=train_pipeline), 44 | val=dict( 45 | type=dataset_type, 46 | ann_file=data_root + 'annotations/instances_val2017.json', 47 | img_prefix=data_root + 'val2017/', 48 | pipeline=test_pipeline), 49 | test=dict( 50 | type=dataset_type, 51 | ann_file=data_root + 'annotations/instances_val2017.json', 52 | img_prefix=data_root + 'val2017/', 53 | pipeline=test_pipeline)) 54 | evaluation = dict(metric=['bbox', 'segm']) 55 | -------------------------------------------------------------------------------- /configs/_base_/datasets/coco_panoptic.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CocoPanopticDataset' 3 | data_root = 'data/coco/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict( 9 | type='LoadPanopticAnnotations', 10 | with_bbox=True, 11 | with_mask=True, 12 | with_seg=True), 13 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 14 | dict(type='RandomFlip', flip_ratio=0.5), 15 | dict(type='Normalize', **img_norm_cfg), 16 | dict(type='Pad', size_divisor=32), 17 | dict(type='SegRescale', scale_factor=1 / 4), 18 | dict(type='DefaultFormatBundle'), 19 | dict( 20 | type='Collect', 21 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), 22 | ] 23 | test_pipeline = [ 24 | dict(type='LoadImageFromFile'), 25 | dict( 26 | type='MultiScaleFlipAug', 27 | img_scale=(1333, 800), 28 | flip=False, 29 | transforms=[ 30 | dict(type='Resize', keep_ratio=True), 31 | dict(type='RandomFlip'), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='Pad', size_divisor=32), 34 | dict(type='ImageToTensor', keys=['img']), 35 | dict(type='Collect', keys=['img']), 36 | ]) 37 | ] 38 | data = dict( 39 | samples_per_gpu=2, 40 | workers_per_gpu=2, 41 | train=dict( 42 | type=dataset_type, 43 | ann_file=data_root + 'annotations/panoptic_train2017.json', 44 | img_prefix=data_root + 'train2017/', 45 | seg_prefix=data_root + 'annotations/panoptic_train2017/', 46 | 
pipeline=train_pipeline), 47 | val=dict( 48 | type=dataset_type, 49 | ann_file=data_root + 'annotations/panoptic_val2017.json', 50 | img_prefix=data_root + 'val2017/', 51 | seg_prefix=data_root + 'annotations/panoptic_val2017/', 52 | pipeline=test_pipeline), 53 | test=dict( 54 | type=dataset_type, 55 | ann_file=data_root + 'annotations/panoptic_val2017.json', 56 | img_prefix=data_root + 'val2017/', 57 | seg_prefix=data_root + 'annotations/panoptic_val2017/', 58 | pipeline=test_pipeline)) 59 | evaluation = dict(interval=1, metric=['PQ']) 60 | -------------------------------------------------------------------------------- /configs/_base_/datasets/crowdhuman.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CrowdHumanDataset' 3 | data_root = 'data/CrowdHuman/' 4 | 5 | img_norm_cfg = dict( 6 | mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', with_bbox=True), 10 | # dict(type='Resize', img_scale=(1400, 800), keep_ratio=True), 11 | dict(type='RandomFlip', flip_ratio=0.5), 12 | dict( 13 | type='AutoAugment', 14 | policies=[[ 15 | dict( 16 | type='Resize', 17 | img_scale=[(640, 1400), (672, 1400), (704, 1400), 18 | (736, 1400), (768, 1400), (800, 1400), 19 | (832, 1400), (864, 1400), (896, 1400)], 20 | multiscale_mode='value', 21 | keep_ratio=True) 22 | ], 23 | [ 24 | dict( 25 | type='Resize', 26 | img_scale=[(400, 1400), (500, 1400), (600, 1400)], 27 | multiscale_mode='value', 28 | keep_ratio=True), 29 | dict( 30 | type='RandomCrop', 31 | crop_type='absolute_range', 32 | crop_size=(384, 600), 33 | allow_negative_crop=True), 34 | dict( 35 | type='Resize', 36 | img_scale=[(640, 1400), (672, 1400), (704, 1400), 37 | (736, 1400), (768, 1400), (800, 1400), 38 | (832, 1400), (864, 1400), (896, 1400)], 39 | multiscale_mode='value', 40 | override=True, 41 | keep_ratio=True) 42 | ]]), 43 | dict(type='Normalize', **img_norm_cfg), 44 | dict(type='Pad', size_divisor=32), 45 | dict(type='DefaultFormatBundle'), 46 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 47 | ] 48 | test_pipeline = [ 49 | dict(type='LoadImageFromFile'), 50 | dict( 51 | type='MultiScaleFlipAug', 52 | img_scale=(1400, 800), 53 | flip=False, 54 | transforms=[ 55 | dict(type='Resize', keep_ratio=True), 56 | dict(type='RandomFlip'), 57 | dict(type='Normalize', **img_norm_cfg), 58 | dict(type='Pad', size_divisor=32), 59 | dict(type='ImageToTensor', keys=['img']), 60 | dict(type='Collect', keys=['img']), 61 | ]) 62 | ] 63 | data = dict( 64 | samples_per_gpu=2, 65 | workers_per_gpu=2, 66 | train=dict( 67 | type=dataset_type, 68 | classes=('person', ), 69 | ann_file=data_root + 'annotations/train.json', 70 | img_prefix=data_root + 'Images/', 71 | pipeline=train_pipeline), 72 | val=dict( 73 | type=dataset_type, 74 | classes=('person', ), 75 | ann_file=data_root + 'annotations/val.json', 76 | img_prefix=data_root + 'Images/', 77 | pipeline=test_pipeline), 78 | test=dict( 79 | type=dataset_type, 80 | classes=('person', ), 81 | ann_file=data_root + 'annotations/val.json', 82 | img_prefix=data_root + 'Images/', 83 | pipeline=test_pipeline)) 84 | evaluation = dict(interval=5000, # not evaluate during training. 
85 | metric='bbox', 86 | classwise=True, 87 | proposal_nums=(500, 500, 1000)) 88 | -------------------------------------------------------------------------------- /configs/_base_/datasets/deepfashion.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'DeepFashionDataset' 3 | data_root = 'data/DeepFashion/In-shop/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 9 | dict(type='Resize', img_scale=(750, 1101), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(750, 1101), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | imgs_per_gpu=2, 33 | workers_per_gpu=1, 34 | train=dict( 35 | type=dataset_type, 36 | ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json', 37 | img_prefix=data_root + 'Img/', 38 | pipeline=train_pipeline, 39 | data_root=data_root), 40 | val=dict( 41 | type=dataset_type, 42 | ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json', 43 | img_prefix=data_root + 'Img/', 44 | pipeline=test_pipeline, 45 | data_root=data_root), 46 | test=dict( 47 | type=dataset_type, 48 | ann_file=data_root + 49 | 'annotations/DeepFashion_segmentation_gallery.json', 50 | img_prefix=data_root + 'Img/', 51 | pipeline=test_pipeline, 52 | data_root=data_root)) 53 | evaluation = dict(interval=5, metric=['bbox', 'segm']) 54 | -------------------------------------------------------------------------------- /configs/_base_/datasets/lvis_v0.5_instance.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | _base_ = 'coco_instance.py' 3 | dataset_type = 'LVISV05Dataset' 4 | data_root = 'data/lvis_v0.5/' 5 | data = dict( 6 | samples_per_gpu=2, 7 | workers_per_gpu=2, 8 | train=dict( 9 | _delete_=True, 10 | type='ClassBalancedDataset', 11 | oversample_thr=1e-3, 12 | dataset=dict( 13 | type=dataset_type, 14 | ann_file=data_root + 'annotations/lvis_v0.5_train.json', 15 | img_prefix=data_root + 'train2017/')), 16 | val=dict( 17 | type=dataset_type, 18 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 19 | img_prefix=data_root + 'val2017/'), 20 | test=dict( 21 | type=dataset_type, 22 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 23 | img_prefix=data_root + 'val2017/')) 24 | evaluation = dict(metric=['bbox', 'segm']) 25 | -------------------------------------------------------------------------------- /configs/_base_/datasets/lvis_v1_instance.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | _base_ = 'coco_instance.py' 3 | dataset_type = 'LVISV1Dataset' 4 | data_root = 'data/lvis_v1/' 5 | data = dict( 6 | samples_per_gpu=2, 7 | workers_per_gpu=2, 8 | train=dict( 9 | _delete_=True, 10 | 
type='ClassBalancedDataset', 11 | oversample_thr=1e-3, 12 | dataset=dict( 13 | type=dataset_type, 14 | ann_file=data_root + 'annotations/lvis_v1_train.json', 15 | img_prefix=data_root)), 16 | val=dict( 17 | type=dataset_type, 18 | ann_file=data_root + 'annotations/lvis_v1_val.json', 19 | img_prefix=data_root), 20 | test=dict( 21 | type=dataset_type, 22 | ann_file=data_root + 'annotations/lvis_v1_val.json', 23 | img_prefix=data_root)) 24 | evaluation = dict(metric=['bbox', 'segm']) 25 | -------------------------------------------------------------------------------- /configs/_base_/datasets/openimages_detection.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'OpenImagesDataset' 3 | data_root = 'data/OpenImages/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True, denorm_bbox=True), 9 | dict(type='Resize', img_scale=(1024, 800), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(1024, 800), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ], 30 | ), 31 | ] 32 | data = dict( 33 | samples_per_gpu=2, 34 | workers_per_gpu=0, # workers_per_gpu > 0 may occur out of memory 35 | train=dict( 36 | type=dataset_type, 37 | ann_file=data_root + 'annotations/oidv6-train-annotations-bbox.csv', 38 | img_prefix=data_root + 'OpenImages/train/', 39 | label_file=data_root + 'annotations/class-descriptions-boxable.csv', 40 | hierarchy_file=data_root + 41 | 'annotations/bbox_labels_600_hierarchy.json', 42 | pipeline=train_pipeline), 43 | val=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 'annotations/validation-annotations-bbox.csv', 46 | img_prefix=data_root + 'OpenImages/validation/', 47 | label_file=data_root + 'annotations/class-descriptions-boxable.csv', 48 | hierarchy_file=data_root + 49 | 'annotations/bbox_labels_600_hierarchy.json', 50 | meta_file=data_root + 'annotations/validation-image-metas.pkl', 51 | image_level_ann_file=data_root + 52 | 'annotations/validation-annotations-human-imagelabels-boxable.csv', 53 | pipeline=test_pipeline), 54 | test=dict( 55 | type=dataset_type, 56 | ann_file=data_root + 'annotations/validation-annotations-bbox.csv', 57 | img_prefix=data_root + 'OpenImages/validation/', 58 | label_file=data_root + 'annotations/class-descriptions-boxable.csv', 59 | hierarchy_file=data_root + 60 | 'annotations/bbox_labels_600_hierarchy.json', 61 | meta_file=data_root + 'annotations/validation-image-metas.pkl', 62 | image_level_ann_file=data_root + 63 | 'annotations/validation-annotations-human-imagelabels-boxable.csv', 64 | pipeline=test_pipeline)) 65 | evaluation = dict(interval=1, metric='mAP') 66 | -------------------------------------------------------------------------------- /configs/_base_/datasets/voc0712.py: 
-------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'VOCDataset' 3 | data_root = 'data/VOCdevkit/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True), 9 | dict(type='Resize', img_scale=(1000, 600), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(1000, 600), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=2, 33 | workers_per_gpu=2, 34 | train=dict( 35 | type='RepeatDataset', 36 | times=3, 37 | dataset=dict( 38 | type=dataset_type, 39 | ann_file=[ 40 | data_root + 'VOC2007/ImageSets/Main/trainval.txt', 41 | data_root + 'VOC2012/ImageSets/Main/trainval.txt' 42 | ], 43 | img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'], 44 | pipeline=train_pipeline)), 45 | val=dict( 46 | type=dataset_type, 47 | ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', 48 | img_prefix=data_root + 'VOC2007/', 49 | pipeline=test_pipeline), 50 | test=dict( 51 | type=dataset_type, 52 | ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', 53 | img_prefix=data_root + 'VOC2007/', 54 | pipeline=test_pipeline)) 55 | evaluation = dict(interval=1, metric='mAP') 56 | -------------------------------------------------------------------------------- /configs/_base_/datasets/wider_face.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'WIDERFaceDataset' 3 | data_root = 'data/WIDERFace/' 4 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile', to_float32=True), 7 | dict(type='LoadAnnotations', with_bbox=True), 8 | dict( 9 | type='PhotoMetricDistortion', 10 | brightness_delta=32, 11 | contrast_range=(0.5, 1.5), 12 | saturation_range=(0.5, 1.5), 13 | hue_delta=18), 14 | dict( 15 | type='Expand', 16 | mean=img_norm_cfg['mean'], 17 | to_rgb=img_norm_cfg['to_rgb'], 18 | ratio_range=(1, 4)), 19 | dict( 20 | type='MinIoURandomCrop', 21 | min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), 22 | min_crop_size=0.3), 23 | dict(type='Resize', img_scale=(300, 300), keep_ratio=False), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='RandomFlip', flip_ratio=0.5), 26 | dict(type='DefaultFormatBundle'), 27 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 28 | ] 29 | test_pipeline = [ 30 | dict(type='LoadImageFromFile'), 31 | dict( 32 | type='MultiScaleFlipAug', 33 | img_scale=(300, 300), 34 | flip=False, 35 | transforms=[ 36 | dict(type='Resize', keep_ratio=False), 37 | dict(type='Normalize', **img_norm_cfg), 38 | dict(type='ImageToTensor', keys=['img']), 39 | dict(type='Collect', keys=['img']), 40 | ]) 41 | ] 42 | data = dict( 43 | samples_per_gpu=60, 44 | workers_per_gpu=2, 45 | train=dict( 46 | type='RepeatDataset', 47 | 
times=2, 48 | dataset=dict( 49 | type=dataset_type, 50 | ann_file=data_root + 'train.txt', 51 | img_prefix=data_root + 'WIDER_train/', 52 | min_size=17, 53 | pipeline=train_pipeline)), 54 | val=dict( 55 | type=dataset_type, 56 | ann_file=data_root + 'val.txt', 57 | img_prefix=data_root + 'WIDER_val/', 58 | pipeline=test_pipeline), 59 | test=dict( 60 | type=dataset_type, 61 | ann_file=data_root + 'val.txt', 62 | img_prefix=data_root + 'WIDER_val/', 63 | pipeline=test_pipeline)) 64 | -------------------------------------------------------------------------------- /configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1, max_keep_ckpts=36) 2 | # yapf:disable 3 | log_config = dict( 4 | interval=50, 5 | hooks=[ 6 | dict(type='TextLoggerHook'), 7 | dict(type='TensorboardLoggerHook'), 8 | # dict(type='MMDetWandbHook', 9 | # init_kwargs={ 10 | # 'project': 'NMS-Free', 11 | # 'group': 'convdetr' 12 | # }, 13 | # interval=12, 14 | # log_checkpoint=True, 15 | # log_checkpoint_metadata=True, 16 | # num_eval_images=100), 17 | ]) 18 | # yapf:enable 19 | custom_hooks = [dict(type='NumClassCheckHook')] 20 | 21 | dist_params = dict(backend='nccl') 22 | log_level = 'INFO' 23 | load_from = None 24 | resume_from = None 25 | workflow = [('train', 1)] 26 | 27 | # disable opencv multithreading to avoid system being overloaded 28 | opencv_num_threads = 0 29 | # set multi-process start method as `fork` to speed up the training 30 | mp_start_method = 'fork' 31 | 32 | # Default setting for scaling LR automatically 33 | # - `enable` means enable scaling LR automatically 34 | # or not by default. 35 | # - `base_batch_size` = (8 GPUs) x (2 samples per GPU). 36 | auto_scale_lr = dict(enable=False, base_batch_size=16) 37 | -------------------------------------------------------------------------------- /configs/_base_/models/fast_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FastRCNN', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=True), 11 | norm_eval=True, 12 | style='pytorch', 13 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | roi_head=dict( 20 | type='StandardRoIHead', 21 | bbox_roi_extractor=dict( 22 | type='SingleRoIExtractor', 23 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 24 | out_channels=256, 25 | featmap_strides=[4, 8, 16, 32]), 26 | bbox_head=dict( 27 | type='Shared2FCBBoxHead', 28 | in_channels=256, 29 | fc_out_channels=1024, 30 | roi_feat_size=7, 31 | num_classes=80, 32 | bbox_coder=dict( 33 | type='DeltaXYWHBBoxCoder', 34 | target_means=[0., 0., 0., 0.], 35 | target_stds=[0.1, 0.1, 0.2, 0.2]), 36 | reg_class_agnostic=False, 37 | loss_cls=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 39 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 40 | # model training and testing settings 41 | train_cfg=dict( 42 | rcnn=dict( 43 | assigner=dict( 44 | type='MaxIoUAssigner', 45 | pos_iou_thr=0.5, 46 | neg_iou_thr=0.5, 47 | min_pos_iou=0.5, 48 | match_low_quality=False, 49 | ignore_iof_thr=-1), 50 | sampler=dict( 51 | type='RandomSampler', 52 | num=512, 53 | pos_fraction=0.25, 54 | neg_pos_ub=-1, 55 | 
add_gt_as_proposals=True), 56 | pos_weight=-1, 57 | debug=False)), 58 | test_cfg=dict( 59 | rcnn=dict( 60 | score_thr=0.05, 61 | nms=dict(type='nms', iou_threshold=0.5), 62 | max_per_img=100))) 63 | -------------------------------------------------------------------------------- /configs/_base_/models/faster_rcnn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='FasterRCNN', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=3, 9 | strides=(1, 2, 2), 10 | dilations=(1, 1, 1), 11 | out_indices=(2, ), 12 | frozen_stages=1, 13 | norm_cfg=norm_cfg, 14 | norm_eval=True, 15 | style='caffe', 16 | init_cfg=dict( 17 | type='Pretrained', 18 | checkpoint='open-mmlab://detectron2/resnet50_caffe')), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=1024, 22 | feat_channels=1024, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[2, 4, 8, 16, 32], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[16]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | shared_head=dict( 38 | type='ResLayer', 39 | depth=50, 40 | stage=3, 41 | stride=2, 42 | dilation=1, 43 | style='caffe', 44 | norm_cfg=norm_cfg, 45 | norm_eval=True, 46 | init_cfg=dict( 47 | type='Pretrained', 48 | checkpoint='open-mmlab://detectron2/resnet50_caffe')), 49 | bbox_roi_extractor=dict( 50 | type='SingleRoIExtractor', 51 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 52 | out_channels=1024, 53 | featmap_strides=[16]), 54 | bbox_head=dict( 55 | type='BBoxHead', 56 | with_avg_pool=True, 57 | roi_feat_size=7, 58 | in_channels=2048, 59 | num_classes=80, 60 | bbox_coder=dict( 61 | type='DeltaXYWHBBoxCoder', 62 | target_means=[0., 0., 0., 0.], 63 | target_stds=[0.1, 0.1, 0.2, 0.2]), 64 | reg_class_agnostic=False, 65 | loss_cls=dict( 66 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 67 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 68 | # model training and testing settings 69 | train_cfg=dict( 70 | rpn=dict( 71 | assigner=dict( 72 | type='MaxIoUAssigner', 73 | pos_iou_thr=0.7, 74 | neg_iou_thr=0.3, 75 | min_pos_iou=0.3, 76 | match_low_quality=True, 77 | ignore_iof_thr=-1), 78 | sampler=dict( 79 | type='RandomSampler', 80 | num=256, 81 | pos_fraction=0.5, 82 | neg_pos_ub=-1, 83 | add_gt_as_proposals=False), 84 | allowed_border=-1, 85 | pos_weight=-1, 86 | debug=False), 87 | rpn_proposal=dict( 88 | nms_pre=12000, 89 | max_per_img=2000, 90 | nms=dict(type='nms', iou_threshold=0.7), 91 | min_bbox_size=0), 92 | rcnn=dict( 93 | assigner=dict( 94 | type='MaxIoUAssigner', 95 | pos_iou_thr=0.5, 96 | neg_iou_thr=0.5, 97 | min_pos_iou=0.5, 98 | match_low_quality=False, 99 | ignore_iof_thr=-1), 100 | sampler=dict( 101 | type='RandomSampler', 102 | num=512, 103 | pos_fraction=0.25, 104 | neg_pos_ub=-1, 105 | add_gt_as_proposals=True), 106 | pos_weight=-1, 107 | debug=False)), 108 | test_cfg=dict( 109 | rpn=dict( 110 | nms_pre=6000, 111 | max_per_img=1000, 112 | nms=dict(type='nms', iou_threshold=0.7), 113 | min_bbox_size=0), 114 | rcnn=dict( 115 | score_thr=0.05, 116 | nms=dict(type='nms', iou_threshold=0.5), 117 | max_per_img=100))) 118 | 
-------------------------------------------------------------------------------- /configs/_base_/models/faster_rcnn_r50_caffe_dc5.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='FasterRCNN', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | strides=(1, 2, 2, 1), 10 | dilations=(1, 1, 1, 2), 11 | out_indices=(3, ), 12 | frozen_stages=1, 13 | norm_cfg=norm_cfg, 14 | norm_eval=True, 15 | style='caffe', 16 | init_cfg=dict( 17 | type='Pretrained', 18 | checkpoint='open-mmlab://detectron2/resnet50_caffe')), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=2048, 22 | feat_channels=2048, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[2, 4, 8, 16, 32], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[16]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=2048, 41 | featmap_strides=[16]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=2048, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 56 | # model training and testing settings 57 | train_cfg=dict( 58 | rpn=dict( 59 | assigner=dict( 60 | type='MaxIoUAssigner', 61 | pos_iou_thr=0.7, 62 | neg_iou_thr=0.3, 63 | min_pos_iou=0.3, 64 | match_low_quality=True, 65 | ignore_iof_thr=-1), 66 | sampler=dict( 67 | type='RandomSampler', 68 | num=256, 69 | pos_fraction=0.5, 70 | neg_pos_ub=-1, 71 | add_gt_as_proposals=False), 72 | allowed_border=0, 73 | pos_weight=-1, 74 | debug=False), 75 | rpn_proposal=dict( 76 | nms_pre=12000, 77 | max_per_img=2000, 78 | nms=dict(type='nms', iou_threshold=0.7), 79 | min_bbox_size=0), 80 | rcnn=dict( 81 | assigner=dict( 82 | type='MaxIoUAssigner', 83 | pos_iou_thr=0.5, 84 | neg_iou_thr=0.5, 85 | min_pos_iou=0.5, 86 | match_low_quality=False, 87 | ignore_iof_thr=-1), 88 | sampler=dict( 89 | type='RandomSampler', 90 | num=512, 91 | pos_fraction=0.25, 92 | neg_pos_ub=-1, 93 | add_gt_as_proposals=True), 94 | pos_weight=-1, 95 | debug=False)), 96 | test_cfg=dict( 97 | rpn=dict( 98 | nms=dict(type='nms', iou_threshold=0.7), 99 | nms_pre=6000, 100 | max_per_img=1000, 101 | min_bbox_size=0), 102 | rcnn=dict( 103 | score_thr=0.05, 104 | nms=dict(type='nms', iou_threshold=0.5), 105 | max_per_img=100))) 106 | -------------------------------------------------------------------------------- /configs/_base_/models/faster_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FasterRCNN', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=True), 11 | norm_eval=True, 12 | style='pytorch', 13 | 
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 56 | # model training and testing settings 57 | train_cfg=dict( 58 | rpn=dict( 59 | assigner=dict( 60 | type='MaxIoUAssigner', 61 | pos_iou_thr=0.7, 62 | neg_iou_thr=0.3, 63 | min_pos_iou=0.3, 64 | match_low_quality=True, 65 | ignore_iof_thr=-1), 66 | sampler=dict( 67 | type='RandomSampler', 68 | num=256, 69 | pos_fraction=0.5, 70 | neg_pos_ub=-1, 71 | add_gt_as_proposals=False), 72 | allowed_border=-1, 73 | pos_weight=-1, 74 | debug=False), 75 | rpn_proposal=dict( 76 | nms_pre=2000, 77 | max_per_img=1000, 78 | nms=dict(type='nms', iou_threshold=0.7), 79 | min_bbox_size=0), 80 | rcnn=dict( 81 | assigner=dict( 82 | type='MaxIoUAssigner', 83 | pos_iou_thr=0.5, 84 | neg_iou_thr=0.5, 85 | min_pos_iou=0.5, 86 | match_low_quality=False, 87 | ignore_iof_thr=-1), 88 | sampler=dict( 89 | type='RandomSampler', 90 | num=512, 91 | pos_fraction=0.25, 92 | neg_pos_ub=-1, 93 | add_gt_as_proposals=True), 94 | pos_weight=-1, 95 | debug=False)), 96 | test_cfg=dict( 97 | rpn=dict( 98 | nms_pre=1000, 99 | max_per_img=1000, 100 | nms=dict(type='nms', iou_threshold=0.7), 101 | min_bbox_size=0), 102 | rcnn=dict( 103 | score_thr=0.05, 104 | nms=dict(type='nms', iou_threshold=0.5), 105 | max_per_img=100) 106 | # soft-nms is also supported for rcnn testing 107 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) 108 | )) 109 | -------------------------------------------------------------------------------- /configs/_base_/models/mask_rcnn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='MaskRCNN', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=3, 9 | strides=(1, 2, 2), 10 | dilations=(1, 1, 1), 11 | out_indices=(2, ), 12 | frozen_stages=1, 13 | norm_cfg=norm_cfg, 14 | norm_eval=True, 15 | style='caffe', 16 | init_cfg=dict( 17 | type='Pretrained', 18 | checkpoint='open-mmlab://detectron2/resnet50_caffe')), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=1024, 22 | feat_channels=1024, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[2, 4, 8, 16, 32], 26 | 
ratios=[0.5, 1.0, 2.0], 27 | strides=[16]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | shared_head=dict( 38 | type='ResLayer', 39 | depth=50, 40 | stage=3, 41 | stride=2, 42 | dilation=1, 43 | style='caffe', 44 | norm_cfg=norm_cfg, 45 | norm_eval=True), 46 | bbox_roi_extractor=dict( 47 | type='SingleRoIExtractor', 48 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 49 | out_channels=1024, 50 | featmap_strides=[16]), 51 | bbox_head=dict( 52 | type='BBoxHead', 53 | with_avg_pool=True, 54 | roi_feat_size=7, 55 | in_channels=2048, 56 | num_classes=80, 57 | bbox_coder=dict( 58 | type='DeltaXYWHBBoxCoder', 59 | target_means=[0., 0., 0., 0.], 60 | target_stds=[0.1, 0.1, 0.2, 0.2]), 61 | reg_class_agnostic=False, 62 | loss_cls=dict( 63 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 64 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 65 | mask_roi_extractor=None, 66 | mask_head=dict( 67 | type='FCNMaskHead', 68 | num_convs=0, 69 | in_channels=2048, 70 | conv_out_channels=256, 71 | num_classes=80, 72 | loss_mask=dict( 73 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 74 | # model training and testing settings 75 | train_cfg=dict( 76 | rpn=dict( 77 | assigner=dict( 78 | type='MaxIoUAssigner', 79 | pos_iou_thr=0.7, 80 | neg_iou_thr=0.3, 81 | min_pos_iou=0.3, 82 | match_low_quality=True, 83 | ignore_iof_thr=-1), 84 | sampler=dict( 85 | type='RandomSampler', 86 | num=256, 87 | pos_fraction=0.5, 88 | neg_pos_ub=-1, 89 | add_gt_as_proposals=False), 90 | allowed_border=0, 91 | pos_weight=-1, 92 | debug=False), 93 | rpn_proposal=dict( 94 | nms_pre=12000, 95 | max_per_img=2000, 96 | nms=dict(type='nms', iou_threshold=0.7), 97 | min_bbox_size=0), 98 | rcnn=dict( 99 | assigner=dict( 100 | type='MaxIoUAssigner', 101 | pos_iou_thr=0.5, 102 | neg_iou_thr=0.5, 103 | min_pos_iou=0.5, 104 | match_low_quality=False, 105 | ignore_iof_thr=-1), 106 | sampler=dict( 107 | type='RandomSampler', 108 | num=512, 109 | pos_fraction=0.25, 110 | neg_pos_ub=-1, 111 | add_gt_as_proposals=True), 112 | mask_size=14, 113 | pos_weight=-1, 114 | debug=False)), 115 | test_cfg=dict( 116 | rpn=dict( 117 | nms_pre=6000, 118 | nms=dict(type='nms', iou_threshold=0.7), 119 | max_per_img=1000, 120 | min_bbox_size=0), 121 | rcnn=dict( 122 | score_thr=0.05, 123 | nms=dict(type='nms', iou_threshold=0.5), 124 | max_per_img=100, 125 | mask_thr_binary=0.5))) 126 | -------------------------------------------------------------------------------- /configs/_base_/models/mask_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='MaskRCNN', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=True), 11 | norm_eval=True, 12 | style='pytorch', 13 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | 
strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 56 | mask_roi_extractor=dict( 57 | type='SingleRoIExtractor', 58 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 59 | out_channels=256, 60 | featmap_strides=[4, 8, 16, 32]), 61 | mask_head=dict( 62 | type='FCNMaskHead', 63 | num_convs=4, 64 | in_channels=256, 65 | conv_out_channels=256, 66 | num_classes=80, 67 | loss_mask=dict( 68 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | rpn=dict( 72 | assigner=dict( 73 | type='MaxIoUAssigner', 74 | pos_iou_thr=0.7, 75 | neg_iou_thr=0.3, 76 | min_pos_iou=0.3, 77 | match_low_quality=True, 78 | ignore_iof_thr=-1), 79 | sampler=dict( 80 | type='RandomSampler', 81 | num=256, 82 | pos_fraction=0.5, 83 | neg_pos_ub=-1, 84 | add_gt_as_proposals=False), 85 | allowed_border=-1, 86 | pos_weight=-1, 87 | debug=False), 88 | rpn_proposal=dict( 89 | nms_pre=2000, 90 | max_per_img=1000, 91 | nms=dict(type='nms', iou_threshold=0.7), 92 | min_bbox_size=0), 93 | rcnn=dict( 94 | assigner=dict( 95 | type='MaxIoUAssigner', 96 | pos_iou_thr=0.5, 97 | neg_iou_thr=0.5, 98 | min_pos_iou=0.5, 99 | match_low_quality=True, 100 | ignore_iof_thr=-1), 101 | sampler=dict( 102 | type='RandomSampler', 103 | num=512, 104 | pos_fraction=0.25, 105 | neg_pos_ub=-1, 106 | add_gt_as_proposals=True), 107 | mask_size=28, 108 | pos_weight=-1, 109 | debug=False)), 110 | test_cfg=dict( 111 | rpn=dict( 112 | nms_pre=1000, 113 | max_per_img=1000, 114 | nms=dict(type='nms', iou_threshold=0.7), 115 | min_bbox_size=0), 116 | rcnn=dict( 117 | score_thr=0.05, 118 | nms=dict(type='nms', iou_threshold=0.5), 119 | max_per_img=100, 120 | mask_thr_binary=0.5))) 121 | -------------------------------------------------------------------------------- /configs/_base_/models/retinanet_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=True), 11 | norm_eval=True, 12 | style='pytorch', 13 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | start_level=1, 19 | add_extra_convs='on_input', 20 | num_outs=5), 21 | bbox_head=dict( 22 | type='RetinaHead', 23 | num_classes=80, 24 | in_channels=256, 25 | stacked_convs=4, 26 | feat_channels=256, 27 | 
anchor_generator=dict( 28 | type='AnchorGenerator', 29 | octave_base_scale=4, 30 | scales_per_octave=3, 31 | ratios=[0.5, 1.0, 2.0], 32 | strides=[8, 16, 32, 64, 128]), 33 | bbox_coder=dict( 34 | type='DeltaXYWHBBoxCoder', 35 | target_means=[.0, .0, .0, .0], 36 | target_stds=[1.0, 1.0, 1.0, 1.0]), 37 | loss_cls=dict( 38 | type='FocalLoss', 39 | use_sigmoid=True, 40 | gamma=2.0, 41 | alpha=0.25, 42 | loss_weight=1.0), 43 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 44 | # model training and testing settings 45 | train_cfg=dict( 46 | assigner=dict( 47 | type='MaxIoUAssigner', 48 | pos_iou_thr=0.5, 49 | neg_iou_thr=0.4, 50 | min_pos_iou=0, 51 | ignore_iof_thr=-1), 52 | allowed_border=-1, 53 | pos_weight=-1, 54 | debug=False), 55 | test_cfg=dict( 56 | nms_pre=1000, 57 | min_bbox_size=0, 58 | score_thr=0.05, 59 | nms=dict(type='nms', iou_threshold=0.5), 60 | max_per_img=100)) 61 | -------------------------------------------------------------------------------- /configs/_base_/models/rpn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=50, 7 | num_stages=3, 8 | strides=(1, 2, 2), 9 | dilations=(1, 1, 1), 10 | out_indices=(2, ), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=False), 13 | norm_eval=True, 14 | style='caffe', 15 | init_cfg=dict( 16 | type='Pretrained', 17 | checkpoint='open-mmlab://detectron2/resnet50_caffe')), 18 | neck=None, 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=1024, 22 | feat_channels=1024, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[2, 4, 8, 16, 32], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[16]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | # model training and testing settings 36 | train_cfg=dict( 37 | rpn=dict( 38 | assigner=dict( 39 | type='MaxIoUAssigner', 40 | pos_iou_thr=0.7, 41 | neg_iou_thr=0.3, 42 | min_pos_iou=0.3, 43 | ignore_iof_thr=-1), 44 | sampler=dict( 45 | type='RandomSampler', 46 | num=256, 47 | pos_fraction=0.5, 48 | neg_pos_ub=-1, 49 | add_gt_as_proposals=False), 50 | allowed_border=0, 51 | pos_weight=-1, 52 | debug=False)), 53 | test_cfg=dict( 54 | rpn=dict( 55 | nms_pre=12000, 56 | max_per_img=2000, 57 | nms=dict(type='nms', iou_threshold=0.7), 58 | min_bbox_size=0))) 59 | -------------------------------------------------------------------------------- /configs/_base_/models/rpn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=True), 11 | norm_eval=True, 12 | style='pytorch', 13 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | 
target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | # model training and testing settings 36 | train_cfg=dict( 37 | rpn=dict( 38 | assigner=dict( 39 | type='MaxIoUAssigner', 40 | pos_iou_thr=0.7, 41 | neg_iou_thr=0.3, 42 | min_pos_iou=0.3, 43 | ignore_iof_thr=-1), 44 | sampler=dict( 45 | type='RandomSampler', 46 | num=256, 47 | pos_fraction=0.5, 48 | neg_pos_ub=-1, 49 | add_gt_as_proposals=False), 50 | allowed_border=0, 51 | pos_weight=-1, 52 | debug=False)), 53 | test_cfg=dict( 54 | rpn=dict( 55 | nms_pre=2000, 56 | max_per_img=1000, 57 | nms=dict(type='nms', iou_threshold=0.7), 58 | min_bbox_size=0))) 59 | -------------------------------------------------------------------------------- /configs/_base_/models/ssd300.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | input_size = 300 3 | model = dict( 4 | type='SingleStageDetector', 5 | backbone=dict( 6 | type='SSDVGG', 7 | depth=16, 8 | with_last_pool=False, 9 | ceil_mode=True, 10 | out_indices=(3, 4), 11 | out_feature_indices=(22, 34), 12 | init_cfg=dict( 13 | type='Pretrained', checkpoint='open-mmlab://vgg16_caffe')), 14 | neck=dict( 15 | type='SSDNeck', 16 | in_channels=(512, 1024), 17 | out_channels=(512, 1024, 512, 256, 256, 256), 18 | level_strides=(2, 2, 1, 1), 19 | level_paddings=(1, 1, 0, 0), 20 | l2_norm_scale=20), 21 | bbox_head=dict( 22 | type='SSDHead', 23 | in_channels=(512, 1024, 512, 256, 256, 256), 24 | num_classes=80, 25 | anchor_generator=dict( 26 | type='SSDAnchorGenerator', 27 | scale_major=False, 28 | input_size=input_size, 29 | basesize_ratio_range=(0.15, 0.9), 30 | strides=[8, 16, 32, 64, 100, 300], 31 | ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]), 32 | bbox_coder=dict( 33 | type='DeltaXYWHBBoxCoder', 34 | target_means=[.0, .0, .0, .0], 35 | target_stds=[0.1, 0.1, 0.2, 0.2])), 36 | # model training and testing settings 37 | train_cfg=dict( 38 | assigner=dict( 39 | type='MaxIoUAssigner', 40 | pos_iou_thr=0.5, 41 | neg_iou_thr=0.5, 42 | min_pos_iou=0., 43 | ignore_iof_thr=-1, 44 | gt_max_assign_all=False), 45 | smoothl1_beta=1., 46 | allowed_border=-1, 47 | pos_weight=-1, 48 | neg_pos_ratio=3, 49 | debug=False), 50 | test_cfg=dict( 51 | nms_pre=1000, 52 | nms=dict(type='nms', iou_threshold=0.45), 53 | min_bbox_size=0, 54 | score_thr=0.02, 55 | max_per_img=200)) 56 | cudnn_benchmark = True 57 | -------------------------------------------------------------------------------- /configs/_base_/schedules/adamw_1x.py: -------------------------------------------------------------------------------- 1 | optimizer = dict( 2 | type='AdamW', 3 | lr=4e-4, 4 | weight_decay=0.0001, 5 | paramwise_cfg=dict( 6 | custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)})) 7 | optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2)) 8 | # learning policy 9 | lr_config = dict( 10 | policy='step', 11 | warmup='linear', 12 | warmup_iters=1000, 13 | warmup_ratio=0.001, 14 | step=[8, 11]) 15 | runner = dict(type='EpochBasedRunner', max_epochs=12) 16 | -------------------------------------------------------------------------------- /configs/_base_/schedules/adamw_2x.py: -------------------------------------------------------------------------------- 1 | optimizer = dict( 2 | type='AdamW', 3 | lr=4e-4, 4 | weight_decay=0.0001, 5 | paramwise_cfg=dict( 6 | custom_keys={'backbone': dict(lr_mult=0.1, 
decay_mult=1.0)})) 7 | optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2)) 8 | # learning policy 9 | lr_config = dict( 10 | policy='step', 11 | warmup='linear', 12 | warmup_iters=1000, 13 | warmup_ratio=0.001, 14 | step=[8, 11]) 15 | runner = dict(type='EpochBasedRunner', max_epochs=24) 16 | -------------------------------------------------------------------------------- /configs/_base_/schedules/adamw_30k.py: -------------------------------------------------------------------------------- 1 | optimizer = dict( 2 | type='AdamW', 3 | lr=4e-4, 4 | weight_decay=0.0001, 5 | paramwise_cfg=dict( 6 | custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)})) 7 | optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2)) 8 | # learning policy 9 | lr_config = dict( 10 | policy='step', 11 | warmup='linear', 12 | warmup_iters=1000, 13 | warmup_ratio=0.001, 14 | step=[20000, 25000]) 15 | runner = dict(type='IterBasedRunner', max_iters=30000) 16 | -------------------------------------------------------------------------------- /configs/_base_/schedules/adamw_3x.py: -------------------------------------------------------------------------------- 1 | optimizer = dict( 2 | type='AdamW', 3 | lr=4e-4, 4 | weight_decay=0.0001, 5 | paramwise_cfg=dict( 6 | custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)})) 7 | optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2)) 8 | # learning policy 9 | lr_config = dict( 10 | policy='step', 11 | warmup='linear', 12 | warmup_iters=1000, 13 | warmup_ratio=0.001, 14 | step=[24, 33]) 15 | runner = dict(type='EpochBasedRunner', max_epochs=36) 16 | -------------------------------------------------------------------------------- /configs/_base_/schedules/adamw_50k.py: -------------------------------------------------------------------------------- 1 | optimizer = dict( 2 | type='AdamW', 3 | lr=4e-4, 4 | weight_decay=0.0001, 5 | paramwise_cfg=dict( 6 | custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)})) 7 | optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2)) 8 | # learning policy 9 | lr_config = dict( 10 | policy='step', 11 | warmup='linear', 12 | warmup_iters=1000, 13 | warmup_ratio=0.001, 14 | step=[40000]) 15 | runner = dict(type='IterBasedRunner', max_iters=50000) 16 | -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=12) 12 | -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_20e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 19]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=20) 12 | -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_2x.py: 
-------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 22]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=24) 12 | -------------------------------------------------------------------------------- /configs/date/date.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | 3 | max_per_img = 100 4 | model = dict( 5 | type='FCOS', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=False), 13 | norm_eval=True, 14 | style='caffe', 15 | init_cfg=dict( 16 | type='Pretrained', 17 | checkpoint='open-mmlab://detectron/resnet50_caffe')), 18 | neck=dict( 19 | type='FPN', 20 | in_channels=[256, 512, 1024, 2048], 21 | out_channels=256, 22 | start_level=1, 23 | add_extra_convs='on_output', # use P5 24 | num_outs=5, 25 | relu_before_extra_convs=True), 26 | bbox_head=dict( 27 | type='DATEHead', 28 | num_classes=80, 29 | in_channels=256, 30 | stacked_convs=4, 31 | feat_channels=256, 32 | strides=[8, 16, 32, 64, 128], 33 | deformable=False, 34 | predictors=[ 35 | dict( 36 | type='OneToOneHeadPredictor', 37 | deformable=False, 38 | loss_cls=dict( 39 | type='FocalLoss', 40 | use_sigmoid=True, 41 | gamma=2.0, 42 | alpha=0.25, 43 | loss_weight=2.0), 44 | loss_box=dict(type='L1Loss', loss_weight=5.0), 45 | loss_iou=dict(type='GIoULoss', loss_weight=2.0), 46 | assigner=dict( 47 | type='POTOAssigner', 48 | alpha=0.8, 49 | iou_type='giou', 50 | strides=[8, 16, 32, 64, 128], 51 | center_sampling_radius=1.5, 52 | ), 53 | test_cfg=dict( 54 | nms_pre=max_per_img, 55 | min_bbox_size=0, 56 | score_thr=0.01, 57 | nms=False, 58 | max_per_img=max_per_img) 59 | ), 60 | dict( 61 | type='FCOSHeadPredictor', 62 | center_sampling=False, 63 | center_sample_radius=1.5, 64 | centerness_on_reg=False, 65 | deformable=False, 66 | loss_cls=dict( 67 | type='FocalLoss', 68 | use_sigmoid=True, 69 | gamma=2.0, 70 | alpha=0.25, 71 | loss_weight=1.0), 72 | loss_box=dict(type='IoULoss', loss_weight=1.0), 73 | loss_ctr=dict( 74 | type='CrossEntropyLoss', 75 | use_sigmoid=True, 76 | loss_weight=1.0), 77 | train_cfg=dict( 78 | assigner=dict( 79 | type='MaxIoUAssigner', 80 | pos_iou_thr=0.5, 81 | neg_iou_thr=0.4, 82 | min_pos_iou=0, 83 | ignore_iof_thr=-1), 84 | allowed_border=-1, 85 | pos_weight=-1, 86 | debug=False), 87 | test_cfg=dict( 88 | nms_pre=1000, 89 | min_bbox_size=0, 90 | score_thr=0.05, 91 | nms=dict(type='nms', iou_threshold=0.5), 92 | max_per_img=100) 93 | ) 94 | ], 95 | init_cfg=[ 96 | dict( 97 | type='Normal', 98 | layer='Conv2d', 99 | std=0.01, 100 | override=dict( 101 | type='Normal', 102 | name='conv_cls', 103 | std=0.01, 104 | bias_prob=0.01)), 105 | dict( 106 | type='Normal', 107 | layer='ModulatedDeformableConv2d', 108 | std=0.01, 109 | override=dict( 110 | type='Normal', 111 | name='conv_cls', 112 | std=0.01, 113 | bias_prob=0.01))], 114 | ) 115 | ) 116 | -------------------------------------------------------------------------------- /configs/date/date_r101_36e_8x2_fcos_poto_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './date_r50_36e_8x2.py' 2 | 3 | model = dict( 4 | backbone=dict( 5 | 
depth=101, 6 | init_cfg=dict( 7 | type='Pretrained', 8 | checkpoint='open-mmlab://detectron/resnet101_caffe')) 9 | ) 10 | -------------------------------------------------------------------------------- /configs/date/date_r50_12e_8x2.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/coco_det_1x.py', 3 | '../_base_/schedules/adamw_1x.py', 4 | '../_base_/default_runtime.py', 5 | '../_base_/custom_imports.py', 6 | './date.py', 7 | ] 8 | -------------------------------------------------------------------------------- /configs/date/date_r50_12e_8x2_fcos_poto_3dmf_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './date_r50_12e_8x2.py' 2 | 3 | max_per_img = 100 4 | model = dict( 5 | bbox_head=dict( 6 | predictors=[ 7 | dict( 8 | type='DeFCNPredictor', 9 | deformable=False, 10 | loss_cls=dict( 11 | type='FocalLoss', 12 | use_sigmoid=True, 13 | gamma=2.0, 14 | alpha=0.25, 15 | loss_weight=2.0), 16 | loss_box=dict(type='L1Loss', loss_weight=5.0), 17 | loss_iou=dict(type='GIoULoss', loss_weight=2.0), 18 | loss_aux=None, # The DeFCN without aux loss. 19 | assigner=dict( 20 | type='POTOAssigner', 21 | alpha=0.8, 22 | iou_type='giou', 23 | strides=[8, 16, 32, 64, 128], 24 | center_sampling_radius=1.5, 25 | ), 26 | test_cfg=dict( 27 | nms_pre=max_per_img, 28 | min_bbox_size=0, 29 | score_thr=0.01, 30 | nms=False, 31 | max_per_img=max_per_img) 32 | ), 33 | dict( 34 | type='FCOSHeadPredictor', 35 | center_sampling=False, 36 | center_sample_radius=1.5, 37 | centerness_on_reg=False, 38 | deformable=False, 39 | loss_cls=dict( 40 | type='FocalLoss', 41 | use_sigmoid=True, 42 | gamma=2.0, 43 | alpha=0.25, 44 | loss_weight=1.0), 45 | loss_box=dict(type='IoULoss', loss_weight=1.0), 46 | loss_ctr=dict( 47 | type='CrossEntropyLoss', 48 | use_sigmoid=True, 49 | loss_weight=1.0), 50 | train_cfg=dict( 51 | assigner=dict( 52 | type='MaxIoUAssigner', 53 | pos_iou_thr=0.5, 54 | neg_iou_thr=0.4, 55 | min_pos_iou=0, 56 | ignore_iof_thr=-1), 57 | allowed_border=-1, 58 | pos_weight=-1, 59 | debug=False), 60 | test_cfg=dict( 61 | nms_pre=1000, 62 | min_bbox_size=0, 63 | score_thr=0.05, 64 | nms=dict(type='nms', iou_threshold=0.5), 65 | max_per_img=100) 66 | ) 67 | ], 68 | ) 69 | ) 70 | optimizer = dict( 71 | type='AdamW', 72 | lr=4e-4, 73 | weight_decay=0.0001, 74 | paramwise_cfg=dict( 75 | custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)})) 76 | -------------------------------------------------------------------------------- /configs/date/date_r50_12e_8x2_fcos_poto_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './date_r50_12e_8x2.py' 2 | -------------------------------------------------------------------------------- /configs/date/date_r50_12e_8x2_retina_poto_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './date_r50_12e_8x2.py' 2 | 3 | model = dict( 4 | bbox_head=dict( 5 | predictors=[ 6 | dict( 7 | type='OneToOneHeadPredictor', 8 | deformable=False, 9 | loss_cls=dict( 10 | type='FocalLoss', 11 | use_sigmoid=True, 12 | gamma=2.0, 13 | alpha=0.25, 14 | loss_weight=2.0), 15 | loss_box=dict(type='L1Loss', loss_weight=5.0), 16 | loss_iou=dict(type='GIoULoss', loss_weight=2.0), 17 | assigner=dict( 18 | type='POTOAssigner', 19 | alpha=0.8, 20 | iou_type='giou', 21 | strides=[8, 16, 32, 64, 128], 22 | center_sampling_radius=1.5), 23 | test_cfg=dict( 24 | nms_pre=1000, 25 | 
min_bbox_size=0, 26 | score_thr=0.01, 27 | nms=False, 28 | max_per_img=100) 29 | ), 30 | dict( 31 | type='RetinaHeadPredictor', 32 | deformable=False, 33 | anchor_generator=dict( 34 | type='AnchorGenerator', 35 | octave_base_scale=4, 36 | scales_per_octave=3, 37 | ratios=[0.5, 1.0, 2.0], 38 | strides=[8, 16, 32, 64, 128]), 39 | bbox_coder=dict( 40 | type='DeltaXYWHBBoxCoder', 41 | target_means=[.0, .0, .0, .0], 42 | target_stds=[1.0, 1.0, 1.0, 1.0]), 43 | loss_cls=dict( 44 | type='FocalLoss', 45 | use_sigmoid=True, 46 | gamma=2.0, 47 | alpha=0.25, 48 | loss_weight=2.0), 49 | loss_box=dict(type='L1Loss', loss_weight=2.0), 50 | train_cfg=dict( 51 | assigner=dict( 52 | type='MaxIoUAssigner', 53 | pos_iou_thr=0.5, 54 | neg_iou_thr=0.4, 55 | min_pos_iou=0, 56 | ignore_iof_thr=-1), 57 | allowed_border=-1, 58 | pos_weight=-1, 59 | debug=False), 60 | test_cfg=dict( 61 | nms_pre=1000, 62 | min_bbox_size=0, 63 | score_thr=0.05, 64 | nms=dict(type='nms', iou_threshold=0.5), 65 | max_per_img=100), 66 | ), 67 | ], 68 | ) 69 | ) 70 | optimizer = dict( 71 | type='AdamW', 72 | lr=4e-4, 73 | weight_decay=0.0001, 74 | paramwise_cfg=dict( 75 | custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)})) 76 | -------------------------------------------------------------------------------- /configs/date/date_r50_30k_8x2_fcos_poto_crowdhuman.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/crowdhuman.py', 3 | '../_base_/schedules/adamw_30k.py', 4 | '../_base_/default_runtime.py', 5 | '../_base_/custom_imports.py', 6 | './date.py', 7 | ] 8 | model = dict( 9 | bbox_head=dict( 10 | num_classes=1, 11 | predict_from=0, 12 | predictors=[ 13 | dict( 14 | type='OneToOneHeadPredictor', 15 | deformable=False, 16 | loss_cls=dict( 17 | type='FocalLoss', 18 | use_sigmoid=True, 19 | gamma=2.0, 20 | alpha=0.25, 21 | loss_weight=2.0), 22 | loss_box=dict(type='L1Loss', loss_weight=5.0), 23 | loss_iou=dict(type='GIoULoss', loss_weight=2.0), 24 | assigner=dict( 25 | type='HungarianAssigner', 26 | cls_cost=dict( 27 | type='FocalLossCost', weight=2.0), 28 | reg_cost=dict( 29 | type='BBoxL1Cost', weight=5.0, box_format='xywh'), 30 | iou_cost=dict( 31 | type='IoUCost', iou_mode='giou', weight=2.0)), 32 | test_cfg=dict( 33 | nms_pre=1000, 34 | min_bbox_size=0, 35 | score_thr=0.01, 36 | nms=False, 37 | max_per_img=500) 38 | ), 39 | dict( 40 | type='FCOSHeadPredictor', 41 | center_sampling=False, 42 | center_sample_radius=1.5, 43 | centerness_on_reg=False, 44 | deformable=False, 45 | loss_cls=dict( 46 | type='FocalLoss', 47 | use_sigmoid=True, 48 | gamma=2.0, 49 | alpha=0.25, 50 | loss_weight=1.0), 51 | loss_box=dict(type='IoULoss', loss_weight=1.0), 52 | loss_ctr=dict( 53 | type='CrossEntropyLoss', 54 | use_sigmoid=True, 55 | loss_weight=1.0), 56 | train_cfg=dict( 57 | assigner=dict( 58 | type='MaxIoUAssigner', 59 | pos_iou_thr=0.5, 60 | neg_iou_thr=0.4, 61 | min_pos_iou=0, 62 | ignore_iof_thr=-1), 63 | allowed_border=-1, 64 | pos_weight=-1, 65 | debug=False), 66 | test_cfg=dict( 67 | nms_pre=1000, 68 | min_bbox_size=0, 69 | score_thr=0.05, 70 | nms=dict(type='nms', iou_threshold=0.5), 71 | max_per_img=100) 72 | ) 73 | ])) 74 | checkpoint_config = dict(interval=5000, max_keep_ckpts=5, by_epoch=False) 75 | optimizer = dict( 76 | type='AdamW', 77 | lr=4e-4, 78 | weight_decay=0.0001, 79 | paramwise_cfg=dict( 80 | custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)})) 81 | -------------------------------------------------------------------------------- 
/configs/date/date_r50_30k_8x2_retina_poto_crowdhuman.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/crowdhuman.py', 3 | '../_base_/schedules/adamw_30k.py', 4 | '../_base_/default_runtime.py', 5 | '../_base_/custom_imports.py', 6 | './date.py', 7 | ] 8 | model = dict( 9 | bbox_head=dict( 10 | num_classes=1, 11 | predictors=[ 12 | dict( 13 | type='OneToOneHeadPredictor', 14 | deformable=False, 15 | loss_cls=dict( 16 | type='FocalLoss', 17 | use_sigmoid=True, 18 | gamma=2.0, 19 | alpha=0.25, 20 | loss_weight=2.0), 21 | loss_box=dict(type='L1Loss', loss_weight=5.0), 22 | loss_iou=dict(type='GIoULoss', loss_weight=2.0), 23 | assigner=dict( 24 | type='HungarianAssigner', 25 | cls_cost=dict( 26 | type='FocalLossCost', weight=2.0), 27 | reg_cost=dict( 28 | type='BBoxL1Cost', weight=5.0, box_format='xywh'), 29 | iou_cost=dict( 30 | type='IoUCost', iou_mode='giou', weight=2.0)), 31 | test_cfg=dict( 32 | nms_pre=1000, 33 | min_bbox_size=0, 34 | score_thr=0.01, 35 | nms=False, 36 | max_per_img=500) 37 | ), 38 | dict( 39 | type='RetinaHeadPredictor', 40 | deformable=False, 41 | anchor_generator=dict( 42 | type='AnchorGenerator', 43 | octave_base_scale=4, 44 | scales_per_octave=3, 45 | ratios=[0.5, 1.0, 2.0], 46 | strides=[8, 16, 32, 64, 128]), 47 | bbox_coder=dict( 48 | type='DeltaXYWHBBoxCoder', 49 | target_means=[.0, .0, .0, .0], 50 | target_stds=[1.0, 1.0, 1.0, 1.0]), 51 | loss_cls=dict( 52 | type='FocalLoss', 53 | use_sigmoid=True, 54 | gamma=2.0, 55 | alpha=0.25, 56 | loss_weight=2.0), 57 | loss_box=dict(type='L1Loss', loss_weight=2.0), 58 | train_cfg=dict( 59 | assigner=dict( 60 | type='MaxIoUAssigner', 61 | pos_iou_thr=0.5, 62 | neg_iou_thr=0.4, 63 | min_pos_iou=0, 64 | ignore_iof_thr=-1), 65 | allowed_border=-1, 66 | pos_weight=-1, 67 | debug=False), 68 | test_cfg=dict( 69 | nms_pre=1000, 70 | min_bbox_size=0, 71 | score_thr=0.05, 72 | nms=dict(type='nms', iou_threshold=0.5), 73 | max_per_img=100), 74 | ) 75 | ])) 76 | checkpoint_config = dict(interval=5000, max_keep_ckpts=5, by_epoch=False) 77 | optimizer = dict( 78 | type='AdamW', 79 | lr=4e-4, 80 | weight_decay=0.0001, 81 | paramwise_cfg=dict( 82 | custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)})) 83 | -------------------------------------------------------------------------------- /configs/date/date_r50_36e_8x2.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/coco_det_3x.py', 3 | '../_base_/schedules/adamw_3x.py', 4 | '../_base_/default_runtime.py', 5 | '../_base_/custom_imports.py', 6 | './date.py', 7 | ] 8 | -------------------------------------------------------------------------------- /configs/date/date_r50_36e_8x2_fcos_poto_3dmf_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './date_r50_12e_8x2.py' 2 | 3 | max_per_img = 100 4 | model = dict( 5 | bbox_head=dict( 6 | predictors=[ 7 | dict( 8 | type='DeFCNPredictor', 9 | deformable=False, 10 | loss_cls=dict( 11 | type='FocalLoss', 12 | use_sigmoid=True, 13 | gamma=2.0, 14 | alpha=0.25, 15 | loss_weight=2.0), 16 | loss_box=dict(type='L1Loss', loss_weight=5.0), 17 | loss_iou=dict(type='GIoULoss', loss_weight=2.0), 18 | loss_aux=None, # The DeFCN without aux loss. 
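                    # The 'DeFCNPredictor' here presumably corresponds to the
                    # 3D Max Filtering ('3DMF') variant named in the config file;
                    # its auxiliary loss is disabled (loss_aux=None), so the
                    # one-to-many supervision comes only from the FCOS branch below.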
19 | assigner=dict( 20 | type='POTOAssigner', 21 | alpha=0.8, 22 | iou_type='giou', 23 | strides=[8, 16, 32, 64, 128], 24 | center_sampling_radius=1.5, 25 | ), 26 | test_cfg=dict( 27 | nms_pre=max_per_img, 28 | min_bbox_size=0, 29 | score_thr=0.01, 30 | nms=False, 31 | max_per_img=max_per_img) 32 | ), 33 | dict( 34 | type='FCOSHeadPredictor', 35 | center_sampling=False, 36 | center_sample_radius=1.5, 37 | centerness_on_reg=False, 38 | deformable=False, 39 | loss_cls=dict( 40 | type='FocalLoss', 41 | use_sigmoid=True, 42 | gamma=2.0, 43 | alpha=0.25, 44 | loss_weight=1.0), 45 | loss_box=dict(type='IoULoss', loss_weight=1.0), 46 | loss_ctr=dict( 47 | type='CrossEntropyLoss', 48 | use_sigmoid=True, 49 | loss_weight=1.0), 50 | train_cfg=dict( 51 | assigner=dict( 52 | type='MaxIoUAssigner', 53 | pos_iou_thr=0.5, 54 | neg_iou_thr=0.4, 55 | min_pos_iou=0, 56 | ignore_iof_thr=-1), 57 | allowed_border=-1, 58 | pos_weight=-1, 59 | debug=False), 60 | test_cfg=dict( 61 | nms_pre=1000, 62 | min_bbox_size=0, 63 | score_thr=0.05, 64 | nms=dict(type='nms', iou_threshold=0.5), 65 | max_per_img=100) 66 | ) 67 | ], 68 | ) 69 | ) 70 | optimizer = dict( 71 | type='AdamW', 72 | lr=4e-4, 73 | weight_decay=0.0001, 74 | paramwise_cfg=dict( 75 | custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)})) 76 | -------------------------------------------------------------------------------- /configs/date/date_r50_36e_8x2_fcos_poto_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './date_r50_36e_8x2.py' 2 | -------------------------------------------------------------------------------- /date/datasets/crowdhuman.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | CrowdHuman dataset. 4 | 5 | Author: 6 | Yiqun Chen 7 | """ 8 | 9 | from mmdet.datasets.coco import CocoDataset 10 | from mmdet.datasets.api_wrappers import COCO 11 | from mmdet.datasets.builder import DATASETS 12 | 13 | from date.datasets.utils import ( 14 | format_dt_json, 15 | format_gt_json, 16 | evaluate_crowdhuman) 17 | 18 | 19 | @DATASETS.register_module() 20 | class CrowdHumanDataset(CocoDataset): 21 | 22 | CLASSES = ('person', ) 23 | 24 | PALETTE = [(220, 20, 60)] 25 | 26 | def load_annotations(self, ann_file): 27 | """Load annotation from COCO style annotation file. 28 | 29 | Args: 30 | ann_file (str): Path of annotation file. 31 | 32 | Returns: 33 | list[dict]: Annotation info from COCO api. 34 | """ 35 | 36 | self.coco = COCO(ann_file) 37 | # The order of returned `cat_ids` will not 38 | # change with the order of the CLASSES 39 | self.cat_ids = self.coco.get_cat_ids() 40 | 41 | self.cat2label = {cat_id: i for i, cat_id in enumerate(self.cat_ids)} 42 | self.img_ids = self.coco.get_img_ids() 43 | data_infos = [] 44 | total_ann_ids = [] 45 | for i in self.img_ids: 46 | info = self.coco.load_imgs([i])[0] 47 | info['filename'] = info['file_name'] 48 | data_infos.append(info) 49 | ann_ids = self.coco.get_ann_ids(img_ids=[i]) 50 | total_ann_ids.extend(ann_ids) 51 | assert len(set(total_ann_ids)) == len( 52 | total_ann_ids), f"Annotation ids in '{ann_file}' are not unique!" 53 | return data_infos 54 | 55 | def evaluate(self, 56 | results, 57 | metric='bbox', 58 | logger=None, 59 | jsonfile_prefix=None, 60 | classwise=False, 61 | proposal_nums=(100, 300, 1000), 62 | iou_thrs=None, 63 | metric_items=None): 64 | """Evaluation in COCO protocol. 65 | 66 | Args: 67 | results (list[list | tuple]): Testing results of the dataset. 
68 | metric (str | list[str]): Metrics to be evaluated. Options are 69 | 'bbox', 'segm', 'proposal', 'proposal_fast'. 70 | logger (logging.Logger | str | None): Keep the API consistent 71 | with mmdet, not used. 72 | jsonfile_prefix (str | None): The prefix of json files. It includes 73 | the file path and the prefix of filename, e.g., "a/b/prefix". 74 | If not specified, a temp file will be created. Default: None. 75 | classwise (bool): Keep the API consistent with mmdet, not used. 76 | proposal_nums (Sequence[int]): Keep the API consistent 77 | with mmdet, not used. 78 | iou_thrs (Sequence[float], optional): Keep the API consistent 79 | with mmdet, not used. 80 | metric_items (list[str] | str, optional): Keep the API consistent 81 | with mmdet, not used. 82 | 83 | Returns: 84 | dict[str, float]: CrowdHuman evaluation metric. 85 | """ 86 | 87 | metrics = metric if isinstance(metric, list) else [metric] 88 | allowed_metrics = ['bbox'] 89 | for metric in metrics: 90 | if metric not in allowed_metrics: 91 | raise KeyError(f'metric {metric} is not supported') 92 | 93 | coco_gt = self.coco 94 | self.cat_ids = coco_gt.get_cat_ids() 95 | 96 | result_files, tmp_dir = self.format_results(results, jsonfile_prefix) 97 | 98 | path2dt = format_dt_json(tmp_dir.name, result_files['bbox']) 99 | path2gt = format_gt_json(tmp_dir.name, self.ann_file) 100 | eval_results = evaluate_crowdhuman(path2gt, path2dt) 101 | 102 | if tmp_dir is not None: 103 | tmp_dir.cleanup() 104 | return eval_results 105 | -------------------------------------------------------------------------------- /date/models/modules/assigner.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | Assigner supporting one-to-many matching in a multi-one-to-one fashion. 4 | 5 | Author: 6 | Yiqun Chen 7 | """ 8 | 9 | from typing import Dict, List 10 | 11 | from scipy.optimize import linear_sum_assignment 12 | import torch 13 | from torch import Tensor 14 | from mmdet.core.bbox.builder import BBOX_ASSIGNERS 15 | from mmdet.core.bbox.assigners import BaseAssigner 16 | from mmdet.core.bbox.transforms import bbox_cxcywh_to_xyxy 17 | from mmdet.core.bbox.iou_calculators import bbox_overlaps 18 | from mmdet.core.bbox.assigners.assign_result import AssignResult 19 | from mmdet.core.bbox.assigners.base_assigner import BaseAssigner 20 | 21 | from date.utils.grid import get_points_in_center_area_mask 22 | 23 | 24 | @BBOX_ASSIGNERS.register_module() 25 | class POTOAssigner(BaseAssigner): 26 | """ 27 | Prediction-aware One-to-One (POTO) label assignment strategy. 28 | 29 | First described in 30 | `End-to-End Object Detection with Fully Convolutional Network`. 31 | arXiv: https://arxiv.org/abs/2012.03544 32 | 33 | Args: 34 | alpha: Controls the weight of the geometric (IoU) quality term. 35 | iou_type: The type of IoU used in the formula. 36 | """ 37 | def __init__( 38 | self, 39 | alpha: float = 0.8, 40 | iou_type: str = 'giou', 41 | strides: List[int] = [8, 16, 32, 64, 128], 42 | center_sampling_radius: float = 1.5) -> None: 43 | super().__init__() 44 | self.alpha = alpha 45 | self.iou_type = iou_type 46 | self.strides = strides 47 | self.center_sampling_radius = center_sampling_radius 48 | 49 | def assign(self, 50 | box_preds: Tensor, 51 | cls_preds: Tensor, 52 | gt_bboxes: Tensor, 53 | gt_labels: Tensor, 54 | img_meta: Dict, 55 | points: Tensor, 56 | num_points_per_level: List[int], 57 | gt_bboxes_ignore: Tensor = None, 58 | eps: float = 1e-7): 59 | """Computes one-to-one matching based on the weighted costs.
60 | 61 | This method assigns each query prediction to a ground truth or to 62 | the background. In `assigned_gt_inds`, -1 means don't care, 63 | 0 means negative sample (background), and a positive number is the 64 | 1-based index of the assigned gt. 65 | The assignment is done in the following steps; the order matters. 66 | 67 | 1. Assign every prediction to -1. 68 | 2. Compute the weighted costs. 69 | 3. Do Hungarian matching on CPU based on the costs. 70 | 4. Assign all to 0 (background) first; then, for each matched pair 71 | between predictions and gts, treat the prediction as foreground 72 | and assign the corresponding gt index (plus 1) to it. 73 | 74 | Args: 75 | box_preds (Tensor): Predicted boxes with normalized coordinates 76 | (cx, cy, w, h), which are all in range [0, 1]. This design 77 | is compatible with HungarianAssigner. Shape: [N, 4]. 78 | cls_preds (Tensor): Predicted classification logits, shape 79 | [N, num_class]. 80 | gt_bboxes (Tensor): Ground truth boxes with unnormalized 81 | coordinates (x1, y1, x2, y2). This design is 82 | compatible with HungarianAssigner. Shape: [G, 4]. 83 | gt_labels (Tensor): Label of `gt_bboxes`, shape (G,). 84 | img_meta (dict): Meta information for current image. 85 | gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are 86 | labelled as `ignored`. Default None. 87 | eps (int | float, optional): A value added to the denominator for 88 | numerical stability. Default 1e-7. 89 | 90 | Returns: 91 | :obj:`AssignResult`: The assigned result. 92 | """ 93 | num_points = len(points) 94 | num_gts = len(gt_bboxes) 95 | inside_gt_mask = get_points_in_center_area_mask( 96 | gt_bboxes, 97 | points, 98 | self.strides, 99 | self.center_sampling_radius, 100 | num_points_per_level) 101 | assert inside_gt_mask.shape == (num_points, num_gts), \ 102 | f'{inside_gt_mask.shape} == {(num_points, num_gts)}' 103 | 104 | cls_quality = self._get_cls_quality(cls_preds, gt_labels) 105 | assert cls_quality.shape == inside_gt_mask.shape, \ 106 | f'{cls_quality.shape} == {inside_gt_mask.shape}' 107 | 108 | img_h, img_w, _ = img_meta['img_shape'] 109 | factor = gt_bboxes.new_tensor( 110 | [img_w, img_h, img_w, img_h]).unsqueeze(0) 111 | box_preds = bbox_cxcywh_to_xyxy(box_preds) * factor 112 | box_quality = self._get_box_quality(box_preds, gt_bboxes) 113 | assert box_quality.shape == inside_gt_mask.shape, \ 114 | f'{box_quality.shape} == {inside_gt_mask.shape}' 115 | 116 | # Quality, the higher the better.
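        # The POTO quality combines classification and localization quality,
        # gated by the center-sampling mask (higher is better):
        #   Q[i, j] = 1{point_i inside the center area of gt_j}
        #             * sigmoid(cls_i[label_j]) ** (1 - alpha)
        #             * IoU(box_i, gt_j) ** alpha
        # For example, with alpha = 0.8, a prediction inside the center area
        # with class probability 0.6 and IoU 0.7 scores
        # 0.6 ** 0.2 * 0.7 ** 0.8 ≈ 0.68.
        # Hungarian matching (scipy's linear_sum_assignment with maximize=True)
        # then selects at most one prediction per ground truth.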
117 | quality = cls_quality * box_quality * inside_gt_mask 118 | 119 | matched_row_inds, matched_col_inds = \ 120 | linear_sum_assignment(quality.detach().cpu(), maximize=True) 121 | matched_row_inds = torch.from_numpy(matched_row_inds).to( 122 | box_preds.device) 123 | matched_col_inds = torch.from_numpy(matched_col_inds).to( 124 | box_preds.device) 125 | 126 | num_bboxes = box_preds.size(0) 127 | assigned_gt_inds = box_preds.new_full( 128 | (num_bboxes, ), -1, dtype=torch.long) 129 | assigned_labels = box_preds.new_full( 130 | (num_bboxes, ), -1, dtype=torch.long) 131 | assigned_gt_inds[:] = 0 132 | # assign foregrounds based on matching results 133 | assigned_gt_inds[matched_row_inds] = matched_col_inds + 1 134 | assigned_labels[matched_row_inds] = gt_labels[matched_col_inds] 135 | num_gts = gt_bboxes.size(0) 136 | assign_result = AssignResult( 137 | num_gts, assigned_gt_inds, None, assigned_labels) 138 | return assign_result 139 | 140 | def _get_box_quality( 141 | self, box_preds: Tensor, gt_bboxes: Tensor) -> Tensor: 142 | iou = bbox_overlaps(box_preds, gt_bboxes) 143 | return iou ** self.alpha 144 | 145 | def _get_cls_quality( 146 | self, cls_preds: Tensor, gt_labels: Tensor) -> Tensor: 147 | return cls_preds[:, gt_labels].sigmoid() ** (1 - self.alpha) 148 | -------------------------------------------------------------------------------- /date/models/modules/conv.py: -------------------------------------------------------------------------------- 1 | 2 | """Definition of Dynamic Deformable Convolution. 3 | 4 | Author: 5 | Yiqun Chen 6 | """ 7 | 8 | import torch 9 | from mmcv.cnn.bricks.registry import CONV_LAYERS 10 | from mmcv.ops.modulated_deform_conv import (ModulatedDeformConv2dPack, 11 | modulated_deform_conv2d) 12 | 13 | 14 | # DeformConv2d from torchvision.ops requires extra mask as input during 15 | # `forward` to enable the version 2, thus here the ModulatedDeformConv2d 16 | # is preferred. 17 | CONV_LAYERS.register_module( 18 | name='ModulatedDeformConv2dPack', module=ModulatedDeformConv2dPack) 19 | 20 | 21 | @CONV_LAYERS.register_module() 22 | class ModulatedDeformableConv2d(ModulatedDeformConv2dPack): 23 | def __init__( 24 | self, 25 | *args, 26 | **kwargs): 27 | super().__init__(*args, **kwargs) 28 | super().init_weights() 29 | 30 | def init_weights(self) -> None: 31 | None 32 | 33 | def forward(self, x: torch.Tensor) -> torch.Tensor: # type: ignore 34 | out = self.conv_offset(x) 35 | o1, o2, mask = torch.chunk(out, 3, dim=1) 36 | offset = torch.cat((o1, o2), dim=1) # (y, x, y, x, ...) 37 | mask = torch.sigmoid(mask) 38 | return modulated_deform_conv2d(x, offset, mask, self.weight, self.bias, 39 | self.stride, self.padding, 40 | self.dilation, self.groups, 41 | self.deform_groups) 42 | -------------------------------------------------------------------------------- /date/models/modules/identity.py: -------------------------------------------------------------------------------- 1 | 2 | """This file defines the identity modules. 
3 | 4 | Author: 5 | Yiqun Chen 6 | """ 7 | 8 | from warnings import warn 9 | 10 | from mmcv.runner import BaseModule 11 | from mmdet.models.builder import MODELS 12 | 13 | 14 | @MODELS.register_module() 15 | class Identity(BaseModule): 16 | def __init__(self, *args, **kwargs): 17 | super().__init__() 18 | warn(f'Configs {kwargs} for Identity module will not be used.') 19 | 20 | def forward(self, x, *args, **kwargs): 21 | return x 22 | -------------------------------------------------------------------------------- /date/models/predictors/base_predictor.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | Base predictor. 4 | 5 | Author: 6 | Yiqun Chen 7 | """ 8 | 9 | from typing import List, Tuple, Union, Dict 10 | 11 | from torch import nn, Tensor 12 | from mmcv.cnn import Scale, initialize 13 | from mmcv.runner import force_fp32 14 | from mmcv.utils.logging import print_log, logger_initialized 15 | from mmdet.core import multi_apply 16 | from mmdet.models.builder import build_loss 17 | from mmdet.models.dense_heads.base_dense_head import BaseDenseHead 18 | from mmdet.models.dense_heads.dense_test_mixins import BBoxTestMixin 19 | from date.models.modules.conv import ModulatedDeformableConv2d 20 | from date.utils.utils import sort_and_keep_topk_results 21 | 22 | 23 | class BaseHeadPredictor(BaseDenseHead, BBoxTestMixin): 24 | def __init__( 25 | self, 26 | num_classes: int, 27 | in_channels: int, 28 | deformable: bool = False, 29 | loss_cls: Dict = None, 30 | loss_iou: Dict = None, 31 | loss_box: Dict = None, 32 | train_cfg: Dict = dict(), 33 | test_cfg: Dict = dict(), 34 | init_cfg: Union[Dict, List[Dict]] = [ 35 | dict( 36 | type='Normal', 37 | layer='Conv2d', 38 | std=0.01, 39 | override=dict( 40 | type='Normal', 41 | name='conv_cls', 42 | std=0.01, 43 | bias_prob=0.01)), 44 | dict( 45 | type='Normal', 46 | layer='ModulatedDeformableConv2d', 47 | std=0.01, 48 | override=dict( 49 | type='Normal', 50 | name='conv_cls', 51 | std=0.01, 52 | bias_prob=0.01))], 53 | **kwargs): 54 | super().__init__(init_cfg) 55 | self.num_classes = num_classes 56 | self.in_channels = in_channels 57 | self.deformable = deformable 58 | self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False) 59 | if self.use_sigmoid_cls: 60 | self.cls_out_channels = num_classes 61 | else: 62 | self.cls_out_channels = num_classes + 1 63 | self.loss_cls: nn.Module = self._build_loss(loss_cls) 64 | self.loss_iou: nn.Module = self._build_loss(loss_iou) 65 | self.loss_box: nn.Module = self._build_loss(loss_box) 66 | self.train_cfg = train_cfg 67 | self.test_cfg = test_cfg 68 | self._build_model() 69 | self._positive_indices = list() 70 | self._negative_indices = list() 71 | self._assigned_indices = list() 72 | 73 | def init_weights(self): 74 | module_name = self.__class__.__name__ 75 | logger_names = list(logger_initialized.keys()) 76 | logger_name = logger_names[0] if logger_names else 'mmcv' 77 | print_log( 78 | f'initialize {module_name} with init_cfg {self.init_cfg}', 79 | logger=logger_name) 80 | initialize(self, self.init_cfg) 81 | 82 | def forward( 83 | self, 84 | cls_feat: Union[Tensor, List[Tensor]], 85 | reg_feat: Union[Tensor, List[Tensor]], 86 | scale: Union[Scale, List[Scale]], 87 | stride: Union[int, List[int]], 88 | ) -> Tuple[List[Tensor], ...]: 89 | if isinstance(cls_feat, Tensor) and isinstance(reg_feat, Tensor): 90 | return self.forward_single(cls_feat, 91 | reg_feat, 92 | scale, 93 | stride) 94 | else: 95 | return multi_apply( 96 | self.forward_single, cls_feat, 
reg_feat, scale, stride) 97 | 98 | def forward_single(self, 99 | cls_feat: Tensor, 100 | reg_feat: Tensor, 101 | scale: Scale, 102 | stride: int) -> Tuple[Tensor, Tensor]: 103 | if not isinstance(cls_feat, Tensor) \ 104 | or not isinstance(reg_feat, Tensor): 105 | raise TypeError( 106 | f'BaseHeadPredictor processes single level feature map, ' 107 | f'which should be type Tensor, but got ' 108 | f'type of cls_feat: {type(cls_feat)}, ' 109 | f'type of reg_feat: {type(reg_feat)}.') 110 | cls_pred = self.conv_cls(cls_feat) 111 | reg_pred = self.conv_reg(reg_feat) 112 | reg_pred: Tensor = scale(reg_pred) 113 | reg_pred = reg_pred.clamp(min=0) 114 | reg_pred = reg_pred * stride 115 | assert reg_pred.max() < 1e5, f'{reg_pred.max()}' 116 | return cls_pred, reg_pred 117 | 118 | def _build_loss(self, loss: Dict): 119 | return build_loss(loss) if loss else None 120 | 121 | def _build_model(self): 122 | self._build_cls_layer() 123 | self._build_reg_layer() 124 | 125 | def _build_reg_layer(self): 126 | layer = self._get_layer_type() 127 | self.conv_reg = layer(self.in_channels, 4, 3, padding=1) 128 | 129 | def _build_cls_layer(self): 130 | layer = self._get_layer_type() 131 | self.conv_cls = layer(self.in_channels, self.num_classes, 3, padding=1) 132 | 133 | def _get_layer_type(self): 134 | if self.deformable: 135 | return ModulatedDeformableConv2d 136 | else: 137 | return nn.Conv2d 138 | 139 | @force_fp32(apply_to=('cls_probs_list', 'reg_preds_list')) 140 | def get_bboxes( 141 | self, 142 | cls_probs: List[Tensor], 143 | reg_preds: List[Tensor], 144 | score_factors: List[Tensor] = None, 145 | img_metas: List[Dict] = None, 146 | cfg: Dict = None, 147 | rescale: bool = False, 148 | with_nms: bool = False, 149 | **kwargs): 150 | nms = self.test_cfg.get('nms', None) 151 | with_nms = nms is not None and nms is not False 152 | results = super(BaseHeadPredictor, self).get_bboxes( 153 | cls_probs, 154 | reg_preds, 155 | score_factors, 156 | img_metas, 157 | cfg, 158 | rescale, 159 | with_nms, 160 | **kwargs) 161 | assert len(results) == len(img_metas) 162 | if not with_nms: 163 | max_per_img = self.test_cfg.get('max_per_img', 100) 164 | return sort_and_keep_topk_results(results, max_per_img) 165 | return results 166 | 167 | def _clear_assigned_cache(self): 168 | if len(self._positive_indices): 169 | self._positive_indices.clear() 170 | if len(self._negative_indices): 171 | self._negative_indices.clear() 172 | if len(self._assigned_indices): 173 | self._assigned_indices.clear() 174 | -------------------------------------------------------------------------------- /date/models/predictors/fcos_predictor.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | Definition of one-to-many assignment branch (FCOS-style). 4 | 5 | Author: 6 | Yiqun Chen 7 | """ 8 | 9 | from typing import Callable, Sequence, Union, List, Dict 10 | from warnings import warn 11 | 12 | from torch import Tensor 13 | from mmdet.models.builder import HEADS 14 | from mmdet.models.dense_heads.fcos_head import FCOSHead 15 | from date.models.predictors.base_predictor import BaseHeadPredictor 16 | 17 | 18 | INF = 1E8 19 | 20 | 21 | class _FCOSHeadPredictor(FCOSHead): 22 | def _init_layers(self): 23 | pass 24 | 25 | 26 | @HEADS.register_module() 27 | class FCOSHeadPredictor(BaseHeadPredictor): 28 | """ 29 | An auxiliary head with FCOS. 
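    This branch delegates target assignment and loss computation to
    mmdetection's ``FCOSHead`` (one-to-many assignment), adding extra
    supervision signals during training; it keeps its own ``test_cfg``, so it
    can, in principle, also be evaluated on its own.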
30 | """ 31 | def __init__( 32 | self, 33 | num_classes: int, 34 | in_channels: int, 35 | strides: Sequence[int] = [8, 16, 32, 64, 128], 36 | regress_ranges: Sequence[Sequence[int]] = ( 37 | (-1, 64), (64, 128), (128, 256), (256, 512), (512, 1E8)), 38 | center_sampling: bool = False, 39 | center_sample_radius: bool = 1.5, 40 | centerness_on_reg: bool = False, 41 | norm_on_bbox: bool = False, 42 | infer_without_center: bool = False, 43 | loss_cls: Dict = None, 44 | loss_box: Dict = None, 45 | loss_ctr: Dict = None, 46 | deformable: bool = False, 47 | init_cfg: Union[Dict, List[Dict]] = [ 48 | dict(type='Normal', 49 | layer='Conv2d', 50 | std=0.01, 51 | override=dict( 52 | type='Normal', 53 | name='conv_cls', 54 | std=0.01, 55 | bias_prob=0.01)), 56 | dict(type='Normal', 57 | layer='ModulatedDeformableConv2d', 58 | std=0.01, 59 | override=dict( 60 | type='Normal', 61 | name='conv_cls', 62 | std=0.01, 63 | bias_prob=0.01))], 64 | **kwargs): 65 | super().__init__( 66 | num_classes=num_classes, 67 | in_channels=in_channels, 68 | deformable=deformable, 69 | loss_cls=loss_cls, 70 | loss_box=loss_box, 71 | init_cfg=init_cfg, 72 | **kwargs) 73 | if infer_without_center: 74 | loss_ctr['loss_weight'] = 0.0 75 | self.fcos_head = _FCOSHeadPredictor( 76 | num_classes=num_classes, 77 | in_channels=in_channels, 78 | regress_ranges=regress_ranges, 79 | strides=strides, 80 | center_sampling=center_sampling, 81 | center_sample_radius=center_sample_radius, 82 | norm_on_bbox=norm_on_bbox, 83 | centerness_on_reg=centerness_on_reg, 84 | loss_cls=loss_cls, 85 | loss_bbox=loss_box, 86 | loss_centerness=loss_ctr, 87 | init_cfg=None, 88 | **kwargs) 89 | self.infer_without_center = infer_without_center 90 | self.loss_cls = self.fcos_head.loss_cls 91 | self.loss_ctr = self.fcos_head.loss_centerness 92 | self.loss_box = self.fcos_head.loss_bbox 93 | 94 | def forward_single( 95 | self, 96 | cls_feat: Tensor, 97 | reg_feat: Tensor, 98 | scale: Callable[[Tensor], Tensor], 99 | stride: int) -> Tensor: 100 | if not isinstance(cls_feat, Tensor) \ 101 | or not isinstance(reg_feat, Tensor): 102 | raise TypeError( 103 | f'AuxiliaryGaussianHead processes single level feature map, ' 104 | f'which should be type Tensor, but got ' 105 | f'type of cls_feat: {type(cls_feat)}, ' 106 | f'type of reg_feat: {type(reg_feat)}.') 107 | # Follow FCOSHead. 108 | if self.fcos_head.centerness_on_reg: 109 | ctr_pred: Tensor = self.conv_ctr(reg_feat) 110 | else: 111 | ctr_pred: Tensor = self.conv_ctr(cls_feat) 112 | ctr_pred = ctr_pred[:, 0, ...].unsqueeze(1) 113 | assert len(ctr_pred.shape) == 4 and ctr_pred.shape[1] == 1, \ 114 | f'Unexpected ctr_pred.shape: {ctr_pred.shape}' 115 | 116 | cls_pred = self.conv_cls(cls_feat) 117 | reg_pred = self.conv_reg(reg_feat) 118 | reg_pred = scale(reg_pred).float() 119 | if self.fcos_head.norm_on_bbox: 120 | # reg_pred needed for gradient computation has been modified 121 | # by F.relu(reg_pred) when run with PyTorch 1.10. 
So replace 122 | # F.relu(reg_pred) with reg_pred.clamp(min=0) 123 | reg_pred = reg_pred.clamp(min=0) 124 | if not self.training: 125 | reg_pred *= stride 126 | else: 127 | reg_pred = reg_pred.exp() 128 | return cls_pred, reg_pred, ctr_pred 129 | 130 | def loss(self, 131 | cls_preds: List[Tensor], 132 | box_preds: List[Tensor], 133 | ctr_preds: List[Tensor], 134 | gt_bboxes: List[Tensor], 135 | gt_labels: List[Tensor], 136 | img_metas: List[Dict], 137 | gt_bboxes_ignore: List[Tensor] = None) -> Dict[str, Tensor]: 138 | loss_dict = self.fcos_head.loss( 139 | cls_preds, 140 | box_preds, 141 | ctr_preds, 142 | gt_bboxes, 143 | gt_labels, 144 | img_metas, 145 | gt_bboxes_ignore) 146 | return dict( 147 | loss_fcos_cls=loss_dict['loss_cls'], 148 | loss_fcos_iou=loss_dict['loss_bbox'], 149 | loss_fcos_ctr=loss_dict['loss_centerness']) 150 | 151 | def get_bboxes(self, *args, **kwargs): 152 | score_factors = kwargs.pop('score_factors', None) 153 | if score_factors is not None: 154 | raise ValueError( 155 | f'FCOSHeadPredictor does not recognize' 156 | f'specified score_factors.') 157 | with_nms = True if self.test_cfg.get('nms', True) else False 158 | kwargs.pop('with_nms') # Specified by cfg. 159 | if self.infer_without_center: 160 | warn(f'Center-ness branch is discarded during inference.') 161 | args = args[: 2] + (None, ) 162 | return self.fcos_head.get_bboxes(*args, with_nms=with_nms, **kwargs) 163 | 164 | def _build_model(self): 165 | self._build_cls_layer() 166 | self._build_reg_layer() 167 | self._build_ctr_layer() 168 | 169 | def _init_layers(self): 170 | pass 171 | 172 | def _build_ctr_layer(self): 173 | layer = self._get_layer_type() 174 | self.conv_ctr = layer(self.in_channels, 1, 3, padding=1) 175 | -------------------------------------------------------------------------------- /date/models/predictors/retina_predictor.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | Definition of the one-to-many assignment branch (RetinaNet-style). 
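Like the FCOS-style branch, this predictor reuses mmdetection's RetinaHead
(built with stacked_convs=0) for anchor-based target assignment and loss
computation, while the shared convolution tower is provided by the DATE head.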
4 | 5 | Author: 6 | Yiqun Chen 7 | """ 8 | 9 | from typing import Dict, Union, List 10 | from warnings import warn 11 | 12 | from torch import Tensor 13 | from mmcv.cnn import Scale 14 | from mmdet.models.builder import HEADS 15 | from mmdet.models.dense_heads.retina_head import RetinaHead 16 | from date.models.predictors.base_predictor import BaseHeadPredictor 17 | 18 | 19 | class _RetinaHeadPredictor(RetinaHead): 20 | def _init_layers(self): 21 | pass 22 | 23 | 24 | @HEADS.register_module() 25 | class RetinaHeadPredictor(BaseHeadPredictor): 26 | def __init__( 27 | self, 28 | num_classes: int, 29 | in_channels: int, 30 | deformable: bool = False, 31 | loss_cls: Dict = dict( 32 | type='FocalLoss', 33 | use_sigmoid=True, 34 | gamma=2.0, 35 | alpha=0.25, 36 | loss_weight=2.0), 37 | loss_iou: Dict = None, 38 | loss_box: Dict = dict(type='L1Loss', loss_weight=2.0), 39 | train_cfg: Dict = dict(), 40 | test_cfg: Dict = dict(), 41 | init_cfg: Union[Dict, List[Dict]] = dict( 42 | type='Normal', 43 | layer='Conv2d', 44 | std=0.01, 45 | override=dict( 46 | type='Normal', 47 | name='conv_cls', 48 | std=0.01, 49 | bias_prob=0.01)), 50 | **kwargs): 51 | retina_head = _RetinaHeadPredictor( 52 | num_classes=num_classes, 53 | in_channels=in_channels, 54 | feat_channels=in_channels, 55 | loss_cls=loss_cls, 56 | loss_bbox=loss_box, 57 | stacked_convs=0, 58 | anchor_generator=dict( 59 | type='AnchorGenerator', 60 | octave_base_scale=4, 61 | scales_per_octave=3, 62 | ratios=[0.5, 1.0, 2.0], 63 | strides=[8, 16, 32, 64, 128]), 64 | bbox_coder=dict( 65 | type='DeltaXYWHBBoxCoder', 66 | target_means=[.0, .0, .0, .0], 67 | target_stds=[1.0, 1.0, 1.0, 1.0]), 68 | train_cfg=train_cfg, 69 | test_cfg=test_cfg) 70 | self.num_base_priors = retina_head.num_base_priors 71 | self.cls_out_channels = retina_head.cls_out_channels 72 | super().__init__( 73 | num_classes, 74 | in_channels, 75 | deformable, 76 | loss_cls, 77 | loss_iou, 78 | loss_box, 79 | train_cfg, 80 | test_cfg, 81 | init_cfg, 82 | **kwargs) 83 | self.retina_head = retina_head 84 | self.loss_cls = self.retina_head.loss_cls 85 | self.loss_box = self.retina_head.loss_bbox 86 | 87 | def forward_single(self, 88 | cls_feat: Tensor, 89 | reg_feat: Tensor, 90 | scale: Scale, 91 | *args) -> Tensor: 92 | cls_pred = self.conv_cls(cls_feat) 93 | reg_pred = self.conv_reg(reg_feat) 94 | reg_pred = scale(reg_pred) 95 | return cls_pred, reg_pred 96 | 97 | def loss(self, 98 | cls_preds: List[Tensor], 99 | reg_preds: List[Tensor], 100 | gt_bboxes: List[Tensor], 101 | gt_labels: List[Tensor], 102 | img_metas: List[Dict], 103 | gt_bboxes_ignore: List[Tensor] = None) -> Dict[str, Tensor]: 104 | loss_dict = self.retina_head.loss( 105 | cls_preds, 106 | reg_preds, 107 | gt_bboxes, 108 | gt_labels, 109 | img_metas, 110 | gt_bboxes_ignore) 111 | return dict(loss_cls=loss_dict['loss_cls'], 112 | loss_box=loss_dict['loss_bbox']) 113 | 114 | def _build_cls_layer(self): 115 | layer = self._get_layer_type() 116 | conv_cls_out_channels = \ 117 | self.num_base_priors \ 118 | * self.cls_out_channels 119 | self.conv_cls = layer( 120 | self.in_channels, 121 | conv_cls_out_channels, 122 | 3, 123 | padding=1) 124 | 125 | def _build_reg_layer(self): 126 | layer = self._get_layer_type() 127 | self.conv_reg = layer( 128 | self.in_channels, 129 | self.num_base_priors * 4, 130 | 3, 131 | padding=1) 132 | 133 | def get_bboxes(self, *args, **kwargs): 134 | score_factors = kwargs.pop('score_factors', None) 135 | if score_factors is not None: 136 | raise ValueError( 137 | f'RetinaHeadPredictor does not 
recognize ' 138 | f'specified score_factors.') 139 | with_nms = True if self.test_cfg.get('nms', True) else False 140 | kwargs.pop('with_nms') # Specified by cfg. 141 | if not with_nms: 142 | warn('You are performing inference without NMS.') 143 | return self.retina_head.get_bboxes(*args, with_nms=with_nms, **kwargs) 144 | -------------------------------------------------------------------------------- /date/utils/grid.py: -------------------------------------------------------------------------------- 1 | 2 | from typing import List, Dict, Tuple 3 | import torch 4 | from torch import Tensor 5 | 6 | from mmdet.core import multi_apply 7 | from mmdet.core.bbox.transforms import bbox_xyxy_to_cxcywh 8 | 9 | 10 | def get_points_in_boxes_mask(points: Tensor, boxes: Tensor) -> Tensor: 11 | """ 12 | Get a mask indicating whether a point falls inside a box. 13 | 14 | Args: 15 | points: Coordinates in format (x, y) with shape (N, 2) 16 | boxes: Coordinates in format (x, y, x, y) with shape (G, 4) 17 | 18 | Returns: 19 | fall_in_boxes: A mask with shape (N, G) 20 | indicating whether each point falls inside each box. 21 | 22 | NOTE It is the user's responsibility to ensure that points and boxes are 23 | either both normalized or both unnormalized. 24 | """ 25 | points = points.unsqueeze(1).repeat(1, len(boxes), 1) # (N, G, 2) 26 | boxes = boxes.unsqueeze(0).repeat(len(points), 1, 1) # (N, G, 4) 27 | fall_in_boxes = \ 28 | (points[..., 0] > boxes[..., 0]) \ 29 | & (points[..., 1] > boxes[..., 1]) \ 30 | & (points[..., 0] < boxes[..., 2]) \ 31 | & (points[..., 1] < boxes[..., 3]) 32 | return fall_in_boxes 33 | 34 | 35 | def get_points_in_center_area_mask( 36 | gt_bboxes: Tensor, 37 | points: Tensor, 38 | strides: List[int], 39 | center_sampling_radius: float, 40 | num_points_per_level: List[int]) -> Tensor: 41 | """ 42 | Get a mask indicating whether a point lies 43 | in the center area of a gt bbox. 44 | 45 | NOTE gt_bboxes and points should be either both normalized 46 | or both unnormalized; this is the user's responsibility. 47 | 48 | Args: 49 | gt_bboxes: Ground truth bounding boxes 50 | in (x, y, x, y) format with shape (G, 4). 51 | points: Points in (x, y) format with shape (N, 2). N is the total 52 | number of points, summed over all feature levels. 53 | strides: A list recording the stride of each feature level. 54 | center_sampling_radius: Points within a radius of 55 | center_sampling_radius * stride in absolute coordinates will 56 | be treated as positive. 57 | num_points_per_level: Number of points at each feature level. 58 | 59 | Returns: 60 | inside_gt_bbox_mask: A mask with shape (N, G), 61 | indicating whether a point falls into the center area of a gt bbox.
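    Example (an illustrative sketch: a single FPN level with stride 8 and
    two points, checking only the returned shape):

        >>> points = torch.tensor([[4., 4.], [12., 4.]])    # N = 2
        >>> gt_bboxes = torch.tensor([[0., 0., 16., 16.]])  # G = 1
        >>> mask = get_points_in_center_area_mask(
        ...     gt_bboxes, points, strides=[8],
        ...     center_sampling_radius=1.5, num_points_per_level=[2])
        >>> mask.shape
        torch.Size([2, 1])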
62 | """ 63 | num_points = points.size(0) 64 | num_gts = gt_bboxes.size(0) 65 | gt_bboxes = gt_bboxes[None].expand(num_points, num_gts, 4) 66 | xs, ys = points[:, 0], points[:, 1] 67 | xs = xs[:, None].expand(num_points, num_gts) 68 | ys = ys[:, None].expand(num_points, num_gts) 69 | 70 | # condition1: inside a `center bbox` 71 | radius = center_sampling_radius 72 | center_xs = (gt_bboxes[..., 0] + gt_bboxes[..., 2]) / 2 73 | center_ys = (gt_bboxes[..., 1] + gt_bboxes[..., 3]) / 2 74 | center_gts = torch.zeros_like(gt_bboxes) 75 | stride = center_xs.new_zeros(center_xs.shape) 76 | 77 | # project the points on current lvl back to the `original` sizes 78 | lvl_begin = 0 79 | for lvl_idx, num_points_lvl in enumerate(num_points_per_level): 80 | lvl_end = lvl_begin + num_points_lvl 81 | stride[lvl_begin:lvl_end] = strides[lvl_idx] * radius 82 | lvl_begin = lvl_end 83 | 84 | x_mins = center_xs - stride 85 | y_mins = center_ys - stride 86 | x_maxs = center_xs + stride 87 | y_maxs = center_ys + stride 88 | center_gts[..., 0] = torch.where( 89 | x_mins > gt_bboxes[..., 0], x_mins, gt_bboxes[..., 0]) 90 | center_gts[..., 1] = torch.where( 91 | y_mins > gt_bboxes[..., 1], y_mins, gt_bboxes[..., 1]) 92 | center_gts[..., 2] = torch.where( 93 | x_maxs > gt_bboxes[..., 2], gt_bboxes[..., 2], x_maxs) 94 | center_gts[..., 3] = torch.where( 95 | y_maxs > gt_bboxes[..., 3], gt_bboxes[..., 3], y_maxs) 96 | 97 | cb_dist_left = xs - center_gts[..., 0] 98 | cb_dist_right = center_gts[..., 2] - xs 99 | cb_dist_top = ys - center_gts[..., 1] 100 | cb_dist_bottom = center_gts[..., 3] - ys 101 | center_bbox = torch.stack( 102 | (cb_dist_left, cb_dist_top, cb_dist_right, cb_dist_bottom), -1) 103 | inside_gt_bbox_mask = center_bbox.min(-1)[0] > 0 104 | 105 | return inside_gt_bbox_mask 106 | -------------------------------------------------------------------------------- /date/utils/utils.py: -------------------------------------------------------------------------------- 1 | 2 | from typing import List, Tuple 3 | from warnings import warn 4 | 5 | import torch 6 | from torch import nn, Tensor 7 | 8 | 9 | def sort_and_keep_topk_results(results: List[List[Tensor]], topk: int): 10 | return [sort_and_keep_topk_results_single_image(r, topk) for r in results] 11 | 12 | 13 | def sort_and_keep_topk_results_single_image(result: List[Tensor], topk: int): 14 | bboxes, scores, labels = result 15 | scores, inds = scores.sort(descending=True) 16 | bboxes = bboxes[inds] 17 | labels = labels[inds] 18 | scores = scores[:topk] 19 | bboxes = bboxes[:topk] 20 | labels = labels[:topk] 21 | return torch.cat([bboxes, scores[:, None]], -1), labels 22 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | 2 | # Create an conda environment. 3 | conda create -n date python=3.10 -y \ 4 | && conda activate date \ 5 | && conda install -c "nvidia/label/cuda-11.7.0" cuda-nvcc -y \ 6 | && conda install pytorch torchvision pytorch-cuda=11.7 -c pytorch -c nvidia -y \ 7 | && pip install -U openmim \ 8 | && mim install 'mmcv-full==1.7.0' \ 9 | && pip install 'mmdet==2.25.1'; 10 | 11 | if [ $? ] 12 | then 13 | python3 -m pip install --user -e . 14 | else 15 | echo "Installation DATE failed." 
16 | fi 17 | 18 | echo "Installation finished, please link your data (e.g., MSCOCO) under ./data" 19 | -------------------------------------------------------------------------------- /project.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools"] 3 | build-backend = "setuptools.build_meta" -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = date 3 | version = 0.1.0 4 | author = Yiqun Chen 5 | description = DATE: Dual Assignment for End-to-End Fully Convolutional Object Detection. 6 | long_description = file: README.md, CHANGELOG.md, LICENSE.txt 7 | license = BSD 3-Clause License 8 | classifiers = 9 | Framework :: PyTorch 10 | Programming Language :: Python :: 3 11 | 12 | [options] 13 | zip_safe = False 14 | include_package_data = True 15 | packages = find: 16 | install_requires = 17 | # protobuf <= 3.20 18 | attrmap 19 | cython 20 | numpy 21 | tqdm 22 | opencv-python 23 | scipy 24 | scikit-image 25 | scikit-learn 26 | pyyaml 27 | torch-tb-profiler 28 | pycocotools 29 | pytest 30 | mmcv-full 31 | mmdet>=2.25 32 | tidecv 33 | 34 | [options.extras_require] 35 | pdf = ReportLab>=1.2; RXP 36 | rest = docutils>=0.3; pack ==1.1, ==1.3 37 | 38 | [options.packages.find] 39 | exclude = 40 | examples* 41 | tools* 42 | docs* 43 | test* 44 | ckpts* 45 | logs* 46 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | 2 | from setuptools import setup 3 | 4 | if __name__ == '__main__': 5 | setup() 6 | -------------------------------------------------------------------------------- /tools/analysis_tools/benchmark.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import copy 4 | import os 5 | import time 6 | 7 | import torch 8 | from mmcv import Config, DictAction 9 | from mmcv.cnn import fuse_conv_bn 10 | from mmcv.parallel import MMDistributedDataParallel 11 | from mmcv.runner import init_dist, load_checkpoint, wrap_fp16_model 12 | 13 | from mmdet.datasets import (build_dataloader, build_dataset, 14 | replace_ImageToTensor) 15 | from mmdet.models import build_detector 16 | from mmdet.utils import replace_cfg_vals, update_data_root 17 | 18 | 19 | def parse_args(): 20 | parser = argparse.ArgumentParser(description='MMDet benchmark a model') 21 | parser.add_argument('config', help='test config file path') 22 | parser.add_argument('checkpoint', help='checkpoint file') 23 | parser.add_argument( 24 | '--repeat-num', 25 | type=int, 26 | default=1, 27 | help='number of repeat times of measurement for averaging the results') 28 | parser.add_argument( 29 | '--max-iter', type=int, default=2000, help='num of max iter') 30 | parser.add_argument( 31 | '--log-interval', type=int, default=50, help='interval of logging') 32 | parser.add_argument( 33 | '--fuse-conv-bn', 34 | action='store_true', 35 | help='Whether to fuse conv and bn, this will slightly increase' 36 | 'the inference speed') 37 | parser.add_argument( 38 | '--cfg-options', 39 | nargs='+', 40 | action=DictAction, 41 | help='override some settings in the used config, the key-value pair ' 42 | 'in xxx=yyy format will be merged into config file. 
If the value to ' 43 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 44 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 45 | 'Note that the quotation marks are necessary and that no white space ' 46 | 'is allowed.') 47 | parser.add_argument( 48 | '--launcher', 49 | choices=['none', 'pytorch', 'slurm', 'mpi'], 50 | default='none', 51 | help='job launcher') 52 | parser.add_argument('--local_rank', type=int, default=0) 53 | args = parser.parse_args() 54 | if 'LOCAL_RANK' not in os.environ: 55 | os.environ['LOCAL_RANK'] = str(args.local_rank) 56 | return args 57 | 58 | 59 | def measure_inference_speed(cfg, checkpoint, max_iter, log_interval, 60 | is_fuse_conv_bn): 61 | # set cudnn_benchmark 62 | if cfg.get('cudnn_benchmark', False): 63 | torch.backends.cudnn.benchmark = True 64 | cfg.model.pretrained = None 65 | cfg.data.test.test_mode = True 66 | 67 | # build the dataloader 68 | samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1) 69 | if samples_per_gpu > 1: 70 | # Replace 'ImageToTensor' to 'DefaultFormatBundle' 71 | cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline) 72 | dataset = build_dataset(cfg.data.test) 73 | data_loader = build_dataloader( 74 | dataset, 75 | samples_per_gpu=1, 76 | # Because multiple processes will occupy additional CPU resources, 77 | # FPS statistics will be more unstable when workers_per_gpu is not 0. 78 | # It is reasonable to set workers_per_gpu to 0. 79 | workers_per_gpu=0, 80 | dist=True, 81 | shuffle=False) 82 | 83 | # build the model and load checkpoint 84 | cfg.model.train_cfg = None 85 | model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg')) 86 | fp16_cfg = cfg.get('fp16', None) 87 | if fp16_cfg is not None: 88 | wrap_fp16_model(model) 89 | load_checkpoint(model, checkpoint, map_location='cpu') 90 | if is_fuse_conv_bn: 91 | model = fuse_conv_bn(model) 92 | 93 | model = MMDistributedDataParallel( 94 | model.cuda(), 95 | device_ids=[torch.cuda.current_device()], 96 | broadcast_buffers=False) 97 | model.eval() 98 | 99 | # the first several iterations may be very slow so skip them 100 | num_warmup = 5 101 | pure_inf_time = 0 102 | fps = 0 103 | 104 | # benchmark with 2000 image and take the average 105 | for i, data in enumerate(data_loader): 106 | 107 | torch.cuda.synchronize() 108 | start_time = time.perf_counter() 109 | 110 | with torch.no_grad(): 111 | model(return_loss=False, rescale=True, **data) 112 | 113 | torch.cuda.synchronize() 114 | elapsed = time.perf_counter() - start_time 115 | 116 | if i >= num_warmup: 117 | pure_inf_time += elapsed 118 | if (i + 1) % log_interval == 0: 119 | fps = (i + 1 - num_warmup) / pure_inf_time 120 | print( 121 | f'Done image [{i + 1:<3}/ {max_iter}], ' 122 | f'fps: {fps:.1f} img / s, ' 123 | f'times per image: {1000 / fps:.1f} ms / img', 124 | flush=True) 125 | 126 | if (i + 1) == max_iter: 127 | fps = (i + 1 - num_warmup) / pure_inf_time 128 | print( 129 | f'Overall fps: {fps:.1f} img / s, ' 130 | f'times per image: {1000 / fps:.1f} ms / img', 131 | flush=True) 132 | break 133 | return fps 134 | 135 | 136 | def repeat_measure_inference_speed(cfg, 137 | checkpoint, 138 | max_iter, 139 | log_interval, 140 | is_fuse_conv_bn, 141 | repeat_num=1): 142 | assert repeat_num >= 1 143 | 144 | fps_list = [] 145 | 146 | for _ in range(repeat_num): 147 | # 148 | cp_cfg = copy.deepcopy(cfg) 149 | 150 | fps_list.append( 151 | measure_inference_speed(cp_cfg, checkpoint, max_iter, log_interval, 152 | is_fuse_conv_bn)) 153 | 154 | if repeat_num > 1: 155 | 
fps_list_ = [round(fps, 1) for fps in fps_list] 156 | times_pre_image_list_ = [round(1000 / fps, 1) for fps in fps_list] 157 | mean_fps_ = sum(fps_list_) / len(fps_list_) 158 | mean_times_pre_image_ = sum(times_pre_image_list_) / len( 159 | times_pre_image_list_) 160 | print( 161 | f'Overall fps: {fps_list_}[{mean_fps_:.1f}] img / s, ' 162 | f'times per image: ' 163 | f'{times_pre_image_list_}[{mean_times_pre_image_:.1f}] ms / img', 164 | flush=True) 165 | return fps_list 166 | 167 | return fps_list[0] 168 | 169 | 170 | def main(): 171 | args = parse_args() 172 | 173 | cfg = Config.fromfile(args.config) 174 | 175 | # replace the ${key} with the value of cfg.key 176 | cfg = replace_cfg_vals(cfg) 177 | 178 | # update data root according to MMDET_DATASETS 179 | update_data_root(cfg) 180 | 181 | if args.cfg_options is not None: 182 | cfg.merge_from_dict(args.cfg_options) 183 | 184 | if args.launcher == 'none': 185 | raise NotImplementedError('Only supports distributed mode') 186 | else: 187 | init_dist(args.launcher, **cfg.dist_params) 188 | 189 | repeat_measure_inference_speed(cfg, args.checkpoint, args.max_iter, 190 | args.log_interval, args.fuse_conv_bn, 191 | args.repeat_num) 192 | 193 | 194 | if __name__ == '__main__': 195 | main() 196 | -------------------------------------------------------------------------------- /tools/analysis_tools/eval_metric.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | 4 | import mmcv 5 | from mmcv import Config, DictAction 6 | 7 | from mmdet.datasets import build_dataset 8 | from mmdet.utils import replace_cfg_vals, update_data_root 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser(description='Evaluate metric of the ' 13 | 'results saved in pkl format') 14 | parser.add_argument('config', help='Config of the model') 15 | parser.add_argument('pkl_results', help='Results in pickle format') 16 | parser.add_argument( 17 | '--format-only', 18 | action='store_true', 19 | help='Format the output results without perform evaluation. It is' 20 | 'useful when you want to format the result to a specific format and ' 21 | 'submit it to the test server') 22 | parser.add_argument( 23 | '--eval', 24 | type=str, 25 | nargs='+', 26 | help='Evaluation metrics, which depends on the dataset, e.g., "bbox",' 27 | ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC') 28 | parser.add_argument( 29 | '--cfg-options', 30 | nargs='+', 31 | action=DictAction, 32 | help='override some settings in the used config, the key-value pair ' 33 | 'in xxx=yyy format will be merged into config file. If the value to ' 34 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 35 | 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' 36 | 'Note that the quotation marks are necessary and that no white space ' 37 | 'is allowed.') 38 | parser.add_argument( 39 | '--eval-options', 40 | nargs='+', 41 | action=DictAction, 42 | help='custom options for evaluation, the key-value pair in xxx=yyy ' 43 | 'format will be kwargs for dataset.evaluate() function') 44 | args = parser.parse_args() 45 | return args 46 | 47 | 48 | def main(): 49 | args = parse_args() 50 | 51 | cfg = Config.fromfile(args.config) 52 | 53 | # replace the ${key} with the value of cfg.key 54 | cfg = replace_cfg_vals(cfg) 55 | 56 | # update data root according to MMDET_DATASETS 57 | update_data_root(cfg) 58 | 59 | assert args.eval or args.format_only, ( 60 | 'Please specify at least one operation (eval/format the results) with ' 61 | 'the argument "--eval", "--format-only"') 62 | if args.eval and args.format_only: 63 | raise ValueError('--eval and --format_only cannot be both specified') 64 | 65 | if args.cfg_options is not None: 66 | cfg.merge_from_dict(args.cfg_options) 67 | cfg.data.test.test_mode = True 68 | 69 | dataset = build_dataset(cfg.data.test) 70 | outputs = mmcv.load(args.pkl_results) 71 | 72 | kwargs = {} if args.eval_options is None else args.eval_options 73 | if args.format_only: 74 | dataset.format_results(outputs, **kwargs) 75 | if args.eval: 76 | eval_kwargs = cfg.get('evaluation', {}).copy() 77 | # hard-code way to remove EvalHook args 78 | for key in [ 79 | 'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best', 80 | 'rule' 81 | ]: 82 | eval_kwargs.pop(key, None) 83 | eval_kwargs.update(dict(metric=args.eval, **kwargs)) 84 | print(dataset.evaluate(outputs, **eval_kwargs)) 85 | 86 | 87 | if __name__ == '__main__': 88 | main() 89 | -------------------------------------------------------------------------------- /tools/analysis_tools/get_flops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | 4 | import numpy as np 5 | import torch 6 | from mmcv import Config, DictAction 7 | 8 | from mmdet.models import build_detector 9 | 10 | try: 11 | from mmcv.cnn import get_model_complexity_info 12 | except ImportError: 13 | raise ImportError('Please upgrade mmcv to >0.6.2') 14 | 15 | 16 | def parse_args(): 17 | parser = argparse.ArgumentParser(description='Train a detector') 18 | parser.add_argument('config', help='train config file path') 19 | parser.add_argument( 20 | '--shape', 21 | type=int, 22 | nargs='+', 23 | default=[1280, 800], 24 | help='input image size') 25 | parser.add_argument( 26 | '--cfg-options', 27 | nargs='+', 28 | action=DictAction, 29 | help='override some settings in the used config, the key-value pair ' 30 | 'in xxx=yyy format will be merged into config file. If the value to ' 31 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 32 | 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' 33 | 'Note that the quotation marks are necessary and that no white space ' 34 | 'is allowed.') 35 | parser.add_argument( 36 | '--size-divisor', 37 | type=int, 38 | default=32, 39 | help='Pad the input image, the minimum size that is divisible ' 40 | 'by size_divisor, -1 means do not pad the image.') 41 | args = parser.parse_args() 42 | return args 43 | 44 | 45 | def main(): 46 | 47 | args = parse_args() 48 | 49 | if len(args.shape) == 1: 50 | h = w = args.shape[0] 51 | elif len(args.shape) == 2: 52 | h, w = args.shape 53 | else: 54 | raise ValueError('invalid input shape') 55 | ori_shape = (3, h, w) 56 | divisor = args.size_divisor 57 | if divisor > 0: 58 | h = int(np.ceil(h / divisor)) * divisor 59 | w = int(np.ceil(w / divisor)) * divisor 60 | 61 | input_shape = (3, h, w) 62 | 63 | cfg = Config.fromfile(args.config) 64 | if args.cfg_options is not None: 65 | cfg.merge_from_dict(args.cfg_options) 66 | 67 | model = build_detector( 68 | cfg.model, 69 | train_cfg=cfg.get('train_cfg'), 70 | test_cfg=cfg.get('test_cfg')) 71 | if torch.cuda.is_available(): 72 | model.cuda() 73 | model.eval() 74 | 75 | if hasattr(model, 'forward_dummy'): 76 | model.forward = model.forward_dummy 77 | else: 78 | raise NotImplementedError( 79 | 'FLOPs counter is currently not currently supported with {}'. 80 | format(model.__class__.__name__)) 81 | 82 | flops, params = get_model_complexity_info(model, input_shape) 83 | split_line = '=' * 30 84 | 85 | if divisor > 0 and \ 86 | input_shape != ori_shape: 87 | print(f'{split_line}\nUse size divisor set input shape ' 88 | f'from {ori_shape} to {input_shape}\n') 89 | print(f'{split_line}\nInput shape: {input_shape}\n' 90 | f'Flops: {flops}\nParams: {params}\n{split_line}') 91 | print('!!!Please be cautious if you use the results in papers. ' 92 | 'You may need to check if all ops are supported and verify that the ' 93 | 'flops computation is correct.') 94 | 95 | 96 | if __name__ == '__main__': 97 | main() 98 | -------------------------------------------------------------------------------- /tools/dataset_converters/cityscapes.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import argparse 3 | import glob 4 | import os.path as osp 5 | 6 | import cityscapesscripts.helpers.labels as CSLabels 7 | import mmcv 8 | import numpy as np 9 | import pycocotools.mask as maskUtils 10 | 11 | 12 | def collect_files(img_dir, gt_dir): 13 | suffix = 'leftImg8bit.png' 14 | files = [] 15 | for img_file in glob.glob(osp.join(img_dir, '**/*.png')): 16 | assert img_file.endswith(suffix), img_file 17 | inst_file = gt_dir + img_file[ 18 | len(img_dir):-len(suffix)] + 'gtFine_instanceIds.png' 19 | # Note that labelIds are not converted to trainId for seg map 20 | segm_file = gt_dir + img_file[ 21 | len(img_dir):-len(suffix)] + 'gtFine_labelIds.png' 22 | files.append((img_file, inst_file, segm_file)) 23 | assert len(files), f'No images found in {img_dir}' 24 | print(f'Loaded {len(files)} images from {img_dir}') 25 | 26 | return files 27 | 28 | 29 | def collect_annotations(files, nproc=1): 30 | print('Loading annotation images') 31 | if nproc > 1: 32 | images = mmcv.track_parallel_progress( 33 | load_img_info, files, nproc=nproc) 34 | else: 35 | images = mmcv.track_progress(load_img_info, files) 36 | 37 | return images 38 | 39 | 40 | def load_img_info(files): 41 | img_file, inst_file, segm_file = files 42 | inst_img = mmcv.imread(inst_file, 'unchanged') 43 | # ids < 24 are stuff labels (filtering them first is about 5% faster) 44 | unique_inst_ids = np.unique(inst_img[inst_img >= 24]) 45 | anno_info = [] 46 | for inst_id in unique_inst_ids: 47 | # For non-crowd annotations, inst_id // 1000 is the label_id 48 | # Crowd annotations have <1000 instance ids 49 | label_id = inst_id // 1000 if inst_id >= 1000 else inst_id 50 | label = CSLabels.id2label[label_id] 51 | if not label.hasInstances or label.ignoreInEval: 52 | continue 53 | 54 | category_id = label.id 55 | iscrowd = int(inst_id < 1000) 56 | mask = np.asarray(inst_img == inst_id, dtype=np.uint8, order='F') 57 | mask_rle = maskUtils.encode(mask[:, :, None])[0] 58 | 59 | area = maskUtils.area(mask_rle) 60 | # convert to COCO style XYWH format 61 | bbox = maskUtils.toBbox(mask_rle) 62 | 63 | # for json encoding 64 | mask_rle['counts'] = mask_rle['counts'].decode() 65 | 66 | anno = dict( 67 | iscrowd=iscrowd, 68 | category_id=category_id, 69 | bbox=bbox.tolist(), 70 | area=area.tolist(), 71 | segmentation=mask_rle) 72 | anno_info.append(anno) 73 | video_name = osp.basename(osp.dirname(img_file)) 74 | img_info = dict( 75 | # remove img_prefix for filename 76 | file_name=osp.join(video_name, osp.basename(img_file)), 77 | height=inst_img.shape[0], 78 | width=inst_img.shape[1], 79 | anno_info=anno_info, 80 | segm_file=osp.join(video_name, osp.basename(segm_file))) 81 | 82 | return img_info 83 | 84 | 85 | def cvt_annotations(image_infos, out_json_name): 86 | out_json = dict() 87 | img_id = 0 88 | ann_id = 0 89 | out_json['images'] = [] 90 | out_json['categories'] = [] 91 | out_json['annotations'] = [] 92 | for image_info in image_infos: 93 | image_info['id'] = img_id 94 | anno_infos = image_info.pop('anno_info') 95 | out_json['images'].append(image_info) 96 | for anno_info in anno_infos: 97 | anno_info['image_id'] = img_id 98 | anno_info['id'] = ann_id 99 | out_json['annotations'].append(anno_info) 100 | ann_id += 1 101 | img_id += 1 102 | for label in CSLabels.labels: 103 | if label.hasInstances and not label.ignoreInEval: 104 | cat = dict(id=label.id, name=label.name) 105 | out_json['categories'].append(cat) 106 | 107 | if len(out_json['annotations']) == 0: 108 | out_json.pop('annotations') 109 | 110 | mmcv.dump(out_json, out_json_name) 
111 | return out_json 112 | 113 | 114 | def parse_args(): 115 | parser = argparse.ArgumentParser( 116 | description='Convert Cityscapes annotations to COCO format') 117 | parser.add_argument('cityscapes_path', help='cityscapes data path') 118 | parser.add_argument('--img-dir', default='leftImg8bit', type=str) 119 | parser.add_argument('--gt-dir', default='gtFine', type=str) 120 | parser.add_argument('-o', '--out-dir', help='output path') 121 | parser.add_argument( 122 | '--nproc', default=1, type=int, help='number of process') 123 | args = parser.parse_args() 124 | return args 125 | 126 | 127 | def main(): 128 | args = parse_args() 129 | cityscapes_path = args.cityscapes_path 130 | out_dir = args.out_dir if args.out_dir else cityscapes_path 131 | mmcv.mkdir_or_exist(out_dir) 132 | 133 | img_dir = osp.join(cityscapes_path, args.img_dir) 134 | gt_dir = osp.join(cityscapes_path, args.gt_dir) 135 | 136 | set_name = dict( 137 | train='instancesonly_filtered_gtFine_train.json', 138 | val='instancesonly_filtered_gtFine_val.json', 139 | test='instancesonly_filtered_gtFine_test.json') 140 | 141 | for split, json_name in set_name.items(): 142 | print(f'Converting {split} into {json_name}') 143 | with mmcv.Timer( 144 | print_tmpl='It took {}s to convert Cityscapes annotation'): 145 | files = collect_files( 146 | osp.join(img_dir, split), osp.join(gt_dir, split)) 147 | image_infos = collect_annotations(files, nproc=args.nproc) 148 | cvt_annotations(image_infos, osp.join(out_dir, json_name)) 149 | 150 | 151 | if __name__ == '__main__': 152 | main() 153 | -------------------------------------------------------------------------------- /tools/dataset_converters/crowdhuman.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | Modified by Paper 2349 CVPR 2023 4 | 5 | https://github.com/xingyizhou/CenterTrack 6 | Modified by Peize Sun 7 | """ 8 | 9 | import os 10 | import json 11 | from PIL import Image 12 | 13 | DATA_PATH = 'data/CrowdHuman/' 14 | OUT_PATH = DATA_PATH + 'annotations/' 15 | SPLITS = ['val', 'train'] 16 | DEBUG = False 17 | 18 | 19 | def load_func(fpath): 20 | print('fpath', fpath) 21 | assert os.path.exists(fpath) 22 | with open(fpath, 'r') as fid: 23 | lines = fid.readlines() 24 | records = [json.loads(line.strip('\n')) for line in lines] 25 | return records 26 | 27 | 28 | def main(): 29 | if not os.path.exists(OUT_PATH): 30 | os.mkdir(OUT_PATH) 31 | for split in SPLITS: 32 | # data_path = DATA_PATH + split 33 | out_path = OUT_PATH + '{}.json'.format(split) 34 | out = {'images': [], 35 | 'annotations': [], 36 | 'categories': [{'id': 1, 'name': 'person'}]} 37 | ann_path = DATA_PATH + 'annotation_{}.odgt'.format(split) 38 | anns_data = load_func(ann_path) 39 | image_cnt = 0 40 | ann_cnt = 0 41 | # video_cnt = 0 42 | for ann_data in anns_data: 43 | image_cnt += 1 44 | file_path = os.path.join(DATA_PATH, 45 | 'Images', 46 | f'{ann_data["ID"]}.jpg') 47 | im = Image.open(file_path) 48 | image_info = {'file_name': '{}.jpg'.format(ann_data['ID']), 49 | 'id': image_cnt, 50 | 'height': im.size[1], 51 | 'width': im.size[0]} 52 | out['images'].append(image_info) 53 | if split != 'test': 54 | anns = ann_data['gtboxes'] 55 | for i in range(len(anns)): 56 | ann_cnt += 1 57 | fbox = anns[i]['fbox'] 58 | iscrowd = 1 if 'extra' in anns[i] and \ 59 | 'ignore' in anns[i]['extra'] and \ 60 | anns[i]['extra']['ignore'] == 1 else 0 61 | ann = {'id': ann_cnt, 62 | 'category_id': 1, 63 | 'image_id': image_cnt, 64 | 'bbox_vis': anns[i]['vbox'], 65 | 'bbox': fbox, 66 | 
'area': fbox[2] * fbox[3], 67 | 'iscrowd': iscrowd} 68 | out['annotations'].append(ann) 69 | print( 70 | 'loaded {} for {} images and {} samples'.format( 71 | split, len(out['images']), len(out['annotations']))) 72 | json.dump(out, open(out_path, 'w')) 73 | 74 | 75 | if __name__ == "__main__": 76 | main() 77 | -------------------------------------------------------------------------------- /tools/dataset_converters/images2coco.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os 4 | 5 | import mmcv 6 | from PIL import Image 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser( 11 | description='Convert images to coco format without annotations') 12 | parser.add_argument('img_path', help='The root path of images') 13 | parser.add_argument( 14 | 'classes', type=str, help='The text file name of storage class list') 15 | parser.add_argument( 16 | 'out', 17 | type=str, 18 | help='The output annotation json file name, The save dir is in the ' 19 | 'same directory as img_path') 20 | parser.add_argument( 21 | '-e', 22 | '--exclude-extensions', 23 | type=str, 24 | nargs='+', 25 | help='The suffix of images to be excluded, such as "png" and "bmp"') 26 | args = parser.parse_args() 27 | return args 28 | 29 | 30 | def collect_image_infos(path, exclude_extensions=None): 31 | img_infos = [] 32 | 33 | images_generator = mmcv.scandir(path, recursive=True) 34 | for image_path in mmcv.track_iter_progress(list(images_generator)): 35 | if exclude_extensions is None or ( 36 | exclude_extensions is not None 37 | and not image_path.lower().endswith(exclude_extensions)): 38 | image_path = os.path.join(path, image_path) 39 | img_pillow = Image.open(image_path) 40 | img_info = { 41 | 'filename': image_path, 42 | 'width': img_pillow.width, 43 | 'height': img_pillow.height, 44 | } 45 | img_infos.append(img_info) 46 | return img_infos 47 | 48 | 49 | def cvt_to_coco_json(img_infos, classes): 50 | image_id = 0 51 | coco = dict() 52 | coco['images'] = [] 53 | coco['type'] = 'instance' 54 | coco['categories'] = [] 55 | coco['annotations'] = [] 56 | image_set = set() 57 | 58 | for category_id, name in enumerate(classes): 59 | category_item = dict() 60 | category_item['supercategory'] = str('none') 61 | category_item['id'] = int(category_id) 62 | category_item['name'] = str(name) 63 | coco['categories'].append(category_item) 64 | 65 | for img_dict in img_infos: 66 | file_name = img_dict['filename'] 67 | assert file_name not in image_set 68 | image_item = dict() 69 | image_item['id'] = int(image_id) 70 | image_item['file_name'] = str(file_name) 71 | image_item['height'] = int(img_dict['height']) 72 | image_item['width'] = int(img_dict['width']) 73 | coco['images'].append(image_item) 74 | image_set.add(file_name) 75 | 76 | image_id += 1 77 | return coco 78 | 79 | 80 | def main(): 81 | args = parse_args() 82 | assert args.out.endswith( 83 | 'json'), 'The output file name must be json suffix' 84 | 85 | # 1 load image list info 86 | img_infos = collect_image_infos(args.img_path, args.exclude_extensions) 87 | 88 | # 2 convert to coco format data 89 | classes = mmcv.list_from_file(args.classes) 90 | coco_info = cvt_to_coco_json(img_infos, classes) 91 | 92 | # 3 dump 93 | save_dir = os.path.join(args.img_path, '..', 'annotations') 94 | mmcv.mkdir_or_exist(save_dir) 95 | save_path = os.path.join(save_dir, args.out) 96 | mmcv.dump(coco_info, save_path) 97 | print(f'save json file: {save_path}') 98 | 99 | 
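# Illustrative usage (a sketch; the paths below are placeholders):
#   python tools/dataset_converters/images2coco.py \
#       data/my_images classes.txt my_images_coco.json
# where `classes.txt` lists one class name per line. The generated json is
# saved to an `annotations/` directory next to `img_path`.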
100 | if __name__ == '__main__': 101 | main() 102 | -------------------------------------------------------------------------------- /tools/deployment/mmdet2torchserve.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from argparse import ArgumentParser, Namespace 3 | from pathlib import Path 4 | from tempfile import TemporaryDirectory 5 | 6 | import mmcv 7 | 8 | try: 9 | from model_archiver.model_packaging import package_model 10 | from model_archiver.model_packaging_utils import ModelExportUtils 11 | except ImportError: 12 | package_model = None 13 | 14 | 15 | def mmdet2torchserve( 16 | config_file: str, 17 | checkpoint_file: str, 18 | output_folder: str, 19 | model_name: str, 20 | model_version: str = '1.0', 21 | force: bool = False, 22 | ): 23 | """Converts MMDetection model (config + checkpoint) to TorchServe `.mar`. 24 | 25 | Args: 26 | config_file: 27 | In MMDetection config format. 28 | The contents vary for each task repository. 29 | checkpoint_file: 30 | In MMDetection checkpoint format. 31 | The contents vary for each task repository. 32 | output_folder: 33 | Folder where `{model_name}.mar` will be created. 34 | The file created will be in TorchServe archive format. 35 | model_name: 36 | If not None, used for naming the `{model_name}.mar` file 37 | that will be created under `output_folder`. 38 | If None, `{Path(checkpoint_file).stem}` will be used. 39 | model_version: 40 | Model's version. 41 | force: 42 | If True, if there is an existing `{model_name}.mar` 43 | file under `output_folder` it will be overwritten. 44 | """ 45 | mmcv.mkdir_or_exist(output_folder) 46 | 47 | config = mmcv.Config.fromfile(config_file) 48 | 49 | with TemporaryDirectory() as tmpdir: 50 | config.dump(f'{tmpdir}/config.py') 51 | 52 | args = Namespace( 53 | **{ 54 | 'model_file': f'{tmpdir}/config.py', 55 | 'serialized_file': checkpoint_file, 56 | 'handler': f'{Path(__file__).parent}/mmdet_handler.py', 57 | 'model_name': model_name or Path(checkpoint_file).stem, 58 | 'version': model_version, 59 | 'export_path': output_folder, 60 | 'force': force, 61 | 'requirements_file': None, 62 | 'extra_files': None, 63 | 'runtime': 'python', 64 | 'archive_format': 'default' 65 | }) 66 | manifest = ModelExportUtils.generate_manifest_json(args) 67 | package_model(args, manifest) 68 | 69 | 70 | def parse_args(): 71 | parser = ArgumentParser( 72 | description='Convert MMDetection models to TorchServe `.mar` format.') 73 | parser.add_argument('config', type=str, help='config file path') 74 | parser.add_argument('checkpoint', type=str, help='checkpoint file path') 75 | parser.add_argument( 76 | '--output-folder', 77 | type=str, 78 | required=True, 79 | help='Folder where `{model_name}.mar` will be created.') 80 | parser.add_argument( 81 | '--model-name', 82 | type=str, 83 | default=None, 84 | help='If not None, used for naming the `{model_name}.mar`' 85 | 'file that will be created under `output_folder`.' 
86 | 'If None, `{Path(checkpoint_file).stem}` will be used.') 87 | parser.add_argument( 88 | '--model-version', 89 | type=str, 90 | default='1.0', 91 | help='Number used for versioning.') 92 | parser.add_argument( 93 | '-f', 94 | '--force', 95 | action='store_true', 96 | help='overwrite the existing `{model_name}.mar`') 97 | args = parser.parse_args() 98 | 99 | return args 100 | 101 | 102 | if __name__ == '__main__': 103 | args = parse_args() 104 | 105 | if package_model is None: 106 | raise ImportError('`torch-model-archiver` is required.' 107 | 'Try: pip install torch-model-archiver') 108 | 109 | mmdet2torchserve(args.config, args.checkpoint, args.output_folder, 110 | args.model_name, args.model_version, args.force) 111 | -------------------------------------------------------------------------------- /tools/deployment/mmdet_handler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import base64 3 | import os 4 | 5 | import mmcv 6 | import torch 7 | from ts.torch_handler.base_handler import BaseHandler 8 | 9 | from mmdet.apis import inference_detector, init_detector 10 | 11 | 12 | class MMdetHandler(BaseHandler): 13 | threshold = 0.5 14 | 15 | def initialize(self, context): 16 | properties = context.system_properties 17 | self.map_location = 'cuda' if torch.cuda.is_available() else 'cpu' 18 | self.device = torch.device(self.map_location + ':' + 19 | str(properties.get('gpu_id')) if torch.cuda. 20 | is_available() else self.map_location) 21 | self.manifest = context.manifest 22 | 23 | model_dir = properties.get('model_dir') 24 | serialized_file = self.manifest['model']['serializedFile'] 25 | checkpoint = os.path.join(model_dir, serialized_file) 26 | self.config_file = os.path.join(model_dir, 'config.py') 27 | 28 | self.model = init_detector(self.config_file, checkpoint, self.device) 29 | self.initialized = True 30 | 31 | def preprocess(self, data): 32 | images = [] 33 | 34 | for row in data: 35 | image = row.get('data') or row.get('body') 36 | if isinstance(image, str): 37 | image = base64.b64decode(image) 38 | image = mmcv.imfrombytes(image) 39 | images.append(image) 40 | 41 | return images 42 | 43 | def inference(self, data, *args, **kwargs): 44 | results = inference_detector(self.model, data) 45 | return results 46 | 47 | def postprocess(self, data): 48 | # Format output following the example ObjectDetectionHandler format 49 | output = [] 50 | for image_index, image_result in enumerate(data): 51 | output.append([]) 52 | if isinstance(image_result, tuple): 53 | bbox_result, segm_result = image_result 54 | if isinstance(segm_result, tuple): 55 | segm_result = segm_result[0] # ms rcnn 56 | else: 57 | bbox_result, segm_result = image_result, None 58 | 59 | for class_index, class_result in enumerate(bbox_result): 60 | class_name = self.model.CLASSES[class_index] 61 | for bbox in class_result: 62 | bbox_coords = bbox[:-1].tolist() 63 | score = float(bbox[-1]) 64 | if score >= self.threshold: 65 | output[image_index].append({ 66 | 'class_name': class_name, 67 | 'bbox': bbox_coords, 68 | 'score': score 69 | }) 70 | 71 | return output 72 | -------------------------------------------------------------------------------- /tools/deployment/test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
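# Illustrative usage (a sketch; ${CONFIG} and ${MODEL} are placeholders):
#   python tools/deployment/test.py ${CONFIG} ${MODEL}.onnx \
#       --backend onnxruntime --eval bbox
# The --backend flag selects ONNXRuntime or TensorRT; note the deprecation
# warning at the bottom of this file pointing users to MMDeploy.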
2 | import argparse 3 | import warnings 4 | 5 | import mmcv 6 | from mmcv import Config, DictAction 7 | from mmcv.parallel import MMDataParallel 8 | 9 | from mmdet.apis import single_gpu_test 10 | from mmdet.datasets import (build_dataloader, build_dataset, 11 | replace_ImageToTensor) 12 | from mmdet.utils import compat_cfg 13 | 14 | 15 | def parse_args(): 16 | parser = argparse.ArgumentParser( 17 | description='MMDet test (and eval) an ONNX model using ONNXRuntime') 18 | parser.add_argument('config', help='test config file path') 19 | parser.add_argument('model', help='Input model file') 20 | parser.add_argument('--out', help='output result file in pickle format') 21 | parser.add_argument( 22 | '--format-only', 23 | action='store_true', 24 | help='Format the output results without perform evaluation. It is' 25 | 'useful when you want to format the result to a specific format and ' 26 | 'submit it to the test server') 27 | parser.add_argument( 28 | '--backend', 29 | required=True, 30 | choices=['onnxruntime', 'tensorrt'], 31 | help='Backend for input model to run. ') 32 | parser.add_argument( 33 | '--eval', 34 | type=str, 35 | nargs='+', 36 | help='evaluation metrics, which depends on the dataset, e.g., "bbox",' 37 | ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC') 38 | parser.add_argument('--show', action='store_true', help='show results') 39 | parser.add_argument( 40 | '--show-dir', help='directory where painted images will be saved') 41 | parser.add_argument( 42 | '--show-score-thr', 43 | type=float, 44 | default=0.3, 45 | help='score threshold (default: 0.3)') 46 | parser.add_argument( 47 | '--cfg-options', 48 | nargs='+', 49 | action=DictAction, 50 | help='override some settings in the used config, the key-value pair ' 51 | 'in xxx=yyy format will be merged into config file. If the value to ' 52 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 53 | 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' 54 | 'Note that the quotation marks are necessary and that no white space ' 55 | 'is allowed.') 56 | parser.add_argument( 57 | '--eval-options', 58 | nargs='+', 59 | action=DictAction, 60 | help='custom options for evaluation, the key-value pair in xxx=yyy ' 61 | 'format will be kwargs for dataset.evaluate() function') 62 | 63 | args = parser.parse_args() 64 | return args 65 | 66 | 67 | def main(): 68 | args = parse_args() 69 | 70 | assert args.out or args.eval or args.format_only or args.show \ 71 | or args.show_dir, \ 72 | ('Please specify at least one operation (save/eval/format/show the ' 73 | 'results / save the results) with the argument "--out", "--eval"' 74 | ', "--format-only", "--show" or "--show-dir"') 75 | 76 | if args.eval and args.format_only: 77 | raise ValueError('--eval and --format_only cannot be both specified') 78 | 79 | if args.out is not None and not args.out.endswith(('.pkl', '.pickle')): 80 | raise ValueError('The output file must be a pkl file.') 81 | 82 | cfg = Config.fromfile(args.config) 83 | if args.cfg_options is not None: 84 | cfg.merge_from_dict(args.cfg_options) 85 | cfg = compat_cfg(cfg) 86 | # in case the test dataset is concatenated 87 | samples_per_gpu = 1 88 | if isinstance(cfg.data.test, dict): 89 | cfg.data.test.test_mode = True 90 | samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1) 91 | if samples_per_gpu > 1: 92 | # Replace 'ImageToTensor' to 'DefaultFormatBundle' 93 | cfg.data.test.pipeline = replace_ImageToTensor( 94 | cfg.data.test.pipeline) 95 | elif isinstance(cfg.data.test, list): 96 | for ds_cfg in cfg.data.test: 97 | ds_cfg.test_mode = True 98 | samples_per_gpu = max( 99 | [ds_cfg.pop('samples_per_gpu', 1) for ds_cfg in cfg.data.test]) 100 | if samples_per_gpu > 1: 101 | for ds_cfg in cfg.data.test: 102 | ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline) 103 | 104 | # build the dataloader 105 | dataset = build_dataset(cfg.data.test) 106 | data_loader = build_dataloader( 107 | dataset, 108 | samples_per_gpu=samples_per_gpu, 109 | workers_per_gpu=cfg.data.workers_per_gpu, 110 | dist=False, 111 | shuffle=False) 112 | 113 | if args.backend == 'onnxruntime': 114 | from mmdet.core.export.model_wrappers import ONNXRuntimeDetector 115 | model = ONNXRuntimeDetector( 116 | args.model, class_names=dataset.CLASSES, device_id=0) 117 | elif args.backend == 'tensorrt': 118 | from mmdet.core.export.model_wrappers import TensorRTDetector 119 | model = TensorRTDetector( 120 | args.model, class_names=dataset.CLASSES, device_id=0) 121 | 122 | model = MMDataParallel(model, device_ids=[0]) 123 | outputs = single_gpu_test(model, data_loader, args.show, args.show_dir, 124 | args.show_score_thr) 125 | 126 | if args.out: 127 | print(f'\nwriting results to {args.out}') 128 | mmcv.dump(outputs, args.out) 129 | kwargs = {} if args.eval_options is None else args.eval_options 130 | if args.format_only: 131 | dataset.format_results(outputs, **kwargs) 132 | if args.eval: 133 | eval_kwargs = cfg.get('evaluation', {}).copy() 134 | # hard-code way to remove EvalHook args 135 | for key in [ 136 | 'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best', 137 | 'rule' 138 | ]: 139 | eval_kwargs.pop(key, None) 140 | eval_kwargs.update(dict(metric=args.eval, **kwargs)) 141 | print(dataset.evaluate(outputs, **eval_kwargs)) 142 | 143 | 144 | if __name__ == '__main__': 145 | main() 146 | 147 | # Following strings of text style are from colorama package 148 | bright_style, reset_style = '\x1b[1m', '\x1b[0m' 149 | red_text, blue_text = '\x1b[31m', 
'\x1b[34m' 150 | white_background = '\x1b[107m' 151 | 152 | msg = white_background + bright_style + red_text 153 | msg += 'DeprecationWarning: This tool will be deprecated in future. ' 154 | msg += blue_text + 'Welcome to use the unified model deployment toolbox ' 155 | msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy' 156 | msg += reset_style 157 | warnings.warn(msg) 158 | -------------------------------------------------------------------------------- /tools/deployment/test_torchserver.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | 3 | import numpy as np 4 | import requests 5 | 6 | from mmdet.apis import inference_detector, init_detector, show_result_pyplot 7 | from mmdet.core import bbox2result 8 | 9 | 10 | def parse_args(): 11 | parser = ArgumentParser() 12 | parser.add_argument('img', help='Image file') 13 | parser.add_argument('config', help='Config file') 14 | parser.add_argument('checkpoint', help='Checkpoint file') 15 | parser.add_argument('model_name', help='The model name in the server') 16 | parser.add_argument( 17 | '--inference-addr', 18 | default='127.0.0.1:8080', 19 | help='Address and port of the inference server') 20 | parser.add_argument( 21 | '--device', default='cuda:0', help='Device used for inference') 22 | parser.add_argument( 23 | '--score-thr', type=float, default=0.5, help='bbox score threshold') 24 | args = parser.parse_args() 25 | return args 26 | 27 | 28 | def parse_result(input, model_class): 29 | bbox = [] 30 | label = [] 31 | score = [] 32 | for anchor in input: 33 | bbox.append(anchor['bbox']) 34 | label.append(model_class.index(anchor['class_name'])) 35 | score.append([anchor['score']]) 36 | bboxes = np.append(bbox, score, axis=1) 37 | labels = np.array(label) 38 | result = bbox2result(bboxes, labels, len(model_class)) 39 | return result 40 | 41 | 42 | def main(args): 43 | # build the model from a config file and a checkpoint file 44 | model = init_detector(args.config, args.checkpoint, device=args.device) 45 | # test a single image 46 | model_result = inference_detector(model, args.img) 47 | for i, anchor_set in enumerate(model_result): 48 | anchor_set = anchor_set[anchor_set[:, 4] >= 0.5] 49 | model_result[i] = anchor_set 50 | # show the results 51 | show_result_pyplot( 52 | model, 53 | args.img, 54 | model_result, 55 | score_thr=args.score_thr, 56 | title='pytorch_result') 57 | url = 'http://' + args.inference_addr + '/predictions/' + args.model_name 58 | with open(args.img, 'rb') as image: 59 | response = requests.post(url, image) 60 | server_result = parse_result(response.json(), model.CLASSES) 61 | show_result_pyplot( 62 | model, 63 | args.img, 64 | server_result, 65 | score_thr=args.score_thr, 66 | title='server_result') 67 | 68 | for i in range(len(model.CLASSES)): 69 | assert np.allclose(model_result[i], server_result[i]) 70 | 71 | 72 | if __name__ == '__main__': 73 | args = parse_args() 74 | main(args) 75 | -------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | NNODES=${NNODES:-1} 7 | NODE_RANK=${NODE_RANK:-0} 8 | PORT=${PORT:-29500} 9 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 10 | 11 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 12 | torchrun \ 13 | --nnodes=$NNODES \ 14 | --node_rank=$NODE_RANK \ 15 | --master_addr=$MASTER_ADDR \ 16 | 
--nproc_per_node=$GPUS \ 17 | --master_port=$PORT \ 18 | $(dirname "$0")/test.py \ 19 | $CONFIG \ 20 | $CHECKPOINT \ 21 | --launcher pytorch \ 22 | ${@:4} 23 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | NNODES=${NNODES:-1} 6 | NODE_RANK=${NODE_RANK:-0} 7 | PORT=${PORT:-29500} 8 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 9 | 10 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 11 | torchrun \ 12 | --nnodes=$NNODES \ 13 | --node_rank=$NODE_RANK \ 14 | --master_addr=$MASTER_ADDR \ 15 | --nproc_per_node=$GPUS \ 16 | --master_port=$PORT \ 17 | $(dirname "$0")/train.py \ 18 | $CONFIG \ 19 | --seed 0 \ 20 | --launcher pytorch ${@:3} 21 | -------------------------------------------------------------------------------- /tools/misc/browse_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os 4 | from collections import Sequence 5 | from pathlib import Path 6 | 7 | import mmcv 8 | import numpy as np 9 | from mmcv import Config, DictAction 10 | 11 | from mmdet.core.utils import mask2ndarray 12 | from mmdet.core.visualization import imshow_det_bboxes 13 | from mmdet.datasets.builder import build_dataset 14 | from mmdet.utils import replace_cfg_vals, update_data_root 15 | 16 | 17 | def parse_args(): 18 | parser = argparse.ArgumentParser(description='Browse a dataset') 19 | parser.add_argument('config', help='train config file path') 20 | parser.add_argument( 21 | '--skip-type', 22 | type=str, 23 | nargs='+', 24 | default=['DefaultFormatBundle', 'Normalize', 'Collect'], 25 | help='skip some useless pipeline') 26 | parser.add_argument( 27 | '--output-dir', 28 | default=None, 29 | type=str, 30 | help='If there is no display interface, you can save it') 31 | parser.add_argument('--not-show', default=False, action='store_true') 32 | parser.add_argument( 33 | '--show-interval', 34 | type=float, 35 | default=2, 36 | help='the interval of show (s)') 37 | parser.add_argument( 38 | '--cfg-options', 39 | nargs='+', 40 | action=DictAction, 41 | help='override some settings in the used config, the key-value pair ' 42 | 'in xxx=yyy format will be merged into config file. If the value to ' 43 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 44 | 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' 45 | 'Note that the quotation marks are necessary and that no white space ' 46 | 'is allowed.') 47 | args = parser.parse_args() 48 | return args 49 | 50 | 51 | def retrieve_data_cfg(config_path, skip_type, cfg_options): 52 | 53 | def skip_pipeline_steps(config): 54 | config['pipeline'] = [ 55 | x for x in config.pipeline if x['type'] not in skip_type 56 | ] 57 | 58 | cfg = Config.fromfile(config_path) 59 | 60 | # replace the ${key} with the value of cfg.key 61 | cfg = replace_cfg_vals(cfg) 62 | 63 | # update data root according to MMDET_DATASETS 64 | update_data_root(cfg) 65 | 66 | if cfg_options is not None: 67 | cfg.merge_from_dict(cfg_options) 68 | train_data_cfg = cfg.data.train 69 | while 'dataset' in train_data_cfg and train_data_cfg[ 70 | 'type'] != 'MultiImageMixDataset': 71 | train_data_cfg = train_data_cfg['dataset'] 72 | 73 | if isinstance(train_data_cfg, Sequence): 74 | [skip_pipeline_steps(c) for c in train_data_cfg] 75 | else: 76 | skip_pipeline_steps(train_data_cfg) 77 | 78 | return cfg 79 | 80 | 81 | def main(): 82 | args = parse_args() 83 | cfg = retrieve_data_cfg(args.config, args.skip_type, args.cfg_options) 84 | 85 | if 'gt_semantic_seg' in cfg.train_pipeline[-1]['keys']: 86 | cfg.data.train.pipeline = [ 87 | p for p in cfg.data.train.pipeline if p['type'] != 'SegRescale' 88 | ] 89 | dataset = build_dataset(cfg.data.train) 90 | 91 | progress_bar = mmcv.ProgressBar(len(dataset)) 92 | 93 | for item in dataset: 94 | filename = os.path.join(args.output_dir, 95 | Path(item['filename']).name 96 | ) if args.output_dir is not None else None 97 | 98 | gt_bboxes = item['gt_bboxes'] 99 | gt_labels = item['gt_labels'] 100 | gt_masks = item.get('gt_masks', None) 101 | if gt_masks is not None: 102 | gt_masks = mask2ndarray(gt_masks) 103 | 104 | gt_seg = item.get('gt_semantic_seg', None) 105 | if gt_seg is not None: 106 | pad_value = 255 # the padding value of gt_seg 107 | sem_labels = np.unique(gt_seg) 108 | all_labels = np.concatenate((gt_labels, sem_labels), axis=0) 109 | all_labels, counts = np.unique(all_labels, return_counts=True) 110 | stuff_labels = all_labels[np.logical_and(counts < 2, 111 | all_labels != pad_value)] 112 | stuff_masks = gt_seg[None] == stuff_labels[:, None, None] 113 | gt_labels = np.concatenate((gt_labels, stuff_labels), axis=0) 114 | gt_masks = np.concatenate((gt_masks, stuff_masks.astype(np.uint8)), 115 | axis=0) 116 | # If you need to show the bounding boxes, 117 | # please comment the following line 118 | gt_bboxes = None 119 | 120 | imshow_det_bboxes( 121 | item['img'], 122 | gt_bboxes, 123 | gt_labels, 124 | gt_masks, 125 | class_names=dataset.CLASSES, 126 | show=not args.not_show, 127 | wait_time=args.show_interval, 128 | out_file=filename, 129 | bbox_color=dataset.PALETTE, 130 | text_color=(200, 200, 200), 131 | mask_color=dataset.PALETTE) 132 | 133 | progress_bar.update() 134 | 135 | 136 | if __name__ == '__main__': 137 | main() 138 | -------------------------------------------------------------------------------- /tools/misc/download_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from itertools import repeat 3 | from multiprocessing.pool import ThreadPool 4 | from pathlib import Path 5 | from tarfile import TarFile 6 | from zipfile import ZipFile 7 | 8 | import torch 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser( 13 | description='Download datasets for training') 14 | parser.add_argument( 15 | '--dataset-name', type=str, help='dataset 
name', default='coco2017') 16 | parser.add_argument( 17 | '--save-dir', 18 | type=str, 19 | help='the dir to save dataset', 20 | default='data/coco') 21 | parser.add_argument( 22 | '--unzip', 23 | action='store_true', 24 | help='whether unzip dataset or not, zipped files will be saved') 25 | parser.add_argument( 26 | '--delete', 27 | action='store_true', 28 | help='delete the download zipped files') 29 | parser.add_argument( 30 | '--threads', type=int, help='number of threading', default=4) 31 | args = parser.parse_args() 32 | return args 33 | 34 | 35 | def download(url, dir, unzip=True, delete=False, threads=1): 36 | 37 | def download_one(url, dir): 38 | f = dir / Path(url).name 39 | if Path(url).is_file(): 40 | Path(url).rename(f) 41 | elif not f.exists(): 42 | print('Downloading {} to {}'.format(url, f)) 43 | torch.hub.download_url_to_file(url, f, progress=True) 44 | if unzip and f.suffix in ('.zip', '.tar'): 45 | print('Unzipping {}'.format(f.name)) 46 | if f.suffix == '.zip': 47 | ZipFile(f).extractall(path=dir) 48 | elif f.suffix == '.tar': 49 | TarFile(f).extractall(path=dir) 50 | if delete: 51 | f.unlink() 52 | print('Delete {}'.format(f)) 53 | 54 | dir = Path(dir) 55 | if threads > 1: 56 | pool = ThreadPool(threads) 57 | pool.imap(lambda x: download_one(*x), zip(url, repeat(dir))) 58 | pool.close() 59 | pool.join() 60 | else: 61 | for u in [url] if isinstance(url, (str, Path)) else url: 62 | download_one(u, dir) 63 | 64 | 65 | def main(): 66 | args = parse_args() 67 | path = Path(args.save_dir) 68 | if not path.exists(): 69 | path.mkdir(parents=True, exist_ok=True) 70 | data2url = dict( 71 | # TODO: Support for downloading Panoptic Segmentation of COCO 72 | coco2017=[ 73 | 'http://images.cocodataset.org/zips/train2017.zip', 74 | 'http://images.cocodataset.org/zips/val2017.zip', 75 | 'http://images.cocodataset.org/zips/test2017.zip', 76 | 'http://images.cocodataset.org/annotations/' + 77 | 'annotations_trainval2017.zip' 78 | ], 79 | lvis=[ 80 | 'https://s3-us-west-2.amazonaws.com/dl.fbaipublicfiles.com/LVIS/lvis_v1_train.json.zip', # noqa 81 | 'https://s3-us-west-2.amazonaws.com/dl.fbaipublicfiles.com/LVIS/lvis_v1_train.json.zip', # noqa 82 | ], 83 | voc2007=[ 84 | 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar', # noqa 85 | 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar', # noqa 86 | 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCdevkit_08-Jun-2007.tar', # noqa 87 | ], 88 | ) 89 | url = data2url.get(args.dataset_name, None) 90 | if url is None: 91 | print('Only support COCO, VOC, and LVIS now!') 92 | return 93 | download( 94 | url, 95 | dir=path, 96 | unzip=args.unzip, 97 | delete=args.delete, 98 | threads=args.threads) 99 | 100 | 101 | if __name__ == '__main__': 102 | main() 103 | -------------------------------------------------------------------------------- /tools/misc/gen_coco_panoptic_test_info.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os.path as osp 3 | 4 | import mmcv 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser( 9 | description='Generate COCO test image information ' 10 | 'for COCO panoptic segmentation.') 11 | parser.add_argument('data_root', help='Path to COCO annotation directory.') 12 | args = parser.parse_args() 13 | 14 | return args 15 | 16 | 17 | def main(): 18 | args = parse_args() 19 | data_root = args.data_root 20 | val_info = mmcv.load(osp.join(data_root, 'panoptic_val2017.json')) 21 | test_old_info = 
mmcv.load( 22 | osp.join(data_root, 'image_info_test-dev2017.json')) 23 | 24 | # replace categories from image_info_test-dev2017.json 25 | # with categories from panoptic_val2017.json which 26 | # has attribute `isthing`. 27 | test_info = test_old_info 28 | test_info.update({'categories': val_info['categories']}) 29 | mmcv.dump(test_info, 30 | osp.join(data_root, 'panoptic_image_info_test-dev2017.json')) 31 | 32 | 33 | if __name__ == '__main__': 34 | main() 35 | -------------------------------------------------------------------------------- /tools/misc/get_image_metas.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | """Get test image metas on a specific dataset. 3 | 4 | Here is an example to run this script. 5 | 6 | Example: 7 | python tools/misc/get_image_metas.py ${CONFIG} \ 8 | --out ${OUTPUT FILE NAME} 9 | """ 10 | import argparse 11 | import csv 12 | import os.path as osp 13 | from multiprocessing import Pool 14 | 15 | import mmcv 16 | from mmcv import Config 17 | 18 | 19 | def parse_args(): 20 | parser = argparse.ArgumentParser(description='Collect image metas') 21 | parser.add_argument('config', help='Config file path') 22 | parser.add_argument( 23 | '--out', 24 | default='validation-image-metas.pkl', 25 | help='The output image metas file name. The save dir is in the ' 26 | 'same directory as `dataset.ann_file` path') 27 | parser.add_argument( 28 | '--nproc', 29 | default=4, 30 | type=int, 31 | help='Processes used for get image metas') 32 | args = parser.parse_args() 33 | return args 34 | 35 | 36 | def get_metas_from_csv_style_ann_file(ann_file): 37 | data_infos = [] 38 | cp_filename = None 39 | with open(ann_file, 'r') as f: 40 | reader = csv.reader(f) 41 | for i, line in enumerate(reader): 42 | if i == 0: 43 | continue 44 | img_id = line[0] 45 | filename = f'{img_id}.jpg' 46 | if filename != cp_filename: 47 | data_infos.append(dict(filename=filename)) 48 | cp_filename = filename 49 | return data_infos 50 | 51 | 52 | def get_metas_from_txt_style_ann_file(ann_file): 53 | with open(ann_file) as f: 54 | lines = f.readlines() 55 | i = 0 56 | data_infos = [] 57 | while i < len(lines): 58 | filename = lines[i].rstrip() 59 | data_infos.append(dict(filename=filename)) 60 | skip_lines = int(lines[i + 2]) + 3 61 | i += skip_lines 62 | return data_infos 63 | 64 | 65 | def get_image_metas(data_info, img_prefix): 66 | file_client = mmcv.FileClient(backend='disk') 67 | filename = data_info.get('filename', None) 68 | if filename is not None: 69 | if img_prefix is not None: 70 | filename = osp.join(img_prefix, filename) 71 | img_bytes = file_client.get(filename) 72 | img = mmcv.imfrombytes(img_bytes, flag='color') 73 | meta = dict(filename=filename, ori_shape=img.shape) 74 | else: 75 | raise NotImplementedError('Missing `filename` in data_info') 76 | return meta 77 | 78 | 79 | def main(): 80 | args = parse_args() 81 | assert args.out.endswith('pkl'), 'The output file name must be pkl suffix' 82 | 83 | # load config files 84 | cfg = Config.fromfile(args.config) 85 | ann_file = cfg.data.test.ann_file 86 | img_prefix = cfg.data.test.img_prefix 87 | 88 | print(f'{"-" * 5} Start Processing {"-" * 5}') 89 | if ann_file.endswith('csv'): 90 | data_infos = get_metas_from_csv_style_ann_file(ann_file) 91 | elif ann_file.endswith('txt'): 92 | data_infos = get_metas_from_txt_style_ann_file(ann_file) 93 | else: 94 | shuffix = ann_file.split('.')[-1] 95 | raise NotImplementedError('File name must be csv or txt suffix 
but ' 96 | f'got {shuffix}') 97 | 98 | print(f'Successfully loaded annotation file from {ann_file}') 99 | print(f'Processing {len(data_infos)} images...') 100 | pool = Pool(args.nproc) 101 | # get image metas with multiple processes 102 | image_metas = pool.starmap( 103 | get_image_metas, 104 | zip(data_infos, [img_prefix for _ in range(len(data_infos))]), 105 | ) 106 | pool.close() 107 | 108 | # save image metas 109 | root_path = cfg.data.test.ann_file.rsplit('/', 1)[0] 110 | save_path = osp.join(root_path, args.out) 111 | mmcv.dump(image_metas, save_path) 112 | print(f'Image meta file saved to: {save_path}') 113 | 114 | 115 | if __name__ == '__main__': 116 | main() 117 | -------------------------------------------------------------------------------- /tools/misc/print_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import warnings 4 | 5 | from mmcv import Config, DictAction 6 | 7 | from mmdet.utils import replace_cfg_vals, update_data_root 8 | 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser(description='Print the whole config') 12 | parser.add_argument('config', help='config file path') 13 | parser.add_argument( 14 | '--options', 15 | nargs='+', 16 | action=DictAction, 17 | help='override some settings in the used config, the key-value pair ' 18 | 'in xxx=yyy format will be merged into config file (deprecated), ' 19 | 'change to --cfg-options instead.') 20 | parser.add_argument( 21 | '--cfg-options', 22 | nargs='+', 23 | action=DictAction, 24 | help='override some settings in the used config, the key-value pair ' 25 | 'in xxx=yyy format will be merged into config file. If the value to ' 26 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 27 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 28 | 'Note that the quotation marks are necessary and that no white space ' 29 | 'is allowed.') 30 | args = parser.parse_args() 31 | 32 | if args.options and args.cfg_options: 33 | raise ValueError( 34 | '--options and --cfg-options cannot be both ' 35 | 'specified, --options is deprecated in favor of --cfg-options') 36 | if args.options: 37 | warnings.warn('--options is deprecated in favor of --cfg-options') 38 | args.cfg_options = args.options 39 | 40 | return args 41 | 42 | 43 | def main(): 44 | args = parse_args() 45 | 46 | cfg = Config.fromfile(args.config) 47 | 48 | # replace the ${key} with the value of cfg.key 49 | cfg = replace_cfg_vals(cfg) 50 | 51 | # update data root according to MMDET_DATASETS 52 | update_data_root(cfg) 53 | 54 | if args.cfg_options is not None: 55 | cfg.merge_from_dict(args.cfg_options) 56 | print(f'Config:\n{cfg.pretty_text}') 57 | 58 | 59 | if __name__ == '__main__': 60 | main() 61 | -------------------------------------------------------------------------------- /tools/misc/split_coco.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os.path as osp 4 | 5 | import mmcv 6 | import numpy as np 7 | 8 | prog_description = '''K-Fold coco split. 
9 | 10 | To split coco data for semi-supervised object detection: 11 | python tools/misc/split_coco.py 12 | ''' 13 | 14 | 15 | def parse_args(): 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument( 18 | '--data-root', 19 | type=str, 20 | help='The data root of coco dataset.', 21 | default='./data/coco/') 22 | parser.add_argument( 23 | '--out-dir', 24 | type=str, 25 | help='The output directory of coco semi-supervised annotations.', 26 | default='./data/coco_semi_annos/') 27 | parser.add_argument( 28 | '--labeled-percent', 29 | type=float, 30 | nargs='+', 31 | help='The percentage of labeled data in the training set.', 32 | default=[1, 2, 5, 10]) 33 | parser.add_argument( 34 | '--fold', 35 | type=int, 36 | help='K-fold cross validation for semi-supervised object detection.', 37 | default=5) 38 | args = parser.parse_args() 39 | return args 40 | 41 | 42 | def split_coco(data_root, out_dir, percent, fold): 43 | """Split COCO data for Semi-supervised object detection. 44 | 45 | Args: 46 | data_root (str): The data root of coco dataset. 47 | out_dir (str): The output directory of coco semi-supervised 48 | annotations. 49 | percent (float): The percentage of labeled data in the training set. 50 | fold (int): The fold of dataset and set as random seed for data split. 51 | """ 52 | 53 | def save_anns(name, images, annotations): 54 | sub_anns = dict() 55 | sub_anns['images'] = images 56 | sub_anns['annotations'] = annotations 57 | sub_anns['licenses'] = anns['licenses'] 58 | sub_anns['categories'] = anns['categories'] 59 | sub_anns['info'] = anns['info'] 60 | 61 | mmcv.mkdir_or_exist(out_dir) 62 | mmcv.dump(sub_anns, f'{out_dir}/{name}.json') 63 | 64 | # set random seed with the fold 65 | np.random.seed(fold) 66 | ann_file = osp.join(data_root, 'annotations/instances_train2017.json') 67 | anns = mmcv.load(ann_file) 68 | 69 | image_list = anns['images'] 70 | labeled_total = int(percent / 100. * len(image_list)) 71 | labeled_inds = set( 72 | np.random.choice(range(len(image_list)), size=labeled_total)) 73 | labeled_ids, labeled_images, unlabeled_images = [], [], [] 74 | 75 | for i in range(len(image_list)): 76 | if i in labeled_inds: 77 | labeled_images.append(image_list[i]) 78 | labeled_ids.append(image_list[i]['id']) 79 | else: 80 | unlabeled_images.append(image_list[i]) 81 | 82 | # get all annotations of labeled images 83 | labeled_ids = set(labeled_ids) 84 | labeled_annotations, unlabeled_annotations = [], [] 85 | 86 | for ann in anns['annotations']: 87 | if ann['image_id'] in labeled_ids: 88 | labeled_annotations.append(ann) 89 | else: 90 | unlabeled_annotations.append(ann) 91 | 92 | # save labeled and unlabeled 93 | labeled_name = f'instances_train2017.{fold}@{percent}' 94 | unlabeled_name = f'instances_train2017.{fold}@{percent}-unlabeled' 95 | 96 | save_anns(labeled_name, labeled_images, labeled_annotations) 97 | save_anns(unlabeled_name, unlabeled_images, unlabeled_annotations) 98 | 99 | 100 | def multi_wrapper(args): 101 | return split_coco(*args) 102 | 103 | 104 | if __name__ == '__main__': 105 | args = parse_args() 106 | arguments_list = [(args.data_root, args.out_dir, p, f) 107 | for f in range(1, args.fold + 1) 108 | for p in args.labeled_percent] 109 | mmcv.track_parallel_progress(multi_wrapper, arguments_list, args.fold) 110 | -------------------------------------------------------------------------------- /tools/model_converters/detectron2pytorch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. 
All rights reserved. 2 | import argparse 3 | from collections import OrderedDict 4 | 5 | import mmcv 6 | import torch 7 | 8 | arch_settings = {50: (3, 4, 6, 3), 101: (3, 4, 23, 3)} 9 | 10 | 11 | def convert_bn(blobs, state_dict, caffe_name, torch_name, converted_names): 12 | # detectron replace bn with affine channel layer 13 | state_dict[torch_name + '.bias'] = torch.from_numpy(blobs[caffe_name + 14 | '_b']) 15 | state_dict[torch_name + '.weight'] = torch.from_numpy(blobs[caffe_name + 16 | '_s']) 17 | bn_size = state_dict[torch_name + '.weight'].size() 18 | state_dict[torch_name + '.running_mean'] = torch.zeros(bn_size) 19 | state_dict[torch_name + '.running_var'] = torch.ones(bn_size) 20 | converted_names.add(caffe_name + '_b') 21 | converted_names.add(caffe_name + '_s') 22 | 23 | 24 | def convert_conv_fc(blobs, state_dict, caffe_name, torch_name, 25 | converted_names): 26 | state_dict[torch_name + '.weight'] = torch.from_numpy(blobs[caffe_name + 27 | '_w']) 28 | converted_names.add(caffe_name + '_w') 29 | if caffe_name + '_b' in blobs: 30 | state_dict[torch_name + '.bias'] = torch.from_numpy(blobs[caffe_name + 31 | '_b']) 32 | converted_names.add(caffe_name + '_b') 33 | 34 | 35 | def convert(src, dst, depth): 36 | """Convert keys in detectron pretrained ResNet models to pytorch style.""" 37 | # load arch_settings 38 | if depth not in arch_settings: 39 | raise ValueError('Only support ResNet-50 and ResNet-101 currently') 40 | block_nums = arch_settings[depth] 41 | # load caffe model 42 | caffe_model = mmcv.load(src, encoding='latin1') 43 | blobs = caffe_model['blobs'] if 'blobs' in caffe_model else caffe_model 44 | # convert to pytorch style 45 | state_dict = OrderedDict() 46 | converted_names = set() 47 | convert_conv_fc(blobs, state_dict, 'conv1', 'conv1', converted_names) 48 | convert_bn(blobs, state_dict, 'res_conv1_bn', 'bn1', converted_names) 49 | for i in range(1, len(block_nums) + 1): 50 | for j in range(block_nums[i - 1]): 51 | if j == 0: 52 | convert_conv_fc(blobs, state_dict, f'res{i + 1}_{j}_branch1', 53 | f'layer{i}.{j}.downsample.0', converted_names) 54 | convert_bn(blobs, state_dict, f'res{i + 1}_{j}_branch1_bn', 55 | f'layer{i}.{j}.downsample.1', converted_names) 56 | for k, letter in enumerate(['a', 'b', 'c']): 57 | convert_conv_fc(blobs, state_dict, 58 | f'res{i + 1}_{j}_branch2{letter}', 59 | f'layer{i}.{j}.conv{k+1}', converted_names) 60 | convert_bn(blobs, state_dict, 61 | f'res{i + 1}_{j}_branch2{letter}_bn', 62 | f'layer{i}.{j}.bn{k + 1}', converted_names) 63 | # check if all layers are converted 64 | for key in blobs: 65 | if key not in converted_names: 66 | print(f'Not Convert: {key}') 67 | # save checkpoint 68 | checkpoint = dict() 69 | checkpoint['state_dict'] = state_dict 70 | torch.save(checkpoint, dst) 71 | 72 | 73 | def main(): 74 | parser = argparse.ArgumentParser(description='Convert model keys') 75 | parser.add_argument('src', help='src detectron model path') 76 | parser.add_argument('dst', help='save path') 77 | parser.add_argument('depth', type=int, help='ResNet model depth') 78 | args = parser.parse_args() 79 | convert(args.src, args.dst, args.depth) 80 | 81 | 82 | if __name__ == '__main__': 83 | main() 84 | -------------------------------------------------------------------------------- /tools/model_converters/publish_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import argparse 3 | import subprocess 4 | 5 | import torch 6 | 7 | 8 | def parse_args(): 9 | parser = argparse.ArgumentParser( 10 | description='Process a checkpoint to be published') 11 | parser.add_argument('in_file', help='input checkpoint filename') 12 | parser.add_argument('out_file', help='output checkpoint filename') 13 | args = parser.parse_args() 14 | return args 15 | 16 | 17 | def process_checkpoint(in_file, out_file): 18 | checkpoint = torch.load(in_file, map_location='cpu') 19 | # remove optimizer for smaller file size 20 | if 'optimizer' in checkpoint: 21 | del checkpoint['optimizer'] 22 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 23 | # add the code here. 24 | if torch.__version__ >= '1.6': 25 | torch.save(checkpoint, out_file, _use_new_zipfile_serialization=False) 26 | else: 27 | torch.save(checkpoint, out_file) 28 | sha = subprocess.check_output(['sha256sum', out_file]).decode() 29 | if out_file.endswith('.pth'): 30 | out_file_name = out_file[:-4] 31 | else: 32 | out_file_name = out_file 33 | final_file = out_file_name + f'-{sha[:8]}.pth' 34 | subprocess.Popen(['mv', out_file, final_file]) 35 | 36 | 37 | def main(): 38 | args = parse_args() 39 | process_checkpoint(args.in_file, args.out_file) 40 | 41 | 42 | if __name__ == '__main__': 43 | main() 44 | -------------------------------------------------------------------------------- /tools/model_converters/regnet2mmdet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | from collections import OrderedDict 4 | 5 | import torch 6 | 7 | 8 | def convert_stem(model_key, model_weight, state_dict, converted_names): 9 | new_key = model_key.replace('stem.conv', 'conv1') 10 | new_key = new_key.replace('stem.bn', 'bn1') 11 | state_dict[new_key] = model_weight 12 | converted_names.add(model_key) 13 | print(f'Convert {model_key} to {new_key}') 14 | 15 | 16 | def convert_head(model_key, model_weight, state_dict, converted_names): 17 | new_key = model_key.replace('head.fc', 'fc') 18 | state_dict[new_key] = model_weight 19 | converted_names.add(model_key) 20 | print(f'Convert {model_key} to {new_key}') 21 | 22 | 23 | def convert_reslayer(model_key, model_weight, state_dict, converted_names): 24 | split_keys = model_key.split('.') 25 | layer, block, module = split_keys[:3] 26 | block_id = int(block[1:]) 27 | layer_name = f'layer{int(layer[1:])}' 28 | block_name = f'{block_id - 1}' 29 | 30 | if block_id == 1 and module == 'bn': 31 | new_key = f'{layer_name}.{block_name}.downsample.1.{split_keys[-1]}' 32 | elif block_id == 1 and module == 'proj': 33 | new_key = f'{layer_name}.{block_name}.downsample.0.{split_keys[-1]}' 34 | elif module == 'f': 35 | if split_keys[3] == 'a_bn': 36 | module_name = 'bn1' 37 | elif split_keys[3] == 'b_bn': 38 | module_name = 'bn2' 39 | elif split_keys[3] == 'c_bn': 40 | module_name = 'bn3' 41 | elif split_keys[3] == 'a': 42 | module_name = 'conv1' 43 | elif split_keys[3] == 'b': 44 | module_name = 'conv2' 45 | elif split_keys[3] == 'c': 46 | module_name = 'conv3' 47 | new_key = f'{layer_name}.{block_name}.{module_name}.{split_keys[-1]}' 48 | else: 49 | raise ValueError(f'Unsupported conversion of key {model_key}') 50 | print(f'Convert {model_key} to {new_key}') 51 | state_dict[new_key] = model_weight 52 | converted_names.add(model_key) 53 | 54 | 55 | def convert(src, dst): 56 | """Convert keys in pycls pretrained RegNet models to mmdet style.""" 57 | # load caffe model 58 | 
regnet_model = torch.load(src) 59 | blobs = regnet_model['model_state'] 60 | # convert to pytorch style 61 | state_dict = OrderedDict() 62 | converted_names = set() 63 | for key, weight in blobs.items(): 64 | if 'stem' in key: 65 | convert_stem(key, weight, state_dict, converted_names) 66 | elif 'head' in key: 67 | convert_head(key, weight, state_dict, converted_names) 68 | elif key.startswith('s'): 69 | convert_reslayer(key, weight, state_dict, converted_names) 70 | 71 | # check if all layers are converted 72 | for key in blobs: 73 | if key not in converted_names: 74 | print(f'not converted: {key}') 75 | # save checkpoint 76 | checkpoint = dict() 77 | checkpoint['state_dict'] = state_dict 78 | torch.save(checkpoint, dst) 79 | 80 | 81 | def main(): 82 | parser = argparse.ArgumentParser(description='Convert model keys') 83 | parser.add_argument('src', help='src pycls RegNet model path') 84 | parser.add_argument('dst', help='save path') 85 | args = parser.parse_args() 86 | convert(args.src, args.dst) 87 | 88 | 89 | if __name__ == '__main__': 90 | main() 91 | -------------------------------------------------------------------------------- /tools/model_converters/selfsup2mmdet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | from collections import OrderedDict 4 | 5 | import torch 6 | 7 | 8 | def moco_convert(src, dst): 9 | """Convert keys in MoCo pretrained models to mmdet style.""" 10 | # load moco model 11 | moco_model = torch.load(src) 12 | blobs = moco_model['state_dict'] 13 | # convert to pytorch style 14 | state_dict = OrderedDict() 15 | for k, v in blobs.items(): 16 | if not k.startswith('module.encoder_q.'): 17 | continue 18 | old_k = k 19 | k = k.replace('module.encoder_q.', '') 20 | state_dict[k] = v 21 | print(old_k, '->', k) 22 | # save checkpoint 23 | checkpoint = dict() 24 | checkpoint['state_dict'] = state_dict 25 | torch.save(checkpoint, dst) 26 | 27 | 28 | def main(): 29 | parser = argparse.ArgumentParser(description='Convert model keys') 30 | parser.add_argument('src', help='src selfsup model path') 31 | parser.add_argument('dst', help='save path') 32 | parser.add_argument( 33 | '--selfsup', type=str, choices=['moco', 'swav'], help='selfsup method of the src model') 34 | args = parser.parse_args() 35 | if args.selfsup == 'moco': 36 | moco_convert(args.src, args.dst) 37 | elif args.selfsup == 'swav': 38 | print('SWAV does not need to convert the keys') 39 | 40 | 41 | if __name__ == '__main__': 42 | main() 43 | -------------------------------------------------------------------------------- /tools/model_converters/upgrade_model_version.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import argparse 3 | import re 4 | import tempfile 5 | from collections import OrderedDict 6 | 7 | import torch 8 | from mmcv import Config 9 | 10 | 11 | def is_head(key): 12 | valid_head_list = [ 13 | 'bbox_head', 'mask_head', 'semantic_head', 'grid_head', 'mask_iou_head' 14 | ] 15 | 16 | return any(key.startswith(h) for h in valid_head_list) 17 | 18 | 19 | def parse_config(config_strings): 20 | temp_file = tempfile.NamedTemporaryFile() 21 | config_path = f'{temp_file.name}.py' 22 | with open(config_path, 'w') as f: 23 | f.write(config_strings) 24 | 25 | config = Config.fromfile(config_path) 26 | is_two_stage = True 27 | is_ssd = False 28 | is_retina = False 29 | reg_cls_agnostic = False 30 | if 'rpn_head' not in config.model: 31 | is_two_stage = False 32 | # check whether it is SSD 33 | if config.model.bbox_head.type == 'SSDHead': 34 | is_ssd = True 35 | elif config.model.bbox_head.type == 'RetinaHead': 36 | is_retina = True 37 | elif isinstance(config.model['bbox_head'], list): 38 | reg_cls_agnostic = True 39 | elif 'reg_class_agnostic' in config.model.bbox_head: 40 | reg_cls_agnostic = config.model.bbox_head \ 41 | .reg_class_agnostic 42 | temp_file.close() 43 | return is_two_stage, is_ssd, is_retina, reg_cls_agnostic 44 | 45 | 46 | def reorder_cls_channel(val, num_classes=81): 47 | # bias 48 | if val.dim() == 1: 49 | new_val = torch.cat((val[1:], val[:1]), dim=0) 50 | # weight 51 | else: 52 | out_channels, in_channels = val.shape[:2] 53 | # conv_cls for softmax output 54 | if out_channels != num_classes and out_channels % num_classes == 0: 55 | new_val = val.reshape(-1, num_classes, in_channels, *val.shape[2:]) 56 | new_val = torch.cat((new_val[:, 1:], new_val[:, :1]), dim=1) 57 | new_val = new_val.reshape(val.size()) 58 | # fc_cls 59 | elif out_channels == num_classes: 60 | new_val = torch.cat((val[1:], val[:1]), dim=0) 61 | # agnostic | retina_cls | rpn_cls 62 | else: 63 | new_val = val 64 | 65 | return new_val 66 | 67 | 68 | def truncate_cls_channel(val, num_classes=81): 69 | 70 | # bias 71 | if val.dim() == 1: 72 | if val.size(0) % num_classes == 0: 73 | new_val = val[:num_classes - 1] 74 | else: 75 | new_val = val 76 | # weight 77 | else: 78 | out_channels, in_channels = val.shape[:2] 79 | # conv_logits 80 | if out_channels % num_classes == 0: 81 | new_val = val.reshape(num_classes, in_channels, *val.shape[2:])[1:] 82 | new_val = new_val.reshape(-1, *val.shape[1:]) 83 | # agnostic 84 | else: 85 | new_val = val 86 | 87 | return new_val 88 | 89 | 90 | def truncate_reg_channel(val, num_classes=81): 91 | # bias 92 | if val.dim() == 1: 93 | # fc_reg | rpn_reg 94 | if val.size(0) % num_classes == 0: 95 | new_val = val.reshape(num_classes, -1)[:num_classes - 1] 96 | new_val = new_val.reshape(-1) 97 | # agnostic 98 | else: 99 | new_val = val 100 | # weight 101 | else: 102 | out_channels, in_channels = val.shape[:2] 103 | # fc_reg | rpn_reg 104 | if out_channels % num_classes == 0: 105 | new_val = val.reshape(num_classes, -1, in_channels, 106 | *val.shape[2:])[1:] 107 | new_val = new_val.reshape(-1, *val.shape[1:]) 108 | # agnostic 109 | else: 110 | new_val = val 111 | 112 | return new_val 113 | 114 | 115 | def convert(in_file, out_file, num_classes): 116 | """Convert keys in checkpoints. 117 | 118 | There can be some breaking changes during the development of mmdetection, 119 | and this tool is used for upgrading checkpoints trained with old versions 120 | to the latest one. 
121 | """ 122 | checkpoint = torch.load(in_file) 123 | in_state_dict = checkpoint.pop('state_dict') 124 | out_state_dict = OrderedDict() 125 | meta_info = checkpoint['meta'] 126 | is_two_stage, is_ssd, is_retina, reg_cls_agnostic = parse_config( 127 | '#' + meta_info['config']) 128 | if meta_info['mmdet_version'] <= '0.5.3' and is_retina: 129 | upgrade_retina = True 130 | else: 131 | upgrade_retina = False 132 | 133 | # MMDetection v2.5.0 unifies the class order in RPN 134 | # if the model is trained in version=2.5.0 136 | if meta_info['mmdet_version'] < '2.5.0': 137 | upgrade_rpn = True 138 | else: 139 | upgrade_rpn = False 140 | 141 | for key, val in in_state_dict.items(): 142 | new_key = key 143 | new_val = val 144 | if is_two_stage and is_head(key): 145 | new_key = 'roi_head.{}'.format(key) 146 | 147 | # classification 148 | if upgrade_rpn: 149 | m = re.search( 150 | r'(conv_cls|retina_cls|rpn_cls|fc_cls|fcos_cls|' 151 | r'fovea_cls).(weight|bias)', new_key) 152 | else: 153 | m = re.search( 154 | r'(conv_cls|retina_cls|fc_cls|fcos_cls|' 155 | r'fovea_cls).(weight|bias)', new_key) 156 | if m is not None: 157 | print(f'reorder cls channels of {new_key}') 158 | new_val = reorder_cls_channel(val, num_classes) 159 | 160 | # regression 161 | if upgrade_rpn: 162 | m = re.search(r'(fc_reg).(weight|bias)', new_key) 163 | else: 164 | m = re.search(r'(fc_reg|rpn_reg).(weight|bias)', new_key) 165 | if m is not None and not reg_cls_agnostic: 166 | print(f'truncate regression channels of {new_key}') 167 | new_val = truncate_reg_channel(val, num_classes) 168 | 169 | # mask head 170 | m = re.search(r'(conv_logits).(weight|bias)', new_key) 171 | if m is not None: 172 | print(f'truncate mask prediction channels of {new_key}') 173 | new_val = truncate_cls_channel(val, num_classes) 174 | 175 | m = re.search(r'(cls_convs|reg_convs).\d.(weight|bias)', key) 176 | # Legacy issues in RetinaNet since V1.x 177 | # Use ConvModule instead of nn.Conv2d in RetinaNet 178 | # cls_convs.0.weight -> cls_convs.0.conv.weight 179 | if m is not None and upgrade_retina: 180 | param = m.groups()[1] 181 | new_key = key.replace(param, f'conv.{param}') 182 | out_state_dict[new_key] = val 183 | print(f'rename the name of {key} to {new_key}') 184 | continue 185 | 186 | m = re.search(r'(cls_convs).\d.(weight|bias)', key) 187 | if m is not None and is_ssd: 188 | print(f'reorder cls channels of {new_key}') 189 | new_val = reorder_cls_channel(val, num_classes) 190 | 191 | out_state_dict[new_key] = new_val 192 | checkpoint['state_dict'] = out_state_dict 193 | torch.save(checkpoint, out_file) 194 | 195 | 196 | def main(): 197 | parser = argparse.ArgumentParser(description='Upgrade model version') 198 | parser.add_argument('in_file', help='input checkpoint file') 199 | parser.add_argument('out_file', help='output checkpoint file') 200 | parser.add_argument( 201 | '--num-classes', 202 | type=int, 203 | default=81, 204 | help='number of classes of the original model') 205 | args = parser.parse_args() 206 | convert(args.in_file, args.out_file, args.num_classes) 207 | 208 | 209 | if __name__ == '__main__': 210 | main() 211 | -------------------------------------------------------------------------------- /tools/model_converters/upgrade_ssd_version.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import argparse 3 | import tempfile 4 | from collections import OrderedDict 5 | 6 | import torch 7 | from mmcv import Config 8 | 9 | 10 | def parse_config(config_strings): 11 | temp_file = tempfile.NamedTemporaryFile() 12 | config_path = f'{temp_file.name}.py' 13 | with open(config_path, 'w') as f: 14 | f.write(config_strings) 15 | 16 | config = Config.fromfile(config_path) 17 | # check whether it is SSD 18 | if config.model.bbox_head.type != 'SSDHead': 19 | raise AssertionError('This is not a SSD model.') 20 | 21 | 22 | def convert(in_file, out_file): 23 | checkpoint = torch.load(in_file) 24 | in_state_dict = checkpoint.pop('state_dict') 25 | out_state_dict = OrderedDict() 26 | meta_info = checkpoint['meta'] 27 | parse_config('#' + meta_info['config']) 28 | for key, value in in_state_dict.items(): 29 | if 'extra' in key: 30 | layer_idx = int(key.split('.')[2]) 31 | new_key = 'neck.extra_layers.{}.{}.conv.'.format( 32 | layer_idx // 2, layer_idx % 2) + key.split('.')[-1] 33 | elif 'l2_norm' in key: 34 | new_key = 'neck.l2_norm.weight' 35 | elif 'bbox_head' in key: 36 | new_key = key[:21] + '.0' + key[21:] 37 | else: 38 | new_key = key 39 | out_state_dict[new_key] = value 40 | checkpoint['state_dict'] = out_state_dict 41 | 42 | if torch.__version__ >= '1.6': 43 | torch.save(checkpoint, out_file, _use_new_zipfile_serialization=False) 44 | else: 45 | torch.save(checkpoint, out_file) 46 | 47 | 48 | def main(): 49 | parser = argparse.ArgumentParser(description='Upgrade SSD version') 50 | parser.add_argument('in_file', help='input checkpoint file') 51 | parser.add_argument('out_file', help='output checkpoint file') 52 | 53 | args = parser.parse_args() 54 | convert(args.in_file, args.out_file) 55 | 56 | 57 | if __name__ == '__main__': 58 | main() 59 | -------------------------------------------------------------------------------- /tools/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | WORK_DIR=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | PY_ARGS=${@:5} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} 25 | --------------------------------------------------------------------------------
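A note on putting the helper scripts above together. The following is a minimal usage sketch, not part of the repository: the flags come from tools/misc/download_dataset.py, tools/slurm_train.sh, and tools/model_converters/publish_model.py as listed above, while the partition name, job name, config path, and work directory are illustrative placeholders, and the --dataset-name flag is assumed from the argparse destination (args.dataset_name) visible in download_dataset.py.

# Download and unpack COCO 2017 into data/coco; with --delete the zip archives are removed after extraction.
python tools/misc/download_dataset.py --dataset-name coco2017 --save-dir data/coco --unzip --delete --threads 4

# Launch an 8-GPU Slurm training job; slurm_train.sh forwards the config and work dir to tools/train.py.
# "gpu_partition", "date_train", and both paths below are placeholders.
GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 bash tools/slurm_train.sh gpu_partition date_train path/to/config.py work_dirs/date_example

# Strip the optimizer state and append the first 8 characters of the sha256 hash to the checkpoint name before release.
python tools/model_converters/publish_model.py work_dirs/date_example/latest.pth date_example.pth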