├── data └── .gitkeep ├── weights └── .gitkeep ├── requirements ├── mminstall.txt ├── readthedocs.txt ├── albu.txt ├── runtime.txt ├── build.txt ├── optional.txt ├── docs.txt └── tests.txt ├── mmdet ├── custom │ └── __init__.py ├── models │ ├── roi_heads │ │ ├── shared_heads │ │ │ └── __init__.py │ │ ├── roi_extractors │ │ │ └── __init__.py │ │ ├── bbox_heads │ │ │ └── __init__.py │ │ ├── mask_heads │ │ │ ├── __init__.py │ │ │ ├── scnet_mask_head.py │ │ │ ├── scnet_semantic_head.py │ │ │ ├── htc_mask_head.py │ │ │ └── feature_relay_head.py │ │ ├── double_roi_head.py │ │ └── __init__.py │ ├── seg_heads │ │ ├── __init__.py │ │ └── panoptic_fusion_heads │ │ │ ├── __init__.py │ │ │ └── base_panoptic_fusion_head.py │ ├── detectors │ │ ├── deformable_detr.py │ │ ├── scnet.py │ │ ├── htc.py │ │ ├── gfl.py │ │ ├── yolof.py │ │ ├── atss.py │ │ ├── fcos.py │ │ ├── fsaf.py │ │ ├── paa.py │ │ ├── ddod.py │ │ ├── fovea.py │ │ ├── retinanet.py │ │ ├── vfnet.py │ │ ├── autoassign.py │ │ ├── nasfcos.py │ │ ├── tood.py │ │ ├── reppoints_detector.py │ │ ├── mask_rcnn.py │ │ ├── faster_rcnn.py │ │ ├── queryinst.py │ │ ├── mask_scoring_rcnn.py │ │ ├── mask2former.py │ │ ├── solo.py │ │ ├── solov2.py │ │ ├── point_rend.py │ │ ├── grid_rcnn.py │ │ ├── panoptic_fpn.py │ │ ├── yolo.py │ │ └── cascade_rcnn.py │ ├── plugins │ │ └── __init__.py │ ├── necks │ │ └── __init__.py │ ├── __init__.py │ ├── backbones │ │ └── __init__.py │ ├── utils │ │ ├── make_divisible.py │ │ ├── builder.py │ │ ├── __init__.py │ │ └── brick_wrappers.py │ ├── builder.py │ └── losses │ │ ├── __init__.py │ │ └── mse_loss.py ├── core │ ├── data_structures │ │ └── __init__.py │ ├── bbox │ │ ├── iou_calculators │ │ │ ├── __init__.py │ │ │ └── builder.py │ │ ├── match_costs │ │ │ ├── builder.py │ │ │ └── __init__.py │ │ ├── assigners │ │ │ ├── base_assigner.py │ │ │ └── __init__.py │ │ ├── coder │ │ │ ├── base_bbox_coder.py │ │ │ ├── pseudo_bbox_coder.py │ │ │ └── __init__.py │ │ ├── builder.py │ │ ├── samplers │ │ │ ├── combined_sampler.py │ │ │ ├── __init__.py │ │ │ ├── pseudo_sampler.py │ │ │ └── mask_pseudo_sampler.py │ │ ├── demodata.py │ │ └── __init__.py │ ├── evaluation │ │ ├── panoptic_utils.py │ │ └── __init__.py │ ├── optimizers │ │ ├── __init__.py │ │ └── builder.py │ ├── visualization │ │ └── __init__.py │ ├── mask │ │ └── __init__.py │ ├── post_processing │ │ └── __init__.py │ ├── __init__.py │ ├── hook │ │ ├── set_epoch_info_hook.py │ │ ├── checkloss_hook.py │ │ ├── __init__.py │ │ └── sync_norm_hook.py │ ├── export │ │ └── __init__.py │ ├── anchor │ │ ├── builder.py │ │ └── __init__.py │ └── utils │ │ └── __init__.py ├── datasets │ ├── api_wrappers │ │ ├── __init__.py │ │ └── coco_api.py │ ├── pipelines │ │ ├── formating.py │ │ ├── compose.py │ │ └── __init__.py │ ├── samplers │ │ └── __init__.py │ ├── deepfashion.py │ ├── __init__.py │ └── wider_face.py ├── utils │ ├── collect_env.py │ ├── __init__.py │ ├── util_random.py │ ├── profiling.py │ └── split_batch.py ├── version.py ├── apis │ └── __init__.py └── __init__.py ├── requirements.txt ├── configs ├── mask_rcnn │ ├── mask_rcnn_r50_fpn_fp16_1x_coco.py │ ├── mask_rcnn_r50_fpn_mstrain-poly_3x_coco.py │ ├── mask_rcnn_r50_fpn_mstrain-poly_3x_coco-person.py │ ├── mask_rcnn_r50_fpn_1x_coco.py │ ├── mask_rcnn_r50_fpn_2x_coco.py │ ├── mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco.py │ ├── mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco.py │ ├── mask_rcnn_r101_fpn_1x_coco.py │ ├── mask_rcnn_r101_fpn_2x_coco.py │ ├── mask_rcnn_r101_caffe_fpn_1x_coco.py │ ├── 
mask_rcnn_r101_fpn_mstrain-poly_3x_coco.py │ ├── mask_rcnn_x101_32x4d_fpn_1x_coco.py │ ├── mask_rcnn_x101_32x4d_fpn_2x_coco.py │ ├── mask_rcnn_x101_64x4d_fpn_1x_coco.py │ ├── mask_rcnn_x101_64x4d_fpn_2x_coco.py │ ├── mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco.py │ ├── mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco.py │ ├── mask_rcnn_r50_fpn_1x_wandb_coco.py │ ├── mask_rcnn_r50_fpn_poly_1x_coco.py │ ├── mask_rcnn_r50_caffe_c4_1x_coco.py │ ├── mask_rcnn_r50_caffe_fpn_1x_coco.py │ ├── mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py │ ├── mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco.py │ ├── mask_rcnn_x101_32x8d_fpn_mstrain-poly_1x_coco.py │ └── mask_rcnn_r50_caffe_fpn_mstrain_1x_coco.py ├── faster_rcnn │ ├── faster_rcnn_r50_fpn_fp16_1x_coco.py │ ├── faster_rcnn_r50_fpn_mstrain_3x_coco.py │ ├── faster_rcnn_r50_fpn_ohem_1x_coco.py │ ├── faster_rcnn_r50_caffe_dc5_mstrain_3x_coco.py │ ├── faster_rcnn_r50_caffe_fpn_mstrain_2x_coco.py │ ├── faster_rcnn_r50_fpn_1x_coco.py │ ├── faster_rcnn_r50_fpn_2x_coco.py │ ├── faster_rcnn_r101_fpn_1x_coco.py │ ├── faster_rcnn_r101_fpn_2x_coco.py │ ├── faster_rcnn_r50_fpn_iou_1x_coco.py │ ├── faster_rcnn_r50_fpn_ciou_1x_coco.py │ ├── faster_rcnn_r50_fpn_giou_1x_coco.py │ ├── faster_rcnn_r101_fpn_mstrain_3x_coco.py │ ├── faster_rcnn_r50_fpn_bounded_iou_1x_coco.py │ ├── faster_rcnn_r101_caffe_fpn_1x_coco.py │ ├── faster_rcnn_r50_fpn_soft_nms_1x_coco.py │ ├── faster_rcnn_r50_caffe_fpn_90k_coco.py │ ├── faster_rcnn_r50_caffe_fpn_mstrain_90k_coco.py │ ├── faster_rcnn_x101_32x4d_fpn_1x_coco.py │ ├── faster_rcnn_x101_32x4d_fpn_2x_coco.py │ ├── faster_rcnn_x101_64x4d_fpn_1x_coco.py │ ├── faster_rcnn_x101_64x4d_fpn_2x_coco.py │ ├── faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person.py │ ├── faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person-bicycle-car.py │ ├── faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco.py │ ├── faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py │ ├── faster_rcnn_r50_fpn_tnr-pretrain_1x_coco.py │ ├── faster_rcnn_r50_caffe_dc5_1x_coco.py │ ├── faster_rcnn_r50_caffe_c4_mstrain_1x_coco.py │ ├── faster_rcnn_r50_caffe_c4_1x_coco.py │ ├── faster_rcnn_r50_caffe_fpn_1x_coco.py │ ├── faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py │ ├── faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py │ ├── faster_rcnn_r101_caffe_fpn_mstrain_3x_coco.py │ └── faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py ├── mask2former │ ├── mask2former_r101_lsj_8x2_50e_coco.py │ ├── mask2former_r101_lsj_8x2_50e_coco-panoptic.py │ ├── mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic.py │ ├── mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic.py │ ├── test_coco_occ_person.py │ ├── test_coco_val_person.py │ ├── mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py │ ├── mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic.py │ └── mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic.py ├── _base_ │ ├── schedules │ │ ├── schedule_1x.py │ │ ├── schedule_20e.py │ │ └── schedule_2x.py │ ├── datasets │ │ ├── lvis_v1_instance.py │ │ ├── lvis_v0.5_instance.py │ │ ├── coco_detection.py │ │ ├── coco_instance.py │ │ ├── deepfashion.py │ │ ├── voc0712.py │ │ ├── coco_instance_semantic.py │ │ └── cityscapes_detection.py │ ├── default_runtime.py │ └── models │ │ ├── ssd300.py │ │ ├── retinanet_r50_fpn.py │ │ ├── rpn_r50_caffe_c4.py │ │ └── rpn_r50_fpn.py └── maskformer │ └── metafile.yml ├── tools ├── infer.sh ├── slurm_test.sh ├── slurm_train.sh ├── dist_train.sh ├── misc │ ├── gen_coco_panoptic_test_info.py │ └── print_config.py ├── dist_test.sh ├── test_after_train.sh └── model_converters │ ├── 
selfsup2mmdet.py │ ├── publish_model.py │ └── upgrade_ssd_version.py ├── Pose2Seg_OCP ├── README.md ├── aepose │ └── aedets2cocojson.py └── datasets │ └── utils.py ├── setup.cfg ├── LICENSE └── .gitignore /data/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /weights/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements/mminstall.txt: -------------------------------------------------------------------------------- 1 | mmcv-full>=1.3.17 2 | -------------------------------------------------------------------------------- /requirements/readthedocs.txt: -------------------------------------------------------------------------------- 1 | mmcv 2 | torch 3 | torchvision 4 | -------------------------------------------------------------------------------- /requirements/albu.txt: -------------------------------------------------------------------------------- 1 | albumentations>=0.3.2 --no-binary qudida,albumentations 2 | -------------------------------------------------------------------------------- /requirements/runtime.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | numpy 3 | pycocotools 4 | six 5 | terminaltables 6 | -------------------------------------------------------------------------------- /requirements/build.txt: -------------------------------------------------------------------------------- 1 | # These must be installed before building mmdetection 2 | cython 3 | numpy 4 | -------------------------------------------------------------------------------- /requirements/optional.txt: -------------------------------------------------------------------------------- 1 | cityscapesscripts 2 | imagecorruptions 3 | scipy 4 | sklearn 5 | timm 6 | -------------------------------------------------------------------------------- /mmdet/custom/__init__.py: -------------------------------------------------------------------------------- 1 | from .ocp import OccCopyPaste 2 | 3 | __all__ = [ 4 | 'OccCopyPaste', 5 | ] 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -r requirements/build.txt 2 | -r requirements/optional.txt 3 | -r requirements/runtime.txt 4 | -r requirements/tests.txt 5 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_r50_fpn_fp16_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask_rcnn_r50_fpn_1x_coco.py' 2 | # fp16 settings 3 | fp16 = dict(loss_scale=512.) 4 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_fpn_fp16_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './faster_rcnn_r50_fpn_1x_coco.py' 2 | # fp16 settings 3 | fp16 = dict(loss_scale=512.) 
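# Note on the setting above: 512. is a static loss-scaling factor consumed by
# mmcv's Fp16OptimizerHook during mixed-precision training; mmcv also accepts
# fp16 = dict(loss_scale='dynamic') to adjust the scale automatically (an
# alternative noted here for illustration, not a setting used by this config).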
4 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_fpn_mstrain_3x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../common/mstrain_3x_coco.py', '../_base_/models/faster_rcnn_r50_fpn.py' 3 | ] 4 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/shared_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .res_layer import ResLayer 3 | 4 | __all__ = ['ResLayer'] 5 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_fpn_ohem_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './faster_rcnn_r50_fpn_1x_coco.py' 2 | model = dict(train_cfg=dict(rcnn=dict(sampler=dict(type='OHEMSampler')))) 3 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_r50_fpn_mstrain-poly_3x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../common/mstrain-poly_3x_coco_instance.py', 3 | '../_base_/models/mask_rcnn_r50_fpn.py' 4 | ] 5 | -------------------------------------------------------------------------------- /mmdet/models/seg_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .panoptic_fpn_head import PanopticFPNHead # noqa: F401,F403 3 | from .panoptic_fusion_heads import * # noqa: F401,F403 4 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_r50_fpn_mstrain-poly_3x_coco-person.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../common/mstrain-poly_3x_coco_instance.py', 3 | '../_base_/models/mask_rcnn_r50_fpn.py' 4 | ] 5 | 6 | classes = ('person',) 7 | -------------------------------------------------------------------------------- /mmdet/core/data_structures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .general_data import GeneralData 3 | from .instance_data import InstanceData 4 | 5 | __all__ = ['GeneralData', 'InstanceData'] 6 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py' 2 | # learning policy 3 | lr_config = dict(step=[28, 34]) 4 | runner = dict(type='EpochBasedRunner', max_epochs=36) 5 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py' 2 | # learning policy 3 | lr_config = dict(step=[16, 23]) 4 | runner = dict(type='EpochBasedRunner', max_epochs=24) 5 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/mask_rcnn_r50_fpn.py', 3 | '../_base_/datasets/coco_instance.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_r50_fpn_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/mask_rcnn_r50_fpn.py', 3 | '../_base_/datasets/coco_instance.py', 4 | '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py' 5 | ] 6 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/faster_rcnn_r50_fpn.py', 3 | '../_base_/datasets/coco_detection.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_fpn_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/faster_rcnn_r50_fpn.py', 3 | '../_base_/datasets/coco_detection.py', 4 | '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py' 5 | ] 6 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py' 2 | # learning policy 3 | lr_config = dict(step=[16, 23]) 4 | runner = dict(type='EpochBasedRunner', max_epochs=24) 5 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py' 2 | # learning policy 3 | lr_config = dict(step=[28, 34]) 4 | runner = dict(type='EpochBasedRunner', max_epochs=36) 5 | -------------------------------------------------------------------------------- /requirements/docs.txt: 
-------------------------------------------------------------------------------- 1 | docutils==0.16.0 2 | myst-parser 3 | -e git+https://github.com/open-mmlab/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme 4 | sphinx==4.0.2 5 | sphinx-copybutton 6 | sphinx_markdown_tables 7 | sphinx_rtd_theme==0.5.2 8 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask_rcnn_r50_fpn_1x_coco.py' 2 | model = dict( 3 | backbone=dict( 4 | depth=101, 5 | init_cfg=dict(type='Pretrained', 6 | checkpoint='torchvision://resnet101'))) 7 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_r101_fpn_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask_rcnn_r50_fpn_2x_coco.py' 2 | model = dict( 3 | backbone=dict( 4 | depth=101, 5 | init_cfg=dict(type='Pretrained', 6 | checkpoint='torchvision://resnet101'))) 7 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r101_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './faster_rcnn_r50_fpn_1x_coco.py' 2 | model = dict( 3 | backbone=dict( 4 | depth=101, 5 | init_cfg=dict(type='Pretrained', 6 | checkpoint='torchvision://resnet101'))) 7 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r101_fpn_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './faster_rcnn_r50_fpn_2x_coco.py' 2 | model = dict( 3 | backbone=dict( 4 | depth=101, 5 | init_cfg=dict(type='Pretrained', 6 | checkpoint='torchvision://resnet101'))) 7 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_fpn_iou_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './faster_rcnn_r50_fpn_1x_coco.py' 2 | model = dict( 3 | roi_head=dict( 4 | bbox_head=dict( 5 | reg_decoded_bbox=True, 6 | loss_bbox=dict(type='IoULoss', loss_weight=10.0)))) 7 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_fpn_ciou_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './faster_rcnn_r50_fpn_1x_coco.py' 2 | model = dict( 3 | roi_head=dict( 4 | bbox_head=dict( 5 | reg_decoded_bbox=True, 6 | loss_bbox=dict(type='CIoULoss', loss_weight=12.0)))) 7 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_fpn_giou_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './faster_rcnn_r50_fpn_1x_coco.py' 2 | model = dict( 3 | roi_head=dict( 4 | bbox_head=dict( 5 | reg_decoded_bbox=True, 6 | loss_bbox=dict(type='GIoULoss', loss_weight=10.0)))) 7 | -------------------------------------------------------------------------------- /mmdet/core/bbox/iou_calculators/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .builder import build_iou_calculator 3 | from .iou2d_calculator import BboxOverlaps2D, bbox_overlaps 4 | 5 | __all__ = ['build_iou_calculator', 'BboxOverlaps2D', 'bbox_overlaps'] 6 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r101_fpn_mstrain_3x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = 'faster_rcnn_r50_fpn_mstrain_3x_coco.py' 2 | 3 | model = dict( 4 | backbone=dict( 5 | depth=101, 6 | init_cfg=dict(type='Pretrained', 7 | checkpoint='torchvision://resnet101'))) 8 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_fpn_bounded_iou_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './faster_rcnn_r50_fpn_1x_coco.py' 2 | model = dict( 3 | roi_head=dict( 4 | bbox_head=dict( 5 | reg_decoded_bbox=True, 6 | loss_bbox=dict(type='BoundedIoULoss', loss_weight=10.0)))) 7 | -------------------------------------------------------------------------------- /configs/mask2former/mask2former_r101_lsj_8x2_50e_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = ['./mask2former_r50_lsj_8x2_50e_coco.py'] 2 | 3 | model = dict( 4 | backbone=dict( 5 | depth=101, 6 | init_cfg=dict(type='Pretrained', 7 | checkpoint='torchvision://resnet101'))) 8 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_r101_caffe_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask_rcnn_r50_caffe_fpn_1x_coco.py' 2 | model = dict( 3 | backbone=dict( 4 | depth=101, 5 | init_cfg=dict( 6 | type='Pretrained', 7 | checkpoint='open-mmlab://detectron2/resnet101_caffe'))) 8 | -------------------------------------------------------------------------------- /configs/mask2former/mask2former_r101_lsj_8x2_50e_coco-panoptic.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask2former_r50_lsj_8x2_50e_coco-panoptic.py' 2 | 3 | model = dict( 4 | backbone=dict( 5 | depth=101, 6 | init_cfg=dict(type='Pretrained', 7 | checkpoint='torchvision://resnet101'))) 8 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r101_caffe_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './faster_rcnn_r50_caffe_fpn_1x_coco.py' 2 | model = dict( 3 | backbone=dict( 4 | depth=101, 5 | init_cfg=dict( 6 | type='Pretrained', 7 | checkpoint='open-mmlab://detectron2/resnet101_caffe'))) 8 | -------------------------------------------------------------------------------- /mmdet/datasets/api_wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .coco_api import COCO, COCOeval 3 | from .panoptic_evaluation import pq_compute_multi_core, pq_compute_single_core 4 | 5 | __all__ = [ 6 | 'COCO', 'COCOeval', 'pq_compute_multi_core', 'pq_compute_single_core' 7 | ] 8 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/panoptic_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
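# Worked example of the encoding defined below: with INSTANCE_OFFSET = 1000,
# instance 3 of category 17 is stored as pan_id = 3 * 1000 + 17 = 3017;
# cat_id = pan_id % INSTANCE_OFFSET and ins_id = pan_id // INSTANCE_OFFSET
# recover the two fields.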
2 | # A custom value to distinguish instance ID and category ID; needs to 3 | # be greater than the number of categories. 4 | # For a pixel in the panoptic result map: 5 | # pan_id = ins_id * INSTANCE_OFFSET + cat_id 6 | INSTANCE_OFFSET = 1000 7 | -------------------------------------------------------------------------------- /mmdet/models/detectors/deformable_detr.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from ..builder import DETECTORS 3 | from .detr import DETR 4 | 5 | 6 | @DETECTORS.register_module() 7 | class DeformableDETR(DETR): 8 | 9 | def __init__(self, *args, **kwargs): 10 | super(DETR, self).__init__(*args, **kwargs) 11 | -------------------------------------------------------------------------------- /mmdet/core/bbox/match_costs/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.utils import Registry, build_from_cfg 3 | 4 | MATCH_COST = Registry('Match Cost') 5 | 6 | 7 | def build_match_cost(cfg, default_args=None): 8 | """Builder of match cost.""" 9 | return build_from_cfg(cfg, MATCH_COST, default_args) 10 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/roi_extractors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base_roi_extractor import BaseRoIExtractor 3 | from .generic_roi_extractor import GenericRoIExtractor 4 | from .single_level_roi_extractor import SingleRoIExtractor 5 | 6 | __all__ = ['BaseRoIExtractor', 'SingleRoIExtractor', 'GenericRoIExtractor'] 7 | -------------------------------------------------------------------------------- /mmdet/models/seg_heads/panoptic_fusion_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base_panoptic_fusion_head import \ 3 | BasePanopticFusionHead # noqa: F401,F403 4 | from .heuristic_fusion_head import HeuristicFusionHead # noqa: F401,F403 5 | from .maskformer_fusion_head import MaskFormerFusionHead # noqa: F401,F403 6 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_r101_fpn_mstrain-poly_3x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../common/mstrain-poly_3x_coco_instance.py', 3 | '../_base_/models/mask_rcnn_r50_fpn.py' 4 | ] 5 | 6 | model = dict( 7 | backbone=dict( 8 | depth=101, 9 | init_cfg=dict(type='Pretrained', 10 | checkpoint='torchvision://resnet101'))) 11 | -------------------------------------------------------------------------------- /mmdet/datasets/pipelines/formating.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | # flake8: noqa 3 | import warnings 4 | 5 | from .formatting import * 6 | 7 | warnings.warn('DeprecationWarning: mmdet.datasets.pipelines.formating will be ' 8 | 'deprecated; please replace it with ' 9 | 'mmdet.datasets.pipelines.formatting.') 10 | -------------------------------------------------------------------------------- /mmdet/core/bbox/iou_calculators/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
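# Usage sketch (illustrative; BboxOverlaps2D is exported from
# mmdet.core.bbox.iou_calculators):
#   iou_calculator = build_iou_calculator(dict(type='BboxOverlaps2D'))
#   overlaps = iou_calculator(bboxes1, bboxes2)  # pairwise IoU matrix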
2 | from mmcv.utils import Registry, build_from_cfg 3 | 4 | IOU_CALCULATORS = Registry('IoU calculator') 5 | 6 | 7 | def build_iou_calculator(cfg, default_args=None): 8 | """Builder of IoU calculator.""" 9 | return build_from_cfg(cfg, IOU_CALCULATORS, default_args) 10 | -------------------------------------------------------------------------------- /mmdet/core/optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .builder import OPTIMIZER_BUILDERS, build_optimizer 3 | from .layer_decay_optimizer_constructor import \ 4 | LearningRateDecayOptimizerConstructor 5 | 6 | __all__ = [ 7 | 'LearningRateDecayOptimizerConstructor', 'OPTIMIZER_BUILDERS', 8 | 'build_optimizer' 9 | ] 10 | -------------------------------------------------------------------------------- /requirements/tests.txt: -------------------------------------------------------------------------------- 1 | asynctest 2 | codecov 3 | flake8 4 | interrogate 5 | isort==4.3.21 6 | # Note: used for kwarray.group_items, this may be ported to mmcv in the future. 7 | kwarray 8 | -e git+https://github.com/open-mmlab/mmtracking#egg=mmtrack 9 | onnx==1.7.0 10 | onnxruntime>=1.8.0 11 | protobuf<=3.20.1 12 | pytest 13 | ubelt 14 | xdoctest>=0.10.0 15 | yapf 16 | -------------------------------------------------------------------------------- /configs/mask2former/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic.py: -------------------------------------------------------------------------------- 1 | _base_ = ['./mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic.py'] 2 | pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384_22k.pth' # noqa 3 | 4 | model = dict( 5 | backbone=dict(init_cfg=dict(type='Pretrained', checkpoint=pretrained))) 6 | -------------------------------------------------------------------------------- /mmdet/core/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .image import (color_val_matplotlib, imshow_det_bboxes, 3 | imshow_gt_det_bboxes) 4 | from .palette import get_palette, palette_val 5 | 6 | __all__ = [ 7 | 'imshow_det_bboxes', 'imshow_gt_det_bboxes', 'color_val_matplotlib', 8 | 'palette_val', 'get_palette' 9 | ] 10 | -------------------------------------------------------------------------------- /mmdet/models/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .dropblock import DropBlock 3 | from .msdeformattn_pixel_decoder import MSDeformAttnPixelDecoder 4 | from .pixel_decoder import PixelDecoder, TransformerEncoderPixelDecoder 5 | 6 | __all__ = [ 7 | 'DropBlock', 'PixelDecoder', 'TransformerEncoderPixelDecoder', 8 | 'MSDeformAttnPixelDecoder' 9 | ] 10 | -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=12) 12 | -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_20e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 19]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=20) 12 | -------------------------------------------------------------------------------- /configs/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 22]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=24) 12 | -------------------------------------------------------------------------------- /mmdet/models/detectors/scnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from ..builder import DETECTORS 3 | from .cascade_rcnn import CascadeRCNN 4 | 5 | 6 | @DETECTORS.register_module() 7 | class SCNet(CascadeRCNN): 8 | """Implementation of `SCNet <https://arxiv.org/abs/2012.10150>`_""" 9 | 10 | def __init__(self, **kwargs): 11 | super(SCNet, self).__init__(**kwargs) 12 | -------------------------------------------------------------------------------- /tools/infer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | VIDEO=/path/to/video/file.mp4 4 | 5 | CONFIG=/path/to/config.py 6 | 7 | WEIGHTS=/path/to/weights.pth 8 | 9 | OUT=/path/to/output.mp4 10 | 11 | OUTFRAME=/path/to/output/frames/ 12 | 13 | python3 $(dirname "$0")/infer.py \ 14 | $VIDEO \ 15 | $CONFIG \ 16 | $WEIGHTS \ 17 | --score-thr 0.3 \ 18 | --outframes $OUTFRAME \ 19 | # --out $OUT \ 20 | -------------------------------------------------------------------------------- /mmdet/core/mask/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .mask_target import mask_target 3 | from .structures import BaseInstanceMasks, BitmapMasks, PolygonMasks 4 | from .utils import encode_mask_results, mask2bbox, split_combined_polys 5 | 6 | __all__ = [ 7 | 'split_combined_polys', 'mask_target', 'BaseInstanceMasks', 'BitmapMasks', 8 | 'PolygonMasks', 'encode_mask_results', 'mask2bbox' 9 | ] 10 | -------------------------------------------------------------------------------- /mmdet/core/bbox/match_costs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .builder import build_match_cost 3 | from .match_cost import (BBoxL1Cost, ClassificationCost, CrossEntropyLossCost, 4 | DiceCost, FocalLossCost, IoUCost) 5 | 6 | __all__ = [ 7 | 'build_match_cost', 'ClassificationCost', 'BBoxL1Cost', 'IoUCost', 8 | 'FocalLossCost', 'DiceCost', 'CrossEntropyLossCost' 9 | ] 10 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_fpn_soft_nms_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/faster_rcnn_r50_fpn.py', 3 | '../_base_/datasets/coco_detection.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | 7 | model = dict( 8 | test_cfg=dict( 9 | rcnn=dict( 10 | score_thr=0.05, 11 | nms=dict(type='soft_nms', iou_threshold=0.5), 12 | max_per_img=100))) 13 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/base_assigner.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from abc import ABCMeta, abstractmethod 3 | 4 | 5 | class BaseAssigner(metaclass=ABCMeta): 6 | """Base assigner that assigns boxes to ground truth boxes.""" 7 | 8 | @abstractmethod 9 | def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None): 10 | """Assign boxes to either a ground truth box or a negative box.""" 11 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .bbox_nms import fast_nms, multiclass_nms 3 | from .matrix_nms import mask_matrix_nms 4 | from .merge_augs import (merge_aug_bboxes, merge_aug_masks, 5 | merge_aug_proposals, merge_aug_scores) 6 | 7 | __all__ = [ 8 | 'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes', 9 | 'merge_aug_scores', 'merge_aug_masks', 'mask_matrix_nms', 'fast_nms' 10 | ] 11 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_90k_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = 'faster_rcnn_r50_caffe_fpn_1x_coco.py' 2 | 3 | # learning policy 4 | lr_config = dict( 5 | policy='step', 6 | warmup='linear', 7 | warmup_iters=500, 8 | warmup_ratio=0.001, 9 | step=[60000, 80000]) 10 | 11 | # Runner type 12 | runner = dict(_delete_=True, type='IterBasedRunner', max_iters=90000) 13 | 14 | checkpoint_config = dict(interval=10000) 15 | evaluation = dict(interval=10000, metric='bbox') 16 | -------------------------------------------------------------------------------- /mmdet/core/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .anchor import * # noqa: F401, F403 3 | from .bbox import * # noqa: F401, F403 4 | from .data_structures import * # noqa: F401, F403 5 | from .evaluation import * # noqa: F401, F403 6 | from .hook import * # noqa: F401, F403 7 | from .mask import * # noqa: F401, F403 8 | from .optimizers import * # noqa: F401, F403 9 | from .post_processing import * # noqa: F401, F403 10 | from .utils import * # noqa: F401, F403 11 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_90k_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = 'faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py' 2 | 3 | # learning policy 4 | lr_config = dict( 5 | policy='step', 6 | warmup='linear', 7 | warmup_iters=500, 8 | warmup_ratio=0.001, 9 | step=[60000, 80000]) 10 | 11 | # Runner type 12 | runner = dict(_delete_=True, type='IterBasedRunner', max_iters=90000) 13 | 14 | checkpoint_config = dict(interval=10000) 15 | evaluation = dict(interval=10000, metric='bbox') 16 | -------------------------------------------------------------------------------- /mmdet/datasets/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .class_aware_sampler import ClassAwareSampler 3 | from .distributed_sampler import DistributedSampler 4 | from .group_sampler import DistributedGroupSampler, GroupSampler 5 | from .infinite_sampler import InfiniteBatchSampler, InfiniteGroupBatchSampler 6 | 7 | __all__ = [ 8 | 'DistributedSampler', 'DistributedGroupSampler', 'GroupSampler', 9 | 'InfiniteGroupBatchSampler', 'InfiniteBatchSampler', 'ClassAwareSampler' 10 | ] 11 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask_rcnn_r101_fpn_1x_coco.py' 2 | model = dict( 3 | backbone=dict( 4 | type='ResNeXt', 5 | depth=101, 6 | groups=32, 7 | base_width=4, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | style='pytorch', 13 | init_cfg=dict( 14 | type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) 15 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask_rcnn_r101_fpn_2x_coco.py' 2 | model = dict( 3 | backbone=dict( 4 | type='ResNeXt', 5 | depth=101, 6 | groups=32, 7 | base_width=4, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | style='pytorch', 13 | init_cfg=dict( 14 | type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) 15 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './faster_rcnn_r50_fpn_1x_coco.py' 2 | model = dict( 3 | backbone=dict( 4 | type='ResNeXt', 5 | depth=101, 6 | groups=32, 7 | base_width=4, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | style='pytorch', 13 | init_cfg=dict( 14 | type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) 15 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './faster_rcnn_r50_fpn_2x_coco.py' 2 | model = dict( 3 | backbone=dict( 4 | type='ResNeXt', 5 | depth=101, 6 | groups=32, 7 | base_width=4, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | style='pytorch', 13 | init_cfg=dict( 14 | type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) 15 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './faster_rcnn_r50_fpn_1x_coco.py' 2 | model = dict( 3 | backbone=dict( 4 | type='ResNeXt', 5 | depth=101, 6 | groups=64, 7 | base_width=4, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | style='pytorch', 13 | init_cfg=dict( 14 | type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) 15 | 
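# Usage sketch for configs such as the one above (assumes mmcv<2.0 and mmdet
# are installed and the path is resolved from the repo root; illustrative,
# not a file of this repo):
#   from mmcv import Config
#   cfg = Config.fromfile(
#       'configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco.py')
#   print(cfg.model.backbone.type)  # 'ResNeXt', merged over the _base_ model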
-------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './faster_rcnn_r50_fpn_2x_coco.py' 2 | model = dict( 3 | backbone=dict( 4 | type='ResNeXt', 5 | depth=101, 6 | groups=64, 7 | base_width=4, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | style='pytorch', 13 | init_cfg=dict( 14 | type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) 15 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask_rcnn_x101_32x4d_fpn_1x_coco.py' 2 | model = dict( 3 | backbone=dict( 4 | type='ResNeXt', 5 | depth=101, 6 | groups=64, 7 | base_width=4, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | style='pytorch', 13 | init_cfg=dict( 14 | type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) 15 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask_rcnn_x101_32x4d_fpn_2x_coco.py' 2 | model = dict( 3 | backbone=dict( 4 | type='ResNeXt', 5 | depth=101, 6 | groups=64, 7 | base_width=4, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | style='pytorch', 13 | init_cfg=dict( 14 | type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) 15 | -------------------------------------------------------------------------------- /mmdet/core/hook/set_epoch_info_hook.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.parallel import is_module_wrapper 3 | from mmcv.runner import HOOKS, Hook 4 | 5 | 6 | @HOOKS.register_module() 7 | class SetEpochInfoHook(Hook): 8 | """Set runner's epoch information to the model.""" 9 | 10 | def before_train_epoch(self, runner): 11 | epoch = runner.epoch 12 | model = runner.model 13 | if is_module_wrapper(model): 14 | model = model.module 15 | model.set_epoch(epoch) 16 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person.py: -------------------------------------------------------------------------------- 1 | _base_ = './faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py' 2 | model = dict(roi_head=dict(bbox_head=dict(num_classes=1))) 3 | classes = ('person', ) 4 | data = dict( 5 | train=dict(classes=classes), 6 | val=dict(classes=classes), 7 | test=dict(classes=classes)) 8 | 9 | load_from = 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_bbox_mAP-0.398_20200504_163323-30042637.pth' # noqa 10 | -------------------------------------------------------------------------------- /mmdet/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from mmcv.utils import collect_env as collect_base_env 3 | from mmcv.utils import get_git_hash 4 | 5 | import mmdet 6 | 7 | 8 | def collect_env(): 9 | """Collect the information of the running environments.""" 10 | env_info = collect_base_env() 11 | env_info['MMDetection'] = mmdet.__version__ + '+' + get_git_hash()[:7] 12 | return env_info 13 | 14 | 15 | if __name__ == '__main__': 16 | for name, val in collect_env().items(): 17 | print(f'{name}: {val}') 18 | -------------------------------------------------------------------------------- /mmdet/core/export/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .onnx_helper import (add_dummy_nms_for_onnx, dynamic_clip_for_onnx, 3 | get_k_for_topk) 4 | from .pytorch2onnx import (build_model_from_cfg, 5 | generate_inputs_and_wrap_model, 6 | preprocess_example_input) 7 | 8 | __all__ = [ 9 | 'build_model_from_cfg', 'generate_inputs_and_wrap_model', 10 | 'preprocess_example_input', 'get_k_for_topk', 'add_dummy_nms_for_onnx', 11 | 'dynamic_clip_for_onnx' 12 | ] 13 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person-bicycle-car.py: -------------------------------------------------------------------------------- 1 | _base_ = './faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py' 2 | model = dict(roi_head=dict(bbox_head=dict(num_classes=3))) 3 | classes = ('person', 'bicycle', 'car') 4 | data = dict( 5 | train=dict(classes=classes), 6 | val=dict(classes=classes), 7 | test=dict(classes=classes)) 8 | 9 | load_from = 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_bbox_mAP-0.398_20200504_163323-30042637.pth' # noqa 10 | -------------------------------------------------------------------------------- /mmdet/models/detectors/htc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from ..builder import DETECTORS 3 | from .cascade_rcnn import CascadeRCNN 4 | 5 | 6 | @DETECTORS.register_module() 7 | class HybridTaskCascade(CascadeRCNN): 8 | """Implementation of `HTC <https://arxiv.org/abs/1901.07518>`_""" 9 | 10 | def __init__(self, **kwargs): 11 | super(HybridTaskCascade, self).__init__(**kwargs) 12 | 13 | @property 14 | def with_semantic(self): 15 | """bool: whether the detector has a semantic head""" 16 | return self.roi_head.with_semantic 17 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../common/mstrain_3x_coco.py', '../_base_/models/faster_rcnn_r50_fpn.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='ResNeXt', 7 | depth=101, 8 | groups=32, 9 | base_width=4, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=1, 13 | norm_cfg=dict(type='BN', requires_grad=True), 14 | style='pytorch', 15 | init_cfg=dict( 16 | type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) 17 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../common/mstrain_3x_coco.py', '../_base_/models/faster_rcnn_r50_fpn.py' 3 | ] 4 | model = dict( 5 | backbone=dict( 6 | type='ResNeXt', 7 | depth=101, 8 | groups=64, 9 | base_width=4, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=1, 13 | norm_cfg=dict(type='BN', requires_grad=True), 14 | style='pytorch', 15 | init_cfg=dict( 16 | type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) 17 | -------------------------------------------------------------------------------- /mmdet/core/bbox/coder/base_bbox_coder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from abc import ABCMeta, abstractmethod 3 | 4 | 5 | class BaseBBoxCoder(metaclass=ABCMeta): 6 | """Base bounding box coder.""" 7 | 8 | def __init__(self, **kwargs): 9 | pass 10 | 11 | @abstractmethod 12 | def encode(self, bboxes, gt_bboxes): 13 | """Encode deltas between bboxes and ground truth boxes.""" 14 | 15 | @abstractmethod 16 | def decode(self, bboxes, bboxes_pred): 17 | """Decode the predicted bboxes according to prediction and base 18 | boxes.""" 19 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../common/mstrain-poly_3x_coco_instance.py', 3 | '../_base_/models/mask_rcnn_r50_fpn.py' 4 | ] 5 | 6 | model = dict( 7 | backbone=dict( 8 | type='ResNeXt', 9 | depth=101, 10 | groups=32, 11 | base_width=4, 12 | num_stages=4, 13 | out_indices=(0, 1, 2, 3), 14 | frozen_stages=1, 15 | norm_cfg=dict(type='BN', requires_grad=True), 16 | style='pytorch', 17 | init_cfg=dict( 18 | type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) 19 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../common/mstrain-poly_3x_coco_instance.py', 3 | '../_base_/models/mask_rcnn_r50_fpn.py' 4 | ] 5 | 6 | model = dict( 7 | backbone=dict( 8 | type='ResNeXt', 9 | depth=101, 10 | groups=64, 11 | base_width=4, 12 | num_stages=4, 13 | out_indices=(0, 1, 2, 3), 14 | frozen_stages=1, 15 | norm_cfg=dict(type='BN', requires_grad=True), 16 | style='pytorch', 17 | init_cfg=dict( 18 | type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) 19 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/bbox_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .bbox_head import BBoxHead 3 | from .convfc_bbox_head import (ConvFCBBoxHead, Shared2FCBBoxHead, 4 | Shared4Conv1FCBBoxHead) 5 | from .dii_head import DIIHead 6 | from .double_bbox_head import DoubleConvFCBBoxHead 7 | from .sabl_head import SABLHead 8 | from .scnet_bbox_head import SCNetBBoxHead 9 | 10 | __all__ = [ 11 | 'BBoxHead', 'ConvFCBBoxHead', 'Shared2FCBBoxHead', 12 | 'Shared4Conv1FCBBoxHead', 'DoubleConvFCBBoxHead', 'SABLHead', 'DIIHead', 13 | 'SCNetBBoxHead' 14 | ] 15 | -------------------------------------------------------------------------------- /mmdet/version.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
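# Version strings follow '<major>.<minor>.<patch>' with an optional 'rcN'
# suffix; parse_version_info below maps '2.25.0' to (2, 25, 0) and
# '2.25.0rc1' to (2, 25, 0, 'rc1').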
2 | 3 | __version__ = '2.25.0' 4 | short_version = __version__ 5 | 6 | 7 | def parse_version_info(version_str): 8 | version_info = [] 9 | for x in version_str.split('.'): 10 | if x.isdigit(): 11 | version_info.append(int(x)) 12 | elif x.find('rc') != -1: 13 | patch_version = x.split('rc') 14 | version_info.append(int(patch_version[0])) 15 | version_info.append(f'rc{patch_version[1]}') 16 | return tuple(version_info) 17 | 18 | 19 | version_info = parse_version_info(__version__) 20 | -------------------------------------------------------------------------------- /mmdet/apis/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .inference import (async_inference_detector, inference_detector, 3 | init_detector, show_result_pyplot) 4 | from .test import multi_gpu_test, single_gpu_test 5 | from .train import (get_root_logger, init_random_seed, set_random_seed, 6 | train_detector) 7 | 8 | __all__ = [ 9 | 'get_root_logger', 'set_random_seed', 'train_detector', 'init_detector', 10 | 'async_inference_detector', 'inference_detector', 'show_result_pyplot', 11 | 'multi_gpu_test', 'single_gpu_test', 'init_random_seed' 12 | ] 13 | -------------------------------------------------------------------------------- /mmdet/models/detectors/gfl.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from ..builder import DETECTORS 3 | from .single_stage import SingleStageDetector 4 | 5 | 6 | @DETECTORS.register_module() 7 | class GFL(SingleStageDetector): 8 | 9 | def __init__(self, 10 | backbone, 11 | neck, 12 | bbox_head, 13 | train_cfg=None, 14 | test_cfg=None, 15 | pretrained=None, 16 | init_cfg=None): 17 | super(GFL, self).__init__(backbone, neck, bbox_head, train_cfg, 18 | test_cfg, pretrained, init_cfg) 19 | -------------------------------------------------------------------------------- /mmdet/core/anchor/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
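# Usage sketch (the cfg dict is illustrative, mirroring common mmdet anchor
# settings rather than code in this file):
#   prior_generator = build_prior_generator(
#       dict(type='AnchorGenerator', scales=[8], ratios=[0.5, 1.0, 2.0],
#            strides=[4, 8, 16, 32, 64]))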
2 | import warnings 3 | 4 | from mmcv.utils import Registry, build_from_cfg 5 | 6 | PRIOR_GENERATORS = Registry('Generator for anchors and points') 7 | 8 | ANCHOR_GENERATORS = PRIOR_GENERATORS 9 | 10 | 11 | def build_prior_generator(cfg, default_args=None): 12 | return build_from_cfg(cfg, PRIOR_GENERATORS, default_args) 13 | 14 | 15 | def build_anchor_generator(cfg, default_args=None): 16 | warnings.warn( 17 | '``build_anchor_generator`` will be deprecated soon; please use ' 18 | '``build_prior_generator`` instead.') 19 | return build_prior_generator(cfg, default_args=default_args) 20 | -------------------------------------------------------------------------------- /tools/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_fpn_tnr-pretrain_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/faster_rcnn_r50_fpn.py', 3 | '../_base_/datasets/coco_detection.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | 7 | checkpoint = 'https://download.pytorch.org/models/resnet50-11ad3fa6.pth' 8 | model = dict( 9 | backbone=dict(init_cfg=dict(type='Pretrained', checkpoint=checkpoint))) 10 | 11 | # `lr` and `weight_decay` were searched and found to be optimal. 12 | optimizer = dict( 13 | _delete_=True, 14 | type='AdamW', 15 | lr=0.0001, 16 | weight_decay=0.1, 17 | paramwise_cfg=dict(norm_decay_mult=0., bypass_duplicate=True)) 18 | -------------------------------------------------------------------------------- /mmdet/core/bbox/coder/pseudo_bbox_coder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
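# Usage sketch: the pseudo coder is an identity mapping for heads whose
# predictions are already absolute boxes (illustrative; build_bbox_coder is
# defined in mmdet/core/bbox/builder.py):
#   coder = build_bbox_coder(dict(type='PseudoBBoxCoder'))
#   assert coder.decode(bboxes, pred_bboxes) is pred_bboxes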
2 | from ..builder import BBOX_CODERS 3 | from .base_bbox_coder import BaseBBoxCoder 4 | 5 | 6 | @BBOX_CODERS.register_module() 7 | class PseudoBBoxCoder(BaseBBoxCoder): 8 | """Pseudo bounding box coder.""" 9 | 10 | def __init__(self, **kwargs): 11 | super(BaseBBoxCoder, self).__init__(**kwargs) 12 | 13 | def encode(self, bboxes, gt_bboxes): 14 | """torch.Tensor: return the given ``bboxes``""" 15 | return gt_bboxes 16 | 17 | def decode(self, bboxes, pred_bboxes): 18 | """torch.Tensor: return the given ``pred_bboxes``""" 19 | return pred_bboxes 20 | -------------------------------------------------------------------------------- /tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | WORK_DIR=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | PY_ARGS=${@:5} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /mmdet/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .dist_utils import (DistOptimizerHook, all_reduce_dict, allreduce_grads, 3 | reduce_mean, sync_random_seed) 4 | from .misc import (center_of_mass, filter_scores_and_topk, flip_tensor, 5 | generate_coordinate, mask2ndarray, multi_apply, 6 | select_single_mlvl, unmap) 7 | 8 | __all__ = [ 9 | 'allreduce_grads', 'DistOptimizerHook', 'reduce_mean', 'multi_apply', 10 | 'unmap', 'mask2ndarray', 'flip_tensor', 'all_reduce_dict', 11 | 'center_of_mass', 'generate_coordinate', 'select_single_mlvl', 12 | 'filter_scores_and_topk', 'sync_random_seed' 13 | ] 14 | -------------------------------------------------------------------------------- /mmdet/datasets/deepfashion.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .builder import DATASETS 3 | from .coco import CocoDataset 4 | 5 | 6 | @DATASETS.register_module() 7 | class DeepFashionDataset(CocoDataset): 8 | 9 | CLASSES = ('top', 'skirt', 'leggings', 'dress', 'outer', 'pants', 'bag', 10 | 'neckwear', 'headwear', 'eyeglass', 'belt', 'footwear', 'hair', 11 | 'skin', 'face') 12 | 13 | PALETTE = [(0, 192, 64), (0, 64, 96), (128, 192, 192), (0, 64, 64), 14 | (0, 192, 224), (0, 192, 192), (128, 192, 64), (0, 192, 96), 15 | (128, 32, 192), (0, 0, 224), (0, 0, 64), (0, 160, 192), 16 | (128, 0, 96), (128, 0, 192), (0, 32, 192)] 17 | -------------------------------------------------------------------------------- /mmdet/models/detectors/yolof.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from ..builder import DETECTORS 3 | from .single_stage import SingleStageDetector 4 | 5 | 6 | @DETECTORS.register_module() 7 | class YOLOF(SingleStageDetector): 8 | r"""Implementation of `You Only Look One-level Feature 9 | <https://arxiv.org/abs/2103.09460>`_""" 10 | 11 | def __init__(self, 12 | backbone, 13 | neck, 14 | bbox_head, 15 | train_cfg=None, 16 | test_cfg=None, 17 | pretrained=None): 18 | super(YOLOF, self).__init__(backbone, neck, bbox_head, train_cfg, 19 | test_cfg, pretrained) 20 | -------------------------------------------------------------------------------- /mmdet/core/bbox/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.utils import Registry, build_from_cfg 3 | 4 | BBOX_ASSIGNERS = Registry('bbox_assigner') 5 | BBOX_SAMPLERS = Registry('bbox_sampler') 6 | BBOX_CODERS = Registry('bbox_coder') 7 | 8 | 9 | def build_assigner(cfg, **default_args): 10 | """Builder of box assigner.""" 11 | return build_from_cfg(cfg, BBOX_ASSIGNERS, default_args) 12 | 13 | 14 | def build_sampler(cfg, **default_args): 15 | """Builder of box sampler.""" 16 | return build_from_cfg(cfg, BBOX_SAMPLERS, default_args) 17 | 18 | 19 | def build_bbox_coder(cfg, **default_args): 20 | """Builder of box coder.""" 21 | return build_from_cfg(cfg, BBOX_CODERS, default_args) 22 | -------------------------------------------------------------------------------- /mmdet/core/bbox/coder/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base_bbox_coder import BaseBBoxCoder 3 | from .bucketing_bbox_coder import BucketingBBoxCoder 4 | from .delta_xywh_bbox_coder import DeltaXYWHBBoxCoder 5 | from .distance_point_bbox_coder import DistancePointBBoxCoder 6 | from .legacy_delta_xywh_bbox_coder import LegacyDeltaXYWHBBoxCoder 7 | from .pseudo_bbox_coder import PseudoBBoxCoder 8 | from .tblr_bbox_coder import TBLRBBoxCoder 9 | from .yolo_bbox_coder import YOLOBBoxCoder 10 | 11 | __all__ = [ 12 | 'BaseBBoxCoder', 'PseudoBBoxCoder', 'DeltaXYWHBBoxCoder', 13 | 'LegacyDeltaXYWHBBoxCoder', 'TBLRBBoxCoder', 'YOLOBBoxCoder', 14 | 'BucketingBBoxCoder', 'DistancePointBBoxCoder' 15 | ] 16 | -------------------------------------------------------------------------------- /mmdet/models/detectors/atss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from ..builder import DETECTORS 3 | from .single_stage import SingleStageDetector 4 | 5 | 6 | @DETECTORS.register_module() 7 | class ATSS(SingleStageDetector): 8 | """Implementation of `ATSS <https://arxiv.org/abs/1912.02424>`_.""" 9 | 10 | def __init__(self, 11 | backbone, 12 | neck, 13 | bbox_head, 14 | train_cfg=None, 15 | test_cfg=None, 16 | pretrained=None, 17 | init_cfg=None): 18 | super(ATSS, self).__init__(backbone, neck, bbox_head, train_cfg, 19 | test_cfg, pretrained, init_cfg) 20 | -------------------------------------------------------------------------------- /mmdet/models/detectors/fcos.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
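# A builder sketch (illustrative config, not part of the original file): the
# BBOX_CODERS registry defined in mmdet/core/bbox/builder.py above turns plain
# config dicts into coder instances; DeltaXYWHBBoxCoder is used here only as a
# stand-in example.
def _bbox_coder_build_demo():
    from mmdet.core.bbox import build_bbox_coder
    coder = build_bbox_coder(dict(type='DeltaXYWHBBoxCoder'))
    return coder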
2 | from ..builder import DETECTORS 3 | from .single_stage import SingleStageDetector 4 | 5 | 6 | @DETECTORS.register_module() 7 | class FCOS(SingleStageDetector): 8 | """Implementation of `FCOS <https://arxiv.org/abs/1904.01355>`_""" 9 | 10 | def __init__(self, 11 | backbone, 12 | neck, 13 | bbox_head, 14 | train_cfg=None, 15 | test_cfg=None, 16 | pretrained=None, 17 | init_cfg=None): 18 | super(FCOS, self).__init__(backbone, neck, bbox_head, train_cfg, 19 | test_cfg, pretrained, init_cfg) 20 | -------------------------------------------------------------------------------- /mmdet/models/detectors/fsaf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from ..builder import DETECTORS 3 | from .single_stage import SingleStageDetector 4 | 5 | 6 | @DETECTORS.register_module() 7 | class FSAF(SingleStageDetector): 8 | """Implementation of `FSAF <https://arxiv.org/abs/1903.00621>`_""" 9 | 10 | def __init__(self, 11 | backbone, 12 | neck, 13 | bbox_head, 14 | train_cfg=None, 15 | test_cfg=None, 16 | pretrained=None, 17 | init_cfg=None): 18 | super(FSAF, self).__init__(backbone, neck, bbox_head, train_cfg, 19 | test_cfg, pretrained, init_cfg) 20 | -------------------------------------------------------------------------------- /mmdet/models/detectors/paa.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from ..builder import DETECTORS 3 | from .single_stage import SingleStageDetector 4 | 5 | 6 | @DETECTORS.register_module() 7 | class PAA(SingleStageDetector): 8 | """Implementation of `PAA <https://arxiv.org/abs/2007.08103>`_.""" 9 | 10 | def __init__(self, 11 | backbone, 12 | neck, 13 | bbox_head, 14 | train_cfg=None, 15 | test_cfg=None, 16 | pretrained=None, 17 | init_cfg=None): 18 | super(PAA, self).__init__(backbone, neck, bbox_head, train_cfg, 19 | test_cfg, pretrained, init_cfg) 20 | -------------------------------------------------------------------------------- /mmdet/models/detectors/ddod.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from ..builder import DETECTORS 3 | from .single_stage import SingleStageDetector 4 | 5 | 6 | @DETECTORS.register_module() 7 | class DDOD(SingleStageDetector): 8 | """Implementation of `DDOD <https://arxiv.org/abs/2107.02963>`_.""" 9 | 10 | def __init__(self, 11 | backbone, 12 | neck, 13 | bbox_head, 14 | train_cfg=None, 15 | test_cfg=None, 16 | pretrained=None, 17 | init_cfg=None): 18 | super(DDOD, self).__init__(backbone, neck, bbox_head, train_cfg, 19 | test_cfg, pretrained, init_cfg) 20 | -------------------------------------------------------------------------------- /mmdet/models/detectors/fovea.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from ..builder import DETECTORS 3 | from .single_stage import SingleStageDetector 4 | 5 | 6 | @DETECTORS.register_module() 7 | class FOVEA(SingleStageDetector): 8 | """Implementation of `FoveaBox <https://arxiv.org/abs/1904.03797>`_""" 9 | 10 | def __init__(self, 11 | backbone, 12 | neck, 13 | bbox_head, 14 | train_cfg=None, 15 | test_cfg=None, 16 | pretrained=None, 17 | init_cfg=None): 18 | super(FOVEA, self).__init__(backbone, neck, bbox_head, train_cfg, 19 | test_cfg, pretrained, init_cfg) 20 | -------------------------------------------------------------------------------- /mmdet/models/detectors/retinanet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from ..builder import DETECTORS 3 | from .single_stage import SingleStageDetector 4 | 5 | 6 | @DETECTORS.register_module() 7 | class RetinaNet(SingleStageDetector): 8 | """Implementation of `RetinaNet <https://arxiv.org/abs/1708.02002>`_""" 9 | 10 | def __init__(self, 11 | backbone, 12 | neck, 13 | bbox_head, 14 | train_cfg=None, 15 | test_cfg=None, 16 | pretrained=None, 17 | init_cfg=None): 18 | super(RetinaNet, self).__init__(backbone, neck, bbox_head, train_cfg, 19 | test_cfg, pretrained, init_cfg) 20 | -------------------------------------------------------------------------------- /mmdet/models/detectors/vfnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from ..builder import DETECTORS 3 | from .single_stage import SingleStageDetector 4 | 5 | 6 | @DETECTORS.register_module() 7 | class VFNet(SingleStageDetector): 8 | """Implementation of `VarifocalNet 9 | (VFNet) <https://arxiv.org/abs/2008.13367>`_.""" 10 | 11 | def __init__(self, 12 | backbone, 13 | neck, 14 | bbox_head, 15 | train_cfg=None, 16 | test_cfg=None, 17 | pretrained=None, 18 | init_cfg=None): 19 | super(VFNet, self).__init__(backbone, neck, bbox_head, train_cfg, 20 | test_cfg, pretrained, init_cfg) 21 | -------------------------------------------------------------------------------- /mmdet/models/detectors/autoassign.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from ..builder import DETECTORS 3 | from .single_stage import SingleStageDetector 4 | 5 | 6 | @DETECTORS.register_module() 7 | class AutoAssign(SingleStageDetector): 8 | """Implementation of `AutoAssign: Differentiable Label Assignment for Dense 9 | Object Detection <https://arxiv.org/abs/2007.03496>`_.""" 10 | 11 | def __init__(self, 12 | backbone, 13 | neck, 14 | bbox_head, 15 | train_cfg=None, 16 | test_cfg=None, 17 | pretrained=None): 18 | super(AutoAssign, self).__init__(backbone, neck, bbox_head, train_cfg, 19 | test_cfg, pretrained) 20 | -------------------------------------------------------------------------------- /mmdet/core/anchor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
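# A prior-generator sketch (illustrative single-level values, not part of the
# original file): generators are built from config dicts via the
# PRIOR_GENERATORS registry re-exported below.
def _prior_generator_demo():
    from mmdet.core.anchor import build_prior_generator
    gen = build_prior_generator(
        dict(type='AnchorGenerator', scales=[8], ratios=[1.0], strides=[16]))
    assert gen.num_base_anchors == [1]  # one anchor per location at this level
    return gen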
2 | from .anchor_generator import (AnchorGenerator, LegacyAnchorGenerator, 3 | YOLOAnchorGenerator) 4 | from .builder import (ANCHOR_GENERATORS, PRIOR_GENERATORS, 5 | build_anchor_generator, build_prior_generator) 6 | from .point_generator import MlvlPointGenerator, PointGenerator 7 | from .utils import anchor_inside_flags, calc_region, images_to_levels 8 | 9 | __all__ = [ 10 | 'AnchorGenerator', 'LegacyAnchorGenerator', 'anchor_inside_flags', 11 | 'PointGenerator', 'images_to_levels', 'calc_region', 12 | 'build_anchor_generator', 'ANCHOR_GENERATORS', 'YOLOAnchorGenerator', 13 | 'build_prior_generator', 'PRIOR_GENERATORS', 'MlvlPointGenerator' 14 | ] 15 | -------------------------------------------------------------------------------- /mmdet/models/detectors/nasfcos.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from ..builder import DETECTORS 3 | from .single_stage import SingleStageDetector 4 | 5 | 6 | @DETECTORS.register_module() 7 | class NASFCOS(SingleStageDetector): 8 | """NAS-FCOS: Fast Neural Architecture Search for Object Detection. 9 | 10 | https://arxiv.org/abs/1906.04423 11 | """ 12 | 13 | def __init__(self, 14 | backbone, 15 | neck, 16 | bbox_head, 17 | train_cfg=None, 18 | test_cfg=None, 19 | pretrained=None, 20 | init_cfg=None): 21 | super(NASFCOS, self).__init__(backbone, neck, bbox_head, train_cfg, 22 | test_cfg, pretrained, init_cfg) 23 | -------------------------------------------------------------------------------- /mmdet/core/hook/checkloss_hook.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | from mmcv.runner.hooks import HOOKS, Hook 4 | 5 | 6 | @HOOKS.register_module() 7 | class CheckInvalidLossHook(Hook): 8 | """Check invalid loss hook. 9 | 10 | This hook will regularly check whether the loss is valid 11 | during training. 12 | 13 | Args: 14 | interval (int): Checking interval (every k iterations). 15 | Default: 50. 16 | """ 17 | 18 | def __init__(self, interval=50): 19 | self.interval = interval 20 | 21 | def after_train_iter(self, runner): 22 | if self.every_n_iters(runner, self.interval): 23 | assert torch.isfinite(runner.outputs['loss']), \ 24 | 'loss became infinite or NaN!' 25 | -------------------------------------------------------------------------------- /mmdet/core/hook/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
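# A config sketch (illustrative, not part of the original file): hooks such as
# CheckInvalidLossHook above are enabled by listing them under
# ``custom_hooks`` in a training config.
def _custom_hooks_cfg_demo():
    return [dict(type='CheckInvalidLossHook', interval=50)]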
2 | from .checkloss_hook import CheckInvalidLossHook 3 | from .ema import ExpMomentumEMAHook, LinearMomentumEMAHook 4 | from .memory_profiler_hook import MemoryProfilerHook 5 | from .set_epoch_info_hook import SetEpochInfoHook 6 | from .sync_norm_hook import SyncNormHook 7 | from .sync_random_size_hook import SyncRandomSizeHook 8 | from .wandblogger_hook import MMDetWandbHook 9 | from .yolox_lrupdater_hook import YOLOXLrUpdaterHook 10 | from .yolox_mode_switch_hook import YOLOXModeSwitchHook 11 | 12 | __all__ = [ 13 | 'SyncRandomSizeHook', 'YOLOXModeSwitchHook', 'SyncNormHook', 14 | 'ExpMomentumEMAHook', 'LinearMomentumEMAHook', 'YOLOXLrUpdaterHook', 15 | 'CheckInvalidLossHook', 'SetEpochInfoHook', 'MemoryProfilerHook', 16 | 'MMDetWandbHook' 17 | ] 18 | -------------------------------------------------------------------------------- /mmdet/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .collect_env import collect_env 3 | from .compat_config import compat_cfg 4 | from .logger import get_caller_name, get_root_logger, log_img_scale 5 | from .memory import AvoidCUDAOOM, AvoidOOM 6 | from .misc import find_latest_checkpoint, update_data_root 7 | from .replace_cfg_vals import replace_cfg_vals 8 | from .setup_env import setup_multi_processes 9 | from .split_batch import split_batch 10 | from .util_distribution import build_ddp, build_dp, get_device 11 | 12 | __all__ = [ 13 | 'get_root_logger', 'collect_env', 'find_latest_checkpoint', 14 | 'update_data_root', 'setup_multi_processes', 'get_caller_name', 15 | 'log_img_scale', 'compat_cfg', 'split_batch', 'build_ddp', 'build_dp', 16 | 'get_device', 'replace_cfg_vals', 'AvoidOOM', 'AvoidCUDAOOM' 17 | ] 18 | -------------------------------------------------------------------------------- /mmdet/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .bfp import BFP 3 | from .channel_mapper import ChannelMapper 4 | from .ct_resnet_neck import CTResNetNeck 5 | from .dilated_encoder import DilatedEncoder 6 | from .dyhead import DyHead 7 | from .fpg import FPG 8 | from .fpn import FPN 9 | from .fpn_carafe import FPN_CARAFE 10 | from .hrfpn import HRFPN 11 | from .nas_fpn import NASFPN 12 | from .nasfcos_fpn import NASFCOS_FPN 13 | from .pafpn import PAFPN 14 | from .rfp import RFP 15 | from .ssd_neck import SSDNeck 16 | from .yolo_neck import YOLOV3Neck 17 | from .yolox_pafpn import YOLOXPAFPN 18 | 19 | __all__ = [ 20 | 'FPN', 'BFP', 'ChannelMapper', 'HRFPN', 'NASFPN', 'FPN_CARAFE', 'PAFPN', 21 | 'NASFCOS_FPN', 'RFP', 'YOLOV3Neck', 'FPG', 'DilatedEncoder', 22 | 'CTResNetNeck', 'SSDNeck', 'YOLOXPAFPN', 'DyHead' 23 | ] 24 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/combined_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
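# A sampler-config sketch (illustrative values, not part of the original
# file): the CombinedSampler defined below builds its positive and negative
# sub-samplers from nested configs.
def _combined_sampler_cfg_demo():
    return dict(
        type='CombinedSampler',
        num=512,
        pos_fraction=0.25,
        add_gt_as_proposals=True,
        pos_sampler=dict(type='InstanceBalancedPosSampler'),
        neg_sampler=dict(type='IoUBalancedNegSampler', floor_thr=-1))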
2 | from ..builder import BBOX_SAMPLERS, build_sampler 3 | from .base_sampler import BaseSampler 4 | 5 | 6 | @BBOX_SAMPLERS.register_module() 7 | class CombinedSampler(BaseSampler): 8 | """A sampler that combines positive sampler and negative sampler.""" 9 | 10 | def __init__(self, pos_sampler, neg_sampler, **kwargs): 11 | super(CombinedSampler, self).__init__(**kwargs) 12 | self.pos_sampler = build_sampler(pos_sampler, **kwargs) 13 | self.neg_sampler = build_sampler(neg_sampler, **kwargs) 14 | 15 | def _sample_pos(self, **kwargs): 16 | """Sample positive samples.""" 17 | raise NotImplementedError 18 | 19 | def _sample_neg(self, **kwargs): 20 | """Sample negative samples.""" 21 | raise NotImplementedError 22 | -------------------------------------------------------------------------------- /configs/_base_/datasets/lvis_v1_instance.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | _base_ = 'coco_instance.py' 3 | dataset_type = 'LVISV1Dataset' 4 | data_root = 'data/lvis_v1/' 5 | data = dict( 6 | samples_per_gpu=2, 7 | workers_per_gpu=2, 8 | train=dict( 9 | _delete_=True, 10 | type='ClassBalancedDataset', 11 | oversample_thr=1e-3, 12 | dataset=dict( 13 | type=dataset_type, 14 | ann_file=data_root + 'annotations/lvis_v1_train.json', 15 | img_prefix=data_root)), 16 | val=dict( 17 | type=dataset_type, 18 | ann_file=data_root + 'annotations/lvis_v1_val.json', 19 | img_prefix=data_root), 20 | test=dict( 21 | type=dataset_type, 22 | ann_file=data_root + 'annotations/lvis_v1_val.json', 23 | img_prefix=data_root)) 24 | evaluation = dict(metric=['bbox', 'segm']) 25 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_r50_fpn_1x_wandb_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/mask_rcnn_r50_fpn.py', 3 | '../_base_/datasets/coco_instance.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | 7 | # Set evaluation interval 8 | evaluation = dict(interval=2) 9 | # Set checkpoint interval 10 | checkpoint_config = dict(interval=4) 11 | 12 | # yapf:disable 13 | log_config = dict( 14 | interval=50, 15 | hooks=[ 16 | dict(type='TextLoggerHook'), 17 | dict(type='MMDetWandbHook', 18 | init_kwargs={ 19 | 'project': 'mmdetection', 20 | 'group': 'maskrcnn-r50-fpn-1x-coco' 21 | }, 22 | interval=50, 23 | log_checkpoint=True, 24 | log_checkpoint_metadata=True, 25 | num_eval_images=100) 26 | ]) 27 | -------------------------------------------------------------------------------- /mmdet/models/detectors/tood.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from ..builder import DETECTORS 3 | from .single_stage import SingleStageDetector 4 | 5 | 6 | @DETECTORS.register_module() 7 | class TOOD(SingleStageDetector): 8 | r"""Implementation of `TOOD: Task-aligned One-stage Object Detection. 
9 | <https://arxiv.org/abs/2108.07755>`_.""" 10 | 11 | def __init__(self, 12 | backbone, 13 | neck, 14 | bbox_head, 15 | train_cfg=None, 16 | test_cfg=None, 17 | pretrained=None, 18 | init_cfg=None): 19 | super(TOOD, self).__init__(backbone, neck, bbox_head, train_cfg, 20 | test_cfg, pretrained, init_cfg) 21 | 22 | def set_epoch(self, epoch): 23 | self.bbox_head.epoch = epoch 24 | -------------------------------------------------------------------------------- /Pose2Seg_OCP/README.md: -------------------------------------------------------------------------------- 1 | # OCP implementation in Pose2Seg 2 | 3 | This sub-repo holds the Occlusion Copy & Paste (OC&P) implementation for the Pose2Seg training code. Our original Occlusion Copy & Paste, implemented in `mmdetection`, is re-implemented here in plain PyTorch. 4 | 5 | We provide the modified and additional files required to train Pose2Seg with OC&P, to be applied on top of the original repository: [Pose2Seg](https://github.com/liruilong940607/Pose2Seg). 6 | 7 | ## Testing without Pose Keypoint GTs 8 | 9 | Pose2Seg uses [Associative Embedding Pose Estimation](https://github.com/princeton-vl/pose-ae-train) to predict keypoints that are fed into the Pose2Seg model as proposals. We use the same repo to generate predicted keypoints and convert the predicted outputs into COCO JSON format for testing with Pose2Seg. The conversion script is at [`Pose2Seg_OCP/aepose/aedets2cocojson.py`](Pose2Seg_OCP/aepose/aedets2cocojson.py). -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length = 79 3 | multi_line_output = 0 4 | extra_standard_library = setuptools 5 | known_first_party = mmdet 6 | known_third_party = PIL,asynctest,cityscapesscripts,cv2,gather_models,matplotlib,mmcv,numpy,onnx,onnxruntime,pycocotools,pytest,pytorch_sphinx_theme,requests,scipy,seaborn,six,terminaltables,torch,ts,yaml 7 | no_lines_before = STDLIB,LOCALFOLDER 8 | default_section = THIRDPARTY 9 | 10 | [yapf] 11 | BASED_ON_STYLE = pep8 12 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true 13 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true 14 | 15 | # ignore-words-list needs to be in lowercase format. 
For example, if we want to 16 | # ignore word "BA", then we need to append "ba" to ignore-words-list rather 17 | # than "BA" 18 | [codespell] 19 | skip = *.ipynb 20 | quiet-level = 3 21 | ignore-words-list = patten,nd,ty,mot,hist,formating,winn,gool,datas,wan,confids,TOOD,tood,ba 22 | -------------------------------------------------------------------------------- /configs/_base_/datasets/lvis_v0.5_instance.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | _base_ = 'coco_instance.py' 3 | dataset_type = 'LVISV05Dataset' 4 | data_root = 'data/lvis_v0.5/' 5 | data = dict( 6 | samples_per_gpu=2, 7 | workers_per_gpu=2, 8 | train=dict( 9 | _delete_=True, 10 | type='ClassBalancedDataset', 11 | oversample_thr=1e-3, 12 | dataset=dict( 13 | type=dataset_type, 14 | ann_file=data_root + 'annotations/lvis_v0.5_train.json', 15 | img_prefix=data_root + 'train2017/')), 16 | val=dict( 17 | type=dataset_type, 18 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 19 | img_prefix=data_root + 'val2017/'), 20 | test=dict( 21 | type=dataset_type, 22 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 23 | img_prefix=data_root + 'val2017/')) 24 | evaluation = dict(metric=['bbox', 'segm']) 25 | -------------------------------------------------------------------------------- /mmdet/models/detectors/reppoints_detector.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from ..builder import DETECTORS 3 | from .single_stage import SingleStageDetector 4 | 5 | 6 | @DETECTORS.register_module() 7 | class RepPointsDetector(SingleStageDetector): 8 | """RepPoints: Point Set Representation for Object Detection. 9 | 10 | This detector is the implementation of: 11 | - RepPoints detector (https://arxiv.org/pdf/1904.11490) 12 | """ 13 | 14 | def __init__(self, 15 | backbone, 16 | neck, 17 | bbox_head, 18 | train_cfg=None, 19 | test_cfg=None, 20 | pretrained=None, 21 | init_cfg=None): 22 | super(RepPointsDetector, 23 | self).__init__(backbone, neck, bbox_head, train_cfg, test_cfg, 24 | pretrained, init_cfg) 25 | -------------------------------------------------------------------------------- /configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable 3 | log_config = dict( 4 | interval=50, 5 | hooks=[ 6 | dict(type='TextLoggerHook'), 7 | # dict(type='TensorboardLoggerHook') 8 | ]) 9 | # yapf:enable 10 | custom_hooks = [dict(type='NumClassCheckHook')] 11 | 12 | dist_params = dict(backend='nccl') 13 | log_level = 'INFO' 14 | load_from = None 15 | resume_from = None 16 | workflow = [('train', 1)] 17 | 18 | # disable opencv multithreading to avoid system being overloaded 19 | opencv_num_threads = 0 20 | # set multi-process start method as `fork` to speed up the training 21 | mp_start_method = 'fork' 22 | 23 | # Default setting for scaling LR automatically 24 | # - `enable` means enable scaling LR automatically 25 | # or not by default. 26 | # - `base_batch_size` = (8 GPUs) x (2 samples per GPU). 27 | auto_scale_lr = dict(enable=False, base_batch_size=16) 28 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
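# A pseudo-sampling sketch (illustrative; ``assign_result``, ``priors`` and
# ``gt_bboxes`` are assumed to come from an upstream assigner): PseudoSampler
# does no real sampling, it only wraps the assignment so anchor-free heads see
# the same SamplingResult interface as the sampled two-stage heads.
def _pseudo_sampler_demo(assign_result, priors, gt_bboxes):
    from mmdet.core.bbox.samplers import PseudoSampler
    return PseudoSampler().sample(assign_result, priors, gt_bboxes)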
2 | from .base_sampler import BaseSampler 3 | from .combined_sampler import CombinedSampler 4 | from .instance_balanced_pos_sampler import InstanceBalancedPosSampler 5 | from .iou_balanced_neg_sampler import IoUBalancedNegSampler 6 | from .mask_pseudo_sampler import MaskPseudoSampler 7 | from .mask_sampling_result import MaskSamplingResult 8 | from .ohem_sampler import OHEMSampler 9 | from .pseudo_sampler import PseudoSampler 10 | from .random_sampler import RandomSampler 11 | from .sampling_result import SamplingResult 12 | from .score_hlr_sampler import ScoreHLRSampler 13 | 14 | __all__ = [ 15 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 16 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 17 | 'OHEMSampler', 'SamplingResult', 'ScoreHLRSampler', 'MaskPseudoSampler', 18 | 'MaskSamplingResult' 19 | ] 20 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/mask_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .coarse_mask_head import CoarseMaskHead 3 | from .dynamic_mask_head import DynamicMaskHead 4 | from .fcn_mask_head import FCNMaskHead 5 | from .feature_relay_head import FeatureRelayHead 6 | from .fused_semantic_head import FusedSemanticHead 7 | from .global_context_head import GlobalContextHead 8 | from .grid_head import GridHead 9 | from .htc_mask_head import HTCMaskHead 10 | from .mask_point_head import MaskPointHead 11 | from .maskiou_head import MaskIoUHead 12 | from .scnet_mask_head import SCNetMaskHead 13 | from .scnet_semantic_head import SCNetSemanticHead 14 | 15 | __all__ = [ 16 | 'FCNMaskHead', 'HTCMaskHead', 'FusedSemanticHead', 'GridHead', 17 | 'MaskIoUHead', 'CoarseMaskHead', 'MaskPointHead', 'SCNetMaskHead', 18 | 'SCNetSemanticHead', 'GlobalContextHead', 'FeatureRelayHead', 19 | 'DynamicMaskHead' 20 | ] 21 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_r50_fpn_poly_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/mask_rcnn_r50_fpn.py', 3 | '../_base_/datasets/coco_instance.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | 7 | img_norm_cfg = dict( 8 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 9 | train_pipeline = [ 10 | dict(type='LoadImageFromFile'), 11 | dict( 12 | type='LoadAnnotations', 13 | with_bbox=True, 14 | with_mask=True, 15 | poly2mask=False), 16 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 17 | dict(type='RandomFlip', flip_ratio=0.5), 18 | dict(type='Normalize', **img_norm_cfg), 19 | dict(type='Pad', size_divisor=32), 20 | dict(type='DefaultFormatBundle'), 21 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 22 | ] 23 | data = dict(train=dict(pipeline=train_pipeline)) 24 | -------------------------------------------------------------------------------- /mmdet/models/detectors/mask_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
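# An inference sketch (the checkpoint and image paths are hypothetical
# placeholders, not part of the original file): a trained Mask R-CNN such as
# the class below is typically exercised through mmdet's high-level API.
def _mask_rcnn_inference_demo():
    from mmdet.apis import inference_detector, init_detector
    model = init_detector(
        'configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py',
        'weights/mask_rcnn_r50_fpn_1x_coco.pth',  # hypothetical checkpoint
        device='cuda:0')
    return inference_detector(model, 'demo.jpg')  # hypothetical image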
2 | from ..builder import DETECTORS 3 | from .two_stage import TwoStageDetector 4 | 5 | 6 | @DETECTORS.register_module() 7 | class MaskRCNN(TwoStageDetector): 8 | """Implementation of `Mask R-CNN <https://arxiv.org/abs/1703.06870>`_""" 9 | 10 | def __init__(self, 11 | backbone, 12 | rpn_head, 13 | roi_head, 14 | train_cfg, 15 | test_cfg, 16 | neck=None, 17 | pretrained=None, 18 | init_cfg=None): 19 | super(MaskRCNN, self).__init__( 20 | backbone=backbone, 21 | neck=neck, 22 | rpn_head=rpn_head, 23 | roi_head=roi_head, 24 | train_cfg=train_cfg, 25 | test_cfg=test_cfg, 26 | pretrained=pretrained, 27 | init_cfg=init_cfg) 28 | -------------------------------------------------------------------------------- /mmdet/models/detectors/faster_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from ..builder import DETECTORS 3 | from .two_stage import TwoStageDetector 4 | 5 | 6 | @DETECTORS.register_module() 7 | class FasterRCNN(TwoStageDetector): 8 | """Implementation of `Faster R-CNN <https://arxiv.org/abs/1506.01497>`_""" 9 | 10 | def __init__(self, 11 | backbone, 12 | rpn_head, 13 | roi_head, 14 | train_cfg, 15 | test_cfg, 16 | neck=None, 17 | pretrained=None, 18 | init_cfg=None): 19 | super(FasterRCNN, self).__init__( 20 | backbone=backbone, 21 | neck=neck, 22 | rpn_head=rpn_head, 23 | roi_head=roi_head, 24 | train_cfg=train_cfg, 25 | test_cfg=test_cfg, 26 | pretrained=pretrained, 27 | init_cfg=init_cfg) 28 | -------------------------------------------------------------------------------- /mmdet/models/detectors/queryinst.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from ..builder import DETECTORS 3 | from .sparse_rcnn import SparseRCNN 4 | 5 | 6 | @DETECTORS.register_module() 7 | class QueryInst(SparseRCNN): 8 | r"""Implementation of 9 | `Instances as Queries <https://arxiv.org/abs/2105.01928>`_""" 10 | 11 | def __init__(self, 12 | backbone, 13 | rpn_head, 14 | roi_head, 15 | train_cfg, 16 | test_cfg, 17 | neck=None, 18 | pretrained=None, 19 | init_cfg=None): 20 | super(QueryInst, self).__init__( 21 | backbone=backbone, 22 | neck=neck, 23 | rpn_head=rpn_head, 24 | roi_head=roi_head, 25 | train_cfg=train_cfg, 26 | test_cfg=test_cfg, 27 | pretrained=pretrained, 28 | init_cfg=init_cfg) 29 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | GPUS=4 # number of gpus per node 6 | PORT=${PORT:-29500} 7 | 8 | WORK_DIR_PARENT="./work_dirs/" 9 | 10 | config_dir="configs/mask_rcnn/" 11 | # run_name="coco_human-mask_rcnn_r50_fpn-basic_copy_paste" 12 | run_name="coco_human-mask_rcnn_r50_fpn-occlusion_copy_paste" # config file with/without suffix '.py' 13 | 14 | 15 | run_name="${run_name%.py}" # this will strip trailing .py 16 | echo "Running training for $run_name.." 17 | 18 | WORK_DIR="${WORK_DIR_PARENT}/${run_name}" 19 | 20 | python3 -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 21 | $(dirname "$0")/train.py $config_dir/${run_name}.py --work-dir ${WORK_DIR} --launcher pytorch ${@:3} 22 | 23 | echo "############ Training is done! ############" 24 | 25 | echo "Testing.." 26 | $(dirname "$0")/test_after_train.sh ${run_name} ${WORK_DIR} ${GPUS} ${PORT} 27 | echo "########## Tests done! 
###########" 28 | 29 | 30 | -------------------------------------------------------------------------------- /mmdet/models/detectors/mask_scoring_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from ..builder import DETECTORS 3 | from .two_stage import TwoStageDetector 4 | 5 | 6 | @DETECTORS.register_module() 7 | class MaskScoringRCNN(TwoStageDetector): 8 | """Mask Scoring RCNN. 9 | 10 | https://arxiv.org/abs/1903.00241 11 | """ 12 | 13 | def __init__(self, 14 | backbone, 15 | rpn_head, 16 | roi_head, 17 | train_cfg, 18 | test_cfg, 19 | neck=None, 20 | pretrained=None, 21 | init_cfg=None): 22 | super(MaskScoringRCNN, self).__init__( 23 | backbone=backbone, 24 | neck=neck, 25 | rpn_head=rpn_head, 26 | roi_head=roi_head, 27 | train_cfg=train_cfg, 28 | test_cfg=test_cfg, 29 | pretrained=pretrained, 30 | init_cfg=init_cfg) 31 | -------------------------------------------------------------------------------- /mmdet/models/detectors/mask2former.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from ..builder import DETECTORS 3 | from .maskformer import MaskFormer 4 | 5 | 6 | @DETECTORS.register_module() 7 | class Mask2Former(MaskFormer): 8 | r"""Implementation of `Masked-attention Mask 9 | Transformer for Universal Image Segmentation 10 | `_.""" 11 | 12 | def __init__(self, 13 | backbone, 14 | neck=None, 15 | panoptic_head=None, 16 | panoptic_fusion_head=None, 17 | train_cfg=None, 18 | test_cfg=None, 19 | init_cfg=None): 20 | super().__init__( 21 | backbone, 22 | neck=neck, 23 | panoptic_head=panoptic_head, 24 | panoptic_fusion_head=panoptic_fusion_head, 25 | train_cfg=train_cfg, 26 | test_cfg=test_cfg, 27 | init_cfg=init_cfg) 28 | -------------------------------------------------------------------------------- /mmdet/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .backbones import * # noqa: F401,F403 3 | from .builder import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS, 4 | ROI_EXTRACTORS, SHARED_HEADS, build_backbone, 5 | build_detector, build_head, build_loss, build_neck, 6 | build_roi_extractor, build_shared_head) 7 | from .dense_heads import * # noqa: F401,F403 8 | from .detectors import * # noqa: F401,F403 9 | from .losses import * # noqa: F401,F403 10 | from .necks import * # noqa: F401,F403 11 | from .plugins import * # noqa: F401,F403 12 | from .roi_heads import * # noqa: F401,F403 13 | from .seg_heads import * # noqa: F401,F403 14 | 15 | __all__ = [ 16 | 'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS', 'LOSSES', 17 | 'DETECTORS', 'build_backbone', 'build_neck', 'build_roi_extractor', 18 | 'build_shared_head', 'build_head', 'build_loss', 'build_detector' 19 | ] 20 | -------------------------------------------------------------------------------- /mmdet/models/detectors/solo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from ..builder import DETECTORS 3 | from .single_stage_instance_seg import SingleStageInstanceSegmentor 4 | 5 | 6 | @DETECTORS.register_module() 7 | class SOLO(SingleStageInstanceSegmentor): 8 | """`SOLO: Segmenting Objects by Locations 9 | <https://arxiv.org/abs/1912.04488>`_ 10 | 11 | """ 12 | 13 | def __init__(self, 14 | backbone, 15 | neck=None, 16 | bbox_head=None, 17 | mask_head=None, 18 | train_cfg=None, 19 | test_cfg=None, 20 | init_cfg=None, 21 | pretrained=None): 22 | super().__init__( 23 | backbone=backbone, 24 | neck=neck, 25 | bbox_head=bbox_head, 26 | mask_head=mask_head, 27 | train_cfg=train_cfg, 28 | test_cfg=test_cfg, 29 | init_cfg=init_cfg, 30 | pretrained=pretrained) 31 | -------------------------------------------------------------------------------- /mmdet/models/detectors/solov2.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from ..builder import DETECTORS 3 | from .single_stage_instance_seg import SingleStageInstanceSegmentor 4 | 5 | 6 | @DETECTORS.register_module() 7 | class SOLOv2(SingleStageInstanceSegmentor): 8 | """`SOLOv2: Dynamic and Fast Instance Segmentation 9 | <https://arxiv.org/abs/2003.10152>`_ 10 | 11 | """ 12 | 13 | def __init__(self, 14 | backbone, 15 | neck=None, 16 | bbox_head=None, 17 | mask_head=None, 18 | train_cfg=None, 19 | test_cfg=None, 20 | init_cfg=None, 21 | pretrained=None): 22 | super().__init__( 23 | backbone=backbone, 24 | neck=neck, 25 | bbox_head=bbox_head, 26 | mask_head=mask_head, 27 | train_cfg=train_cfg, 28 | test_cfg=test_cfg, 29 | init_cfg=init_cfg, 30 | pretrained=pretrained) 31 | -------------------------------------------------------------------------------- /mmdet/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import mmcv 3 | 4 | from .version import __version__, short_version 5 | 6 | 7 | def digit_version(version_str): 8 | digit_version = [] 9 | for x in version_str.split('.'): 10 | if x.isdigit(): 11 | digit_version.append(int(x)) 12 | elif x.find('rc') != -1: 13 | patch_version = x.split('rc') 14 | digit_version.append(int(patch_version[0]) - 1) 15 | digit_version.append(int(patch_version[1])) 16 | return digit_version 17 | 18 | 19 | mmcv_minimum_version = '1.3.17' 20 | mmcv_maximum_version = '1.6.0' 21 | mmcv_version = digit_version(mmcv.__version__) 22 | 23 | 24 | assert (mmcv_version >= digit_version(mmcv_minimum_version) 25 | and mmcv_version <= digit_version(mmcv_maximum_version)), \ 26 | f'MMCV=={mmcv.__version__} is used but incompatible. ' \ 27 | f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.' 28 | 29 | __all__ = ['__version__', 'short_version'] 30 | -------------------------------------------------------------------------------- /mmdet/models/detectors/point_rend.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from ..builder import DETECTORS 3 | from .two_stage import TwoStageDetector 4 | 5 | 6 | @DETECTORS.register_module() 7 | class PointRend(TwoStageDetector): 8 | """PointRend: Image Segmentation as Rendering 9 | 10 | This detector is the implementation of 11 | `PointRend <https://arxiv.org/abs/1912.08193>`_. 
12 | 13 | """ 14 | 15 | def __init__(self, 16 | backbone, 17 | rpn_head, 18 | roi_head, 19 | train_cfg, 20 | test_cfg, 21 | neck=None, 22 | pretrained=None, 23 | init_cfg=None): 24 | super(PointRend, self).__init__( 25 | backbone=backbone, 26 | neck=neck, 27 | rpn_head=rpn_head, 28 | roi_head=roi_head, 29 | train_cfg=train_cfg, 30 | test_cfg=test_cfg, 31 | pretrained=pretrained, 32 | init_cfg=init_cfg) 33 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .class_names import (cityscapes_classes, coco_classes, dataset_aliases, 3 | get_classes, imagenet_det_classes, 4 | imagenet_vid_classes, oid_challenge_classes, 5 | oid_v6_classes, voc_classes) 6 | from .eval_hooks import DistEvalHook, EvalHook 7 | from .mean_ap import average_precision, eval_map, print_map_summary 8 | from .panoptic_utils import INSTANCE_OFFSET 9 | from .recall import (eval_recalls, plot_iou_recall, plot_num_recall, 10 | print_recall_summary) 11 | 12 | __all__ = [ 13 | 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes', 14 | 'coco_classes', 'cityscapes_classes', 'dataset_aliases', 'get_classes', 15 | 'DistEvalHook', 'EvalHook', 'average_precision', 'eval_map', 16 | 'print_map_summary', 'eval_recalls', 'print_recall_summary', 17 | 'plot_num_recall', 'plot_iou_recall', 'oid_v6_classes', 18 | 'oid_challenge_classes', 'INSTANCE_OFFSET' 19 | ] 20 | -------------------------------------------------------------------------------- /mmdet/models/detectors/grid_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from ..builder import DETECTORS 3 | from .two_stage import TwoStageDetector 4 | 5 | 6 | @DETECTORS.register_module() 7 | class GridRCNN(TwoStageDetector): 8 | """Grid R-CNN. 9 | 10 | This detector is the implementation of: 11 | - Grid R-CNN (https://arxiv.org/abs/1811.12030) 12 | - Grid R-CNN Plus: Faster and Better (https://arxiv.org/abs/1906.05688) 13 | """ 14 | 15 | def __init__(self, 16 | backbone, 17 | rpn_head, 18 | roi_head, 19 | train_cfg, 20 | test_cfg, 21 | neck=None, 22 | pretrained=None, 23 | init_cfg=None): 24 | super(GridRCNN, self).__init__( 25 | backbone=backbone, 26 | neck=neck, 27 | rpn_head=rpn_head, 28 | roi_head=roi_head, 29 | train_cfg=train_cfg, 30 | test_cfg=test_cfg, 31 | pretrained=pretrained, 32 | init_cfg=init_cfg) 33 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
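# An assigner-config sketch (the values are the common RPN defaults, shown
# only for illustration): assigners exported below match priors to ground
# truth by IoU before sampling takes place.
def _max_iou_assigner_cfg_demo():
    return dict(
        type='MaxIoUAssigner',
        pos_iou_thr=0.7,
        neg_iou_thr=0.3,
        min_pos_iou=0.3,
        match_low_quality=True,
        ignore_iof_thr=-1)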
2 | from .approx_max_iou_assigner import ApproxMaxIoUAssigner 3 | from .assign_result import AssignResult 4 | from .atss_assigner import ATSSAssigner 5 | from .base_assigner import BaseAssigner 6 | from .center_region_assigner import CenterRegionAssigner 7 | from .grid_assigner import GridAssigner 8 | from .hungarian_assigner import HungarianAssigner 9 | from .mask_hungarian_assigner import MaskHungarianAssigner 10 | from .max_iou_assigner import MaxIoUAssigner 11 | from .point_assigner import PointAssigner 12 | from .region_assigner import RegionAssigner 13 | from .sim_ota_assigner import SimOTAAssigner 14 | from .task_aligned_assigner import TaskAlignedAssigner 15 | from .uniform_assigner import UniformAssigner 16 | 17 | __all__ = [ 18 | 'BaseAssigner', 'MaxIoUAssigner', 'ApproxMaxIoUAssigner', 'AssignResult', 19 | 'PointAssigner', 'ATSSAssigner', 'CenterRegionAssigner', 'GridAssigner', 20 | 'HungarianAssigner', 'RegionAssigner', 'UniformAssigner', 'SimOTAAssigner', 21 | 'TaskAlignedAssigner', 'MaskHungarianAssigner' 22 | ] 23 | -------------------------------------------------------------------------------- /tools/misc/gen_coco_panoptic_test_info.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os.path as osp 3 | 4 | import mmcv 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser( 9 | description='Generate COCO test image information ' 10 | 'for COCO panoptic segmentation.') 11 | parser.add_argument('data_root', help='Path to COCO annotation directory.') 12 | args = parser.parse_args() 13 | 14 | return args 15 | 16 | 17 | def main(): 18 | args = parse_args() 19 | data_root = args.data_root 20 | val_info = mmcv.load(osp.join(data_root, 'panoptic_val2017.json')) 21 | test_old_info = mmcv.load( 22 | osp.join(data_root, 'image_info_test-dev2017.json')) 23 | 24 | # replace categories from image_info_test-dev2017.json 25 | # with categories from panoptic_val2017.json which 26 | # has attribute `isthing`. 27 | test_info = test_old_info 28 | test_info.update({'categories': val_info['categories']}) 29 | mmcv.dump(test_info, 30 | osp.join(data_root, 'panoptic_image_info_test-dev2017.json')) 31 | 32 | 33 | if __name__ == '__main__': 34 | main() 35 | -------------------------------------------------------------------------------- /mmdet/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
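# A backbone-config sketch (standard ResNet-50 settings, shown only for
# illustration): every backbone exported below is instantiated from a config
# dict of this shape via the BACKBONES registry.
def _resnet50_backbone_cfg_demo():
    return dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'))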
2 | from .csp_darknet import CSPDarknet 3 | from .darknet import Darknet 4 | from .detectors_resnet import DetectoRS_ResNet 5 | from .detectors_resnext import DetectoRS_ResNeXt 6 | from .efficientnet import EfficientNet 7 | from .hourglass import HourglassNet 8 | from .hrnet import HRNet 9 | from .mobilenet_v2 import MobileNetV2 10 | from .pvt import PyramidVisionTransformer, PyramidVisionTransformerV2 11 | from .regnet import RegNet 12 | from .res2net import Res2Net 13 | from .resnest import ResNeSt 14 | from .resnet import ResNet, ResNetV1d 15 | from .resnext import ResNeXt 16 | from .ssd_vgg import SSDVGG 17 | from .swin import SwinTransformer 18 | from .trident_resnet import TridentResNet 19 | 20 | __all__ = [ 21 | 'RegNet', 'ResNet', 'ResNetV1d', 'ResNeXt', 'SSDVGG', 'HRNet', 22 | 'MobileNetV2', 'Res2Net', 'HourglassNet', 'DetectoRS_ResNet', 23 | 'DetectoRS_ResNeXt', 'Darknet', 'ResNeSt', 'TridentResNet', 'CSPDarknet', 24 | 'SwinTransformer', 'PyramidVisionTransformer', 25 | 'PyramidVisionTransformerV2', 'EfficientNet' 26 | ] 27 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/mask_heads/scnet_mask_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmdet.models.builder import HEADS 3 | from mmdet.models.utils import ResLayer, SimplifiedBasicBlock 4 | from .fcn_mask_head import FCNMaskHead 5 | 6 | 7 | @HEADS.register_module() 8 | class SCNetMaskHead(FCNMaskHead): 9 | """Mask head for `SCNet <https://arxiv.org/abs/2012.10150>`_. 10 | 11 | Args: 12 | conv_to_res (bool, optional): if True, change the conv layers to 13 | ``SimplifiedBasicBlock``. 14 | """ 15 | 16 | def __init__(self, conv_to_res=True, **kwargs): 17 | super(SCNetMaskHead, self).__init__(**kwargs) 18 | self.conv_to_res = conv_to_res 19 | if conv_to_res: 20 | assert self.conv_kernel_size == 3 21 | self.num_res_blocks = self.num_convs // 2 22 | self.convs = ResLayer( 23 | SimplifiedBasicBlock, 24 | self.in_channels, 25 | self.conv_out_channels, 26 | self.num_res_blocks, 27 | conv_cfg=self.conv_cfg, 28 | norm_cfg=self.norm_cfg) 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Evan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/mask_heads/scnet_semantic_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmdet.models.builder import HEADS 3 | from mmdet.models.utils import ResLayer, SimplifiedBasicBlock 4 | from .fused_semantic_head import FusedSemanticHead 5 | 6 | 7 | @HEADS.register_module() 8 | class SCNetSemanticHead(FusedSemanticHead): 9 | """Mask head for `SCNet <https://arxiv.org/abs/2012.10150>`_. 10 | 11 | Args: 12 | conv_to_res (bool, optional): if True, change the conv layers to 13 | ``SimplifiedBasicBlock``. 14 | """ 15 | 16 | def __init__(self, conv_to_res=True, **kwargs): 17 | super(SCNetSemanticHead, self).__init__(**kwargs) 18 | self.conv_to_res = conv_to_res 19 | if self.conv_to_res: 20 | num_res_blocks = self.num_convs // 2 21 | self.convs = ResLayer( 22 | SimplifiedBasicBlock, 23 | self.in_channels, 24 | self.conv_out_channels, 25 | num_res_blocks, 26 | conv_cfg=self.conv_cfg, 27 | norm_cfg=self.norm_cfg) 28 | self.num_convs = num_res_blocks 29 | -------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | GPUS=4 # number of gpus per node 4 | NNODES=${NNODES:-1} 5 | NODE_RANK=${NODE_RANK:-0} 6 | PORT=${PORT:-29500} 7 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 8 | 9 | # config="configs/mask_rcnn/coco_human-mask_rcnn_r50_fpn-basic_copy_paste.py" 10 | # checkpoint="weights/coco_human-mask_rcnn_r50_fpn-basic_copy_paste-ep25.pth" 11 | 12 | config="configs/mask_rcnn/coco_human-mask_rcnn_r50_fpn-occlusion_copy_paste.py" 13 | checkpoint="weights/coco_human-mask_rcnn_r50_fpn-occlusion_copy_paste-ep24.pth" 14 | 15 | # config="configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_4x1_50e_coco-person-finetune-OCP_aug-5e.py" 16 | # checkpoint="weights/mask2former_swin-s-p4-w7-224_lsj_4x1_50e_coco-person-finetune-OCP_aug-5e-iter48000.pth" 17 | 18 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 19 | python -m torch.distributed.launch \ 20 | --nnodes=$NNODES \ 21 | --node_rank=$NODE_RANK \ 22 | --master_addr=$MASTER_ADDR \ 23 | --nproc_per_node=$GPUS \ 24 | --master_port=$PORT \ 25 | $(dirname "$0")/test.py \ 26 | $config \ 27 | $checkpoint \ 28 | --launcher pytorch \ 29 | --fuse-conv-bn \ 30 | --eval bbox segm -------------------------------------------------------------------------------- /mmdet/utils/util_random.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | """Helpers for random number generators.""" 3 | import numpy as np 4 | 5 | 6 | def ensure_rng(rng=None): 7 | """Coerces input into a random number generator. 8 | 9 | If the input is None, then a global random state is returned. 10 | 11 | If the input is a numeric value, then that is used as a seed to construct a 12 | random state. Otherwise the input is returned as-is. 13 | 14 | Adapted from [1]_. 15 | 16 | Args: 17 | rng (int | numpy.random.RandomState | None): 18 | if None, then defaults to the global rng. Otherwise this can be an 19 | integer or a RandomState class 20 | Returns: 21 | (numpy.random.RandomState) : rng - 22 | a numpy random number generator 23 | 24 | References: 25 | .. 
[1] https://gitlab.kitware.com/computer-vision/kwarray/blob/master/kwarray/util_random.py#L270 # noqa: E501 26 | """ 27 | 28 | if rng is None: 29 | rng = np.random.mtrand._rand 30 | elif isinstance(rng, int): 31 | rng = np.random.RandomState(rng) 32 | else: 33 | rng = rng 34 | return rng 35 | -------------------------------------------------------------------------------- /configs/mask2former/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic.py: -------------------------------------------------------------------------------- 1 | _base_ = ['./mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic.py'] 2 | pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth' # noqa 3 | 4 | model = dict( 5 | backbone=dict( 6 | embed_dims=192, 7 | num_heads=[6, 12, 24, 48], 8 | init_cfg=dict(type='Pretrained', checkpoint=pretrained)), 9 | panoptic_head=dict(num_queries=200, in_channels=[192, 384, 768, 1536])) 10 | 11 | data = dict(samples_per_gpu=1, workers_per_gpu=1) 12 | 13 | lr_config = dict(step=[655556, 710184]) 14 | 15 | max_iters = 737500 16 | runner = dict(type='IterBasedRunner', max_iters=max_iters) 17 | 18 | # Before the 735001st iteration, we do evaluation every 5000 iterations. 19 | # After the 735000th iteration, we do evaluation every 737500 iterations, 20 | # which means that we do evaluation at the end of training. 21 | interval = 5000 22 | dynamic_intervals = [(max_iters // interval * interval + 1, max_iters)] 23 | evaluation = dict( 24 | interval=interval, 25 | dynamic_intervals=dynamic_intervals, 26 | metric=['PQ', 'bbox', 'segm']) 27 | -------------------------------------------------------------------------------- /mmdet/models/detectors/panoptic_fpn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from ..builder import DETECTORS 3 | from .panoptic_two_stage_segmentor import TwoStagePanopticSegmentor 4 | 5 | 6 | @DETECTORS.register_module() 7 | class PanopticFPN(TwoStagePanopticSegmentor): 8 | r"""Implementation of `Panoptic feature pyramid 9 | networks <https://arxiv.org/abs/1901.02446>`_""" 10 | 11 | def __init__( 12 | self, 13 | backbone, 14 | neck=None, 15 | rpn_head=None, 16 | roi_head=None, 17 | train_cfg=None, 18 | test_cfg=None, 19 | pretrained=None, 20 | init_cfg=None, 21 | # for panoptic segmentation 22 | semantic_head=None, 23 | panoptic_fusion_head=None): 24 | super(PanopticFPN, self).__init__( 25 | backbone=backbone, 26 | neck=neck, 27 | rpn_head=rpn_head, 28 | roi_head=roi_head, 29 | train_cfg=train_cfg, 30 | test_cfg=test_cfg, 31 | pretrained=pretrained, 32 | init_cfg=init_cfg, 33 | semantic_head=semantic_head, 34 | panoptic_fusion_head=panoptic_fusion_head) 35 | -------------------------------------------------------------------------------- /tools/test_after_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | NNODES=${NNODES:-1} 5 | NODE_RANK=${NODE_RANK:-0} 6 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 7 | 8 | bn=${1} 9 | echo "Testing ${bn}.." 
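# Usage sketch (illustrative invocation; values assumed): positional args are
# 1=run/config name, 2=work dir, 3=num GPUs (default 4), 4=port (default 29500),
# e.g.
#   tools/test_after_train.sh coco_human-mask_rcnn_r50_fpn-occlusion_copy_paste \
#       work_dirs/coco_human-mask_rcnn_r50_fpn-occlusion_copy_paste 4 29500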
10 | 11 | WORK_DIR=${2} 12 | echo "Work dir: ${WORK_DIR}" 13 | 14 | GPUS=${3:-4} 15 | echo "Num gpus: ${GPUS}" 16 | 17 | PORT=${4:-29500} 18 | echo "Port: ${PORT}" 19 | 20 | best_model=`ls ${WORK_DIR}/best_1_segm_mAP_*.pth -t | head -1` 21 | echo "best model: ${best_model}" 22 | cfg=`ls ${WORK_DIR}/*.py -rt | head -1` 23 | echo "config file: ${cfg}" 24 | 25 | test_set="test-best" 26 | echo "testing on ${test_set}" 27 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 28 | python -m torch.distributed.launch \ 29 | --nnodes=$NNODES \ 30 | --node_rank=$NODE_RANK \ 31 | --master_addr=$MASTER_ADDR \ 32 | --nproc_per_node=$GPUS \ 33 | --master_port=$PORT \ 34 | $(dirname "$0")/test.py \ 35 | ${cfg} \ 36 | ${best_model} \ 37 | --launcher pytorch \ 38 | --fuse-conv-bn \ 39 | --work-dir ${WORK_DIR}/${test_set}/ \ 40 | --out ${WORK_DIR}/${test_set}/results.pkl \ 41 | --eval bbox segm \ 42 | --show \ 43 | --show-dir ${WORK_DIR}/${test_set}/viz/ \ 44 | --show-score-thr 0.3 \ 45 | --eval-options jsonfile_prefix=${WORK_DIR}/${test_set}/res_jsons/res 46 | -------------------------------------------------------------------------------- /configs/mask2former/test_coco_occ_person.py: -------------------------------------------------------------------------------- 1 | classes=('person',) 2 | 3 | # dataset settings 4 | image_size = (1024, 1024) 5 | img_norm_cfg = dict( 6 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 7 | pad_cfg = dict(img=(128, 128, 128), masks=0, seg=255) 8 | 9 | test_pipeline = [ 10 | dict(type='LoadImageFromFile'), 11 | dict( 12 | type='MultiScaleFlipAug', 13 | img_scale=(1333, 800), 14 | flip=False, 15 | transforms=[ 16 | dict(type='Resize', keep_ratio=True), 17 | dict(type='RandomFlip'), 18 | dict(type='Pad', size_divisor=32, pad_val=pad_cfg), 19 | dict(type='Normalize', **img_norm_cfg), 20 | dict(type='ImageToTensor', keys=['img']), 21 | dict(type='Collect', keys=['img']), 22 | ]) 23 | ] 24 | 25 | dataset_type = 'CocoDataset' 26 | data_root = 'data/' 27 | 28 | data = dict( 29 | samples_per_gpu=1, 30 | workers_per_gpu=1, 31 | test=dict( 32 | _delete_=True, 33 | type=dataset_type, 34 | ann_file=data_root + 'COCO2017/annotations/instances_occperson2017.json', 35 | img_prefix=data_root + 'COCO2017/val2017/', 36 | classes=classes, 37 | test_mode=True, 38 | pipeline=test_pipeline 39 | ) 40 | ) 41 | 42 | -------------------------------------------------------------------------------- /configs/mask2former/test_coco_val_person.py: -------------------------------------------------------------------------------- 1 | classes=('person',) 2 | 3 | # dataset settings 4 | image_size = (1024, 1024) 5 | img_norm_cfg = dict( 6 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 7 | pad_cfg = dict(img=(128, 128, 128), masks=0, seg=255) 8 | 9 | test_pipeline = [ 10 | dict(type='LoadImageFromFile'), 11 | dict( 12 | type='MultiScaleFlipAug', 13 | img_scale=(1333, 800), 14 | flip=False, 15 | transforms=[ 16 | dict(type='Resize', keep_ratio=True), 17 | dict(type='RandomFlip'), 18 | dict(type='Pad', size_divisor=32, pad_val=pad_cfg), 19 | dict(type='Normalize', **img_norm_cfg), 20 | dict(type='ImageToTensor', keys=['img']), 21 | dict(type='Collect', keys=['img']), 22 | ]) 23 | ] 24 | 25 | dataset_type = 'CocoDataset' 26 | data_root = 'data/' 27 | 28 | data = dict( 29 | samples_per_gpu=1, 30 | workers_per_gpu=1, 31 | test=dict( 32 | _delete_=True, 33 | type=dataset_type, 34 | ann_file=data_root + 'COCO2017/annotations/instances_val_person2017.json', 35 | 
img_prefix=data_root + 'COCO2017/val2017/', 36 | classes=classes, 37 | test_mode=True, 38 | pipeline=test_pipeline 39 | ) 40 | ) 41 | 42 | -------------------------------------------------------------------------------- /mmdet/core/optimizers/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import copy 3 | 4 | from mmcv.runner.optimizer import OPTIMIZER_BUILDERS as MMCV_OPTIMIZER_BUILDERS 5 | from mmcv.utils import Registry, build_from_cfg 6 | 7 | OPTIMIZER_BUILDERS = Registry( 8 | 'optimizer builder', parent=MMCV_OPTIMIZER_BUILDERS) 9 | 10 | 11 | def build_optimizer_constructor(cfg): 12 | constructor_type = cfg.get('type') 13 | if constructor_type in OPTIMIZER_BUILDERS: 14 | return build_from_cfg(cfg, OPTIMIZER_BUILDERS) 15 | elif constructor_type in MMCV_OPTIMIZER_BUILDERS: 16 | return build_from_cfg(cfg, MMCV_OPTIMIZER_BUILDERS) 17 | else: 18 | raise KeyError(f'{constructor_type} is not registered ' 19 | 'in the optimizer builder registry.') 20 | 21 | 22 | def build_optimizer(model, cfg): 23 | optimizer_cfg = copy.deepcopy(cfg) 24 | constructor_type = optimizer_cfg.pop('constructor', 25 | 'DefaultOptimizerConstructor') 26 | paramwise_cfg = optimizer_cfg.pop('paramwise_cfg', None) 27 | optim_constructor = build_optimizer_constructor( 28 | dict( 29 | type=constructor_type, 30 | optimizer_cfg=optimizer_cfg, 31 | paramwise_cfg=paramwise_cfg)) 32 | optimizer = optim_constructor(model) 33 | return optimizer 34 | -------------------------------------------------------------------------------- /mmdet/core/bbox/demodata.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import numpy as np 3 | import torch 4 | 5 | from mmdet.utils.util_random import ensure_rng 6 | 7 | 8 | def random_boxes(num=1, scale=1, rng=None): 9 | """Simple version of ``kwimage.Boxes.random`` 10 | 11 | Returns: 12 | Tensor: shape (n, 4) in x1, y1, x2, y2 format. 13 | 14 | References: 15 | https://gitlab.kitware.com/computer-vision/kwimage/blob/master/kwimage/structs/boxes.py#L1390 16 | 17 | Example: 18 | >>> num = 3 19 | >>> scale = 512 20 | >>> rng = 0 21 | >>> boxes = random_boxes(num, scale, rng) 22 | >>> print(boxes) 23 | tensor([[280.9925, 278.9802, 308.6148, 366.1769], 24 | [216.9113, 330.6978, 224.0446, 456.5878], 25 | [405.3632, 196.3221, 493.3953, 270.7942]]) 26 | """ 27 | rng = ensure_rng(rng) 28 | 29 | tlbr = rng.rand(num, 4).astype(np.float32) 30 | 31 | tl_x = np.minimum(tlbr[:, 0], tlbr[:, 2]) 32 | tl_y = np.minimum(tlbr[:, 1], tlbr[:, 3]) 33 | br_x = np.maximum(tlbr[:, 0], tlbr[:, 2]) 34 | br_y = np.maximum(tlbr[:, 1], tlbr[:, 3]) 35 | 36 | tlbr[:, 0] = tl_x * scale 37 | tlbr[:, 1] = tl_y * scale 38 | tlbr[:, 2] = br_x * scale 39 | tlbr[:, 3] = br_y * scale 40 | 41 | boxes = torch.from_numpy(tlbr) 42 | return boxes 43 | -------------------------------------------------------------------------------- /mmdet/models/utils/make_divisible.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | def make_divisible(value, divisor, min_value=None, min_ratio=0.9): 3 | """Make divisible function. 4 | 5 | This function rounds the channel number to the nearest value that can be 6 | divisible by the divisor. It is taken from the original tf repo. It ensures 7 | that all layers have a channel number that is divisible by divisor. 
It can 8 | be seen here: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py # noqa 9 | 10 | Args: 11 | value (int): The original channel number. 12 | divisor (int): The divisor to fully divide the channel number. 13 | min_value (int): The minimum value of the output channel. 14 | Default: None, means that the minimum value equal to the divisor. 15 | min_ratio (float): The minimum ratio of the rounded channel number to 16 | the original channel number. Default: 0.9. 17 | 18 | Returns: 19 | int: The modified output channel number. 20 | """ 21 | 22 | if min_value is None: 23 | min_value = divisor 24 | new_value = max(min_value, int(value + divisor / 2) // divisor * divisor) 25 | # Make sure that round down does not go down by more than (1-min_ratio). 26 | if new_value < min_ratio * value: 27 | new_value += divisor 28 | return new_value 29 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/double_roi_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from ..builder import HEADS 3 | from .standard_roi_head import StandardRoIHead 4 | 5 | 6 | @HEADS.register_module() 7 | class DoubleHeadRoIHead(StandardRoIHead): 8 | """RoI head for Double Head RCNN. 9 | 10 | https://arxiv.org/abs/1904.06493 11 | """ 12 | 13 | def __init__(self, reg_roi_scale_factor, **kwargs): 14 | super(DoubleHeadRoIHead, self).__init__(**kwargs) 15 | self.reg_roi_scale_factor = reg_roi_scale_factor 16 | 17 | def _bbox_forward(self, x, rois): 18 | """Box head forward function used in both training and testing time.""" 19 | bbox_cls_feats = self.bbox_roi_extractor( 20 | x[:self.bbox_roi_extractor.num_inputs], rois) 21 | bbox_reg_feats = self.bbox_roi_extractor( 22 | x[:self.bbox_roi_extractor.num_inputs], 23 | rois, 24 | roi_scale_factor=self.reg_roi_scale_factor) 25 | if self.with_shared_head: 26 | bbox_cls_feats = self.shared_head(bbox_cls_feats) 27 | bbox_reg_feats = self.shared_head(bbox_reg_feats) 28 | cls_score, bbox_pred = self.bbox_head(bbox_cls_feats, bbox_reg_feats) 29 | 30 | bbox_results = dict( 31 | cls_score=cls_score, 32 | bbox_pred=bbox_pred, 33 | bbox_feats=bbox_cls_feats) 34 | return bbox_results 35 | -------------------------------------------------------------------------------- /tools/model_converters/selfsup2mmdet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
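# Example invocation (paths are placeholders): python tools/model_converters/selfsup2mmdet.py moco_r50.pth mmdet_r50.pth --selfsup moco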
2 | import argparse 3 | from collections import OrderedDict 4 | 5 | import torch 6 | 7 | 8 | def moco_convert(src, dst): 9 | """Convert keys in pycls pretrained moco models to mmdet style.""" 10 | # load the MoCo checkpoint 11 | moco_model = torch.load(src) 12 | blobs = moco_model['state_dict'] 13 | # convert to pytorch style 14 | state_dict = OrderedDict() 15 | for k, v in blobs.items(): 16 | if not k.startswith('module.encoder_q.'): 17 | continue 18 | old_k = k 19 | k = k.replace('module.encoder_q.', '') 20 | state_dict[k] = v 21 | print(old_k, '->', k) 22 | # save checkpoint 23 | checkpoint = dict() 24 | checkpoint['state_dict'] = state_dict 25 | torch.save(checkpoint, dst) 26 | 27 | 28 | def main(): 29 | parser = argparse.ArgumentParser(description='Convert model keys') 30 | parser.add_argument('src', help='src self-supervised model path') 31 | parser.add_argument('dst', help='save path') 32 | parser.add_argument( 33 | '--selfsup', type=str, choices=['moco', 'swav'], help='self-supervised method used to train the src checkpoint') 34 | args = parser.parse_args() 35 | if args.selfsup == 'moco': 36 | moco_convert(args.src, args.dst) 37 | elif args.selfsup == 'swav': 38 | print('SWAV does not need to convert the keys') 39 | 40 | 41 | if __name__ == '__main__': 42 | main() 43 | -------------------------------------------------------------------------------- /Pose2Seg_OCP/aepose/aedets2cocojson.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | import numpy as np 4 | 5 | aedet_json = '/Users/levan/Workspace/Pose2Seg/pose_pred/ochval_refine_dt.json' 6 | ochuman_json = '/Users/levan/Data/OCHuman/ochuman_coco_format_val_range_0.00_1.00.json' 7 | # ochuman_json = '/Users/levan/Data/OCHuman/ochuman_coco_format_test_range_0.00_1.00.json' 8 | 9 | och_path = Path(ochuman_json) 10 | det_path = Path(aedet_json) 11 | 12 | out_path = och_path.parent / f'{och_path.stem}-{det_path.stem}.json' 13 | 14 | with det_path.open('r') as rf: 15 | det_list = json.load(rf) 16 | 17 | with och_path.open('r') as rf: 18 | och_dict = json.load(rf) 19 | 20 | img_ids = [img['id'] for img in och_dict['images']] 21 | 22 | new_annots = [] 23 | for det in det_list: 24 | kpts = np.array(det['keypoints']) 25 | assert np.equal(kpts[2::3], 1).all()  # the AE detections export every keypoint with visibility flag 1 26 | kpts[2::3] = 2  # re-mark all keypoints as visible (COCO flag v=2) 27 | 28 | annot = {'image_id': det['image_id'], 29 | 'area': None, 30 | 'num_keypoints': 0, 31 | 'iscrowd': 0, 32 | 'id': det['id'], 33 | 'category_id': 1, 34 | 'keypoints': kpts.tolist(),  # tolist() yields plain Python numbers; numpy scalars are not JSON serializable 35 | 'segmentation': [[]], 36 | 'bbox': [] 37 | } 38 | 39 | assert annot['image_id'] in img_ids 40 | 41 | new_annots.append(annot) 42 | 43 | och_dict['annotations'] = new_annots 44 | 45 | with out_path.open('w') as wf: 46 | json.dump(och_dict, wf) 47 | 48 | print('Written to', out_path) -------------------------------------------------------------------------------- /mmdet/models/roi_heads/mask_heads/htc_mask_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
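# Note: when a mask feature from the previous cascade stage is passed in as res_feat, forward() first projects it with the 1x1 conv_res ConvModule and adds it to the input before the usual FCN mask convs.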
2 | from mmcv.cnn import ConvModule 3 | 4 | from mmdet.models.builder import HEADS 5 | from .fcn_mask_head import FCNMaskHead 6 | 7 | 8 | @HEADS.register_module() 9 | class HTCMaskHead(FCNMaskHead): 10 | 11 | def __init__(self, with_conv_res=True, *args, **kwargs): 12 | super(HTCMaskHead, self).__init__(*args, **kwargs) 13 | self.with_conv_res = with_conv_res 14 | if self.with_conv_res: 15 | self.conv_res = ConvModule( 16 | self.conv_out_channels, 17 | self.conv_out_channels, 18 | 1, 19 | conv_cfg=self.conv_cfg, 20 | norm_cfg=self.norm_cfg) 21 | 22 | def forward(self, x, res_feat=None, return_logits=True, return_feat=True): 23 | if res_feat is not None: 24 | assert self.with_conv_res 25 | res_feat = self.conv_res(res_feat) 26 | x = x + res_feat 27 | for conv in self.convs: 28 | x = conv(x) 29 | res_feat = x 30 | outs = [] 31 | if return_logits: 32 | x = self.upsample(x) 33 | if self.upsample_method == 'deconv': 34 | x = self.relu(x) 35 | mask_pred = self.conv_logits(x) 36 | outs.append(mask_pred) 37 | if return_feat: 38 | outs.append(res_feat) 39 | return outs if len(outs) > 1 else outs[0] 40 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/faster_rcnn_r50_caffe_dc5.py', 3 | '../_base_/datasets/coco_detection.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | # use caffe img_norm 7 | img_norm_cfg = dict( 8 | mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) 9 | train_pipeline = [ 10 | dict(type='LoadImageFromFile'), 11 | dict(type='LoadAnnotations', with_bbox=True), 12 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 13 | dict(type='RandomFlip', flip_ratio=0.5), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size_divisor=32), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(1333, 800), 24 | flip=False, 25 | transforms=[ 26 | dict(type='Resize', keep_ratio=True), 27 | dict(type='RandomFlip'), 28 | dict(type='Normalize', **img_norm_cfg), 29 | dict(type='Pad', size_divisor=32), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | train=dict(pipeline=train_pipeline), 36 | val=dict(pipeline=test_pipeline), 37 | test=dict(pipeline=test_pipeline)) 38 | -------------------------------------------------------------------------------- /tools/model_converters/publish_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
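# Example invocation (filenames are placeholders): python tools/model_converters/publish_model.py work_dirs/latest.pth published.pth ; the optimizer state is stripped and the result is renamed to published-<first 8 chars of sha256>.pth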
2 | import argparse 3 | import subprocess 4 | 5 | import torch 6 | 7 | 8 | def parse_args(): 9 | parser = argparse.ArgumentParser( 10 | description='Process a checkpoint to be published') 11 | parser.add_argument('in_file', help='input checkpoint filename') 12 | parser.add_argument('out_file', help='output checkpoint filename') 13 | args = parser.parse_args() 14 | return args 15 | 16 | 17 | def process_checkpoint(in_file, out_file): 18 | checkpoint = torch.load(in_file, map_location='cpu') 19 | # remove optimizer for smaller file size 20 | if 'optimizer' in checkpoint: 21 | del checkpoint['optimizer'] 22 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 23 | # add the code here. 24 | if tuple(int(v) for v in torch.__version__.split('.')[:2]) >= (1, 6):  # compare numerically; a plain string compare sorts '1.10' before '1.6' 25 | torch.save(checkpoint, out_file, _use_new_zipfile_serialization=False) 26 | else: 27 | torch.save(checkpoint, out_file) 28 | sha = subprocess.check_output(['sha256sum', out_file]).decode() 29 | if out_file.endswith('.pth'): 30 | out_file_name = out_file[:-4] 31 | else: 32 | out_file_name = out_file 33 | final_file = out_file_name + f'-{sha[:8]}.pth' 34 | subprocess.Popen(['mv', out_file, final_file]) 35 | 36 | 37 | def main(): 38 | args = parse_args() 39 | process_checkpoint(args.in_file, args.out_file) 40 | 41 | 42 | if __name__ == '__main__': 43 | main() 44 | -------------------------------------------------------------------------------- /mmdet/utils/profiling.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import contextlib 3 | import sys 4 | import time 5 | 6 | import torch 7 | 8 | if sys.version_info >= (3, 7): 9 | 10 | @contextlib.contextmanager 11 | def profile_time(trace_name, 12 | name, 13 | enabled=True, 14 | stream=None, 15 | end_stream=None): 16 | """Print time spent by CPU and GPU. 17 | 18 | Useful as a temporary context manager to find sweet spots of code 19 | suitable for async implementation.
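Example (illustrative; assumes a CUDA model and input batch): ``with profile_time('mmdet', 'forward'): output = model(img)``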
20 | """ 21 | if (not enabled) or not torch.cuda.is_available(): 22 | yield 23 | return 24 | stream = stream if stream else torch.cuda.current_stream() 25 | end_stream = end_stream if end_stream else stream 26 | start = torch.cuda.Event(enable_timing=True) 27 | end = torch.cuda.Event(enable_timing=True) 28 | stream.record_event(start) 29 | try: 30 | cpu_start = time.monotonic() 31 | yield 32 | finally: 33 | cpu_end = time.monotonic() 34 | end_stream.record_event(end) 35 | end.synchronize() 36 | cpu_time = (cpu_end - cpu_start) * 1000 37 | gpu_time = start.elapsed_time(end) 38 | msg = f'{trace_name} {name} cpu_time {cpu_time:.2f} ms ' 39 | msg += f'gpu_time {gpu_time:.2f} ms stream {stream}' 40 | print(msg, end_stream) 41 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_caffe_c4_mstrain_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './faster_rcnn_r50_caffe_c4_1x_coco.py' 2 | # use caffe img_norm 3 | img_norm_cfg = dict( 4 | mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True), 8 | dict( 9 | type='Resize', 10 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), 11 | (1333, 768), (1333, 800)], 12 | multiscale_mode='value', 13 | keep_ratio=True), 14 | dict(type='RandomFlip', flip_ratio=0.5), 15 | dict(type='Normalize', **img_norm_cfg), 16 | dict(type='Pad', size_divisor=32), 17 | dict(type='DefaultFormatBundle'), 18 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 19 | ] 20 | test_pipeline = [ 21 | dict(type='LoadImageFromFile'), 22 | dict( 23 | type='MultiScaleFlipAug', 24 | img_scale=(1333, 800), 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='Pad', size_divisor=32), 31 | dict(type='ImageToTensor', keys=['img']), 32 | dict(type='Collect', keys=['img']), 33 | ]) 34 | ] 35 | data = dict( 36 | train=dict(pipeline=train_pipeline), 37 | val=dict(pipeline=test_pipeline), 38 | test=dict(pipeline=test_pipeline)) 39 | -------------------------------------------------------------------------------- /mmdet/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset 3 | from .cityscapes import CityscapesDataset 4 | from .coco import CocoDataset 5 | from .coco_panoptic import CocoPanopticDataset 6 | from .custom import CustomDataset 7 | from .dataset_wrappers import (ClassBalancedDataset, ConcatDataset, 8 | MultiImageMixDataset, RepeatDataset) 9 | from .deepfashion import DeepFashionDataset 10 | from .lvis import LVISDataset, LVISV1Dataset, LVISV05Dataset 11 | from .openimages import OpenImagesChallengeDataset, OpenImagesDataset 12 | from .samplers import DistributedGroupSampler, DistributedSampler, GroupSampler 13 | from .utils import (NumClassCheckHook, get_loading_pipeline, 14 | replace_ImageToTensor) 15 | from .voc import VOCDataset 16 | from .wider_face import WIDERFaceDataset 17 | from .xml_style import XMLDataset 18 | 19 | __all__ = [ 20 | 'CustomDataset', 'XMLDataset', 'CocoDataset', 'DeepFashionDataset', 21 | 'VOCDataset', 'CityscapesDataset', 'LVISDataset', 'LVISV05Dataset', 22 | 'LVISV1Dataset', 'GroupSampler', 'DistributedGroupSampler', 23 | 'DistributedSampler', 'build_dataloader', 'ConcatDataset', 'RepeatDataset', 24 | 'ClassBalancedDataset', 'WIDERFaceDataset', 'DATASETS', 'PIPELINES', 25 | 'build_dataset', 'replace_ImageToTensor', 'get_loading_pipeline', 26 | 'NumClassCheckHook', 'CocoPanopticDataset', 'MultiImageMixDataset', 27 | 'OpenImagesDataset', 'OpenImagesChallengeDataset' 28 | ] 29 | -------------------------------------------------------------------------------- /mmdet/models/detectors/yolo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | # Copyright (c) 2019 Western Digital Corporation or its affiliates. 3 | import torch 4 | 5 | from ..builder import DETECTORS 6 | from .single_stage import SingleStageDetector 7 | 8 | 9 | @DETECTORS.register_module() 10 | class YOLOV3(SingleStageDetector): 11 | 12 | def __init__(self, 13 | backbone, 14 | neck, 15 | bbox_head, 16 | train_cfg=None, 17 | test_cfg=None, 18 | pretrained=None, 19 | init_cfg=None): 20 | super(YOLOV3, self).__init__(backbone, neck, bbox_head, train_cfg, 21 | test_cfg, pretrained, init_cfg) 22 | 23 | def onnx_export(self, img, img_metas): 24 | """Test function for exporting to ONNX, without test time augmentation. 25 | 26 | Args: 27 | img (torch.Tensor): input images. 28 | img_metas (list[dict]): List of image information. 29 | 30 | Returns: 31 | tuple[Tensor, Tensor]: dets of shape [N, num_det, 5] 32 | and class labels of shape [N, num_det]. 
33 | """ 34 | x = self.extract_feat(img) 35 | outs = self.bbox_head.forward(x) 36 | # get shape as tensor 37 | img_shape = torch._shape_as_tensor(img)[2:] 38 | img_metas[0]['img_shape_for_onnx'] = img_shape 39 | 40 | det_bboxes, det_labels = self.bbox_head.onnx_export(*outs, img_metas) 41 | 42 | return det_bboxes, det_labels 43 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_caffe_c4_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/faster_rcnn_r50_caffe_c4.py', 3 | '../_base_/datasets/coco_detection.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | # use caffe img_norm 7 | img_norm_cfg = dict( 8 | mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) 9 | train_pipeline = [ 10 | dict(type='LoadImageFromFile'), 11 | dict(type='LoadAnnotations', with_bbox=True), 12 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 13 | dict(type='RandomFlip', flip_ratio=0.5), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size_divisor=32), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(1333, 800), 24 | flip=False, 25 | transforms=[ 26 | dict(type='Resize', keep_ratio=True), 27 | dict(type='RandomFlip'), 28 | dict(type='Normalize', **img_norm_cfg), 29 | dict(type='Pad', size_divisor=32), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | train=dict(pipeline=train_pipeline), 36 | val=dict(pipeline=test_pipeline), 37 | test=dict(pipeline=test_pipeline)) 38 | # optimizer 39 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 40 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_r50_caffe_c4_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/mask_rcnn_r50_caffe_c4.py', 3 | '../_base_/datasets/coco_instance.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | # use caffe img_norm 7 | img_norm_cfg = dict( 8 | mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) 9 | train_pipeline = [ 10 | dict(type='LoadImageFromFile'), 11 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 12 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 13 | dict(type='RandomFlip', flip_ratio=0.5), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size_divisor=32), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(1333, 800), 24 | flip=False, 25 | transforms=[ 26 | dict(type='Resize', keep_ratio=True), 27 | dict(type='RandomFlip'), 28 | dict(type='Normalize', **img_norm_cfg), 29 | dict(type='Pad', size_divisor=32), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | train=dict(pipeline=train_pipeline), 36 | val=dict(pipeline=test_pipeline), 37 | test=dict(pipeline=test_pipeline)) 38 | # optimizer 39 | optimizer = dict(type='SGD', lr=0.01, 
momentum=0.9, weight_decay=0.0001) 40 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask_rcnn_r50_fpn_1x_coco.py' 2 | model = dict( 3 | backbone=dict( 4 | norm_cfg=dict(requires_grad=False), 5 | style='caffe', 6 | init_cfg=dict( 7 | type='Pretrained', 8 | checkpoint='open-mmlab://detectron2/resnet50_caffe'))) 9 | # use caffe img_norm 10 | img_norm_cfg = dict( 11 | mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) 12 | train_pipeline = [ 13 | dict(type='LoadImageFromFile'), 14 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 15 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 16 | dict(type='RandomFlip', flip_ratio=0.5), 17 | dict(type='Normalize', **img_norm_cfg), 18 | dict(type='Pad', size_divisor=32), 19 | dict(type='DefaultFormatBundle'), 20 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 21 | ] 22 | test_pipeline = [ 23 | dict(type='LoadImageFromFile'), 24 | dict( 25 | type='MultiScaleFlipAug', 26 | img_scale=(1333, 800), 27 | flip=False, 28 | transforms=[ 29 | dict(type='Resize', keep_ratio=True), 30 | dict(type='RandomFlip'), 31 | dict(type='Normalize', **img_norm_cfg), 32 | dict(type='Pad', size_divisor=32), 33 | dict(type='ImageToTensor', keys=['img']), 34 | dict(type='Collect', keys=['img']), 35 | ]) 36 | ] 37 | data = dict( 38 | train=dict(pipeline=train_pipeline), 39 | val=dict(pipeline=test_pipeline), 40 | test=dict(pipeline=test_pipeline)) 41 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './faster_rcnn_r50_fpn_1x_coco.py' 2 | model = dict( 3 | backbone=dict( 4 | norm_cfg=dict(requires_grad=False), 5 | norm_eval=True, 6 | style='caffe', 7 | init_cfg=dict( 8 | type='Pretrained', 9 | checkpoint='open-mmlab://detectron2/resnet50_caffe'))) 10 | # use caffe img_norm 11 | img_norm_cfg = dict( 12 | mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) 13 | train_pipeline = [ 14 | dict(type='LoadImageFromFile'), 15 | dict(type='LoadAnnotations', with_bbox=True), 16 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 17 | dict(type='RandomFlip', flip_ratio=0.5), 18 | dict(type='Normalize', **img_norm_cfg), 19 | dict(type='Pad', size_divisor=32), 20 | dict(type='DefaultFormatBundle'), 21 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 22 | ] 23 | test_pipeline = [ 24 | dict(type='LoadImageFromFile'), 25 | dict( 26 | type='MultiScaleFlipAug', 27 | img_scale=(1333, 800), 28 | flip=False, 29 | transforms=[ 30 | dict(type='Resize', keep_ratio=True), 31 | dict(type='RandomFlip'), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='Pad', size_divisor=32), 34 | dict(type='ImageToTensor', keys=['img']), 35 | dict(type='Collect', keys=['img']), 36 | ]) 37 | ] 38 | data = dict( 39 | train=dict(pipeline=train_pipeline), 40 | val=dict(pipeline=test_pipeline), 41 | test=dict(pipeline=test_pipeline)) 42 | -------------------------------------------------------------------------------- /configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = ['./mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py'] 2 | 
pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth' # noqa 3 | 4 | depths = [2, 2, 18, 2] 5 | model = dict( 6 | backbone=dict( 7 | depths=depths, init_cfg=dict(type='Pretrained', 8 | checkpoint=pretrained))) 9 | 10 | # set all layers in backbone to lr_mult=0.1 11 | # set all norm layers, position_embedding, 12 | # query_embedding, level_embedding to decay_mult=0.0 13 | backbone_norm_multi = dict(lr_mult=0.1, decay_mult=0.0) 14 | backbone_embed_multi = dict(lr_mult=0.1, decay_mult=0.0) 15 | embed_multi = dict(lr_mult=1.0, decay_mult=0.0) 16 | custom_keys = { 17 | 'backbone': dict(lr_mult=0.1, decay_mult=1.0), 18 | 'backbone.patch_embed.norm': backbone_norm_multi, 19 | 'backbone.norm': backbone_norm_multi, 20 | 'absolute_pos_embed': backbone_embed_multi, 21 | 'relative_position_bias_table': backbone_embed_multi, 22 | 'query_embed': embed_multi, 23 | 'query_feat': embed_multi, 24 | 'level_embed': embed_multi 25 | } 26 | custom_keys.update({ 27 | f'backbone.stages.{stage_id}.blocks.{block_id}.norm': backbone_norm_multi 28 | for stage_id, num_blocks in enumerate(depths) 29 | for block_id in range(num_blocks) 30 | }) 31 | custom_keys.update({ 32 | f'backbone.stages.{stage_id}.downsample.norm': backbone_norm_multi 33 | for stage_id in range(len(depths) - 1) 34 | }) 35 | # optimizer 36 | optimizer = dict( 37 | paramwise_cfg=dict(custom_keys=custom_keys, norm_decay_mult=0.0)) 38 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/faster_rcnn_r50_caffe_dc5.py', 3 | '../_base_/datasets/coco_detection.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | # use caffe img_norm 7 | img_norm_cfg = dict( 8 | mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) 9 | train_pipeline = [ 10 | dict(type='LoadImageFromFile'), 11 | dict(type='LoadAnnotations', with_bbox=True), 12 | dict( 13 | type='Resize', 14 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), 15 | (1333, 768), (1333, 800)], 16 | multiscale_mode='value', 17 | keep_ratio=True), 18 | dict(type='RandomFlip', flip_ratio=0.5), 19 | dict(type='Normalize', **img_norm_cfg), 20 | dict(type='Pad', size_divisor=32), 21 | dict(type='DefaultFormatBundle'), 22 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 23 | ] 24 | test_pipeline = [ 25 | dict(type='LoadImageFromFile'), 26 | dict( 27 | type='MultiScaleFlipAug', 28 | img_scale=(1333, 800), 29 | flip=False, 30 | transforms=[ 31 | dict(type='Resize', keep_ratio=True), 32 | dict(type='RandomFlip'), 33 | dict(type='Normalize', **img_norm_cfg), 34 | dict(type='Pad', size_divisor=32), 35 | dict(type='ImageToTensor', keys=['img']), 36 | dict(type='Collect', keys=['img']), 37 | ]) 38 | ] 39 | data = dict( 40 | train=dict(pipeline=train_pipeline), 41 | val=dict(pipeline=test_pipeline), 42 | test=dict(pipeline=test_pipeline)) 43 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/pseudo_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
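# Note: typically chosen in configs via dict(type='PseudoSampler') when every assigned candidate should be kept, e.g. for dense heads; no positive/negative subsampling is performed.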
2 | import torch 3 | 4 | from ..builder import BBOX_SAMPLERS 5 | from .base_sampler import BaseSampler 6 | from .sampling_result import SamplingResult 7 | 8 | 9 | @BBOX_SAMPLERS.register_module() 10 | class PseudoSampler(BaseSampler): 11 | """A pseudo sampler that does not do sampling actually.""" 12 | 13 | def __init__(self, **kwargs): 14 | pass 15 | 16 | def _sample_pos(self, **kwargs): 17 | """Sample positive samples.""" 18 | raise NotImplementedError 19 | 20 | def _sample_neg(self, **kwargs): 21 | """Sample negative samples.""" 22 | raise NotImplementedError 23 | 24 | def sample(self, assign_result, bboxes, gt_bboxes, *args, **kwargs): 25 | """Directly returns the positive and negative indices of samples. 26 | 27 | Args: 28 | assign_result (:obj:`AssignResult`): Assigned results 29 | bboxes (torch.Tensor): Bounding boxes 30 | gt_bboxes (torch.Tensor): Ground truth boxes 31 | 32 | Returns: 33 | :obj:`SamplingResult`: sampler results 34 | """ 35 | pos_inds = torch.nonzero( 36 | assign_result.gt_inds > 0, as_tuple=False).squeeze(-1).unique() 37 | neg_inds = torch.nonzero( 38 | assign_result.gt_inds == 0, as_tuple=False).squeeze(-1).unique() 39 | gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8) 40 | sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 41 | assign_result, gt_flags) 42 | return sampling_result 43 | -------------------------------------------------------------------------------- /configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic.py: -------------------------------------------------------------------------------- 1 | _base_ = ['./mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic.py'] 2 | pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth' # noqa 3 | 4 | depths = [2, 2, 18, 2] 5 | model = dict( 6 | backbone=dict( 7 | depths=depths, init_cfg=dict(type='Pretrained', 8 | checkpoint=pretrained))) 9 | 10 | # set all layers in backbone to lr_mult=0.1 11 | # set all norm layers, position_embedding, 12 | # query_embedding, level_embedding to decay_mult=0.0 13 | backbone_norm_multi = dict(lr_mult=0.1, decay_mult=0.0) 14 | backbone_embed_multi = dict(lr_mult=0.1, decay_mult=0.0) 15 | embed_multi = dict(lr_mult=1.0, decay_mult=0.0) 16 | custom_keys = { 17 | 'backbone': dict(lr_mult=0.1, decay_mult=1.0), 18 | 'backbone.patch_embed.norm': backbone_norm_multi, 19 | 'backbone.norm': backbone_norm_multi, 20 | 'absolute_pos_embed': backbone_embed_multi, 21 | 'relative_position_bias_table': backbone_embed_multi, 22 | 'query_embed': embed_multi, 23 | 'query_feat': embed_multi, 24 | 'level_embed': embed_multi 25 | } 26 | custom_keys.update({ 27 | f'backbone.stages.{stage_id}.blocks.{block_id}.norm': backbone_norm_multi 28 | for stage_id, num_blocks in enumerate(depths) 29 | for block_id in range(num_blocks) 30 | }) 31 | custom_keys.update({ 32 | f'backbone.stages.{stage_id}.downsample.norm': backbone_norm_multi 33 | for stage_id in range(len(depths) - 1) 34 | }) 35 | # optimizer 36 | optimizer = dict( 37 | paramwise_cfg=dict(custom_keys=custom_keys, norm_decay_mult=0.0)) 38 | -------------------------------------------------------------------------------- /mmdet/models/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
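# Typical entry point (sketch; cfg is a loaded mmcv Config): detector = build_detector(cfg.model, train_cfg=cfg.get('train_cfg'), test_cfg=cfg.get('test_cfg'))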
2 | import warnings 3 | 4 | from mmcv.cnn import MODELS as MMCV_MODELS 5 | from mmcv.utils import Registry 6 | 7 | MODELS = Registry('models', parent=MMCV_MODELS) 8 | 9 | BACKBONES = MODELS 10 | NECKS = MODELS 11 | ROI_EXTRACTORS = MODELS 12 | SHARED_HEADS = MODELS 13 | HEADS = MODELS 14 | LOSSES = MODELS 15 | DETECTORS = MODELS 16 | 17 | 18 | def build_backbone(cfg): 19 | """Build backbone.""" 20 | return BACKBONES.build(cfg) 21 | 22 | 23 | def build_neck(cfg): 24 | """Build neck.""" 25 | return NECKS.build(cfg) 26 | 27 | 28 | def build_roi_extractor(cfg): 29 | """Build roi extractor.""" 30 | return ROI_EXTRACTORS.build(cfg) 31 | 32 | 33 | def build_shared_head(cfg): 34 | """Build shared head.""" 35 | return SHARED_HEADS.build(cfg) 36 | 37 | 38 | def build_head(cfg): 39 | """Build head.""" 40 | return HEADS.build(cfg) 41 | 42 | 43 | def build_loss(cfg): 44 | """Build loss.""" 45 | return LOSSES.build(cfg) 46 | 47 | 48 | def build_detector(cfg, train_cfg=None, test_cfg=None): 49 | """Build detector.""" 50 | if train_cfg is not None or test_cfg is not None: 51 | warnings.warn( 52 | 'train_cfg and test_cfg are deprecated, ' 53 | 'please specify them in model', UserWarning) 54 | assert cfg.get('train_cfg') is None or train_cfg is None, \ 55 | 'train_cfg specified in both outer field and model field ' 56 | assert cfg.get('test_cfg') is None or test_cfg is None, \ 57 | 'test_cfg specified in both outer field and model field ' 58 | return DETECTORS.build( 59 | cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg)) 60 | -------------------------------------------------------------------------------- /mmdet/datasets/api_wrappers/coco_api.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | # This file adds snake case aliases for the coco api 3 | 4 | import warnings 5 | 6 | import pycocotools 7 | from pycocotools.coco import COCO as _COCO 8 | from pycocotools.cocoeval import COCOeval as _COCOeval 9 | 10 | 11 | class COCO(_COCO): 12 | """This class is almost the same as official pycocotools package. 13 | 14 | It implements some snake case function aliases so that the COCO class has 15 | the same interface as the LVIS class. 16 | """ 17 | 18 | def __init__(self, annotation_file=None): 19 | if getattr(pycocotools, '__version__', '0') >= '12.0.2': 20 | warnings.warn( 21 | 'mmpycocotools is deprecated. Please install official pycocotools by "pip install pycocotools"', # noqa: E501 22 | UserWarning) 23 | super().__init__(annotation_file=annotation_file) 24 | self.img_ann_map = self.imgToAnns 25 | self.cat_img_map = self.catToImgs 26 | 27 | def get_ann_ids(self, img_ids=[], cat_ids=[], area_rng=[], iscrowd=None): 28 | return self.getAnnIds(img_ids, cat_ids, area_rng, iscrowd) 29 | 30 | def get_cat_ids(self, cat_names=[], sup_names=[], cat_ids=[]): 31 | return self.getCatIds(cat_names, sup_names, cat_ids) 32 | 33 | def get_img_ids(self, img_ids=[], cat_ids=[]): 34 | return self.getImgIds(img_ids, cat_ids) 35 | 36 | def load_anns(self, ids): 37 | return self.loadAnns(ids) 38 | 39 | def load_cats(self, ids): 40 | return self.loadCats(ids) 41 | 42 | def load_imgs(self, ids): 43 | return self.loadImgs(ids) 44 | 45 | 46 | # just for the ease of import 47 | COCOeval = _COCOeval 48 | -------------------------------------------------------------------------------- /mmdet/models/utils/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
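# Example (illustrative): build_linear_layer(dict(type='Linear'), in_features=256, out_features=80) returns nn.Linear(256, 80).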
2 | import torch.nn as nn 3 | from mmcv.utils import Registry, build_from_cfg 4 | 5 | TRANSFORMER = Registry('Transformer') 6 | LINEAR_LAYERS = Registry('linear layers') 7 | 8 | 9 | def build_transformer(cfg, default_args=None): 10 | """Builder for Transformer.""" 11 | return build_from_cfg(cfg, TRANSFORMER, default_args) 12 | 13 | 14 | LINEAR_LAYERS.register_module('Linear', module=nn.Linear) 15 | 16 | 17 | def build_linear_layer(cfg, *args, **kwargs): 18 | """Build linear layer. 19 | Args: 20 | cfg (None or dict): The linear layer config, which should contain: 21 | - type (str): Layer type. 22 | - layer args: Args needed to instantiate a linear layer. 23 | args (argument list): Arguments passed to the `__init__` 24 | method of the corresponding linear layer. 25 | kwargs (keyword arguments): Keyword arguments passed to the `__init__` 26 | method of the corresponding linear layer. 27 | Returns: 28 | nn.Module: Created linear layer. 29 | """ 30 | if cfg is None: 31 | cfg_ = dict(type='Linear') 32 | else: 33 | if not isinstance(cfg, dict): 34 | raise TypeError('cfg must be a dict') 35 | if 'type' not in cfg: 36 | raise KeyError('the cfg dict must contain the key "type"') 37 | cfg_ = cfg.copy() 38 | 39 | layer_type = cfg_.pop('type') 40 | if layer_type not in LINEAR_LAYERS: 41 | raise KeyError(f'Unrecognized linear type {layer_type}') 42 | else: 43 | linear_layer = LINEAR_LAYERS.get(layer_type) 44 | 45 | layer = linear_layer(*args, **kwargs, **cfg_) 46 | 47 | return layer 48 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = 'faster_rcnn_r50_fpn_mstrain_3x_coco.py' 2 | model = dict( 3 | backbone=dict( 4 | norm_cfg=dict(requires_grad=False), 5 | norm_eval=True, 6 | style='caffe', 7 | init_cfg=dict( 8 | type='Pretrained', 9 | checkpoint='open-mmlab://detectron2/resnet50_caffe'))) 10 | 11 | # use caffe img_norm 12 | img_norm_cfg = dict( 13 | mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) 14 | train_pipeline = [ 15 | dict(type='LoadImageFromFile'), 16 | dict(type='LoadAnnotations', with_bbox=True), 17 | dict( 18 | type='Resize', 19 | img_scale=[(1333, 640), (1333, 800)], 20 | multiscale_mode='range', 21 | keep_ratio=True), 22 | dict(type='RandomFlip', flip_ratio=0.5), 23 | dict(type='Normalize', **img_norm_cfg), 24 | dict(type='Pad', size_divisor=32), 25 | dict(type='DefaultFormatBundle'), 26 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 27 | ] 28 | test_pipeline = [ 29 | dict(type='LoadImageFromFile'), 30 | dict( 31 | type='MultiScaleFlipAug', 32 | img_scale=(1333, 800), 33 | flip=False, 34 | transforms=[ 35 | dict(type='Resize', keep_ratio=True), 36 | dict(type='RandomFlip'), 37 | dict(type='Normalize', **img_norm_cfg), 38 | dict(type='Pad', size_divisor=32), 39 | dict(type='ImageToTensor', keys=['img']), 40 | dict(type='Collect', keys=['img']), 41 | ]) 42 | ] 43 | 44 | data = dict( 45 | train=dict(dataset=dict(pipeline=train_pipeline)), 46 | val=dict(pipeline=test_pipeline), 47 | test=dict(pipeline=test_pipeline)) 48 | -------------------------------------------------------------------------------- /configs/maskformer/metafile.yml: -------------------------------------------------------------------------------- 1 | Collections: 2 | - Name: MaskFormer 3 | Metadata: 4 | Training Data: COCO 5 | Training Techniques: 6 | - AdamW 7 | - Weight Decay 8 | Training
Resources: 16x V100 GPUs 9 | Architecture: 10 | - MaskFormer 11 | Paper: 12 | URL: https://arxiv.org/pdf/2107.06278 13 | Title: 'Per-Pixel Classification is Not All You Need for Semantic Segmentation' 14 | README: configs/maskformer/README.md 15 | Code: 16 | URL: https://github.com/open-mmlab/mmdetection/blob/v2.22.0/mmdet/models/detectors/maskformer.py#L7 17 | Version: v2.22.0 18 | 19 | Models: 20 | - Name: maskformer_r50_mstrain_16x1_75e_coco 21 | In Collection: MaskFormer 22 | Config: configs/maskformer/maskformer_r50_mstrain_16x1_75e_coco.py 23 | Metadata: 24 | Training Memory (GB): 16.2 25 | Epochs: 75 26 | Results: 27 | - Task: Panoptic Segmentation 28 | Dataset: COCO 29 | Metrics: 30 | PQ: 46.9 31 | Weights: https://download.openmmlab.com/mmdetection/v2.0/maskformer/maskformer_r50_mstrain_16x1_75e_coco/maskformer_r50_mstrain_16x1_75e_coco_20220221_141956-bc2699cb.pth 32 | - Name: maskformer_swin-l-p4-w12_mstrain_64x1_300e_coco 33 | In Collection: MaskFormer 34 | Config: configs/maskformer/maskformer_swin-l-p4-w12_mstrain_64x1_300e_coco.py 35 | Metadata: 36 | Training Memory (GB): 27.2 37 | Epochs: 300 38 | Results: 39 | - Task: Panoptic Segmentation 40 | Dataset: COCO 41 | Metrics: 42 | PQ: 53.2 43 | Weights: https://download.openmmlab.com/mmdetection/v2.0/maskformer/maskformer_swin-l-p4-w12_mstrain_64x1_300e_coco/maskformer_swin-l-p4-w12_mstrain_64x1_300e_coco_20220326_221612-061b4eb8.pth 44 | -------------------------------------------------------------------------------- /mmdet/models/seg_heads/panoptic_fusion_heads/base_panoptic_fusion_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from abc import ABCMeta, abstractmethod 3 | 4 | from mmcv.runner import BaseModule 5 | 6 | from ...builder import build_loss 7 | 8 | 9 | class BasePanopticFusionHead(BaseModule, metaclass=ABCMeta): 10 | """Base class for panoptic heads.""" 11 | 12 | def __init__(self, 13 | num_things_classes=80, 14 | num_stuff_classes=53, 15 | test_cfg=None, 16 | loss_panoptic=None, 17 | init_cfg=None, 18 | **kwargs): 19 | super(BasePanopticFusionHead, self).__init__(init_cfg) 20 | self.num_things_classes = num_things_classes 21 | self.num_stuff_classes = num_stuff_classes 22 | self.num_classes = num_things_classes + num_stuff_classes 23 | self.test_cfg = test_cfg 24 | 25 | if loss_panoptic: 26 | self.loss_panoptic = build_loss(loss_panoptic) 27 | else: 28 | self.loss_panoptic = None 29 | 30 | @property 31 | def with_loss(self): 32 | """bool: whether the panoptic head contains loss function.""" 33 | return self.loss_panoptic is not None 34 | 35 | @abstractmethod 36 | def forward_train(self, gt_masks=None, gt_semantic_seg=None, **kwargs): 37 | """Forward function during training.""" 38 | 39 | @abstractmethod 40 | def simple_test(self, 41 | img_metas, 42 | det_labels, 43 | mask_preds, 44 | seg_preds, 45 | det_bboxes, 46 | cfg=None, 47 | **kwargs): 48 | """Test without augmentation.""" 49 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r101_caffe_fpn_mstrain_3x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = 'faster_rcnn_r50_fpn_mstrain_3x_coco.py' 2 | 3 | model = dict( 4 | backbone=dict( 5 | depth=101, 6 | norm_cfg=dict(requires_grad=False), 7 | norm_eval=True, 8 | style='caffe', 9 | init_cfg=dict( 10 | type='Pretrained', 11 | checkpoint='open-mmlab://detectron2/resnet101_caffe'))) 12 | 13 | # use 
caffe img_norm 14 | img_norm_cfg = dict( 15 | mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) 16 | train_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict(type='LoadAnnotations', with_bbox=True), 19 | dict( 20 | type='Resize', 21 | img_scale=[(1333, 640), (1333, 800)], 22 | multiscale_mode='range', 23 | keep_ratio=True), 24 | dict(type='RandomFlip', flip_ratio=0.5), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='DefaultFormatBundle'), 28 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 29 | ] 30 | test_pipeline = [ 31 | dict(type='LoadImageFromFile'), 32 | dict( 33 | type='MultiScaleFlipAug', 34 | img_scale=(1333, 800), 35 | flip=False, 36 | transforms=[ 37 | dict(type='Resize', keep_ratio=True), 38 | dict(type='RandomFlip'), 39 | dict(type='Normalize', **img_norm_cfg), 40 | dict(type='Pad', size_divisor=32), 41 | dict(type='ImageToTensor', keys=['img']), 42 | dict(type='Collect', keys=['img']), 43 | ]) 44 | ] 45 | 46 | data = dict( 47 | train=dict(dataset=dict(pipeline=train_pipeline)), 48 | val=dict(pipeline=test_pipeline), 49 | test=dict(pipeline=test_pipeline)) 50 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './faster_rcnn_r50_fpn_1x_coco.py' 2 | model = dict( 3 | backbone=dict( 4 | norm_cfg=dict(requires_grad=False), 5 | norm_eval=True, 6 | style='caffe', 7 | init_cfg=dict( 8 | type='Pretrained', 9 | checkpoint='open-mmlab://detectron2/resnet50_caffe'))) 10 | # use caffe img_norm 11 | img_norm_cfg = dict( 12 | mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) 13 | train_pipeline = [ 14 | dict(type='LoadImageFromFile'), 15 | dict(type='LoadAnnotations', with_bbox=True), 16 | dict( 17 | type='Resize', 18 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), 19 | (1333, 768), (1333, 800)], 20 | multiscale_mode='value', 21 | keep_ratio=True), 22 | dict(type='RandomFlip', flip_ratio=0.5), 23 | dict(type='Normalize', **img_norm_cfg), 24 | dict(type='Pad', size_divisor=32), 25 | dict(type='DefaultFormatBundle'), 26 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 27 | ] 28 | test_pipeline = [ 29 | dict(type='LoadImageFromFile'), 30 | dict( 31 | type='MultiScaleFlipAug', 32 | img_scale=(1333, 800), 33 | flip=False, 34 | transforms=[ 35 | dict(type='Resize', keep_ratio=True), 36 | dict(type='RandomFlip'), 37 | dict(type='Normalize', **img_norm_cfg), 38 | dict(type='Pad', size_divisor=32), 39 | dict(type='ImageToTensor', keys=['img']), 40 | dict(type='Collect', keys=['img']), 41 | ]) 42 | ] 43 | data = dict( 44 | train=dict(pipeline=train_pipeline), 45 | val=dict(pipeline=test_pipeline), 46 | test=dict(pipeline=test_pipeline)) 47 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/mask_pseudo_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
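# Mask analogue of PseudoSampler: every assigned sample is kept rather than subsampled, and a MaskSamplingResult is returned.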
2 | """copy from 3 | https://github.com/ZwwWayne/K-Net/blob/main/knet/det/mask_pseudo_sampler.py.""" 4 | 5 | import torch 6 | 7 | from mmdet.core.bbox.builder import BBOX_SAMPLERS 8 | from .base_sampler import BaseSampler 9 | from .mask_sampling_result import MaskSamplingResult 10 | 11 | 12 | @BBOX_SAMPLERS.register_module() 13 | class MaskPseudoSampler(BaseSampler): 14 | """A pseudo sampler that does not do sampling actually.""" 15 | 16 | def __init__(self, **kwargs): 17 | pass 18 | 19 | def _sample_pos(self, **kwargs): 20 | """Sample positive samples.""" 21 | raise NotImplementedError 22 | 23 | def _sample_neg(self, **kwargs): 24 | """Sample negative samples.""" 25 | raise NotImplementedError 26 | 27 | def sample(self, assign_result, masks, gt_masks, **kwargs): 28 | """Directly returns the positive and negative indices of samples. 29 | 30 | Args: 31 | assign_result (:obj:`AssignResult`): Assigned results 32 | masks (torch.Tensor): Bounding boxes 33 | gt_masks (torch.Tensor): Ground truth boxes 34 | Returns: 35 | :obj:`SamplingResult`: sampler results 36 | """ 37 | pos_inds = torch.nonzero( 38 | assign_result.gt_inds > 0, as_tuple=False).squeeze(-1).unique() 39 | neg_inds = torch.nonzero( 40 | assign_result.gt_inds == 0, as_tuple=False).squeeze(-1).unique() 41 | gt_flags = masks.new_zeros(masks.shape[0], dtype=torch.uint8) 42 | sampling_result = MaskSamplingResult(pos_inds, neg_inds, masks, 43 | gt_masks, assign_result, gt_flags) 44 | return sampling_result 45 | -------------------------------------------------------------------------------- /mmdet/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .assigners import (AssignResult, BaseAssigner, CenterRegionAssigner, 3 | MaxIoUAssigner, RegionAssigner) 4 | from .builder import build_assigner, build_bbox_coder, build_sampler 5 | from .coder import (BaseBBoxCoder, DeltaXYWHBBoxCoder, DistancePointBBoxCoder, 6 | PseudoBBoxCoder, TBLRBBoxCoder) 7 | from .iou_calculators import BboxOverlaps2D, bbox_overlaps 8 | from .samplers import (BaseSampler, CombinedSampler, 9 | InstanceBalancedPosSampler, IoUBalancedNegSampler, 10 | OHEMSampler, PseudoSampler, RandomSampler, 11 | SamplingResult, ScoreHLRSampler) 12 | from .transforms import (bbox2distance, bbox2result, bbox2roi, 13 | bbox_cxcywh_to_xyxy, bbox_flip, bbox_mapping, 14 | bbox_mapping_back, bbox_rescale, bbox_xyxy_to_cxcywh, 15 | distance2bbox, find_inside_bboxes, roi2bbox) 16 | 17 | __all__ = [ 18 | 'bbox_overlaps', 'BboxOverlaps2D', 'BaseAssigner', 'MaxIoUAssigner', 19 | 'AssignResult', 'BaseSampler', 'PseudoSampler', 'RandomSampler', 20 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 21 | 'OHEMSampler', 'SamplingResult', 'ScoreHLRSampler', 'build_assigner', 22 | 'build_sampler', 'bbox_flip', 'bbox_mapping', 'bbox_mapping_back', 23 | 'bbox2roi', 'roi2bbox', 'bbox2result', 'distance2bbox', 'bbox2distance', 24 | 'build_bbox_coder', 'BaseBBoxCoder', 'PseudoBBoxCoder', 25 | 'DeltaXYWHBBoxCoder', 'TBLRBBoxCoder', 'DistancePointBBoxCoder', 26 | 'CenterRegionAssigner', 'bbox_rescale', 'bbox_cxcywh_to_xyxy', 27 | 'bbox_xyxy_to_cxcywh', 'RegionAssigner', 'find_inside_bboxes' 28 | ] 29 | -------------------------------------------------------------------------------- /mmdet/datasets/wider_face.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import os.path as osp 3 | import xml.etree.ElementTree as ET 4 | 5 | import mmcv 6 | 7 | from .builder import DATASETS 8 | from .xml_style import XMLDataset 9 | 10 | 11 | @DATASETS.register_module() 12 | class WIDERFaceDataset(XMLDataset): 13 | """Reader for the WIDER Face dataset in PASCAL VOC format. 14 | 15 | Conversion scripts can be found in 16 | https://github.com/sovrasov/wider-face-pascal-voc-annotations 17 | """ 18 | CLASSES = ('face', ) 19 | 20 | PALETTE = [(0, 255, 0)] 21 | 22 | def __init__(self, **kwargs): 23 | super(WIDERFaceDataset, self).__init__(**kwargs) 24 | 25 | def load_annotations(self, ann_file): 26 | """Load annotation from WIDERFace XML style annotation file. 27 | 28 | Args: 29 | ann_file (str): Path of XML file. 30 | 31 | Returns: 32 | list[dict]: Annotation info from XML file. 33 | """ 34 | 35 | data_infos = [] 36 | img_ids = mmcv.list_from_file(ann_file) 37 | for img_id in img_ids: 38 | filename = f'{img_id}.jpg' 39 | xml_path = osp.join(self.img_prefix, 'Annotations', 40 | f'{img_id}.xml') 41 | tree = ET.parse(xml_path) 42 | root = tree.getroot() 43 | size = root.find('size') 44 | width = int(size.find('width').text) 45 | height = int(size.find('height').text) 46 | folder = root.find('folder').text 47 | data_infos.append( 48 | dict( 49 | id=img_id, 50 | filename=osp.join(folder, filename), 51 | width=width, 52 | height=height)) 53 | 54 | return data_infos 55 | -------------------------------------------------------------------------------- /configs/mask2former/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic.py: -------------------------------------------------------------------------------- 1 | _base_ = ['./mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic.py'] 2 | pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384.pth' # noqa 3 | 4 | depths = [2, 2, 18, 2] 5 | model = dict( 6 | backbone=dict( 7 | pretrain_img_size=384, 8 | embed_dims=128, 9 | depths=depths, 10 | num_heads=[4, 8, 16, 32], 11 | window_size=12, 12 | init_cfg=dict(type='Pretrained', checkpoint=pretrained)), 13 | panoptic_head=dict(in_channels=[128, 256, 512, 1024])) 14 | 15 | # set all layers in backbone to lr_mult=0.1 16 | # set all norm layers, position_embedding, 17 | # query_embedding, level_embedding to decay_mult=0.0 18 | backbone_norm_multi = dict(lr_mult=0.1, decay_mult=0.0) 19 | backbone_embed_multi = dict(lr_mult=0.1, decay_mult=0.0) 20 | embed_multi = dict(lr_mult=1.0, decay_mult=0.0) 21 | custom_keys = { 22 | 'backbone': dict(lr_mult=0.1, decay_mult=1.0), 23 | 'backbone.patch_embed.norm': backbone_norm_multi, 24 | 'backbone.norm': backbone_norm_multi, 25 | 'absolute_pos_embed': backbone_embed_multi, 26 | 'relative_position_bias_table': backbone_embed_multi, 27 | 'query_embed': embed_multi, 28 | 'query_feat': embed_multi, 29 | 'level_embed': embed_multi 30 | } 31 | custom_keys.update({ 32 | f'backbone.stages.{stage_id}.blocks.{block_id}.norm': backbone_norm_multi 33 | for stage_id, num_blocks in enumerate(depths) 34 | for block_id in range(num_blocks) 35 | }) 36 | custom_keys.update({ 37 | f'backbone.stages.{stage_id}.downsample.norm': backbone_norm_multi 38 | for stage_id in range(len(depths) - 1) 39 | }) 40 | # optimizer 41 | optimizer = dict( 42 | paramwise_cfg=dict(custom_keys=custom_keys, norm_decay_mult=0.0)) 43 | -------------------------------------------------------------------------------- /mmdet/models/losses/__init__.py: --------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .accuracy import Accuracy, accuracy 3 | from .ae_loss import AssociativeEmbeddingLoss 4 | from .balanced_l1_loss import BalancedL1Loss, balanced_l1_loss 5 | from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy, 6 | cross_entropy, mask_cross_entropy) 7 | from .dice_loss import DiceLoss 8 | from .focal_loss import FocalLoss, sigmoid_focal_loss 9 | from .gaussian_focal_loss import GaussianFocalLoss 10 | from .gfocal_loss import DistributionFocalLoss, QualityFocalLoss 11 | from .ghm_loss import GHMC, GHMR 12 | from .iou_loss import (BoundedIoULoss, CIoULoss, DIoULoss, GIoULoss, IoULoss, 13 | bounded_iou_loss, iou_loss) 14 | from .kd_loss import KnowledgeDistillationKLDivLoss 15 | from .mse_loss import MSELoss, mse_loss 16 | from .pisa_loss import carl_loss, isr_p 17 | from .seesaw_loss import SeesawLoss 18 | from .smooth_l1_loss import L1Loss, SmoothL1Loss, l1_loss, smooth_l1_loss 19 | from .utils import reduce_loss, weight_reduce_loss, weighted_loss 20 | from .varifocal_loss import VarifocalLoss 21 | 22 | __all__ = [ 23 | 'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy', 24 | 'mask_cross_entropy', 'CrossEntropyLoss', 'sigmoid_focal_loss', 25 | 'FocalLoss', 'smooth_l1_loss', 'SmoothL1Loss', 'balanced_l1_loss', 26 | 'BalancedL1Loss', 'mse_loss', 'MSELoss', 'iou_loss', 'bounded_iou_loss', 27 | 'IoULoss', 'BoundedIoULoss', 'GIoULoss', 'DIoULoss', 'CIoULoss', 'GHMC', 28 | 'GHMR', 'reduce_loss', 'weight_reduce_loss', 'weighted_loss', 'L1Loss', 29 | 'l1_loss', 'isr_p', 'carl_loss', 'AssociativeEmbeddingLoss', 30 | 'GaussianFocalLoss', 'QualityFocalLoss', 'DistributionFocalLoss', 31 | 'VarifocalLoss', 'KnowledgeDistillationKLDivLoss', 'SeesawLoss', 'DiceLoss' 32 | ] 33 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask_rcnn_r50_fpn_1x_coco.py' 2 | model = dict( 3 | backbone=dict( 4 | norm_cfg=dict(requires_grad=False), 5 | style='caffe', 6 | init_cfg=dict( 7 | type='Pretrained', 8 | checkpoint='open-mmlab://detectron2/resnet50_caffe'))) 9 | # use caffe img_norm 10 | img_norm_cfg = dict( 11 | mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) 12 | train_pipeline = [ 13 | dict(type='LoadImageFromFile'), 14 | dict( 15 | type='LoadAnnotations', 16 | with_bbox=True, 17 | with_mask=True, 18 | poly2mask=False), 19 | dict( 20 | type='Resize', 21 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), 22 | (1333, 768), (1333, 800)], 23 | multiscale_mode='value', 24 | keep_ratio=True), 25 | dict(type='RandomFlip', flip_ratio=0.5), 26 | dict(type='Normalize', **img_norm_cfg), 27 | dict(type='Pad', size_divisor=32), 28 | dict(type='DefaultFormatBundle'), 29 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 30 | ] 31 | test_pipeline = [ 32 | dict(type='LoadImageFromFile'), 33 | dict( 34 | type='MultiScaleFlipAug', 35 | img_scale=(1333, 800), 36 | flip=False, 37 | transforms=[ 38 | dict(type='Resize', keep_ratio=True), 39 | dict(type='RandomFlip'), 40 | dict(type='Normalize', **img_norm_cfg), 41 | dict(type='Pad', size_divisor=32), 42 | dict(type='ImageToTensor', keys=['img']), 43 | dict(type='Collect', keys=['img']), 44 | ]) 45 | ] 46 | data = dict( 47 | train=dict(pipeline=train_pipeline), 48 | val=dict(pipeline=test_pipeline), 49 | test=dict(pipeline=test_pipeline)) 
50 | -------------------------------------------------------------------------------- /mmdet/datasets/pipelines/compose.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import collections 3 | 4 | from mmcv.utils import build_from_cfg 5 | 6 | from ..builder import PIPELINES 7 | 8 | 9 | @PIPELINES.register_module() 10 | class Compose: 11 | """Compose multiple transforms sequentially. 12 | 13 | Args: 14 | transforms (Sequence[dict | callable]): Sequence of transform object or 15 | config dict to be composed. 16 | """ 17 | 18 | def __init__(self, transforms): 19 | assert isinstance(transforms, collections.abc.Sequence) 20 | self.transforms = [] 21 | for transform in transforms: 22 | if isinstance(transform, dict): 23 | transform = build_from_cfg(transform, PIPELINES) 24 | self.transforms.append(transform) 25 | elif callable(transform): 26 | self.transforms.append(transform) 27 | else: 28 | raise TypeError('transform must be callable or a dict') 29 | 30 | def __call__(self, data): 31 | """Call function to apply transforms sequentially. 32 | 33 | Args: 34 | data (dict): A result dict contains the data to transform. 35 | 36 | Returns: 37 | dict: Transformed data. 38 | """ 39 | 40 | for t in self.transforms: 41 | data = t(data) 42 | if data is None: 43 | return None 44 | return data 45 | 46 | def __repr__(self): 47 | format_string = self.__class__.__name__ + '(' 48 | for t in self.transforms: 49 | str_ = t.__repr__() 50 | if 'Compose(' in str_: 51 | str_ = str_.replace('\n', '\n ') 52 | format_string += '\n' 53 | format_string += f' {str_}' 54 | format_string += '\n)' 55 | return format_string 56 | -------------------------------------------------------------------------------- /mmdet/models/detectors/cascade_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from ..builder import DETECTORS 3 | from .two_stage import TwoStageDetector 4 | 5 | 6 | @DETECTORS.register_module() 7 | class CascadeRCNN(TwoStageDetector): 8 | r"""Implementation of `Cascade R-CNN: Delving into High Quality Object 9 | Detection <https://arxiv.org/abs/1712.00726>`_""" 10 | 11 | def __init__(self, 12 | backbone, 13 | neck=None, 14 | rpn_head=None, 15 | roi_head=None, 16 | train_cfg=None, 17 | test_cfg=None, 18 | pretrained=None, 19 | init_cfg=None): 20 | super(CascadeRCNN, self).__init__( 21 | backbone=backbone, 22 | neck=neck, 23 | rpn_head=rpn_head, 24 | roi_head=roi_head, 25 | train_cfg=train_cfg, 26 | test_cfg=test_cfg, 27 | pretrained=pretrained, 28 | init_cfg=init_cfg) 29 | 30 | def show_result(self, data, result, **kwargs): 31 | """Show prediction results of the detector. 32 | 33 | Args: 34 | data (str or np.ndarray): Image filename or loaded image. 35 | result (Tensor or tuple): The results to draw over `img` 36 | bbox_result or (bbox_result, segm_result). 37 | 38 | Returns: 39 | np.ndarray: The image with bboxes drawn on it.
40 | """ 41 | if self.with_mask: 42 | ms_bbox_result, ms_segm_result = result 43 | if isinstance(ms_bbox_result, dict): 44 | result = (ms_bbox_result['ensemble'], 45 | ms_segm_result['ensemble']) 46 | else: 47 | if isinstance(result, dict): 48 | result = result['ensemble'] 49 | return super(CascadeRCNN, self).show_result(data, result, **kwargs) 50 | -------------------------------------------------------------------------------- /configs/_base_/datasets/coco_detection.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CocoDataset' 3 | data_root = 'data/coco/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True), 9 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(1333, 800), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=2, 33 | workers_per_gpu=2, 34 | train=dict( 35 | type=dataset_type, 36 | ann_file=data_root + 'annotations/instances_train2017.json', 37 | img_prefix=data_root + 'train2017/', 38 | pipeline=train_pipeline), 39 | val=dict( 40 | type=dataset_type, 41 | ann_file=data_root + 'annotations/instances_val2017.json', 42 | img_prefix=data_root + 'val2017/', 43 | pipeline=test_pipeline), 44 | test=dict( 45 | type=dataset_type, 46 | ann_file=data_root + 'annotations/instances_val2017.json', 47 | img_prefix=data_root + 'val2017/', 48 | pipeline=test_pipeline)) 49 | evaluation = dict(interval=1, metric='bbox') 50 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../common/mstrain-poly_3x_coco_instance.py', 3 | '../_base_/models/mask_rcnn_r50_fpn.py' 4 | ] 5 | 6 | model = dict( 7 | backbone=dict( 8 | depth=101, 9 | norm_cfg=dict(requires_grad=False), 10 | norm_eval=True, 11 | style='caffe', 12 | init_cfg=dict( 13 | type='Pretrained', 14 | checkpoint='open-mmlab://detectron2/resnet101_caffe'))) 15 | # use caffe img_norm 16 | img_norm_cfg = dict( 17 | mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) 18 | train_pipeline = [ 19 | dict(type='LoadImageFromFile'), 20 | dict( 21 | type='LoadAnnotations', 22 | with_bbox=True, 23 | with_mask=True, 24 | poly2mask=False), 25 | dict( 26 | type='Resize', 27 | img_scale=[(1333, 640), (1333, 800)], 28 | multiscale_mode='range', 29 | keep_ratio=True), 30 | dict(type='RandomFlip', flip_ratio=0.5), 31 | dict(type='Normalize', **img_norm_cfg), 32 | dict(type='Pad', size_divisor=32), 33 | dict(type='DefaultFormatBundle'), 34 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 35 | ] 36 | test_pipeline = [ 37 | 
dict(type='LoadImageFromFile'), 38 | dict( 39 | type='MultiScaleFlipAug', 40 | img_scale=(1333, 800), 41 | flip=False, 42 | transforms=[ 43 | dict(type='Resize', keep_ratio=True), 44 | dict(type='RandomFlip'), 45 | dict(type='Normalize', **img_norm_cfg), 46 | dict(type='Pad', size_divisor=32), 47 | dict(type='ImageToTensor', keys=['img']), 48 | dict(type='Collect', keys=['img']), 49 | ]) 50 | ] 51 | 52 | data = dict( 53 | train=dict(dataset=dict(pipeline=train_pipeline)), 54 | val=dict(pipeline=test_pipeline), 55 | test=dict(pipeline=test_pipeline)) 56 | -------------------------------------------------------------------------------- /mmdet/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .auto_augment import (AutoAugment, BrightnessTransform, ColorTransform, 3 | ContrastTransform, EqualizeTransform, Rotate, Shear, 4 | Translate) 5 | from .compose import Compose 6 | from .formatting import (Collect, DefaultFormatBundle, ImageToTensor, 7 | ToDataContainer, ToTensor, Transpose, to_tensor) 8 | from .instaboost import InstaBoost 9 | from .loading import (FilterAnnotations, LoadAnnotations, LoadImageFromFile, 10 | LoadImageFromWebcam, LoadMultiChannelImageFromFiles, 11 | LoadPanopticAnnotations, LoadProposals) 12 | from .test_time_aug import MultiScaleFlipAug 13 | from .transforms import (Albu, CopyPaste, CutOut, Expand, MinIoURandomCrop, 14 | MixUp, Mosaic, Normalize, Pad, PhotoMetricDistortion, 15 | RandomAffine, RandomCenterCropPad, RandomCrop, 16 | RandomFlip, RandomShift, Resize, SegRescale, 17 | YOLOXHSVRandomAug) 18 | 19 | __all__ = [ 20 | 'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 'ToDataContainer', 21 | 'Transpose', 'Collect', 'DefaultFormatBundle', 'LoadAnnotations', 22 | 'LoadImageFromFile', 'LoadImageFromWebcam', 'LoadPanopticAnnotations', 23 | 'LoadMultiChannelImageFromFiles', 'LoadProposals', 'FilterAnnotations', 24 | 'MultiScaleFlipAug', 'Resize', 'RandomFlip', 'Pad', 'RandomCrop', 25 | 'Normalize', 'SegRescale', 'MinIoURandomCrop', 'Expand', 26 | 'PhotoMetricDistortion', 'Albu', 'InstaBoost', 'RandomCenterCropPad', 27 | 'AutoAugment', 'CutOut', 'Shear', 'Rotate', 'ColorTransform', 28 | 'EqualizeTransform', 'BrightnessTransform', 'ContrastTransform', 29 | 'Translate', 'RandomShift', 'Mosaic', 'MixUp', 'RandomAffine', 30 | 'YOLOXHSVRandomAug', 'CopyPaste' 31 | ] 32 | -------------------------------------------------------------------------------- /mmdet/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
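# This package collects small reusable building blocks (transformer
# layers, positional encodings, normed predictors, pooling wrappers,
# etc.) and re-exports them below, so call sites can simply write
# ``from mmdet.models.utils import build_transformer``.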
2 | from .brick_wrappers import AdaptiveAvgPool2d, adaptive_avg_pool2d 3 | from .builder import build_linear_layer, build_transformer 4 | from .ckpt_convert import pvt_convert 5 | from .conv_upsample import ConvUpsample 6 | from .csp_layer import CSPLayer 7 | from .gaussian_target import gaussian_radius, gen_gaussian_target 8 | from .inverted_residual import InvertedResidual 9 | from .make_divisible import make_divisible 10 | from .misc import interpolate_as, sigmoid_geometric_mean 11 | from .normed_predictor import NormedConv2d, NormedLinear 12 | from .panoptic_gt_processing import preprocess_panoptic_gt 13 | from .point_sample import (get_uncertain_point_coords_with_randomness, 14 | get_uncertainty) 15 | from .positional_encoding import (LearnedPositionalEncoding, 16 | SinePositionalEncoding) 17 | from .res_layer import ResLayer, SimplifiedBasicBlock 18 | from .se_layer import DyReLU, SELayer 19 | from .transformer import (DetrTransformerDecoder, DetrTransformerDecoderLayer, 20 | DynamicConv, PatchEmbed, Transformer, nchw_to_nlc, 21 | nlc_to_nchw) 22 | 23 | __all__ = [ 24 | 'ResLayer', 'gaussian_radius', 'gen_gaussian_target', 25 | 'DetrTransformerDecoderLayer', 'DetrTransformerDecoder', 'Transformer', 26 | 'build_transformer', 'build_linear_layer', 'SinePositionalEncoding', 27 | 'LearnedPositionalEncoding', 'DynamicConv', 'SimplifiedBasicBlock', 28 | 'NormedLinear', 'NormedConv2d', 'make_divisible', 'InvertedResidual', 29 | 'SELayer', 'interpolate_as', 'ConvUpsample', 'CSPLayer', 30 | 'adaptive_avg_pool2d', 'AdaptiveAvgPool2d', 'PatchEmbed', 'nchw_to_nlc', 31 | 'nlc_to_nchw', 'pvt_convert', 'sigmoid_geometric_mean', 32 | 'preprocess_panoptic_gt', 'DyReLU', 33 | 'get_uncertain_point_coords_with_randomness', 'get_uncertainty' 34 | ] 35 | -------------------------------------------------------------------------------- /configs/_base_/datasets/coco_instance.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CocoDataset' 3 | data_root = 'data/coco/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 9 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(1333, 800), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=2, 33 | workers_per_gpu=2, 34 | train=dict( 35 | type=dataset_type, 36 | ann_file=data_root + 'annotations/instances_train2017.json', 37 | img_prefix=data_root + 'train2017/', 38 | pipeline=train_pipeline), 39 | val=dict( 40 | type=dataset_type, 41 | ann_file=data_root + 'annotations/instances_val2017.json', 42 | img_prefix=data_root + 'val2017/', 43 | pipeline=test_pipeline), 44 | test=dict( 45 | type=dataset_type, 46 | ann_file=data_root + 
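        # the test split reuses the val2017 annotations, since COCO
        # test-dev labels are not publicly released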
'annotations/instances_val2017.json',
47 | img_prefix=data_root + 'val2017/',
48 | pipeline=test_pipeline))
49 | evaluation = dict(metric=['bbox', 'segm'])
50 |
--------------------------------------------------------------------------------
/mmdet/utils/split_batch.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import torch
3 |
4 |
5 | def split_batch(img, img_metas, kwargs):
6 | """Split data_batch by tags.
7 |
8 | Code is modified from
9 | # noqa: E501
10 |
11 | Args:
12 | img (Tensor): of shape (N, C, H, W) encoding input images.
13 | Typically these should be mean centered and std scaled.
14 | img_metas (list[dict]): List of image info dict where each dict
15 | has: 'img_shape', 'scale_factor', 'flip', and may also contain
16 | 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
17 | For details on the values of these keys, see
18 | :class:`mmdet.datasets.pipelines.Collect`.
19 | kwargs (dict): Specific to concrete implementation.
20 |
21 | Returns:
22 | data_groups (dict): a dict of the data_batch split by tags,
23 | such as 'sup', 'unsup_teacher', and 'unsup_student'.
24 | """
25 |
26 | # only stack img in the batch
27 | def fuse_list(obj_list, obj):
28 | return torch.stack(obj_list) if isinstance(obj,
29 | torch.Tensor) else obj_list
30 |
31 | # select data with tag from data_batch
32 | def select_group(data_batch, current_tag):
33 | group_flag = [tag == current_tag for tag in data_batch['tag']]
34 | return {
35 | k: fuse_list([vv for vv, gf in zip(v, group_flag) if gf], v)
36 | for k, v in data_batch.items()
37 | }
38 |
39 | kwargs.update({'img': img, 'img_metas': img_metas})
40 | kwargs.update({'tag': [meta['tag'] for meta in img_metas]})
41 | tags = list(set(kwargs['tag']))
42 | data_groups = {tag: select_group(kwargs, tag) for tag in tags}
43 | for tag, group in data_groups.items():
44 | group.pop('tag')
45 | return data_groups
46 |
--------------------------------------------------------------------------------
/configs/_base_/models/ssd300.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | input_size = 300
3 | model = dict(
4 | type='SingleStageDetector',
5 | backbone=dict(
6 | type='SSDVGG',
7 | depth=16,
8 | with_last_pool=False,
9 | ceil_mode=True,
10 | out_indices=(3, 4),
11 | out_feature_indices=(22, 34),
12 | init_cfg=dict(
13 | type='Pretrained', checkpoint='open-mmlab://vgg16_caffe')),
14 | neck=dict(
15 | type='SSDNeck',
16 | in_channels=(512, 1024),
17 | out_channels=(512, 1024, 512, 256, 256, 256),
18 | level_strides=(2, 2, 1, 1),
19 | level_paddings=(1, 1, 0, 0),
20 | l2_norm_scale=20),
21 | bbox_head=dict(
22 | type='SSDHead',
23 | in_channels=(512, 1024, 512, 256, 256, 256),
24 | num_classes=80,
25 | anchor_generator=dict(
26 | type='SSDAnchorGenerator',
27 | scale_major=False,
28 | input_size=input_size,
29 | basesize_ratio_range=(0.15, 0.9),
30 | strides=[8, 16, 32, 64, 100, 300],
31 | ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]),
32 | bbox_coder=dict(
33 | type='DeltaXYWHBBoxCoder',
34 | target_means=[.0, .0, .0, .0],
35 | target_stds=[0.1, 0.1, 0.2, 0.2])),
36 | # model training and testing settings
37 | train_cfg=dict(
38 | assigner=dict(
39 | type='MaxIoUAssigner',
40 | pos_iou_thr=0.5,
41 | neg_iou_thr=0.5,
42 | min_pos_iou=0.,
43 | ignore_iof_thr=-1,
44 | gt_max_assign_all=False),
45 | smoothl1_beta=1.,
46 | allowed_border=-1,
47 | pos_weight=-1,
48 | neg_pos_ratio=3,
49 | debug=False),
50 |
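    # test-time behaviour: keep at most nms_pre candidates before NMS,
    # drop boxes scoring below score_thr, run NMS at IoU 0.45, and
    # return up to max_per_img detections per image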
test_cfg=dict( 51 | nms_pre=1000, 52 | nms=dict(type='nms', iou_threshold=0.45), 53 | min_bbox_size=0, 54 | score_thr=0.02, 55 | max_per_img=200)) 56 | cudnn_benchmark = True 57 | -------------------------------------------------------------------------------- /mmdet/core/hook/sync_norm_hook.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from collections import OrderedDict 3 | 4 | from mmcv.runner import get_dist_info 5 | from mmcv.runner.hooks import HOOKS, Hook 6 | from torch import nn 7 | 8 | from ..utils.dist_utils import all_reduce_dict 9 | 10 | 11 | def get_norm_states(module): 12 | async_norm_states = OrderedDict() 13 | for name, child in module.named_modules(): 14 | if isinstance(child, nn.modules.batchnorm._NormBase): 15 | for k, v in child.state_dict().items(): 16 | async_norm_states['.'.join([name, k])] = v 17 | return async_norm_states 18 | 19 | 20 | @HOOKS.register_module() 21 | class SyncNormHook(Hook): 22 | """Synchronize Norm states after training epoch, currently used in YOLOX. 23 | 24 | Args: 25 | num_last_epochs (int): The number of latter epochs in the end of the 26 | training to switch to synchronizing norm interval. Default: 15. 27 | interval (int): Synchronizing norm interval. Default: 1. 28 | """ 29 | 30 | def __init__(self, num_last_epochs=15, interval=1): 31 | self.interval = interval 32 | self.num_last_epochs = num_last_epochs 33 | 34 | def before_train_epoch(self, runner): 35 | epoch = runner.epoch 36 | if (epoch + 1) == runner.max_epochs - self.num_last_epochs: 37 | # Synchronize norm every epoch. 38 | self.interval = 1 39 | 40 | def after_train_epoch(self, runner): 41 | """Synchronizing norm.""" 42 | epoch = runner.epoch 43 | module = runner.model 44 | if (epoch + 1) % self.interval == 0: 45 | _, world_size = get_dist_info() 46 | if world_size == 1: 47 | return 48 | norm_states = get_norm_states(module) 49 | if len(norm_states) == 0: 50 | return 51 | norm_states = all_reduce_dict(norm_states, op='mean') 52 | module.load_state_dict(norm_states, strict=False) 53 | -------------------------------------------------------------------------------- /configs/_base_/models/retinanet_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=True), 11 | norm_eval=True, 12 | style='pytorch', 13 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | start_level=1, 19 | add_extra_convs='on_input', 20 | num_outs=5), 21 | bbox_head=dict( 22 | type='RetinaHead', 23 | num_classes=80, 24 | in_channels=256, 25 | stacked_convs=4, 26 | feat_channels=256, 27 | anchor_generator=dict( 28 | type='AnchorGenerator', 29 | octave_base_scale=4, 30 | scales_per_octave=3, 31 | ratios=[0.5, 1.0, 2.0], 32 | strides=[8, 16, 32, 64, 128]), 33 | bbox_coder=dict( 34 | type='DeltaXYWHBBoxCoder', 35 | target_means=[.0, .0, .0, .0], 36 | target_stds=[1.0, 1.0, 1.0, 1.0]), 37 | loss_cls=dict( 38 | type='FocalLoss', 39 | use_sigmoid=True, 40 | gamma=2.0, 41 | alpha=0.25, 42 | loss_weight=1.0), 43 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 44 | # model training and testing settings 45 | train_cfg=dict( 46 | assigner=dict( 47 | 
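            # MaxIoUAssigner: anchors with IoU >= pos_iou_thr against some
            # GT box become positives, anchors whose best IoU < neg_iou_thr
            # become negatives, and the band in between is ignored by the loss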
type='MaxIoUAssigner', 48 | pos_iou_thr=0.5, 49 | neg_iou_thr=0.4, 50 | min_pos_iou=0, 51 | ignore_iof_thr=-1), 52 | allowed_border=-1, 53 | pos_weight=-1, 54 | debug=False), 55 | test_cfg=dict( 56 | nms_pre=1000, 57 | min_bbox_size=0, 58 | score_thr=0.05, 59 | nms=dict(type='nms', iou_threshold=0.5), 60 | max_per_img=100)) 61 | -------------------------------------------------------------------------------- /configs/_base_/models/rpn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=50, 7 | num_stages=3, 8 | strides=(1, 2, 2), 9 | dilations=(1, 1, 1), 10 | out_indices=(2, ), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=False), 13 | norm_eval=True, 14 | style='caffe', 15 | init_cfg=dict( 16 | type='Pretrained', 17 | checkpoint='open-mmlab://detectron2/resnet50_caffe')), 18 | neck=None, 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=1024, 22 | feat_channels=1024, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[2, 4, 8, 16, 32], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[16]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | # model training and testing settings 36 | train_cfg=dict( 37 | rpn=dict( 38 | assigner=dict( 39 | type='MaxIoUAssigner', 40 | pos_iou_thr=0.7, 41 | neg_iou_thr=0.3, 42 | min_pos_iou=0.3, 43 | ignore_iof_thr=-1), 44 | sampler=dict( 45 | type='RandomSampler', 46 | num=256, 47 | pos_fraction=0.5, 48 | neg_pos_ub=-1, 49 | add_gt_as_proposals=False), 50 | allowed_border=0, 51 | pos_weight=-1, 52 | debug=False)), 53 | test_cfg=dict( 54 | rpn=dict( 55 | nms_pre=12000, 56 | max_per_img=2000, 57 | nms=dict(type='nms', iou_threshold=0.7), 58 | min_bbox_size=0))) 59 | -------------------------------------------------------------------------------- /tools/model_converters/upgrade_ssd_version.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
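# Remaps legacy SSD checkpoints to the refactored layout: extra conv
# weights move into ``neck.extra_layers``, the L2-norm weight becomes
# ``neck.l2_norm.weight``, and bbox_head conv keys gain a sequential
# index (see convert() below).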
2 | import argparse
3 | import tempfile
4 | from collections import OrderedDict
5 |
6 | import torch
7 | from mmcv import Config, digit_version
8 |
9 |
10 | def parse_config(config_strings):
11 | temp_file = tempfile.NamedTemporaryFile()
12 | config_path = f'{temp_file.name}.py'
13 | with open(config_path, 'w') as f:
14 | f.write(config_strings)
15 |
16 | config = Config.fromfile(config_path)
17 | # check whether it is SSD
18 | if config.model.bbox_head.type != 'SSDHead':
19 | raise AssertionError('This is not an SSD model.')
20 |
21 |
22 | def convert(in_file, out_file):
23 | checkpoint = torch.load(in_file)
24 | in_state_dict = checkpoint.pop('state_dict')
25 | out_state_dict = OrderedDict()
26 | meta_info = checkpoint['meta']
27 | parse_config('#' + meta_info['config'])
28 | for key, value in in_state_dict.items():
29 | if 'extra' in key:
30 | layer_idx = int(key.split('.')[2])
31 | new_key = 'neck.extra_layers.{}.{}.conv.'.format(
32 | layer_idx // 2, layer_idx % 2) + key.split('.')[-1]
33 | elif 'l2_norm' in key:
34 | new_key = 'neck.l2_norm.weight'
35 | elif 'bbox_head' in key:
36 | new_key = key[:21] + '.0' + key[21:]
37 | else:
38 | new_key = key
39 | out_state_dict[new_key] = value
40 | checkpoint['state_dict'] = out_state_dict
41 |
42 | if digit_version(torch.__version__) >= digit_version('1.6'):  # parsed compare; as strings, '1.10' < '1.6'
43 | torch.save(checkpoint, out_file, _use_new_zipfile_serialization=False)
44 | else:
45 | torch.save(checkpoint, out_file)
46 |
47 |
48 | def main():
49 | parser = argparse.ArgumentParser(description='Upgrade SSD version')
50 | parser.add_argument('in_file', help='input checkpoint file')
51 | parser.add_argument('out_file', help='output checkpoint file')
52 |
53 | args = parser.parse_args()
54 | convert(args.in_file, args.out_file)
55 |
56 |
57 | if __name__ == '__main__':
58 | main()
59 |
--------------------------------------------------------------------------------
/configs/_base_/models/rpn_r50_fpn.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='RPN',
4 | backbone=dict(
5 | type='ResNet',
6 | depth=50,
7 | num_stages=4,
8 | out_indices=(0, 1, 2, 3),
9 | frozen_stages=1,
10 | norm_cfg=dict(type='BN', requires_grad=True),
11 | norm_eval=True,
12 | style='pytorch',
13 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
14 | neck=dict(
15 | type='FPN',
16 | in_channels=[256, 512, 1024, 2048],
17 | out_channels=256,
18 | num_outs=5),
19 | rpn_head=dict(
20 | type='RPNHead',
21 | in_channels=256,
22 | feat_channels=256,
23 | anchor_generator=dict(
24 | type='AnchorGenerator',
25 | scales=[8],
26 | ratios=[0.5, 1.0, 2.0],
27 | strides=[4, 8, 16, 32, 64]),
28 | bbox_coder=dict(
29 | type='DeltaXYWHBBoxCoder',
30 | target_means=[.0, .0, .0, .0],
31 | target_stds=[1.0, 1.0, 1.0, 1.0]),
32 | loss_cls=dict(
33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 | # model training and testing settings
36 | train_cfg=dict(
37 | rpn=dict(
38 | assigner=dict(
39 | type='MaxIoUAssigner',
40 | pos_iou_thr=0.7,
41 | neg_iou_thr=0.3,
42 | min_pos_iou=0.3,
43 | ignore_iof_thr=-1),
44 | sampler=dict(
45 | type='RandomSampler',
46 | num=256,
47 | pos_fraction=0.5,
48 | neg_pos_ub=-1,
49 | add_gt_as_proposals=False),
50 | allowed_border=0,
51 | pos_weight=-1,
52 | debug=False)),
53 | test_cfg=dict(
54 | rpn=dict(
55 | nms_pre=2000,
56 | max_per_img=1000,
57 | nms=dict(type='nms', iou_threshold=0.7),
58 | min_bbox_size=0)))
59 |
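A minimal sketch of instantiating one of the model configs above (an
illustration under stated assumptions, not repo code: it presumes mmcv and
this package are importable, that it is run from the repo root, and it
applies equally to the other ``configs/_base_/models`` files):

# build_model_sketch.py (hypothetical helper, not part of the repo)
from mmcv import Config
from mmdet.models import build_detector

# train_cfg/test_cfg already live inside cfg.model in these configs,
# so build_detector needs no extra arguments
cfg = Config.fromfile('configs/_base_/models/rpn_r50_fpn.py')
model = build_detector(cfg.model)
print(type(model).__name__)  # -> RPN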
-------------------------------------------------------------------------------- /mmdet/models/utils/brick_wrappers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from mmcv.cnn.bricks.wrappers import NewEmptyTensorOp, obsolete_torch_version 6 | 7 | if torch.__version__ == 'parrots': 8 | TORCH_VERSION = torch.__version__ 9 | else: 10 | # torch.__version__ could be 1.3.1+cu92, we only need the first two 11 | # for comparison 12 | TORCH_VERSION = tuple(int(x) for x in torch.__version__.split('.')[:2]) 13 | 14 | 15 | def adaptive_avg_pool2d(input, output_size): 16 | """Handle empty batch dimension to adaptive_avg_pool2d. 17 | 18 | Args: 19 | input (tensor): 4D tensor. 20 | output_size (int, tuple[int,int]): the target output size. 21 | """ 22 | if input.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 9)): 23 | if isinstance(output_size, int): 24 | output_size = [output_size, output_size] 25 | output_size = [*input.shape[:2], *output_size] 26 | empty = NewEmptyTensorOp.apply(input, output_size) 27 | return empty 28 | else: 29 | return F.adaptive_avg_pool2d(input, output_size) 30 | 31 | 32 | class AdaptiveAvgPool2d(nn.AdaptiveAvgPool2d): 33 | """Handle empty batch dimension to AdaptiveAvgPool2d.""" 34 | 35 | def forward(self, x): 36 | # PyTorch 1.9 does not support empty tensor inference yet 37 | if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 9)): 38 | output_size = self.output_size 39 | if isinstance(output_size, int): 40 | output_size = [output_size, output_size] 41 | else: 42 | output_size = [ 43 | v if v is not None else d 44 | for v, d in zip(output_size, 45 | x.size()[-2:]) 46 | ] 47 | output_size = [*x.shape[:2], *output_size] 48 | empty = NewEmptyTensorOp.apply(x, output_size) 49 | return empty 50 | 51 | return super().forward(x) 52 | -------------------------------------------------------------------------------- /Pose2Seg_OCP/datasets/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | cv2_interp_codes = { 5 | 'nearest': cv2.INTER_NEAREST, 6 | 'bilinear': cv2.INTER_LINEAR, 7 | 'bicubic': cv2.INTER_CUBIC, 8 | 'area': cv2.INTER_AREA, 9 | 'lanczos': cv2.INTER_LANCZOS4 10 | } 11 | 12 | def imrotate(img, 13 | angle, 14 | center=None, 15 | scale=1.0, 16 | border_value=0, 17 | interpolation='bilinear', 18 | auto_bound=False): 19 | """Rotate an image. 20 | 21 | Args: 22 | img (ndarray): Image to be rotated. 23 | angle (float): Rotation angle in degrees, positive values mean 24 | clockwise rotation. 25 | center (tuple[float], optional): Center point (w, h) of the rotation in 26 | the source image. If not specified, the center of the image will be 27 | used. 28 | scale (float): Isotropic scale factor. 29 | border_value (int): Border value. 30 | interpolation (str): Same as :func:`resize`. 31 | auto_bound (bool): Whether to adjust the image size to cover the whole 32 | rotated image. 33 | 34 | Returns: 35 | ndarray: The rotated image. 
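            Note that, unlike :func:`mmcv.imrotate`, this variant returns a
            tuple ``(rotated, matrix)`` so callers can reuse the 2x3 affine
            matrix, e.g. ``img_r, mat = imrotate(img, 30)``.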
36 | """ 37 | if center is not None and auto_bound: 38 | raise ValueError('`auto_bound` conflicts with `center`') 39 | h, w = img.shape[:2] 40 | if center is None: 41 | center = ((w - 1) * 0.5, (h - 1) * 0.5) 42 | assert isinstance(center, tuple) 43 | 44 | matrix = cv2.getRotationMatrix2D(center, -angle, scale) 45 | if auto_bound: 46 | cos = np.abs(matrix[0, 0]) 47 | sin = np.abs(matrix[0, 1]) 48 | new_w = h * sin + w * cos 49 | new_h = h * cos + w * sin 50 | matrix[0, 2] += (new_w - w) * 0.5 51 | matrix[1, 2] += (new_h - h) * 0.5 52 | w = int(np.round(new_w)) 53 | h = int(np.round(new_h)) 54 | rotated = cv2.warpAffine( 55 | img, 56 | matrix, (w, h), 57 | flags=cv2_interp_codes[interpolation], 58 | borderValue=border_value) 59 | return rotated, matrix 60 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .base_roi_head import BaseRoIHead 3 | from .bbox_heads import (BBoxHead, ConvFCBBoxHead, DIIHead, 4 | DoubleConvFCBBoxHead, SABLHead, SCNetBBoxHead, 5 | Shared2FCBBoxHead, Shared4Conv1FCBBoxHead) 6 | from .cascade_roi_head import CascadeRoIHead 7 | from .double_roi_head import DoubleHeadRoIHead 8 | from .dynamic_roi_head import DynamicRoIHead 9 | from .grid_roi_head import GridRoIHead 10 | from .htc_roi_head import HybridTaskCascadeRoIHead 11 | from .mask_heads import (CoarseMaskHead, FCNMaskHead, FeatureRelayHead, 12 | FusedSemanticHead, GlobalContextHead, GridHead, 13 | HTCMaskHead, MaskIoUHead, MaskPointHead, 14 | SCNetMaskHead, SCNetSemanticHead) 15 | from .mask_scoring_roi_head import MaskScoringRoIHead 16 | from .pisa_roi_head import PISARoIHead 17 | from .point_rend_roi_head import PointRendRoIHead 18 | from .roi_extractors import (BaseRoIExtractor, GenericRoIExtractor, 19 | SingleRoIExtractor) 20 | from .scnet_roi_head import SCNetRoIHead 21 | from .shared_heads import ResLayer 22 | from .sparse_roi_head import SparseRoIHead 23 | from .standard_roi_head import StandardRoIHead 24 | from .trident_roi_head import TridentRoIHead 25 | 26 | __all__ = [ 27 | 'BaseRoIHead', 'CascadeRoIHead', 'DoubleHeadRoIHead', 'MaskScoringRoIHead', 28 | 'HybridTaskCascadeRoIHead', 'GridRoIHead', 'ResLayer', 'BBoxHead', 29 | 'ConvFCBBoxHead', 'DIIHead', 'SABLHead', 'Shared2FCBBoxHead', 30 | 'StandardRoIHead', 'Shared4Conv1FCBBoxHead', 'DoubleConvFCBBoxHead', 31 | 'FCNMaskHead', 'HTCMaskHead', 'FusedSemanticHead', 'GridHead', 32 | 'MaskIoUHead', 'BaseRoIExtractor', 'GenericRoIExtractor', 33 | 'SingleRoIExtractor', 'PISARoIHead', 'PointRendRoIHead', 'MaskPointHead', 34 | 'CoarseMaskHead', 'DynamicRoIHead', 'SparseRoIHead', 'TridentRoIHead', 35 | 'SCNetRoIHead', 'SCNetMaskHead', 'SCNetSemanticHead', 'SCNetBBoxHead', 36 | 'FeatureRelayHead', 'GlobalContextHead' 37 | ] 38 | -------------------------------------------------------------------------------- /configs/_base_/datasets/deepfashion.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'DeepFashionDataset' 3 | data_root = 'data/DeepFashion/In-shop/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 9 | dict(type='Resize', img_scale=(750, 1101), keep_ratio=True), 10 | dict(type='RandomFlip', 
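         # each training image is mirrored horizontally with probability 0.5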
flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(750, 1101), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | imgs_per_gpu=2, 33 | workers_per_gpu=1, 34 | train=dict( 35 | type=dataset_type, 36 | ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json', 37 | img_prefix=data_root + 'Img/', 38 | pipeline=train_pipeline, 39 | data_root=data_root), 40 | val=dict( 41 | type=dataset_type, 42 | ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json', 43 | img_prefix=data_root + 'Img/', 44 | pipeline=test_pipeline, 45 | data_root=data_root), 46 | test=dict( 47 | type=dataset_type, 48 | ann_file=data_root + 49 | 'annotations/DeepFashion_segmentation_gallery.json', 50 | img_prefix=data_root + 'Img/', 51 | pipeline=test_pipeline, 52 | data_root=data_root)) 53 | evaluation = dict(interval=5, metric=['bbox', 'segm']) 54 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask_rcnn_r101_fpn_1x_coco.py' 2 | model = dict( 3 | backbone=dict( 4 | type='ResNeXt', 5 | depth=101, 6 | groups=32, 7 | base_width=8, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=False), 12 | style='pytorch', 13 | init_cfg=dict( 14 | type='Pretrained', 15 | checkpoint='open-mmlab://detectron2/resnext101_32x8d'))) 16 | 17 | dataset_type = 'CocoDataset' 18 | data_root = 'data/coco/' 19 | img_norm_cfg = dict( 20 | mean=[103.530, 116.280, 123.675], 21 | std=[57.375, 57.120, 58.395], 22 | to_rgb=False) 23 | train_pipeline = [ 24 | dict(type='LoadImageFromFile'), 25 | dict( 26 | type='LoadAnnotations', 27 | with_bbox=True, 28 | with_mask=True, 29 | poly2mask=False), 30 | dict( 31 | type='Resize', 32 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), 33 | (1333, 768), (1333, 800)], 34 | multiscale_mode='value', 35 | keep_ratio=True), 36 | dict(type='RandomFlip', flip_ratio=0.5), 37 | dict(type='Normalize', **img_norm_cfg), 38 | dict(type='Pad', size_divisor=32), 39 | dict(type='DefaultFormatBundle'), 40 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 41 | ] 42 | test_pipeline = [ 43 | dict(type='LoadImageFromFile'), 44 | dict( 45 | type='MultiScaleFlipAug', 46 | img_scale=(1333, 800), 47 | flip=False, 48 | transforms=[ 49 | dict(type='Resize', keep_ratio=True), 50 | dict(type='RandomFlip'), 51 | dict(type='Normalize', **img_norm_cfg), 52 | dict(type='Pad', size_divisor=32), 53 | dict(type='ImageToTensor', keys=['img']), 54 | dict(type='Collect', keys=['img']), 55 | ]) 56 | ] 57 | data = dict( 58 | train=dict(pipeline=train_pipeline), 59 | val=dict(pipeline=test_pipeline), 60 | test=dict(pipeline=test_pipeline)) 61 | -------------------------------------------------------------------------------- /.gitignore: 
--------------------------------------------------------------------------------
1 | outputs/
2 | wandb/
3 | graphs/
4 |
5 | *.jpg
6 |
7 | # Byte-compiled / optimized / DLL files
8 | __pycache__/
9 | *.py[cod]
10 | *$py.class
11 |
12 | # C extensions
13 | *.so
14 |
15 | # Distribution / packaging
16 | .Python
17 | build/
18 | develop-eggs/
19 | dist/
20 | downloads/
21 | eggs/
22 | .eggs/
23 | lib/
24 | lib64/
25 | parts/
26 | sdist/
27 | var/
28 | wheels/
29 | *.egg-info/
30 | .installed.cfg
31 | *.egg
32 | MANIFEST
33 |
34 | # PyInstaller
35 | # Usually these files are written by a python script from a template
36 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
37 | *.manifest
38 | *.spec
39 |
40 | # Installer logs
41 | pip-log.txt
42 | pip-delete-this-directory.txt
43 |
44 | # Unit test / coverage reports
45 | htmlcov/
46 | .tox/
47 | .coverage
48 | .coverage.*
49 | .cache
50 | nosetests.xml
51 | coverage.xml
52 | *.cover
53 | .hypothesis/
54 | .pytest_cache/
55 |
56 | # Translations
57 | *.mo
58 | *.pot
59 |
60 | # Django stuff:
61 | *.log
62 | local_settings.py
63 | db.sqlite3
64 |
65 | # Flask stuff:
66 | instance/
67 | .webassets-cache
68 |
69 | # Scrapy stuff:
70 | .scrapy
71 |
72 | # Sphinx documentation
73 | docs/en/_build/
74 | docs/zh_cn/_build/
75 |
76 | # PyBuilder
77 | target/
78 |
79 | # Jupyter Notebook
80 | .ipynb_checkpoints
81 |
82 | # pyenv
83 | .python-version
84 |
85 | # celery beat schedule file
86 | celerybeat-schedule
87 |
88 | # SageMath parsed files
89 | *.sage.py
90 |
91 | # Environments
92 | .env
93 | .venv
94 | env/
95 | venv/
96 | ENV/
97 | env.bak/
98 | venv.bak/
99 |
100 | # Spyder project settings
101 | .spyderproject
102 | .spyproject
103 |
104 | # Rope project settings
105 | .ropeproject
106 |
107 | # mkdocs documentation
108 | /site
109 |
110 | # mypy
111 | .mypy_cache/
112 |
113 | data/*
114 | !*.gitkeep
115 |
116 | .vscode
117 | .idea
118 | .DS_Store
119 |
120 | # custom
121 | *.pkl
122 | *.pkl.json
123 | *.log.json
124 | docs/modelzoo_statistics.md
125 | mmdet/.mim
126 | work_dirs/
127 |
128 | # Pytorch
129 | *.pth
130 | *.py~
131 | *.sh~
132 |
--------------------------------------------------------------------------------
/mmdet/models/losses/mse_loss.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | from ..builder import LOSSES
6 | from .utils import weighted_loss
7 |
8 |
9 | @weighted_loss
10 | def mse_loss(pred, target):
11 | """Wrapper of mse loss."""
12 | return F.mse_loss(pred, target, reduction='none')
13 |
14 |
15 | @LOSSES.register_module()
16 | class MSELoss(nn.Module):
17 | """MSELoss.
18 |
19 | Args:
20 | reduction (str, optional): The method that reduces the loss to a
21 | scalar. Options are "none", "mean" and "sum".
22 | loss_weight (float, optional): The weight of the loss. Defaults to 1.0.
23 | """
24 |
25 | def __init__(self, reduction='mean', loss_weight=1.0):
26 | super().__init__()
27 | self.reduction = reduction
28 | self.loss_weight = loss_weight
29 |
30 | def forward(self,
31 | pred,
32 | target,
33 | weight=None,
34 | avg_factor=None,
35 | reduction_override=None):
36 | """Forward function of loss.
37 |
38 | Args:
39 | pred (torch.Tensor): The prediction.
40 | target (torch.Tensor): The learning target of the prediction.
41 | weight (torch.Tensor, optional): Weight of the loss for each
42 | prediction. Defaults to None.
43 | avg_factor (int, optional): Average factor that is used to average 44 | the loss. Defaults to None. 45 | reduction_override (str, optional): The reduction method used to 46 | override the original reduction method of the loss. 47 | Defaults to None. 48 | 49 | Returns: 50 | torch.Tensor: The calculated loss 51 | """ 52 | assert reduction_override in (None, 'none', 'mean', 'sum') 53 | reduction = ( 54 | reduction_override if reduction_override else self.reduction) 55 | loss = self.loss_weight * mse_loss( 56 | pred, target, weight, reduction=reduction, avg_factor=avg_factor) 57 | return loss 58 | -------------------------------------------------------------------------------- /configs/_base_/datasets/voc0712.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'VOCDataset' 3 | data_root = 'data/VOCdevkit/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True), 9 | dict(type='Resize', img_scale=(1000, 600), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(1000, 600), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=2, 33 | workers_per_gpu=2, 34 | train=dict( 35 | type='RepeatDataset', 36 | times=3, 37 | dataset=dict( 38 | type=dataset_type, 39 | ann_file=[ 40 | data_root + 'VOC2007/ImageSets/Main/trainval.txt', 41 | data_root + 'VOC2012/ImageSets/Main/trainval.txt' 42 | ], 43 | img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'], 44 | pipeline=train_pipeline)), 45 | val=dict( 46 | type=dataset_type, 47 | ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', 48 | img_prefix=data_root + 'VOC2007/', 49 | pipeline=test_pipeline), 50 | test=dict( 51 | type=dataset_type, 52 | ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', 53 | img_prefix=data_root + 'VOC2007/', 54 | pipeline=test_pipeline)) 55 | evaluation = dict(interval=1, metric='mAP') 56 | -------------------------------------------------------------------------------- /configs/_base_/datasets/coco_instance_semantic.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CocoDataset' 3 | data_root = 'data/coco/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict( 9 | type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), 10 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 11 | dict(type='RandomFlip', flip_ratio=0.5), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size_divisor=32), 14 | dict(type='SegRescale', scale_factor=1 / 8), 15 | dict(type='DefaultFormatBundle'), 16 | dict( 17 | type='Collect', 18 | keys=['img', 'gt_bboxes', 'gt_labels', 
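              # masks plus the rescaled semantic map are collected so both
              # the instance branch and a semantic head (e.g. HTC) receive
              # supervision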
'gt_masks', 'gt_semantic_seg']), 19 | ] 20 | test_pipeline = [ 21 | dict(type='LoadImageFromFile'), 22 | dict( 23 | type='MultiScaleFlipAug', 24 | img_scale=(1333, 800), 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip', flip_ratio=0.5), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='Pad', size_divisor=32), 31 | dict(type='ImageToTensor', keys=['img']), 32 | dict(type='Collect', keys=['img']), 33 | ]) 34 | ] 35 | data = dict( 36 | samples_per_gpu=2, 37 | workers_per_gpu=2, 38 | train=dict( 39 | type=dataset_type, 40 | ann_file=data_root + 'annotations/instances_train2017.json', 41 | img_prefix=data_root + 'train2017/', 42 | seg_prefix=data_root + 'stuffthingmaps/train2017/', 43 | pipeline=train_pipeline), 44 | val=dict( 45 | type=dataset_type, 46 | ann_file=data_root + 'annotations/instances_val2017.json', 47 | img_prefix=data_root + 'val2017/', 48 | pipeline=test_pipeline), 49 | test=dict( 50 | type=dataset_type, 51 | ann_file=data_root + 'annotations/instances_val2017.json', 52 | img_prefix=data_root + 'val2017/', 53 | pipeline=test_pipeline)) 54 | evaluation = dict(metric=['bbox', 'segm']) 55 | -------------------------------------------------------------------------------- /tools/misc/print_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import warnings 4 | 5 | from mmcv import Config, DictAction 6 | 7 | from mmdet.utils import replace_cfg_vals, update_data_root 8 | 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser(description='Print the whole config') 12 | parser.add_argument('config', help='config file path') 13 | parser.add_argument( 14 | '--options', 15 | nargs='+', 16 | action=DictAction, 17 | help='override some settings in the used config, the key-value pair ' 18 | 'in xxx=yyy format will be merged into config file (deprecate), ' 19 | 'change to --cfg-options instead.') 20 | parser.add_argument( 21 | '--cfg-options', 22 | nargs='+', 23 | action=DictAction, 24 | help='override some settings in the used config, the key-value pair ' 25 | 'in xxx=yyy format will be merged into config file. If the value to ' 26 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 27 | 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' 28 | 'Note that the quotation marks are necessary and that no white space ' 29 | 'is allowed.') 30 | args = parser.parse_args() 31 | 32 | if args.options and args.cfg_options: 33 | raise ValueError( 34 | '--options and --cfg-options cannot be both ' 35 | 'specified, --options is deprecated in favor of --cfg-options') 36 | if args.options: 37 | warnings.warn('--options is deprecated in favor of --cfg-options') 38 | args.cfg_options = args.options 39 | 40 | return args 41 | 42 | 43 | def main(): 44 | args = parse_args() 45 | 46 | cfg = Config.fromfile(args.config) 47 | 48 | # replace the ${key} with the value of cfg.key 49 | cfg = replace_cfg_vals(cfg) 50 | 51 | # update data root according to MMDET_DATASETS 52 | update_data_root(cfg) 53 | 54 | if args.cfg_options is not None: 55 | cfg.merge_from_dict(args.cfg_options) 56 | print(f'Config:\n{cfg.pretty_text}') 57 | 58 | 59 | if __name__ == '__main__': 60 | main() 61 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/mask_heads/feature_relay_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch.nn as nn 3 | from mmcv.runner import BaseModule, auto_fp16 4 | 5 | from mmdet.models.builder import HEADS 6 | 7 | 8 | @HEADS.register_module() 9 | class FeatureRelayHead(BaseModule): 10 | """Feature Relay Head used in `SCNet `_. 11 | 12 | Args: 13 | in_channels (int, optional): number of input channels. Default: 256. 14 | conv_out_channels (int, optional): number of output channels before 15 | classification layer. Default: 256. 16 | roi_feat_size (int, optional): roi feat size at box head. Default: 7. 17 | scale_factor (int, optional): scale factor to match roi feat size 18 | at mask head. Default: 2. 19 | init_cfg (dict or list[dict], optional): Initialization config dict. 
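    Note:
        The signature below actually defaults to ``in_channels=1024`` and
        names the second argument ``out_conv_channels``; the relayed
        feature is reshaped to ``(N, out_conv_channels, roi_feat_size,
        roi_feat_size)`` and then bilinearly upsampled by ``scale_factor``.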
20 | """ 21 | 22 | def __init__(self, 23 | in_channels=1024, 24 | out_conv_channels=256, 25 | roi_feat_size=7, 26 | scale_factor=2, 27 | init_cfg=dict(type='Kaiming', layer='Linear')): 28 | super(FeatureRelayHead, self).__init__(init_cfg) 29 | assert isinstance(roi_feat_size, int) 30 | 31 | self.in_channels = in_channels 32 | self.out_conv_channels = out_conv_channels 33 | self.roi_feat_size = roi_feat_size 34 | self.out_channels = (roi_feat_size**2) * out_conv_channels 35 | self.scale_factor = scale_factor 36 | self.fp16_enabled = False 37 | 38 | self.fc = nn.Linear(self.in_channels, self.out_channels) 39 | self.upsample = nn.Upsample( 40 | scale_factor=scale_factor, mode='bilinear', align_corners=True) 41 | 42 | @auto_fp16() 43 | def forward(self, x): 44 | """Forward function.""" 45 | N, in_C = x.shape 46 | if N > 0: 47 | out_C = self.out_conv_channels 48 | out_HW = self.roi_feat_size 49 | x = self.fc(x) 50 | x = x.reshape(N, out_C, out_HW, out_HW) 51 | x = self.upsample(x) 52 | return x 53 | return None 54 | -------------------------------------------------------------------------------- /configs/_base_/datasets/cityscapes_detection.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CityscapesDataset' 3 | data_root = 'data/cityscapes/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True), 9 | dict( 10 | type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True), 11 | dict(type='RandomFlip', flip_ratio=0.5), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size_divisor=32), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict( 20 | type='MultiScaleFlipAug', 21 | img_scale=(2048, 1024), 22 | flip=False, 23 | transforms=[ 24 | dict(type='Resize', keep_ratio=True), 25 | dict(type='RandomFlip'), 26 | dict(type='Normalize', **img_norm_cfg), 27 | dict(type='Pad', size_divisor=32), 28 | dict(type='ImageToTensor', keys=['img']), 29 | dict(type='Collect', keys=['img']), 30 | ]) 31 | ] 32 | data = dict( 33 | samples_per_gpu=1, 34 | workers_per_gpu=2, 35 | train=dict( 36 | type='RepeatDataset', 37 | times=8, 38 | dataset=dict( 39 | type=dataset_type, 40 | ann_file=data_root + 41 | 'annotations/instancesonly_filtered_gtFine_train.json', 42 | img_prefix=data_root + 'leftImg8bit/train/', 43 | pipeline=train_pipeline)), 44 | val=dict( 45 | type=dataset_type, 46 | ann_file=data_root + 47 | 'annotations/instancesonly_filtered_gtFine_val.json', 48 | img_prefix=data_root + 'leftImg8bit/val/', 49 | pipeline=test_pipeline), 50 | test=dict( 51 | type=dataset_type, 52 | ann_file=data_root + 53 | 'annotations/instancesonly_filtered_gtFine_test.json', 54 | img_prefix=data_root + 'leftImg8bit/test/', 55 | pipeline=test_pipeline)) 56 | evaluation = dict(interval=1, metric='bbox') 57 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './mask_rcnn_r50_fpn_1x_coco.py' 2 | model = dict( 3 | backbone=dict( 4 | norm_cfg=dict(requires_grad=False), 5 | style='caffe', 6 | init_cfg=dict( 7 | type='Pretrained', 8 | 
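            # caffe-style ResNet-50 weights converted from the
            # detectron2 model zoo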
checkpoint='open-mmlab://detectron2/resnet50_caffe'))) 9 | # use caffe img_norm 10 | img_norm_cfg = dict( 11 | mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) 12 | train_pipeline = [ 13 | dict(type='LoadImageFromFile'), 14 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 15 | dict( 16 | type='Resize', 17 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), 18 | (1333, 768), (1333, 800)], 19 | multiscale_mode='value', 20 | keep_ratio=True), 21 | dict(type='RandomFlip', flip_ratio=0.5), 22 | dict(type='Normalize', **img_norm_cfg), 23 | dict(type='Pad', size_divisor=32), 24 | dict(type='DefaultFormatBundle'), 25 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 26 | ] 27 | test_pipeline = [ 28 | dict(type='LoadImageFromFile'), 29 | dict( 30 | type='MultiScaleFlipAug', 31 | img_scale=(1333, 800), 32 | flip=False, 33 | transforms=[ 34 | dict(type='Resize', keep_ratio=True), 35 | dict(type='RandomFlip'), 36 | dict(type='Normalize', **img_norm_cfg), 37 | dict(type='Pad', size_divisor=32), 38 | dict(type='ImageToTensor', keys=['img']), 39 | dict(type='Collect', keys=['img']), 40 | ]) 41 | ] 42 | 43 | data_root = 'data/' 44 | data = dict( 45 | samples_per_gpu=1, 46 | workers_per_gpu=1, 47 | train=dict( 48 | ann_file=data_root + 'COCO2017/annotations/instances_train2017.json', 49 | img_prefix=data_root + 'COCO2017/train2017/', 50 | pipeline=train_pipeline, 51 | ), 52 | val=dict( 53 | pipeline=test_pipeline, 54 | ann_file=data_root + 'COCO2017/annotations/instances_val2017.json', 55 | img_prefix=data_root + 'COCO2017/val2017/', 56 | ), 57 | test=dict(pipeline=test_pipeline)) 58 | --------------------------------------------------------------------------------
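A minimal smoke-test sketch for the customised config above (illustrative
only; it assumes mmcv/mmdet are importable, the working directory is the
repo root, and the annotation files under ``data/COCO2017`` exist):

# check_config_sketch.py (hypothetical helper, not part of the repo)
from mmcv import Config
from mmdet.datasets import build_dataset
from mmdet.models import build_detector

cfg = Config.fromfile(
    'configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain_1x_coco.py')
model = build_detector(cfg.model)        # Mask R-CNN, caffe-style backbone
dataset = build_dataset(cfg.data.train)  # parses the COCO2017 train split
print(type(model).__name__, len(dataset))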