├── .github ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md └── ISSUE_TEMPLATE │ ├── config.yml │ ├── error-report.md │ ├── feature_request.md │ └── general_questions.md ├── .gitignore ├── .isort.cfg ├── .pre-commit-config.yaml ├── .style.yapf ├── .travis.yml ├── LICENSE ├── README.md ├── configs ├── OpenImages_configs │ ├── r50-FPN-1x_classsampling │ │ └── r50-FPN-1x_classsampling.py │ └── r50-FPN-1x_classsampling_TSD │ │ └── r50-FPN-1x_classsampling_TSD.py ├── TSD_configs │ ├── cascade_rcnn_r101_fpn_20e.py │ ├── cascade_rcnn_r101_fpn_TSD_20e.py │ ├── cascade_rcnn_r50_fpn_TSD_20e.py │ ├── faster_rcnn_r101_fpn_TSD_1x.py │ ├── faster_rcnn_r152_fpn_TSD_1x.py │ ├── faster_rcnn_r50_fpn_TSD_1x.py │ ├── faster_rcnn_r50_fpn_TSD_1x_fp16.py │ ├── faster_rcnn_x101_64x4d_fpn_TSD.py │ └── mask_rcnn_r50_fpn_TSD_1x.py ├── albu_example │ └── mask_rcnn_r50_fpn_1x.py ├── atss │ ├── README.md │ └── atss_r50_fpn_1x.py ├── carafe │ ├── README.md │ ├── faster_rcnn_r50_fpn_carafe_1x.py │ └── mask_rcnn_r50_fpn_carafe_1x.py ├── cascade_mask_rcnn_r101_fpn_1x.py ├── cascade_mask_rcnn_r50_caffe_c4_1x.py ├── cascade_mask_rcnn_r50_fpn_1x.py ├── cascade_mask_rcnn_x101_32x4d_fpn_1x.py ├── cascade_mask_rcnn_x101_64x4d_fpn_1x.py ├── cascade_rcnn_r101_fpn_1x.py ├── cascade_rcnn_r50_caffe_c4_1x.py ├── cascade_rcnn_r50_fpn_1x.py ├── cascade_rcnn_x101_32x4d_fpn_1x.py ├── cascade_rcnn_x101_64x4d_fpn_1x.py ├── cityscapes │ ├── README.md │ ├── faster_rcnn_r50_fpn_1x_cityscapes.py │ └── mask_rcnn_r50_fpn_1x_cityscapes.py ├── dcn │ ├── README.md │ ├── cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x.py │ ├── cascade_rcnn_dconv_c3-c5_r50_fpn_1x.py │ ├── faster_rcnn_dconv_c3-c5_r50_fpn_1x.py │ ├── faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x.py │ ├── faster_rcnn_dpool_r50_fpn_1x.py │ ├── faster_rcnn_mdconv_c3-c5_group4_r50_fpn_1x.py │ ├── faster_rcnn_mdconv_c3-c5_r50_fpn_1x.py │ ├── faster_rcnn_mdpool_r50_fpn_1x.py │ ├── mask_rcnn_dconv_c3-c5_r50_fpn_1x.py │ └── mask_rcnn_mdconv_c3-c5_r50_fpn_1x.py ├── double_heads │ └── dh_faster_rcnn_r50_fpn_1x.py ├── empirical_attention │ ├── README.md │ ├── faster_rcnn_r50_fpn_attention_0010_1x.py │ ├── faster_rcnn_r50_fpn_attention_0010_dcn_1x.py │ ├── faster_rcnn_r50_fpn_attention_1111_1x.py │ └── faster_rcnn_r50_fpn_attention_1111_dcn_1x.py ├── fast_mask_rcnn_r101_fpn_1x.py ├── fast_mask_rcnn_r50_caffe_c4_1x.py ├── fast_mask_rcnn_r50_fpn_1x.py ├── fast_rcnn_r101_fpn_1x.py ├── fast_rcnn_r50_caffe_c4_1x.py ├── fast_rcnn_r50_fpn_1x.py ├── faster_rcnn_ohem_r50_fpn_1x.py ├── faster_rcnn_r101_fpn_1x.py ├── faster_rcnn_r101_fpn_TSD_1x.py ├── faster_rcnn_r152_fpn_1x.py ├── faster_rcnn_r152_fpn_TSD_1x.py ├── faster_rcnn_r50_caffe_c4_1x.py ├── faster_rcnn_r50_fpn_1x.py ├── faster_rcnn_r50_fpn_TSD_1x.py ├── faster_rcnn_r50_fpn_TSD_1x_fp16.py ├── faster_rcnn_x101_32x4d_fpn_1x.py ├── faster_rcnn_x101_64x4d_fpn_1x.py ├── faster_rcnn_x101_64x4d_fpn_TSD.py ├── fcos │ ├── README.md │ ├── fcos_center_r50_caffe_fpn_gn_1x_4gpu.py.py │ ├── fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu.py │ ├── fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x.py │ └── fcos_r50_caffe_fpn_gn_1x_4gpu.py ├── foveabox │ ├── README.md │ ├── fovea_align_gn_ms_r101_fpn_4gpu_2x.py │ ├── fovea_align_gn_ms_r50_fpn_4gpu_2x.py │ ├── fovea_align_gn_r101_fpn_4gpu_2x.py │ ├── fovea_align_gn_r50_fpn_4gpu_2x.py │ └── fovea_r50_fpn_4gpu_1x.py ├── fp16 │ ├── faster_rcnn_r50_fpn_fp16_1x.py │ ├── mask_rcnn_r50_fpn_fp16_1x.py │ └── retinanet_r50_fpn_fp16_1x.py ├── free_anchor │ ├── README.md │ ├── retinanet_free_anchor_r101_fpn_1x.py │ ├── retinanet_free_anchor_r50_fpn_1x.py 
│ └── retinanet_free_anchor_x101-32x4d_fpn_1x.py ├── gcnet │ ├── README.md │ ├── mask_rcnn_r16_gcb_c3-c5_r50_fpn_1x.py │ ├── mask_rcnn_r16_gcb_c3-c5_r50_fpn_syncbn_1x.py │ ├── mask_rcnn_r4_gcb_c3-c5_r50_fpn_1x.py │ ├── mask_rcnn_r4_gcb_c3-c5_r50_fpn_syncbn_1x.py │ └── mask_rcnn_r50_fpn_sbn_1x.py ├── ghm │ ├── README.md │ └── retinanet_ghm_r50_fpn_1x.py ├── gn+ws │ ├── README.md │ ├── faster_rcnn_r50_fpn_gn_ws_1x.py │ ├── mask_rcnn_r50_fpn_gn_ws_20_23_24e.py │ ├── mask_rcnn_r50_fpn_gn_ws_2x.py │ └── mask_rcnn_x101_32x4d_fpn_gn_ws_2x.py ├── gn │ ├── README.md │ ├── mask_rcnn_r101_fpn_gn_2x.py │ ├── mask_rcnn_r50_fpn_gn_2x.py │ └── mask_rcnn_r50_fpn_gn_contrib_2x.py ├── grid_rcnn │ ├── README.md │ ├── grid_rcnn_gn_head_r50_fpn_2x.py │ └── grid_rcnn_gn_head_x101_32x4d_fpn_2x.py ├── guided_anchoring │ ├── README.md │ ├── ga_fast_r50_caffe_fpn_1x.py │ ├── ga_faster_r50_caffe_fpn_1x.py │ ├── ga_faster_x101_32x4d_fpn_1x.py │ ├── ga_retinanet_r101_caffe_fpn_mstrain_2x.py │ ├── ga_retinanet_r50_caffe_fpn_1x.py │ ├── ga_retinanet_x101_32x4d_fpn_1x.py │ ├── ga_rpn_r101_caffe_rpn_1x.py │ ├── ga_rpn_r50_caffe_fpn_1x.py │ └── ga_rpn_x101_32x4d_fpn_1x.py ├── hrnet │ ├── README.md │ ├── cascade_mask_rcnn_hrnetv2p_w32_20e.py │ ├── cascade_rcnn_hrnetv2p_w32_20e.py │ ├── faster_rcnn_hrnetv2p_w18_1x.py │ ├── faster_rcnn_hrnetv2p_w32_1x.py │ ├── faster_rcnn_hrnetv2p_w40_1x.py │ ├── fcos_hrnetv2p_w32_gn_1x_4gpu.py │ ├── htc_hrnetv2p_w32_20e.py │ ├── mask_rcnn_hrnetv2p_w18_1x.py │ └── mask_rcnn_hrnetv2p_w32_1x.py ├── htc │ ├── README.md │ ├── htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e.py │ ├── htc_r101_fpn_20e.py │ ├── htc_r50_fpn_1x.py │ ├── htc_r50_fpn_20e.py │ ├── htc_without_semantic_r50_fpn_1x.py │ ├── htc_x101_32x4d_fpn_20e_16gpu.py │ └── htc_x101_64x4d_fpn_20e_16gpu.py ├── instaboost │ ├── README.md │ ├── cascade_mask_rcnn_r50_fpn_instaboost_4x.py │ ├── mask_rcnn_r50_fpn_instaboost_4x.py │ └── ssd300_coco_instaboost_4x.py ├── libra_rcnn │ ├── README.md │ ├── libra_fast_rcnn_r50_fpn_1x.py │ ├── libra_faster_rcnn_r101_fpn_1x.py │ ├── libra_faster_rcnn_r50_fpn_1x.py │ ├── libra_faster_rcnn_x101_64x4d_fpn_1x.py │ └── libra_retinanet_r50_fpn_1x.py ├── mask_rcnn_r101_fpn_1x.py ├── mask_rcnn_r50_caffe_c4_1x.py ├── mask_rcnn_r50_fpn_1x.py ├── mask_rcnn_x101_32x4d_fpn_1x.py ├── mask_rcnn_x101_64x4d_fpn_1x.py ├── ms_rcnn │ ├── README.md │ ├── ms_rcnn_r101_caffe_fpn_1x.py │ ├── ms_rcnn_r50_caffe_fpn_1x.py │ └── ms_rcnn_x101_64x4d_fpn_1x.py ├── nas_fpn │ ├── README.md │ ├── retinanet_crop640_r50_fpn_50e.py │ └── retinanet_crop640_r50_nasfpn_50e.py ├── pascal_voc │ ├── README.md │ ├── faster_rcnn_r50_fpn_1x_voc0712.py │ ├── ssd300_voc.py │ └── ssd512_voc.py ├── reppoints │ ├── README.md │ ├── bbox_r50_grid_center_fpn_1x.py │ ├── bbox_r50_grid_fpn_1x.py │ ├── reppoints.png │ ├── reppoints_minmax_r50_fpn_1x.py │ ├── reppoints_moment_r101_dcn_fpn_2x.py │ ├── reppoints_moment_r101_dcn_fpn_2x_mt.py │ ├── reppoints_moment_r101_fpn_2x.py │ ├── reppoints_moment_r101_fpn_2x_mt.py │ ├── reppoints_moment_r50_fpn_1x.py │ ├── reppoints_moment_r50_fpn_2x.py │ ├── reppoints_moment_r50_fpn_2x_mt.py │ ├── reppoints_moment_r50_no_gn_fpn_1x.py │ ├── reppoints_moment_x101_dcn_fpn_2x.py │ ├── reppoints_moment_x101_dcn_fpn_2x_mt.py │ └── reppoints_partial_minmax_r50_fpn_1x.py ├── retinanet_r101_fpn_1x.py ├── retinanet_r50_fpn_1x.py ├── retinanet_x101_32x4d_fpn_1x.py ├── retinanet_x101_64x4d_fpn_1x.py ├── rpn_r101_fpn_1x.py ├── rpn_r50_caffe_c4_1x.py ├── rpn_r50_fpn_1x.py ├── rpn_x101_32x4d_fpn_1x.py ├── rpn_x101_64x4d_fpn_1x.py ├── 
scratch │ ├── README.md │ ├── scratch_faster_rcnn_r50_fpn_gn_6x.py │ └── scratch_mask_rcnn_r50_fpn_gn_6x.py ├── ssd300_coco.py ├── ssd512_coco.py └── wider_face │ ├── README.md │ └── ssd300_wider_face.py ├── demo ├── TSD.png ├── coco_test_12510.jpg ├── corruptions_sev_3.png ├── data_pipeline.png ├── demo.jpg ├── inference_demo.ipynb ├── loss_curve.png └── webcam_demo.py ├── docker └── Dockerfile ├── docs ├── CHANGELOG.md ├── GETTING_STARTED.md ├── INSTALL.md ├── MODEL_ZOO.md ├── Makefile ├── ROBUSTNESS_BENCHMARKING.md ├── TECHNICAL_DETAILS.md ├── conf.py ├── index.rst ├── make.bat └── requirements.txt ├── mmdet ├── __init__.py ├── apis │ ├── __init__.py │ ├── inference.py │ ├── test.py │ └── train.py ├── core │ ├── __init__.py │ ├── anchor │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ ├── anchor_target.py │ │ ├── guided_anchor_target.py │ │ ├── point_generator.py │ │ └── point_target.py │ ├── bbox │ │ ├── __init__.py │ │ ├── assign_sampling.py │ │ ├── assigners │ │ │ ├── __init__.py │ │ │ ├── approx_max_iou_assigner.py │ │ │ ├── assign_result.py │ │ │ ├── atss_assigner.py │ │ │ ├── base_assigner.py │ │ │ ├── max_iou_assigner.py │ │ │ └── point_assigner.py │ │ ├── bbox_target.py │ │ ├── demodata.py │ │ ├── geometry.py │ │ ├── samplers │ │ │ ├── __init__.py │ │ │ ├── base_sampler.py │ │ │ ├── combined_sampler.py │ │ │ ├── instance_balanced_pos_sampler.py │ │ │ ├── iou_balanced_neg_sampler.py │ │ │ ├── ohem_sampler.py │ │ │ ├── pseudo_sampler.py │ │ │ ├── random_sampler.py │ │ │ └── sampling_result.py │ │ └── transforms.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── bbox_overlaps.py │ │ ├── class_names.py │ │ ├── eval_hooks.py │ │ ├── mean_ap.py │ │ └── recall.py │ ├── fp16 │ │ ├── __init__.py │ │ ├── decorators.py │ │ ├── hooks.py │ │ └── utils.py │ ├── mask │ │ ├── __init__.py │ │ ├── mask_target.py │ │ └── utils.py │ ├── optimizer │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── copy_of_sgd.py │ │ └── registry.py │ ├── post_processing │ │ ├── __init__.py │ │ ├── bbox_nms.py │ │ └── merge_augs.py │ └── utils │ │ ├── __init__.py │ │ ├── dist_utils.py │ │ └── misc.py ├── datasets │ ├── __init__.py │ ├── base_dataset.py │ ├── builder.py │ ├── cityscapes.py │ ├── coco.py │ ├── custom.py │ ├── dataset_wrappers.py │ ├── openimages_dataset.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── compose.py │ │ ├── formating.py │ │ ├── instaboost.py │ │ ├── loading.py │ │ ├── test_aug.py │ │ └── transforms.py │ ├── registry.py │ ├── samplers │ │ ├── __init__.py │ │ ├── distributed_classaware_sampler.py │ │ ├── distributed_sampler.py │ │ └── group_sampler.py │ ├── utils │ │ ├── __init__.py │ │ ├── eval.py │ │ ├── metrics.py │ │ ├── np_box_list.py │ │ ├── np_box_list_ops.py │ │ ├── np_box_mask_list.py │ │ ├── np_box_mask_list_ops.py │ │ ├── np_box_ops.py │ │ ├── np_mask_ops.py │ │ └── per_image_evaluation.py │ ├── voc.py │ ├── wider_face.py │ └── xml_style.py ├── models │ ├── __init__.py │ ├── anchor_heads │ │ ├── __init__.py │ │ ├── anchor_head.py │ │ ├── atss_head.py │ │ ├── fcos_head.py │ │ ├── fovea_head.py │ │ ├── free_anchor_retina_head.py │ │ ├── ga_retina_head.py │ │ ├── ga_rpn_head.py │ │ ├── guided_anchor_head.py │ │ ├── reppoints_head.py │ │ ├── retina_head.py │ │ ├── retina_sepbn_head.py │ │ ├── rpn_head.py │ │ └── ssd_head.py │ ├── backbones │ │ ├── __init__.py │ │ ├── hrnet.py │ │ ├── resnet.py │ │ ├── resnext.py │ │ └── ssd_vgg.py │ ├── bbox_heads │ │ ├── __init__.py │ │ ├── bbox_head.py │ │ ├── convfc_bbox_head.py │ │ ├── double_bbox_head.py │ │ └── tsd_bbox_head.py │ ├── builder.py │ ├── 
detectors │ │ ├── __init__.py │ │ ├── atss.py │ │ ├── base.py │ │ ├── cascade_rcnn.py │ │ ├── double_head_rcnn.py │ │ ├── fast_rcnn.py │ │ ├── faster_rcnn.py │ │ ├── fcos.py │ │ ├── fovea.py │ │ ├── grid_rcnn.py │ │ ├── htc.py │ │ ├── mask_rcnn.py │ │ ├── mask_scoring_rcnn.py │ │ ├── reppoints_detector.py │ │ ├── retinanet.py │ │ ├── rpn.py │ │ ├── single_stage.py │ │ ├── test_mixins.py │ │ └── two_stage.py │ ├── losses │ │ ├── __init__.py │ │ ├── accuracy.py │ │ ├── balanced_l1_loss.py │ │ ├── cross_entropy_loss.py │ │ ├── focal_loss.py │ │ ├── ghm_loss.py │ │ ├── iou_loss.py │ │ ├── mse_loss.py │ │ ├── smooth_l1_loss.py │ │ ├── trunc_cross_entropy.py │ │ └── utils.py │ ├── mask_heads │ │ ├── __init__.py │ │ ├── fcn_mask_head.py │ │ ├── fused_semantic_head.py │ │ ├── grid_head.py │ │ ├── htc_mask_head.py │ │ └── maskiou_head.py │ ├── necks │ │ ├── __init__.py │ │ ├── bfp.py │ │ ├── fpn.py │ │ ├── fpn_carafe.py │ │ ├── hrfpn.py │ │ └── nas_fpn.py │ ├── registry.py │ ├── roi_extractors │ │ ├── __init__.py │ │ └── single_level.py │ ├── shared_heads │ │ ├── __init__.py │ │ └── res_layer.py │ └── utils │ │ ├── __init__.py │ │ └── weight_init.py ├── ops │ ├── __init__.py │ ├── activation.py │ ├── affine_grid │ │ ├── __init__.py │ │ ├── affine_grid.py │ │ └── src │ │ │ └── affine_grid_cuda.cpp │ ├── carafe │ │ ├── __init__.py │ │ ├── carafe.py │ │ ├── grad_check.py │ │ ├── setup.py │ │ └── src │ │ │ ├── carafe_cuda.cpp │ │ │ ├── carafe_cuda_kernel.cu │ │ │ ├── carafe_naive_cuda.cpp │ │ │ └── carafe_naive_cuda_kernel.cu │ ├── context_block.py │ ├── conv.py │ ├── conv_module.py │ ├── conv_ws.py │ ├── dcn │ │ ├── __init__.py │ │ ├── deform_conv.py │ │ ├── deform_pool.py │ │ └── src │ │ │ ├── deform_conv_cuda.cpp │ │ │ ├── deform_conv_cuda_kernel.cu │ │ │ ├── deform_pool_cuda.cpp │ │ │ └── deform_pool_cuda_kernel.cu │ ├── generalized_attention.py │ ├── grid_sampler │ │ ├── __init__.py │ │ ├── grid_sampler.py │ │ └── src │ │ │ ├── cpu │ │ │ ├── grid_sampler_cpu.cpp │ │ │ └── grid_sampler_cpu.h │ │ │ ├── cuda │ │ │ ├── grid_sampler_cuda.cu │ │ │ └── grid_sampler_cuda.cuh │ │ │ ├── cudnn │ │ │ └── grid_sampler_cudnn.cpp │ │ │ └── grid_sampler.cpp │ ├── masked_conv │ │ ├── __init__.py │ │ ├── masked_conv.py │ │ └── src │ │ │ ├── masked_conv2d_cuda.cpp │ │ │ └── masked_conv2d_kernel.cu │ ├── nms │ │ ├── __init__.py │ │ ├── nms_wrapper.py │ │ └── src │ │ │ ├── nms_cpu.cpp │ │ │ ├── nms_cuda.cpp │ │ │ └── nms_kernel.cu │ ├── non_local.py │ ├── norm.py │ ├── roi_align │ │ ├── __init__.py │ │ ├── gradcheck.py │ │ ├── roi_align.py │ │ └── src │ │ │ ├── roi_align_cuda.cpp │ │ │ ├── roi_align_kernel.cu │ │ │ └── roi_align_kernel_v2.cu │ ├── roi_pool │ │ ├── __init__.py │ │ ├── gradcheck.py │ │ ├── roi_pool.py │ │ └── src │ │ │ ├── roi_pool_cuda.cpp │ │ │ └── roi_pool_kernel.cu │ ├── scale.py │ ├── sigmoid_focal_loss │ │ ├── __init__.py │ │ ├── sigmoid_focal_loss.py │ │ └── src │ │ │ ├── sigmoid_focal_loss.cpp │ │ │ └── sigmoid_focal_loss_cuda.cu │ ├── upsample.py │ └── utils │ │ ├── __init__.py │ │ └── src │ │ └── compiling_info.cpp └── utils │ ├── __init__.py │ ├── collect_env.py │ ├── contextmanagers.py │ ├── flops_counter.py │ ├── logger.py │ ├── profiling.py │ ├── registry.py │ └── util_mixins.py ├── pytest.ini ├── requirements.txt ├── requirements ├── build.txt ├── optional.txt ├── runtime.txt └── tests.txt ├── setup.py ├── tests ├── async_benchmark.py ├── test_assigner.py ├── test_async.py ├── test_config.py ├── test_forward.py ├── test_heads.py ├── test_nms.py ├── test_sampler.py ├── test_soft_nms.py └── 
test_utils.py └── tools ├── analyze_logs.py ├── browse_dataset.py ├── coco_error_analysis.py ├── convert_datasets ├── cityscapes.py └── pascal_voc.py ├── detectron2pytorch.py ├── dist_test.sh ├── dist_train.sh ├── fuse_conv_bn.py ├── get_flops.py ├── publish_model.py ├── pytorch2onnx.py ├── robustness_eval.py ├── slurm_test.sh ├── slurm_test_openimage.sh ├── slurm_train.sh ├── test.py ├── test_openimages.py ├── test_robustness.py ├── train.py └── upgrade_model_version.py /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to mmdetection 2 | 3 | All kinds of contributions are welcome, including but not limited to the following. 4 | 5 | - Fixes (typos, bugs) 6 | - New features and components 7 | 8 | ## Workflow 9 | 10 | 1. fork and pull the latest mmdetection 11 | 2. checkout a new branch (do not use the master branch for PRs) 12 | 3. commit your changes 13 | 4. create a PR 14 | 15 | Note 16 | - If you plan to add new features that involve large changes, it is encouraged to open an issue for discussion first. 17 | - If you are the author of some papers and would like to include your method in mmdetection, 18 | please contact Kai Chen (chenkaidev[at]gmail[dot]com). We will greatly appreciate your contribution. 19 | 20 | ## Code style 21 | 22 | ### Python 23 | We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style. 24 | 25 | We use the following tools for linting and formatting: 26 | - [flake8](http://flake8.pycqa.org/en/latest/): linter 27 | - [yapf](https://github.com/google/yapf): formatter 28 | - [isort](https://github.com/timothycrosley/isort): sort imports 29 | 30 | Style configurations for yapf and isort can be found in [.style.yapf](../.style.yapf) and [.isort.cfg](../.isort.cfg). 31 | 32 | We use a [pre-commit hook](https://pre-commit.com/) that checks and formats code with `flake8`, `yapf` and `isort`, trims `trailing whitespaces`, 33 | fixes `end-of-files`, and sorts `requirements.txt` automatically on every commit. 34 | The config for the pre-commit hook is stored in [.pre-commit-config](../.pre-commit-config.yaml). 35 | 36 | After you clone the repository, you will need to install and initialize the pre-commit hook. 37 | 38 | ``` 39 | pip install -U pre-commit 40 | ``` 41 | 42 | Then, from the repository folder: 43 | ``` 44 | pre-commit install 45 | ``` 46 | 47 | After this, the linters and formatter will be enforced on every commit. 48 | 49 | 50 | >Before you create a PR, make sure that your code lints and is formatted by yapf. 51 | 52 | ### C++ and CUDA 53 | We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html). -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/error-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Error report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | Thanks for your error report; we appreciate it a lot. 11 | 12 | **Checklist** 13 | 1. I have searched related issues but cannot get the expected help. 14 | 2. The bug has not been fixed in the latest version.
15 | 16 | **Describe the bug** 17 | A clear and concise description of what the bug is. 18 | 19 | **Reproduction** 20 | 1. What command or script did you run? 21 | ``` 22 | A placeholder for the command. 23 | ``` 24 | 2. Did you make any modifications to the code or config? Do you understand what you have modified? 25 | 3. What dataset did you use? 26 | 27 | **Environment** 28 | 29 | 1. Please run `python mmdet/utils/collect_env.py` to collect the necessary environment information and paste it here. 30 | 2. You may add additional information that may be helpful for locating the problem, such as 31 | - How you installed PyTorch [e.g., pip, conda, source] 32 | - Other environment variables that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.) 33 | 34 | **Error traceback** 35 | If applicable, paste the error traceback here. 36 | ``` 37 | A placeholder for the traceback. 38 | ``` 39 | 40 | **Bug fix** 41 | If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here and that would be much appreciated! 42 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the feature** 11 | 12 | **Motivation** 13 | A clear and concise description of the motivation for the feature. 14 | Ex1. It is inconvenient when [....]. 15 | Ex2. There is a recent paper [....], which is very helpful for [....]. 16 | 17 | **Related resources** 18 | If there is an official code release or third-party implementations, please also provide the information here, which would be very helpful. 19 | 20 | **Additional context** 21 | Add any other context or screenshots about the feature request here. 22 | If you would like to implement the feature and create a PR, please leave a comment here and that would be much appreciated. 23 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/general_questions.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: General questions 3 | about: Ask general questions to get help 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | exp/ 23 | mmcv/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | .pytest_cache/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | db.sqlite3 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # Environments 87 | .env 88 | .venv 89 | env/ 90 | venv/ 91 | ENV/ 92 | env.bak/ 93 | venv.bak/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | 108 | mmdet/version.py 109 | data 110 | .vscode 111 | .idea 112 | .DS_Store 113 | 114 | # custom 115 | *.pkl 116 | *.pkl.json 117 | *.log.json 118 | work_dirs/ 119 | 120 | # Pytorch 121 | *.pth 122 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length = 79 3 | multi_line_output = 0 4 | known_standard_library = setuptools 5 | known_first_party = mmdet 6 | known_third_party = asynctest,cityscapesscripts,cv2,matplotlib,mmcv,numpy,onnx,pycocotools,robustness_eval,roi_align,roi_pool,seaborn,six,terminaltables,torch,torchvision 7 | no_lines_before = STDLIB,LOCALFOLDER 8 | default_section = THIRDPARTY 9 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://gitlab.com/pycqa/flake8.git 3 | rev: 3.7.9 4 | hooks: 5 | - id: flake8 6 | - repo: https://github.com/asottile/seed-isort-config 7 | rev: v2.1.0 8 | hooks: 9 | - id: seed-isort-config 10 | - repo: https://github.com/timothycrosley/isort 11 | rev: 4.3.21 12 | hooks: 13 | - id: isort 14 | - repo: https://github.com/pre-commit/mirrors-yapf 15 | rev: v0.29.0 16 | hooks: 17 | - id: yapf 18 | - repo: https://github.com/pre-commit/pre-commit-hooks 19 | rev: v2.5.0 20 | hooks: 21 | - id: trailing-whitespace 22 | - id: check-yaml 23 | - id: end-of-file-fixer 24 | - id: requirements-txt-fixer 25 | - id: double-quote-string-fixer 26 | - id: fix-encoding-pragma 27 | args: ["--remove"] 28 | -------------------------------------------------------------------------------- /.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | BASED_ON_STYLE = pep8 3 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true 4 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: bionic # ubuntu 18.04 2 | language: python 3 | 4 | python: 5 | - "3.5" 6 | - "3.6" 7 | - "3.7" 8 | 9 | env: CUDA=10.1.105-1 CUDA_SHORT=10.1 UBUNTU_VERSION=ubuntu1804 FORCE_CUDA=1 10 | cache: pip 11 | 12 | # Ref to CUDA installation in Travis: 
https://github.com/jeremad/cuda-travis 13 | before_install: 14 | - INSTALLER=cuda-repo-${UBUNTU_VERSION}_${CUDA}_amd64.deb 15 | - wget http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/${INSTALLER} 16 | - sudo dpkg -i ${INSTALLER} 17 | - wget https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/7fa2af80.pub 18 | - sudo apt-key add 7fa2af80.pub 19 | - sudo apt update -qq 20 | - sudo apt install -y cuda-${CUDA_SHORT/./-} cuda-cufft-dev-${CUDA_SHORT/./-} 21 | - sudo apt clean 22 | - CUDA_HOME=/usr/local/cuda-${CUDA_SHORT} 23 | - LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${CUDA_HOME}/include:${LD_LIBRARY_PATH} 24 | - PATH=${CUDA_HOME}/bin:${PATH} 25 | 26 | install: 27 | - pip install Pillow==6.2.2 # remove this line when torchvision>=0.5 28 | - pip install torch==1.2 torchvision==0.4.0 # TODO: fix CI for pytorch>1.2 29 | - pip install "git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI" 30 | - pip install -r requirements.txt 31 | 32 | before_script: 33 | - flake8 . 34 | - isort -rc --check-only --diff mmdet/ tools/ tests/ 35 | - yapf -r -d --style .style.yapf mmdet/ tools/ tests/ configs/ 36 | 37 | script: 38 | - python setup.py check -m -s 39 | - python setup.py build_ext --inplace 40 | - coverage run --source mmdet -m py.test -v --xdoctest-modules tests mmdet 41 | 42 | after_success: 43 | - coverage report 44 | -------------------------------------------------------------------------------- /configs/atss/README.md: -------------------------------------------------------------------------------- 1 | # Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training Sample Selection 2 | 3 | 4 | ## Introduction 5 | 6 | ``` 7 | @article{zhang2019bridging, 8 | title = {Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training Sample Selection}, 9 | author = {Zhang, Shifeng and Chi, Cheng and Yao, Yongqiang and Lei, Zhen and Li, Stan Z.}, 10 | journal = {arXiv preprint arXiv:1912.02424}, 11 | year = {2019} 12 | } 13 | ``` 14 | 15 | 16 | ## Results and Models 17 | 18 | | Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 19 | |:---------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:| 20 | | R-50 | pytorch | 1x | 3.6 | 0.357 | 12.8 | 39.2 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/atss/atss_r50_fpn_1x_20200113-a7aa251e.pth) | 21 | -------------------------------------------------------------------------------- /configs/carafe/README.md: -------------------------------------------------------------------------------- 1 | # CARAFE: Content-Aware ReAssembly of FEatures 2 | 3 | ## Introduction 4 | 5 | We provide config files to reproduce the object detection & instance segmentation results of the ICCV 2019 oral paper [CARAFE: Content-Aware ReAssembly of FEatures](https://arxiv.org/abs/1905.02188). 6 | 7 | ``` 8 | @inproceedings{Wang_2019_ICCV, 9 | title = {CARAFE: Content-Aware ReAssembly of FEatures}, 10 | author = {Wang, Jiaqi and Chen, Kai and Xu, Rui and Liu, Ziwei and Loy, Chen Change and Lin, Dahua}, 11 | booktitle = {The IEEE International Conference on Computer Vision (ICCV)}, 12 | month = {October}, 13 | year = {2019} 14 | } 15 | ``` 16 | 17 | ## Results and Models 18 | 19 | The results on COCO 2017 val are shown in the table below.
20 | 21 | | Method | Backbone | Style | Lr schd | Test Proposal Num | Box AP | Mask AP | Download | 22 | | :--------------------: | :-------------: | :-----: | :-----: | :--------------: | :----: | :--------: | :----------------------------------------------------------------------------------------------------: | 23 | | Faster R-CNN w/ CARAFE | R-50-FPN | pytorch | 1x | 1000 | 37.8 | - | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/carafe/faster_rcnn_r50_fpn_carafe_1x-2ca2d094.pth) | 24 | | - | - | - | - | 2000 | 37.9 | - | - | 25 | | Mask R-CNN w/ CARAFE | R-50-FPN | pytorch | 1x | 1000 | 38.6 | 35.6 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/carafe/mask_rcnn_r50_fpn_carafe_1x-2cc4b9fe.pth) | 26 | | - | - | - | - | 2000 | 38.6 | 35.7 | - | 27 | 28 | ## Implementation 29 | 30 | The CUDA implementation of CARAFE can be found at `mmdet/ops/carafe` in this repository. 31 | 32 | ## Setup CARAFE 33 | 34 | a. Use CARAFE in mmdetection. 35 | 36 | Install mmdetection following the official guide. 37 | 38 | b. Use CARAFE in your own project. 39 | 40 | Clone mmdetection. 41 | ```shell 42 | git clone https://github.com/open-mmlab/mmdetection.git 43 | cd mmdetection 44 | ``` 45 | Set up CARAFE in your project. 46 | ```shell 47 | cp -r ./mmdet/ops/carafe $Your_Project_Path$ 48 | cd $Your_Project_Path$/carafe 49 | python setup.py develop 50 | # or "pip install -v -e ." 51 | cd .. 52 | python ./carafe/grad_check.py 53 | ``` 54 | -------------------------------------------------------------------------------- /configs/cityscapes/README.md: -------------------------------------------------------------------------------- 1 | ## Common settings 2 | 3 | - All baselines were trained using 8 GPUs with a batch size of 8 (1 image per GPU), using the [linear scaling rule](https://arxiv.org/abs/1706.02677) to scale the learning rate (a short sketch of the rule follows this list). 4 | - All models were trained on `cityscapes_train` and tested on `cityscapes_val`. 5 | - The 1x training schedule indicates 64 epochs, which corresponds to slightly less than the 24k iterations reported in the original schedule from the [Mask R-CNN paper](https://arxiv.org/abs/1703.06870). 6 | - COCO pre-trained weights are used for initialization. 7 | - A conversion [script](../../tools/convert_datasets/cityscapes.py) is provided to convert Cityscapes into COCO format. Please refer to [INSTALL.md](../../docs/INSTALL.md#prepare-datasets) for details. 8 | - `CityscapesDataset` implements three evaluation methods. `bbox` and `segm` are standard COCO bbox/mask AP. `cityscapes` is the official Cityscapes evaluation, which may give slightly higher numbers than the COCO-style metrics.
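As a quick illustration of the linear scaling rule referenced in the first bullet, here is a minimal sketch. The reference point (`base_lr=0.02` for a total batch size of 16) is an assumption based on common mmdetection defaults, not a value stated in this README.

```python
# Linear scaling rule (Goyal et al., 2017): scale the learning rate
# proportionally to the total batch size.
def scale_lr(base_lr, base_batch_size, num_gpus, imgs_per_gpu):
    """Return the learning rate rescaled for the actual total batch size."""
    batch_size = num_gpus * imgs_per_gpu
    return base_lr * batch_size / base_batch_size

# Assumed reference point: lr=0.02 at 8 GPUs x 2 imgs/GPU (batch size 16).
# For the Cityscapes setting above (8 GPUs x 1 img/GPU, batch size 8):
print(scale_lr(0.02, 16, 8, 1))  # -> 0.01
```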
9 | 10 | 11 | ### Faster R-CNN 12 | 13 | | Backbone | Style | Lr schd | Scale | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 14 | | :-------------: | :-----: | :-----: | :---: | :------: | :-----------------: | :------------: | :----: | :------: | 15 | | R-50-FPN | pytorch | 1x | 800-1024 | 4.9 | - | - | 41.6 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes_20200227-362cfbbf.pth) | 16 | 17 | ### Mask R-CNN 18 | 19 | | Backbone | Style | Lr schd | Scale | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download | 20 | | :-------------: | :-----: | :-----: | :------: | :------: | :-----------------: | :------------: | :----: | :-----: | :------: | 21 | | R-50-FPN | pytorch | 1x | 800-1024 | 4.9 | - | - | 41.9 | 37.1 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes_20200227-afe51d5a.pth) | 22 | -------------------------------------------------------------------------------- /configs/empirical_attention/README.md: -------------------------------------------------------------------------------- 1 | # An Empirical Study of Spatial Attention Mechanisms in Deep Networks 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @article{zhu2019empirical, 7 | title={An Empirical Study of Spatial Attention Mechanisms in Deep Networks}, 8 | author={Zhu, Xizhou and Cheng, Dazhi and Zhang, Zheng and Lin, Stephen and Dai, Jifeng}, 9 | journal={arXiv preprint arXiv:1904.05873}, 10 | year={2019} 11 | } 12 | ``` 13 | 14 | 15 | ## Results and Models 16 | 17 | | Backbone | Attention Component | DCN | Lr schd | box AP | Download | 18 | |:---------:|:-------------------:|:----:|:-------:|:------:|:--------:| 19 | | R-50 | 1111 | N | 1x | 38.6 | - | 20 | | R-50 | 0010 | N | 1x | 38.2 | - | 21 | | R-50 | 1111 | Y | 1x | 41.0 | - | 22 | | R-50 | 0010 | Y | 1x | 40.8 | - | 23 | -------------------------------------------------------------------------------- /configs/fcos/README.md: -------------------------------------------------------------------------------- 1 | # FCOS: Fully Convolutional One-Stage Object Detection 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @article{tian2019fcos, 7 | title={FCOS: Fully Convolutional One-Stage Object Detection}, 8 | author={Tian, Zhi and Shen, Chunhua and Chen, Hao and He, Tong}, 9 | journal={arXiv preprint arXiv:1904.01355}, 10 | year={2019} 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Backbone | Style | GN | MS train | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 17 | |:---------:|:-------:|:-------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:| 18 | | R-50 | caffe | N | N | 1x | 5.5 | 0.373 | 13.7 | 35.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_caffe_fpn_1x_4gpu_20190516-a7cac5ff.pth) | 19 | | R-50 | caffe | Y | N | 1x | 6.9 | 0.396 | 13.6 | 36.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_caffe_fpn_gn_1x_4gpu_20190516-9f253a93.pth) | 20 | | R-50 | caffe | Y | N | 2x | - | - | - | 36.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_caffe_fpn_gn_2x_4gpu_20190516_-93484354.pth) | 21 | | R-101 | caffe | Y | N | 1x | 10.4 | 0.558 | 11.6 | 39.1 | 
[model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r101_caffe_fpn_gn_1x_4gpu_20190516-e4889733.pth) | 22 | | R-101 | caffe | Y | N | 2x | - | - | - | 39.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r101_caffe_fpn_gn_2x_4gpu_20190516-c03af97b.pth) | 23 | 24 | 25 | | Backbone | Style | GN | MS train | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 26 | |:---------:|:-------:|:-------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:| 27 | | R-50 | caffe | Y | Y | 2x | - | - | - | 38.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_mstrain_640_800_r50_caffe_fpn_gn_2x_4gpu_20190516-f7329d80.pth) | 28 | | R-101 | caffe | Y | Y | 2x | - | - | - | 40.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu_20190516-42e6f62d.pth) | 29 | | X-101 | caffe | Y | Y | 2x | 9.7 | 0.892 | 7.0 | 42.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x_20190516-a36c0872.pth) | 30 | 31 | **Notes:** 32 | - To be consistent with the author's implementation, we use 4 GPUs with 4 images/GPU for R-50 and R-101 models, and 8 GPUs with 2 images/GPU for X-101 models. 33 | - The X-101 backbone is X-101-64x4d. 34 | -------------------------------------------------------------------------------- /configs/free_anchor/README.md: -------------------------------------------------------------------------------- 1 | # FreeAnchor: Learning to Match Anchors for Visual Object Detection 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{zhang2019freeanchor, 7 | title = {{FreeAnchor}: Learning to Match Anchors for Visual Object Detection}, 8 | author = {Zhang, Xiaosong and Wan, Fang and Liu, Chang and Ji, Rongrong and Ye, Qixiang}, 9 | booktitle = {Neural Information Processing Systems}, 10 | year = {2019} 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 17 | |:---------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:| 18 | | R-50 | pytorch | 1x | 4.7 | 0.322 | 12.0 | 38.4 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/free_anchor/retinanet_free_anchor_r50_fpn_1x_20190914-84db6585.pth) | 19 | | R-101 | pytorch | 1x | 6.6 | 0.437 | 9.7 | 40.3 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/free_anchor/retinanet_free_anchor_r101_fpn_1x_20190914-c4e4db81.pth) | 20 | | X-101-32x4d | pytorch | 1x | 7.8 | 0.640 | 8.4 | 42.0 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/free_anchor/retinanet_free_anchor_x101-32x4d_fpn_1x_20190914-eb73b804.pth) | 21 | 22 | **Notes:** 23 | - We use 8 GPUs with 2 images/GPU. 24 | - For more settings and models, please refer to the [official repo](https://github.com/zhangxiaosong18/FreeAnchor).
25 | -------------------------------------------------------------------------------- /configs/ghm/README.md: -------------------------------------------------------------------------------- 1 | # Gradient Harmonized Single-stage Detector 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{li2019gradient, 7 | title={Gradient Harmonized Single-stage Detector}, 8 | author={Li, Buyu and Liu, Yu and Wang, Xiaogang}, 9 | booktitle={AAAI Conference on Artificial Intelligence}, 10 | year={2019} 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 17 | | :-------------: | :-----: | :-----: | :------: | :-----------------: | :------------: | :----: | :------: | 18 | | R-50-FPN | pytorch | 1x | 3.9 | 0.500 | 9.4 | 36.9 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_r50_fpn_1x_20190608-b9aa5862.pth) | 19 | | R-101-FPN | pytorch | 1x | 5.8 | 0.625 | 8.5 | 39.0 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_r101_fpn_1x_20190608-b885b74a.pth) | 20 | | X-101-32x4d-FPN | pytorch | 1x | 7.0 | 0.818 | 7.6 | 40.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_x101_32x4d_fpn_1x_20190608-ed295d22.pth) | 21 | | X-101-64x4d-FPN | pytorch | 1x | 9.9 | 1.191 | 6.1 | 41.6 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_x101_64x4d_fpn_1x_20190608-7f2037ce.pth) | 22 | -------------------------------------------------------------------------------- /configs/gn/README.md: -------------------------------------------------------------------------------- 1 | # Group Normalization 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{wu2018group, 7 | title={Group Normalization}, 8 | author={Wu, Yuxin and He, Kaiming}, 9 | booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, 10 | year={2018} 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Backbone | model | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download | 17 | |:-------------:|:----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:| 18 | | R-50-FPN (d) | Mask R-CNN | 2x | 7.2 | 0.806 | 5.4 | 39.8 | 36.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_2x_20180113-86832cf2.pth) | 19 | | R-50-FPN (d) | Mask R-CNN | 3x | 7.2 | 0.806 | 5.4 | 40.1 | 36.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_3x_20180113-8e82f48d.pth) | 20 | | R-101-FPN (d) | Mask R-CNN | 2x | 9.9 | 0.970 | 4.8 | 41.5 | 37.0 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r101_fpn_gn_2x_20180113-9598649c.pth) | 21 | | R-101-FPN (d) | Mask R-CNN | 3x | 9.9 | 0.970 | 4.8 | 41.6 | 37.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r101_fpn_gn_3x_20180113-a14ffb96.pth) | 22 | | R-50-FPN (c) | Mask R-CNN | 2x | 7.2 | 0.806 | 5.4 | 39.7 | 35.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_contrib_2x_20180113-ec93305c.pth) | 23 | | R-50-FPN (c) | Mask R-CNN | 3x | 7.2 | 0.806 | 5.4 | 40.0 | 36.2 | 
[model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_contrib_3x_20180113-9d230cab.pth) | 24 | 25 | **Notes:** 26 | - (d) denotes a pretrained model converted from Detectron, and (c) denotes the contributed model pretrained by [@thangvubk](https://github.com/thangvubk). 27 | - The `3x` schedule is epoch [28, 34, 36]. 28 | - **Memory and train/inference times are outdated.** 29 | -------------------------------------------------------------------------------- /configs/grid_rcnn/README.md: -------------------------------------------------------------------------------- 1 | # Grid R-CNN 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{lu2019grid, 7 | title={Grid r-cnn}, 8 | author={Lu, Xin and Li, Buyu and Yue, Yuxin and Li, Quanquan and Yan, Junjie}, 9 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, 10 | year={2019} 11 | } 12 | 13 | @article{lu2019grid, 14 | title={Grid R-CNN Plus: Faster and Better}, 15 | author={Lu, Xin and Li, Buyu and Yue, Yuxin and Li, Quanquan and Yan, Junjie}, 16 | journal={arXiv preprint arXiv:1906.05688}, 17 | year={2019} 18 | } 19 | ``` 20 | 21 | ## Results and Models 22 | 23 | | Backbone | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 24 | |:-----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:| 25 | | R-50 | 2x | 4.8 | 1.172 | 10.9 | 40.3 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_r50_fpn_2x_20190619-5b29cf9d.pth) | 26 | | R-101 | 2x | 6.7 | 1.214 | 10.0 | 41.7 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_r101_fpn_2x_20190619-a4b61645.pth) | 27 | | X-101-32x4d | 2x | 8.0 | 1.335 | 8.5 | 43.0 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_x101_32x4d_fpn_2x_20190619-0bbfd87a.pth) | 28 | | X-101-64x4d | 2x | 10.9 | 1.753 | 6.4 | 43.1 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_x101_64x4d_fpn_2x_20190619-8f4e20bb.pth) | 29 | 30 | **Notes:** 31 | - All models are trained with 8 GPUs instead of the 32 GPUs used in the original paper. 32 | - The warmup lasts for 1 epoch and `2x` here indicates 25 epochs. 33 | -------------------------------------------------------------------------------- /configs/libra_rcnn/README.md: -------------------------------------------------------------------------------- 1 | # Libra R-CNN: Towards Balanced Learning for Object Detection 2 | 3 | ## Introduction 4 | 5 | We provide config files to reproduce the results in the CVPR 2019 paper [Libra R-CNN](https://arxiv.org/pdf/1904.02701.pdf). 6 | 7 | ``` 8 | @inproceedings{pang2019libra, 9 | title={Libra R-CNN: Towards Balanced Learning for Object Detection}, 10 | author={Pang, Jiangmiao and Chen, Kai and Shi, Jianping and Feng, Huajun and Ouyang, Wanli and Lin, Dahua}, 11 | booktitle={IEEE Conference on Computer Vision and Pattern Recognition}, 12 | year={2019} 13 | } 14 | ``` 15 | 16 | ## Results and models 17 | 18 | The results on COCO 2017 val are shown in the table below.
(results on test-dev are usually slightly higher than val) 19 | 20 | | Architecture | Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 21 | |:---------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:| 22 | | Faster R-CNN | R-50-FPN | pytorch | 1x | 4.2 | 0.375 | 12.0 | 38.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_20190610-bf0ea559.pth) | 23 | | Fast R-CNN | R-50-FPN | pytorch | 1x | 3.7 | 0.272 | 16.3 | 38.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_fast_rcnn_r50_fpn_1x_20190525-a43f88b5.pth) | 24 | | Faster R-CNN | R-101-FPN | pytorch | 1x | 6.0 | 0.495 | 10.4 | 40.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_20190525-94e94051.pth) | 25 | | Faster R-CNN | X-101-64x4d-FPN | pytorch | 1x | 10.1 | 1.050 | 6.8 | 42.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_20190525-359c134a.pth) | 26 | | RetinaNet | R-50-FPN | pytorch | 1x | 3.7 | 0.328 | 11.8 | 37.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_retinanet_r50_fpn_1x_20190525-ead2a6bb.pth) | 27 | -------------------------------------------------------------------------------- /configs/ms_rcnn/README.md: -------------------------------------------------------------------------------- 1 | # Mask Scoring R-CNN 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{huang2019msrcnn, 7 | title={Mask Scoring R-CNN}, 8 | author={Zhaojin Huang and Lichao Huang and Yongchao Gong and Chang Huang and Xinggang Wang}, 9 | booktitle={IEEE Conference on Computer Vision and Pattern Recognition}, 10 | year={2019}, 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Backbone | style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download | 17 | |:-------------:|:----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:| 18 | | R-50-FPN | caffe | 1x | 4.3 | 0.537 | 10.1 | 37.4 | 35.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_r50_caffe_fpn_1x_20190624-619934b5.pth) | 19 | | R-50-FPN | caffe | 2x | - | - | - | 38.2 | 35.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ms-rcnn/ms_rcnn_r50_caffe_fpn_2x_20190525-a07be31e.pth) | 20 | | R-101-FPN | caffe | 1x | 6.2 | 0.682 | 9.1 | 39.8 | 37.2 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_r101_caffe_fpn_1x_20190624-677a5548.pth) | 21 | | R-101-FPN | caffe | 2x | - | - | - | 40.7 | 37.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ms-rcnn/ms_rcnn_r101_caffe_fpn_2x_20190525-4aee1528.pth) | 22 | | R-X101-32x4d | pytorch | 2x | 7.6 | 0.844 | 8.0 | 41.7 | 38.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_x101_32x4d_fpn_2x_20190628-ab454d07.pth) | 23 | | R-X101-64x4d | pytorch | 1x | 10.5 | 1.214 | 6.4 | 42.0 | 39.1 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_x101_64x4d_fpn_1x_20190628-dec32bda.pth) | 24 | | R-X101-64x4d | pytorch | 2x | - | - | - | 42.2 | 38.9 | 
[model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ms-rcnn/ms_rcnn_x101_64x4d_fpn_2x_20190525-c044c25a.pth) | 25 | -------------------------------------------------------------------------------- /configs/nas_fpn/README.md: -------------------------------------------------------------------------------- 1 | # NAS-FPN: Learning Scalable Feature Pyramid Architecture for Object Detection 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{ghiasi2019fpn, 7 | title={Nas-fpn: Learning scalable feature pyramid architecture for object detection}, 8 | author={Ghiasi, Golnaz and Lin, Tsung-Yi and Le, Quoc V}, 9 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, 10 | pages={7036--7045}, 11 | year={2019} 12 | } 13 | ``` 14 | 15 | ## Results and Models 16 | 17 | We benchmark the new training schedule (crop training, large batch, unfrozen BN, 50 epochs) introduced in NAS-FPN. RetinaNet is used in the paper. 18 | 19 | | Backbone | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 20 | |:-----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:| 21 | | R-50-FPN | 50e | 12.8 | 0.513 | 15.3 | 37.0 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/nas_fpn/retinanet_crop640_r50_fpn_50e_190824-4d75bfa0.pth) | 22 | | R-50-NASFPN | 50e | 14.8 | 0.662 | 13.1 | 39.8 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/nas_fpn/retinanet_crop640_r50_nasfpn_50e_20191225-b82d3a86.pth) | 23 | 24 | 25 | **Note**: We find that training NAS-FPN is unstable, and there is a small chance that results come out about 3% mAP lower. 26 | -------------------------------------------------------------------------------- /configs/pascal_voc/README.md: -------------------------------------------------------------------------------- 1 | ### SSD 2 | 3 | | Backbone | Size | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 4 | | :------: | :---: | :---: | :-----: | :------: | :-----------------: | :------------: | :----: | :------------------------------------------------------------------------------------------------------------------------------: | 5 | | VGG16 | 300 | caffe | 240e | 2.5 | 0.159 | 35.7 / 53.6 | 77.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ssd300_voc_vgg16_caffe_240e_20190501-7160d09a.pth) | 6 | | VGG16 | 512 | caffe | 240e | 4.3 | 0.214 | 27.5 / 35.9 | 80.0 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ssd512_voc_vgg16_caffe_240e_20190501-ff194be1.pth) | 7 | -------------------------------------------------------------------------------- /configs/reppoints/reppoints.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sense-X/TSD/fb1fdd7f14f3c136f4b849914977fae1d8d49398/configs/reppoints/reppoints.png -------------------------------------------------------------------------------- /configs/scratch/README.md: -------------------------------------------------------------------------------- 1 | # Rethinking ImageNet Pre-training 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @article{he2018rethinking, 7 | title={Rethinking imagenet pre-training}, 8 | author={He, Kaiming and Girshick, Ross and Doll{\'a}r, Piotr}, 9 | journal={arXiv preprint arXiv:1811.08883}, 10 | year={2018} 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Model | Backbone | Style | Lr schd | box AP | mask AP | Download |
17 | |:------------:|:---------:|:-------:|:-------:|:------:|:-------:|:--------:| 18 | | Faster R-CNN | R-50-FPN | pytorch | 6x | 40.1 | - | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/scratch/scratch_faster_rcnn_r50_fpn_gn_6x_20190515-ff554978.pth) | 19 | | Mask R-CNN | R-50-FPN | pytorch | 6x | 41.0 | 37.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/scratch/scratch_mask_rcnn_r50_fpn_gn_6x_20190515-96743f5e.pth) | 20 | 21 | Note: 22 | - The above models are trained with 16 GPUs. 23 | -------------------------------------------------------------------------------- /configs/wider_face/README.md: -------------------------------------------------------------------------------- 1 | ## WIDER Face Dataset 2 | 3 | To use the WIDER Face dataset you need to download it 4 | and extract it to the `data/WIDERFace` folder. Annotations in the VOC format 5 | can be found in this [repo](https://github.com/sovrasov/wider-face-pascal-voc-annotations.git). 6 | You should move the annotation files from the `WIDER_train_annotations` and `WIDER_val_annotations` folders 7 | to the `Annotations` folders inside the corresponding directories `WIDER_train` and `WIDER_val`. 8 | The annotation lists `val.txt` and `train.txt` should also be copied to `data/WIDERFace` from `WIDER_train_annotations` and `WIDER_val_annotations`. 9 | The directory structure should look like this: 10 | 11 | ``` 12 | mmdetection 13 | ├── mmdet 14 | ├── tools 15 | ├── configs 16 | ├── data 17 | │ ├── WIDERFace 18 | │ │ ├── WIDER_train 19 | │ │ │ ├── 0--Parade 20 | │ │ │ ├── ... 21 | │ │ │ ├── Annotations 22 | │ │ ├── WIDER_val 23 | │ │ │ ├── 0--Parade 24 | │ │ │ ├── ... 25 | │ │ │ ├── Annotations 26 | │ │ ├── val.txt 27 | │ │ ├── train.txt 28 | 29 | ``` 30 | 31 | After that you can train SSD300 on WIDER by launching training with the `ssd300_wider_face.py` config, or 32 | create your own config based on the presented one (a minimal sketch follows).
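If you go the custom-config route, here is a minimal sketch of the kind of fields you would typically override after copying `ssd300_wider_face.py`. The concrete values below are illustrative assumptions, not settings taken from this repository; the field names follow the usual mmdetection 1.x config conventions.

```python
# Hypothetical excerpt of a custom config derived from ssd300_wider_face.py.
# Copy the presented config and adjust fields like these to your setup.
dataset_type = 'WIDERFaceDataset'  # dataset class in mmdet/datasets/wider_face.py
data_root = 'data/WIDERFace/'

data = dict(
    imgs_per_gpu=8,       # assumed batch size; lower it if you run out of memory
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'train.txt',
        img_prefix=data_root + 'WIDER_train/'))

# If you change the total batch size, rescale the learning rate accordingly.
optimizer = dict(type='SGD', lr=1e-3, momentum=0.9, weight_decay=5e-4)
```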
33 | -------------------------------------------------------------------------------- /demo/TSD.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sense-X/TSD/fb1fdd7f14f3c136f4b849914977fae1d8d49398/demo/TSD.png -------------------------------------------------------------------------------- /demo/coco_test_12510.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sense-X/TSD/fb1fdd7f14f3c136f4b849914977fae1d8d49398/demo/coco_test_12510.jpg -------------------------------------------------------------------------------- /demo/corruptions_sev_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sense-X/TSD/fb1fdd7f14f3c136f4b849914977fae1d8d49398/demo/corruptions_sev_3.png -------------------------------------------------------------------------------- /demo/data_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sense-X/TSD/fb1fdd7f14f3c136f4b849914977fae1d8d49398/demo/data_pipeline.png -------------------------------------------------------------------------------- /demo/demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sense-X/TSD/fb1fdd7f14f3c136f4b849914977fae1d8d49398/demo/demo.jpg -------------------------------------------------------------------------------- /demo/loss_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sense-X/TSD/fb1fdd7f14f3c136f4b849914977fae1d8d49398/demo/loss_curve.png -------------------------------------------------------------------------------- /demo/webcam_demo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import cv2 4 | import torch 5 | 6 | from mmdet.apis import inference_detector, init_detector, show_result 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser(description="MMDetection webcam demo") 11 | parser.add_argument("config", help="test config file path") 12 | parser.add_argument("checkpoint", help="checkpoint file") 13 | parser.add_argument("--device", type=int, default=0, help="CUDA device id") 14 | parser.add_argument("--camera-id", type=int, default=0, help="camera device id") 15 | parser.add_argument( 16 | "--score-thr", type=float, default=0.5, help="bbox score threshold" 17 | ) 18 | args = parser.parse_args() 19 | return args 20 | 21 | 22 | def main(): 23 | args = parse_args() 24 | 25 | model = init_detector( 26 | args.config, args.checkpoint, device=torch.device("cuda", args.device) 27 | ) 28 | 29 | camera = cv2.VideoCapture(args.camera_id) 30 | 31 | print('Press "Esc", "q" or "Q" to exit.') 32 | while True: 33 | ret_val, img = camera.read() 34 | result = inference_detector(model, img) 35 | 36 | ch = cv2.waitKey(1) 37 | if ch == 27 or ch == ord("q") or ch == ord("Q"): 38 | break 39 | 40 | show_result(img, result, model.CLASSES, score_thr=args.score_thr, wait_time=1) 41 | 42 | 43 | if __name__ == "__main__": 44 | main() 45 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG PYTORCH="1.3" 2 | ARG CUDA="10.1" 3 | ARG CUDNN="7" 4 | 5 | FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel 
6 | 7 | ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" 8 | ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" 9 | ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" 10 | 11 | RUN apt-get update && apt-get install -y libglib2.0-0 libsm6 libxrender-dev libxext6 \ 12 | && apt-get clean \ 13 | && rm -rf /var/lib/apt/lists/* 14 | 15 | # Install mmdetection 16 | RUN conda clean --all 17 | RUN git clone https://github.com/open-mmlab/mmdetection.git /mmdetection 18 | WORKDIR /mmdetection 19 | ENV FORCE_CUDA="1" 20 | RUN pip install --no-cache-dir -e . 21 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | # -- Project information ----------------------------------------------------- 18 | 19 | project = "MMDetection" 20 | copyright = "2018-2020, OpenMMLab" 21 | author = "OpenMMLab" 22 | 23 | # The full version, including alpha/beta/rc tags 24 | release = "1.0.0" 25 | 26 | # -- General configuration --------------------------------------------------- 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 30 | # ones. 31 | extensions = [ 32 | "sphinx.ext.autodoc", 33 | "sphinx.ext.napoleon", 34 | "sphinx.ext.viewcode", 35 | "recommonmark", 36 | "sphinx_markdown_tables", 37 | ] 38 | 39 | autodoc_mock_imports = ["torch", "torchvision", "mmcv"] 40 | 41 | # Add any paths that contain templates here, relative to this directory. 42 | templates_path = ["_templates"] 43 | 44 | # The suffix(es) of source filenames. 45 | # You can specify multiple suffix as a list of string: 46 | # 47 | source_suffix = {".rst": "restructuredtext", ".md": "markdown"} 48 | 49 | # The master toctree document. 
50 | master_doc = "index" 51 | 52 | # List of patterns, relative to source directory, that match files and 53 | # directories to ignore when looking for source files. 54 | # This pattern also affects html_static_path and html_extra_path. 55 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 56 | 57 | # -- Options for HTML output ------------------------------------------------- 58 | 59 | # The theme to use for HTML and HTML Help pages. See the documentation for 60 | # a list of builtin themes. 61 | # 62 | html_theme = "sphinx_rtd_theme" 63 | 64 | # Add any paths that contain custom static files (such as style sheets) here, 65 | # relative to this directory. They are copied after the builtin static files, 66 | # so a file named "default.css" will overwrite the builtin "default.css". 67 | html_static_path = ["_static"] 68 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to MMDetection's documentation! 2 | ======================================= 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | INSTALL.md 8 | GETTING_STARTED.md 9 | MODEL_ZOO.md 10 | TECHNICAL_DETAILS.md 11 | CHANGELOG.md 12 | 13 | 14 | 15 | Indices and tables 16 | ================== 17 | 18 | * :ref:`genindex` 19 | * :ref:`search` 20 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | recommonmark 2 | sphinx 3 | sphinx_markdown_tables 4 | sphinx_rtd_theme 5 | -------------------------------------------------------------------------------- /mmdet/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__, short_version 2 | 3 | __all__ = ["__version__", "short_version"] 4 | -------------------------------------------------------------------------------- /mmdet/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .inference import ( 2 | async_inference_detector, 3 | inference_detector, 4 | init_detector, 5 | show_result, 6 | show_result_pyplot, 7 | ) 8 | from .test import multi_gpu_test, single_gpu_test 9 | from .train import get_root_logger, set_random_seed, train_detector 10 | 11 | __all__ = [ 12 | "get_root_logger", 13 | "set_random_seed", 14 | "train_detector", 15 | "init_detector", 16 | "async_inference_detector", 17 | "inference_detector", 18 | "show_result", 19 | "show_result_pyplot", 20 | "multi_gpu_test", 21 | "single_gpu_test", 22 | ] 23 | -------------------------------------------------------------------------------- /mmdet/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor import * # noqa: F401, F403 2 | from .bbox import * # noqa: F401, F403 3 | from .evaluation import * # noqa: F401, F403 4 | from .fp16 import * # noqa: F401, F403 5 | from .mask import * # noqa: F401, F403 6 | from .optimizer import * # noqa: F401, F403 7 | from .post_processing import * # noqa: F401, F403 8 | from .utils import * # noqa: F401, F403 9 | -------------------------------------------------------------------------------- /mmdet/core/anchor/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_generator import AnchorGenerator 2 | from .anchor_target import anchor_inside_flags, anchor_target, images_to_levels, unmap 3 | from .guided_anchor_target import ga_loc_target, ga_shape_target 4 | from .point_generator import PointGenerator 5 | from .point_target import point_target 6 | 7 | __all__ = [ 8 | "AnchorGenerator", 9 | "anchor_target", 10 | "anchor_inside_flags", 11 | "ga_loc_target", 12 | "ga_shape_target", 13 | "PointGenerator", 14 | "point_target", 15 | "images_to_levels", 16 | "unmap", 17 | ] 18 | -------------------------------------------------------------------------------- /mmdet/core/anchor/point_generator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class PointGenerator(object): 5 | def _meshgrid(self, x, y, row_major=True): 6 | xx = x.repeat(len(y)) 7 | yy = y.view(-1, 1).repeat(1, len(x)).view(-1) 8 | if row_major: 9 | return xx, yy 10 | else: 11 | return yy, xx 12 | 13 | def grid_points(self, featmap_size, stride=16, device="cuda"): 14 | feat_h, feat_w = featmap_size 15 | shift_x = torch.arange(0.0, feat_w, device=device) * stride 16 | shift_y = torch.arange(0.0, 
feat_h, device=device) * stride 17 | shift_xx, shift_yy = self._meshgrid(shift_x, shift_y) 18 | stride = shift_x.new_full((shift_xx.shape[0],), stride) 19 | shifts = torch.stack([shift_xx, shift_yy, stride], dim=-1) 20 | all_points = shifts.to(device) 21 | return all_points 22 | 23 | def valid_flags(self, featmap_size, valid_size, device="cuda"): 24 | feat_h, feat_w = featmap_size 25 | valid_h, valid_w = valid_size 26 | assert valid_h <= feat_h and valid_w <= feat_w 27 | valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device) 28 | valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device) 29 | valid_x[:valid_w] = 1 30 | valid_y[:valid_h] = 1 31 | valid_xx, valid_yy = self._meshgrid(valid_x, valid_y) 32 | valid = valid_xx & valid_yy 33 | return valid 34 | -------------------------------------------------------------------------------- /mmdet/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner 2 | from .bbox_target import bbox_target, bbox_target_tsd 3 | from .geometry import bbox_overlaps 4 | from .samplers import ( 5 | BaseSampler, 6 | CombinedSampler, 7 | InstanceBalancedPosSampler, 8 | IoUBalancedNegSampler, 9 | PseudoSampler, 10 | RandomSampler, 11 | SamplingResult, 12 | ) 13 | from .transforms import ( 14 | bbox2delta, 15 | bbox2result, 16 | bbox2roi, 17 | bbox_flip, 18 | bbox_mapping, 19 | bbox_mapping_back, 20 | delta2bbox, 21 | distance2bbox, 22 | roi2bbox, 23 | ) 24 | 25 | from .assign_sampling import ( # isort:skip, avoid recursive imports 26 | assign_and_sample, 27 | build_assigner, 28 | build_sampler, 29 | ) 30 | 31 | __all__ = [ 32 | "bbox_overlaps", 33 | "BaseAssigner", 34 | "MaxIoUAssigner", 35 | "AssignResult", 36 | "BaseSampler", 37 | "PseudoSampler", 38 | "RandomSampler", 39 | "InstanceBalancedPosSampler", 40 | "IoUBalancedNegSampler", 41 | "CombinedSampler", 42 | "SamplingResult", 43 | "build_assigner", 44 | "build_sampler", 45 | "assign_and_sample", 46 | "bbox2delta", 47 | "delta2bbox", 48 | "bbox_flip", 49 | "bbox_mapping", 50 | "bbox_mapping_back", 51 | "bbox2roi", 52 | "roi2bbox", 53 | "bbox2result", 54 | "distance2bbox", 55 | "bbox_target", 56 | "bbox_target_tsd", 57 | ] 58 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assign_sampling.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from . 
import assigners, samplers
4 |
5 |
6 | def build_assigner(cfg, **kwargs):
7 |     if isinstance(cfg, assigners.BaseAssigner):
8 |         return cfg
9 |     elif isinstance(cfg, dict):
10 |         return mmcv.runner.obj_from_dict(cfg, assigners, default_args=kwargs)
11 |     else:
12 |         raise TypeError("Invalid type {} for building an assigner".format(type(cfg)))
13 |
14 |
15 | def build_sampler(cfg, **kwargs):
16 |     if isinstance(cfg, samplers.BaseSampler):
17 |         return cfg
18 |     elif isinstance(cfg, dict):
19 |         return mmcv.runner.obj_from_dict(cfg, samplers, default_args=kwargs)
20 |     else:
21 |         raise TypeError("Invalid type {} for building a sampler".format(type(cfg)))
22 |
23 |
24 | def assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg):
25 |     bbox_assigner = build_assigner(cfg.assigner)
26 |     bbox_sampler = build_sampler(cfg.sampler)
27 |     assign_result = bbox_assigner.assign(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels)
28 |     sampling_result = bbox_sampler.sample(assign_result, bboxes, gt_bboxes, gt_labels)
29 |     return assign_result, sampling_result
30 |
--------------------------------------------------------------------------------
/mmdet/core/bbox/assigners/__init__.py:
--------------------------------------------------------------------------------
1 | from .approx_max_iou_assigner import ApproxMaxIoUAssigner
2 | from .assign_result import AssignResult
3 | from .atss_assigner import ATSSAssigner
4 | from .base_assigner import BaseAssigner
5 | from .max_iou_assigner import MaxIoUAssigner
6 | from .point_assigner import PointAssigner
7 |
8 | __all__ = [
9 |     "BaseAssigner",
10 |     "MaxIoUAssigner",
11 |     "ApproxMaxIoUAssigner",
12 |     "AssignResult",
13 |     "PointAssigner",
14 |     "ATSSAssigner",
15 | ]
16 |
--------------------------------------------------------------------------------
/mmdet/core/bbox/assigners/base_assigner.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta, abstractmethod
2 |
3 |
4 | class BaseAssigner(metaclass=ABCMeta):
5 |     @abstractmethod
6 |     def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
7 |         pass
8 |
--------------------------------------------------------------------------------
/mmdet/core/bbox/demodata.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 |
5 | def ensure_rng(rng=None):
6 |     """
7 |     Simple version of the ``kwarray.ensure_rng``
8 |
9 |     Args:
10 |         rng (int | numpy.random.RandomState | None):
11 |             if None, then defaults to the global rng. Otherwise this can be an
12 |             integer or a RandomState class
13 |     Returns:
14 |         (numpy.random.RandomState) : rng -
15 |             a numpy random number generator
16 |
17 |     References:
18 |         https://gitlab.kitware.com/computer-vision/kwarray/blob/master/kwarray/util_random.py#L270
19 |     """
20 |
21 |     if rng is None:
22 |         rng = np.random.mtrand._rand
23 |     elif isinstance(rng, int):
24 |         rng = np.random.RandomState(rng)
25 |     else:
26 |         rng = rng
27 |     return rng
28 |
29 |
30 | def random_boxes(num=1, scale=1, rng=None):
31 |     """
32 |     Simple version of ``kwimage.Boxes.random``
33 |
34 |     Returns:
35 |         Tensor: shape (n, 4) in x1, y1, x2, y2 format.
36 | 37 | References: 38 | https://gitlab.kitware.com/computer-vision/kwimage/blob/master/kwimage/structs/boxes.py#L1390 39 | 40 | Example: 41 | >>> num = 3 42 | >>> scale = 512 43 | >>> rng = 0 44 | >>> boxes = random_boxes(num, scale, rng) 45 | >>> print(boxes) 46 | tensor([[280.9925, 278.9802, 308.6148, 366.1769], 47 | [216.9113, 330.6978, 224.0446, 456.5878], 48 | [405.3632, 196.3221, 493.3953, 270.7942]]) 49 | """ 50 | rng = ensure_rng(rng) 51 | 52 | tlbr = rng.rand(num, 4).astype(np.float32) 53 | 54 | tl_x = np.minimum(tlbr[:, 0], tlbr[:, 2]) 55 | tl_y = np.minimum(tlbr[:, 1], tlbr[:, 3]) 56 | br_x = np.maximum(tlbr[:, 0], tlbr[:, 2]) 57 | br_y = np.maximum(tlbr[:, 1], tlbr[:, 3]) 58 | 59 | tlbr[:, 0] = tl_x * scale 60 | tlbr[:, 1] = tl_y * scale 61 | tlbr[:, 2] = br_x * scale 62 | tlbr[:, 3] = br_y * scale 63 | 64 | boxes = torch.from_numpy(tlbr) 65 | return boxes 66 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_sampler import BaseSampler 2 | from .combined_sampler import CombinedSampler 3 | from .instance_balanced_pos_sampler import InstanceBalancedPosSampler 4 | from .iou_balanced_neg_sampler import IoUBalancedNegSampler 5 | from .ohem_sampler import OHEMSampler 6 | from .pseudo_sampler import PseudoSampler 7 | from .random_sampler import RandomSampler 8 | from .sampling_result import SamplingResult 9 | 10 | __all__ = [ 11 | "BaseSampler", 12 | "PseudoSampler", 13 | "RandomSampler", 14 | "InstanceBalancedPosSampler", 15 | "IoUBalancedNegSampler", 16 | "CombinedSampler", 17 | "OHEMSampler", 18 | "SamplingResult", 19 | ] 20 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/combined_sampler.py: -------------------------------------------------------------------------------- 1 | from ..assign_sampling import build_sampler 2 | from .base_sampler import BaseSampler 3 | 4 | 5 | class CombinedSampler(BaseSampler): 6 | def __init__(self, pos_sampler, neg_sampler, **kwargs): 7 | super(CombinedSampler, self).__init__(**kwargs) 8 | self.pos_sampler = build_sampler(pos_sampler, **kwargs) 9 | self.neg_sampler = build_sampler(neg_sampler, **kwargs) 10 | 11 | def _sample_pos(self, **kwargs): 12 | raise NotImplementedError 13 | 14 | def _sample_neg(self, **kwargs): 15 | raise NotImplementedError 16 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .random_sampler import RandomSampler 5 | 6 | 7 | class InstanceBalancedPosSampler(RandomSampler): 8 | def _sample_pos(self, assign_result, num_expected, **kwargs): 9 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 10 | if pos_inds.numel() != 0: 11 | pos_inds = pos_inds.squeeze(1) 12 | if pos_inds.numel() <= num_expected: 13 | return pos_inds 14 | else: 15 | unique_gt_inds = assign_result.gt_inds[pos_inds].unique() 16 | num_gts = len(unique_gt_inds) 17 | num_per_gt = int(round(num_expected / float(num_gts)) + 1) 18 | sampled_inds = [] 19 | for i in unique_gt_inds: 20 | inds = torch.nonzero(assign_result.gt_inds == i.item()) 21 | if inds.numel() != 0: 22 | inds = inds.squeeze(1) 23 | else: 24 | continue 25 | if len(inds) > num_per_gt: 26 | inds = self.random_choice(inds, num_per_gt) 27 | 
sampled_inds.append(inds) 28 | sampled_inds = torch.cat(sampled_inds) 29 | if len(sampled_inds) < num_expected: 30 | num_extra = num_expected - len(sampled_inds) 31 | extra_inds = np.array( 32 | list(set(pos_inds.cpu()) - set(sampled_inds.cpu())) 33 | ) 34 | if len(extra_inds) > num_extra: 35 | extra_inds = self.random_choice(extra_inds, num_extra) 36 | extra_inds = ( 37 | torch.from_numpy(extra_inds).to(assign_result.gt_inds.device).long() 38 | ) 39 | sampled_inds = torch.cat([sampled_inds, extra_inds]) 40 | elif len(sampled_inds) > num_expected: 41 | sampled_inds = self.random_choice(sampled_inds, num_expected) 42 | return sampled_inds 43 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/ohem_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ..transforms import bbox2roi 4 | from .base_sampler import BaseSampler 5 | 6 | 7 | class OHEMSampler(BaseSampler): 8 | """ 9 | Online Hard Example Mining Sampler described in [1]_. 10 | 11 | References: 12 | .. [1] https://arxiv.org/pdf/1604.03540.pdf 13 | """ 14 | 15 | def __init__( 16 | self, 17 | num, 18 | pos_fraction, 19 | context, 20 | neg_pos_ub=-1, 21 | add_gt_as_proposals=True, 22 | **kwargs 23 | ): 24 | super(OHEMSampler, self).__init__( 25 | num, pos_fraction, neg_pos_ub, add_gt_as_proposals 26 | ) 27 | if not hasattr(context, "num_stages"): 28 | self.bbox_roi_extractor = context.bbox_roi_extractor 29 | self.bbox_head = context.bbox_head 30 | else: 31 | self.bbox_roi_extractor = context.bbox_roi_extractor[context.current_stage] 32 | self.bbox_head = context.bbox_head[context.current_stage] 33 | 34 | def hard_mining(self, inds, num_expected, bboxes, labels, feats): 35 | with torch.no_grad(): 36 | rois = bbox2roi([bboxes]) 37 | bbox_feats = self.bbox_roi_extractor( 38 | feats[: self.bbox_roi_extractor.num_inputs], rois 39 | ) 40 | cls_score, _ = self.bbox_head(bbox_feats) 41 | loss = self.bbox_head.loss( 42 | cls_score=cls_score, 43 | bbox_pred=None, 44 | labels=labels, 45 | label_weights=cls_score.new_ones(cls_score.size(0)), 46 | bbox_targets=None, 47 | bbox_weights=None, 48 | reduction_override="none", 49 | )["loss_cls"] 50 | _, topk_loss_inds = loss.topk(num_expected) 51 | return inds[topk_loss_inds] 52 | 53 | def _sample_pos( 54 | self, assign_result, num_expected, bboxes=None, feats=None, **kwargs 55 | ): 56 | # Sample some hard positive samples 57 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 58 | if pos_inds.numel() != 0: 59 | pos_inds = pos_inds.squeeze(1) 60 | if pos_inds.numel() <= num_expected: 61 | return pos_inds 62 | else: 63 | return self.hard_mining( 64 | pos_inds, 65 | num_expected, 66 | bboxes[pos_inds], 67 | assign_result.labels[pos_inds], 68 | feats, 69 | ) 70 | 71 | def _sample_neg( 72 | self, assign_result, num_expected, bboxes=None, feats=None, **kwargs 73 | ): 74 | # Sample some hard negative samples 75 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 76 | if neg_inds.numel() != 0: 77 | neg_inds = neg_inds.squeeze(1) 78 | if len(neg_inds) <= num_expected: 79 | return neg_inds 80 | else: 81 | return self.hard_mining( 82 | neg_inds, 83 | num_expected, 84 | bboxes[neg_inds], 85 | assign_result.labels[neg_inds], 86 | feats, 87 | ) 88 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/pseudo_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from 
.base_sampler import BaseSampler 4 | from .sampling_result import SamplingResult 5 | 6 | 7 | class PseudoSampler(BaseSampler): 8 | def __init__(self, **kwargs): 9 | pass 10 | 11 | def _sample_pos(self, **kwargs): 12 | raise NotImplementedError 13 | 14 | def _sample_neg(self, **kwargs): 15 | raise NotImplementedError 16 | 17 | def sample(self, assign_result, bboxes, gt_bboxes, **kwargs): 18 | pos_inds = torch.nonzero(assign_result.gt_inds > 0).squeeze(-1).unique() 19 | neg_inds = torch.nonzero(assign_result.gt_inds == 0).squeeze(-1).unique() 20 | gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8) 21 | sampling_result = SamplingResult( 22 | pos_inds, neg_inds, bboxes, gt_bboxes, assign_result, gt_flags 23 | ) 24 | return sampling_result 25 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/random_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sampler import BaseSampler 4 | 5 | 6 | class RandomSampler(BaseSampler): 7 | def __init__( 8 | self, num, pos_fraction, neg_pos_ub=-1, add_gt_as_proposals=True, **kwargs 9 | ): 10 | from mmdet.core.bbox import demodata 11 | 12 | super(RandomSampler, self).__init__( 13 | num, pos_fraction, neg_pos_ub, add_gt_as_proposals 14 | ) 15 | self.rng = demodata.ensure_rng(kwargs.get("rng", None)) 16 | 17 | def random_choice(self, gallery, num): 18 | """Random select some elements from the gallery. 19 | 20 | If `gallery` is a Tensor, the returned indices will be a Tensor; 21 | If `gallery` is a ndarray or list, the returned indices will be a 22 | ndarray. 23 | 24 | Args: 25 | gallery (Tensor | ndarray | list): indices pool. 26 | num (int): expected sample num. 27 | 28 | Returns: 29 | Tensor or ndarray: sampled indices. 
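        Example:
            a minimal sketch; a tensor gallery stays on its own device,
            while list/ndarray galleries are moved to the current CUDA
            device before sampling:

            >>> sampler = RandomSampler(num=8, pos_fraction=0.5)
            >>> inds = sampler.random_choice(torch.arange(100), 8)
            >>> inds.shape
            torch.Size([8])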
30 | """ 31 | assert len(gallery) >= num 32 | 33 | is_tensor = isinstance(gallery, torch.Tensor) 34 | if not is_tensor: 35 | gallery = torch.tensor( 36 | gallery, dtype=torch.long, device=torch.cuda.current_device() 37 | ) 38 | perm = torch.randperm(gallery.numel(), device=gallery.device)[:num] 39 | rand_inds = gallery[perm] 40 | if not is_tensor: 41 | rand_inds = rand_inds.cpu().numpy() 42 | return rand_inds 43 | 44 | def _sample_pos(self, assign_result, num_expected, **kwargs): 45 | """Randomly sample some positive samples.""" 46 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 47 | if pos_inds.numel() != 0: 48 | pos_inds = pos_inds.squeeze(1) 49 | if pos_inds.numel() <= num_expected: 50 | return pos_inds 51 | else: 52 | return self.random_choice(pos_inds, num_expected) 53 | 54 | def _sample_neg(self, assign_result, num_expected, **kwargs): 55 | """Randomly sample some negative samples.""" 56 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 57 | if neg_inds.numel() != 0: 58 | neg_inds = neg_inds.squeeze(1) 59 | if len(neg_inds) <= num_expected: 60 | return neg_inds 61 | else: 62 | return self.random_choice(neg_inds, num_expected) 63 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .class_names import ( 2 | cityscapes_classes, 3 | coco_classes, 4 | dataset_aliases, 5 | get_classes, 6 | imagenet_det_classes, 7 | imagenet_vid_classes, 8 | voc_classes, 9 | ) 10 | from .eval_hooks import DistEvalHook, EvalHook 11 | from .mean_ap import average_precision, eval_map, print_map_summary 12 | from .recall import eval_recalls, plot_iou_recall, plot_num_recall, print_recall_summary 13 | 14 | __all__ = [ 15 | "voc_classes", 16 | "imagenet_det_classes", 17 | "imagenet_vid_classes", 18 | "coco_classes", 19 | "cityscapes_classes", 20 | "dataset_aliases", 21 | "get_classes", 22 | "DistEvalHook", 23 | "EvalHook", 24 | "average_precision", 25 | "eval_map", 26 | "print_map_summary", 27 | "eval_recalls", 28 | "print_recall_summary", 29 | "plot_num_recall", 30 | "plot_iou_recall", 31 | ] 32 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/bbox_overlaps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode="iou"): 5 | """Calculate the ious between each bbox of bboxes1 and bboxes2. 
6 | 7 | Args: 8 | bboxes1(ndarray): shape (n, 4) 9 | bboxes2(ndarray): shape (k, 4) 10 | mode(str): iou (intersection over union) or iof (intersection 11 | over foreground) 12 | 13 | Returns: 14 | ious(ndarray): shape (n, k) 15 | """ 16 | 17 | assert mode in ["iou", "iof"] 18 | 19 | bboxes1 = bboxes1.astype(np.float32) 20 | bboxes2 = bboxes2.astype(np.float32) 21 | rows = bboxes1.shape[0] 22 | cols = bboxes2.shape[0] 23 | ious = np.zeros((rows, cols), dtype=np.float32) 24 | if rows * cols == 0: 25 | return ious 26 | exchange = False 27 | if bboxes1.shape[0] > bboxes2.shape[0]: 28 | bboxes1, bboxes2 = bboxes2, bboxes1 29 | ious = np.zeros((cols, rows), dtype=np.float32) 30 | exchange = True 31 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (bboxes1[:, 3] - bboxes1[:, 1] + 1) 32 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (bboxes2[:, 3] - bboxes2[:, 1] + 1) 33 | for i in range(bboxes1.shape[0]): 34 | x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0]) 35 | y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1]) 36 | x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2]) 37 | y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3]) 38 | overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum( 39 | y_end - y_start + 1, 0 40 | ) 41 | if mode == "iou": 42 | union = area1[i] + area2 - overlap 43 | else: 44 | union = area1[i] if not exchange else area2 45 | ious[i, :] = overlap / union 46 | if exchange: 47 | ious = ious.T 48 | return ious 49 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/eval_hooks.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from mmcv.runner import Hook 4 | from torch.utils.data import DataLoader 5 | 6 | 7 | class EvalHook(Hook): 8 | """Evaluation hook. 9 | 10 | Attributes: 11 | dataloader (DataLoader): A PyTorch dataloader. 12 | interval (int): Evaluation interval (by epochs). Default: 1. 13 | """ 14 | 15 | def __init__(self, dataloader, interval=1, **eval_kwargs): 16 | if not isinstance(dataloader, DataLoader): 17 | raise TypeError( 18 | "dataloader must be a pytorch DataLoader, but got {}".format( 19 | type(dataloader) 20 | ) 21 | ) 22 | self.dataloader = dataloader 23 | self.interval = interval 24 | self.eval_kwargs = eval_kwargs 25 | 26 | def after_train_epoch(self, runner): 27 | if not self.every_n_epochs(runner, self.interval): 28 | return 29 | from mmdet.apis import single_gpu_test 30 | 31 | results = single_gpu_test(runner.model, self.dataloader, show=False) 32 | self.evaluate(runner, results) 33 | 34 | def evaluate(self, runner, results): 35 | eval_res = self.dataloader.dataset.evaluate( 36 | results, logger=runner.logger, **self.eval_kwargs 37 | ) 38 | for name, val in eval_res.items(): 39 | runner.log_buffer.output[name] = val 40 | runner.log_buffer.ready = True 41 | 42 | 43 | class DistEvalHook(EvalHook): 44 | """Distributed evaluation hook. 45 | 46 | Attributes: 47 | dataloader (DataLoader): A PyTorch dataloader. 48 | interval (int): Evaluation interval (by epochs). Default: 1. 49 | tmpdir (str | None): Temporary directory to save the results of all 50 | processes. Default: None. 51 | gpu_collect (bool): Whether to use gpu or cpu to collect results. 52 | Default: False. 
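    Example:
        a hedged config-level sketch; the exact wiring depends on the
        training script, but epoch-wise distributed evaluation is
        typically requested along these lines::

            evaluation = dict(interval=1, gpu_collect=False)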
53 | """ 54 | 55 | def __init__(self, dataloader, interval=1, gpu_collect=False, **eval_kwargs): 56 | if not isinstance(dataloader, DataLoader): 57 | raise TypeError( 58 | "dataloader must be a pytorch DataLoader, but got {}".format( 59 | type(dataloader) 60 | ) 61 | ) 62 | self.dataloader = dataloader 63 | self.interval = interval 64 | self.gpu_collect = gpu_collect 65 | self.eval_kwargs = eval_kwargs 66 | 67 | def after_train_epoch(self, runner): 68 | if not self.every_n_epochs(runner, self.interval): 69 | return 70 | from mmdet.apis import multi_gpu_test 71 | 72 | results = multi_gpu_test( 73 | runner.model, 74 | self.dataloader, 75 | tmpdir=osp.join(runner.work_dir, ".eval_hook"), 76 | gpu_collect=self.gpu_collect, 77 | ) 78 | if runner.rank == 0: 79 | print("\n") 80 | self.evaluate(runner, results) 81 | -------------------------------------------------------------------------------- /mmdet/core/fp16/__init__.py: -------------------------------------------------------------------------------- 1 | from .decorators import auto_fp16, force_fp32 2 | from .hooks import Fp16OptimizerHook, wrap_fp16_model 3 | 4 | __all__ = ["auto_fp16", "force_fp32", "Fp16OptimizerHook", "wrap_fp16_model"] 5 | -------------------------------------------------------------------------------- /mmdet/core/fp16/utils.py: -------------------------------------------------------------------------------- 1 | from collections import abc 2 | 3 | import numpy as np 4 | import torch 5 | 6 | 7 | def cast_tensor_type(inputs, src_type, dst_type): 8 | if isinstance(inputs, torch.Tensor): 9 | return inputs.to(dst_type) 10 | elif isinstance(inputs, str): 11 | return inputs 12 | elif isinstance(inputs, np.ndarray): 13 | return inputs 14 | elif isinstance(inputs, abc.Mapping): 15 | return type(inputs)( 16 | {k: cast_tensor_type(v, src_type, dst_type) for k, v in inputs.items()} 17 | ) 18 | elif isinstance(inputs, abc.Iterable): 19 | return type(inputs)( 20 | cast_tensor_type(item, src_type, dst_type) for item in inputs 21 | ) 22 | else: 23 | return inputs 24 | -------------------------------------------------------------------------------- /mmdet/core/mask/__init__.py: -------------------------------------------------------------------------------- 1 | from .mask_target import mask_target 2 | from .utils import split_combined_polys 3 | 4 | __all__ = ["split_combined_polys", "mask_target"] 5 | -------------------------------------------------------------------------------- /mmdet/core/mask/mask_target.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import torch 4 | from torch.nn.modules.utils import _pair 5 | 6 | 7 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list, cfg): 8 | cfg_list = [cfg for _ in range(len(pos_proposals_list))] 9 | mask_targets = map( 10 | mask_target_single, 11 | pos_proposals_list, 12 | pos_assigned_gt_inds_list, 13 | gt_masks_list, 14 | cfg_list, 15 | ) 16 | mask_targets = torch.cat(list(mask_targets)) 17 | return mask_targets 18 | 19 | 20 | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg): 21 | mask_size = _pair(cfg.mask_size) 22 | num_pos = pos_proposals.size(0) 23 | mask_targets = [] 24 | if num_pos > 0: 25 | proposals_np = pos_proposals.cpu().numpy() 26 | _, maxh, maxw = gt_masks.shape 27 | proposals_np[:, [0, 2]] = np.clip(proposals_np[:, [0, 2]], 0, maxw - 1) 28 | proposals_np[:, [1, 3]] = np.clip(proposals_np[:, [1, 3]], 0, maxh - 1) 29 | pos_assigned_gt_inds = 
pos_assigned_gt_inds.cpu().numpy() 30 | for i in range(num_pos): 31 | gt_mask = gt_masks[pos_assigned_gt_inds[i]] 32 | bbox = proposals_np[i, :].astype(np.int32) 33 | x1, y1, x2, y2 = bbox 34 | w = np.maximum(x2 - x1 + 1, 1) 35 | h = np.maximum(y2 - y1 + 1, 1) 36 | # mask is uint8 both before and after resizing 37 | # mask_size (h, w) to (w, h) 38 | target = mmcv.imresize(gt_mask[y1 : y1 + h, x1 : x1 + w], mask_size[::-1]) 39 | mask_targets.append(target) 40 | mask_targets = ( 41 | torch.from_numpy(np.stack(mask_targets)).float().to(pos_proposals.device) 42 | ) 43 | else: 44 | mask_targets = pos_proposals.new_zeros((0,) + mask_size) 45 | return mask_targets 46 | -------------------------------------------------------------------------------- /mmdet/core/mask/utils.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | 4 | def split_combined_polys(polys, poly_lens, polys_per_mask): 5 | """Split the combined 1-D polys into masks. 6 | 7 | A mask is represented as a list of polys, and a poly is represented as 8 | a 1-D array. In dataset, all masks are concatenated into a single 1-D 9 | tensor. Here we need to split the tensor into original representations. 10 | 11 | Args: 12 | polys (list): a list (length = image num) of 1-D tensors 13 | poly_lens (list): a list (length = image num) of poly length 14 | polys_per_mask (list): a list (length = image num) of poly number 15 | of each mask 16 | 17 | Returns: 18 | list: a list (length = image num) of list (length = mask num) of 19 | list (length = poly num) of numpy array 20 | """ 21 | mask_polys_list = [] 22 | for img_id in range(len(polys)): 23 | polys_single = polys[img_id] 24 | polys_lens_single = poly_lens[img_id].tolist() 25 | polys_per_mask_single = polys_per_mask[img_id].tolist() 26 | 27 | split_polys = mmcv.slice_list(polys_single, polys_lens_single) 28 | mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single) 29 | mask_polys_list.append(mask_polys) 30 | return mask_polys_list 31 | -------------------------------------------------------------------------------- /mmdet/core/optimizer/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_optimizer 2 | from .copy_of_sgd import CopyOfSGD 3 | from .registry import OPTIMIZERS 4 | 5 | __all__ = ["OPTIMIZERS", "build_optimizer", "CopyOfSGD"] 6 | -------------------------------------------------------------------------------- /mmdet/core/optimizer/copy_of_sgd.py: -------------------------------------------------------------------------------- 1 | from torch.optim import SGD 2 | 3 | from .registry import OPTIMIZERS 4 | 5 | 6 | @OPTIMIZERS.register_module 7 | class CopyOfSGD(SGD): 8 | """A clone of torch.optim.SGD. 9 | 10 | A customized optimizer could be defined like CopyOfSGD. 11 | You may derive from built-in optimizers in torch.optim, 12 | or directly implement a new optimizer. 
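    Example:
        a hedged sketch of selecting the registered optimizer from a
        config, assuming the usual mmdet optimizer fields::

            optimizer = dict(
                type="CopyOfSGD", lr=0.02, momentum=0.9, weight_decay=0.0001)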
13 | """ 14 | -------------------------------------------------------------------------------- /mmdet/core/optimizer/registry.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | 3 | import torch 4 | 5 | from mmdet.utils import Registry 6 | 7 | OPTIMIZERS = Registry("optimizer") 8 | 9 | 10 | def register_torch_optimizers(): 11 | torch_optimizers = [] 12 | for module_name in dir(torch.optim): 13 | if module_name.startswith("__"): 14 | continue 15 | _optim = getattr(torch.optim, module_name) 16 | if inspect.isclass(_optim) and issubclass(_optim, torch.optim.Optimizer): 17 | OPTIMIZERS.register_module(_optim) 18 | torch_optimizers.append(module_name) 19 | return torch_optimizers 20 | 21 | 22 | TORCH_OPTIMIZERS = register_torch_optimizers() 23 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_nms import multiclass_nms 2 | from .merge_augs import ( 3 | merge_aug_bboxes, 4 | merge_aug_masks, 5 | merge_aug_proposals, 6 | merge_aug_scores, 7 | ) 8 | 9 | __all__ = [ 10 | "multiclass_nms", 11 | "merge_aug_proposals", 12 | "merge_aug_bboxes", 13 | "merge_aug_scores", 14 | "merge_aug_masks", 15 | ] 16 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/bbox_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.ops.nms import nms_wrapper 4 | 5 | 6 | def multiclass_nms( 7 | multi_bboxes, multi_scores, score_thr, nms_cfg, max_num=-1, score_factors=None 8 | ): 9 | """NMS for multi-class bboxes. 10 | 11 | Args: 12 | multi_bboxes (Tensor): shape (n, #class*4) or (n, 4) 13 | multi_scores (Tensor): shape (n, #class), where the 0th column 14 | contains scores of the background class, but this will be ignored. 15 | score_thr (float): bbox threshold, bboxes with scores lower than it 16 | will not be considered. 17 | nms_thr (float): NMS IoU threshold 18 | max_num (int): if there are more than max_num bboxes after NMS, 19 | only top max_num will be kept. 20 | score_factors (Tensor): The factors multiplied to scores before 21 | applying NMS 22 | 23 | Returns: 24 | tuple: (bboxes, labels), tensors of shape (k, 5) and (k, 1). Labels 25 | are 0-based. 26 | """ 27 | num_classes = multi_scores.size(1) - 1 28 | # exclude background category 29 | if multi_bboxes.shape[1] > 4: 30 | bboxes = multi_bboxes.view(multi_scores.size(0), -1, 4)[:, 1:] 31 | else: 32 | bboxes = multi_bboxes[:, None].expand(-1, num_classes, 4) 33 | scores = multi_scores[:, 1:] 34 | 35 | # filter out boxes with low scores 36 | valid_mask = scores > score_thr 37 | bboxes = bboxes[valid_mask] 38 | if score_factors is not None: 39 | scores = scores * score_factors[:, None] 40 | scores = scores[valid_mask] 41 | labels = valid_mask.nonzero()[:, 1] 42 | 43 | if bboxes.numel() == 0: 44 | bboxes = multi_bboxes.new_zeros((0, 5)) 45 | labels = multi_bboxes.new_zeros((0,), dtype=torch.long) 46 | return bboxes, labels 47 | 48 | # Modified from https://github.com/pytorch/vision/blob 49 | # /505cd6957711af790211896d32b40291bea1bc21/torchvision/ops/boxes.py#L39. 50 | # strategy: in order to perform NMS independently per class. 51 | # we add an offset to all the boxes. 
The offset is dependent 52 | # only on the class idx, and is large enough so that boxes 53 | # from different classes do not overlap 54 | max_coordinate = bboxes.max() 55 | offsets = labels.to(bboxes) * (max_coordinate + 1) 56 | bboxes_for_nms = bboxes + offsets[:, None] 57 | nms_cfg_ = nms_cfg.copy() 58 | nms_type = nms_cfg_.pop("type", "nms") 59 | nms_op = getattr(nms_wrapper, nms_type) 60 | dets, keep = nms_op(torch.cat([bboxes_for_nms, scores[:, None]], 1), **nms_cfg_) 61 | bboxes = bboxes[keep] 62 | scores = dets[:, -1] # soft_nms will modify scores 63 | labels = labels[keep] 64 | 65 | if keep.size(0) > max_num: 66 | _, inds = scores.sort(descending=True) 67 | inds = inds[:max_num] 68 | bboxes = bboxes[inds] 69 | scores = scores[inds] 70 | labels = labels[inds] 71 | 72 | return torch.cat([bboxes, scores[:, None]], 1), labels 73 | -------------------------------------------------------------------------------- /mmdet/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .dist_utils import DistOptimizerHook, allreduce_grads 2 | from .misc import multi_apply, tensor2imgs, unmap 3 | 4 | __all__ = [ 5 | "allreduce_grads", 6 | "DistOptimizerHook", 7 | "tensor2imgs", 8 | "unmap", 9 | "multi_apply", 10 | ] 11 | -------------------------------------------------------------------------------- /mmdet/core/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch.distributed as dist 4 | from mmcv.runner import OptimizerHook 5 | from torch._utils import _flatten_dense_tensors, _take_tensors, _unflatten_dense_tensors 6 | 7 | 8 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): 9 | if bucket_size_mb > 0: 10 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 11 | buckets = _take_tensors(tensors, bucket_size_bytes) 12 | else: 13 | buckets = OrderedDict() 14 | for tensor in tensors: 15 | tp = tensor.type() 16 | if tp not in buckets: 17 | buckets[tp] = [] 18 | buckets[tp].append(tensor) 19 | buckets = buckets.values() 20 | 21 | for bucket in buckets: 22 | flat_tensors = _flatten_dense_tensors(bucket) 23 | dist.all_reduce(flat_tensors) 24 | flat_tensors.div_(world_size) 25 | for tensor, synced in zip( 26 | bucket, _unflatten_dense_tensors(flat_tensors, bucket) 27 | ): 28 | tensor.copy_(synced) 29 | 30 | 31 | def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): 32 | grads = [ 33 | param.grad.data 34 | for param in params 35 | if param.requires_grad and param.grad is not None 36 | ] 37 | world_size = dist.get_world_size() 38 | if coalesce: 39 | _allreduce_coalesced(grads, world_size, bucket_size_mb) 40 | else: 41 | for tensor in grads: 42 | dist.all_reduce(tensor.div_(world_size)) 43 | 44 | 45 | class DistOptimizerHook(OptimizerHook): 46 | def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1): 47 | self.grad_clip = grad_clip 48 | self.coalesce = coalesce 49 | self.bucket_size_mb = bucket_size_mb 50 | 51 | def after_train_iter(self, runner): 52 | runner.optimizer.zero_grad() 53 | runner.outputs["loss"].backward() 54 | if self.grad_clip is not None: 55 | self.clip_grads(runner.model.parameters()) 56 | runner.optimizer.step() 57 | -------------------------------------------------------------------------------- /mmdet/core/utils/misc.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import mmcv 4 | import numpy as np 5 | from 
six.moves import map, zip 6 | 7 | 8 | def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): 9 | num_imgs = tensor.size(0) 10 | mean = np.array(mean, dtype=np.float32) 11 | std = np.array(std, dtype=np.float32) 12 | imgs = [] 13 | for img_id in range(num_imgs): 14 | img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0) 15 | img = mmcv.imdenormalize(img, mean, std, to_bgr=to_rgb).astype(np.uint8) 16 | imgs.append(np.ascontiguousarray(img)) 17 | return imgs 18 | 19 | 20 | def multi_apply(func, *args, **kwargs): 21 | pfunc = partial(func, **kwargs) if kwargs else func 22 | map_results = map(pfunc, *args) 23 | return tuple(map(list, zip(*map_results))) 24 | 25 | 26 | def unmap(data, count, inds, fill=0): 27 | """ Unmap a subset of item (data) back to the original set of items (of 28 | size count) """ 29 | if data.dim() == 1: 30 | ret = data.new_full((count,), fill) 31 | ret[inds] = data 32 | else: 33 | new_size = (count,) + data.size()[1:] 34 | ret = data.new_full(new_size, fill) 35 | ret[inds, :] = data 36 | return ret 37 | -------------------------------------------------------------------------------- /mmdet/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_dataloader, build_dataset 2 | from .cityscapes import CityscapesDataset 3 | from .coco import CocoDataset 4 | from .custom import CustomDataset 5 | from .dataset_wrappers import ConcatDataset, RepeatDataset 6 | from .openimages_dataset import OpenImagesDataset 7 | from .registry import DATASETS 8 | from .samplers import DistributedGroupSampler, DistributedSampler, GroupSampler 9 | from .voc import VOCDataset 10 | from .wider_face import WIDERFaceDataset 11 | from .xml_style import XMLDataset 12 | 13 | __all__ = [ 14 | "CustomDataset", 15 | "XMLDataset", 16 | "CocoDataset", 17 | "VOCDataset", 18 | "CityscapesDataset", 19 | "GroupSampler", 20 | "DistributedGroupSampler", 21 | "DistributedSampler", 22 | "build_dataloader", 23 | "ConcatDataset", 24 | "RepeatDataset", 25 | "WIDERFaceDataset", 26 | "DATASETS", 27 | "build_dataset", 28 | ] 29 | -------------------------------------------------------------------------------- /mmdet/datasets/dataset_wrappers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 3 | 4 | from .registry import DATASETS 5 | 6 | 7 | @DATASETS.register_module 8 | class ConcatDataset(_ConcatDataset): 9 | """A wrapper of concatenated dataset. 10 | 11 | Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but 12 | concat the group flag for image aspect ratio. 13 | 14 | Args: 15 | datasets (list[:obj:`Dataset`]): A list of datasets. 16 | """ 17 | 18 | def __init__(self, datasets): 19 | super(ConcatDataset, self).__init__(datasets) 20 | self.CLASSES = datasets[0].CLASSES 21 | if hasattr(datasets[0], "flag"): 22 | flags = [] 23 | for i in range(0, len(datasets)): 24 | flags.append(datasets[i].flag) 25 | self.flag = np.concatenate(flags) 26 | 27 | 28 | @DATASETS.register_module 29 | class RepeatDataset(object): 30 | """A wrapper of repeated dataset. 31 | 32 | The length of repeated dataset will be `times` larger than the original 33 | dataset. This is useful when the data loading time is long but the dataset 34 | is small. Using RepeatDataset can reduce the data loading time between 35 | epochs. 36 | 37 | Args: 38 | dataset (:obj:`Dataset`): The dataset to be repeated. 
39 | times (int): Repeat times. 40 | """ 41 | 42 | def __init__(self, dataset, times): 43 | self.dataset = dataset 44 | self.times = times 45 | self.CLASSES = dataset.CLASSES 46 | if hasattr(self.dataset, "flag"): 47 | self.flag = np.tile(self.dataset.flag, times) 48 | 49 | self._ori_len = len(self.dataset) 50 | 51 | def __getitem__(self, idx): 52 | return self.dataset[idx % self._ori_len] 53 | 54 | def __len__(self): 55 | return self.times * self._ori_len 56 | -------------------------------------------------------------------------------- /mmdet/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from .compose import Compose 2 | from .formating import ( 3 | Collect, 4 | ImageToTensor, 5 | ToDataContainer, 6 | ToTensor, 7 | Transpose, 8 | to_tensor, 9 | ) 10 | from .instaboost import InstaBoost 11 | from .loading import LoadAnnotations, LoadImageFromFile, LoadProposals 12 | from .test_aug import MultiScaleFlipAug 13 | from .transforms import ( 14 | Albu, 15 | Expand, 16 | MinIoURandomCrop, 17 | Normalize, 18 | Pad, 19 | PhotoMetricDistortion, 20 | RandomCrop, 21 | RandomFlip, 22 | Resize, 23 | SegRescale, 24 | ) 25 | 26 | __all__ = [ 27 | "Compose", 28 | "to_tensor", 29 | "ToTensor", 30 | "ImageToTensor", 31 | "ToDataContainer", 32 | "Transpose", 33 | "Collect", 34 | "LoadAnnotations", 35 | "LoadImageFromFile", 36 | "LoadProposals", 37 | "MultiScaleFlipAug", 38 | "Resize", 39 | "RandomFlip", 40 | "Pad", 41 | "RandomCrop", 42 | "Normalize", 43 | "SegRescale", 44 | "MinIoURandomCrop", 45 | "Expand", 46 | "PhotoMetricDistortion", 47 | "Albu", 48 | "InstaBoost", 49 | ] 50 | -------------------------------------------------------------------------------- /mmdet/datasets/pipelines/compose.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | from mmdet.utils import build_from_cfg 4 | from ..registry import PIPELINES 5 | 6 | 7 | @PIPELINES.register_module 8 | class Compose(object): 9 | def __init__(self, transforms): 10 | assert isinstance(transforms, collections.abc.Sequence) 11 | self.transforms = [] 12 | for transform in transforms: 13 | if isinstance(transform, dict): 14 | transform = build_from_cfg(transform, PIPELINES) 15 | self.transforms.append(transform) 16 | elif callable(transform): 17 | self.transforms.append(transform) 18 | else: 19 | raise TypeError("transform must be callable or a dict") 20 | 21 | def __call__(self, data): 22 | for t in self.transforms: 23 | data = t(data) 24 | if data is None: 25 | return None 26 | return data 27 | 28 | def __repr__(self): 29 | format_string = self.__class__.__name__ + "(" 30 | for t in self.transforms: 31 | format_string += "\n" 32 | format_string += " {0}".format(t) 33 | format_string += "\n)" 34 | return format_string 35 | -------------------------------------------------------------------------------- /mmdet/datasets/pipelines/instaboost.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ..registry import PIPELINES 4 | 5 | 6 | @PIPELINES.register_module 7 | class InstaBoost(object): 8 | """ 9 | Data augmentation method in paper "InstaBoost: Boosting Instance 10 | Segmentation Via Probability Map Guided Copy-Pasting" 11 | Implementation details can refer to https://github.com/GothicAi/Instaboost. 
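    Example:
        a hedged pipeline sketch; InstaBoost normally sits between image
        loading and annotation loading, and the neighboring transforms
        here are illustrative only::

            train_pipeline = [
                dict(type="LoadImageFromFile"),
                dict(type="InstaBoost", aug_ratio=0.5),
                dict(type="LoadAnnotations", with_bbox=True, with_mask=True),
            ]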
12 | """ 13 | 14 | def __init__( 15 | self, 16 | action_candidate=("normal", "horizontal", "skip"), 17 | action_prob=(1, 0, 0), 18 | scale=(0.8, 1.2), 19 | dx=15, 20 | dy=15, 21 | theta=(-1, 1), 22 | color_prob=0.5, 23 | hflag=False, 24 | aug_ratio=0.5, 25 | ): 26 | try: 27 | import instaboostfast as instaboost 28 | except ImportError: 29 | raise ImportError( 30 | 'Please run "pip install instaboostfast" ' 31 | "to install instaboostfast first for instaboost augmentation." 32 | ) 33 | self.cfg = instaboost.InstaBoostConfig( 34 | action_candidate, action_prob, scale, dx, dy, theta, color_prob, hflag 35 | ) 36 | self.aug_ratio = aug_ratio 37 | 38 | def _load_anns(self, results): 39 | labels = results["ann_info"]["labels"] 40 | masks = results["ann_info"]["masks"] 41 | bboxes = results["ann_info"]["bboxes"] 42 | n = len(labels) 43 | 44 | anns = [] 45 | for i in range(n): 46 | label = labels[i] 47 | bbox = bboxes[i] 48 | mask = masks[i] 49 | x1, y1, x2, y2 = bbox 50 | bbox = [x1, y1, x2 - x1 + 1, y2 - y1 + 1] 51 | anns.append({"category_id": label, "segmentation": mask, "bbox": bbox}) 52 | 53 | return anns 54 | 55 | def _parse_anns(self, results, anns, img): 56 | gt_bboxes = [] 57 | gt_labels = [] 58 | gt_masks_ann = [] 59 | for ann in anns: 60 | x1, y1, w, h = ann["bbox"] 61 | bbox = [x1, y1, x1 + w - 1, y1 + h - 1] 62 | gt_bboxes.append(bbox) 63 | gt_labels.append(ann["category_id"]) 64 | gt_masks_ann.append(ann["segmentation"]) 65 | gt_bboxes = np.array(gt_bboxes, dtype=np.float32) 66 | gt_labels = np.array(gt_labels, dtype=np.int64) 67 | results["ann_info"]["labels"] = gt_labels 68 | results["ann_info"]["bboxes"] = gt_bboxes 69 | results["ann_info"]["masks"] = gt_masks_ann 70 | results["img"] = img 71 | return results 72 | 73 | def __call__(self, results): 74 | img = results["img"] 75 | anns = self._load_anns(results) 76 | if np.random.choice([0, 1], p=[1 - self.aug_ratio, self.aug_ratio]): 77 | try: 78 | import instaboostfast as instaboost 79 | except ImportError: 80 | raise ImportError( 81 | 'Please run "pip install instaboostfast" ' 82 | "to install instaboostfast first." 
83 |                 )
84 |             anns, img = instaboost.get_new_data(anns, img, self.cfg, background=None)
85 |         results = self._parse_anns(results, anns, img)
86 |         return results
87 |
88 |     def __repr__(self):
89 |         repr_str = self.__class__.__name__
90 |         repr_str += ("(cfg={}, aug_ratio={})").format(self.cfg, self.aug_ratio)
91 |         return repr_str
92 |
--------------------------------------------------------------------------------
/mmdet/datasets/pipelines/test_aug.py:
--------------------------------------------------------------------------------
1 | import mmcv
2 |
3 | from ..registry import PIPELINES
4 | from .compose import Compose
5 |
6 |
7 | @PIPELINES.register_module
8 | class MultiScaleFlipAug(object):
9 |     def __init__(self, transforms, img_scale, flip=False):
10 |         self.transforms = Compose(transforms)
11 |         self.img_scale = img_scale if isinstance(img_scale, list) else [img_scale]
12 |         assert mmcv.is_list_of(self.img_scale, tuple)
13 |         self.flip = flip
14 |
15 |     def __call__(self, results):
16 |         aug_data = []
17 |         flip_aug = [False, True] if self.flip else [False]
18 |         for scale in self.img_scale:
19 |             for flip in flip_aug:
20 |                 _results = results.copy()
21 |                 _results["scale"] = scale
22 |                 _results["flip"] = flip
23 |                 data = self.transforms(_results)
24 |                 aug_data.append(data)
25 |         # list of dict to dict of list
26 |         aug_data_dict = {key: [] for key in aug_data[0]}
27 |         for data in aug_data:
28 |             for key, val in data.items():
29 |                 aug_data_dict[key].append(val)
30 |         return aug_data_dict
31 |
32 |     def __repr__(self):
33 |         repr_str = self.__class__.__name__
34 |         repr_str += "(transforms={}, img_scale={}, flip={})".format(
35 |             self.transforms, self.img_scale, self.flip
36 |         )
37 |         return repr_str
38 |
--------------------------------------------------------------------------------
/mmdet/datasets/registry.py:
--------------------------------------------------------------------------------
1 | from mmdet.utils import Registry
2 |
3 | DATASETS = Registry("dataset")
4 | PIPELINES = Registry("pipeline")
5 |
--------------------------------------------------------------------------------
/mmdet/datasets/samplers/__init__.py:
--------------------------------------------------------------------------------
1 | from .distributed_classaware_sampler import DistributedClassAwareSampler
2 | from .distributed_sampler import DistributedSampler
3 | from .group_sampler import DistributedGroupSampler, GroupSampler
4 |
5 | __all__ = [
6 |     "DistributedSampler",
7 |     "DistributedGroupSampler",
8 |     "GroupSampler",
9 |     "DistributedClassAwareSampler",
10 | ]
11 |
--------------------------------------------------------------------------------
/mmdet/datasets/samplers/distributed_classaware_sampler.py:
--------------------------------------------------------------------------------
1 | import math
2 | import pickle as pk
3 |
4 | import numpy as np
5 | import torch
6 | from torch.distributed import get_rank, get_world_size
7 | from torch.utils.data import Sampler
8 |
9 | class DistributedClassAwareSampler(Sampler):
10 |     def __init__(self, dataset, num_replicas=None, rank=None, class_sample_path=None):
11 |
12 |         if num_replicas is None:
13 |             num_replicas = get_world_size()
14 |         if rank is None:
15 |             rank = get_rank()
16 |
17 |         self.dataset = dataset
18 |         self.num_replicas = num_replicas
19 |         self.rank = rank
20 |         self.epoch = 0
21 |         self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas))
22 |         self.total_size = self.num_samples * self.num_replicas
23 |
24 |         with open(class_sample_path, "rb") as f:
25 |             self.class_dic = pk.load(f)
26 |         self.class_num
= len(self.class_dic.keys()) 27 | self.class_num_list = [ 28 | len(self.class_dic[i + 1]) for i in range(self.class_num) 29 | ] 30 | self.class_unique_num = len([i for i in self.class_num_list if i != 0]) 31 | self.indices = None 32 | 33 | def __iter__(self): 34 | return iter(self.indices) 35 | 36 | def __len__(self): 37 | return self.num_samples 38 | 39 | def set_epoch(self, epoch): 40 | self.epoch = epoch 41 | 42 | def gen_class_num_indices(class_num_list): 43 | class_indices = np.random.permutation(self.class_num) 44 | id_indices = [ 45 | self.class_dic[class_indice + 1][ 46 | np.random.permutation(class_num_list[class_indice])[0] 47 | ] 48 | for class_indice in class_indices 49 | if class_num_list[class_indice] != 0 50 | ] 51 | return id_indices 52 | 53 | # deterministically shuffle based on epoch 54 | np.random.seed(self.epoch + 1) 55 | num_bins = int(math.floor(self.total_size * 1.0 / self.class_num)) 56 | indices = [] 57 | for i in range(num_bins): 58 | indices += gen_class_num_indices(self.class_num_list) 59 | 60 | # add extra samples to make it evenly divisible 61 | indices += indices[: (self.total_size - len(indices))] 62 | assert len(indices) == self.total_size 63 | # subsample 64 | offset = self.num_samples * self.rank 65 | indices = indices[offset : offset + self.num_samples] 66 | assert len(indices) == self.num_samples 67 | self.indices = indices 68 | -------------------------------------------------------------------------------- /mmdet/datasets/samplers/distributed_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import DistributedSampler as _DistributedSampler 3 | 4 | 5 | class DistributedSampler(_DistributedSampler): 6 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 7 | super().__init__(dataset, num_replicas=num_replicas, rank=rank) 8 | self.shuffle = shuffle 9 | 10 | def __iter__(self): 11 | # deterministically shuffle based on epoch 12 | if self.shuffle: 13 | g = torch.Generator() 14 | g.manual_seed(self.epoch) 15 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 16 | else: 17 | indices = torch.arange(len(self.dataset)).tolist() 18 | 19 | # add extra samples to make it evenly divisible 20 | indices += indices[: (self.total_size - len(indices))] 21 | assert len(indices) == self.total_size 22 | 23 | # subsample 24 | indices = indices[self.rank : self.total_size : self.num_replicas] 25 | assert len(indices) == self.num_samples 26 | 27 | return iter(indices) 28 | -------------------------------------------------------------------------------- /mmdet/datasets/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .eval import ( 2 | OpenImagesDetectionChallengeEvaluator, 3 | get_categories, 4 | read_dets, 5 | read_gts, 6 | ) 7 | 8 | __all__ = [ 9 | "get_categories", 10 | "read_dets", 11 | "read_gts", 12 | "OpenImagesDetectionChallengeEvaluator", 13 | ] 14 | -------------------------------------------------------------------------------- /mmdet/datasets/utils/np_box_mask_list.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Numpy BoxMaskList classes and functions.""" 16 | 17 | import numpy as np 18 | 19 | from . import np_box_list 20 | 21 | 22 | class BoxMaskList(np_box_list.BoxList): 23 | """Convenience wrapper for BoxList with masks. 24 | 25 | BoxMaskList extends the np_box_list.BoxList to contain masks as well. 26 | In particular, its constructor receives both boxes and masks. Note that the 27 | masks correspond to the full image. 28 | """ 29 | 30 | def __init__(self, box_data, mask_data): 31 | """Constructs box collection. 32 | 33 | Args: 34 | box_data: a numpy array of shape [N, 4] representing box coordinates 35 | mask_data: a numpy array of shape [N, height, width] representing masks 36 | with values are in {0,1}. The masks correspond to the full 37 | image. The height and the width will be equal to image height and width. 38 | 39 | Raises: 40 | ValueError: if bbox data is not a numpy array 41 | ValueError: if invalid dimensions for bbox data 42 | ValueError: if mask data is not a numpy array 43 | ValueError: if invalid dimension for mask data 44 | """ 45 | super(BoxMaskList, self).__init__(box_data) 46 | if not isinstance(mask_data, np.ndarray): 47 | raise ValueError("Mask data must be a numpy array.") 48 | if len(mask_data.shape) != 3: 49 | raise ValueError("Invalid dimensions for mask data.") 50 | if mask_data.dtype != np.uint8: 51 | raise ValueError("Invalid data type for mask data: uint8 is required.") 52 | if mask_data.shape[0] != box_data.shape[0]: 53 | raise ValueError("There should be the same number of boxes and masks.") 54 | self.data["masks"] = mask_data 55 | 56 | def get_masks(self): 57 | """Convenience function for accessing masks. 
58 | 59 | Returns: 60 | a numpy array of shape [N, height, width] representing masks 61 | """ 62 | return self.get_field("masks") 63 | -------------------------------------------------------------------------------- /mmdet/datasets/voc.py: -------------------------------------------------------------------------------- 1 | from mmdet.core import eval_map, eval_recalls 2 | from .registry import DATASETS 3 | from .xml_style import XMLDataset 4 | 5 | 6 | @DATASETS.register_module 7 | class VOCDataset(XMLDataset): 8 | 9 | CLASSES = ( 10 | "aeroplane", 11 | "bicycle", 12 | "bird", 13 | "boat", 14 | "bottle", 15 | "bus", 16 | "car", 17 | "cat", 18 | "chair", 19 | "cow", 20 | "diningtable", 21 | "dog", 22 | "horse", 23 | "motorbike", 24 | "person", 25 | "pottedplant", 26 | "sheep", 27 | "sofa", 28 | "train", 29 | "tvmonitor", 30 | ) 31 | 32 | def __init__(self, **kwargs): 33 | super(VOCDataset, self).__init__(**kwargs) 34 | if "VOC2007" in self.img_prefix: 35 | self.year = 2007 36 | elif "VOC2012" in self.img_prefix: 37 | self.year = 2012 38 | else: 39 | raise ValueError("Cannot infer dataset year from img_prefix") 40 | 41 | def evaluate( 42 | self, 43 | results, 44 | metric="mAP", 45 | logger=None, 46 | proposal_nums=(100, 300, 1000), 47 | iou_thr=0.5, 48 | scale_ranges=None, 49 | ): 50 | if not isinstance(metric, str): 51 | assert len(metric) == 1 52 | metric = metric[0] 53 | allowed_metrics = ["mAP", "recall"] 54 | if metric not in allowed_metrics: 55 | raise KeyError("metric {} is not supported".format(metric)) 56 | annotations = [self.get_ann_info(i) for i in range(len(self))] 57 | eval_results = {} 58 | if metric == "mAP": 59 | assert isinstance(iou_thr, float) 60 | if self.year == 2007: 61 | ds_name = "voc07" 62 | else: 63 | ds_name = self.CLASSES 64 | mean_ap, _ = eval_map( 65 | results, 66 | annotations, 67 | scale_ranges=scale_ranges, 68 | iou_thr=iou_thr, 69 | dataset=ds_name, 70 | logger=logger, 71 | ) 72 | eval_results["mAP"] = mean_ap 73 | elif metric == "recall": 74 | gt_bboxes = [ann["bboxes"] for ann in annotations] 75 | if isinstance(iou_thr, float): 76 | iou_thr = [iou_thr] 77 | recalls = eval_recalls( 78 | gt_bboxes, results, proposal_nums, iou_thr, logger=logger 79 | ) 80 | for i, num in enumerate(proposal_nums): 81 | for j, iou in enumerate(iou_thr): 82 | eval_results["recall@{}@{}".format(num, iou)] = recalls[i, j] 83 | if recalls.shape[1] > 1: 84 | ar = recalls.mean(axis=1) 85 | for i, num in enumerate(proposal_nums): 86 | eval_results["AR@{}".format(num)] = ar[i] 87 | return eval_results 88 | -------------------------------------------------------------------------------- /mmdet/datasets/wider_face.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import xml.etree.ElementTree as ET 3 | 4 | import mmcv 5 | 6 | from .registry import DATASETS 7 | from .xml_style import XMLDataset 8 | 9 | 10 | @DATASETS.register_module 11 | class WIDERFaceDataset(XMLDataset): 12 | """ 13 | Reader for the WIDER Face dataset in PASCAL VOC format.
14 | Conversion scripts can be found in 15 | https://github.com/sovrasov/wider-face-pascal-voc-annotations 16 | """ 17 | 18 | CLASSES = ("face",) 19 | 20 | def __init__(self, **kwargs): 21 | super(WIDERFaceDataset, self).__init__(**kwargs) 22 | 23 | def load_annotations(self, ann_file): 24 | img_infos = [] 25 | img_ids = mmcv.list_from_file(ann_file) 26 | for img_id in img_ids: 27 | filename = "{}.jpg".format(img_id) 28 | xml_path = osp.join(self.img_prefix, "Annotations", "{}.xml".format(img_id)) 29 | tree = ET.parse(xml_path) 30 | root = tree.getroot() 31 | size = root.find("size") 32 | width = int(size.find("width").text) 33 | height = int(size.find("height").text) 34 | folder = root.find("folder").text 35 | img_infos.append( 36 | dict( 37 | id=img_id, 38 | filename=osp.join(folder, filename), 39 | width=width, 40 | height=height, 41 | ) 42 | ) 43 | 44 | return img_infos 45 | -------------------------------------------------------------------------------- /mmdet/datasets/xml_style.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import xml.etree.ElementTree as ET 3 | 4 | import mmcv 5 | import numpy as np 6 | 7 | from .custom import CustomDataset 8 | from .registry import DATASETS 9 | 10 | 11 | @DATASETS.register_module 12 | class XMLDataset(CustomDataset): 13 | def __init__(self, min_size=None, **kwargs): 14 | super(XMLDataset, self).__init__(**kwargs) 15 | self.cat2label = {cat: i + 1 for i, cat in enumerate(self.CLASSES)} 16 | self.min_size = min_size 17 | 18 | def load_annotations(self, ann_file): 19 | img_infos = [] 20 | img_ids = mmcv.list_from_file(ann_file) 21 | for img_id in img_ids: 22 | filename = "JPEGImages/{}.jpg".format(img_id) 23 | xml_path = osp.join(self.img_prefix, "Annotations", "{}.xml".format(img_id)) 24 | tree = ET.parse(xml_path) 25 | root = tree.getroot() 26 | size = root.find("size") 27 | width = int(size.find("width").text) 28 | height = int(size.find("height").text) 29 | img_infos.append( 30 | dict(id=img_id, filename=filename, width=width, height=height) 31 | ) 32 | return img_infos 33 | 34 | def get_ann_info(self, idx): 35 | img_id = self.img_infos[idx]["id"] 36 | xml_path = osp.join(self.img_prefix, "Annotations", "{}.xml".format(img_id)) 37 | tree = ET.parse(xml_path) 38 | root = tree.getroot() 39 | bboxes = [] 40 | labels = [] 41 | bboxes_ignore = [] 42 | labels_ignore = [] 43 | for obj in root.findall("object"): 44 | name = obj.find("name").text 45 | label = self.cat2label[name] 46 | difficult = int(obj.find("difficult").text) 47 | bnd_box = obj.find("bndbox") 48 | # Coordinates may be float type 49 | bbox = [ 50 | int(float(bnd_box.find("xmin").text)), 51 | int(float(bnd_box.find("ymin").text)), 52 | int(float(bnd_box.find("xmax").text)), 53 | int(float(bnd_box.find("ymax").text)), 54 | ] 55 | ignore = False 56 | if self.min_size: 57 | assert not self.test_mode 58 | w = bbox[2] - bbox[0] 59 | h = bbox[3] - bbox[1] 60 | if w < self.min_size or h < self.min_size: 61 | ignore = True 62 | if difficult or ignore: 63 | bboxes_ignore.append(bbox) 64 | labels_ignore.append(label) 65 | else: 66 | bboxes.append(bbox) 67 | labels.append(label) 68 | if not bboxes: 69 | bboxes = np.zeros((0, 4)) 70 | labels = np.zeros((0,)) 71 | else: 72 | bboxes = np.array(bboxes, ndmin=2) - 1 73 | labels = np.array(labels) 74 | if not bboxes_ignore: 75 | bboxes_ignore = np.zeros((0, 4)) 76 | labels_ignore = np.zeros((0,)) 77 | else: 78 | bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1 79 | labels_ignore = 
np.array(labels_ignore) 80 | ann = dict( 81 | bboxes=bboxes.astype(np.float32), 82 | labels=labels.astype(np.int64), 83 | bboxes_ignore=bboxes_ignore.astype(np.float32), 84 | labels_ignore=labels_ignore.astype(np.int64), 85 | ) 86 | return ann 87 | -------------------------------------------------------------------------------- /mmdet/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_heads import * # noqa: F401,F403 2 | from .backbones import * # noqa: F401,F403 3 | from .bbox_heads import * # noqa: F401,F403 4 | from .builder import ( 5 | build_backbone, 6 | build_detector, 7 | build_head, 8 | build_loss, 9 | build_neck, 10 | build_roi_extractor, 11 | build_shared_head, 12 | ) 13 | from .detectors import * # noqa: F401,F403 14 | from .losses import * # noqa: F401,F403 15 | from .mask_heads import * # noqa: F401,F403 16 | from .necks import * # noqa: F401,F403 17 | from .registry import ( 18 | BACKBONES, 19 | DETECTORS, 20 | HEADS, 21 | LOSSES, 22 | NECKS, 23 | ROI_EXTRACTORS, 24 | SHARED_HEADS, 25 | ) 26 | from .roi_extractors import * # noqa: F401,F403 27 | from .shared_heads import * # noqa: F401,F403 28 | 29 | __all__ = [ 30 | "BACKBONES", 31 | "NECKS", 32 | "ROI_EXTRACTORS", 33 | "SHARED_HEADS", 34 | "HEADS", 35 | "LOSSES", 36 | "DETECTORS", 37 | "build_backbone", 38 | "build_neck", 39 | "build_roi_extractor", 40 | "build_shared_head", 41 | "build_head", 42 | "build_loss", 43 | "build_detector", 44 | ] 45 | -------------------------------------------------------------------------------- /mmdet/models/anchor_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_head import AnchorHead 2 | from .atss_head import ATSSHead 3 | from .fcos_head import FCOSHead 4 | from .fovea_head import FoveaHead 5 | from .free_anchor_retina_head import FreeAnchorRetinaHead 6 | from .ga_retina_head import GARetinaHead 7 | from .ga_rpn_head import GARPNHead 8 | from .guided_anchor_head import FeatureAdaption, GuidedAnchorHead 9 | from .reppoints_head import RepPointsHead 10 | from .retina_head import RetinaHead 11 | from .retina_sepbn_head import RetinaSepBNHead 12 | from .rpn_head import RPNHead 13 | from .ssd_head import SSDHead 14 | 15 | __all__ = [ 16 | "AnchorHead", 17 | "GuidedAnchorHead", 18 | "FeatureAdaption", 19 | "RPNHead", 20 | "GARPNHead", 21 | "RetinaHead", 22 | "RetinaSepBNHead", 23 | "GARetinaHead", 24 | "SSDHead", 25 | "FCOSHead", 26 | "RepPointsHead", 27 | "FoveaHead", 28 | "FreeAnchorRetinaHead", 29 | "ATSSHead", 30 | ] 31 | -------------------------------------------------------------------------------- /mmdet/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .hrnet import HRNet 2 | from .resnet import ResNet, make_res_layer 3 | from .resnext import ResNeXt 4 | from .ssd_vgg import SSDVGG 5 | 6 | __all__ = ["ResNet", "make_res_layer", "ResNeXt", "SSDVGG", "HRNet"] 7 | -------------------------------------------------------------------------------- /mmdet/models/bbox_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_head import BBoxHead 2 | from .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead 3 | from .double_bbox_head import DoubleConvFCBBoxHead 4 | from .tsd_bbox_head import TSDSharedFCBBoxHead 5 | 6 | __all__ = [ 7 | "BBoxHead", 8 | "ConvFCBBoxHead", 9 | "SharedFCBBoxHead", 10 | "DoubleConvFCBBoxHead", 11 | 
"TSDSharedFCBBoxHead", 12 | ] 13 | -------------------------------------------------------------------------------- /mmdet/models/builder.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from mmdet.utils import build_from_cfg 4 | from .registry import ( 5 | BACKBONES, 6 | DETECTORS, 7 | HEADS, 8 | LOSSES, 9 | NECKS, 10 | ROI_EXTRACTORS, 11 | SHARED_HEADS, 12 | ) 13 | 14 | 15 | def build(cfg, registry, default_args=None): 16 | if isinstance(cfg, list): 17 | modules = [build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg] 18 | return nn.Sequential(*modules) 19 | else: 20 | return build_from_cfg(cfg, registry, default_args) 21 | 22 | 23 | def build_backbone(cfg): 24 | return build(cfg, BACKBONES) 25 | 26 | 27 | def build_neck(cfg): 28 | return build(cfg, NECKS) 29 | 30 | 31 | def build_roi_extractor(cfg): 32 | return build(cfg, ROI_EXTRACTORS) 33 | 34 | 35 | def build_shared_head(cfg): 36 | return build(cfg, SHARED_HEADS) 37 | 38 | 39 | def build_head(cfg): 40 | return build(cfg, HEADS) 41 | 42 | 43 | def build_loss(cfg): 44 | return build(cfg, LOSSES) 45 | 46 | 47 | def build_detector(cfg, train_cfg=None, test_cfg=None): 48 | return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg)) 49 | -------------------------------------------------------------------------------- /mmdet/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .atss import ATSS 2 | from .base import BaseDetector 3 | from .cascade_rcnn import CascadeRCNN 4 | from .double_head_rcnn import DoubleHeadRCNN 5 | from .fast_rcnn import FastRCNN 6 | from .faster_rcnn import FasterRCNN 7 | from .fcos import FCOS 8 | from .fovea import FOVEA 9 | from .grid_rcnn import GridRCNN 10 | from .htc import HybridTaskCascade 11 | from .mask_rcnn import MaskRCNN 12 | from .mask_scoring_rcnn import MaskScoringRCNN 13 | from .reppoints_detector import RepPointsDetector 14 | from .retinanet import RetinaNet 15 | from .rpn import RPN 16 | from .single_stage import SingleStageDetector 17 | from .two_stage import TwoStageDetector 18 | 19 | __all__ = [ 20 | "ATSS", 21 | "BaseDetector", 22 | "SingleStageDetector", 23 | "TwoStageDetector", 24 | "RPN", 25 | "FastRCNN", 26 | "FasterRCNN", 27 | "MaskRCNN", 28 | "CascadeRCNN", 29 | "HybridTaskCascade", 30 | "DoubleHeadRCNN", 31 | "RetinaNet", 32 | "FCOS", 33 | "GridRCNN", 34 | "MaskScoringRCNN", 35 | "RepPointsDetector", 36 | "FOVEA", 37 | ] 38 | -------------------------------------------------------------------------------- /mmdet/models/detectors/atss.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .single_stage import SingleStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class ATSS(SingleStageDetector): 7 | def __init__( 8 | self, backbone, neck, bbox_head, train_cfg=None, test_cfg=None, pretrained=None 9 | ): 10 | super(ATSS, self).__init__( 11 | backbone, neck, bbox_head, train_cfg, test_cfg, pretrained 12 | ) 13 | -------------------------------------------------------------------------------- /mmdet/models/detectors/fast_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class FastRCNN(TwoStageDetector): 7 | def __init__( 8 | self, 9 | backbone, 10 | bbox_roi_extractor, 11 | bbox_head, 12 | 
train_cfg, 13 | test_cfg, 14 | neck=None, 15 | shared_head=None, 16 | mask_roi_extractor=None, 17 | mask_head=None, 18 | pretrained=None, 19 | ): 20 | super(FastRCNN, self).__init__( 21 | backbone=backbone, 22 | neck=neck, 23 | shared_head=shared_head, 24 | bbox_roi_extractor=bbox_roi_extractor, 25 | bbox_head=bbox_head, 26 | train_cfg=train_cfg, 27 | test_cfg=test_cfg, 28 | mask_roi_extractor=mask_roi_extractor, 29 | mask_head=mask_head, 30 | pretrained=pretrained, 31 | ) 32 | 33 | def forward_test(self, imgs, img_metas, proposals, **kwargs): 34 | """ 35 | Args: 36 | imgs (List[Tensor]): the outer list indicates test-time 37 | augmentations and inner Tensor should have a shape NxCxHxW, 38 | which contains all images in the batch. 39 | img_metas (List[List[dict]]): the outer list indicates test-time 40 | augs (multiscale, flip, etc.) and the inner list indicates 41 | images in a batch. 42 | proposals (List[List[Tensor]]): the outer list indicates test-time 43 | augs (multiscale, flip, etc.) and the inner list indicates 44 | images in a batch. The Tensor should have a shape Px4, where 45 | P is the number of proposals. 46 | """ 47 | for var, name in [(imgs, "imgs"), (img_metas, "img_metas")]: 48 | if not isinstance(var, list): 49 | raise TypeError("{} must be a list, but got {}".format(name, type(var))) 50 | 51 | num_augs = len(imgs) 52 | if num_augs != len(img_metas): 53 | raise ValueError( 54 | "num of augmentations ({}) != num of image meta ({})".format( 55 | len(imgs), len(img_metas) 56 | ) 57 | ) 58 | # TODO: remove the restriction of imgs_per_gpu == 1 when prepared 59 | imgs_per_gpu = imgs[0].size(0) 60 | assert imgs_per_gpu == 1 61 | 62 | if num_augs == 1: 63 | return self.simple_test(imgs[0], img_metas[0], proposals[0], **kwargs) 64 | else: 65 | # TODO: support test-time augmentation 66 | raise NotImplementedError 67 | -------------------------------------------------------------------------------- /mmdet/models/detectors/faster_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class FasterRCNN(TwoStageDetector): 7 | def __init__( 8 | self, 9 | backbone, 10 | rpn_head, 11 | bbox_roi_extractor, 12 | bbox_head, 13 | train_cfg, 14 | test_cfg, 15 | neck=None, 16 | shared_head=None, 17 | pretrained=None, 18 | ): 19 | super(FasterRCNN, self).__init__( 20 | backbone=backbone, 21 | neck=neck, 22 | shared_head=shared_head, 23 | rpn_head=rpn_head, 24 | bbox_roi_extractor=bbox_roi_extractor, 25 | bbox_head=bbox_head, 26 | train_cfg=train_cfg, 27 | test_cfg=test_cfg, 28 | pretrained=pretrained, 29 | ) 30 | -------------------------------------------------------------------------------- /mmdet/models/detectors/fcos.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .single_stage import SingleStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class FCOS(SingleStageDetector): 7 | def __init__( 8 | self, backbone, neck, bbox_head, train_cfg=None, test_cfg=None, pretrained=None 9 | ): 10 | super(FCOS, self).__init__( 11 | backbone, neck, bbox_head, train_cfg, test_cfg, pretrained 12 | ) 13 | -------------------------------------------------------------------------------- /mmdet/models/detectors/fovea.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .single_stage import
SingleStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class FOVEA(SingleStageDetector): 7 | def __init__( 8 | self, backbone, neck, bbox_head, train_cfg=None, test_cfg=None, pretrained=None 9 | ): 10 | super(FOVEA, self).__init__( 11 | backbone, neck, bbox_head, train_cfg, test_cfg, pretrained 12 | ) 13 | -------------------------------------------------------------------------------- /mmdet/models/detectors/mask_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class MaskRCNN(TwoStageDetector): 7 | def __init__( 8 | self, 9 | backbone, 10 | rpn_head, 11 | bbox_roi_extractor, 12 | bbox_head, 13 | mask_roi_extractor, 14 | mask_head, 15 | train_cfg, 16 | test_cfg, 17 | neck=None, 18 | shared_head=None, 19 | pretrained=None, 20 | ): 21 | super(MaskRCNN, self).__init__( 22 | backbone=backbone, 23 | neck=neck, 24 | shared_head=shared_head, 25 | rpn_head=rpn_head, 26 | bbox_roi_extractor=bbox_roi_extractor, 27 | bbox_head=bbox_head, 28 | mask_roi_extractor=mask_roi_extractor, 29 | mask_head=mask_head, 30 | train_cfg=train_cfg, 31 | test_cfg=test_cfg, 32 | pretrained=pretrained, 33 | ) 34 | -------------------------------------------------------------------------------- /mmdet/models/detectors/reppoints_detector.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.core import bbox2result, bbox_mapping_back, multiclass_nms 4 | from ..registry import DETECTORS 5 | from .single_stage import SingleStageDetector 6 | 7 | 8 | @DETECTORS.register_module 9 | class RepPointsDetector(SingleStageDetector): 10 | """RepPoints: Point Set Representation for Object Detection. 11 | 12 | This detector is the implementation of: 13 | - RepPoints detector (https://arxiv.org/pdf/1904.11490) 14 | """ 15 | 16 | def __init__( 17 | self, backbone, neck, bbox_head, train_cfg=None, test_cfg=None, pretrained=None 18 | ): 19 | super(RepPointsDetector, self).__init__( 20 | backbone, neck, bbox_head, train_cfg, test_cfg, pretrained 21 | ) 22 | 23 | def merge_aug_results(self, aug_bboxes, aug_scores, img_metas): 24 | """Merge augmented detection bboxes and scores. 25 | 26 | Args: 27 | aug_bboxes (list[Tensor]): shape (n, 4*#class) 28 | aug_scores (list[Tensor] or None): shape (n, #class) 29 | img_metas (list[list[dict]]): image meta info, including img_shape, scale_factor and flip.
30 | 31 | Returns: 32 | tuple: (bboxes, scores) 33 | """ 34 | recovered_bboxes = [] 35 | for bboxes, img_info in zip(aug_bboxes, img_metas): 36 | img_shape = img_info[0]["img_shape"] 37 | scale_factor = img_info[0]["scale_factor"] 38 | flip = img_info[0]["flip"] 39 | bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip) 40 | recovered_bboxes.append(bboxes) 41 | bboxes = torch.cat(recovered_bboxes, dim=0) 42 | if aug_scores is None: 43 | return bboxes 44 | else: 45 | scores = torch.cat(aug_scores, dim=0) 46 | return bboxes, scores 47 | 48 | def aug_test(self, imgs, img_metas, rescale=False): 49 | # recompute feats to save memory 50 | feats = self.extract_feats(imgs) 51 | 52 | aug_bboxes = [] 53 | aug_scores = [] 54 | for x, img_meta in zip(feats, img_metas): 55 | # only one image in the batch 56 | outs = self.bbox_head(x) 57 | bbox_inputs = outs + (img_metas, self.test_cfg, False, False) 58 | det_bboxes, det_scores = self.bbox_head.get_bboxes(*bbox_inputs)[0] 59 | aug_bboxes.append(det_bboxes) 60 | aug_scores.append(det_scores) 61 | 62 | # after merging, bboxes will be rescaled to the original image size 63 | merged_bboxes, merged_scores = self.merge_aug_results( 64 | aug_bboxes, aug_scores, img_metas 65 | ) 66 | det_bboxes, det_labels = multiclass_nms( 67 | merged_bboxes, 68 | merged_scores, 69 | self.test_cfg.score_thr, 70 | self.test_cfg.nms, 71 | self.test_cfg.max_per_img, 72 | ) 73 | 74 | if rescale: 75 | _det_bboxes = det_bboxes 76 | else: 77 | _det_bboxes = det_bboxes.clone() 78 | _det_bboxes[:, :4] *= img_metas[0][0]["scale_factor"] 79 | bbox_results = bbox2result(_det_bboxes, det_labels, self.bbox_head.num_classes) 80 | return bbox_results 81 | -------------------------------------------------------------------------------- /mmdet/models/detectors/retinanet.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .single_stage import SingleStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class RetinaNet(SingleStageDetector): 7 | def __init__( 8 | self, backbone, neck, bbox_head, train_cfg=None, test_cfg=None, pretrained=None 9 | ): 10 | super(RetinaNet, self).__init__( 11 | backbone, neck, bbox_head, train_cfg, test_cfg, pretrained 12 | ) 13 | -------------------------------------------------------------------------------- /mmdet/models/detectors/single_stage.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from mmdet.core import bbox2result 4 | from .. import builder 5 | from ..registry import DETECTORS 6 | from .base import BaseDetector 7 | 8 | 9 | @DETECTORS.register_module 10 | class SingleStageDetector(BaseDetector): 11 | """Base class for single-stage detectors. 12 | 13 | Single-stage detectors directly and densely predict bounding boxes on the 14 | output features of the backbone+neck. 
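A minimal usage sketch (an editorial illustration, not part of the original docstring; `cfg` and its attributes are assumed to come from an mmdet config file):

    >>> from mmdet.models import build_detector
    >>> model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
    >>> losses = model.forward_train(img, img_metas, gt_bboxes, gt_labels)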
15 | """ 16 | 17 | def __init__( 18 | self, 19 | backbone, 20 | neck=None, 21 | bbox_head=None, 22 | train_cfg=None, 23 | test_cfg=None, 24 | pretrained=None, 25 | ): 26 | super(SingleStageDetector, self).__init__() 27 | self.backbone = builder.build_backbone(backbone) 28 | if neck is not None: 29 | self.neck = builder.build_neck(neck) 30 | self.bbox_head = builder.build_head(bbox_head) 31 | self.train_cfg = train_cfg 32 | self.test_cfg = test_cfg 33 | self.init_weights(pretrained=pretrained) 34 | 35 | def init_weights(self, pretrained=None): 36 | super(SingleStageDetector, self).init_weights(pretrained) 37 | self.backbone.init_weights(pretrained=pretrained) 38 | if self.with_neck: 39 | if isinstance(self.neck, nn.Sequential): 40 | for m in self.neck: 41 | m.init_weights() 42 | else: 43 | self.neck.init_weights() 44 | self.bbox_head.init_weights() 45 | 46 | def extract_feat(self, img): 47 | """Directly extract features from the backbone+neck 48 | """ 49 | x = self.backbone(img) 50 | if self.with_neck: 51 | x = self.neck(x) 52 | return x 53 | 54 | def forward_dummy(self, img): 55 | """Used for computing network flops. 56 | 57 | See `mmdetection/tools/get_flops.py` 58 | """ 59 | x = self.extract_feat(img) 60 | outs = self.bbox_head(x) 61 | return outs 62 | 63 | def forward_train( 64 | self, img, img_metas, gt_bboxes, gt_labels, gt_bboxes_ignore=None 65 | ): 66 | x = self.extract_feat(img) 67 | outs = self.bbox_head(x) 68 | loss_inputs = outs + (gt_bboxes, gt_labels, img_metas, self.train_cfg) 69 | losses = self.bbox_head.loss(*loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) 70 | return losses 71 | 72 | def simple_test(self, img, img_metas, rescale=False): 73 | x = self.extract_feat(img) 74 | outs = self.bbox_head(x) 75 | bbox_inputs = outs + (img_metas, self.test_cfg, rescale) 76 | bbox_list = self.bbox_head.get_bboxes(*bbox_inputs) 77 | bbox_results = [ 78 | bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes) 79 | for det_bboxes, det_labels in bbox_list 80 | ] 81 | return bbox_results[0] 82 | 83 | def aug_test(self, imgs, img_metas, rescale=False): 84 | raise NotImplementedError 85 | -------------------------------------------------------------------------------- /mmdet/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .accuracy import Accuracy, accuracy 2 | from .balanced_l1_loss import BalancedL1Loss, balanced_l1_loss 3 | from .cross_entropy_loss import ( 4 | CrossEntropyLoss, 5 | binary_cross_entropy, 6 | cross_entropy, 7 | mask_cross_entropy, 8 | ) 9 | from .focal_loss import FocalLoss, sigmoid_focal_loss 10 | from .ghm_loss import GHMC, GHMR 11 | from .iou_loss import BoundedIoULoss, GIoULoss, IoULoss, bounded_iou_loss, iou_loss 12 | from .mse_loss import MSELoss, mse_loss 13 | from .smooth_l1_loss import SmoothL1Loss, smooth_l1_loss 14 | from .trunc_cross_entropy import TrunCrossEntropyLoss 15 | from .utils import reduce_loss, weight_reduce_loss, weighted_loss 16 | 17 | __all__ = [ 18 | "accuracy", 19 | "Accuracy", 20 | "cross_entropy", 21 | "binary_cross_entropy", 22 | "mask_cross_entropy", 23 | "CrossEntropyLoss", 24 | "sigmoid_focal_loss", 25 | "FocalLoss", 26 | "smooth_l1_loss", 27 | "SmoothL1Loss", 28 | "balanced_l1_loss", 29 | "BalancedL1Loss", 30 | "mse_loss", 31 | "MSELoss", 32 | "iou_loss", 33 | "bounded_iou_loss", 34 | "IoULoss", 35 | "BoundedIoULoss", 36 | "GIoULoss", 37 | "GHMC", 38 | "GHMR", 39 | "reduce_loss", 40 | "weight_reduce_loss", 41 | "weighted_loss", 42 | "TrunCrossEntropyLoss", 43 | ] 
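# A hedged usage sketch (editorial addition, not in the original file): the
# losses exported above are normally instantiated from config dicts through
# mmdet.models.build_loss (see mmdet/models/builder.py); the argument values
# here are illustrative only:
#
#   from mmdet.models import build_loss
#   loss_cls = build_loss(
#       dict(type="FocalLoss", use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.0))
#   loss_bbox = build_loss(dict(type="SmoothL1Loss", beta=1.0, loss_weight=1.0))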
44 | -------------------------------------------------------------------------------- /mmdet/models/losses/accuracy.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | def accuracy(pred, target, topk=1): 5 | assert isinstance(topk, (int, tuple)) 6 | if isinstance(topk, int): 7 | topk = (topk,) 8 | return_single = True 9 | else: 10 | return_single = False 11 | 12 | maxk = max(topk) 13 | _, pred_label = pred.topk(maxk, dim=1) 14 | pred_label = pred_label.t() 15 | correct = pred_label.eq(target.view(1, -1).expand_as(pred_label)) 16 | 17 | res = [] 18 | for k in topk: 19 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 20 | res.append(correct_k.mul_(100.0 / pred.size(0))) 21 | return res[0] if return_single else res 22 | 23 | 24 | class Accuracy(nn.Module): 25 | def __init__(self, topk=(1,)): 26 | super().__init__() 27 | self.topk = topk 28 | 29 | def forward(self, pred, target): 30 | return accuracy(pred, target, self.topk) 31 | -------------------------------------------------------------------------------- /mmdet/models/losses/balanced_l1_loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | from ..registry import LOSSES 6 | from .utils import weighted_loss 7 | 8 | 9 | @weighted_loss 10 | def balanced_l1_loss(pred, target, beta=1.0, alpha=0.5, gamma=1.5, reduction="mean"): 11 | assert beta > 0 12 | assert pred.size() == target.size() and target.numel() > 0 13 | 14 | diff = torch.abs(pred - target) 15 | b = np.e ** (gamma / alpha) - 1 16 | loss = torch.where( 17 | diff < beta, 18 | alpha / b * (b * diff + 1) * torch.log(b * diff / beta + 1) - alpha * diff, 19 | gamma * diff + gamma / b - alpha * beta, 20 | ) 21 | 22 | return loss 23 | 24 | 25 | @LOSSES.register_module 26 | class BalancedL1Loss(nn.Module): 27 | """Balanced L1 Loss 28 | 29 | arXiv: https://arxiv.org/pdf/1904.02701.pdf (CVPR 2019) 30 | """ 31 | 32 | def __init__( 33 | self, alpha=0.5, gamma=1.5, beta=1.0, reduction="mean", loss_weight=1.0 34 | ): 35 | super(BalancedL1Loss, self).__init__() 36 | self.alpha = alpha 37 | self.gamma = gamma 38 | self.beta = beta 39 | self.reduction = reduction 40 | self.loss_weight = loss_weight 41 | 42 | def forward( 43 | self, 44 | pred, 45 | target, 46 | weight=None, 47 | avg_factor=None, 48 | reduction_override=None, 49 | **kwargs 50 | ): 51 | assert reduction_override in (None, "none", "mean", "sum") 52 | reduction = reduction_override if reduction_override else self.reduction 53 | loss_bbox = self.loss_weight * balanced_l1_loss( 54 | pred, 55 | target, 56 | weight, 57 | alpha=self.alpha, 58 | gamma=self.gamma, 59 | beta=self.beta, 60 | reduction=reduction, 61 | avg_factor=avg_factor, 62 | **kwargs 63 | ) 64 | return loss_bbox 65 | -------------------------------------------------------------------------------- /mmdet/models/losses/focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from mmdet.ops import sigmoid_focal_loss as _sigmoid_focal_loss 5 | from ..registry import LOSSES 6 | from .utils import weight_reduce_loss 7 | 8 | 9 | # This method is only for debugging 10 | def py_sigmoid_focal_loss( 11 | pred, target, weight=None, gamma=2.0, alpha=0.25, reduction="mean", avg_factor=None 12 | ): 13 | pred_sigmoid = pred.sigmoid() 14 | target = target.type_as(pred) 15 | pt = (1 - 
pred_sigmoid) * target + pred_sigmoid * (1 - target) 16 | focal_weight = (alpha * target + (1 - alpha) * (1 - target)) * pt.pow(gamma) 17 | loss = ( 18 | F.binary_cross_entropy_with_logits(pred, target, reduction="none") 19 | * focal_weight 20 | ) 21 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 22 | return loss 23 | 24 | 25 | def sigmoid_focal_loss( 26 | pred, target, weight=None, gamma=2.0, alpha=0.25, reduction="mean", avg_factor=None 27 | ): 28 | # Function.apply does not accept keyword arguments, so the decorator 29 | # "weighted_loss" is not applicable 30 | loss = _sigmoid_focal_loss(pred, target, gamma, alpha) 31 | # TODO: find a proper way to handle the shape of weight 32 | if weight is not None: 33 | weight = weight.view(-1, 1) 34 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 35 | return loss 36 | 37 | 38 | @LOSSES.register_module 39 | class FocalLoss(nn.Module): 40 | def __init__( 41 | self, use_sigmoid=True, gamma=2.0, alpha=0.25, reduction="mean", loss_weight=1.0 42 | ): 43 | super(FocalLoss, self).__init__() 44 | assert use_sigmoid is True, "Only sigmoid focal loss supported now." 45 | self.use_sigmoid = use_sigmoid 46 | self.gamma = gamma 47 | self.alpha = alpha 48 | self.reduction = reduction 49 | self.loss_weight = loss_weight 50 | 51 | def forward( 52 | self, pred, target, weight=None, avg_factor=None, reduction_override=None 53 | ): 54 | assert reduction_override in (None, "none", "mean", "sum") 55 | reduction = reduction_override if reduction_override else self.reduction 56 | if self.use_sigmoid: 57 | loss_cls = self.loss_weight * sigmoid_focal_loss( 58 | pred, 59 | target, 60 | weight, 61 | gamma=self.gamma, 62 | alpha=self.alpha, 63 | reduction=reduction, 64 | avg_factor=avg_factor, 65 | ) 66 | else: 67 | raise NotImplementedError 68 | return loss_cls 69 | -------------------------------------------------------------------------------- /mmdet/models/losses/mse_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from ..registry import LOSSES 5 | from .utils import weighted_loss 6 | 7 | 8 | @weighted_loss 9 | def mse_loss(pred, target): 10 | return F.mse_loss(pred, target, reduction="none") 11 | 12 | 13 | @LOSSES.register_module 14 | class MSELoss(nn.Module): 15 | def __init__(self, reduction="mean", loss_weight=1.0): 16 | super().__init__() 17 | self.reduction = reduction 18 | self.loss_weight = loss_weight 19 | 20 | def forward(self, pred, target, weight=None, avg_factor=None): 21 | loss = self.loss_weight * mse_loss( 22 | pred, target, weight, reduction=self.reduction, avg_factor=avg_factor 23 | ) 24 | return loss 25 | -------------------------------------------------------------------------------- /mmdet/models/losses/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from ..registry import LOSSES 5 | from .utils import weighted_loss 6 | 7 | 8 | @weighted_loss 9 | def smooth_l1_loss(pred, target, beta=1.0): 10 | assert beta > 0 11 | assert pred.size() == target.size() and target.numel() > 0 12 | diff = torch.abs(pred - target) 13 | loss = torch.where(diff < beta, 0.5 * diff * diff / beta, diff - 0.5 * beta) 14 | return loss 15 | 16 | 17 | @LOSSES.register_module 18 | class SmoothL1Loss(nn.Module): 19 | def __init__(self, beta=1.0, reduction="mean", loss_weight=1.0): 20 | super(SmoothL1Loss, self).__init__() 21 | self.beta = 
beta 22 | self.reduction = reduction 23 | self.loss_weight = loss_weight 24 | 25 | def forward( 26 | self, 27 | pred, 28 | target, 29 | weight=None, 30 | avg_factor=None, 31 | reduction_override=None, 32 | **kwargs 33 | ): 34 | assert reduction_override in (None, "none", "mean", "sum") 35 | reduction = reduction_override if reduction_override else self.reduction 36 | loss_bbox = self.loss_weight * smooth_l1_loss( 37 | pred, 38 | target, 39 | weight, 40 | beta=self.beta, 41 | reduction=reduction, 42 | avg_factor=avg_factor, 43 | **kwargs 44 | ) 45 | return loss_bbox 46 | -------------------------------------------------------------------------------- /mmdet/models/losses/trunc_cross_entropy.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from ..registry import LOSSES 5 | 6 | 7 | @LOSSES.register_module 8 | class TrunCrossEntropyLoss(nn.Module): 9 | def __init__(self, loss_trunc_thr=0.5, decay=0.1, ignore_index=-1): 10 | super(TrunCrossEntropyLoss, self).__init__() 11 | self.loss_trunc_thr = loss_trunc_thr 12 | self.decay = decay 13 | self.ignore_index = ignore_index 14 | 15 | def forward(self, input, targets): 16 | weights = (targets > self.ignore_index).float() 17 | normalizer = weights.sum().clamp(min=1) 18 | 19 | p = F.softmax(input, dim=1) 20 | p_max = p[:, 1:].max(dim=1)[0] 21 | inds = (p_max > self.loss_trunc_thr) & (targets == 0) 22 | weights[inds] = self.decay 23 | 24 | loss = F.cross_entropy(input, targets, reduction="none") 25 | loss = loss * weights / normalizer 26 | return loss.sum() 27 | -------------------------------------------------------------------------------- /mmdet/models/losses/utils.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import torch.nn.functional as F 4 | 5 | 6 | def reduce_loss(loss, reduction): 7 | """Reduce loss as specified. 8 | 9 | Args: 10 | loss (Tensor): Elementwise loss tensor. 11 | reduction (str): Options are "none", "mean" and "sum". 12 | 13 | Return: 14 | Tensor: Reduced loss tensor. 15 | """ 16 | reduction_enum = F._Reduction.get_enum(reduction) 17 | # none: 0, elementwise_mean:1, sum: 2 18 | if reduction_enum == 0: 19 | return loss 20 | elif reduction_enum == 1: 21 | return loss.mean() 22 | elif reduction_enum == 2: 23 | return loss.sum() 24 | 25 | 26 | def weight_reduce_loss(loss, weight=None, reduction="mean", avg_factor=None): 27 | """Apply element-wise weight and reduce loss. 28 | 29 | Args: 30 | loss (Tensor): Element-wise loss. 31 | weight (Tensor): Element-wise weights. 32 | reduction (str): Same as built-in losses of PyTorch. 33 | avg_factor (float): Average factor when computing the mean of losses. 34 | 35 | Returns: 36 | Tensor: Processed loss values. 37 | """ 38 | # if weight is specified, apply element-wise weight 39 | if weight is not None: 40 | loss = loss * weight 41 | 42 | # if avg_factor is not specified, just reduce the loss 43 | if avg_factor is None: 44 | loss = reduce_loss(loss, reduction) 45 | else: 46 | # if reduction is mean, then average the loss by avg_factor 47 | if reduction == "mean": 48 | loss = loss.sum() / avg_factor 49 | # if reduction is 'none', then do nothing, otherwise raise an error 50 | elif reduction != "none": 51 | raise ValueError('avg_factor can not be used with reduction="sum"') 52 | return loss 53 | 54 | 55 | def weighted_loss(loss_func): 56 | """Create a weighted version of a given loss function.
57 | 58 | To use this decorator, the loss function must have the signature like 59 | `loss_func(pred, target, **kwargs)`. The function only needs to compute 60 | element-wise loss without any reduction. This decorator will add weight 61 | and reduction arguments to the function. The decorated function will have 62 | the signature like `loss_func(pred, target, weight=None, reduction='mean', 63 | avg_factor=None, **kwargs)`. 64 | 65 | :Example: 66 | 67 | >>> import torch 68 | >>> @weighted_loss 69 | >>> def l1_loss(pred, target): 70 | >>> return (pred - target).abs() 71 | 72 | >>> pred = torch.Tensor([0, 2, 3]) 73 | >>> target = torch.Tensor([1, 1, 1]) 74 | >>> weight = torch.Tensor([1, 0, 1]) 75 | 76 | >>> l1_loss(pred, target) 77 | tensor(1.3333) 78 | >>> l1_loss(pred, target, weight) 79 | tensor(1.) 80 | >>> l1_loss(pred, target, reduction='none') 81 | tensor([1., 1., 2.]) 82 | >>> l1_loss(pred, target, weight, avg_factor=2) 83 | tensor(1.5000) 84 | """ 85 | 86 | @functools.wraps(loss_func) 87 | def wrapper(pred, target, weight=None, reduction="mean", avg_factor=None, **kwargs): 88 | # get element-wise loss 89 | loss = loss_func(pred, target, **kwargs) 90 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 91 | return loss 92 | 93 | return wrapper 94 | -------------------------------------------------------------------------------- /mmdet/models/mask_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcn_mask_head import FCNMaskHead 2 | from .fused_semantic_head import FusedSemanticHead 3 | from .grid_head import GridHead 4 | from .htc_mask_head import HTCMaskHead 5 | from .maskiou_head import MaskIoUHead 6 | 7 | __all__ = ["FCNMaskHead", "HTCMaskHead", "FusedSemanticHead", "GridHead", "MaskIoUHead"] 8 | -------------------------------------------------------------------------------- /mmdet/models/mask_heads/htc_mask_head.py: -------------------------------------------------------------------------------- 1 | from mmdet.ops import ConvModule 2 | from ..registry import HEADS 3 | from .fcn_mask_head import FCNMaskHead 4 | 5 | 6 | @HEADS.register_module 7 | class HTCMaskHead(FCNMaskHead): 8 | def __init__(self, with_conv_res=True, *args, **kwargs): 9 | super(HTCMaskHead, self).__init__(*args, **kwargs) 10 | self.with_conv_res = with_conv_res 11 | if self.with_conv_res: 12 | self.conv_res = ConvModule( 13 | self.conv_out_channels, 14 | self.conv_out_channels, 15 | 1, 16 | conv_cfg=self.conv_cfg, 17 | norm_cfg=self.norm_cfg, 18 | ) 19 | 20 | def init_weights(self): 21 | super(HTCMaskHead, self).init_weights() 22 | if self.with_conv_res: 23 | self.conv_res.init_weights() 24 | 25 | def forward(self, x, res_feat=None, return_logits=True, return_feat=True): 26 | if res_feat is not None: 27 | assert self.with_conv_res 28 | res_feat = self.conv_res(res_feat) 29 | x = x + res_feat 30 | for conv in self.convs: 31 | x = conv(x) 32 | res_feat = x 33 | outs = [] 34 | if return_logits: 35 | x = self.upsample(x) 36 | if self.upsample_method == "deconv": 37 | x = self.relu(x) 38 | mask_pred = self.conv_logits(x) 39 | outs.append(mask_pred) 40 | if return_feat: 41 | outs.append(res_feat) 42 | return outs if len(outs) > 1 else outs[0] 43 | -------------------------------------------------------------------------------- /mmdet/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .bfp import BFP 2 | from .fpn import FPN 3 | from .fpn_carafe import FPN_CARAFE 4 | from .hrfpn 
import HRFPN 5 | from .nas_fpn import NASFPN 6 | 7 | __all__ = ["FPN", "BFP", "HRFPN", "NASFPN", "FPN_CARAFE"] 8 | -------------------------------------------------------------------------------- /mmdet/models/registry.py: -------------------------------------------------------------------------------- 1 | from mmdet.utils import Registry 2 | 3 | BACKBONES = Registry("backbone") 4 | NECKS = Registry("neck") 5 | ROI_EXTRACTORS = Registry("roi_extractor") 6 | SHARED_HEADS = Registry("shared_head") 7 | HEADS = Registry("head") 8 | LOSSES = Registry("loss") 9 | DETECTORS = Registry("detector") 10 | -------------------------------------------------------------------------------- /mmdet/models/roi_extractors/__init__.py: -------------------------------------------------------------------------------- 1 | from .single_level import SingleRoIExtractor 2 | 3 | __all__ = ["SingleRoIExtractor"] 4 | -------------------------------------------------------------------------------- /mmdet/models/shared_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .res_layer import ResLayer 2 | 3 | __all__ = ["ResLayer"] 4 | -------------------------------------------------------------------------------- /mmdet/models/shared_heads/res_layer.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from mmcv.cnn import constant_init, kaiming_init 3 | from mmcv.runner import load_checkpoint 4 | 5 | from mmdet.core import auto_fp16 6 | from mmdet.utils import get_root_logger 7 | from ..backbones import ResNet, make_res_layer 8 | from ..registry import SHARED_HEADS 9 | 10 | 11 | @SHARED_HEADS.register_module 12 | class ResLayer(nn.Module): 13 | def __init__( 14 | self, 15 | depth, 16 | stage=3, 17 | stride=2, 18 | dilation=1, 19 | style="pytorch", 20 | norm_cfg=dict(type="BN", requires_grad=True), 21 | norm_eval=True, 22 | with_cp=False, 23 | dcn=None, 24 | ): 25 | super(ResLayer, self).__init__() 26 | self.norm_eval = norm_eval 27 | self.norm_cfg = norm_cfg 28 | self.stage = stage 29 | self.fp16_enabled = False 30 | block, stage_blocks = ResNet.arch_settings[depth] 31 | stage_block = stage_blocks[stage] 32 | planes = 64 * 2 ** stage 33 | inplanes = 64 * 2 ** (stage - 1) * block.expansion 34 | 35 | res_layer = make_res_layer( 36 | block, 37 | inplanes, 38 | planes, 39 | stage_block, 40 | stride=stride, 41 | dilation=dilation, 42 | style=style, 43 | with_cp=with_cp, 44 | norm_cfg=self.norm_cfg, 45 | dcn=dcn, 46 | ) 47 | self.add_module("layer{}".format(stage + 1), res_layer) 48 | 49 | def init_weights(self, pretrained=None): 50 | if isinstance(pretrained, str): 51 | logger = get_root_logger() 52 | load_checkpoint(self, pretrained, strict=False, logger=logger) 53 | elif pretrained is None: 54 | for m in self.modules(): 55 | if isinstance(m, nn.Conv2d): 56 | kaiming_init(m) 57 | elif isinstance(m, nn.BatchNorm2d): 58 | constant_init(m, 1) 59 | else: 60 | raise TypeError("pretrained must be a str or None") 61 | 62 | @auto_fp16() 63 | def forward(self, x): 64 | res_layer = getattr(self, "layer{}".format(self.stage + 1)) 65 | out = res_layer(x) 66 | return out 67 | 68 | def train(self, mode=True): 69 | super(ResLayer, self).train(mode) 70 | if self.norm_eval: 71 | for m in self.modules(): 72 | if isinstance(m, nn.BatchNorm2d): 73 | m.eval() 74 | -------------------------------------------------------------------------------- /mmdet/models/utils/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .weight_init import bias_init_with_prob 2 | 3 | __all__ = ["bias_init_with_prob"] 4 | -------------------------------------------------------------------------------- /mmdet/models/utils/weight_init.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def bias_init_with_prob(prior_prob): 5 | """Initialize conv/fc bias value according to a given probability.""" 6 | bias_init = float(-np.log((1 - prior_prob) / prior_prob)) 7 | return bias_init 8 | -------------------------------------------------------------------------------- /mmdet/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .context_block import ContextBlock 2 | from .conv import build_conv_layer 3 | from .conv_module import ConvModule 4 | from .conv_ws import ConvWS2d, conv_ws_2d 5 | from .dcn import ( 6 | DeformConv, 7 | DeformConvPack, 8 | DeformRoIPooling, 9 | DeformRoIPoolingPack, 10 | DeltaCPooling, 11 | DeltaRPooling, 12 | ModulatedDeformConv, 13 | ModulatedDeformConvPack, 14 | ModulatedDeformRoIPoolingPack, 15 | deform_conv, 16 | deform_roi_pooling, 17 | modulated_deform_conv, 18 | ) 19 | from .generalized_attention import GeneralizedAttention 20 | from .masked_conv import MaskedConv2d 21 | from .nms import nms, soft_nms 22 | from .non_local import NonLocal2D 23 | from .norm import build_norm_layer 24 | from .roi_align import RoIAlign, roi_align 25 | from .roi_pool import RoIPool, roi_pool 26 | from .scale import Scale 27 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss 28 | from .upsample import build_upsample_layer 29 | from .utils import get_compiler_version, get_compiling_cuda_version 30 | 31 | __all__ = [ 32 | "nms", 33 | "soft_nms", 34 | "RoIAlign", 35 | "roi_align", 36 | "RoIPool", 37 | "roi_pool", 38 | "DeformConv", 39 | "DeformConvPack", 40 | "DeformRoIPooling", 41 | "DeformRoIPoolingPack", 42 | "ModulatedDeformRoIPoolingPack", 43 | "ModulatedDeformConv", 44 | "ModulatedDeformConvPack", 45 | "deform_conv", 46 | "modulated_deform_conv", 47 | "DeltaRPooling", 48 | "DeltaCPooling", 49 | "deform_roi_pooling", 50 | "SigmoidFocalLoss", 51 | "sigmoid_focal_loss", 52 | "MaskedConv2d", 53 | "ContextBlock", 54 | "GeneralizedAttention", 55 | "NonLocal2D", 56 | "get_compiler_version", 57 | "get_compiling_cuda_version", 58 | "build_conv_layer", 59 | "ConvModule", 60 | "ConvWS2d", 61 | "conv_ws_2d", 62 | "build_norm_layer", 63 | "Scale", 64 | "build_upsample_layer", 65 | ] 66 | -------------------------------------------------------------------------------- /mmdet/ops/activation.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | activation_cfg = { 4 | # layer_abbreviation: module 5 | "ReLU": nn.ReLU, 6 | "LeakyReLU": nn.LeakyReLU, 7 | "PReLU": nn.PReLU, 8 | "RReLU": nn.RReLU, 9 | "ReLU6": nn.ReLU6, 10 | "SELU": nn.SELU, 11 | "CELU": nn.CELU, 12 | } 13 | 14 | 15 | def build_activation_layer(cfg): 16 | """ Build activation layer 17 | 18 | Args: 19 | cfg (dict): cfg should contain: 20 | type (str): Identify activation layer type. 21 | layer args: args needed to instantiate an activation layer.
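Example (an illustrative sketch, not from the original source):
    relu = build_activation_layer(dict(type="ReLU", inplace=True))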
22 | 23 | Returns: 24 | layer (nn.Module): Created activation layer 25 | """ 26 | assert isinstance(cfg, dict) and "type" in cfg 27 | cfg_ = cfg.copy() 28 | 29 | layer_type = cfg_.pop("type") 30 | if layer_type not in activation_cfg: 31 | raise KeyError("Unrecognized activation type {}".format(layer_type)) 32 | else: 33 | activation = activation_cfg[layer_type] 34 | if activation is None: 35 | raise NotImplementedError 36 | 37 | layer = activation(**cfg_) 38 | return layer 39 | -------------------------------------------------------------------------------- /mmdet/ops/affine_grid/__init__.py: -------------------------------------------------------------------------------- 1 | from .affine_grid import affine_grid 2 | 3 | __all__ = ["affine_grid"] 4 | -------------------------------------------------------------------------------- /mmdet/ops/affine_grid/affine_grid.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | 6 | from . import affine_grid_cuda 7 | 8 | 9 | class _AffineGridGenerator(Function): 10 | @staticmethod 11 | def forward(ctx, theta, size, align_corners): 12 | 13 | ctx.save_for_backward(theta) 14 | ctx.size = size 15 | ctx.align_corners = align_corners 16 | 17 | func = affine_grid_cuda.affine_grid_generator_forward 18 | 19 | output = func(theta, size, align_corners) 20 | 21 | return output 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, grad_output): 26 | (theta,) = ctx.saved_tensors 27 | size = ctx.size 28 | align_corners = ctx.align_corners 29 | 30 | func = affine_grid_cuda.affine_grid_generator_backward 31 | 32 | grad_input = func(grad_output, theta, size, align_corners) 33 | 34 | return grad_input, None, None 35 | 36 | 37 | def affine_grid(theta, size, align_corners=False): 38 | if tuple(int(v) for v in torch.__version__.split(".")[:2]) >= (1, 3): 39 | return F.affine_grid(theta, size, align_corners) 40 | elif align_corners: 41 | return F.affine_grid(theta, size) 42 | else: 43 | # enforce floating point dtype on theta 44 | if not theta.is_floating_point(): 45 | raise ValueError( 46 | "Expected theta to have floating point type, but got {}".format( 47 | theta.dtype 48 | ) 49 | ) 50 | # check that shapes and sizes match 51 | if len(size) == 4: 52 | if theta.dim() != 3 or theta.size(-2) != 2 or theta.size(-1) != 3: 53 | raise ValueError( 54 | "Expected a batch of 2D affine matrices of shape Nx2x3 " 55 | "for size {}. Got {}.".format(size, theta.shape) 56 | ) 57 | elif len(size) == 5: 58 | if theta.dim() != 3 or theta.size(-2) != 3 or theta.size(-1) != 4: 59 | raise ValueError( 60 | "Expected a batch of 3D affine matrices of shape Nx3x4 " 61 | "for size {}. Got {}.".format(size, theta.shape) 62 | ) 63 | else: 64 | raise NotImplementedError( 65 | "affine_grid only supports 4D and 5D sizes, " 66 | "for 2D and 3D affine transforms, respectively. " 67 | "Got size {}.".format(size) 68 | ) 69 | if min(size) <= 0: 70 | raise ValueError( 71 | "Expected non-zero, positive output size.
Got {}".format(size) 72 | ) 73 | return _AffineGridGenerator.apply(theta, size, align_corners) 74 | -------------------------------------------------------------------------------- /mmdet/ops/carafe/__init__.py: -------------------------------------------------------------------------------- 1 | from .carafe import CARAFE, CARAFENaive, CARAFEPack, carafe, carafe_naive 2 | 3 | __all__ = ["carafe", "carafe_naive", "CARAFE", "CARAFENaive", "CARAFEPack"] 4 | -------------------------------------------------------------------------------- /mmdet/ops/carafe/grad_check.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | import mmcv 5 | import torch 6 | from torch.autograd import gradcheck 7 | 8 | sys.path.append(osp.abspath(osp.join(__file__, "../../"))) 9 | from mmdet.ops.carafe import CARAFE, CARAFENaive # noqa: E402, isort:skip 10 | from mmdet.ops.carafe import carafe, carafe_naive # noqa: E402, isort:skip 11 | 12 | feat = torch.randn(2, 64, 3, 3, requires_grad=True, device="cuda:0").double() 13 | mask = torch.randn(2, 100, 6, 6, requires_grad=True, device="cuda:0").sigmoid().double() 14 | 15 | print("Gradcheck for carafe...") 16 | test = gradcheck(CARAFE(5, 4, 2), (feat, mask), atol=1e-4, eps=1e-4) 17 | print(test) 18 | 19 | print("Gradcheck for carafe naive...") 20 | test = gradcheck(CARAFENaive(5, 4, 2), (feat, mask), atol=1e-4, eps=1e-4) 21 | print(test) 22 | 23 | feat = torch.randn(2, 1024, 100, 100, requires_grad=True, device="cuda:0").float() 24 | mask = ( 25 | torch.randn(2, 25, 200, 200, requires_grad=True, device="cuda:0").sigmoid().float() 26 | ) 27 | loop_num = 500 28 | 29 | time_forward = 0 30 | time_backward = 0 31 | bar = mmcv.ProgressBar(loop_num) 32 | timer = mmcv.Timer() 33 | for i in range(loop_num): 34 | x = carafe(feat.clone(), mask.clone(), 5, 1, 2) 35 | torch.cuda.synchronize() 36 | time_forward += timer.since_last_check() 37 | x.sum().backward(retain_graph=True) 38 | torch.cuda.synchronize() 39 | time_backward += timer.since_last_check() 40 | bar.update() 41 | print( 42 | "\nCARAFE time forward: {} ms/iter | time backward: {} ms/iter".format( 43 | (time_forward + 1e-3) * 1e3 / loop_num, (time_backward + 1e-3) * 1e3 / loop_num 44 | ) 45 | ) 46 | 47 | time_naive_forward = 0 48 | time_naive_backward = 0 49 | bar = mmcv.ProgressBar(loop_num) 50 | timer = mmcv.Timer() 51 | for i in range(loop_num): 52 | x = carafe_naive(feat.clone(), mask.clone(), 5, 1, 2) 53 | torch.cuda.synchronize() 54 | time_naive_forward += timer.since_last_check() 55 | x.sum().backward(retain_graph=True) 56 | torch.cuda.synchronize() 57 | time_naive_backward += timer.since_last_check() 58 | bar.update() 59 | print( 60 | "\nCARAFE naive time forward: {} ms/iter | time backward: {} ms/iter".format( 61 | (time_naive_forward + 1e-3) * 1e3 / loop_num, 62 | (time_naive_backward + 1e-3) * 1e3 / loop_num, 63 | ) 64 | ) 65 | -------------------------------------------------------------------------------- /mmdet/ops/carafe/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 4 | 5 | NVCC_ARGS = [ 6 | "-D__CUDA_NO_HALF_OPERATORS__", 7 | "-D__CUDA_NO_HALF_CONVERSIONS__", 8 | "-D__CUDA_NO_HALF2_OPERATORS__", 9 | ] 10 | 11 | setup( 12 | name="carafe", 13 | ext_modules=[ 14 | CUDAExtension( 15 | "carafe_cuda", 16 | ["src/carafe_cuda.cpp", "src/carafe_cuda_kernel.cu"], 17 | 
extra_compile_args={"cxx": [], "nvcc": NVCC_ARGS}, 18 | ), 19 | CUDAExtension( 20 | "carafe_naive_cuda", 21 | ["src/carafe_naive_cuda.cpp", "src/carafe_naive_cuda_kernel.cu"], 22 | extra_compile_args={"cxx": [], "nvcc": NVCC_ARGS}, 23 | ), 24 | ], 25 | cmdclass={"build_ext": BuildExtension}, 26 | ) 27 | -------------------------------------------------------------------------------- /mmdet/ops/carafe/src/carafe_naive_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | int CARAFENAIVEForwardLaucher(const at::Tensor features, const at::Tensor masks, 8 | const int kernel_size, const int group_size, 9 | const int scale_factor, const int batch_size, 10 | const int channels, const int height, 11 | const int width, at::Tensor output); 12 | 13 | int CARAFENAIVEBackwardLaucher(const at::Tensor top_grad, 14 | const at::Tensor features, 15 | const at::Tensor masks, const int kernel_size, 16 | const int group_size, const int scale_factor, 17 | const int batch_size, const int channels, 18 | const int height, const int width, 19 | at::Tensor bottom_grad, at::Tensor mask_grad); 20 | 21 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 22 | #define CHECK_CONTIGUOUS(x) \ 23 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 24 | #define CHECK_INPUT(x) \ 25 | CHECK_CUDA(x); \ 26 | CHECK_CONTIGUOUS(x) 27 | 28 | int carafe_naive_forward_cuda(at::Tensor features, at::Tensor masks, 29 | int kernel_size, int group_size, int scale_factor, 30 | at::Tensor output) { 31 | CHECK_INPUT(features); 32 | CHECK_INPUT(masks); 33 | CHECK_INPUT(output); 34 | at::DeviceGuard guard(features.device()); 35 | 36 | int batch_size = output.size(0); 37 | int num_channels = output.size(1); 38 | int data_height = output.size(2); 39 | int data_width = output.size(3); 40 | 41 | CARAFENAIVEForwardLaucher(features, masks, kernel_size, group_size, 42 | scale_factor, batch_size, num_channels, data_height, 43 | data_width, output); 44 | 45 | return 1; 46 | } 47 | 48 | int carafe_naive_backward_cuda(at::Tensor top_grad, at::Tensor features, 49 | at::Tensor masks, int kernel_size, 50 | int group_size, int scale_factor, 51 | at::Tensor bottom_grad, at::Tensor mask_grad) { 52 | CHECK_INPUT(top_grad); 53 | CHECK_INPUT(features); 54 | CHECK_INPUT(masks); 55 | CHECK_INPUT(bottom_grad); 56 | CHECK_INPUT(mask_grad); 57 | at::DeviceGuard guard(top_grad.device()); 58 | 59 | int batch_size = top_grad.size(0); 60 | int num_channels = top_grad.size(1); 61 | int data_height = top_grad.size(2); 62 | int data_width = top_grad.size(3); 63 | 64 | CARAFENAIVEBackwardLaucher(top_grad, features, masks, kernel_size, group_size, 65 | scale_factor, batch_size, num_channels, 66 | data_height, data_width, bottom_grad, mask_grad); 67 | 68 | return 1; 69 | } 70 | 71 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 72 | m.def("forward", &carafe_naive_forward_cuda, "carafe_naive forward (CUDA)"); 73 | m.def("backward", &carafe_naive_backward_cuda, 74 | "carafe_naive backward (CUDA)"); 75 | } 76 | -------------------------------------------------------------------------------- /mmdet/ops/conv.py: -------------------------------------------------------------------------------- 1 | from torch import nn as nn 2 | 3 | from .conv_ws import ConvWS2d 4 | from .dcn import DeformConvPack, ModulatedDeformConvPack 5 | 6 | conv_cfg = { 7 | "Conv": nn.Conv2d, 8 | "ConvWS": ConvWS2d, 9 | "DCN": DeformConvPack, 10 | "DCNv2": ModulatedDeformConvPack, 
11 | # TODO: octave conv 12 | } 13 | 14 | 15 | def build_conv_layer(cfg, *args, **kwargs): 16 | """ Build convolution layer 17 | 18 | Args: 19 | cfg (None or dict): cfg should contain: 20 | type (str): identify conv layer type. 21 | layer args: args needed to instantiate a conv layer. 22 | 23 | Returns: 24 | layer (nn.Module): created conv layer 25 | """ 26 | if cfg is None: 27 | cfg_ = dict(type="Conv") 28 | else: 29 | assert isinstance(cfg, dict) and "type" in cfg 30 | cfg_ = cfg.copy() 31 | 32 | layer_type = cfg_.pop("type") 33 | if layer_type not in conv_cfg: 34 | raise KeyError("Unrecognized conv type {}".format(layer_type)) 35 | else: 36 | conv_layer = conv_cfg[layer_type] 37 | 38 | layer = conv_layer(*args, **kwargs, **cfg_) 39 | 40 | return layer 41 | -------------------------------------------------------------------------------- /mmdet/ops/conv_ws.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | def conv_ws_2d( 6 | input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1, eps=1e-5 7 | ): 8 | c_in = weight.size(0) 9 | weight_flat = weight.view(c_in, -1) 10 | mean = weight_flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1) 11 | std = weight_flat.std(dim=1, keepdim=True).view(c_in, 1, 1, 1) 12 | weight = (weight - mean) / (std + eps) 13 | return F.conv2d(input, weight, bias, stride, padding, dilation, groups) 14 | 15 | 16 | class ConvWS2d(nn.Conv2d): 17 | def __init__( 18 | self, 19 | in_channels, 20 | out_channels, 21 | kernel_size, 22 | stride=1, 23 | padding=0, 24 | dilation=1, 25 | groups=1, 26 | bias=True, 27 | eps=1e-5, 28 | ): 29 | super(ConvWS2d, self).__init__( 30 | in_channels, 31 | out_channels, 32 | kernel_size, 33 | stride=stride, 34 | padding=padding, 35 | dilation=dilation, 36 | groups=groups, 37 | bias=bias, 38 | ) 39 | self.eps = eps 40 | 41 | def forward(self, x): 42 | return conv_ws_2d( 43 | x, 44 | self.weight, 45 | self.bias, 46 | self.stride, 47 | self.padding, 48 | self.dilation, 49 | self.groups, 50 | self.eps, 51 | ) 52 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | from .deform_conv import ( 2 | DeformConv, 3 | DeformConvPack, 4 | ModulatedDeformConv, 5 | ModulatedDeformConvPack, 6 | deform_conv, 7 | modulated_deform_conv, 8 | ) 9 | from .deform_pool import ( 10 | DeformRoIPooling, 11 | DeformRoIPoolingPack, 12 | DeltaCPooling, 13 | DeltaRPooling, 14 | ModulatedDeformRoIPoolingPack, 15 | deform_roi_pooling, 16 | ) 17 | 18 | __all__ = [ 19 | "DeformConv", 20 | "DeformConvPack", 21 | "ModulatedDeformConv", 22 | "ModulatedDeformConvPack", 23 | "DeformRoIPooling", 24 | "DeformRoIPoolingPack", 25 | "ModulatedDeformRoIPoolingPack", 26 | "deform_conv", 27 | "modulated_deform_conv", 28 | "deform_roi_pooling", 29 | "DeltaRPooling", 30 | "DeltaCPooling", 31 | ] 32 | -------------------------------------------------------------------------------- /mmdet/ops/grid_sampler/__init__.py: -------------------------------------------------------------------------------- 1 | from .grid_sampler import grid_sample 2 | 3 | __all__ = ["grid_sample"] 4 | -------------------------------------------------------------------------------- /mmdet/ops/masked_conv/__init__.py: -------------------------------------------------------------------------------- 1 | from .masked_conv import MaskedConv2d, masked_conv2d 2 | 3 | 
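# MaskedConv2d evaluates the convolution only at spatial positions selected by
# a binary mask (masked_conv2d is the functional form); the CUDA im2col/col2im
# kernels it relies on are declared in src/masked_conv2d_cuda.cpp below.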
__all__ = ["masked_conv2d", "MaskedConv2d"] 4 | -------------------------------------------------------------------------------- /mmdet/ops/masked_conv/src/masked_conv2d_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | int MaskedIm2colForwardLaucher(const at::Tensor im, const int height, 7 | const int width, const int channels, 8 | const int kernel_h, const int kernel_w, 9 | const int pad_h, const int pad_w, 10 | const at::Tensor mask_h_idx, 11 | const at::Tensor mask_w_idx, const int mask_cnt, 12 | at::Tensor col); 13 | 14 | int MaskedCol2imForwardLaucher(const at::Tensor col, const int height, 15 | const int width, const int channels, 16 | const at::Tensor mask_h_idx, 17 | const at::Tensor mask_w_idx, const int mask_cnt, 18 | at::Tensor im); 19 | 20 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 21 | #define CHECK_CONTIGUOUS(x) \ 22 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 23 | #define CHECK_INPUT(x) \ 24 | CHECK_CUDA(x); \ 25 | CHECK_CONTIGUOUS(x) 26 | 27 | int masked_im2col_forward_cuda(const at::Tensor im, const at::Tensor mask_h_idx, 28 | const at::Tensor mask_w_idx, const int kernel_h, 29 | const int kernel_w, const int pad_h, 30 | const int pad_w, at::Tensor col) { 31 | CHECK_INPUT(im); 32 | CHECK_INPUT(mask_h_idx); 33 | CHECK_INPUT(mask_w_idx); 34 | CHECK_INPUT(col); 35 | // im: (n, ic, h, w), kernel size (kh, kw) 36 | // kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh) 37 | at::DeviceGuard guard(im.device()); 38 | 39 | int channels = im.size(1); 40 | int height = im.size(2); 41 | int width = im.size(3); 42 | int mask_cnt = mask_h_idx.size(0); 43 | 44 | MaskedIm2colForwardLaucher(im, height, width, channels, kernel_h, kernel_w, 45 | pad_h, pad_w, mask_h_idx, mask_w_idx, mask_cnt, 46 | col); 47 | 48 | return 1; 49 | } 50 | 51 | int masked_col2im_forward_cuda(const at::Tensor col, 52 | const at::Tensor mask_h_idx, 53 | const at::Tensor mask_w_idx, int height, 54 | int width, int channels, at::Tensor im) { 55 | CHECK_INPUT(col); 56 | CHECK_INPUT(mask_h_idx); 57 | CHECK_INPUT(mask_w_idx); 58 | CHECK_INPUT(im); 59 | // im: (n, ic, h, w), kernel size (kh, kw) 60 | // kernel: (oc, ic * kh * kh), col: (kh * kw * ic, ow * oh) 61 | at::DeviceGuard guard(col.device()); 62 | 63 | int mask_cnt = mask_h_idx.size(0); 64 | 65 | MaskedCol2imForwardLaucher(col, height, width, channels, mask_h_idx, 66 | mask_w_idx, mask_cnt, im); 67 | 68 | return 1; 69 | } 70 | 71 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 72 | m.def("masked_im2col_forward", &masked_im2col_forward_cuda, 73 | "masked_im2col forward (CUDA)"); 74 | m.def("masked_col2im_forward", &masked_col2im_forward_cuda, 75 | "masked_col2im forward (CUDA)"); 76 | } 77 | -------------------------------------------------------------------------------- /mmdet/ops/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_wrapper import nms, soft_nms 2 | 3 | __all__ = ["nms", "soft_nms"] 4 | -------------------------------------------------------------------------------- /mmdet/ops/nms/src/nms_cuda.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #include <torch/extension.h> 3 | 4 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 5 | 6 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 7 | 8 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 9 | CHECK_CUDA(dets); 10 | if (dets.numel() == 0) 11 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 12 | return nms_cuda(dets, threshold); 13 | } 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("nms", &nms, "non-maximum suppression"); 17 | } 18 | -------------------------------------------------------------------------------- /mmdet/ops/norm.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | norm_cfg = { 4 | # format: layer_type: (abbreviation, module) 5 | "BN": ("bn", nn.BatchNorm2d), 6 | "SyncBN": ("bn", nn.SyncBatchNorm), 7 | "GN": ("gn", nn.GroupNorm), 8 | # and potentially 'SN' 9 | } 10 | 11 | 12 | def build_norm_layer(cfg, num_features, postfix=""): 13 | """ Build normalization layer 14 | 15 | Args: 16 | cfg (dict): cfg should contain: 17 | type (str): identify norm layer type. 18 | layer args: args needed to instantiate a norm layer. 19 | requires_grad (bool): [optional] whether to stop gradient updates 20 | num_features (int): number of channels from input. 21 | postfix (int, str): appended to the norm abbreviation to 22 | create a named layer. 23 | 24 | Returns: 25 | name (str): abbreviation + postfix 26 | layer (nn.Module): created norm layer 27 | """ 28 | assert isinstance(cfg, dict) and "type" in cfg 29 | cfg_ = cfg.copy() 30 | 31 | layer_type = cfg_.pop("type") 32 | if layer_type not in norm_cfg: 33 | raise KeyError("Unrecognized norm type {}".format(layer_type)) 34 | else: 35 | abbr, norm_layer = norm_cfg[layer_type] 36 | if norm_layer is None: 37 | raise NotImplementedError 38 | 39 | assert isinstance(postfix, (int, str)) 40 | name = abbr + str(postfix) 41 | 42 | requires_grad = cfg_.pop("requires_grad", True) 43 | cfg_.setdefault("eps", 1e-5) 44 | if layer_type != "GN": 45 | layer = norm_layer(num_features, **cfg_) 46 | if layer_type == "SyncBN": 47 | layer._specify_ddp_gpu_num(1) 48 | else: 49 | assert "num_groups" in cfg_ 50 | layer = norm_layer(num_channels=num_features, **cfg_) 51 | 52 | for param in layer.parameters(): 53 | param.requires_grad = requires_grad 54 | 55 | return name, layer 56 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | from .roi_align import RoIAlign, roi_align 2 | 3 | __all__ = ["roi_align", "RoIAlign"] 4 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/gradcheck.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | import numpy as np 5 | import torch 6 | from torch.autograd import gradcheck 7 | 8 | sys.path.append(osp.abspath(osp.join(__file__, "../../"))) 9 | from roi_align import RoIAlign # noqa: E402, isort:skip 10 | 11 | feat_size = 15 12 | spatial_scale = 1.0 / 8 13 | img_size = feat_size / spatial_scale 14 | num_imgs = 2 15 | num_rois = 20 16 | 17 | batch_ind = np.random.randint(num_imgs, size=(num_rois, 1)) 18 | rois = np.random.rand(num_rois, 4) * img_size * 0.5 19 | rois[:, 2:] += img_size * 0.5 20 | rois = np.hstack((batch_ind, rois)) 21 | 22 | feat = torch.randn( 23 | num_imgs, 16, feat_size, feat_size, 
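    # float32 features are used here, hence the looser gradcheck tolerances
    # below (atol/eps of 1e-3) compared with the double-precision CARAFE
    # gradcheck earlier in mmdet/ops/carafe/grad_check.py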
requires_grad=True, device="cuda:0" 24 | ) 25 | rois = torch.from_numpy(rois).float().cuda() 26 | inputs = (feat, rois) 27 | print("Gradcheck for roi align...") 28 | test = gradcheck(RoIAlign(3, spatial_scale), inputs, atol=1e-3, eps=1e-3) 29 | print(test) 30 | test = gradcheck(RoIAlign(3, spatial_scale, 2), inputs, atol=1e-3, eps=1e-3) 31 | print(test) 32 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/__init__.py: -------------------------------------------------------------------------------- 1 | from .roi_pool import RoIPool, roi_pool 2 | 3 | __all__ = ["roi_pool", "RoIPool"] 4 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/gradcheck.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | import torch 5 | from torch.autograd import gradcheck 6 | 7 | sys.path.append(osp.abspath(osp.join(__file__, "../../"))) 8 | from roi_pool import RoIPool # noqa: E402, isort:skip 9 | 10 | feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda() 11 | rois = torch.Tensor( 12 | [[0, 0, 0, 50, 50], [0, 10, 30, 43, 55], [1, 67, 40, 110, 120]] 13 | ).cuda() 14 | inputs = (feat, rois) 15 | print("Gradcheck for roi pooling...") 16 | test = gradcheck(RoIPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3) 17 | print(test) 18 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from . 
import roi_pool_cuda 8 | 9 | 10 | class RoIPoolFunction(Function): 11 | @staticmethod 12 | def forward(ctx, features, rois, out_size, spatial_scale): 13 | assert features.is_cuda 14 | out_h, out_w = _pair(out_size) 15 | assert isinstance(out_h, int) and isinstance(out_w, int) 16 | ctx.save_for_backward(rois) 17 | num_channels = features.size(1) 18 | num_rois = rois.size(0) 19 | out_size = (num_rois, num_channels, out_h, out_w) 20 | output = features.new_zeros(out_size) 21 | argmax = features.new_zeros(out_size, dtype=torch.int) 22 | roi_pool_cuda.forward( 23 | features, rois, out_h, out_w, spatial_scale, output, argmax 24 | ) 25 | ctx.spatial_scale = spatial_scale 26 | ctx.feature_size = features.size() 27 | ctx.argmax = argmax 28 | 29 | return output 30 | 31 | @staticmethod 32 | @once_differentiable 33 | def backward(ctx, grad_output): 34 | assert grad_output.is_cuda 35 | spatial_scale = ctx.spatial_scale 36 | feature_size = ctx.feature_size 37 | argmax = ctx.argmax 38 | rois = ctx.saved_tensors[0] 39 | assert feature_size is not None 40 | 41 | grad_input = grad_rois = None 42 | if ctx.needs_input_grad[0]: 43 | grad_input = grad_output.new_zeros(feature_size) 44 | roi_pool_cuda.backward( 45 | grad_output.contiguous(), rois, argmax, spatial_scale, grad_input 46 | ) 47 | 48 | return grad_input, grad_rois, None, None 49 | 50 | 51 | roi_pool = RoIPoolFunction.apply 52 | 53 | 54 | class RoIPool(nn.Module): 55 | def __init__(self, out_size, spatial_scale, use_torchvision=False): 56 | super(RoIPool, self).__init__() 57 | 58 | self.out_size = _pair(out_size) 59 | self.spatial_scale = float(spatial_scale) 60 | self.use_torchvision = use_torchvision 61 | 62 | def forward(self, features, rois): 63 | if self.use_torchvision: 64 | from torchvision.ops import roi_pool as tv_roi_pool 65 | 66 | return tv_roi_pool(features, rois, self.out_size, self.spatial_scale) 67 | else: 68 | return roi_pool(features, rois, self.out_size, self.spatial_scale) 69 | 70 | def __repr__(self): 71 | format_str = self.__class__.__name__ 72 | format_str += "(out_size={}, spatial_scale={}".format( 73 | self.out_size, self.spatial_scale 74 | ) 75 | format_str += ", use_torchvision={})".format(self.use_torchvision) 76 | return format_str 77 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/src/roi_pool_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <cmath> 4 | #include <vector> 5 | 6 | int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int channels, 8 | const int height, const int width, const int num_rois, 9 | const int pooled_h, const int pooled_w, 10 | at::Tensor output, at::Tensor argmax); 11 | 12 | int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 13 | const at::Tensor argmax, const float spatial_scale, 14 | const int batch_size, const int channels, 15 | const int height, const int width, 16 | const int num_rois, const int pooled_h, 17 | const int pooled_w, at::Tensor bottom_grad); 18 | 19 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 20 | #define CHECK_CONTIGUOUS(x) \ 21 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 22 | #define CHECK_INPUT(x) \ 23 | CHECK_CUDA(x); \ 24 | CHECK_CONTIGUOUS(x) 25 | 26 | int roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois, 27 | int pooled_height, int pooled_width, 28 | float spatial_scale, at::Tensor output, 29 | at::Tensor argmax) { 30 
| CHECK_INPUT(features); 31 | CHECK_INPUT(rois); 32 | CHECK_INPUT(output); 33 | CHECK_INPUT(argmax); 34 | at::DeviceGuard guard(features.device()); 35 | 36 | // Number of ROIs 37 | int num_rois = rois.size(0); 38 | int size_rois = rois.size(1); 39 | 40 | if (size_rois != 5) { 41 | printf("wrong roi size\n"); 42 | return 0; 43 | } 44 | 45 | int channels = features.size(1); 46 | int height = features.size(2); 47 | int width = features.size(3); 48 | 49 | ROIPoolForwardLaucher(features, rois, spatial_scale, channels, height, width, 50 | num_rois, pooled_height, pooled_width, output, argmax); 51 | 52 | return 1; 53 | } 54 | 55 | int roi_pooling_backward_cuda(at::Tensor top_grad, at::Tensor rois, 56 | at::Tensor argmax, float spatial_scale, 57 | at::Tensor bottom_grad) { 58 | CHECK_INPUT(top_grad); 59 | CHECK_INPUT(rois); 60 | CHECK_INPUT(argmax); 61 | CHECK_INPUT(bottom_grad); 62 | at::DeviceGuard guard(top_grad.device()); 63 | 64 | int pooled_height = top_grad.size(2); 65 | int pooled_width = top_grad.size(3); 66 | int num_rois = rois.size(0); 67 | int size_rois = rois.size(1); 68 | 69 | if (size_rois != 5) { 70 | printf("wrong roi size\n"); 71 | return 0; 72 | } 73 | int batch_size = bottom_grad.size(0); 74 | int channels = bottom_grad.size(1); 75 | int height = bottom_grad.size(2); 76 | int width = bottom_grad.size(3); 77 | 78 | ROIPoolBackwardLaucher(top_grad, rois, argmax, spatial_scale, batch_size, 79 | channels, height, width, num_rois, pooled_height, 80 | pooled_width, bottom_grad); 81 | 82 | return 1; 83 | } 84 | 85 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 86 | m.def("forward", &roi_pooling_forward_cuda, "Roi_Pooling forward (CUDA)"); 87 | m.def("backward", &roi_pooling_backward_cuda, "Roi_Pooling backward (CUDA)"); 88 | } 89 | -------------------------------------------------------------------------------- /mmdet/ops/scale.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Scale(nn.Module): 6 | """ 7 | A learnable scale parameter 8 | """ 9 | 10 | def __init__(self, scale=1.0): 11 | super(Scale, self).__init__() 12 | self.scale = nn.Parameter(torch.tensor(scale, dtype=torch.float)) 13 | 14 | def forward(self, x): 15 | return x * self.scale 16 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/__init__.py: -------------------------------------------------------------------------------- 1 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss 2 | 3 | __all__ = ["SigmoidFocalLoss", "sigmoid_focal_loss"] 4 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/sigmoid_focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | 5 | from . 
import sigmoid_focal_loss_cuda 6 | 7 | 8 | class SigmoidFocalLossFunction(Function): 9 | @staticmethod 10 | def forward(ctx, input, target, gamma=2.0, alpha=0.25): 11 | ctx.save_for_backward(input, target) 12 | num_classes = input.shape[1] 13 | ctx.num_classes = num_classes 14 | ctx.gamma = gamma 15 | ctx.alpha = alpha 16 | 17 | loss = sigmoid_focal_loss_cuda.forward(input, target, num_classes, gamma, alpha) 18 | return loss 19 | 20 | @staticmethod 21 | @once_differentiable 22 | def backward(ctx, d_loss): 23 | input, target = ctx.saved_tensors 24 | num_classes = ctx.num_classes 25 | gamma = ctx.gamma 26 | alpha = ctx.alpha 27 | d_loss = d_loss.contiguous() 28 | d_input = sigmoid_focal_loss_cuda.backward( 29 | input, target, d_loss, num_classes, gamma, alpha 30 | ) 31 | return d_input, None, None, None, None 32 | 33 | 34 | sigmoid_focal_loss = SigmoidFocalLossFunction.apply 35 | 36 | 37 | # TODO: remove this module 38 | class SigmoidFocalLoss(nn.Module): 39 | def __init__(self, gamma, alpha): 40 | super(SigmoidFocalLoss, self).__init__() 41 | self.gamma = gamma 42 | self.alpha = alpha 43 | 44 | def forward(self, logits, targets): 45 | assert logits.is_cuda 46 | loss = sigmoid_focal_loss(logits, targets, self.gamma, self.alpha) 47 | return loss.sum() 48 | 49 | def __repr__(self): 50 | tmpstr = self.__class__.__name__ + "(gamma={}, alpha={})".format( 51 | self.gamma, self.alpha 52 | ) 53 | return tmpstr 54 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss.cpp: -------------------------------------------------------------------------------- 1 | // modified from 2 | // https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h 3 | #include <torch/extension.h> 4 | 5 | at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits, 6 | const at::Tensor &targets, 7 | const int num_classes, 8 | const float gamma, const float alpha); 9 | 10 | at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits, 11 | const at::Tensor &targets, 12 | const at::Tensor &d_losses, 13 | const int num_classes, 14 | const float gamma, const float alpha); 15 | 16 | // Interface for Python 17 | at::Tensor SigmoidFocalLoss_forward(const at::Tensor &logits, 18 | const at::Tensor &targets, 19 | const int num_classes, const float gamma, 20 | const float alpha) { 21 | if (logits.type().is_cuda()) { 22 | at::DeviceGuard guard(logits.device()); 23 | return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, 24 | alpha); 25 | } 26 | AT_ERROR("SigmoidFocalLoss is not implemented on the CPU"); 27 | } 28 | 29 | at::Tensor SigmoidFocalLoss_backward(const at::Tensor &logits, 30 | const at::Tensor &targets, 31 | const at::Tensor &d_losses, 32 | const int num_classes, const float gamma, 33 | const float alpha) { 34 | if (logits.type().is_cuda()) { 35 | at::DeviceGuard guard(logits.device()); 36 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, 37 | num_classes, gamma, alpha); 38 | } 39 | AT_ERROR("SigmoidFocalLoss is not implemented on the CPU"); 40 | } 41 | 42 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 43 | m.def("forward", &SigmoidFocalLoss_forward, 44 | "SigmoidFocalLoss forward (CUDA)"); 45 | m.def("backward", &SigmoidFocalLoss_backward, 46 | "SigmoidFocalLoss backward (CUDA)"); 47 | } 48 | -------------------------------------------------------------------------------- /mmdet/ops/upsample.py: -------------------------------------------------------------------------------- 
1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from mmcv.cnn import xavier_init 4 | 5 | from .carafe import CARAFEPack 6 | 7 | 8 | class PixelShufflePack(nn.Module): 9 | """ Pixel Shuffle upsample layer 10 | 11 | Args: 12 | in_channels (int): Number of input channels 13 | out_channels (int): Number of output channels 14 | scale_factor (int): Upsample ratio 15 | upsample_kernel (int): Kernel size of Conv layer to expand the channels 16 | 17 | Returns: 18 | upsampled feature map 19 | """ 20 | 21 | def __init__(self, in_channels, out_channels, scale_factor, upsample_kernel): 22 | super(PixelShufflePack, self).__init__() 23 | self.in_channels = in_channels 24 | self.out_channels = out_channels 25 | self.scale_factor = scale_factor 26 | self.upsample_kernel = upsample_kernel 27 | self.upsample_conv = nn.Conv2d( 28 | self.in_channels, 29 | self.out_channels * scale_factor * scale_factor, 30 | self.upsample_kernel, 31 | padding=(self.upsample_kernel - 1) // 2, 32 | ) 33 | self.init_weights() 34 | 35 | def init_weights(self): 36 | xavier_init(self.upsample_conv, distribution="uniform") 37 | 38 | def forward(self, x): 39 | x = self.upsample_conv(x) 40 | x = F.pixel_shuffle(x, self.scale_factor) 41 | return x 42 | 43 | 44 | upsample_cfg = { 45 | # layer_abbreviation: module 46 | "nearest": nn.Upsample, 47 | "bilinear": nn.Upsample, 48 | "deconv": nn.ConvTranspose2d, 49 | "pixel_shuffle": PixelShufflePack, 50 | "carafe": CARAFEPack, 51 | } 52 | 53 | 54 | def build_upsample_layer(cfg): 55 | """ Build upsample layer 56 | 57 | Args: 58 | cfg (dict): cfg should contain: 59 | type (str): Identify upsample layer type. 60 | upsample ratio (int): Upsample ratio 61 | layer args: args needed to instantiate an upsample layer. 62 | 63 | Returns: 64 | layer (nn.Module): Created upsample layer 65 | """ 66 | assert isinstance(cfg, dict) and "type" in cfg 67 | cfg_ = cfg.copy() 68 | 69 | layer_type = cfg_.pop("type") 70 | if layer_type not in upsample_cfg: 71 | raise KeyError("Unrecognized upsample type {}".format(layer_type)) 72 | else: 73 | upsample = upsample_cfg[layer_type] 74 | if upsample is None: 75 | raise NotImplementedError 76 | 77 | layer = upsample(**cfg_) 78 | return layer 79 | -------------------------------------------------------------------------------- /mmdet/ops/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # from . import compiling_info 2 | from .compiling_info import get_compiler_version, get_compiling_cuda_version 3 | 4 | # get_compiler_version = compiling_info.get_compiler_version 5 | # get_compiling_cuda_version = compiling_info.get_compiling_cuda_version 6 | 7 | __all__ = ["get_compiler_version", "get_compiling_cuda_version"] 8 | -------------------------------------------------------------------------------- /mmdet/ops/utils/src/compiling_info.cpp: -------------------------------------------------------------------------------- 1 | // modified from 2 | // https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/vision.cpp 3 | #include <cuda_runtime_api.h> 4 | #include <torch/extension.h> 5 | 6 | #ifdef WITH_CUDA 7 | int get_cudart_version() { return CUDART_VERSION; } 8 | #endif 9 | 10 | std::string get_compiling_cuda_version() { 11 | #ifdef WITH_CUDA 12 | std::ostringstream oss; 13 | 14 | // copied from 15 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 16 | auto printCudaStyleVersion = [&](int v) { 17 | oss << (v / 1000) << "." 
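      // CUDART_VERSION encodes the version as major * 1000 + minor * 10
      // (plus a patch digit), so e.g. 10020 is printed as "10.2"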
<< (v / 10 % 100); 18 | if (v % 10 != 0) { 19 | oss << "." << (v % 10); 20 | } 21 | }; 22 | printCudaStyleVersion(get_cudart_version()); 23 | return oss.str(); 24 | #else 25 | return std::string("not available"); 26 | #endif 27 | } 28 | 29 | // similar to 30 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp 31 | std::string get_compiler_version() { 32 | std::ostringstream ss; 33 | #if defined(__GNUC__) 34 | #ifndef __clang__ 35 | { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } 36 | #endif 37 | #endif 38 | 39 | #if defined(__clang_major__) 40 | { 41 | ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." 42 | << __clang_patchlevel__; 43 | } 44 | #endif 45 | 46 | #if defined(_MSC_VER) 47 | { ss << "MSVC " << _MSC_FULL_VER; } 48 | #endif 49 | return ss.str(); 50 | } 51 | 52 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 53 | m.def("get_compiler_version", &get_compiler_version, "get_compiler_version"); 54 | m.def("get_compiling_cuda_version", &get_compiling_cuda_version, 55 | "get_compiling_cuda_version"); 56 | } 57 | -------------------------------------------------------------------------------- /mmdet/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .collect_env import collect_env 2 | from .flops_counter import get_model_complexity_info 3 | from .logger import get_root_logger, print_log 4 | from .registry import Registry, build_from_cfg 5 | 6 | __all__ = [ 7 | "Registry", 8 | "build_from_cfg", 9 | "get_model_complexity_info", 10 | "get_root_logger", 11 | "print_log", 12 | "collect_env", 13 | ] 14 | -------------------------------------------------------------------------------- /mmdet/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import subprocess 3 | import sys 4 | from collections import defaultdict 5 | 6 | import cv2 7 | import mmcv 8 | import torch 9 | import torchvision 10 | 11 | import mmdet 12 | 13 | 14 | def collect_env(): 15 | env_info = {} 16 | env_info["sys.platform"] = sys.platform 17 | env_info["Python"] = sys.version.replace("\n", "") 18 | 19 | cuda_available = torch.cuda.is_available() 20 | env_info["CUDA available"] = cuda_available 21 | 22 | if cuda_available: 23 | from torch.utils.cpp_extension import CUDA_HOME 24 | 25 | env_info["CUDA_HOME"] = CUDA_HOME 26 | 27 | if CUDA_HOME is not None and osp.isdir(CUDA_HOME): 28 | try: 29 | nvcc = osp.join(CUDA_HOME, "bin/nvcc") 30 | nvcc = subprocess.check_output( 31 | '"{}" -V | tail -n1'.format(nvcc), shell=True 32 | ) 33 | nvcc = nvcc.decode("utf-8").strip() 34 | except subprocess.SubprocessError: 35 | nvcc = "Not Available" 36 | env_info["NVCC"] = nvcc 37 | 38 | devices = defaultdict(list) 39 | for k in range(torch.cuda.device_count()): 40 | devices[torch.cuda.get_device_name(k)].append(str(k)) 41 | for name, devids in devices.items(): 42 | env_info["GPU " + ",".join(devids)] = name 43 | 44 | gcc = subprocess.check_output("gcc --version | head -n1", shell=True) 45 | gcc = gcc.decode("utf-8").strip() 46 | env_info["GCC"] = gcc 47 | 48 | env_info["PyTorch"] = torch.__version__ 49 | env_info["PyTorch compiling details"] = torch.__config__.show() 50 | 51 | env_info["TorchVision"] = torchvision.__version__ 52 | 53 | env_info["OpenCV"] = cv2.__version__ 54 | 55 | env_info["MMCV"] = mmcv.__version__ 56 | env_info["MMDetection"] = mmdet.__version__ 57 | from mmdet.ops import get_compiler_version, get_compiling_cuda_version 58 | 59 | 
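    # the two values below are reported by the compiled op extensions (see
    # mmdet/ops/utils/src/compiling_info.cpp), so they reflect the toolchain
    # the CUDA ops were actually built with, which may differ from the
    # runtime CUDA/NVCC versions collected above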
env_info["MMDetection Compiler"] = get_compiler_version() 60 | env_info["MMDetection CUDA Compiler"] = get_compiling_cuda_version() 61 | return env_info 62 | 63 | 64 | if __name__ == "__main__": 65 | for name, val in collect_env().items(): 66 | print("{}: {}".format(name, val)) 67 | -------------------------------------------------------------------------------- /mmdet/utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from mmcv.runner import get_dist_info 4 | 5 | 6 | def get_root_logger(log_file=None, log_level=logging.INFO): 7 | """Get the root logger. 8 | 9 | The logger will be initialized if it has not been initialized. By default a 10 | StreamHandler will be added. If `log_file` is specified, a FileHandler will 11 | also be added. The name of the root logger is the top-level package name, 12 | e.g., "mmdet". 13 | 14 | Args: 15 | log_file (str | None): The log filename. If specified, a FileHandler 16 | will be added to the root logger. 17 | log_level (int): The root logger level. Note that only the process of 18 | rank 0 is affected, while other processes will set the level to 19 | "Error" and be silent most of the time. 20 | 21 | Returns: 22 | logging.Logger: The root logger. 23 | """ 24 | logger = logging.getLogger(__name__.split(".")[0]) # i.e., mmdet 25 | # if the logger has been initialized, just return it 26 | if logger.hasHandlers(): 27 | return logger 28 | 29 | format_str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" 30 | logging.basicConfig(format=format_str, level=log_level) 31 | rank, _ = get_dist_info() 32 | if rank != 0: 33 | logger.setLevel("ERROR") 34 | elif log_file is not None: 35 | file_handler = logging.FileHandler(log_file, "w") 36 | file_handler.setFormatter(logging.Formatter(format_str)) 37 | file_handler.setLevel(log_level) 38 | logger.addHandler(file_handler) 39 | 40 | return logger 41 | 42 | 43 | def print_log(msg, logger=None, level=logging.INFO): 44 | """Print a log message. 45 | 46 | Args: 47 | msg (str): The message to be logged. 48 | logger (logging.Logger | str | None): The logger to be used. Some 49 | special loggers are: 50 | - "root": the root logger obtained with `get_root_logger()`. 51 | - "silent": no message will be printed. 52 | - None: The `print()` method will be used to print log messages. 53 | level (int): Logging level. Only available when `logger` is a Logger 54 | object or "root". 55 | """ 56 | if logger is None: 57 | print(msg) 58 | elif logger == "root": 59 | _logger = get_root_logger() 60 | _logger.log(level, msg) 61 | elif isinstance(logger, logging.Logger): 62 | logger.log(level, msg) 63 | elif logger != "silent": 64 | raise TypeError( 65 | 'logger should be either a logging.Logger object, "root", ' 66 | '"silent" or None, but got {}'.format(logger) 67 | ) 68 | -------------------------------------------------------------------------------- /mmdet/utils/profiling.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import sys 3 | import time 4 | 5 | import torch 6 | 7 | if sys.version_info >= (3, 7): 8 | 9 | @contextlib.contextmanager 10 | def profile_time(trace_name, name, enabled=True, stream=None, end_stream=None): 11 | """Print time spent by CPU and GPU. 12 | 13 | Useful as a temporary context manager to find sweet spots of 14 | code suitable for async implementation. 
15 | 16 | """ 17 | if (not enabled) or not torch.cuda.is_available(): 18 | yield 19 | return 20 | stream = stream if stream else torch.cuda.current_stream() 21 | end_stream = end_stream if end_stream else stream 22 | start = torch.cuda.Event(enable_timing=True) 23 | end = torch.cuda.Event(enable_timing=True) 24 | stream.record_event(start) 25 | try: 26 | cpu_start = time.monotonic() 27 | yield 28 | finally: 29 | cpu_end = time.monotonic() 30 | end_stream.record_event(end) 31 | end.synchronize() 32 | cpu_time = (cpu_end - cpu_start) * 1000 33 | gpu_time = start.elapsed_time(end) 34 | msg = "{} {} cpu_time {:.2f} ms ".format(trace_name, name, cpu_time) 35 | msg += "gpu_time {:.2f} ms stream {}".format(gpu_time, stream) 36 | print(msg, end_stream) 37 | -------------------------------------------------------------------------------- /mmdet/utils/registry.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | from functools import partial 3 | 4 | import mmcv 5 | 6 | 7 | class Registry(object): 8 | def __init__(self, name): 9 | self._name = name 10 | self._module_dict = dict() 11 | 12 | def __repr__(self): 13 | format_str = self.__class__.__name__ + "(name={}, items={})".format( 14 | self._name, list(self._module_dict.keys()) 15 | ) 16 | return format_str 17 | 18 | @property 19 | def name(self): 20 | return self._name 21 | 22 | @property 23 | def module_dict(self): 24 | return self._module_dict 25 | 26 | def get(self, key): 27 | return self._module_dict.get(key, None) 28 | 29 | def _register_module(self, module_class, force=False): 30 | """Register a module class. 31 | 32 | Args: 33 | module_class (class): Class to be registered. 34 | """ 35 | if not inspect.isclass(module_class): 36 | raise TypeError( 37 | "module must be a class, but got {}".format(type(module_class)) 38 | ) 39 | module_name = module_class.__name__ 40 | if not force and module_name in self._module_dict: 41 | raise KeyError( 42 | "{} is already registered in {}".format(module_name, self.name) 43 | ) 44 | self._module_dict[module_name] = module_class 45 | 46 | def register_module(self, cls=None, force=False): 47 | if cls is None: 48 | return partial(self.register_module, force=force) 49 | self._register_module(cls, force=force) 50 | return cls 51 | 52 | 53 | def build_from_cfg(cfg, registry, default_args=None): 54 | """Build a module from config dict. 55 | 56 | Args: 57 | cfg (dict): Config dict. It should at least contain the key "type". 58 | registry (:obj:`Registry`): The registry to search the type from. 59 | default_args (dict, optional): Default initialization arguments. 60 | 61 | Returns: 62 | obj: The constructed object. 
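    Example (illustrative; the registry and class are hypothetical):
        >>> MODELS = Registry('model')
        >>> @MODELS.register_module
        ... class ResNet(object):
        ...     def __init__(self, depth=50):
        ...         self.depth = depth
        >>> model = build_from_cfg(dict(type='ResNet', depth=101), MODELS)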
63 | """ 64 | assert isinstance(cfg, dict) and "type" in cfg 65 | assert isinstance(default_args, dict) or default_args is None 66 | args = cfg.copy() 67 | obj_type = args.pop("type") 68 | if mmcv.is_str(obj_type): 69 | obj_cls = registry.get(obj_type) 70 | if obj_cls is None: 71 | raise KeyError( 72 | "{} is not in the {} registry".format(obj_type, registry.name) 73 | ) 74 | elif inspect.isclass(obj_type): 75 | obj_cls = obj_type 76 | else: 77 | raise TypeError( 78 | "type must be a str or valid type, but got {}".format(type(obj_type)) 79 | ) 80 | if default_args is not None: 81 | for name, value in default_args.items(): 82 | args.setdefault(name, value) 83 | return obj_cls(**args) 84 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = --xdoctest --xdoctest-style=auto 3 | norecursedirs = .git ignore build __pycache__ data docker docs .eggs 4 | 5 | filterwarnings= default 6 | ignore:.*No cfgstr given in Cacher constructor or call.*:Warning 7 | ignore:.*Define the __nice__ method for.*:Warning 8 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -r requirements/build.txt 2 | -r requirements/optional.txt 3 | -r requirements/runtime.txt 4 | -r requirements/tests.txt 5 | -------------------------------------------------------------------------------- /requirements/build.txt: -------------------------------------------------------------------------------- 1 | # These must be installed before building mmdetection 2 | numpy 3 | torch>=1.1 4 | -------------------------------------------------------------------------------- /requirements/optional.txt: -------------------------------------------------------------------------------- 1 | albumentations>=0.3.2 2 | cityscapesscripts 3 | imagecorruptions 4 | -------------------------------------------------------------------------------- /requirements/runtime.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | mmcv>=0.3.1 3 | numpy 4 | # need older pillow until torchvision is fixed 5 | Pillow<=6.2.2 6 | six 7 | terminaltables 8 | torch>=1.1 9 | torchvision 10 | -------------------------------------------------------------------------------- /requirements/tests.txt: -------------------------------------------------------------------------------- 1 | asynctest 2 | codecov 3 | flake8 4 | isort 5 | # Note: used for kwarray.group_items, this may be ported to mmcv in the future. 
6 | kwarray 7 | pytest 8 | pytest-cov 9 | pytest-runner 10 | ubelt 11 | xdoctest >= 0.10.0 12 | yapf 13 | -------------------------------------------------------------------------------- /tests/test_async.py: -------------------------------------------------------------------------------- 1 | """Tests for async interface.""" 2 | 3 | import asyncio 4 | import os 5 | import sys 6 | 7 | import asynctest 8 | import mmcv 9 | import torch 10 | 11 | from mmdet.apis import async_inference_detector, init_detector 12 | 13 | if sys.version_info >= (3, 7): 14 | from mmdet.utils.contextmanagers import concurrent 15 | 16 | 17 | class AsyncTestCase(asynctest.TestCase): 18 | use_default_loop = False 19 | forbid_get_event_loop = True 20 | 21 | TEST_TIMEOUT = int(os.getenv("ASYNCIO_TEST_TIMEOUT", "30")) 22 | 23 | def _run_test_method(self, method): 24 | result = method() 25 | if asyncio.iscoroutine(result): 26 | self.loop.run_until_complete( 27 | asyncio.wait_for(result, timeout=self.TEST_TIMEOUT) 28 | ) 29 | 30 | 31 | class MaskRCNNDetector: 32 | def __init__( 33 | self, model_config, checkpoint=None, streamqueue_size=3, device="cuda:0" 34 | ): 35 | 36 | self.streamqueue_size = streamqueue_size 37 | self.device = device 38 | # build the model and load checkpoint 39 | self.model = init_detector(model_config, checkpoint=None, device=self.device) 40 | self.streamqueue = None 41 | 42 | async def init(self): 43 | self.streamqueue = asyncio.Queue() 44 | for _ in range(self.streamqueue_size): 45 | stream = torch.cuda.Stream(device=self.device) 46 | self.streamqueue.put_nowait(stream) 47 | 48 | if sys.version_info >= (3, 7): 49 | 50 | async def apredict(self, img): 51 | if isinstance(img, str): 52 | img = mmcv.imread(img) 53 | async with concurrent(self.streamqueue): 54 | result = await async_inference_detector(self.model, img) 55 | return result 56 | 57 | 58 | class AsyncInferenceTestCase(AsyncTestCase): 59 | 60 | if sys.version_info >= (3, 7): 61 | 62 | async def test_simple_inference(self): 63 | if not torch.cuda.is_available(): 64 | import pytest 65 | 66 | pytest.skip("test requires GPU and torch+cuda") 67 | 68 | root_dir = os.path.dirname(os.path.dirname(__file__)) 69 | model_config = os.path.join(root_dir, "configs/mask_rcnn_r50_fpn_1x.py") 70 | detector = MaskRCNNDetector(model_config) 71 | await detector.init() 72 | img_path = os.path.join(root_dir, "demo/demo.jpg") 73 | bboxes, _ = await detector.apredict(img_path) 74 | self.assertTrue(bboxes) 75 | -------------------------------------------------------------------------------- /tests/test_nms.py: -------------------------------------------------------------------------------- 1 | """ 2 | CommandLine: 3 | pytest tests/test_nms.py 4 | """ 5 | import numpy as np 6 | import torch 7 | 8 | from mmdet.ops.nms.nms_wrapper import nms 9 | 10 | 11 | def test_nms_device_and_dtypes_cpu(): 12 | """ 13 | CommandLine: 14 | xdoctest -m tests/test_nms.py test_nms_device_and_dtypes_cpu 15 | """ 16 | iou_thr = 0.7 17 | base_dets = np.array( 18 | [ 19 | [49.1, 32.4, 51.0, 35.9, 0.9], 20 | [49.3, 32.9, 51.0, 35.3, 0.9], 21 | [35.3, 11.5, 39.9, 14.5, 0.4], 22 | [35.2, 11.7, 39.7, 15.7, 0.3], 23 | ] 24 | ) 25 | 26 | # CPU can handle float32 and float64 27 | dets = base_dets.astype(np.float32) 28 | suppressed, inds = nms(dets, iou_thr) 29 | assert dets.dtype == suppressed.dtype 30 | assert len(inds) == len(suppressed) == 3 31 | 32 | dets = torch.FloatTensor(base_dets) 33 | suppressed, inds = nms(dets, iou_thr) 34 | assert dets.dtype == suppressed.dtype 35 | assert len(inds) == 
len(suppressed) == 3 36 | 37 | dets = base_dets.astype(np.float64) 38 | suppressed, inds = nms(dets, iou_thr) 39 | assert dets.dtype == suppressed.dtype 40 | assert len(inds) == len(suppressed) == 3 41 | 42 | dets = torch.DoubleTensor(base_dets) 43 | suppressed, inds = nms(dets, iou_thr) 44 | assert dets.dtype == suppressed.dtype 45 | assert len(inds) == len(suppressed) == 3 46 | 47 | 48 | def test_nms_device_and_dtypes_gpu(): 49 | """ 50 | CommandLine: 51 | xdoctest -m tests/test_nms.py test_nms_device_and_dtypes_gpu 52 | """ 53 | if not torch.cuda.is_available(): 54 | import pytest 55 | 56 | pytest.skip("test requires GPU and torch+cuda") 57 | 58 | iou_thr = 0.7 59 | base_dets = np.array( 60 | [ 61 | [49.1, 32.4, 51.0, 35.9, 0.9], 62 | [49.3, 32.9, 51.0, 35.3, 0.9], 63 | [35.3, 11.5, 39.9, 14.5, 0.4], 64 | [35.2, 11.7, 39.7, 15.7, 0.3], 65 | ] 66 | ) 67 | 68 | for device_id in range(torch.cuda.device_count()): 69 | print("Run NMS on device_id = {!r}".format(device_id)) 70 | # GPU can handle float32 but not float64 71 | dets = base_dets.astype(np.float32) 72 | suppressed, inds = nms(dets, iou_thr, device_id) 73 | assert dets.dtype == suppressed.dtype 74 | assert len(inds) == len(suppressed) == 3 75 | 76 | dets = torch.FloatTensor(base_dets).to(device_id) 77 | suppressed, inds = nms(dets, iou_thr) 78 | assert dets.dtype == suppressed.dtype 79 | assert len(inds) == len(suppressed) == 3 80 | -------------------------------------------------------------------------------- /tests/test_soft_nms.py: -------------------------------------------------------------------------------- 1 | """ 2 | CommandLine: 3 | pytest tests/test_soft_nms.py 4 | """ 5 | import numpy as np 6 | import torch 7 | 8 | from mmdet.ops.nms.nms_wrapper import soft_nms 9 | 10 | 11 | def test_soft_nms_device_and_dtypes_cpu(): 12 | """ 13 | CommandLine: 14 | xdoctest -m tests/test_soft_nms.py test_soft_nms_device_and_dtypes_cpu 15 | """ 16 | iou_thr = 0.7 17 | base_dets = np.array( 18 | [ 19 | [49.1, 32.4, 51.0, 35.9, 0.9], 20 | [49.3, 32.9, 51.0, 35.3, 0.9], 21 | [35.3, 11.5, 39.9, 14.5, 0.4], 22 | [35.2, 11.7, 39.7, 15.7, 0.3], 23 | ] 24 | ) 25 | 26 | # CPU can handle float32 and float64 27 | dets = base_dets.astype(np.float32) 28 | new_dets, inds = soft_nms(dets, iou_thr) 29 | assert dets.dtype == new_dets.dtype 30 | assert len(inds) == len(new_dets) == 4 31 | 32 | dets = torch.FloatTensor(base_dets) 33 | new_dets, inds = soft_nms(dets, iou_thr) 34 | assert dets.dtype == new_dets.dtype 35 | assert len(inds) == len(new_dets) == 4 36 | 37 | dets = base_dets.astype(np.float64) 38 | new_dets, inds = soft_nms(dets, iou_thr) 39 | assert dets.dtype == new_dets.dtype 40 | assert len(inds) == len(new_dets) == 4 41 | 42 | dets = torch.DoubleTensor(base_dets) 43 | new_dets, inds = soft_nms(dets, iou_thr) 44 | assert dets.dtype == new_dets.dtype 45 | assert len(inds) == len(new_dets) == 4 46 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import numpy.testing as npt 2 | 3 | from mmdet.utils.flops_counter import params_to_string 4 | 5 | 6 | def test_params_to_string(): 7 | npt.assert_equal(params_to_string(1e9), "1000.0 M") 8 | npt.assert_equal(params_to_string(2e5), "200.0 k") 9 | npt.assert_equal(params_to_string(3e-9), "3e-09") 10 | -------------------------------------------------------------------------------- /tools/browse_dataset.py: 
-------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from pathlib import Path 4 | 5 | import mmcv 6 | from mmcv import Config 7 | 8 | from mmdet.datasets.builder import build_dataset 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser(description="Browse a dataset") 13 | parser.add_argument("config", help="train config file path") 14 | parser.add_argument( 15 | "--skip-type", 16 | type=str, 17 | nargs="+", 18 | default=["DefaultFormatBundle", "Normalize", "Collect"], 19 | help="pipeline steps to skip when browsing", 20 | ) 21 | parser.add_argument( 22 | "--output-dir", 23 | default=None, 24 | type=str, 25 | help="if there is no display interface, save the visualizations here", 26 | ) 27 | parser.add_argument("--not-show", default=False, action="store_true") 28 | parser.add_argument( 29 | "--show-interval", type=int, default=999, help="interval between displayed images (ms)" 30 | ) 31 | args = parser.parse_args() 32 | return args 33 | 34 | 35 | def retrieve_data_cfg(config_path, skip_type): 36 | cfg = Config.fromfile(config_path) 37 | train_data_cfg = cfg.data.train 38 | train_data_cfg["pipeline"] = [ 39 | x for x in train_data_cfg.pipeline if x["type"] not in skip_type 40 | ] 41 | 42 | return cfg 43 | 44 | 45 | def main(): 46 | args = parse_args() 47 | cfg = retrieve_data_cfg(args.config, args.skip_type) 48 | 49 | dataset = build_dataset(cfg.data.train) 50 | 51 | progress_bar = mmcv.ProgressBar(len(dataset)) 52 | for item in dataset: 53 | filename = ( 54 | os.path.join(args.output_dir, Path(item["filename"]).name) 55 | if args.output_dir is not None 56 | else None 57 | ) 58 | mmcv.imshow_det_bboxes( 59 | item["img"], 60 | item["gt_bboxes"], 61 | item["gt_labels"] - 1, 62 | class_names=dataset.CLASSES, 63 | show=not args.not_show, 64 | out_file=filename, 65 | wait_time=args.show_interval, 66 | ) 67 | progress_bar.update() 68 | 69 | 70 | if __name__ == "__main__": 71 | main() 72 | -------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | CONFIG=$1 6 | CHECKPOINT=$2 7 | GPUS=$3 8 | PORT=${PORT:-29500} 9 | 10 | $PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 11 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 12 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | CONFIG=$1 6 | GPUS=$2 7 | PORT=${PORT:-29500} 8 | 9 | $PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} 11 | -------------------------------------------------------------------------------- /tools/fuse_conv_bn.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import torch 4 | import torch.nn as nn 5 | from mmcv.runner import save_checkpoint 6 | 7 | from mmdet.apis import init_detector 8 | 9 | 10 | def fuse_conv_bn(conv, bn): 11 | """ During inference, batch norm layers stop updating their statistics; 12 | only the per-channel running mean and variance are used, which makes it 13 | possible to fuse them into the preceding conv layers to save computation 14 | and simplify the network structure. 
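    Concretely, with the bn layer's running statistics (mean, var) and affine
    parameters (gamma, beta), the fused conv computes, per output channel:
        w_fused = w * gamma / sqrt(var + eps)
        b_fused = (b - mean) * gamma / sqrt(var + eps) + beta
    which is exactly what the code below implements.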
15 | """ 16 | conv_w = conv.weight 17 | conv_b = conv.bias if conv.bias is not None else torch.zeros_like(bn.running_mean) 18 | 19 | factor = bn.weight / torch.sqrt(bn.running_var + bn.eps) 20 | conv.weight = nn.Parameter(conv_w * factor.reshape([conv.out_channels, 1, 1, 1])) 21 | conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias) 22 | return conv 23 | 24 | 25 | def fuse_module(m): 26 | last_conv = None 27 | last_conv_name = None 28 | 29 | for name, child in m.named_children(): 30 | if isinstance(child, (nn.BatchNorm2d, nn.SyncBatchNorm)): 31 | if last_conv is None: # only fuse BN that is after Conv 32 | continue 33 | fused_conv = fuse_conv_bn(last_conv, child) 34 | m._modules[last_conv_name] = fused_conv 35 | # To reduce changes, set BN as Identity instead of deleting it. 36 | m._modules[name] = nn.Identity() 37 | last_conv = None 38 | elif isinstance(child, nn.Conv2d): 39 | last_conv = child 40 | last_conv_name = name 41 | else: 42 | fuse_module(child) 43 | return m 44 | 45 | 46 | def parse_args(): 47 | parser = argparse.ArgumentParser(description="fuse Conv and BN layers in a model") 48 | parser.add_argument("config", help="config file path") 49 | parser.add_argument("checkpoint", help="checkpoint file path") 50 | parser.add_argument("out", help="output path of the converted model") 51 | args = parser.parse_args() 52 | return args 53 | 54 | 55 | def main(): 56 | args = parse_args() 57 | # build the model from a config file and a checkpoint file 58 | model = init_detector(args.config, args.checkpoint) 59 | # fuse conv and bn layers of the model 60 | fused_model = fuse_module(model) 61 | save_checkpoint(fused_model, args.out) 62 | 63 | 64 | if __name__ == "__main__": 65 | main() 66 | -------------------------------------------------------------------------------- /tools/get_flops.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from mmcv import Config 4 | 5 | from mmdet.models import build_detector 6 | from mmdet.utils import get_model_complexity_info 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser(description="Train a detector") 11 | parser.add_argument("config", help="train config file path") 12 | parser.add_argument( 13 | "--shape", type=int, nargs="+", default=[1280, 800], help="input image size" 14 | ) 15 | args = parser.parse_args() 16 | return args 17 | 18 | 19 | def main(): 20 | 21 | args = parse_args() 22 | 23 | if len(args.shape) == 1: 24 | input_shape = (3, args.shape[0], args.shape[0]) 25 | elif len(args.shape) == 2: 26 | input_shape = (3,) + tuple(args.shape) 27 | else: 28 | raise ValueError("invalid input shape") 29 | 30 | cfg = Config.fromfile(args.config) 31 | model = build_detector( 32 | cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg 33 | ).cuda() 34 | model.eval() 35 | 36 | if hasattr(model, "forward_dummy"): 37 | model.forward = model.forward_dummy 38 | else: 39 | raise NotImplementedError( 40 | "FLOPs counter is currently not currently supported with {}".format( 41 | model.__class__.__name__ 42 | ) 43 | ) 44 | 45 | flops, params = get_model_complexity_info(model, input_shape) 46 | split_line = "=" * 30 47 | print( 48 | "{0}\nInput shape: {1}\nFlops: {2}\nParams: {3}\n{0}".format( 49 | split_line, input_shape, flops, params 50 | ) 51 | ) 52 | print( 53 | "!!!Please be cautious if you use the results in papers. " 54 | "You may need to check if all ops are supported and verify that the " 55 | "flops computation is correct." 
56 | ) 57 | 58 | 59 | if __name__ == "__main__": 60 | main() 61 | -------------------------------------------------------------------------------- /tools/publish_model.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import subprocess 3 | 4 | import torch 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser(description="Process a checkpoint to be published") 9 | parser.add_argument("in_file", help="input checkpoint filename") 10 | parser.add_argument("out_file", help="output checkpoint filename") 11 | args = parser.parse_args() 12 | return args 13 | 14 | 15 | def process_checkpoint(in_file, out_file): 16 | checkpoint = torch.load(in_file, map_location="cpu") 17 | # remove optimizer for smaller file size 18 | if "optimizer" in checkpoint: 19 | del checkpoint["optimizer"] 20 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 21 | # add the code here. 22 | torch.save(checkpoint, out_file) 23 | sha = subprocess.check_output(["sha256sum", out_file]).decode() 24 | final_file = (out_file[:-4] if out_file.endswith(".pth") else out_file) + "-{}.pth".format(sha[:8]) 25 | subprocess.Popen(["mv", out_file, final_file]) 26 | 27 | 28 | def main(): 29 | args = parse_args() 30 | process_checkpoint(args.in_file, args.out_file) 31 | 32 | 33 | if __name__ == "__main__": 34 | main() 35 | -------------------------------------------------------------------------------- /tools/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-32} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | srun -p ${PARTITION} \ 16 | --job-name=${JOB_NAME} \ 17 | --gres=gpu:${GPUS_PER_NODE} \ 18 | --ntasks=${GPUS} \ 19 | --ntasks-per-node=${GPUS_PER_NODE} \ 20 | --cpus-per-task=${CPUS_PER_TASK} \ 21 | --kill-on-bad-exit=1 \ 22 | ${SRUN_ARGS} \ 23 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} --eval bbox --tmpdir=./tmp1 24 | -------------------------------------------------------------------------------- /tools/slurm_test_openimage.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-16} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-3} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | srun -p ${PARTITION} \ 16 | --job-name=${JOB_NAME} \ 17 | --gres=gpu:${GPUS_PER_NODE} \ 18 | --ntasks=${GPUS} \ 19 | --ntasks-per-node=${GPUS_PER_NODE} \ 20 | --cpus-per-task=${CPUS_PER_TASK} \ 21 | --kill-on-bad-exit=1 \ 22 | ${SRUN_ARGS} \ 23 | python -u tools/test_openimages.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} --eval bbox --label_dir=./data/OpenImages/challenge2019 --det_file=./work_dir/r50-FPN-1x_classsampling_TSD/dump.txt --tmpdir=./tmp1 24 | -------------------------------------------------------------------------------- /tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | WORK_DIR=$4 9 | GPUS=${5:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | 
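# PY_ARGS defaults to --validate below, so slurm training jobs run periodic
# evaluation unless the caller overrides PY_ARGS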
PY_ARGS=${PY_ARGS:-"--validate"} 14 | 15 | srun -p ${PARTITION} \ 16 | --job-name=${JOB_NAME} \ 17 | --gres=gpu:${GPUS_PER_NODE} \ 18 | --ntasks=${GPUS} \ 19 | --ntasks-per-node=${GPUS_PER_NODE} \ 20 | --cpus-per-task=${CPUS_PER_TASK} \ 21 | --kill-on-bad-exit=1 \ 22 | ${SRUN_ARGS} \ 23 | python -u tools/train.py ${CONFIG} --work_dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} 24 | -------------------------------------------------------------------------------- /tools/upgrade_model_version.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import re 3 | from collections import OrderedDict 4 | 5 | import torch 6 | 7 | 8 | def convert(in_file, out_file): 9 | """Convert keys in checkpoints. 10 | 11 | There can be some breaking changes during the development of mmdetection, 12 | and this tool is used for upgrading checkpoints trained with old versions 13 | to the latest one. 14 | """ 15 | checkpoint = torch.load(in_file) 16 | in_state_dict = checkpoint.pop("state_dict") 17 | out_state_dict = OrderedDict() 18 | for key, val in in_state_dict.items(): 19 | # Use ConvModule instead of nn.Conv2d in RetinaNet 20 | # cls_convs.0.weight -> cls_convs.0.conv.weight 21 | m = re.search(r"(cls_convs|reg_convs)\.\d+\.(weight|bias)", key) 22 | if m is not None: 23 | param = m.groups()[1] 24 | new_key = key.replace(param, "conv.{}".format(param)) 25 | out_state_dict[new_key] = val 26 | continue 27 | 28 | out_state_dict[key] = val 29 | checkpoint["state_dict"] = out_state_dict 30 | torch.save(checkpoint, out_file) 31 | 32 | 33 | def main(): 34 | parser = argparse.ArgumentParser(description="Upgrade model version") 35 | parser.add_argument("in_file", help="input checkpoint file") 36 | parser.add_argument("out_file", help="output checkpoint file") 37 | args = parser.parse_args() 38 | convert(args.in_file, args.out_file) 39 | 40 | 41 | if __name__ == "__main__": 42 | main() 43 | --------------------------------------------------------------------------------
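As a quick sanity check of the key-renaming rule in tools/upgrade_model_version.py above, here is a minimal, self-contained sketch of the same loop; the state-dict keys are invented for illustration only.

```python
import re
from collections import OrderedDict

# hypothetical old-style keys; only cls/reg conv head keys are rewritten
in_state_dict = OrderedDict([
    ("bbox_head.cls_convs.0.weight", "w0"),
    ("bbox_head.cls_convs.0.bias", "b0"),
    ("backbone.conv1.weight", "w1"),  # left untouched: no cls/reg conv match
])

out_state_dict = OrderedDict()
for key, val in in_state_dict.items():
    # same pattern as convert(): match head conv weights/biases
    m = re.search(r"(cls_convs|reg_convs)\.\d+\.(weight|bias)", key)
    if m is not None:
        param = m.groups()[1]
        key = key.replace(param, "conv.{}".format(param))
    out_state_dict[key] = val

# cls_convs.0.weight -> cls_convs.0.conv.weight, other keys pass through
assert "bbox_head.cls_convs.0.conv.weight" in out_state_dict
assert "bbox_head.cls_convs.0.conv.bias" in out_state_dict
assert "backbone.conv1.weight" in out_state_dict
```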