├── .dev_scripts
│   ├── benchmark_filter.py
│   ├── gather_models.py
│   └── linter.sh
├── .github
│   ├── CODE_OF_CONDUCT.md
│   ├── CONTRIBUTING.md
│   ├── ISSUE_TEMPLATE
│   │   ├── config.yml
│   │   ├── error-report.md
│   │   ├── feature_request.md
│   │   ├── general_questions.md
│   │   └── reimplementation_questions.md
│   └── workflows
│       └── build.yml
├── .gitignore
├── .isort.cfg
├── .pre-commit-config.yaml
├── .readthedocs.yml
├── .style.yapf
├── LICENSE
├── README.md
├── ThirdPartyNotices.txt
├── configs
│   ├── _base_
│   │   ├── datasets
│   │   │   ├── cityscapes_detection.py
│   │   │   ├── cityscapes_instance.py
│   │   │   ├── coco_detection.py
│   │   │   ├── coco_instance.py
│   │   │   ├── coco_instance_semantic.py
│   │   │   ├── deepfashion.py
│   │   │   ├── lvis_instance.py
│   │   │   ├── voc0712.py
│   │   │   └── wider_face.py
│   │   ├── default_runtime.py
│   │   ├── models
│   │   │   ├── cascade_mask_rcnn_r50_fpn.py
│   │   │   ├── cascade_rcnn_r50_fpn.py
│   │   │   ├── fast_rcnn_r50_fpn.py
│   │   │   ├── faster_rcnn_r50_caffe_c4.py
│   │   │   ├── faster_rcnn_r50_fpn.py
│   │   │   ├── mask_rcnn_r50_caffe_c4.py
│   │   │   ├── mask_rcnn_r50_fpn.py
│   │   │   ├── retinanet_r50_fpn.py
│   │   │   ├── rpn_r50_caffe_c4.py
│   │   │   ├── rpn_r50_fpn.py
│   │   │   └── ssd300.py
│   │   └── schedules
│   │       ├── schedule_1x.py
│   │       ├── schedule_20e.py
│   │       └── schedule_2x.py
│   ├── dense_reppoints_v1
│   │   ├── dense_reppoints_v1_r50_fpn_1x_coco.py
│   │   ├── dense_reppoints_v1_r50_fpn_giou_1x_coco.py
│   │   └── dense_reppoints_v1_r50_fpn_giou_mstrain_3x_coco.py
│   ├── dense_reppoints_v2
│   │   ├── dense_reppoints_v2_r50_fpn_1x_coco.py
│   │   ├── dense_reppoints_v2_r50_fpn_giou_1x_coco.py
│   │   └── dense_reppoints_v2_r50_fpn_giou_mstrain_3x_coco.py
│   ├── reppoints_v1
│   │   ├── reppoints_minmax_r50_fpn_1x_coco.py
│   │   ├── reppoints_minmax_r50_fpn_giou_1x_coco.py
│   │   ├── reppoints_v1_mobilenet_fpn_giou_128c_mstrain_2x_coco.py
│   │   ├── reppoints_v1_mobilenet_fpn_giou_mstrain_2x_coco.py
│   │   ├── reppoints_v1_r101_fpn_dconv_c3-c5_giou_mstrain_2x_coco.py
│   │   ├── reppoints_v1_r101_fpn_giou_mstrain_2x_coco.py
│   │   ├── reppoints_v1_r50_fpn_1x_coco.py
│   │   ├── reppoints_v1_r50_fpn_giou_1x_coco.py
│   │   ├── reppoints_v1_r50_fpn_giou_mstrain_2x_coco.py
│   │   ├── reppoints_v1_x101_fpn_dconv_c3-c5_giou_mstrain_2x_coco.py
│   │   └── reppoints_v1_x101_fpn_giou_mstrain_2x_coco.py
│   └── reppoints_v2
│       ├── reppoints_v2_mobilenet_fpn_giou_128c_mstrain_2x_coco.py
│       ├── reppoints_v2_mobilenet_fpn_giou_mstrain_2x_coco.py
│       ├── reppoints_v2_r101_fpn_dconv_c3-c5_giou_mstrain_2x_coco.py
│       ├── reppoints_v2_r101_fpn_giou_mstrain_2x_coco.py
│       ├── reppoints_v2_r50_fpn_1x_coco.py
│       ├── reppoints_v2_r50_fpn_giou_1x_coco.py
│       ├── reppoints_v2_r50_fpn_giou_mstrain_2x_coco.py
│       ├── reppoints_v2_x101_fpn_dconv_c3-c5_giou_mstrain_2x_coco.py
│       └── reppoints_v2_x101_fpn_giou_mstrain_2x_coco.py
├── demo
│   └── reppointsv2.png
├── docker
│   └── Dockerfile
├── docs
│   ├── Makefile
│   ├── api.rst
│   ├── changelog.md
│   ├── compatibility.md
│   ├── conf.py
│   ├── config.md
│   ├── getting_started.md
│   ├── index.rst
│   ├── install.md
│   ├── make.bat
│   ├── model_zoo.md
│   ├── projects.md
│   ├── robustness_benchmarking.md
│   └── tutorials
│       ├── data_pipeline.md
│       ├── finetune.md
│       ├── new_dataset.md
│       └── new_modules.md
├── mmdet
│   ├── VERSION
│   ├── __init__.py
│   ├── apis
│   │   ├── __init__.py
│   │   ├── inference.py
│   │   ├── test.py
│   │   └── train.py
│   ├── core
│   │   ├── __init__.py
│   │   ├── anchor
│   │   │   ├── __init__.py
│   │   │   ├── anchor_generator.py
│   │   │   ├── builder.py
│   │   │   ├── point_generator.py
│   │   │   └── utils.py
│   │   ├── bbox
│   │   │   ├── __init__.py
│   │   │   ├── assigners
│   │   │   │   ├── __init__.py
│   │   │   │   ├── approx_max_iou_assigner.py
│   │   │   │   ├── assign_result.py
│   │   │   │   ├── atss_assigner.py
│   │   │   │   ├── base_assigner.py
│   │   │   │   ├── center_region_assigner.py
│   │   │   │   ├── max_iou_assigner.py
│   │   │   │   ├── point_assigner.py
│   │   │   │   ├── point_assigner_v2.py
│   │   │   │   ├── point_ct_assigner.py
│   │   │   │   └── point_hm_assigner.py
│   │   │   ├── builder.py
│   │   │   ├── coder
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base_bbox_coder.py
│   │   │   │   ├── delta_xywh_bbox_coder.py
│   │   │   │   ├── legacy_delta_xywh_bbox_coder.py
│   │   │   │   ├── pseudo_bbox_coder.py
│   │   │   │   └── tblr_bbox_coder.py
│   │   │   ├── demodata.py
│   │   │   ├── iou_calculators
│   │   │   │   ├── __init__.py
│   │   │   │   ├── builder.py
│   │   │   │   └── iou2d_calculator.py
│   │   │   ├── samplers
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base_sampler.py
│   │   │   │   ├── combined_sampler.py
│   │   │   │   ├── instance_balanced_pos_sampler.py
│   │   │   │   ├── iou_balanced_neg_sampler.py
│   │   │   │   ├── ohem_sampler.py
│   │   │   │   ├── pseudo_sampler.py
│   │   │   │   ├── random_sampler.py
│   │   │   │   ├── sampling_result.py
│   │   │   │   └── score_hlr_sampler.py
│   │   │   └── transforms.py
│   │   ├── evaluation
│   │   │   ├── __init__.py
│   │   │   ├── bbox_overlaps.py
│   │   │   ├── class_names.py
│   │   │   ├── eval_hooks.py
│   │   │   ├── mean_ap.py
│   │   │   └── recall.py
│   │   ├── fp16
│   │   │   ├── __init__.py
│   │   │   ├── decorators.py
│   │   │   ├── hooks.py
│   │   │   └── utils.py
│   │   ├── mask
│   │   │   ├── __init__.py
│   │   │   ├── mask_target.py
│   │   │   ├── structures.py
│   │   │   └── utils.py
│   │   ├── post_processing
│   │   │   ├── __init__.py
│   │   │   ├── bbox_nms.py
│   │   │   └── merge_augs.py
│   │   └── utils
│   │       ├── __init__.py
│   │       ├── dist_utils.py
│   │       └── misc.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── builder.py
│   │   ├── cityscapes.py
│   │   ├── coco.py
│   │   ├── custom.py
│   │   ├── dataset_wrappers.py
│   │   ├── deepfashion.py
│   │   ├── lvis.py
│   │   ├── pipelines
│   │   │   ├── __init__.py
│   │   │   ├── auto_augment.py
│   │   │   ├── compose.py
│   │   │   ├── formating.py
│   │   │   ├── formating_reppointsv2.py
│   │   │   ├── instaboost.py
│   │   │   ├── loading.py
│   │   │   ├── loading_reppointsv2.py
│   │   │   ├── test_time_aug.py
│   │   │   └── transforms.py
│   │   ├── samplers
│   │   │   ├── __init__.py
│   │   │   ├── distributed_sampler.py
│   │   │   └── group_sampler.py
│   │   ├── voc.py
│   │   ├── wider_face.py
│   │   └── xml_style.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── backbones
│   │   │   ├── __init__.py
│   │   │   ├── detectors_resnet.py
│   │   │   ├── detectors_resnext.py
│   │   │   ├── hourglass.py
│   │   │   ├── hrnet.py
│   │   │   ├── mobilenet.py
│   │   │   ├── regnet.py
│   │   │   ├── res2net.py
│   │   │   ├── resnet.py
│   │   │   ├── resnext.py
│   │   │   └── ssd_vgg.py
│   │   ├── builder.py
│   │   ├── dense_heads
│   │   │   ├── __init__.py
│   │   │   ├── anchor_free_head.py
│   │   │   ├── anchor_head.py
│   │   │   ├── atss_head.py
│   │   │   ├── base_dense_head.py
│   │   │   ├── dense_reppoints_head.py
│   │   │   ├── dense_reppoints_v2_head.py
│   │   │   ├── fcos_head.py
│   │   │   ├── fovea_head.py
│   │   │   ├── free_anchor_retina_head.py
│   │   │   ├── fsaf_head.py
│   │   │   ├── ga_retina_head.py
│   │   │   ├── ga_rpn_head.py
│   │   │   ├── gfl_head.py
│   │   │   ├── guided_anchor_head.py
│   │   │   ├── nasfcos_head.py
│   │   │   ├── pisa_retinanet_head.py
│   │   │   ├── pisa_ssd_head.py
│   │   │   ├── reppoints_head.py
│   │   │   ├── reppoints_v2_head.py
│   │   │   ├── retina_head.py
│   │   │   ├── retina_sepbn_head.py
│   │   │   ├── rpn_head.py
│   │   │   ├── rpn_test_mixin.py
│   │   │   └── ssd_head.py
│   │   ├── detectors
│   │   │   ├── __init__.py
│   │   │   ├── atss.py
│   │   │   ├── base.py
│   │   │   ├── cascade_rcnn.py
│   │   │   ├── dense_reppoints_detector.py
│   │   │   ├── dense_reppoints_v2_detector.py
│   │   │   ├── fast_rcnn.py
│   │   │   ├── faster_rcnn.py
│   │   │   ├── fcos.py
│   │   │   ├── fovea.py
│   │   │   ├── fsaf.py
│   │   │   ├── gfl.py
│   │   │   ├── grid_rcnn.py
│   │   │   ├── htc.py
│   │   │   ├── mask_rcnn.py
│   │   │   ├── mask_scoring_rcnn.py
│   │   │   ├── nasfcos.py
│   │   │   ├── point_rend.py
│   │   │   ├── reppoints_detector.py
│   │   │   ├── reppoints_v2_detector.py
│   │   │   ├── retinanet.py
│   │   │   ├── rpn.py
│   │   │   ├── single_stage.py
│   │   │   └── two_stage.py
│   │   ├── losses
│   │   │   ├── __init__.py
│   │   │   ├── accuracy.py
│   │   │   ├── ae_loss.py
│   │   │   ├── balanced_l1_loss.py
│   │   │   ├── chamfer_loss.py
│   │   │   ├── cross_entropy_loss.py
│   │   │   ├── focal_loss.py
│   │   │   ├── gaussian_focal_loss.py
│   │   │   ├── gfocal_loss.py
│   │   │   ├── ghm_loss.py
│   │   │   ├── iou_loss.py
│   │   │   ├── mse_loss.py
│   │   │   ├── pisa_loss.py
│   │   │   ├── smooth_l1_loss.py
│   │   │   └── utils.py
│   │   ├── necks
│   │   │   ├── __init__.py
│   │   │   ├── bfp.py
│   │   │   ├── fpn.py
│   │   │   ├── fpn_carafe.py
│   │   │   ├── hrfpn.py
│   │   │   ├── nas_fpn.py
│   │   │   ├── nasfcos_fpn.py
│   │   │   ├── pafpn.py
│   │   │   └── rfp.py
│   │   ├── roi_heads
│   │   │   ├── __init__.py
│   │   │   ├── base_roi_head.py
│   │   │   ├── bbox_heads
│   │   │   │   ├── __init__.py
│   │   │   │   ├── bbox_head.py
│   │   │   │   ├── convfc_bbox_head.py
│   │   │   │   └── double_bbox_head.py
│   │   │   ├── cascade_roi_head.py
│   │   │   ├── double_roi_head.py
│   │   │   ├── dynamic_roi_head.py
│   │   │   ├── grid_roi_head.py
│   │   │   ├── htc_roi_head.py
│   │   │   ├── mask_heads
│   │   │   │   ├── __init__.py
│   │   │   │   ├── coarse_mask_head.py
│   │   │   │   ├── fcn_mask_head.py
│   │   │   │   ├── fused_semantic_head.py
│   │   │   │   ├── grid_head.py
│   │   │   │   ├── htc_mask_head.py
│   │   │   │   ├── mask_point_head.py
│   │   │   │   └── maskiou_head.py
│   │   │   ├── mask_scoring_roi_head.py
│   │   │   ├── pisa_roi_head.py
│   │   │   ├── point_rend_roi_head.py
│   │   │   ├── roi_extractors
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base_roi_extractor.py
│   │   │   │   ├── generic_roi_extractor.py
│   │   │   │   └── single_level_roi_extractor.py
│   │   │   ├── shared_heads
│   │   │   │   ├── __init__.py
│   │   │   │   └── res_layer.py
│   │   │   ├── standard_roi_head.py
│   │   │   └── test_mixins.py
│   │   └── utils
│   │       ├── __init__.py
│   │       └── res_layer.py
│   ├── ops
│   │   ├── __init__.py
│   │   ├── carafe
│   │   │   ├── __init__.py
│   │   │   ├── carafe.py
│   │   │   ├── grad_check.py
│   │   │   ├── setup.py
│   │   │   └── src
│   │   │       ├── carafe_ext.cpp
│   │   │       ├── carafe_naive_ext.cpp
│   │   │       └── cuda
│   │   │           ├── carafe_cuda.cpp
│   │   │           ├── carafe_cuda_kernel.cu
│   │   │           ├── carafe_naive_cuda.cpp
│   │   │           └── carafe_naive_cuda_kernel.cu
│   │   ├── chamfer_2d
│   │   │   ├── __init__.py
│   │   │   ├── dist_chamfer_2d.py
│   │   │   └── src
│   │   │       ├── chamfer_2d.cu
│   │   │       └── chamfer_cuda.cpp
│   │   ├── context_block.py
│   │   ├── conv_ws.py
│   │   ├── corner_pool
│   │   │   ├── __init__.py
│   │   │   ├── corner_pool.py
│   │   │   └── src
│   │   │       └── corner_pool.cpp
│   │   ├── dcn
│   │   │   ├── __init__.py
│   │   │   ├── deform_conv.py
│   │   │   ├── deform_pool.py
│   │   │   └── src
│   │   │       ├── cuda
│   │   │       │   ├── deform_conv_cuda.cpp
│   │   │       │   ├── deform_conv_cuda_kernel.cu
│   │   │       │   ├── deform_pool_cuda.cpp
│   │   │       │   └── deform_pool_cuda_kernel.cu
│   │   │       ├── deform_conv_ext.cpp
│   │   │       └── deform_pool_ext.cpp
│   │   ├── generalized_attention.py
│   │   ├── masked_conv
│   │   │   ├── __init__.py
│   │   │   ├── masked_conv.py
│   │   │   └── src
│   │   │       ├── cuda
│   │   │       │   ├── masked_conv2d_cuda.cpp
│   │   │       │   └── masked_conv2d_kernel.cu
│   │   │       └── masked_conv2d_ext.cpp
│   │   ├── merge_cells.py
│   │   ├── nms
│   │   │   ├── __init__.py
│   │   │   ├── nms_wrapper.py
│   │   │   └── src
│   │   │       ├── cpu
│   │   │       │   └── nms_cpu.cpp
│   │   │       ├── cuda
│   │   │       │   ├── nms_cuda.cpp
│   │   │       │   └── nms_kernel.cu
│   │   │       └── nms_ext.cpp
│   │   ├── non_local.py
│   │   ├── plugin.py
│   │   ├── point_sample.py
│   │   ├── roi_align
│   │   │   ├── __init__.py
│   │   │   ├── gradcheck.py
│   │   │   ├── roi_align.py
│   │   │   └── src
│   │   │       ├── cpu
│   │   │       │   └── roi_align_v2.cpp
│   │   │       ├── cuda
│   │   │       │   ├── roi_align_kernel.cu
│   │   │       │   └── roi_align_kernel_v2.cu
│   │   │       └── roi_align_ext.cpp
│   │   ├── roi_pool
│   │   │   ├── __init__.py
│   │   │   ├── gradcheck.py
│   │   │   ├── roi_pool.py
│   │   │   └── src
│   │   │       ├── cuda
│   │   │       │   └── roi_pool_kernel.cu
│   │   │       └── roi_pool_ext.cpp
│   │   ├── saconv.py
│   │   ├── sigmoid_focal_loss
│   │   │   ├── __init__.py
│   │   │   ├── sigmoid_focal_loss.py
│   │   │   └── src
│   │   │       ├── cuda
│   │   │       │   └── sigmoid_focal_loss_cuda.cu
│   │   │       └── sigmoid_focal_loss_ext.cpp
│   │   ├── utils
│   │   │   ├── __init__.py
│   │   │   └── src
│   │   │       └── compiling_info.cpp
│   │   └── wrappers.py
│   └── utils
│       ├── __init__.py
│       ├── collect_env.py
│       ├── contextmanagers.py
│       ├── logger.py
│       ├── profiling.py
│       └── util_mixins.py
├── pytest.ini
├── requirements.txt
├── requirements
│   ├── build.txt
│   ├── docs.txt
│   ├── optional.txt
│   ├── readthedocs.txt
│   ├── runtime.txt
│   └── tests.txt
├── setup.py
├── tests
│   ├── async_benchmark.py
│   ├── test_anchor.py
│   ├── test_assigner.py
│   ├── test_async.py
│   ├── test_backbone.py
│   ├── test_config.py
│   ├── test_dataset.py
│   ├── test_forward.py
│   ├── test_fp16.py
│   ├── test_heads.py
│   ├── test_losses.py
│   ├── test_masks.py
│   ├── test_necks.py
│   ├── test_ops
│   │   ├── test_corner_pool.py
│   │   ├── test_merge_cells.py
│   │   ├── test_nms.py
│   │   ├── test_soft_nms.py
│   │   └── test_wrappers.py
│   ├── test_pipelines
│   │   ├── test_formatting.py
│   │   ├── test_loading.py
│   │   ├── test_models_aug_test.py
│   │   └── test_transform.py
│   ├── test_pisa_heads.py
│   ├── test_roi_extractor.py
│   └── test_sampler.py
└── tools
    ├── analyze_logs.py
    ├── benchmark.py
    ├── browse_dataset.py
    ├── coco_error_analysis.py
    ├── convert_datasets
    │   ├── cityscapes.py
    │   └── pascal_voc.py
    ├── detectron2pytorch.py
    ├── dist_test.sh
    ├── dist_train.sh
    ├── fuse_conv_bn.py
    ├── get_flops.py
    ├── print_config.py
    ├── publish_model.py
    ├── pytorch2onnx.py
    ├── regnet2mmdet.py
    ├── robustness_eval.py
    ├── slurm_test.sh
    ├── slurm_train.sh
    ├── test.py
    ├── test_robustness.py
    ├── train.py
    └── upgrade_model_version.py

--------------------------------------------------------------------------------
/.dev_scripts/linter.sh:
--------------------------------------------------------------------------------
yapf -r -i --style .style.yapf mmdet/ configs/ tests/ tools/
isort -rc mmdet/ configs/ tests/ tools/
flake8 .

--------------------------------------------------------------------------------
/.github/CONTRIBUTING.md:
--------------------------------------------------------------------------------
# Contributing to mmdetection

All kinds of contributions are welcome, including but not limited to the following.

- Fixes (typos, bugs)
- New features and components

## Workflow

1. Fork and pull the latest mmdetection
2. Checkout a new branch (do not use the master branch for PRs)
3. Commit your changes
4. Create a PR

Note
- If you plan to add new features that involve large changes, it is encouraged to open an issue for discussion first.
- If you are the author of some papers and would like to include your method in mmdetection,
please contact Kai Chen (chenkaidev[at]gmail[dot]com). Your contribution will be much appreciated.

## Code style

### Python
We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style.

We use the following tools for linting and formatting:
- [flake8](http://flake8.pycqa.org/en/latest/): linter
- [yapf](https://github.com/google/yapf): formatter
- [isort](https://github.com/timothycrosley/isort): sorts imports

Style configurations for yapf and isort can be found in [.style.yapf](../.style.yapf) and [.isort.cfg](../.isort.cfg).

We use a [pre-commit hook](https://pre-commit.com/) that runs `flake8`, `yapf` and `isort`, trims trailing whitespace,
fixes end-of-file newlines, and sorts `requirements.txt` automatically on every commit.
The config for the pre-commit hook is stored in [.pre-commit-config](../.pre-commit-config.yaml).

After you clone the repository, you will need to install and initialize the pre-commit hook.

```
pip install -U pre-commit
```

Then, from the repository folder:
```
pre-commit install
```

After this, the code linters and formatter will be enforced on every commit.


>Before you create a PR, make sure that your code lints and is formatted by yapf.
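The same toolchain can also be driven from Python. The sketch below simply mirrors `.dev_scripts/linter.sh` via `subprocess`; it assumes `yapf`, `isort` 4.x (which accepts `-rc`) and `flake8` are installed, and is an illustrative helper, not a script shipped with the repo:

```python
# Illustrative sketch: run the repo's lint toolchain in order, as linter.sh does.
import subprocess
import sys

COMMANDS = [
    ['yapf', '-r', '-i', '--style', '.style.yapf',
     'mmdet/', 'configs/', 'tests/', 'tools/'],
    ['isort', '-rc', 'mmdet/', 'configs/', 'tests/', 'tools/'],
    ['flake8', '.'],
]

def main():
    for cmd in COMMANDS:
        print('running:', ' '.join(cmd))
        result = subprocess.run(cmd)
        if result.returncode != 0:
            sys.exit(result.returncode)  # stop at the first failing tool

if __name__ == '__main__':
    main()
```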
### C++ and CUDA
We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html).

--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
blank_issues_enabled: false

--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/error-report.md:
--------------------------------------------------------------------------------
---
name: Error report
about: Create a report to help us improve
title: ''
labels: ''
assignees: ''

---

Thanks for your error report; we appreciate it a lot.

**Checklist**
1. I have searched related issues but cannot get the expected help.
2. The bug has not been fixed in the latest version.

**Describe the bug**
A clear and concise description of what the bug is.

**Reproduction**
1. What command or script did you run?
```
A placeholder for the command.
```
2. Did you make any modifications to the code or config? Do you understand what you have modified?
3. What dataset did you use?

**Environment**

1. Please run `python mmdet/utils/collect_env.py` to collect the necessary environment information and paste it here.
2. You may add additional information that may be helpful for locating the problem, such as
    - How you installed PyTorch [e.g., pip, conda, source]
    - Other environment variables that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.)

**Error traceback**
If applicable, paste the error traceback here.
```
A placeholder for the traceback.
```

**Bug fix**
If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here and that would be much appreciated!

--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: ''
assignees: ''

---

**Describe the feature**

**Motivation**
A clear and concise description of the motivation for the feature.
Ex1. It is inconvenient when [....].
Ex2. There is a recent paper [....], which is very helpful for [....].

**Related resources**
If there is an official code release or third-party implementations, please also provide the information here, which would be very helpful.

**Additional context**
Add any other context or screenshots about the feature request here.
If you would like to implement the feature and create a PR, please leave a comment here and that would be much appreciated.

--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/general_questions.md:
--------------------------------------------------------------------------------
---
name: General questions
about: Ask general questions to get help
title: ''
labels: ''
assignees: ''

---

--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/reimplementation_questions.md:
--------------------------------------------------------------------------------
---
name: Reimplementation Questions
about: Ask about questions during model reimplementation
title: ''
labels: 'reimplementation'
assignees: ''

---

**Notice**

Reimplementation issues commonly fall into the following situations:
1. Reimplementing a model in the model zoo using the provided configs
2. Reimplementing a model in the model zoo on another dataset (e.g., custom datasets)
3. Reimplementing a custom model where all the components are implemented in MMDetection
4. Reimplementing a custom model with new modules implemented by yourself

There are several things to do for the different cases, as below.
- For cases 1 & 3, please follow the steps in the following sections so that we can quickly identify the issue.
- For cases 2 & 4, please understand that there is not much we can do to help here, because we usually do not know the full code, and users are responsible for the code they write.
- One suggestion for cases 2 & 4 is to first check whether the bug lies in the self-implemented code or in the original code. For example, first make sure that the same model runs well on supported datasets. If you still need help, please describe in the issue what you have done and what you obtained, follow the steps in the following sections, and be as clear as possible so that we can better help you.

**Checklist**
1. I have searched related issues but cannot get the expected help.
2. The issue has not been fixed in the latest version.

**Describe the issue**

A clear and concise description of the problem you met and what you have done.

**Reproduction**
1. What command or script did you run?
```
A placeholder for the command.
```
2. Which config did you run?
```
A placeholder for the config.
```
3. Did you make any modifications to the code or config? Do you understand what you have modified?
4. What dataset did you use?

**Environment**

1. Please run `python mmdet/utils/collect_env.py` to collect the necessary environment information and paste it here.
2. You may add additional information that may be helpful for locating the problem, such as
    - How you installed PyTorch [e.g., pip, conda, source]
    - Other environment variables that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.)

**Results**

If applicable, paste the related results here, e.g., what you expect and what you get.
```
A placeholder for results comparison
```

**Issue fix**

If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here and that would be much appreciated!
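Both report templates above ask for the output of `python mmdet/utils/collect_env.py`. As a hedged convenience sketch (assuming the repo is importable and `collect_env()` returns a dict of name/value pairs, as `mmdet/utils/collect_env.py` provides):

```python
# Hypothetical helper: print the environment table the issue templates ask for.
from mmdet.utils.collect_env import collect_env

if __name__ == '__main__':
    for name, value in collect_env().items():
        print(f'{name}: {value}')
```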
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

mmdet/version.py
data/
.vscode
.idea
.DS_Store

# custom
*.pkl
*.pkl.json
*.log.json
work_dirs/

# Pytorch
*.pth
*.py~
*.sh~

--------------------------------------------------------------------------------
/.isort.cfg:
--------------------------------------------------------------------------------
[isort]
line_length = 79
multi_line_output = 0
known_standard_library = setuptools
known_first_party = mmdet
known_third_party = PIL,asynctest,cityscapesscripts,cv2,matplotlib,mmcv,numpy,onnx,pycocotools,pytest,robustness_eval,roi_align,roi_pool,seaborn,six,terminaltables,torch,torchvision
no_lines_before = STDLIB,LOCALFOLDER
default_section = THIRDPARTY
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
repos:
  - repo: https://gitlab.com/pycqa/flake8.git
    rev: 3.8.0
    hooks:
      - id: flake8
  - repo: https://github.com/asottile/seed-isort-config
    rev: v2.1.0
    hooks:
      - id: seed-isort-config
  - repo: https://github.com/timothycrosley/isort
    rev: 4.3.21
    hooks:
      - id: isort
  - repo: https://github.com/pre-commit/mirrors-yapf
    rev: v0.29.0
    hooks:
      - id: yapf
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v2.5.0
    hooks:
      - id: trailing-whitespace
      - id: check-yaml
      - id: end-of-file-fixer
      - id: requirements-txt-fixer
      - id: double-quote-string-fixer
      - id: check-merge-conflict
      - id: fix-encoding-pragma
        args: ["--remove"]

--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
version: 2

python:
  version: 3.7
  install:
    - requirements: requirements/docs.txt
    - requirements: requirements/readthedocs.txt

--------------------------------------------------------------------------------
/.style.yapf:
--------------------------------------------------------------------------------
[style]
BASED_ON_STYLE = pep8
BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true
SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true
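A hedged sketch of applying the style file above programmatically, assuming yapf is installed (`FormatCode` returns a `(formatted_source, changed)` tuple):

```python
# Illustrative sketch: format a snippet with the repo's yapf style file.
from yapf.yapflib.yapf_api import FormatCode

source = "x = {  'a':37,'b':42,\n'c':927}\n"
formatted, changed = FormatCode(source, style_config='.style.yapf')
print(formatted)  # cleaned-up, PEP8-based formatting
```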
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) Microsoft Corporation. All rights reserved.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/configs/_base_/datasets/cityscapes_detection.py:
--------------------------------------------------------------------------------
dataset_type = 'CityscapesDataset'
data_root = 'data/cityscapes/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 1024),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=1,
    workers_per_gpu=2,
    train=dict(
        type='RepeatDataset',
        times=8,
        dataset=dict(
            type=dataset_type,
            ann_file=data_root +
            'annotations/instancesonly_filtered_gtFine_train.json',
            img_prefix=data_root + 'leftImg8bit/train/',
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        ann_file=data_root +
        'annotations/instancesonly_filtered_gtFine_val.json',
        img_prefix=data_root + 'leftImg8bit/val/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root +
        'annotations/instancesonly_filtered_gtFine_test.json',
        img_prefix=data_root + 'leftImg8bit/test/',
        pipeline=test_pipeline))
evaluation = dict(interval=1, metric='bbox')
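A note on the `keep_ratio=True` resizing used above: for `img_scale=(2048, 1024)` the image is scaled by the largest factor that fits it inside a 2048 x 1024 box while preserving aspect ratio. An illustrative sketch of that arithmetic (pure Python, mirroring the usual mmcv `rescale_size` rule; not mmcv code):

```python
# Scale factor = min(long_edge_limit / long_edge, short_edge_limit / short_edge).
def rescale_size(w, h, max_long=2048, max_short=1024):
    scale = min(max_long / max(w, h), max_short / min(w, h))
    return round(w * scale), round(h * scale)

print(rescale_size(2048, 1024))  # (2048, 1024) -- already fits exactly
print(rescale_size(4096, 2048))  # (2048, 1024) -- halved
print(rescale_size(1000, 1000))  # (1024, 1024) -- limited by the short side
```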
--------------------------------------------------------------------------------
/configs/_base_/datasets/cityscapes_instance.py:
--------------------------------------------------------------------------------
dataset_type = 'CityscapesDataset'
data_root = 'data/cityscapes/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(
        type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 1024),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=1,
    workers_per_gpu=2,
    train=dict(
        type='RepeatDataset',
        times=8,
        dataset=dict(
            type=dataset_type,
            ann_file=data_root +
            'annotations/instancesonly_filtered_gtFine_train.json',
            img_prefix=data_root + 'leftImg8bit/train/',
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        ann_file=data_root +
        'annotations/instancesonly_filtered_gtFine_val.json',
        img_prefix=data_root + 'leftImg8bit/val/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root +
        'annotations/instancesonly_filtered_gtFine_test.json',
        img_prefix=data_root + 'leftImg8bit/test/',
        pipeline=test_pipeline))
evaluation = dict(metric=['bbox', 'segm'])

--------------------------------------------------------------------------------
/configs/_base_/datasets/coco_detection.py:
--------------------------------------------------------------------------------
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        pipeline=test_pipeline))
evaluation = dict(interval=1, metric='bbox')
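As a usage sketch of how such a `_base_` dataset config is consumed (assuming mmcv and this repo are installed, and COCO is unpacked under `data/coco/` as the config expects):

```python
# Load the config file and build the training dataset from its `data.train` dict.
from mmcv import Config
from mmdet.datasets import build_dataset

cfg = Config.fromfile('configs/_base_/datasets/coco_detection.py')
dataset = build_dataset(cfg.data.train)  # applies train_pipeline per sample
print(len(dataset))                      # number of training images
```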
--------------------------------------------------------------------------------
/configs/_base_/datasets/coco_instance.py:
--------------------------------------------------------------------------------
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        pipeline=test_pipeline))
evaluation = dict(metric=['bbox', 'segm'])

--------------------------------------------------------------------------------
/configs/_base_/datasets/coco_instance_semantic.py:
--------------------------------------------------------------------------------
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='SegRescale', scale_factor=1 / 8),
    dict(type='DefaultFormatBundle'),
    dict(
        type='Collect',
        keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip', flip_ratio=0.5),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/',
        seg_prefix=data_root + 'stuffthingmaps/train2017/',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        pipeline=test_pipeline))
evaluation = dict(metric=['bbox', 'segm'])

--------------------------------------------------------------------------------
/configs/_base_/datasets/deepfashion.py:
--------------------------------------------------------------------------------
# dataset settings
dataset_type = 'DeepFashionDataset'
data_root = 'data/DeepFashion/In-shop/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(type='Resize', img_scale=(750, 1101), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(750, 1101),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=1,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json',
        img_prefix=data_root + 'Img/',
        pipeline=train_pipeline,
        data_root=data_root),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json',
        img_prefix=data_root + 'Img/',
        pipeline=test_pipeline,
        data_root=data_root),
    test=dict(
        type=dataset_type,
        ann_file=data_root +
        'annotations/DeepFashion_segmentation_gallery.json',
        img_prefix=data_root + 'Img/',
        pipeline=test_pipeline,
        data_root=data_root))
evaluation = dict(interval=5, metric=['bbox', 'segm'])

--------------------------------------------------------------------------------
/configs/_base_/datasets/lvis_instance.py:
--------------------------------------------------------------------------------
_base_ = 'coco_instance.py'
dataset_type = 'LVISDataset'
data_root = 'data/lvis/'
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type='ClassBalancedDataset',
        oversample_thr=1e-3,
        dataset=dict(
            type=dataset_type,
            ann_file=data_root + 'annotations/lvis_v0.5_train.json',
            img_prefix=data_root + 'train2017/')),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/lvis_v0.5_val.json',
        img_prefix=data_root + 'val2017/'),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/lvis_v0.5_val.json',
        img_prefix=data_root + 'val2017/'))
evaluation = dict(metric=['bbox', 'segm'])

--------------------------------------------------------------------------------
/configs/_base_/datasets/voc0712.py:
--------------------------------------------------------------------------------
# dataset settings
dataset_type = 'VOCDataset'
data_root = 'data/VOCdevkit/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1000, 600),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type='RepeatDataset',
        times=3,
        dataset=dict(
            type=dataset_type,
            ann_file=[
                data_root + 'VOC2007/ImageSets/Main/trainval.txt',
                data_root + 'VOC2012/ImageSets/Main/trainval.txt'
            ],
            img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        pipeline=test_pipeline))
evaluation = dict(interval=1, metric='mAP')

--------------------------------------------------------------------------------
/configs/_base_/datasets/wider_face.py:
--------------------------------------------------------------------------------
# dataset settings
dataset_type = 'WIDERFaceDataset'
data_root = 'data/WIDERFace/'
img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile', to_float32=True),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='PhotoMetricDistortion',
        brightness_delta=32,
        contrast_range=(0.5, 1.5),
        saturation_range=(0.5, 1.5),
        hue_delta=18),
    dict(
        type='Expand',
        mean=img_norm_cfg['mean'],
        to_rgb=img_norm_cfg['to_rgb'],
        ratio_range=(1, 4)),
    dict(
        type='MinIoURandomCrop',
        min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
        min_crop_size=0.3),
    dict(type='Resize', img_scale=(300, 300), keep_ratio=False),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(300, 300),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=False),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=60,
    workers_per_gpu=2,
    train=dict(
        type='RepeatDataset',
        times=2,
        dataset=dict(
            type=dataset_type,
            ann_file=data_root + 'train.txt',
            img_prefix=data_root + 'WIDER_train/',
            min_size=17,
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'val.txt',
        img_prefix=data_root + 'WIDER_val/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'val.txt',
        img_prefix=data_root + 'WIDER_val/',
        pipeline=test_pipeline))
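All of these pipelines normalize channels as `(pixel - mean) / std` (after BGR-to-RGB conversion when `to_rgb=True`). A toy sketch with the ImageNet statistics most of the configs above use:

```python
# Illustrative sketch of the Normalize step; values are the ImageNet mean/std.
import numpy as np

mean = np.array([123.675, 116.28, 103.53])
std = np.array([58.395, 57.12, 57.375])

img = np.random.randint(0, 256, size=(4, 4, 3)).astype(np.float32)  # toy RGB image
normalized = (img - mean) / std
print(normalized.mean(axis=(0, 1)))  # per-channel means, roughly centered on 0
```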
--------------------------------------------------------------------------------
/configs/_base_/default_runtime.py:
--------------------------------------------------------------------------------
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]

--------------------------------------------------------------------------------
/configs/_base_/models/fast_rcnn_r50_fpn.py:
--------------------------------------------------------------------------------
# model settings
model = dict(
    type='FastRCNN',
    pretrained='torchvision://resnet50',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        type='StandardRoIHead',
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', out_size=7, sample_num=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        bbox_head=dict(
            type='Shared2FCBBoxHead',
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=80,
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0., 0., 0., 0.],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0))))
# model training and testing settings
train_cfg = dict(
    rcnn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            match_low_quality=False,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=512,
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True),
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100))
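The `MaxIoUAssigner` settings above decide which proposals train as positives. A simplified sketch of that rule (not the mmdet implementation, which additionally handles `min_pos_iou` and low-quality matches):

```python
# With pos_iou_thr == neg_iou_thr == 0.5 as above, the 'ignore' band is empty.
def assign(ious, pos_iou_thr=0.5, neg_iou_thr=0.5):
    """ious: per-proposal list of IoUs against all ground-truth boxes."""
    labels = []
    for iou_row in ious:
        best = max(iou_row) if iou_row else 0.0
        if best >= pos_iou_thr:
            labels.append('pos')
        elif best < neg_iou_thr:
            labels.append('neg')
        else:
            labels.append('ignore')
    return labels

print(assign([[0.7, 0.2], [0.3, 0.1], [0.55, 0.0]]))  # ['pos', 'neg', 'pos']
```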
--------------------------------------------------------------------------------
/configs/_base_/models/retinanet_r50_fpn.py:
--------------------------------------------------------------------------------
# model settings
model = dict(
    type='RetinaNet',
    pretrained='torchvision://resnet50',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_input',
        num_outs=5),
    bbox_head=dict(
        type='RetinaHead',
        num_classes=80,
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            octave_base_scale=4,
            scales_per_octave=3,
            ratios=[0.5, 1.0, 2.0],
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)))
# training and testing settings
train_cfg = dict(
    assigner=dict(
        type='MaxIoUAssigner',
        pos_iou_thr=0.5,
        neg_iou_thr=0.4,
        min_pos_iou=0,
        ignore_iof_thr=-1),
    allowed_border=-1,
    pos_weight=-1,
    debug=False)
test_cfg = dict(
    nms_pre=1000,
    min_bbox_size=0,
    score_thr=0.05,
    nms=dict(type='nms', iou_thr=0.5),
    max_per_img=100)

--------------------------------------------------------------------------------
/configs/_base_/models/rpn_r50_caffe_c4.py:
--------------------------------------------------------------------------------
# model settings
model = dict(
    type='RPN',
    pretrained='open-mmlab://detectron2/resnet50_caffe',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=3,
        strides=(1, 2, 2),
        dilations=(1, 1, 1),
        out_indices=(2, ),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        style='caffe'),
    neck=None,
    rpn_head=dict(
        type='RPNHead',
        in_channels=1024,
        feat_channels=1024,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[2, 4, 8, 16, 32],
            ratios=[0.5, 1.0, 2.0],
            strides=[16]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=12000,
        nms_post=2000,
        max_num=2000,
        nms_thr=0.7,
        min_bbox_size=0))
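The `AnchorGenerator` entries above define base anchors from `scales`, `ratios` and a per-level `stride`. An illustrative sketch of that geometry (simplified; the mmdet implementation also handles centering and offsets):

```python
# Anchor area is (scale * stride)^2; ratios trade width for height at fixed area.
def base_anchor_sizes(scales, ratios, stride):
    sizes = []
    for s in scales:
        base = s * stride
        for r in ratios:
            w = base / (r ** 0.5)
            h = base * (r ** 0.5)
            sizes.append((round(w, 1), round(h, 1)))
    return sizes

# RPN C4 head above: scales=[2, 4, 8, 16, 32], ratios=[0.5, 1.0, 2.0], stride=16.
print(base_anchor_sizes([2, 4, 8, 16, 32], [0.5, 1.0, 2.0], 16)[:3])
# [(45.3, 22.6), (32.0, 32.0), (22.6, 45.3)] -- the three ratios at scale 2
```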
--------------------------------------------------------------------------------
/configs/_base_/models/rpn_r50_fpn.py:
--------------------------------------------------------------------------------
# model settings
model = dict(
    type='RPN',
    pretrained='torchvision://resnet50',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=0,
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=1000,
        max_num=1000,
        nms_thr=0.7,
        min_bbox_size=0))

--------------------------------------------------------------------------------
/configs/_base_/models/ssd300.py:
--------------------------------------------------------------------------------
# model settings
input_size = 300
model = dict(
    type='SingleStageDetector',
    pretrained='open-mmlab://vgg16_caffe',
    backbone=dict(
        type='SSDVGG',
        input_size=input_size,
        depth=16,
        with_last_pool=False,
        ceil_mode=True,
        out_indices=(3, 4),
        out_feature_indices=(22, 34),
        l2_norm_scale=20),
    neck=None,
    bbox_head=dict(
        type='SSDHead',
        in_channels=(512, 1024, 512, 256, 256, 256),
        num_classes=80,
        anchor_generator=dict(
            type='SSDAnchorGenerator',
            scale_major=False,
            input_size=input_size,
            basesize_ratio_range=(0.15, 0.9),
            strides=[8, 16, 32, 64, 100, 300],
            ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2])))
cudnn_benchmark = True
train_cfg = dict(
    assigner=dict(
        type='MaxIoUAssigner',
        pos_iou_thr=0.5,
        neg_iou_thr=0.5,
        min_pos_iou=0.,
        ignore_iof_thr=-1,
        gt_max_assign_all=False),
    smoothl1_beta=1.,
    allowed_border=-1,
    pos_weight=-1,
    neg_pos_ratio=3,
    debug=False)
test_cfg = dict(
    nms=dict(type='nms', iou_thr=0.45),
    min_bbox_size=0,
    score_thr=0.02,
    max_per_img=200)

--------------------------------------------------------------------------------
/configs/_base_/schedules/schedule_1x.py:
--------------------------------------------------------------------------------
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[8, 11])
total_epochs = 12

--------------------------------------------------------------------------------
/configs/_base_/schedules/schedule_20e.py:
--------------------------------------------------------------------------------
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[16, 19])
total_epochs = 20

--------------------------------------------------------------------------------
/configs/_base_/schedules/schedule_2x.py:
--------------------------------------------------------------------------------
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[16, 22])
total_epochs = 24
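All three schedules share the same shape: a linear warmup over the first 500 iterations followed by 10x step decays at the listed epochs. A small sketch of the resulting learning-rate curve (own arithmetic mirroring the `step` policy as configured in `schedule_2x.py`; not mmcv code):

```python
# lr ramps from warmup_ratio * base_lr to base_lr, then drops 10x per step epoch.
def lr_at(epoch, it, base_lr=0.02, warmup_iters=500, warmup_ratio=0.001,
          steps=(16, 22)):
    if it < warmup_iters:  # linear warmup over the first iterations
        k = it / warmup_iters
        return base_lr * (warmup_ratio * (1 - k) + k)
    gamma = sum(1 for s in steps if epoch >= s)  # decay steps already passed
    return base_lr * (0.1 ** gamma)

print(lr_at(0, 0))       # 2e-05   (0.02 * 0.001 at the first iteration)
print(lr_at(10, 5000))   # 0.02    (full base lr after warmup)
print(lr_at(23, 5000))   # 0.0002  (two 10x drops, after epochs 16 and 22)
```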
--------------------------------------------------------------------------------
/configs/dense_reppoints_v1/dense_reppoints_v1_r50_fpn_1x_coco.py:
--------------------------------------------------------------------------------
_base_ = [
    '../_base_/datasets/coco_instance.py',
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
norm_cfg = dict(type='GN', num_groups=32, requires_grad=True)
model = dict(
    type='DenseRepPointsDetector',
    pretrained='torchvision://resnet50',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_input',
        num_outs=5,
        norm_cfg=norm_cfg),
    bbox_head=dict(
        type='DenseRepPointsHead',
        num_classes=80,
        in_channels=256,
        feat_channels=256,
        point_feat_channels=256,
        stacked_convs=3,
        stacked_mask_convs=7,
        fuse_mask_feat=True,
        num_group=9,
        num_score_group=121,
        num_points=729,
        gradient_mul=0.1,
        point_strides=[8, 16, 32, 64, 128],
        point_base_scale=4,
        norm_cfg=norm_cfg,
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox_init=dict(type='SmoothL1Loss', beta=0.11, loss_weight=0.5),
        loss_bbox_refine=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0),
        loss_pts_init=dict(
            type='ChamferLoss2D', use_cuda=True, loss_weight=0.5, eps=1e-12),
        loss_pts_refine=dict(
            type='ChamferLoss2D', use_cuda=True, loss_weight=1.0, eps=1e-12),
        loss_mask_score_init=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        transform_method='minmax'))
# training and testing settings
train_cfg = dict(
    init=dict(
        assigner=dict(type='PointAssignerV2', scale=4, pos_num=1),
        allowed_border=-1,
        pos_weight=-1,
        mask_size=56,
        dist_sample_thr=2,
        debug=False),
    refine=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        mask_size=56,
        dist_sample_thr=2,
        debug=False))
test_cfg = dict(
    nms_pre=1000,
    min_bbox_size=0,
    score_thr=0.05,
    nms=dict(type='nms', iou_thr=0.6),
    max_per_img=100)
optimizer = dict(lr=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2), _delete_=True)

--------------------------------------------------------------------------------
/configs/dense_reppoints_v1/dense_reppoints_v1_r50_fpn_giou_1x_coco.py:
--------------------------------------------------------------------------------
_base_ = './dense_reppoints_v1_r50_fpn_1x_coco.py'
model = dict(
    bbox_head=dict(
        loss_bbox_init=dict(_delete_=True, type='GIoULoss', loss_weight=1.0),
        loss_bbox_refine=dict(_delete_=True, type='GIoULoss', loss_weight=2.0))
)
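The `_delete_=True` overrides above swap the SmoothL1 box losses for GIoU losses. For reference, a minimal sketch of the GIoU definition those losses are built on (loss = 1 - GIoU; illustrative, not the mmdet kernel):

```python
# GIoU = IoU - (enclosing_area - union) / enclosing_area; boxes are (x1, y1, x2, y2).
def giou(box1, box2):
    x1, y1, x2, y2 = box1
    a1, b1, a2, b2 = box2
    inter_w = max(0.0, min(x2, a2) - max(x1, a1))
    inter_h = max(0.0, min(y2, b2) - max(y1, b1))
    inter = inter_w * inter_h
    union = (x2 - x1) * (y2 - y1) + (a2 - a1) * (b2 - b1) - inter
    iou = inter / union
    enclose = (max(x2, a2) - min(x1, a1)) * (max(y2, b2) - min(y1, b1))
    return iou - (enclose - union) / enclose

print(giou((0, 0, 2, 2), (1, 1, 3, 3)))      # ~ -0.0794
print(1 - giou((0, 0, 2, 2), (1, 1, 3, 3)))  # the corresponding GIoU loss
```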
dict(type='DefaultFormatBundle'), 20 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 21 | ] 22 | data = dict(train=dict(pipeline=train_pipeline)) -------------------------------------------------------------------------------- /configs/dense_reppoints_v2/dense_reppoints_v2_r50_fpn_giou_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './dense_reppoints_v2_r50_fpn_1x_coco.py' 2 | model = dict( 3 | bbox_head=dict( 4 | loss_bbox_init=dict(_delete_=True, type='GIoULoss', loss_weight=1.0), 5 | loss_bbox_refine=dict(_delete_=True, type='GIoULoss', loss_weight=2.0)) 6 | ) 7 | -------------------------------------------------------------------------------- /configs/dense_reppoints_v2/dense_reppoints_v2_r50_fpn_giou_mstrain_3x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './dense_reppoints_v2_r50_fpn_giou_1x_coco.py' 2 | # learning policy 3 | lr_config = dict(step=[24, 33]) 4 | total_epochs = 36 5 | # multi-scale training 6 | img_norm_cfg = dict( 7 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 8 | train_pipeline = [ 9 | dict(type='LoadImageFromFile'), 10 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 11 | dict( 12 | type='Resize', 13 | img_scale=[(1333, 480), (1333, 960)], 14 | multiscale_mode='range', 15 | keep_ratio=True), 16 | dict(type='RandomFlip', flip_ratio=0.5), 17 | dict(type='Normalize', **img_norm_cfg), 18 | dict(type='Pad', size_divisor=32), 19 | dict(type='LoadDenseRPDV2Annotations'), 20 | dict(type='RPDV2FormatBundle'), 21 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_sem_map', 'gt_contours']), 22 | ] 23 | data = dict(train=dict(pipeline=train_pipeline)) -------------------------------------------------------------------------------- /configs/reppoints_v1/reppoints_minmax_r50_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/coco_detection.py', 3 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 4 | ] 5 | norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) 6 | model = dict( 7 | type='RepPointsDetector', 8 | pretrained='torchvision://resnet50', 9 | backbone=dict( 10 | type='ResNet', 11 | depth=50, 12 | num_stages=4, 13 | out_indices=(0, 1, 2, 3), 14 | frozen_stages=1, 15 | norm_cfg=dict(type='BN', requires_grad=True), 16 | norm_eval=True, 17 | style='pytorch'), 18 | neck=dict( 19 | type='FPN', 20 | in_channels=[256, 512, 1024, 2048], 21 | out_channels=256, 22 | start_level=1, 23 | add_extra_convs='on_input', 24 | num_outs=5, 25 | norm_cfg=norm_cfg), 26 | bbox_head=dict( 27 | type='RepPointsHead', 28 | num_classes=80, 29 | in_channels=256, 30 | feat_channels=256, 31 | point_feat_channels=256, 32 | stacked_convs=3, 33 | num_points=9, 34 | gradient_mul=0.1, 35 | point_strides=[8, 16, 32, 64, 128], 36 | point_base_scale=4, 37 | norm_cfg=norm_cfg, 38 | loss_cls=dict( 39 | type='FocalLoss', 40 | use_sigmoid=True, 41 | gamma=2.0, 42 | alpha=0.25, 43 | loss_weight=1.0), 44 | loss_bbox_init=dict(type='SmoothL1Loss', beta=0.11, loss_weight=0.5), 45 | loss_bbox_refine=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0), 46 | transform_method='minmax')) 47 | # training and testing settings 48 | train_cfg = dict( 49 | init=dict( 50 | assigner=dict(type='PointAssignerV2', scale=4, pos_num=1), 51 | allowed_border=-1, 52 | pos_weight=-1, 53 | debug=False), 54 | 
refine=dict( 55 | assigner=dict(type='ATSSAssigner', topk=9), 56 | allowed_border=-1, 57 | pos_weight=-1, 58 | debug=False)) 59 | test_cfg = dict( 60 | nms_pre=1000, 61 | min_bbox_size=0, 62 | score_thr=0.05, 63 | nms=dict(type='nms', iou_thr=0.6), 64 | max_per_img=100) 65 | optimizer = dict(lr=0.01) 66 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2), _delete_=True) -------------------------------------------------------------------------------- /configs/reppoints_v1/reppoints_minmax_r50_fpn_giou_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './reppoints_minmax_r50_fpn_1x_coco.py' 2 | model = dict( 3 | bbox_head=dict( 4 | loss_bbox_init=dict(_delete_=True, type='GIoULoss', loss_weight=1.0), 5 | loss_bbox_refine=dict(_delete_=True, type='GIoULoss', loss_weight=2.0)) 6 | ) 7 | -------------------------------------------------------------------------------- /configs/reppoints_v1/reppoints_v1_mobilenet_fpn_giou_128c_mstrain_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './reppoints_v1_mobilenet_fpn_giou_mstrain_2x_coco.py' 2 | model = dict( 3 | neck=dict( 4 | in_channels=[24, 32, 96, 320], 5 | out_channels=128), 6 | bbox_head=dict( 7 | in_channels=128, 8 | feat_channels=128, 9 | point_feat_channels=128)) 10 | -------------------------------------------------------------------------------- /configs/reppoints_v1/reppoints_v1_mobilenet_fpn_giou_mstrain_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './reppoints_v1_r50_fpn_giou_mstrain_2x_coco.py' 2 | model = dict( 3 | pretrained='https://cloudstor.aarnet.edu.au/plus/s/xtixKaxLWmbcyf7/download#mobilenet_v2-ecbe2b5.pth', 4 | backbone=dict( 5 | _delete_=True, 6 | type='MobileNetV2', 7 | frozen_stages=-1, 8 | norm_cfg=dict(type='SyncBN')), 9 | neck=dict( 10 | in_channels=[24, 32, 96, 320])) 11 | -------------------------------------------------------------------------------- /configs/reppoints_v1/reppoints_v1_r101_fpn_dconv_c3-c5_giou_mstrain_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './reppoints_v1_r50_fpn_giou_mstrain_2x_coco.py' 2 | model = dict( 3 | pretrained='torchvision://resnet101', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=101, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=True), 11 | dcn=dict(type='DCNv2', deformable_groups=1, fallback_on_stride=False), 12 | stage_with_dcn=(False, True, True, True), 13 | norm_eval=True, 14 | style='pytorch')) -------------------------------------------------------------------------------- /configs/reppoints_v1/reppoints_v1_r101_fpn_giou_mstrain_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './reppoints_v1_r50_fpn_giou_mstrain_2x_coco.py' 2 | model = dict( 3 | pretrained='torchvision://resnet101', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=101, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=True), 11 | norm_eval=True, 12 | style='pytorch')) -------------------------------------------------------------------------------- /configs/reppoints_v1/reppoints_v1_r50_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './reppoints_minmax_r50_fpn_1x_coco.py' 2 | model = 
dict(bbox_head=dict(transform_method='exact_minmax')) 3 | -------------------------------------------------------------------------------- /configs/reppoints_v1/reppoints_v1_r50_fpn_giou_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './reppoints_v1_r50_fpn_1x_coco.py' 2 | model = dict( 3 | bbox_head=dict( 4 | loss_bbox_init=dict(_delete_=True, type='GIoULoss', loss_weight=1.0), 5 | loss_bbox_refine=dict(_delete_=True, type='GIoULoss', loss_weight=2.0)) 6 | ) 7 | -------------------------------------------------------------------------------- /configs/reppoints_v1/reppoints_v1_r50_fpn_giou_mstrain_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './reppoints_v1_r50_fpn_giou_1x_coco.py' 2 | # learning policy 3 | lr_config = dict(step=[16, 22]) 4 | total_epochs = 24 5 | # multi-scale training 6 | img_norm_cfg = dict( 7 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 8 | train_pipeline = [ 9 | dict(type='LoadImageFromFile'), 10 | dict(type='LoadAnnotations', with_bbox=True), 11 | dict( 12 | type='Resize', 13 | img_scale=[(1333, 480), (1333, 960)], 14 | multiscale_mode='range', 15 | keep_ratio=True), 16 | dict(type='RandomFlip', flip_ratio=0.5), 17 | dict(type='Normalize', **img_norm_cfg), 18 | dict(type='Pad', size_divisor=32), 19 | dict(type='DefaultFormatBundle'), 20 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 21 | ] 22 | data = dict(train=dict(pipeline=train_pipeline)) -------------------------------------------------------------------------------- /configs/reppoints_v1/reppoints_v1_x101_fpn_dconv_c3-c5_giou_mstrain_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './reppoints_v1_r50_fpn_giou_mstrain_2x_coco.py' 2 | model = dict( 3 | pretrained='open-mmlab://resnext101_64x4d', 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=101, 7 | groups=64, 8 | base_width=4, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=True), 13 | dcn=dict(type='DCNv2', deformable_groups=1, fallback_on_stride=False), 14 | stage_with_dcn=(False, True, True, True), 15 | norm_eval=True, 16 | with_cp=True, 17 | style='pytorch')) -------------------------------------------------------------------------------- /configs/reppoints_v1/reppoints_v1_x101_fpn_giou_mstrain_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './reppoints_v1_r50_fpn_giou_mstrain_2x_coco.py' 2 | model = dict( 3 | pretrained='open-mmlab://resnext101_64x4d', 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=101, 7 | groups=64, 8 | base_width=4, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=True), 13 | norm_eval=True, 14 | with_cp=True, 15 | style='pytorch')) -------------------------------------------------------------------------------- /configs/reppoints_v2/reppoints_v2_mobilenet_fpn_giou_128c_mstrain_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './reppoints_v2_mobilenet_fpn_giou_mstrain_2x_coco.py' 2 | model = dict( 3 | neck=dict( 4 | in_channels=[24, 32, 96, 320], 5 | out_channels=128), 6 | bbox_head=dict( 7 | in_channels=128, 8 | feat_channels=128, 9 | point_feat_channels=128)) 10 | -------------------------------------------------------------------------------- 
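These variant configs carry only their deltas: everything else is inherited through the _base_ chain, and a child dict that sets _delete_=True (as the giou configs above do when swapping the SmoothL1 losses for GIoULoss, and as the mobilenet configs do for the backbone) replaces the corresponding parent dict wholesale instead of being merged key by key. A minimal sketch of inspecting the merged result, assuming a checkout of this repo and an mmcv 1.x install (Config.fromfile resolves the whole _base_ chain):

    from mmcv import Config

    # Hypothetical inspection script, run from the repo root.
    cfg = Config.fromfile(
        'configs/reppoints_v2/'
        'reppoints_v2_mobilenet_fpn_giou_128c_mstrain_2x_coco.py')

    print(cfg.model.neck.out_channels)           # 128, overridden above
    print(cfg.model.backbone.type)               # 'MobileNetV2', from the parent
    print(cfg.model.bbox_head.loss_bbox_refine)  # GIoULoss; SmoothL1 was deleted

--------------------------------------------------------------------------------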
/configs/reppoints_v2/reppoints_v2_mobilenet_fpn_giou_mstrain_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './reppoints_v2_r50_fpn_giou_mstrain_2x_coco.py' 2 | model = dict( 3 | pretrained='https://cloudstor.aarnet.edu.au/plus/s/xtixKaxLWmbcyf7/download#mobilenet_v2-ecbe2b5.pth', 4 | backbone=dict( 5 | _delete_=True, 6 | type='MobileNetV2', 7 | frozen_stages=-1, 8 | norm_cfg=dict(type='SyncBN')), 9 | neck=dict( 10 | in_channels=[24, 32, 96, 320])) 11 | -------------------------------------------------------------------------------- /configs/reppoints_v2/reppoints_v2_r101_fpn_dconv_c3-c5_giou_mstrain_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './reppoints_v2_r50_fpn_giou_mstrain_2x_coco.py' 2 | model = dict( 3 | pretrained='torchvision://resnet101', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=101, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=True), 11 | dcn=dict(type='DCNv2', deformable_groups=1, fallback_on_stride=False), 12 | stage_with_dcn=(False, True, True, True), 13 | norm_eval=True, 14 | style='pytorch')) -------------------------------------------------------------------------------- /configs/reppoints_v2/reppoints_v2_r101_fpn_giou_mstrain_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './reppoints_v2_r50_fpn_giou_mstrain_2x_coco.py' 2 | model = dict( 3 | pretrained='torchvision://resnet101', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=101, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=True), 11 | norm_eval=True, 12 | style='pytorch')) -------------------------------------------------------------------------------- /configs/reppoints_v2/reppoints_v2_r50_fpn_giou_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './reppoints_v2_r50_fpn_1x_coco.py' 2 | model = dict( 3 | bbox_head=dict( 4 | loss_bbox_init=dict(_delete_=True, type='GIoULoss', loss_weight=1.0), 5 | loss_bbox_refine=dict(_delete_=True, type='GIoULoss', loss_weight=2.0)) 6 | ) 7 | -------------------------------------------------------------------------------- /configs/reppoints_v2/reppoints_v2_r50_fpn_giou_mstrain_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './reppoints_v2_r50_fpn_giou_1x_coco.py' 2 | # learning policy 3 | lr_config = dict(step=[16, 22]) 4 | total_epochs = 24 5 | # multi-scale training 6 | img_norm_cfg = dict( 7 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 8 | train_pipeline = [ 9 | dict(type='LoadImageFromFile'), 10 | dict(type='LoadAnnotations', with_bbox=True), 11 | dict( 12 | type='Resize', 13 | img_scale=[(1333, 480), (1333, 960)], 14 | multiscale_mode='range', 15 | keep_ratio=True), 16 | dict(type='RandomFlip', flip_ratio=0.5), 17 | dict(type='Normalize', **img_norm_cfg), 18 | dict(type='Pad', size_divisor=32), 19 | dict(type='LoadRPDV2Annotations'), 20 | dict(type='RPDV2FormatBundle'), 21 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_sem_map', 'gt_sem_weights']), 22 | ] 23 | data = dict(train=dict(pipeline=train_pipeline)) -------------------------------------------------------------------------------- /configs/reppoints_v2/reppoints_v2_x101_fpn_dconv_c3-c5_giou_mstrain_2x_coco.py: 
-------------------------------------------------------------------------------- 1 | _base_ = './reppoints_v2_r50_fpn_giou_mstrain_2x_coco.py' 2 | model = dict( 3 | pretrained='open-mmlab://resnext101_64x4d', 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=101, 7 | groups=64, 8 | base_width=4, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=True), 13 | dcn=dict(type='DCNv2', deformable_groups=1, fallback_on_stride=False), 14 | stage_with_dcn=(False, True, True, True), 15 | norm_eval=True, 16 | with_cp=True, 17 | style='pytorch')) -------------------------------------------------------------------------------- /configs/reppoints_v2/reppoints_v2_x101_fpn_giou_mstrain_2x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = './reppoints_v2_r50_fpn_giou_mstrain_2x_coco.py' 2 | model = dict( 3 | pretrained='open-mmlab://resnext101_64x4d', 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=101, 7 | groups=64, 8 | base_width=4, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=True), 13 | norm_eval=True, 14 | with_cp=True, 15 | style='pytorch')) -------------------------------------------------------------------------------- /demo/reppointsv2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Scalsol/RepPointsV2/36e1e54549061a735831d776781cb64caf6de68a/demo/reppointsv2.png -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG PYTORCH="1.5" 2 | ARG CUDA="10.1" 3 | ARG CUDNN="7" 4 | 5 | FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel 6 | 7 | ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" 8 | ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" 9 | ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" 10 | 11 | RUN apt-get update && apt-get install -y git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \ 12 | && apt-get clean \ 13 | && rm -rf /var/lib/apt/lists/* 14 | 15 | # Install mmdetection 16 | RUN conda clean --all 17 | RUN git clone https://github.com/open-mmlab/mmdetection.git /mmdetection 18 | WORKDIR /mmdetection 19 | ENV FORCE_CUDA="1" 20 | RUN pip install cython --no-cache-dir 21 | RUN pip install "git+https://github.com/open-mmlab/cocoapi.git#subdirectory=pycocotools" 22 | RUN pip install --no-cache-dir -e . 23 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ================= 3 | 4 | mmdet.apis 5 | -------------- 6 | .. automodule:: mmdet.apis 7 | :members: 8 | 9 | mmdet.core 10 | -------------- 11 | 12 | anchor 13 | ^^^^^^^^^^ 14 | .. automodule:: mmdet.core.anchor 15 | :members: 16 | 17 | bbox 18 | ^^^^^^^^^^ 19 | .. automodule:: mmdet.core.bbox 20 | :members: 21 | 22 | mask 23 | ^^^^^^^^^^ 24 | .. automodule:: mmdet.core.mask 25 | :members: 26 | 27 | evaluation 28 | ^^^^^^^^^^ 29 | .. automodule:: mmdet.core.evaluation 30 | :members: 31 | 32 | post_processing 33 | ^^^^^^^^^^^^^^^ 34 | .. automodule:: mmdet.core.post_processing 35 | :members: 36 | 37 | fp16 38 | ^^^^^^^^^^ 39 | .. automodule:: mmdet.core.fp16 40 | :members: 41 | 42 | optimizer 43 | ^^^^^^^^^^ 44 | .. automodule:: mmdet.core.optimizer 45 | :members: 46 | 47 | utils 48 | ^^^^^^^^^^ 49 | .. automodule:: mmdet.core.utils 50 | :members: 51 | 52 | mmdet.datasets 53 | -------------- 54 | 55 | datasets 56 | ^^^^^^^^^^ 57 | .. automodule:: mmdet.datasets 58 | :members: 59 | 60 | pipelines 61 | ^^^^^^^^^^ 62 | .. automodule:: mmdet.datasets.pipelines 63 | :members: 64 | 65 | mmdet.models 66 | -------------- 67 | 68 | detectors 69 | ^^^^^^^^^^ 70 | .. automodule:: mmdet.models.detectors 71 | :members: 72 | 73 | backbones 74 | ^^^^^^^^^^ 75 | .. automodule:: mmdet.models.backbones 76 | :members: 77 | 78 | necks 79 | ^^^^^^^^^^^^ 80 | .. automodule:: mmdet.models.necks 81 | :members: 82 | 83 | dense_heads 84 | ^^^^^^^^^^^^ 85 | .. automodule:: mmdet.models.dense_heads 86 | :members: 87 | 88 | roi_heads 89 | ^^^^^^^^^^ 90 | .. automodule:: mmdet.models.roi_heads 91 | :members: 92 | 93 | losses 94 | ^^^^^^^^^^ 95 | .. automodule:: mmdet.models.losses 96 | :members: 97 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to MMDetection's documentation! 2 | ======================================= 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | install.md 8 | getting_started.md 9 | config.md 10 | model_zoo.md 11 | tutorials/finetune.md 12 | tutorials/new_dataset.md 13 | tutorials/data_pipeline.md 14 | tutorials/new_modules.md 15 | compatibility.md 16 | changelog.md 17 | projects.md 18 | api.rst 19 | 20 | 21 | Indices and tables 22 | ================== 23 | 24 | * :ref:`genindex` 25 | * :ref:`search` 26 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /mmdet/VERSION: -------------------------------------------------------------------------------- 1 | 2.2.0 2 | -------------------------------------------------------------------------------- /mmdet/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__, short_version 2 | 3 | __all__ = ['__version__', 'short_version'] 4 | -------------------------------------------------------------------------------- /mmdet/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .inference import (async_inference_detector, inference_detector, 2 | init_detector, show_result_pyplot) 3 | from .test import multi_gpu_test, single_gpu_test 4 | from .train import get_root_logger, set_random_seed, train_detector 5 | 6 | __all__ = [ 7 | 'get_root_logger', 'set_random_seed', 'train_detector', 'init_detector', 8 | 'async_inference_detector', 'inference_detector', 'show_result_pyplot', 9 | 'multi_gpu_test', 'single_gpu_test' 10 | ] 11 | -------------------------------------------------------------------------------- /mmdet/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor import * # noqa: F401, F403 2 | from .bbox import * # noqa: F401, F403 3 | from .evaluation import * # noqa: F401, F403 4 | from .fp16 import * # noqa: F401, F403 5 | from .mask import * # noqa: F401, F403 6 | from .post_processing import * # noqa: F401, F403 7 | from .utils import * # noqa: F401, F403 8 | -------------------------------------------------------------------------------- /mmdet/core/anchor/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_generator import AnchorGenerator, LegacyAnchorGenerator 2 | from .builder import ANCHOR_GENERATORS, build_anchor_generator 3 | from .point_generator import PointGenerator 4 | from .utils import anchor_inside_flags, calc_region, images_to_levels 5 | 6 | __all__ = [ 7 | 'AnchorGenerator', 'LegacyAnchorGenerator', 'anchor_inside_flags', 8 | 'PointGenerator', 'images_to_levels', 'calc_region', 9 | 'build_anchor_generator', 'ANCHOR_GENERATORS' 10 | ] 11 | -------------------------------------------------------------------------------- /mmdet/core/anchor/builder.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils import Registry, build_from_cfg 2 | 3 | ANCHOR_GENERATORS = Registry('Anchor generator') 4 | 5 | 6 | def build_anchor_generator(cfg, default_args=None): 7 | return build_from_cfg(cfg, ANCHOR_GENERATORS, default_args) 8 | -------------------------------------------------------------------------------- /mmdet/core/anchor/point_generator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .builder import ANCHOR_GENERATORS 4 | 5 | 6 | @ANCHOR_GENERATORS.register_module() 7 | class PointGenerator(object): 8 | 9 | def _meshgrid(self, x, y, row_major=True): 10 | xx = x.repeat(len(y)) 11 | yy = y.view(-1, 1).repeat(1, len(x)).view(-1) 12 | if row_major: 13 
| return xx, yy 14 | else: 15 | return yy, xx 16 | 17 | def grid_points(self, featmap_size, stride=16, device='cuda'): 18 | feat_h, feat_w = featmap_size 19 | shift_x = torch.arange(0., feat_w, device=device) * stride 20 | shift_y = torch.arange(0., feat_h, device=device) * stride 21 | shift_xx, shift_yy = self._meshgrid(shift_x, shift_y) 22 | stride = shift_x.new_full((shift_xx.shape[0], ), stride) 23 | shifts = torch.stack([shift_xx, shift_yy, stride], dim=-1) 24 | all_points = shifts.to(device) 25 | return all_points 26 | 27 | def valid_flags(self, featmap_size, valid_size, device='cuda'): 28 | feat_h, feat_w = featmap_size 29 | valid_h, valid_w = valid_size 30 | assert valid_h <= feat_h and valid_w <= feat_w 31 | valid_x = torch.zeros(feat_w, dtype=torch.bool, device=device) 32 | valid_y = torch.zeros(feat_h, dtype=torch.bool, device=device) 33 | valid_x[:valid_w] = 1 34 | valid_y[:valid_h] = 1 35 | valid_xx, valid_yy = self._meshgrid(valid_x, valid_y) 36 | valid = valid_xx & valid_yy 37 | return valid 38 | -------------------------------------------------------------------------------- /mmdet/core/anchor/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def images_to_levels(target, num_levels): 5 | """Convert targets by image to targets by feature level. 6 | 7 | [target_img0, target_img1] -> [target_level0, target_level1, ...] 8 | """ 9 | target = torch.stack(target, 0) 10 | level_targets = [] 11 | start = 0 12 | for n in num_levels: 13 | end = start + n 14 | # level_targets.append(target[:, start:end].squeeze(0)) 15 | level_targets.append(target[:, start:end]) 16 | start = end 17 | return level_targets 18 | 19 | 20 | def anchor_inside_flags(flat_anchors, 21 | valid_flags, 22 | img_shape, 23 | allowed_border=0): 24 | """Check whether the anchors are inside the border 25 | 26 | Args: 27 | flat_anchors (torch.Tensor): Flatten anchors, shape (n, 4). 28 | valid_flags (torch.Tensor): An existing valid flags of anchors. 29 | img_shape (tuple(int)): Shape of current image. 30 | allowed_border (int, optional): The border to allow the valid anchor. 31 | Defaults to 0. 32 | 33 | Returns: 34 | torch.Tensor: Flags indicating whether the anchors are inside a 35 | valid range. 36 | """ 37 | img_h, img_w = img_shape[:2] 38 | if allowed_border >= 0: 39 | inside_flags = valid_flags & \ 40 | (flat_anchors[:, 0] >= -allowed_border) & \ 41 | (flat_anchors[:, 1] >= -allowed_border) & \ 42 | (flat_anchors[:, 2] < img_w + allowed_border) & \ 43 | (flat_anchors[:, 3] < img_h + allowed_border) 44 | else: 45 | inside_flags = valid_flags 46 | return inside_flags 47 | 48 | 49 | def calc_region(bbox, ratio, featmap_size=None): 50 | """Calculate a proportional bbox region. 51 | 52 | The bbox center are fixed and the new h' and w' is h * ratio and w * ratio. 53 | 54 | Args: 55 | bbox (Tensor): Bboxes to calculate regions, shape (n, 4). 56 | ratio (float): Ratio of the output region. 57 | featmap_size (tuple): Feature map size used for clipping the boundary. 
58 | 59 | Returns: 60 | tuple: x1, y1, x2, y2 61 | """ 62 | x1 = torch.round((1 - ratio) * bbox[0] + ratio * bbox[2]).long() 63 | y1 = torch.round((1 - ratio) * bbox[1] + ratio * bbox[3]).long() 64 | x2 = torch.round(ratio * bbox[0] + (1 - ratio) * bbox[2]).long() 65 | y2 = torch.round(ratio * bbox[1] + (1 - ratio) * bbox[3]).long() 66 | if featmap_size is not None: 67 | x1 = x1.clamp(min=0, max=featmap_size[1]) 68 | y1 = y1.clamp(min=0, max=featmap_size[0]) 69 | x2 = x2.clamp(min=0, max=featmap_size[1]) 70 | y2 = y2.clamp(min=0, max=featmap_size[0]) 71 | return (x1, y1, x2, y2) 72 | -------------------------------------------------------------------------------- /mmdet/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | from .assigners import (AssignResult, BaseAssigner, CenterRegionAssigner, 2 | MaxIoUAssigner) 3 | from .builder import build_assigner, build_bbox_coder, build_sampler 4 | from .coder import (BaseBBoxCoder, DeltaXYWHBBoxCoder, PseudoBBoxCoder, 5 | TBLRBBoxCoder) 6 | from .iou_calculators import BboxOverlaps2D, bbox_overlaps 7 | from .samplers import (BaseSampler, CombinedSampler, 8 | InstanceBalancedPosSampler, IoUBalancedNegSampler, 9 | PseudoSampler, RandomSampler, SamplingResult) 10 | from .transforms import (bbox2distance, bbox2result, bbox2roi, bbox_flip, 11 | bbox_mapping, bbox_mapping_back, distance2bbox, 12 | roi2bbox) 13 | 14 | __all__ = [ 15 | 'bbox_overlaps', 'BboxOverlaps2D', 'BaseAssigner', 'MaxIoUAssigner', 16 | 'AssignResult', 'BaseSampler', 'PseudoSampler', 'RandomSampler', 17 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 18 | 'SamplingResult', 'build_assigner', 'build_sampler', 'bbox_flip', 19 | 'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', 20 | 'distance2bbox', 'bbox2distance', 'build_bbox_coder', 'BaseBBoxCoder', 21 | 'PseudoBBoxCoder', 'DeltaXYWHBBoxCoder', 'TBLRBBoxCoder', 22 | 'CenterRegionAssigner' 23 | ] 24 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .approx_max_iou_assigner import ApproxMaxIoUAssigner 2 | from .assign_result import AssignResult 3 | from .atss_assigner import ATSSAssigner 4 | from .base_assigner import BaseAssigner 5 | from .center_region_assigner import CenterRegionAssigner 6 | from .max_iou_assigner import MaxIoUAssigner 7 | from .point_assigner import PointAssigner 8 | from .point_assigner_v2 import PointAssignerV2 9 | from .point_ct_assigner import PointCTAssigner 10 | from .point_hm_assigner import PointHMAssigner 11 | 12 | __all__ = [ 13 | 'BaseAssigner', 'MaxIoUAssigner', 'ApproxMaxIoUAssigner', 'AssignResult', 14 | 'PointAssigner', 'PointAssignerV2', 'ATSSAssigner', 'CenterRegionAssigner', 'PointHMAssigner', 15 | 'PointCTAssigner' 16 | ] 17 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/base_assigner.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class BaseAssigner(metaclass=ABCMeta): 5 | """Base assigner that assigns boxes to ground truth boxes""" 6 | 7 | @abstractmethod 8 | def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None): 9 | """Assign each box to either a ground truth box or a negative sample""" 10 | pass 11 |
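# A minimal sketch of a concrete assigner, to show how the interface above is
# used: implement assign() and register the class with the BBOX_ASSIGNERS
# registry so that configs can request it by name, the way the RepPoints
# configs above request dict(type='PointAssignerV2', scale=4, pos_num=1).
# NearestCenterAssigner below is purely illustrative (not part of this
# codebase); it assigns every box to the gt box with the nearest center.
#
#     import torch
#     from ..builder import BBOX_ASSIGNERS
#     from .assign_result import AssignResult
#
#     @BBOX_ASSIGNERS.register_module()
#     class NearestCenterAssigner(BaseAssigner):
#
#         def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None,
#                    gt_labels=None):
#             box_ctr = (bboxes[:, :2] + bboxes[:, 2:4]) / 2
#             gt_ctr = (gt_bboxes[:, :2] + gt_bboxes[:, 2:4]) / 2
#             dist = torch.cdist(box_ctr, gt_ctr)  # (num_boxes, num_gts)
#             min_dist, gt_inds = dist.min(dim=1)
#             labels = gt_labels[gt_inds] if gt_labels is not None else None
#             # AssignResult expects 1-based gt indices; 0 marks negatives
#             return AssignResult(gt_bboxes.size(0), gt_inds + 1, min_dist,
#                                 labels=labels)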
-------------------------------------------------------------------------------- /mmdet/core/bbox/builder.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils import Registry, build_from_cfg 2 | 3 | BBOX_ASSIGNERS = Registry('bbox_assigner') 4 | BBOX_SAMPLERS = Registry('bbox_sampler') 5 | BBOX_CODERS = Registry('bbox_coder') 6 | 7 | 8 | def build_assigner(cfg, **default_args): 9 | """Builder of box assigner""" 10 | return build_from_cfg(cfg, BBOX_ASSIGNERS, default_args) 11 | 12 | 13 | def build_sampler(cfg, **default_args): 14 | """Builder of box sampler""" 15 | return build_from_cfg(cfg, BBOX_SAMPLERS, default_args) 16 | 17 | 18 | def build_bbox_coder(cfg, **default_args): 19 | """Builder of box coder""" 20 | return build_from_cfg(cfg, BBOX_CODERS, default_args) 21 | -------------------------------------------------------------------------------- /mmdet/core/bbox/coder/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_bbox_coder import BaseBBoxCoder 2 | from .delta_xywh_bbox_coder import DeltaXYWHBBoxCoder 3 | from .legacy_delta_xywh_bbox_coder import LegacyDeltaXYWHBBoxCoder 4 | from .pseudo_bbox_coder import PseudoBBoxCoder 5 | from .tblr_bbox_coder import TBLRBBoxCoder 6 | 7 | __all__ = [ 8 | 'BaseBBoxCoder', 'PseudoBBoxCoder', 'DeltaXYWHBBoxCoder', 9 | 'LegacyDeltaXYWHBBoxCoder', 'TBLRBBoxCoder' 10 | ] 11 | -------------------------------------------------------------------------------- /mmdet/core/bbox/coder/base_bbox_coder.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class BaseBBoxCoder(metaclass=ABCMeta): 5 | """Base bounding box coder""" 6 | 7 | def __init__(self, **kwargs): 8 | pass 9 | 10 | @abstractmethod 11 | def encode(self, bboxes, gt_bboxes): 12 | """Encode deltas between bboxes and ground truth boxes""" 13 | pass 14 | 15 | @abstractmethod 16 | def decode(self, bboxes, bboxes_pred): 17 | """ 18 | Decode the predicted bboxes according to prediction and base boxes 19 | """ 20 | pass 21 | -------------------------------------------------------------------------------- /mmdet/core/bbox/coder/pseudo_bbox_coder.py: -------------------------------------------------------------------------------- 1 | from ..builder import BBOX_CODERS 2 | from .base_bbox_coder import BaseBBoxCoder 3 | 4 | 5 | @BBOX_CODERS.register_module() 6 | class PseudoBBoxCoder(BaseBBoxCoder): 7 | """Pseudo bounding box coder""" 8 | 9 | def __init__(self, **kwargs): 10 | super(PseudoBBoxCoder, self).__init__(**kwargs) 11 | 12 | def encode(self, bboxes, gt_bboxes): 13 | """torch.Tensor: return the given ``gt_bboxes``""" 14 | return gt_bboxes 15 | 16 | def decode(self, bboxes, pred_bboxes): 17 | """torch.Tensor: return the given ``pred_bboxes``""" 18 | return pred_bboxes 19 | -------------------------------------------------------------------------------- /mmdet/core/bbox/demodata.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def ensure_rng(rng=None): 6 | """ 7 | Simple version of the ``kwarray.ensure_rng`` 8 | 9 | Args: 10 | rng (int | numpy.random.RandomState | None): 11 | if None, then defaults to the global rng.
Otherwise this can be an 12 | integer or a RandomState class 13 | Returns: 14 | (numpy.random.RandomState) : rng - 15 | a numpy random number generator 16 | 17 | References: 18 | https://gitlab.kitware.com/computer-vision/kwarray/blob/master/kwarray/util_random.py#L270 19 | """ 20 | 21 | if rng is None: 22 | rng = np.random.mtrand._rand 23 | elif isinstance(rng, int): 24 | rng = np.random.RandomState(rng) 25 | else: 26 | rng = rng 27 | return rng 28 | 29 | 30 | def random_boxes(num=1, scale=1, rng=None): 31 | """ 32 | Simple version of ``kwimage.Boxes.random`` 33 | 34 | Returns: 35 | Tensor: shape (n, 4) in x1, y1, x2, y2 format. 36 | 37 | References: 38 | https://gitlab.kitware.com/computer-vision/kwimage/blob/master/kwimage/structs/boxes.py#L1390 39 | 40 | Example: 41 | >>> num = 3 42 | >>> scale = 512 43 | >>> rng = 0 44 | >>> boxes = random_boxes(num, scale, rng) 45 | >>> print(boxes) 46 | tensor([[280.9925, 278.9802, 308.6148, 366.1769], 47 | [216.9113, 330.6978, 224.0446, 456.5878], 48 | [405.3632, 196.3221, 493.3953, 270.7942]]) 49 | """ 50 | rng = ensure_rng(rng) 51 | 52 | tlbr = rng.rand(num, 4).astype(np.float32) 53 | 54 | tl_x = np.minimum(tlbr[:, 0], tlbr[:, 2]) 55 | tl_y = np.minimum(tlbr[:, 1], tlbr[:, 3]) 56 | br_x = np.maximum(tlbr[:, 0], tlbr[:, 2]) 57 | br_y = np.maximum(tlbr[:, 1], tlbr[:, 3]) 58 | 59 | tlbr[:, 0] = tl_x * scale 60 | tlbr[:, 1] = tl_y * scale 61 | tlbr[:, 2] = br_x * scale 62 | tlbr[:, 3] = br_y * scale 63 | 64 | boxes = torch.from_numpy(tlbr) 65 | return boxes 66 | -------------------------------------------------------------------------------- /mmdet/core/bbox/iou_calculators/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_iou_calculator 2 | from .iou2d_calculator import BboxOverlaps2D, bbox_overlaps 3 | 4 | __all__ = ['build_iou_calculator', 'BboxOverlaps2D', 'bbox_overlaps'] 5 | -------------------------------------------------------------------------------- /mmdet/core/bbox/iou_calculators/builder.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils import Registry, build_from_cfg 2 | 3 | IOU_CALCULATORS = Registry('IoU calculator') 4 | 5 | 6 | def build_iou_calculator(cfg, default_args=None): 7 | """Builder of IoU calculator""" 8 | return build_from_cfg(cfg, IOU_CALCULATORS, default_args) 9 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_sampler import BaseSampler 2 | from .combined_sampler import CombinedSampler 3 | from .instance_balanced_pos_sampler import InstanceBalancedPosSampler 4 | from .iou_balanced_neg_sampler import IoUBalancedNegSampler 5 | from .ohem_sampler import OHEMSampler 6 | from .pseudo_sampler import PseudoSampler 7 | from .random_sampler import RandomSampler 8 | from .sampling_result import SamplingResult 9 | from .score_hlr_sampler import ScoreHLRSampler 10 | 11 | __all__ = [ 12 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 13 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 14 | 'OHEMSampler', 'SamplingResult', 'ScoreHLRSampler' 15 | ] 16 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/combined_sampler.py: -------------------------------------------------------------------------------- 1 | from ..builder import BBOX_SAMPLERS, build_sampler 
2 | from .base_sampler import BaseSampler 3 | 4 | 5 | @BBOX_SAMPLERS.register_module() 6 | class CombinedSampler(BaseSampler): 7 | """A sampler that combines positive sampler and negative sampler""" 8 | 9 | def __init__(self, pos_sampler, neg_sampler, **kwargs): 10 | super(CombinedSampler, self).__init__(**kwargs) 11 | self.pos_sampler = build_sampler(pos_sampler, **kwargs) 12 | self.neg_sampler = build_sampler(neg_sampler, **kwargs) 13 | 14 | def _sample_pos(self, **kwargs): 15 | """Sample positive samples""" 16 | raise NotImplementedError 17 | 18 | def _sample_neg(self, **kwargs): 19 | """Sample negative samples""" 20 | raise NotImplementedError 21 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from ..builder import BBOX_SAMPLERS 5 | from .random_sampler import RandomSampler 6 | 7 | 8 | @BBOX_SAMPLERS.register_module() 9 | class InstanceBalancedPosSampler(RandomSampler): 10 | """Instance balanced sampler that samples equal number of positive samples 11 | for each instance.""" 12 | 13 | def _sample_pos(self, assign_result, num_expected, **kwargs): 14 | """Sample positive boxes 15 | 16 | Args: 17 | assign_result (:obj:`AssignResult`): The assigned results of boxes. 18 | num_expected (int): The number of expected positive samples 19 | 20 | Returns: 21 | Tensor or ndarray: sampled indices. 22 | """ 23 | pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False) 24 | if pos_inds.numel() != 0: 25 | pos_inds = pos_inds.squeeze(1) 26 | if pos_inds.numel() <= num_expected: 27 | return pos_inds 28 | else: 29 | unique_gt_inds = assign_result.gt_inds[pos_inds].unique() 30 | num_gts = len(unique_gt_inds) 31 | num_per_gt = int(round(num_expected / float(num_gts)) + 1) 32 | sampled_inds = [] 33 | for i in unique_gt_inds: 34 | inds = torch.nonzero( 35 | assign_result.gt_inds == i.item(), as_tuple=False) 36 | if inds.numel() != 0: 37 | inds = inds.squeeze(1) 38 | else: 39 | continue 40 | if len(inds) > num_per_gt: 41 | inds = self.random_choice(inds, num_per_gt) 42 | sampled_inds.append(inds) 43 | sampled_inds = torch.cat(sampled_inds) 44 | if len(sampled_inds) < num_expected: 45 | num_extra = num_expected - len(sampled_inds) 46 | extra_inds = np.array( 47 | list(set(pos_inds.cpu()) - set(sampled_inds.cpu()))) 48 | if len(extra_inds) > num_extra: 49 | extra_inds = self.random_choice(extra_inds, num_extra) 50 | extra_inds = torch.from_numpy(extra_inds).to( 51 | assign_result.gt_inds.device).long() 52 | sampled_inds = torch.cat([sampled_inds, extra_inds]) 53 | elif len(sampled_inds) > num_expected: 54 | sampled_inds = self.random_choice(sampled_inds, num_expected) 55 | return sampled_inds 56 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/pseudo_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ..builder import BBOX_SAMPLERS 4 | from .base_sampler import BaseSampler 5 | from .sampling_result import SamplingResult 6 | 7 | 8 | @BBOX_SAMPLERS.register_module() 9 | class PseudoSampler(BaseSampler): 10 | """A pseudo sampler that does not do sampling actually.""" 11 | 12 | def __init__(self, **kwargs): 13 | pass 14 | 15 | def _sample_pos(self, **kwargs): 16 | """Sample positive samples""" 17 | raise NotImplementedError 18 | 19 | def _sample_neg(self, 
**kwargs): 20 | """Sample negative samples""" 21 | raise NotImplementedError 22 | 23 | def sample(self, assign_result, bboxes, gt_bboxes, **kwargs): 24 | """Directly returns the positive and negative indices of samples 25 | 26 | Args: 27 | assign_result (:obj:`AssignResult`): Assigned results 28 | bboxes (torch.Tensor): Bounding boxes 29 | gt_bboxes (torch.Tensor): Ground truth boxes 30 | 31 | Returns: 32 | :obj:`SamplingResult`: sampler results 33 | """ 34 | pos_inds = torch.nonzero( 35 | assign_result.gt_inds > 0, as_tuple=False).squeeze(-1).unique() 36 | neg_inds = torch.nonzero( 37 | assign_result.gt_inds == 0, as_tuple=False).squeeze(-1).unique() 38 | gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8) 39 | sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 40 | assign_result, gt_flags) 41 | return sampling_result 42 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .class_names import (cityscapes_classes, coco_classes, dataset_aliases, 2 | get_classes, imagenet_det_classes, 3 | imagenet_vid_classes, voc_classes) 4 | from .eval_hooks import DistEvalHook, EvalHook 5 | from .mean_ap import average_precision, eval_map, print_map_summary 6 | from .recall import (eval_recalls, plot_iou_recall, plot_num_recall, 7 | print_recall_summary) 8 | 9 | __all__ = [ 10 | 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes', 11 | 'coco_classes', 'cityscapes_classes', 'dataset_aliases', 'get_classes', 12 | 'DistEvalHook', 'EvalHook', 'average_precision', 'eval_map', 13 | 'print_map_summary', 'eval_recalls', 'print_recall_summary', 14 | 'plot_num_recall', 'plot_iou_recall' 15 | ] 16 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/bbox_overlaps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou', eps=1e-6): 5 | """Calculate the ious between each bbox of bboxes1 and bboxes2. 
6 | 7 | Args: 8 | bboxes1(ndarray): shape (n, 4) 9 | bboxes2(ndarray): shape (k, 4) 10 | mode(str): iou (intersection over union) or iof (intersection 11 | over foreground) 12 | 13 | Returns: 14 | ious(ndarray): shape (n, k) 15 | """ 16 | 17 | assert mode in ['iou', 'iof'] 18 | 19 | bboxes1 = bboxes1.astype(np.float32) 20 | bboxes2 = bboxes2.astype(np.float32) 21 | rows = bboxes1.shape[0] 22 | cols = bboxes2.shape[0] 23 | ious = np.zeros((rows, cols), dtype=np.float32) 24 | if rows * cols == 0: 25 | return ious 26 | exchange = False 27 | if bboxes1.shape[0] > bboxes2.shape[0]: 28 | bboxes1, bboxes2 = bboxes2, bboxes1 29 | ious = np.zeros((cols, rows), dtype=np.float32) 30 | exchange = True 31 | area1 = (bboxes1[:, 2] - bboxes1[:, 0]) * (bboxes1[:, 3] - bboxes1[:, 1]) 32 | area2 = (bboxes2[:, 2] - bboxes2[:, 0]) * (bboxes2[:, 3] - bboxes2[:, 1]) 33 | for i in range(bboxes1.shape[0]): 34 | x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0]) 35 | y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1]) 36 | x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2]) 37 | y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3]) 38 | overlap = np.maximum(x_end - x_start, 0) * np.maximum( 39 | y_end - y_start, 0) 40 | if mode == 'iou': 41 | union = area1[i] + area2 - overlap 42 | else: 43 | union = area1[i] if not exchange else area2 44 | union = np.maximum(union, eps) 45 | ious[i, :] = overlap / union 46 | if exchange: 47 | ious = ious.T 48 | return ious 49 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/eval_hooks.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from mmcv.runner import Hook 4 | from torch.utils.data import DataLoader 5 | 6 | 7 | class EvalHook(Hook): 8 | """Evaluation hook. 9 | 10 | Attributes: 11 | dataloader (DataLoader): A PyTorch dataloader. 12 | interval (int): Evaluation interval (by epochs). Default: 1. 13 | """ 14 | 15 | def __init__(self, dataloader, interval=1, **eval_kwargs): 16 | if not isinstance(dataloader, DataLoader): 17 | raise TypeError('dataloader must be a pytorch DataLoader, but got' 18 | f' {type(dataloader)}') 19 | self.dataloader = dataloader 20 | self.interval = interval 21 | self.eval_kwargs = eval_kwargs 22 | 23 | def after_train_epoch(self, runner): 24 | if not self.every_n_epochs(runner, self.interval): 25 | return 26 | from mmdet.apis import single_gpu_test 27 | results = single_gpu_test(runner.model, self.dataloader, show=False) 28 | self.evaluate(runner, results) 29 | 30 | def evaluate(self, runner, results): 31 | eval_res = self.dataloader.dataset.evaluate( 32 | results, logger=runner.logger, **self.eval_kwargs) 33 | for name, val in eval_res.items(): 34 | runner.log_buffer.output[name] = val 35 | runner.log_buffer.ready = True 36 | 37 | 38 | class DistEvalHook(EvalHook): 39 | """Distributed evaluation hook. 40 | 41 | Attributes: 42 | dataloader (DataLoader): A PyTorch dataloader. 43 | interval (int): Evaluation interval (by epochs). Default: 1. 44 | tmpdir (str | None): Temporary directory to save the results of all 45 | processes. Default: None. 46 | gpu_collect (bool): Whether to use gpu or cpu to collect results. 47 | Default: False. 
48 | """ 49 | 50 | def __init__(self, 51 | dataloader, 52 | interval=1, 53 | gpu_collect=False, 54 | **eval_kwargs): 55 | if not isinstance(dataloader, DataLoader): 56 | raise TypeError('dataloader must be a pytorch DataLoader, but got ' 57 | f'{type(dataloader)}') 58 | self.dataloader = dataloader 59 | self.interval = interval 60 | self.gpu_collect = gpu_collect 61 | self.eval_kwargs = eval_kwargs 62 | 63 | def after_train_epoch(self, runner): 64 | if not self.every_n_epochs(runner, self.interval): 65 | return 66 | from mmdet.apis import multi_gpu_test 67 | results = multi_gpu_test( 68 | runner.model, 69 | self.dataloader, 70 | tmpdir=osp.join(runner.work_dir, '.eval_hook'), 71 | gpu_collect=self.gpu_collect) 72 | if runner.rank == 0: 73 | print('\n') 74 | self.evaluate(runner, results) 75 | -------------------------------------------------------------------------------- /mmdet/core/fp16/__init__.py: -------------------------------------------------------------------------------- 1 | from .decorators import auto_fp16, force_fp32 2 | from .hooks import Fp16OptimizerHook, wrap_fp16_model 3 | 4 | __all__ = ['auto_fp16', 'force_fp32', 'Fp16OptimizerHook', 'wrap_fp16_model'] 5 | -------------------------------------------------------------------------------- /mmdet/core/fp16/utils.py: -------------------------------------------------------------------------------- 1 | from collections import abc 2 | 3 | import numpy as np 4 | import torch 5 | 6 | 7 | def cast_tensor_type(inputs, src_type, dst_type): 8 | """Recursively convert Tensor in inputs from src_type to dst_type. 9 | 10 | Args: 11 | inputs: Inputs that to be casted. 12 | src_type (torch.dtype): Source type.. 13 | dst_type (torch.dtype): Destination type. 14 | 15 | Returns: 16 | The same type with inputs, but all contained Tensors have been cast. 17 | """ 18 | if isinstance(inputs, torch.Tensor): 19 | return inputs.to(dst_type) 20 | elif isinstance(inputs, str): 21 | return inputs 22 | elif isinstance(inputs, np.ndarray): 23 | return inputs 24 | elif isinstance(inputs, abc.Mapping): 25 | return type(inputs)({ 26 | k: cast_tensor_type(v, src_type, dst_type) 27 | for k, v in inputs.items() 28 | }) 29 | elif isinstance(inputs, abc.Iterable): 30 | return type(inputs)( 31 | cast_tensor_type(item, src_type, dst_type) for item in inputs) 32 | else: 33 | return inputs 34 | -------------------------------------------------------------------------------- /mmdet/core/mask/__init__.py: -------------------------------------------------------------------------------- 1 | from .mask_target import mask_target 2 | from .structures import BitmapMasks, PolygonMasks 3 | from .utils import encode_mask_results, split_combined_polys 4 | 5 | __all__ = [ 6 | 'split_combined_polys', 'mask_target', 'BitmapMasks', 'PolygonMasks', 7 | 'encode_mask_results' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/core/mask/mask_target.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.nn.modules.utils import _pair 4 | 5 | 6 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list, 7 | cfg): 8 | """ Compute mask target for positive proposals in multiple images. 9 | 10 | Args: 11 | pos_proposals_list (list[Tensor]): Positive proposals in multiple 12 | images. 13 | pos_assigned_gt_inds_list (list[Tensor]): Assigned GT indices for each 14 | positive proposals. 
15 | gt_masks_list (list[:obj:`BaseInstanceMasks`]): Ground truth masks of 16 | each image. 17 | cfg (dict): Config dict that specifies the mask size. 18 | 19 | Returns: 20 | list[Tensor]: Mask target of each image. 21 | """ 22 | cfg_list = [cfg for _ in range(len(pos_proposals_list))] 23 | mask_targets = map(mask_target_single, pos_proposals_list, 24 | pos_assigned_gt_inds_list, gt_masks_list, cfg_list) 25 | mask_targets = list(mask_targets) 26 | if len(mask_targets) > 0: 27 | mask_targets = torch.cat(mask_targets) 28 | return mask_targets 29 | 30 | 31 | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg): 32 | """Compute mask target for each positive proposal in the image. 33 | 34 | Args: 35 | pos_proposals (Tensor): Positive proposals. 36 | pos_assigned_gt_inds (Tensor): Assigned GT inds of positive proposals. 37 | gt_masks (:obj:`BaseInstanceMasks`): GT masks in the format of Bitmap 38 | or Polygon. 39 | cfg (dict): Config dict that indicate the mask size. 40 | 41 | Returns: 42 | Tensor: Mask target of each positive proposals in the image. 43 | """ 44 | device = pos_proposals.device 45 | mask_size = _pair(cfg.mask_size) 46 | num_pos = pos_proposals.size(0) 47 | if num_pos > 0: 48 | proposals_np = pos_proposals.cpu().numpy() 49 | maxh, maxw = gt_masks.height, gt_masks.width 50 | proposals_np[:, [0, 2]] = np.clip(proposals_np[:, [0, 2]], 0, maxw) 51 | proposals_np[:, [1, 3]] = np.clip(proposals_np[:, [1, 3]], 0, maxh) 52 | pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy() 53 | 54 | mask_targets = gt_masks.crop_and_resize( 55 | proposals_np, mask_size, device=device, 56 | inds=pos_assigned_gt_inds).to_ndarray() 57 | 58 | mask_targets = torch.from_numpy(mask_targets).float().to(device) 59 | else: 60 | mask_targets = pos_proposals.new_zeros((0, ) + mask_size) 61 | 62 | return mask_targets 63 | -------------------------------------------------------------------------------- /mmdet/core/mask/utils.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import pycocotools.mask as mask_util 4 | 5 | 6 | def split_combined_polys(polys, poly_lens, polys_per_mask): 7 | """Split the combined 1-D polys into masks. 8 | 9 | A mask is represented as a list of polys, and a poly is represented as 10 | a 1-D array. In dataset, all masks are concatenated into a single 1-D 11 | tensor. Here we need to split the tensor into original representations. 12 | 13 | Args: 14 | polys (list): a list (length = image num) of 1-D tensors 15 | poly_lens (list): a list (length = image num) of poly length 16 | polys_per_mask (list): a list (length = image num) of poly number 17 | of each mask 18 | 19 | Returns: 20 | list: a list (length = image num) of list (length = mask num) of 21 | list (length = poly num) of numpy array 22 | """ 23 | mask_polys_list = [] 24 | for img_id in range(len(polys)): 25 | polys_single = polys[img_id] 26 | polys_lens_single = poly_lens[img_id].tolist() 27 | polys_per_mask_single = polys_per_mask[img_id].tolist() 28 | 29 | split_polys = mmcv.slice_list(polys_single, polys_lens_single) 30 | mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single) 31 | mask_polys_list.append(mask_polys) 32 | return mask_polys_list 33 | 34 | 35 | # TODO: move this function to more proper place 36 | def encode_mask_results(mask_results): 37 | """Encode bitmap mask to RLE code. 38 | 39 | Args: 40 | mask_results (list | tuple[list]): bitmap mask results. 
41 | In mask scoring rcnn, mask_results is a tuple of (segm_results, 42 | segm_cls_score). 43 | 44 | Returns: 45 | list | tuple: RLE encoded mask. 46 | """ 47 | if isinstance(mask_results, tuple): # mask scoring 48 | cls_segms, cls_mask_scores = mask_results 49 | else: 50 | cls_segms = mask_results 51 | num_classes = len(cls_segms) 52 | encoded_mask_results = [[] for _ in range(num_classes)] 53 | for i in range(len(cls_segms)): 54 | for cls_segm in cls_segms[i]: 55 | encoded_mask_results[i].append( 56 | mask_util.encode( 57 | np.array( 58 | cls_segm[:, :, np.newaxis], order='F', 59 | dtype='uint8'))[0]) # encoded with RLE 60 | if isinstance(mask_results, tuple): 61 | return encoded_mask_results, cls_mask_scores 62 | else: 63 | return encoded_mask_results 64 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_nms import multiclass_nms, multiclass_nms_pts, multiclass_nms_pts_refine 2 | from .merge_augs import (merge_aug_bboxes, merge_aug_masks, 3 | merge_aug_proposals, merge_aug_scores) 4 | 5 | __all__ = [ 6 | 'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes', 7 | 'merge_aug_scores', 'merge_aug_masks', 'multiclass_nms_pts', 'multiclass_nms_pts_refine' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .dist_utils import DistOptimizerHook, allreduce_grads 2 | from .misc import multi_apply, tensor2imgs, unmap 3 | 4 | __all__ = [ 5 | 'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', 'multi_apply', 6 | 'unmap' 7 | ] 8 | -------------------------------------------------------------------------------- /mmdet/core/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from collections import OrderedDict 3 | 4 | import torch.distributed as dist 5 | from mmcv.runner import OptimizerHook 6 | from torch._utils import (_flatten_dense_tensors, _take_tensors, 7 | _unflatten_dense_tensors) 8 | 9 | 10 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): 11 | if bucket_size_mb > 0: 12 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 13 | buckets = _take_tensors(tensors, bucket_size_bytes) 14 | else: 15 | buckets = OrderedDict() 16 | for tensor in tensors: 17 | tp = tensor.type() 18 | if tp not in buckets: 19 | buckets[tp] = [] 20 | buckets[tp].append(tensor) 21 | buckets = buckets.values() 22 | 23 | for bucket in buckets: 24 | flat_tensors = _flatten_dense_tensors(bucket) 25 | dist.all_reduce(flat_tensors) 26 | flat_tensors.div_(world_size) 27 | for tensor, synced in zip( 28 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)): 29 | tensor.copy_(synced) 30 | 31 | 32 | def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): 33 | """Allreduce gradients 34 | 35 | Args: 36 | params (list[torch.Parameters]): List of parameters of a model 37 | coalesce (bool, optional): Whether allreduce parameters as a whole. 38 | Defaults to True. 39 | bucket_size_mb (int, optional): Size of bucket, the unit is MB. 40 | Defaults to -1. 
41 | """ 42 | grads = [ 43 | param.grad.data for param in params 44 | if param.requires_grad and param.grad is not None 45 | ] 46 | world_size = dist.get_world_size() 47 | if coalesce: 48 | _allreduce_coalesced(grads, world_size, bucket_size_mb) 49 | else: 50 | for tensor in grads: 51 | dist.all_reduce(tensor.div_(world_size)) 52 | 53 | 54 | class DistOptimizerHook(OptimizerHook): 55 | """Deprecated optimizer hook for distributed training""" 56 | 57 | def __init__(self, *args, **kwargs): 58 | warnings.warn('"DistOptimizerHook" is deprecated, please switch to' 59 | '"mmcv.runner.OptimizerHook".') 60 | super().__init__(*args, **kwargs) 61 | -------------------------------------------------------------------------------- /mmdet/core/utils/misc.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import mmcv 4 | import numpy as np 5 | import torch 6 | from six.moves import map, zip 7 | 8 | 9 | def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): 10 | """Convert tensor to images 11 | 12 | Args: 13 | tensor (torch.Tensor): Tensor that contains multiple images 14 | mean (tuple[float], optional): Mean of images. Defaults to (0, 0, 0). 15 | std (tuple[float], optional): Standard deviation of images. 16 | Defaults to (1, 1, 1). 17 | to_rgb (bool, optional): Whether convert the images to RGB format. 18 | Defaults to True. 19 | 20 | Returns: 21 | list[np.ndarray]: A list that contains multiple images. 22 | """ 23 | num_imgs = tensor.size(0) 24 | mean = np.array(mean, dtype=np.float32) 25 | std = np.array(std, dtype=np.float32) 26 | imgs = [] 27 | for img_id in range(num_imgs): 28 | img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0) 29 | img = mmcv.imdenormalize( 30 | img, mean, std, to_bgr=to_rgb).astype(np.uint8) 31 | imgs.append(np.ascontiguousarray(img)) 32 | return imgs 33 | 34 | 35 | def multi_apply(func, *args, **kwargs): 36 | """Apply function to a list of arguments 37 | 38 | Note: 39 | This function applies the ``func`` to multiple inputs and 40 | map the multiple outputs of the ``func`` into different 41 | list. Each list contains the same type of outputs corresponding 42 | to different inputs. 
43 | 44 | Args: 45 | func (Function): A function that will be applied to a list of 46 | arguments. 47 | 48 | Returns: 49 | tuple(list): A tuple containing multiple lists, each of which 50 | contains one kind of result returned by the function. 51 | """ 52 | pfunc = partial(func, **kwargs) if kwargs else func 53 | map_results = map(pfunc, *args) 54 | return tuple(map(list, zip(*map_results))) 55 | 56 | 57 | def unmap(data, count, inds, fill=0): 58 | """Unmap a subset of items (data) back to the original set of items (of 59 | size ``count``).""" 60 | if data.dim() == 1: 61 | ret = data.new_full((count, ), fill) 62 | ret[inds.type(torch.bool)] = data 63 | else: 64 | new_size = (count, ) + data.size()[1:] 65 | ret = data.new_full(new_size, fill) 66 | ret[inds.type(torch.bool), :] = data 67 | return ret 68 | -------------------------------------------------------------------------------- /mmdet/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset 2 | from .cityscapes import CityscapesDataset 3 | from .coco import CocoDataset 4 | from .custom import CustomDataset 5 | from .dataset_wrappers import (ClassBalancedDataset, ConcatDataset, 6 | RepeatDataset) 7 | from .deepfashion import DeepFashionDataset 8 | from .lvis import LVISDataset 9 | from .samplers import DistributedGroupSampler, DistributedSampler, GroupSampler 10 | from .voc import VOCDataset 11 | from .wider_face import WIDERFaceDataset 12 | from .xml_style import XMLDataset 13 | 14 | __all__ = [ 15 | 'CustomDataset', 'XMLDataset', 'CocoDataset', 'DeepFashionDataset', 16 | 'VOCDataset', 'CityscapesDataset', 'LVISDataset', 'GroupSampler', 17 | 'DistributedGroupSampler', 'DistributedSampler', 'build_dataloader', 18 | 'ConcatDataset', 'RepeatDataset', 'ClassBalancedDataset', 19 | 'WIDERFaceDataset', 'DATASETS', 'PIPELINES', 'build_dataset' 20 | ] 21 | -------------------------------------------------------------------------------- /mmdet/datasets/deepfashion.py: -------------------------------------------------------------------------------- 1 | from .builder import DATASETS 2 | from .coco import CocoDataset 3 | 4 | 5 | @DATASETS.register_module() 6 | class DeepFashionDataset(CocoDataset): 7 | 8 | CLASSES = ('top', 'skirt', 'leggings', 'dress', 'outer', 'pants', 'bag', 9 | 'neckwear', 'headwear', 'eyeglass', 'belt', 'footwear', 'hair', 10 | 'skin', 'face') 11 | -------------------------------------------------------------------------------- /mmdet/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from .auto_augment import AutoAugment 2 | from .compose import Compose 3 | from .formating import (Collect, ImageToTensor, ToDataContainer, ToTensor, 4 | Transpose, to_tensor) 5 | from .formating_reppointsv2 import RPDV2FormatBundle 6 | from .instaboost import InstaBoost 7 | from .loading import (LoadAnnotations, LoadImageFromFile, 8 | LoadMultiChannelImageFromFiles, LoadProposals) 9 | from .loading_reppointsv2 import LoadRPDV2Annotations, LoadDenseRPDV2Annotations 10 | from .test_time_aug import MultiScaleFlipAug 11 | from .transforms import (Albu, Expand, MinIoURandomCrop, Normalize, Pad, 12 | PhotoMetricDistortion, RandomCenterCropPad, 13 | RandomCrop, RandomFlip, Resize, SegRescale) 14 | 15 | __all__ = [ 16 | 'Compose',
'to_tensor', 'ToTensor', 'ImageToTensor', 'ToDataContainer', 17 | 'Transpose', 'Collect', 'LoadAnnotations', 'LoadImageFromFile', 18 | 'LoadMultiChannelImageFromFiles', 'LoadProposals', 'MultiScaleFlipAug', 19 | 'Resize', 'RandomFlip', 'Pad', 'RandomCrop', 'Normalize', 'SegRescale', 20 | 'MinIoURandomCrop', 'Expand', 'PhotoMetricDistortion', 'Albu', 21 | 'InstaBoost', 'RandomCenterCropPad', 'AutoAugment', 'LoadRPDV2Annotations', 'RPDV2FormatBundle', 22 | 'LoadDenseRPDV2Annotations' 23 | ] 24 | -------------------------------------------------------------------------------- /mmdet/datasets/pipelines/auto_augment.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | import numpy as np 4 | 5 | from ..builder import PIPELINES 6 | from .compose import Compose 7 | 8 | 9 | @PIPELINES.register_module() 10 | class AutoAugment(object): 11 | """Auto augmentation. 12 | 13 | This data augmentation is proposed in 14 | `Learning Data Augmentation Strategies for Object Detection <https://arxiv.org/abs/1906.11172>`_ # noqa: E501 15 | 16 | Args: 17 | policies (list[list[dict]]): The policies of auto augmentation. Each 18 | policy in ``policies`` is a specific augmentation policy, and is 19 | composed of several augmentations (dict). When AutoAugment is 20 | called, a random policy in ``policies`` will be selected to 21 | augment images. 22 | 23 | Examples: 24 | >>> replace = (104, 116, 124) 25 | >>> policies = [ 26 | >>> [ 27 | >>> dict(type='Sharpness', prob=0.0, level=8), 28 | >>> dict( 29 | >>> type='Shear', 30 | >>> prob=0.4, 31 | >>> level=0, 32 | >>> replace=replace, 33 | >>> axis='x') 34 | >>> ], 35 | >>> [ 36 | >>> dict( 37 | >>> type='Rotate', 38 | >>> prob=0.6, 39 | >>> level=10, 40 | >>> replace=replace), 41 | >>> dict(type='Color', prob=1.0, level=6) 42 | >>> ] 43 | >>> ] 44 | >>> augmentation = AutoAugment(policies) 45 | >>> img = np.ones((100, 100, 3)) 46 | >>> gt_bboxes = np.ones((10, 4)) 47 | >>> results = dict(img=img, gt_bboxes=gt_bboxes) 48 | >>> results = augmentation(results) 49 | """ 50 | 51 | def __init__(self, policies): 52 | assert isinstance(policies, list) and len(policies) > 0, \ 53 | 'Policies must be a non-empty list.' 54 | for policy in policies: 55 | assert isinstance(policy, list) and len(policy) > 0, \ 56 | 'Each policy in policies must be a non-empty list.' 57 | for augment in policy: 58 | assert isinstance(augment, dict) and 'type' in augment, \ 59 | 'Each specific augmentation must be a dict with key' \ 60 | ' "type".' 61 | 62 | self.policies = copy.deepcopy(policies) 63 | self.transforms = [Compose(policy) for policy in self.policies] 64 | 65 | def __call__(self, results): 66 | transform = np.random.choice(self.transforms) 67 | return transform(results) 68 | 69 | def __repr__(self): 70 | return f'{self.__class__.__name__}(policies={self.policies})' 71 | -------------------------------------------------------------------------------- /mmdet/datasets/pipelines/compose.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | from mmcv.utils import build_from_cfg 4 | 5 | from ..builder import PIPELINES 6 | 7 | 8 | @PIPELINES.register_module() 9 | class Compose(object): 10 | """Compose multiple transforms sequentially. 11 | 12 | Args: 13 | transforms (Sequence[dict | callable]): Sequence of transform objects or 14 | config dicts to be composed.
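
    Example (an illustrative sketch; the file name is hypothetical, while
    ``LoadImageFromFile`` and ``Resize`` are pipeline transforms registered
    in this package):
        >>> pipeline = Compose([
        >>>     dict(type='LoadImageFromFile'),
        >>>     dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
        >>> ])
        >>> results = dict(img_info=dict(filename='demo.jpg'), img_prefix='')
        >>> results = pipeline(results)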
15 | """ 16 | 17 | def __init__(self, transforms): 18 | assert isinstance(transforms, collections.abc.Sequence) 19 | self.transforms = [] 20 | for transform in transforms: 21 | if isinstance(transform, dict): 22 | transform = build_from_cfg(transform, PIPELINES) 23 | self.transforms.append(transform) 24 | elif callable(transform): 25 | self.transforms.append(transform) 26 | else: 27 | raise TypeError('transform must be callable or a dict') 28 | 29 | def __call__(self, data): 30 | """Call function to apply transforms sequentially. 31 | 32 | Args: 33 | data (dict): A result dict contains the data to transform. 34 | 35 | Returns: 36 | dict: Transformed data. 37 | """ 38 | 39 | for t in self.transforms: 40 | data = t(data) 41 | if data is None: 42 | return None 43 | return data 44 | 45 | def __repr__(self): 46 | format_string = self.__class__.__name__ + '(' 47 | for t in self.transforms: 48 | format_string += '\n' 49 | format_string += f' {t}' 50 | format_string += '\n)' 51 | return format_string 52 | -------------------------------------------------------------------------------- /mmdet/datasets/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .distributed_sampler import DistributedSampler 2 | from .group_sampler import DistributedGroupSampler, GroupSampler 3 | 4 | __all__ = ['DistributedSampler', 'DistributedGroupSampler', 'GroupSampler'] 5 | -------------------------------------------------------------------------------- /mmdet/datasets/samplers/distributed_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import DistributedSampler as _DistributedSampler 3 | 4 | 5 | class DistributedSampler(_DistributedSampler): 6 | 7 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 8 | super().__init__(dataset, num_replicas=num_replicas, rank=rank) 9 | self.shuffle = shuffle 10 | 11 | def __iter__(self): 12 | # deterministically shuffle based on epoch 13 | if self.shuffle: 14 | g = torch.Generator() 15 | g.manual_seed(self.epoch) 16 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 17 | else: 18 | indices = torch.arange(len(self.dataset)).tolist() 19 | 20 | # add extra samples to make it evenly divisible 21 | indices += indices[:(self.total_size - len(indices))] 22 | assert len(indices) == self.total_size 23 | 24 | # subsample 25 | indices = indices[self.rank:self.total_size:self.num_replicas] 26 | assert len(indices) == self.num_samples 27 | 28 | return iter(indices) 29 | -------------------------------------------------------------------------------- /mmdet/datasets/wider_face.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import xml.etree.ElementTree as ET 3 | 4 | import mmcv 5 | 6 | from .builder import DATASETS 7 | from .xml_style import XMLDataset 8 | 9 | 10 | @DATASETS.register_module() 11 | class WIDERFaceDataset(XMLDataset): 12 | """ 13 | Reader for the WIDER Face dataset in PASCAL VOC format. 14 | Conversion scripts can be found in 15 | https://github.com/sovrasov/wider-face-pascal-voc-annotations 16 | """ 17 | CLASSES = ('face', ) 18 | 19 | def __init__(self, **kwargs): 20 | super(WIDERFaceDataset, self).__init__(**kwargs) 21 | 22 | def load_annotations(self, ann_file): 23 | """Load annotation from WIDERFace XML style annotation file. 24 | 25 | Args: 26 | ann_file (str): Path of XML file. 
27 | 28 | Returns: 29 | list[dict]: Annotation info from XML file. 30 | """ 31 | 32 | data_infos = [] 33 | img_ids = mmcv.list_from_file(ann_file) 34 | for img_id in img_ids: 35 | filename = f'{img_id}.jpg' 36 | xml_path = osp.join(self.img_prefix, 'Annotations', 37 | f'{img_id}.xml') 38 | tree = ET.parse(xml_path) 39 | root = tree.getroot() 40 | size = root.find('size') 41 | width = int(size.find('width').text) 42 | height = int(size.find('height').text) 43 | folder = root.find('folder').text 44 | data_infos.append( 45 | dict( 46 | id=img_id, 47 | filename=osp.join(folder, filename), 48 | width=width, 49 | height=height)) 50 | 51 | return data_infos 52 | -------------------------------------------------------------------------------- /mmdet/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import * # noqa: F401,F403 2 | from .builder import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS, 3 | ROI_EXTRACTORS, SHARED_HEADS, build_backbone, 4 | build_detector, build_head, build_loss, build_neck, 5 | build_roi_extractor, build_shared_head) 6 | from .dense_heads import * # noqa: F401,F403 7 | from .detectors import * # noqa: F401,F403 8 | from .losses import * # noqa: F401,F403 9 | from .necks import * # noqa: F401,F403 10 | from .roi_heads import * # noqa: F401,F403 11 | 12 | __all__ = [ 13 | 'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS', 'LOSSES', 14 | 'DETECTORS', 'build_backbone', 'build_neck', 'build_roi_extractor', 15 | 'build_shared_head', 'build_head', 'build_loss', 'build_detector' 16 | ] 17 | -------------------------------------------------------------------------------- /mmdet/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .detectors_resnet import DetectoRS_ResNet 2 | from .detectors_resnext import DetectoRS_ResNeXt 3 | from .hourglass import HourglassNet 4 | from .hrnet import HRNet 5 | from .mobilenet import MobileNetV2 6 | from .regnet import RegNet 7 | from .res2net import Res2Net 8 | from .resnet import ResNet, ResNetV1d 9 | from .resnext import ResNeXt 10 | from .ssd_vgg import SSDVGG 11 | 12 | __all__ = [ 13 | 'RegNet', 'ResNet', 'ResNetV1d', 'ResNeXt', 'SSDVGG', 'HRNet', 'Res2Net', 14 | 'HourglassNet', 'DetectoRS_ResNet', 'DetectoRS_ResNeXt', 'MobileNetV2' 15 | ] 16 | -------------------------------------------------------------------------------- /mmdet/models/builder.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils import Registry, build_from_cfg 2 | from torch import nn 3 | 4 | BACKBONES = Registry('backbone') 5 | NECKS = Registry('neck') 6 | ROI_EXTRACTORS = Registry('roi_extractor') 7 | SHARED_HEADS = Registry('shared_head') 8 | HEADS = Registry('head') 9 | LOSSES = Registry('loss') 10 | DETECTORS = Registry('detector') 11 | 12 | 13 | def build(cfg, registry, default_args=None): 14 | """Build a module. 15 | 16 | Args: 17 | cfg (dict, list[dict]): The config of modules; it is either a dict 18 | or a list of configs. 19 | registry (:obj:`Registry`): A registry the module belongs to. 20 | default_args (dict, optional): Default arguments to build the module. 21 | Defaults to None. 22 | 23 | Returns: 24 | nn.Module: A built nn module.
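
    Example (a minimal sketch; ``FPN`` is a neck registered in this codebase
    and the channel numbers are illustrative):
        >>> neck = build(
        >>>     dict(type='FPN', in_channels=[256, 512, 1024, 2048],
        >>>          out_channels=256, num_outs=5), NECKS)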
25 | """ 26 | if isinstance(cfg, list): 27 | modules = [ 28 | build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg 29 | ] 30 | return nn.Sequential(*modules) 31 | else: 32 | return build_from_cfg(cfg, registry, default_args) 33 | 34 | 35 | def build_backbone(cfg): 36 | """Build backbone""" 37 | return build(cfg, BACKBONES) 38 | 39 | 40 | def build_neck(cfg): 41 | """Build neck""" 42 | return build(cfg, NECKS) 43 | 44 | 45 | def build_roi_extractor(cfg): 46 | """Build roi extractor""" 47 | return build(cfg, ROI_EXTRACTORS) 48 | 49 | 50 | def build_shared_head(cfg): 51 | """Build shared head""" 52 | return build(cfg, SHARED_HEADS) 53 | 54 | 55 | def build_head(cfg): 56 | """Build head""" 57 | return build(cfg, HEADS) 58 | 59 | 60 | def build_loss(cfg): 61 | """Build loss""" 62 | return build(cfg, LOSSES) 63 | 64 | 65 | def build_detector(cfg, train_cfg=None, test_cfg=None): 66 | """Build detector""" 67 | return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg)) 68 | -------------------------------------------------------------------------------- /mmdet/models/dense_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_free_head import AnchorFreeHead 2 | from .anchor_head import AnchorHead 3 | from .atss_head import ATSSHead 4 | from .dense_reppoints_head import DenseRepPointsHead 5 | from .dense_reppoints_v2_head import DenseRepPointsV2Head 6 | from .fcos_head import FCOSHead 7 | from .fovea_head import FoveaHead 8 | from .free_anchor_retina_head import FreeAnchorRetinaHead 9 | from .fsaf_head import FSAFHead 10 | from .ga_retina_head import GARetinaHead 11 | from .ga_rpn_head import GARPNHead 12 | from .gfl_head import GFLHead 13 | from .guided_anchor_head import FeatureAdaption, GuidedAnchorHead 14 | from .nasfcos_head import NASFCOSHead 15 | from .pisa_retinanet_head import PISARetinaHead 16 | from .pisa_ssd_head import PISASSDHead 17 | from .reppoints_head import RepPointsHead 18 | from .reppoints_v2_head import RepPointsV2Head 19 | from .retina_head import RetinaHead 20 | from .retina_sepbn_head import RetinaSepBNHead 21 | from .rpn_head import RPNHead 22 | from .ssd_head import SSDHead 23 | 24 | __all__ = [ 25 | 'AnchorFreeHead', 'AnchorHead', 'GuidedAnchorHead', 'FeatureAdaption', 26 | 'RPNHead', 'GARPNHead', 'RetinaHead', 'RetinaSepBNHead', 'GARetinaHead', 27 | 'SSDHead', 'FCOSHead', 'RepPointsHead', 'FoveaHead', 28 | 'FreeAnchorRetinaHead', 'ATSSHead', 'FSAFHead', 'NASFCOSHead', 29 | 'PISARetinaHead', 'PISASSDHead', 'GFLHead', 'RepPointsV2Head', 30 | 'DenseRepPointsHead', 'DenseRepPointsV2Head' 31 | ] 32 | -------------------------------------------------------------------------------- /mmdet/models/dense_heads/base_dense_head.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch.nn as nn 4 | 5 | 6 | class BaseDenseHead(nn.Module, metaclass=ABCMeta): 7 | """Base class for DenseHeads""" 8 | 9 | def __init__(self): 10 | super(BaseDenseHead, self).__init__() 11 | 12 | @abstractmethod 13 | def loss(self, **kwargs): 14 | """Compute losses of the head.""" 15 | pass 16 | 17 | @abstractmethod 18 | def get_bboxes(self, **kwargs): 19 | """Transform network output for a batch into bbox predictions.""" 20 | pass 21 | 22 | def forward_train(self, 23 | x, 24 | img_metas, 25 | gt_bboxes, 26 | gt_labels=None, 27 | gt_bboxes_ignore=None, 28 | proposal_cfg=None, 29 | **kwargs): 30 | """ 31 | Args: 32 | x (list[Tensor]): 
Features from FPN. 33 | img_metas (list[dict]): Meta information of each image, e.g., 34 | image size, scaling factor, etc. 35 | gt_bboxes (Tensor): Ground truth bboxes of the image, 36 | shape (num_gts, 4). 37 | gt_labels (Tensor): Ground truth labels of each box, 38 | shape (num_gts,). 39 | gt_bboxes_ignore (Tensor): Ground truth bboxes to be 40 | ignored, shape (num_ignored_gts, 4). 41 | proposal_cfg (mmcv.Config): Test / postprocessing configuration; 42 | if None, ``test_cfg`` is used. 43 | 44 | Returns: 45 | tuple: 46 | losses (dict[str, Tensor]): A dictionary of loss components. 47 | proposal_list (list[Tensor]): Proposals of each image. 48 | """ 49 | outs = self(x) 50 | if gt_labels is None: 51 | loss_inputs = outs + (gt_bboxes, img_metas) 52 | else: 53 | loss_inputs = outs + (gt_bboxes, gt_labels, img_metas) 54 | losses = self.loss(*loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) 55 | if proposal_cfg is None: 56 | return losses 57 | else: 58 | proposal_list = self.get_bboxes(*outs, img_metas, cfg=proposal_cfg) 59 | return losses, proposal_list 60 | -------------------------------------------------------------------------------- /mmdet/models/dense_heads/rpn_test_mixin.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from mmdet.core import merge_aug_proposals 4 | 5 | if sys.version_info >= (3, 7): 6 | from mmdet.utils.contextmanagers import completed 7 | 8 | 9 | class RPNTestMixin(object): 10 | """Test methods of RPN.""" 11 | 12 | if sys.version_info >= (3, 7): 13 | 14 | async def async_simple_test_rpn(self, x, img_metas): 15 | sleep_interval = self.rpn_head.test_cfg.pop( 16 | 'async_sleep_interval', 0.025) 17 | async with completed( 18 | __name__, 'rpn_head_forward', 19 | sleep_interval=sleep_interval): 20 | rpn_outs = self(x) 21 | 22 | proposal_list = self.get_bboxes(*rpn_outs, img_metas) 23 | return proposal_list 24 | 25 | def simple_test_rpn(self, x, img_metas): 26 | """Test without augmentation. 27 | 28 | Args: 29 | x (tuple[Tensor]): Features from the upstream network, each is 30 | a 4D-tensor. 31 | img_metas (list[dict]): Meta info of each image. 32 | 33 | Returns: 34 | list[Tensor]: Proposals of each image.
35 | """ 36 | rpn_outs = self(x) 37 | proposal_list = self.get_bboxes(*rpn_outs, img_metas) 38 | return proposal_list 39 | 40 | def aug_test_rpn(self, feats, img_metas): 41 | samples_per_gpu = len(img_metas[0]) 42 | aug_proposals = [[] for _ in range(samples_per_gpu)] 43 | for x, img_meta in zip(feats, img_metas): 44 | proposal_list = self.simple_test_rpn(x, img_meta) 45 | for i, proposals in enumerate(proposal_list): 46 | aug_proposals[i].append(proposals) 47 | # reorganize the order of 'img_metas' to match the dimensions 48 | # of 'aug_proposals' 49 | aug_img_metas = [] 50 | for i in range(samples_per_gpu): 51 | aug_img_meta = [] 52 | for j in range(len(img_metas)): 53 | aug_img_meta.append(img_metas[j][i]) 54 | aug_img_metas.append(aug_img_meta) 55 | # after merging, proposals will be rescaled to the original image size 56 | merged_proposals = [ 57 | merge_aug_proposals(proposals, aug_img_meta, self.test_cfg) 58 | for proposals, aug_img_meta in zip(aug_proposals, aug_img_metas) 59 | ] 60 | return merged_proposals 61 | -------------------------------------------------------------------------------- /mmdet/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .atss import ATSS 2 | from .base import BaseDetector 3 | from .cascade_rcnn import CascadeRCNN 4 | from .dense_reppoints_detector import DenseRepPointsDetector 5 | from .dense_reppoints_v2_detector import DenseRepPointsV2Detector 6 | from .fast_rcnn import FastRCNN 7 | from .faster_rcnn import FasterRCNN 8 | from .fcos import FCOS 9 | from .fovea import FOVEA 10 | from .fsaf import FSAF 11 | from .gfl import GFL 12 | from .grid_rcnn import GridRCNN 13 | from .htc import HybridTaskCascade 14 | from .mask_rcnn import MaskRCNN 15 | from .mask_scoring_rcnn import MaskScoringRCNN 16 | from .nasfcos import NASFCOS 17 | from .point_rend import PointRend 18 | from .reppoints_detector import RepPointsDetector 19 | from .reppoints_v2_detector import RepPointsV2Detector 20 | from .retinanet import RetinaNet 21 | from .rpn import RPN 22 | from .single_stage import SingleStageDetector 23 | from .two_stage import TwoStageDetector 24 | 25 | __all__ = [ 26 | 'ATSS', 'BaseDetector', 'SingleStageDetector', 'TwoStageDetector', 'RPN', 27 | 'FastRCNN', 'FasterRCNN', 'MaskRCNN', 'CascadeRCNN', 'HybridTaskCascade', 28 | 'RetinaNet', 'FCOS', 'GridRCNN', 'MaskScoringRCNN', 'RepPointsDetector', 29 | 'FOVEA', 'FSAF', 'NASFCOS', 'PointRend', 'GFL', 'RepPointsV2Detector', 30 | 'DenseRepPointsDetector', 'DenseRepPointsV2Detector' 31 | ] 32 | -------------------------------------------------------------------------------- /mmdet/models/detectors/atss.py: -------------------------------------------------------------------------------- 1 | from ..builder import DETECTORS 2 | from .single_stage import SingleStageDetector 3 | 4 | 5 | @DETECTORS.register_module() 6 | class ATSS(SingleStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head, 12 | train_cfg=None, 13 | test_cfg=None, 14 | pretrained=None): 15 | super(ATSS, self).__init__(backbone, neck, bbox_head, train_cfg, 16 | test_cfg, pretrained) 17 | -------------------------------------------------------------------------------- /mmdet/models/detectors/cascade_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..builder import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module() 6 | class CascadeRCNN(TwoStageDetector): 7 | 
"""Implementation of `Cascade R-CNN and Cascade Mask R-CNN 8 | `_""" 9 | 10 | def __init__(self, 11 | backbone, 12 | neck=None, 13 | rpn_head=None, 14 | roi_head=None, 15 | train_cfg=None, 16 | test_cfg=None, 17 | pretrained=None): 18 | super(CascadeRCNN, self).__init__( 19 | backbone=backbone, 20 | neck=neck, 21 | rpn_head=rpn_head, 22 | roi_head=roi_head, 23 | train_cfg=train_cfg, 24 | test_cfg=test_cfg, 25 | pretrained=pretrained) 26 | 27 | def show_result(self, data, result, **kwargs): 28 | """Show prediction results of the detector""" 29 | if self.with_mask: 30 | ms_bbox_result, ms_segm_result = result 31 | if isinstance(ms_bbox_result, dict): 32 | result = (ms_bbox_result['ensemble'], 33 | ms_segm_result['ensemble']) 34 | else: 35 | if isinstance(result, dict): 36 | result = result['ensemble'] 37 | return super(CascadeRCNN, self).show_result(data, result, **kwargs) 38 | -------------------------------------------------------------------------------- /mmdet/models/detectors/fast_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..builder import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module() 6 | class FastRCNN(TwoStageDetector): 7 | """Implementation of `Fast R-CNN `_""" 8 | 9 | def __init__(self, 10 | backbone, 11 | roi_head, 12 | train_cfg, 13 | test_cfg, 14 | neck=None, 15 | pretrained=None): 16 | super(FastRCNN, self).__init__( 17 | backbone=backbone, 18 | neck=neck, 19 | roi_head=roi_head, 20 | train_cfg=train_cfg, 21 | test_cfg=test_cfg, 22 | pretrained=pretrained) 23 | 24 | def forward_test(self, imgs, img_metas, proposals, **kwargs): 25 | """ 26 | Args: 27 | imgs (List[Tensor]): the outer list indicates test-time 28 | augmentations and inner Tensor should have a shape NxCxHxW, 29 | which contains all images in the batch. 30 | img_metas (List[List[dict]]): the outer list indicates test-time 31 | augs (multiscale, flip, etc.) and the inner list indicates 32 | images in a batch. 33 | proposals (List[List[Tensor]]): the outer list indicates test-time 34 | augs (multiscale, flip, etc.) and the inner list indicates 35 | images in a batch. The Tensor should have a shape Px4, where 36 | P is the number of proposals. 
37 | """ 38 | for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]: 39 | if not isinstance(var, list): 40 | raise TypeError(f'{name} must be a list, but got {type(var)}') 41 | 42 | num_augs = len(imgs) 43 | if num_augs != len(img_metas): 44 | raise ValueError(f'num of augmentations ({len(imgs)}) ' 45 | f'!= num of image meta ({len(img_metas)})') 46 | # TODO: remove the restriction of samples_per_gpu == 1 when prepared 47 | samples_per_gpu = imgs[0].size(0) 48 | assert samples_per_gpu == 1 49 | 50 | if num_augs == 1: 51 | return self.simple_test(imgs[0], img_metas[0], proposals[0], 52 | **kwargs) 53 | else: 54 | # TODO: support test-time augmentation 55 | assert NotImplementedError 56 | -------------------------------------------------------------------------------- /mmdet/models/detectors/faster_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..builder import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module() 6 | class FasterRCNN(TwoStageDetector): 7 | """Implementation of `Faster R-CNN `_""" 8 | 9 | def __init__(self, 10 | backbone, 11 | rpn_head, 12 | roi_head, 13 | train_cfg, 14 | test_cfg, 15 | neck=None, 16 | pretrained=None): 17 | super(FasterRCNN, self).__init__( 18 | backbone=backbone, 19 | neck=neck, 20 | rpn_head=rpn_head, 21 | roi_head=roi_head, 22 | train_cfg=train_cfg, 23 | test_cfg=test_cfg, 24 | pretrained=pretrained) 25 | -------------------------------------------------------------------------------- /mmdet/models/detectors/fcos.py: -------------------------------------------------------------------------------- 1 | from ..builder import DETECTORS 2 | from .single_stage import SingleStageDetector 3 | 4 | 5 | @DETECTORS.register_module() 6 | class FCOS(SingleStageDetector): 7 | """Implementation of `FCOS `_""" 8 | 9 | def __init__(self, 10 | backbone, 11 | neck, 12 | bbox_head, 13 | train_cfg=None, 14 | test_cfg=None, 15 | pretrained=None): 16 | super(FCOS, self).__init__(backbone, neck, bbox_head, train_cfg, 17 | test_cfg, pretrained) 18 | -------------------------------------------------------------------------------- /mmdet/models/detectors/fovea.py: -------------------------------------------------------------------------------- 1 | from ..builder import DETECTORS 2 | from .single_stage import SingleStageDetector 3 | 4 | 5 | @DETECTORS.register_module() 6 | class FOVEA(SingleStageDetector): 7 | """Implementation of `FoveaBox `_""" 8 | 9 | def __init__(self, 10 | backbone, 11 | neck, 12 | bbox_head, 13 | train_cfg=None, 14 | test_cfg=None, 15 | pretrained=None): 16 | super(FOVEA, self).__init__(backbone, neck, bbox_head, train_cfg, 17 | test_cfg, pretrained) 18 | -------------------------------------------------------------------------------- /mmdet/models/detectors/fsaf.py: -------------------------------------------------------------------------------- 1 | from ..builder import DETECTORS 2 | from .single_stage import SingleStageDetector 3 | 4 | 5 | @DETECTORS.register_module() 6 | class FSAF(SingleStageDetector): 7 | """Implementation of `FSAF `_""" 8 | 9 | def __init__(self, 10 | backbone, 11 | neck, 12 | bbox_head, 13 | train_cfg=None, 14 | test_cfg=None, 15 | pretrained=None): 16 | super(FSAF, self).__init__(backbone, neck, bbox_head, train_cfg, 17 | test_cfg, pretrained) 18 | -------------------------------------------------------------------------------- /mmdet/models/detectors/gfl.py: 
-------------------------------------------------------------------------------- 1 | from ..builder import DETECTORS 2 | from .single_stage import SingleStageDetector 3 | 4 | 5 | @DETECTORS.register_module() 6 | class GFL(SingleStageDetector): 7 | """Implementation of `GFL <https://arxiv.org/abs/2006.04388>`_""" 8 | 9 | def __init__(self, 10 | backbone, 11 | neck, 12 | bbox_head, 13 | train_cfg=None, 14 | test_cfg=None, 15 | pretrained=None): 16 | super(GFL, self).__init__(backbone, neck, bbox_head, train_cfg, 17 | test_cfg, pretrained) 18 | -------------------------------------------------------------------------------- /mmdet/models/detectors/grid_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..builder import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module() 6 | class GridRCNN(TwoStageDetector): 7 | """Grid R-CNN. 8 | 9 | This detector is the implementation of: 10 | - Grid R-CNN (https://arxiv.org/abs/1811.12030) 11 | - Grid R-CNN Plus: Faster and Better (https://arxiv.org/abs/1906.05688) 12 | """ 13 | 14 | def __init__(self, 15 | backbone, 16 | rpn_head, 17 | roi_head, 18 | train_cfg, 19 | test_cfg, 20 | neck=None, 21 | pretrained=None): 22 | super(GridRCNN, self).__init__( 23 | backbone=backbone, 24 | neck=neck, 25 | rpn_head=rpn_head, 26 | roi_head=roi_head, 27 | train_cfg=train_cfg, 28 | test_cfg=test_cfg, 29 | pretrained=pretrained) 30 | -------------------------------------------------------------------------------- /mmdet/models/detectors/htc.py: -------------------------------------------------------------------------------- 1 | from ..builder import DETECTORS 2 | from .cascade_rcnn import CascadeRCNN 3 | 4 | 5 | @DETECTORS.register_module() 6 | class HybridTaskCascade(CascadeRCNN): 7 | """Implementation of `HTC <https://arxiv.org/abs/1901.07518>`_""" 8 | 9 | def __init__(self, **kwargs): 10 | super(HybridTaskCascade, self).__init__(**kwargs) 11 | 12 | @property 13 | def with_semantic(self): 14 | """bool: whether the detector has a semantic head""" 15 | return self.roi_head.with_semantic 16 | -------------------------------------------------------------------------------- /mmdet/models/detectors/mask_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..builder import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module() 6 | class MaskRCNN(TwoStageDetector): 7 | """Implementation of `Mask R-CNN <https://arxiv.org/abs/1703.06870>`_""" 8 | 9 | def __init__(self, 10 | backbone, 11 | rpn_head, 12 | roi_head, 13 | train_cfg, 14 | test_cfg, 15 | neck=None, 16 | pretrained=None): 17 | super(MaskRCNN, self).__init__( 18 | backbone=backbone, 19 | neck=neck, 20 | rpn_head=rpn_head, 21 | roi_head=roi_head, 22 | train_cfg=train_cfg, 23 | test_cfg=test_cfg, 24 | pretrained=pretrained) 25 | -------------------------------------------------------------------------------- /mmdet/models/detectors/mask_scoring_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..builder import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module() 6 | class MaskScoringRCNN(TwoStageDetector): 7 | """Mask Scoring RCNN.
8 | 9 | https://arxiv.org/abs/1903.00241 10 | """ 11 | 12 | def __init__(self, 13 | backbone, 14 | rpn_head, 15 | roi_head, 16 | train_cfg, 17 | test_cfg, 18 | neck=None, 19 | pretrained=None): 20 | super(MaskScoringRCNN, self).__init__( 21 | backbone=backbone, 22 | neck=neck, 23 | rpn_head=rpn_head, 24 | roi_head=roi_head, 25 | train_cfg=train_cfg, 26 | test_cfg=test_cfg, 27 | pretrained=pretrained) 28 | -------------------------------------------------------------------------------- /mmdet/models/detectors/nasfcos.py: -------------------------------------------------------------------------------- 1 | from ..builder import DETECTORS 2 | from .single_stage import SingleStageDetector 3 | 4 | 5 | @DETECTORS.register_module() 6 | class NASFCOS(SingleStageDetector): 7 | """NAS-FCOS: Fast Neural Architecture Search for Object Detection. 8 | 9 | https://arxiv.org/abs/1906.04423 10 | """ 11 | 12 | def __init__(self, 13 | backbone, 14 | neck, 15 | bbox_head, 16 | train_cfg=None, 17 | test_cfg=None, 18 | pretrained=None): 19 | super(NASFCOS, self).__init__(backbone, neck, bbox_head, train_cfg, 20 | test_cfg, pretrained) 21 | -------------------------------------------------------------------------------- /mmdet/models/detectors/point_rend.py: -------------------------------------------------------------------------------- 1 | from ..builder import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module() 6 | class PointRend(TwoStageDetector): 7 | """PointRend: Image Segmentation as Rendering. 8 | 9 | This detector is the implementation of 10 | `PointRend <https://arxiv.org/abs/1912.08193>`_. 11 | 12 | """ 13 | 14 | def __init__(self, 15 | backbone, 16 | rpn_head, 17 | roi_head, 18 | train_cfg, 19 | test_cfg, 20 | neck=None, 21 | pretrained=None): 22 | super(PointRend, self).__init__( 23 | backbone=backbone, 24 | neck=neck, 25 | rpn_head=rpn_head, 26 | roi_head=roi_head, 27 | train_cfg=train_cfg, 28 | test_cfg=test_cfg, 29 | pretrained=pretrained) 30 | -------------------------------------------------------------------------------- /mmdet/models/detectors/retinanet.py: -------------------------------------------------------------------------------- 1 | from ..builder import DETECTORS 2 | from .single_stage import SingleStageDetector 3 | 4 | 5 | @DETECTORS.register_module() 6 | class RetinaNet(SingleStageDetector): 7 | """Implementation of `RetinaNet <https://arxiv.org/abs/1708.02002>`_""" 8 | 9 | def __init__(self, 10 | backbone, 11 | neck, 12 | bbox_head, 13 | train_cfg=None, 14 | test_cfg=None, 15 | pretrained=None): 16 | super(RetinaNet, self).__init__(backbone, neck, bbox_head, train_cfg, 17 | test_cfg, pretrained) 18 | -------------------------------------------------------------------------------- /mmdet/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .accuracy import Accuracy, accuracy 2 | from .ae_loss import AssociativeEmbeddingLoss 3 | from .balanced_l1_loss import BalancedL1Loss, balanced_l1_loss 4 | from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy, 5 | cross_entropy, mask_cross_entropy) 6 | from .chamfer_loss import ChamferLoss2D 7 | from .focal_loss import FocalLoss, sigmoid_focal_loss, SEPFocalLoss 8 | from .gaussian_focal_loss import GaussianFocalLoss 9 | from .gfocal_loss import DistributionFocalLoss, QualityFocalLoss 10 | from .ghm_loss import GHMC, GHMR 11 | from .iou_loss import (BoundedIoULoss, GIoULoss, IoULoss, bounded_iou_loss, 12 | iou_loss) 13 | from .mse_loss import MSELoss, mse_loss 14 | from .pisa_loss import
carl_loss, isr_p 15 | from .smooth_l1_loss import L1Loss, SmoothL1Loss, l1_loss, smooth_l1_loss 16 | from .utils import reduce_loss, weight_reduce_loss, weighted_loss 17 | 18 | __all__ = [ 19 | 'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy', 20 | 'mask_cross_entropy', 'CrossEntropyLoss', 'sigmoid_focal_loss', 21 | 'FocalLoss', 'smooth_l1_loss', 'SmoothL1Loss', 'balanced_l1_loss', 22 | 'BalancedL1Loss', 'mse_loss', 'MSELoss', 'iou_loss', 'bounded_iou_loss', 23 | 'IoULoss', 'BoundedIoULoss', 'GIoULoss', 'GHMC', 'GHMR', 'reduce_loss', 24 | 'weight_reduce_loss', 'weighted_loss', 'L1Loss', 'l1_loss', 'isr_p', 25 | 'carl_loss', 'AssociativeEmbeddingLoss', 'GaussianFocalLoss', 26 | 'QualityFocalLoss', 'DistributionFocalLoss', 'SEPFocalLoss', 'ChamferLoss2D' 27 | ] 28 | -------------------------------------------------------------------------------- /mmdet/models/losses/accuracy.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | def accuracy(pred, target, topk=1): 5 | """Calculate accuracy according to the prediction and target. 6 | 7 | Args: 8 | pred (torch.Tensor): The model prediction. 9 | target (torch.Tensor): The target of each prediction. 10 | topk (int | tuple[int], optional): If the predictions in ``topk`` 11 | match the target, the predictions will be regarded as 12 | correct ones. Defaults to 1. 13 | 14 | Returns: 15 | float | tuple[float]: If the input ``topk`` is a single integer, 16 | the function will return a single float as accuracy. If 17 | ``topk`` is a tuple containing multiple integers, the 18 | function will return a tuple containing accuracies of 19 | each ``topk`` number. 20 | """ 21 | assert isinstance(topk, (int, tuple)) 22 | if isinstance(topk, int): 23 | topk = (topk, ) 24 | return_single = True 25 | else: 26 | return_single = False 27 | 28 | maxk = max(topk) 29 | _, pred_label = pred.topk(maxk, dim=1) 30 | pred_label = pred_label.t() 31 | correct = pred_label.eq(target.view(1, -1).expand_as(pred_label)) 32 | 33 | res = [] 34 | for k in topk: 35 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 36 | res.append(correct_k.mul_(100.0 / pred.size(0))) 37 | return res[0] if return_single else res 38 | 39 | 40 | class Accuracy(nn.Module): 41 | 42 | def __init__(self, topk=(1, )): 43 | """Module to calculate the accuracy. 44 | 45 | Args: 46 | topk (tuple, optional): The criterion used to calculate the 47 | accuracy. Defaults to (1,). 48 | """ 49 | super().__init__() 50 | self.topk = topk 51 | 52 | def forward(self, pred, target): 53 | """Forward function to calculate accuracy. 54 | 55 | Args: 56 | pred (torch.Tensor): Prediction of models. 57 | target (torch.Tensor): Target for each prediction. 58 | 59 | Returns: 60 | tuple[float]: The accuracies under different topk criteria.
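
        Example (a minimal self-contained sketch with random inputs):
            >>> import torch
            >>> pred = torch.rand(4, 10)              # 4 samples, 10 classes
            >>> target = torch.randint(0, 10, (4, ))
            >>> top1, top5 = Accuracy(topk=(1, 5))(pred, target)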
61 | """ 62 | return accuracy(pred, target, self.topk) 63 | -------------------------------------------------------------------------------- /mmdet/models/losses/mse_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from ..builder import LOSSES 5 | from .utils import weighted_loss 6 | 7 | 8 | @weighted_loss 9 | def mse_loss(pred, target): 10 | """Warpper of mse loss""" 11 | return F.mse_loss(pred, target, reduction='none') 12 | 13 | 14 | @LOSSES.register_module() 15 | class MSELoss(nn.Module): 16 | """MSELoss 17 | 18 | Args: 19 | reduction (str, optional): The method that reduces the loss to a 20 | scalar. Options are "none", "mean" and "sum". 21 | loss_weight (float, optional): The weight of the loss. Defaults to 1.0 22 | """ 23 | 24 | def __init__(self, reduction='mean', loss_weight=1.0): 25 | super().__init__() 26 | self.reduction = reduction 27 | self.loss_weight = loss_weight 28 | 29 | def forward(self, pred, target, weight=None, avg_factor=None): 30 | """Forward function of loss 31 | 32 | Args: 33 | pred (torch.Tensor): The prediction. 34 | target (torch.Tensor): The learning target of the prediction. 35 | weight (torch.Tensor, optional): Weight of the loss for each 36 | prediction. Defaults to None. 37 | avg_factor (int, optional): Average factor that is used to average 38 | the loss. Defaults to None. 39 | 40 | Returns: 41 | torch.Tensor: The calculated loss 42 | """ 43 | loss = self.loss_weight * mse_loss( 44 | pred, 45 | target, 46 | weight, 47 | reduction=self.reduction, 48 | avg_factor=avg_factor) 49 | return loss 50 | -------------------------------------------------------------------------------- /mmdet/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .bfp import BFP 2 | from .fpn import FPN 3 | from .fpn_carafe import FPN_CARAFE 4 | from .hrfpn import HRFPN 5 | from .nas_fpn import NASFPN 6 | from .nasfcos_fpn import NASFCOS_FPN 7 | from .pafpn import PAFPN 8 | from .rfp import RFP 9 | 10 | __all__ = [ 11 | 'FPN', 'BFP', 'HRFPN', 'NASFPN', 'FPN_CARAFE', 'PAFPN', 'NASFCOS_FPN', 12 | 'RFP' 13 | ] 14 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_roi_head import BaseRoIHead 2 | from .bbox_heads import (BBoxHead, ConvFCBBoxHead, DoubleConvFCBBoxHead, 3 | Shared2FCBBoxHead, Shared4Conv1FCBBoxHead) 4 | from .cascade_roi_head import CascadeRoIHead 5 | from .double_roi_head import DoubleHeadRoIHead 6 | from .dynamic_roi_head import DynamicRoIHead 7 | from .grid_roi_head import GridRoIHead 8 | from .htc_roi_head import HybridTaskCascadeRoIHead 9 | from .mask_heads import (CoarseMaskHead, FCNMaskHead, FusedSemanticHead, 10 | GridHead, HTCMaskHead, MaskIoUHead, MaskPointHead) 11 | from .mask_scoring_roi_head import MaskScoringRoIHead 12 | from .pisa_roi_head import PISARoIHead 13 | from .point_rend_roi_head import PointRendRoIHead 14 | from .roi_extractors import SingleRoIExtractor 15 | from .shared_heads import ResLayer 16 | 17 | __all__ = [ 18 | 'BaseRoIHead', 'CascadeRoIHead', 'DoubleHeadRoIHead', 'MaskScoringRoIHead', 19 | 'HybridTaskCascadeRoIHead', 'GridRoIHead', 'ResLayer', 'BBoxHead', 20 | 'ConvFCBBoxHead', 'Shared2FCBBoxHead', 'Shared4Conv1FCBBoxHead', 21 | 'DoubleConvFCBBoxHead', 'FCNMaskHead', 'HTCMaskHead', 'FusedSemanticHead', 22 | 
'GridHead', 'MaskIoUHead', 'SingleRoIExtractor', 'PISARoIHead', 23 | 'PointRendRoIHead', 'MaskPointHead', 'CoarseMaskHead', 'DynamicRoIHead' 24 | ] 25 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/bbox_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_head import BBoxHead 2 | from .convfc_bbox_head import (ConvFCBBoxHead, Shared2FCBBoxHead, 3 | Shared4Conv1FCBBoxHead) 4 | from .double_bbox_head import DoubleConvFCBBoxHead 5 | 6 | __all__ = [ 7 | 'BBoxHead', 'ConvFCBBoxHead', 'Shared2FCBBoxHead', 8 | 'Shared4Conv1FCBBoxHead', 'DoubleConvFCBBoxHead' 9 | ] 10 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/double_roi_head.py: -------------------------------------------------------------------------------- 1 | from ..builder import HEADS 2 | from .standard_roi_head import StandardRoIHead 3 | 4 | 5 | @HEADS.register_module() 6 | class DoubleHeadRoIHead(StandardRoIHead): 7 | """RoI head for Double Head RCNN 8 | 9 | https://arxiv.org/abs/1904.06493 10 | """ 11 | 12 | def __init__(self, reg_roi_scale_factor, **kwargs): 13 | super(DoubleHeadRoIHead, self).__init__(**kwargs) 14 | self.reg_roi_scale_factor = reg_roi_scale_factor 15 | 16 | def _bbox_forward(self, x, rois): 17 | """Box head forward function used in both training and testing time""" 18 | bbox_cls_feats = self.bbox_roi_extractor( 19 | x[:self.bbox_roi_extractor.num_inputs], rois) 20 | bbox_reg_feats = self.bbox_roi_extractor( 21 | x[:self.bbox_roi_extractor.num_inputs], 22 | rois, 23 | roi_scale_factor=self.reg_roi_scale_factor) 24 | if self.with_shared_head: 25 | bbox_cls_feats = self.shared_head(bbox_cls_feats) 26 | bbox_reg_feats = self.shared_head(bbox_reg_feats) 27 | cls_score, bbox_pred = self.bbox_head(bbox_cls_feats, bbox_reg_feats) 28 | 29 | bbox_results = dict( 30 | cls_score=cls_score, 31 | bbox_pred=bbox_pred, 32 | bbox_feats=bbox_cls_feats) 33 | return bbox_results 34 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/mask_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .coarse_mask_head import CoarseMaskHead 2 | from .fcn_mask_head import FCNMaskHead 3 | from .fused_semantic_head import FusedSemanticHead 4 | from .grid_head import GridHead 5 | from .htc_mask_head import HTCMaskHead 6 | from .mask_point_head import MaskPointHead 7 | from .maskiou_head import MaskIoUHead 8 | 9 | __all__ = [ 10 | 'FCNMaskHead', 'HTCMaskHead', 'FusedSemanticHead', 'GridHead', 11 | 'MaskIoUHead', 'CoarseMaskHead', 'MaskPointHead' 12 | ] 13 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/mask_heads/htc_mask_head.py: -------------------------------------------------------------------------------- 1 | from mmcv.cnn import ConvModule 2 | 3 | from mmdet.models.builder import HEADS 4 | from .fcn_mask_head import FCNMaskHead 5 | 6 | 7 | @HEADS.register_module() 8 | class HTCMaskHead(FCNMaskHead): 9 | 10 | def __init__(self, with_conv_res=True, *args, **kwargs): 11 | super(HTCMaskHead, self).__init__(*args, **kwargs) 12 | self.with_conv_res = with_conv_res 13 | if self.with_conv_res: 14 | self.conv_res = ConvModule( 15 | self.conv_out_channels, 16 | self.conv_out_channels, 17 | 1, 18 | conv_cfg=self.conv_cfg, 19 | norm_cfg=self.norm_cfg) 20 | 21 | def init_weights(self): 22 | super(HTCMaskHead, 
self).init_weights() 23 | if self.with_conv_res: 24 | self.conv_res.init_weights() 25 | 26 | def forward(self, x, res_feat=None, return_logits=True, return_feat=True): 27 | if res_feat is not None: 28 | assert self.with_conv_res 29 | res_feat = self.conv_res(res_feat) 30 | x = x + res_feat 31 | for conv in self.convs: 32 | x = conv(x) 33 | res_feat = x 34 | outs = [] 35 | if return_logits: 36 | x = self.upsample(x) 37 | if self.upsample_method == 'deconv': 38 | x = self.relu(x) 39 | mask_pred = self.conv_logits(x) 40 | outs.append(mask_pred) 41 | if return_feat: 42 | outs.append(res_feat) 43 | return outs if len(outs) > 1 else outs[0] 44 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/roi_extractors/__init__.py: -------------------------------------------------------------------------------- 1 | from .generic_roi_extractor import GenericRoIExtractor 2 | from .single_level_roi_extractor import SingleRoIExtractor 3 | 4 | __all__ = [ 5 | 'SingleRoIExtractor', 6 | 'GenericRoIExtractor', 7 | ] 8 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/shared_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .res_layer import ResLayer 2 | 3 | __all__ = ['ResLayer'] 4 | -------------------------------------------------------------------------------- /mmdet/models/roi_heads/shared_heads/res_layer.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from mmcv.cnn import constant_init, kaiming_init 3 | from mmcv.runner import load_checkpoint 4 | 5 | from mmdet.core import auto_fp16 6 | from mmdet.models.backbones import ResNet 7 | from mmdet.models.builder import SHARED_HEADS 8 | from mmdet.models.utils import ResLayer as _ResLayer 9 | from mmdet.utils import get_root_logger 10 | 11 | 12 | @SHARED_HEADS.register_module() 13 | class ResLayer(nn.Module): 14 | 15 | def __init__(self, 16 | depth, 17 | stage=3, 18 | stride=2, 19 | dilation=1, 20 | style='pytorch', 21 | norm_cfg=dict(type='BN', requires_grad=True), 22 | norm_eval=True, 23 | with_cp=False, 24 | dcn=None): 25 | super(ResLayer, self).__init__() 26 | self.norm_eval = norm_eval 27 | self.norm_cfg = norm_cfg 28 | self.stage = stage 29 | self.fp16_enabled = False 30 | block, stage_blocks = ResNet.arch_settings[depth] 31 | stage_block = stage_blocks[stage] 32 | planes = 64 * 2**stage 33 | inplanes = 64 * 2**(stage - 1) * block.expansion 34 | 35 | res_layer = _ResLayer( 36 | block, 37 | inplanes, 38 | planes, 39 | stage_block, 40 | stride=stride, 41 | dilation=dilation, 42 | style=style, 43 | with_cp=with_cp, 44 | norm_cfg=self.norm_cfg, 45 | dcn=dcn) 46 | self.add_module(f'layer{stage + 1}', res_layer) 47 | 48 | def init_weights(self, pretrained=None): 49 | """Initialize the weights in the module 50 | 51 | Args: 52 | pretrained (str, optional): Path to pre-trained weights. 53 | Defaults to None. 
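
        Example (an illustrative sketch; the ``torchvision://`` shorthand is
        resolved by ``mmcv.runner.load_checkpoint``):
            >>> shared_head = ResLayer(depth=50)
            >>> shared_head.init_weights(pretrained='torchvision://resnet50')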
54 | """ 55 | if isinstance(pretrained, str): 56 | logger = get_root_logger() 57 | load_checkpoint(self, pretrained, strict=False, logger=logger) 58 | elif pretrained is None: 59 | for m in self.modules(): 60 | if isinstance(m, nn.Conv2d): 61 | kaiming_init(m) 62 | elif isinstance(m, nn.BatchNorm2d): 63 | constant_init(m, 1) 64 | else: 65 | raise TypeError('pretrained must be a str or None') 66 | 67 | @auto_fp16() 68 | def forward(self, x): 69 | res_layer = getattr(self, f'layer{self.stage + 1}') 70 | out = res_layer(x) 71 | return out 72 | 73 | def train(self, mode=True): 74 | super(ResLayer, self).train(mode) 75 | if self.norm_eval: 76 | for m in self.modules(): 77 | if isinstance(m, nn.BatchNorm2d): 78 | m.eval() 79 | -------------------------------------------------------------------------------- /mmdet/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .res_layer import ResLayer 2 | 3 | __all__ = ['ResLayer'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .context_block import ContextBlock 2 | from .conv_ws import ConvWS2d, conv_ws_2d 3 | from .corner_pool import CornerPool, TLPool, BRPool 4 | from .dcn import (DeformConv, DeformConvPack, DeformRoIPooling, 5 | DeformRoIPoolingPack, ModulatedDeformConv, 6 | ModulatedDeformConvPack, ModulatedDeformRoIPoolingPack, 7 | deform_conv, deform_roi_pooling, modulated_deform_conv) 8 | from .generalized_attention import GeneralizedAttention 9 | from .masked_conv import MaskedConv2d 10 | from .nms import batched_nms, nms, nms_match, soft_nms 11 | from .non_local import NonLocal2D 12 | from .plugin import build_plugin_layer 13 | from .point_sample import (SimpleRoIAlign, point_sample, 14 | rel_roi_point_to_rel_img_point) 15 | from .roi_align import RoIAlign, roi_align 16 | from .roi_pool import RoIPool, roi_pool 17 | from .saconv import SAConv2d 18 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss 19 | from .utils import get_compiler_version, get_compiling_cuda_version 20 | from .wrappers import Conv2d, ConvTranspose2d, Linear, MaxPool2d 21 | 22 | __all__ = [ 23 | 'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool', 24 | 'DeformConv', 'DeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 25 | 'ModulatedDeformRoIPoolingPack', 'ModulatedDeformConv', 26 | 'ModulatedDeformConvPack', 'deform_conv', 'modulated_deform_conv', 27 | 'deform_roi_pooling', 'SigmoidFocalLoss', 'sigmoid_focal_loss', 28 | 'MaskedConv2d', 'ContextBlock', 'GeneralizedAttention', 'NonLocal2D', 29 | 'get_compiler_version', 'get_compiling_cuda_version', 'ConvWS2d', 30 | 'conv_ws_2d', 'build_plugin_layer', 'batched_nms', 'Conv2d', 31 | 'ConvTranspose2d', 'MaxPool2d', 'Linear', 'nms_match', 'CornerPool', 32 | 'point_sample', 'rel_roi_point_to_rel_img_point', 'SimpleRoIAlign', 33 | 'SAConv2d', 'TLPool', 'BRPool' 34 | ] 35 | -------------------------------------------------------------------------------- /mmdet/ops/carafe/__init__.py: -------------------------------------------------------------------------------- 1 | from .carafe import CARAFE, CARAFENaive, CARAFEPack, carafe, carafe_naive 2 | 3 | __all__ = ['carafe', 'carafe_naive', 'CARAFE', 'CARAFENaive', 'CARAFEPack'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/carafe/grad_check.py: 
-------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | import mmcv 5 | import torch 6 | from torch.autograd import gradcheck 7 | 8 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 9 | from mmdet.ops.carafe import CARAFE, CARAFENaive # noqa: E402, isort:skip 10 | from mmdet.ops.carafe import carafe, carafe_naive # noqa: E402, isort:skip 11 | 12 | feat = torch.randn(2, 64, 3, 3, requires_grad=True, device='cuda:0').double() 13 | mask = torch.randn( 14 | 2, 100, 6, 6, requires_grad=True, device='cuda:0').sigmoid().double() 15 | 16 | print('Gradcheck for carafe...') 17 | test = gradcheck(CARAFE(5, 4, 2), (feat, mask), atol=1e-4, eps=1e-4) 18 | print(test) 19 | 20 | print('Gradcheck for carafe naive...') 21 | test = gradcheck(CARAFENaive(5, 4, 2), (feat, mask), atol=1e-4, eps=1e-4) 22 | print(test) 23 | 24 | feat = torch.randn( 25 | 2, 1024, 100, 100, requires_grad=True, device='cuda:0').float() 26 | mask = torch.randn( 27 | 2, 25, 200, 200, requires_grad=True, device='cuda:0').sigmoid().float() 28 | loop_num = 500 29 | 30 | time_forward = 0 31 | time_backward = 0 32 | bar = mmcv.ProgressBar(loop_num) 33 | timer = mmcv.Timer() 34 | for i in range(loop_num): 35 | x = carafe(feat.clone(), mask.clone(), 5, 1, 2) 36 | torch.cuda.synchronize() 37 | time_forward += timer.since_last_check() 38 | x.sum().backward(retain_graph=True) 39 | torch.cuda.synchronize() 40 | time_backward += timer.since_last_check() 41 | bar.update() 42 | forward_speed = (time_forward + 1e-3) * 1e3 / loop_num 43 | backward_speed = (time_backward + 1e-3) * 1e3 / loop_num 44 | print(f'\nCARAFE time forward: {forward_speed} ' 45 | f'ms/iter | time backward: {backward_speed} ms/iter') 46 | 47 | time_naive_forward = 0 48 | time_naive_backward = 0 49 | bar = mmcv.ProgressBar(loop_num) 50 | timer = mmcv.Timer() 51 | for i in range(loop_num): 52 | x = carafe_naive(feat.clone(), mask.clone(), 5, 1, 2) 53 | torch.cuda.synchronize() 54 | time_naive_forward += timer.since_last_check() 55 | x.sum().backward(retain_graph=True) 56 | torch.cuda.synchronize() 57 | time_naive_backward += timer.since_last_check() 58 | bar.update() 59 | forward_speed = (time_naive_forward + 1e-3) * 1e3 / loop_num 60 | backward_speed = (time_naive_backward + 1e-3) * 1e3 / loop_num 61 | print('\nCARAFE naive time forward: ' 62 | f'{forward_speed} ms/iter | time backward: {backward_speed} ms/iter') 63 | -------------------------------------------------------------------------------- /mmdet/ops/carafe/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 4 | 5 | NVCC_ARGS = [ 6 | '-D__CUDA_NO_HALF_OPERATORS__', 7 | '-D__CUDA_NO_HALF_CONVERSIONS__', 8 | '-D__CUDA_NO_HALF2_OPERATORS__', 9 | ] 10 | 11 | setup( 12 | name='carafe', 13 | ext_modules=[ 14 | CUDAExtension( 15 | 'carafe_ext', [ 16 | 'src/cuda/carafe_cuda.cpp', 'src/cuda/carafe_cuda_kernel.cu', 17 | 'src/carafe_ext.cpp' 18 | ], 19 | define_macros=[('WITH_CUDA', None)], 20 | extra_compile_args={ 21 | 'cxx': [], 22 | 'nvcc': NVCC_ARGS 23 | }), 24 | CUDAExtension( 25 | 'carafe_naive_ext', [ 26 | 'src/cuda/carafe_naive_cuda.cpp', 27 | 'src/cuda/carafe_naive_cuda_kernel.cu', 28 | 'src/carafe_naive_ext.cpp' 29 | ], 30 | define_macros=[('WITH_CUDA', None)], 31 | extra_compile_args={ 32 | 'cxx': [], 33 | 'nvcc': NVCC_ARGS 34 | }) 35 | ], 36 | cmdclass={'build_ext': BuildExtension}) 37 | 
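# A typical local build of the two extensions above (an illustrative
# invocation; it assumes a CUDA-enabled PyTorch installation):
#
#   python setup.py build_ext --inplace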
-------------------------------------------------------------------------------- /mmdet/ops/carafe/src/carafe_ext.cpp: -------------------------------------------------------------------------------- 1 | #include <ATen/ATen.h> 2 | #include <torch/extension.h> 3 | 4 | #include <cmath> 5 | #include <vector> 6 | 7 | #ifdef WITH_CUDA 8 | int carafe_forward_cuda(at::Tensor features, at::Tensor rfeatures, 9 | at::Tensor masks, at::Tensor rmasks, int kernel_size, 10 | int group_size, int scale_factor, at::Tensor routput, 11 | at::Tensor output); 12 | 13 | int carafe_backward_cuda(at::Tensor top_grad, at::Tensor rfeatures, 14 | at::Tensor masks, int kernel_size, int group_size, 15 | int scale_factor, at::Tensor rtop_grad, 16 | at::Tensor rbottom_grad_hs, at::Tensor rbottom_grad, 17 | at::Tensor rmask_grad, at::Tensor bottom_grad, 18 | at::Tensor mask_grad); 19 | #endif 20 | 21 | int carafe_forward(at::Tensor features, at::Tensor rfeatures, 22 | at::Tensor masks, at::Tensor rmasks, int kernel_size, 23 | int group_size, int scale_factor, at::Tensor routput, 24 | at::Tensor output) { 25 | if (features.device().is_cuda()) { 26 | #ifdef WITH_CUDA 27 | return carafe_forward_cuda(features, rfeatures, masks, rmasks, kernel_size, 28 | group_size, scale_factor, routput, output); 29 | #else 30 | AT_ERROR("carafe is not compiled with GPU support"); 31 | #endif 32 | } 33 | AT_ERROR("carafe is not implemented on CPU"); 34 | } 35 | 36 | int carafe_backward(at::Tensor top_grad, at::Tensor rfeatures, 37 | at::Tensor masks, int kernel_size, int group_size, 38 | int scale_factor, at::Tensor rtop_grad, 39 | at::Tensor rbottom_grad_hs, at::Tensor rbottom_grad, 40 | at::Tensor rmask_grad, at::Tensor bottom_grad, 41 | at::Tensor mask_grad) { 42 | if (top_grad.device().is_cuda()) { 43 | #ifdef WITH_CUDA 44 | return carafe_backward_cuda(top_grad, rfeatures, masks, kernel_size, 45 | group_size, scale_factor, rtop_grad, rbottom_grad_hs, rbottom_grad, 46 | rmask_grad, bottom_grad, mask_grad); 47 | #else 48 | AT_ERROR("carafe is not compiled with GPU support"); 49 | #endif 50 | } 51 | AT_ERROR("carafe is not implemented on CPU"); 52 | } 53 | 54 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 55 | m.def("forward", &carafe_forward, "carafe forward"); 56 | m.def("backward", &carafe_backward, "carafe backward"); 57 | } 58 | -------------------------------------------------------------------------------- /mmdet/ops/carafe/src/carafe_naive_ext.cpp: -------------------------------------------------------------------------------- 1 | #include <ATen/ATen.h> 2 | #include <torch/extension.h> 3 | 4 | #include <cmath> 5 | #include <vector> 6 | 7 | #ifdef WITH_CUDA 8 | int carafe_naive_forward_cuda(at::Tensor features, at::Tensor masks, 9 | int kernel_size, int group_size, int scale_factor, 10 | at::Tensor output); 11 | 12 | int carafe_naive_backward_cuda(at::Tensor top_grad, at::Tensor features, 13 | at::Tensor masks, int kernel_size, 14 | int group_size, int scale_factor, 15 | at::Tensor bottom_grad, at::Tensor mask_grad); 16 | #endif 17 | 18 | int carafe_naive_forward(at::Tensor features, at::Tensor masks, 19 | int kernel_size, int group_size, int scale_factor, 20 | at::Tensor output) { 21 | if (features.device().is_cuda()) { 22 | #ifdef WITH_CUDA 23 | return carafe_naive_forward_cuda(features, masks, kernel_size, 24 | group_size, scale_factor, output); 25 | #else 26 | AT_ERROR("carafe naive is not compiled with GPU support"); 27 | #endif 28 | } 29 | AT_ERROR("carafe naive is not implemented on CPU"); 30 | } 31 | 32 | int carafe_naive_backward(at::Tensor top_grad, at::Tensor features, 33 | at::Tensor masks, int kernel_size, 34 | int
group_size, int scale_factor, 35 | at::Tensor bottom_grad, at::Tensor mask_grad) { 36 | if (top_grad.device().is_cuda()) { 37 | #ifdef WITH_CUDA 38 | return carafe_naive_backward_cuda(top_grad, features, masks, kernel_size, 39 | group_size, scale_factor, bottom_grad, mask_grad); 40 | #else 41 | AT_ERROR("carafe naive is not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("carafe naive is not implemented on CPU"); 45 | 46 | } 47 | 48 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 49 | m.def("forward", &carafe_naive_forward, "carafe_naive forward"); 50 | m.def("backward", &carafe_naive_backward, "carafe_naive backward"); 51 | } 52 | -------------------------------------------------------------------------------- /mmdet/ops/chamfer_2d/__init__.py: -------------------------------------------------------------------------------- 1 | from .dist_chamfer_2d import Chamfer2D 2 | 3 | __all__ = ['Chamfer2D'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/chamfer_2d/dist_chamfer_2d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | 5 | from . import chamfer_2d 6 | 7 | 8 | # Chamfer's distance module @thibaultgroueix 9 | # GPU tensors only 10 | class ChamferFunction2D(Function): 11 | @staticmethod 12 | def forward(ctx, xyz1, xyz2): 13 | batchsize, n, _ = xyz1.size() 14 | _, m, _ = xyz2.size() 15 | device = xyz1.device 16 | 17 | dist1 = torch.zeros(batchsize, n) 18 | dist2 = torch.zeros(batchsize, m) 19 | 20 | idx1 = torch.zeros(batchsize, n).type(torch.IntTensor) 21 | idx2 = torch.zeros(batchsize, m).type(torch.IntTensor) 22 | 23 | dist1 = dist1.to(device) 24 | dist2 = dist2.to(device) 25 | idx1 = idx1.to(device) 26 | idx2 = idx2.to(device) 27 | torch.cuda.set_device(device) 28 | 29 | chamfer_2d.forward(xyz1, xyz2, dist1, dist2, idx1, idx2) 30 | ctx.save_for_backward(xyz1, xyz2, idx1, idx2) 31 | return dist1, dist2, idx1, idx2 32 | 33 | @staticmethod 34 | def backward(ctx, graddist1, graddist2, gradidx1, gradidx2): 35 | xyz1, xyz2, idx1, idx2 = ctx.saved_tensors 36 | graddist1 = graddist1.contiguous() 37 | graddist2 = graddist2.contiguous() 38 | device = graddist1.device 39 | 40 | gradxyz1 = torch.zeros(xyz1.size()) 41 | gradxyz2 = torch.zeros(xyz2.size()) 42 | 43 | gradxyz1 = gradxyz1.to(device) 44 | gradxyz2 = gradxyz2.to(device) 45 | chamfer_2d.backward( 46 | xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2 47 | ) 48 | return gradxyz1, gradxyz2 49 | 50 | 51 | class Chamfer2D(nn.Module): 52 | def __init__(self): 53 | super(Chamfer2D, self).__init__() 54 | 55 | def forward(self, input1, input2): 56 | input1 = input1.contiguous() 57 | input2 = input2.contiguous() 58 | return ChamferFunction2D.apply(input1, input2) 59 |
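A minimal usage sketch for Chamfer2D above; the shapes are illustrative and, as noted in the module, GPU tensors are required:

import torch
from mmdet.ops.chamfer_2d import Chamfer2D

# two point sets of shape (batch, num_points, 2)
xyz1 = torch.rand(2, 100, 2, device='cuda:0')
xyz2 = torch.rand(2, 80, 2, device='cuda:0')
dist1, dist2, idx1, idx2 = Chamfer2D()(xyz1, xyz2)
# dist1[b, i]: distance from xyz1[b, i] to its nearest neighbour in xyz2;
# idx1[b, i]: the index of that neighbour (dist2/idx2 are the reverse map)
assert dist1.shape == (2, 100) and idx2.shape == (2, 80)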
-------------------------------------------------------------------------------- /mmdet/ops/chamfer_2d/src/chamfer_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | #include <vector> 3 | 4 | ///TMP 5 | //#include "common.h" 6 | /// NOT TMP 7 | 8 | 9 | int chamfer_cuda_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist1, at::Tensor dist2, at::Tensor idx1, at::Tensor idx2); 10 | 11 | 12 | int chamfer_cuda_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxyz1, at::Tensor gradxyz2, at::Tensor graddist1, at::Tensor graddist2, at::Tensor idx1, at::Tensor idx2); 13 | 14 | 15 | 16 | 17 | int chamfer_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist1, at::Tensor dist2, at::Tensor idx1, at::Tensor idx2) { 18 | return chamfer_cuda_forward(xyz1, xyz2, dist1, dist2, idx1, idx2); 19 | } 20 | 21 | 22 | int chamfer_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxyz1, at::Tensor gradxyz2, at::Tensor graddist1, 23 | at::Tensor graddist2, at::Tensor idx1, at::Tensor idx2) { 24 | 25 | return chamfer_cuda_backward(xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2); 26 | } 27 | 28 | 29 | 30 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 31 | m.def("forward", &chamfer_forward, "chamfer forward (CUDA)"); 32 | m.def("backward", &chamfer_backward, "chamfer backward (CUDA)"); 33 | } -------------------------------------------------------------------------------- /mmdet/ops/corner_pool/__init__.py: -------------------------------------------------------------------------------- 1 | from .corner_pool import CornerPool, TLPool, BRPool 2 | 3 | __all__ = ['CornerPool', 'TLPool', 'BRPool'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | from .deform_conv import (DeformConv, DeformConvPack, ModulatedDeformConv, 2 | ModulatedDeformConvPack, deform_conv, 3 | modulated_deform_conv) 4 | from .deform_pool import (DeformRoIPooling, DeformRoIPoolingPack, 5 | ModulatedDeformRoIPoolingPack, deform_roi_pooling) 6 | 7 | __all__ = [ 8 | 'DeformConv', 'DeformConvPack', 'ModulatedDeformConv', 9 | 'ModulatedDeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 10 | 'ModulatedDeformRoIPoolingPack', 'deform_conv', 'modulated_deform_conv', 11 | 'deform_roi_pooling' 12 | ] 13 | -------------------------------------------------------------------------------- /mmdet/ops/masked_conv/__init__.py: -------------------------------------------------------------------------------- 1 | from .masked_conv import MaskedConv2d, masked_conv2d 2 | 3 | __all__ = ['masked_conv2d', 'MaskedConv2d'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/masked_conv/src/masked_conv2d_ext.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <cmath> 4 | #include <vector> 5 | 6 | #ifdef WITH_CUDA 7 | int masked_im2col_forward_cuda(const at::Tensor im, const at::Tensor mask_h_idx, 8 | const at::Tensor mask_w_idx, const int kernel_h, 9 | const int kernel_w, const int pad_h, 10 | const int pad_w, at::Tensor col); 11 | 12 | int masked_col2im_forward_cuda(const at::Tensor col, 13 | const at::Tensor mask_h_idx, 14 | const at::Tensor mask_w_idx, int height, 15 | int width, int channels, at::Tensor im); 16 | #endif 17 | 18 | int masked_im2col_forward(const at::Tensor im, const at::Tensor mask_h_idx, 19 | const at::Tensor mask_w_idx, const int kernel_h, 20 | const int kernel_w, const int pad_h, 21 | const int pad_w, at::Tensor col) { 22 | if (im.device().is_cuda()) { 23 | #ifdef WITH_CUDA 24 | return masked_im2col_forward_cuda(im, mask_h_idx, mask_w_idx, kernel_h, 25 | kernel_w, pad_h, pad_w, col); 26 | #else 27 | AT_ERROR("masked_im2col is not compiled with GPU support"); 28 | #endif 29 | } 30 | AT_ERROR("masked_im2col is not implemented on CPU"); 31 | } 32 | 33 | int masked_col2im_forward(const at::Tensor col, 34 | const at::Tensor mask_h_idx, 35 | const at::Tensor mask_w_idx, int height, 36 | int width, int channels, at::Tensor im) { 37 | if (col.device().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return
masked_col2im_forward_cuda(col, mask_h_idx, mask_w_idx, height, 40 | width, channels, im); 41 | #else 42 | AT_ERROR("masked_col2im is not compiled with GPU support"); 43 | #endif 44 | } 45 | AT_ERROR("masked_col2im is not implemented on CPU"); 46 | } 47 | 48 | 49 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 50 | m.def("masked_im2col_forward", &masked_im2col_forward, 51 | "masked_im2col forward"); 52 | m.def("masked_col2im_forward", &masked_col2im_forward, 53 | "masked_col2im forward"); 54 | } 55 | -------------------------------------------------------------------------------- /mmdet/ops/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_wrapper import batched_nms, nms, nms_match, soft_nms 2 | 3 | __all__ = ['nms', 'soft_nms', 'batched_nms', 'nms_match'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/nms/src/cuda/nms_cuda.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include <torch/extension.h> 3 | 4 | #define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ") 5 | 6 | at::Tensor nms_cuda_forward(const at::Tensor boxes, float nms_overlap_thresh); 7 | 8 | at::Tensor nms_cuda(const at::Tensor& dets, const float threshold) { 9 | CHECK_CUDA(dets); 10 | if (dets.numel() == 0) 11 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 12 | return nms_cuda_forward(dets, threshold); 13 | } 14 | -------------------------------------------------------------------------------- /mmdet/ops/nms/src/nms_ext.cpp: -------------------------------------------------------------------------------- 1 | // Modified from https://github.com/bharatsingh430/soft-nms/blob/master/lib/nms/cpu_nms.pyx, Soft-NMS is added 2 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
3 | #include <torch/extension.h> 4 | 5 | at::Tensor nms_cpu(const at::Tensor& dets, const float threshold); 6 | 7 | at::Tensor soft_nms_cpu(const at::Tensor& dets, const float threshold, 8 | const unsigned char method, const float sigma, const 9 | float min_score); 10 | 11 | std::vector<std::vector<int> > nms_match_cpu(const at::Tensor& dets, const float threshold); 12 | 13 | 14 | #ifdef WITH_CUDA 15 | at::Tensor nms_cuda(const at::Tensor& dets, const float threshold); 16 | #endif 17 | 18 | at::Tensor nms(const at::Tensor& dets, const float threshold){ 19 | if (dets.device().is_cuda()) { 20 | #ifdef WITH_CUDA 21 | return nms_cuda(dets, threshold); 22 | #else 23 | AT_ERROR("nms is not compiled with GPU support"); 24 | #endif 25 | } 26 | return nms_cpu(dets, threshold); 27 | } 28 | 29 | at::Tensor soft_nms(const at::Tensor& dets, const float threshold, 30 | const unsigned char method, const float sigma, const 31 | float min_score) { 32 | if (dets.device().is_cuda()) { 33 | AT_ERROR("soft_nms is not implemented on GPU"); 34 | } 35 | return soft_nms_cpu(dets, threshold, method, sigma, min_score); 36 | } 37 | 38 | std::vector<std::vector<int> > nms_match(const at::Tensor& dets, const float threshold) { 39 | if (dets.type().is_cuda()) { 40 | AT_ERROR("nms_match is not implemented on GPU"); 41 | } 42 | return nms_match_cpu(dets, threshold); 43 | } 44 | 45 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 46 | m.def("nms", &nms, "non-maximum suppression"); 47 | m.def("soft_nms", &soft_nms, "soft non-maximum suppression"); 48 | m.def("nms_match", &nms_match, "non-maximum suppression match"); 49 | } 50 | -------------------------------------------------------------------------------- /mmdet/ops/plugin.py: -------------------------------------------------------------------------------- 1 | from mmcv.cnn import ConvModule 2 | 3 | from .context_block import ContextBlock 4 | from .generalized_attention import GeneralizedAttention 5 | from .non_local import NonLocal2D 6 | 7 | plugin_cfg = { 8 | # format: layer_type: (abbreviation, module) 9 | 'ContextBlock': ('context_block', ContextBlock), 10 | 'GeneralizedAttention': ('gen_attention_block', GeneralizedAttention), 11 | 'NonLocal2D': ('nonlocal_block', NonLocal2D), 12 | 'ConvModule': ('conv_block', ConvModule), 13 | } 14 | 15 | 16 | def build_plugin_layer(cfg, postfix='', **kwargs): 17 | """ Build plugin layer 18 | 19 | Args: 20 | cfg (dict): cfg should contain: 21 | type (str): identifies the plugin layer type. 22 | layer args: args needed to instantiate a plugin layer. 23 | postfix (int, str): appended to the layer abbreviation to 24 | create the named layer. 25 | 26 | Returns: 27 | name (str): abbreviation + postfix 28 | layer (nn.Module): created plugin layer 29 | """ 30 | assert isinstance(cfg, dict) and 'type' in cfg 31 | cfg_ = cfg.copy() 32 | 33 | layer_type = cfg_.pop('type') 34 | if layer_type not in plugin_cfg: 35 | raise KeyError(f'Unrecognized plugin type {layer_type}') 36 | else: 37 | abbr, plugin_layer = plugin_cfg[layer_type] 38 | 39 | assert isinstance(postfix, (int, str)) 40 | name = abbr + str(postfix) 41 | 42 | layer = plugin_layer(**kwargs, **cfg_) 43 | 44 | return name, layer 45 |
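A minimal usage sketch for build_plugin_layer, using the ConvModule entry registered in plugin_cfg above (the constructor arguments may sit in cfg, as here, or be passed as kwargs):

from mmdet.ops import build_plugin_layer

name, layer = build_plugin_layer(
    dict(type='ConvModule', in_channels=16, out_channels=16, kernel_size=3),
    postfix=1)
assert name == 'conv_block1'  # abbreviation from plugin_cfg + str(postfix)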
-------------------------------------------------------------------------------- /mmdet/ops/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | from .roi_align import RoIAlign, roi_align 2 | 3 | __all__ = ['roi_align', 'RoIAlign'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/gradcheck.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | import numpy as np 5 | import torch 6 | from torch.autograd import gradcheck 7 | 8 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 9 | from roi_align import RoIAlign # noqa: E402, isort:skip 10 | 11 | feat_size = 15 12 | spatial_scale = 1.0 / 8 13 | img_size = feat_size / spatial_scale 14 | num_imgs = 2 15 | num_rois = 20 16 | 17 | batch_ind = np.random.randint(num_imgs, size=(num_rois, 1)) 18 | rois = np.random.rand(num_rois, 4) * img_size * 0.5 19 | rois[:, 2:] += img_size * 0.5 20 | rois = np.hstack((batch_ind, rois)) 21 | 22 | feat = torch.randn( 23 | num_imgs, 16, feat_size, feat_size, requires_grad=True, device='cuda:0') 24 | rois = torch.from_numpy(rois).float().cuda() 25 | inputs = (feat, rois) 26 | print('Gradcheck for roi align...') 27 | test = gradcheck(RoIAlign(3, spatial_scale), inputs, atol=1e-3, eps=1e-3) 28 | print(test) 29 | test = gradcheck(RoIAlign(3, spatial_scale, 2), inputs, atol=1e-3, eps=1e-3) 30 | print(test) 31 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/__init__.py: -------------------------------------------------------------------------------- 1 | from .roi_pool import RoIPool, roi_pool 2 | 3 | __all__ = ['roi_pool', 'RoIPool'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/gradcheck.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | import torch 5 | from torch.autograd import gradcheck 6 | 7 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 8 | from roi_pool import RoIPool # noqa: E402, isort:skip 9 | 10 | feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda() 11 | rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55], 12 | [1, 67, 40, 110, 120]]).cuda() 13 | inputs = (feat, rois) 14 | print('Gradcheck for roi pooling...') 15 | test = gradcheck(RoIPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3) 16 | print(test) 17 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from .
import roi_pool_ext 8 | 9 | 10 | class RoIPoolFunction(Function): 11 | 12 | @staticmethod 13 | def forward(ctx, features, rois, out_size, spatial_scale): 14 | assert features.is_cuda 15 | out_h, out_w = _pair(out_size) 16 | assert isinstance(out_h, int) and isinstance(out_w, int) 17 | ctx.save_for_backward(rois) 18 | num_channels = features.size(1) 19 | num_rois = rois.size(0) 20 | out_size = (num_rois, num_channels, out_h, out_w) 21 | output = features.new_zeros(out_size) 22 | argmax = features.new_zeros(out_size, dtype=torch.int) 23 | roi_pool_ext.forward(features, rois, out_h, out_w, spatial_scale, 24 | output, argmax) 25 | ctx.spatial_scale = spatial_scale 26 | ctx.feature_size = features.size() 27 | ctx.argmax = argmax 28 | 29 | return output 30 | 31 | @staticmethod 32 | @once_differentiable 33 | def backward(ctx, grad_output): 34 | assert grad_output.is_cuda 35 | spatial_scale = ctx.spatial_scale 36 | feature_size = ctx.feature_size 37 | argmax = ctx.argmax 38 | rois = ctx.saved_tensors[0] 39 | assert feature_size is not None 40 | 41 | grad_input = grad_rois = None 42 | if ctx.needs_input_grad[0]: 43 | grad_input = grad_output.new_zeros(feature_size) 44 | roi_pool_ext.backward(grad_output.contiguous(), rois, argmax, 45 | spatial_scale, grad_input) 46 | 47 | return grad_input, grad_rois, None, None 48 | 49 | 50 | roi_pool = RoIPoolFunction.apply 51 | 52 | 53 | class RoIPool(nn.Module): 54 | 55 | def __init__(self, out_size, spatial_scale, use_torchvision=False): 56 | super(RoIPool, self).__init__() 57 | 58 | self.out_size = _pair(out_size) 59 | self.spatial_scale = float(spatial_scale) 60 | self.use_torchvision = use_torchvision 61 | 62 | def forward(self, features, rois): 63 | if self.use_torchvision: 64 | from torchvision.ops import roi_pool as tv_roi_pool 65 | return tv_roi_pool(features, rois, self.out_size, 66 | self.spatial_scale) 67 | else: 68 | return roi_pool(features, rois, self.out_size, self.spatial_scale) 69 | 70 | def __repr__(self): 71 | format_str = self.__class__.__name__ 72 | format_str += f'(out_size={self.out_size}, ' 73 | format_str += f'spatial_scale={self.spatial_scale}, ' 74 | format_str += f'use_torchvision={self.use_torchvision})' 75 | return format_str 76 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/__init__.py: -------------------------------------------------------------------------------- 1 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss 2 | 3 | __all__ = ['SigmoidFocalLoss', 'sigmoid_focal_loss'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/sigmoid_focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | 5 | from . 
import sigmoid_focal_loss_ext 6 | 7 | 8 | class SigmoidFocalLossFunction(Function): 9 | 10 | @staticmethod 11 | def forward(ctx, input, target, gamma=2.0, alpha=0.25): 12 | ctx.save_for_backward(input, target) 13 | num_classes = input.shape[1] 14 | ctx.num_classes = num_classes 15 | ctx.gamma = gamma 16 | ctx.alpha = alpha 17 | 18 | loss = sigmoid_focal_loss_ext.forward(input, target, num_classes, 19 | gamma, alpha) 20 | return loss 21 | 22 | @staticmethod 23 | @once_differentiable 24 | def backward(ctx, d_loss): 25 | input, target = ctx.saved_tensors 26 | num_classes = ctx.num_classes 27 | gamma = ctx.gamma 28 | alpha = ctx.alpha 29 | d_loss = d_loss.contiguous() 30 | d_input = sigmoid_focal_loss_ext.backward(input, target, d_loss, 31 | num_classes, gamma, alpha) 32 | return d_input, None, None, None, None 33 | 34 | 35 | sigmoid_focal_loss = SigmoidFocalLossFunction.apply 36 | 37 | 38 | # TODO: remove this module 39 | class SigmoidFocalLoss(nn.Module): 40 | 41 | def __init__(self, gamma, alpha): 42 | super(SigmoidFocalLoss, self).__init__() 43 | self.gamma = gamma 44 | self.alpha = alpha 45 | 46 | def forward(self, logits, targets): 47 | assert logits.is_cuda 48 | loss = sigmoid_focal_loss(logits, targets, self.gamma, self.alpha) 49 | return loss.sum() 50 | 51 | def __repr__(self): 52 | tmpstr = self.__class__.__name__ 53 | tmpstr += f'(gamma={self.gamma}, alpha={self.alpha})' 54 | return tmpstr 55 |
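A minimal usage sketch for the functional interface above. It must run on CUDA (see the assert in SigmoidFocalLoss.forward); the label convention, 0 reserved for background and 1..num_classes for foreground, follows the maskrcnn-benchmark kernel this extension is modified from and is stated here as an assumption:

import torch
from mmdet.ops import sigmoid_focal_loss

logits = torch.randn(8, 80, device='cuda:0', requires_grad=True)
targets = torch.randint(1, 81, (8, ), device='cuda:0')  # 1-based labels
loss = sigmoid_focal_loss(logits, targets, 2.0, 0.25).sum()  # gamma, alpha
loss.backward()  # routed through the custom backward defined above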
-------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss_ext.cpp: -------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h 3 | #include <torch/extension.h> 4 | 5 | #ifdef WITH_CUDA 6 | at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits, 7 | const at::Tensor &targets, 8 | const int num_classes, 9 | const float gamma, const float alpha); 10 | 11 | at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits, 12 | const at::Tensor &targets, 13 | const at::Tensor &d_losses, 14 | const int num_classes, 15 | const float gamma, const float alpha); 16 | #endif 17 | 18 | // Interface for Python 19 | at::Tensor SigmoidFocalLoss_forward(const at::Tensor &logits, 20 | const at::Tensor &targets, 21 | const int num_classes, const float gamma, 22 | const float alpha) { 23 | if (logits.device().is_cuda()) { 24 | #ifdef WITH_CUDA 25 | at::DeviceGuard guard(logits.device()); 26 | return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, 27 | alpha); 28 | #else 29 | AT_ERROR("SigmoidFocalLoss is not compiled with GPU support"); 30 | #endif 31 | } 32 | AT_ERROR("SigmoidFocalLoss is not implemented on the CPU"); 33 | } 34 | 35 | at::Tensor SigmoidFocalLoss_backward(const at::Tensor &logits, 36 | const at::Tensor &targets, 37 | const at::Tensor &d_losses, 38 | const int num_classes, const float gamma, 39 | const float alpha) { 40 | if (logits.device().is_cuda()) { 41 | #ifdef WITH_CUDA 42 | at::DeviceGuard guard(logits.device()); 43 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, 44 | num_classes, gamma, alpha); 45 | #else 46 | AT_ERROR("SigmoidFocalLoss is not compiled with GPU support"); 47 | #endif 48 | } 49 | AT_ERROR("SigmoidFocalLoss is not implemented on the CPU"); 50 | } 51 | 52 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 53 | m.def("forward", &SigmoidFocalLoss_forward, 54 | "SigmoidFocalLoss forward"); 55 | m.def("backward", &SigmoidFocalLoss_backward, 56 | "SigmoidFocalLoss backward"); 57 | } 58 | -------------------------------------------------------------------------------- /mmdet/ops/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # from . import compiling_info 2 | from .compiling_info import get_compiler_version, get_compiling_cuda_version 3 | 4 | # get_compiler_version = compiling_info.get_compiler_version 5 | # get_compiling_cuda_version = compiling_info.get_compiling_cuda_version 6 | 7 | __all__ = ['get_compiler_version', 'get_compiling_cuda_version'] 8 | -------------------------------------------------------------------------------- /mmdet/ops/utils/src/compiling_info.cpp: -------------------------------------------------------------------------------- 1 | // modified from 2 | // https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/vision.cpp 3 | #include <torch/extension.h> 4 | 5 | #ifdef WITH_CUDA 6 | #include <cuda_runtime_api.h> 7 | int get_cudart_version() { return CUDART_VERSION; } 8 | #endif 9 | 10 | std::string get_compiling_cuda_version() { 11 | #ifdef WITH_CUDA 12 | std::ostringstream oss; 13 | 14 | // copied from 15 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 16 | auto printCudaStyleVersion = [&](int v) { 17 | oss << (v / 1000) << "." << (v / 10 % 100); 18 | if (v % 10 != 0) { 19 | oss << "." << (v % 10); 20 | } 21 | }; 22 | printCudaStyleVersion(get_cudart_version()); 23 | return oss.str(); 24 | #else 25 | return std::string("not available"); 26 | #endif 27 | } 28 | 29 | // similar to 30 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp 31 | std::string get_compiler_version() { 32 | std::ostringstream ss; 33 | #if defined(__GNUC__) 34 | #ifndef __clang__ 35 | { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } 36 | #endif 37 | #endif 38 | 39 | #if defined(__clang_major__) 40 | { 41 | ss << "clang " << __clang_major__ << "." << __clang_minor__ << "."
42 | << __clang_patchlevel__; 43 | } 44 | #endif 45 | 46 | #if defined(_MSC_VER) 47 | { ss << "MSVC " << _MSC_FULL_VER; } 48 | #endif 49 | return ss.str(); 50 | } 51 | 52 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 53 | m.def("get_compiler_version", &get_compiler_version, "get_compiler_version"); 54 | m.def("get_compiling_cuda_version", &get_compiling_cuda_version, 55 | "get_compiling_cuda_version"); 56 | } 57 | -------------------------------------------------------------------------------- /mmdet/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .collect_env import collect_env 2 | from .logger import get_root_logger 3 | 4 | __all__ = ['get_root_logger', 'collect_env'] 5 | -------------------------------------------------------------------------------- /mmdet/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import subprocess 3 | import sys 4 | from collections import defaultdict 5 | 6 | import cv2 7 | import mmcv 8 | import torch 9 | import torchvision 10 | 11 | import mmdet 12 | 13 | 14 | def collect_env(): 15 | """Collect the information of the running environments.""" 16 | env_info = {} 17 | env_info['sys.platform'] = sys.platform 18 | env_info['Python'] = sys.version.replace('\n', '') 19 | 20 | cuda_available = torch.cuda.is_available() 21 | env_info['CUDA available'] = cuda_available 22 | 23 | if cuda_available: 24 | from torch.utils.cpp_extension import CUDA_HOME 25 | env_info['CUDA_HOME'] = CUDA_HOME 26 | 27 | if CUDA_HOME is not None and osp.isdir(CUDA_HOME): 28 | try: 29 | nvcc = osp.join(CUDA_HOME, 'bin/nvcc') 30 | nvcc = subprocess.check_output( 31 | f'"{nvcc}" -V | tail -n1', shell=True) 32 | nvcc = nvcc.decode('utf-8').strip() 33 | except subprocess.SubprocessError: 34 | nvcc = 'Not Available' 35 | env_info['NVCC'] = nvcc 36 | 37 | devices = defaultdict(list) 38 | for k in range(torch.cuda.device_count()): 39 | devices[torch.cuda.get_device_name(k)].append(str(k)) 40 | for name, devids in devices.items(): 41 | env_info['GPU ' + ','.join(devids)] = name 42 | 43 | gcc = subprocess.check_output('gcc --version | head -n1', shell=True) 44 | gcc = gcc.decode('utf-8').strip() 45 | env_info['GCC'] = gcc 46 | 47 | env_info['PyTorch'] = torch.__version__ 48 | env_info['PyTorch compiling details'] = torch.__config__.show() 49 | 50 | env_info['TorchVision'] = torchvision.__version__ 51 | 52 | env_info['OpenCV'] = cv2.__version__ 53 | 54 | env_info['MMCV'] = mmcv.__version__ 55 | env_info['MMDetection'] = mmdet.__version__ 56 | from mmdet.ops import get_compiler_version, get_compiling_cuda_version 57 | env_info['MMDetection Compiler'] = get_compiler_version() 58 | env_info['MMDetection CUDA Compiler'] = get_compiling_cuda_version() 59 | return env_info 60 | 61 | 62 | if __name__ == '__main__': 63 | for name, val in collect_env().items(): 64 | print(f'{name}: {val}') 65 | -------------------------------------------------------------------------------- /mmdet/utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from mmcv.utils import get_logger 4 | 5 | 6 | def get_root_logger(log_file=None, log_level=logging.INFO): 7 | """Get root logger 8 | 9 | Args: 10 | log_file (str, optional): File path of log. Defaults to None. 11 | log_level (int, optional): The level of logger. 12 | Defaults to logging.INFO. 
13 | 14 | Returns: 15 | :obj:`logging.Logger`: The obtained logger 16 | """ 17 | logger = get_logger(name='mmdet', log_file=log_file, log_level=log_level) 18 | 19 | return logger 20 | -------------------------------------------------------------------------------- /mmdet/utils/profiling.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import sys 3 | import time 4 | 5 | import torch 6 | 7 | if sys.version_info >= (3, 7): 8 | 9 | @contextlib.contextmanager 10 | def profile_time(trace_name, 11 | name, 12 | enabled=True, 13 | stream=None, 14 | end_stream=None): 15 | """Print time spent by CPU and GPU. 16 | 17 | Useful as a temporary context manager to find sweet spots of 18 | code suitable for async implementation. 19 | 20 | """ 21 | if (not enabled) or not torch.cuda.is_available(): 22 | yield 23 | return 24 | stream = stream if stream else torch.cuda.current_stream() 25 | end_stream = end_stream if end_stream else stream 26 | start = torch.cuda.Event(enable_timing=True) 27 | end = torch.cuda.Event(enable_timing=True) 28 | stream.record_event(start) 29 | try: 30 | cpu_start = time.monotonic() 31 | yield 32 | finally: 33 | cpu_end = time.monotonic() 34 | end_stream.record_event(end) 35 | end.synchronize() 36 | cpu_time = (cpu_end - cpu_start) * 1000 37 | gpu_time = start.elapsed_time(end) 38 | msg = f'{trace_name} {name} cpu_time {cpu_time:.2f} ms ' 39 | msg += f'gpu_time {gpu_time:.2f} ms stream {stream}' 40 | print(msg, end_stream) 41 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = --xdoctest --xdoctest-style=auto 3 | norecursedirs = .git ignore build __pycache__ data docker docs .eggs 4 | 5 | filterwarnings= default 6 | ignore:.*No cfgstr given in Cacher constructor or call.*:Warning 7 | ignore:.*Define the __nice__ method for.*:Warning 8 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -r requirements/build.txt 2 | -r requirements/optional.txt 3 | -r requirements/runtime.txt 4 | -r requirements/tests.txt 5 | -------------------------------------------------------------------------------- /requirements/build.txt: -------------------------------------------------------------------------------- 1 | # These must be installed before building mmdetection 2 | numpy 3 | torch>=1.3 4 | -------------------------------------------------------------------------------- /requirements/docs.txt: -------------------------------------------------------------------------------- 1 | recommonmark 2 | sphinx 3 | sphinx_markdown_tables 4 | sphinx_rtd_theme 5 | -------------------------------------------------------------------------------- /requirements/optional.txt: -------------------------------------------------------------------------------- 1 | albumentations>=0.3.2 2 | cityscapesscripts 3 | imagecorruptions 4 | -------------------------------------------------------------------------------- /requirements/readthedocs.txt: -------------------------------------------------------------------------------- 1 | mmcv 2 | torch 3 | torchvision 4 | -------------------------------------------------------------------------------- /requirements/runtime.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | mmcv==0.6.2 3 
| numpy 4 | scipy 5 | # need older pillow until torchvision is fixed 6 | Pillow<=6.2.2 7 | six 8 | terminaltables 9 | torch>=1.3 10 | torchvision 11 | -------------------------------------------------------------------------------- /requirements/tests.txt: -------------------------------------------------------------------------------- 1 | asynctest 2 | codecov 3 | flake8 4 | interrogate 5 | isort 6 | # Note: used for kwarray.group_items, this may be ported to mmcv in the future. 7 | kwarray 8 | pytest 9 | pytest-cov 10 | pytest-runner 11 | ubelt 12 | xdoctest >= 0.10.0 13 | yapf 14 | -------------------------------------------------------------------------------- /tests/test_losses.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import torch 3 | 4 | 5 | def test_ce_loss(): 6 | from mmdet.models import build_loss 7 | 8 | # use_mask and use_sigmoid cannot be true at the same time 9 | with pytest.raises(AssertionError): 10 | loss_cfg = dict( 11 | type='CrossEntropyLoss', 12 | use_mask=True, 13 | use_sigmoid=True, 14 | loss_weight=1.0) 15 | build_loss(loss_cfg) 16 | 17 | # test loss with class weights 18 | loss_cls_cfg = dict( 19 | type='CrossEntropyLoss', 20 | use_sigmoid=False, 21 | class_weight=[0.8, 0.2], 22 | loss_weight=1.0) 23 | loss_cls = build_loss(loss_cls_cfg) 24 | fake_pred = torch.Tensor([[100, -100]]) 25 | fake_label = torch.Tensor([1]).long() 26 | assert torch.allclose(loss_cls(fake_pred, fake_label), torch.tensor(40.)) 27 | 28 | loss_cls_cfg = dict( 29 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0) 30 | loss_cls = build_loss(loss_cls_cfg) 31 | assert torch.allclose(loss_cls(fake_pred, fake_label), torch.tensor(200.)) 32 | -------------------------------------------------------------------------------- /tests/test_ops/test_corner_pool.py: -------------------------------------------------------------------------------- 1 | """ 2 | CommandLine: 3 | pytest tests/test_corner_pool.py 4 | """ 5 | import pytest 6 | import torch 7 | 8 | from mmdet.ops import CornerPool 9 | 10 | 11 | def test_corner_pool_device_and_dtypes_cpu(): 12 | """ 13 | CommandLine: 14 | xdoctest -m tests/test_corner_pool.py \ 15 | test_corner_pool_device_and_dtypes_cpu 16 | """ 17 | with pytest.raises(AssertionError): 18 | # pool mode must in ['bottom', 'left', 'right', 'top'] 19 | pool = CornerPool('corner') 20 | 21 | lr_tensor = torch.tensor([[[[0, 0, 0, 0, 0], [2, 1, 3, 0, 2], 22 | [5, 4, 1, 1, 6], [0, 0, 0, 0, 0], 23 | [0, 0, 0, 0, 0]]]]) 24 | tb_tensor = torch.tensor([[[[0, 3, 1, 0, 0], [0, 1, 1, 0, 0], 25 | [0, 3, 4, 0, 0], [0, 2, 2, 0, 0], 26 | [0, 0, 2, 0, 0]]]]) 27 | # Left Pool 28 | left_answer = torch.tensor([[[[0, 0, 0, 0, 0], [3, 3, 3, 2, 2], 29 | [6, 6, 6, 6, 6], [0, 0, 0, 0, 0], 30 | [0, 0, 0, 0, 0]]]]) 31 | pool = CornerPool('left') 32 | left_tensor = pool(lr_tensor) 33 | assert left_tensor.type() == lr_tensor.type() 34 | assert torch.equal(left_tensor, left_answer) 35 | # Right Pool 36 | right_answer = torch.tensor([[[[0, 0, 0, 0, 0], [2, 2, 3, 3, 3], 37 | [5, 5, 5, 5, 6], [0, 0, 0, 0, 0], 38 | [0, 0, 0, 0, 0]]]]) 39 | pool = CornerPool('right') 40 | right_tensor = pool(lr_tensor) 41 | assert right_tensor.type() == lr_tensor.type() 42 | assert torch.equal(right_tensor, right_answer) 43 | # Top Pool 44 | top_answer = torch.tensor([[[[0, 3, 4, 0, 0], [0, 3, 4, 0, 0], 45 | [0, 3, 4, 0, 0], [0, 2, 2, 0, 0], 46 | [0, 0, 2, 0, 0]]]]) 47 | pool = CornerPool('top') 48 | top_tensor = pool(tb_tensor) 49 | assert top_tensor.type() == 
tb_tensor.type() 50 | assert torch.equal(top_tensor, top_answer) 51 | # Bottom Pool 52 | bottom_answer = torch.tensor([[[[0, 3, 1, 0, 0], [0, 3, 1, 0, 0], 53 | [0, 3, 4, 0, 0], [0, 3, 4, 0, 0], 54 | [0, 3, 4, 0, 0]]]]) 55 | pool = CornerPool('bottom') 56 | bottom_tensor = pool(tb_tensor) 57 | assert bottom_tensor.type() == tb_tensor.type() 58 | assert torch.equal(bottom_tensor, bottom_answer) 59 | -------------------------------------------------------------------------------- /tests/test_ops/test_merge_cells.py: -------------------------------------------------------------------------------- 1 | """ 2 | CommandLine: 3 | pytest tests/test_merge_cells.py 4 | """ 5 | import torch 6 | import torch.nn.functional as F 7 | 8 | from mmdet.ops.merge_cells import (BaseMergeCell, ConcatCell, 9 | GlobalPoolingCell, SumCell) 10 | 11 | 12 | def test_sum_cell(): 13 | inputs_x = torch.randn([2, 256, 32, 32]) 14 | inputs_y = torch.randn([2, 256, 16, 16]) 15 | sum_cell = SumCell(256, 256) 16 | output = sum_cell(inputs_x, inputs_y, out_size=inputs_x.shape[-2:]) 17 | assert output.size() == inputs_x.size() 18 | output = sum_cell(inputs_x, inputs_y, out_size=inputs_y.shape[-2:]) 19 | assert output.size() == inputs_y.size() 20 | output = sum_cell(inputs_x, inputs_y) 21 | assert output.size() == inputs_x.size() 22 | 23 | 24 | def test_concat_cell(): 25 | inputs_x = torch.randn([2, 256, 32, 32]) 26 | inputs_y = torch.randn([2, 256, 16, 16]) 27 | concat_cell = ConcatCell(256, 256) 28 | output = concat_cell(inputs_x, inputs_y, out_size=inputs_x.shape[-2:]) 29 | assert output.size() == inputs_x.size() 30 | output = concat_cell(inputs_x, inputs_y, out_size=inputs_y.shape[-2:]) 31 | assert output.size() == inputs_y.size() 32 | output = concat_cell(inputs_x, inputs_y) 33 | assert output.size() == inputs_x.size() 34 | 35 | 36 | def test_global_pool_cell(): 37 | inputs_x = torch.randn([2, 256, 32, 32]) 38 | inputs_y = torch.randn([2, 256, 32, 32]) 39 | gp_cell = GlobalPoolingCell(with_out_conv=False) 40 | gp_cell_out = gp_cell(inputs_x, inputs_y, out_size=inputs_x.shape[-2:]) 41 | assert (gp_cell_out.size() == inputs_x.size()) 42 | gp_cell = GlobalPoolingCell(256, 256) 43 | gp_cell_out = gp_cell(inputs_x, inputs_y, out_size=inputs_x.shape[-2:]) 44 | assert (gp_cell_out.size() == inputs_x.size()) 45 | 46 | 47 | def test_resize_methods(): 48 | inputs_x = torch.randn([2, 256, 128, 128]) 49 | target_resize_sizes = [(128, 128), (256, 256)] 50 | resize_methods_list = ['nearest', 'bilinear'] 51 | 52 | for method in resize_methods_list: 53 | merge_cell = BaseMergeCell(upsample_mode=method) 54 | for target_size in target_resize_sizes: 55 | merge_cell_out = merge_cell._resize(inputs_x, target_size) 56 | gt_out = F.interpolate(inputs_x, size=target_size, mode=method) 57 | assert merge_cell_out.equal(gt_out) 58 | 59 | target_size = (64, 64) # resize to a smaller size 60 | merge_cell = BaseMergeCell() 61 | merge_cell_out = merge_cell._resize(inputs_x, target_size) 62 | kernel_size = inputs_x.shape[-1] // target_size[-1] 63 | gt_out = F.max_pool2d( 64 | inputs_x, kernel_size=kernel_size, stride=kernel_size) 65 | assert (merge_cell_out == gt_out).all() 66 | -------------------------------------------------------------------------------- /tests/test_ops/test_soft_nms.py: -------------------------------------------------------------------------------- 1 | """ 2 | CommandLine: 3 | pytest tests/test_soft_nms.py 4 | """ 5 | import numpy as np 6 | import torch 7 | 8 | from mmdet.ops.nms.nms_wrapper import soft_nms 9 | 10 | 11 | def 
test_soft_nms_device_and_dtypes_cpu(): 12 | """ 13 | CommandLine: 14 | xdoctest -m tests/test_soft_nms.py test_soft_nms_device_and_dtypes_cpu 15 | """ 16 | iou_thr = 0.7 17 | base_dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.9], 18 | [49.3, 32.9, 51.0, 35.3, 0.9], 19 | [35.3, 11.5, 39.9, 14.5, 0.4], 20 | [35.2, 11.7, 39.7, 15.7, 0.3]]) 21 | 22 | # CPU can handle float32 and float64 23 | dets = base_dets.astype(np.float32) 24 | new_dets, inds = soft_nms(dets, iou_thr) 25 | assert dets.dtype == new_dets.dtype 26 | assert len(inds) == len(new_dets) == 4 27 | 28 | dets = torch.FloatTensor(base_dets) 29 | new_dets, inds = soft_nms(dets, iou_thr) 30 | assert dets.dtype == new_dets.dtype 31 | assert len(inds) == len(new_dets) == 4 32 | 33 | dets = base_dets.astype(np.float64) 34 | new_dets, inds = soft_nms(dets, iou_thr) 35 | assert dets.dtype == new_dets.dtype 36 | assert len(inds) == len(new_dets) == 4 37 | 38 | dets = torch.DoubleTensor(base_dets) 39 | new_dets, inds = soft_nms(dets, iou_thr) 40 | assert dets.dtype == new_dets.dtype 41 | assert len(inds) == len(new_dets) == 4 42 | -------------------------------------------------------------------------------- /tests/test_pipelines/test_formatting.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from mmcv.utils import build_from_cfg 4 | 5 | from mmdet.datasets.builder import PIPELINES 6 | 7 | 8 | def test_default_format_bundle(): 9 | results = dict( 10 | img_prefix=osp.join(osp.dirname(__file__), '../data'), 11 | img_info=dict(filename='color.jpg')) 12 | load = dict(type='LoadImageFromFile') 13 | load = build_from_cfg(load, PIPELINES) 14 | bundle = dict(type='DefaultFormatBundle') 15 | bundle = build_from_cfg(bundle, PIPELINES) 16 | results = load(results) 17 | assert 'pad_shape' not in results 18 | assert 'scale_factor' not in results 19 | assert 'img_norm_cfg' not in results 20 | results = bundle(results) 21 | assert 'pad_shape' in results 22 | assert 'scale_factor' in results 23 | assert 'img_norm_cfg' in results 24 | -------------------------------------------------------------------------------- /tests/test_pipelines/test_models_aug_test.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | import mmcv 4 | import torch 5 | from mmcv.parallel import collate 6 | from mmcv.utils import build_from_cfg 7 | 8 | from mmdet.datasets.builder import PIPELINES 9 | from mmdet.models import build_detector 10 | 11 | 12 | def model_aug_test_template(cfg_file): 13 | # get config 14 | cfg = mmcv.Config.fromfile(cfg_file) 15 | # init model 16 | cfg.model.pretrained = None 17 | model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg) 18 | 19 | # init test pipeline and set aug test 20 | load_cfg, multi_scale_cfg = cfg.test_pipeline 21 | multi_scale_cfg['flip'] = True 22 | multi_scale_cfg['img_scale'] = [(1333, 800), (800, 600), (640, 480)] 23 | 24 | load = build_from_cfg(load_cfg, PIPELINES) 25 | transform = build_from_cfg(multi_scale_cfg, PIPELINES) 26 | 27 | results = dict( 28 | img_prefix=osp.join(osp.dirname(__file__), '../data'), 29 | img_info=dict(filename='color.jpg')) 30 | results = transform(load(results)) 31 | assert len(results['img']) == 6 32 | assert len(results['img_metas']) == 6 33 | 34 | results['img'] = [collate([x]) for x in results['img']] 35 | results['img_metas'] = [collate([x]).data[0] for x in results['img_metas']] 36 | # aug test the model 37 | model.eval() 38 | with torch.no_grad(): 39 
| aug_result = model(return_loss=False, rescale=True, **results) 40 | return aug_result 41 | 42 | 43 | def test_cascade_rcnn_aug_test(): 44 | aug_result = model_aug_test_template( 45 | 'configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py') 46 | assert len(aug_result) == 80 47 | 48 | 49 | def test_mask_rcnn_aug_test(): 50 | aug_result = model_aug_test_template( 51 | 'configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py') 52 | assert len(aug_result) == 2 53 | assert len(aug_result[0]) == 80 54 | assert len(aug_result[1]) == 80 55 | 56 | 57 | def test_htc_aug_test(): 58 | aug_result = model_aug_test_template('configs/htc/htc_r50_fpn_1x_coco.py') 59 | assert len(aug_result) == 2 60 | assert len(aug_result[0]) == 80 61 | assert len(aug_result[1]) == 80 62 | -------------------------------------------------------------------------------- /tools/browse_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from pathlib import Path 4 | 5 | import mmcv 6 | from mmcv import Config 7 | 8 | from mmdet.datasets.builder import build_dataset 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser(description='Browse a dataset') 13 | parser.add_argument('config', help='train config file path') 14 | parser.add_argument( 15 | '--skip-type', 16 | type=str, 17 | nargs='+', 18 | default=['DefaultFormatBundle', 'Normalize', 'Collect'], 19 | help='skip some useless pipeline') 20 | parser.add_argument( 21 | '--output-dir', 22 | default=None, 23 | type=str, 24 | help='If there is no display interface, you can save it') 25 | parser.add_argument('--not-show', default=False, action='store_true') 26 | parser.add_argument( 27 | '--show-interval', 28 | type=int, 29 | default=999, 30 | help='the interval of show (ms)') 31 | args = parser.parse_args() 32 | return args 33 | 34 | 35 | def retrieve_data_cfg(config_path, skip_type): 36 | cfg = Config.fromfile(config_path) 37 | train_data_cfg = cfg.data.train 38 | train_data_cfg['pipeline'] = [ 39 | x for x in train_data_cfg.pipeline if x['type'] not in skip_type 40 | ] 41 | 42 | return cfg 43 | 44 | 45 | def main(): 46 | args = parse_args() 47 | cfg = retrieve_data_cfg(args.config, args.skip_type) 48 | 49 | dataset = build_dataset(cfg.data.train) 50 | 51 | progress_bar = mmcv.ProgressBar(len(dataset)) 52 | for item in dataset: 53 | filename = os.path.join(args.output_dir, 54 | Path(item['filename']).name 55 | ) if args.output_dir is not None else None 56 | mmcv.imshow_det_bboxes( 57 | item['img'], 58 | item['gt_bboxes'], 59 | item['gt_labels'] - 1, 60 | class_names=dataset.CLASSES, 61 | show=not args.not_show, 62 | out_file=filename, 63 | wait_time=args.show_interval) 64 | progress_bar.update() 65 | 66 | 67 | if __name__ == '__main__': 68 | main() 69 | -------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29500} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 11 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | 
PORT=${PORT:-29500} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} 10 | -------------------------------------------------------------------------------- /tools/fuse_conv_bn.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import torch 4 | import torch.nn as nn 5 | from mmcv.runner import save_checkpoint 6 | 7 | from mmdet.apis import init_detector 8 | 9 | 10 | def fuse_conv_bn(conv, bn): 11 | """ During inference, the functionality of batch norm layers is turned 12 | off and only the running mean and var along channels are used, which 13 | exposes the chance to fuse BN with the preceding conv layers to save 14 | computations and simplify network structures. 15 | """ 16 | conv_w = conv.weight 17 | conv_b = conv.bias if conv.bias is not None else torch.zeros_like( 18 | bn.running_mean) 19 | 20 | factor = bn.weight / torch.sqrt(bn.running_var + bn.eps) 21 | conv.weight = nn.Parameter(conv_w * 22 | factor.reshape([conv.out_channels, 1, 1, 1])) 23 | conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias) 24 | return conv 25 | 26 | 27 | def fuse_module(m): 28 | last_conv = None 29 | last_conv_name = None 30 | 31 | for name, child in m.named_children(): 32 | if isinstance(child, (nn.BatchNorm2d, nn.SyncBatchNorm)): 33 | if last_conv is None: # only fuse BN that is after Conv 34 | continue 35 | fused_conv = fuse_conv_bn(last_conv, child) 36 | m._modules[last_conv_name] = fused_conv 37 | # To reduce changes, set BN as Identity instead of deleting it. 38 | m._modules[name] = nn.Identity() 39 | last_conv = None 40 | elif isinstance(child, nn.Conv2d): 41 | last_conv = child 42 | last_conv_name = name 43 | else: 44 | fuse_module(child) 45 | return m 46 | 47 | 48 | def parse_args(): 49 | parser = argparse.ArgumentParser( 50 | description='fuse Conv and BN layers in a model') 51 | parser.add_argument('config', help='config file path') 52 | parser.add_argument('checkpoint', help='checkpoint file path') 53 | parser.add_argument('out', help='output path of the converted model') 54 | args = parser.parse_args() 55 | return args 56 | 57 | 58 | def main(): 59 | args = parse_args() 60 | # build the model from a config file and a checkpoint file 61 | model = init_detector(args.config, args.checkpoint) 62 | # fuse conv and bn layers of the model 63 | fused_model = fuse_module(model) 64 | save_checkpoint(fused_model, args.out) 65 | 66 | 67 | if __name__ == '__main__': 68 | main() 69 |
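The identity behind fuse_conv_bn above: with factor = bn.weight / sqrt(bn.running_var + bn.eps), the fused parameters are W' = W * factor (per output channel) and b' = (b - bn.running_mean) * factor + bn.bias, so the fused conv reproduces bn(conv(x)) in eval mode. A self-contained equivalence check with illustrative shapes (fuse_conv_bn is the function defined above; note it modifies conv in place):

import torch
import torch.nn as nn

conv = nn.Conv2d(3, 8, 3, bias=True)
bn = nn.BatchNorm2d(8).eval()  # fusion is only valid with frozen statistics
x = torch.randn(1, 3, 16, 16)
ref = bn(conv(x))  # reference output computed before fusing
fused = fuse_conv_bn(conv, bn)
assert torch.allclose(fused(x), ref, atol=1e-5)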
-------------------------------------------------------------------------------- /tools/get_flops.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import torch 4 | from mmcv import Config 5 | 6 | from mmdet.models import build_detector 7 | 8 | try: 9 | from mmcv.cnn import get_model_complexity_info 10 | except ImportError: 11 | raise ImportError('Please upgrade mmcv to >0.6.2') 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser(description='Get the FLOPs of a detector') 16 | parser.add_argument('config', help='train config file path') 17 | parser.add_argument( 18 | '--shape', 19 | type=int, 20 | nargs='+', 21 | default=[1280, 800], 22 | help='input image size') 23 | args = parser.parse_args() 24 | return args 25 | 26 | 27 | def main(): 28 | 29 | args = parse_args() 30 | 31 | if len(args.shape) == 1: 32 | input_shape = (3, args.shape[0], args.shape[0]) 33 | elif len(args.shape) == 2: 34 | input_shape = (3, ) + tuple(args.shape) 35 | else: 36 | raise ValueError('invalid input shape') 37 | 38 | cfg = Config.fromfile(args.config) 39 | model = build_detector( 40 | cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg) 41 | if torch.cuda.is_available(): 42 | model.cuda() 43 | model.eval() 44 | 45 | if hasattr(model, 'forward_dummy'): 46 | model.forward = model.forward_dummy 47 | else: 48 | raise NotImplementedError( 49 | 'FLOPs counter is currently not supported with {}'. 50 | format(model.__class__.__name__)) 51 | 52 | flops, params = get_model_complexity_info(model, input_shape) 53 | split_line = '=' * 30 54 | print(f'{split_line}\nInput shape: {input_shape}\n' 55 | f'Flops: {flops}\nParams: {params}\n{split_line}') 56 | print('!!!Please be cautious if you use the results in papers. ' 57 | 'You may need to check if all ops are supported and verify that the ' 58 | 'flops computation is correct.') 59 | 60 | 61 | if __name__ == '__main__': 62 | main() 63 | -------------------------------------------------------------------------------- /tools/print_config.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from mmcv import Config, DictAction 4 | 5 | 6 | def parse_args(): 7 | parser = argparse.ArgumentParser(description='Print the whole config') 8 | parser.add_argument('config', help='config file path') 9 | parser.add_argument( 10 | '--options', nargs='+', action=DictAction, help='arguments in dict') 11 | args = parser.parse_args() 12 | 13 | return args 14 | 15 | 16 | def main(): 17 | args = parse_args() 18 | 19 | cfg = Config.fromfile(args.config) 20 | if args.options is not None: 21 | cfg.merge_from_dict(args.options) 22 | print(f'Config:\n{cfg.pretty_text}') 23 | 24 | 25 | if __name__ == '__main__': 26 | main() 27 | -------------------------------------------------------------------------------- /tools/publish_model.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import subprocess 3 | 4 | import torch 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser( 9 | description='Process a checkpoint to be published') 10 | parser.add_argument('in_file', help='input checkpoint filename') 11 | parser.add_argument('out_file', help='output checkpoint filename') 12 | args = parser.parse_args() 13 | return args 14 | 15 | 16 | def process_checkpoint(in_file, out_file): 17 | checkpoint = torch.load(in_file, map_location='cpu') 18 | # remove optimizer for smaller file size 19 | if 'optimizer' in checkpoint: 20 | del checkpoint['optimizer'] 21 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 22 | # add the code here.
23 | torch.save(checkpoint, out_file) 24 | sha = subprocess.check_output(['sha256sum', out_file]).decode() 25 | # keep the saved path as the rename source; strip only the suffix 26 | # when composing the hash-tagged target name 27 | if out_file.endswith('.pth'): 28 | out_file_name = out_file[:-4] 29 | else: 30 | out_file_name = out_file 31 | final_file = out_file_name + f'-{sha[:8]}.pth' 32 | subprocess.Popen(['mv', out_file, final_file]) 33 | 34 | 35 | def main(): 36 | args = parse_args() 37 | process_checkpoint(args.in_file, args.out_file) 38 | 39 | 40 | if __name__ == '__main__': 41 | main() 42 | -------------------------------------------------------------------------------- /tools/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | WORK_DIR=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | PY_ARGS=${@:5} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} 25 | --------------------------------------------------------------------------------