├── .github
    ├── ISSUE_TEMPLATE
    │   ├── error-report.md
    │   ├── feature_request.md
    │   └── general_questions.md
    └── workflows
    │   └── static.yml
├── .gitignore
├── .isort.cfg
├── .style.yapf
├── .travis.yml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── GETTING_STARTED.md
├── INSTALL.md
├── LICENSE
├── MODEL_ZOO.md
├── README.md
├── ROBUSTNESS_BENCHMARKING.md
├── TECHNICAL_DETAILS.md
├── configs
    ├── cascade_mask_rcnn_r101_fpn_1x.py
    ├── cascade_mask_rcnn_r50_caffe_c4_1x.py
    ├── cascade_mask_rcnn_r50_fpn_1x.py
    ├── cascade_mask_rcnn_x101_32x4d_fpn_1x.py
    ├── cascade_mask_rcnn_x101_64x4d_fpn_1x.py
    ├── cascade_rcnn_r101_fpn_1x.py
    ├── cascade_rcnn_r50_caffe_c4_1x.py
    ├── cascade_rcnn_r50_fpn_1x.py
    ├── cascade_rcnn_x101_32x4d_fpn_1x.py
    ├── cascade_rcnn_x101_64x4d_fpn_1x.py
    ├── cityscapes
    │   ├── README.md
    │   ├── faster_rcnn_r50_fpn_1x_cityscapes.py
    │   └── mask_rcnn_r50_fpn_1x_cityscapes.py
    ├── dcn
    │   ├── README.md
    │   ├── cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x.py
    │   ├── cascade_rcnn_dconv_c3-c5_r50_fpn_1x.py
    │   ├── faster_rcnn_dconv_c3-c5_r50_fpn_1x.py
    │   ├── faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x.py
    │   ├── faster_rcnn_dpool_r50_fpn_1x.py
    │   ├── faster_rcnn_mdconv_c3-c5_r50_fpn_1x.py
    │   ├── faster_rcnn_mdpool_r50_fpn_1x.py
    │   └── mask_rcnn_dconv_c3-c5_r50_fpn_1x.py
    ├── double_heads
    │   └── dh_faster_rcnn_r50_fpn_1x.py
    ├── empirical_attention
    │   ├── README.md
    │   ├── faster_rcnn_r50_fpn_attention_0010_1x.py
    │   ├── faster_rcnn_r50_fpn_attention_0010_dcn_1x.py
    │   ├── faster_rcnn_r50_fpn_attention_1111_1x.py
    │   └── faster_rcnn_r50_fpn_attention_1111_dcn_1x.py
    ├── fast_mask_rcnn_r101_fpn_1x.py
    ├── fast_mask_rcnn_r50_caffe_c4_1x.py
    ├── fast_mask_rcnn_r50_fpn_1x.py
    ├── fast_rcnn_r101_fpn_1x.py
    ├── fast_rcnn_r50_caffe_c4_1x.py
    ├── fast_rcnn_r50_fpn_1x.py
    ├── faster_rcnn_ohem_r50_fpn_1x.py
    ├── faster_rcnn_r101_fpn_1x.py
    ├── faster_rcnn_r50_caffe_c4_1x.py
    ├── faster_rcnn_r50_fpn_1x.py
    ├── faster_rcnn_x101_32x4d_fpn_1x.py
    ├── faster_rcnn_x101_64x4d_fpn_1x.py
    ├── fcos
    │   ├── README.md
    │   ├── fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu.py
    │   ├── fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x.py
    │   └── fcos_r50_caffe_fpn_gn_1x_4gpu.py
    ├── foveabox
    │   ├── fovea_ms_r101_fpn_4gpu_2x_align.py
    │   ├── fovea_ms_r50_fpn_4gpu_2x_align.py
    │   ├── fovea_r101_fpn_4gpu_1x.py
    │   ├── fovea_r101_fpn_4gpu_2x.py
    │   ├── fovea_r101_fpn_4gpu_2x_align.py
    │   ├── fovea_r50_fpn_4gpu_1x.py
    │   ├── fovea_r50_fpn_4gpu_2x.py
    │   └── fovea_r50_fpn_4gpu_2x_align.py
    ├── fp16
    │   ├── faster_rcnn_r50_fpn_fp16_1x.py
    │   ├── mask_rcnn_r50_fpn_fp16_1x.py
    │   └── retinanet_r50_fpn_fp16_1x.py
    ├── gcnet
    │   ├── README.md
    │   ├── mask_rcnn_r16_gcb_c3-c5_r50_fpn_1x.py
    │   ├── mask_rcnn_r16_gcb_c3-c5_r50_fpn_syncbn_1x.py
    │   ├── mask_rcnn_r4_gcb_c3-c5_r50_fpn_1x.py
    │   ├── mask_rcnn_r4_gcb_c3-c5_r50_fpn_syncbn_1x.py
    │   └── mask_rcnn_r50_fpn_sbn_1x.py
    ├── ghm
    │   ├── README.md
    │   └── retinanet_ghm_r50_fpn_1x.py
    ├── gn+ws
    │   ├── README.md
    │   ├── faster_rcnn_r50_fpn_gn_ws_1x.py
    │   ├── mask_rcnn_r50_fpn_gn_ws_20_23_24e.py
    │   ├── mask_rcnn_r50_fpn_gn_ws_2x.py
    │   └── mask_rcnn_x101_32x4d_fpn_gn_ws_2x.py
    ├── gn
    │   ├── README.md
    │   ├── mask_rcnn_r101_fpn_gn_2x.py
    │   ├── mask_rcnn_r50_fpn_gn_2x.py
    │   └── mask_rcnn_r50_fpn_gn_contrib_2x.py
    ├── grid_rcnn
    │   ├── README.md
    │   ├── grid_rcnn_gn_head_r50_fpn_2x.py
    │   └── grid_rcnn_gn_head_x101_32x4d_fpn_2x.py
    ├── guided_anchoring
    │   ├── README.md
    │   ├── ga_fast_r50_caffe_fpn_1x.py
    │   ├── ga_faster_r50_caffe_fpn_1x.py
    │   ├── ga_faster_x101_32x4d_fpn_1x.py
    │   ├── ga_retinanet_r50_caffe_fpn_1x.py
    │   ├── ga_retinanet_x101_32x4d_fpn_1x.py
    │   ├── ga_rpn_r101_caffe_rpn_1x.py
    │   ├── ga_rpn_r50_caffe_fpn_1x.py
    │   └── ga_rpn_x101_32x4d_fpn_1x.py
    ├── hrnet
    │   ├── README.md
    │   ├── cascade_mask_rcnn_hrnetv2p_w32_20e.py
    │   ├── cascade_rcnn_hrnetv2p_w32_20e.py
    │   ├── faster_rcnn_hrnetv2p_w18_1x.py
    │   ├── faster_rcnn_hrnetv2p_w32_1x.py
    │   ├── faster_rcnn_hrnetv2p_w40_1x.py
    │   ├── fcos_hrnetv2p_w32_gn_1x_4gpu.py
    │   ├── htc_hrnetv2p_w32_20e.py
    │   ├── mask_rcnn_hrnetv2p_w18_1x.py
    │   └── mask_rcnn_hrnetv2p_w32_1x.py
    ├── htc
    │   ├── README.md
    │   ├── htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e.py
    │   ├── htc_r101_fpn_20e.py
    │   ├── htc_r50_fpn_1x.py
    │   ├── htc_r50_fpn_20e.py
    │   ├── htc_without_semantic_r50_fpn_1x.py
    │   ├── htc_x101_32x4d_fpn_20e_16gpu.py
    │   └── htc_x101_64x4d_fpn_20e_16gpu.py
    ├── libra_rcnn
    │   ├── README.md
    │   ├── libra_fast_rcnn_r50_fpn_1x.py
    │   ├── libra_faster_rcnn_r101_fpn_1x.py
    │   ├── libra_faster_rcnn_r50_fpn_1x.py
    │   ├── libra_faster_rcnn_x101_64x4d_fpn_1x.py
    │   └── libra_retinanet_r50_fpn_1x.py
    ├── mask_rcnn_r101_fpn_1x.py
    ├── mask_rcnn_r50_caffe_c4_1x.py
    ├── mask_rcnn_r50_fpn_1x.py
    ├── mask_rcnn_x101_32x4d_fpn_1x.py
    ├── mask_rcnn_x101_64x4d_fpn_1x.py
    ├── ms_rcnn
    │   ├── README.md
    │   ├── ms_rcnn_r101_caffe_fpn_1x.py
    │   ├── ms_rcnn_r50_caffe_fpn_1x.py
    │   └── ms_rcnn_x101_64x4d_fpn_1x.py
    ├── pascal_voc
    │   ├── README.md
    │   ├── faster_rcnn_r50_fpn_1x_voc0712.py
    │   ├── ssd300_voc.py
    │   └── ssd512_voc.py
    ├── retinanet_r101_fpn_1x.py
    ├── retinanet_r50_fpn_1x.py
    ├── retinanet_x101_32x4d_fpn_1x.py
    ├── retinanet_x101_64x4d_fpn_1x.py
    ├── rpn_r101_fpn_1x.py
    ├── rpn_r50_caffe_c4_1x.py
    ├── rpn_r50_fpn_1x.py
    ├── rpn_x101_32x4d_fpn_1x.py
    ├── rpn_x101_64x4d_fpn_1x.py
    ├── scratch
    │   ├── README.md
    │   ├── scratch_faster_rcnn_r50_fpn_gn_6x.py
    │   └── scratch_mask_rcnn_r50_fpn_gn_6x.py
    ├── ssd300_coco.py
    ├── ssd512_coco.py
    └── wider_face
    │   ├── README.md
    │   └── ssd300_wider_face.py
├── demo
    ├── coco_test_12510.jpg
    ├── corruptions_sev_3.png
    ├── demo.jpg
    ├── foveabox.jpg
    ├── inference_demo.ipynb
    ├── loss_curve.png
    └── webcam_demo.py
├── docker
    └── Dockerfile
├── mmdet
    ├── __init__.py
    ├── apis
    │   ├── __init__.py
    │   ├── env.py
    │   ├── inference.py
    │   └── train.py
    ├── core
    │   ├── __init__.py
    │   ├── anchor
    │   │   ├── __init__.py
    │   │   ├── anchor_generator.py
    │   │   ├── anchor_target.py
    │   │   └── guided_anchor_target.py
    │   ├── bbox
    │   │   ├── __init__.py
    │   │   ├── assign_sampling.py
    │   │   ├── assigners
    │   │   │   ├── __init__.py
    │   │   │   ├── approx_max_iou_assigner.py
    │   │   │   ├── assign_result.py
    │   │   │   ├── base_assigner.py
    │   │   │   └── max_iou_assigner.py
    │   │   ├── bbox_target.py
    │   │   ├── geometry.py
    │   │   ├── samplers
    │   │   │   ├── __init__.py
    │   │   │   ├── base_sampler.py
    │   │   │   ├── combined_sampler.py
    │   │   │   ├── instance_balanced_pos_sampler.py
    │   │   │   ├── iou_balanced_neg_sampler.py
    │   │   │   ├── ohem_sampler.py
    │   │   │   ├── pseudo_sampler.py
    │   │   │   ├── random_sampler.py
    │   │   │   └── sampling_result.py
    │   │   └── transforms.py
    │   ├── evaluation
    │   │   ├── __init__.py
    │   │   ├── bbox_overlaps.py
    │   │   ├── class_names.py
    │   │   ├── coco_utils.py
    │   │   ├── eval_hooks.py
    │   │   ├── mean_ap.py
    │   │   └── recall.py
    │   ├── fp16
    │   │   ├── __init__.py
    │   │   ├── decorators.py
    │   │   ├── hooks.py
    │   │   └── utils.py
    │   ├── mask
    │   │   ├── __init__.py
    │   │   ├── mask_target.py
    │   │   └── utils.py
    │   ├── post_processing
    │   │   ├── __init__.py
    │   │   ├── bbox_nms.py
    │   │   └── merge_augs.py
    │   └── utils
    │   │   ├── __init__.py
    │   │   ├── dist_utils.py
    │   │   └── misc.py
    ├── datasets
    │   ├── __init__.py
    │   ├── builder.py
    │   ├── cityscapes.py
    │   ├── coco.py
    │   ├── custom.py
    │   ├── dataset_wrappers.py
    │   ├── extra_aug.py
    │   ├── loader
    │   │   ├── __init__.py
    │   │   ├── build_loader.py
    │   │   └── sampler.py
    │   ├── registry.py
    │   ├── transforms.py
    │   ├── utils.py
    │   ├── voc.py
    │   ├── wider_face.py
    │   └── xml_style.py
    ├── models
    │   ├── __init__.py
    │   ├── anchor_heads
    │   │   ├── __init__.py
    │   │   ├── anchor_head.py
    │   │   ├── fcos_head.py
    │   │   ├── fovea_head.py
    │   │   ├── ga_retina_head.py
    │   │   ├── ga_rpn_head.py
    │   │   ├── guided_anchor_head.py
    │   │   ├── retina_head.py
    │   │   ├── rpn_head.py
    │   │   └── ssd_head.py
    │   ├── backbones
    │   │   ├── __init__.py
    │   │   ├── hrnet.py
    │   │   ├── resnet.py
    │   │   ├── resnext.py
    │   │   └── ssd_vgg.py
    │   ├── bbox_heads
    │   │   ├── __init__.py
    │   │   ├── bbox_head.py
    │   │   ├── convfc_bbox_head.py
    │   │   └── double_bbox_head.py
    │   ├── builder.py
    │   ├── detectors
    │   │   ├── __init__.py
    │   │   ├── base.py
    │   │   ├── cascade_rcnn.py
    │   │   ├── double_head_rcnn.py
    │   │   ├── fast_rcnn.py
    │   │   ├── faster_rcnn.py
    │   │   ├── fcos.py
    │   │   ├── fovea.py
    │   │   ├── grid_rcnn.py
    │   │   ├── htc.py
    │   │   ├── mask_rcnn.py
    │   │   ├── mask_scoring_rcnn.py
    │   │   ├── retinanet.py
    │   │   ├── rpn.py
    │   │   ├── single_stage.py
    │   │   ├── test_mixins.py
    │   │   └── two_stage.py
    │   ├── losses
    │   │   ├── __init__.py
    │   │   ├── accuracy.py
    │   │   ├── balanced_l1_loss.py
    │   │   ├── cross_entropy_loss.py
    │   │   ├── focal_loss.py
    │   │   ├── ghm_loss.py
    │   │   ├── iou_loss.py
    │   │   ├── mse_loss.py
    │   │   ├── smooth_l1_loss.py
    │   │   └── utils.py
    │   ├── mask_heads
    │   │   ├── __init__.py
    │   │   ├── fcn_mask_head.py
    │   │   ├── fused_semantic_head.py
    │   │   ├── grid_head.py
    │   │   ├── htc_mask_head.py
    │   │   └── maskiou_head.py
    │   ├── necks
    │   │   ├── __init__.py
    │   │   ├── bfp.py
    │   │   ├── fpn.py
    │   │   └── hrfpn.py
    │   ├── plugins
    │   │   ├── __init__.py
    │   │   ├── generalized_attention.py
    │   │   └── non_local.py
    │   ├── registry.py
    │   ├── roi_extractors
    │   │   ├── __init__.py
    │   │   └── single_level.py
    │   ├── shared_heads
    │   │   ├── __init__.py
    │   │   └── res_layer.py
    │   └── utils
    │   │   ├── __init__.py
    │   │   ├── conv_module.py
    │   │   ├── conv_ws.py
    │   │   ├── norm.py
    │   │   ├── scale.py
    │   │   └── weight_init.py
    ├── ops
    │   ├── __init__.py
    │   ├── context_block.py
    │   ├── dcn
    │   │   ├── __init__.py
    │   │   ├── deform_conv.py
    │   │   ├── deform_pool.py
    │   │   └── src
    │   │   │   ├── deform_conv_cuda.cpp
    │   │   │   ├── deform_conv_cuda_kernel.cu
    │   │   │   ├── deform_pool_cuda.cpp
    │   │   │   └── deform_pool_cuda_kernel.cu
    │   ├── masked_conv
    │   │   ├── __init__.py
    │   │   ├── masked_conv.py
    │   │   └── src
    │   │   │   ├── masked_conv2d_cuda.cpp
    │   │   │   └── masked_conv2d_kernel.cu
    │   ├── nms
    │   │   ├── __init__.py
    │   │   ├── nms_wrapper.py
    │   │   └── src
    │   │   │   ├── nms_cpu.cpp
    │   │   │   ├── nms_cuda.cpp
    │   │   │   ├── nms_kernel.cu
    │   │   │   └── soft_nms_cpu.pyx
    │   ├── roi_align
    │   │   ├── __init__.py
    │   │   ├── gradcheck.py
    │   │   ├── roi_align.py
    │   │   └── src
    │   │   │   ├── roi_align_cuda.cpp
    │   │   │   └── roi_align_kernel.cu
    │   ├── roi_pool
    │   │   ├── __init__.py
    │   │   ├── gradcheck.py
    │   │   ├── roi_pool.py
    │   │   └── src
    │   │   │   ├── roi_pool_cuda.cpp
    │   │   │   └── roi_pool_kernel.cu
    │   └── sigmoid_focal_loss
    │   │   ├── __init__.py
    │   │   ├── sigmoid_focal_loss.py
    │   │   └── src
    │   │       ├── sigmoid_focal_loss.cpp
    │   │       └── sigmoid_focal_loss_cuda.cu
    └── utils
    │   ├── __init__.py
    │   ├── flops_counter.py
    │   └── registry.py
├── setup.py
└── tools
    ├── analyze_logs.py
    ├── coco_eval.py
    ├── convert_datasets
        └── pascal_voc.py
    ├── detectron2pytorch.py
    ├── dist_test.sh
    ├── dist_train.sh
    ├── get_flops.py
    ├── publish_model.py
    ├── robustness_eval.py
    ├── slurm_test.sh
    ├── slurm_train.sh
    ├── test.py
    ├── test_robustness.py
    ├── train.py
    ├── upgrade_model_version.py
    └── voc_eval.py


/.github/ISSUE_TEMPLATE/error-report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Error report
 3 | about: Create a report to help us improve
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | Thanks for your error report; we appreciate it a lot.
11 | 
12 | **Checklist**
13 | 1. I have searched related issues but cannot get the expected help.
14 | 2. The bug has not been fixed in the latest version.
15 | 
16 | **Describe the bug**
17 | A clear and concise description of what the bug is.
18 | 
19 | **Reproduction**
20 | 1. What command or script did you run?
21 | ```
22 | A placeholder for the command.
23 | ```
24 | 2. Did you make any modifications to the code or config? Did you understand what you have modified?
25 | 3. What dataset did you use?
26 | 
27 | **Environment**
28 | - OS: [e.g., Ubuntu 16.04.6]
29 | - GCC: [e.g., 5.4.0]
30 | - PyTorch version: [e.g., 1.1.0]
31 | - How you installed PyTorch: [e.g., pip, conda, source]
32 | - GPU model: [e.g., 1080Ti, V100]
33 | - CUDA and CUDNN version
34 | - [optional] Other information that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.)
35 | 
36 | **Error traceback**
37 | If applicable, paste the error traceback here.
38 | ```
39 | A placeholder for the traceback.
40 | ```
41 | 
42 | **Bug fix**
43 | If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here and that would be much appreciated!
44 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request
 3 | about: Suggest an idea for this project
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Describe the feature**
11 | 
12 | **Motivation**
13 | A clear and concise description of the motivation of the feature.
14 | Ex1. It is inconvenient when [....].
15 | Ex2. There is a recent paper [....], which is very helpful for [....].
16 | 
17 | **Related resources**
18 | If there is an official code release or third-party implementations, please also provide the information here, which would be very helpful.
19 | 
20 | **Additional context**
21 | Add any other context or screenshots about the feature request here.
22 | If you would like to implement the feature and create a PR, please leave a comment here and that would be much appreciated.
23 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/general_questions.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: General questions
 3 | about: Ask general questions to get help
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | 
11 | 


--------------------------------------------------------------------------------
/.github/workflows/static.yml:
--------------------------------------------------------------------------------
 1 | # Simple workflow for deploying static content to GitHub Pages
 2 | name: Deploy static content to Pages
 3 | 
 4 | on:
 5 |   # Runs on pushes targeting the default branch
 6 |   push:
 7 |     branches: ["master"]
 8 | 
 9 |   # Allows you to run this workflow manually from the Actions tab
10 |   workflow_dispatch:
11 | 
12 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
13 | permissions:
14 |   contents: read
15 |   pages: write
16 |   id-token: write
17 | 
18 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
19 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
20 | concurrency:
21 |   group: "pages"
22 |   cancel-in-progress: false
23 | 
24 | jobs:
25 |   # Single deploy job since we're just deploying
26 |   deploy:
27 |     environment:
28 |       name: github-pages
29 |       url: ${{ steps.deployment.outputs.page_url }}
30 |     runs-on: ubuntu-latest
31 |     steps:
32 |       - name: Checkout
33 |         uses: actions/checkout@v3
34 |       - name: Setup Pages
35 |         uses: actions/configure-pages@v3
36 |       - name: Upload artifact
37 |         uses: actions/upload-pages-artifact@v1
38 |         with:
39 |           # Upload entire repository
40 |           path: '.'
41 |       - name: Deploy to GitHub Pages
42 |         id: deployment
43 |         uses: actions/deploy-pages@v2
44 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | env.bak/
 91 | venv.bak/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 
106 | # cython generated cpp
107 | mmdet/ops/nms/src/soft_nms_cpu.cpp
108 | mmdet/version.py
109 | data
110 | .vscode
111 | .idea
112 | 
113 | # custom
114 | *.pkl
115 | *.pkl.json
116 | *.log.json
117 | work_dirs/
118 | 
119 | # Pytorch
120 | *.pth
121 | 


--------------------------------------------------------------------------------
/.isort.cfg:
--------------------------------------------------------------------------------
1 | [isort]
2 | line_length = 79
3 | multi_line_output = 0
4 | known_first_party = mmdet
5 | known_third_party = mmcv,numpy,matplotlib,pycocotools,six,seaborn,terminaltables,torch,torchvision
6 | no_lines_before = STDLIB,LOCALFOLDER
7 | default_section = THIRDPARTY


--------------------------------------------------------------------------------
/.style.yapf:
--------------------------------------------------------------------------------
1 | [style]
2 | BASED_ON_STYLE = pep8
3 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true
4 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true
5 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | dist: xenial
 2 | language: python
 3 | 
 4 | install:
 5 |   - pip install isort flake8 yapf 
 6 | 
 7 | python:
 8 |   - "3.5"
 9 |   - "3.6"
10 |   - "3.7"
11 | 
12 | script:
13 |   - flake8
14 |   - isort -rc --check-only --diff mmdet/ tools/
15 |   - yapf -r -d --style .style.yapf mmdet/ tools/


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Contributor Covenant Code of Conduct
 2 | 
 3 | ## Our Pledge
 4 | 
 5 | In the interest of fostering an open and welcoming environment, we as
 6 | contributors and maintainers pledge to making participation in our project and
 7 | our community a harassment-free experience for everyone, regardless of age, body
 8 | size, disability, ethnicity, sex characteristics, gender identity and expression,
 9 | level of experience, education, socio-economic status, nationality, personal
10 | appearance, race, religion, or sexual identity and orientation.
11 | 
12 | ## Our Standards
13 | 
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 | 
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 | 
23 | Examples of unacceptable behavior by participants include:
24 | 
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 |  advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 |  address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 |  professional setting
33 | 
34 | ## Our Responsibilities
35 | 
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 | 
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 | 
46 | ## Scope
47 | 
48 | This Code of Conduct applies both within project spaces and in public spaces
49 | when an individual is representing the project or its community. Examples of
50 | representing a project or community include using an official project e-mail
51 | address, posting via an official social media account, or acting as an appointed
52 | representative at an online or offline event. Representation of a project may be
53 | further defined and clarified by project maintainers.
54 | 
55 | ## Enforcement
56 | 
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at chenkaidev@gmail.com. All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 | 
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 | 
68 | ## Attribution
69 | 
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
72 | 
73 | [homepage]: https://www.contributor-covenant.org
74 | 
75 | For answers to common questions about this code of conduct, see
76 | https://www.contributor-covenant.org/faq
77 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing to mmdetection
 2 | 
 3 | All kinds of contributions are welcome, including but not limited to the following.
 4 | 
 5 | - Fixes (typo, bugs)
 6 | - New features and components
 7 | 
 8 | ## Workflow
 9 | 
10 | 1. fork and pull the latest mmdetection
11 | 2. checkout a new branch (do not use master branch for PRs)
12 | 3. commit your changes
13 | 4. create a PR
14 | 
15 | Note
16 | - If you plan to add some new features that involve large changes, it is encouraged to open an issue for discussion first.
17 | - If you are the author of some papers and would like to include your method in mmdetection,
18 | please contact Kai Chen (chenkaidev[at]gmail[dot]com). We would greatly appreciate your contribution.
19 | 
20 | ## Code style
21 | 
22 | ### Python
23 | We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style.
24 | 
25 | We use the following tools for linting and formatting:
26 | - [flake8](http://flake8.pycqa.org/en/latest/): linter
27 | - [yapf](https://github.com/google/yapf): formatter
28 | - [isort](https://github.com/timothycrosley/isort): sort imports
29 | 
30 | Style configurations of yapf and isort can be found in [.style.yapf](.style.yapf) and [.isort.cfg](.isort.cfg).
31 | 
32 | >Before you create a PR, make sure that your code lints and is formatted by yapf.
33 | 
34 | ### C++ and CUDA
35 | We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html).


--------------------------------------------------------------------------------
/configs/cityscapes/README.md:
--------------------------------------------------------------------------------
 1 | ## Common settings
 2 | 
 3 | - All baselines were trained using 8 GPUs with a batch size of 8 (1 image per GPU) using the [linear scaling rule](https://arxiv.org/abs/1706.02677) to scale the learning rate.
 4 | - All models were trained on `cityscapes_train`, and tested on `cityscapes_val`.
 5 | - 1x training schedule indicates 64 epochs, which corresponds to slightly less than the 24k iterations reported in the original schedule from the [Mask R-CNN paper](https://arxiv.org/abs/1703.06870).
 6 | - All pytorch-style pretrained backbones on ImageNet are from PyTorch model zoo.
 7 | 
 8 | 
 9 | ## Baselines
10 | 
11 | Download links and more models with different backbones and training schemes will be added to the model zoo.
12 | 
13 | 
14 | ### Faster R-CNN
15 | 
16 | |    Backbone     |  Style  | Lr schd | Scale    | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
17 | | :-------------: | :-----: | :-----: | :---:    | :------: | :-----------------: | :------------: | :----: | :------: |
18 | |    R-50-FPN     | pytorch |   1x    | 800-1024 | 4.9      | 0.345               | 8.8            | 36.0   | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/cityscapes/faster_rcnn_r50_fpn_1x_city_20190727-7b9c0534.pth) |
19 | 
20 | ### Mask R-CNN
21 | 
22 | |    Backbone     |  Style  | Lr schd | Scale    | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |
23 | | :-------------: | :-----: | :-----: | :------: | :------: | :-----------------: | :------------: | :----: | :-----: | :------: |
24 | |    R-50-FPN     | pytorch |   1x    | 800-1024 | 4.9      | 0.609               | 2.5            | 37.4  |  32.5   | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/cityscapes/mask_rcnn_r50_fpn_1x_city_20190727-9b3c56a5.pth) |
25 | 
26 | **Notes:**
27 | - In the original paper, the mask AP of Mask R-CNN R-50-FPN is 31.5.
28 | 
29 | 


--------------------------------------------------------------------------------
/configs/empirical_attention/README.md:
--------------------------------------------------------------------------------
 1 | # An Empirical Study of Spatial Attention Mechanisms in Deep Networks
 2 | 
 3 | ## Introduction
 4 | 
 5 | ```
 6 | @article{zhu2019empirical,
 7 |   title={An Empirical Study of Spatial Attention Mechanisms in Deep Networks},
 8 |   author={Zhu, Xizhou and Cheng, Dazhi and Zhang, Zheng and Lin, Stephen and Dai, Jifeng},
 9 |   journal={arXiv preprint arXiv:1904.05873},
10 |   year={2019}
11 | }
12 | ```
13 | 
14 | 
15 | ## Results and Models
16 | 
17 | | Backbone  | Attention Component | DCN  | Lr schd | box AP | Download |
18 | |:---------:|:-------------------:|:----:|:-------:|:------:|:--------:|
19 | | R-50      | 1111                | N    | 1x      | 38.6   |     -    |
20 | | R-50      | 0010                | N    | 1x      | 38.2   |     -    |
21 | | R-50      | 1111                | Y    | 1x      | 41.0   |     -    |
22 | | R-50      | 0010                | Y    | 1x      | 40.8   |     -    |
23 | 
24 | 


--------------------------------------------------------------------------------
/configs/fcos/README.md:
--------------------------------------------------------------------------------
 1 | # FCOS: Fully Convolutional One-Stage Object Detection
 2 | 
 3 | ## Introduction
 4 | 
 5 | ```
 6 | @article{tian2019fcos,
 7 |   title={FCOS: Fully Convolutional One-Stage Object Detection},
 8 |   author={Tian, Zhi and Shen, Chunhua and Chen, Hao and He, Tong},
 9 |   journal={arXiv preprint arXiv:1904.01355},
10 |   year={2019}
11 | }
12 | ```
13 | 
14 | ## Results and Models
15 | 
16 | | Backbone  | Style   | GN  | MS train | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
17 | |:---------:|:-------:|:-------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:|
18 | | R-50      | caffe   | N       | N       | 1x      | 5.5      | 0.373               | 13.7           | 35.7   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_caffe_fpn_1x_4gpu_20190516-a7cac5ff.pth) |
19 | | R-50      | caffe   | Y       | N       | 1x      | 6.9      | 0.396               | 13.6           | 36.7   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_caffe_fpn_gn_1x_4gpu_20190516-9f253a93.pth) |
20 | | R-50      | caffe   | Y       | N       | 2x      | -        | -                   | -              | 36.9   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_caffe_fpn_gn_2x_4gpu_20190516_-93484354.pth) |
21 | | R-101     | caffe   | Y       | N       | 1x      | 10.4     | 0.558               | 11.6           | 39.1   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r101_caffe_fpn_gn_1x_4gpu_20190516-e4889733.pth) |
22 | | R-101     | caffe   | Y       | N       | 2x      | -        | -                   | -              | 39.1   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r101_caffe_fpn_gn_2x_4gpu_20190516-c03af97b.pth) |
23 | 
24 | 
25 | | Backbone  | Style   | GN  | MS train | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
26 | |:---------:|:-------:|:-------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:|
27 | | R-50      | caffe   | Y       | Y       | 2x      | -        | -                   | -              | 38.7   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_mstrain_640_800_r50_caffe_fpn_gn_2x_4gpu_20190516-f7329d80.pth) |
28 | | R-101     | caffe   | Y       | Y       | 2x      | -        | -                   | -              | 40.8   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu_20190516-42e6f62d.pth) |
29 | | X-101     | caffe   | Y       | Y       | 2x      | 9.7      | 0.892               | 7.0            | 42.8   | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x_20190516-a36c0872.pth) |
30 | 
31 | **Notes:**
32 | - To be consistent with the authors' implementation, we use 4 GPUs with 4 images/GPU for R-50 and R-101 models, and 8 GPUs with 2 images/GPU for X-101 models.
33 | - The X-101 backbone is X-101-64x4d.
34 | 


--------------------------------------------------------------------------------
/configs/foveabox/fovea_r101_fpn_4gpu_1x.py:
--------------------------------------------------------------------------------
  1 | # model settings: FOVEA detector built from a ResNet-101 backbone, an FPN neck and a FoveaHead
  2 | model = dict(
  3 |     type='FOVEA',
  4 |     pretrained='torchvision://resnet101',
  5 |     backbone=dict(
  6 |         type='ResNet',
  7 |         depth=101,
  8 |         num_stages=4,
  9 |         out_indices=(0, 1, 2, 3), # C2, C3, C4, C5
 10 |         frozen_stages=1,
 11 |         style='pytorch'),
 12 |     neck=dict(
 13 |         type='FPN',
 14 |         in_channels=[256, 512, 1024, 2048],
 15 |         out_channels=256,
 16 |         start_level=1,  # presumably skips C2 and starts from C3 -- confirm against FPN
 17 |         num_outs=5,  # same count as the five strides listed in bbox_head
 18 |         add_extra_convs=True),
 19 |     bbox_head=dict(
 20 |         type='FoveaHead',
 21 |         num_classes=81,  # presumably 80 COCO categories + background -- confirm against FoveaHead
 22 |         in_channels=256,
 23 |         stacked_convs=4,
 24 |         feat_channels=256,
 25 |         strides=[8, 16, 32, 64, 128],  # five entries, as in base_edge_list and scale_ranges below
 26 |         base_edge_list=[16, 32, 64, 128, 256],
 27 |         scale_ranges=((1, 64), (32, 128), (64, 256), (128, 512), (256, 2048)),
 28 |         sigma=0.4,
 29 |         with_deform=False,  # the *_2x_align configs set this to True
 30 |         loss_cls=dict(
 31 |             type='FocalLoss',
 32 |             use_sigmoid=True,
 33 |             gamma=1.50,
 34 |             alpha=0.4,
 35 |             loss_weight=1.0),
 36 |         loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0)
 37 |     ))
 38 | # training and testing settings (train_cfg is left empty)
 39 | train_cfg = dict()
 40 | test_cfg = dict(
 41 |     nms_pre=1000,
 42 |     score_thr=0.05,
 43 |     nms=dict(type='nms', iou_thr=0.5),
 44 |     max_per_img=100)
 45 | # dataset settings: COCO 2017 annotations and images under data/coco/
 46 | dataset_type = 'CocoDataset'
 47 | data_root = 'data/coco/'
 48 | img_norm_cfg = dict(
 49 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 50 | data = dict(
 51 |     imgs_per_gpu=4,
 52 |     workers_per_gpu=4,
 53 |     train=dict(
 54 |         type=dataset_type,
 55 |         ann_file=data_root + 'annotations/instances_train2017.json',
 56 |         img_prefix=data_root + 'train2017/',
 57 |         img_scale=(1333, 800),
 58 |         img_norm_cfg=img_norm_cfg,
 59 |         size_divisor=32,
 60 |         flip_ratio=0.5,
 61 |         with_mask=False,
 62 |         with_crowd=False,
 63 |         with_label=True),
 64 |     val=dict(
 65 |         type=dataset_type,
 66 |         ann_file=data_root + 'annotations/instances_val2017.json',
 67 |         img_prefix=data_root + 'val2017/',
 68 |         img_scale=(1333, 800),
 69 |         img_norm_cfg=img_norm_cfg,
 70 |         size_divisor=32,
 71 |         flip_ratio=0,
 72 |         with_mask=False,
 73 |         with_crowd=False,
 74 |         with_label=True),
 75 |     test=dict(  # reads the same val2017 annotation file and image folder as `val`
 76 |         type=dataset_type,
 77 |         ann_file=data_root + 'annotations/instances_val2017.json',
 78 |         img_prefix=data_root + 'val2017/',
 79 |         img_scale=(1333, 800),
 80 |         img_norm_cfg=img_norm_cfg,
 81 |         size_divisor=32,
 82 |         flip_ratio=0,
 83 |         with_mask=False,
 84 |         with_crowd=False,
 85 |         with_label=False,
 86 |         test_mode=True))
 87 | # optimizer: SGD, with gradient-norm clipping configured in optimizer_config
 88 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
 89 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
 90 | # learning policy: step schedule preceded by a 500-iteration linear warmup
 91 | lr_config = dict(
 92 |     policy='step',
 93 |     warmup='linear',
 94 |     warmup_iters=500,
 95 |     warmup_ratio=1.0 / 3,
 96 |     step=[8, 11])  # both milestones fall inside the 12 epochs set by total_epochs
 97 | checkpoint_config = dict(interval=1)
 98 | # yapf:disable
 99 | log_config = dict(
100 |     interval=50,
101 |     hooks=[
102 |         dict(type='TextLoggerHook'),
103 |         # dict(type='TensorboardLoggerHook')
104 |     ])
105 | # yapf:enable
106 | # runtime settings
107 | total_epochs = 12  # the "1x" schedule in this config's file name
108 | device_ids = range(4)  # four devices, matching "4gpu" in this config's file name
109 | dist_params = dict(backend='nccl')
110 | log_level = 'INFO'
111 | work_dir = './work_dirs/fovea_release_r101_fpn_4gpu_1x'
112 | load_from = None
113 | resume_from = None
114 | workflow = [('train', 1)]
115 | 


--------------------------------------------------------------------------------
/configs/foveabox/fovea_r101_fpn_4gpu_2x.py:
--------------------------------------------------------------------------------
  1 | # model settings: FOVEA detector built from a ResNet-101 backbone, an FPN neck and a FoveaHead
  2 | model = dict(
  3 |     type='FOVEA',
  4 |     pretrained='torchvision://resnet101',
  5 |     backbone=dict(
  6 |         type='ResNet',
  7 |         depth=101,
  8 |         num_stages=4,
  9 |         out_indices=(0, 1, 2, 3), # C2, C3, C4, C5
 10 |         frozen_stages=1,
 11 |         style='pytorch'),
 12 |     neck=dict(
 13 |         type='FPN',
 14 |         in_channels=[256, 512, 1024, 2048],
 15 |         out_channels=256,
 16 |         start_level=1,  # presumably skips C2 and starts from C3 -- confirm against FPN
 17 |         num_outs=5,  # same count as the five strides listed in bbox_head
 18 |         add_extra_convs=True),
 19 |     bbox_head=dict(
 20 |         type='FoveaHead',
 21 |         num_classes=81,  # presumably 80 COCO categories + background -- confirm against FoveaHead
 22 |         in_channels=256,
 23 |         stacked_convs=4,
 24 |         feat_channels=256,
 25 |         strides=[8, 16, 32, 64, 128],  # five entries, as in base_edge_list and scale_ranges below
 26 |         base_edge_list=[16, 32, 64, 128, 256],
 27 |         scale_ranges=((1, 64), (32, 128), (64, 256), (128, 512), (256, 2048)),
 28 |         sigma=0.4,
 29 |         with_deform=False,  # the *_2x_align configs set this to True
 30 |         loss_cls=dict(
 31 |             type='FocalLoss',
 32 |             use_sigmoid=True,
 33 |             gamma=1.50,
 34 |             alpha=0.4,
 35 |             loss_weight=1.0),
 36 |         loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0)
 37 |     ))
 38 | # training and testing settings (train_cfg is left empty)
 39 | train_cfg = dict()
 40 | test_cfg = dict(
 41 |     nms_pre=1000,
 42 |     score_thr=0.05,
 43 |     nms=dict(type='nms', iou_thr=0.5),
 44 |     max_per_img=100)
 45 | # dataset settings: COCO 2017 annotations and images under data/coco/
 46 | dataset_type = 'CocoDataset'
 47 | data_root = 'data/coco/'
 48 | img_norm_cfg = dict(
 49 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 50 | data = dict(
 51 |     imgs_per_gpu=4,
 52 |     workers_per_gpu=4,
 53 |     train=dict(
 54 |         type=dataset_type,
 55 |         ann_file=data_root + 'annotations/instances_train2017.json',
 56 |         img_prefix=data_root + 'train2017/',
 57 |         img_scale=(1333, 800),
 58 |         img_norm_cfg=img_norm_cfg,
 59 |         size_divisor=32,
 60 |         flip_ratio=0.5,
 61 |         with_mask=False,
 62 |         with_crowd=False,
 63 |         with_label=True),
 64 |     val=dict(
 65 |         type=dataset_type,
 66 |         ann_file=data_root + 'annotations/instances_val2017.json',
 67 |         img_prefix=data_root + 'val2017/',
 68 |         img_scale=(1333, 800),
 69 |         img_norm_cfg=img_norm_cfg,
 70 |         size_divisor=32,
 71 |         flip_ratio=0,
 72 |         with_mask=False,
 73 |         with_crowd=False,
 74 |         with_label=True),
 75 |     test=dict(  # reads the same val2017 annotation file and image folder as `val`
 76 |         type=dataset_type,
 77 |         ann_file=data_root + 'annotations/instances_val2017.json',
 78 |         img_prefix=data_root + 'val2017/',
 79 |         img_scale=(1333, 800),
 80 |         img_norm_cfg=img_norm_cfg,
 81 |         size_divisor=32,
 82 |         flip_ratio=0,
 83 |         with_mask=False,
 84 |         with_crowd=False,
 85 |         with_label=False,
 86 |         test_mode=True))
 87 | # optimizer: SGD, with gradient-norm clipping configured in optimizer_config
 88 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
 89 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
 90 | # learning policy: step schedule preceded by a 500-iteration linear warmup
 91 | lr_config = dict(
 92 |     policy='step',
 93 |     warmup='linear',
 94 |     warmup_iters=500,
 95 |     warmup_ratio=1.0 / 3,
 96 |     step=[16, 22])  # both milestones fall inside the 24 epochs set by total_epochs
 97 | checkpoint_config = dict(interval=1)
 98 | # yapf:disable
 99 | log_config = dict(
100 |     interval=50,
101 |     hooks=[
102 |         dict(type='TextLoggerHook'),
103 |         # dict(type='TensorboardLoggerHook')
104 |     ])
105 | # yapf:enable
106 | # runtime settings
107 | total_epochs = 24  # the "2x" schedule in this config's file name
108 | device_ids = range(4)  # four devices, matching "4gpu" in this config's file name
109 | dist_params = dict(backend='nccl')
110 | log_level = 'INFO'
111 | work_dir = './work_dirs/fovea_release_r101_fpn_4gpu_2x'
112 | load_from = None
113 | resume_from = None
114 | workflow = [('train', 1)]
115 | 


--------------------------------------------------------------------------------
/configs/foveabox/fovea_r101_fpn_4gpu_2x_align.py:
--------------------------------------------------------------------------------
  1 | # model settings: FOVEA detector built from a ResNet-101 backbone, an FPN neck and a FoveaHead
  2 | model = dict(
  3 |     type='FOVEA',
  4 |     pretrained='torchvision://resnet101',
  5 |     backbone=dict(
  6 |         type='ResNet',
  7 |         depth=101,
  8 |         num_stages=4,
  9 |         out_indices=(0, 1, 2, 3), # C2, C3, C4, C5
 10 |         frozen_stages=1,
 11 |         style='pytorch'),
 12 |     neck=dict(
 13 |         type='FPN',
 14 |         in_channels=[256, 512, 1024, 2048],
 15 |         out_channels=256,
 16 |         start_level=1,  # presumably skips C2 and starts from C3 -- confirm against FPN
 17 |         num_outs=5,  # same count as the five strides listed in bbox_head
 18 |         add_extra_convs=True),
 19 |     bbox_head=dict(
 20 |         type='FoveaHead',
 21 |         num_classes=81,  # presumably 80 COCO categories + background -- confirm against FoveaHead
 22 |         in_channels=256,
 23 |         stacked_convs=4,
 24 |         feat_channels=256,
 25 |         strides=[8, 16, 32, 64, 128],  # five entries, as in base_edge_list and scale_ranges below
 26 |         base_edge_list=[16, 32, 64, 128, 256],
 27 |         scale_ranges=((1, 64), (32, 128), (64, 256), (128, 512), (256, 2048)),
 28 |         sigma=0.4,
 29 |         with_deform=True,  # the non-align fovea_r101 configs set this to False
 30 |         loss_cls=dict(
 31 |             type='FocalLoss',
 32 |             use_sigmoid=True,
 33 |             gamma=1.50,
 34 |             alpha=0.4,
 35 |             loss_weight=1.0),
 36 |         loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0),
 37 |         norm_cfg=dict(type='GN', num_groups=32, requires_grad=True)  # GN head norm; absent from the non-align configs
 38 |     ))
 39 | # training and testing settings (train_cfg is left empty)
 40 | train_cfg = dict()
 41 | test_cfg = dict(
 42 |     nms_pre=1000,
 43 |     score_thr=0.05,
 44 |     nms=dict(type='nms', iou_thr=0.5),
 45 |     max_per_img=100)
 46 | # dataset settings: COCO 2017 annotations and images under data/coco/
 47 | dataset_type = 'CocoDataset'
 48 | data_root = 'data/coco/'
 49 | img_norm_cfg = dict(
 50 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 51 | data = dict(
 52 |     imgs_per_gpu=4,
 53 |     workers_per_gpu=4,
 54 |     train=dict(
 55 |         type=dataset_type,
 56 |         ann_file=data_root + 'annotations/instances_train2017.json',
 57 |         img_prefix=data_root + 'train2017/',
 58 |         img_scale=(1333, 800),
 59 |         img_norm_cfg=img_norm_cfg,
 60 |         size_divisor=32,
 61 |         flip_ratio=0.5,
 62 |         with_mask=False,
 63 |         with_crowd=False,
 64 |         with_label=True),
 65 |     val=dict(
 66 |         type=dataset_type,
 67 |         ann_file=data_root + 'annotations/instances_val2017.json',
 68 |         img_prefix=data_root + 'val2017/',
 69 |         img_scale=(1333, 800),
 70 |         img_norm_cfg=img_norm_cfg,
 71 |         size_divisor=32,
 72 |         flip_ratio=0,
 73 |         with_mask=False,
 74 |         with_crowd=False,
 75 |         with_label=True),
 76 |     test=dict(  # reads the same val2017 annotation file and image folder as `val`
 77 |         type=dataset_type,
 78 |         ann_file=data_root + 'annotations/instances_val2017.json',
 79 |         img_prefix=data_root + 'val2017/',
 80 |         img_scale=(1333, 800),
 81 |         img_norm_cfg=img_norm_cfg,
 82 |         size_divisor=32,
 83 |         flip_ratio=0,
 84 |         with_mask=False,
 85 |         with_crowd=False,
 86 |         with_label=False,
 87 |         test_mode=True))
 88 | # optimizer: SGD, with gradient-norm clipping configured in optimizer_config
 89 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
 90 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
 91 | # learning policy: step schedule preceded by a 500-iteration linear warmup
 92 | lr_config = dict(
 93 |     policy='step',
 94 |     warmup='linear',
 95 |     warmup_iters=500,
 96 |     warmup_ratio=1.0 / 3,
 97 |     step=[16, 22])  # both milestones fall inside the 24 epochs set by total_epochs
 98 | checkpoint_config = dict(interval=1)
 99 | # yapf:disable
100 | log_config = dict(
101 |     interval=50,
102 |     hooks=[
103 |         dict(type='TextLoggerHook'),
104 |         # dict(type='TensorboardLoggerHook')
105 |     ])
106 | # yapf:enable
107 | # runtime settings
108 | total_epochs = 24  # the "2x" schedule in this config's file name
109 | device_ids = range(4)  # four devices, matching "4gpu" in this config's file name
110 | dist_params = dict(backend='nccl')
111 | log_level = 'INFO'
112 | work_dir = './work_dirs/fovea_release_r101_fpn_4gpu_2x_align'
113 | load_from = None
114 | resume_from = None
115 | workflow = [('train', 1)]
116 | 


--------------------------------------------------------------------------------
/configs/foveabox/fovea_r50_fpn_4gpu_1x.py:
--------------------------------------------------------------------------------
  1 | # model settings: FOVEA detector built from a ResNet-50 backbone, an FPN neck and a FoveaHead
  2 | model = dict(
  3 |     type='FOVEA',
  4 |     pretrained='torchvision://resnet50',
  5 |     backbone=dict(
  6 |         type='ResNet',
  7 |         depth=50,
  8 |         num_stages=4,
  9 |         out_indices=(0, 1, 2, 3), # C2, C3, C4, C5
 10 |         frozen_stages=1,
 11 |         style='pytorch'),
 12 |     neck=dict(
 13 |         type='FPN',
 14 |         in_channels=[256, 512, 1024, 2048],
 15 |         out_channels=256,
 16 |         start_level=1,  # presumably skips C2 and starts from C3 -- confirm against FPN
 17 |         num_outs=5,  # same count as the five strides listed in bbox_head
 18 |         add_extra_convs=True),
 19 |     bbox_head=dict(
 20 |         type='FoveaHead',
 21 |         num_classes=81,  # presumably 80 COCO categories + background -- confirm against FoveaHead
 22 |         in_channels=256,
 23 |         stacked_convs=4,
 24 |         feat_channels=256,
 25 |         strides=[8, 16, 32, 64, 128],  # five entries, as in base_edge_list and scale_ranges below
 26 |         base_edge_list=[16, 32, 64, 128, 256],
 27 |         scale_ranges=((1, 64), (32, 128), (64, 256), (128, 512), (256, 2048)),
 28 |         sigma=0.4,
 29 |         with_deform=False,  # the *_2x_align configs set this to True
 30 |         loss_cls=dict(
 31 |             type='FocalLoss',
 32 |             use_sigmoid=True,
 33 |             gamma=1.50,
 34 |             alpha=0.4,
 35 |             loss_weight=1.0),
 36 |         loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0)
 37 |     ))
 38 | # training and testing settings (train_cfg is left empty)
 39 | train_cfg = dict()
 40 | test_cfg = dict(
 41 |     nms_pre=1000,
 42 |     score_thr=0.05,
 43 |     nms=dict(type='nms', iou_thr=0.5),
 44 |     max_per_img=100)
 45 | # dataset settings: COCO 2017 annotations and images under data/coco/
 46 | dataset_type = 'CocoDataset'
 47 | data_root = 'data/coco/'
 48 | img_norm_cfg = dict(
 49 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 50 | data = dict(
 51 |     imgs_per_gpu=4,
 52 |     workers_per_gpu=4,
 53 |     train=dict(
 54 |         type=dataset_type,
 55 |         ann_file=data_root + 'annotations/instances_train2017.json',
 56 |         img_prefix=data_root + 'train2017/',
 57 |         img_scale=(1333, 800),
 58 |         img_norm_cfg=img_norm_cfg,
 59 |         size_divisor=32,
 60 |         flip_ratio=0.5,
 61 |         with_mask=False,
 62 |         with_crowd=False,
 63 |         with_label=True),
 64 |     val=dict(
 65 |         type=dataset_type,
 66 |         ann_file=data_root + 'annotations/instances_val2017.json',
 67 |         img_prefix=data_root + 'val2017/',
 68 |         img_scale=(1333, 800),
 69 |         img_norm_cfg=img_norm_cfg,
 70 |         size_divisor=32,
 71 |         flip_ratio=0,
 72 |         with_mask=False,
 73 |         with_crowd=False,
 74 |         with_label=True),
 75 |     test=dict(  # reads the same val2017 annotation file and image folder as `val`
 76 |         type=dataset_type,
 77 |         ann_file=data_root + 'annotations/instances_val2017.json',
 78 |         img_prefix=data_root + 'val2017/',
 79 |         img_scale=(1333, 800),
 80 |         img_norm_cfg=img_norm_cfg,
 81 |         size_divisor=32,
 82 |         flip_ratio=0,
 83 |         with_mask=False,
 84 |         with_crowd=False,
 85 |         with_label=False,
 86 |         test_mode=True))
 87 | # optimizer: SGD, with gradient-norm clipping configured in optimizer_config
 88 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
 89 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
 90 | # learning policy: step schedule preceded by a 500-iteration linear warmup
 91 | lr_config = dict(
 92 |     policy='step',
 93 |     warmup='linear',
 94 |     warmup_iters=500,
 95 |     warmup_ratio=1.0 / 3,
 96 |     step=[8, 11])  # both milestones fall inside the 12 epochs set by total_epochs
 97 | checkpoint_config = dict(interval=1)
 98 | # yapf:disable
 99 | log_config = dict(
100 |     interval=50,
101 |     hooks=[
102 |         dict(type='TextLoggerHook'),
103 |         # dict(type='TensorboardLoggerHook')
104 |     ])
105 | # yapf:enable
106 | # runtime settings
107 | total_epochs = 12  # the "1x" schedule in this config's file name
108 | device_ids = range(4)  # four devices, matching "4gpu" in this config's file name
109 | dist_params = dict(backend='nccl')
110 | log_level = 'INFO'
111 | work_dir = './work_dirs/fovea_release_r50_fpn_4gpu_1x'
112 | load_from = None
113 | resume_from = None
114 | workflow = [('train', 1)]
115 | 


--------------------------------------------------------------------------------
/configs/foveabox/fovea_r50_fpn_4gpu_2x.py:
--------------------------------------------------------------------------------
  1 | # model settings: FOVEA detector built from a ResNet-50 backbone, an FPN neck and a FoveaHead
  2 | model = dict(
  3 |     type='FOVEA',
  4 |     pretrained='torchvision://resnet50',
  5 |     backbone=dict(
  6 |         type='ResNet',
  7 |         depth=50,
  8 |         num_stages=4,
  9 |         out_indices=(0, 1, 2, 3), # C2, C3, C4, C5
 10 |         frozen_stages=1,
 11 |         style='pytorch'),
 12 |     neck=dict(
 13 |         type='FPN',
 14 |         in_channels=[256, 512, 1024, 2048],
 15 |         out_channels=256,
 16 |         start_level=1,  # presumably skips C2 and starts from C3 -- confirm against FPN
 17 |         num_outs=5,  # same count as the five strides listed in bbox_head
 18 |         add_extra_convs=True),
 19 |     bbox_head=dict(
 20 |         type='FoveaHead',
 21 |         num_classes=81,  # presumably 80 COCO categories + background -- confirm against FoveaHead
 22 |         in_channels=256,
 23 |         stacked_convs=4,
 24 |         feat_channels=256,
 25 |         strides=[8, 16, 32, 64, 128],  # five entries, as in base_edge_list and scale_ranges below
 26 |         base_edge_list=[16, 32, 64, 128, 256],
 27 |         scale_ranges=((1, 64), (32, 128), (64, 256), (128, 512), (256, 2048)),
 28 |         sigma=0.4,
 29 |         with_deform=False,  # the *_2x_align configs set this to True
 30 |         loss_cls=dict(
 31 |             type='FocalLoss',
 32 |             use_sigmoid=True,
 33 |             gamma=1.50,
 34 |             alpha=0.4,
 35 |             loss_weight=1.0),
 36 |         loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0)
 37 |     ))
 38 | # training and testing settings (train_cfg is left empty)
 39 | train_cfg = dict()
 40 | test_cfg = dict(
 41 |     nms_pre=1000,
 42 |     score_thr=0.05,
 43 |     nms=dict(type='nms', iou_thr=0.5),
 44 |     max_per_img=100)
 45 | # dataset settings: COCO 2017 annotations and images under data/coco/
 46 | dataset_type = 'CocoDataset'
 47 | data_root = 'data/coco/'
 48 | img_norm_cfg = dict(
 49 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 50 | data = dict(
 51 |     imgs_per_gpu=4,
 52 |     workers_per_gpu=4,
 53 |     train=dict(
 54 |         type=dataset_type,
 55 |         ann_file=data_root + 'annotations/instances_train2017.json',
 56 |         img_prefix=data_root + 'train2017/',
 57 |         img_scale=(1333, 800),
 58 |         img_norm_cfg=img_norm_cfg,
 59 |         size_divisor=32,
 60 |         flip_ratio=0.5,
 61 |         with_mask=False,
 62 |         with_crowd=False,
 63 |         with_label=True),
 64 |     val=dict(
 65 |         type=dataset_type,
 66 |         ann_file=data_root + 'annotations/instances_val2017.json',
 67 |         img_prefix=data_root + 'val2017/',
 68 |         img_scale=(1333, 800),
 69 |         img_norm_cfg=img_norm_cfg,
 70 |         size_divisor=32,
 71 |         flip_ratio=0,
 72 |         with_mask=False,
 73 |         with_crowd=False,
 74 |         with_label=True),
 75 |     test=dict(  # reads the same val2017 annotation file and image folder as `val`
 76 |         type=dataset_type,
 77 |         ann_file=data_root + 'annotations/instances_val2017.json',
 78 |         img_prefix=data_root + 'val2017/',
 79 |         img_scale=(1333, 800),
 80 |         img_norm_cfg=img_norm_cfg,
 81 |         size_divisor=32,
 82 |         flip_ratio=0,
 83 |         with_mask=False,
 84 |         with_crowd=False,
 85 |         with_label=False,
 86 |         test_mode=True))
 87 | # optimizer: SGD, with gradient-norm clipping configured in optimizer_config
 88 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
 89 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
 90 | # learning policy: step schedule preceded by a 500-iteration linear warmup
 91 | lr_config = dict(
 92 |     policy='step',
 93 |     warmup='linear',
 94 |     warmup_iters=500,
 95 |     warmup_ratio=1.0 / 3,
 96 |     step=[16, 22])  # both milestones fall inside the 24 epochs set by total_epochs
 97 | checkpoint_config = dict(interval=1)
 98 | # yapf:disable
 99 | log_config = dict(
100 |     interval=50,
101 |     hooks=[
102 |         dict(type='TextLoggerHook'),
103 |         # dict(type='TensorboardLoggerHook')
104 |     ])
105 | # yapf:enable
106 | # runtime settings
107 | total_epochs = 24  # the "2x" schedule in this config's file name
108 | device_ids = range(4)  # four devices, matching "4gpu" in this config's file name
109 | dist_params = dict(backend='nccl')
110 | log_level = 'INFO'
111 | work_dir = './work_dirs/fovea_release_r50_fpn_4gpu_2x'
112 | load_from = None
113 | resume_from = None
114 | workflow = [('train', 1)]
115 | 


--------------------------------------------------------------------------------
/configs/foveabox/fovea_r50_fpn_4gpu_2x_align.py:
--------------------------------------------------------------------------------
  1 | # model settings: FOVEA detector built from a ResNet-50 backbone, an FPN neck and a FoveaHead
  2 | model = dict(
  3 |     type='FOVEA',
  4 |     pretrained='torchvision://resnet50',
  5 |     backbone=dict(
  6 |         type='ResNet',
  7 |         depth=50,
  8 |         num_stages=4,
  9 |         out_indices=(0, 1, 2, 3), # C2, C3, C4, C5
 10 |         frozen_stages=1,
 11 |         style='pytorch'),
 12 |     neck=dict(
 13 |         type='FPN',
 14 |         in_channels=[256, 512, 1024, 2048],
 15 |         out_channels=256,
 16 |         start_level=1,  # presumably skips C2 and starts from C3 -- confirm against FPN
 17 |         num_outs=5,  # same count as the five strides listed in bbox_head
 18 |         add_extra_convs=True),
 19 |     bbox_head=dict(
 20 |         type='FoveaHead',
 21 |         num_classes=81,  # presumably 80 COCO categories + background -- confirm against FoveaHead
 22 |         in_channels=256,
 23 |         stacked_convs=4,
 24 |         feat_channels=256,
 25 |         strides=[8, 16, 32, 64, 128],  # five entries, as in base_edge_list and scale_ranges below
 26 |         base_edge_list=[16, 32, 64, 128, 256],
 27 |         scale_ranges=((1, 64), (32, 128), (64, 256), (128, 512), (256, 2048)),
 28 |         sigma=0.4,
 29 |         with_deform=True,  # the non-align fovea_r50 configs set this to False
 30 |         loss_cls=dict(
 31 |             type='FocalLoss',
 32 |             use_sigmoid=True,
 33 |             gamma=1.50,
 34 |             alpha=0.4,
 35 |             loss_weight=1.0),
 36 |         loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0),
 37 |         norm_cfg=dict(type='GN', num_groups=32, requires_grad=True)  # GN head norm; absent from the non-align configs
 38 |     ))
 39 | # training and testing settings (train_cfg is left empty)
 40 | train_cfg = dict()
 41 | test_cfg = dict(
 42 |     nms_pre=1000,
 43 |     score_thr=0.05,
 44 |     nms=dict(type='nms', iou_thr=0.5),
 45 |     max_per_img=100)
 46 | # dataset settings: COCO 2017 annotations and images under data/coco/
 47 | dataset_type = 'CocoDataset'
 48 | data_root = 'data/coco/'
 49 | img_norm_cfg = dict(
 50 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 51 | data = dict(
 52 |     imgs_per_gpu=4,
 53 |     workers_per_gpu=4,
 54 |     train=dict(
 55 |         type=dataset_type,
 56 |         ann_file=data_root + 'annotations/instances_train2017.json',
 57 |         img_prefix=data_root + 'train2017/',
 58 |         img_scale=(1333, 800),
 59 |         img_norm_cfg=img_norm_cfg,
 60 |         size_divisor=32,
 61 |         flip_ratio=0.5,
 62 |         with_mask=False,
 63 |         with_crowd=False,
 64 |         with_label=True),
 65 |     val=dict(
 66 |         type=dataset_type,
 67 |         ann_file=data_root + 'annotations/instances_val2017.json',
 68 |         img_prefix=data_root + 'val2017/',
 69 |         img_scale=(1333, 800),
 70 |         img_norm_cfg=img_norm_cfg,
 71 |         size_divisor=32,
 72 |         flip_ratio=0,
 73 |         with_mask=False,
 74 |         with_crowd=False,
 75 |         with_label=True),
 76 |     test=dict(  # reads the same val2017 annotation file and image folder as `val`
 77 |         type=dataset_type,
 78 |         ann_file=data_root + 'annotations/instances_val2017.json',
 79 |         img_prefix=data_root + 'val2017/',
 80 |         img_scale=(1333, 800),
 81 |         img_norm_cfg=img_norm_cfg,
 82 |         size_divisor=32,
 83 |         flip_ratio=0,
 84 |         with_mask=False,
 85 |         with_crowd=False,
 86 |         with_label=False,
 87 |         test_mode=True))
 88 | # optimizer: SGD, with gradient-norm clipping configured in optimizer_config
 89 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
 90 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
 91 | # learning policy: step schedule preceded by a 500-iteration linear warmup
 92 | lr_config = dict(
 93 |     policy='step',
 94 |     warmup='linear',
 95 |     warmup_iters=500,
 96 |     warmup_ratio=1.0 / 3,
 97 |     step=[16, 22])  # both milestones fall inside the 24 epochs set by total_epochs
 98 | checkpoint_config = dict(interval=1)
 99 | # yapf:disable
100 | log_config = dict(
101 |     interval=50,
102 |     hooks=[
103 |         dict(type='TextLoggerHook'),
104 |         # dict(type='TensorboardLoggerHook')
105 |     ])
106 | # yapf:enable
107 | # runtime settings
108 | total_epochs = 24  # the "2x" schedule in this config's file name
109 | device_ids = range(4)  # four devices, matching "4gpu" in this config's file name
110 | dist_params = dict(backend='nccl')
111 | log_level = 'INFO'
112 | work_dir = './work_dirs/fovea_release_r50_fpn_4gpu_2x_align'
113 | load_from = None
114 | resume_from = None
115 | workflow = [('train', 1)]


--------------------------------------------------------------------------------
/configs/ghm/README.md:
--------------------------------------------------------------------------------
 1 | # Gradient Harmonized Single-stage Detector
 2 | 
 3 | ## Introduction
 4 | 
 5 | ```
 6 | @inproceedings{li2019gradient,
 7 |   title={Gradient Harmonized Single-stage Detector},
 8 |   author={Li, Buyu and Liu, Yu and Wang, Xiaogang},
 9 |   booktitle={AAAI Conference on Artificial Intelligence},
10 |   year={2019}
11 | }
12 | ```
13 | 
14 | ## Results and Models
15 | 
16 | |    Backbone     |  Style  | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
17 | | :-------------: | :-----: | :-----: | :------: | :-----------------: | :------------: | :----: | :------: |
18 | |    R-50-FPN     | pytorch |   1x    |   3.9    | 0.500               | 9.4            |  36.9  | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_r50_fpn_1x_20190608-b9aa5862.pth) |
19 | |    R-101-FPN    | pytorch |   1x    |   5.8    | 0.625               | 8.5            |  39.0  | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_r101_fpn_1x_20190608-b885b74a.pth) |
20 | | X-101-32x4d-FPN | pytorch |   1x    |   7.0    | 0.818               | 7.6            |  40.5  | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_x101_32x4d_fpn_1x_20190608-ed295d22.pth) |
21 | | X-101-64x4d-FPN | pytorch |   1x    |   9.9    | 1.191               | 6.1            |  41.6  | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_x101_64x4d_fpn_1x_20190608-7f2037ce.pth) |


--------------------------------------------------------------------------------
/configs/gn/README.md:
--------------------------------------------------------------------------------
 1 | # Group Normalization
 2 | 
 3 | ## Introduction
 4 | 
 5 | ```
 6 | @inproceedings{wu2018group,
 7 |   title={Group Normalization},
 8 |   author={Wu, Yuxin and He, Kaiming},
 9 |   booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
10 |   year={2018}
11 | }
12 | ```
13 | 
14 | ## Results and Models
15 | 
16 | | Backbone      | model      | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |
17 | |:-------------:|:----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:|
18 | | R-50-FPN (d)  | Mask R-CNN | 2x      | 7.2      | 0.806               | 5.4            | 39.8   | 36.1    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_2x_20180113-86832cf2.pth) |
19 | | R-50-FPN (d)  | Mask R-CNN | 3x      | 7.2      | 0.806               | 5.4            | 40.1   | 36.4    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_3x_20180113-8e82f48d.pth) |
20 | | R-101-FPN (d) | Mask R-CNN | 2x      | 9.9      | 0.970               | 4.8            | 41.5   | 37.0    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r101_fpn_gn_2x_20180113-9598649c.pth) |
21 | | R-101-FPN (d) | Mask R-CNN | 3x      | 9.9      | 0.970               | 4.8            | 41.6   | 37.3    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r101_fpn_gn_3x_20180113-a14ffb96.pth) |
22 | | R-50-FPN (c)  | Mask R-CNN | 2x      | 7.2      | 0.806               | 5.4            | 39.7   | 35.9    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_contrib_2x_20180113-ec93305c.pth) |
23 | | R-50-FPN (c)  | Mask R-CNN | 3x      | 7.2      | 0.806               | 5.4            | 40.0   | 36.2    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_contrib_3x_20180113-9d230cab.pth) |
24 | 
25 | **Notes:**
26 | - (d) means pretrained model converted from Detectron, and (c) means the contributed model pretrained by [@thangvubk](https://github.com/thangvubk).
27 | - The `3x` schedule is epoch [28, 34, 36].
28 | - **Memory and train/inf time figures are outdated.**


--------------------------------------------------------------------------------
/configs/grid_rcnn/README.md:
--------------------------------------------------------------------------------
 1 | # Grid R-CNN
 2 | 
 3 | ## Introduction
 4 | 
 5 | ```
 6 | @inproceedings{lu2019grid,
 7 |   title={Grid R-CNN},
 8 |   author={Lu, Xin and Li, Buyu and Yue, Yuxin and Li, Quanquan and Yan, Junjie},
 9 |   booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
10 |   year={2019}
11 | }
12 | 
13 | @article{lu2019gridplus,
14 |   title={Grid R-CNN Plus: Faster and Better},
15 |   author={Lu, Xin and Li, Buyu and Yue, Yuxin and Li, Quanquan and Yan, Junjie},
16 |   journal={arXiv preprint arXiv:1906.05688},
17 |   year={2019}
18 | }
19 | ```
20 | 
21 | ## Results and Models
22 | 
23 | | Backbone    | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
24 | |:-----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:|
25 | | R-50        | 2x      | 4.8      | 1.172               | 10.9           | 40.3   | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_r50_fpn_2x_20190619-5b29cf9d.pth) |
26 | | R-101       | 2x      | 6.7      | 1.214               | 10.0           | 41.7   | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_r101_fpn_2x_20190619-a4b61645.pth) |
27 | | X-101-32x4d | 2x      | 8.0      | 1.335               | 8.5            | 43.0   | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_x101_32x4d_fpn_2x_20190619-0bbfd87a.pth) |
28 | | X-101-64x4d | 2x      | 10.9     | 1.753               | 6.4            | 43.1   | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_x101_64x4d_fpn_2x_20190619-8f4e20bb.pth) |
29 | 
30 | **Notes:**
31 | - All models are trained with 8 GPUs instead of 32 GPUs in the original paper.
32 | - The warming up lasts for 1 epoch and `2x` here indicates 25 epochs.
33 | 


--------------------------------------------------------------------------------
/configs/htc/README.md:
--------------------------------------------------------------------------------
 1 | # Hybrid Task Cascade for Instance Segmentation
 2 | 
 3 | ## Introduction
 4 | 
 5 | We provide config files to reproduce the results in the CVPR 2019 paper for [Hybrid Task Cascade](https://arxiv.org/abs/1901.07518).
 6 | 
 7 | ```
 8 | @inproceedings{chen2019hybrid,
 9 |   title={Hybrid task cascade for instance segmentation},
10 |   author={Chen, Kai and Pang, Jiangmiao and Wang, Jiaqi and Xiong, Yu and Li, Xiaoxiao and Sun, Shuyang and Feng, Wansen and Liu, Ziwei and Shi, Jianping and Ouyang, Wanli and Loy, Chen Change and Lin, Dahua},
11 |   booktitle={IEEE Conference on Computer Vision and Pattern Recognition},
12 |   year={2019}
13 | }
14 | ```
15 | 
16 | ## Dataset
17 | 
18 | HTC requires the COCO and COCO-stuff datasets for training. You need to download and extract them into the COCO dataset path.
19 | The directory should be like this.
20 | 
21 | ```
22 | mmdetection
23 | ├── mmdet
24 | ├── tools
25 | ├── configs
26 | ├── data
27 | │   ├── coco
28 | │   │   ├── annotations
29 | │   │   ├── train2017
30 | │   │   ├── val2017
31 | │   │   ├── test2017
32 | │   │   ├── stuffthingmaps
33 | ```
34 | 
35 | ## Results and Models
36 | 
37 | The results on COCO 2017val are shown in the table below. (Results on test-dev are usually slightly higher than on val.)
38 | 
39 | | Backbone  | Style   | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |
40 | |:---------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:|
41 | | R-50-FPN  | pytorch | 1x      | 7.4      | 0.936               | 4.1            | 42.1   | 37.3    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_r50_fpn_1x_20190408-878c1712.pth) |
42 | | R-50-FPN  | pytorch | 20e     | -        | -                   | -              | 43.2   | 38.1    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_r50_fpn_20e_20190408-c03b7015.pth) |
43 | | R-101-FPN | pytorch | 20e     | 9.3      | 1.051               | 4.0            | 44.9   | 39.4    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_r101_fpn_20e_20190408-a2e586db.pth) |
44 | | X-101-32x4d-FPN | pytorch |20e| 5.8      | 0.769               | 3.8            | 46.1   | 40.3    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_x101_32x4d_fpn_20e_20190408-9eae4d0b.pth) |
45 | | X-101-64x4d-FPN | pytorch |20e| 7.5      | 1.120               | 3.5            | 46.9   | 40.8    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_x101_64x4d_fpn_20e_20190408-497f2561.pth) |
46 | 
47 | - In the HTC paper and COCO 2018 Challenge, `score_thr` is set to 0.001 for both baselines and HTC.
48 | - We use 8 GPUs with 2 images/GPU for R-50 and R-101 models, and 16 GPUs with 1 image/GPU for X-101 models.
49 | If you would like to train X-101 HTC with 8 GPUs, you need to change the lr from 0.02 to 0.01.
50 | 
51 | We also provide a powerful HTC with DCN and multi-scale training model. No testing augmentation is used.
52 | 
53 | | Backbone         | Style   | DCN   | training scales | Lr schd | box AP | mask AP | Download |
54 | |:----------------:|:-------:|:-----:|:---------------:|:-------:|:------:|:-------:|:--------:|
55 | | X-101-64x4d-FPN  | pytorch | c3-c5 | 400~1400        | 20e     | 50.7   | 43.9    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e_20190408-0e50669c.pth) |
56 | 


--------------------------------------------------------------------------------
/configs/libra_rcnn/README.md:
--------------------------------------------------------------------------------
 1 | # Libra R-CNN: Towards Balanced Learning for Object Detection
 2 | 
 3 | ## Introduction
 4 | 
 5 | We provide config files to reproduce the results in the CVPR 2019 paper [Libra R-CNN](https://arxiv.org/pdf/1904.02701.pdf).
 6 | 
 7 | ```
 8 | @inproceedings{pang2019libra,
 9 |   title={Libra R-CNN: Towards Balanced Learning for Object Detection},
10 |   author={Pang, Jiangmiao and Chen, Kai and Shi, Jianping and Feng, Huajun and Ouyang, Wanli and Lin, Dahua},
11 |   booktitle={IEEE Conference on Computer Vision and Pattern Recognition},
12 |   year={2019}
13 | }
14 | ```
15 | 
16 | ## Results and models
17 | 
18 | The results on COCO 2017val are shown in the table below. (Results on test-dev are usually slightly higher than on val.)
19 | 
20 | | Architecture | Backbone  | Style   | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
21 | |:---------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:|
22 | | Faster R-CNN | R-50-FPN        | pytorch | 1x | 4.2  | 0.375 | 12.0 | 38.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_20190610-bf0ea559.pth) |
23 | | Fast R-CNN   | R-50-FPN        | pytorch | 1x | 3.7  | 0.272 | 16.3 | 38.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_fast_rcnn_r50_fpn_1x_20190525-a43f88b5.pth) |
24 | | Faster R-CNN | R-101-FPN       | pytorch | 1x | 6.0  | 0.495 | 10.4 | 40.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_20190525-94e94051.pth) |
25 | | Faster R-CNN | X-101-64x4d-FPN | pytorch | 1x | 10.1 | 1.050 | 6.8  | 42.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_20190525-359c134a.pth) |
26 | | RetinaNet    | R-50-FPN        | pytorch | 1x | 3.7  | 0.328 | 11.8 | 37.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_retinanet_r50_fpn_1x_20190525-ead2a6bb.pth) |
27 | 


--------------------------------------------------------------------------------
/configs/ms_rcnn/README.md:
--------------------------------------------------------------------------------
 1 | # Mask Scoring R-CNN
 2 | 
 3 | ## Introduction
 4 | 
 5 | ```
 6 | @inproceedings{huang2019msrcnn,
 7 |     title={Mask Scoring R-CNN},
 8 |     author={Zhaojin Huang and Lichao Huang and Yongchao Gong and Chang Huang and Xinggang Wang},
 9 |     booktitle={IEEE Conference on Computer Vision and Pattern Recognition},
10 |     year={2019},
11 | }
12 | ```
13 | 
14 | ## Results and Models
15 | 
16 | | Backbone      | style      | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |
17 | |:-------------:|:----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:|
18 | | R-50-FPN      | caffe      | 1x      | 4.3      | 0.537               | 10.1           | 37.4   | 35.5    | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_r50_caffe_fpn_1x_20190624-619934b5.pth) |
19 | | R-50-FPN      | caffe      | 2x      | -        | -                   | -              | 38.2   | 35.9    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ms-rcnn/ms_rcnn_r50_caffe_fpn_2x_20190525-a07be31e.pth) |
20 | | R-101-FPN     | caffe      | 1x      | 6.2      | 0.682               |  9.1           | 39.8   | 37.2    | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_r101_caffe_fpn_1x_20190624-677a5548.pth) |
21 | | R-101-FPN     | caffe      | 2x      | -        | -                   |  -             | 40.7   | 37.8    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ms-rcnn/ms_rcnn_r101_caffe_fpn_2x_20190525-4aee1528.pth) |
22 | | R-X101-32x4d  | pytorch    | 2x      | 7.6      | 0.844               |  8.0           | 41.7   | 38.5    | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_x101_32x4d_fpn_2x_20190628-ab454d07.pth) |
23 | | R-X101-64x4d  | pytorch    | 1x      | 10.5     | 1.214               |  6.4           | 42.0   | 39.1    | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_x101_64x4d_fpn_1x_20190628-dec32bda.pth) |
24 | | R-X101-64x4d  | pytorch    | 2x      | -       | -                    |  -             | 42.2   | 38.9    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ms-rcnn/ms_rcnn_x101_64x4d_fpn_2x_20190525-c044c25a.pth) |
25 | 


--------------------------------------------------------------------------------
/configs/pascal_voc/README.md:
--------------------------------------------------------------------------------
1 | ### SSD
2 | 
3 | | Backbone | Size  | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP |                                                             Download                                                             |
4 | | :------: | :---: | :---: | :-----: | :------: | :-----------------: | :------------: | :----: | :------------------------------------------------------------------------------------------------------------------------------: |
5 | |  VGG16   |  300  | caffe |  240e   |   2.5    |        0.159        |  35.7 / 53.6   |  77.5  | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ssd300_voc_vgg16_caffe_240e_20190501-7160d09a.pth) |
6 | |  VGG16   |  512  | caffe |  240e   |   4.3    |        0.214        |  27.5 / 35.9   |  80.0  | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ssd512_voc_vgg16_caffe_240e_20190501-ff194be1.pth) |


--------------------------------------------------------------------------------
/configs/scratch/README.md:
--------------------------------------------------------------------------------
 1 | # Rethinking ImageNet Pre-training
 2 | 
 3 | ## Introduction
 4 | 
 5 | ```
 6 | @article{he2018rethinking,
 7 |   title={Rethinking imagenet pre-training},
 8 |   author={He, Kaiming and Girshick, Ross and Doll{\'a}r, Piotr},
 9 |   journal={arXiv preprint arXiv:1811.08883},
10 |   year={2018}
11 | }
12 | ```
13 | 
14 | ## Results and Models
15 | 
16 | | Model        | Backbone  | Style   | Lr schd | box AP | mask AP | Download |
17 | |:------------:|:---------:|:-------:|:-------:|:------:|:-------:|:--------:|
18 | | Faster R-CNN | R-50-FPN  | pytorch | 6x      | 40.1   | -       | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/scratch/scratch_faster_rcnn_r50_fpn_gn_6x_20190515-ff554978.pth) |
19 | | Mask R-CNN   | R-50-FPN  | pytorch | 6x      | 41.0   | 37.4    | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/scratch/scratch_mask_rcnn_r50_fpn_gn_6x_20190515-96743f5e.pth) |
20 | 
21 | Note:
22 | - The above models are trained with 16 GPUs.


--------------------------------------------------------------------------------
/configs/wider_face/README.md:
--------------------------------------------------------------------------------
 1 | ## WIDER Face Dataset
 2 | 
 3 | To use the WIDER Face dataset you need to download it
 4 | and extract to the `data/WIDERFace` folder. Annotation in the VOC format
 5 | can be found in this [repo](https://github.com/sovrasov/wider-face-pascal-voc-annotations.git).
 6 | You should move the annotation files from `WIDER_train_annotations` and `WIDER_val_annotations` folders
 7 | to the `Annotations` folders inside the corresponding directories `WIDER_train` and `WIDER_val`.
 8 | Also annotation lists `val.txt` and `train.txt` should be copied to `data/WIDERFace` from `WIDER_train_annotations` and `WIDER_val_annotations`.
 9 | The directory should be like this:
10 | 
11 | ```
12 | mmdetection
13 | ├── mmdet
14 | ├── tools
15 | ├── configs
16 | ├── data
17 | │   ├── WIDERFace
18 | │   │   ├── WIDER_train
19 | │   │   │   ├── 0--Parade
20 | │   │   │   ├── ...
21 | │   │   │   ├── Annotations
22 | │   │   ├── WIDER_val
23 | │   │   │   ├── 0--Parade
24 | │   │   │   ├── ...
25 | │   │   │   ├── Annotations
26 | │   │   ├── val.txt
27 | │   │   ├── train.txt
28 | 
29 | ```
30 | 
31 | After that you can train the SSD300 on WIDER by launching training with the `ssd300_wider_face.py` config or
32 | create your own config based on the presented one.
33 | 


--------------------------------------------------------------------------------
/demo/coco_test_12510.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taokong/FoveaBox/474516bc0646ad17b14ec89add220289337e7f4e/demo/coco_test_12510.jpg


--------------------------------------------------------------------------------
/demo/corruptions_sev_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taokong/FoveaBox/474516bc0646ad17b14ec89add220289337e7f4e/demo/corruptions_sev_3.png


--------------------------------------------------------------------------------
/demo/demo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taokong/FoveaBox/474516bc0646ad17b14ec89add220289337e7f4e/demo/demo.jpg


--------------------------------------------------------------------------------
/demo/foveabox.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taokong/FoveaBox/474516bc0646ad17b14ec89add220289337e7f4e/demo/foveabox.jpg


--------------------------------------------------------------------------------
/demo/loss_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taokong/FoveaBox/474516bc0646ad17b14ec89add220289337e7f4e/demo/loss_curve.png


--------------------------------------------------------------------------------
/demo/webcam_demo.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | import cv2
 4 | import torch
 5 | 
 6 | from mmdet.apis import inference_detector, init_detector, show_result
 7 | 
 8 | 
 9 | def parse_args():
10 |     parser = argparse.ArgumentParser(description='MMDetection webcam demo')
11 |     parser.add_argument('config', help='test config file path')
12 |     parser.add_argument('checkpoint', help='checkpoint file')
13 |     parser.add_argument('--device', type=int, default=0, help='CUDA device id')
14 |     parser.add_argument(
15 |         '--camera-id', type=int, default=0, help='camera device id')
16 |     parser.add_argument(
17 |         '--score-thr', type=float, default=0.5, help='bbox score threshold')
18 |     args = parser.parse_args()
19 |     return args
20 | 
21 | 
def main():
    """Run a detector on webcam frames and show the detections live.

    Builds the detector from the parsed ``config``/``checkpoint`` on the
    requested CUDA device, then loops: grab a frame, run inference, draw
    the result. The loop ends when "Esc", "q" or "Q" is pressed, or when
    the camera stops delivering frames. The capture device is released on
    every exit path, including exceptions.
    """
    args = parse_args()

    model = init_detector(
        args.config, args.checkpoint, device=torch.device('cuda', args.device))

    camera = cv2.VideoCapture(args.camera_id)

    print('Press "Esc", "q" or "Q" to exit.')
    try:
        while True:
            ret_val, img = camera.read()
            if not ret_val:
                # A failed grab (camera missing, unplugged or busy) yields
                # no usable frame; stop here instead of running inference
                # on it.
                print('Failed to read a frame from camera {}.'.format(
                    args.camera_id))
                break
            result = inference_detector(model, img)

            ch = cv2.waitKey(1)
            if ch == 27 or ch == ord('q') or ch == ord('Q'):
                break

            show_result(
                img,
                result,
                model.CLASSES,
                score_thr=args.score_thr,
                wait_time=1)
    finally:
        # Always hand the device back so other processes can open it.
        camera.release()
41 | 
42 | 
43 | if __name__ == '__main__':
44 |     main()
45 | 


--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
 1 | ARG PYTORCH="1.1.0"
 2 | ARG CUDA="10.0"
 3 | ARG CUDNN="7.5"
 4 | 
 5 | FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
 6 | 
 7 | RUN apt-get update && apt-get install -y libglib2.0-0 libsm6 libxrender-dev libxext6
 8 | 
 9 | # Install mmdetection
10 | RUN conda install cython -y
11 | RUN git clone https://github.com/open-mmlab/mmdetection.git /mmdetection
12 | WORKDIR /mmdetection
13 | RUN pip install -e .
14 | 


--------------------------------------------------------------------------------
/mmdet/__init__.py:
--------------------------------------------------------------------------------
1 | from .version import __version__, short_version
2 | 
3 | __all__ = ['__version__', 'short_version']
4 | 


--------------------------------------------------------------------------------
/mmdet/apis/__init__.py:
--------------------------------------------------------------------------------
 1 | from .env import get_root_logger, init_dist, set_random_seed
 2 | from .inference import (inference_detector, init_detector, show_result,
 3 |                         show_result_pyplot)
 4 | from .train import train_detector
 5 | 
 6 | __all__ = [
 7 |     'init_dist', 'get_root_logger', 'set_random_seed', 'train_detector',
 8 |     'init_detector', 'inference_detector', 'show_result', 'show_result_pyplot'
 9 | ]
10 | 


--------------------------------------------------------------------------------
/mmdet/apis/env.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import os
 3 | import random
 4 | import subprocess
 5 | 
 6 | import numpy as np
 7 | import torch
 8 | import torch.distributed as dist
 9 | import torch.multiprocessing as mp
10 | from mmcv.runner import get_dist_info
11 | 
12 | 
def init_dist(launcher, backend='nccl', **kwargs):
    """Initialise distributed training for the given job launcher.

    If no multiprocessing start method has been chosen yet, 'spawn' is
    selected before the launcher-specific initialiser is called.

    Args:
        launcher (str): one of 'pytorch', 'mpi' or 'slurm'.
        backend (str): backend name passed to the initialiser.
        **kwargs: extra arguments forwarded to the initialiser.

    Raises:
        ValueError: if ``launcher`` is not one of the supported names.
    """
    start_method_unset = mp.get_start_method(allow_none=True) is None
    if start_method_unset:
        mp.set_start_method('spawn')

    if launcher == 'pytorch':
        _init_dist_pytorch(backend, **kwargs)
        return
    if launcher == 'mpi':
        _init_dist_mpi(backend, **kwargs)
        return
    if launcher == 'slurm':
        _init_dist_slurm(backend, **kwargs)
        return

    raise ValueError('Invalid launcher type: {}'.format(launcher))
24 | 
25 | 
def _init_dist_pytorch(backend, **kwargs):
    """Bind this process to a GPU and join the process group.

    The global rank is read from the ``RANK`` environment variable and
    mapped onto a local device with ``rank % device_count``.

    Args:
        backend (str): backend passed to ``dist.init_process_group``.
        **kwargs: forwarded to ``dist.init_process_group``.
    """
    # TODO: use local_rank instead of rank % num_gpus
    global_rank = int(os.environ['RANK'])
    device_index = global_rank % torch.cuda.device_count()
    torch.cuda.set_device(device_index)
    dist.init_process_group(backend=backend, **kwargs)
32 | 
33 | 
34 | def _init_dist_mpi(backend, **kwargs):
35 |     raise NotImplementedError
36 | 
37 | 
def _init_dist_slurm(backend, port=29500, **kwargs):
    """Initialise the process group from SLURM environment variables.

    Reads ``SLURM_PROCID``, ``SLURM_NTASKS`` and ``SLURM_NODELIST``, binds
    the process to GPU ``proc_id % device_count``, resolves the first host
    of the node list with ``scontrol`` and exports ``MASTER_PORT``,
    ``MASTER_ADDR``, ``WORLD_SIZE`` and ``RANK`` before joining the group.

    Args:
        backend (str): backend passed to ``dist.init_process_group``.
        port (int): value exported as ``MASTER_PORT``.
        **kwargs: accepted but not used by this function.
    """
    env = os.environ
    task_rank = int(env['SLURM_PROCID'])
    world_size = int(env['SLURM_NTASKS'])
    nodes = env['SLURM_NODELIST']

    torch.cuda.set_device(task_rank % torch.cuda.device_count())

    # The first hostname in the allocation is exported as the master address.
    master_addr = subprocess.getoutput(
        'scontrol show hostname {} | head -n1'.format(nodes))

    env['MASTER_PORT'] = str(port)
    env['MASTER_ADDR'] = master_addr
    env['WORLD_SIZE'] = str(world_size)
    env['RANK'] = str(task_rank)
    dist.init_process_group(backend=backend)
51 | 
52 | 
def set_random_seed(seed):
    """Seed Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices).

    Args:
        seed (int): the seed given to every generator.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)
58 | 
59 | 
def get_root_logger(log_level=logging.INFO):
    """Return the root logger, configuring it on first use.

    If the root logger has no handlers, ``logging.basicConfig`` is called
    with a timestamped format and ``log_level``. On every process whose
    rank (as reported by ``get_dist_info``) is not 0, the level is then
    raised to ERROR.

    Args:
        log_level (int): level used when configuring logging for the
            first time.

    Returns:
        logging.Logger: the root logger.
    """
    root = logging.getLogger()
    already_configured = root.hasHandlers()
    if not already_configured:
        logging.basicConfig(
            format='%(asctime)s - %(levelname)s - %(message)s',
            level=log_level)

    rank, _world_size = get_dist_info()
    if rank != 0:
        root.setLevel('ERROR')
    return root
70 | 


--------------------------------------------------------------------------------
/mmdet/core/__init__.py:
--------------------------------------------------------------------------------
1 | from .anchor import *  # noqa: F401, F403
2 | from .bbox import *  # noqa: F401, F403
3 | from .evaluation import *  # noqa: F401, F403
4 | from .fp16 import *  # noqa: F401, F403
5 | from .mask import *  # noqa: F401, F403
6 | from .post_processing import *  # noqa: F401, F403
7 | from .utils import *  # noqa: F401, F403
8 | 


--------------------------------------------------------------------------------
/mmdet/core/anchor/__init__.py:
--------------------------------------------------------------------------------
1 | from .anchor_generator import AnchorGenerator
2 | from .anchor_target import anchor_inside_flags, anchor_target
3 | from .guided_anchor_target import ga_loc_target, ga_shape_target
4 | 
5 | __all__ = [
6 |     'AnchorGenerator', 'anchor_target', 'anchor_inside_flags', 'ga_loc_target',
7 |     'ga_shape_target'
8 | ]
9 | 


--------------------------------------------------------------------------------
/mmdet/core/anchor/anchor_generator.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | class AnchorGenerator(object):
 5 | 
 6 |     def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None):
 7 |         self.base_size = base_size
 8 |         self.scales = torch.Tensor(scales)
 9 |         self.ratios = torch.Tensor(ratios)
10 |         self.scale_major = scale_major
11 |         self.ctr = ctr
12 |         self.base_anchors = self.gen_base_anchors()
13 | 
14 |     @property
15 |     def num_base_anchors(self):
16 |         return self.base_anchors.size(0)
17 | 
18 |     def gen_base_anchors(self):
19 |         w = self.base_size
20 |         h = self.base_size
21 |         if self.ctr is None:
22 |             x_ctr = 0.5 * (w - 1)
23 |             y_ctr = 0.5 * (h - 1)
24 |         else:
25 |             x_ctr, y_ctr = self.ctr
26 | 
27 |         h_ratios = torch.sqrt(self.ratios)
28 |         w_ratios = 1 / h_ratios
29 |         if self.scale_major:
30 |             ws = (w * w_ratios[:, None] * self.scales[None, :]).view(-1)
31 |             hs = (h * h_ratios[:, None] * self.scales[None, :]).view(-1)
32 |         else:
33 |             ws = (w * self.scales[:, None] * w_ratios[None, :]).view(-1)
34 |             hs = (h * self.scales[:, None] * h_ratios[None, :]).view(-1)
35 | 
36 |         # yapf: disable
37 |         base_anchors = torch.stack(
38 |             [
39 |                 x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1),
40 |                 x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1)
41 |             ],
42 |             dim=-1).round()
43 |         # yapf: enable
44 | 
45 |         return base_anchors
46 | 
47 |     def _meshgrid(self, x, y, row_major=True):
48 |         xx = x.repeat(len(y))
49 |         yy = y.view(-1, 1).repeat(1, len(x)).view(-1)
50 |         if row_major:
51 |             return xx, yy
52 |         else:
53 |             return yy, xx
54 | 
55 |     def grid_anchors(self, featmap_size, stride=16, device='cuda'):
56 |         base_anchors = self.base_anchors.to(device)
57 | 
58 |         feat_h, feat_w = featmap_size
59 |         shift_x = torch.arange(0, feat_w, device=device) * stride
60 |         shift_y = torch.arange(0, feat_h, device=device) * stride
61 |         shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
62 |         shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1)
63 |         shifts = shifts.type_as(base_anchors)
64 |         # first feat_w elements correspond to the first row of shifts
65 |         # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get
66 |         # shifted anchors (K, A, 4), reshape to (K*A, 4)
67 | 
68 |         all_anchors = base_anchors[None, :, :] + shifts[:, None, :]
69 |         all_anchors = all_anchors.view(-1, 4)
70 |         # first A rows correspond to A anchors of (0, 0) in feature map,
71 |         # then (0, 1), (0, 2), ...
72 |         return all_anchors
73 | 
74 |     def valid_flags(self, featmap_size, valid_size, device='cuda'):
75 |         feat_h, feat_w = featmap_size
76 |         valid_h, valid_w = valid_size
77 |         assert valid_h <= feat_h and valid_w <= feat_w
78 |         valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device)
79 |         valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device)
80 |         valid_x[:valid_w] = 1
81 |         valid_y[:valid_h] = 1
82 |         valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
83 |         valid = valid_xx & valid_yy
84 |         valid = valid[:, None].expand(
85 |             valid.size(0), self.num_base_anchors).contiguous().view(-1)
86 |         return valid
87 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/__init__.py:
--------------------------------------------------------------------------------
 1 | from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner
 2 | from .bbox_target import bbox_target
 3 | from .geometry import bbox_overlaps
 4 | from .samplers import (BaseSampler, CombinedSampler,
 5 |                        InstanceBalancedPosSampler, IoUBalancedNegSampler,
 6 |                        PseudoSampler, RandomSampler, SamplingResult)
 7 | from .transforms import (bbox2delta, bbox2result, bbox2roi, bbox_flip,
 8 |                          bbox_mapping, bbox_mapping_back, delta2bbox,
 9 |                          distance2bbox, roi2bbox)
10 | 
11 | from .assign_sampling import (  # isort:skip, avoid recursive imports
12 |     assign_and_sample, build_assigner, build_sampler)
13 | 
14 | __all__ = [
15 |     'bbox_overlaps', 'BaseAssigner', 'MaxIoUAssigner', 'AssignResult',
16 |     'BaseSampler', 'PseudoSampler', 'RandomSampler',
17 |     'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
18 |     'SamplingResult', 'build_assigner', 'build_sampler', 'assign_and_sample',
19 |     'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_mapping',
20 |     'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result',
21 |     'distance2bbox', 'bbox_target'
22 | ]
23 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/assign_sampling.py:
--------------------------------------------------------------------------------
 1 | import mmcv
 2 | 
 3 | from . import assigners, samplers
 4 | 
 5 | 
 6 | def build_assigner(cfg, **kwargs):
 7 |     if isinstance(cfg, assigners.BaseAssigner):
 8 |         return cfg
 9 |     elif isinstance(cfg, dict):
10 |         return mmcv.runner.obj_from_dict(cfg, assigners, default_args=kwargs)
11 |     else:
12 |         raise TypeError('Invalid type {} for building a sampler'.format(
13 |             type(cfg)))
14 | 
15 | 
def build_sampler(cfg, **kwargs):
    """Build a bbox sampler from a config dict, or pass one through.

    Args:
        cfg (dict | BaseSampler): an existing sampler (returned as is) or
            a config dict handed to ``mmcv.runner.obj_from_dict``, which
            looks the class up in the ``samplers`` module.
        **kwargs: default arguments passed to ``obj_from_dict``.

    Returns:
        The sampler instance.

    Raises:
        TypeError: if ``cfg`` is neither a sampler nor a dict.
    """
    # An already-built sampler needs no further work.
    if isinstance(cfg, samplers.BaseSampler):
        return cfg

    if not isinstance(cfg, dict):
        raise TypeError('Invalid type {} for building a sampler'.format(
            type(cfg)))

    return mmcv.runner.obj_from_dict(cfg, samplers, default_args=kwargs)
24 | 
25 | 
def assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg):
    """Assign boxes to ground truths, then sample from the assignment.

    The assigner is built from ``cfg.assigner`` and the sampler from
    ``cfg.sampler``.

    Args:
        bboxes: candidate boxes passed to the assigner and the sampler.
        gt_bboxes: ground-truth boxes.
        gt_bboxes_ignore: boxes passed to the assigner as the ignore set.
        gt_labels: labels of the ground-truth boxes.
        cfg: config object exposing ``assigner`` and ``sampler``.

    Returns:
        tuple: ``(assign_result, sampling_result)``.
    """
    assigner = build_assigner(cfg.assigner)
    sampler = build_sampler(cfg.sampler)

    assigned = assigner.assign(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels)
    sampled = sampler.sample(assigned, bboxes, gt_bboxes, gt_labels)
    return assigned, sampled
34 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/assigners/__init__.py:
--------------------------------------------------------------------------------
1 | from .approx_max_iou_assigner import ApproxMaxIoUAssigner
2 | from .assign_result import AssignResult
3 | from .base_assigner import BaseAssigner
4 | from .max_iou_assigner import MaxIoUAssigner
5 | 
6 | __all__ = [
7 |     'BaseAssigner', 'MaxIoUAssigner', 'ApproxMaxIoUAssigner', 'AssignResult'
8 | ]
9 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/assigners/assign_result.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | class AssignResult(object):
 5 | 
 6 |     def __init__(self, num_gts, gt_inds, max_overlaps, labels=None):
 7 |         self.num_gts = num_gts
 8 |         self.gt_inds = gt_inds
 9 |         self.max_overlaps = max_overlaps
10 |         self.labels = labels
11 | 
12 |     def add_gt_(self, gt_labels):
13 |         self_inds = torch.arange(
14 |             1, len(gt_labels) + 1, dtype=torch.long, device=gt_labels.device)
15 |         self.gt_inds = torch.cat([self_inds, self.gt_inds])
16 |         self.max_overlaps = torch.cat(
17 |             [self.max_overlaps.new_ones(self.num_gts), self.max_overlaps])
18 |         if self.labels is not None:
19 |             self.labels = torch.cat([gt_labels, self.labels])
20 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/assigners/base_assigner.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta, abstractmethod
2 | 
3 | 
class BaseAssigner(metaclass=ABCMeta):
    """Abstract interface that every bbox assigner implements."""

    @abstractmethod
    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
        """Assign ``bboxes`` to ``gt_bboxes``; subclasses must override.

        Args:
            bboxes: boxes to be assigned.
            gt_bboxes: ground-truth boxes.
            gt_bboxes_ignore: optional boxes to ignore.
            gt_labels: optional labels of the ground-truth boxes.
        """
9 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/bbox_target.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from ..utils import multi_apply
 4 | from .transforms import bbox2delta
 5 | 
 6 | 
 7 | def bbox_target(pos_bboxes_list,
 8 |                 neg_bboxes_list,
 9 |                 pos_gt_bboxes_list,
10 |                 pos_gt_labels_list,
11 |                 cfg,
12 |                 reg_classes=1,
13 |                 target_means=[.0, .0, .0, .0],
14 |                 target_stds=[1.0, 1.0, 1.0, 1.0],
15 |                 concat=True):
16 |     labels, label_weights, bbox_targets, bbox_weights = multi_apply(
17 |         bbox_target_single,
18 |         pos_bboxes_list,
19 |         neg_bboxes_list,
20 |         pos_gt_bboxes_list,
21 |         pos_gt_labels_list,
22 |         cfg=cfg,
23 |         reg_classes=reg_classes,
24 |         target_means=target_means,
25 |         target_stds=target_stds)
26 | 
27 |     if concat:
28 |         labels = torch.cat(labels, 0)
29 |         label_weights = torch.cat(label_weights, 0)
30 |         bbox_targets = torch.cat(bbox_targets, 0)
31 |         bbox_weights = torch.cat(bbox_weights, 0)
32 |     return labels, label_weights, bbox_targets, bbox_weights
33 | 
34 | 
def bbox_target_single(pos_bboxes,
                       neg_bboxes,
                       pos_gt_bboxes,
                       pos_gt_labels,
                       cfg,
                       reg_classes=1,
                       target_means=[.0, .0, .0, .0],
                       target_stds=[1.0, 1.0, 1.0, 1.0]):
    """Compute targets for the sampled boxes of one image.

    Positives fill the first ``num_pos`` rows and negatives the last
    ``num_neg`` rows. Positives get their ground-truth label, a label
    weight of ``cfg.pos_weight`` (or 1.0 when that value is <= 0), the
    deltas from ``bbox2delta`` and a bbox weight of 1. Negatives keep
    label 0 and zero bbox targets/weights, with a label weight of 1.

    Args:
        pos_bboxes (Tensor): positive boxes, ``(num_pos, 4)``.
        neg_bboxes (Tensor): negative boxes, ``(num_neg, 4)``.
        pos_gt_bboxes (Tensor): ground-truth boxes matched to positives.
        pos_gt_labels (Tensor): labels matched to positives.
        cfg: config exposing ``pos_weight``.
        reg_classes (int): accepted but not used here.
        target_means (list): means passed to ``bbox2delta``.
        target_stds (list): stds passed to ``bbox2delta``.

    Returns:
        tuple: ``(labels, label_weights, bbox_targets, bbox_weights)``.
    """
    num_pos = pos_bboxes.size(0)
    num_neg = neg_bboxes.size(0)
    total = num_pos + num_neg

    # Start from all zeros; only the relevant slices are filled below.
    labels = pos_bboxes.new_zeros(total, dtype=torch.long)
    label_weights = pos_bboxes.new_zeros(total)
    bbox_targets = pos_bboxes.new_zeros(total, 4)
    bbox_weights = pos_bboxes.new_zeros(total, 4)

    if num_pos > 0:
        labels[:num_pos] = pos_gt_labels

        pos_weight = cfg.pos_weight
        if pos_weight <= 0:
            pos_weight = 1.0
        label_weights[:num_pos] = pos_weight

        deltas = bbox2delta(pos_bboxes, pos_gt_bboxes, target_means,
                            target_stds)
        bbox_targets[:num_pos, :] = deltas
        bbox_weights[:num_pos, :] = 1

    if num_neg > 0:
        label_weights[-num_neg:] = 1.0

    return labels, label_weights, bbox_targets, bbox_weights
62 | 
63 | 
def expand_target(bbox_targets, bbox_weights, labels, num_classes):
    """Spread 4-column targets/weights into per-class column slots.

    For every row whose label is greater than 0, the row's four values are
    copied into columns ``[label * 4, label * 4 + 4)`` of the expanded
    tensors. Rows with a label that is not greater than 0 stay all-zero.

    Args:
        bbox_targets (Tensor): Targets indexed as ``[row, :]``.
        bbox_weights (Tensor): Weights indexed as ``[row, :]``.
        labels (Tensor): Integer label per row.
        num_classes (int): Number of 4-column slots in the output.

    Returns:
        tuple: ``(bbox_targets_expand, bbox_weights_expand)``, each with
        ``4 * num_classes`` columns.
    """
    width = 4 * num_classes
    bbox_targets_expand = bbox_targets.new_zeros(
        (bbox_targets.size(0), width))
    bbox_weights_expand = bbox_weights.new_zeros(
        (bbox_weights.size(0), width))

    fg_rows = torch.nonzero(labels > 0).squeeze(-1)
    for row in fg_rows.tolist():
        start = int(labels[row]) * 4
        stop = start + 4
        bbox_targets_expand[row, start:stop] = bbox_targets[row, :]
        bbox_weights_expand[row, start:stop] = bbox_weights[row, :]
    return bbox_targets_expand, bbox_weights_expand
74 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/geometry.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False):
 5 |     """Calculate overlap between two set of bboxes.
 6 | 
 7 |     If ``is_aligned`` is ``False``, then calculate the ious between each bbox
 8 |     of bboxes1 and bboxes2, otherwise the ious between each aligned pair of
 9 |     bboxes1 and bboxes2.
10 | 
11 |     Args:
12 |         bboxes1 (Tensor): shape (m, 4)
13 |         bboxes2 (Tensor): shape (n, 4), if is_aligned is ``True``, then m and n
14 |             must be equal.
15 |         mode (str): "iou" (intersection over union) or iof (intersection over
16 |             foreground).
17 | 
18 |     Returns:
19 |         ious(Tensor): shape (m, n) if is_aligned == False else shape (m, 1)
20 |     """
21 | 
22 |     assert mode in ['iou', 'iof']
23 | 
24 |     rows = bboxes1.size(0)
25 |     cols = bboxes2.size(0)
26 |     if is_aligned:
27 |         assert rows == cols
28 | 
29 |     if rows * cols == 0:
30 |         return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols)
31 | 
32 |     if is_aligned:
33 |         lt = torch.max(bboxes1[:, :2], bboxes2[:, :2])  # [rows, 2]
34 |         rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:])  # [rows, 2]
35 | 
36 |         wh = (rb - lt + 1).clamp(min=0)  # [rows, 2]
37 |         overlap = wh[:, 0] * wh[:, 1]
38 |         area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
39 |             bboxes1[:, 3] - bboxes1[:, 1] + 1)
40 | 
41 |         if mode == 'iou':
42 |             area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
43 |                 bboxes2[:, 3] - bboxes2[:, 1] + 1)
44 |             ious = overlap / (area1 + area2 - overlap)
45 |         else:
46 |             ious = overlap / area1
47 |     else:
48 |         lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2])  # [rows, cols, 2]
49 |         rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:])  # [rows, cols, 2]
50 | 
51 |         wh = (rb - lt + 1).clamp(min=0)  # [rows, cols, 2]
52 |         overlap = wh[:, :, 0] * wh[:, :, 1]
53 |         area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
54 |             bboxes1[:, 3] - bboxes1[:, 1] + 1)
55 | 
56 |         if mode == 'iou':
57 |             area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
58 |                 bboxes2[:, 3] - bboxes2[:, 1] + 1)
59 |             ious = overlap / (area1[:, None] + area2 - overlap)
60 |         else:
61 |             ious = overlap / (area1[:, None])
62 | 
63 |     return ious
64 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/__init__.py:
--------------------------------------------------------------------------------
 1 | from .base_sampler import BaseSampler
 2 | from .combined_sampler import CombinedSampler
 3 | from .instance_balanced_pos_sampler import InstanceBalancedPosSampler
 4 | from .iou_balanced_neg_sampler import IoUBalancedNegSampler
 5 | from .ohem_sampler import OHEMSampler
 6 | from .pseudo_sampler import PseudoSampler
 7 | from .random_sampler import RandomSampler
 8 | from .sampling_result import SamplingResult
 9 | 
10 | __all__ = [
11 |     'BaseSampler', 'PseudoSampler', 'RandomSampler',
12 |     'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
13 |     'OHEMSampler', 'SamplingResult'
14 | ]
15 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/base_sampler.py:
--------------------------------------------------------------------------------
 1 | from abc import ABCMeta, abstractmethod
 2 | 
 3 | import torch
 4 | 
 5 | from .sampling_result import SamplingResult
 6 | 
 7 | 
 8 | class BaseSampler(metaclass=ABCMeta):
 9 | 
10 |     def __init__(self,
11 |                  num,
12 |                  pos_fraction,
13 |                  neg_pos_ub=-1,
14 |                  add_gt_as_proposals=True,
15 |                  **kwargs):
16 |         self.num = num
17 |         self.pos_fraction = pos_fraction
18 |         self.neg_pos_ub = neg_pos_ub
19 |         self.add_gt_as_proposals = add_gt_as_proposals
20 |         self.pos_sampler = self
21 |         self.neg_sampler = self
22 | 
23 |     @abstractmethod
24 |     def _sample_pos(self, assign_result, num_expected, **kwargs):
25 |         pass
26 | 
27 |     @abstractmethod
28 |     def _sample_neg(self, assign_result, num_expected, **kwargs):
29 |         pass
30 | 
31 |     def sample(self,
32 |                assign_result,
33 |                bboxes,
34 |                gt_bboxes,
35 |                gt_labels=None,
36 |                **kwargs):
37 |         """Sample positive and negative bboxes.
38 | 
39 |         This is a simple implementation of bbox sampling given candidates,
40 |         assigning results and ground truth bboxes.
41 | 
42 |         Args:
43 |             assign_result (:obj:`AssignResult`): Bbox assigning results.
44 |             bboxes (Tensor): Boxes to be sampled from.
45 |             gt_bboxes (Tensor): Ground truth bboxes.
46 |             gt_labels (Tensor, optional): Class labels of ground truth bboxes.
47 | 
48 |         Returns:
49 |             :obj:`SamplingResult`: Sampling result.
50 |         """
51 |         bboxes = bboxes[:, :4]
52 | 
53 |         gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8)
54 |         if self.add_gt_as_proposals:
55 |             bboxes = torch.cat([gt_bboxes, bboxes], dim=0)
56 |             assign_result.add_gt_(gt_labels)
57 |             gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8)
58 |             gt_flags = torch.cat([gt_ones, gt_flags])
59 | 
60 |         num_expected_pos = int(self.num * self.pos_fraction)
61 |         pos_inds = self.pos_sampler._sample_pos(
62 |             assign_result, num_expected_pos, bboxes=bboxes, **kwargs)
63 |         # We found that sampled indices have duplicated items occasionally.
64 |         # (may be a bug of PyTorch)
65 |         pos_inds = pos_inds.unique()
66 |         num_sampled_pos = pos_inds.numel()
67 |         num_expected_neg = self.num - num_sampled_pos
68 |         if self.neg_pos_ub >= 0:
69 |             _pos = max(1, num_sampled_pos)
70 |             neg_upper_bound = int(self.neg_pos_ub * _pos)
71 |             if num_expected_neg > neg_upper_bound:
72 |                 num_expected_neg = neg_upper_bound
73 |         neg_inds = self.neg_sampler._sample_neg(
74 |             assign_result, num_expected_neg, bboxes=bboxes, **kwargs)
75 |         neg_inds = neg_inds.unique()
76 | 
77 |         return SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
78 |                               assign_result, gt_flags)
79 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/combined_sampler.py:
--------------------------------------------------------------------------------
 1 | from ..assign_sampling import build_sampler
 2 | from .base_sampler import BaseSampler
 3 | 
 4 | 
 5 | class CombinedSampler(BaseSampler):
 6 | 
 7 |     def __init__(self, pos_sampler, neg_sampler, **kwargs):
 8 |         super(CombinedSampler, self).__init__(**kwargs)
 9 |         self.pos_sampler = build_sampler(pos_sampler, **kwargs)
10 |         self.neg_sampler = build_sampler(neg_sampler, **kwargs)
11 | 
12 |     def _sample_pos(self, **kwargs):
13 |         raise NotImplementedError
14 | 
15 |     def _sample_neg(self, **kwargs):
16 |         raise NotImplementedError
17 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | 
 4 | from .random_sampler import RandomSampler
 5 | 
 6 | 
 7 | class InstanceBalancedPosSampler(RandomSampler):
 8 | 
 9 |     def _sample_pos(self, assign_result, num_expected, **kwargs):
10 |         pos_inds = torch.nonzero(assign_result.gt_inds > 0)
11 |         if pos_inds.numel() != 0:
12 |             pos_inds = pos_inds.squeeze(1)
13 |         if pos_inds.numel() <= num_expected:
14 |             return pos_inds
15 |         else:
16 |             unique_gt_inds = assign_result.gt_inds[pos_inds].unique()
17 |             num_gts = len(unique_gt_inds)
18 |             num_per_gt = int(round(num_expected / float(num_gts)) + 1)
19 |             sampled_inds = []
20 |             for i in unique_gt_inds:
21 |                 inds = torch.nonzero(assign_result.gt_inds == i.item())
22 |                 if inds.numel() != 0:
23 |                     inds = inds.squeeze(1)
24 |                 else:
25 |                     continue
26 |                 if len(inds) > num_per_gt:
27 |                     inds = self.random_choice(inds, num_per_gt)
28 |                 sampled_inds.append(inds)
29 |             sampled_inds = torch.cat(sampled_inds)
30 |             if len(sampled_inds) < num_expected:
31 |                 num_extra = num_expected - len(sampled_inds)
32 |                 extra_inds = np.array(
33 |                     list(set(pos_inds.cpu()) - set(sampled_inds.cpu())))
34 |                 if len(extra_inds) > num_extra:
35 |                     extra_inds = self.random_choice(extra_inds, num_extra)
36 |                 extra_inds = torch.from_numpy(extra_inds).to(
37 |                     assign_result.gt_inds.device).long()
38 |                 sampled_inds = torch.cat([sampled_inds, extra_inds])
39 |             elif len(sampled_inds) > num_expected:
40 |                 sampled_inds = self.random_choice(sampled_inds, num_expected)
41 |             return sampled_inds
42 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/ohem_sampler.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from ..transforms import bbox2roi
 4 | from .base_sampler import BaseSampler
 5 | 
 6 | 
 7 | class OHEMSampler(BaseSampler):
 8 | 
 9 |     def __init__(self,
10 |                  num,
11 |                  pos_fraction,
12 |                  context,
13 |                  neg_pos_ub=-1,
14 |                  add_gt_as_proposals=True,
15 |                  **kwargs):
16 |         super(OHEMSampler, self).__init__(num, pos_fraction, neg_pos_ub,
17 |                                           add_gt_as_proposals)
18 |         if not hasattr(context, 'num_stages'):
19 |             self.bbox_roi_extractor = context.bbox_roi_extractor
20 |             self.bbox_head = context.bbox_head
21 |         else:
22 |             self.bbox_roi_extractor = context.bbox_roi_extractor[
23 |                 context.current_stage]
24 |             self.bbox_head = context.bbox_head[context.current_stage]
25 | 
26 |     def hard_mining(self, inds, num_expected, bboxes, labels, feats):
27 |         with torch.no_grad():
28 |             rois = bbox2roi([bboxes])
29 |             bbox_feats = self.bbox_roi_extractor(
30 |                 feats[:self.bbox_roi_extractor.num_inputs], rois)
31 |             cls_score, _ = self.bbox_head(bbox_feats)
32 |             loss = self.bbox_head.loss(
33 |                 cls_score=cls_score,
34 |                 bbox_pred=None,
35 |                 labels=labels,
36 |                 label_weights=cls_score.new_ones(cls_score.size(0)),
37 |                 bbox_targets=None,
38 |                 bbox_weights=None,
39 |                 reduction_override='none')['loss_cls']
40 |             _, topk_loss_inds = loss.topk(num_expected)
41 |         return inds[topk_loss_inds]
42 | 
43 |     def _sample_pos(self,
44 |                     assign_result,
45 |                     num_expected,
46 |                     bboxes=None,
47 |                     feats=None,
48 |                     **kwargs):
49 |         # Sample some hard positive samples
50 |         pos_inds = torch.nonzero(assign_result.gt_inds > 0)
51 |         if pos_inds.numel() != 0:
52 |             pos_inds = pos_inds.squeeze(1)
53 |         if pos_inds.numel() <= num_expected:
54 |             return pos_inds
55 |         else:
56 |             return self.hard_mining(pos_inds, num_expected, bboxes[pos_inds],
57 |                                     assign_result.labels[pos_inds], feats)
58 | 
59 |     def _sample_neg(self,
60 |                     assign_result,
61 |                     num_expected,
62 |                     bboxes=None,
63 |                     feats=None,
64 |                     **kwargs):
65 |         # Sample some hard negative samples
66 |         neg_inds = torch.nonzero(assign_result.gt_inds == 0)
67 |         if neg_inds.numel() != 0:
68 |             neg_inds = neg_inds.squeeze(1)
69 |         if len(neg_inds) <= num_expected:
70 |             return neg_inds
71 |         else:
72 |             return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds],
73 |                                     assign_result.labels[neg_inds], feats)
74 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/pseudo_sampler.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from .base_sampler import BaseSampler
 4 | from .sampling_result import SamplingResult
 5 | 
 6 | 
 7 | class PseudoSampler(BaseSampler):
 8 | 
 9 |     def __init__(self, **kwargs):
10 |         pass
11 | 
12 |     def _sample_pos(self, **kwargs):
13 |         raise NotImplementedError
14 | 
15 |     def _sample_neg(self, **kwargs):
16 |         raise NotImplementedError
17 | 
18 |     def sample(self, assign_result, bboxes, gt_bboxes, **kwargs):
19 |         pos_inds = torch.nonzero(
20 |             assign_result.gt_inds > 0).squeeze(-1).unique()
21 |         neg_inds = torch.nonzero(
22 |             assign_result.gt_inds == 0).squeeze(-1).unique()
23 |         gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8)
24 |         sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
25 |                                          assign_result, gt_flags)
26 |         return sampling_result
27 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/random_sampler.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | 
 4 | from .base_sampler import BaseSampler
 5 | 
 6 | 
 7 | class RandomSampler(BaseSampler):
 8 | 
 9 |     def __init__(self,
10 |                  num,
11 |                  pos_fraction,
12 |                  neg_pos_ub=-1,
13 |                  add_gt_as_proposals=True,
14 |                  **kwargs):
15 |         super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub,
16 |                                             add_gt_as_proposals)
17 | 
18 |     @staticmethod
19 |     def random_choice(gallery, num):
20 |         """Random select some elements from the gallery.
21 | 
22 |         It seems that Pytorch's implementation is slower than numpy so we use
23 |         numpy to randperm the indices.
24 |         """
25 |         assert len(gallery) >= num
26 |         if isinstance(gallery, list):
27 |             gallery = np.array(gallery)
28 |         cands = np.arange(len(gallery))
29 |         np.random.shuffle(cands)
30 |         rand_inds = cands[:num]
31 |         if not isinstance(gallery, np.ndarray):
32 |             rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device)
33 |         return gallery[rand_inds]
34 | 
35 |     def _sample_pos(self, assign_result, num_expected, **kwargs):
36 |         """Randomly sample some positive samples."""
37 |         pos_inds = torch.nonzero(assign_result.gt_inds > 0)
38 |         if pos_inds.numel() != 0:
39 |             pos_inds = pos_inds.squeeze(1)
40 |         if pos_inds.numel() <= num_expected:
41 |             return pos_inds
42 |         else:
43 |             return self.random_choice(pos_inds, num_expected)
44 | 
45 |     def _sample_neg(self, assign_result, num_expected, **kwargs):
46 |         """Randomly sample some negative samples."""
47 |         neg_inds = torch.nonzero(assign_result.gt_inds == 0)
48 |         if neg_inds.numel() != 0:
49 |             neg_inds = neg_inds.squeeze(1)
50 |         if len(neg_inds) <= num_expected:
51 |             return neg_inds
52 |         else:
53 |             return self.random_choice(neg_inds, num_expected)
54 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/sampling_result.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | class SamplingResult(object):
 5 | 
 6 |     def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result,
 7 |                  gt_flags):
 8 |         self.pos_inds = pos_inds
 9 |         self.neg_inds = neg_inds
10 |         self.pos_bboxes = bboxes[pos_inds]
11 |         self.neg_bboxes = bboxes[neg_inds]
12 |         self.pos_is_gt = gt_flags[pos_inds]
13 | 
14 |         self.num_gts = gt_bboxes.shape[0]
15 |         self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1
16 |         self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds, :]
17 |         if assign_result.labels is not None:
18 |             self.pos_gt_labels = assign_result.labels[pos_inds]
19 |         else:
20 |             self.pos_gt_labels = None
21 | 
22 |     @property
23 |     def bboxes(self):
24 |         return torch.cat([self.pos_bboxes, self.neg_bboxes])
25 | 


--------------------------------------------------------------------------------
/mmdet/core/evaluation/__init__.py:
--------------------------------------------------------------------------------
 1 | from .class_names import (coco_classes, dataset_aliases, get_classes,
 2 |                           imagenet_det_classes, imagenet_vid_classes,
 3 |                           voc_classes)
 4 | from .coco_utils import coco_eval, fast_eval_recall, results2json
 5 | from .eval_hooks import (CocoDistEvalmAPHook, CocoDistEvalRecallHook,
 6 |                          DistEvalHook, DistEvalmAPHook)
 7 | from .mean_ap import average_precision, eval_map, print_map_summary
 8 | from .recall import (eval_recalls, plot_iou_recall, plot_num_recall,
 9 |                      print_recall_summary)
10 | 
11 | __all__ = [
12 |     'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes',
13 |     'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval',
14 |     'fast_eval_recall', 'results2json', 'DistEvalHook', 'DistEvalmAPHook',
15 |     'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision',
16 |     'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary',
17 |     'plot_num_recall', 'plot_iou_recall'
18 | ]
19 | 


--------------------------------------------------------------------------------
/mmdet/core/evaluation/bbox_overlaps.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou'):
 5 |     """Calculate the ious between each bbox of bboxes1 and bboxes2.
 6 | 
 7 |     Args:
 8 |         bboxes1(ndarray): shape (n, 4)
 9 |         bboxes2(ndarray): shape (k, 4)
10 |         mode(str): iou (intersection over union) or iof (intersection
11 |             over foreground)
12 | 
13 |     Returns:
14 |         ious(ndarray): shape (n, k)
15 |     """
16 | 
17 |     assert mode in ['iou', 'iof']
18 | 
19 |     bboxes1 = bboxes1.astype(np.float32)
20 |     bboxes2 = bboxes2.astype(np.float32)
21 |     rows = bboxes1.shape[0]
22 |     cols = bboxes2.shape[0]
23 |     ious = np.zeros((rows, cols), dtype=np.float32)
24 |     if rows * cols == 0:
25 |         return ious
26 |     exchange = False
27 |     if bboxes1.shape[0] > bboxes2.shape[0]:
28 |         bboxes1, bboxes2 = bboxes2, bboxes1
29 |         ious = np.zeros((cols, rows), dtype=np.float32)
30 |         exchange = True
31 |     area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
32 |         bboxes1[:, 3] - bboxes1[:, 1] + 1)
33 |     area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
34 |         bboxes2[:, 3] - bboxes2[:, 1] + 1)
35 |     for i in range(bboxes1.shape[0]):
36 |         x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0])
37 |         y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1])
38 |         x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2])
39 |         y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3])
40 |         overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum(
41 |             y_end - y_start + 1, 0)
42 |         if mode == 'iou':
43 |             union = area1[i] + area2 - overlap
44 |         else:
45 |             union = area1[i] if not exchange else area2
46 |         ious[i, :] = overlap / union
47 |     if exchange:
48 |         ious = ious.T
49 |     return ious
50 | 


--------------------------------------------------------------------------------
/mmdet/core/fp16/__init__.py:
--------------------------------------------------------------------------------
1 | from .decorators import auto_fp16, force_fp32
2 | from .hooks import Fp16OptimizerHook, wrap_fp16_model
3 | 
4 | __all__ = ['auto_fp16', 'force_fp32', 'Fp16OptimizerHook', 'wrap_fp16_model']
5 | 


--------------------------------------------------------------------------------
/mmdet/core/fp16/utils.py:
--------------------------------------------------------------------------------
 1 | from collections import abc
 2 | 
 3 | import numpy as np
 4 | import torch
 5 | 
 6 | 
 7 | def cast_tensor_type(inputs, src_type, dst_type):
 8 |     if isinstance(inputs, torch.Tensor):
 9 |         return inputs.to(dst_type)
10 |     elif isinstance(inputs, str):
11 |         return inputs
12 |     elif isinstance(inputs, np.ndarray):
13 |         return inputs
14 |     elif isinstance(inputs, abc.Mapping):
15 |         return type(inputs)({
16 |             k: cast_tensor_type(v, src_type, dst_type)
17 |             for k, v in inputs.items()
18 |         })
19 |     elif isinstance(inputs, abc.Iterable):
20 |         return type(inputs)(
21 |             cast_tensor_type(item, src_type, dst_type) for item in inputs)
22 |     else:
23 |         return inputs
24 | 


--------------------------------------------------------------------------------
/mmdet/core/mask/__init__.py:
--------------------------------------------------------------------------------
1 | from .mask_target import mask_target
2 | from .utils import split_combined_polys
3 | 
4 | __all__ = ['split_combined_polys', 'mask_target']
5 | 


--------------------------------------------------------------------------------
/mmdet/core/mask/mask_target.py:
--------------------------------------------------------------------------------
 1 | import mmcv
 2 | import numpy as np
 3 | import torch
 4 | from torch.nn.modules.utils import _pair
 5 | 
 6 | 
 7 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list,
 8 |                 cfg):
 9 |     cfg_list = [cfg for _ in range(len(pos_proposals_list))]
10 |     mask_targets = map(mask_target_single, pos_proposals_list,
11 |                        pos_assigned_gt_inds_list, gt_masks_list, cfg_list)
12 |     mask_targets = torch.cat(list(mask_targets))
13 |     return mask_targets
14 | 
15 | 
def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg):
    """Crop and resize the gt mask of every positive proposal.

    Args:
        pos_proposals (Tensor): Positive proposals; each row is read as
            ``x1, y1, x2, y2`` after truncation to int32.
        pos_assigned_gt_inds (Tensor): For each proposal, the index into
            ``gt_masks`` of its assigned ground truth.
        gt_masks: Indexable collection of 2-D masks (indexed as
            ``mask[y, x]``).
        cfg: Object with a ``mask_size`` attribute (int or pair), read as
            (h, w).

    Returns:
        Tensor: Float tensor on the device of ``pos_proposals`` holding one
        resized mask per proposal; shape ``(0, h, w)`` when there are no
        proposals.
    """
    mask_size = _pair(cfg.mask_size)
    num_pos = pos_proposals.size(0)
    if not num_pos > 0:
        return pos_proposals.new_zeros((0, ) + mask_size)

    proposals_np = pos_proposals.cpu().numpy()
    gt_inds_np = pos_assigned_gt_inds.cpu().numpy()
    resized = []
    for i in range(num_pos):
        gt_mask = gt_masks[gt_inds_np[i]]
        x1, y1, x2, y2 = proposals_np[i, :].astype(np.int32)
        # Degenerate boxes still yield a crop of at least one pixel.
        w = np.maximum(x2 - x1 + 1, 1)
        h = np.maximum(y2 - y1 + 1, 1)
        crop = gt_mask[y1:y1 + h, x1:x1 + w]
        # mask is uint8 both before and after resizing
        # mask_size (h, w) to (w, h)
        resized.append(mmcv.imresize(crop, mask_size[::-1]))
    stacked = torch.from_numpy(np.stack(resized)).float()
    return stacked.to(pos_proposals.device)
39 | 


--------------------------------------------------------------------------------
/mmdet/core/mask/utils.py:
--------------------------------------------------------------------------------
 1 | import mmcv
 2 | 
 3 | 
 4 | def split_combined_polys(polys, poly_lens, polys_per_mask):
 5 |     """Split the combined 1-D polys into masks.
 6 | 
 7 |     A mask is represented as a list of polys, and a poly is represented as
 8 |     a 1-D array. In dataset, all masks are concatenated into a single 1-D
 9 |     tensor. Here we need to split the tensor into original representations.
10 | 
11 |     Args:
12 |         polys (list): a list (length = image num) of 1-D tensors
13 |         poly_lens (list): a list (length = image num) of poly length
14 |         polys_per_mask (list): a list (length = image num) of poly number
15 |             of each mask
16 | 
17 |     Returns:
18 |         list: a list (length = image num) of list (length = mask num) of
19 |             list (length = poly num) of numpy array
20 |     """
21 |     mask_polys_list = []
22 |     for img_id in range(len(polys)):
23 |         polys_single = polys[img_id]
24 |         polys_lens_single = poly_lens[img_id].tolist()
25 |         polys_per_mask_single = polys_per_mask[img_id].tolist()
26 | 
27 |         split_polys = mmcv.slice_list(polys_single, polys_lens_single)
28 |         mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single)
29 |         mask_polys_list.append(mask_polys)
30 |     return mask_polys_list
31 | 


--------------------------------------------------------------------------------
/mmdet/core/post_processing/__init__.py:
--------------------------------------------------------------------------------
1 | from .bbox_nms import multiclass_nms
2 | from .merge_augs import (merge_aug_bboxes, merge_aug_masks,
3 |                          merge_aug_proposals, merge_aug_scores)
4 | 
5 | __all__ = [
6 |     'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes',
7 |     'merge_aug_scores', 'merge_aug_masks'
8 | ]
9 | 


--------------------------------------------------------------------------------
/mmdet/core/post_processing/bbox_nms.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from mmdet.ops.nms import nms_wrapper
 4 | 
 5 | 
 6 | def multiclass_nms(multi_bboxes,
 7 |                    multi_scores,
 8 |                    score_thr,
 9 |                    nms_cfg,
10 |                    max_num=-1,
11 |                    score_factors=None):
12 |     """NMS for multi-class bboxes.
13 | 
14 |     Args:
15 |         multi_bboxes (Tensor): shape (n, #class*4) or (n, 4)
16 |         multi_scores (Tensor): shape (n, #class)
17 |         score_thr (float): bbox threshold, bboxes with scores lower than it
18 |             will not be considered.
19 |         nms_thr (float): NMS IoU threshold
20 |         max_num (int): if there are more than max_num bboxes after NMS,
21 |             only top max_num will be kept.
22 |         score_factors (Tensor): The factors multiplied to scores before
23 |             applying NMS
24 | 
25 |     Returns:
26 |         tuple: (bboxes, labels), tensors of shape (k, 5) and (k, 1). Labels
27 |             are 0-based.
28 |     """
29 |     num_classes = multi_scores.shape[1]
30 |     bboxes, labels = [], []
31 |     nms_cfg_ = nms_cfg.copy()
32 |     nms_type = nms_cfg_.pop('type', 'nms')
33 |     nms_op = getattr(nms_wrapper, nms_type)
34 |     for i in range(1, num_classes):
35 |         cls_inds = multi_scores[:, i] > score_thr
36 |         if not cls_inds.any():
37 |             continue
38 |         # get bboxes and scores of this class
39 |         if multi_bboxes.shape[1] == 4:
40 |             _bboxes = multi_bboxes[cls_inds, :]
41 |         else:
42 |             _bboxes = multi_bboxes[cls_inds, i * 4:(i + 1) * 4]
43 |         _scores = multi_scores[cls_inds, i]
44 |         if score_factors is not None:
45 |             _scores *= score_factors[cls_inds]
46 |         cls_dets = torch.cat([_bboxes, _scores[:, None]], dim=1)
47 |         cls_dets, _ = nms_op(cls_dets, **nms_cfg_)
48 |         cls_labels = multi_bboxes.new_full((cls_dets.shape[0], ),
49 |                                            i - 1,
50 |                                            dtype=torch.long)
51 |         bboxes.append(cls_dets)
52 |         labels.append(cls_labels)
53 |     if bboxes:
54 |         bboxes = torch.cat(bboxes)
55 |         labels = torch.cat(labels)
56 |         if bboxes.shape[0] > max_num:
57 |             _, inds = bboxes[:, -1].sort(descending=True)
58 |             inds = inds[:max_num]
59 |             bboxes = bboxes[inds]
60 |             labels = labels[inds]
61 |     else:
62 |         bboxes = multi_bboxes.new_zeros((0, 5))
63 |         labels = multi_bboxes.new_zeros((0, ), dtype=torch.long)
64 | 
65 |     return bboxes, labels
66 | 


--------------------------------------------------------------------------------
/mmdet/core/post_processing/merge_augs.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | 
 4 | from mmdet.ops import nms
 5 | from ..bbox import bbox_mapping_back
 6 | 
 7 | 
 8 | def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg):
 9 |     """Merge augmented proposals (multiscale, flip, etc.)
10 | 
11 |     Args:
12 |         aug_proposals (list[Tensor]): proposals from different testing
13 |             schemes, shape (n, 5). Note that they are not rescaled to the
14 |             original image size.
15 |         img_metas (list[dict]): image info including "shape_scale" and "flip".
16 |         rpn_test_cfg (dict): rpn test config.
17 | 
18 |     Returns:
19 |         Tensor: shape (n, 4), proposals corresponding to original image scale.
20 |     """
21 |     recovered_proposals = []
22 |     for proposals, img_info in zip(aug_proposals, img_metas):
23 |         img_shape = img_info['img_shape']
24 |         scale_factor = img_info['scale_factor']
25 |         flip = img_info['flip']
26 |         _proposals = proposals.clone()
27 |         _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], img_shape,
28 |                                               scale_factor, flip)
29 |         recovered_proposals.append(_proposals)
30 |     aug_proposals = torch.cat(recovered_proposals, dim=0)
31 |     merged_proposals, _ = nms(aug_proposals, rpn_test_cfg.nms_thr)
32 |     scores = merged_proposals[:, 4]
33 |     _, order = scores.sort(0, descending=True)
34 |     num = min(rpn_test_cfg.max_num, merged_proposals.shape[0])
35 |     order = order[:num]
36 |     merged_proposals = merged_proposals[order, :]
37 |     return merged_proposals
38 | 
39 | 
def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg):
    """Average detection bboxes (and scores) over test-time augmentations.

    Args:
        aug_bboxes (list[Tensor]): shape (n, 4*#class)
        aug_scores (list[Tensor] or None): shape (n, #class)
        img_metas (list): one entry per augmentation; the first element of
            each entry is a dict with "img_shape", "scale_factor" and
            "flip".
        rcnn_test_cfg (dict): rcnn test config (not read by this function).

    Returns:
        Tensor or tuple: the averaged bboxes alone when ``aug_scores`` is
            None, otherwise ``(bboxes, scores)``.
    """
    restored = []
    for boxes, meta in zip(aug_bboxes, img_metas):
        shape = meta[0]['img_shape']
        scale = meta[0]['scale_factor']
        flipped = meta[0]['flip']
        restored.append(bbox_mapping_back(boxes, shape, scale, flipped))

    merged_bboxes = torch.stack(restored).mean(dim=0)
    if aug_scores is None:
        return merged_bboxes
    merged_scores = torch.stack(aug_scores).mean(dim=0)
    return merged_bboxes, merged_scores
65 | 
66 | 
def merge_aug_scores(aug_scores):
    """Average bbox scores over augmentations.

    Args:
        aug_scores (list): scores from each augmentation. The type of the
            first element decides whether torch or numpy is used.

    Returns:
        Tensor or ndarray: element-wise mean over the list.
    """
    if not isinstance(aug_scores[0], torch.Tensor):
        return np.mean(aug_scores, axis=0)
    stacked = torch.stack(aug_scores)
    return torch.mean(stacked, dim=0)
73 | 
74 | 
def merge_aug_masks(aug_masks, img_metas, rcnn_test_cfg, weights=None):
    """Merge augmented mask predictions.

    Masks coming from flipped inputs are flipped back along their last axis
    before averaging.

    Args:
        aug_masks (list[ndarray]): shape (n, #class, h, w)
        img_metas (list): one entry per augmentation; the first element of
            each entry is a dict providing the "flip" flag.
        rcnn_test_cfg (dict): rcnn test config (not read by this function).
        weights (sequence, optional): per-augmentation weights. A plain
            mean is taken when omitted.

    Returns:
        ndarray: the (weighted) average of the recovered masks.
    """
    recovered = []
    for mask, meta in zip(aug_masks, img_metas):
        if meta[0]['flip']:
            mask = mask[..., ::-1]
        recovered.append(mask)

    if weights is not None:
        return np.average(
            np.array(recovered), axis=0, weights=np.array(weights))
    return np.mean(recovered, axis=0)
96 | 


--------------------------------------------------------------------------------
/mmdet/core/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .dist_utils import DistOptimizerHook, allreduce_grads
2 | from .misc import multi_apply, tensor2imgs, unmap
3 | 
4 | __all__ = [
5 |     'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', 'unmap',
6 |     'multi_apply'
7 | ]
8 | 


--------------------------------------------------------------------------------
/mmdet/core/utils/dist_utils.py:
--------------------------------------------------------------------------------
 1 | from collections import OrderedDict
 2 | 
 3 | import torch.distributed as dist
 4 | from mmcv.runner import OptimizerHook
 5 | from torch._utils import (_flatten_dense_tensors, _take_tensors,
 6 |                           _unflatten_dense_tensors)
 7 | 
 8 | 
 9 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1):
10 |     if bucket_size_mb > 0:
11 |         bucket_size_bytes = bucket_size_mb * 1024 * 1024
12 |         buckets = _take_tensors(tensors, bucket_size_bytes)
13 |     else:
14 |         buckets = OrderedDict()
15 |         for tensor in tensors:
16 |             tp = tensor.type()
17 |             if tp not in buckets:
18 |                 buckets[tp] = []
19 |             buckets[tp].append(tensor)
20 |         buckets = buckets.values()
21 | 
22 |     for bucket in buckets:
23 |         flat_tensors = _flatten_dense_tensors(bucket)
24 |         dist.all_reduce(flat_tensors)
25 |         flat_tensors.div_(world_size)
26 |         for tensor, synced in zip(
27 |                 bucket, _unflatten_dense_tensors(flat_tensors, bucket)):
28 |             tensor.copy_(synced)
29 | 
30 | 
def allreduce_grads(params, coalesce=True, bucket_size_mb=-1):
    """Average the gradients of ``params`` across the process group.

    Args:
        params (iterable): parameters; only those with ``requires_grad`` set
            and a non-None ``grad`` take part.
        coalesce (bool): reduce flattened buckets via
            ``_allreduce_coalesced`` instead of one tensor at a time.
        bucket_size_mb (int): forwarded to ``_allreduce_coalesced``.
    """
    grads = []
    for param in params:
        if param.requires_grad and param.grad is not None:
            grads.append(param.grad.data)

    world_size = dist.get_world_size()
    if not coalesce:
        for grad in grads:
            # divide first, then sum across processes
            dist.all_reduce(grad.div_(world_size))
        return
    _allreduce_coalesced(grads, world_size, bucket_size_mb)
42 | 
43 | 
class DistOptimizerHook(OptimizerHook):
    """Optimizer hook that all-reduces gradients before each step.

    Args:
        grad_clip: gradients are clipped through ``self.clip_grads`` when
            this is not None.
        coalesce (bool): forwarded to ``allreduce_grads``.
        bucket_size_mb (int): forwarded to ``allreduce_grads``.
    """

    def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1):
        self.bucket_size_mb = bucket_size_mb
        self.coalesce = coalesce
        self.grad_clip = grad_clip

    def after_train_iter(self, runner):
        """Backpropagate the loss, sync gradients, then step the optimizer."""
        optimizer = runner.optimizer
        optimizer.zero_grad()
        runner.outputs['loss'].backward()
        # ``parameters()`` is called again below rather than reused
        allreduce_grads(runner.model.parameters(), self.coalesce,
                        self.bucket_size_mb)
        if self.grad_clip is not None:
            self.clip_grads(runner.model.parameters())
        optimizer.step()
59 | 


--------------------------------------------------------------------------------
/mmdet/core/utils/misc.py:
--------------------------------------------------------------------------------
 1 | from functools import partial
 2 | 
 3 | import mmcv
 4 | import numpy as np
 5 | from six.moves import map, zip
 6 | 
 7 | 
 8 | def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True):
 9 |     num_imgs = tensor.size(0)
10 |     mean = np.array(mean, dtype=np.float32)
11 |     std = np.array(std, dtype=np.float32)
12 |     imgs = []
13 |     for img_id in range(num_imgs):
14 |         img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0)
15 |         img = mmcv.imdenormalize(
16 |             img, mean, std, to_bgr=to_rgb).astype(np.uint8)
17 |         imgs.append(np.ascontiguousarray(img))
18 |     return imgs
19 | 
20 | 
def multi_apply(func, *args, **kwargs):
    """Map ``func`` over the argument sequences and transpose the results.

    Args:
        func (callable): called once per item with one element taken from
            each sequence in ``args``; expected to return a tuple.
        *args: sequences iterated in lockstep.
        **kwargs: keyword arguments bound to every call of ``func``.

    Returns:
        tuple[list]: one list per position of the tuples ``func`` returns.
    """
    if kwargs:
        func = partial(func, **kwargs)
    per_item = map(func, *args)
    return tuple(list(column) for column in zip(*per_item))
25 | 
26 | 
def unmap(data, count, inds, fill=0):
    """Unmap a subset of items (data) back to the original set of items.

    Args:
        data (Tensor): values of the subset.
        count (int): size of the original set (first dimension of the
            result).
        inds: index into the first dimension of the result telling where
            ``data`` is written.
        fill: value used for every position not covered by ``inds``.

    Returns:
        Tensor: tensor with first dimension ``count``, created with
            ``data.new_full``.
    """
    if data.dim() != 1:
        full_shape = (count, ) + data.size()[1:]
        restored = data.new_full(full_shape, fill)
        restored[inds, :] = data
        return restored
    restored = data.new_full((count, ), fill)
    restored[inds] = data
    return restored
38 | 


--------------------------------------------------------------------------------
/mmdet/datasets/__init__.py:
--------------------------------------------------------------------------------
 1 | from .builder import build_dataset
 2 | from .cityscapes import CityscapesDataset
 3 | from .coco import CocoDataset
 4 | from .custom import CustomDataset
 5 | from .dataset_wrappers import ConcatDataset, RepeatDataset
 6 | from .extra_aug import ExtraAugmentation
 7 | from .loader import DistributedGroupSampler, GroupSampler, build_dataloader
 8 | from .registry import DATASETS
 9 | from .utils import random_scale, show_ann, to_tensor
10 | from .voc import VOCDataset
11 | from .wider_face import WIDERFaceDataset
12 | from .xml_style import XMLDataset
13 | 
14 | __all__ = [
15 |     'CustomDataset', 'XMLDataset', 'CocoDataset', 'VOCDataset',
16 |     'CityscapesDataset', 'GroupSampler', 'DistributedGroupSampler',
17 |     'build_dataloader', 'to_tensor', 'random_scale', 'show_ann',
18 |     'ConcatDataset', 'RepeatDataset', 'ExtraAugmentation', 'WIDERFaceDataset',
19 |     'DATASETS', 'build_dataset'
20 | ]
21 | 


--------------------------------------------------------------------------------
/mmdet/datasets/builder.py:
--------------------------------------------------------------------------------
 1 | import copy
 2 | 
 3 | from mmdet.utils import build_from_cfg
 4 | from .dataset_wrappers import ConcatDataset, RepeatDataset
 5 | from .registry import DATASETS
 6 | 
 7 | 
 8 | def _concat_dataset(cfg, default_args=None):
 9 |     ann_files = cfg['ann_file']
10 |     img_prefixes = cfg.get('img_prefix', None)
11 |     seg_prefixes = cfg.get('seg_prefixes', None)
12 |     proposal_files = cfg.get('proposal_file', None)
13 | 
14 |     datasets = []
15 |     num_dset = len(ann_files)
16 |     for i in range(num_dset):
17 |         data_cfg = copy.deepcopy(cfg)
18 |         data_cfg['ann_file'] = ann_files[i]
19 |         if isinstance(img_prefixes, (list, tuple)):
20 |             data_cfg['img_prefix'] = img_prefixes[i]
21 |         if isinstance(seg_prefixes, (list, tuple)):
22 |             data_cfg['seg_prefix'] = seg_prefixes[i]
23 |         if isinstance(proposal_files, (list, tuple)):
24 |             data_cfg['proposal_file'] = proposal_files[i]
25 |         datasets.append(build_dataset(data_cfg, default_args))
26 | 
27 |     return ConcatDataset(datasets)
28 | 
29 | 
def build_dataset(cfg, default_args=None):
    """Build a dataset (possibly wrapped) from a config.

    Args:
        cfg (dict | list | tuple): a dataset config, or a sequence of
            configs that are built individually and concatenated.
        default_args (dict, optional): forwarded to the underlying builders.

    Returns:
        The built dataset: a ``ConcatDataset`` for a sequence of configs or
        a list-valued ``ann_file``, a ``RepeatDataset`` when
        ``cfg['type'] == 'RepeatDataset'``, otherwise whatever
        ``build_from_cfg`` returns for the ``DATASETS`` registry.
    """
    if isinstance(cfg, (list, tuple)):
        parts = [build_dataset(item, default_args) for item in cfg]
        return ConcatDataset(parts)
    if cfg['type'] == 'RepeatDataset':
        wrapped = build_dataset(cfg['dataset'], default_args)
        return RepeatDataset(wrapped, cfg['times'])
    if isinstance(cfg['ann_file'], (list, tuple)):
        return _concat_dataset(cfg, default_args)
    return build_from_cfg(cfg, DATASETS, default_args)
42 | 


--------------------------------------------------------------------------------
/mmdet/datasets/cityscapes.py:
--------------------------------------------------------------------------------
 1 | from .coco import CocoDataset
 2 | from .registry import DATASETS
 3 | 
 4 | 
@DATASETS.register_module
class CityscapesDataset(CocoDataset):
    """Dataset registered in ``DATASETS`` that reuses :class:`CocoDataset`.

    Only the class names are overridden here; everything else is inherited.
    """

    # Category names of this dataset.
    CLASSES = ('person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle',
               'bicycle')
10 | 


--------------------------------------------------------------------------------
/mmdet/datasets/dataset_wrappers.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset
 3 | 
 4 | from .registry import DATASETS
 5 | 
 6 | 
 7 | @DATASETS.register_module
 8 | class ConcatDataset(_ConcatDataset):
 9 |     """A wrapper of concatenated dataset.
10 | 
11 |     Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but
12 |     concat the group flag for image aspect ratio.
13 | 
14 |     Args:
15 |         datasets (list[:obj:`Dataset`]): A list of datasets.
16 |     """
17 | 
18 |     def __init__(self, datasets):
19 |         super(ConcatDataset, self).__init__(datasets)
20 |         self.CLASSES = datasets[0].CLASSES
21 |         if hasattr(datasets[0], 'flag'):
22 |             flags = []
23 |             for i in range(0, len(datasets)):
24 |                 flags.append(datasets[i].flag)
25 |             self.flag = np.concatenate(flags)
26 | 
27 | 
@DATASETS.register_module
class RepeatDataset(object):
    """Wrapper that makes a dataset appear ``times`` times longer.

    Useful when the data loading time is long but the dataset is small:
    repeating the dataset reduces the data loading time between epochs.

    Args:
        dataset (:obj:`Dataset`): The dataset to be repeated.
        times (int): Repeat times.
    """

    def __init__(self, dataset, times):
        self.dataset = dataset
        self.times = times
        self.CLASSES = dataset.CLASSES
        self._ori_len = len(dataset)
        if hasattr(dataset, 'flag'):
            self.flag = np.tile(dataset.flag, times)

    def __getitem__(self, idx):
        # wrap the index back into the range of the wrapped dataset
        wrapped_idx = idx % self._ori_len
        return self.dataset[wrapped_idx]

    def __len__(self):
        return self._ori_len * self.times
56 | 


--------------------------------------------------------------------------------
/mmdet/datasets/loader/__init__.py:
--------------------------------------------------------------------------------
1 | from .build_loader import build_dataloader
2 | from .sampler import DistributedGroupSampler, GroupSampler
3 | 
4 | __all__ = ['GroupSampler', 'DistributedGroupSampler', 'build_dataloader']
5 | 


--------------------------------------------------------------------------------
/mmdet/datasets/loader/build_loader.py:
--------------------------------------------------------------------------------
 1 | import platform
 2 | from functools import partial
 3 | 
 4 | from mmcv.parallel import collate
 5 | from mmcv.runner import get_dist_info
 6 | from torch.utils.data import DataLoader
 7 | 
 8 | from .sampler import DistributedGroupSampler, DistributedSampler, GroupSampler
 9 | 
if platform.system() != 'Windows':
    # https://github.com/pytorch/pytorch/issues/973
    # Raise the soft open-file limit towards 4096. The request is capped at
    # the hard limit (setrlimit rejects soft > hard) and an already higher
    # soft limit is never lowered.
    import resource
    rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
    soft_limit, hard_limit = rlimit
    wanted_limit = 4096
    if hard_limit != resource.RLIM_INFINITY:
        wanted_limit = min(wanted_limit, hard_limit)
    if soft_limit != resource.RLIM_INFINITY and soft_limit < wanted_limit:
        resource.setrlimit(resource.RLIMIT_NOFILE, (wanted_limit, hard_limit))
15 | 
16 | 
def build_dataloader(dataset,
                     imgs_per_gpu,
                     workers_per_gpu,
                     num_gpus=1,
                     dist=True,
                     shuffle=True,
                     **kwargs):
    """Build a ``DataLoader`` with the sampler matching the run mode.

    Args:
        dataset: dataset to load from.
        imgs_per_gpu (int): images per GPU; also passed to ``collate`` as
            ``samples_per_gpu``.
        workers_per_gpu (int): loader workers per GPU.
        num_gpus (int): number of GPUs, used only when ``dist`` is False to
            scale batch size and worker count.
        dist (bool): use the distributed samplers; batch size and worker
            count are then the per-GPU values.
        shuffle (bool): choose a group sampler (True) or, when False, a
            non-shuffling ``DistributedSampler`` in distributed mode and no
            sampler otherwise.
        **kwargs: extra keyword arguments forwarded to ``DataLoader``.

    Returns:
        DataLoader: the configured data loader.
    """
    # ``shuffle`` only selects the sampler. It is deliberately not forwarded
    # to DataLoader, which rejects ``shuffle=True`` together with a sampler.
    if dist:
        rank, world_size = get_dist_info()
        if shuffle:
            sampler = DistributedGroupSampler(dataset, imgs_per_gpu,
                                              world_size, rank)
        else:
            sampler = DistributedSampler(
                dataset, world_size, rank, shuffle=False)
        batch_size = imgs_per_gpu
        num_workers = workers_per_gpu
    else:
        sampler = GroupSampler(dataset, imgs_per_gpu) if shuffle else None
        batch_size = num_gpus * imgs_per_gpu
        num_workers = num_gpus * workers_per_gpu

    data_loader = DataLoader(
        dataset,
        batch_size=batch_size,
        sampler=sampler,
        num_workers=num_workers,
        collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu),
        pin_memory=False,
        **kwargs)

    return data_loader
49 | 


--------------------------------------------------------------------------------
/mmdet/datasets/registry.py:
--------------------------------------------------------------------------------
1 | from mmdet.utils import Registry
2 | 
# Registry instance named 'dataset'; dataset classes are added to it with
# the ``@DATASETS.register_module`` decorator.
DATASETS = Registry('dataset')
4 | 


--------------------------------------------------------------------------------
/mmdet/datasets/utils.py:
--------------------------------------------------------------------------------
 1 | from collections import Sequence
 2 | 
 3 | import matplotlib.pyplot as plt
 4 | import mmcv
 5 | import numpy as np
 6 | import torch
 7 | 
 8 | 
 9 | def to_tensor(data):
10 |     """Convert objects of various python types to :obj:`torch.Tensor`.
11 | 
12 |     Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`,
13 |     :class:`Sequence`, :class:`int` and :class:`float`.
14 |     """
15 |     if isinstance(data, torch.Tensor):
16 |         return data
17 |     elif isinstance(data, np.ndarray):
18 |         return torch.from_numpy(data)
19 |     elif isinstance(data, Sequence) and not mmcv.is_str(data):
20 |         return torch.tensor(data)
21 |     elif isinstance(data, int):
22 |         return torch.LongTensor([data])
23 |     elif isinstance(data, float):
24 |         return torch.FloatTensor([data])
25 |     else:
26 |         raise TypeError('type {} cannot be converted to tensor.'.format(
27 |             type(data)))
28 | 
29 | 
def random_scale(img_scales, mode='range'):
    """Randomly select a scale from a list of scales or scale ranges.

    Args:
        img_scales (list[tuple]): Image scale or scale range. A single
            entry is returned as is, whatever ``mode`` says.
        mode (str): "range" or "value". In "range" mode exactly two scales
            are required; the long and short edges are sampled
            independently (inclusive) between the respective extremes of
            the two scales. In "value" mode one of the given scales is
            picked at random.

    Returns:
        tuple: Sampled image scale.

    Raises:
        ValueError: if a mode other than "value" is used with a number of
            scales that is neither 1 nor 2, or if ``mode`` is neither
            "range" nor "value" when two scales are given.
    """
    num_scales = len(img_scales)
    if num_scales == 1:  # fixed scale is specified
        return img_scales[0]
    if mode == 'value':
        return img_scales[np.random.randint(num_scales)]
    if num_scales != 2:
        raise ValueError(
            'Only "value" mode supports more than 2 image scales')
    if mode != 'range':
        # Previously this fell through and failed with UnboundLocalError.
        raise ValueError(
            'mode must be "range" or "value", but got "{}"'.format(mode))

    img_scale_long = [max(s) for s in img_scales]
    img_scale_short = [min(s) for s in img_scales]
    long_edge = np.random.randint(
        min(img_scale_long),
        max(img_scale_long) + 1)
    short_edge = np.random.randint(
        min(img_scale_short),
        max(img_scale_short) + 1)
    return (long_edge, short_edge)
62 | 
63 | 
def show_ann(coco, img, ann_info):
    """Display an image with its annotations drawn on top.

    Args:
        coco: object whose ``showAnns`` method draws ``ann_info``.
        img: image passed through ``mmcv.bgr2rgb`` before being shown.
        ann_info: annotations handed to ``coco.showAnns``.
    """
    rgb_img = mmcv.bgr2rgb(img)
    plt.imshow(rgb_img)
    plt.axis('off')
    coco.showAnns(ann_info)
    plt.show()
69 | 


--------------------------------------------------------------------------------
/mmdet/datasets/voc.py:
--------------------------------------------------------------------------------
 1 | from .registry import DATASETS
 2 | from .xml_style import XMLDataset
 3 | 
 4 | 
 5 | @DATASETS.register_module
 6 | class VOCDataset(XMLDataset):
 7 | 
 8 |     CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car',
 9 |                'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
10 |                'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
11 |                'tvmonitor')
12 | 
13 |     def __init__(self, **kwargs):
14 |         super(VOCDataset, self).__init__(**kwargs)
15 |         if 'VOC2007' in self.img_prefix:
16 |             self.year = 2007
17 |         elif 'VOC2012' in self.img_prefix:
18 |             self.year = 2012
19 |         else:
20 |             raise ValueError('Cannot infer dataset year from img_prefix')
21 | 


--------------------------------------------------------------------------------
/mmdet/datasets/wider_face.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | import xml.etree.ElementTree as ET
 3 | 
 4 | import mmcv
 5 | 
 6 | from .registry import DATASETS
 7 | from .xml_style import XMLDataset
 8 | 
 9 | 
@DATASETS.register_module
class WIDERFaceDataset(XMLDataset):
    """
    Reader for the WIDER Face dataset in PASCAL VOC format.
    Conversion scripts can be found in
    https://github.com/sovrasov/wider-face-pascal-voc-annotations
    """
    CLASSES = ('face', )

    def __init__(self, **kwargs):
        super(WIDERFaceDataset, self).__init__(**kwargs)

    def load_annotations(self, ann_file):
        """Read image ids from ``ann_file`` and collect per-image info.

        For every id the XML file ``<img_prefix>/Annotations/<id>.xml`` is
        parsed to obtain the image size and its folder.

        Args:
            ann_file (str): file read with ``mmcv.list_from_file``, giving
                one image id per entry.

        Returns:
            list[dict]: dicts with keys ``id``, ``filename``
                (``<folder>/<id>.jpg``), ``width`` and ``height``.
        """

        def _describe(img_id):
            xml_path = osp.join(self.img_prefix, 'Annotations',
                                '{}.xml'.format(img_id))
            root = ET.parse(xml_path).getroot()
            size = root.find('size')
            width = int(size.find('width').text)
            height = int(size.find('height').text)
            folder = root.find('folder').text
            return dict(
                id=img_id,
                filename=osp.join(folder, '{}.jpg'.format(img_id)),
                width=width,
                height=height)

        return [_describe(img_id) for img_id in mmcv.list_from_file(ann_file)]
43 | 


--------------------------------------------------------------------------------
/mmdet/datasets/xml_style.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | import xml.etree.ElementTree as ET
 3 | 
 4 | import mmcv
 5 | import numpy as np
 6 | 
 7 | from .custom import CustomDataset
 8 | from .registry import DATASETS
 9 | 
10 | 
@DATASETS.register_module
class XMLDataset(CustomDataset):
    """Dataset whose annotations are stored as one XML file per image.

    Annotation files are read from ``<img_prefix>/Annotations/<id>.xml`` and
    image file names are recorded as ``JPEGImages/<id>.jpg``.

    Args:
        min_size (number, optional): when set, boxes whose width or height
            is smaller than this value are put into the ignore lists.
        **kwargs: forwarded to :class:`CustomDataset`.
    """

    def __init__(self, min_size=None, **kwargs):
        super(XMLDataset, self).__init__(**kwargs)
        # class name -> label, labels start at 1
        self.cat2label = {
            name: index
            for index, name in enumerate(self.CLASSES, start=1)
        }
        self.min_size = min_size

    def load_annotations(self, ann_file):
        """Collect id, file name and size for every image id in ``ann_file``.

        Args:
            ann_file (str): file read with ``mmcv.list_from_file``.

        Returns:
            list[dict]: dicts with keys ``id``, ``filename``, ``width`` and
                ``height``.
        """
        img_infos = []
        for img_id in mmcv.list_from_file(ann_file):
            xml_path = osp.join(self.img_prefix, 'Annotations',
                                '{}.xml'.format(img_id))
            size = ET.parse(xml_path).getroot().find('size')
            img_infos.append(
                dict(
                    id=img_id,
                    filename='JPEGImages/{}.jpg'.format(img_id),
                    width=int(size.find('width').text),
                    height=int(size.find('height').text)))
        return img_infos

    def get_ann_info(self, idx):
        """Parse the XML annotation of the ``idx``-th image.

        Objects flagged ``difficult``, or smaller than ``min_size`` when it
        is set, go to the ``*_ignore`` entries. One is subtracted from all
        box coordinates.

        Args:
            idx (int): index into ``self.img_infos``.

        Returns:
            dict: ``bboxes`` / ``bboxes_ignore`` as float32 arrays and
                ``labels`` / ``labels_ignore`` as int64 arrays.
        """
        img_id = self.img_infos[idx]['id']
        xml_path = osp.join(self.img_prefix, 'Annotations',
                            '{}.xml'.format(img_id))
        root = ET.parse(xml_path).getroot()

        kept_boxes, kept_labels = [], []
        skipped_boxes, skipped_labels = [], []
        for obj in root.findall('object'):
            label = self.cat2label[obj.find('name').text]
            difficult = int(obj.find('difficult').text)
            bnd_box = obj.find('bndbox')
            bbox = [
                int(bnd_box.find(tag).text)
                for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            ]
            too_small = False
            if self.min_size:
                assert not self.test_mode
                box_w = bbox[2] - bbox[0]
                box_h = bbox[3] - bbox[1]
                too_small = box_w < self.min_size or box_h < self.min_size
            if difficult or too_small:
                skipped_boxes.append(bbox)
                skipped_labels.append(label)
            else:
                kept_boxes.append(bbox)
                kept_labels.append(label)

        def _pack(box_list, label_list):
            # empty lists become correctly shaped empty arrays
            if box_list:
                boxes = np.array(box_list, ndmin=2) - 1
                lbls = np.array(label_list)
            else:
                boxes = np.zeros((0, 4))
                lbls = np.zeros((0, ))
            return boxes.astype(np.float32), lbls.astype(np.int64)

        bboxes, labels = _pack(kept_boxes, kept_labels)
        bboxes_ignore, labels_ignore = _pack(skipped_boxes, skipped_labels)
        return dict(
            bboxes=bboxes,
            labels=labels,
            bboxes_ignore=bboxes_ignore,
            labels_ignore=labels_ignore)
87 | 


--------------------------------------------------------------------------------
/mmdet/models/__init__.py:
--------------------------------------------------------------------------------
 1 | from .anchor_heads import *  # noqa: F401,F403
 2 | from .backbones import *  # noqa: F401,F403
 3 | from .bbox_heads import *  # noqa: F401,F403
 4 | from .builder import (build_backbone, build_detector, build_head, build_loss,
 5 |                       build_neck, build_roi_extractor, build_shared_head)
 6 | from .detectors import *  # noqa: F401,F403
 7 | from .losses import *  # noqa: F401,F403
 8 | from .mask_heads import *  # noqa: F401,F403
 9 | from .necks import *  # noqa: F401,F403
10 | from .registry import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS,
11 |                        ROI_EXTRACTORS, SHARED_HEADS)
12 | from .roi_extractors import *  # noqa: F401,F403
13 | from .shared_heads import *  # noqa: F401,F403
14 | 
15 | __all__ = [
16 |     'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS', 'LOSSES',
17 |     'DETECTORS', 'build_backbone', 'build_neck', 'build_roi_extractor',
18 |     'build_shared_head', 'build_head', 'build_loss', 'build_detector'
19 | ]
20 | 


--------------------------------------------------------------------------------
/mmdet/models/anchor_heads/__init__.py:
--------------------------------------------------------------------------------
 1 | from .anchor_head import AnchorHead
 2 | from .fcos_head import FCOSHead
 3 | from .ga_retina_head import GARetinaHead
 4 | from .ga_rpn_head import GARPNHead
 5 | from .guided_anchor_head import FeatureAdaption, GuidedAnchorHead
 6 | from .retina_head import RetinaHead
 7 | from .rpn_head import RPNHead
 8 | from .ssd_head import SSDHead
 9 | from .fovea_head import FoveaHead
10 | __all__ = [
11 |     'AnchorHead', 'GuidedAnchorHead', 'FeatureAdaption', 'RPNHead',
12 |     'GARPNHead', 'RetinaHead', 'GARetinaHead', 'SSDHead', 'FCOSHead',
13 |     'FoveaHead'
14 | ]


--------------------------------------------------------------------------------
/mmdet/models/anchor_heads/retina_head.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch.nn as nn
 3 | from mmcv.cnn import normal_init
 4 | 
 5 | from ..registry import HEADS
 6 | from ..utils import ConvModule, bias_init_with_prob
 7 | from .anchor_head import AnchorHead
 8 | 
 9 | 
@HEADS.register_module
class RetinaHead(AnchorHead):
    """Anchor head with two separate conv towers for cls and reg.

    The anchor scales handed to :class:`AnchorHead` are
    ``octave_base_scale * 2**(i / scales_per_octave)`` for
    ``i in range(scales_per_octave)``.

    Args:
        num_classes: forwarded to :class:`AnchorHead`.
        in_channels: forwarded to :class:`AnchorHead`; also the input
            channel count of the first conv of each tower.
        stacked_convs (int): number of ``ConvModule`` layers per tower.
        octave_base_scale (number): base multiplier of the anchor scales.
        scales_per_octave (int): number of anchor scales.
        conv_cfg: passed to every ``ConvModule``.
        norm_cfg: passed to every ``ConvModule``.
        **kwargs: forwarded to :class:`AnchorHead`.
    """

    def __init__(self,
                 num_classes,
                 in_channels,
                 stacked_convs=4,
                 octave_base_scale=4,
                 scales_per_octave=3,
                 conv_cfg=None,
                 norm_cfg=None,
                 **kwargs):
        # These attributes are assigned before the parent constructor runs.
        self.stacked_convs = stacked_convs
        self.octave_base_scale = octave_base_scale
        self.scales_per_octave = scales_per_octave
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        octave_scales = np.array(
            [2**(i / scales_per_octave) for i in range(scales_per_octave)])
        super(RetinaHead, self).__init__(
            num_classes,
            in_channels,
            anchor_scales=octave_scales * octave_base_scale,
            **kwargs)

    def _init_layers(self):
        """Create the two conv towers and the final prediction convs."""
        self.relu = nn.ReLU(inplace=True)
        self.cls_convs = nn.ModuleList()
        self.reg_convs = nn.ModuleList()
        tower_kwargs = dict(
            stride=1,
            padding=1,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg)
        for depth in range(self.stacked_convs):
            in_chn = self.feat_channels if depth else self.in_channels
            # cls and reg layers are created alternately, level by level
            self.cls_convs.append(
                ConvModule(in_chn, self.feat_channels, 3, **tower_kwargs))
            self.reg_convs.append(
                ConvModule(in_chn, self.feat_channels, 3, **tower_kwargs))
        cls_out = self.num_anchors * self.cls_out_channels
        self.retina_cls = nn.Conv2d(self.feat_channels, cls_out, 3, padding=1)
        self.retina_reg = nn.Conv2d(
            self.feat_channels, self.num_anchors * 4, 3, padding=1)

    def init_weights(self):
        """Initialise all convs with ``normal_init`` (std 0.01)."""
        for tower in (self.cls_convs, self.reg_convs):
            for module in tower:
                normal_init(module.conv, std=0.01)
        normal_init(
            self.retina_cls, std=0.01, bias=bias_init_with_prob(0.01))
        normal_init(self.retina_reg, std=0.01)

    def forward_single(self, x):
        """Run both towers on one feature map.

        Args:
            x: input feature map.

        Returns:
            tuple: ``(cls_score, bbox_pred)``.
        """
        cls_feat, reg_feat = x, x
        for layer in self.cls_convs:
            cls_feat = layer(cls_feat)
        for layer in self.reg_convs:
            reg_feat = layer(reg_feat)
        cls_score = self.retina_cls(cls_feat)
        bbox_pred = self.retina_reg(reg_feat)
        return cls_score, bbox_pred
84 | 


--------------------------------------------------------------------------------
/mmdet/models/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | from .hrnet import HRNet
2 | from .resnet import ResNet, make_res_layer
3 | from .resnext import ResNeXt
4 | from .ssd_vgg import SSDVGG
5 | 
6 | __all__ = ['ResNet', 'make_res_layer', 'ResNeXt', 'SSDVGG', 'HRNet']
7 | 


--------------------------------------------------------------------------------
/mmdet/models/bbox_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .bbox_head import BBoxHead
2 | from .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead
3 | from .double_bbox_head import DoubleConvFCBBoxHead
4 | 
5 | __all__ = [
6 |     'BBoxHead', 'ConvFCBBoxHead', 'SharedFCBBoxHead', 'DoubleConvFCBBoxHead'
7 | ]
8 | 


--------------------------------------------------------------------------------
/mmdet/models/builder.py:
--------------------------------------------------------------------------------
 1 | from torch import nn
 2 | 
 3 | from mmdet.utils import build_from_cfg
 4 | from .registry import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS,
 5 |                        ROI_EXTRACTORS, SHARED_HEADS)
 6 | 
 7 | 
 8 | def build(cfg, registry, default_args=None):
 9 |     if isinstance(cfg, list):
10 |         modules = [
11 |             build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg
12 |         ]
13 |         return nn.Sequential(*modules)
14 |     else:
15 |         return build_from_cfg(cfg, registry, default_args)
16 | 
17 | 
def build_backbone(cfg):
    """Build ``cfg`` against the ``BACKBONES`` registry."""
    return build(cfg, registry=BACKBONES)
20 | 
21 | 
def build_neck(cfg):
    """Build ``cfg`` against the ``NECKS`` registry."""
    return build(cfg, registry=NECKS)
24 | 
25 | 
def build_roi_extractor(cfg):
    """Build ``cfg`` against the ``ROI_EXTRACTORS`` registry."""
    return build(cfg, registry=ROI_EXTRACTORS)
28 | 
29 | 
def build_shared_head(cfg):
    """Build ``cfg`` against the ``SHARED_HEADS`` registry."""
    return build(cfg, registry=SHARED_HEADS)
32 | 
33 | 
def build_head(cfg):
    """Build ``cfg`` against the ``HEADS`` registry."""
    return build(cfg, registry=HEADS)
36 | 
37 | 
def build_loss(cfg):
    """Build ``cfg`` against the ``LOSSES`` registry."""
    return build(cfg, registry=LOSSES)
40 | 
41 | 
def build_detector(cfg, train_cfg=None, test_cfg=None):
    """Build ``cfg`` against the ``DETECTORS`` registry.

    ``train_cfg`` and ``test_cfg`` are passed on as default arguments.
    """
    defaults = dict(train_cfg=train_cfg, test_cfg=test_cfg)
    return build(cfg, DETECTORS, defaults)
44 | 


--------------------------------------------------------------------------------
/mmdet/models/detectors/__init__.py:
--------------------------------------------------------------------------------
 1 | from .base import BaseDetector
 2 | from .cascade_rcnn import CascadeRCNN
 3 | from .double_head_rcnn import DoubleHeadRCNN
 4 | from .fast_rcnn import FastRCNN
 5 | from .faster_rcnn import FasterRCNN
 6 | from .fcos import FCOS
 7 | from .grid_rcnn import GridRCNN
 8 | from .htc import HybridTaskCascade
 9 | from .mask_rcnn import MaskRCNN
10 | from .mask_scoring_rcnn import MaskScoringRCNN
11 | from .retinanet import RetinaNet
12 | from .rpn import RPN
13 | from .single_stage import SingleStageDetector
14 | from .two_stage import TwoStageDetector
15 | from .fovea import FOVEA
16 | 
17 | __all__ = [
18 |     'BaseDetector', 'SingleStageDetector', 'TwoStageDetector', 'RPN',
19 |     'FastRCNN', 'FasterRCNN', 'MaskRCNN', 'CascadeRCNN', 'HybridTaskCascade',
20 |     'DoubleHeadRCNN', 'RetinaNet', 'FCOS', 'GridRCNN', 'MaskScoringRCNN', 'FOVEA'
21 | ]
22 | 


--------------------------------------------------------------------------------
/mmdet/models/detectors/fast_rcnn.py:
--------------------------------------------------------------------------------
 1 | from ..registry import DETECTORS
 2 | from .two_stage import TwoStageDetector
 3 | 
 4 | 
 5 | @DETECTORS.register_module
 6 | class FastRCNN(TwoStageDetector):
 7 | 
 8 |     def __init__(self,
 9 |                  backbone,
10 |                  bbox_roi_extractor,
11 |                  bbox_head,
12 |                  train_cfg,
13 |                  test_cfg,
14 |                  neck=None,
15 |                  shared_head=None,
16 |                  mask_roi_extractor=None,
17 |                  mask_head=None,
18 |                  pretrained=None):
19 |         super(FastRCNN, self).__init__(
20 |             backbone=backbone,
21 |             neck=neck,
22 |             shared_head=shared_head,
23 |             bbox_roi_extractor=bbox_roi_extractor,
24 |             bbox_head=bbox_head,
25 |             train_cfg=train_cfg,
26 |             test_cfg=test_cfg,
27 |             mask_roi_extractor=mask_roi_extractor,
28 |             mask_head=mask_head,
29 |             pretrained=pretrained)
30 | 
31 |     def forward_test(self, imgs, img_metas, proposals, **kwargs):
32 |         for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]:
33 |             if not isinstance(var, list):
34 |                 raise TypeError('{} must be a list, but got {}'.format(
35 |                     name, type(var)))
36 | 
37 |         num_augs = len(imgs)
38 |         if num_augs != len(img_metas):
39 |             raise ValueError(
40 |                 'num of augmentations ({}) != num of image meta ({})'.format(
41 |                     len(imgs), len(img_metas)))
42 |         # TODO: remove the restriction of imgs_per_gpu == 1 when prepared
43 |         imgs_per_gpu = imgs[0].size(0)
44 |         assert imgs_per_gpu == 1
45 | 
46 |         if num_augs == 1:
47 |             return self.simple_test(imgs[0], img_metas[0], proposals[0],
48 |                                     **kwargs)
49 |         else:
50 |             return self.aug_test(imgs, img_metas, proposals, **kwargs)
51 | 


--------------------------------------------------------------------------------
/mmdet/models/detectors/faster_rcnn.py:
--------------------------------------------------------------------------------
 1 | from ..registry import DETECTORS
 2 | from .two_stage import TwoStageDetector
 3 | 
 4 | 
 5 | @DETECTORS.register_module
 6 | class FasterRCNN(TwoStageDetector):
 7 | 
 8 |     def __init__(self,
 9 |                  backbone,
10 |                  rpn_head,
11 |                  bbox_roi_extractor,
12 |                  bbox_head,
13 |                  train_cfg,
14 |                  test_cfg,
15 |                  neck=None,
16 |                  shared_head=None,
17 |                  pretrained=None):
18 |         super(FasterRCNN, self).__init__(
19 |             backbone=backbone,
20 |             neck=neck,
21 |             shared_head=shared_head,
22 |             rpn_head=rpn_head,
23 |             bbox_roi_extractor=bbox_roi_extractor,
24 |             bbox_head=bbox_head,
25 |             train_cfg=train_cfg,
26 |             test_cfg=test_cfg,
27 |             pretrained=pretrained)
28 | 


--------------------------------------------------------------------------------
/mmdet/models/detectors/fcos.py:
--------------------------------------------------------------------------------
 1 | from ..registry import DETECTORS
 2 | from .single_stage import SingleStageDetector
 3 | 
 4 | 
 5 | @DETECTORS.register_module
 6 | class FCOS(SingleStageDetector):
 7 | 
 8 |     def __init__(self,
 9 |                  backbone,
10 |                  neck,
11 |                  bbox_head,
12 |                  train_cfg=None,
13 |                  test_cfg=None,
14 |                  pretrained=None):
15 |         super(FCOS, self).__init__(backbone, neck, bbox_head, train_cfg,
16 |                                    test_cfg, pretrained)
17 | 


--------------------------------------------------------------------------------
/mmdet/models/detectors/fovea.py:
--------------------------------------------------------------------------------
 1 | from .single_stage import SingleStageDetector
 2 | from ..registry import DETECTORS
 3 | 
 4 | 
 5 | @DETECTORS.register_module
 6 | class FOVEA(SingleStageDetector):
 7 | 
 8 |     def __init__(self,
 9 |                  backbone,
10 |                  neck,
11 |                  bbox_head,
12 |                  train_cfg=None,
13 |                  test_cfg=None,
14 |                  pretrained=None):
15 |         super(FOVEA, self).__init__(backbone, neck, bbox_head, train_cfg,
16 |                                    test_cfg, pretrained)


--------------------------------------------------------------------------------
/mmdet/models/detectors/mask_rcnn.py:
--------------------------------------------------------------------------------
 1 | from ..registry import DETECTORS
 2 | from .two_stage import TwoStageDetector
 3 | 
 4 | 
 5 | @DETECTORS.register_module
 6 | class MaskRCNN(TwoStageDetector):
 7 | 
 8 |     def __init__(self,
 9 |                  backbone,
10 |                  rpn_head,
11 |                  bbox_roi_extractor,
12 |                  bbox_head,
13 |                  mask_roi_extractor,
14 |                  mask_head,
15 |                  train_cfg,
16 |                  test_cfg,
17 |                  neck=None,
18 |                  shared_head=None,
19 |                  pretrained=None):
20 |         super(MaskRCNN, self).__init__(
21 |             backbone=backbone,
22 |             neck=neck,
23 |             shared_head=shared_head,
24 |             rpn_head=rpn_head,
25 |             bbox_roi_extractor=bbox_roi_extractor,
26 |             bbox_head=bbox_head,
27 |             mask_roi_extractor=mask_roi_extractor,
28 |             mask_head=mask_head,
29 |             train_cfg=train_cfg,
30 |             test_cfg=test_cfg,
31 |             pretrained=pretrained)
32 | 


--------------------------------------------------------------------------------
/mmdet/models/detectors/retinanet.py:
--------------------------------------------------------------------------------
 1 | from ..registry import DETECTORS
 2 | from .single_stage import SingleStageDetector
 3 | 
 4 | 
 5 | @DETECTORS.register_module
 6 | class RetinaNet(SingleStageDetector):
 7 | 
 8 |     def __init__(self,
 9 |                  backbone,
10 |                  neck,
11 |                  bbox_head,
12 |                  train_cfg=None,
13 |                  test_cfg=None,
14 |                  pretrained=None):
15 |         super(RetinaNet, self).__init__(backbone, neck, bbox_head, train_cfg,
16 |                                         test_cfg, pretrained)
17 | 


--------------------------------------------------------------------------------
/mmdet/models/detectors/rpn.py:
--------------------------------------------------------------------------------
 1 | import mmcv
 2 | 
 3 | from mmdet.core import bbox_mapping, tensor2imgs
 4 | from .. import builder
 5 | from ..registry import DETECTORS
 6 | from .base import BaseDetector
 7 | from .test_mixins import RPNTestMixin
 8 | 
 9 | 
@DETECTORS.register_module
class RPN(BaseDetector, RPNTestMixin):
    """Detector made of a backbone, an optional neck and an RPN head.

    The test methods return the proposals of the first image only, as a
    numpy array.
    """

    def __init__(self,
                 backbone,
                 neck,
                 rpn_head,
                 train_cfg,
                 test_cfg,
                 pretrained=None):
        super().__init__()
        self.backbone = builder.build_backbone(backbone)
        # ``self.neck`` always exists; it is None when no neck is configured.
        if neck is None:
            self.neck = None
        else:
            self.neck = builder.build_neck(neck)
        self.rpn_head = builder.build_head(rpn_head)
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        self.init_weights(pretrained=pretrained)

    def init_weights(self, pretrained=None):
        """Initialise the base class, backbone, neck (if any) and head."""
        super().init_weights(pretrained)
        self.backbone.init_weights(pretrained=pretrained)
        if self.with_neck:
            self.neck.init_weights()
        self.rpn_head.init_weights()

    def extract_feat(self, img):
        """Run the backbone, then the neck when one is present."""
        feats = self.backbone(img)
        if not self.with_neck:
            return feats
        return self.neck(feats)

    def forward_dummy(self, img):
        """Return the raw RPN head outputs for ``img``."""
        return self.rpn_head(self.extract_feat(img))

    def forward_train(self,
                      img,
                      img_meta,
                      gt_bboxes=None,
                      gt_bboxes_ignore=None):
        """Compute the RPN head losses for a batch.

        When ``train_cfg.rpn`` has a truthy ``debug`` entry, the input
        images are converted with ``tensor2imgs`` and stored on the head.
        """
        if self.train_cfg.rpn.get('debug', False):
            self.rpn_head.debug_imgs = tensor2imgs(img)

        head_outs = self.rpn_head(self.extract_feat(img))

        # Head outputs followed by the targets, metas and RPN train config.
        loss_args = head_outs + (gt_bboxes, img_meta, self.train_cfg.rpn)
        return self.rpn_head.loss(
            *loss_args, gt_bboxes_ignore=gt_bboxes_ignore)

    def simple_test(self, img, img_meta, rescale=False):
        """Return the proposals of the first image as a numpy array.

        With ``rescale`` set, the first four columns of every proposal
        tensor are divided in place by the image's ``scale_factor``.
        """
        feats = self.extract_feat(img)
        proposal_list = self.simple_test_rpn(feats, img_meta,
                                             self.test_cfg.rpn)
        if rescale:
            for props, meta in zip(proposal_list, img_meta):
                props[:, :4] /= meta['scale_factor']
        # TODO: remove this restriction
        first = proposal_list[0]
        return first.cpu().numpy()

    def aug_test(self, imgs, img_metas, rescale=False):
        """Return the first image's augmented-test proposals as numpy.

        Without ``rescale``, the first four columns of each proposal
        tensor are passed through ``bbox_mapping`` using the metas of the
        first augmentation.
        """
        feats = self.extract_feats(imgs)
        proposal_list = self.aug_test_rpn(feats, img_metas,
                                          self.test_cfg.rpn)
        if not rescale:
            for props, meta in zip(proposal_list, img_metas[0]):
                props[:, :4] = bbox_mapping(props[:, :4], meta['img_shape'],
                                            meta['scale_factor'],
                                            meta['flip'])
        # TODO: remove this restriction
        first = proposal_list[0]
        return first.cpu().numpy()

    def show_result(self, data, result, img_norm_cfg, dataset=None, top_k=20):
        """Show RPN proposals on the image.

        Although we assume batch size is 1, this method supports arbitrary
        batch size.
        """
        img_metas = data['img_meta'][0].data[0]
        imgs = tensor2imgs(data['img'][0], **img_norm_cfg)
        assert len(imgs) == len(img_metas)
        for img, meta in zip(imgs, img_metas):
            # Crop to the height/width recorded in the meta.
            height, width, _ = meta['img_shape']
            mmcv.imshow_bboxes(img[:height, :width, :], result, top_k=top_k)
98 | 


--------------------------------------------------------------------------------
/mmdet/models/detectors/single_stage.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | 
 3 | from mmdet.core import bbox2result
 4 | from .. import builder
 5 | from ..registry import DETECTORS
 6 | from .base import BaseDetector
 7 | 
 8 | 
 9 | @DETECTORS.register_module
10 | class SingleStageDetector(BaseDetector):
11 | 
12 |     def __init__(self,
13 |                  backbone,
14 |                  neck=None,
15 |                  bbox_head=None,
16 |                  train_cfg=None,
17 |                  test_cfg=None,
18 |                  pretrained=None):
19 |         super(SingleStageDetector, self).__init__()
20 |         self.backbone = builder.build_backbone(backbone)
21 |         if neck is not None:
22 |             self.neck = builder.build_neck(neck)
23 |         self.bbox_head = builder.build_head(bbox_head)
24 |         self.train_cfg = train_cfg
25 |         self.test_cfg = test_cfg
26 |         self.init_weights(pretrained=pretrained)
27 | 
28 |     def init_weights(self, pretrained=None):
29 |         super(SingleStageDetector, self).init_weights(pretrained)
30 |         self.backbone.init_weights(pretrained=pretrained)
31 |         if self.with_neck:
32 |             if isinstance(self.neck, nn.Sequential):
33 |                 for m in self.neck:
34 |                     m.init_weights()
35 |             else:
36 |                 self.neck.init_weights()
37 |         self.bbox_head.init_weights()
38 | 
39 |     def extract_feat(self, img):
40 |         x = self.backbone(img)
41 |         if self.with_neck:
42 |             x = self.neck(x)
43 |         return x
44 | 
45 |     def forward_dummy(self, img):
46 |         x = self.extract_feat(img)
47 |         outs = self.bbox_head(x)
48 |         return outs
49 | 
50 |     def forward_train(self,
51 |                       img,
52 |                       img_metas,
53 |                       gt_bboxes,
54 |                       gt_labels,
55 |                       gt_bboxes_ignore=None):
56 |         x = self.extract_feat(img)
57 |         outs = self.bbox_head(x)
58 |         loss_inputs = outs + (gt_bboxes, gt_labels, img_metas, self.train_cfg)
59 |         losses = self.bbox_head.loss(
60 |             *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
61 |         return losses
62 | 
63 |     def simple_test(self, img, img_meta, rescale=False):
64 |         x = self.extract_feat(img)
65 |         outs = self.bbox_head(x)
66 |         bbox_inputs = outs + (img_meta, self.test_cfg, rescale)
67 |         bbox_list = self.bbox_head.get_bboxes(*bbox_inputs)
68 |         bbox_results = [
69 |             bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes)
70 |             for det_bboxes, det_labels in bbox_list
71 |         ]
72 |         return bbox_results[0]
73 | 
74 |     def aug_test(self, imgs, img_metas, rescale=False):
75 |         raise NotImplementedError
76 | 


--------------------------------------------------------------------------------
/mmdet/models/losses/__init__.py:
--------------------------------------------------------------------------------
 1 | from .accuracy import Accuracy, accuracy
 2 | from .balanced_l1_loss import BalancedL1Loss, balanced_l1_loss
 3 | from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy,
 4 |                                  cross_entropy, mask_cross_entropy)
 5 | from .focal_loss import FocalLoss, sigmoid_focal_loss
 6 | from .ghm_loss import GHMC, GHMR
 7 | from .iou_loss import BoundedIoULoss, IoULoss, bounded_iou_loss, iou_loss
 8 | from .mse_loss import MSELoss, mse_loss
 9 | from .smooth_l1_loss import SmoothL1Loss, smooth_l1_loss
10 | from .utils import reduce_loss, weight_reduce_loss, weighted_loss
11 | 
12 | __all__ = [
13 |     'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy',
14 |     'mask_cross_entropy', 'CrossEntropyLoss', 'sigmoid_focal_loss',
15 |     'FocalLoss', 'smooth_l1_loss', 'SmoothL1Loss', 'balanced_l1_loss',
16 |     'BalancedL1Loss', 'mse_loss', 'MSELoss', 'iou_loss', 'bounded_iou_loss',
17 |     'IoULoss', 'BoundedIoULoss', 'GHMC', 'GHMR', 'reduce_loss',
18 |     'weight_reduce_loss', 'weighted_loss'
19 | ]
20 | 


--------------------------------------------------------------------------------
/mmdet/models/losses/accuracy.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | 
 3 | 
 4 | def accuracy(pred, target, topk=1):
 5 |     assert isinstance(topk, (int, tuple))
 6 |     if isinstance(topk, int):
 7 |         topk = (topk, )
 8 |         return_single = True
 9 |     else:
10 |         return_single = False
11 | 
12 |     maxk = max(topk)
13 |     _, pred_label = pred.topk(maxk, dim=1)
14 |     pred_label = pred_label.t()
15 |     correct = pred_label.eq(target.view(1, -1).expand_as(pred_label))
16 | 
17 |     res = []
18 |     for k in topk:
19 |         correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
20 |         res.append(correct_k.mul_(100.0 / pred.size(0)))
21 |     return res[0] if return_single else res
22 | 
23 | 
class Accuracy(nn.Module):
    """Module wrapper around ``accuracy`` with a fixed ``topk``."""

    def __init__(self, topk=(1, )):
        super(Accuracy, self).__init__()
        # Stored as given and handed to ``accuracy`` on every call.
        self.topk = topk

    def forward(self, pred, target):
        """Return ``accuracy(pred, target, self.topk)``."""
        result = accuracy(pred, target, self.topk)
        return result
32 | 


--------------------------------------------------------------------------------
/mmdet/models/losses/balanced_l1_loss.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | import torch.nn as nn
 4 | 
 5 | from ..registry import LOSSES
 6 | from .utils import weighted_loss
 7 | 
 8 | 
 9 | @weighted_loss
10 | def balanced_l1_loss(pred,
11 |                      target,
12 |                      beta=1.0,
13 |                      alpha=0.5,
14 |                      gamma=1.5,
15 |                      reduction='mean'):
16 |     assert beta > 0
17 |     assert pred.size() == target.size() and target.numel() > 0
18 | 
19 |     diff = torch.abs(pred - target)
20 |     b = np.e**(gamma / alpha) - 1
21 |     loss = torch.where(
22 |         diff < beta, alpha / b *
23 |         (b * diff + 1) * torch.log(b * diff / beta + 1) - alpha * diff,
24 |         gamma * diff + gamma / b - alpha * beta)
25 | 
26 |     return loss
27 | 
28 | 
@LOSSES.register_module
class BalancedL1Loss(nn.Module):
    """Balanced L1 Loss

    arXiv: https://arxiv.org/pdf/1904.02701.pdf (CVPR 2019)

    Stores ``alpha``, ``gamma``, ``beta``, the default ``reduction`` and a
    scalar ``loss_weight`` that multiplies the result of
    ``balanced_l1_loss``.
    """

    def __init__(self,
                 alpha=0.5,
                 gamma=1.5,
                 beta=1.0,
                 reduction='mean',
                 loss_weight=1.0):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.beta = beta
        self.reduction = reduction
        self.loss_weight = loss_weight

    def forward(self,
                pred,
                target,
                weight=None,
                avg_factor=None,
                reduction_override=None,
                **kwargs):
        """Return ``loss_weight * balanced_l1_loss(...)``.

        ``reduction_override``, when truthy, replaces the reduction chosen
        at construction time for this call only.
        """
        assert reduction_override in (None, 'none', 'mean', 'sum')
        reduction = reduction_override or self.reduction
        raw_loss = balanced_l1_loss(
            pred,
            target,
            weight,
            alpha=self.alpha,
            gamma=self.gamma,
            beta=self.beta,
            reduction=reduction,
            avg_factor=avg_factor,
            **kwargs)
        return self.loss_weight * raw_loss
70 | 


--------------------------------------------------------------------------------
/mmdet/models/losses/focal_loss.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | import torch.nn.functional as F
 3 | 
 4 | from mmdet.ops import sigmoid_focal_loss as _sigmoid_focal_loss
 5 | from ..registry import LOSSES
 6 | from .utils import weight_reduce_loss
 7 | 
 8 | 
 9 | # This method is only for debugging
10 | def py_sigmoid_focal_loss(pred,
11 |                           target,
12 |                           weight=None,
13 |                           gamma=2.0,
14 |                           alpha=0.25,
15 |                           reduction='mean',
16 |                           avg_factor=None):
17 |     pred_sigmoid = pred.sigmoid()
18 |     target = target.type_as(pred)
19 |     pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target)
20 |     focal_weight = (alpha * target + (1 - alpha) *
21 |                     (1 - target)) * pt.pow(gamma)
22 |     loss = F.binary_cross_entropy_with_logits(
23 |         pred, target, reduction='none') * focal_weight
24 |     loss = weight_reduce_loss(loss, weight, reduction, avg_factor)
25 |     return loss
26 | 
27 | 
def sigmoid_focal_loss(pred,
                       target,
                       weight=None,
                       gamma=2.0,
                       alpha=0.25,
                       reduction='mean',
                       avg_factor=None):
    """Sigmoid focal loss computed by the ``mmdet.ops`` implementation.

    Args:
        pred: Passed positionally to the op.
        target: Passed positionally to the op.
        weight (Tensor, optional): Reshaped to ``(-1, 1)`` before being
            applied to the loss.
        gamma (float): Passed positionally to the op.
        alpha (float): Passed positionally to the op.
        reduction (str): Passed to ``weight_reduce_loss``.
        avg_factor (float, optional): Passed to ``weight_reduce_loss``.

    Returns:
        Tensor: Result of ``weight_reduce_loss``.
    """
    # Function.apply does not accept keyword arguments, so the decorator
    # "weighted_loss" is not applicable
    elementwise = _sigmoid_focal_loss(pred, target, gamma, alpha)
    # TODO: find a proper way to handle the shape of weight
    column_weight = None if weight is None else weight.view(-1, 1)
    return weight_reduce_loss(elementwise, column_weight, reduction,
                              avg_factor)
43 | 
44 | 
@LOSSES.register_module
class FocalLoss(nn.Module):
    """Focal loss module backed by ``sigmoid_focal_loss``.

    Only the sigmoid variant exists: the constructor asserts that
    ``use_sigmoid`` is ``True``.
    """

    def __init__(self,
                 use_sigmoid=True,
                 gamma=2.0,
                 alpha=0.25,
                 reduction='mean',
                 loss_weight=1.0):
        super().__init__()
        assert use_sigmoid is True, 'Only sigmoid focal loss supported now.'
        self.use_sigmoid = use_sigmoid
        self.gamma = gamma
        self.alpha = alpha
        self.reduction = reduction
        self.loss_weight = loss_weight

    def forward(self,
                pred,
                target,
                weight=None,
                avg_factor=None,
                reduction_override=None):
        """Return ``loss_weight * sigmoid_focal_loss(...)``.

        ``reduction_override``, when truthy, replaces the reduction chosen
        at construction time for this call only.

        Raises:
            NotImplementedError: If ``self.use_sigmoid`` is falsy.
        """
        assert reduction_override in (None, 'none', 'mean', 'sum')
        reduction = reduction_override or self.reduction
        if not self.use_sigmoid:
            raise NotImplementedError
        raw_loss = sigmoid_focal_loss(
            pred,
            target,
            weight,
            gamma=self.gamma,
            alpha=self.alpha,
            reduction=reduction,
            avg_factor=avg_factor)
        return self.loss_weight * raw_loss
83 | 


--------------------------------------------------------------------------------
/mmdet/models/losses/mse_loss.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | import torch.nn.functional as F
 3 | 
 4 | from ..registry import LOSSES
 5 | from .utils import weighted_loss
 6 | 
 7 | mse_loss = weighted_loss(F.mse_loss)
 8 | 
 9 | 
@LOSSES.register_module
class MSELoss(nn.Module):
    """MSE loss module with a default reduction and a scalar loss weight.

    Args:
        reduction (str): Default reduction passed to ``mse_loss``.
        loss_weight (float): Factor multiplied onto the computed loss.
    """

    def __init__(self, reduction='mean', loss_weight=1.0):
        super().__init__()
        self.reduction = reduction
        self.loss_weight = loss_weight

    def forward(self,
                pred,
                target,
                weight=None,
                avg_factor=None,
                reduction_override=None):
        """Return ``loss_weight * mse_loss(...)``.

        Args:
            pred (Tensor): Predictions.
            target (Tensor): Targets.
            weight (Tensor, optional): Passed to ``mse_loss``.
            avg_factor (float, optional): Passed to ``mse_loss``.
            reduction_override (str, optional): One of ``'none'``,
                ``'mean'`` or ``'sum'``. When given it replaces
                ``self.reduction`` for this call, matching the other loss
                modules in this package.

        Returns:
            Tensor: The weighted loss.
        """
        assert reduction_override in (None, 'none', 'mean', 'sum')
        reduction = (
            reduction_override if reduction_override else self.reduction)
        loss = self.loss_weight * mse_loss(
            pred,
            target,
            weight,
            reduction=reduction,
            avg_factor=avg_factor)
        return loss
26 | 


--------------------------------------------------------------------------------
/mmdet/models/losses/smooth_l1_loss.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
 4 | from ..registry import LOSSES
 5 | from .utils import weighted_loss
 6 | 
 7 | 
 8 | @weighted_loss
 9 | def smooth_l1_loss(pred, target, beta=1.0):
10 |     assert beta > 0
11 |     assert pred.size() == target.size() and target.numel() > 0
12 |     diff = torch.abs(pred - target)
13 |     loss = torch.where(diff < beta, 0.5 * diff * diff / beta,
14 |                        diff - 0.5 * beta)
15 |     return loss
16 | 
17 | 
@LOSSES.register_module
class SmoothL1Loss(nn.Module):
    """Smooth L1 loss module.

    Stores ``beta``, the default ``reduction`` and a scalar
    ``loss_weight`` that multiplies the result of ``smooth_l1_loss``.
    """

    def __init__(self, beta=1.0, reduction='mean', loss_weight=1.0):
        super().__init__()
        self.beta = beta
        self.reduction = reduction
        self.loss_weight = loss_weight

    def forward(self,
                pred,
                target,
                weight=None,
                avg_factor=None,
                reduction_override=None,
                **kwargs):
        """Return ``loss_weight * smooth_l1_loss(...)``.

        ``reduction_override``, when truthy, replaces the reduction chosen
        at construction time for this call only.
        """
        assert reduction_override in (None, 'none', 'mean', 'sum')
        reduction = reduction_override or self.reduction
        raw_loss = smooth_l1_loss(
            pred,
            target,
            weight,
            beta=self.beta,
            reduction=reduction,
            avg_factor=avg_factor,
            **kwargs)
        return self.loss_weight * raw_loss
46 | 


--------------------------------------------------------------------------------
/mmdet/models/losses/utils.py:
--------------------------------------------------------------------------------
 1 | import functools
 2 | 
 3 | import torch.nn.functional as F
 4 | 
 5 | 
 6 | def reduce_loss(loss, reduction):
 7 |     """Reduce loss as specified.
 8 | 
 9 |     Args:
10 |         loss (Tensor): Elementwise loss tensor.
11 |         reduction (str): Options are "none", "mean" and "sum".
12 | 
13 |     Return:
14 |         Tensor: Reduced loss tensor.
15 |     """
16 |     reduction_enum = F._Reduction.get_enum(reduction)
17 |     # none: 0, elementwise_mean:1, sum: 2
18 |     if reduction_enum == 0:
19 |         return loss
20 |     elif reduction_enum == 1:
21 |         return loss.mean()
22 |     elif reduction_enum == 2:
23 |         return loss.sum()
24 | 
25 | 
def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None):
    """Weight an element-wise loss, then reduce it.

    Args:
        loss (Tensor): Element-wise loss.
        weight (Tensor): Element-wise weights.
        reduction (str): Same as built-in losses of PyTorch.
        avg_factor (float): Average factor when computing the mean of losses.

    Returns:
        Tensor: Processed loss values.

    Raises:
        ValueError: If ``avg_factor`` is given together with a reduction
            other than "mean" or "none".
    """
    if weight is not None:
        loss = loss * weight

    # Without an averaging factor this is a plain reduction.
    if avg_factor is None:
        return reduce_loss(loss, reduction)

    # With one, "mean" divides the summed loss by ``avg_factor``.
    if reduction == 'mean':
        return loss.sum() / avg_factor
    # "none" leaves the weighted loss as is; anything else is rejected.
    if reduction != 'none':
        raise ValueError('avg_factor can not be used with reduction="sum"')
    return loss
53 | 
54 | 
def weighted_loss(loss_func):
    """Decorate an element-wise loss with weighting and reduction.

    The wrapped function must accept ``loss_func(pred, target, **kwargs)``
    and return the element-wise loss without any reduction. The returned
    function has the signature ``loss_func(pred, target, weight=None,
    reduction='mean', avg_factor=None, **kwargs)`` and passes the
    element-wise result through ``weight_reduce_loss``.

    :Example:

    >>> @weighted_loss
    ... def l1_loss(pred, target):
    ...     return (pred - target).abs()

    >>> pred = torch.Tensor([0, 2, 3])
    >>> target = torch.Tensor([1, 1, 1])
    >>> weight = torch.Tensor([1, 0, 1])

    >>> l1_loss(pred, target)
    tensor(1.3333)
    >>> l1_loss(pred, target, weight)
    tensor(1.)
    >>> l1_loss(pred, target, reduction='none')
    tensor([1., 1., 2.])
    >>> l1_loss(pred, target, weight, avg_factor=2)
    tensor(1.5000)
    """

    @functools.wraps(loss_func)
    def _weighted(pred,
                  target,
                  weight=None,
                  reduction='mean',
                  avg_factor=None,
                  **kwargs):
        # Remaining keyword arguments belong to the wrapped loss.
        elementwise = loss_func(pred, target, **kwargs)
        return weight_reduce_loss(elementwise, weight, reduction, avg_factor)

    return _weighted
98 | 


--------------------------------------------------------------------------------
/mmdet/models/mask_heads/__init__.py:
--------------------------------------------------------------------------------
 1 | from .fcn_mask_head import FCNMaskHead
 2 | from .fused_semantic_head import FusedSemanticHead
 3 | from .grid_head import GridHead
 4 | from .htc_mask_head import HTCMaskHead
 5 | from .maskiou_head import MaskIoUHead
 6 | 
 7 | __all__ = [
 8 |     'FCNMaskHead', 'HTCMaskHead', 'FusedSemanticHead', 'GridHead',
 9 |     'MaskIoUHead'
10 | ]
11 | 


--------------------------------------------------------------------------------
/mmdet/models/mask_heads/htc_mask_head.py:
--------------------------------------------------------------------------------
 1 | from ..registry import HEADS
 2 | from ..utils import ConvModule
 3 | from .fcn_mask_head import FCNMaskHead
 4 | 
 5 | 
 6 | @HEADS.register_module
 7 | class HTCMaskHead(FCNMaskHead):
 8 | 
 9 |     def __init__(self, *args, **kwargs):
10 |         super(HTCMaskHead, self).__init__(*args, **kwargs)
11 |         self.conv_res = ConvModule(
12 |             self.conv_out_channels,
13 |             self.conv_out_channels,
14 |             1,
15 |             conv_cfg=self.conv_cfg,
16 |             norm_cfg=self.norm_cfg)
17 | 
18 |     def init_weights(self):
19 |         super(HTCMaskHead, self).init_weights()
20 |         self.conv_res.init_weights()
21 | 
22 |     def forward(self, x, res_feat=None, return_logits=True, return_feat=True):
23 |         if res_feat is not None:
24 |             res_feat = self.conv_res(res_feat)
25 |             x = x + res_feat
26 |         for conv in self.convs:
27 |             x = conv(x)
28 |         res_feat = x
29 |         outs = []
30 |         if return_logits:
31 |             x = self.upsample(x)
32 |             if self.upsample_method == 'deconv':
33 |                 x = self.relu(x)
34 |             mask_pred = self.conv_logits(x)
35 |             outs.append(mask_pred)
36 |         if return_feat:
37 |             outs.append(res_feat)
38 |         return outs if len(outs) > 1 else outs[0]
39 | 


--------------------------------------------------------------------------------
/mmdet/models/necks/__init__.py:
--------------------------------------------------------------------------------
1 | from .bfp import BFP
2 | from .fpn import FPN
3 | from .hrfpn import HRFPN
4 | 
5 | __all__ = ['FPN', 'BFP', 'HRFPN']
6 | 


--------------------------------------------------------------------------------
/mmdet/models/necks/hrfpn.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from mmcv.cnn.weight_init import caffe2_xavier_init
  5 | from torch.utils.checkpoint import checkpoint
  6 | 
  7 | from ..registry import NECKS
  8 | from ..utils import ConvModule
  9 | 
 10 | 
 11 | @NECKS.register_module
 12 | class HRFPN(nn.Module):
 13 |     """HRFPN (High Resolution Feature Pyrmamids)
 14 | 
 15 |     arXiv: https://arxiv.org/abs/1904.04514
 16 | 
 17 |     Args:
 18 |         in_channels (list): number of channels for each branch.
 19 |         out_channels (int): output channels of feature pyramids.
 20 |         num_outs (int): number of output stages.
 21 |         pooling_type (str): pooling for generating feature pyramids
 22 |             from {MAX, AVG}.
 23 |         conv_cfg (dict): dictionary to construct and config conv layer.
 24 |         norm_cfg (dict): dictionary to construct and config norm layer.
 25 |         with_cp  (bool): Use checkpoint or not. Using checkpoint will save some
 26 |             memory while slowing down the training speed.
 27 |         stride (int): stride of 3x3 convolutional layers
 28 |     """
 29 | 
 30 |     def __init__(self,
 31 |                  in_channels,
 32 |                  out_channels,
 33 |                  num_outs=5,
 34 |                  pooling_type='AVG',
 35 |                  conv_cfg=None,
 36 |                  norm_cfg=None,
 37 |                  with_cp=False,
 38 |                  stride=1):
 39 |         super(HRFPN, self).__init__()
 40 |         assert isinstance(in_channels, list)
 41 |         self.in_channels = in_channels
 42 |         self.out_channels = out_channels
 43 |         self.num_ins = len(in_channels)
 44 |         self.num_outs = num_outs
 45 |         self.with_cp = with_cp
 46 |         self.conv_cfg = conv_cfg
 47 |         self.norm_cfg = norm_cfg
 48 | 
 49 |         self.reduction_conv = ConvModule(
 50 |             sum(in_channels),
 51 |             out_channels,
 52 |             kernel_size=1,
 53 |             conv_cfg=self.conv_cfg,
 54 |             activation=None)
 55 | 
 56 |         self.fpn_convs = nn.ModuleList()
 57 |         for i in range(self.num_outs):
 58 |             self.fpn_convs.append(
 59 |                 ConvModule(
 60 |                     out_channels,
 61 |                     out_channels,
 62 |                     kernel_size=3,
 63 |                     padding=1,
 64 |                     stride=stride,
 65 |                     conv_cfg=self.conv_cfg,
 66 |                     activation=None))
 67 | 
 68 |         if pooling_type == 'MAX':
 69 |             self.pooling = F.max_pool2d
 70 |         else:
 71 |             self.pooling = F.avg_pool2d
 72 | 
 73 |     def init_weights(self):
 74 |         for m in self.modules():
 75 |             if isinstance(m, nn.Conv2d):
 76 |                 caffe2_xavier_init(m)
 77 | 
 78 |     def forward(self, inputs):
 79 |         assert len(inputs) == self.num_ins
 80 |         outs = [inputs[0]]
 81 |         for i in range(1, self.num_ins):
 82 |             outs.append(
 83 |                 F.interpolate(inputs[i], scale_factor=2**i, mode='bilinear'))
 84 |         out = torch.cat(outs, dim=1)
 85 |         if out.requires_grad and self.with_cp:
 86 |             out = checkpoint(self.reduction_conv, out)
 87 |         else:
 88 |             out = self.reduction_conv(out)
 89 |         outs = [out]
 90 |         for i in range(1, self.num_outs):
 91 |             outs.append(self.pooling(out, kernel_size=2**i, stride=2**i))
 92 |         outputs = []
 93 | 
 94 |         for i in range(self.num_outs):
 95 |             if outs[i].requires_grad and self.with_cp:
 96 |                 tmp_out = checkpoint(self.fpn_convs[i], outs[i])
 97 |             else:
 98 |                 tmp_out = self.fpn_convs[i](outs[i])
 99 |             outputs.append(tmp_out)
100 |         return tuple(outputs)
101 | 


--------------------------------------------------------------------------------
/mmdet/models/plugins/__init__.py:
--------------------------------------------------------------------------------
1 | from .generalized_attention import GeneralizedAttention
2 | from .non_local import NonLocal2D
3 | 
4 | __all__ = ['NonLocal2D', 'GeneralizedAttention']
5 | 


--------------------------------------------------------------------------------
/mmdet/models/registry.py:
--------------------------------------------------------------------------------
 1 | from mmdet.utils import Registry
 2 | 
# One Registry instance per kind of model component. Component classes add
# themselves through the ``register_module`` decorator, e.g.
# ``@NECKS.register_module`` or ``@HEADS.register_module``.
BACKBONES = Registry('backbone')
NECKS = Registry('neck')
ROI_EXTRACTORS = Registry('roi_extractor')
SHARED_HEADS = Registry('shared_head')
HEADS = Registry('head')
LOSSES = Registry('loss')
DETECTORS = Registry('detector')
10 | 


--------------------------------------------------------------------------------
/mmdet/models/roi_extractors/__init__.py:
--------------------------------------------------------------------------------
1 | from .single_level import SingleRoIExtractor
2 | 
3 | __all__ = ['SingleRoIExtractor']
4 | 


--------------------------------------------------------------------------------
/mmdet/models/shared_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .res_layer import ResLayer
2 | 
3 | __all__ = ['ResLayer']
4 | 


--------------------------------------------------------------------------------
/mmdet/models/shared_heads/res_layer.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | import torch.nn as nn
 4 | from mmcv.cnn import constant_init, kaiming_init
 5 | from mmcv.runner import load_checkpoint
 6 | 
 7 | from mmdet.core import auto_fp16
 8 | from ..backbones import ResNet, make_res_layer
 9 | from ..registry import SHARED_HEADS
10 | 
11 | 
@SHARED_HEADS.register_module
class ResLayer(nn.Module):
    """A single ResNet stage wrapped as a shared head.

    The stage is built with ``make_res_layer`` from the block type and block
    count that ``ResNet.arch_settings`` lists for ``depth``, and registered
    as the submodule ``layer{stage + 1}``.

    Args:
        depth: key into ``ResNet.arch_settings``.
        stage (int): index of the stage to build.
        stride (int): forwarded to ``make_res_layer``.
        dilation (int): forwarded to ``make_res_layer``.
        style (str): forwarded to ``make_res_layer``.
        norm_cfg (dict): norm layer config forwarded to ``make_res_layer``.
        norm_eval (bool): if True, ``train()`` keeps every ``BatchNorm2d``
            submodule in eval mode.
        with_cp (bool): forwarded to ``make_res_layer``.
        dcn: forwarded to ``make_res_layer``.
    """

    def __init__(self,
                 depth,
                 stage=3,
                 stride=2,
                 dilation=1,
                 style='pytorch',
                 norm_cfg=dict(type='BN', requires_grad=True),
                 norm_eval=True,
                 with_cp=False,
                 dcn=None):
        super(ResLayer, self).__init__()
        self.norm_eval = norm_eval
        self.norm_cfg = norm_cfg
        self.stage = stage
        self.fp16_enabled = False

        block, blocks_per_stage = ResNet.arch_settings[depth]
        num_blocks = blocks_per_stage[stage]
        # channel widths double with every stage, starting from 64
        out_planes = 64 * 2**stage
        in_planes = 64 * 2**(stage - 1) * block.expansion

        layer_name = 'layer{}'.format(stage + 1)
        self.add_module(
            layer_name,
            make_res_layer(
                block,
                in_planes,
                out_planes,
                num_blocks,
                stride=stride,
                dilation=dilation,
                style=style,
                with_cp=with_cp,
                norm_cfg=self.norm_cfg,
                dcn=dcn))

    def init_weights(self, pretrained=None):
        """Load a checkpoint (str path) or initialize from scratch (None).

        Raises:
            TypeError: if ``pretrained`` is neither a str nor None.
        """
        if pretrained is None:
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    kaiming_init(m)
                elif isinstance(m, nn.BatchNorm2d):
                    constant_init(m, 1)
            return
        if not isinstance(pretrained, str):
            raise TypeError('pretrained must be a str or None')
        load_checkpoint(
            self, pretrained, strict=False, logger=logging.getLogger())

    @auto_fp16()
    def forward(self, x):
        """Apply the wrapped residual stage to ``x``."""
        stage_module = getattr(self, 'layer{}'.format(self.stage + 1))
        return stage_module(x)

    def train(self, mode=True):
        """Switch mode, then re-freeze BatchNorm layers if ``norm_eval``."""
        super(ResLayer, self).train(mode)
        if not self.norm_eval:
            return
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()
73 | 


--------------------------------------------------------------------------------
/mmdet/models/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | from .conv_module import ConvModule, build_conv_layer
 2 | from .conv_ws import ConvWS2d, conv_ws_2d
 3 | from .norm import build_norm_layer
 4 | from .scale import Scale
 5 | from .weight_init import (bias_init_with_prob, kaiming_init, normal_init,
 6 |                           uniform_init, xavier_init)
 7 | 
 8 | __all__ = [
 9 |     'conv_ws_2d', 'ConvWS2d', 'build_conv_layer', 'ConvModule',
10 |     'build_norm_layer', 'xavier_init', 'normal_init', 'uniform_init',
11 |     'kaiming_init', 'bias_init_with_prob', 'Scale'
12 | ]
13 | 


--------------------------------------------------------------------------------
/mmdet/models/utils/conv_ws.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | import torch.nn.functional as F
 3 | 
 4 | 
 5 | def conv_ws_2d(input,
 6 |                weight,
 7 |                bias=None,
 8 |                stride=1,
 9 |                padding=0,
10 |                dilation=1,
11 |                groups=1,
12 |                eps=1e-5):
13 |     c_in = weight.size(0)
14 |     weight_flat = weight.view(c_in, -1)
15 |     mean = weight_flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1)
16 |     std = weight_flat.std(dim=1, keepdim=True).view(c_in, 1, 1, 1)
17 |     weight = (weight - mean) / (std + eps)
18 |     return F.conv2d(input, weight, bias, stride, padding, dilation, groups)
19 | 
20 | 
class ConvWS2d(nn.Conv2d):
    """``nn.Conv2d`` whose forward pass goes through ``conv_ws_2d``.

    Construction is identical to ``nn.Conv2d``; the extra ``eps`` is stored
    and handed to ``conv_ws_2d`` on every forward call.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 eps=1e-5):
        conv_kwargs = dict(
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias)
        super(ConvWS2d, self).__init__(in_channels, out_channels, kernel_size,
                                       **conv_kwargs)
        self.eps = eps

    def forward(self, x):
        """Convolve ``x`` using this layer's weight, bias and conv settings."""
        return conv_ws_2d(
            x,
            self.weight,
            self.bias,
            self.stride,
            self.padding,
            self.dilation,
            self.groups,
            self.eps)
47 | 


--------------------------------------------------------------------------------
/mmdet/models/utils/norm.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | 
 3 | norm_cfg = {
 4 |     # format: layer_type: (abbreviation, module)
 5 |     'BN': ('bn', nn.BatchNorm2d),
 6 |     'SyncBN': ('bn', nn.SyncBatchNorm),
 7 |     'GN': ('gn', nn.GroupNorm),
 8 |     # and potentially 'SN'
 9 | }
10 | 
11 | 
12 | def build_norm_layer(cfg, num_features, postfix=''):
13 |     """ Build normalization layer
14 | 
15 |     Args:
16 |         cfg (dict): cfg should contain:
17 |             type (str): identify norm layer type.
18 |             layer args: args needed to instantiate a norm layer.
19 |             requires_grad (bool): [optional] whether stop gradient updates
20 |         num_features (int): number of channels from input.
21 |         postfix (int, str): appended into norm abbreviation to
22 |             create named layer.
23 | 
24 |     Returns:
25 |         name (str): abbreviation + postfix
26 |         layer (nn.Module): created norm layer
27 |     """
28 |     assert isinstance(cfg, dict) and 'type' in cfg
29 |     cfg_ = cfg.copy()
30 | 
31 |     layer_type = cfg_.pop('type')
32 |     if layer_type not in norm_cfg:
33 |         raise KeyError('Unrecognized norm type {}'.format(layer_type))
34 |     else:
35 |         abbr, norm_layer = norm_cfg[layer_type]
36 |         if norm_layer is None:
37 |             raise NotImplementedError
38 | 
39 |     assert isinstance(postfix, (int, str))
40 |     name = abbr + str(postfix)
41 | 
42 |     requires_grad = cfg_.pop('requires_grad', True)
43 |     cfg_.setdefault('eps', 1e-5)
44 |     if layer_type != 'GN':
45 |         layer = norm_layer(num_features, **cfg_)
46 |         if layer_type == 'SyncBN':
47 |             layer._specify_ddp_gpu_num(1)
48 |     else:
49 |         assert 'num_groups' in cfg_
50 |         layer = norm_layer(num_channels=num_features, **cfg_)
51 | 
52 |     for param in layer.parameters():
53 |         param.requires_grad = requires_grad
54 | 
55 |     return name, layer
56 | 


--------------------------------------------------------------------------------
/mmdet/models/utils/scale.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
 4 | 
 5 | class Scale(nn.Module):
 6 | 
 7 |     def __init__(self, scale=1.0):
 8 |         super(Scale, self).__init__()
 9 |         self.scale = nn.Parameter(torch.tensor(scale, dtype=torch.float))
10 | 
11 |     def forward(self, x):
12 |         return x * self.scale
13 | 


--------------------------------------------------------------------------------
/mmdet/models/utils/weight_init.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch.nn as nn
 3 | 
 4 | 
 5 | def xavier_init(module, gain=1, bias=0, distribution='normal'):
 6 |     assert distribution in ['uniform', 'normal']
 7 |     if distribution == 'uniform':
 8 |         nn.init.xavier_uniform_(module.weight, gain=gain)
 9 |     else:
10 |         nn.init.xavier_normal_(module.weight, gain=gain)
11 |     if hasattr(module, 'bias'):
12 |         nn.init.constant_(module.bias, bias)
13 | 
14 | 
def normal_init(module, mean=0, std=1, bias=0):
    """Draw ``module.weight`` from N(mean, std) and fill the bias.

    Args:
        module (nn.Module): module with a ``weight`` parameter.
        mean (float): mean of the normal distribution.
        std (float): standard deviation of the normal distribution.
        bias (float): constant written into ``module.bias`` when the module
            has a bias parameter.
    """
    nn.init.normal_(module.weight, mean, std)
    # layers built with bias=False still have a `bias` attribute set to None
    if getattr(module, 'bias', None) is not None:
        nn.init.constant_(module.bias, bias)
19 | 
20 | 
def uniform_init(module, a=0, b=1, bias=0):
    """Draw ``module.weight`` from U(a, b) and fill the bias.

    Args:
        module (nn.Module): module with a ``weight`` parameter.
        a (float): lower bound of the uniform distribution.
        b (float): upper bound of the uniform distribution.
        bias (float): constant written into ``module.bias`` when the module
            has a bias parameter.
    """
    nn.init.uniform_(module.weight, a, b)
    # layers built with bias=False still have a `bias` attribute set to None
    if getattr(module, 'bias', None) is not None:
        nn.init.constant_(module.bias, bias)
25 | 
26 | 
def kaiming_init(module,
                 mode='fan_out',
                 nonlinearity='relu',
                 bias=0,
                 distribution='normal'):
    """Kaiming-initialize ``module.weight`` and fill its bias.

    Args:
        module (nn.Module): module with a ``weight`` parameter.
        mode (str): fan mode passed to the Kaiming initializer.
        nonlinearity (str): nonlinearity passed to the Kaiming initializer.
        bias (float): constant written into ``module.bias`` when the module
            has a bias parameter.
        distribution (str): 'uniform' or 'normal'.
    """
    assert distribution in ['uniform', 'normal']
    if distribution == 'uniform':
        nn.init.kaiming_uniform_(
            module.weight, mode=mode, nonlinearity=nonlinearity)
    else:
        nn.init.kaiming_normal_(
            module.weight, mode=mode, nonlinearity=nonlinearity)
    # layers built with bias=False still have a `bias` attribute set to None
    if getattr(module, 'bias', None) is not None:
        nn.init.constant_(module.bias, bias)
41 | 
42 | 
def bias_init_with_prob(prior_prob):
    """Return the bias value whose sigmoid equals ``prior_prob``.

    Computed as ``-log((1 - prior_prob) / prior_prob)`` and returned as a
    plain Python float.
    """
    odds_against = (1 - prior_prob) / prior_prob
    return float(-np.log(odds_against))
47 | 


--------------------------------------------------------------------------------
/mmdet/ops/__init__.py:
--------------------------------------------------------------------------------
 1 | from .context_block import ContextBlock
 2 | from .dcn import (DeformConv, DeformConvPack, DeformRoIPooling,
 3 |                   DeformRoIPoolingPack, ModulatedDeformConv,
 4 |                   ModulatedDeformConvPack, ModulatedDeformRoIPoolingPack,
 5 |                   deform_conv, deform_roi_pooling, modulated_deform_conv)
 6 | from .masked_conv import MaskedConv2d
 7 | from .nms import nms, soft_nms
 8 | from .roi_align import RoIAlign, roi_align
 9 | from .roi_pool import RoIPool, roi_pool
10 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss
11 | 
12 | __all__ = [
13 |     'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool',
14 |     'DeformConv', 'DeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack',
15 |     'ModulatedDeformRoIPoolingPack', 'ModulatedDeformConv',
16 |     'ModulatedDeformConvPack', 'deform_conv', 'modulated_deform_conv',
17 |     'deform_roi_pooling', 'SigmoidFocalLoss', 'sigmoid_focal_loss',
18 |     'MaskedConv2d', 'ContextBlock'
19 | ]
20 | 


--------------------------------------------------------------------------------
/mmdet/ops/dcn/__init__.py:
--------------------------------------------------------------------------------
 1 | from .deform_conv import (DeformConv, DeformConvPack, ModulatedDeformConv,
 2 |                           ModulatedDeformConvPack, deform_conv,
 3 |                           modulated_deform_conv)
 4 | from .deform_pool import (DeformRoIPooling, DeformRoIPoolingPack,
 5 |                           ModulatedDeformRoIPoolingPack, deform_roi_pooling)
 6 | 
 7 | __all__ = [
 8 |     'DeformConv', 'DeformConvPack', 'ModulatedDeformConv',
 9 |     'ModulatedDeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack',
10 |     'ModulatedDeformRoIPoolingPack', 'deform_conv', 'modulated_deform_conv',
11 |     'deform_roi_pooling'
12 | ]
13 | 


--------------------------------------------------------------------------------
/mmdet/ops/masked_conv/__init__.py:
--------------------------------------------------------------------------------
1 | from .masked_conv import MaskedConv2d, masked_conv2d
2 | 
3 | __all__ = ['masked_conv2d', 'MaskedConv2d']
4 | 


--------------------------------------------------------------------------------
/mmdet/ops/masked_conv/masked_conv.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | 
 3 | import torch
 4 | import torch.nn as nn
 5 | from torch.autograd import Function
 6 | from torch.autograd.function import once_differentiable
 7 | from torch.nn.modules.utils import _pair
 8 | 
 9 | from . import masked_conv2d_cuda
10 | 
11 | 
class MaskedConv2dFunction(Function):
    """Forward-only convolution evaluated at the positive mask positions.

    Only a single image (batch size 1), stride 1 and CUDA tensors are
    supported. No gradients are produced by ``backward``.
    """

    @staticmethod
    def forward(ctx, features, mask, weight, bias, padding=0, stride=1):
        """Compute the masked convolution.

        Args:
            features (Tensor): input of shape (1, C_in, H, W) on a CUDA
                device.
            mask (Tensor): mask of shape (1, H, W); positions with a value
                greater than 0 are computed, the rest of the output is zero.
            weight (Tensor): kernel of shape (C_out, C_in, kH, kW).
            bias (Tensor or None): per-output-channel bias.
            padding (int or tuple): zero padding.
            stride (int or tuple): must be 1.

        Returns:
            Tensor: output of shape (1, C_out, out_h, out_w).

        Raises:
            ValueError: if the stride is not 1.
            NotImplementedError: if ``features`` is not a CUDA tensor.
        """
        assert mask.dim() == 3 and mask.size(0) == 1
        assert features.dim() == 4 and features.size(0) == 1
        assert features.size()[2:] == mask.size()[1:]
        pad_h, pad_w = _pair(padding)
        stride_h, stride_w = _pair(stride)
        if stride_h != 1 or stride_w != 1:
            raise ValueError(
                'Stride could not only be 1 in masked_conv2d currently.')
        if not features.is_cuda:
            raise NotImplementedError

        out_channel, in_channel, kernel_h, kernel_w = weight.size()

        batch_size = features.size(0)
        out_h = (features.size(2) + 2 * pad_h -
                 (kernel_h - 1) - 1) // stride_h + 1
        # the output width depends on the kernel *width*
        out_w = (features.size(3) + 2 * pad_w -
                 (kernel_w - 1) - 1) // stride_w + 1
        mask_inds = torch.nonzero(mask[0] > 0)
        output = features.new_zeros(batch_size, out_channel, out_h, out_w)
        if mask_inds.numel() > 0:
            mask_h_idx = mask_inds[:, 0].contiguous()
            mask_w_idx = mask_inds[:, 1].contiguous()
            # one column of unfolded input per selected position
            data_col = features.new_zeros(in_channel * kernel_h * kernel_w,
                                          mask_inds.size(0))
            masked_conv2d_cuda.masked_im2col_forward(features, mask_h_idx,
                                                     mask_w_idx, kernel_h,
                                                     kernel_w, pad_h, pad_w,
                                                     data_col)

            weight_mat = weight.view(out_channel, -1)
            if bias is None:
                masked_output = torch.mm(weight_mat, data_col)
            else:
                # bias is broadcast over the selected positions
                masked_output = torch.addmm(bias[:, None], weight_mat,
                                            data_col)
            masked_conv2d_cuda.masked_col2im_forward(masked_output, mask_h_idx,
                                                     mask_w_idx, out_h, out_w,
                                                     out_channel, output)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """No gradient is implemented: one ``None`` per forward input."""
        # features, mask, weight, bias, padding, stride
        return (None, ) * 6


masked_conv2d = MaskedConv2dFunction.apply
62 | 
63 | 
class MaskedConv2d(nn.Conv2d):
    """A MaskedConv2d which inherits the official Conv2d.

    The masked forward doesn't implement the backward function and only
    supports the stride parameter to be 1 currently.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True):
        super(MaskedConv2d,
              self).__init__(in_channels, out_channels, kernel_size, stride,
                             padding, dilation, groups, bias)

    def forward(self, input, mask=None):
        """Convolve ``input``, optionally only at the masked positions.

        Args:
            input (Tensor): input feature map.
            mask (Tensor, optional): when None the layer behaves like a
                regular ``nn.Conv2d``; otherwise ``masked_conv2d`` is used.
        """
        if mask is None:  # fallback to the normal Conv2d
            return super(MaskedConv2d, self).forward(input)
        # pass the stride along so that an unsupported (non-unit) stride is
        # rejected by masked_conv2d instead of being silently ignored
        return masked_conv2d(input, mask, self.weight, self.bias,
                             self.padding, self.stride)
90 | 


--------------------------------------------------------------------------------
/mmdet/ops/masked_conv/src/masked_conv2d_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <torch/extension.h>
 2 | 
 3 | #include <cmath>
 4 | #include <vector>
 5 | 
// Launchers called by the wrappers below. They are only declared in this
// file; the definitions live elsewhere (presumably in the accompanying CUDA
// source -- confirm).

// Fills `col` from `im` at the positions listed in mask_h_idx / mask_w_idx.
int MaskedIm2colForwardLaucher(const at::Tensor im, const int height,
                               const int width, const int channels,
                               const int kernel_h, const int kernel_w,
                               const int pad_h, const int pad_w,
                               const at::Tensor mask_h_idx,
                               const at::Tensor mask_w_idx, const int mask_cnt,
                               at::Tensor col);

// Writes the columns of `col` into `im` at the positions listed in
// mask_h_idx / mask_w_idx.
int MaskedCol2imForwardLaucher(const at::Tensor col, const int height,
                               const int width, const int channels,
                               const at::Tensor mask_h_idx,
                               const at::Tensor mask_w_idx, const int mask_cnt,
                               at::Tensor im);
19 | 
// Argument checks: each one fails with a message naming the offending
// argument.
#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) \
  AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
// NOTE: expands to two statements, so it must not be used as the sole body
// of an unbraced if/else.
#define CHECK_INPUT(x) \
  CHECK_CUDA(x);       \
  CHECK_CONTIGUOUS(x)
26 | 
27 | int masked_im2col_forward_cuda(const at::Tensor im, const at::Tensor mask_h_idx,
28 |                                const at::Tensor mask_w_idx, const int kernel_h,
29 |                                const int kernel_w, const int pad_h,
30 |                                const int pad_w, at::Tensor col) {
31 |   CHECK_INPUT(im);
32 |   CHECK_INPUT(mask_h_idx);
33 |   CHECK_INPUT(mask_w_idx);
34 |   CHECK_INPUT(col);
35 |   // im: (n, ic, h, w), kernel size (kh, kw)
36 |   // kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh)
37 | 
38 |   int channels = im.size(1);
39 |   int height = im.size(2);
40 |   int width = im.size(3);
41 |   int mask_cnt = mask_h_idx.size(0);
42 | 
43 |   MaskedIm2colForwardLaucher(im, height, width, channels, kernel_h, kernel_w,
44 |                              pad_h, pad_w, mask_h_idx, mask_w_idx, mask_cnt,
45 |                              col);
46 | 
47 |   return 1;
48 | }
49 | 
50 | int masked_col2im_forward_cuda(const at::Tensor col,
51 |                                const at::Tensor mask_h_idx,
52 |                                const at::Tensor mask_w_idx, int height,
53 |                                int width, int channels, at::Tensor im) {
54 |   CHECK_INPUT(col);
55 |   CHECK_INPUT(mask_h_idx);
56 |   CHECK_INPUT(mask_w_idx);
57 |   CHECK_INPUT(im);
58 |   // im: (n, ic, h, w), kernel size (kh, kw)
59 |   // kernel: (oc, ic * kh * kh), col: (kh * kw * ic, ow * oh)
60 | 
61 |   int mask_cnt = mask_h_idx.size(0);
62 | 
63 |   MaskedCol2imForwardLaucher(col, height, width, channels, mask_h_idx,
64 |                              mask_w_idx, mask_cnt, im);
65 | 
66 |   return 1;
67 | }
68 | 
// Python bindings: exposes the two wrappers above as
// `masked_im2col_forward` and `masked_col2im_forward`.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("masked_im2col_forward", &masked_im2col_forward_cuda,
        "masked_im2col forward (CUDA)");
  m.def("masked_col2im_forward", &masked_col2im_forward_cuda,
        "masked_col2im forward (CUDA)");
}


--------------------------------------------------------------------------------
/mmdet/ops/nms/__init__.py:
--------------------------------------------------------------------------------
1 | from .nms_wrapper import nms, soft_nms
2 | 
3 | __all__ = ['nms', 'soft_nms']
4 | 


--------------------------------------------------------------------------------
/mmdet/ops/nms/nms_wrapper.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | 
 4 | from . import nms_cpu, nms_cuda
 5 | from .soft_nms_cpu import soft_nms_cpu
 6 | 
 7 | 
 8 | def nms(dets, iou_thr, device_id=None):
 9 |     """Dispatch to either CPU or GPU NMS implementations.
10 | 
11 |     The input can be either a torch tensor or numpy array. GPU NMS will be used
12 |     if the input is a gpu tensor or device_id is specified, otherwise CPU NMS
13 |     will be used. The returned type will always be the same as inputs.
14 | 
15 |     Arguments:
16 |         dets (torch.Tensor or np.ndarray): bboxes with scores.
17 |         iou_thr (float): IoU threshold for NMS.
18 |         device_id (int, optional): when `dets` is a numpy array, if `device_id`
19 |             is None, then cpu nms is used, otherwise gpu_nms will be used.
20 | 
21 |     Returns:
22 |         tuple: kept bboxes and indice, which is always the same data type as
23 |             the input.
24 |     """
25 |     # convert dets (tensor or numpy array) to tensor
26 |     if isinstance(dets, torch.Tensor):
27 |         is_numpy = False
28 |         dets_th = dets
29 |     elif isinstance(dets, np.ndarray):
30 |         is_numpy = True
31 |         device = 'cpu' if device_id is None else 'cuda:{}'.format(device_id)
32 |         dets_th = torch.from_numpy(dets).to(device)
33 |     else:
34 |         raise TypeError(
35 |             'dets must be either a Tensor or numpy array, but got {}'.format(
36 |                 type(dets)))
37 | 
38 |     # execute cpu or cuda nms
39 |     if dets_th.shape[0] == 0:
40 |         inds = dets_th.new_zeros(0, dtype=torch.long)
41 |     else:
42 |         if dets_th.is_cuda:
43 |             inds = nms_cuda.nms(dets_th, iou_thr)
44 |         else:
45 |             inds = nms_cpu.nms(dets_th, iou_thr)
46 | 
47 |     if is_numpy:
48 |         inds = inds.cpu().numpy()
49 |     return dets[inds, :], inds
50 | 
51 | 
def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3):
    """Run Soft-NMS through the ``soft_nms_cpu`` implementation.

    Tensor inputs are copied to a CPU numpy array for the computation and the
    results are converted back with ``dets.new_tensor``.

    Arguments:
        dets (torch.Tensor or np.ndarray): bboxes with scores.
        iou_thr (float): IoU threshold.
        method (str): 'linear' or 'gaussian'.
        sigma (float): passed through to ``soft_nms_cpu``.
        min_score (float): passed through to ``soft_nms_cpu``.

    Returns:
        tuple: new detections and indices, as tensors for tensor input and
            as float32 / int64 numpy arrays for numpy input.

    Raises:
        TypeError: if ``dets`` is neither a tensor nor a numpy array.
        ValueError: if ``method`` is not a known method name.
    """
    is_tensor = isinstance(dets, torch.Tensor)
    if is_tensor:
        dets_np = dets.detach().cpu().numpy()
    elif isinstance(dets, np.ndarray):
        dets_np = dets
    else:
        raise TypeError(
            'dets must be either a Tensor or numpy array, but got {}'.format(
                type(dets)))

    # integer codes understood by soft_nms_cpu
    method_codes = {'linear': 1, 'gaussian': 2}
    if method not in method_codes:
        raise ValueError('Invalid method for SoftNMS: {}'.format(method))

    new_dets, inds = soft_nms_cpu(
        dets_np,
        iou_thr,
        method=method_codes[method],
        sigma=sigma,
        min_score=min_score)

    if not is_tensor:
        return new_dets.astype(np.float32), inds.astype(np.int64)
    kept_dets = dets.new_tensor(new_dets)
    kept_inds = dets.new_tensor(inds, dtype=torch.long)
    return kept_dets, kept_inds
79 | 


--------------------------------------------------------------------------------
/mmdet/ops/nms/src/nms_cpu.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #include <torch/extension.h>
 3 | 
// Greedy non-maximum suppression on a CPU tensor.
//
// `dets` is read column-wise: columns 0-3 are x1, y1, x2, y2 and column 4 is
// the score. Boxes are visited in descending score order; every later box
// whose IoU with the current box is >= `threshold` is suppressed.
// Returns the int64 indices of the boxes that were not suppressed, in
// ascending index order (not score order).
template <typename scalar_t>
at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) {
  AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");

  // nothing to do for an empty input
  if (dets.numel() == 0) {
    return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
  }

  // contiguous copies of each column so raw pointers can be used below
  auto x1_t = dets.select(1, 0).contiguous();
  auto y1_t = dets.select(1, 1).contiguous();
  auto x2_t = dets.select(1, 2).contiguous();
  auto y2_t = dets.select(1, 3).contiguous();
  auto scores = dets.select(1, 4).contiguous();

  // box areas use the "+1" (inclusive pixel) convention
  at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1);

  // indices of the boxes sorted by descending score
  auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));

  auto ndets = dets.size(0);
  // 1 marks a suppressed box
  at::Tensor suppressed_t =
      at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));

  auto suppressed = suppressed_t.data<uint8_t>();
  auto order = order_t.data<int64_t>();
  auto x1 = x1_t.data<scalar_t>();
  auto y1 = y1_t.data<scalar_t>();
  auto x2 = x2_t.data<scalar_t>();
  auto y2 = y2_t.data<scalar_t>();
  auto areas = areas_t.data<scalar_t>();

  for (int64_t _i = 0; _i < ndets; _i++) {
    auto i = order[_i];
    // a suppressed box cannot suppress others
    if (suppressed[i] == 1) continue;
    auto ix1 = x1[i];
    auto iy1 = y1[i];
    auto ix2 = x2[i];
    auto iy2 = y2[i];
    auto iarea = areas[i];

    // compare against every lower-scored box that is still alive
    for (int64_t _j = _i + 1; _j < ndets; _j++) {
      auto j = order[_j];
      if (suppressed[j] == 1) continue;
      // intersection rectangle
      auto xx1 = std::max(ix1, x1[j]);
      auto yy1 = std::max(iy1, y1[j]);
      auto xx2 = std::min(ix2, x2[j]);
      auto yy2 = std::min(iy2, y2[j]);

      // clamp at 0 so disjoint boxes give an empty intersection
      auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1);
      auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1);
      auto inter = w * h;
      // intersection over union
      auto ovr = inter / (iarea + areas[j] - inter);
      if (ovr >= threshold) suppressed[j] = 1;
    }
  }
  return at::nonzero(suppressed_t == 0).squeeze(1);
}
60 | 
61 | at::Tensor nms(const at::Tensor& dets, const float threshold) {
62 |   at::Tensor result;
63 |   AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] {
64 |     result = nms_cpu_kernel<scalar_t>(dets, threshold);
65 |   });
66 |   return result;
67 | }
68 | 
// Python bindings: exposes the CPU `nms` wrapper under the name "nms" in the
// extension module.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("nms", &nms, "non-maximum suppression");
}


--------------------------------------------------------------------------------
/mmdet/ops/nms/src/nms_cuda.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #include <torch/extension.h>
 3 | 
// Fails through AT_CHECK (with the argument's name in the message) unless the
// tensor lives on a CUDA device.
#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")

// Declared here only; the definition is not part of this translation unit
// (presumably the companion CUDA source - confirm).
at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh);
 7 | 
 8 | at::Tensor nms(const at::Tensor& dets, const float threshold) {
 9 |   CHECK_CUDA(dets);
10 |   if (dets.numel() == 0)
11 |     return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
12 |   return nms_cuda(dets, threshold);
13 | }
14 | 
// Python bindings: exposes the CUDA `nms` wrapper under the name "nms" in the
// extension module.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("nms", &nms, "non-maximum suppression");
}


--------------------------------------------------------------------------------
/mmdet/ops/roi_align/__init__.py:
--------------------------------------------------------------------------------
1 | from .roi_align import RoIAlign, roi_align
2 | 
3 | __all__ = ['roi_align', 'RoIAlign']
4 | 


--------------------------------------------------------------------------------
/mmdet/ops/roi_align/gradcheck.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | import sys
 3 | 
 4 | import numpy as np
 5 | import torch
 6 | from torch.autograd import gradcheck
 7 | 
 8 | sys.path.append(osp.abspath(osp.join(__file__, '../../')))
 9 | from roi_align import RoIAlign  # noqa: E402, isort:skip
10 | 
# Geometry of the synthetic problem: a square feature map and the image size
# it corresponds to under the given spatial scale.
feat_size = 15
spatial_scale = 1.0 / 8
img_size = feat_size / spatial_scale
num_imgs = 2
num_rois = 20

# Random RoIs: a batch index column followed by four box coordinates. The
# first two coordinates fall in the first half of the image and the last two
# are shifted into the second half.
batch_ind = np.random.randint(num_imgs, size=(num_rois, 1))
boxes = np.random.rand(num_rois, 4) * img_size * 0.5
boxes[:, 2:] += img_size * 0.5
rois = np.hstack((batch_ind, boxes))

feat = torch.randn(
    num_imgs, 16, feat_size, feat_size, requires_grad=True, device='cuda:0')
rois = torch.from_numpy(rois).float().cuda()
inputs = (feat, rois)

print('Gradcheck for roi align...')
# Check once with the default sample_num and once with sample_num=2.
for layer_args in ((3, spatial_scale), (3, spatial_scale, 2)):
    test = gradcheck(RoIAlign(*layer_args), inputs, atol=1e-3, eps=1e-3)
    print(test)
31 | 


--------------------------------------------------------------------------------
/mmdet/ops/roi_align/roi_align.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | from torch.autograd import Function
 3 | from torch.autograd.function import once_differentiable
 4 | from torch.nn.modules.utils import _pair
 5 | 
 6 | from . import roi_align_cuda
 7 | 
 8 | 
 9 | class RoIAlignFunction(Function):
10 | 
11 |     @staticmethod
12 |     def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0):
13 |         out_h, out_w = _pair(out_size)
14 |         assert isinstance(out_h, int) and isinstance(out_w, int)
15 |         ctx.spatial_scale = spatial_scale
16 |         ctx.sample_num = sample_num
17 |         ctx.save_for_backward(rois)
18 |         ctx.feature_size = features.size()
19 | 
20 |         batch_size, num_channels, data_height, data_width = features.size()
21 |         num_rois = rois.size(0)
22 | 
23 |         output = features.new_zeros(num_rois, num_channels, out_h, out_w)
24 |         if features.is_cuda:
25 |             roi_align_cuda.forward(features, rois, out_h, out_w, spatial_scale,
26 |                                    sample_num, output)
27 |         else:
28 |             raise NotImplementedError
29 | 
30 |         return output
31 | 
32 |     @staticmethod
33 |     @once_differentiable
34 |     def backward(ctx, grad_output):
35 |         feature_size = ctx.feature_size
36 |         spatial_scale = ctx.spatial_scale
37 |         sample_num = ctx.sample_num
38 |         rois = ctx.saved_tensors[0]
39 |         assert (feature_size is not None and grad_output.is_cuda)
40 | 
41 |         batch_size, num_channels, data_height, data_width = feature_size
42 |         out_w = grad_output.size(3)
43 |         out_h = grad_output.size(2)
44 | 
45 |         grad_input = grad_rois = None
46 |         if ctx.needs_input_grad[0]:
47 |             grad_input = rois.new_zeros(batch_size, num_channels, data_height,
48 |                                         data_width)
49 |             roi_align_cuda.backward(grad_output.contiguous(), rois, out_h,
50 |                                     out_w, spatial_scale, sample_num,
51 |                                     grad_input)
52 | 
53 |         return grad_input, grad_rois, None, None, None
54 | 
55 | 
# Functional form: roi_align(features, rois, out_size, spatial_scale,
# sample_num) runs RoIAlignFunction through autograd.
roi_align = RoIAlignFunction.apply
57 | 
58 | 
class RoIAlign(nn.Module):
    """Module wrapper around the RoIAlign operator.

    Args:
        out_size (int | tuple[int, int]): Output size; expanded to a pair.
        spatial_scale (float): Stored as ``float`` and passed to the op.
        sample_num (int): Stored as ``int`` and passed to the op.
        use_torchvision (bool): If True, ``forward`` delegates to
            ``torchvision.ops.roi_align`` instead of the local ``roi_align``.
    """

    def __init__(self,
                 out_size,
                 spatial_scale,
                 sample_num=0,
                 use_torchvision=False):
        super(RoIAlign, self).__init__()

        self.out_size = _pair(out_size)
        self.spatial_scale = float(spatial_scale)
        self.sample_num = int(sample_num)
        self.use_torchvision = use_torchvision

    def forward(self, features, rois):
        """Apply RoIAlign to ``features`` for the given ``rois``."""
        if not self.use_torchvision:
            return roi_align(features, rois, self.out_size, self.spatial_scale,
                             self.sample_num)

        # Imported lazily so torchvision is only needed when requested.
        from torchvision.ops import roi_align as tv_roi_align
        return tv_roi_align(features, rois, self.out_size, self.spatial_scale,
                            self.sample_num)

    def __repr__(self):
        head = '(out_size={}, spatial_scale={}, sample_num={}'.format(
            self.out_size, self.spatial_scale, self.sample_num)
        tail = ', use_torchvision={})'.format(self.use_torchvision)
        return self.__class__.__name__ + head + tail
88 | 


--------------------------------------------------------------------------------
/mmdet/ops/roi_align/src/roi_align_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <torch/extension.h>
 2 | 
 3 | #include <cmath>
 4 | #include <vector>
 5 | 
// Kernel launchers. Only declared here; their definitions are not part of
// this translation unit (presumably the companion CUDA source - confirm).
int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois,
                           const float spatial_scale, const int sample_num,
                           const int channels, const int height,
                           const int width, const int num_rois,
                           const int pooled_height, const int pooled_width,
                           at::Tensor output);

int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
                            const float spatial_scale, const int sample_num,
                            const int channels, const int height,
                            const int width, const int num_rois,
                            const int pooled_height, const int pooled_width,
                            at::Tensor bottom_grad);

// Argument validation helpers: each fails through AT_CHECK with the
// argument's name (#x) in the message. CHECK_INPUT requires a tensor to be
// both on a CUDA device and contiguous.
#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) \
  AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) \
  CHECK_CUDA(x);       \
  CHECK_CONTIGUOUS(x)
26 | 
27 | int roi_align_forward_cuda(at::Tensor features, at::Tensor rois,
28 |                            int pooled_height, int pooled_width,
29 |                            float spatial_scale, int sample_num,
30 |                            at::Tensor output) {
31 |   CHECK_INPUT(features);
32 |   CHECK_INPUT(rois);
33 |   CHECK_INPUT(output);
34 | 
35 |   // Number of ROIs
36 |   int num_rois = rois.size(0);
37 |   int size_rois = rois.size(1);
38 | 
39 |   if (size_rois != 5) {
40 |     printf("wrong roi size\n");
41 |     return 0;
42 |   }
43 | 
44 |   int num_channels = features.size(1);
45 |   int data_height = features.size(2);
46 |   int data_width = features.size(3);
47 | 
48 |   ROIAlignForwardLaucher(features, rois, spatial_scale, sample_num,
49 |                          num_channels, data_height, data_width, num_rois,
50 |                          pooled_height, pooled_width, output);
51 | 
52 |   return 1;
53 | }
54 | 
55 | int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois,
56 |                             int pooled_height, int pooled_width,
57 |                             float spatial_scale, int sample_num,
58 |                             at::Tensor bottom_grad) {
59 |   CHECK_INPUT(top_grad);
60 |   CHECK_INPUT(rois);
61 |   CHECK_INPUT(bottom_grad);
62 | 
63 |   // Number of ROIs
64 |   int num_rois = rois.size(0);
65 |   int size_rois = rois.size(1);
66 |   if (size_rois != 5) {
67 |     printf("wrong roi size\n");
68 |     return 0;
69 |   }
70 | 
71 |   int num_channels = bottom_grad.size(1);
72 |   int data_height = bottom_grad.size(2);
73 |   int data_width = bottom_grad.size(3);
74 | 
75 |   ROIAlignBackwardLaucher(top_grad, rois, spatial_scale, sample_num,
76 |                           num_channels, data_height, data_width, num_rois,
77 |                           pooled_height, pooled_width, bottom_grad);
78 | 
79 |   return 1;
80 | }
81 | 
// Python bindings: the two wrappers above become `forward` and `backward`
// in the extension module.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("forward", &roi_align_forward_cuda, "Roi_Align forward (CUDA)");
  m.def("backward", &roi_align_backward_cuda, "Roi_Align backward (CUDA)");
}
86 | 


--------------------------------------------------------------------------------
/mmdet/ops/roi_pool/__init__.py:
--------------------------------------------------------------------------------
1 | from .roi_pool import RoIPool, roi_pool
2 | 
3 | __all__ = ['roi_pool', 'RoIPool']
4 | 


--------------------------------------------------------------------------------
/mmdet/ops/roi_pool/gradcheck.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | import sys
 3 | 
 4 | import torch
 5 | from torch.autograd import gradcheck
 6 | 
 7 | sys.path.append(osp.abspath(osp.join(__file__, '../../')))
 8 | from roi_pool import RoIPool  # noqa: E402, isort:skip
 9 | 
# Random feature map created on the CPU and then moved to the GPU.
feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda()
# Three hand-written RoIs, five values per row.
rois = torch.Tensor([
    [0, 0, 0, 50, 50],
    [0, 10, 30, 43, 55],
    [1, 67, 40, 110, 120],
]).cuda()
inputs = (feat, rois)

print('Gradcheck for roi pooling...')
pool_layer = RoIPool(4, 1.0 / 8)
test = gradcheck(pool_layer, inputs, eps=1e-5, atol=1e-3)
print(test)
17 | 


--------------------------------------------------------------------------------
/mmdet/ops/roi_pool/roi_pool.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | from torch.autograd import Function
 4 | from torch.autograd.function import once_differentiable
 5 | from torch.nn.modules.utils import _pair
 6 | 
 7 | from . import roi_pool_cuda
 8 | 
 9 | 
class RoIPoolFunction(Function):
    """Autograd function backed by the compiled ``roi_pool_cuda`` extension.

    CUDA only. Gradients are propagated to ``features``; ``rois`` and the
    remaining arguments receive ``None``.
    """

    @staticmethod
    def forward(ctx, features, rois, out_size, spatial_scale):
        """Pool ``features`` inside every RoI.

        Returns a tensor of shape (num_rois, channels, out_h, out_w). The
        int ``argmax`` buffer filled by the extension is kept on ``ctx`` for
        the backward pass.
        """
        assert features.is_cuda
        out_h, out_w = _pair(out_size)
        assert isinstance(out_h, int) and isinstance(out_w, int)
        ctx.save_for_backward(rois)

        channels = features.size(1)
        roi_count = rois.size(0)
        pooled_shape = (roi_count, channels, out_h, out_w)
        output = features.new_zeros(pooled_shape)
        argmax = features.new_zeros(pooled_shape, dtype=torch.int)
        roi_pool_cuda.forward(features, rois, out_h, out_w, spatial_scale,
                              output, argmax)

        # State needed to route gradients back in ``backward``.
        ctx.spatial_scale = spatial_scale
        ctx.feature_size = features.size()
        ctx.argmax = argmax

        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Return gradients for (features, rois, out_size, spatial_scale)."""
        assert grad_output.is_cuda
        scale = ctx.spatial_scale
        input_shape = ctx.feature_size
        pooled_argmax = ctx.argmax
        rois = ctx.saved_tensors[0]
        assert input_shape is not None

        # Nothing to compute when the features do not need a gradient.
        if not ctx.needs_input_grad[0]:
            return None, None, None, None

        grad_input = grad_output.new_zeros(input_shape)
        roi_pool_cuda.backward(grad_output.contiguous(), rois, pooled_argmax,
                               scale, grad_input)
        return grad_input, None, None, None
48 | 
49 | 
# Functional form: roi_pool(features, rois, out_size, spatial_scale) runs
# RoIPoolFunction through autograd.
roi_pool = RoIPoolFunction.apply
51 | 
52 | 
class RoIPool(nn.Module):
    """Module wrapper around the RoIPool operator.

    Args:
        out_size (int | tuple[int, int]): Output size; expanded to a pair.
        spatial_scale (float): Stored as ``float`` and passed to the op.
        use_torchvision (bool): If True, ``forward`` delegates to
            ``torchvision.ops.roi_pool`` instead of the local ``roi_pool``.
    """

    def __init__(self, out_size, spatial_scale, use_torchvision=False):
        super(RoIPool, self).__init__()

        self.out_size = _pair(out_size)
        self.spatial_scale = float(spatial_scale)
        self.use_torchvision = use_torchvision

    def forward(self, features, rois):
        """Apply RoIPool to ``features`` for the given ``rois``."""
        if not self.use_torchvision:
            return roi_pool(features, rois, self.out_size, self.spatial_scale)

        # Imported lazily so torchvision is only needed when requested.
        from torchvision.ops import roi_pool as tv_roi_pool
        return tv_roi_pool(features, rois, self.out_size, self.spatial_scale)

    def __repr__(self):
        head = '(out_size={}, spatial_scale={}'.format(
            self.out_size, self.spatial_scale)
        tail = ', use_torchvision={})'.format(self.use_torchvision)
        return self.__class__.__name__ + head + tail
76 | 


--------------------------------------------------------------------------------
/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <torch/extension.h>
 2 | 
 3 | #include <cmath>
 4 | #include <vector>
 5 | 
// Kernel launchers. Only declared here; their definitions are not part of
// this translation unit (presumably the companion CUDA source - confirm).
int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois,
                          const float spatial_scale, const int channels,
                          const int height, const int width, const int num_rois,
                          const int pooled_h, const int pooled_w,
                          at::Tensor output, at::Tensor argmax);

int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
                           const at::Tensor argmax, const float spatial_scale,
                           const int batch_size, const int channels,
                           const int height, const int width,
                           const int num_rois, const int pooled_h,
                           const int pooled_w, at::Tensor bottom_grad);

// Argument validation helpers: each fails through AT_CHECK with the
// argument's name (#x) in the message. CHECK_INPUT requires a tensor to be
// both on a CUDA device and contiguous.
#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) \
  AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) \
  CHECK_CUDA(x);       \
  CHECK_CONTIGUOUS(x)
25 | 
26 | int roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois,
27 |                              int pooled_height, int pooled_width,
28 |                              float spatial_scale, at::Tensor output,
29 |                              at::Tensor argmax) {
30 |   CHECK_INPUT(features);
31 |   CHECK_INPUT(rois);
32 |   CHECK_INPUT(output);
33 |   CHECK_INPUT(argmax);
34 | 
35 |   // Number of ROIs
36 |   int num_rois = rois.size(0);
37 |   int size_rois = rois.size(1);
38 | 
39 |   if (size_rois != 5) {
40 |     printf("wrong roi size\n");
41 |     return 0;
42 |   }
43 | 
44 |   int channels = features.size(1);
45 |   int height = features.size(2);
46 |   int width = features.size(3);
47 | 
48 |   ROIPoolForwardLaucher(features, rois, spatial_scale, channels, height, width,
49 |                         num_rois, pooled_height, pooled_width, output, argmax);
50 | 
51 |   return 1;
52 | }
53 | 
54 | int roi_pooling_backward_cuda(at::Tensor top_grad, at::Tensor rois,
55 |                               at::Tensor argmax, float spatial_scale,
56 |                               at::Tensor bottom_grad) {
57 |   CHECK_INPUT(top_grad);
58 |   CHECK_INPUT(rois);
59 |   CHECK_INPUT(argmax);
60 |   CHECK_INPUT(bottom_grad);
61 | 
62 |   int pooled_height = top_grad.size(2);
63 |   int pooled_width = top_grad.size(3);
64 |   int num_rois = rois.size(0);
65 |   int size_rois = rois.size(1);
66 | 
67 |   if (size_rois != 5) {
68 |     printf("wrong roi size\n");
69 |     return 0;
70 |   }
71 |   int batch_size = bottom_grad.size(0);
72 |   int channels = bottom_grad.size(1);
73 |   int height = bottom_grad.size(2);
74 |   int width = bottom_grad.size(3);
75 | 
76 |   ROIPoolBackwardLaucher(top_grad, rois, argmax, spatial_scale, batch_size,
77 |                          channels, height, width, num_rois, pooled_height,
78 |                          pooled_width, bottom_grad);
79 | 
80 |   return 1;
81 | }
82 | 
// Python bindings: the two wrappers above become `forward` and `backward`
// in the extension module.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("forward", &roi_pooling_forward_cuda, "Roi_Pooling forward (CUDA)");
  m.def("backward", &roi_pooling_backward_cuda, "Roi_Pooling backward (CUDA)");
}
87 | 


--------------------------------------------------------------------------------
/mmdet/ops/sigmoid_focal_loss/__init__.py:
--------------------------------------------------------------------------------
1 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss
2 | 
3 | __all__ = ['SigmoidFocalLoss', 'sigmoid_focal_loss']
4 | 


--------------------------------------------------------------------------------
/mmdet/ops/sigmoid_focal_loss/sigmoid_focal_loss.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | from torch.autograd import Function
 3 | from torch.autograd.function import once_differentiable
 4 | 
 5 | from . import sigmoid_focal_loss_cuda
 6 | 
 7 | 
 8 | class SigmoidFocalLossFunction(Function):
 9 | 
10 |     @staticmethod
11 |     def forward(ctx, input, target, gamma=2.0, alpha=0.25):
12 |         ctx.save_for_backward(input, target)
13 |         num_classes = input.shape[1]
14 |         ctx.num_classes = num_classes
15 |         ctx.gamma = gamma
16 |         ctx.alpha = alpha
17 | 
18 |         loss = sigmoid_focal_loss_cuda.forward(input, target, num_classes,
19 |                                                gamma, alpha)
20 |         return loss
21 | 
22 |     @staticmethod
23 |     @once_differentiable
24 |     def backward(ctx, d_loss):
25 |         input, target = ctx.saved_tensors
26 |         num_classes = ctx.num_classes
27 |         gamma = ctx.gamma
28 |         alpha = ctx.alpha
29 |         d_loss = d_loss.contiguous()
30 |         d_input = sigmoid_focal_loss_cuda.backward(input, target, d_loss,
31 |                                                    num_classes, gamma, alpha)
32 |         return d_input, None, None, None, None
33 | 
34 | 
# Functional form: sigmoid_focal_loss(input, target, gamma, alpha) runs
# SigmoidFocalLossFunction through autograd.
sigmoid_focal_loss = SigmoidFocalLossFunction.apply
36 | 
37 | 
38 | # TODO: remove this module
class SigmoidFocalLoss(nn.Module):
    """Module wrapper that sums the element-wise sigmoid focal loss.

    Args:
        gamma (float): Forwarded to ``sigmoid_focal_loss``.
        alpha (float): Forwarded to ``sigmoid_focal_loss``.
    """

    def __init__(self, gamma, alpha):
        super(SigmoidFocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha

    def forward(self, logits, targets):
        """Return the summed focal loss; ``logits`` must be on a GPU."""
        assert logits.is_cuda
        per_element = sigmoid_focal_loss(logits, targets, self.gamma,
                                         self.alpha)
        return per_element.sum()

    def __repr__(self):
        settings = '(gamma={}, alpha={})'.format(self.gamma, self.alpha)
        return self.__class__.__name__ + settings
55 | 


--------------------------------------------------------------------------------
/mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss.cpp:
--------------------------------------------------------------------------------
// Modified from
 2 | // https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h
 3 | #include <torch/extension.h>
 4 | 
// CUDA implementations. Only declared here; their definitions are not part
// of this translation unit (presumably the companion CUDA source - confirm).
at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits,
                                         const at::Tensor &targets,
                                         const int num_classes,
                                         const float gamma, const float alpha);

at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits,
                                          const at::Tensor &targets,
                                          const at::Tensor &d_losses,
                                          const int num_classes,
                                          const float gamma, const float alpha);
15 | 
16 | // Interface for Python
17 | at::Tensor SigmoidFocalLoss_forward(const at::Tensor &logits,
18 |                                     const at::Tensor &targets,
19 |                                     const int num_classes, const float gamma,
20 |                                     const float alpha) {
21 |   if (logits.type().is_cuda()) {
22 |     return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma,
23 |                                          alpha);
24 |   }
25 |   AT_ERROR("SigmoidFocalLoss is not implemented on the CPU");
26 | }
27 | 
28 | at::Tensor SigmoidFocalLoss_backward(const at::Tensor &logits,
29 |                                      const at::Tensor &targets,
30 |                                      const at::Tensor &d_losses,
31 |                                      const int num_classes, const float gamma,
32 |                                      const float alpha) {
33 |   if (logits.type().is_cuda()) {
34 |     return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses,
35 |                                           num_classes, gamma, alpha);
36 |   }
37 |   AT_ERROR("SigmoidFocalLoss is not implemented on the CPU");
38 | }
39 | 
// Python bindings: the two dispatchers above become `forward` and `backward`
// in the extension module.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("forward", &SigmoidFocalLoss_forward,
        "SigmoidFocalLoss forward (CUDA)");
  m.def("backward", &SigmoidFocalLoss_backward,
        "SigmoidFocalLoss backward (CUDA)");
}
46 | 


--------------------------------------------------------------------------------
/mmdet/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .flops_counter import get_model_complexity_info
2 | from .registry import Registry, build_from_cfg
3 | 
4 | __all__ = ['Registry', 'build_from_cfg', 'get_model_complexity_info']
5 | 


--------------------------------------------------------------------------------
/mmdet/utils/registry.py:
--------------------------------------------------------------------------------
 1 | import inspect
 2 | 
 3 | import mmcv
 4 | 
 5 | 
 6 | class Registry(object):
 7 | 
 8 |     def __init__(self, name):
 9 |         self._name = name
10 |         self._module_dict = dict()
11 | 
12 |     def __repr__(self):
13 |         format_str = self.__class__.__name__ + '(name={}, items={})'.format(
14 |             self._name, list(self._module_dict.keys()))
15 |         return format_str
16 | 
17 |     @property
18 |     def name(self):
19 |         return self._name
20 | 
21 |     @property
22 |     def module_dict(self):
23 |         return self._module_dict
24 | 
25 |     def get(self, key):
26 |         return self._module_dict.get(key, None)
27 | 
28 |     def _register_module(self, module_class):
29 |         """Register a module.
30 | 
31 |         Args:
32 |             module (:obj:`nn.Module`): Module to be registered.
33 |         """
34 |         if not inspect.isclass(module_class):
35 |             raise TypeError('module must be a class, but got {}'.format(
36 |                 type(module_class)))
37 |         module_name = module_class.__name__
38 |         if module_name in self._module_dict:
39 |             raise KeyError('{} is already registered in {}'.format(
40 |                 module_name, self.name))
41 |         self._module_dict[module_name] = module_class
42 | 
43 |     def register_module(self, cls):
44 |         self._register_module(cls)
45 |         return cls
46 | 
47 | 
def build_from_cfg(cfg, registry, default_args=None):
    """Build a module from config dict.

    Args:
        cfg (dict): Config dict. It should at least contain the key "type",
            which is either a class or the name of a registered class. The
            remaining items are passed to the class as keyword arguments.
        registry (:obj:`Registry`): The registry to search the type from.
        default_args (dict, optional): Default initialization arguments;
            only used for keys that ``cfg`` does not already provide.

    Returns:
        obj: The constructed object.

    Raises:
        KeyError: If "type" is a string that is not in ``registry``.
        TypeError: If "type" is neither a string nor a class.
    """
    assert isinstance(cfg, dict) and 'type' in cfg
    assert isinstance(default_args, dict) or default_args is None
    # Work on a copy so the caller's config is left untouched.
    args = cfg.copy()
    obj_type = args.pop('type')
    if mmcv.is_str(obj_type):
        # Keep the requested name in ``obj_type`` so the error below reports
        # it instead of the failed lookup result (None).
        obj_cls = registry.get(obj_type)
        if obj_cls is None:
            raise KeyError('{} is not in the {} registry'.format(
                obj_type, registry.name))
    elif inspect.isclass(obj_type):
        obj_cls = obj_type
    else:
        raise TypeError('type must be a str or valid type, but got {}'.format(
            type(obj_type)))
    if default_args is not None:
        for name, value in default_args.items():
            args.setdefault(name, value)
    return obj_cls(**args)
75 | 


--------------------------------------------------------------------------------
/tools/coco_eval.py:
--------------------------------------------------------------------------------
 1 | from argparse import ArgumentParser
 2 | 
 3 | from mmdet.core import coco_eval
 4 | 
 5 | 
 6 | def main():
 7 |     parser = ArgumentParser(description='COCO Evaluation')
 8 |     parser.add_argument('result', help='result file path')
 9 |     parser.add_argument('--ann', help='annotation file path')
10 |     parser.add_argument(
11 |         '--types',
12 |         type=str,
13 |         nargs='+',
14 |         choices=['proposal_fast', 'proposal', 'bbox', 'segm', 'keypoint'],
15 |         default=['bbox'],
16 |         help='result types')
17 |     parser.add_argument(
18 |         '--max-dets',
19 |         type=int,
20 |         nargs='+',
21 |         default=[100, 300, 1000],
22 |         help='proposal numbers, only used for recall evaluation')
23 |     args = parser.parse_args()
24 |     coco_eval(args.result, args.types, args.ann, args.max_dets)
25 | 
26 | 
27 | if __name__ == '__main__':
28 |     main()
29 | 


--------------------------------------------------------------------------------
/tools/dist_test.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | PYTHON=${PYTHON:-"python"}
 4 | 
 5 | CONFIG=$1
 6 | CHECKPOINT=$2
 7 | GPUS=$3
 8 | 
 9 | $PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS \
10 |     $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4}
11 | 


--------------------------------------------------------------------------------
/tools/dist_train.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | PYTHON=${PYTHON:-"python"}
 4 | 
 5 | CONFIG=$1
 6 | GPUS=$2
 7 | 
 8 | $PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS \
 9 |     $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3}
10 | 


--------------------------------------------------------------------------------
/tools/get_flops.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | from mmcv import Config
 4 | 
 5 | from mmdet.models import build_detector
 6 | from mmdet.utils import get_model_complexity_info
 7 | 
 8 | 
 9 | def parse_args():
10 |     parser = argparse.ArgumentParser(description='Train a detector')
11 |     parser.add_argument('config', help='train config file path')
12 |     parser.add_argument(
13 |         '--shape',
14 |         type=int,
15 |         nargs='+',
16 |         default=[1280, 800],
17 |         help='input image size')
18 |     args = parser.parse_args()
19 |     return args
20 | 
21 | 
def main():
    """Build the detector from the config and print its FLOPs and params.

    Raises:
        ValueError: If ``--shape`` has more than two values.
        NotImplementedError: If the model has no ``forward_dummy`` method.
    """
    args = parse_args()

    # A single value means a square input; two values are used as given.
    if len(args.shape) == 1:
        input_shape = (3, args.shape[0], args.shape[0])
    elif len(args.shape) == 2:
        input_shape = (3, ) + tuple(args.shape)
    else:
        raise ValueError('invalid input shape')

    cfg = Config.fromfile(args.config)
    model = build_detector(
        cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg).cuda()
    model.eval()

    # The counter calls model.forward, so route it through forward_dummy.
    if hasattr(model, 'forward_dummy'):
        model.forward = model.forward_dummy
    else:
        raise NotImplementedError(
            'FLOPs counter is currently not supported with {}'.format(
                model.__class__.__name__))

    flops, params = get_model_complexity_info(model, input_shape)
    split_line = '=' * 30
    print('{0}\nInput shape: {1}\nFlops: {2}\nParams: {3}\n{0}'.format(
        split_line, input_shape, flops, params))
49 | 
50 | 
51 | if __name__ == '__main__':
52 |     main()
53 | 


--------------------------------------------------------------------------------
/tools/publish_model.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import subprocess
 3 | 
 4 | import torch
 5 | 
 6 | 
 7 | def parse_args():
 8 |     parser = argparse.ArgumentParser(
 9 |         description='Process a checkpoint to be published')
10 |     parser.add_argument('in_file', help='input checkpoint filename')
11 |     parser.add_argument('out_file', help='output checkpoint filename')
12 |     args = parser.parse_args()
13 |     return args
14 | 
15 | 
def process_checkpoint(in_file, out_file):
    """Strip a checkpoint for publishing and tag it with its hash.

    The checkpoint is loaded on the CPU, its ``optimizer`` entry (if any) is
    dropped, and the result is written to ``out_file``. The file is then
    renamed to ``<out_file without .pth>-<first 8 sha256 hex chars>.pth``.

    Args:
        in_file (str): Input checkpoint filename.
        out_file (str): Output checkpoint filename.

    Raises:
        subprocess.CalledProcessError: If ``sha256sum`` or ``mv`` fails.
    """
    # NOTE: torch.load unpickles the file; only use it on trusted checkpoints.
    checkpoint = torch.load(in_file, map_location='cpu')
    # remove optimizer for smaller file size
    if 'optimizer' in checkpoint:
        del checkpoint['optimizer']
    # if it is necessary to remove some sensitive data in checkpoint['meta'],
    # add the code here.
    torch.save(checkpoint, out_file)
    sha = subprocess.check_output(['sha256sum', out_file]).decode()
    # str.rstrip('.pth') would strip any trailing '.', 'p', 't' or 'h'
    # characters (e.g. 'depth.pth' -> 'de'); remove the suffix explicitly.
    suffix = '.pth'
    if out_file.endswith(suffix):
        stem = out_file[:-len(suffix)]
    else:
        stem = out_file
    final_file = stem + '-{}.pth'.format(sha[:8])
    # Wait for the rename and surface failures instead of firing and
    # forgetting a background process.
    subprocess.check_call(['mv', out_file, final_file])
27 | 
28 | 
def main():
    """Command-line entry point: publish the checkpoint named on argv."""
    cli = parse_args()
    process_checkpoint(in_file=cli.in_file, out_file=cli.out_file)
32 | 
33 | 
34 | if __name__ == '__main__':
35 |     main()
36 | 


--------------------------------------------------------------------------------
/tools/slurm_test.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -x
 4 | 
 5 | PARTITION=$1
 6 | JOB_NAME=$2
 7 | CONFIG=$3
 8 | CHECKPOINT=$4
 9 | GPUS=${GPUS:-8}
10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}
12 | PY_ARGS=${@:5}
13 | SRUN_ARGS=${SRUN_ARGS:-""}
14 | 
15 | srun -p ${PARTITION} \
16 |     --job-name=${JOB_NAME} \
17 |     --gres=gpu:${GPUS_PER_NODE} \
18 |     --ntasks=${GPUS} \
19 |     --ntasks-per-node=${GPUS_PER_NODE} \
20 |     --cpus-per-task=${CPUS_PER_TASK} \
21 |     --kill-on-bad-exit=1 \
22 |     ${SRUN_ARGS} \
23 |     python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS}
24 | 


--------------------------------------------------------------------------------
/tools/slurm_train.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -x
 4 | 
 5 | PARTITION=$1
 6 | JOB_NAME=$2
 7 | CONFIG=$3
 8 | WORK_DIR=$4
 9 | GPUS=${5:-8}
10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}
12 | SRUN_ARGS=${SRUN_ARGS:-""}
13 | PY_ARGS=${PY_ARGS:-"--validate"}
14 | 
15 | srun -p ${PARTITION} \
16 |     --job-name=${JOB_NAME} \
17 |     --gres=gpu:${GPUS_PER_NODE} \
18 |     --ntasks=${GPUS} \
19 |     --ntasks-per-node=${GPUS_PER_NODE} \
20 |     --cpus-per-task=${CPUS_PER_TASK} \
21 |     --kill-on-bad-exit=1 \
22 |     ${SRUN_ARGS} \
23 |     python -u tools/train.py ${CONFIG} --work_dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS}
24 | 


--------------------------------------------------------------------------------
/tools/upgrade_model_version.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import re
 3 | from collections import OrderedDict
 4 | 
 5 | import torch
 6 | 
 7 | 
 8 | def convert(in_file, out_file):
 9 |     """Convert keys in checkpoints.
10 | 
11 |     There can be some breaking changes during the development of mmdetection,
12 |     and this tool is used for upgrading checkpoints trained with old versions
13 |     to the latest one.
14 |     """
15 |     checkpoint = torch.load(in_file)
16 |     in_state_dict = checkpoint.pop('state_dict')
17 |     out_state_dict = OrderedDict()
18 |     for key, val in in_state_dict.items():
19 |         # Use ConvModule instead of nn.Conv2d in RetinaNet
20 |         # cls_convs.0.weight -> cls_convs.0.conv.weight
21 |         m = re.search(r'(cls_convs|reg_convs).\d.(weight|bias)', key)
22 |         if m is not None:
23 |             param = m.groups()[1]
24 |             new_key = key.replace(param, 'conv.{}'.format(param))
25 |             out_state_dict[new_key] = val
26 |             continue
27 | 
28 |         out_state_dict[key] = val
29 |     checkpoint['state_dict'] = out_state_dict
30 |     torch.save(checkpoint, out_file)
31 | 
32 | 
def main():
    """Script entry point: upgrade the checkpoint named on the command line."""
    parser = argparse.ArgumentParser(description='Upgrade model version')
    positionals = (
        ('in_file', 'input checkpoint file'),
        ('out_file', 'output checkpoint file'),
    )
    for arg_name, help_text in positionals:
        parser.add_argument(arg_name, help=help_text)
    cli = parser.parse_args()
    convert(cli.in_file, cli.out_file)


if __name__ == '__main__':
    main()
43 | 


--------------------------------------------------------------------------------
/tools/voc_eval.py:
--------------------------------------------------------------------------------
 1 | from argparse import ArgumentParser
 2 | 
 3 | import mmcv
 4 | import numpy as np
 5 | 
 6 | from mmdet import datasets
 7 | from mmdet.core import eval_map
 8 | 
 9 | 
def voc_eval(result_file, dataset, iou_thr=0.5):
    """Evaluate saved detection results against a dataset's annotations.

    Ground-truth boxes and labels are collected from every sample of
    ``dataset`` (ignored boxes, when present, are appended after the regular
    ones and flagged in a boolean mask) and handed to ``eval_map`` together
    with the loaded detections; the summary is printed by ``eval_map``.

    Args:
        result_file (str): Path of the detection results, read with
            ``mmcv.load``.
        dataset: Dataset object supporting ``len()``, ``get_ann_info(i)``
            and a ``CLASSES`` attribute; an optional ``year`` attribute
            equal to 2007 selects the ``'voc07'`` dataset name.
        iou_thr (float): IoU threshold forwarded to ``eval_map``.
    """
    det_results = mmcv.load(result_file)
    gt_bboxes = []
    gt_labels = []
    gt_ignore = []
    has_ignore = False
    for i in range(len(dataset)):
        ann = dataset.get_ann_info(i)
        bboxes = ann['bboxes']
        labels = ann['labels']
        # Builtin ``bool`` replaces the ``np.bool`` alias, which no longer
        # exists in recent NumPy releases. A mask is built for every sample
        # so the mask list always lines up with gt_bboxes / gt_labels, even
        # if only some samples carry ignored boxes.
        ignore = np.zeros(bboxes.shape[0], dtype=bool)
        if 'bboxes_ignore' in ann:
            has_ignore = True
            ignore = np.concatenate([
                ignore,
                np.ones(ann['bboxes_ignore'].shape[0], dtype=bool)
            ])
            bboxes = np.vstack([bboxes, ann['bboxes_ignore']])
            labels = np.concatenate([labels, ann['labels_ignore']])
        gt_ignore.append(ignore)
        gt_bboxes.append(bboxes)
        gt_labels.append(labels)
    if not has_ignore:
        # No sample provided ignore information: pass None instead of a
        # list (the original no-op ``gt_ignore = gt_ignore`` left an empty
        # list here). NOTE(review): assumes eval_map treats None as "no
        # ignore flags" -- confirm against mmdet.core.eval_map.
        gt_ignore = None
    if hasattr(dataset, 'year') and dataset.year == 2007:
        dataset_name = 'voc07'
    else:
        dataset_name = dataset.CLASSES
    eval_map(
        det_results,
        gt_bboxes,
        gt_labels,
        gt_ignore=gt_ignore,
        scale_ranges=None,
        iou_thr=iou_thr,
        dataset=dataset_name,
        print_summary=True)
44 | 
45 | 
def main():
    """Script entry point: evaluate a result file on the config's test set."""
    parser = ArgumentParser(description='VOC Evaluation')
    for arg_name, help_text in (('result', 'result file path'),
                                ('config', 'config file path')):
        parser.add_argument(arg_name, help=help_text)
    iou_option = dict(
        type=float, default=0.5, help='IoU threshold for evaluation')
    parser.add_argument('--iou-thr', **iou_option)
    cli = parser.parse_args()

    # Build the test dataset described by the config, then evaluate on it.
    config = mmcv.Config.fromfile(cli.config)
    test_dataset = mmcv.runner.obj_from_dict(config.data.test, datasets)
    voc_eval(cli.result, test_dataset, cli.iou_thr)


if __name__ == '__main__':
    main()
63 | 


--------------------------------------------------------------------------------