├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── GETTING_STARTED.md
├── INSTALL.md
├── LICENSE
├── MODEL_ZOO.md
├── README.md
├── TECHNICAL_DETAILS.md
├── annotation
│ ├── dataset_example
│ │ ├── image
│ │ │ └── 21.jpg
│ │ ├── seg_train.json
│ │ ├── train.json
│ │ └── train.xml
│ ├── demo
│ ├── faster_rcnn_r50_fpn_1x.py
│ ├── mask_rcnn_r101_fpn_1x.py
│ ├── mmcv
│ │ ├── checkpoint
│ │ ├── config
│ │ ├── io
│ │ ├── parallel
│ │ └── runner
│ │   ├── hooks
│ │   └── utils
│ ├── mmdet
│ │ ├── apis
│ │ │ ├── __init__
│ │ │ ├── inference
│ │ │ └── train
│ │ ├── datasets
│ │ │ ├── datasets
│ │ │ └── loader
│ │ ├── models
│ │ │ ├── anchor_heads
│ │ │ ├── backbones
│ │ │ ├── bbox_heads
│ │ │ ├── builder
│ │ │ ├── losses
│ │ │ ├── necks
│ │ │ ├── resnet
│ │ │ │ ├── conv_block
│ │ │ │ ├── identity_block
│ │ │ │ ├── resnet-101(这个是maskrcnn-tf的,m2det只输出这里的C2-C5,而且两处block数目也不同)
│ │ │ │ ├── resnet-50.png
│ │ │ │ └── 其中采用了右边这种,注意通道先压缩再扩充,减少参数,可以参考resnet-50结构图.png
│ │ │ └── utils
│ │ └── registry
│ └── model_vis
│   ├── inference.png
│   └── maskrcnn-model-inference.png
├── configs
│ ├── cascade_mask_rcnn_r101_fpn_1x.py
│ ├── cascade_mask_rcnn_r50_caffe_c4_1x.py
│ ├── cascade_mask_rcnn_r50_fpn_1x.py
│ ├── cascade_mask_rcnn_x101_32x4d_fpn_1x.py
│ ├── cascade_mask_rcnn_x101_64x4d_fpn_1x.py
│ ├── cascade_rcnn_r101_fpn_1x.py
│ ├── cascade_rcnn_r50_caffe_c4_1x.py
│ ├── cascade_rcnn_r50_fpn_1x.py
│ ├── cascade_rcnn_x101_32x4d_fpn_1x.py
│ ├── cascade_rcnn_x101_64x4d_fpn_1x.py
│ ├── dcn
│ │ ├── README.md
│ │ ├── cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x.py
│ │ ├── cascade_rcnn_dconv_c3-c5_r50_fpn_1x.py
│ │ ├── faster_rcnn_dconv_c3-c5_r50_fpn_1x.py
│ │ ├── faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x.py
│ │ ├── faster_rcnn_dpool_r50_fpn_1x.py
│ │ ├── faster_rcnn_mdconv_c3-c5_r50_fpn_1x.py
│ │ ├── faster_rcnn_mdpool_r50_fpn_1x.py
│ │ └── mask_rcnn_dconv_c3-c5_r50_fpn_1x.py
│ ├── empirical_attention
│ │ ├── README.md
│ │ ├── faster_rcnn_r50_fpn_attention_0010_1x.py
│ │ ├── faster_rcnn_r50_fpn_attention_0010_dcn_1x.py
│ │ ├── faster_rcnn_r50_fpn_attention_1111_1x.py
│ │ └── faster_rcnn_r50_fpn_attention_1111_dcn_1x.py
│ ├── fast_mask_rcnn_r101_fpn_1x.py
│ ├── fast_mask_rcnn_r50_caffe_c4_1x.py
│ ├── fast_mask_rcnn_r50_fpn_1x.py
│ ├── fast_rcnn_r101_fpn_1x.py
│ ├── fast_rcnn_r50_caffe_c4_1x.py
│ ├── fast_rcnn_r50_fpn_1x.py
│ ├── faster_rcnn_ohem_r50_fpn_1x.py
│ ├── faster_rcnn_r101_fpn_1x.py
│ ├── faster_rcnn_r50_caffe_c4_1x.py
│ ├── faster_rcnn_r50_fpn_1x.py
│ ├── faster_rcnn_x101_32x4d_fpn_1x.py
│ ├── faster_rcnn_x101_64x4d_fpn_1x.py
│ ├── fcos
│ │ ├── README.md
│ │ ├── fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu.py
│ │ ├── fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x.py
│ │ └── fcos_r50_caffe_fpn_gn_1x_4gpu.py
│ ├── fp16
│ │ ├── faster_rcnn_r50_fpn_fp16_1x.py
│ │ ├── mask_rcnn_r50_fpn_fp16_1x.py
│ │ └── retinanet_r50_fpn_fp16_1x.py
│ ├── gcnet
│ │ ├── README.md
│ │ ├── mask_rcnn_r16_gcb_c3-c5_r50_fpn_1x.py
│ │ ├── mask_rcnn_r16_gcb_c3-c5_r50_fpn_syncbn_1x.py
│ │ ├── mask_rcnn_r4_gcb_c3-c5_r50_fpn_1x.py
│ │ ├── mask_rcnn_r4_gcb_c3-c5_r50_fpn_syncbn_1x.py
│ │ └── mask_rcnn_r50_fpn_sbn_1x.py
│ ├── ghm
│ │ ├── README.md
│ │ └── retinanet_ghm_r50_fpn_1x.py
│ ├── gn+ws
│ │ ├── README.md
│ │ ├── faster_rcnn_r50_fpn_gn_ws_1x.py
│ │ ├── mask_rcnn_r50_fpn_gn_ws_20_23_24e.py
│ │ ├── mask_rcnn_r50_fpn_gn_ws_2x.py
│ │ └── mask_rcnn_x101_32x4d_fpn_gn_ws_2x.py
│ ├── gn
│ │ ├── README.md
│ │ ├── mask_rcnn_r101_fpn_gn_2x.py
│ │ ├── mask_rcnn_r50_fpn_gn_2x.py
│ │ └── mask_rcnn_r50_fpn_gn_contrib_2x.py
│ ├── grid_rcnn
│ │ ├── README.md
│ │ ├── grid_rcnn_gn_head_r50_fpn_2x.py
│ │ └── grid_rcnn_gn_head_x101_32x4d_fpn_2x.py
│ ├── guided_anchoring
│ │ ├── README.md
│ │ ├── ga_fast_r50_caffe_fpn_1x.py
│ │ ├── ga_faster_r50_caffe_fpn_1x.py
│ │ ├── ga_faster_x101_32x4d_fpn_1x.py
│ │ ├── ga_retinanet_r50_caffe_fpn_1x.py
│ │ ├── ga_retinanet_x101_32x4d_fpn_1x.py
│ │ ├── ga_rpn_r101_caffe_rpn_1x.py
│ │ ├── ga_rpn_r50_caffe_fpn_1x.py
│ │ └── ga_rpn_x101_32x4d_fpn_1x.py
│ ├── hrnet
│ │ ├── README.md
│ │ ├── cascade_rcnn_hrnetv2p_w32_20e.py
│ │ ├── faster_rcnn_hrnetv2p_w18_1x.py
│ │ ├── faster_rcnn_hrnetv2p_w32_1x.py
│ │ ├── faster_rcnn_hrnetv2p_w40_1x.py
│ │ ├── mask_rcnn_hrnetv2p_w18_1x.py
│ │ └── mask_rcnn_hrnetv2p_w32_1x.py
│ ├── htc
│ │ ├── README.md
│ │ ├── htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e.py
│ │ ├── htc_r101_fpn_20e.py
│ │ ├── htc_r50_fpn_1x.py
│ │ ├── htc_r50_fpn_20e.py
│ │ ├── htc_without_semantic_r50_fpn_1x.py
│ │ ├── htc_x101_32x4d_fpn_20e_16gpu.py
│ │ └── htc_x101_64x4d_fpn_20e_16gpu.py
│ ├── libra_rcnn
│ │ ├── README.md
│ │ ├── libra_fast_rcnn_r50_fpn_1x.py
│ │ ├── libra_faster_rcnn_r101_fpn_1x.py
│ │ ├── libra_faster_rcnn_r50_fpn_1x.py
│ │ ├── libra_faster_rcnn_x101_64x4d_fpn_1x.py
│ │ └── libra_retinanet_r50_fpn_1x.py
│ ├── mask_r101_(test).py
│ ├── mask_rcnn_r101_fpn_1x.py
│ ├── mask_rcnn_r50_caffe_c4_1x.py
│ ├── mask_rcnn_r50_fpn_1x.py
│ ├── mask_rcnn_x101_32x4d_fpn_1x.py
│ ├── mask_rcnn_x101_64x4d_fpn_1x.py
│ ├── ms_rcnn
│ │ ├── README.md
│ │ ├── ms_rcnn_r101_caffe_fpn_1x.py
│ │ ├── ms_rcnn_r50_caffe_fpn_1x.py
│ │ └── ms_rcnn_x101_64x4d_fpn_1x.py
│ ├── pascal_voc
│ │ ├── faster_rcnn_r50_fpn_1x_voc0712.py
│ │ ├── ssd300_voc.py
│ │ └── ssd512_voc.py
│ ├── retinanet_r101_fpn_1x.py
│ ├── retinanet_r50_fpn_1x.py
│ ├── retinanet_x101_32x4d_fpn_1x.py
│ ├── retinanet_x101_64x4d_fpn_1x.py
│ ├── rpn_r101_fpn_1x.py
│ ├── rpn_r50_caffe_c4_1x.py
│ ├── rpn_r50_fpn_1x.py
│ ├── rpn_x101_32x4d_fpn_1x.py
│ ├── rpn_x101_64x4d_fpn_1x.py
│ ├── scratch
│ │ ├── README.md
│ │ ├── scratch_faster_rcnn_r50_fpn_gn_6x.py
│ │ └── scratch_mask_rcnn_r50_fpn_gn_6x.py
│ ├── ssd300_coco.py
│ ├── ssd512_coco.py
│ └── wider_face
│   ├── README.md
│   └── ssd300_wider_face.py
├── demo.py
├── hook.py
├── mmdet
│ ├── __init__.py
│ ├── apis
│ │ ├── __init__.py
│ │ ├── env.py
│ │ ├── inference.py
│ │ └── train.py
│ ├── core
│ │ ├── __init__.py
│ │ ├── anchor
│ │ │ ├── __init__.py
│ │ │ ├── anchor_generator.py
│ │ │ ├── anchor_target.py
│ │ │ └── guided_anchor_target.py
│ │ ├── bbox
│ │ │ ├── __init__.py
│ │ │ ├── assign_sampling.py
│ │ │ ├── assigners
│ │ │ │ ├── __init__.py
│ │ │ │ ├── __pycache__
│ │ │ │ │ ├── __init__.cpython-37.pyc
│ │ │ │ │ ├── approx_max_iou_assigner.cpython-37.pyc
│ │ │ │ │ ├── assign_result.cpython-37.pyc
│ │ │ │ │ ├── base_assigner.cpython-37.pyc
│ │ │ │ │ └── max_iou_assigner.cpython-37.pyc
│ │ │ │ ├── approx_max_iou_assigner.py
│ │ │ │ ├── assign_result.py
│ │ │ │ ├── base_assigner.py
│ │ │ │ └── max_iou_assigner.py
│ │ │ ├── bbox_target.py
│ │ │ ├── geometry.py
│ │ │ ├── samplers
│ │ │ │ ├── __init__.py
│ │ │ │ ├── __pycache__
│ │ │ │ │ ├── __init__.cpython-37.pyc
│ │ │ │ │ ├── base_sampler.cpython-37.pyc
│ │ │ │ │ ├── combined_sampler.cpython-37.pyc
│ │ │ │ │ ├── instance_balanced_pos_sampler.cpython-37.pyc
│ │ │ │ │ ├── iou_balanced_neg_sampler.cpython-37.pyc
│ │ │ │ │ ├── ohem_sampler.cpython-37.pyc
│ │ │ │ │ ├── pseudo_sampler.cpython-37.pyc
│ │ │ │ │ ├── random_sampler.cpython-37.pyc
│ │ │ │ │ └── sampling_result.cpython-37.pyc
│ │ │ │ ├── base_sampler.py
│ │ │ │ ├── combined_sampler.py
│ │ │ │ ├── instance_balanced_pos_sampler.py
│ │ │ │ ├── iou_balanced_neg_sampler.py
│ │ │ │ ├── ohem_sampler.py
│ │ │ │ ├── pseudo_sampler.py
│ │ │ │ ├── random_sampler.py
│ │ │ │ └── sampling_result.py
│ │ │ └── transforms.py
│ │ ├── evaluation
│ │ │ ├── __init__.py
│ │ │ ├── bbox_overlaps.py
│ │ │ ├── class_names.py
│ │ │ ├── coco_utils.py
│ │ │ ├── eval_hooks.py
│ │ │ ├── mean_ap.py
│ │ │ └── recall.py
│ │ ├── fp16
│ │ │ ├── __init__.py
│ │ │ ├── decorators.py
│ │ │ ├── hooks.py
│ │ │ └── utils.py
│ │ ├── mask
│ │ │ ├── __init__.py
│ │ │ ├── mask_target.py
│ │ │ └── utils.py
│ │ ├── post_processing
│ │ │ ├── __init__.py
│ │ │ ├── bbox_nms.py
│ │ │ └── merge_augs.py
│ │ └── utils
│ │   ├── __init__.py
│ │   ├── dist_utils.py
│ │   └── misc.py
│ ├── datasets
│ │ ├── __init__.py
│ │ ├── builder.py
│ │ ├── coco.py
│ │ ├── custom.py
│ │ ├── dataset_wrappers.py
│ │ ├── extra_aug.py
│ │ ├── loader
│ │ │ ├── __init__.py
│ │ │ ├── __pycache__
│ │ │ │ ├── __init__.cpython-37.pyc
│ │ │ │ ├── build_loader.cpython-37.pyc
│ │ │ │ └── sampler.cpython-37.pyc
│ │ │ ├── build_loader.py
│ │ │ └── sampler.py
│ │ ├── my_dataset.py
│ │ ├── registry.py
│ │ ├── transforms.py
│ │ ├── utils.py
│ │ ├── voc.py
│ │ ├── wider_face.py
│ │ └── xml_style.py
│ ├── models
│ │ ├── __init__.py
│ │ ├── anchor_heads
│ │ │ ├── __init__.py
│ │ │ ├── anchor_head.py
│ │ │ ├── fcos_head.py
│ │ │ ├── ga_retina_head.py
│ │ │ ├── ga_rpn_head.py
│ │ │ ├── guided_anchor_head.py
│ │ │ ├── retina_head.py
│ │ │ ├── rpn_head.py
│ │ │ └── ssd_head.py
│ │ ├── backbones
│ │ │ ├── __init__.py
│ │ │ ├── hrnet.py
│ │ │ ├── resnet.py
│ │ │ ├── resnext.py
│ │ │ └── ssd_vgg.py
│ │ ├── bbox_heads
│ │ │ ├── __init__.py
│ │ │ ├── bbox_head.py
│ │ │ └── convfc_bbox_head.py
│ │ ├── builder.py
│ │ ├── detectors
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ ├── cascade_rcnn.py
│ │ │ ├── fast_rcnn.py
│ │ │ ├── faster_rcnn.py
│ │ │ ├── fcos.py
│ │ │ ├── grid_rcnn.py
│ │ │ ├── htc.py
│ │ │ ├── mask_rcnn.py
│ │ │ ├── mask_scoring_rcnn.py
│ │ │ ├── retinanet.py
│ │ │ ├── rpn.py
│ │ │ ├── single_stage.py
│ │ │ ├── test_mixins.py
│ │ │ └── two_stage.py
│ │ ├── losses
│ │ │ ├── __init__.py
│ │ │ ├── accuracy.py
│ │ │ ├── balanced_l1_loss.py
│ │ │ ├── cross_entropy_loss.py
│ │ │ ├── focal_loss.py
│ │ │ ├── ghm_loss.py
│ │ │ ├── iou_loss.py
│ │ │ ├── mse_loss.py
│ │ │ ├── smooth_l1_loss.py
│ │ │ └── utils.py
│ │ ├── mask_heads
│ │ │ ├── __init__.py
│ │ │ ├── fcn_mask_head.py
│ │ │ ├── fused_semantic_head.py
│ │ │ ├── grid_head.py
│ │ │ ├── htc_mask_head.py
│ │ │ └── maskiou_head.py
│ │ ├── necks
│ │ │ ├── __init__.py
│ │ │ ├── bfp.py
│ │ │ ├── fpn.py
│ │ │ └── hrfpn.py
│ │ ├── plugins
│ │ │ ├── __init__.py
│ │ │ ├── generalized_attention.py
│ │ │ └── non_local.py
│ │ ├── registry.py
│ │ ├── roi_extractors
│ │ │ ├── __init__.py
│ │ │ └── single_level.py
│ │ ├── shared_heads
│ │ │ ├── __init__.py
│ │ │ └── res_layer.py
│ │ └── utils
│ │   ├── __init__.py
│ │   ├── conv_module.py
│ │   ├── conv_ws.py
│ │   ├── norm.py
│ │   ├── scale.py
│ │   └── weight_init.py
│ ├── utils
│ │ ├── __init__.py
│ │ └── registry.py
│ └── version.py
├── mmdetection记录.md
├── outputs
│ ├── _s1019.png
│ ├── _screenshot_02.04.2019.png
│ └── _screenshot_071019.png
├── setup.py
└── tools
  ├── analyze_logs.py
  ├── coco_eval.py
  ├── convert_datasets
  │ └── pascal_voc.py
  ├── detectron2pytorch.py
  ├── dist_test.sh
  ├── dist_train.sh
  ├── publish_model.py
  ├── slurm_test.sh
  ├── slurm_train.sh
  ├── test.py
  ├── train.py
  ├── upgrade_model_version.py
  └── voc_eval.py
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as
6 | contributors and maintainers pledge to making participation in our project and
7 | our community a harassment-free experience for everyone, regardless of age, body
8 | size, disability, ethnicity, sex characteristics, gender identity and expression,
9 | level of experience, education, socio-economic status, nationality, personal
10 | appearance, race, religion, or sexual identity and orientation.
11 |
12 | ## Our Standards
13 |
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 |
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 |
23 | Examples of unacceptable behavior by participants include:
24 |
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 | advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 | address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 | professional setting
33 |
34 | ## Our Responsibilities
35 |
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 |
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 |
46 | ## Scope
47 |
48 | This Code of Conduct applies both within project spaces and in public spaces
49 | when an individual is representing the project or its community. Examples of
50 | representing a project or community include using an official project e-mail
51 | address, posting via an official social media account, or acting as an appointed
52 | representative at an online or offline event. Representation of a project may be
53 | further defined and clarified by project maintainers.
54 |
55 | ## Enforcement
56 |
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at chenkaidev@gmail.com. All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 |
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 |
68 | ## Attribution
69 |
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
72 |
73 | [homepage]: https://www.contributor-covenant.org
74 |
75 | For answers to common questions about this code of conduct, see
76 | https://www.contributor-covenant.org/faq
77 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to mmdetection
2 |
3 | All kinds of contributions are welcome, including but not limited to the following.
4 |
5 | - Fixes (typo, bugs)
6 | - New features and components
7 |
8 | ## Workflow
9 |
10 | 1. fork and pull the latest mmdetection
11 | 2. checkout a new branch (do not use master branch for PRs)
12 | 3. commit your changes
13 | 4. create a PR
14 |
15 | Note
16 | - If you plan to add some new features that involve large changes, it is encouraged to open an issue for discussion first.
17 | - If you are the author of some papers and would like to include your method in mmdetection,
18 |   please contact Kai Chen (chenkaidev[at]gmail[dot]com). We would much appreciate your contribution.
19 |
20 | ## Code style
21 |
22 | ### Python
23 | We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style.
24 | We use [flake8](http://flake8.pycqa.org/en/latest/) as the linter and [yapf](https://github.com/google/yapf) as the formatter.
25 | Please upgrade to the latest yapf (>=0.27.0) and refer to the [configuration](.style.yapf).
26 |
27 | >Before you create a PR, make sure that your code lints and is formatted by yapf.
28 |
29 | ### C++ and CUDA
30 | We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html).
--------------------------------------------------------------------------------
/INSTALL.md:
--------------------------------------------------------------------------------
1 | ## Installation
2 |
3 | ### Requirements
4 |
5 | - Linux
6 | - Python 3.5+ ([Say goodbye to Python2](https://python3statement.org/))
7 | - PyTorch 1.0+ or PyTorch-nightly
8 | - CUDA 9.0+
9 | - NCCL 2+
10 | - GCC 4.9+
11 | - [mmcv](https://github.com/open-mmlab/mmcv)
12 |
13 | We have tested the following versions of OS and software:
14 |
15 | - OS: Ubuntu 16.04/18.04 and CentOS 7.2
16 | - CUDA: 9.0/9.2/10.0
17 | - NCCL: 2.1.15/2.2.13/2.3.7/2.4.2
18 | - GCC: 4.9/5.3/5.4/7.3
19 |
20 | ### Install mmdetection
21 |
22 | a. Create a conda virtual environment and activate it. Then install Cython.
23 |
24 | ```shell
25 | conda create -n open-mmlab python=3.7 -y
26 | conda activate open-mmlab
27 |
28 | conda install cython
29 | ```
30 |
31 | b. Install PyTorch stable or nightly and torchvision following the [official instructions](https://pytorch.org/).
32 |
33 | c. Clone the mmdetection repository.
34 |
35 | ```shell
36 | git clone https://github.com/open-mmlab/mmdetection.git
37 | cd mmdetection
38 | ```
39 |
40 | d. Install mmdetection (other dependencies will be installed automatically).
41 |
42 | ```shell
43 | python setup.py develop
44 | # or "pip install -v -e ."
45 | ```
46 |
47 | Note:
48 |
49 | 1. It is recommended that you rerun step d each time you pull some updates from GitHub, especially if the C/CUDA code has changed, so that the extensions are recompiled.
50 | The git commit id will be written to the version number in step d, e.g. 0.6.0+2e7045c. The version will also be saved in trained models.
51 |
52 | 2. Following the above instructions, mmdetection is installed in `dev` mode, so any modifications to the code take effect without installing it again.
53 |
54 | ### Prepare COCO dataset.
55 |
56 | It is recommended to symlink the dataset root to `$MMDETECTION/data`.
57 |
58 | ```
59 | mmdetection
60 | ├── mmdet
61 | ├── tools
62 | ├── configs
63 | ├── data
64 | │ ├── coco
65 | │ │ ├── annotations
66 | │ │ ├── train2017
67 | │ │ ├── val2017
68 | │ │ ├── test2017
69 | │ ├── VOCdevkit
70 | │ │ ├── VOC2007
71 | │ │ ├── VOC2012
72 |
73 | ```
74 |
75 | ### Scripts
76 | [Here](https://gist.github.com/hellock/bf23cd7348c727d69d48682cb6909047) is
77 | a script for setting up mmdetection with conda.
78 |
79 | ### Notice
80 | You can run `python(3) setup.py develop` or `pip install -v -e .` to install mmdetection if you want to make modifications to it frequently.
81 |
82 | If there is more than one copy of mmdetection on your machine and you want to switch between them,
83 | insert the following code into the main file
84 | ```python
85 | import os.path as osp
86 | import sys
87 | sys.path.insert(0, osp.join(osp.dirname(osp.abspath(__file__)), '../'))
88 | ```
89 | or run the following command in a terminal in the corresponding folder.
90 | ```shell
91 | export PYTHONPATH=`pwd`:$PYTHONPATH
92 | ```
93 |
--------------------------------------------------------------------------------
/annotation/dataset_example/image/21.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ming71/mmdetection-annotated/dc25243ea11d9c4cfd517d7c08dfa2fd61e01895/annotation/dataset_example/image/21.jpg
--------------------------------------------------------------------------------
/annotation/dataset_example/train.json:
--------------------------------------------------------------------------------
1 | {
2 | "images": [
3 | {
4 | "height": 682,
5 | "width": 500,
6 | "id": 1,
7 | "file_name": "21.jpg"
8 | }
9 | ],
10 | "categories": [
11 | {
12 | "supercategory": "car",
13 | "id": 1,
14 | "name": "car"
15 | }
16 | ],
17 | "annotations": [
18 | {
19 | "segmentation": [
20 | [
21 | 0.0
22 | ]
23 | ],
24 | "iscrowd": 0,
25 | "image_id": 1,
26 | "area": 31164,
27 | "bbox": [
28 | 136,
29 | 347,
30 | 196,
31 | 159
32 | ],
33 | "category_id": 1,
34 | "id": 1
35 | }
36 | ]
37 | }
--------------------------------------------------------------------------------
/annotation/dataset_example/train.xml:
--------------------------------------------------------------------------------
1 | <annotation>
2 |     <folder>image</folder>
3 |     <filename>21.jpg</filename>
4 |     <path>/py/mmdetection-master/data/coco/train2014/image/21.jpg</path>
5 |     <source>
6 |         <database>Unknown</database>
7 |     </source>
8 |     <size>
9 |         <width>500</width>
10 |         <height>682</height>
11 |         <depth>3</depth>
12 |     </size>
13 |     <segmented>0</segmented>
26 |     </object>
27 | </annotation>
--------------------------------------------------------------------------------
/annotation/demo:
--------------------------------------------------------------------------------
1 | The detection demo: runs detection on a single image or on every image under a folder, and saves the results.
2 |
3 | Walking through how the demo runs:
4 |
5 | 1. from mmdet.apis import init_detector, inference_detector, show_result
6 | As before, the import resolves the __init__.py of the corresponding package;
7 | here it finds mmdet.apis, whose __init__.py lists in __all__ the names to be imported. Those names are themselves imported layer by layer, pulling in a further pile of imports,
8 | which we will not detail here.
9 | Note: in fact, as long as Registry is not involved, essentially no commands are pre-executed at import time.
10 |
11 | 2. init_detector(config_file, checkpoint_file, device='cuda:0')
12 | Originally one called build_detector(cfg.model, test_cfg=cfg.test_cfg) directly; now this function builds internally, loads the weights, and sets the CLASSES attribute.
13 |
14 | 3. inference_detector(model, imgs)
15 | Applies the image transforms (augmentation is not used at inference);
16 | detects according to whether the input imgs is iterable (you can directly pass an iterable list of paths for a whole folder).
17 |
18 | 4. show_result(img, result, model.CLASSES)
19 | Same as before, except that the class names are now managed through Registry and passed to mmcv directly as an attribute of the model. A usage sketch follows.
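
A minimal usage sketch of the four steps above (the config, checkpoint, and image paths are hypothetical placeholders):

```python
from mmdet.apis import init_detector, inference_detector, show_result

config_file = 'configs/mask_rcnn_r101_fpn_1x.py'          # placeholder paths
checkpoint_file = 'checkpoints/mask_rcnn_r101_fpn_1x.pth'

# Step 2: build the model, load the weights, set model.CLASSES.
model = init_detector(config_file, checkpoint_file, device='cuda:0')

# Step 3: detect a single image; an iterable of paths also works.
result = inference_detector(model, 'demo/test.jpg')

# Step 4: draw the result and optionally save it via out_file.
show_result('demo/test.jpg', result, model.CLASSES,
            score_thr=0.3, out_file='outputs/test_result.jpg')
```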
--------------------------------------------------------------------------------
/annotation/mmcv/checkpoint:
--------------------------------------------------------------------------------
1 |
2 |
3 | load_checkpoint(model, filename, map_location=None, strict=False, logger=None)
4 | Purpose: loads a model from a URL or from a file; only file loading is of interest here.
5 | Implementation: first torch.load reads the .pth file into the variable checkpoint,
6 | then the weight parameters are extracted from it as state_dict (since the .pth may also hold model meta or optimizer data),
7 | and finally load_state_dict loads the data into the model (sketched below).
8 |
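
A stripped-down sketch of that logic, ignoring URL loading, logging, and the handling of 'module.'-prefixed keys that the real function also performs:

```python
import torch

def load_checkpoint_sketch(model, filename, map_location=None, strict=False):
    # torch.load reads the .pth file into the variable `checkpoint`.
    checkpoint = torch.load(filename, map_location=map_location)
    # The .pth may hold bare weights, or a dict that also carries
    # meta/optimizer data; extract the weights as `state_dict`.
    if isinstance(checkpoint, dict) and 'state_dict' in checkpoint:
        state_dict = checkpoint['state_dict']
    else:
        state_dict = checkpoint
    # Finally, load_state_dict copies the data into the model.
    model.load_state_dict(state_dict, strict=strict)
    return checkpoint
```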
--------------------------------------------------------------------------------
/annotation/mmcv/io:
--------------------------------------------------------------------------------
1 |
2 | Reading an image:
3 | def imread(img_or_path, flag='color'):
4 | """Read an image.
5 |
6 | Args:
7 | img_or_path (ndarray or str): Either a numpy array or image path.
8 | If it is a numpy array (loaded image), then it will be returned
9 | as is.
10 | flag (str): Flags specifying the color type of a loaded image,
11 | candidates are `color`, `grayscale` and `unchanged`.
12 |
13 | Returns:
14 | ndarray: Loaded image array.
15 | """
16 | if isinstance(img_or_path, np.ndarray):
17 | return img_or_path
18 | elif is_str(img_or_path):
19 | flag = imread_flags[flag] if is_str(flag) else flag
20 | check_file_exist(img_or_path,
21 | 'img file does not exist: {}'.format(img_or_path))
22 | return cv2.imread(img_or_path, flag)
23 | else:
24 | raise TypeError('"img" must be a numpy array or a filename')
25 |
26 | Essentially a thin wrapper around cv2.imread; just pass in the image, the other features are rarely needed.
27 |
28 |
29 | Writing an image
30 | def imwrite(img, file_path, params=None, auto_mkdir=True):
31 | """Write image to file
32 |
33 | Args:
34 | img (ndarray): Image array to be written.
35 | file_path (str): Image file path.
36 | params (None or list): Same as opencv's :func:`imwrite` interface.
37 | auto_mkdir (bool): If the parent folder of `file_path` does not exist,
38 | whether to create it automatically.
39 |
40 | Returns:
41 | bool: Successful or not.
42 | """
43 | if auto_mkdir:
44 | dir_name = osp.abspath(osp.dirname(file_path))
45 | mkdir_or_exist(dir_name)
46 | return cv2.imwrite(file_path, img, params)
47 |
48 | Automatically checks whether the directory exists and creates it if not, then writes with cv2.imwrite.
49 |
--------------------------------------------------------------------------------
/annotation/mmcv/parallel:
--------------------------------------------------------------------------------
1 | Code related to multi-GPU parallel computation.
2 |
3 | -------------------
--------------------------------------------------------------------------------
/annotation/mmcv/runner/hooks:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ming71/mmdetection-annotated/dc25243ea11d9c4cfd517d7c08dfa2fd61e01895/annotation/mmcv/runner/hooks
--------------------------------------------------------------------------------
/annotation/mmcv/runner/utils:
--------------------------------------------------------------------------------
1 |
2 |
3 | def obj_from_dict(info, parent=None, default_args=None)
4 | info: the configuration info from cfg
5 | parent: the parent module/class to search, e.g.
6 |
7 | Purpose: initializes a dataset class from the config dict (the type is selectable, set in the .py config file); the options include custom/voc/coco etc., see __init__.py
8 | Implementation: info, i.e. the .py config file content, carries a type key, usually 'CocoDataset'; based on it, the matching class is looked up in parent and assigned to obj_type, and finally the class is initialized with the remaining entries of info as its initial arguments (a sketch follows below).
9 |
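
A minimal sketch of what obj_from_dict does, assuming info is a dict such as dict(type='CocoDataset', ann_file=..., ...):

```python
def obj_from_dict_sketch(info, parent=None, default_args=None):
    # The config dict carries a 'type' key, e.g. 'CocoDataset'.
    args = dict(info)
    obj_type = args.pop('type')
    if isinstance(obj_type, str) and parent is not None:
        # Look the class up by name on the parent module,
        # e.g. mmdet.datasets.
        obj_type = getattr(parent, obj_type)
    if default_args is not None:
        for name, value in default_args.items():
            args.setdefault(name, value)
    # Instantiate the class with the remaining config entries as kwargs.
    return obj_type(**args)
```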
--------------------------------------------------------------------------------
/annotation/mmdet/apis/__init__:
--------------------------------------------------------------------------------
1 |
2 | The __all__ variable here lists every imported name; the two lines below are the important ones:
3 |
4 | from .train import train_detector
5 | from .inference import init_detector, inference_detector, show_result
--------------------------------------------------------------------------------
/annotation/mmdet/apis/inference:
--------------------------------------------------------------------------------
1 |
2 | Changes: not much differs from the original; an extra init_detector wrapper was added. Previously build_detector was called directly; now you call init_detector, which calls build internally.
3 |
4 | Purpose: mainly the inference-stage machinery. The data used here all comes from the data.test parameters of the .py config file (the test_cfg data are model parameters, already passed in when the model was assembled, e.g. to construct the RPN).
5 |
6 | Code comments: see the comments in the very first version.
7 |
8 | init_detector(config, checkpoint=None, device='cuda:0'):
9 | Calls build_detector; loads the checkpoint weights onto the model; sets the CLASSES attribute;
10 | puts the model in eval mode and moves it to the GPU.
11 |
12 |
13 |
14 | inference_detector(model, imgs)
15 | Sets up the detection pipeline:
16 | instantiates the image transform class ImageTransform with two arguments, cfg.data.test.size_divisor and **cfg.img_norm_cfg (some configuration for initializing the class);
17 | depending on whether the input is a list of images or a single image, dispatches to different handlers (iterable inputs such as lists are supported).
18 |
19 | _inference_generator(model, imgs, img_transform, cfg, device)
20 | Iterates over multiple images, returning the results as a generator.
21 |
22 | _inference_single(model, img, img_transform, cfg, device)
23 | The inference forward pass.
24 |
25 |
26 | show_result(img, result, dataset='coco', score_thr=0.3, out_file=None)
27 | Draws the mask and detection results; to save the image, just pass out_file as the last argument, or modify it yourself to save to an extra path.
28 |
29 |
--------------------------------------------------------------------------------
/annotation/mmdet/apis/train:
--------------------------------------------------------------------------------
1 | def train_detector(model, dataset, cfg, distributed=False, validate=False, logger=None)
2 | The training entry point; offers two training modes: distributed and non-distributed.
3 |
4 | -----[Main functions]-----------------------------------------------------
5 | def _non_dist_train(model, dataset, cfg, validate=False)
6 | Purpose: entry point for non-distributed training.
7 | Steps: (for the parts involving runner methods, see the runner notes)
8 | 1. Wrap the data loading into data_loaders: located in mmdet/datasets/loader/build_loader.py; see the datasets/loader notes.
9 | 2. Wrap the model in MMDataParallel: inherits from torch.nn.DataParallel and manages multi-GPU parallel computation. This is again something wrapped inside mmcv; looking it up shows no separately defined constructor, so the plain DataParallel interface still applies: just specify the model and the GPUs (a list for multiple GPUs).
10 | 3. Assemble the Runner (this step only passes the relevant parameters along; nothing is built or run, just as the model build only places the modules without connecting or ordering them).
11 | 4. Register the hooks.
12 | 5. Load data from a checkpoint or from a file, using runner.resume and runner.load_checkpoint respectively.
13 | 6. Start training with runner.run(data_loaders, cfg.workflow, cfg.total_epochs).
14 |
15 |
16 | def build_optimizer(model, optimizer_cfg)
17 | Purpose: configures the optimizer parameters (to be filled in later).
18 |
19 |
20 | def batch_processor(model, data, train_mode)
21 | Purpose: the main training function; it processes a single batch of data.
22 | Implementation: see the comments at the corresponding code. It is used by passing this function into the runner, which then calls it inside runner.run().
23 | The exact call site is: outputs = self.batch_processor(
24 | self.model, data_batch, train_mode=True, **kwargs)
25 |
26 |
27 | def parse_losses(losses)
28 | Purpose: computes the total loss (a sketch follows at the end of this file).
29 |
30 | For example, the output of Faster R-CNN:
31 | {
32 | 'loss_rpn_cls': [
33 | tensor(0.6204, device='cuda:0', grad_fn=),
34 | tensor(0.0567, device='cuda:0', grad_fn=),
35 | tensor(0.0220, device='cuda:0', grad_fn=),
36 | tensor(0., device='cuda:0', grad_fn=),
37 | tensor(0., device='cuda:0', grad_fn=)],
38 | 'loss_rpn_bbox': [
39 | tensor(0.5475, device='cuda:0', grad_fn=),
40 | tensor(0., device='cuda:0', grad_fn=),
41 | tensor(0., device='cuda:0', grad_fn=),
42 | tensor(0., device='cuda:0', grad_fn=),
43 | tensor(0., device='cuda:0', grad_fn=)],
44 | 'loss_cls':
45 | tensor(4.3445, device='cuda:0', grad_fn=),
46 | 'acc':
47 | tensor([0.], device='cuda:0'),
48 | 'loss_bbox':
49 | tensor(5.2616e-05, device='cuda:0', grad_fn=)
50 | }
51 |
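
A sketch of how parse_losses reduces such a dict to a single scalar: list entries (one tensor per FPN level, as above) are summed, and only keys containing 'loss' enter the total ('acc' is logged but not optimized):

```python
from collections import OrderedDict

import torch

def parse_losses_sketch(losses):
    log_vars = OrderedDict()
    for name, value in losses.items():
        if isinstance(value, torch.Tensor):
            log_vars[name] = value.mean()
        elif isinstance(value, list):
            # e.g. loss_rpn_cls: one tensor per FPN level, summed here.
            log_vars[name] = sum(v.mean() for v in value)
    # Only entries whose key contains 'loss' enter the total loss.
    loss = sum(v for k, v in log_vars.items() if 'loss' in k)
    log_vars['loss'] = loss
    return loss, log_vars
```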
--------------------------------------------------------------------------------
/annotation/mmdet/datasets/datasets:
--------------------------------------------------------------------------------
1 |
2 | Notes on datasets:
3 | 1. Management: Registry.
4 | Registration and invocation of the concrete class for each dataset go through DATASETS.
5 | (1) Registration: managed via Registry.
6 | (2) Invocation (build): building calls the dataset class's concrete methods, e.g. parsing images/annotations.
7 |
8 |
9 | 2. Dataset classes and relationships
10 | Inheritance among the common datasets:
11 | CocoDataset --> CustomDataset --> torch.utils.data.Dataset
12 | VOCDataset --> XMLDataset --> CustomDataset --> torch.utils.data.Dataset
13 |
14 | 3. MyDataset
15 | (1) Simple inheritance
16 | If the annotations are in COCO or VOC format, inherit directly from CocoDataset or XMLDataset and just change the class names (see the sketch at the end of this file).
17 | (2) Custom formats
18 | If the dataset labels differ from the standards above, or the format is entirely custom (e.g. YOLO, or the txt annotations of the WHU remote-sensing vehicle dataset), override the load_annotations and get_ann_info functions (using COCO and VOC as references).
19 | (3) Recommended approach:
20 | Create a new my_dataset file under the datasets folder, define your own dataset class in it, and import it in __init__.py so it can be found.
21 |
22 | 4. Common datasets
23 | COCO and VOC both define a CLASSES variable at initialization, holding all the category names.
24 |
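
A minimal sketch of approaches (1)/(3), assuming COCO-format annotations and the single 'car' class from the dataset_example above:

```python
# mmdet/datasets/my_dataset.py
from .coco import CocoDataset
from .registry import DATASETS

@DATASETS.register_module
class MyDataset(CocoDataset):
    # Reuses all of CocoDataset's parsing; only the class names change.
    CLASSES = ('car', )
```

Then add `from .my_dataset import MyDataset` to datasets/__init__.py so the class can be looked up.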
--------------------------------------------------------------------------------
/annotation/mmdet/datasets/loader:
--------------------------------------------------------------------------------
1 |
2 |
3 | ============== the loader folder ==============
4 |
5 | -------- build_loader -------
6 | DataLoader wrapping (see the sketch at the end of this file):
7 | Uses PyTorch's built-in class: torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, sampler=None, batch_sampler=None, num_workers=0, collate_fn=, pin_memory=False, drop_last=False);
8 |
9 |
10 |
11 | Samplers: (not examined closely yet; a gap to fill in later)
12 | A sampler decides how the samples of each batch are fetched; the batch size and number of worker threads are also set here. The topmost parent class is Sampler from torch.utils.data.sampler.
13 | About Sampler:
14 | The base class of all samplers. Every sampler subclass must provide an __iter__ method, giving a way to iterate over the indices of the dataset elements, and a __len__ method returning the iterator's length.
15 |
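
A non-distributed sketch of that wrapping (build_dataloader_sketch is an illustrative name; the real build_loader also plugs in a GroupSampler that batches images of similar aspect ratio):

```python
from functools import partial

from mmcv.parallel import collate
from torch.utils.data import DataLoader

def build_dataloader_sketch(dataset, imgs_per_gpu, workers_per_gpu,
                            num_gpus=1):
    # The batch size and worker-thread count are fixed here, as noted above.
    return DataLoader(
        dataset,
        batch_size=num_gpus * imgs_per_gpu,
        sampler=None,  # the real code passes a GroupSampler instance here
        num_workers=num_gpus * workers_per_gpu,
        # collate stacks the DataContainer-wrapped samples per GPU.
        collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu),
        pin_memory=False)
```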
--------------------------------------------------------------------------------
/annotation/mmdet/models/anchor_heads:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | ###############################################
5 | ########### anchor_heads
6 | ###############################################
7 |
8 | ———————————— AnchorHead ————————————————————
9 | AnchorHead(nn.Module)
10 | Input parameters:
11 | num_classes (int): Number of classes (2 for the two-stage case at the RPN, separating foreground from background; for single-stage detectors it is e.g. 81, classifying and regressing directly)
12 | in_channels (int): Number of channels in the input feature map.
13 | feat_channels (int): Number of channels of the feature map.
14 | anchor_scales (Iterable): Anchor scales.
15 | anchor_ratios (Iterable): Anchor aspect ratios.
16 | anchor_strides (Iterable): Anchor strides.
17 | anchor_base_sizes (Iterable): Anchor base sizes.
18 | target_means (Iterable): Mean values of regression targets.
19 | target_stds (Iterable): Std values of regression targets.
20 | loss_cls (dict): Config of classification loss.
21 | loss_bbox (dict): Config of localization loss.
22 |
23 | Functionality:
24 | (1) assigns the necessary values to a number of parameters
25 | (2) generates the anchor boxes at every scale according to the config
26 | (3) specifies the classification and regression loss types and builds them
27 |
28 |
29 | ———————————— RPNHead ————————————————————
30 | class RPNHead(AnchorHead)
31 | Input parameters: (inherited from AnchorHead above, so mostly the same; collected as a config snippet below)
32 | num_classes = 2 # RPN anchors only distinguish foreground from background; set directly in the RPNHead constructor and handed to the parent AnchorHead
33 | in_channels = 256
34 | feat_channels = 256
35 | anchor_scales = [8]
36 | anchor_ratios = [0.5, 1.0, 2.0]
37 | anchor_strides = [4, 8, 16, 32, 64] # anchor strides, matching the downsampling strides
38 | anchor_base_sizes = None # if left unspecified, this defaults to anchor_strides
39 | target_means = [0.0, 0.0, 0.0, 0.0]
40 | target_stds = [1.0, 1.0, 1.0, 1.0]
41 | 'loss_cls': {'type': 'CrossEntropyLoss', 'use_sigmoid': True, 'loss_weight': 1.0},
42 | 'loss_bbox': {'type': 'SmoothL1Loss', 'beta': 0.1111111111111111, 'loss_weight': 1.0}
43 | Functionality:
44 | (1) generates the anchors (done in the AnchorHead constructor) as the attribute self.anchor_generators, a list whose elements are class instances, one per anchor scale group
45 | (2) builds the network that classifies and regresses each individual anchor
46 |
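
Collected as a config snippet, the parameters above correspond to a block like this in a config file such as faster_rcnn_r50_fpn_1x.py:

```python
rpn_head = dict(
    type='RPNHead',
    in_channels=256,
    feat_channels=256,
    anchor_scales=[8],
    anchor_ratios=[0.5, 1.0, 2.0],
    anchor_strides=[4, 8, 16, 32, 64],
    target_means=[0.0, 0.0, 0.0, 0.0],
    target_stds=[1.0, 1.0, 1.0, 1.0],
    loss_cls=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
    loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0))
```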
47 |
48 | ###############################################
49 | ########### mask_heads
50 | ###############################################
51 |
52 | ———————————— FCNMaskHead ——————————————
53 | Inheritance: FCNMaskHead(nn.Module)
54 | Input parameters: num_convs=4, # how many convolutions the 14*14 RoIAlign output goes through
55 | in_channels=256,
56 | conv_out_channels=256,
57 | num_classes=81 # the number of masks generated
58 | (the above are Mask R-CNN's parameters; for more parameter settings see the class constructor directly)
59 | Inference output: mask
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
--------------------------------------------------------------------------------
/annotation/mmdet/models/backbones:
--------------------------------------------------------------------------------
1 |
2 | ———————————— ResNet ————————————————————
3 |
4 | Inheritance:
5 | inherits from the nn.Module base class, nothing to worry about
6 |
7 | Input parameters (ResNet-101):
8 | 1. First, build pops off the type name; after **unpacking, the backbone dict is expanded into keyword=argument form and passed into the constructor (the config snippet at the end of this file collects the same dict),
9 | e.g.: self = ResNet()
10 | depth = 101 # number of layers
11 | num_stages = 4 # number of stages, i.e. downsampling scales
12 | strides = (1, 2, 2, 2)
13 | dilations = (1, 1, 1, 1)
14 | out_indices = (0, 1, 2, 3) # indices of the stages to output
15 | style = 'pytorch' # network style: with 'pytorch', the stride-2 layer is the 3x3 conv;
16 | with 'caffe', the stride-2 layer is the first 1x1 conv
17 | frozen_stages = 1 # number of frozen stages (stages whose parameters are not updated); -1 means every stage is updated
18 | normalize = {'type': 'BN', 'frozen': False}
19 | norm_eval = True
20 | dcn = None
21 | stage_with_dcn = (False, False, False, False)
22 | with_cp = False
23 | zero_init_residual = True
24 | 2. Class-level initial parameters:
25 | arch_settings = { 18: (BasicBlock, (2, 2, 2, 2)),
26 | 34: (BasicBlock, (3, 4, 6, 3)),
27 | 50: (Bottleneck, (3, 4, 6, 3)),
28 | 101: (Bottleneck, (3, 4, 23, 3)),
29 | 152: (Bottleneck, (3, 8, 36, 3))
30 | Five different resnet structures are preset; the dict keys are the depths, and each value is a tuple containing a class (which block type to build with) and a tuple of the block counts per stage.
31 | Taking resnet-101 as an example: the stages consist of 3, 4, 23, 3 Bottleneck blocks respectively, while the downsampling strides are 1, 2, 2, 2 and the dilations are 1, 1, 1, 1.
32 |
33 |
34 |
35 |
36 |
37 |
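
The same parameters as they appear in the config file; this (abridged) dict is exactly what build unpacks into the constructor:

```python
backbone = dict(
    type='ResNet',             # popped off by build before **unpacking
    depth=101,
    num_stages=4,
    strides=(1, 2, 2, 2),
    dilations=(1, 1, 1, 1),
    out_indices=(0, 1, 2, 3),
    frozen_stages=1,
    style='pytorch')
```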
--------------------------------------------------------------------------------
/annotation/mmdet/models/bbox_heads:
--------------------------------------------------------------------------------
1 |
2 |
3 | ———————————— BBoxHead ——————————————
4 | Inheritance: BBoxHead(nn.Module)
5 | Functionality: the topmost parent class of the other bbox heads
6 |
7 | ———————————— ConvFCBBoxHead ——————————————
8 | Inheritance: ConvFCBBoxHead(BBoxHead)
9 | Functionality: performs classification and regression to produce the corresponding scores and offsets
10 |
11 | ———————————— SharedFCBBoxHead ——————————————
12 | Inheritance: SharedFCBBoxHead(ConvFCBBoxHead)
13 | Functionality:
14 | In practice this layer is essentially idle; it was added for future wrapping flexibility and other advanced settings. In actual use the config files name this module as the type, but after its constructor everything proceeds directly in ConvFCBBoxHead.
15 |
16 |
--------------------------------------------------------------------------------
/annotation/mmdet/models/builder:
--------------------------------------------------------------------------------
1 |
2 | The build functions for the 7 basic categories, each configured against a different Registry placeholder.
3 | The main reason is that each category serves a different function, reflected in its _module_dict holding different layers (classes), so they are built separately:
4 | build_backbone (cfg)
5 | build_neck(cfg)
6 | build_roi_extractor(cfg)
7 | build_head(cfg)
8 | build_shared_head(cfg)
9 | build_loss(cfg)
10 | build_detector(cfg, train_cfg=None, test_cfg=None)
11 |
12 | build(cfg, registry, default_args=None)
13 | All 7 functions above call this shared build function, selecting via the registry handle (one of the 7, e.g. BACKBONES);
14 | DETECTORS alone carries the training and testing configuration, split out of the original cfg (the cfg passed in here is actually cfg.model, i.e. the model part of the overall config). A sketch follows below.
15 |
16 |
17 |
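
A sketch of the shared build function and two of the wrappers, following the description above (a list config is chained into an nn.Sequential; only build_detector passes default_args):

```python
import torch.nn as nn

from mmdet.utils import build_from_cfg
from .registry import BACKBONES, DETECTORS

def build(cfg, registry, default_args=None):
    # A list config builds each entry and chains them in an nn.Sequential.
    if isinstance(cfg, list):
        modules = [build_from_cfg(c, registry, default_args) for c in cfg]
        return nn.Sequential(*modules)
    return build_from_cfg(cfg, registry, default_args)

def build_backbone(cfg):
    return build(cfg, BACKBONES)

def build_detector(cfg, train_cfg=None, test_cfg=None):
    # Only the detector receives the train/test configs split out of
    # the full cfg (cfg here is cfg.model).
    return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg))
```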
--------------------------------------------------------------------------------
/annotation/mmdet/models/losses:
--------------------------------------------------------------------------------
1 | Look at __init__.py first to get a rough idea of which loss types are available and through which files they are configured, which makes low-level changes easier:
2 |
3 | from .accuracy import accuracy, Accuracy
4 | from .cross_entropy_loss import (cross_entropy, binary_cross_entropy,
5 | mask_cross_entropy, CrossEntropyLoss)
6 | from .focal_loss import sigmoid_focal_loss, FocalLoss
7 | from .smooth_l1_loss import smooth_l1_loss, SmoothL1Loss
8 | from .ghm_loss import GHMC, GHMR
9 | from .balanced_l1_loss import balanced_l1_loss, BalancedL1Loss
10 | from .mse_loss import mse_loss, MSELoss
11 | from .iou_loss import iou_loss, bounded_iou_loss, IoULoss, BoundedIoULoss
12 | from .utils import reduce_loss, weight_reduce_loss, weighted_loss
13 |
14 | The construction method differs from before: after this refactor, everything is managed uniformly through the registry, so each loss implementation class registers its name into the lookup dict ahead of time via a decorator.
15 | The loss classes are wrapped as layers inheriting from nn.Module, and the loss is computed by running a forward pass (the computation is defined in forward).
16 | Usage:
17 | First, when the head's build stacks its components at initialization, the loss layer's __init__ is called and the loss is stacked in as just another layer, yielding a layer instance; during the forward pass, the computation arguments are passed in and forward carries out the propagation (see the sketch at the end of this file).
18 |
19 | --------- CrossEntropyLoss ----------------------------------------------------------------------
20 | class CrossEntropyLoss(nn.Module)
21 | Input parameters:
22 | use_sigmoid=False,
23 | use_mask=False,
24 | reduction='mean',
25 | loss_weight=1.0,
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 | ---------- SmoothL1Loss ---------------------------------------------------------------------
34 | class SmoothL1Loss(nn.Module)
35 | Input parameters:
36 | def __init__(self, beta=1.0, reduction='mean', loss_weight=1.0)
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 | -------------------------------------------------------------------------------
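
The pattern all of these losses follow, as a minimal sketch (MSELossSketch is an illustrative name, not one of the registered classes):

```python
import torch.nn as nn
import torch.nn.functional as F

from ..registry import LOSSES

@LOSSES.register_module
class MSELossSketch(nn.Module):

    def __init__(self, reduction='mean', loss_weight=1.0):
        # Called once when the head's build stacks its components.
        super(MSELossSketch, self).__init__()
        self.reduction = reduction
        self.loss_weight = loss_weight

    def forward(self, pred, target):
        # The actual loss value is computed in the forward pass.
        return self.loss_weight * F.mse_loss(
            pred, target, reduction=self.reduction)
```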
--------------------------------------------------------------------------------
/annotation/mmdet/models/necks:
--------------------------------------------------------------------------------
1 |
2 | ———————————— FPN (see the source-code comments for the detailed implementation) ————————————————————
3 | Inheritance: nn.Module base class
4 |
5 | Input parameters: obtained from the config.py file
6 | in_channels : [256, 512, 1024, 2048] the final output channel counts of the different stages
7 | out_channels : 256 the channel count of the FPN output feature maps
8 | num_outs : 5 the number of fused feature levels to output
9 |
10 |
11 | Network construction:
12 | Iterate over the stage indices and append the FPN building blocks to two ModuleLists (see the sketch below):
13 | (1) lateral_convs : the channel-adjusting branches of the FPN; 1*1 convolutions bring each output to 256 channels
14 | (2) fpn_convs : 3*3 convolutions to suppress the checkerboard effect (four kernels of the same size are added here; a single shared one would not do, since each learns different parameters)
15 |
16 |
17 | Supported extensions:
18 | adding activation functions / BN, choosing which levels to fuse, adding extra feature levels, etc.; not covered here
19 |
20 |
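
A sketch of just those two ModuleLists, assuming in_channels=[256, 512, 1024, 2048] and out_channels=256 as above:

```python
import torch.nn as nn

in_channels = [256, 512, 1024, 2048]  # stage outputs C2-C5
out_channels = 256

# (1) lateral 1*1 convs: bring every stage output to 256 channels.
lateral_convs = nn.ModuleList(
    [nn.Conv2d(c, out_channels, kernel_size=1) for c in in_channels])

# (2) 3*3 convs applied after fusion: identical shapes, but separate
# modules per level, since each one learns different parameters.
fpn_convs = nn.ModuleList(
    [nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
     for _ in in_channels])
```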
--------------------------------------------------------------------------------
/annotation/mmdet/models/resnet/conv_block:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ming71/mmdetection-annotated/dc25243ea11d9c4cfd517d7c08dfa2fd61e01895/annotation/mmdet/models/resnet/conv_block
--------------------------------------------------------------------------------
/annotation/mmdet/models/resnet/identity_block:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ming71/mmdetection-annotated/dc25243ea11d9c4cfd517d7c08dfa2fd61e01895/annotation/mmdet/models/resnet/identity_block
--------------------------------------------------------------------------------
/annotation/mmdet/models/resnet/resnet-101(这个是maskrcnn-tf的,m2det只输出这里的C2-C5,而且两处block数目也不同):
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ming71/mmdetection-annotated/dc25243ea11d9c4cfd517d7c08dfa2fd61e01895/annotation/mmdet/models/resnet/resnet-101(这个是maskrcnn-tf的,m2det只输出这里的C2-C5,而且两处block数目也不同)
--------------------------------------------------------------------------------
/annotation/mmdet/models/resnet/resnet-50.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ming71/mmdetection-annotated/dc25243ea11d9c4cfd517d7c08dfa2fd61e01895/annotation/mmdet/models/resnet/resnet-50.png
--------------------------------------------------------------------------------
/annotation/mmdet/models/resnet/其中采用了右边这种,注意通道先压缩再扩充,减少参数,可以参考resnet-50结构图.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ming71/mmdetection-annotated/dc25243ea11d9c4cfd517d7c08dfa2fd61e01895/annotation/mmdet/models/resnet/其中采用了右边这种,注意通道先压缩再扩充,减少参数,可以参考resnet-50结构图.png
--------------------------------------------------------------------------------
/annotation/mmdet/models/utils:
--------------------------------------------------------------------------------
1 | —————————————————— ConvModule(nn.Module) ——————————————————
2 | Builds the generic convolution + norm + activation block.
3 |
4 |
--------------------------------------------------------------------------------
/annotation/mmdet/registry:
--------------------------------------------------------------------------------
1 | Location:
2 | The registry in mmdet/models only registers the keywords; the real registration implementation lives in mmdet/utils/registry.
3 |
4 | Purpose:
5 | Registers module placeholders.
6 | The corresponding module placeholders are registered before the program runs, so that the config file can fill in the configuration of each module directly.
7 |
8 | Types:
9 | 7 major categories (datasets were in fact later managed the same way):
10 | BACKBONES = Registry('backbone')
11 | NECKS = Registry('neck')
12 | ROI_EXTRACTORS = Registry('roi_extractor')
13 | SHARED_HEADS = Registry('shared_head')
14 | HEADS = Registry('head')
15 | LOSSES = Registry('loss')
16 | DETECTORS = Registry('detector')
17 | Each category holds its concrete classes, e.g. BACKBONES contains 'ResNet', 'ResNeXt', 'SSDVGG'; how to add new ones is described later.
18 |
19 |
20 | *Intuition:
21 | Even so, what does a Registry concretely look like?
22 | Taking the imported DETECTORS as an example, printing DETECTORS directly gives:
23 | Registry(name=detector, items=['SingleStageDetector', 'TwoStageDetector', 'RPN', 'FastRCNN',
24 | 'FasterRCNN', 'MaskRCNN', 'CascadeRCNN', 'HybridTaskCascade', 'RetinaNet', 'FCOS', 'GridRCNN', 'MaskScoringRCNN'])
25 | and checking its type gives
26 |
27 |
28 |
29 | Understanding the role of Registry:
30 | There are 7 Registry modules; the _module_dict under each one stores the different classes added to it (see the sketch at the end of this file).
31 | They are used for lookup and for assembling the model;
32 |
33 |
34 |
35 | build_from_cfg(cfg, registry, default_args=None)
36 | Input: the model config, a module placeholder, and the training/testing config
37 | Output: the modular construction of the network's 7 major categories, completed
38 | Note: all of the major categories above, even DETECTORS, are in essence placeholders; before the real parameters from cfg are passed in, nothing is connected;
39 | the order is: DETECTORS is built first, and then, driven by its config, the four kinds of modules beneath it are built in turn, together making up the complete detector
40 |
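
Boiled down, the Registry class behaves roughly like this sketch of mmdet/utils/registry.py:

```python
class RegistrySketch(object):

    def __init__(self, name):
        self._name = name        # e.g. 'detector'
        self._module_dict = {}   # maps class name -> class

    def register_module(self, cls):
        # Used as a decorator: @DETECTORS.register_module above a class.
        self._module_dict[cls.__name__] = cls
        return cls

    def get(self, key):
        # build_from_cfg looks the 'type' string up here.
        return self._module_dict.get(key)
```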
--------------------------------------------------------------------------------
/annotation/model_vis/inference.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ming71/mmdetection-annotated/dc25243ea11d9c4cfd517d7c08dfa2fd61e01895/annotation/model_vis/inference.png
--------------------------------------------------------------------------------
/annotation/model_vis/maskrcnn-model-inference.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ming71/mmdetection-annotated/dc25243ea11d9c4cfd517d7c08dfa2fd61e01895/annotation/model_vis/maskrcnn-model-inference.png
--------------------------------------------------------------------------------
/configs/empirical_attention/README.md:
--------------------------------------------------------------------------------
1 | # An Empirical Study of Spatial Attention Mechanisms in Deep Networks
2 |
3 | ## Introduction
4 |
5 | ```
6 | @article{zhu2019empirical,
7 | title={An Empirical Study of Spatial Attention Mechanisms in Deep Networks},
8 | author={Zhu, Xizhou and Cheng, Dazhi and Zhang, Zheng and Lin, Stephen and Dai, Jifeng},
9 | journal={arXiv preprint arXiv:1904.05873},
10 | year={2019}
11 | }
12 | ```
13 |
14 |
15 | ## Results and Models
16 |
17 | | Backbone | Attention Component | DCN | Lr schd | box AP | Download |
18 | |:---------:|:-------------------:|:----:|:-------:|:------:|:--------:|
19 | | R-50 | 1111 | N | 1x | 38.6 | - |
20 | | R-50 | 0010 | N | 1x | 38.2 | - |
21 | | R-50 | 1111 | Y | 1x | 41.0 | - |
22 | | R-50 | 0010 | Y | 1x | 40.8 | - |
23 |
24 |
--------------------------------------------------------------------------------
/configs/fcos/README.md:
--------------------------------------------------------------------------------
1 | # FCOS: Fully Convolutional One-Stage Object Detection
2 |
3 | ## Introduction
4 |
5 | ```
6 | @article{tian2019fcos,
7 | title={FCOS: Fully Convolutional One-Stage Object Detection},
8 | author={Tian, Zhi and Shen, Chunhua and Chen, Hao and He, Tong},
9 | journal={arXiv preprint arXiv:1904.01355},
10 | year={2019}
11 | }
12 | ```
13 |
14 | ## Results and Models
15 |
16 | | Backbone | Style | GN | MS train | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
17 | |:---------:|:-------:|:-------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:|
18 | | R-50 | caffe | N | N | 1x | 5.5 | 0.373 | 13.7 | 35.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_caffe_fpn_1x_4gpu_20190516-a7cac5ff.pth) |
19 | | R-50 | caffe | Y | N | 1x | 6.9 | 0.396 | 13.6 | 36.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_caffe_fpn_gn_1x_4gpu_20190516-9f253a93.pth) |
20 | | R-50 | caffe | Y | N | 2x | - | - | - | 36.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_caffe_fpn_gn_2x_4gpu_20190516_-93484354.pth) |
21 | | R-101 | caffe | Y | N | 1x | 10.4 | 0.558 | 11.6 | 39.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r101_caffe_fpn_gn_1x_4gpu_20190516-e4889733.pth) |
22 | | R-101 | caffe | Y | N | 2x | - | - | - | 39.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r101_caffe_fpn_gn_2x_4gpu_20190516-c03af97b.pth) |
23 |
24 |
25 | | Backbone | Style | GN | MS train | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
26 | |:---------:|:-------:|:-------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:|
27 | | R-50 | caffe | Y | Y | 2x | - | - | - | 38.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_mstrain_640_800_r50_caffe_fpn_gn_2x_4gpu_20190516-f7329d80.pth) |
28 | | R-101 | caffe | Y | Y | 2x | - | - | - | 40.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu_20190516-42e6f62d.pth) |
29 | | X-101 | caffe | Y | Y | 2x | 9.7 | 0.892 | 7.0 | 42.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x_20190516-a36c0872.pth) |
30 |
31 | **Notes:**
32 | - To be consistent with the author's implementation, we use 4 GPUs with 4 images/GPU for R-50 and R-101 models, and 8 GPUs with 2 images/GPU for X-101 models.
33 | - The X-101 backbone is X-101-64x4d.
34 |
--------------------------------------------------------------------------------
/configs/ghm/README.md:
--------------------------------------------------------------------------------
1 | # Gradient Harmonized Single-stage Detector
2 |
3 | ## Introduction
4 |
5 | ```
6 | @inproceedings{li2019gradient,
7 | title={Gradient Harmonized Single-stage Detector},
8 | author={Li, Buyu and Liu, Yu and Wang, Xiaogang},
9 | booktitle={AAAI Conference on Artificial Intelligence},
10 | year={2019}
11 | }
12 | ```
13 |
14 | ## Results and Models
15 |
16 | | Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
17 | | :-------------: | :-----: | :-----: | :------: | :-----------------: | :------------: | :----: | :------: |
18 | | R-50-FPN | pytorch | 1x | 3.9 | 0.500 | 9.4 | 36.9 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_r50_fpn_1x_20190608-b9aa5862.pth) |
19 | | R-101-FPN | pytorch | 1x | 5.8 | 0.625 | 8.5 | 39.0 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_r101_fpn_1x_20190608-b885b74a.pth) |
20 | | X-101-32x4d-FPN | pytorch | 1x | 7.0 | 0.818 | 7.6 | 40.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_x101_32x4d_fpn_1x_20190608-ed295d22.pth) |
21 | | X-101-64x4d-FPN | pytorch | 1x | 9.9 | 1.191 | 6.1 | 41.6 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_x101_64x4d_fpn_1x_20190608-7f2037ce.pth) |
--------------------------------------------------------------------------------
/configs/gn/README.md:
--------------------------------------------------------------------------------
1 | # Group Normalization
2 |
3 | ## Introduction
4 |
5 | ```
6 | @inproceedings{wu2018group,
7 | title={Group Normalization},
8 | author={Wu, Yuxin and He, Kaiming},
9 | booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
10 | year={2018}
11 | }
12 | ```
13 |
14 | ## Results and Models
15 |
16 | | Backbone | model | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |
17 | |:-------------:|:----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:|
18 | | R-50-FPN (d) | Mask R-CNN | 2x | 7.2 | 0.806 | 5.4 | 39.8 | 36.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_2x_20180113-86832cf2.pth) |
19 | | R-50-FPN (d) | Mask R-CNN | 3x | 7.2 | 0.806 | 5.4 | 40.1 | 36.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_3x_20180113-8e82f48d.pth) |
20 | | R-101-FPN (d) | Mask R-CNN | 2x | 9.9 | 0.970 | 4.8 | 41.5 | 37.0 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r101_fpn_gn_2x_20180113-9598649c.pth) |
21 | | R-101-FPN (d) | Mask R-CNN | 3x | 9.9 | 0.970 | 4.8 | 41.6 | 37.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r101_fpn_gn_3x_20180113-a14ffb96.pth) |
22 | | R-50-FPN (c) | Mask R-CNN | 2x | 7.2 | 0.806 | 5.4 | 39.7 | 35.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_contrib_2x_20180113-ec93305c.pth) |
23 | | R-50-FPN (c) | Mask R-CNN | 3x | 7.2 | 0.806 | 5.4 | 40.0 | 36.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_contrib_3x_20180113-9d230cab.pth) |
24 |
25 | **Notes:**
26 | - (d) means pretrained model converted from Detectron, and (c) means the contributed model pretrained by [@thangvubk](https://github.com/thangvubk).
27 | - The `3x` schedule is epoch [28, 34, 36].
28 | - **Memory and train/inf time figures are outdated.**
--------------------------------------------------------------------------------
/configs/grid_rcnn/README.md:
--------------------------------------------------------------------------------
1 | # Grid R-CNN
2 |
3 | ## Introduction
4 |
5 | ```
6 | @inproceedings{lu2019grid,
7 | title={Grid r-cnn},
8 | author={Lu, Xin and Li, Buyu and Yue, Yuxin and Li, Quanquan and Yan, Junjie},
9 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
10 | year={2019}
11 | }
12 |
13 | @article{lu2019grid,
14 | title={Grid R-CNN Plus: Faster and Better},
15 | author={Lu, Xin and Li, Buyu and Yue, Yuxin and Li, Quanquan and Yan, Junjie},
16 | journal={arXiv preprint arXiv:1906.05688},
17 | year={2019}
18 | }
19 | ```
20 |
21 | ## Results and Models
22 |
23 | | Backbone | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
24 | |:-----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:|
25 | | R-50 | 2x | 4.8 | 1.172 | 10.9 | 40.3 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_r50_fpn_2x_20190619-5b29cf9d.pth) |
26 | | R-101 | 2x | 6.7 | 1.214 | 10.0 | 41.7 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_r101_fpn_2x_20190619-a4b61645.pth) |
27 | | X-101-32x4d | 2x | 8.0 | 1.335 | 8.5 | 43.0 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_x101_32x4d_fpn_2x_20190619-0bbfd87a.pth) |
28 | | X-101-64x4d | 2x | 10.9 | 1.753 | 6.4 | 43.1 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_x101_64x4d_fpn_2x_20190619-8f4e20bb.pth) |
29 |
30 | **Notes:**
31 | - All models are trained with 8 GPUs instead of 32 GPUs in the original paper.
32 | - The warm-up lasts for 1 epoch, and `2x` here indicates 25 epochs.
33 |
--------------------------------------------------------------------------------
/configs/hrnet/README.md:
--------------------------------------------------------------------------------
1 | # High-resolution networks (HRNets) for object detection
2 |
3 | ## Introduction
4 |
5 | ```
6 | @inproceedings{SunXLW19,
7 | title={Deep High-Resolution Representation Learning for Human Pose Estimation},
8 | author={Ke Sun and Bin Xiao and Dong Liu and Jingdong Wang},
9 | booktitle={CVPR},
10 | year={2019}
11 | }
12 |
13 | @article{SunZJCXLMWLW19,
14 | title={High-Resolution Representations for Labeling Pixels and Regions},
15 | author={Ke Sun and Yang Zhao and Borui Jiang and Tianheng Cheng and Bin Xiao
16 | and Dong Liu and Yadong Mu and Xinggang Wang and Wenyu Liu and Jingdong Wang},
17 | journal = {CoRR},
18 | volume = {abs/1904.04514},
19 | year={2019}
20 | }
21 | ```
22 |
23 | ## Results and Models
24 |
25 | Faster R-CNN
26 |
27 | | Backbone|#Params|GFLOPs|Lr sched|mAP|Download|
28 | | :--:|:--:|:--:|:--:|:--:|:--:|
29 | | HRNetV2-W18 |26.2M|159.1| 1x | 36.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w18_fpn_1x_20190522-e368c387.pth)|
30 | | HRNetV2-W18 |26.2M|159.1| 20-23-24e | 38.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w18_fpn_20_23_24e_20190522-ed3c0293.pth)|
31 | | HRNetV2-W32 |45.0M|245.3| 1x | 39.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w32_fpn_1x_20190522-d22f1fef.pth)|
32 | | HRNetV2-W32 |45.0M|245.3| 20-23-24e | 40.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w32_fpn_20_23_24e_20190522-2d67a5eb.pth)|
33 | | HRNetV2-W40 |60.5M|314.9| 1x | 40.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w40_fpn_1x_20190522-30502318.pth)|
34 | | HRNetV2-W40 |60.5M|314.9| 20-23-24e | 41.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w40_fpn_20_23_24e_20190522-050a7c7f.pth)|
35 |
36 |
37 | Mask R-CNN
38 |
39 | |Backbone|Lr sched|mask mAP|box mAP|Download|
40 | |:--:|:--:|:--:|:--:|:--:|
41 | | HRNetV2-W18 | 1x | 34.2 | 37.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/mask_rcnn_hrnetv2_w18_fpn_1x_20190522-c8ad459f.pth)|
42 | | HRNetV2-W18 | 20-23-24e | 35.7 | 39.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/mask_rcnn_hrnetv2_w18_fpn_20_23_24e_20190522-5c11b7f2.pth)|
43 | | HRNetV2-W32 | 1x | 36.8 | 40.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/mask_rcnn_hrnetv2_w32_fpn_1x_20190522-374aaa00.pth)|
44 | | HRNetV2-W32 | 20-23-24e | 37.6 | 42.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/mask_rcnn_hrnetv2_w32_fpn_20_23_24e_20190522-4dd02a79.pth)|
45 |
46 | Cascade R-CNN
47 |
48 | |Backbone|Lr sched|mAP|Download|
49 | |:--:|:--:|:--:|:--:|
50 | | HRNetV2-W32 | 20e | 43.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/cascade_rcnn_hrnetv2_w32_fpn_20e_20190522-55bec4ee.pth)|
51 |
52 | **Note:**
53 |
54 | - HRNetV2 ImageNet pretrained models are in [HRNets for Image Classification](https://github.com/HRNet/HRNet-Image-Classification).
55 |
--------------------------------------------------------------------------------
/configs/htc/README.md:
--------------------------------------------------------------------------------
1 | # Hybrid Task Cascade for Instance Segmentation
2 |
3 | ## Introduction
4 |
5 | We provide config files to reproduce the results in the CVPR 2019 paper for [Hybrid Task Cascade](https://arxiv.org/abs/1901.07518).
6 |
7 | ```
8 | @inproceedings{chen2019hybrid,
9 | title={Hybrid task cascade for instance segmentation},
10 |   author={Chen, Kai and Pang, Jiangmiao and Wang, Jiaqi and Xiong, Yu and Li, Xiaoxiao and Sun, Shuyang and Feng, Wansen and Liu, Ziwei and Shi, Jianping and Ouyang, Wanli and Loy, Chen Change and Lin, Dahua},
11 | booktitle={IEEE Conference on Computer Vision and Pattern Recognition},
12 | year={2019}
13 | }
14 | ```
15 |
16 | ## Dataset
17 |
18 | HTC requires the COCO and COCO-stuff datasets for training. You need to download and extract them under the COCO dataset path.
19 | The directory structure should look like this.
20 |
21 | ```
22 | mmdetection
23 | ├── mmdet
24 | ├── tools
25 | ├── configs
26 | ├── data
27 | │ ├── coco
28 | │ │ ├── annotations
29 | │ │ ├── train2017
30 | │ │ ├── val2017
31 | │ │ ├── test2017
│ │ ├── stuffthingmaps
33 | ```
34 |
35 | ## Results and Models
36 |
37 | The results on COCO 2017val are shown in the table below. (Results on test-dev are usually slightly higher than on val.)
38 |
39 | | Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |
40 | |:---------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:|
41 | | R-50-FPN | pytorch | 1x | 7.4 | 0.936 | 4.1 | 42.1 | 37.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_r50_fpn_1x_20190408-878c1712.pth) |
42 | | R-50-FPN | pytorch | 20e | - | - | - | 43.2 | 38.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_r50_fpn_20e_20190408-c03b7015.pth) |
43 | | R-101-FPN | pytorch | 20e | 9.3 | 1.051 | 4.0 | 44.9 | 39.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_r101_fpn_20e_20190408-a2e586db.pth) |
44 | | X-101-32x4d-FPN | pytorch |20e| 5.8 | 0.769 | 3.8 | 46.1 | 40.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_x101_32x4d_fpn_20e_20190408-9eae4d0b.pth) |
45 | | X-101-64x4d-FPN | pytorch |20e| 7.5 | 1.120 | 3.5 | 46.9 | 40.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_x101_64x4d_fpn_20e_20190408-497f2561.pth) |
46 |
47 | - In the HTC paper and COCO 2018 Challenge, `score_thr` is set to 0.001 for both baselines and HTC.
48 | - We use 8 GPUs with 2 images/GPU for R-50 and R-101 models, and 16 GPUs with 1 image/GPU for X-101 models.
49 | If you would like to train X-101 HTC with 8 GPUs, you need to change the lr from 0.02 to 0.01.
50 |
51 | We also provide a powerful HTC with DCN and multi-scale training model. No testing augmentation is used.
52 |
53 | | Backbone | Style | DCN | training scales | Lr schd | box AP | mask AP | Download |
54 | |:----------------:|:-------:|:-----:|:---------------:|:-------:|:------:|:-------:|:--------:|
55 | | X-101-64x4d-FPN | pytorch | c3-c5 | 400~1400 | 20e | 50.7 | 43.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e_20190408-0e50669c.pth) |
56 |
--------------------------------------------------------------------------------
/configs/libra_rcnn/README.md:
--------------------------------------------------------------------------------
1 | # Libra R-CNN: Towards Balanced Learning for Object Detection
2 |
3 | ## Introduction
4 |
5 | We provide config files to reproduce the results in the CVPR 2019 paper [Libra R-CNN](https://arxiv.org/pdf/1904.02701.pdf).
6 |
7 | ```
8 | @inproceedings{pang2019libra,
9 | title={Libra R-CNN: Towards Balanced Learning for Object Detection},
10 |   author={Pang, Jiangmiao and Chen, Kai and Shi, Jianping and Feng, Huajun and Ouyang, Wanli and Lin, Dahua},
11 | booktitle={IEEE Conference on Computer Vision and Pattern Recognition},
12 | year={2019}
13 | }
14 | ```
15 |
16 | ## Results and models
17 |
18 | The results on COCO 2017val are shown in the table below. (Results on test-dev are usually slightly higher than on val.)
19 |
20 | | Architecture | Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
21 | |:---------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:|
22 | | Faster R-CNN | R-50-FPN | pytorch | 1x | 4.2 | 0.375 | 12.0 | 38.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_20190610-bf0ea559.pth) |
23 | | Fast R-CNN | R-50-FPN | pytorch | 1x | 3.7 | 0.272 | 16.3 | 38.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_fast_rcnn_r50_fpn_1x_20190525-a43f88b5.pth) |
24 | | Faster R-CNN | R-101-FPN | pytorch | 1x | 6.0 | 0.495 | 10.4 | 40.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_20190525-94e94051.pth) |
25 | | Faster R-CNN | X-101-64x4d-FPN | pytorch | 1x | 10.1 | 1.050 | 6.8 | 42.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_20190525-359c134a.pth) |
26 | | RetinaNet | R-50-FPN | pytorch | 1x | 3.7 | 0.328 | 11.8 | 37.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_retinanet_r50_fpn_1x_20190525-ead2a6bb.pth) |
27 |
--------------------------------------------------------------------------------
/configs/ms_rcnn/README.md:
--------------------------------------------------------------------------------
1 | # Mask Scoring R-CNN
2 |
3 | ## Introduction
4 |
5 | ```
6 | @inproceedings{huang2019msrcnn,
7 | title={Mask Scoring R-CNN},
8 | author={Zhaojin Huang and Lichao Huang and Yongchao Gong and Chang Huang and Xinggang Wang},
9 | booktitle={IEEE Conference on Computer Vision and Pattern Recognition},
10 | year={2019},
11 | }
12 | ```
13 |
14 | ## Results and Models
15 |
16 | | Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |
17 | |:-------------:|:----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:|
18 | | R-50-FPN | caffe | 1x | 4.3 | 0.537 | 10.1 | 37.4 | 35.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_r50_caffe_fpn_1x_20190624-619934b5.pth) |
19 | | R-50-FPN | caffe | 2x | - | - | - | 38.2 | 35.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ms-rcnn/ms_rcnn_r50_caffe_fpn_2x_20190525-a07be31e.pth) |
20 | | R-101-FPN | caffe | 1x | 6.2 | 0.682 | 9.1 | 39.8 | 37.2 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_r101_caffe_fpn_1x_20190624-677a5548.pth) |
21 | | R-101-FPN | caffe | 2x | - | - | - | 40.7 | 37.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ms-rcnn/ms_rcnn_r101_caffe_fpn_2x_20190525-4aee1528.pth) |
22 | | R-X101-32x4d | pytorch | 2x | 7.6 | 0.844 | 8.0 | 41.7 | 38.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_x101_32x4d_fpn_2x_20190628-ab454d07.pth) |
23 | | R-X101-64x4d | pytorch | 1x | 10.5 | 1.214 | 6.4 | 42.0 | 39.1 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_x101_64x4d_fpn_1x_20190628-dec32bda.pth) |
24 | | R-X101-64x4d | pytorch | 2x | - | - | - | 42.2 | 38.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ms-rcnn/ms_rcnn_x101_64x4d_fpn_2x_20190525-c044c25a.pth) |
25 |
--------------------------------------------------------------------------------
/configs/retinanet_r101_fpn_1x.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='RetinaNet',
4 | pretrained='modelzoo://resnet101',
5 | backbone=dict(
6 | type='ResNet',
7 | depth=101,
8 | num_stages=4,
9 | out_indices=(0, 1, 2, 3),
10 | frozen_stages=1,
11 | style='pytorch'),
12 | neck=dict(
13 | type='FPN',
14 | in_channels=[256, 512, 1024, 2048],
15 | out_channels=256,
16 | start_level=1,
17 | add_extra_convs=True,
18 | num_outs=5),
19 | bbox_head=dict(
20 | type='RetinaHead',
21 | num_classes=81,
22 | in_channels=256,
23 | stacked_convs=4,
24 | feat_channels=256,
25 | octave_base_scale=4,
26 | scales_per_octave=3,
27 | anchor_ratios=[0.5, 1.0, 2.0],
28 | anchor_strides=[8, 16, 32, 64, 128],
29 | target_means=[.0, .0, .0, .0],
30 | target_stds=[1.0, 1.0, 1.0, 1.0],
31 | loss_cls=dict(
32 | type='FocalLoss',
33 | use_sigmoid=True,
34 | gamma=2.0,
35 | alpha=0.25,
36 | loss_weight=1.0),
37 | loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0)))
38 | # training and testing settings
39 | train_cfg = dict(
40 | assigner=dict(
41 | type='MaxIoUAssigner',
42 | pos_iou_thr=0.5,
43 | neg_iou_thr=0.4,
44 | min_pos_iou=0,
45 | ignore_iof_thr=-1),
46 | allowed_border=-1,
47 | pos_weight=-1,
48 | debug=False)
49 | test_cfg = dict(
50 | nms_pre=1000,
51 | min_bbox_size=0,
52 | score_thr=0.05,
53 | nms=dict(type='nms', iou_thr=0.5),
54 | max_per_img=100)
55 | # dataset settings
56 | dataset_type = 'CocoDataset'
57 | data_root = 'data/coco/'
58 | img_norm_cfg = dict(
59 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
60 | data = dict(
61 | imgs_per_gpu=2,
62 | workers_per_gpu=2,
63 | train=dict(
64 | type=dataset_type,
65 | ann_file=data_root + 'annotations/instances_train2017.json',
66 | img_prefix=data_root + 'train2017/',
67 | img_scale=(1333, 800),
68 | img_norm_cfg=img_norm_cfg,
69 | size_divisor=32,
70 | flip_ratio=0.5,
71 | with_mask=False,
72 | with_crowd=False,
73 | with_label=True),
74 | val=dict(
75 | type=dataset_type,
76 | ann_file=data_root + 'annotations/instances_val2017.json',
77 | img_prefix=data_root + 'val2017/',
78 | img_scale=(1333, 800),
79 | img_norm_cfg=img_norm_cfg,
80 | size_divisor=32,
81 | flip_ratio=0,
82 | with_mask=False,
83 | with_crowd=False,
84 | with_label=True),
85 | test=dict(
86 | type=dataset_type,
87 | ann_file=data_root + 'annotations/instances_val2017.json',
88 | img_prefix=data_root + 'val2017/',
89 | img_scale=(1333, 800),
90 | img_norm_cfg=img_norm_cfg,
91 | size_divisor=32,
92 | flip_ratio=0,
93 | with_mask=False,
94 | with_crowd=False,
95 | with_label=False,
96 | test_mode=True))
97 | # optimizer
98 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
99 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
100 | # learning policy
101 | lr_config = dict(
102 | policy='step',
103 | warmup='linear',
104 | warmup_iters=500,
105 | warmup_ratio=1.0 / 3,
106 | step=[8, 11])
107 | checkpoint_config = dict(interval=1)
108 | # yapf:disable
109 | log_config = dict(
110 | interval=50,
111 | hooks=[
112 | dict(type='TextLoggerHook'),
113 | # dict(type='TensorboardLoggerHook')
114 | ])
115 | # yapf:enable
116 | # runtime settings
117 | total_epochs = 12
118 | device_ids = range(8)
119 | dist_params = dict(backend='nccl')
120 | log_level = 'INFO'
121 | work_dir = './work_dirs/retinanet_r101_fpn_1x'
122 | load_from = None
123 | resume_from = None
124 | workflow = [('train', 1)]
125 |
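The `octave_base_scale` / `scales_per_octave` pair above fixes the anchor sizes at each pyramid level: every stride contributes three anchor scales spaced a third of an octave apart. A quick sketch of the implied box sizes (our own arithmetic, not repo code):

```
strides = [8, 16, 32, 64, 128]
octave_base_scale, scales_per_octave = 4, 3
for stride in strides:
    # anchor size = stride * octave_base_scale * 2**(i / scales_per_octave)
    sizes = [round(stride * octave_base_scale * 2 ** (i / scales_per_octave), 1)
             for i in range(scales_per_octave)]
    print(stride, sizes)
# stride 8   -> [32.0, 40.3, 50.8]
# stride 128 -> [512.0, 645.1, 812.7]
```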
--------------------------------------------------------------------------------
/configs/retinanet_r50_fpn_1x.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='RetinaNet',
4 | pretrained='modelzoo://resnet50',
5 | backbone=dict(
6 | type='ResNet',
7 | depth=50,
8 | num_stages=4,
9 | out_indices=(0, 1, 2, 3),
10 | frozen_stages=1,
11 | style='pytorch'),
12 | neck=dict(
13 | type='FPN',
14 | in_channels=[256, 512, 1024, 2048],
15 | out_channels=256,
16 | start_level=1,
17 | add_extra_convs=True,
18 | num_outs=5),
19 | bbox_head=dict(
20 | type='RetinaHead',
21 | num_classes=81,
22 | in_channels=256,
23 | stacked_convs=4,
24 | feat_channels=256,
25 | octave_base_scale=4,
26 | scales_per_octave=3,
27 | anchor_ratios=[0.5, 1.0, 2.0],
28 | anchor_strides=[8, 16, 32, 64, 128],
29 | target_means=[.0, .0, .0, .0],
30 | target_stds=[1.0, 1.0, 1.0, 1.0],
31 | loss_cls=dict(
32 | type='FocalLoss',
33 | use_sigmoid=True,
34 | gamma=2.0,
35 | alpha=0.25,
36 | loss_weight=1.0),
37 | loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0)))
38 | # training and testing settings
39 | train_cfg = dict(
40 | assigner=dict(
41 | type='MaxIoUAssigner',
42 | pos_iou_thr=0.5,
43 | neg_iou_thr=0.4,
44 | min_pos_iou=0,
45 | ignore_iof_thr=-1),
46 | allowed_border=-1,
47 | pos_weight=-1,
48 | debug=False)
49 | test_cfg = dict(
50 | nms_pre=1000,
51 | min_bbox_size=0,
52 | score_thr=0.05,
53 | nms=dict(type='nms', iou_thr=0.5),
54 | max_per_img=100)
55 | # dataset settings
56 | dataset_type = 'CocoDataset'
57 | data_root = 'data/coco/'
58 | img_norm_cfg = dict(
59 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
60 | data = dict(
61 | imgs_per_gpu=2,
62 | workers_per_gpu=2,
63 | train=dict(
64 | type=dataset_type,
65 | ann_file=data_root + 'annotations/instances_train2017.json',
66 | img_prefix=data_root + 'train2017/',
67 | img_scale=(1333, 800),
68 | img_norm_cfg=img_norm_cfg,
69 | size_divisor=32,
70 | flip_ratio=0.5,
71 | with_mask=False,
72 | with_crowd=False,
73 | with_label=True),
74 | val=dict(
75 | type=dataset_type,
76 | ann_file=data_root + 'annotations/instances_val2017.json',
77 | img_prefix=data_root + 'val2017/',
78 | img_scale=(1333, 800),
79 | img_norm_cfg=img_norm_cfg,
80 | size_divisor=32,
81 | flip_ratio=0,
82 | with_mask=False,
83 | with_crowd=False,
84 | with_label=True),
85 | test=dict(
86 | type=dataset_type,
87 | ann_file=data_root + 'annotations/instances_val2017.json',
88 | img_prefix=data_root + 'val2017/',
89 | img_scale=(1333, 800),
90 | img_norm_cfg=img_norm_cfg,
91 | size_divisor=32,
92 | flip_ratio=0,
93 | with_mask=False,
94 | with_crowd=False,
95 | with_label=False,
96 | test_mode=True))
97 | # optimizer
98 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
99 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
100 | # learning policy
101 | lr_config = dict(
102 | policy='step',
103 | warmup='linear',
104 | warmup_iters=500,
105 | warmup_ratio=1.0 / 3,
106 | step=[8, 11])
107 | checkpoint_config = dict(interval=1)
108 | # yapf:disable
109 | log_config = dict(
110 | interval=50,
111 | hooks=[
112 | dict(type='TextLoggerHook'),
113 | # dict(type='TensorboardLoggerHook')
114 | ])
115 | # yapf:enable
116 | # runtime settings
117 | total_epochs = 12
118 | device_ids = range(8)
119 | dist_params = dict(backend='nccl')
120 | log_level = 'INFO'
121 | work_dir = './work_dirs/retinanet_r50_fpn_1x'
122 | load_from = None
123 | resume_from = None
124 | workflow = [('train', 1)]
125 |
--------------------------------------------------------------------------------
/configs/rpn_r101_fpn_1x.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='RPN',
4 | pretrained='modelzoo://resnet101',
5 | backbone=dict(
6 | type='ResNet',
7 | depth=101,
8 | num_stages=4,
9 | out_indices=(0, 1, 2, 3),
10 | frozen_stages=1,
11 | style='pytorch'),
12 | neck=dict(
13 | type='FPN',
14 | in_channels=[256, 512, 1024, 2048],
15 | out_channels=256,
16 | num_outs=5),
17 | rpn_head=dict(
18 | type='RPNHead',
19 | in_channels=256,
20 | feat_channels=256,
21 | anchor_scales=[8],
22 | anchor_ratios=[0.5, 1.0, 2.0],
23 | anchor_strides=[4, 8, 16, 32, 64],
24 | target_means=[.0, .0, .0, .0],
25 | target_stds=[1.0, 1.0, 1.0, 1.0],
26 | loss_cls=dict(
27 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
28 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)))
29 | # model training and testing settings
30 | train_cfg = dict(
31 | rpn=dict(
32 | assigner=dict(
33 | type='MaxIoUAssigner',
34 | pos_iou_thr=0.7,
35 | neg_iou_thr=0.3,
36 | min_pos_iou=0.3,
37 | ignore_iof_thr=-1),
38 | sampler=dict(
39 | type='RandomSampler',
40 | num=256,
41 | pos_fraction=0.5,
42 | neg_pos_ub=-1,
43 | add_gt_as_proposals=False),
44 | allowed_border=0,
45 | pos_weight=-1,
46 | debug=False))
47 | test_cfg = dict(
48 | rpn=dict(
49 | nms_across_levels=False,
50 | nms_pre=2000,
51 | nms_post=2000,
52 | max_num=2000,
53 | nms_thr=0.7,
54 | min_bbox_size=0))
55 | # dataset settings
56 | dataset_type = 'CocoDataset'
57 | data_root = 'data/coco/'
58 | img_norm_cfg = dict(
59 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
60 | data = dict(
61 | imgs_per_gpu=2,
62 | workers_per_gpu=2,
63 | train=dict(
64 | type=dataset_type,
65 | ann_file=data_root + 'annotations/instances_train2017.json',
66 | img_prefix=data_root + 'train2017/',
67 | img_scale=(1333, 800),
68 | img_norm_cfg=img_norm_cfg,
69 | size_divisor=32,
70 | flip_ratio=0.5,
71 | with_mask=False,
72 | with_crowd=False,
73 | with_label=False),
74 | val=dict(
75 | type=dataset_type,
76 | ann_file=data_root + 'annotations/instances_val2017.json',
77 | img_prefix=data_root + 'val2017/',
78 | img_scale=(1333, 800),
79 | img_norm_cfg=img_norm_cfg,
80 | size_divisor=32,
81 | flip_ratio=0,
82 | with_mask=False,
83 | with_crowd=False,
84 | with_label=False),
85 | test=dict(
86 | type=dataset_type,
87 | ann_file=data_root + 'annotations/instances_val2017.json',
88 | img_prefix=data_root + 'val2017/',
89 | img_scale=(1333, 800),
90 | img_norm_cfg=img_norm_cfg,
91 | size_divisor=32,
92 | flip_ratio=0,
93 | with_mask=False,
94 | with_label=False,
95 | test_mode=True))
96 | # optimizer
97 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
98 | # runner configs
99 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
100 | lr_config = dict(
101 | policy='step',
102 | warmup='linear',
103 | warmup_iters=500,
104 | warmup_ratio=1.0 / 3,
105 | step=[8, 11])
106 | checkpoint_config = dict(interval=1)
107 | # yapf:disable
108 | log_config = dict(
109 | interval=50,
110 | hooks=[
111 | dict(type='TextLoggerHook'),
112 | # dict(type='TensorboardLoggerHook')
113 | ])
114 | # yapf:enable
115 | # runtime settings
116 | total_epochs = 12
117 | dist_params = dict(backend='nccl')
118 | log_level = 'INFO'
119 | work_dir = './work_dirs/rpn_r101_fpn_1x'
120 | load_from = None
121 | resume_from = None
122 | workflow = [('train', 1)]
123 |
--------------------------------------------------------------------------------
/configs/rpn_r50_fpn_1x.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='RPN',
4 | pretrained='modelzoo://resnet50',
5 | backbone=dict(
6 | type='ResNet',
7 | depth=50,
8 | num_stages=4,
9 | out_indices=(0, 1, 2, 3),
10 | frozen_stages=1,
11 | style='pytorch'),
12 | neck=dict(
13 | type='FPN',
14 | in_channels=[256, 512, 1024, 2048],
15 | out_channels=256,
16 | num_outs=5),
17 | rpn_head=dict(
18 | type='RPNHead',
19 | in_channels=256,
20 | feat_channels=256,
21 | anchor_scales=[8],
22 | anchor_ratios=[0.5, 1.0, 2.0],
23 | anchor_strides=[4, 8, 16, 32, 64],
24 | target_means=[.0, .0, .0, .0],
25 | target_stds=[1.0, 1.0, 1.0, 1.0],
26 | loss_cls=dict(
27 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
28 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)))
29 | # model training and testing settings
30 | train_cfg = dict(
31 | rpn=dict(
32 | assigner=dict(
33 | type='MaxIoUAssigner',
34 | pos_iou_thr=0.7,
35 | neg_iou_thr=0.3,
36 | min_pos_iou=0.3,
37 | ignore_iof_thr=-1),
38 | sampler=dict(
39 | type='RandomSampler',
40 | num=256,
41 | pos_fraction=0.5,
42 | neg_pos_ub=-1,
43 | add_gt_as_proposals=False),
44 | allowed_border=0,
45 | pos_weight=-1,
46 | debug=False))
47 | test_cfg = dict(
48 | rpn=dict(
49 | nms_across_levels=False,
50 | nms_pre=2000,
51 | nms_post=2000,
52 | max_num=2000,
53 | nms_thr=0.7,
54 | min_bbox_size=0))
55 | # dataset settings
56 | dataset_type = 'CocoDataset'
57 | data_root = 'data/coco/'
58 | img_norm_cfg = dict(
59 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
60 | data = dict(
61 | imgs_per_gpu=2,
62 | workers_per_gpu=2,
63 | train=dict(
64 | type=dataset_type,
65 | ann_file=data_root + 'annotations/instances_train2017.json',
66 | img_prefix=data_root + 'train2017/',
67 | img_scale=(1333, 800),
68 | img_norm_cfg=img_norm_cfg,
69 | size_divisor=32,
70 | flip_ratio=0.5,
71 | with_mask=False,
72 | with_crowd=False,
73 | with_label=False),
74 | val=dict(
75 | type=dataset_type,
76 | ann_file=data_root + 'annotations/instances_val2017.json',
77 | img_prefix=data_root + 'val2017/',
78 | img_scale=(1333, 800),
79 | img_norm_cfg=img_norm_cfg,
80 | size_divisor=32,
81 | flip_ratio=0,
82 | with_mask=False,
83 | with_crowd=False,
84 | with_label=False),
85 | test=dict(
86 | type=dataset_type,
87 | ann_file=data_root + 'annotations/instances_val2017.json',
88 | img_prefix=data_root + 'val2017/',
89 | img_scale=(1333, 800),
90 | img_norm_cfg=img_norm_cfg,
91 | size_divisor=32,
92 | flip_ratio=0,
93 | with_mask=False,
94 | with_label=False,
95 | test_mode=True))
96 | # optimizer
97 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
98 | # runner configs
99 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
100 | lr_config = dict(
101 | policy='step',
102 | warmup='linear',
103 | warmup_iters=500,
104 | warmup_ratio=1.0 / 3,
105 | step=[8, 11])
106 | checkpoint_config = dict(interval=1)
107 | # yapf:disable
108 | log_config = dict(
109 | interval=50,
110 | hooks=[
111 | dict(type='TextLoggerHook'),
112 | # dict(type='TensorboardLoggerHook')
113 | ])
114 | # yapf:enable
115 | # runtime settings
116 | total_epochs = 12
117 | dist_params = dict(backend='nccl')
118 | log_level = 'INFO'
119 | work_dir = './work_dirs/rpn_r50_fpn_1x'
120 | load_from = None
121 | resume_from = None
122 | workflow = [('train', 1)]
123 |
--------------------------------------------------------------------------------
/configs/scratch/README.md:
--------------------------------------------------------------------------------
1 | # Rethinking ImageNet Pre-training
2 |
3 | ## Introduction
4 |
5 | ```
6 | @article{he2018rethinking,
7 | title={Rethinking imagenet pre-training},
8 | author={He, Kaiming and Girshick, Ross and Doll{\'a}r, Piotr},
9 | journal={arXiv preprint arXiv:1811.08883},
10 | year={2018}
11 | }
12 | ```
13 |
14 | ## Results and Models
15 |
16 | | Model | Backbone | Style | Lr schd | box AP | mask AP | Download |
17 | |:------------:|:---------:|:-------:|:-------:|:------:|:-------:|:--------:|
18 | | Faster R-CNN | R-50-FPN | pytorch | 6x | 40.1 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/scratch/scratch_faster_rcnn_r50_fpn_gn_6x-20190515-ff554978.pth) |
19 | | Mask R-CNN | R-50-FPN | pytorch | 6x | 41.0 | 37.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/scratch/scratch_mask_rcnn_r50_fpn_gn_6x_20190515-96743f5e.pth) |
20 |
21 | **Note:**
22 | - The above models are trained with 16 GPUs.
--------------------------------------------------------------------------------
/configs/wider_face/README.md:
--------------------------------------------------------------------------------
1 | ## WIDER Face Dataset
2 |
3 | To use the WIDER Face dataset you need to download it
4 | and extract it to the `data/WIDERFace` folder. Annotations in VOC format
5 | can be found in this [repo](https://github.com/sovrasov/wider-face-pascal-voc-annotations.git).
6 | You should move the annotation files from the `WIDER_train_annotations` and `WIDER_val_annotations` folders
7 | to the `Annotations` folders inside the corresponding `WIDER_train` and `WIDER_val` directories.
8 | The annotation lists `val.txt` and `train.txt` should also be copied to `data/WIDERFace` from `WIDER_train_annotations` and `WIDER_val_annotations`.
9 | The directory structure should look like this:
10 |
11 | ```
12 | mmdetection
13 | ├── mmdet
14 | ├── tools
15 | ├── configs
16 | ├── data
17 | │ ├── WIDERFace
18 | │ │ ├── WIDER_train
19 | │   │   │   ├── 0--Parade
20 | │   │   │   ├── ...
21 | │   │   │   ├── Annotations
22 | │   │   ├── WIDER_val
23 | │   │   │   ├── 0--Parade
24 | │   │   │   ├── ...
25 | │   │   │   ├── Annotations
26 | │ │ ├── val.txt
27 | │ │ ├── train.txt
28 |
29 | ```
30 |
31 | After that, you can train SSD300 on WIDER Face by launching training with the `ssd300_wider_face.py` config, or
32 | create your own config based on the presented one, as in the sketch below.
33 |
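A minimal sketch of deriving your own config programmatically (our illustration; it assumes the provided config lives at `configs/wider_face/ssd300_wider_face.py` and uses the same `mmcv.Config` API as `hook.py` later in this repo):

```
import mmcv

cfg = mmcv.Config.fromfile('configs/wider_face/ssd300_wider_face.py')
cfg.data.imgs_per_gpu = 4    # e.g. adapt the batch size to your GPUs
cfg.work_dir = './work_dirs/my_ssd300_wider_face'
# the modified cfg can then be passed to the usual training entry point
```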
--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 | from mmdet.apis import init_detector, inference_detector, show_result
4 |
5 | if __name__ == '__main__':
6 | config_file = 'configs/faster_rcnn_r50_fpn_1x.py'
7 | checkpoint_file = 'weights/faster_rcnn_r50_fpn_1x_20181010-3d1b3351.pth'
8 | # checkpoint_file = 'tools/work_dirs/mask_rcnn_r101_fpn_1x/epoch_1200.pth'
9 | img_path = '/home/bit/下载/n07753592'
10 |
11 | model = init_detector(config_file, checkpoint_file, device='cuda:0')
12 |
13 | # print(model)
14 |
15 | if os.path.isdir(img_path):
16 |         imgs = os.listdir(img_path)
17 |         for i in range(len(imgs)):
18 |             imgs[i] = os.path.join(img_path, imgs[i])
19 |         for i, result in enumerate(inference_detector(model, imgs)):  # inference_detector also accepts an iterable of images
20 |             print(i, imgs[i])
21 |             show_result(imgs[i], result, model.CLASSES, out_file='output/result_{}.jpg'.format(i))
22 |
23 | elif os.path.isfile(img_path):
24 | result = inference_detector(model, img_path)
25 | show_result(img_path, result, model.CLASSES)
26 |
27 |
--------------------------------------------------------------------------------
/hook.py:
--------------------------------------------------------------------------------
1 | # Only used for simple printing of intermediate variables via forward hooks. mmdetection actually
2 | # integrates a fairly complete hook system; once you dig deeper into the underlying code there is
3 | # no need to hand-write hooks like this, just call the built-in ones.
2 | import mmcv
3 | import torch
4 | from mmcv.runner import load_checkpoint
5 | from mmdet.models import build_detector
6 | from mmdet.apis import inference_detector, show_result
7 | import ipdb
8 |
9 | def roialign_forward(module,input,output):
10 | print('\n\ninput:')
11 | print(input[0].shape,'\n',input[1].shape)
12 | print('\n\noutput:')
13 | print(output.shape)
14 | # print(type(input))
15 |
16 |
17 | if __name__ == '__main__':
18 | params=[]
19 | def hook(module,input):
20 | # print('breakpoint')
21 | params.append(input)
22 | # print(input[0].shape)
23 | # data=input
24 | cfg = mmcv.Config.fromfile('configs/faster_rcnn_r50_fpn_1x.py')
25 | cfg.model.pretrained = None
26 |
27 | torch.cuda.empty_cache()
28 |
29 | # ipdb.set_trace()
30 |
31 | # construct the model and load checkpoint
32 |     model = build_detector(cfg.model, test_cfg=cfg.test_cfg)
33 |     print(model)
34 |     handle = model.backbone.conv1.register_forward_pre_hook(hook)
35 |     # model.bbox_roi_extractor.roi_layers[0].register_forward_hook(roialign_forward)
36 |     checkpoint = load_checkpoint(model, 'weights/faster_rcnn_r50_fpn_1x_20181010-3d1b3351.pth')
37 |     model.CLASSES = checkpoint['meta']['CLASSES']  # mmdetection checkpoints store the class names in their meta
38 |     # inference_detector reads the config from model.cfg and expects the model on the GPU in eval mode (init_detector normally does this)
39 |     model.cfg = cfg
40 |     model.to('cuda:0')
41 |     model.eval()
42 |     # test a single image
43 |     img = mmcv.imread('/py/pic/2.jpg')
44 |     result = inference_detector(model, img)
45 |     show_result(img, result, model.CLASSES)
46 |     handle.remove()
47 | # # test a list of images
48 | # imgs = ['/py/pic/4.jpg', '/py/pic/5.jpg']
49 |     # for i, result in enumerate(inference_detector(model, imgs)):
50 |     #     print(i, imgs[i])
51 |     #     show_result(imgs[i], result, model.CLASSES)
52 |
53 |
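For reference, a minimal sketch of the built-in mechanism that the comment at the top of this file alludes to (our illustration; it assumes the `Hook` base class and `register_hook` API of the bundled mmcv runner):

```
from mmcv.runner import Hook

class PrintLossHook(Hook):
    """Toy hook: print the logged loss values after every training iteration."""

    def after_train_iter(self, runner):
        print(runner.outputs['log_vars'])

# the hook would be registered on the Runner that train_detector builds:
# runner.register_hook(PrintLossHook())
```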
--------------------------------------------------------------------------------
/mmdet/__init__.py:
--------------------------------------------------------------------------------
1 | from .version import __version__, short_version
2 |
3 | __all__ = ['__version__', 'short_version']
4 |
--------------------------------------------------------------------------------
/mmdet/apis/__init__.py:
--------------------------------------------------------------------------------
1 | from .env import init_dist, get_root_logger, set_random_seed
2 | from .train import train_detector
3 | from .inference import init_detector, inference_detector, show_result
4 |
5 | __all__ = [
6 | 'init_dist', 'get_root_logger', 'set_random_seed', 'train_detector',
7 | 'init_detector', 'inference_detector', 'show_result'
8 | ]
9 |
--------------------------------------------------------------------------------
/mmdet/apis/env.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import random
4 | import subprocess
5 |
6 | import numpy as np
7 | import torch
8 | import torch.distributed as dist
9 | import torch.multiprocessing as mp
10 | from mmcv.runner import get_dist_info
11 |
12 |
13 | def init_dist(launcher, backend='nccl', **kwargs):
14 | if mp.get_start_method(allow_none=True) is None:
15 | mp.set_start_method('spawn')
16 | if launcher == 'pytorch':
17 | _init_dist_pytorch(backend, **kwargs)
18 | elif launcher == 'mpi':
19 | _init_dist_mpi(backend, **kwargs)
20 | elif launcher == 'slurm':
21 | _init_dist_slurm(backend, **kwargs)
22 | else:
23 | raise ValueError('Invalid launcher type: {}'.format(launcher))
24 |
25 |
26 | def _init_dist_pytorch(backend, **kwargs):
27 | # TODO: use local_rank instead of rank % num_gpus
28 | rank = int(os.environ['RANK'])
29 | num_gpus = torch.cuda.device_count()
30 | torch.cuda.set_device(rank % num_gpus)
31 | dist.init_process_group(backend=backend, **kwargs)
32 |
33 |
34 | def _init_dist_mpi(backend, **kwargs):
35 | raise NotImplementedError
36 |
37 |
38 | def _init_dist_slurm(backend, port=29500, **kwargs):
39 | proc_id = int(os.environ['SLURM_PROCID'])
40 | ntasks = int(os.environ['SLURM_NTASKS'])
41 | node_list = os.environ['SLURM_NODELIST']
42 | num_gpus = torch.cuda.device_count()
43 | torch.cuda.set_device(proc_id % num_gpus)
44 | addr = subprocess.getoutput(
45 | 'scontrol show hostname {} | head -n1'.format(node_list))
46 | os.environ['MASTER_PORT'] = str(port)
47 | os.environ['MASTER_ADDR'] = addr
48 | os.environ['WORLD_SIZE'] = str(ntasks)
49 | os.environ['RANK'] = str(proc_id)
50 | dist.init_process_group(backend=backend)
51 |
52 |
53 | def set_random_seed(seed):
54 | random.seed(seed)
55 | np.random.seed(seed)
56 | torch.manual_seed(seed)
57 | torch.cuda.manual_seed_all(seed)
58 |
59 |
60 | def get_root_logger(log_level=logging.INFO):
61 | logger = logging.getLogger()
62 | if not logger.hasHandlers():
63 | logging.basicConfig(
64 | format='%(asctime)s - %(levelname)s - %(message)s',
65 | level=log_level)
66 | rank, _ = get_dist_info()
67 | if rank != 0:
68 | logger.setLevel('ERROR')
69 | return logger
70 |
--------------------------------------------------------------------------------
/mmdet/core/__init__.py:
--------------------------------------------------------------------------------
1 | from .anchor import * # noqa: F401, F403
2 | from .bbox import * # noqa: F401, F403
3 | from .evaluation import * # noqa: F401, F403
4 | from .fp16 import * # noqa: F401, F403
5 | from .mask import * # noqa: F401, F403
6 | from .post_processing import * # noqa: F401, F403
7 | from .utils import * # noqa: F401, F403
8 |
--------------------------------------------------------------------------------
/mmdet/core/anchor/__init__.py:
--------------------------------------------------------------------------------
1 | from .anchor_generator import AnchorGenerator
2 | from .anchor_target import anchor_target, anchor_inside_flags
3 | from .guided_anchor_target import ga_loc_target, ga_shape_target
4 |
5 | __all__ = [
6 | 'AnchorGenerator', 'anchor_target', 'anchor_inside_flags', 'ga_loc_target',
7 | 'ga_shape_target'
8 | ]
9 |
--------------------------------------------------------------------------------
/mmdet/core/anchor/anchor_generator.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | class AnchorGenerator(object):
5 |
6 | def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None):
7 | self.base_size = base_size
8 | self.scales = torch.Tensor(scales)
9 | self.ratios = torch.Tensor(ratios)
10 | self.scale_major = scale_major
11 | self.ctr = ctr
12 | self.base_anchors = self.gen_base_anchors()
13 |
14 | @property
15 | def num_base_anchors(self):
16 | return self.base_anchors.size(0)
17 |
18 | def gen_base_anchors(self):
19 | w = self.base_size
20 | h = self.base_size
21 | if self.ctr is None:
22 | x_ctr = 0.5 * (w - 1)
23 | y_ctr = 0.5 * (h - 1)
24 | else:
25 | x_ctr, y_ctr = self.ctr
26 |
27 | h_ratios = torch.sqrt(self.ratios)
28 | w_ratios = 1 / h_ratios
29 | if self.scale_major:
30 | ws = (w * w_ratios[:, None] * self.scales[None, :]).view(-1)
31 | hs = (h * h_ratios[:, None] * self.scales[None, :]).view(-1)
32 | else:
33 | ws = (w * self.scales[:, None] * w_ratios[None, :]).view(-1)
34 | hs = (h * self.scales[:, None] * h_ratios[None, :]).view(-1)
35 |
36 | base_anchors = torch.stack(
37 | [
38 | x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1),
39 | x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1)
40 | ],
41 | dim=-1).round()
42 |
43 | return base_anchors
44 |
45 | def _meshgrid(self, x, y, row_major=True):
46 | xx = x.repeat(len(y))
47 | yy = y.view(-1, 1).repeat(1, len(x)).view(-1)
48 | if row_major:
49 | return xx, yy
50 | else:
51 | return yy, xx
52 |
53 | def grid_anchors(self, featmap_size, stride=16, device='cuda'):
54 | base_anchors = self.base_anchors.to(device)
55 |
56 | feat_h, feat_w = featmap_size
57 | shift_x = torch.arange(0, feat_w, device=device) * stride
58 | shift_y = torch.arange(0, feat_h, device=device) * stride
59 | shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
60 | shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1)
61 | shifts = shifts.type_as(base_anchors)
62 | # first feat_w elements correspond to the first row of shifts
63 | # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get
64 | # shifted anchors (K, A, 4), reshape to (K*A, 4)
65 |
66 | all_anchors = base_anchors[None, :, :] + shifts[:, None, :]
67 | all_anchors = all_anchors.view(-1, 4)
68 | # first A rows correspond to A anchors of (0, 0) in feature map,
69 | # then (0, 1), (0, 2), ...
70 | return all_anchors
71 |
72 | def valid_flags(self, featmap_size, valid_size, device='cuda'):
73 | feat_h, feat_w = featmap_size
74 | valid_h, valid_w = valid_size
75 | assert valid_h <= feat_h and valid_w <= feat_w
76 | valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device)
77 | valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device)
78 | valid_x[:valid_w] = 1
79 | valid_y[:valid_h] = 1
80 | valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
81 | valid = valid_xx & valid_yy
82 | valid = valid[:, None].expand(
83 | valid.size(0), self.num_base_anchors).contiguous().view(-1)
84 | return valid
85 |
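A usage sketch (ours, not repo code): in the RetinaNet configs above, `octave_base_scale=4` and `scales_per_octave=3` translate into scales `4 * 2**(i/3)`, and each pyramid level builds its generator with `base_size` equal to the anchor stride. For the stride-8 level:

```
from mmdet.core.anchor import AnchorGenerator

scales = [4 * 2 ** (i / 3) for i in range(3)]     # [4.0, ~5.04, ~6.35]
gen = AnchorGenerator(base_size=8, scales=scales, ratios=[0.5, 1.0, 2.0])
print(gen.num_base_anchors)     # 9 = 3 scales x 3 ratios
print(gen.base_anchors.shape)   # torch.Size([9, 4]), boxes centred at (3.5, 3.5)

# tile the base anchors over a 100x152 stride-8 feature map
anchors = gen.grid_anchors((100, 152), stride=8, device='cpu')
print(anchors.shape)            # torch.Size([136800, 4]) = 100 * 152 * 9
```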
--------------------------------------------------------------------------------
/mmdet/core/bbox/__init__.py:
--------------------------------------------------------------------------------
1 | from .geometry import bbox_overlaps
2 | from .assigners import BaseAssigner, MaxIoUAssigner, AssignResult
3 | from .samplers import (BaseSampler, PseudoSampler, RandomSampler,
4 | InstanceBalancedPosSampler, IoUBalancedNegSampler,
5 | CombinedSampler, SamplingResult)
6 | from .assign_sampling import build_assigner, build_sampler, assign_and_sample
7 | from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping,
8 | bbox_mapping_back, bbox2roi, roi2bbox, bbox2result,
9 | distance2bbox)
10 | from .bbox_target import bbox_target
11 |
12 | __all__ = [
13 | 'bbox_overlaps', 'BaseAssigner', 'MaxIoUAssigner', 'AssignResult',
14 | 'BaseSampler', 'PseudoSampler', 'RandomSampler',
15 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
16 | 'SamplingResult', 'build_assigner', 'build_sampler', 'assign_and_sample',
17 | 'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_mapping',
18 | 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result',
19 | 'distance2bbox', 'bbox_target'
20 | ]
21 |
--------------------------------------------------------------------------------
/mmdet/core/bbox/assign_sampling.py:
--------------------------------------------------------------------------------
1 | import mmcv
2 |
3 | from . import assigners, samplers
4 |
5 |
6 | def build_assigner(cfg, **kwargs):
7 | if isinstance(cfg, assigners.BaseAssigner):
8 | return cfg
9 | elif isinstance(cfg, dict):
10 | return mmcv.runner.obj_from_dict(cfg, assigners, default_args=kwargs)
11 | else:
12 |         raise TypeError('Invalid type {} for building an assigner'.format(
13 |             type(cfg)))
14 |
15 |
16 | def build_sampler(cfg, **kwargs):
17 | if isinstance(cfg, samplers.BaseSampler):
18 | return cfg
19 | elif isinstance(cfg, dict):
20 | return mmcv.runner.obj_from_dict(cfg, samplers, default_args=kwargs)
21 | else:
22 | raise TypeError('Invalid type {} for building a sampler'.format(
23 | type(cfg)))
24 |
25 |
26 | def assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg):
27 | bbox_assigner = build_assigner(cfg.assigner)
28 | bbox_sampler = build_sampler(cfg.sampler)
29 | assign_result = bbox_assigner.assign(bboxes, gt_bboxes, gt_bboxes_ignore,
30 | gt_labels)
31 | sampling_result = bbox_sampler.sample(assign_result, bboxes, gt_bboxes,
32 | gt_labels)
33 | return assign_result, sampling_result
34 |
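A self-contained sketch (ours) of how these builders are typically driven, using the same assigner/sampler dicts as the RPN configs above:

```
import torch
from mmdet.core.bbox import build_assigner, build_sampler

assigner = build_assigner(dict(
    type='MaxIoUAssigner',
    pos_iou_thr=0.7, neg_iou_thr=0.3, min_pos_iou=0.3, ignore_iof_thr=-1))
sampler = build_sampler(dict(
    type='RandomSampler',
    num=256, pos_fraction=0.5, neg_pos_ub=-1, add_gt_as_proposals=False))

proposals = torch.rand(1000, 4) * 100
proposals[:, 2:] += proposals[:, :2]     # ensure x2 >= x1 and y2 >= y1
gt_bboxes = torch.tensor([[10., 10., 60., 60.]])

assign_result = assigner.assign(proposals, gt_bboxes)
sampling_result = sampler.sample(assign_result, proposals, gt_bboxes)
# at most 128 positives (num=256, pos_fraction=0.5); the rest are negatives
print(sampling_result.pos_inds.numel(), sampling_result.neg_inds.numel())
```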
--------------------------------------------------------------------------------
/mmdet/core/bbox/assigners/__init__.py:
--------------------------------------------------------------------------------
1 | from .base_assigner import BaseAssigner
2 | from .max_iou_assigner import MaxIoUAssigner
3 | from .approx_max_iou_assigner import ApproxMaxIoUAssigner
4 | from .assign_result import AssignResult
5 |
6 | __all__ = [
7 | 'BaseAssigner', 'MaxIoUAssigner', 'ApproxMaxIoUAssigner', 'AssignResult'
8 | ]
9 |
--------------------------------------------------------------------------------
/mmdet/core/bbox/assigners/assign_result.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | class AssignResult(object):
5 |
6 | def __init__(self, num_gts, gt_inds, max_overlaps, labels=None):
7 | self.num_gts = num_gts
8 | self.gt_inds = gt_inds
9 | self.max_overlaps = max_overlaps
10 | self.labels = labels
11 |
12 | def add_gt_(self, gt_labels):
13 | self_inds = torch.arange(
14 | 1, len(gt_labels) + 1, dtype=torch.long, device=gt_labels.device)
15 | self.gt_inds = torch.cat([self_inds, self.gt_inds])
16 | self.max_overlaps = torch.cat(
17 | [self.max_overlaps.new_ones(self.num_gts), self.max_overlaps])
18 | if self.labels is not None:
19 | self.labels = torch.cat([gt_labels, self.labels])
20 |
--------------------------------------------------------------------------------
/mmdet/core/bbox/assigners/base_assigner.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta, abstractmethod
2 |
3 |
4 | class BaseAssigner(metaclass=ABCMeta):
5 |
6 | @abstractmethod
7 | def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
8 | pass
9 |
--------------------------------------------------------------------------------
/mmdet/core/bbox/bbox_target.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from .transforms import bbox2delta
4 | from ..utils import multi_apply
5 |
6 |
7 | def bbox_target(pos_bboxes_list,
8 | neg_bboxes_list,
9 | pos_gt_bboxes_list,
10 | pos_gt_labels_list,
11 | cfg,
12 | reg_classes=1,
13 | target_means=[.0, .0, .0, .0],
14 | target_stds=[1.0, 1.0, 1.0, 1.0],
15 | concat=True):
16 | labels, label_weights, bbox_targets, bbox_weights = multi_apply(
17 | bbox_target_single,
18 | pos_bboxes_list,
19 | neg_bboxes_list,
20 | pos_gt_bboxes_list,
21 | pos_gt_labels_list,
22 | cfg=cfg,
23 | reg_classes=reg_classes,
24 | target_means=target_means,
25 | target_stds=target_stds)
26 |
27 | if concat:
28 | labels = torch.cat(labels, 0)
29 | label_weights = torch.cat(label_weights, 0)
30 | bbox_targets = torch.cat(bbox_targets, 0)
31 | bbox_weights = torch.cat(bbox_weights, 0)
32 | return labels, label_weights, bbox_targets, bbox_weights
33 |
34 |
35 | def bbox_target_single(pos_bboxes,
36 | neg_bboxes,
37 | pos_gt_bboxes,
38 | pos_gt_labels,
39 | cfg,
40 | reg_classes=1,
41 | target_means=[.0, .0, .0, .0],
42 | target_stds=[1.0, 1.0, 1.0, 1.0]):
43 | num_pos = pos_bboxes.size(0)
44 | num_neg = neg_bboxes.size(0)
45 | num_samples = num_pos + num_neg
46 | labels = pos_bboxes.new_zeros(num_samples, dtype=torch.long)
47 | label_weights = pos_bboxes.new_zeros(num_samples)
48 | bbox_targets = pos_bboxes.new_zeros(num_samples, 4)
49 | bbox_weights = pos_bboxes.new_zeros(num_samples, 4)
50 | if num_pos > 0:
51 | labels[:num_pos] = pos_gt_labels
52 | pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight
53 | label_weights[:num_pos] = pos_weight
54 | pos_bbox_targets = bbox2delta(pos_bboxes, pos_gt_bboxes, target_means,
55 | target_stds)
56 | bbox_targets[:num_pos, :] = pos_bbox_targets
57 | bbox_weights[:num_pos, :] = 1
58 | if num_neg > 0:
59 | label_weights[-num_neg:] = 1.0
60 |
61 | return labels, label_weights, bbox_targets, bbox_weights
62 |
63 |
64 | def expand_target(bbox_targets, bbox_weights, labels, num_classes):
65 | bbox_targets_expand = bbox_targets.new_zeros((bbox_targets.size(0),
66 | 4 * num_classes))
67 | bbox_weights_expand = bbox_weights.new_zeros((bbox_weights.size(0),
68 | 4 * num_classes))
69 | for i in torch.nonzero(labels > 0).squeeze(-1):
70 | start, end = labels[i] * 4, (labels[i] + 1) * 4
71 | bbox_targets_expand[i, start:end] = bbox_targets[i, :]
72 | bbox_weights_expand[i, start:end] = bbox_weights[i, :]
73 | return bbox_targets_expand, bbox_weights_expand
74 |
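A tiny numeric check (ours, not repo code) of `bbox_target_single`, with a dummy cfg carrying only the `pos_weight` field it reads:

```
import torch
import mmcv
from mmdet.core.bbox.bbox_target import bbox_target_single

pos_bboxes = torch.tensor([[0., 0., 9., 9.]])       # one sampled positive
neg_bboxes = torch.tensor([[20., 20., 29., 29.]])   # one sampled negative
pos_gt_bboxes = torch.tensor([[1., 1., 10., 10.]])  # the positive's matched gt
pos_gt_labels = torch.tensor([3])
cfg = mmcv.Config(dict(pos_weight=-1))              # <= 0 means weight 1.0

labels, label_weights, bbox_targets, bbox_weights = bbox_target_single(
    pos_bboxes, neg_bboxes, pos_gt_bboxes, pos_gt_labels, cfg)
print(labels)        # tensor([3, 0]): positives keep their class, negatives are background
print(bbox_weights)  # only the positive row gets regression supervision
```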
--------------------------------------------------------------------------------
/mmdet/core/bbox/geometry.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False):
5 | """Calculate overlap between two set of bboxes.
6 |
7 | If ``is_aligned`` is ``False``, then calculate the ious between each bbox
8 | of bboxes1 and bboxes2, otherwise the ious between each aligned pair of
9 | bboxes1 and bboxes2.
10 |
11 | Args:
12 | bboxes1 (Tensor): shape (m, 4)
13 | bboxes2 (Tensor): shape (n, 4), if is_aligned is ``True``, then m and n
14 | must be equal.
15 | mode (str): "iou" (intersection over union) or iof (intersection over
16 | foreground).
17 |
18 | Returns:
19 | ious(Tensor): shape (m, n) if is_aligned == False else shape (m, 1)
20 | """
21 |
22 | assert mode in ['iou', 'iof']
23 |
24 | rows = bboxes1.size(0)
25 | cols = bboxes2.size(0)
26 | if is_aligned:
27 | assert rows == cols
28 |
29 | if rows * cols == 0:
30 | return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols)
31 |
32 | if is_aligned:
33 | lt = torch.max(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2]
34 | rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2]
35 |
36 | wh = (rb - lt + 1).clamp(min=0) # [rows, 2]
37 | overlap = wh[:, 0] * wh[:, 1]
38 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
39 | bboxes1[:, 3] - bboxes1[:, 1] + 1)
40 |
41 | if mode == 'iou':
42 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
43 | bboxes2[:, 3] - bboxes2[:, 1] + 1)
44 | ious = overlap / (area1 + area2 - overlap)
45 | else:
46 | ious = overlap / area1
47 | else:
48 | lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2]) # [rows, cols, 2]
49 | rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:]) # [rows, cols, 2]
50 |
51 | wh = (rb - lt + 1).clamp(min=0) # [rows, cols, 2]
52 | overlap = wh[:, :, 0] * wh[:, :, 1]
53 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
54 | bboxes1[:, 3] - bboxes1[:, 1] + 1)
55 |
56 | if mode == 'iou':
57 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
58 | bboxes2[:, 3] - bboxes2[:, 1] + 1)
59 | ious = overlap / (area1[:, None] + area2 - overlap)
60 | else:
61 | ious = overlap / (area1[:, None])
62 |
63 | return ious
64 |
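A quick sanity check (ours) of the "+1" pixel convention used in the area computations above:

```
import torch
from mmdet.core.bbox import bbox_overlaps

b1 = torch.tensor([[0., 0., 9., 9.]])    # a 10x10 box under the +1 convention
b2 = torch.tensor([[0., 0., 9., 9.],
                   [5., 5., 14., 14.]])  # identical box, and one shifted by 5
print(bbox_overlaps(b1, b2))             # tensor([[1.0000, 0.1429]])
print(bbox_overlaps(b1, b2, mode='iof')) # tensor([[1.0000, 0.2500]]), overlap / b1's own area
```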
--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/__init__.py:
--------------------------------------------------------------------------------
1 | from .base_sampler import BaseSampler
2 | from .pseudo_sampler import PseudoSampler
3 | from .random_sampler import RandomSampler
4 | from .instance_balanced_pos_sampler import InstanceBalancedPosSampler
5 | from .iou_balanced_neg_sampler import IoUBalancedNegSampler
6 | from .combined_sampler import CombinedSampler
7 | from .ohem_sampler import OHEMSampler
8 | from .sampling_result import SamplingResult
9 |
10 | __all__ = [
11 | 'BaseSampler', 'PseudoSampler', 'RandomSampler',
12 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
13 | 'OHEMSampler', 'SamplingResult'
14 | ]
15 |
--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/base_sampler.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta, abstractmethod
2 |
3 | import torch
4 |
5 | from .sampling_result import SamplingResult
6 |
7 |
8 | class BaseSampler(metaclass=ABCMeta):
9 |
10 | def __init__(self,
11 | num,
12 | pos_fraction,
13 | neg_pos_ub=-1,
14 | add_gt_as_proposals=True,
15 | **kwargs):
16 | self.num = num
17 | self.pos_fraction = pos_fraction
18 | self.neg_pos_ub = neg_pos_ub
19 | self.add_gt_as_proposals = add_gt_as_proposals
20 | self.pos_sampler = self
21 | self.neg_sampler = self
22 |
23 | @abstractmethod
24 | def _sample_pos(self, assign_result, num_expected, **kwargs):
25 | pass
26 |
27 | @abstractmethod
28 | def _sample_neg(self, assign_result, num_expected, **kwargs):
29 | pass
30 |
31 | def sample(self,
32 | assign_result,
33 | bboxes,
34 | gt_bboxes,
35 | gt_labels=None,
36 | **kwargs):
37 | """Sample positive and negative bboxes.
38 |
39 | This is a simple implementation of bbox sampling given candidates,
40 | assigning results and ground truth bboxes.
41 |
42 | Args:
43 | assign_result (:obj:`AssignResult`): Bbox assigning results.
44 | bboxes (Tensor): Boxes to be sampled from.
45 | gt_bboxes (Tensor): Ground truth bboxes.
46 | gt_labels (Tensor, optional): Class labels of ground truth bboxes.
47 |
48 | Returns:
49 | :obj:`SamplingResult`: Sampling result.
50 | """
51 | bboxes = bboxes[:, :4]
52 |
53 | gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8)
54 | if self.add_gt_as_proposals:
55 | bboxes = torch.cat([gt_bboxes, bboxes], dim=0)
56 | assign_result.add_gt_(gt_labels)
57 | gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8)
58 | gt_flags = torch.cat([gt_ones, gt_flags])
59 |
60 | num_expected_pos = int(self.num * self.pos_fraction)
61 | pos_inds = self.pos_sampler._sample_pos(
62 | assign_result, num_expected_pos, bboxes=bboxes, **kwargs)
63 | # We found that sampled indices have duplicated items occasionally.
64 | # (may be a bug of PyTorch)
65 | pos_inds = pos_inds.unique()
66 | num_sampled_pos = pos_inds.numel()
67 | num_expected_neg = self.num - num_sampled_pos
68 | if self.neg_pos_ub >= 0:
69 | _pos = max(1, num_sampled_pos)
70 | neg_upper_bound = int(self.neg_pos_ub * _pos)
71 | if num_expected_neg > neg_upper_bound:
72 | num_expected_neg = neg_upper_bound
73 | neg_inds = self.neg_sampler._sample_neg(
74 | assign_result, num_expected_neg, bboxes=bboxes, **kwargs)
75 | neg_inds = neg_inds.unique()
76 |
77 | return SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
78 | assign_result, gt_flags)
79 |
--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/combined_sampler.py:
--------------------------------------------------------------------------------
1 | from .base_sampler import BaseSampler
2 | from ..assign_sampling import build_sampler
3 |
4 |
5 | class CombinedSampler(BaseSampler):
6 |
7 | def __init__(self, pos_sampler, neg_sampler, **kwargs):
8 | super(CombinedSampler, self).__init__(**kwargs)
9 | self.pos_sampler = build_sampler(pos_sampler, **kwargs)
10 | self.neg_sampler = build_sampler(neg_sampler, **kwargs)
11 |
12 | def _sample_pos(self, **kwargs):
13 | raise NotImplementedError
14 |
15 | def _sample_neg(self, **kwargs):
16 | raise NotImplementedError
17 |
--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 | from .random_sampler import RandomSampler
5 |
6 |
7 | class InstanceBalancedPosSampler(RandomSampler):
8 |
9 | def _sample_pos(self, assign_result, num_expected, **kwargs):
10 | pos_inds = torch.nonzero(assign_result.gt_inds > 0)
11 | if pos_inds.numel() != 0:
12 | pos_inds = pos_inds.squeeze(1)
13 | if pos_inds.numel() <= num_expected:
14 | return pos_inds
15 | else:
16 | unique_gt_inds = assign_result.gt_inds[pos_inds].unique()
17 | num_gts = len(unique_gt_inds)
18 | num_per_gt = int(round(num_expected / float(num_gts)) + 1)
19 | sampled_inds = []
20 | for i in unique_gt_inds:
21 | inds = torch.nonzero(assign_result.gt_inds == i.item())
22 | if inds.numel() != 0:
23 | inds = inds.squeeze(1)
24 | else:
25 | continue
26 | if len(inds) > num_per_gt:
27 | inds = self.random_choice(inds, num_per_gt)
28 | sampled_inds.append(inds)
29 | sampled_inds = torch.cat(sampled_inds)
30 | if len(sampled_inds) < num_expected:
31 | num_extra = num_expected - len(sampled_inds)
32 | extra_inds = np.array(
33 | list(set(pos_inds.cpu()) - set(sampled_inds.cpu())))
34 | if len(extra_inds) > num_extra:
35 | extra_inds = self.random_choice(extra_inds, num_extra)
36 | extra_inds = torch.from_numpy(extra_inds).to(
37 | assign_result.gt_inds.device).long()
38 | sampled_inds = torch.cat([sampled_inds, extra_inds])
39 | elif len(sampled_inds) > num_expected:
40 | sampled_inds = self.random_choice(sampled_inds, num_expected)
41 | return sampled_inds
42 |
--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/ohem_sampler.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from .base_sampler import BaseSampler
4 | from ..transforms import bbox2roi
5 |
6 |
7 | class OHEMSampler(BaseSampler):
8 |
9 | def __init__(self,
10 | num,
11 | pos_fraction,
12 | context,
13 | neg_pos_ub=-1,
14 | add_gt_as_proposals=True,
15 | **kwargs):
16 | super(OHEMSampler, self).__init__(num, pos_fraction, neg_pos_ub,
17 | add_gt_as_proposals)
18 | if not hasattr(context, 'num_stages'):
19 | self.bbox_roi_extractor = context.bbox_roi_extractor
20 | self.bbox_head = context.bbox_head
21 | else:
22 | self.bbox_roi_extractor = context.bbox_roi_extractor[
23 | context.current_stage]
24 | self.bbox_head = context.bbox_head[context.current_stage]
25 |
26 | def hard_mining(self, inds, num_expected, bboxes, labels, feats):
27 | with torch.no_grad():
28 | rois = bbox2roi([bboxes])
29 | bbox_feats = self.bbox_roi_extractor(
30 | feats[:self.bbox_roi_extractor.num_inputs], rois)
31 | cls_score, _ = self.bbox_head(bbox_feats)
32 | loss = self.bbox_head.loss(
33 | cls_score=cls_score,
34 | bbox_pred=None,
35 | labels=labels,
36 | label_weights=cls_score.new_ones(cls_score.size(0)),
37 | bbox_targets=None,
38 | bbox_weights=None,
39 | reduction_override='none')['loss_cls']
40 | _, topk_loss_inds = loss.topk(num_expected)
41 | return inds[topk_loss_inds]
42 |
43 | def _sample_pos(self,
44 | assign_result,
45 | num_expected,
46 | bboxes=None,
47 | feats=None,
48 | **kwargs):
49 | # Sample some hard positive samples
50 | pos_inds = torch.nonzero(assign_result.gt_inds > 0)
51 | if pos_inds.numel() != 0:
52 | pos_inds = pos_inds.squeeze(1)
53 | if pos_inds.numel() <= num_expected:
54 | return pos_inds
55 | else:
56 | return self.hard_mining(pos_inds, num_expected, bboxes[pos_inds],
57 | assign_result.labels[pos_inds], feats)
58 |
59 | def _sample_neg(self,
60 | assign_result,
61 | num_expected,
62 | bboxes=None,
63 | feats=None,
64 | **kwargs):
65 | # Sample some hard negative samples
66 | neg_inds = torch.nonzero(assign_result.gt_inds == 0)
67 | if neg_inds.numel() != 0:
68 | neg_inds = neg_inds.squeeze(1)
69 | if len(neg_inds) <= num_expected:
70 | return neg_inds
71 | else:
72 | return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds],
73 | assign_result.labels[neg_inds], feats)
74 |
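Switching to OHEM is done purely through the config system; nothing else in the pipeline changes. A hedged sketch of the R-CNN sampler fragment that selects this class (the values follow `configs/faster_rcnn_ohem_r50_fpn_1x.py` as we recall it, so treat them as an assumption):

```
# inside train_cfg.rcnn; the detector passes itself as `context` when building
# the sampler, which is how OHEMSampler reaches the bbox head it uses to rank
# candidates by classification loss
sampler = dict(
    type='OHEMSampler',
    num=512,
    pos_fraction=0.25,
    neg_pos_ub=-1,
    add_gt_as_proposals=True)
```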
--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/pseudo_sampler.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from .base_sampler import BaseSampler
4 | from .sampling_result import SamplingResult
5 |
6 |
7 | class PseudoSampler(BaseSampler):
8 |
9 | def __init__(self, **kwargs):
10 | pass
11 |
12 | def _sample_pos(self, **kwargs):
13 | raise NotImplementedError
14 |
15 | def _sample_neg(self, **kwargs):
16 | raise NotImplementedError
17 |
18 | def sample(self, assign_result, bboxes, gt_bboxes, **kwargs):
19 | pos_inds = torch.nonzero(
20 | assign_result.gt_inds > 0).squeeze(-1).unique()
21 | neg_inds = torch.nonzero(
22 | assign_result.gt_inds == 0).squeeze(-1).unique()
23 | gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8)
24 | sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
25 | assign_result, gt_flags)
26 | return sampling_result
27 |
--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/random_sampler.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 | from .base_sampler import BaseSampler
5 |
6 |
7 | class RandomSampler(BaseSampler):
8 |
9 | def __init__(self,
10 | num,
11 | pos_fraction,
12 | neg_pos_ub=-1,
13 | add_gt_as_proposals=True,
14 | **kwargs):
15 | super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub,
16 | add_gt_as_proposals)
17 |
18 | @staticmethod
19 | def random_choice(gallery, num):
20 | """Random select some elements from the gallery.
21 |
22 | It seems that Pytorch's implementation is slower than numpy so we use
23 | numpy to randperm the indices.
24 | """
25 | assert len(gallery) >= num
26 | if isinstance(gallery, list):
27 | gallery = np.array(gallery)
28 | cands = np.arange(len(gallery))
29 | np.random.shuffle(cands)
30 | rand_inds = cands[:num]
31 | if not isinstance(gallery, np.ndarray):
32 | rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device)
33 | return gallery[rand_inds]
34 |
35 | def _sample_pos(self, assign_result, num_expected, **kwargs):
36 | """Randomly sample some positive samples."""
37 | pos_inds = torch.nonzero(assign_result.gt_inds > 0)
38 | if pos_inds.numel() != 0:
39 | pos_inds = pos_inds.squeeze(1)
40 | if pos_inds.numel() <= num_expected:
41 | return pos_inds
42 | else:
43 | return self.random_choice(pos_inds, num_expected)
44 |
45 | def _sample_neg(self, assign_result, num_expected, **kwargs):
46 | """Randomly sample some negative samples."""
47 | neg_inds = torch.nonzero(assign_result.gt_inds == 0)
48 | if neg_inds.numel() != 0:
49 | neg_inds = neg_inds.squeeze(1)
50 | if len(neg_inds) <= num_expected:
51 | return neg_inds
52 | else:
53 | return self.random_choice(neg_inds, num_expected)
54 |
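Appended sketch (not in the upstream file) of what random_choice does: shuffle the candidate positions with numpy and keep the first num of them, i.e. sampling without replacement.

import numpy as np
import torch

gallery = torch.tensor([2, 5, 9, 13, 21])  # e.g. indices of candidate anchors
num = 3
cands = np.arange(len(gallery))
np.random.shuffle(cands)
rand_inds = torch.from_numpy(cands[:num]).long()
print(gallery[rand_inds])  # 3 distinct entries of the gallery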
--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/sampling_result.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | class SamplingResult(object):
5 |
6 | def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result,
7 | gt_flags):
8 | self.pos_inds = pos_inds
9 | self.neg_inds = neg_inds
10 | self.pos_bboxes = bboxes[pos_inds]
11 | self.neg_bboxes = bboxes[neg_inds]
12 | self.pos_is_gt = gt_flags[pos_inds]
13 |
14 | self.num_gts = gt_bboxes.shape[0]
15 |         self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1  # gt_inds are 1-based (0 marks negatives)
16 | self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds, :]
17 | if assign_result.labels is not None:
18 | self.pos_gt_labels = assign_result.labels[pos_inds]
19 | else:
20 | self.pos_gt_labels = None
21 |
22 | @property
23 | def bboxes(self):
24 | return torch.cat([self.pos_bboxes, self.neg_bboxes])
25 |
--------------------------------------------------------------------------------
/mmdet/core/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | from .class_names import (voc_classes, imagenet_det_classes,
2 | imagenet_vid_classes, coco_classes, dataset_aliases,
3 | get_classes)
4 | from .coco_utils import coco_eval, fast_eval_recall, results2json
5 | from .eval_hooks import (DistEvalHook, DistEvalmAPHook, CocoDistEvalRecallHook,
6 | CocoDistEvalmAPHook)
7 | from .mean_ap import average_precision, eval_map, print_map_summary
8 | from .recall import (eval_recalls, print_recall_summary, plot_num_recall,
9 | plot_iou_recall)
10 |
11 | __all__ = [
12 | 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes',
13 | 'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval',
14 | 'fast_eval_recall', 'results2json', 'DistEvalHook', 'DistEvalmAPHook',
15 | 'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision',
16 | 'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary',
17 | 'plot_num_recall', 'plot_iou_recall'
18 | ]
19 |
--------------------------------------------------------------------------------
/mmdet/core/evaluation/bbox_overlaps.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou'):
5 | """Calculate the ious between each bbox of bboxes1 and bboxes2.
6 |
7 | Args:
8 | bboxes1(ndarray): shape (n, 4)
9 | bboxes2(ndarray): shape (k, 4)
10 | mode(str): iou (intersection over union) or iof (intersection
11 | over foreground)
12 |
13 | Returns:
14 | ious(ndarray): shape (n, k)
15 | """
16 |
17 | assert mode in ['iou', 'iof']
18 |
19 | bboxes1 = bboxes1.astype(np.float32)
20 | bboxes2 = bboxes2.astype(np.float32)
21 | rows = bboxes1.shape[0]
22 | cols = bboxes2.shape[0]
23 | ious = np.zeros((rows, cols), dtype=np.float32)
24 | if rows * cols == 0:
25 | return ious
26 | exchange = False
27 | if bboxes1.shape[0] > bboxes2.shape[0]:
28 | bboxes1, bboxes2 = bboxes2, bboxes1
29 | ious = np.zeros((cols, rows), dtype=np.float32)
30 | exchange = True
31 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
32 | bboxes1[:, 3] - bboxes1[:, 1] + 1)
33 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
34 | bboxes2[:, 3] - bboxes2[:, 1] + 1)
35 | for i in range(bboxes1.shape[0]):
36 | x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0])
37 | y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1])
38 | x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2])
39 | y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3])
40 | overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum(
41 | y_end - y_start + 1, 0)
42 | if mode == 'iou':
43 | union = area1[i] + area2 - overlap
44 | else:
45 | union = area1[i] if not exchange else area2
46 | ious[i, :] = overlap / union
47 | if exchange:
48 | ious = ious.T
49 | return ious
50 |
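Appended worked example (not in the upstream file), with bbox_overlaps above in scope. Box A = [0, 0, 9, 9] has area (9-0+1)*(9-0+1) = 100; box B = [5, 5, 14, 14] also has area 100 and the intersection is 5*5 = 25, so IoU = 25 / (100 + 100 - 25) ≈ 0.143.

import numpy as np

bboxes1 = np.array([[0, 0, 9, 9]], dtype=np.float32)
bboxes2 = np.array([[5, 5, 14, 14]], dtype=np.float32)
print(bbox_overlaps(bboxes1, bboxes2))  # [[0.14285715]]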
--------------------------------------------------------------------------------
/mmdet/core/fp16/__init__.py:
--------------------------------------------------------------------------------
1 | from .decorators import auto_fp16, force_fp32
2 | from .hooks import Fp16OptimizerHook, wrap_fp16_model
3 |
4 | __all__ = ['auto_fp16', 'force_fp32', 'Fp16OptimizerHook', 'wrap_fp16_model']
5 |
--------------------------------------------------------------------------------
/mmdet/core/fp16/utils.py:
--------------------------------------------------------------------------------
1 | from collections import abc
2 |
3 | import numpy as np
4 | import torch
5 |
6 |
7 | def cast_tensor_type(inputs, src_type, dst_type):
8 | if isinstance(inputs, torch.Tensor):
9 | return inputs.to(dst_type)
10 | elif isinstance(inputs, str):
11 | return inputs
12 | elif isinstance(inputs, np.ndarray):
13 | return inputs
14 | elif isinstance(inputs, abc.Mapping):
15 | return type(inputs)({
16 | k: cast_tensor_type(v, src_type, dst_type)
17 | for k, v in inputs.items()
18 | })
19 | elif isinstance(inputs, abc.Iterable):
20 | return type(inputs)(
21 | cast_tensor_type(item, src_type, dst_type) for item in inputs)
22 | else:
23 | return inputs
24 |
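Appended sketch (not in the upstream file): cast_tensor_type walks nested containers, casts tensors to the destination dtype, and passes strings and ndarrays through unchanged.

import numpy as np
import torch

inputs = dict(img=torch.zeros(2, 3), meta=dict(name='000001.jpg', scale=np.array([1.5])))
outputs = cast_tensor_type(inputs, torch.float, torch.half)
print(outputs['img'].dtype)            # torch.float16
print(outputs['meta']['scale'].dtype)  # float64 -- ndarrays are left untouched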
--------------------------------------------------------------------------------
/mmdet/core/mask/__init__.py:
--------------------------------------------------------------------------------
1 | from .utils import split_combined_polys
2 | from .mask_target import mask_target
3 |
4 | __all__ = ['split_combined_polys', 'mask_target']
5 |
--------------------------------------------------------------------------------
/mmdet/core/mask/mask_target.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import mmcv
4 |
5 |
6 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list,
7 | cfg):
8 | cfg_list = [cfg for _ in range(len(pos_proposals_list))]
9 | mask_targets = map(mask_target_single, pos_proposals_list,
10 | pos_assigned_gt_inds_list, gt_masks_list, cfg_list)
11 | mask_targets = torch.cat(list(mask_targets))
12 | return mask_targets
13 |
14 |
15 | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg):
16 | mask_size = cfg.mask_size
17 | num_pos = pos_proposals.size(0)
18 | mask_targets = []
19 | if num_pos > 0:
20 | proposals_np = pos_proposals.cpu().numpy()
21 | pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy()
22 | for i in range(num_pos):
23 | gt_mask = gt_masks[pos_assigned_gt_inds[i]]
24 | bbox = proposals_np[i, :].astype(np.int32)
25 | x1, y1, x2, y2 = bbox
26 | w = np.maximum(x2 - x1 + 1, 1)
27 | h = np.maximum(y2 - y1 + 1, 1)
28 | # mask is uint8 both before and after resizing
29 | target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w],
30 | (mask_size, mask_size))
31 | mask_targets.append(target)
32 | mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to(
33 | pos_proposals.device)
34 | else:
35 | mask_targets = pos_proposals.new_zeros((0, mask_size, mask_size))
36 | return mask_targets
37 |
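Appended sketch (not in the upstream file) of the per-proposal step above: crop the full-image gt mask to the proposal box, then resize the crop to the configured mask size. The mask and box below are toy data; 28 stands in for cfg.mask_size.

import mmcv
import numpy as np

gt_mask = np.zeros((100, 100), dtype=np.uint8)
gt_mask[20:60, 30:80] = 1              # a fake instance mask
x1, y1, x2, y2 = 25, 15, 84, 64        # a fake positive proposal
w = max(x2 - x1 + 1, 1)
h = max(y2 - y1 + 1, 1)
target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w], (28, 28))
print(target.shape, target.dtype)      # (28, 28) uint8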
--------------------------------------------------------------------------------
/mmdet/core/mask/utils.py:
--------------------------------------------------------------------------------
1 | import mmcv
2 |
3 |
4 | def split_combined_polys(polys, poly_lens, polys_per_mask):
5 | """Split the combined 1-D polys into masks.
6 |
7 | A mask is represented as a list of polys, and a poly is represented as
8 | a 1-D array. In dataset, all masks are concatenated into a single 1-D
9 | tensor. Here we need to split the tensor into original representations.
10 |
11 | Args:
12 | polys (list): a list (length = image num) of 1-D tensors
13 | poly_lens (list): a list (length = image num) of poly length
14 | polys_per_mask (list): a list (length = image num) of poly number
15 | of each mask
16 |
17 | Returns:
18 | list: a list (length = image num) of list (length = mask num) of
19 | list (length = poly num) of numpy array
20 | """
21 | mask_polys_list = []
22 | for img_id in range(len(polys)):
23 | polys_single = polys[img_id]
24 | polys_lens_single = poly_lens[img_id].tolist()
25 | polys_per_mask_single = polys_per_mask[img_id].tolist()
26 |
27 | split_polys = mmcv.slice_list(polys_single, polys_lens_single)
28 | mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single)
29 | mask_polys_list.append(mask_polys)
30 | return mask_polys_list
31 |
--------------------------------------------------------------------------------
/mmdet/core/post_processing/__init__.py:
--------------------------------------------------------------------------------
1 | from .bbox_nms import multiclass_nms
2 | from .merge_augs import (merge_aug_proposals, merge_aug_bboxes,
3 | merge_aug_scores, merge_aug_masks)
4 |
5 | __all__ = [
6 | 'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes',
7 | 'merge_aug_scores', 'merge_aug_masks'
8 | ]
9 |
--------------------------------------------------------------------------------
/mmdet/core/post_processing/bbox_nms.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from mmdet.ops.nms import nms_wrapper
4 |
5 |
6 | def multiclass_nms(multi_bboxes,
7 | multi_scores,
8 | score_thr,
9 | nms_cfg,
10 | max_num=-1,
11 | score_factors=None):
12 | """NMS for multi-class bboxes.
13 |
14 | Args:
15 | multi_bboxes (Tensor): shape (n, #class*4) or (n, 4)
16 | multi_scores (Tensor): shape (n, #class)
17 | score_thr (float): bbox threshold, bboxes with scores lower than it
18 | will not be considered.
19 |         nms_cfg (dict): NMS type and threshold config, e.g.
20 |             dict(type='nms', iou_thr=0.5)
20 | max_num (int): if there are more than max_num bboxes after NMS,
21 | only top max_num will be kept.
22 | score_factors (Tensor): The factors multiplied to scores before
23 | applying NMS
24 |
25 | Returns:
26 |         tuple: (bboxes, labels), tensors of shape (k, 5) and (k, ). Labels
27 | are 0-based.
28 | """
29 | num_classes = multi_scores.shape[1]
30 | bboxes, labels = [], []
31 | nms_cfg_ = nms_cfg.copy()
32 | nms_type = nms_cfg_.pop('type', 'nms')
33 | nms_op = getattr(nms_wrapper, nms_type)
34 | for i in range(1, num_classes):
35 | cls_inds = multi_scores[:, i] > score_thr
36 | if not cls_inds.any():
37 | continue
38 | # get bboxes and scores of this class
39 | if multi_bboxes.shape[1] == 4:
40 | _bboxes = multi_bboxes[cls_inds, :]
41 | else:
42 | _bboxes = multi_bboxes[cls_inds, i * 4:(i + 1) * 4]
43 | _scores = multi_scores[cls_inds, i]
44 | if score_factors is not None:
45 | _scores *= score_factors[cls_inds]
46 | cls_dets = torch.cat([_bboxes, _scores[:, None]], dim=1)
47 | cls_dets, _ = nms_op(cls_dets, **nms_cfg_)
48 | cls_labels = multi_bboxes.new_full(
49 | (cls_dets.shape[0], ), i - 1, dtype=torch.long)
50 | bboxes.append(cls_dets)
51 | labels.append(cls_labels)
52 | if bboxes:
53 | bboxes = torch.cat(bboxes)
54 | labels = torch.cat(labels)
55 | if bboxes.shape[0] > max_num:
56 | _, inds = bboxes[:, -1].sort(descending=True)
57 | inds = inds[:max_num]
58 | bboxes = bboxes[inds]
59 | labels = labels[inds]
60 | else:
61 | bboxes = multi_bboxes.new_zeros((0, 5))
62 | labels = multi_bboxes.new_zeros((0, ), dtype=torch.long)
63 |
64 | return bboxes, labels
65 |
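Appended usage sketch (not in the upstream file); it needs the compiled mmdet.ops NMS extension. Shapes follow the docstring above; the boxes and scores are random but valid x1y1x2y2.

import torch

xy = torch.rand(100, 2) * 50
wh = torch.rand(100, 2) * 50
multi_bboxes = torch.cat([xy, xy + wh], dim=1)  # shared boxes for all classes
multi_scores = torch.rand(100, 81)              # column 0 is background
det_bboxes, det_labels = multiclass_nms(
    multi_bboxes, multi_scores, score_thr=0.05,
    nms_cfg=dict(type='nms', iou_thr=0.5), max_num=100)
# det_bboxes: (k, 5) with the score appended; det_labels: (k, ), 0-based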
--------------------------------------------------------------------------------
/mmdet/core/post_processing/merge_augs.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | import numpy as np
4 |
5 | from mmdet.ops import nms
6 | from ..bbox import bbox_mapping_back
7 |
8 |
9 | def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg):
10 | """Merge augmented proposals (multiscale, flip, etc.)
11 |
12 | Args:
13 | aug_proposals (list[Tensor]): proposals from different testing
14 | schemes, shape (n, 5). Note that they are not rescaled to the
15 | original image size.
16 |         img_metas (list[dict]): image info including "img_shape",
17 |             "scale_factor" and "flip".
17 | rpn_test_cfg (dict): rpn test config.
18 |
19 | Returns:
20 |         Tensor: shape (n, 5), proposals (with scores) corresponding to the
21 |             original image scale.
21 | """
22 | recovered_proposals = []
23 | for proposals, img_info in zip(aug_proposals, img_metas):
24 | img_shape = img_info['img_shape']
25 | scale_factor = img_info['scale_factor']
26 | flip = img_info['flip']
27 | _proposals = proposals.clone()
28 | _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], img_shape,
29 | scale_factor, flip)
30 | recovered_proposals.append(_proposals)
31 | aug_proposals = torch.cat(recovered_proposals, dim=0)
32 | merged_proposals, _ = nms(aug_proposals, rpn_test_cfg.nms_thr)
33 | scores = merged_proposals[:, 4]
34 | _, order = scores.sort(0, descending=True)
35 | num = min(rpn_test_cfg.max_num, merged_proposals.shape[0])
36 | order = order[:num]
37 | merged_proposals = merged_proposals[order, :]
38 | return merged_proposals
39 |
40 |
41 | def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg):
42 | """Merge augmented detection bboxes and scores.
43 |
44 | Args:
45 | aug_bboxes (list[Tensor]): shape (n, 4*#class)
46 | aug_scores (list[Tensor] or None): shape (n, #class)
47 |         img_metas (list[list[dict]]): image info including "img_shape",
48 |             "scale_factor" and "flip".
48 | rcnn_test_cfg (dict): rcnn test config.
49 |
50 | Returns:
51 | tuple: (bboxes, scores)
52 | """
53 | recovered_bboxes = []
54 | for bboxes, img_info in zip(aug_bboxes, img_metas):
55 | img_shape = img_info[0]['img_shape']
56 | scale_factor = img_info[0]['scale_factor']
57 | flip = img_info[0]['flip']
58 | bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip)
59 | recovered_bboxes.append(bboxes)
60 | bboxes = torch.stack(recovered_bboxes).mean(dim=0)
61 | if aug_scores is None:
62 | return bboxes
63 | else:
64 | scores = torch.stack(aug_scores).mean(dim=0)
65 | return bboxes, scores
66 |
67 |
68 | def merge_aug_scores(aug_scores):
69 | """Merge augmented bbox scores."""
70 | if isinstance(aug_scores[0], torch.Tensor):
71 | return torch.mean(torch.stack(aug_scores), dim=0)
72 | else:
73 | return np.mean(aug_scores, axis=0)
74 |
75 |
76 | def merge_aug_masks(aug_masks, img_metas, rcnn_test_cfg, weights=None):
77 | """Merge augmented mask prediction.
78 |
79 | Args:
80 | aug_masks (list[ndarray]): shape (n, #class, h, w)
81 |         img_metas (list[list[dict]]): image info including "flip".
82 |         rcnn_test_cfg (dict): rcnn test config.
83 |
84 |     Returns:
85 |         ndarray: merged masks.
86 | """
87 | recovered_masks = [
88 | mask if not img_info[0]['flip'] else mask[..., ::-1]
89 | for mask, img_info in zip(aug_masks, img_metas)
90 | ]
91 | if weights is None:
92 | merged_masks = np.mean(recovered_masks, axis=0)
93 | else:
94 | merged_masks = np.average(
95 | np.array(recovered_masks), axis=0, weights=np.array(weights))
96 | return merged_masks
97 |
--------------------------------------------------------------------------------
/mmdet/core/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .dist_utils import allreduce_grads, DistOptimizerHook
2 | from .misc import tensor2imgs, unmap, multi_apply
3 |
4 | __all__ = [
5 | 'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', 'unmap',
6 | 'multi_apply'
7 | ]
8 |
--------------------------------------------------------------------------------
/mmdet/core/utils/dist_utils.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 |
3 | import torch.distributed as dist
4 | from torch._utils import (_flatten_dense_tensors, _unflatten_dense_tensors,
5 | _take_tensors)
6 | from mmcv.runner import OptimizerHook
7 |
8 |
9 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1):
10 | if bucket_size_mb > 0:
11 | bucket_size_bytes = bucket_size_mb * 1024 * 1024
12 | buckets = _take_tensors(tensors, bucket_size_bytes)
13 | else:
14 | buckets = OrderedDict()
15 | for tensor in tensors:
16 | tp = tensor.type()
17 | if tp not in buckets:
18 | buckets[tp] = []
19 | buckets[tp].append(tensor)
20 | buckets = buckets.values()
21 |
22 | for bucket in buckets:
23 | flat_tensors = _flatten_dense_tensors(bucket)
24 | dist.all_reduce(flat_tensors)
25 | flat_tensors.div_(world_size)
26 | for tensor, synced in zip(
27 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)):
28 | tensor.copy_(synced)
29 |
30 |
31 | def allreduce_grads(params, coalesce=True, bucket_size_mb=-1):
32 | grads = [
33 | param.grad.data for param in params
34 | if param.requires_grad and param.grad is not None
35 | ]
36 | world_size = dist.get_world_size()
37 | if coalesce:
38 | _allreduce_coalesced(grads, world_size, bucket_size_mb)
39 | else:
40 | for tensor in grads:
41 | dist.all_reduce(tensor.div_(world_size))
42 |
43 |
44 | class DistOptimizerHook(OptimizerHook):
45 |
46 | def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1):
47 | self.grad_clip = grad_clip
48 | self.coalesce = coalesce
49 | self.bucket_size_mb = bucket_size_mb
50 |
51 | def after_train_iter(self, runner):
52 | runner.optimizer.zero_grad()
53 | runner.outputs['loss'].backward()
54 | allreduce_grads(runner.model.parameters(), self.coalesce,
55 | self.bucket_size_mb)
56 | if self.grad_clip is not None:
57 | self.clip_grads(runner.model.parameters())
58 | runner.optimizer.step()
59 |
--------------------------------------------------------------------------------
/mmdet/core/utils/misc.py:
--------------------------------------------------------------------------------
1 | from functools import partial
2 |
3 | import mmcv
4 | import numpy as np
5 | from six.moves import map, zip
6 |
7 | import ipdb
8 |
9 |
10 |
11 | def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True):
12 | num_imgs = tensor.size(0)
13 | mean = np.array(mean, dtype=np.float32)
14 | std = np.array(std, dtype=np.float32)
15 | imgs = []
16 | for img_id in range(num_imgs):
17 | img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0)
18 | img = mmcv.imdenormalize(
19 | img, mean, std, to_bgr=to_rgb).astype(np.uint8)
20 | imgs.append(np.ascontiguousarray(img))
21 | return imgs
22 |
23 |
24 | def multi_apply(func, *args, **kwargs):
25 | pfunc = partial(func, **kwargs) if kwargs else func
26 | map_results = map(pfunc, *args)
27 | return tuple(map(list, zip(*map_results)))
28 |
29 |
30 | def unmap(data, count, inds, fill=0):
31 | """ Unmap a subset of item (data) back to the original set of items (of
32 | size count) """
33 | if data.dim() == 1:
34 | ret = data.new_full((count, ), fill)
35 | ret[inds] = data
36 | else:
37 | new_size = (count, ) + data.size()[1:]
38 | ret = data.new_full(new_size, fill)
39 | ret[inds, :] = data
40 | return ret
41 |
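Appended sketch (not in the upstream file) of multi_apply: apply a function over several lists in parallel, then transpose the per-item result tuples into per-field lists.

def add_and_mul(a, b):
    return a + b, a * b

sums, prods = multi_apply(add_and_mul, [1, 2, 3], [4, 5, 6])
print(sums)   # [5, 7, 9]
print(prods)  # [4, 10, 18]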
--------------------------------------------------------------------------------
/mmdet/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .custom import CustomDataset
2 | from .xml_style import XMLDataset
3 | from .coco import CocoDataset
4 | from .voc import VOCDataset
5 | from .wider_face import WIDERFaceDataset
6 | from .my_dataset import MyDataset
7 | from .loader import GroupSampler, DistributedGroupSampler, build_dataloader
8 | from .utils import to_tensor, random_scale, show_ann
9 | from .dataset_wrappers import ConcatDataset, RepeatDataset
10 | from .extra_aug import ExtraAugmentation
11 | from .registry import DATASETS
12 | from .builder import build_dataset
13 |
14 | __all__ = [
15 | 'CustomDataset', 'XMLDataset', 'CocoDataset', 'VOCDataset', 'GroupSampler',
16 | 'DistributedGroupSampler', 'build_dataloader', 'to_tensor', 'random_scale',
17 | 'show_ann', 'ConcatDataset', 'RepeatDataset', 'ExtraAugmentation',
18 | 'WIDERFaceDataset', 'DATASETS', 'build_dataset','MyDataset'
19 | ]
20 |
--------------------------------------------------------------------------------
/mmdet/datasets/builder.py:
--------------------------------------------------------------------------------
1 | import copy
2 |
3 | from mmdet.utils import build_from_cfg
4 | from .dataset_wrappers import ConcatDataset, RepeatDataset
5 | from .registry import DATASETS
6 |
7 | import ipdb
8 |
9 |
10 |
11 | def _concat_dataset(cfg):
12 | ann_files = cfg['ann_file']
13 | img_prefixes = cfg.get('img_prefix', None)
14 | seg_prefixes = cfg.get('seg_prefixes', None)
15 | proposal_files = cfg.get('proposal_file', None)
16 |
17 | datasets = []
18 | num_dset = len(ann_files)
19 | for i in range(num_dset):
20 | data_cfg = copy.deepcopy(cfg)
21 | data_cfg['ann_file'] = ann_files[i]
22 | if isinstance(img_prefixes, (list, tuple)):
23 | data_cfg['img_prefix'] = img_prefixes[i]
24 | if isinstance(seg_prefixes, (list, tuple)):
25 | data_cfg['seg_prefix'] = seg_prefixes[i]
26 | if isinstance(proposal_files, (list, tuple)):
27 | data_cfg['proposal_file'] = proposal_files[i]
28 | datasets.append(build_dataset(data_cfg))
29 |
30 | return ConcatDataset(datasets)
31 |
32 |
33 | def build_dataset(cfg):
34 | if cfg['type'] == 'RepeatDataset':
35 | dataset = RepeatDataset(build_dataset(cfg['dataset']), cfg['times'])
36 | elif isinstance(cfg['ann_file'], (list, tuple)):
37 | dataset = _concat_dataset(cfg)
38 | else:
39 | dataset = build_from_cfg(cfg, DATASETS)
40 | # ipdb.set_trace(context=35)
41 |
42 | return dataset
43 |
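Appended sketch (not in the upstream file): a hypothetical RepeatDataset config for build_dataset. The wrapper dispatch happens on cfg['type'] and the inner dataset dict is built recursively; the paths and fields below are placeholders in the style of this repo's configs, not taken from them.

cfg = dict(
    type='RepeatDataset',
    times=3,
    dataset=dict(
        type='VOCDataset',
        ann_file='data/VOCdevkit/VOC2007/ImageSets/Main/trainval.txt',
        img_prefix='data/VOCdevkit/VOC2007/'))  # plus the remaining dataset keys from a config file
dataset = build_dataset(cfg)  # -> RepeatDataset wrapping a VOCDataset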
--------------------------------------------------------------------------------
/mmdet/datasets/dataset_wrappers.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset
3 |
4 | from .registry import DATASETS
5 |
6 |
7 | @DATASETS.register_module
8 | class ConcatDataset(_ConcatDataset):
9 | """A wrapper of concatenated dataset.
10 |
11 | Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but
12 | concat the group flag for image aspect ratio.
13 |
14 | Args:
15 | datasets (list[:obj:`Dataset`]): A list of datasets.
16 | """
17 |
18 | def __init__(self, datasets):
19 | super(ConcatDataset, self).__init__(datasets)
20 | self.CLASSES = datasets[0].CLASSES
21 | if hasattr(datasets[0], 'flag'):
22 | flags = []
23 | for i in range(0, len(datasets)):
24 | flags.append(datasets[i].flag)
25 | self.flag = np.concatenate(flags)
26 |
27 |
28 | @DATASETS.register_module
29 | class RepeatDataset(object):
30 | """A wrapper of repeated dataset.
31 |
32 | The length of repeated dataset will be `times` larger than the original
33 | dataset. This is useful when the data loading time is long but the dataset
34 | is small. Using RepeatDataset can reduce the data loading time between
35 | epochs.
36 |
37 | Args:
38 | dataset (:obj:`Dataset`): The dataset to be repeated.
39 | times (int): Repeat times.
40 | """
41 |
42 | def __init__(self, dataset, times):
43 | self.dataset = dataset
44 | self.times = times
45 | self.CLASSES = dataset.CLASSES
46 | if hasattr(self.dataset, 'flag'):
47 | self.flag = np.tile(self.dataset.flag, times)
48 |
49 | self._ori_len = len(self.dataset)
50 |
51 | def __getitem__(self, idx):
52 | return self.dataset[idx % self._ori_len]
53 |
54 | def __len__(self):
55 | return self.times * self._ori_len
56 |
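Appended sketch (not in the upstream file) of RepeatDataset's index mapping: indices wrap around the original dataset via modulo, so nothing is copied.

class ToyDataset(object):
    CLASSES = ('a', 'b')

    def __getitem__(self, idx):
        return idx

    def __len__(self):
        return 4

repeated = RepeatDataset(ToyDataset(), times=3)
print(len(repeated))  # 12
print(repeated[9])    # 9 % 4 = 1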
--------------------------------------------------------------------------------
/mmdet/datasets/loader/__init__.py:
--------------------------------------------------------------------------------
1 | from .build_loader import build_dataloader
2 | from .sampler import GroupSampler, DistributedGroupSampler
3 |
4 | __all__ = ['GroupSampler', 'DistributedGroupSampler', 'build_dataloader']
5 |
--------------------------------------------------------------------------------
/mmdet/datasets/loader/build_loader.py:
--------------------------------------------------------------------------------
1 | import platform
2 | from functools import partial
3 |
4 | from mmcv.runner import get_dist_info
5 | from mmcv.parallel import collate
6 | from torch.utils.data import DataLoader
7 |
8 | from .sampler import GroupSampler, DistributedGroupSampler, DistributedSampler
9 |
10 | import ipdb
11 |
12 | if platform.system() != 'Windows':
13 | # https://github.com/pytorch/pytorch/issues/973
14 | import resource
15 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
16 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1]))
17 |
18 |
19 | def build_dataloader(dataset,
20 | imgs_per_gpu,
21 | workers_per_gpu,
22 | num_gpus=1,
23 | dist=True,
24 | **kwargs):
25 | shuffle = kwargs.get('shuffle', True)
26 | if dist:
27 | rank, world_size = get_dist_info()
28 | if shuffle:
29 | sampler = DistributedGroupSampler(dataset, imgs_per_gpu,
30 | world_size, rank)
31 | else:
32 | sampler = DistributedSampler(
33 | dataset, world_size, rank, shuffle=False)
34 | batch_size = imgs_per_gpu
35 | num_workers = workers_per_gpu
36 | else:
37 |         # non-distributed training
38 |         sampler = GroupSampler(dataset, imgs_per_gpu) if shuffle else None # how samples within a batch are drawn
39 |         batch_size = num_gpus * imgs_per_gpu # the batch size is defined here
40 |         num_workers = num_gpus * workers_per_gpu # multiple worker processes speed up data loading
41 |
42 |     # Use PyTorch's built-in DataLoader.
43 |     # DataLoader is an iterator.
44 |     # collate_fn: during data processing a sample can fail to load, e.g. because an image is corrupted.
45 |     # __getitem__ then raises an exception, and the best fix is to drop the broken sample.
46 |     # If that really cannot be handled, return a None object instead and implement a custom collate_fn in the Dataloader that filters the empty objects out.
47 |     # Note, however, that in this case the batch returned by the dataloader will contain fewer samples than batch_size.
48 |
49 |     # sampler: a custom strategy for drawing samples from the dataset; if this argument is specified, shuffle must be False
50 | data_loader = DataLoader(
51 | dataset,
52 | batch_size=batch_size,
53 | sampler=sampler,
54 | num_workers=num_workers,
55 | collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu),
56 | pin_memory=False,
57 | **kwargs)
58 |
59 | return data_loader
60 |
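Appended usage sketch (not in the upstream file) for the non-distributed branch: one GPU, two images per GPU, two loader workers. dataset is anything returned by build_dataset().

data_loader = build_dataloader(
    dataset,
    imgs_per_gpu=2,
    workers_per_gpu=2,
    num_gpus=1,
    dist=False)
for i, data_batch in enumerate(data_loader):
    pass  # each data_batch holds 2 images collated by mmcv's collate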
--------------------------------------------------------------------------------
/mmdet/datasets/my_dataset.py:
--------------------------------------------------------------------------------
1 | from .voc import VOCDataset
2 | from .registry import DATASETS
3 |
4 |
5 | @DATASETS.register_module
6 | class MyDataset(VOCDataset):
7 |
8 | CLASSES = ('large-vehicle', 'swimming-pool', 'helicopter', 'bridge', 'plane','ship',
9 | 'soccer-ball-field','basketball-court','airport','container-crane',
10 | 'ground-track-field','small-vehicle','harbor','baseball-diamond','tennis-court',
11 | 'roundabout','storage-tank','helipad')
12 |
13 | # CLASSES = ('ship','cruiser','carrier')
14 |
--------------------------------------------------------------------------------
/mmdet/datasets/registry.py:
--------------------------------------------------------------------------------
1 | from mmdet.utils import Registry
2 |
3 | DATASETS = Registry('dataset')
4 |
--------------------------------------------------------------------------------
/mmdet/datasets/utils.py:
--------------------------------------------------------------------------------
1 | from collections.abc import Sequence
2 |
3 | import matplotlib.pyplot as plt
4 | import mmcv
5 | import numpy as np
6 | import torch
7 |
8 |
9 | def to_tensor(data):
10 | """Convert objects of various python types to :obj:`torch.Tensor`.
11 |
12 | Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`,
13 | :class:`Sequence`, :class:`int` and :class:`float`.
14 | """
15 | if isinstance(data, torch.Tensor):
16 | return data
17 | elif isinstance(data, np.ndarray):
18 | return torch.from_numpy(data)
19 | elif isinstance(data, Sequence) and not mmcv.is_str(data):
20 | return torch.tensor(data)
21 | elif isinstance(data, int):
22 | return torch.LongTensor([data])
23 | elif isinstance(data, float):
24 | return torch.FloatTensor([data])
25 | else:
26 | raise TypeError('type {} cannot be converted to tensor.'.format(
27 | type(data)))
28 |
29 |
30 | def random_scale(img_scales, mode='range'):
31 | """Randomly select a scale from a list of scales or scale ranges.
32 |
33 | Args:
34 | img_scales (list[tuple]): Image scale or scale range.
35 | mode (str): "range" or "value".
36 |
37 | Returns:
38 | tuple: Sampled image scale.
39 | """
40 | num_scales = len(img_scales)
41 | if num_scales == 1: # fixed scale is specified
42 | img_scale = img_scales[0]
43 | elif num_scales == 2: # randomly sample a scale
44 | if mode == 'range':
45 | img_scale_long = [max(s) for s in img_scales]
46 | img_scale_short = [min(s) for s in img_scales]
47 | long_edge = np.random.randint(
48 | min(img_scale_long),
49 | max(img_scale_long) + 1)
50 | short_edge = np.random.randint(
51 | min(img_scale_short),
52 | max(img_scale_short) + 1)
53 | img_scale = (long_edge, short_edge)
54 | elif mode == 'value':
55 | img_scale = img_scales[np.random.randint(num_scales)]
56 | else:
57 | if mode != 'value':
58 | raise ValueError(
59 | 'Only "value" mode supports more than 2 image scales')
60 | img_scale = img_scales[np.random.randint(num_scales)]
61 | return img_scale
62 |
63 |
64 | def show_ann(coco, img, ann_info):
65 | plt.imshow(mmcv.bgr2rgb(img))
66 | plt.axis('off')
67 | coco.showAnns(ann_info)
68 | plt.show()
69 |
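Appended sketch (not in the upstream file) of random_scale in 'range' mode with a typical 640-800 multi-scale setting: the long edge is drawn from [1333, 1333] and the short edge from [640, 800].

scale = random_scale([(1333, 640), (1333, 800)], mode='range')
print(scale)  # e.g. (1333, 713)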
--------------------------------------------------------------------------------
/mmdet/datasets/voc.py:
--------------------------------------------------------------------------------
1 | from .registry import DATASETS
2 | from .xml_style import XMLDataset
3 |
4 |
5 | @DATASETS.register_module
6 | class VOCDataset(XMLDataset):
7 |
8 | CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car',
9 | 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
10 | 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
11 | 'tvmonitor')
12 |
13 | def __init__(self, **kwargs):
14 | super(VOCDataset, self).__init__(**kwargs)
15 | if 'VOC2007' in self.img_prefix:
16 | self.year = 2007
17 | elif 'VOC2012' in self.img_prefix:
18 | self.year = 2012
19 | else:
20 | raise ValueError('Cannot infer dataset year from img_prefix')
21 |
--------------------------------------------------------------------------------
/mmdet/datasets/wider_face.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | import xml.etree.ElementTree as ET
3 |
4 | import mmcv
5 |
6 | from .registry import DATASETS
7 | from .xml_style import XMLDataset
8 |
9 |
10 | @DATASETS.register_module
11 | class WIDERFaceDataset(XMLDataset):
12 | """
13 | Reader for the WIDER Face dataset in PASCAL VOC format.
14 | Conversion scripts can be found in
15 | https://github.com/sovrasov/wider-face-pascal-voc-annotations
16 | """
17 | CLASSES = ('face', )
18 |
19 | def __init__(self, **kwargs):
20 | super(WIDERFaceDataset, self).__init__(**kwargs)
21 |
22 | def load_annotations(self, ann_file):
23 | img_infos = []
24 | img_ids = mmcv.list_from_file(ann_file)
25 | for img_id in img_ids:
26 | filename = '{}.jpg'.format(img_id)
27 | xml_path = osp.join(self.img_prefix, 'Annotations',
28 | '{}.xml'.format(img_id))
29 | tree = ET.parse(xml_path)
30 | root = tree.getroot()
31 | size = root.find('size')
32 | width = int(size.find('width').text)
33 | height = int(size.find('height').text)
34 | folder = root.find('folder').text
35 | img_infos.append(
36 | dict(
37 | id=img_id,
38 | filename=osp.join(folder, filename),
39 | width=width,
40 | height=height))
41 |
42 | return img_infos
43 |
--------------------------------------------------------------------------------
/mmdet/datasets/xml_style.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | import xml.etree.ElementTree as ET
3 |
4 | import mmcv
5 | import numpy as np
6 |
7 | from .custom import CustomDataset
8 | from .registry import DATASETS
9 |
10 |
11 | @DATASETS.register_module
12 | class XMLDataset(CustomDataset):
13 |
14 | def __init__(self, min_size=None, **kwargs):
15 | super(XMLDataset, self).__init__(**kwargs)
16 | self.cat2label = {cat: i + 1 for i, cat in enumerate(self.CLASSES)}
17 | self.min_size = min_size
18 |
19 | def load_annotations(self, ann_file):
20 | img_infos = []
21 | img_ids = mmcv.list_from_file(ann_file)
22 | for img_id in img_ids:
23 | filename = 'JPEGImages/{}.jpg'.format(img_id)
24 | xml_path = osp.join(self.img_prefix, 'Annotations',
25 | '{}.xml'.format(img_id))
26 | tree = ET.parse(xml_path)
27 | root = tree.getroot()
28 | size = root.find('size')
29 | width = int(size.find('width').text)
30 | height = int(size.find('height').text)
31 | img_infos.append(
32 | dict(id=img_id, filename=filename, width=width, height=height))
33 | return img_infos
34 |
35 | def get_ann_info(self, idx):
36 | img_id = self.img_infos[idx]['id']
37 | xml_path = osp.join(self.img_prefix, 'Annotations',
38 | '{}.xml'.format(img_id))
39 | tree = ET.parse(xml_path)
40 | root = tree.getroot()
41 | bboxes = []
42 | labels = []
43 | bboxes_ignore = []
44 | labels_ignore = []
45 | for obj in root.findall('object'):
46 | name = obj.find('name').text
47 | label = self.cat2label[name]
48 | difficult = int(obj.find('difficult').text)
49 | bnd_box = obj.find('bndbox')
50 | bbox = [
51 | int(bnd_box.find('xmin').text),
52 | int(bnd_box.find('ymin').text),
53 | int(bnd_box.find('xmax').text),
54 | int(bnd_box.find('ymax').text)
55 | ]
56 | ignore = False
57 | if self.min_size:
58 | assert not self.test_mode
59 | w = bbox[2] - bbox[0]
60 | h = bbox[3] - bbox[1]
61 | if w < self.min_size or h < self.min_size:
62 | ignore = True
63 | if difficult or ignore:
64 | bboxes_ignore.append(bbox)
65 | labels_ignore.append(label)
66 | else:
67 | bboxes.append(bbox)
68 | labels.append(label)
69 | if not bboxes:
70 | bboxes = np.zeros((0, 4))
71 | labels = np.zeros((0, ))
72 | else:
73 | bboxes = np.array(bboxes, ndmin=2) - 1
74 | labels = np.array(labels)
75 | if not bboxes_ignore:
76 | bboxes_ignore = np.zeros((0, 4))
77 | labels_ignore = np.zeros((0, ))
78 | else:
79 | bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1
80 | labels_ignore = np.array(labels_ignore)
81 | ann = dict(
82 | bboxes=bboxes.astype(np.float32),
83 | labels=labels.astype(np.int64),
84 | bboxes_ignore=bboxes_ignore.astype(np.float32),
85 | labels_ignore=labels_ignore.astype(np.int64))
86 | return ann
87 |
--------------------------------------------------------------------------------
/mmdet/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .backbones import * # noqa: F401,F403
2 | from .necks import * # noqa: F401,F403
3 | from .roi_extractors import * # noqa: F401,F403
4 | from .anchor_heads import * # noqa: F401,F403
5 | from .shared_heads import * # noqa: F401,F403
6 | from .bbox_heads import * # noqa: F401,F403
7 | from .mask_heads import * # noqa: F401,F403
8 | from .losses import * # noqa: F401,F403
9 | from .detectors import * # noqa: F401,F403
10 | from .registry import (BACKBONES, NECKS, ROI_EXTRACTORS, SHARED_HEADS, HEADS,
11 | LOSSES, DETECTORS)
12 | from .builder import (build_backbone, build_neck, build_roi_extractor,
13 | build_shared_head, build_head, build_loss,
14 | build_detector)
15 |
16 | __all__ = [
17 | 'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS', 'LOSSES',
18 | 'DETECTORS', 'build_backbone', 'build_neck', 'build_roi_extractor',
19 | 'build_shared_head', 'build_head', 'build_loss', 'build_detector'
20 | ]
21 |
--------------------------------------------------------------------------------
/mmdet/models/anchor_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .anchor_head import AnchorHead
2 | from .guided_anchor_head import GuidedAnchorHead, FeatureAdaption
3 | from .fcos_head import FCOSHead
4 | from .rpn_head import RPNHead
5 | from .ga_rpn_head import GARPNHead
6 | from .retina_head import RetinaHead
7 | from .ga_retina_head import GARetinaHead
8 | from .ssd_head import SSDHead
9 |
10 | __all__ = [
11 | 'AnchorHead', 'GuidedAnchorHead', 'FeatureAdaption', 'RPNHead',
12 | 'GARPNHead', 'RetinaHead', 'GARetinaHead', 'SSDHead', 'FCOSHead'
13 | ]
14 |
--------------------------------------------------------------------------------
/mmdet/models/anchor_heads/retina_head.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch.nn as nn
3 | from mmcv.cnn import normal_init
4 |
5 | from .anchor_head import AnchorHead
6 | from ..registry import HEADS
7 | from ..utils import bias_init_with_prob, ConvModule
8 |
9 |
10 | @HEADS.register_module
11 | class RetinaHead(AnchorHead):
12 |
13 | def __init__(self,
14 | num_classes,
15 | in_channels,
16 | stacked_convs=4,
17 | octave_base_scale=4,
18 | scales_per_octave=3,
19 | conv_cfg=None,
20 | norm_cfg=None,
21 | **kwargs):
22 | self.stacked_convs = stacked_convs
23 | self.octave_base_scale = octave_base_scale
24 | self.scales_per_octave = scales_per_octave
25 | self.conv_cfg = conv_cfg
26 | self.norm_cfg = norm_cfg
27 | octave_scales = np.array(
28 | [2**(i / scales_per_octave) for i in range(scales_per_octave)])
29 | anchor_scales = octave_scales * octave_base_scale
30 | super(RetinaHead, self).__init__(
31 | num_classes, in_channels, anchor_scales=anchor_scales, **kwargs)
32 |
33 | def _init_layers(self):
34 | self.relu = nn.ReLU(inplace=True)
35 | self.cls_convs = nn.ModuleList()
36 | self.reg_convs = nn.ModuleList()
37 | for i in range(self.stacked_convs):
38 | chn = self.in_channels if i == 0 else self.feat_channels
39 | self.cls_convs.append(
40 | ConvModule(
41 | chn,
42 | self.feat_channels,
43 | 3,
44 | stride=1,
45 | padding=1,
46 | conv_cfg=self.conv_cfg,
47 | norm_cfg=self.norm_cfg))
48 | self.reg_convs.append(
49 | ConvModule(
50 | chn,
51 | self.feat_channels,
52 | 3,
53 | stride=1,
54 | padding=1,
55 | conv_cfg=self.conv_cfg,
56 | norm_cfg=self.norm_cfg))
57 | self.retina_cls = nn.Conv2d(
58 | self.feat_channels,
59 | self.num_anchors * self.cls_out_channels,
60 | 3,
61 | padding=1)
62 | self.retina_reg = nn.Conv2d(
63 | self.feat_channels, self.num_anchors * 4, 3, padding=1)
64 |
65 | def init_weights(self):
66 | for m in self.cls_convs:
67 | normal_init(m.conv, std=0.01)
68 | for m in self.reg_convs:
69 | normal_init(m.conv, std=0.01)
70 | bias_cls = bias_init_with_prob(0.01)
71 | normal_init(self.retina_cls, std=0.01, bias=bias_cls)
72 | normal_init(self.retina_reg, std=0.01)
73 |
74 | def forward_single(self, x):
75 | cls_feat = x
76 | reg_feat = x
77 | for cls_conv in self.cls_convs:
78 | cls_feat = cls_conv(cls_feat)
79 | for reg_conv in self.reg_convs:
80 | reg_feat = reg_conv(reg_feat)
81 | cls_score = self.retina_cls(cls_feat)
82 | bbox_pred = self.retina_reg(reg_feat)
83 | return cls_score, bbox_pred
84 |
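Appended worked example (not in the upstream file) of the anchor scale computation in __init__ above, with the default octave_base_scale=4 and scales_per_octave=3.

import numpy as np

octave_scales = np.array([2**(i / 3) for i in range(3)])  # [1.0, 1.2599, 1.5874]
anchor_scales = octave_scales * 4
print(anchor_scales)  # [4.0, 5.0397, 6.3496]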
--------------------------------------------------------------------------------
/mmdet/models/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | from .resnet import ResNet, make_res_layer
2 | from .resnext import ResNeXt
3 | from .ssd_vgg import SSDVGG
4 | from .hrnet import HRNet
5 |
6 | __all__ = ['ResNet', 'make_res_layer', 'ResNeXt', 'SSDVGG', 'HRNet']
7 |
--------------------------------------------------------------------------------
/mmdet/models/bbox_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .bbox_head import BBoxHead
2 | from .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead
3 |
4 | __all__ = ['BBoxHead', 'ConvFCBBoxHead', 'SharedFCBBoxHead']
5 |
--------------------------------------------------------------------------------
/mmdet/models/builder.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 |
3 | from mmdet.utils import build_from_cfg
4 |
5 | # registry.py is not executed again here; the already-imported module is looked up directly via sys.modules
6 | from .registry import (BACKBONES, NECKS, ROI_EXTRACTORS, SHARED_HEADS, HEADS,
7 | LOSSES, DETECTORS)
8 |
9 |
10 | def build(cfg, registry, default_args=None):
11 | if isinstance(cfg, list):
12 | modules = [
13 | build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg
14 | ]
15 | return nn.Sequential(*modules)
16 | else:
17 | return build_from_cfg(cfg, registry, default_args)
18 |
19 |
20 | def build_backbone(cfg):
21 | return build(cfg, BACKBONES)
22 |
23 |
24 | def build_neck(cfg):
25 | return build(cfg, NECKS)
26 |
27 |
28 | def build_roi_extractor(cfg):
29 | return build(cfg, ROI_EXTRACTORS)
30 |
31 |
32 | def build_shared_head(cfg):
33 | return build(cfg, SHARED_HEADS)
34 |
35 |
36 | def build_head(cfg):
37 | return build(cfg, HEADS)
38 |
39 |
40 | def build_loss(cfg):
41 | return build(cfg, LOSSES)
42 |
43 |
44 | def build_detector(cfg, train_cfg=None, test_cfg=None):
45 | return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg))
46 |
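Appended sketch (not in the upstream file) of how build()/build_from_cfg behave: pop 'type' from the cfg dict, look the class up in the registry, and instantiate it with the remaining keys (plus default_args) as keyword arguments. The FPN values mirror the configs quoted elsewhere in this repo.

cfg = dict(type='FPN', in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5)
neck = build_neck(cfg)
# equivalent to: FPN(in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5)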
--------------------------------------------------------------------------------
/mmdet/models/detectors/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import BaseDetector
2 | from .single_stage import SingleStageDetector
3 | from .two_stage import TwoStageDetector
4 | from .rpn import RPN
5 | from .fast_rcnn import FastRCNN
6 | from .faster_rcnn import FasterRCNN
7 | from .mask_rcnn import MaskRCNN
8 | from .cascade_rcnn import CascadeRCNN
9 | from .htc import HybridTaskCascade
10 | from .retinanet import RetinaNet
11 | from .fcos import FCOS
12 | from .grid_rcnn import GridRCNN
13 | from .mask_scoring_rcnn import MaskScoringRCNN
14 |
15 | __all__ = [
16 | 'BaseDetector', 'SingleStageDetector', 'TwoStageDetector', 'RPN',
17 | 'FastRCNN', 'FasterRCNN', 'MaskRCNN', 'CascadeRCNN', 'HybridTaskCascade',
18 | 'RetinaNet', 'FCOS', 'GridRCNN', 'MaskScoringRCNN'
19 | ]
20 |
--------------------------------------------------------------------------------
/mmdet/models/detectors/fast_rcnn.py:
--------------------------------------------------------------------------------
1 | from .two_stage import TwoStageDetector
2 | from ..registry import DETECTORS
3 |
4 |
5 | @DETECTORS.register_module
6 | class FastRCNN(TwoStageDetector):
7 |
8 | def __init__(self,
9 | backbone,
10 | bbox_roi_extractor,
11 | bbox_head,
12 | train_cfg,
13 | test_cfg,
14 | neck=None,
15 | shared_head=None,
16 | mask_roi_extractor=None,
17 | mask_head=None,
18 | pretrained=None):
19 | super(FastRCNN, self).__init__(
20 | backbone=backbone,
21 | neck=neck,
22 | shared_head=shared_head,
23 | bbox_roi_extractor=bbox_roi_extractor,
24 | bbox_head=bbox_head,
25 | train_cfg=train_cfg,
26 | test_cfg=test_cfg,
27 | mask_roi_extractor=mask_roi_extractor,
28 | mask_head=mask_head,
29 | pretrained=pretrained)
30 |
31 | def forward_test(self, imgs, img_metas, proposals, **kwargs):
32 | for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]:
33 | if not isinstance(var, list):
34 | raise TypeError('{} must be a list, but got {}'.format(
35 | name, type(var)))
36 |
37 | num_augs = len(imgs)
38 | if num_augs != len(img_metas):
39 | raise ValueError(
40 | 'num of augmentations ({}) != num of image meta ({})'.format(
41 | len(imgs), len(img_metas)))
42 | # TODO: remove the restriction of imgs_per_gpu == 1 when prepared
43 | imgs_per_gpu = imgs[0].size(0)
44 | assert imgs_per_gpu == 1
45 |
46 | if num_augs == 1:
47 | return self.simple_test(imgs[0], img_metas[0], proposals[0],
48 | **kwargs)
49 | else:
50 | return self.aug_test(imgs, img_metas, proposals, **kwargs)
51 |
--------------------------------------------------------------------------------
/mmdet/models/detectors/faster_rcnn.py:
--------------------------------------------------------------------------------
1 | from .two_stage import TwoStageDetector
2 | from ..registry import DETECTORS
3 |
4 | import ipdb
5 |
6 | # The computation graph is assembled recursively through the inherited super() constructors!
7 |
8 | @DETECTORS.register_module
9 | class FasterRCNN(TwoStageDetector):
10 |
11 | def __init__(self,
12 | backbone,
13 | rpn_head,
14 | bbox_roi_extractor,
15 | bbox_head,
16 | train_cfg,
17 | test_cfg,
18 | neck=None,
19 | shared_head=None,
20 | pretrained=None):
21 | super(FasterRCNN, self).__init__(
22 | backbone=backbone,
23 | neck=neck,
24 | shared_head=shared_head,
25 | rpn_head=rpn_head,
26 | bbox_roi_extractor=bbox_roi_extractor,
27 | bbox_head=bbox_head,
28 | train_cfg=train_cfg,
29 | test_cfg=test_cfg,
30 | pretrained=pretrained)
31 |
32 |
33 | '''
34 | The arguments passed in here at training time are:
35 | self: the Faster RCNN model being assembled
36 | backbone = {'type': 'ResNet', 'depth': 50, 'num_stages': 4, 'out_indices': (0, 1, 2, 3), 'frozen_stages': 1, 'style': 'pytorch'}
37 | rpn_head = {'type': 'RPNHead', 'in_channels': 256, 'feat_channels': 256, 'anchor_scales': [8], 'anchor_ratios': [0.5, 1.0, 2.0], 'anchor_strides': [4, 8, 16, 32, 64], 'target_means': [0.0, 0.0, 0.0, 0.0], 'target_stds': [1.0, 1.0, 1.0, 1.0], 'loss_cls': {'type': 'CrossEntropyLoss', 'use_sigmoid': True, 'loss_weight': 1.0}, 'loss_bbox': {'type': 'SmoothL1Loss', 'beta': 0.1111111111111111, 'loss_weight': 1.0}}
38 | bbox_roi_extractor = {'type': 'SingleRoIExtractor', 'roi_layer': {'type': 'RoIAlign', 'out_size': 7, 'sample_num': 2}, 'out_channels': 256, 'featmap_strides': [4, 8, 16, 32]}
39 | bbox_head = {'type': 'SharedFCBBoxHead', 'num_fcs': 2, 'in_channels': 256, 'fc_out_channels': 1024, 'roi_feat_size': 7, 'num_classes': 81, 'target_means': [0.0, 0.0, 0.0, 0.0], 'target_stds': [0.1, 0.1, 0.2, 0.2], 'reg_class_agnostic': False, 'loss_cls': {'type': 'CrossEntropyLoss', 'use_sigmoid': False, 'loss_weight': 1.0}, 'loss_bbox': {'type': 'SmoothL1Loss', 'beta': 1.0, 'loss_weight': 1.0}}
40 | train_cfg = {'rpn': {'assigner': {'type': 'MaxIoUAssigner', 'pos_iou_thr': 0.7, 'neg_iou_thr': 0.3, 'min_pos_iou': 0.3, 'ignore_iof_thr': -1}, 'sampler': {'type': 'RandomSampler', 'num': 256, 'pos_fraction': 0.5, 'neg_pos_ub': -1, 'add_gt_as_proposals': False}, 'allowed_border': 0, 'pos_weight': -1, 'debug': False}, 'rpn_proposal': {'nms_across_levels': False, 'nms_pre': 2000, 'nms_post': 2000, 'max_num': 2000, 'nms_thr': 0.7, 'min_bbox_size': 0}, 'rcnn': {'assigner': {'type': 'MaxIoUAssigner', 'pos_iou_thr': 0.5, 'neg_iou_thr': 0.5, 'min_pos_iou': 0.5, 'ignore_iof_thr': -1}, 'sampler': {'type': 'RandomSampler', 'num': 512, 'pos_fraction': 0.25, 'neg_pos_ub': -1, 'add_gt_as_proposals': True}, 'pos_weight': -1, 'debug': False}}
41 | test_cfg = {'rpn': {'nms_across_levels': False, 'nms_pre': 1000, 'nms_post': 1000, 'max_num': 1000, 'nms_thr': 0.7, 'min_bbox_size': 0}, 'rcnn': {'score_thr': 0.05, 'nms': {'type': 'nms', 'iou_thr': 0.5}, 'max_per_img': 100}}
42 | neck = {'type': 'FPN', 'in_channels': [256, 512, 1024, 2048], 'out_channels': 256, 'num_outs': 5}
43 | shared_head = None
44 | pretrained = 'modelzoo://resnet50'
45 |
46 | All constructors inherited further down receive these same arguments
47 | '''
48 |
--------------------------------------------------------------------------------
/mmdet/models/detectors/fcos.py:
--------------------------------------------------------------------------------
1 | from .single_stage import SingleStageDetector
2 | from ..registry import DETECTORS
3 |
4 |
5 | @DETECTORS.register_module
6 | class FCOS(SingleStageDetector):
7 |
8 | def __init__(self,
9 | backbone,
10 | neck,
11 | bbox_head,
12 | train_cfg=None,
13 | test_cfg=None,
14 | pretrained=None):
15 | super(FCOS, self).__init__(backbone, neck, bbox_head, train_cfg,
16 | test_cfg, pretrained)
17 |
--------------------------------------------------------------------------------
/mmdet/models/detectors/mask_rcnn.py:
--------------------------------------------------------------------------------
1 | from .two_stage import TwoStageDetector
2 | from ..registry import DETECTORS
3 |
4 |
5 | @DETECTORS.register_module
6 | class MaskRCNN(TwoStageDetector):
7 |
8 | def __init__(self,
9 | backbone,
10 | rpn_head,
11 | bbox_roi_extractor,
12 | bbox_head,
13 | mask_roi_extractor,
14 | mask_head,
15 | train_cfg,
16 | test_cfg,
17 | neck=None,
18 | shared_head=None,
19 | pretrained=None):
20 | super(MaskRCNN, self).__init__(
21 | backbone=backbone,
22 | neck=neck,
23 | shared_head=shared_head,
24 | rpn_head=rpn_head,
25 | bbox_roi_extractor=bbox_roi_extractor,
26 | bbox_head=bbox_head,
27 | mask_roi_extractor=mask_roi_extractor,
28 | mask_head=mask_head,
29 | train_cfg=train_cfg,
30 | test_cfg=test_cfg,
31 | pretrained=pretrained)
32 |
33 |     # The arguments passed in here are:
34 | '''
35 | self = MaskRCNN()
36 | backbone = {'type': 'ResNet', 'depth': 101, 'num_stages': 4, 'out_indices': (0, 1, 2, 3), 'frozen_stages': 1, 'style': 'pytorch'}
37 | neck = {'type': 'FPN', 'in_channels': [256, 512, 1024, 2048], 'out_channels': 256, 'num_outs': 5}
38 | rpn_head = {'type': 'RPNHead', 'in_channels': 256, 'feat_channels': 256, 'anchor_scales': [8], 'anchor_ratios': [0.5, 1.0, 2.0], 'anchor_strides': [4, 8, 16, 32, 64], 'target_means': [0.0, 0.0, 0.0, 0.0], 'target_stds': [1.0, 1.0, 1.0, 1.0], 'use_sigmoid_cls': True}
39 | bbox_roi_extractor = {'type': 'SingleRoIExtractor', 'roi_layer': {'type': 'RoIAlign', 'out_size': 7, 'sample_num': 2}, 'out_channels': 256, 'featmap_strides': [4, 8, 16, 32]}
40 | bbox_head = {'type': 'SharedFCBBoxHead', 'num_fcs': 2, 'in_channels': 256, 'fc_out_channels': 1024, 'roi_feat_size': 7, 'num_classes': 81, 'target_means': [0.0, 0.0, 0.0, 0.0], 'target_stds': [0.1, 0.1, 0.2, 0.2], 'reg_class_agnostic': False}
41 | mask_roi_extractor = {'type': 'SingleRoIExtractor', 'roi_layer': {'type': 'RoIAlign', 'out_size': 14, 'sample_num': 2}, 'out_channels': 256, 'featmap_strides': [4, 8, 16, 32]}
42 | mask_head = {'type': 'FCNMaskHead', 'num_convs': 4, 'in_channels': 256, 'conv_out_channels': 256, 'num_classes': 81}
43 | train_cfg = None
44 | test_cfg = {'rpn': {'nms_across_levels': False, 'nms_pre': 2000, 'nms_post': 2000, 'max_num': 2000, 'nms_thr': 0.7, 'min_bbox_size': 0}, 'rcnn': {'score_thr': 0.05, 'nms': {'type': 'nms', 'iou_thr': 0.5}, 'max_per_img': 100, 'mask_thr_binary': 0.5}}
45 | pretrained = None
46 | All constructors inherited further down receive these same arguments
47 | '''
--------------------------------------------------------------------------------
/mmdet/models/detectors/retinanet.py:
--------------------------------------------------------------------------------
1 | from .single_stage import SingleStageDetector
2 | from ..registry import DETECTORS
3 |
4 |
5 | @DETECTORS.register_module
6 | class RetinaNet(SingleStageDetector):
7 |
8 | def __init__(self,
9 | backbone,
10 | neck,
11 | bbox_head,
12 | train_cfg=None,
13 | test_cfg=None,
14 | pretrained=None):
15 | super(RetinaNet, self).__init__(backbone, neck, bbox_head, train_cfg,
16 | test_cfg, pretrained)
17 |
--------------------------------------------------------------------------------
/mmdet/models/detectors/rpn.py:
--------------------------------------------------------------------------------
1 | import mmcv
2 |
3 | from mmdet.core import tensor2imgs, bbox_mapping
4 | from .base import BaseDetector
5 | from .test_mixins import RPNTestMixin
6 | from .. import builder
7 | from ..registry import DETECTORS
8 |
9 |
10 | @DETECTORS.register_module
11 | class RPN(BaseDetector, RPNTestMixin):
12 |
13 | def __init__(self,
14 | backbone,
15 | neck,
16 | rpn_head,
17 | train_cfg,
18 | test_cfg,
19 | pretrained=None):
20 | super(RPN, self).__init__()
21 | self.backbone = builder.build_backbone(backbone)
22 | self.neck = builder.build_neck(neck) if neck is not None else None
23 | self.rpn_head = builder.build_head(rpn_head)
24 | self.train_cfg = train_cfg
25 | self.test_cfg = test_cfg
26 | self.init_weights(pretrained=pretrained)
27 |
28 | def init_weights(self, pretrained=None):
29 | super(RPN, self).init_weights(pretrained)
30 | self.backbone.init_weights(pretrained=pretrained)
31 | if self.with_neck:
32 | self.neck.init_weights()
33 | self.rpn_head.init_weights()
34 |
35 | def extract_feat(self, img):
36 | x = self.backbone(img)
37 | if self.with_neck:
38 | x = self.neck(x)
39 | return x
40 |
41 | def forward_train(self,
42 | img,
43 | img_meta,
44 | gt_bboxes=None,
45 | gt_bboxes_ignore=None):
46 | if self.train_cfg.rpn.get('debug', False):
47 | self.rpn_head.debug_imgs = tensor2imgs(img)
48 |
49 | x = self.extract_feat(img)
50 | rpn_outs = self.rpn_head(x)
51 |
52 | rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta, self.train_cfg.rpn)
53 | losses = self.rpn_head.loss(
54 | *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
55 | return losses
56 |
57 | def simple_test(self, img, img_meta, rescale=False):
58 | x = self.extract_feat(img)
59 | proposal_list = self.simple_test_rpn(x, img_meta, self.test_cfg.rpn)
60 | if rescale:
61 | for proposals, meta in zip(proposal_list, img_meta):
62 | proposals[:, :4] /= meta['scale_factor']
63 | # TODO: remove this restriction
64 | return proposal_list[0].cpu().numpy()
65 |
66 | def aug_test(self, imgs, img_metas, rescale=False):
67 | proposal_list = self.aug_test_rpn(
68 | self.extract_feats(imgs), img_metas, self.test_cfg.rpn)
69 | if not rescale:
70 | for proposals, img_meta in zip(proposal_list, img_metas[0]):
71 | img_shape = img_meta['img_shape']
72 | scale_factor = img_meta['scale_factor']
73 | flip = img_meta['flip']
74 | proposals[:, :4] = bbox_mapping(proposals[:, :4], img_shape,
75 | scale_factor, flip)
76 | # TODO: remove this restriction
77 | return proposal_list[0].cpu().numpy()
78 |
79 | def show_result(self, data, result, img_norm_cfg, dataset=None, top_k=20):
80 | """Show RPN proposals on the image.
81 |
82 | Although we assume batch size is 1, this method supports arbitrary
83 | batch size.
84 | """
85 | img_tensor = data['img'][0]
86 | img_metas = data['img_meta'][0].data[0]
87 | imgs = tensor2imgs(img_tensor, **img_norm_cfg)
88 | assert len(imgs) == len(img_metas)
89 | for img, img_meta in zip(imgs, img_metas):
90 | h, w, _ = img_meta['img_shape']
91 | img_show = img[:h, :w, :]
92 | mmcv.imshow_bboxes(img_show, result, top_k=top_k)
93 |
--------------------------------------------------------------------------------
/mmdet/models/detectors/single_stage.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 |
3 | from .base import BaseDetector
4 | from .. import builder
5 | from ..registry import DETECTORS
6 | from mmdet.core import bbox2result
7 |
8 |
9 | @DETECTORS.register_module
10 | class SingleStageDetector(BaseDetector):
11 |
12 | def __init__(self,
13 | backbone,
14 | neck=None,
15 | bbox_head=None,
16 | train_cfg=None,
17 | test_cfg=None,
18 | pretrained=None):
19 | super(SingleStageDetector, self).__init__()
20 | self.backbone = builder.build_backbone(backbone)
21 | if neck is not None:
22 | self.neck = builder.build_neck(neck)
23 | self.bbox_head = builder.build_head(bbox_head)
24 | self.train_cfg = train_cfg
25 | self.test_cfg = test_cfg
26 | self.init_weights(pretrained=pretrained)
27 |
28 | def init_weights(self, pretrained=None):
29 | super(SingleStageDetector, self).init_weights(pretrained)
30 | self.backbone.init_weights(pretrained=pretrained)
31 | if self.with_neck:
32 | if isinstance(self.neck, nn.Sequential):
33 | for m in self.neck:
34 | m.init_weights()
35 | else:
36 | self.neck.init_weights()
37 | self.bbox_head.init_weights()
38 |
39 | def extract_feat(self, img):
40 | x = self.backbone(img)
41 | if self.with_neck:
42 | x = self.neck(x)
43 | return x
44 |
45 | def forward_train(self,
46 | img,
47 | img_metas,
48 | gt_bboxes,
49 | gt_labels,
50 | gt_bboxes_ignore=None):
51 | x = self.extract_feat(img)
52 | outs = self.bbox_head(x)
53 | loss_inputs = outs + (gt_bboxes, gt_labels, img_metas, self.train_cfg)
54 | losses = self.bbox_head.loss(
55 | *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
56 | return losses
57 |
58 | def simple_test(self, img, img_meta, rescale=False):
59 | x = self.extract_feat(img)
60 | outs = self.bbox_head(x)
61 | bbox_inputs = outs + (img_meta, self.test_cfg, rescale)
62 | bbox_list = self.bbox_head.get_bboxes(*bbox_inputs)
63 | bbox_results = [
64 | bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes)
65 | for det_bboxes, det_labels in bbox_list
66 | ]
67 | return bbox_results[0]
68 |
69 | def aug_test(self, imgs, img_metas, rescale=False):
70 | raise NotImplementedError
71 |
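The whole class boils down to one pipeline: the backbone (plus optional neck) extracts features, a dense head predicts per-location outputs, and the head itself owns both `loss()` and `get_bboxes()`. A toy sketch of that data flow with stand-in modules (none of this is mmdet code; shapes are illustrative only):

    import torch
    import torch.nn as nn

    backbone = nn.Conv2d(3, 8, 3, stride=2, padding=1)  # stand-in for build_backbone(...)
    neck = nn.Conv2d(8, 8, 1)                           # stand-in for build_neck(...)
    bbox_head = nn.Conv2d(8, 4 + 2, 1)                  # 4 box deltas + 2 class scores per location

    img = torch.randn(1, 3, 64, 64)
    feat = neck(backbone(img))   # extract_feat()
    outs = bbox_head(feat)       # self.bbox_head(x)
    print(outs.shape)            # torch.Size([1, 6, 32, 32])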
--------------------------------------------------------------------------------
/mmdet/models/losses/__init__.py:
--------------------------------------------------------------------------------
1 | from .accuracy import accuracy, Accuracy
2 | from .cross_entropy_loss import (cross_entropy, binary_cross_entropy,
3 | mask_cross_entropy, CrossEntropyLoss)
4 | from .focal_loss import sigmoid_focal_loss, FocalLoss
5 | from .smooth_l1_loss import smooth_l1_loss, SmoothL1Loss
6 | from .ghm_loss import GHMC, GHMR
7 | from .balanced_l1_loss import balanced_l1_loss, BalancedL1Loss
8 | from .mse_loss import mse_loss, MSELoss
9 | from .iou_loss import iou_loss, bounded_iou_loss, IoULoss, BoundedIoULoss
10 | from .utils import reduce_loss, weight_reduce_loss, weighted_loss
11 |
12 | __all__ = [
13 | 'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy',
14 | 'mask_cross_entropy', 'CrossEntropyLoss', 'sigmoid_focal_loss',
15 | 'FocalLoss', 'smooth_l1_loss', 'SmoothL1Loss', 'balanced_l1_loss',
16 | 'BalancedL1Loss', 'mse_loss', 'MSELoss', 'iou_loss', 'bounded_iou_loss',
17 | 'IoULoss', 'BoundedIoULoss', 'GHMC', 'GHMR', 'reduce_loss',
18 | 'weight_reduce_loss', 'weighted_loss'
19 | ]
20 |
--------------------------------------------------------------------------------
/mmdet/models/losses/accuracy.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 |
3 |
4 | def accuracy(pred, target, topk=1):
5 | assert isinstance(topk, (int, tuple))
6 | if isinstance(topk, int):
7 | topk = (topk, )
8 | return_single = True
9 | else:
10 | return_single = False
11 |
12 | maxk = max(topk)
13 | _, pred_label = pred.topk(maxk, dim=1)
14 | pred_label = pred_label.t()
15 | correct = pred_label.eq(target.view(1, -1).expand_as(pred_label))
16 |
17 | res = []
18 | for k in topk:
19 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
20 | res.append(correct_k.mul_(100.0 / pred.size(0)))
21 | return res[0] if return_single else res
22 |
23 |
24 | class Accuracy(nn.Module):
25 |
26 | def __init__(self, topk=(1, )):
27 | super().__init__()
28 | self.topk = topk
29 |
30 | def forward(self, pred, target):
31 | return accuracy(pred, target, self.topk)
32 |
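A quick usage sketch of `accuracy()` (assuming mmdet is importable): with two samples and top-1, one correct prediction gives 50%.

    import torch
    from mmdet.models.losses import accuracy

    pred = torch.tensor([[0.1, 0.9], [0.8, 0.2]])  # scores, shape (N, C)
    target = torch.tensor([1, 1])
    # sample 0 is correct (argmax = 1), sample 1 is wrong -> 50%
    print(accuracy(pred, target, topk=1))  # tensor([50.])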
--------------------------------------------------------------------------------
/mmdet/models/losses/balanced_l1_loss.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 |
5 | from .utils import weighted_loss
6 | from ..registry import LOSSES
7 |
8 |
9 | @weighted_loss
10 | def balanced_l1_loss(pred,
11 | target,
12 | beta=1.0,
13 | alpha=0.5,
14 | gamma=1.5,
15 | reduction='mean'):
16 | assert beta > 0
17 | assert pred.size() == target.size() and target.numel() > 0
18 |
19 | diff = torch.abs(pred - target)
20 | b = np.e**(gamma / alpha) - 1
21 | loss = torch.where(
22 | diff < beta, alpha / b *
23 | (b * diff + 1) * torch.log(b * diff / beta + 1) - alpha * diff,
24 | gamma * diff + gamma / b - alpha * beta)
25 |
26 | return loss
27 |
28 |
29 | @LOSSES.register_module
30 | class BalancedL1Loss(nn.Module):
31 | """Balanced L1 Loss
32 |
33 | arXiv: https://arxiv.org/pdf/1904.02701.pdf (CVPR 2019)
34 | """
35 |
36 | def __init__(self,
37 | alpha=0.5,
38 | gamma=1.5,
39 | beta=1.0,
40 | reduction='mean',
41 | loss_weight=1.0):
42 | super(BalancedL1Loss, self).__init__()
43 | self.alpha = alpha
44 | self.gamma = gamma
45 | self.beta = beta
46 | self.reduction = reduction
47 | self.loss_weight = loss_weight
48 |
49 | def forward(self,
50 | pred,
51 | target,
52 | weight=None,
53 | avg_factor=None,
54 | reduction_override=None,
55 | **kwargs):
56 | assert reduction_override in (None, 'none', 'mean', 'sum')
57 | reduction = (
58 | reduction_override if reduction_override else self.reduction)
59 | loss_bbox = self.loss_weight * balanced_l1_loss(
60 | pred,
61 | target,
62 | weight,
63 | alpha=self.alpha,
64 | gamma=self.gamma,
65 | beta=self.beta,
66 | reduction=reduction,
67 | avg_factor=avg_factor,
68 | **kwargs)
69 | return loss_bbox
70 |
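A small numeric check, not in the repo: the constant b = e^(gamma/alpha) - 1 is chosen exactly so that the two branches of the loss agree at diff == beta, which makes the loss continuous.

    import numpy as np
    import torch

    alpha, gamma, beta = 0.5, 1.5, 1.0
    b = np.e ** (gamma / alpha) - 1
    diff = torch.tensor(beta)
    inner = alpha / b * (b * diff + 1) * torch.log(b * diff / beta + 1) - alpha * diff
    outer = gamma * diff + gamma / b - alpha * beta
    print(torch.isclose(inner, outer))  # tensor(True)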
--------------------------------------------------------------------------------
/mmdet/models/losses/cross_entropy_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | from .utils import weight_reduce_loss
6 | from ..registry import LOSSES
7 |
8 |
9 | def cross_entropy(pred, label, weight=None, reduction='mean', avg_factor=None):
10 | # element-wise losses
11 | loss = F.cross_entropy(pred, label, reduction='none')
12 |
13 | # apply weights and do the reduction
14 | if weight is not None:
15 | weight = weight.float()
16 | loss = weight_reduce_loss(
17 | loss, weight=weight, reduction=reduction, avg_factor=avg_factor)
18 |
19 | return loss
20 |
21 |
22 | def _expand_binary_labels(labels, label_weights, label_channels):
23 | bin_labels = labels.new_full((labels.size(0), label_channels), 0)
24 | inds = torch.nonzero(labels >= 1).squeeze()
25 | if inds.numel() > 0:
26 | bin_labels[inds, labels[inds] - 1] = 1
27 | if label_weights is None:
28 | bin_label_weights = None
29 | else:
30 | bin_label_weights = label_weights.view(-1, 1).expand(
31 | label_weights.size(0), label_channels)
32 | return bin_labels, bin_label_weights
33 |
34 |
35 | def binary_cross_entropy(pred,
36 | label,
37 | weight=None,
38 | reduction='mean',
39 | avg_factor=None):
40 | if pred.dim() != label.dim():
41 | label, weight = _expand_binary_labels(label, weight, pred.size(-1))
42 |
43 | # weighted element-wise losses
44 | if weight is not None:
45 | weight = weight.float()
46 | loss = F.binary_cross_entropy_with_logits(
47 | pred, label.float(), weight, reduction='none')
48 | # do the reduction for the weighted loss
49 | loss = weight_reduce_loss(loss, reduction=reduction, avg_factor=avg_factor)
50 |
51 | return loss
52 |
53 |
54 | def mask_cross_entropy(pred, target, label, reduction='mean', avg_factor=None):
55 | # TODO: handle these two reserved arguments
56 | assert reduction == 'mean' and avg_factor is None
57 | num_rois = pred.size()[0]
58 | inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device)
59 | pred_slice = pred[inds, label].squeeze(1)
60 | return F.binary_cross_entropy_with_logits(
61 | pred_slice, target, reduction='mean')[None]
62 |
63 |
64 | @LOSSES.register_module
65 | class CrossEntropyLoss(nn.Module):
66 |
67 | def __init__(self,
68 | use_sigmoid=False,
69 | use_mask=False,
70 | reduction='mean',
71 | loss_weight=1.0):
72 | super(CrossEntropyLoss, self).__init__()
73 | assert (use_sigmoid is False) or (use_mask is False)
74 | self.use_sigmoid = use_sigmoid
75 | self.use_mask = use_mask
76 | self.reduction = reduction
77 | self.loss_weight = loss_weight
78 |
79 | if self.use_sigmoid:
80 | self.cls_criterion = binary_cross_entropy
81 | elif self.use_mask:
82 | self.cls_criterion = mask_cross_entropy
83 | else:
84 | self.cls_criterion = cross_entropy
85 |
86 | def forward(self,
87 | cls_score,
88 | label,
89 | weight=None,
90 | avg_factor=None,
91 | reduction_override=None,
92 | **kwargs):
93 | assert reduction_override in (None, 'none', 'mean', 'sum')
94 | reduction = (
95 | reduction_override if reduction_override else self.reduction)
96 | loss_cls = self.loss_weight * self.cls_criterion(
97 | cls_score,
98 | label,
99 | weight,
100 | reduction=reduction,
101 | avg_factor=avg_factor,
102 | **kwargs)
103 | return loss_cls
104 |
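To make `_expand_binary_labels()` concrete, here is a minimal sketch of its core: label 0 is background and becomes an all-zero row, while label k >= 1 becomes a one-hot row at index k - 1.

    import torch

    labels = torch.tensor([0, 1, 3])
    bin_labels = labels.new_full((labels.size(0), 3), 0)
    inds = torch.nonzero(labels >= 1).squeeze()
    bin_labels[inds, labels[inds] - 1] = 1
    print(bin_labels)
    # tensor([[0, 0, 0],
    #         [1, 0, 0],
    #         [0, 0, 1]])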
--------------------------------------------------------------------------------
/mmdet/models/losses/focal_loss.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 |
4 | from mmdet.ops import sigmoid_focal_loss as _sigmoid_focal_loss
5 | from .utils import weight_reduce_loss
6 | from ..registry import LOSSES
7 |
8 |
9 | # This method is only for debugging
10 | def py_sigmoid_focal_loss(pred,
11 | target,
12 | weight=None,
13 | gamma=2.0,
14 | alpha=0.25,
15 | reduction='mean',
16 | avg_factor=None):
17 | pred_sigmoid = pred.sigmoid()
18 | target = target.type_as(pred)
19 | pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target)
20 | focal_weight = (alpha * target + (1 - alpha) *
21 | (1 - target)) * pt.pow(gamma)
22 | loss = F.binary_cross_entropy_with_logits(
23 | pred, target, reduction='none') * focal_weight
24 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor)
25 | return loss
26 |
27 |
28 | def sigmoid_focal_loss(pred,
29 | target,
30 | weight=None,
31 | gamma=2.0,
32 | alpha=0.25,
33 | reduction='mean',
34 | avg_factor=None):
35 | # Function.apply does not accept keyword arguments, so the decorator
36 | # "weighted_loss" is not applicable
37 | loss = _sigmoid_focal_loss(pred, target, gamma, alpha)
38 | # TODO: find a proper way to handle the shape of weight
39 | if weight is not None:
40 | weight = weight.view(-1, 1)
41 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor)
42 | return loss
43 |
44 |
45 | @LOSSES.register_module
46 | class FocalLoss(nn.Module):
47 |
48 | def __init__(self,
49 | use_sigmoid=True,
50 | gamma=2.0,
51 | alpha=0.25,
52 | reduction='mean',
53 | loss_weight=1.0):
54 | super(FocalLoss, self).__init__()
55 | assert use_sigmoid is True, 'Only sigmoid focal loss supported now.'
56 | self.use_sigmoid = use_sigmoid
57 | self.gamma = gamma
58 | self.alpha = alpha
59 | self.reduction = reduction
60 | self.loss_weight = loss_weight
61 |
62 | def forward(self,
63 | pred,
64 | target,
65 | weight=None,
66 | avg_factor=None,
67 | reduction_override=None):
68 | assert reduction_override in (None, 'none', 'mean', 'sum')
69 | reduction = (
70 | reduction_override if reduction_override else self.reduction)
71 | if self.use_sigmoid:
72 | loss_cls = self.loss_weight * sigmoid_focal_loss(
73 | pred,
74 | target,
75 | weight,
76 | gamma=self.gamma,
77 | alpha=self.alpha,
78 | reduction=reduction,
79 | avg_factor=avg_factor)
80 | else:
81 | raise NotImplementedError
82 | return loss_cls
83 |
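A short sketch using the pure-PyTorch `py_sigmoid_focal_loss()` above (runnable if mmdet and its compiled ops are importable): confident, correctly classified examples are down-weighted by the (1 - p_t)^gamma factor, so the hard example dominates the loss.

    import torch
    from mmdet.models.losses.focal_loss import py_sigmoid_focal_loss

    pred = torch.tensor([[3.0], [-3.0]])   # logits: easy positive / hard positive
    target = torch.tensor([[1.0], [1.0]])  # both samples are actually positive
    print(py_sigmoid_focal_loss(pred, target, reduction='none'))
    # the mis-classified second sample contributes orders of magnitude more loss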
--------------------------------------------------------------------------------
/mmdet/models/losses/mse_loss.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 |
4 | from .utils import weighted_loss
5 | from ..registry import LOSSES
6 |
7 | mse_loss = weighted_loss(F.mse_loss)
8 |
9 |
10 | @LOSSES.register_module
11 | class MSELoss(nn.Module):
12 |
13 | def __init__(self, reduction='mean', loss_weight=1.0):
14 | super().__init__()
15 | self.reduction = reduction
16 | self.loss_weight = loss_weight
17 |
18 | def forward(self, pred, target, weight=None, avg_factor=None):
19 | loss = self.loss_weight * mse_loss(
20 | pred,
21 | target,
22 | weight,
23 | reduction=self.reduction,
24 | avg_factor=avg_factor)
25 | return loss
26 |
--------------------------------------------------------------------------------
/mmdet/models/losses/smooth_l1_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | from .utils import weighted_loss
5 | from ..registry import LOSSES
6 |
7 |
8 | @weighted_loss
9 | def smooth_l1_loss(pred, target, beta=1.0):
10 | assert beta > 0
11 | assert pred.size() == target.size() and target.numel() > 0
12 | diff = torch.abs(pred - target)
13 | loss = torch.where(diff < beta, 0.5 * diff * diff / beta,
14 | diff - 0.5 * beta)
15 | return loss
16 |
17 |
18 | @LOSSES.register_module
19 | class SmoothL1Loss(nn.Module):
20 |
21 | def __init__(self, beta=1.0, reduction='mean', loss_weight=1.0):
22 | super(SmoothL1Loss, self).__init__()
23 | self.beta = beta
24 | self.reduction = reduction
25 | self.loss_weight = loss_weight
26 |
27 | def forward(self,
28 | pred,
29 | target,
30 | weight=None,
31 | avg_factor=None,
32 | reduction_override=None,
33 | **kwargs):
34 | assert reduction_override in (None, 'none', 'mean', 'sum')
35 | reduction = (
36 | reduction_override if reduction_override else self.reduction)
37 | loss_bbox = self.loss_weight * smooth_l1_loss(
38 | pred,
39 | target,
40 | weight,
41 | beta=self.beta,
42 | reduction=reduction,
43 | avg_factor=avg_factor,
44 | **kwargs)
45 | return loss_bbox
46 |
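A worked sketch of the formula above with beta = 1 (assuming mmdet is importable): the loss is quadratic, 0.5 * diff^2 / beta, inside the beta band and linear, diff - 0.5 * beta, outside it.

    import torch
    from mmdet.models.losses import smooth_l1_loss

    pred = torch.tensor([0.5, 3.0])
    target = torch.tensor([0.0, 0.0])
    print(smooth_l1_loss(pred, target, reduction='none'))
    # tensor([0.1250, 2.5000])  i.e. 0.5 * 0.5**2 and 3.0 - 0.5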
--------------------------------------------------------------------------------
/mmdet/models/losses/utils.py:
--------------------------------------------------------------------------------
1 | import functools
2 |
3 | import torch.nn.functional as F
4 |
5 |
6 | def reduce_loss(loss, reduction):
7 | """Reduce loss as specified.
8 |
9 | Args:
10 | loss (Tensor): Elementwise loss tensor.
11 | reduction (str): Options are "none", "mean" and "sum".
12 |
13 | Return:
14 | Tensor: Reduced loss tensor.
15 | """
16 | reduction_enum = F._Reduction.get_enum(reduction)
17 | # none: 0, elementwise_mean:1, sum: 2
18 | if reduction_enum == 0:
19 | return loss
20 | elif reduction_enum == 1:
21 | return loss.mean()
22 | elif reduction_enum == 2:
23 | return loss.sum()
24 |
25 |
26 | def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None):
27 | """Apply element-wise weight and reduce loss.
28 |
29 | Args:
30 | loss (Tensor): Element-wise loss.
31 | weight (Tensor): Element-wise weights.
32 | reduction (str): Same as built-in losses of PyTorch.
33 | avg_factor (float): Average factor when computing the mean of losses.
34 |
35 | Returns:
36 | Tensor: Processed loss values.
37 | """
38 | # if weight is specified, apply element-wise weight
39 | if weight is not None:
40 | loss = loss * weight
41 |
42 | # if avg_factor is not specified, just reduce the loss
43 | if avg_factor is None:
44 | loss = reduce_loss(loss, reduction)
45 | else:
46 | # if reduction is mean, then average the loss by avg_factor
47 | if reduction == 'mean':
48 | loss = loss.sum() / avg_factor
49 | # if reduction is 'none', then do nothing, otherwise raise an error
50 | elif reduction != 'none':
51 | raise ValueError('avg_factor can not be used with reduction="sum"')
52 | return loss
53 |
54 |
55 | def weighted_loss(loss_func):
56 | """Create a weighted version of a given loss function.
57 |
58 | To use this decorator, the loss function must have the signature like
59 | `loss_func(pred, target, **kwargs)`. The function only needs to compute
60 | element-wise loss without any reduction. This decorator will add weight
61 | and reduction arguments to the function. The decorated function will have
62 | the signature like `loss_func(pred, target, weight=None, reduction='mean',
63 | avg_factor=None, **kwargs)`.
64 |
65 | :Example:
66 |
67 | >>> @weighted_loss
68 | >>> def l1_loss(pred, target):
69 | >>> return (pred - target).abs()
70 |
71 | >>> pred = torch.Tensor([0, 2, 3])
72 | >>> target = torch.Tensor([1, 1, 1])
73 | >>> weight = torch.Tensor([1, 0, 1])
74 |
75 | >>> l1_loss(pred, target)
76 | tensor(1.3333)
77 | >>> l1_loss(pred, target, weight)
78 | tensor(1.)
79 | >>> l1_loss(pred, target, reduction='none')
80 | tensor([1., 1., 2.])
81 | >>> l1_loss(pred, target, weight, avg_factor=2)
82 | tensor(1.5000)
83 | """
84 |
85 | @functools.wraps(loss_func)
86 | def wrapper(pred,
87 | target,
88 | weight=None,
89 | reduction='mean',
90 | avg_factor=None,
91 | **kwargs):
92 | # get element-wise loss
93 | loss = loss_func(pred, target, **kwargs)
94 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor)
95 | return loss
96 |
97 | return wrapper
98 |
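One more sketch on `weight_reduce_loss()` (assuming mmdet is importable): with reduction='mean' and an avg_factor, the weighted loss is summed and divided by avg_factor, typically the number of positive samples, rather than by the element count.

    import torch
    from mmdet.models.losses import weight_reduce_loss

    loss = torch.tensor([1.0, 2.0, 3.0, 4.0])
    weight = torch.tensor([1.0, 1.0, 1.0, 0.0])   # mask out the last element
    print(weight_reduce_loss(loss, weight, reduction='mean', avg_factor=3))
    # tensor(2.)  ->  (1 + 2 + 3) / 3, not / 4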
--------------------------------------------------------------------------------
/mmdet/models/mask_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .fcn_mask_head import FCNMaskHead
2 | from .fused_semantic_head import FusedSemanticHead
3 | from .grid_head import GridHead
4 | from .htc_mask_head import HTCMaskHead
5 | from .maskiou_head import MaskIoUHead
6 |
7 | __all__ = [
8 | 'FCNMaskHead', 'HTCMaskHead', 'FusedSemanticHead', 'GridHead',
9 | 'MaskIoUHead'
10 | ]
11 |
--------------------------------------------------------------------------------
/mmdet/models/mask_heads/htc_mask_head.py:
--------------------------------------------------------------------------------
1 | from .fcn_mask_head import FCNMaskHead
2 | from ..registry import HEADS
3 | from ..utils import ConvModule
4 |
5 |
6 | @HEADS.register_module
7 | class HTCMaskHead(FCNMaskHead):
8 |
9 | def __init__(self, *args, **kwargs):
10 | super(HTCMaskHead, self).__init__(*args, **kwargs)
11 | self.conv_res = ConvModule(
12 | self.conv_out_channels,
13 | self.conv_out_channels,
14 | 1,
15 | conv_cfg=self.conv_cfg,
16 | norm_cfg=self.norm_cfg)
17 |
18 | def init_weights(self):
19 | super(HTCMaskHead, self).init_weights()
20 | self.conv_res.init_weights()
21 |
22 | def forward(self, x, res_feat=None, return_logits=True, return_feat=True):
23 | if res_feat is not None:
24 | res_feat = self.conv_res(res_feat)
25 | x = x + res_feat
26 | for conv in self.convs:
27 | x = conv(x)
28 | res_feat = x
29 | outs = []
30 | if return_logits:
31 | x = self.upsample(x)
32 | if self.upsample_method == 'deconv':
33 | x = self.relu(x)
34 | mask_pred = self.conv_logits(x)
35 | outs.append(mask_pred)
36 | if return_feat:
37 | outs.append(res_feat)
38 | return outs if len(outs) > 1 else outs[0]
39 |
--------------------------------------------------------------------------------
/mmdet/models/necks/__init__.py:
--------------------------------------------------------------------------------
1 | from .fpn import FPN
2 | from .bfp import BFP
3 | from .hrfpn import HRFPN
4 |
5 | __all__ = ['FPN', 'BFP', 'HRFPN']
6 |
--------------------------------------------------------------------------------
/mmdet/models/necks/hrfpn.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from torch.utils.checkpoint import checkpoint
5 | from mmcv.cnn.weight_init import caffe2_xavier_init
6 |
7 | from ..utils import ConvModule
8 | from ..registry import NECKS
9 |
10 |
11 | @NECKS.register_module
12 | class HRFPN(nn.Module):
13 | """HRFPN (High Resolution Feature Pyrmamids)
14 |
15 | arXiv: https://arxiv.org/abs/1904.04514
16 |
17 | Args:
18 | in_channels (list): number of channels for each branch.
19 | out_channels (int): output channels of feature pyramids.
20 | num_outs (int): number of output stages.
21 | pooling_type (str): pooling for generating feature pyramids
22 | from {MAX, AVG}.
23 | conv_cfg (dict): dictionary to construct and config conv layer.
24 | norm_cfg (dict): dictionary to construct and config norm layer.
25 | with_cp (bool): Use checkpoint or not. Using checkpoint will save some
26 | memory while slowing down the training speed.
27 | """
28 |
29 | def __init__(self,
30 | in_channels,
31 | out_channels,
32 | num_outs=5,
33 | pooling_type='AVG',
34 | conv_cfg=None,
35 | norm_cfg=None,
36 | with_cp=False):
37 | super(HRFPN, self).__init__()
38 | assert isinstance(in_channels, list)
39 | self.in_channels = in_channels
40 | self.out_channels = out_channels
41 | self.num_ins = len(in_channels)
42 | self.num_outs = num_outs
43 | self.with_cp = with_cp
44 | self.conv_cfg = conv_cfg
45 | self.norm_cfg = norm_cfg
46 |
47 | self.reduction_conv = ConvModule(
48 | sum(in_channels),
49 | out_channels,
50 | kernel_size=1,
51 | conv_cfg=self.conv_cfg,
52 | activation=None)
53 |
54 | self.fpn_convs = nn.ModuleList()
55 | for i in range(self.num_outs):
56 | self.fpn_convs.append(
57 | ConvModule(
58 | out_channels,
59 | out_channels,
60 | kernel_size=3,
61 | padding=1,
62 | conv_cfg=self.conv_cfg,
63 | activation=None))
64 |
65 | if pooling_type == 'MAX':
66 | self.pooling = F.max_pool2d
67 | else:
68 | self.pooling = F.avg_pool2d
69 |
70 | def init_weights(self):
71 | for m in self.modules():
72 | if isinstance(m, nn.Conv2d):
73 | caffe2_xavier_init(m)
74 |
75 | def forward(self, inputs):
76 | assert len(inputs) == self.num_ins
77 | outs = [inputs[0]]
78 | for i in range(1, self.num_ins):
79 | outs.append(
80 | F.interpolate(inputs[i], scale_factor=2**i, mode='bilinear'))
81 | out = torch.cat(outs, dim=1)
82 | if out.requires_grad and self.with_cp:
83 | out = checkpoint(self.reduction_conv, out)
84 | else:
85 | out = self.reduction_conv(out)
86 | outs = [out]
87 | for i in range(1, self.num_outs):
88 | outs.append(self.pooling(out, kernel_size=2**i, stride=2**i))
89 | outputs = []
90 |
91 | for i in range(self.num_outs):
92 | if outs[i].requires_grad and self.with_cp:
93 | tmp_out = checkpoint(self.fpn_convs[i], outs[i])
94 | else:
95 | tmp_out = self.fpn_convs[i](outs[i])
96 | outputs.append(tmp_out)
97 | return tuple(outputs)
98 |
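A pure-PyTorch shape walkthrough of the forward pass (not mmdet code; channel counts are hypothetical HRNet-style values): every branch is upsampled to the highest resolution, concatenated, then pooled into num_outs pyramid levels. The 1x1 reduction conv is omitted here because it does not change spatial sizes.

    import torch
    import torch.nn.functional as F

    inputs = [torch.randn(1, c, 64 // 2 ** i, 64 // 2 ** i)
              for i, c in enumerate([18, 36, 72, 144])]
    outs = [inputs[0]] + [
        F.interpolate(inputs[i], scale_factor=2 ** i, mode='bilinear')
        for i in range(1, 4)
    ]
    out = torch.cat(outs, dim=1)  # (1, 270, 64, 64); 270 = 18 + 36 + 72 + 144
    pyramid = [out] + [F.avg_pool2d(out, kernel_size=2 ** i, stride=2 ** i)
                       for i in range(1, 5)]
    print([tuple(p.shape[2:]) for p in pyramid])
    # [(64, 64), (32, 32), (16, 16), (8, 8), (4, 4)]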
--------------------------------------------------------------------------------
/mmdet/models/plugins/__init__.py:
--------------------------------------------------------------------------------
1 | from .non_local import NonLocal2D
2 | from .generalized_attention import GeneralizedAttention
3 |
4 | __all__ = ['NonLocal2D', 'GeneralizedAttention']
5 |
--------------------------------------------------------------------------------
/mmdet/models/registry.py:
--------------------------------------------------------------------------------
1 | from mmdet.utils import Registry
2 |
3 | # The actual implementation of Registry lives in mmdet/utils/registry.py.
4 |
5 | BACKBONES = Registry('backbone')
6 | NECKS = Registry('neck')
7 | ROI_EXTRACTORS = Registry('roi_extractor')
8 | SHARED_HEADS = Registry('shared_head')
9 | HEADS = Registry('head')
10 | LOSSES = Registry('loss')
11 | DETECTORS = Registry('detector')
12 |
--------------------------------------------------------------------------------
/mmdet/models/roi_extractors/__init__.py:
--------------------------------------------------------------------------------
1 | from .single_level import SingleRoIExtractor
2 |
3 | __all__ = ['SingleRoIExtractor']
4 |
--------------------------------------------------------------------------------
/mmdet/models/roi_extractors/single_level.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 |
3 | import torch
4 | import torch.nn as nn
5 |
6 | from mmdet import ops  # provides RoIAlign, NMS, etc.
7 | from mmdet.core import force_fp32
8 | from ..registry import ROI_EXTRACTORS
9 |
10 | # Register with ROI_EXTRACTORS: the decorator adds this class to the registry's module dict.
11 | @ROI_EXTRACTORS.register_module
12 | class SingleRoIExtractor(nn.Module):
13 | """Extract RoI features from a single level feature map.
14 |
15 | If there are multiple input feature levels, each RoI is mapped to a level
16 | according to its scale.
17 |
18 | Args:
19 | roi_layer (dict): Specify RoI layer type and arguments.
20 | out_channels (int): Output channels of RoI layers.
21 | featmap_strides (list): Strides of input feature maps.
22 | finest_scale (int): Scale threshold of mapping to level 0.
23 | """
24 |
25 | def __init__(self,
26 | roi_layer,
27 | out_channels,
28 | featmap_strides,
29 | finest_scale=56):
30 | super(SingleRoIExtractor, self).__init__()
31 | self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides)
32 | self.out_channels = out_channels
33 | self.featmap_strides = featmap_strides
34 | self.finest_scale = finest_scale
35 | self.fp16_enabled = False
36 |
37 | @property
38 | def num_inputs(self):
39 | """int: Input feature map levels."""
40 | return len(self.featmap_strides)
41 |
42 | def init_weights(self):
43 | pass
44 |
45 | def build_roi_layers(self, layer_cfg, featmap_strides):
46 | cfg = layer_cfg.copy()
47 | layer_type = cfg.pop('type')
48 | assert hasattr(ops, layer_type)
49 | # ops/__init__.py was imported above; getattr(ops, layer_type) looks up
50 | # the layer class (e.g. RoIAlign) by its type name.
51 | layer_cls = getattr(ops, layer_type)
52 | # keyword arguments are used, so ordering does not matter
53 | roi_layers = nn.ModuleList(
54 | [layer_cls(spatial_scale=1 / s, **cfg) for s in featmap_strides])
55 | return roi_layers
56 |
57 | # target_lvls is a 1-D tensor; each element is the assigned level of one proposal
58 | def map_roi_levels(self, rois, num_levels):
59 | """Map rois to corresponding feature levels by scales.
60 |
61 | - scale < finest_scale * 2: level 0
62 | - finest_scale * 2 <= scale < finest_scale * 4: level 1
63 | - finest_scale * 4 <= scale < finest_scale * 8: level 2
64 | - scale >= finest_scale * 8: level 3
65 |
66 | Args:
67 | rois (Tensor): Input RoIs, shape (k, 5).
68 | num_levels (int): Total level number.
69 |
70 | Returns:
71 | Tensor: Level index (0-based) of each RoI, shape (k, )
72 | """
73 | scale = torch.sqrt(
74 | (rois[:, 3] - rois[:, 1] + 1) * (rois[:, 4] - rois[:, 2] + 1))
75 | target_lvls = torch.floor(torch.log2(scale / self.finest_scale + 1e-6))
76 | target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long()
77 | return target_lvls
78 |
79 | @force_fp32(apply_to=('feats',), out_fp16=True)
80 | def forward(self, feats, rois):
81 | if len(feats) == 1:
82 | return self.roi_layers[0](feats[0], rois)
83 |
84 | out_size = self.roi_layers[0].out_size
85 | num_levels = len(feats)  # the number of feature maps determines the number of output levels
86 | # 1-D tensor with the target level of each RoI; the RoIs are then scaled onto the feature maps
87 | target_lvls = self.map_roi_levels(rois, num_levels)
88 | # roi_feats has shape e.g. torch.Size([2000, 256, 7, 7])
89 | roi_feats = feats[0].new_zeros(rois.size()[0], self.out_channels,
90 | out_size, out_size)
91 | for i in range(num_levels):
92 | inds = target_lvls == i
93 | if inds.any():
94 | rois_ = rois[inds, :]
95 | # implicit forward pass through the RoI layer
96 | roi_feats_t = self.roi_layers[i](feats[i], rois_)
97 | roi_feats[inds] += roi_feats_t
98 | return roi_feats
99 |
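A worked sketch of `map_roi_levels()` with the default finest_scale = 56: square RoIs of side 56, 112, 224, and 448 map to levels 0 through 3 respectively.

    import torch

    finest_scale = 56
    rois = torch.tensor([      # (batch_idx, x1, y1, x2, y2)
        [0., 0., 0., 55., 55.],
        [0., 0., 0., 111., 111.],
        [0., 0., 0., 223., 223.],
        [0., 0., 0., 447., 447.],
    ])
    scale = torch.sqrt((rois[:, 3] - rois[:, 1] + 1) * (rois[:, 4] - rois[:, 2] + 1))
    target_lvls = torch.floor(torch.log2(scale / finest_scale + 1e-6))
    print(target_lvls.clamp(min=0, max=3).long())  # tensor([0, 1, 2, 3])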
--------------------------------------------------------------------------------
/mmdet/models/shared_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .res_layer import ResLayer
2 |
3 | __all__ = ['ResLayer']
4 |
--------------------------------------------------------------------------------
/mmdet/models/shared_heads/res_layer.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | import torch.nn as nn
4 | from mmcv.cnn import constant_init, kaiming_init
5 | from mmcv.runner import load_checkpoint
6 |
7 | from mmdet.core import auto_fp16
8 | from ..backbones import ResNet, make_res_layer
9 | from ..registry import SHARED_HEADS
10 |
11 |
12 | @SHARED_HEADS.register_module
13 | class ResLayer(nn.Module):
14 |
15 | def __init__(self,
16 | depth,
17 | stage=3,
18 | stride=2,
19 | dilation=1,
20 | style='pytorch',
21 | norm_cfg=dict(type='BN', requires_grad=True),
22 | norm_eval=True,
23 | with_cp=False,
24 | dcn=None):
25 | super(ResLayer, self).__init__()
26 | self.norm_eval = norm_eval
27 | self.norm_cfg = norm_cfg
28 | self.stage = stage
29 | self.fp16_enabled = False
30 | block, stage_blocks = ResNet.arch_settings[depth]
31 | stage_block = stage_blocks[stage]
32 | planes = 64 * 2**stage
33 | inplanes = 64 * 2**(stage - 1) * block.expansion
34 |
35 | res_layer = make_res_layer(
36 | block,
37 | inplanes,
38 | planes,
39 | stage_block,
40 | stride=stride,
41 | dilation=dilation,
42 | style=style,
43 | with_cp=with_cp,
44 | norm_cfg=self.norm_cfg,
45 | dcn=dcn)
46 | self.add_module('layer{}'.format(stage + 1), res_layer)
47 |
48 | def init_weights(self, pretrained=None):
49 | if isinstance(pretrained, str):
50 | logger = logging.getLogger()
51 | load_checkpoint(self, pretrained, strict=False, logger=logger)
52 | elif pretrained is None:
53 | for m in self.modules():
54 | if isinstance(m, nn.Conv2d):
55 | kaiming_init(m)
56 | elif isinstance(m, nn.BatchNorm2d):
57 | constant_init(m, 1)
58 | else:
59 | raise TypeError('pretrained must be a str or None')
60 |
61 | @auto_fp16()
62 | def forward(self, x):
63 | res_layer = getattr(self, 'layer{}'.format(self.stage + 1))
64 | out = res_layer(x)
65 | return out
66 |
67 | def train(self, mode=True):
68 | super(ResLayer, self).train(mode)
69 | if self.norm_eval:
70 | for m in self.modules():
71 | if isinstance(m, nn.BatchNorm2d):
72 | m.eval()
73 |
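A quick numeric sketch of the channel arithmetic above (values are illustrative): for depth = 50 the block is a Bottleneck with expansion = 4, so with the default stage = 3 the layer consumes 1024-channel C4 features and produces the classic 2048-channel C5.

    expansion = 4  # ResNet-50/101 Bottleneck expansion
    stage = 3
    planes = 64 * 2 ** stage                       # 512
    inplanes = 64 * 2 ** (stage - 1) * expansion   # 1024 input channels
    print(inplanes, planes, planes * expansion)    # 1024 512 2048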
--------------------------------------------------------------------------------
/mmdet/models/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .conv_ws import conv_ws_2d, ConvWS2d
2 | from .conv_module import build_conv_layer, ConvModule
3 | from .norm import build_norm_layer
4 | from .scale import Scale
5 | from .weight_init import (xavier_init, normal_init, uniform_init, kaiming_init,
6 | bias_init_with_prob)
7 |
8 | __all__ = [
9 | 'conv_ws_2d', 'ConvWS2d', 'build_conv_layer', 'ConvModule',
10 | 'build_norm_layer', 'xavier_init', 'normal_init', 'uniform_init',
11 | 'kaiming_init', 'bias_init_with_prob', 'Scale'
12 | ]
13 |
--------------------------------------------------------------------------------
/mmdet/models/utils/conv_ws.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 |
4 |
5 | def conv_ws_2d(input,
6 | weight,
7 | bias=None,
8 | stride=1,
9 | padding=0,
10 | dilation=1,
11 | groups=1,
12 | eps=1e-5):
13 | c_in = weight.size(0)
14 | weight_flat = weight.view(c_in, -1)
15 | mean = weight_flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1)
16 | std = weight_flat.std(dim=1, keepdim=True).view(c_in, 1, 1, 1)
17 | weight = (weight - mean) / (std + eps)
18 | return F.conv2d(input, weight, bias, stride, padding, dilation, groups)
19 |
20 |
21 | class ConvWS2d(nn.Conv2d):
22 |
23 | def __init__(self,
24 | in_channels,
25 | out_channels,
26 | kernel_size,
27 | stride=1,
28 | padding=0,
29 | dilation=1,
30 | groups=1,
31 | bias=True,
32 | eps=1e-5):
33 | super(ConvWS2d, self).__init__(
34 | in_channels,
35 | out_channels,
36 | kernel_size,
37 | stride=stride,
38 | padding=padding,
39 | dilation=dilation,
40 | groups=groups,
41 | bias=bias)
42 | self.eps = eps
43 |
44 | def forward(self, x):
45 | return conv_ws_2d(x, self.weight, self.bias, self.stride, self.padding,
46 | self.dilation, self.groups, self.eps)
47 |
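A small check, not in the repo, that the standardization in `conv_ws_2d()` leaves each output filter with approximately zero mean and unit standard deviation:

    import torch

    weight = torch.randn(8, 16, 3, 3) * 5 + 2  # deliberately badly scaled weights
    c_in = weight.size(0)
    flat = weight.view(c_in, -1)
    mean = flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1)
    std = flat.std(dim=1, keepdim=True).view(c_in, 1, 1, 1)
    ws = (weight - mean) / (std + 1e-5)
    print(ws.view(c_in, -1).mean(dim=1).abs().max() < 1e-4)  # tensor(True)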
--------------------------------------------------------------------------------
/mmdet/models/utils/norm.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 |
3 | norm_cfg = {
4 | # format: layer_type: (abbreviation, module)
5 | 'BN': ('bn', nn.BatchNorm2d),
6 | 'SyncBN': ('bn', nn.SyncBatchNorm),
7 | 'GN': ('gn', nn.GroupNorm),
8 | # and potentially 'SN'
9 | }
10 |
11 |
12 | def build_norm_layer(cfg, num_features, postfix=''):
13 | """ Build normalization layer
14 |
15 | Args:
16 | cfg (dict): cfg should contain:
17 | type (str): identify norm layer type.
18 | layer args: args needed to instantiate a norm layer.
19 | requires_grad (bool): [optional] whether to stop gradient updates
20 | num_features (int): number of channels from input.
21 | postfix (int, str): appended into norm abbreviation to
22 | create named layer.
23 |
24 | Returns:
25 | name (str): abbreviation + postfix
26 | layer (nn.Module): created norm layer
27 | """
28 | assert isinstance(cfg, dict) and 'type' in cfg
29 | cfg_ = cfg.copy()
30 |
31 | layer_type = cfg_.pop('type')
32 | if layer_type not in norm_cfg:
33 | raise KeyError('Unrecognized norm type {}'.format(layer_type))
34 | else:
35 | abbr, norm_layer = norm_cfg[layer_type]
36 | if norm_layer is None:
37 | raise NotImplementedError
38 |
39 | assert isinstance(postfix, (int, str))
40 | name = abbr + str(postfix)
41 |
42 | requires_grad = cfg_.pop('requires_grad', True)
43 | cfg_.setdefault('eps', 1e-5)
44 | if layer_type != 'GN':
45 | layer = norm_layer(num_features, **cfg_)
46 | if layer_type == 'SyncBN':
47 | layer._specify_ddp_gpu_num(1)
48 | else:
49 | assert 'num_groups' in cfg_
50 | layer = norm_layer(num_channels=num_features, **cfg_)
51 |
52 | for param in layer.parameters():
53 | param.requires_grad = requires_grad
54 |
55 | return name, layer
56 |
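A minimal usage sketch, assuming mmdet is importable: the returned name is the abbreviation plus the postfix, which callers typically pass to add_module.

    from mmdet.models.utils import build_norm_layer

    name, layer = build_norm_layer(dict(type='BN', requires_grad=True), 64, postfix=1)
    print(name, layer)   # bn1 BatchNorm2d(64, eps=1e-05, ...)

    name, layer = build_norm_layer(dict(type='GN', num_groups=32), 256)
    print(name)          # gn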
--------------------------------------------------------------------------------
/mmdet/models/utils/scale.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 | class Scale(nn.Module):
6 |
7 | def __init__(self, scale=1.0):
8 | super(Scale, self).__init__()
9 | self.scale = nn.Parameter(torch.tensor(scale, dtype=torch.float))
10 |
11 | def forward(self, x):
12 | return x * self.scale
13 |
--------------------------------------------------------------------------------
/mmdet/models/utils/weight_init.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch.nn as nn
3 |
4 |
5 | def xavier_init(module, gain=1, bias=0, distribution='normal'):
6 | assert distribution in ['uniform', 'normal']
7 | if distribution == 'uniform':
8 | nn.init.xavier_uniform_(module.weight, gain=gain)
9 | else:
10 | nn.init.xavier_normal_(module.weight, gain=gain)
11 | if hasattr(module, 'bias'):
12 | nn.init.constant_(module.bias, bias)
13 |
14 |
15 | def normal_init(module, mean=0, std=1, bias=0):
16 | nn.init.normal_(module.weight, mean, std)
17 | if hasattr(module, 'bias'):
18 | nn.init.constant_(module.bias, bias)
19 |
20 |
21 | def uniform_init(module, a=0, b=1, bias=0):
22 | nn.init.uniform_(module.weight, a, b)
23 | if hasattr(module, 'bias'):
24 | nn.init.constant_(module.bias, bias)
25 |
26 |
27 | def kaiming_init(module,
28 | mode='fan_out',
29 | nonlinearity='relu',
30 | bias=0,
31 | distribution='normal'):
32 | assert distribution in ['uniform', 'normal']
33 | if distribution == 'uniform':
34 | nn.init.kaiming_uniform_(
35 | module.weight, mode=mode, nonlinearity=nonlinearity)
36 | else:
37 | nn.init.kaiming_normal_(
38 | module.weight, mode=mode, nonlinearity=nonlinearity)
39 | if hasattr(module, 'bias'):
40 | nn.init.constant_(module.bias, bias)
41 |
42 |
43 | def bias_init_with_prob(prior_prob):
44 | """ initialize conv/fc bias value according to giving probablity"""
45 | bias_init = float(-np.log((1 - prior_prob) / prior_prob))
46 | return bias_init
47 |
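A worked sketch of `bias_init_with_prob()`: it inverts the sigmoid so that the initial classification output equals the prior probability (0.01 is the focal-loss initialization from the RetinaNet paper).

    import numpy as np

    prior_prob = 0.01
    bias = float(-np.log((1 - prior_prob) / prior_prob))
    print(round(bias, 3))            # -4.595
    print(1 / (1 + np.exp(-bias)))   # ~0.01, i.e. sigmoid(bias) == prior_prob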
--------------------------------------------------------------------------------
/mmdet/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .registry import Registry, build_from_cfg
2 |
3 | __all__ = ['Registry', 'build_from_cfg']
4 |
--------------------------------------------------------------------------------
/mmdet/utils/registry.py:
--------------------------------------------------------------------------------
1 | import inspect
2 |
3 | import mmcv
4 | import ipdb
5 |
6 |
7 | class Registry(object):
8 |
9 | def __init__(self, name):
10 | self._name = name
11 | self._module_dict = dict()
12 |
13 | def __repr__(self):
14 | format_str = self.__class__.__name__ + '(name={}, items={})'.format(
15 | self._name, list(self._module_dict.keys()))
16 | return format_str
17 |
18 | @property
19 | def name(self):
20 | return self._name
21 |
22 |
23 | @property
24 | def module_dict(self):
25 | return self._module_dict
26 |
27 | def get(self, key):
28 | return self._module_dict.get(key, None)
29 |
30 |
31 | def _register_module(self, module_class):
32 | """Register a module.
33 |
34 | Args:
35 | module (:obj:`nn.Module`): Module to be registered.
36 | """
37 | if not inspect.isclass(module_class):
38 | raise TypeError('module must be a class, but got {}'.format(
39 | type(module_class)))
40 | module_name = module_class.__name__
41 | if module_name in self._module_dict:
42 | raise KeyError('{} is already registered in {}'.format(
43 | module_name, self.name))
44 | self._module_dict[module_name] = module_class
45 |
46 | def register_module(self, cls):
47 | self._register_module(cls)
48 | return cls
49 |
50 |
51 | def build_from_cfg(cfg, registry, default_args=None):
52 | """Build a module from config dict.
53 |
54 | Args:
55 | cfg (dict): Config dict. It should at least contain the key "type".
56 | registry (:obj:`Registry`): The registry to search the type from.
57 | default_args (dict, optional): Default initialization arguments.
58 |
59 | Returns:
60 | obj: The constructed object.
61 | """
62 | assert isinstance(cfg, dict) and 'type' in cfg
63 | assert isinstance(default_args, dict) or default_args is None
64 | args = cfg.copy()
65 | obj_type = args.pop('type')  # the model type name
66 | if mmcv.is_str(obj_type):
67 | # registry.get looks up _module_dict, which maps registered type names
68 | # (e.g. detector names) to their classes; indexing by key yields the class
69 | obj_type = registry.get(obj_type)
70 | if obj_type is None:
71 | raise KeyError('{} is not in the {} registry'.format(
72 | obj_type, registry.name))
73 | elif not inspect.isclass(obj_type):
74 | raise TypeError('type must be a str or valid type, but got {}'.format(
75 | type(obj_type)))
76 | if default_args is not None:
77 | for name, value in default_args.items():  # items() yields the dict's key-value pairs
78 | args.setdefault(name, value)  # merge default_args into args, combining the model and train/test configs before instantiation
79 | # Note: both training and inference build from the DETECTORS registry.
80 | # **args unpacks the dict so each item is matched to a keyword parameter.
81 | return obj_type(**args)
82 |
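A minimal usage sketch of the Registry/build_from_cfg pair, assuming mmdet is importable; Dummy is a hypothetical class used only for illustration. Note how setdefault makes cfg values win over default_args.

    from mmdet.utils import Registry, build_from_cfg

    MODELS = Registry('model')

    @MODELS.register_module
    class Dummy(object):
        def __init__(self, depth):
            self.depth = depth

    obj = build_from_cfg(dict(type='Dummy', depth=50), MODELS,
                         default_args=dict(depth=18))
    print(type(obj).__name__, obj.depth)  # Dummy 50  (cfg wins over default_args)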
--------------------------------------------------------------------------------
/mmdet/version.py:
--------------------------------------------------------------------------------
1 | # GENERATED VERSION FILE
2 | # TIME: Mon Jul 15 15:38:29 2019
3 |
4 | __version__ = '0.6.0+unknown'
5 | short_version = '0.6.0'
6 |
--------------------------------------------------------------------------------
/outputs/_s1019.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ming71/mmdetection-annotated/dc25243ea11d9c4cfd517d7c08dfa2fd61e01895/outputs/_s1019.png
--------------------------------------------------------------------------------
/outputs/_screenshot_02.04.2019.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ming71/mmdetection-annotated/dc25243ea11d9c4cfd517d7c08dfa2fd61e01895/outputs/_screenshot_02.04.2019.png
--------------------------------------------------------------------------------
/outputs/_screenshot_071019.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ming71/mmdetection-annotated/dc25243ea11d9c4cfd517d7c08dfa2fd61e01895/outputs/_screenshot_071019.png
--------------------------------------------------------------------------------
/tools/coco_eval.py:
--------------------------------------------------------------------------------
1 | from argparse import ArgumentParser
2 |
3 | from mmdet.core import coco_eval
4 |
5 |
6 | def main():
7 | parser = ArgumentParser(description='COCO Evaluation')
8 | parser.add_argument('result', help='result file path')
9 | parser.add_argument('--ann', help='annotation file path')
10 | parser.add_argument(
11 | '--types',
12 | type=str,
13 | nargs='+',
14 | choices=['proposal_fast', 'proposal', 'bbox', 'segm', 'keypoint'],
15 | default=['bbox'],
16 | help='result types')
17 | parser.add_argument(
18 | '--max-dets',
19 | type=int,
20 | nargs='+',
21 | default=[100, 300, 1000],
22 | help='proposal numbers, only used for recall evaluation')
23 | args = parser.parse_args()
24 | coco_eval(args.result, args.types, args.ann, args.max_dets)
25 |
26 |
27 | if __name__ == '__main__':
28 | main()
29 |
--------------------------------------------------------------------------------
/tools/dist_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | PYTHON=${PYTHON:-"python"}
4 |
5 | CONFIG=$1
6 | CHECKPOINT=$2
7 | GPUS=$3
8 |
9 | $PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS \
10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4}
11 |
--------------------------------------------------------------------------------
/tools/dist_train.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | PYTHON=${PYTHON:-"python"}
4 |
5 | CONFIG=$1
6 | GPUS=$2
7 |
8 | $PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS \
9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3}
10 |
--------------------------------------------------------------------------------
/tools/publish_model.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import subprocess
3 | import torch
4 |
5 |
6 | def parse_args():
7 | parser = argparse.ArgumentParser(
8 | description='Process a checkpoint to be published')
9 | parser.add_argument('in_file', help='input checkpoint filename')
10 | parser.add_argument('out_file', help='output checkpoint filename')
11 | args = parser.parse_args()
12 | return args
13 |
14 |
15 | def process_checkpoint(in_file, out_file):
16 | checkpoint = torch.load(in_file, map_location='cpu')
17 | # remove optimizer for smaller file size
18 | if 'optimizer' in checkpoint:
19 | del checkpoint['optimizer']
20 | # if it is necessary to remove some sensitive data in checkpoint['meta'],
21 | # add the code here.
22 | torch.save(checkpoint, out_file)
23 | sha = subprocess.check_output(['sha256sum', out_file]).decode()
24 | final_file = out_file[:-len('.pth')] + '-{}.pth'.format(sha[:8])  # slice off the '.pth' suffix (rstrip would strip characters, not the suffix)
25 | subprocess.Popen(['mv', out_file, final_file])
26 |
27 |
28 | def main():
29 | args = parse_args()
30 | process_checkpoint(args.in_file, args.out_file)
31 |
32 |
33 | if __name__ == '__main__':
34 | main()
35 |
--------------------------------------------------------------------------------
/tools/slurm_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | set -x
4 |
5 | PARTITION=$1
6 | JOB_NAME=$2
7 | CONFIG=$3
8 | CHECKPOINT=$4
9 | GPUS=${GPUS:-8}
10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}
12 | PY_ARGS=${@:5}
13 | SRUN_ARGS=${SRUN_ARGS:-""}
14 |
15 | srun -p ${PARTITION} \
16 | --job-name=${JOB_NAME} \
17 | --gres=gpu:${GPUS_PER_NODE} \
18 | --ntasks=${GPUS} \
19 | --ntasks-per-node=${GPUS_PER_NODE} \
20 | --cpus-per-task=${CPUS_PER_TASK} \
21 | --kill-on-bad-exit=1 \
22 | ${SRUN_ARGS} \
23 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS}
24 |
--------------------------------------------------------------------------------
/tools/slurm_train.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | set -x
4 |
5 | PARTITION=$1
6 | JOB_NAME=$2
7 | CONFIG=$3
8 | WORK_DIR=$4
9 | GPUS=${5:-8}
10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}
12 | SRUN_ARGS=${SRUN_ARGS:-""}
13 | PY_ARGS=${PY_ARGS:-"--validate"}
14 |
15 | srun -p ${PARTITION} \
16 | --job-name=${JOB_NAME} \
17 | --gres=gpu:${GPUS_PER_NODE} \
18 | --ntasks=${GPUS} \
19 | --ntasks-per-node=${GPUS_PER_NODE} \
20 | --cpus-per-task=${CPUS_PER_TASK} \
21 | --kill-on-bad-exit=1 \
22 | ${SRUN_ARGS} \
23 | python -u tools/train.py ${CONFIG} --work_dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS}
24 |
--------------------------------------------------------------------------------
/tools/train.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 |
3 | import argparse
4 | import os
5 | from mmcv import Config
6 |
7 | from mmdet import __version__
8 | from mmdet.datasets import build_dataset
9 | from mmdet.apis import (train_detector, init_dist, get_root_logger,
10 | set_random_seed)
11 | from mmdet.models import build_detector
12 | import torch
13 |
14 | import ipdb
15 |
16 |
17 | def parse_args():
18 | parser = argparse.ArgumentParser(description='Train a detector')
19 | # Modified: config is made an optional argument with a default path, so it need not be typed on the command line
20 | parser.add_argument('--config', help='train config file path', default='../configs/faster_rcnn_r50_fpn_1x.py')
21 | parser.add_argument('--work_dir', help='the dir to save logs and models')
22 | parser.add_argument(
23 | '--resume_from', help='the checkpoint file to resume from')
24 | parser.add_argument(
25 | '--validate',
26 | action='store_true',
27 | help='whether to evaluate the checkpoint during training')
28 | parser.add_argument(
29 | '--gpus',
30 | type=int,
31 | default=1,
32 | help='number of gpus to use '
33 | '(only applicable to non-distributed training)')
34 | parser.add_argument('--seed', type=int, default=None, help='random seed')
35 | parser.add_argument(
36 | '--launcher',
37 | choices=['none', 'pytorch', 'slurm', 'mpi'],
38 | default='none',
39 | help='job launcher')
40 | parser.add_argument('--local_rank', type=int, default=0)
41 | args = parser.parse_args()
42 | if 'LOCAL_RANK' not in os.environ:
43 | os.environ['LOCAL_RANK'] = str(args.local_rank)
44 |
45 | return args
46 |
47 |
48 | def main():
49 | args = parse_args()
50 |
51 | cfg = Config.fromfile(args.config)
52 | # set cudnn_benchmark
53 | # Enable only when the input size is fixed (e.g. SSD512); it speeds things up but is usually left off
54 | if cfg.get('cudnn_benchmark', False):
55 | torch.backends.cudnn.benchmark = True
56 | # update configs according to CLI args
57 | if args.work_dir is not None:
58 | # working directory for training artifacts; if not given, a directory derived from the config file name is used
59 | cfg.work_dir = args.work_dir
60 | if args.resume_from is not None:
61 | # checkpoint file to resume training from
62 | cfg.resume_from = args.resume_from
63 | cfg.gpus = args.gpus
64 |
65 | # init distributed env first, since logger depends on the dist info.
66 | if args.launcher == 'none':
67 | distributed = False
68 | else:
69 | distributed = True
70 | init_dist(args.launcher, **cfg.dist_params)
71 |
72 | # init logger before other steps
73 | logger = get_root_logger(cfg.log_level)
74 | logger.info('Distributed training: {}'.format(distributed))
75 |
76 | # set random seeds
77 | if args.seed is not None:
78 | logger.info('Set random seed to {}'.format(args.seed))
79 | set_random_seed(args.seed)
80 |
81 | # ipdb.set_trace(context=35)
82 | # build the model
83 | model = build_detector(
84 | cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
85 |
86 | # build the training dataset from the data config
87 | train_dataset = build_dataset(cfg.data.train)
88 | if cfg.checkpoint_config is not None:
89 | # save mmdet version, config file content and class names in checkpoints as meta data
90 | # Note: previously released models did not store class names etc. in meta;
91 | # they fall back to the default COCO/VOC classes, so old checkpoints only trigger a harmless warning at inference
92 | cfg.checkpoint_config.meta = dict(
93 | mmdet_version=__version__,
94 | config=cfg.text,
95 | CLASSES=train_dataset.CLASSES)
96 |
97 | # add an attribute for visualization convenience
98 | model.CLASSES = train_dataset.CLASSES  # model has no CLASSES attribute by default; Python lets us attach one dynamically by assignment
99 | train_detector(
100 | model,
101 | train_dataset,
102 | cfg,
103 | distributed=distributed,
104 | validate=args.validate,
105 | logger=logger)
106 |
107 |
108 | if __name__ == '__main__':
109 | main()
110 |
111 |
--------------------------------------------------------------------------------
/tools/upgrade_model_version.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import re
3 | from collections import OrderedDict
4 |
5 | import torch
6 |
7 |
8 | def convert(in_file, out_file):
9 | """Convert keys in checkpoints.
10 |
11 | There can be some breaking changes during the development of mmdetection,
12 | and this tool is used for upgrading checkpoints trained with old versions
13 | to the latest one.
14 | """
15 | checkpoint = torch.load(in_file)
16 | in_state_dict = checkpoint.pop('state_dict')
17 | out_state_dict = OrderedDict()
18 | for key, val in in_state_dict.items():
19 | # Use ConvModule instead of nn.Conv2d in RetinaNet
20 | # cls_convs.0.weight -> cls_convs.0.conv.weight
21 | m = re.search(r'(cls_convs|reg_convs).\d.(weight|bias)', key)
22 | if m is not None:
23 | param = m.groups()[1]
24 | new_key = key.replace(param, 'conv.{}'.format(param))
25 | out_state_dict[new_key] = val
26 | continue
27 |
28 | out_state_dict[key] = val
29 | checkpoint['state_dict'] = out_state_dict
30 | torch.save(checkpoint, out_file)
31 |
32 |
33 | def main():
34 | parser = argparse.ArgumentParser(description='Upgrade model version')
35 | parser.add_argument('in_file', help='input checkpoint file')
36 | parser.add_argument('out_file', help='output checkpoint file')
37 | args = parser.parse_args()
38 | convert(args.in_file, args.out_file)
39 |
40 |
41 | if __name__ == '__main__':
42 | main()
43 |
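A quick sketch of the key rewrite above: head conv parameters gain a '.conv' segment because nn.Conv2d was replaced by ConvModule, which wraps the conv as a submodule.

    import re

    key = 'bbox_head.cls_convs.0.weight'
    m = re.search(r'(cls_convs|reg_convs).\d.(weight|bias)', key)
    if m is not None:
        param = m.groups()[1]
        print(key.replace(param, 'conv.{}'.format(param)))
    # bbox_head.cls_convs.0.conv.weight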
--------------------------------------------------------------------------------
/tools/voc_eval.py:
--------------------------------------------------------------------------------
1 | from argparse import ArgumentParser
2 |
3 | import mmcv
4 | import numpy as np
5 |
6 | from mmdet import datasets
7 | from mmdet.core import eval_map
8 |
9 |
10 | def voc_eval(result_file, dataset, iou_thr=0.5):
11 | det_results = mmcv.load(result_file)
12 | gt_bboxes = []
13 | gt_labels = []
14 | gt_ignore = []
15 | for i in range(len(dataset)):
16 | ann = dataset.get_ann_info(i)
17 | bboxes = ann['bboxes']
18 | labels = ann['labels']
19 | if 'bboxes_ignore' in ann:
20 | ignore = np.concatenate([
21 | np.zeros(bboxes.shape[0], dtype=np.bool),
22 | np.ones(ann['bboxes_ignore'].shape[0], dtype=np.bool)
23 | ])
24 | gt_ignore.append(ignore)
25 | bboxes = np.vstack([bboxes, ann['bboxes_ignore']])
26 | labels = np.concatenate([labels, ann['labels_ignore']])
27 | gt_bboxes.append(bboxes)
28 | gt_labels.append(labels)
29 | if not gt_ignore:
30 | gt_ignore = None  # no ignore annotations at all -> pass None to eval_map
31 | if hasattr(dataset, 'year') and dataset.year == 2007:
32 | dataset_name = 'voc07'
33 | else:
34 | dataset_name = dataset.CLASSES
35 | eval_map(
36 | det_results,
37 | gt_bboxes,
38 | gt_labels,
39 | gt_ignore=gt_ignore,
40 | scale_ranges=None,
41 | iou_thr=iou_thr,
42 | dataset=dataset_name,
43 | print_summary=True)
44 |
45 |
46 | def main():
47 | parser = ArgumentParser(description='VOC Evaluation')
48 | parser.add_argument('result', help='result file path')
49 | parser.add_argument('config', help='config file path')
50 | parser.add_argument(
51 | '--iou-thr',
52 | type=float,
53 | default=0.5,
54 | help='IoU threshold for evaluation')
55 | args = parser.parse_args()
56 | cfg = mmcv.Config.fromfile(args.config)
57 | test_dataset = mmcv.runner.obj_from_dict(cfg.data.test, datasets)
58 | voc_eval(args.result, test_dataset, args.iou_thr)
59 |
60 |
61 | if __name__ == '__main__':
62 | main()
63 |
--------------------------------------------------------------------------------