├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── GETTING_STARTED.md ├── INSTALL.md ├── LICENSE ├── MODEL_ZOO.md ├── README.md ├── TECHNICAL_DETAILS.md ├── compile.sh ├── configs ├── cascade_mask_rcnn_r101_fpn_1x.py ├── cascade_mask_rcnn_r50_caffe_c4_1x.py ├── cascade_mask_rcnn_r50_fpn_1x.py ├── cascade_mask_rcnn_x101_32x4d_fpn_1x.py ├── cascade_mask_rcnn_x101_64x4d_fpn_1x.py ├── cascade_rcnn_r101_fpn_1x.py ├── cascade_rcnn_r50_caffe_c4_1x.py ├── cascade_rcnn_r50_fpn_1x.py ├── cascade_rcnn_x101_32x4d_fpn_1x.py ├── cascade_rcnn_x101_64x4d_fpn_1x.py ├── dcn │ ├── README.md │ ├── cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x.py │ ├── cascade_rcnn_dconv_c3-c5_r50_fpn_1x.py │ ├── faster_rcnn_dconv_c3-c5_r50_fpn_1x.py │ ├── faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x.py │ ├── faster_rcnn_dpool_r50_fpn_1x.py │ ├── faster_rcnn_mdconv_c3-c5_r50_fpn_1x.py │ ├── faster_rcnn_mdpool_r50_fpn_1x.py │ └── mask_rcnn_dconv_c3-c5_r50_fpn_1x.py ├── empirical_attention │ ├── README.md │ ├── faster_rcnn_r50_fpn_attention_0010_1x.py │ ├── faster_rcnn_r50_fpn_attention_0010_dcn_1x.py │ ├── faster_rcnn_r50_fpn_attention_1111_1x.py │ └── faster_rcnn_r50_fpn_attention_1111_dcn_1x.py ├── fast_mask_rcnn_r101_fpn_1x.py ├── fast_mask_rcnn_r50_caffe_c4_1x.py ├── fast_mask_rcnn_r50_fpn_1x.py ├── fast_rcnn_r101_fpn_1x.py ├── fast_rcnn_r50_caffe_c4_1x.py ├── fast_rcnn_r50_fpn_1x.py ├── faster_rcnn_ohem_r50_fpn_1x.py ├── faster_rcnn_r101_fpn_1x.py ├── faster_rcnn_r50_caffe_c4_1x.py ├── faster_rcnn_r50_fpn_1x.py ├── faster_rcnn_x101_32x4d_fpn_1x.py ├── faster_rcnn_x101_64x4d_fpn_1x.py ├── fcos │ ├── README.md │ ├── fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu.py │ ├── fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x.py │ └── fcos_r50_caffe_fpn_gn_1x_4gpu.py ├── fp16 │ ├── faster_rcnn_r50_fpn_fp16_1x.py │ ├── mask_rcnn_r50_fpn_fp16_1x.py │ └── retinanet_r50_fpn_fp16_1x.py ├── gcnet │ ├── README.md │ ├── mask_rcnn_r16_gcb_c3-c5_r50_fpn_1x.py │ ├── mask_rcnn_r16_gcb_c3-c5_r50_fpn_syncbn_1x.py │ ├── mask_rcnn_r4_gcb_c3-c5_r50_fpn_1x.py │ ├── mask_rcnn_r4_gcb_c3-c5_r50_fpn_syncbn_1x.py │ └── mask_rcnn_r50_fpn_sbn_1x.py ├── ghm │ ├── README.md │ └── retinanet_ghm_r50_fpn_1x.py ├── gn+ws │ ├── README.md │ ├── faster_rcnn_r50_fpn_gn_ws_1x.py │ ├── mask_rcnn_r50_fpn_gn_ws_20_23_24e.py │ ├── mask_rcnn_r50_fpn_gn_ws_2x.py │ └── mask_rcnn_x101_32x4d_fpn_gn_ws_2x.py ├── gn │ ├── README.md │ ├── mask_rcnn_r101_fpn_gn_2x.py │ ├── mask_rcnn_r50_fpn_gn_2x.py │ └── mask_rcnn_r50_fpn_gn_contrib_2x.py ├── grid_rcnn │ ├── README.md │ ├── grid_rcnn_gn_head_r50_fpn_2x.py │ └── grid_rcnn_gn_head_x101_32x4d_fpn_2x.py ├── guided_anchoring │ ├── README.md │ ├── ga_fast_r50_caffe_fpn_1x.py │ ├── ga_faster_r50_caffe_fpn_1x.py │ ├── ga_faster_x101_32x4d_fpn_1x.py │ ├── ga_retinanet_r50_caffe_fpn_1x.py │ ├── ga_retinanet_x101_32x4d_fpn_1x.py │ ├── ga_rpn_r101_caffe_rpn_1x.py │ ├── ga_rpn_r50_caffe_fpn_1x.py │ └── ga_rpn_x101_32x4d_fpn_1x.py ├── hrnet │ ├── README.md │ ├── cascade_rcnn_hrnetv2p_w32_20e.py │ ├── faster_rcnn_hrnetv2p_w18_1x.py │ ├── faster_rcnn_hrnetv2p_w32_1x.py │ ├── faster_rcnn_hrnetv2p_w40_1x.py │ ├── mask_rcnn_hrnetv2p_w18_1x.py │ └── mask_rcnn_hrnetv2p_w32_1x.py ├── htc │ ├── README.md │ ├── htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e.py │ ├── htc_r101_fpn_20e.py │ ├── htc_r50_fpn_1x.py │ ├── htc_r50_fpn_20e.py │ ├── htc_without_semantic_r50_fpn_1x.py │ ├── htc_x101_32x4d_fpn_20e_16gpu.py │ └── htc_x101_64x4d_fpn_20e_16gpu.py ├── libra_rcnn │ ├── README.md │ ├── libra_fast_rcnn_r50_fpn_1x.py │ ├── 
libra_faster_rcnn_r101_fpn_1x.py │ ├── libra_faster_rcnn_r50_fpn_1x.py │ ├── libra_faster_rcnn_x101_64x4d_fpn_1x.py │ └── libra_retinanet_r50_fpn_1x.py ├── mask_rcnn_r101_fpn_1x.py ├── mask_rcnn_r50_caffe_c4_1x.py ├── mask_rcnn_r50_fpn_1x.py ├── mask_rcnn_x101_32x4d_fpn_1x.py ├── mask_rcnn_x101_64x4d_fpn_1x.py ├── ms_rcnn │ ├── README.md │ ├── ms_rcnn_r101_caffe_fpn_1x.py │ ├── ms_rcnn_r50_caffe_fpn_1x.py │ └── ms_rcnn_x101_64x4d_fpn_1x.py ├── pascal_voc │ ├── faster_rcnn_r50_fpn_1x_voc0712.py │ ├── ssd300_voc.py │ └── ssd512_voc.py ├── retinanet_r101_fpn_1x.py ├── retinanet_r50_fpn_1x.py ├── retinanet_x101_32x4d_fpn_1x.py ├── retinanet_x101_64x4d_fpn_1x.py ├── rpn_r101_fpn_1x.py ├── rpn_r50_caffe_c4_1x.py ├── rpn_r50_fpn_1x.py ├── rpn_x101_32x4d_fpn_1x.py ├── rpn_x101_64x4d_fpn_1x.py ├── scratch │ ├── README.md │ ├── scratch_faster_rcnn_r50_fpn_gn_6x.py │ └── scratch_mask_rcnn_r50_fpn_gn_6x.py ├── ssd300_coco.py ├── ssd512_coco.py └── wider_face │ ├── README.md │ └── ssd300_wider_face.py ├── demo └── coco_test_12510.jpg ├── mmdet ├── __init__.py ├── apis │ ├── __init__.py │ ├── env.py │ ├── inference.py │ └── train.py ├── core │ ├── __init__.py │ ├── anchor │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ ├── anchor_target.py │ │ └── guided_anchor_target.py │ ├── bbox │ │ ├── __init__.py │ │ ├── assign_sampling.py │ │ ├── assigners │ │ │ ├── __init__.py │ │ │ ├── approx_max_iou_assigner.py │ │ │ ├── assign_result.py │ │ │ ├── base_assigner.py │ │ │ └── max_iou_assigner.py │ │ ├── bbox_target.py │ │ ├── geometry.py │ │ ├── samplers │ │ │ ├── __init__.py │ │ │ ├── base_sampler.py │ │ │ ├── combined_sampler.py │ │ │ ├── instance_balanced_pos_sampler.py │ │ │ ├── iou_balanced_neg_sampler.py │ │ │ ├── ohem_sampler.py │ │ │ ├── pseudo_sampler.py │ │ │ ├── random_sampler.py │ │ │ └── sampling_result.py │ │ └── transforms.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── bbox_overlaps.py │ │ ├── class_names.py │ │ ├── coco_utils.py │ │ ├── eval_hooks.py │ │ ├── mean_ap.py │ │ └── recall.py │ ├── fp16 │ │ ├── __init__.py │ │ ├── decorators.py │ │ ├── hooks.py │ │ └── utils.py │ ├── mask │ │ ├── __init__.py │ │ ├── mask_target.py │ │ └── utils.py │ ├── post_processing │ │ ├── __init__.py │ │ ├── bbox_nms.py │ │ └── merge_augs.py │ └── utils │ │ ├── __init__.py │ │ ├── dist_utils.py │ │ └── misc.py ├── datasets │ ├── __init__.py │ ├── coco.py │ ├── concat_dataset.py │ ├── custom.py │ ├── extra_aug.py │ ├── loader │ │ ├── __init__.py │ │ ├── build_loader.py │ │ └── sampler.py │ ├── repeat_dataset.py │ ├── transforms.py │ ├── utils.py │ ├── voc.py │ ├── wider_face.py │ └── xml_style.py ├── models │ ├── __init__.py │ ├── anchor_heads │ │ ├── __init__.py │ │ ├── anchor_head.py │ │ ├── fcos_head.py │ │ ├── ga_retina_head.py │ │ ├── ga_rpn_head.py │ │ ├── guided_anchor_head.py │ │ ├── retina_head.py │ │ ├── rpn_head.py │ │ └── ssd_head.py │ ├── backbones │ │ ├── __init__.py │ │ ├── hrnet.py │ │ ├── resnet.py │ │ ├── resnext.py │ │ └── ssd_vgg.py │ ├── bbox_heads │ │ ├── __init__.py │ │ ├── bbox_head.py │ │ └── convfc_bbox_head.py │ ├── builder.py │ ├── detectors │ │ ├── __init__.py │ │ ├── base.py │ │ ├── cascade_rcnn.py │ │ ├── fast_rcnn.py │ │ ├── faster_rcnn.py │ │ ├── fcos.py │ │ ├── grid_rcnn.py │ │ ├── htc.py │ │ ├── mask_rcnn.py │ │ ├── mask_scoring_rcnn.py │ │ ├── retinanet.py │ │ ├── rpn.py │ │ ├── single_stage.py │ │ ├── test_mixins.py │ │ └── two_stage.py │ ├── losses │ │ ├── __init__.py │ │ ├── accuracy.py │ │ ├── balanced_l1_loss.py │ │ ├── cross_entropy_loss.py │ │ ├── focal_loss.py │ │ ├── 
ghm_loss.py │ │ ├── iou_loss.py │ │ ├── mse_loss.py │ │ ├── smooth_l1_loss.py │ │ └── utils.py │ ├── mask_heads │ │ ├── __init__.py │ │ ├── fcn_mask_head.py │ │ ├── fused_semantic_head.py │ │ ├── grid_head.py │ │ ├── htc_mask_head.py │ │ └── maskiou_head.py │ ├── necks │ │ ├── __init__.py │ │ ├── bfp.py │ │ ├── fpn.py │ │ └── hrfpn.py │ ├── plugins │ │ ├── __init__.py │ │ ├── generalized_attention.py │ │ └── non_local.py │ ├── registry.py │ ├── roi_extractors │ │ ├── __init__.py │ │ └── single_level.py │ ├── shared_heads │ │ ├── __init__.py │ │ └── res_layer.py │ └── utils │ │ ├── __init__.py │ │ ├── conv_module.py │ │ ├── conv_ws.py │ │ ├── norm.py │ │ ├── scale.py │ │ └── weight_init.py └── ops │ ├── __init__.py │ ├── dcn │ ├── __init__.py │ ├── functions │ │ ├── __init__.py │ │ ├── deform_conv.py │ │ └── deform_pool.py │ ├── modules │ │ ├── __init__.py │ │ ├── deform_conv.py │ │ └── deform_pool.py │ ├── setup.py │ └── src │ │ ├── deform_conv_cuda.cpp │ │ ├── deform_conv_cuda_kernel.cu │ │ ├── deform_pool_cuda.cpp │ │ └── deform_pool_cuda_kernel.cu │ ├── gcb │ ├── __init__.py │ └── context_block.py │ ├── masked_conv │ ├── __init__.py │ ├── functions │ │ ├── __init__.py │ │ └── masked_conv.py │ ├── modules │ │ ├── __init__.py │ │ └── masked_conv.py │ ├── setup.py │ └── src │ │ ├── masked_conv2d_cuda.cpp │ │ └── masked_conv2d_kernel.cu │ ├── nms │ ├── __init__.py │ ├── nms_wrapper.py │ ├── setup.py │ └── src │ │ ├── nms_cpu.cpp │ │ ├── nms_cuda.cpp │ │ ├── nms_kernel.cu │ │ └── soft_nms_cpu.pyx │ ├── roi_align │ ├── __init__.py │ ├── functions │ │ ├── __init__.py │ │ └── roi_align.py │ ├── gradcheck.py │ ├── modules │ │ ├── __init__.py │ │ └── roi_align.py │ ├── setup.py │ └── src │ │ ├── roi_align_cuda.cpp │ │ └── roi_align_kernel.cu │ ├── roi_pool │ ├── __init__.py │ ├── functions │ │ ├── __init__.py │ │ └── roi_pool.py │ ├── gradcheck.py │ ├── modules │ │ ├── __init__.py │ │ └── roi_pool.py │ ├── setup.py │ └── src │ │ ├── roi_pool_cuda.cpp │ │ └── roi_pool_kernel.cu │ └── sigmoid_focal_loss │ ├── __init__.py │ ├── functions │ ├── __init__.py │ └── sigmoid_focal_loss.py │ ├── modules │ ├── __init__.py │ └── sigmoid_focal_loss.py │ ├── setup.py │ └── src │ ├── sigmoid_focal_loss.cpp │ └── sigmoid_focal_loss_cuda.cu ├── setup.py └── tools ├── analyze_logs.py ├── coco_eval.py ├── convert_datasets └── pascal_voc.py ├── detectron2pytorch.py ├── dist_test.sh ├── dist_train.sh ├── publish_model.py ├── slurm_test.sh ├── slurm_train.sh ├── test.py ├── train.py ├── upgrade_model_version.py └── voc_eval.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # cython generated cpp 107 | mmdet/ops/nms/*.cpp 108 | mmdet/version.py 109 | data 110 | .vscode 111 | .idea 112 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 
45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at chenkaidev@gmail.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to mmdetection 2 | 3 | All kinds of contributions are welcome, including but not limited to the following. 4 | 5 | - Fixes (typos, bugs) 6 | - New features and components 7 | 8 | ## Workflow 9 | 10 | 1. Fork and pull the latest mmdetection 11 | 2. Check out a new branch (do not use the master branch for PRs) 12 | 3. Commit your changes 13 | 4. Create a PR 14 | 15 | Note: 16 | - If you plan to add new features that involve large changes, you are encouraged to open an issue for discussion first. 17 | - If you are the author of some papers and would like to include your method in mmdetection, 18 | please contact Kai Chen (chenkaidev[at]gmail[dot]com). We would much appreciate your contribution. 19 | 20 | ## Code style 21 | 22 | ### Python 23 | We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style. 24 | We use [flake8](http://flake8.pycqa.org/en/latest/) as the linter and [yapf](https://github.com/google/yapf) as the formatter. 25 | Please upgrade to the latest yapf (>=0.27.0) and refer to the [configuration](.style.yapf). 26 | 27 | > Before you create a PR, make sure that your code lints and is formatted by yapf (see the example commands below). 28 | 29 | ### C++ and CUDA 30 | We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html).
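As a concrete example of the Python checks above, the following commands lint and format the code base (a minimal sketch; flake8 and yapf read the repo's own configuration, and the paths can be narrowed to just the files you touched):

```shell
# lint the package and the tools
flake8 mmdet/ tools/
# format in place; yapf picks up the bundled .style.yapf automatically
yapf -r -i mmdet/ tools/
```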
-------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Installation 2 | 3 | ### Requirements 4 | 5 | - Linux 6 | - Python 3.5+ ([Say goodbye to Python2](https://python3statement.org/)) 7 | - PyTorch 1.0+ or PyTorch-nightly 8 | - CUDA 9.0+ 9 | - NCCL 2+ 10 | - GCC 4.9+ 11 | - [mmcv](https://github.com/open-mmlab/mmcv) 12 | 13 | We have tested the following versions of OS and software: 14 | 15 | - OS: Ubuntu 16.04/18.04 and CentOS 7.2 16 | - CUDA: 9.0/9.2/10.0 17 | - NCCL: 2.1.15/2.2.13/2.3.7/2.4.2 18 | - GCC: 4.9/5.3/5.4/7.3 19 | 20 | ### Install mmdetection 21 | 22 | a. Create a conda virtual environment and activate it. Then install Cython. 23 | 24 | ```shell 25 | conda create -n open-mmlab python=3.7 -y 26 | conda activate open-mmlab 27 | 28 | conda install cython 29 | ``` 30 | 31 | b. Install PyTorch stable or nightly and torchvision following the [official instructions](https://pytorch.org/). 32 | 33 | c. Clone the mmdetection repository. 34 | 35 | ```shell 36 | git clone https://github.com/open-mmlab/mmdetection.git 37 | cd mmdetection 38 | ``` 39 | 40 | d. Compile CUDA extensions. 41 | 42 | ```shell 43 | ./compile.sh 44 | ``` 45 | 46 | e. Install mmdetection (other dependencies will be installed automatically). 47 | 48 | ```shell 49 | python setup.py develop 50 | # or "pip install -e ." 51 | ``` 52 | 53 | Note: 54 | 55 | 1. It is recommended that you rerun step e each time you pull updates from GitHub. If the C/CUDA code has been updated, you also need to rerun step d. 56 | In step e, the git commit id is written into the version number, e.g. 0.6.0+2e7045c. The version is also saved in trained models. 57 | 58 | 2. Following the above instructions, mmdetection is installed in `dev` mode, so any modifications to the code take effect without reinstalling. 59 | 60 | ### Prepare datasets 61 | 62 | It is recommended to symlink the dataset root to `$MMDETECTION/data`. 63 | 64 | ``` 65 | mmdetection 66 | ├── mmdet 67 | ├── tools 68 | ├── configs 69 | ├── data 70 | │ ├── coco 71 | │ │ ├── annotations 72 | │ │ ├── train2017 73 | │ │ ├── val2017 74 | │ │ ├── test2017 75 | │ ├── VOCdevkit 76 | │ │ ├── VOC2007 77 | │ │ ├── VOC2012 78 | 79 | ``` 80 | 81 | ### Scripts 82 | [Here](https://gist.github.com/hellock/bf23cd7348c727d69d48682cb6909047) is 83 | a script for setting up mmdetection with conda. 84 | 85 | ### Notice 86 | You can run `python(3) setup.py develop` or `pip install -e .` to install mmdetection if you want to make modifications to it frequently. 87 | 88 | If there is more than one copy of mmdetection on your machine and you want to switch between them, 89 | insert the following code into the main script 90 | ```python 91 | import os.path as osp 92 | import sys 93 | sys.path.insert(0, osp.join(osp.dirname(osp.abspath(__file__)), '../')) 94 | ``` 95 | or run the following command in the terminal from the corresponding folder. 96 | ```shell 97 | export PYTHONPATH=`pwd`:$PYTHONPATH 98 | ``` 99 | -------------------------------------------------------------------------------- /compile.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | echo "Building roi align op..." 6 | cd mmdet/ops/roi_align 7 | if [ -d "build" ]; then 8 | rm -r build 9 | fi 10 | $PYTHON setup.py build_ext --inplace 11 | 12 | echo "Building roi pool op..."
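# each remaining op below follows the same pattern as roi_align above:
# enter the op's directory, remove any stale build/ directory, then
# rebuild the extension in place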
13 | cd ../roi_pool 14 | if [ -d "build" ]; then 15 | rm -r build 16 | fi 17 | $PYTHON setup.py build_ext --inplace 18 | 19 | echo "Building nms op..." 20 | cd ../nms 21 | if [ -d "build" ]; then 22 | rm -r build 23 | fi 24 | $PYTHON setup.py build_ext --inplace 25 | 26 | echo "Building dcn..." 27 | cd ../dcn 28 | if [ -d "build" ]; then 29 | rm -r build 30 | fi 31 | $PYTHON setup.py build_ext --inplace 32 | 33 | echo "Building sigmoid focal loss op..." 34 | cd ../sigmoid_focal_loss 35 | if [ -d "build" ]; then 36 | rm -r build 37 | fi 38 | $PYTHON setup.py build_ext --inplace 39 | 40 | echo "Building masked conv op..." 41 | cd ../masked_conv 42 | if [ -d "build" ]; then 43 | rm -r build 44 | fi 45 | $PYTHON setup.py build_ext --inplace 46 | -------------------------------------------------------------------------------- /configs/empirical_attention/README.md: -------------------------------------------------------------------------------- 1 | # An Empirical Study of Spatial Attention Mechanisms in Deep Networks 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @article{zhu2019empirical, 7 | title={An Empirical Study of Spatial Attention Mechanisms in Deep Networks}, 8 | author={Zhu, Xizhou and Cheng, Dazhi and Zhang, Zheng and Lin, Stephen and Dai, Jifeng}, 9 | journal={arXiv preprint arXiv:1904.05873}, 10 | year={2019} 11 | } 12 | ``` 13 | 14 | 15 | ## Results and Models 16 | 17 | | Backbone | Attention Component | DCN | Lr schd | box AP | Download | 18 | |:---------:|:-------------------:|:----:|:-------:|:------:|:--------:| 19 | | R-50 | 1111 | N | 1x | 38.6 | - | 20 | | R-50 | 0010 | N | 1x | 38.2 | - | 21 | | R-50 | 1111 | Y | 1x | 41.0 | - | 22 | | R-50 | 0010 | Y | 1x | 40.8 | - | 23 | 24 | -------------------------------------------------------------------------------- /configs/fcos/README.md: -------------------------------------------------------------------------------- 1 | # FCOS: Fully Convolutional One-Stage Object Detection 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @article{tian2019fcos, 7 | title={FCOS: Fully Convolutional One-Stage Object Detection}, 8 | author={Tian, Zhi and Shen, Chunhua and Chen, Hao and He, Tong}, 9 | journal={arXiv preprint arXiv:1904.01355}, 10 | year={2019} 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Backbone | Style | GN | MS train | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 17 | |:---------:|:-------:|:-------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:| 18 | | R-50 | caffe | N | N | 1x | 5.5 | 0.373 | 13.7 | 35.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_caffe_fpn_1x_4gpu_20190516-a7cac5ff.pth) | 19 | | R-50 | caffe | Y | N | 1x | 6.9 | 0.396 | 13.6 | 36.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_caffe_fpn_gn_1x_4gpu_20190516-9f253a93.pth) | 20 | | R-50 | caffe | Y | N | 2x | - | - | - | 36.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_caffe_fpn_gn_2x_4gpu_20190516_-93484354.pth) | 21 | | R-101 | caffe | Y | N | 1x | 10.4 | 0.558 | 11.6 | 39.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r101_caffe_fpn_gn_1x_4gpu_20190516-e4889733.pth) | 22 | | R-101 | caffe | Y | N | 2x | - | - | - | 39.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r101_caffe_fpn_gn_2x_4gpu_20190516-c03af97b.pth) | 23 | 24 | 25 | | Backbone | Style 
| GN | MS train | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 26 | |:---------:|:-------:|:-------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:| 27 | | R-50 | caffe | Y | Y | 2x | - | - | - | 38.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_mstrain_640_800_r50_caffe_fpn_gn_2x_4gpu_20190516-f7329d80.pth) | 28 | | R-101 | caffe | Y | Y | 2x | - | - | - | 40.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu_20190516-42e6f62d.pth) | 29 | | X-101 | caffe | Y | Y | 2x | 9.7 | 0.892 | 7.0 | 42.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x_20190516-a36c0872.pth) | 30 | 31 | **Notes:** 32 | - To be consistent with the author's implementation, we use 4 GPUs with 4 images/GPU for R-50 and R-101 models, and 8 GPUs with 2 images/GPU for X-101 models. 33 | - The X-101 backbone is X-101-64x4d. 34 | -------------------------------------------------------------------------------- /configs/ghm/README.md: -------------------------------------------------------------------------------- 1 | # Gradient Harmonized Single-stage Detector 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{li2019gradient, 7 | title={Gradient Harmonized Single-stage Detector}, 8 | author={Li, Buyu and Liu, Yu and Wang, Xiaogang}, 9 | booktitle={AAAI Conference on Artificial Intelligence}, 10 | year={2019} 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 17 | | :-------------: | :-----: | :-----: | :------: | :-----------------: | :------------: | :----: | :------: | 18 | | R-50-FPN | pytorch | 1x | 3.9 | 0.500 | 9.4 | 36.9 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_r50_fpn_1x_20190608-b9aa5862.pth) | 19 | | R-101-FPN | pytorch | 1x | 5.8 | 0.625 | 8.5 | 39.0 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_r101_fpn_1x_20190608-b885b74a.pth) | 20 | | X-101-32x4d-FPN | pytorch | 1x | 7.0 | 0.818 | 7.6 | 40.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_x101_32x4d_fpn_1x_20190608-ed295d22.pth) | 21 | | X-101-64x4d-FPN | pytorch | 1x | 9.9 | 1.191 | 6.1 | 41.6 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_x101_64x4d_fpn_1x_20190608-7f2037ce.pth) | -------------------------------------------------------------------------------- /configs/gn/README.md: -------------------------------------------------------------------------------- 1 | # Group Normalization 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{wu2018group, 7 | title={Group Normalization}, 8 | author={Wu, Yuxin and He, Kaiming}, 9 | booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, 10 | year={2018} 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Backbone | model | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download | 17 | |:-------------:|:----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:| 18 | | R-50-FPN (d) | Mask R-CNN | 2x | 7.2 | 0.806 | 5.4 | 39.8 | 36.1 | 
[model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_2x_20180113-86832cf2.pth) | 19 | | R-50-FPN (d) | Mask R-CNN | 3x | 7.2 | 0.806 | 5.4 | 40.1 | 36.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_3x_20180113-8e82f48d.pth) | 20 | | R-101-FPN (d) | Mask R-CNN | 2x | 9.9 | 0.970 | 4.8 | 41.5 | 37.0 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r101_fpn_gn_2x_20180113-9598649c.pth) | 21 | | R-101-FPN (d) | Mask R-CNN | 3x | 9.9 | 0.970 | 4.8 | 41.6 | 37.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r101_fpn_gn_3x_20180113-a14ffb96.pth) | 22 | | R-50-FPN (c) | Mask R-CNN | 2x | 7.2 | 0.806 | 5.4 | 39.7 | 35.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_contrib_2x_20180113-ec93305c.pth) | 23 | | R-50-FPN (c) | Mask R-CNN | 3x | 7.2 | 0.806 | 5.4 | 40.0 | 36.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_contrib_3x_20180113-9d230cab.pth) | 24 | 25 | **Notes:** 26 | - (d) means the pretrained model was converted from Detectron, and (c) means the model was contributed and pretrained by [@thangvubk](https://github.com/thangvubk). 27 | - The `3x` schedule decays the learning rate at epochs 28 and 34 and trains for 36 epochs in total. 28 | - **The memory and train/inference time figures are outdated.** -------------------------------------------------------------------------------- /configs/grid_rcnn/README.md: -------------------------------------------------------------------------------- 1 | # Grid R-CNN 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{lu2019grid, 7 | title={Grid R-CNN}, 8 | author={Lu, Xin and Li, Buyu and Yue, Yuxin and Li, Quanquan and Yan, Junjie}, 9 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, 10 | year={2019} 11 | } 12 | 13 | @article{lu2019gridplus, 14 | title={Grid R-CNN Plus: Faster and Better}, 15 | author={Lu, Xin and Li, Buyu and Yue, Yuxin and Li, Quanquan and Yan, Junjie}, 16 | journal={arXiv preprint arXiv:1906.05688}, 17 | year={2019} 18 | } 19 | ``` 20 | 21 | ## Results and Models 22 | 23 | | Backbone | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 24 | |:-----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:| 25 | | R-50 | 2x | 4.8 | 1.172 | 10.9 | 40.3 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_r50_fpn_2x_20190619-5b29cf9d.pth) | 26 | | R-101 | 2x | 6.7 | 1.214 | 10.0 | 41.7 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_r101_fpn_2x_20190619-a4b61645.pth) | 27 | | X-101-32x4d | 2x | 8.0 | 1.335 | 8.5 | 43.0 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_x101_32x4d_fpn_2x_20190619-0bbfd87a.pth) | 28 | | X-101-64x4d | 2x | 10.9 | 1.753 | 6.4 | 43.1 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_x101_64x4d_fpn_2x_20190619-8f4e20bb.pth) | 29 | 30 | **Notes:** 31 | - All models are trained with 8 GPUs instead of the 32 GPUs used in the original paper. 32 | - The warm-up lasts for 1 epoch, and `2x` here indicates 25 epochs.
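For reference, a downloaded checkpoint can be evaluated with the test scripts under `tools/` (a minimal sketch following the usual mmdetection conventions; the checkpoint path and GPU count are assumptions):

```shell
# evaluate the R-50 Grid R-CNN model on COCO val (checkpoint path is a placeholder)
./tools/dist_test.sh configs/grid_rcnn/grid_rcnn_gn_head_r50_fpn_2x.py \
    checkpoints/grid_rcnn_gn_head_r50_fpn_2x.pth 8 --out results.pkl --eval bbox
```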
33 | -------------------------------------------------------------------------------- /configs/hrnet/README.md: -------------------------------------------------------------------------------- 1 | # High-resolution networks (HRNets) for object detection 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{SunXLW19, 7 | title={Deep High-Resolution Representation Learning for Human Pose Estimation}, 8 | author={Ke Sun and Bin Xiao and Dong Liu and Jingdong Wang}, 9 | booktitle={CVPR}, 10 | year={2019} 11 | } 12 | 13 | @article{SunZJCXLMWLW19, 14 | title={High-Resolution Representations for Labeling Pixels and Regions}, 15 | author={Ke Sun and Yang Zhao and Borui Jiang and Tianheng Cheng and Bin Xiao 16 | and Dong Liu and Yadong Mu and Xinggang Wang and Wenyu Liu and Jingdong Wang}, 17 | journal = {CoRR}, 18 | volume = {abs/1904.04514}, 19 | year={2019} 20 | } 21 | ``` 22 | 23 | ## Results and Models 24 | 25 | Faster R-CNN 26 | 27 | | Backbone|#Params|GFLOPs|Lr sched|mAP|Download| 28 | | :--:|:--:|:--:|:--:|:--:|:--:| 29 | | HRNetV2-W18 |26.2M|159.1| 1x | 36.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w18_fpn_1x_20190522-e368c387.pth)| 30 | | HRNetV2-W18 |26.2M|159.1| 20-23-24e | 38.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w18_fpn_20_23_24e_20190522-ed3c0293.pth)| 31 | | HRNetV2-W32 |45.0M|245.3| 1x | 39.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w32_fpn_1x_20190522-d22f1fef.pth)| 32 | | HRNetV2-W32 |45.0M|245.3| 20-23-24e | 40.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w32_fpn_20_23_24e_20190522-2d67a5eb.pth)| 33 | | HRNetV2-W40 |60.5M|314.9| 1x | 40.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w40_fpn_1x_20190522-30502318.pth)| 34 | | HRNetV2-W40 |60.5M|314.9| 20-23-24e | 41.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w40_fpn_20_23_24e_20190522-050a7c7f.pth)| 35 | 36 | 37 | Mask R-CNN 38 | 39 | |Backbone|Lr sched|mask mAP|box mAP|Download| 40 | |:--:|:--:|:--:|:--:|:--:| 41 | | HRNetV2-W18 | 1x | 34.2 | 37.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/mask_rcnn_hrnetv2_w18_fpn_1x_20190522-c8ad459f.pth)| 42 | | HRNetV2-W18 | 20-23-24e | 35.7 | 39.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/mask_rcnn_hrnetv2_w18_fpn_20_23_24e_20190522-5c11b7f2.pth)| 43 | | HRNetV2-W32 | 1x | 36.8 | 40.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/mask_rcnn_hrnetv2_w32_fpn_1x_20190522-374aaa00.pth)| 44 | | HRNetV2-W32 | 20-23-24e | 37.6 | 42.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/mask_rcnn_hrnetv2_w32_fpn_20_23_24e_20190522-4dd02a79.pth)| 45 | 46 | Cascade R-CNN 47 | 48 | |Backbone|Lr sched|mAP|Download| 49 | |:--:|:--:|:--:|:--:| 50 | | HRNetV2-W32 | 20e | 43.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/cascade_rcnn_hrnetv2_w32_fpn_20e_20190522-55bec4ee.pth)| 51 | 52 | **Note:** 53 | 54 | - HRNetV2 ImageNet pretrained models are in [HRNets for Image Classification](https://github.com/HRNet/HRNet-Image-Classification). 
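As an example, one of the configs above can be trained with the distributed launcher bundled in `tools/` (a minimal sketch; the GPU count of 8 is an assumption):

```shell
# train Faster R-CNN with an HRNetV2-W18 backbone on 8 GPUs
./tools/dist_train.sh configs/hrnet/faster_rcnn_hrnetv2p_w18_1x.py 8
```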
55 | -------------------------------------------------------------------------------- /configs/htc/README.md: -------------------------------------------------------------------------------- 1 | # Hybrid Task Cascade for Instance Segmentation 2 | 3 | ## Introduction 4 | 5 | We provide config files to reproduce the results in the CVPR 2019 paper for [Hybrid Task Cascade](https://arxiv.org/abs/1901.07518). 6 | 7 | ``` 8 | @inproceedings{chen2019hybrid, 9 | title={Hybrid task cascade for instance segmentation}, 10 | author={Chen, Kai and Pang, Jiangmiao and Wang, Jiaqi and Xiong, Yu and Li, Xiaoxiao and Sun, Shuyang and Feng, Wansen and Liu, Ziwei and Shi, Jianping and Ouyang, Wanli and Loy, Chen Change and Lin, Dahua}, 11 | booktitle={IEEE Conference on Computer Vision and Pattern Recognition}, 12 | year={2019} 13 | } 14 | ``` 15 | 16 | ## Dataset 17 | 18 | HTC requires the COCO and COCO-stuff datasets for training. You need to download the COCO-stuff annotations and extract them into the COCO dataset path. 19 | The directory structure should look like this: 20 | 21 | ``` 22 | mmdetection 23 | ├── mmdet 24 | ├── tools 25 | ├── configs 26 | ├── data 27 | │ ├── coco 28 | │ │ ├── annotations 29 | │ │ ├── train2017 30 | │ │ ├── val2017 31 | │ │ ├── test2017 32 | │ │ ├── stuffthingmaps 33 | ``` 34 | 35 | ## Results and Models 36 | 37 | The results on COCO 2017val are shown in the table below (results on test-dev are usually slightly higher than on val). 38 | 39 | | Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download | 40 | |:---------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:| 41 | | R-50-FPN | pytorch | 1x | 7.4 | 0.936 | 4.1 | 42.1 | 37.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_r50_fpn_1x_20190408-878c1712.pth) | 42 | | R-50-FPN | pytorch | 20e | - | - | - | 43.2 | 38.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_r50_fpn_20e_20190408-c03b7015.pth) | 43 | | R-101-FPN | pytorch | 20e | 9.3 | 1.051 | 4.0 | 44.9 | 39.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_r101_fpn_20e_20190408-a2e586db.pth) | 44 | | X-101-32x4d-FPN | pytorch | 20e | 5.8 | 0.769 | 3.8 | 46.1 | 40.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_x101_32x4d_fpn_20e_20190408-9eae4d0b.pth) | 45 | | X-101-64x4d-FPN | pytorch | 20e | 7.5 | 1.120 | 3.5 | 46.9 | 40.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_x101_64x4d_fpn_20e_20190408-497f2561.pth) | 46 | 47 | - In the HTC paper and COCO 2018 Challenge, `score_thr` is set to 0.001 for both baselines and HTC. 48 | - We use 8 GPUs with 2 images/GPU for R-50 and R-101 models, and 16 GPUs with 1 image/GPU for X-101 models. 49 | If you would like to train X-101 HTC with 8 GPUs, you need to change the lr from 0.02 to 0.01. 50 | 51 | We also provide a powerful HTC model with DCN and multi-scale training. No test-time augmentation is used.
52 | 53 | | Backbone | Style | DCN | training scales | Lr schd | box AP | mask AP | Download | 54 | |:----------------:|:-------:|:-----:|:---------------:|:-------:|:------:|:-------:|:--------:| 55 | | X-101-64x4d-FPN | pytorch | c3-c5 | 400~1400 | 20e | 50.7 | 43.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e_20190408-0e50669c.pth) | 56 | -------------------------------------------------------------------------------- /configs/libra_rcnn/README.md: -------------------------------------------------------------------------------- 1 | # Libra R-CNN: Towards Balanced Learning for Object Detection 2 | 3 | ## Introduction 4 | 5 | We provide config files to reproduce the results in the CVPR 2019 paper [Libra R-CNN](https://arxiv.org/pdf/1904.02701.pdf). 6 | 7 | ``` 8 | @inproceedings{pang2019libra, 9 | title={Libra R-CNN: Towards Balanced Learning for Object Detection}, 10 | author={Pang, Jiangmiao and Chen, Kai and Shi, Jianping and Feng, Huajun and Ouyang, Wanli and Lin, Dahua}, 11 | booktitle={IEEE Conference on Computer Vision and Pattern Recognition}, 12 | year={2019} 13 | } 14 | ``` 15 | 16 | ## Results and Models 17 | 18 | The results on COCO 2017val are shown in the table below (results on test-dev are usually slightly higher than on val). 19 | 20 | | Architecture | Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 21 | |:---------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:| 22 | | Faster R-CNN | R-50-FPN | pytorch | 1x | 4.2 | 0.375 | 12.0 | 38.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_20190610-bf0ea559.pth) | 23 | | Fast R-CNN | R-50-FPN | pytorch | 1x | 3.7 | 0.272 | 16.3 | 38.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_fast_rcnn_r50_fpn_1x_20190525-a43f88b5.pth) | 24 | | Faster R-CNN | R-101-FPN | pytorch | 1x | 6.0 | 0.495 | 10.4 | 40.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_20190525-94e94051.pth) | 25 | | Faster R-CNN | X-101-64x4d-FPN | pytorch | 1x | 10.1 | 1.050 | 6.8 | 42.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_20190525-359c134a.pth) | 26 | | RetinaNet | R-50-FPN | pytorch | 1x | 3.7 | 0.328 | 11.8 | 37.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_retinanet_r50_fpn_1x_20190525-ead2a6bb.pth) | 27 | -------------------------------------------------------------------------------- /configs/ms_rcnn/README.md: -------------------------------------------------------------------------------- 1 | # Mask Scoring R-CNN 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{huang2019msrcnn, 7 | title={Mask Scoring R-CNN}, 8 | author={Zhaojin Huang and Lichao Huang and Yongchao Gong and Chang Huang and Xinggang Wang}, 9 | booktitle={IEEE Conference on Computer Vision and Pattern Recognition}, 10 | year={2019}, 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Backbone | style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download | 17 | |:-------------:|:----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:| 18 | | R-50-FPN | caffe | 1x | 4.3 | 0.537 | 
10.1 | 37.4 | 35.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_r50_caffe_fpn_1x_20190624-619934b5.pth) | 19 | | R-50-FPN | caffe | 2x | - | - | - | 38.2 | 35.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ms-rcnn/ms_rcnn_r50_caffe_fpn_2x_20190525-a07be31e.pth) | 20 | | R-101-FPN | caffe | 1x | 6.2 | 0.682 | 9.1 | 39.8 | 37.2 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_r101_caffe_fpn_1x_20190624-677a5548.pth) | 21 | | R-101-FPN | caffe | 2x | - | - | - | 40.7 | 37.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ms-rcnn/ms_rcnn_r101_caffe_fpn_2x_20190525-4aee1528.pth) | 22 | | R-X101-32x4d | pytorch | 2x | 7.6 | 0.844 | 8.0 | 41.7 | 38.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_x101_32x4d_fpn_2x_20190628-ab454d07.pth) | 23 | | R-X101-64x4d | pytorch | 1x | 10.5 | 1.214 | 6.4 | 42.0 | 39.1 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_x101_64x4d_fpn_1x_20190628-dec32bda.pth) | 24 | | R-X101-64x4d | pytorch | 2x | - | - | - | 42.2 | 38.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ms-rcnn/ms_rcnn_x101_64x4d_fpn_2x_20190525-c044c25a.pth) | 25 | -------------------------------------------------------------------------------- /configs/retinanet_r50_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='modelzoo://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | style='pytorch'), 12 | neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | start_level=1, 17 | add_extra_convs=True, 18 | num_outs=5), 19 | bbox_head=dict( 20 | type='RetinaHead', 21 | num_classes=81, 22 | in_channels=256, 23 | stacked_convs=4, 24 | feat_channels=256, 25 | octave_base_scale=4, 26 | scales_per_octave=3, 27 | anchor_ratios=[0.5, 1.0, 2.0], 28 | anchor_strides=[8, 16, 32, 64, 128], 29 | target_means=[.0, .0, .0, .0], 30 | target_stds=[1.0, 1.0, 1.0, 1.0], 31 | loss_cls=dict( 32 | type='FocalLoss', 33 | use_sigmoid=True, 34 | gamma=2.0, 35 | alpha=0.25, 36 | loss_weight=1.0), 37 | loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0))) 38 | # training and testing settings 39 | train_cfg = dict( 40 | assigner=dict( 41 | type='MaxIoUAssigner', 42 | pos_iou_thr=0.5, 43 | neg_iou_thr=0.4, 44 | min_pos_iou=0, 45 | ignore_iof_thr=-1), 46 | allowed_border=-1, 47 | pos_weight=-1, 48 | debug=False) 49 | test_cfg = dict( 50 | nms_pre=1000, 51 | min_bbox_size=0, 52 | score_thr=0.05, 53 | nms=dict(type='nms', iou_thr=0.5), 54 | max_per_img=100) 55 | # dataset settings 56 | dataset_type = 'CocoDataset' 57 | data_root = 'data/coco/' 58 | img_norm_cfg = dict( 59 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 60 | data = dict( 61 | imgs_per_gpu=2, 62 | workers_per_gpu=2, 63 | train=dict( 64 | type=dataset_type, 65 | ann_file=data_root + 'annotations/instances_train2017.json', 66 | img_prefix=data_root + 'train2017/', 67 | img_scale=(1333, 800), 68 | img_norm_cfg=img_norm_cfg, 69 | size_divisor=32, 70 | flip_ratio=0.5, 71 | with_mask=False, 72 | with_crowd=False, 73 | with_label=True), 74 | val=dict( 75 | type=dataset_type, 76 | ann_file=data_root + 
'annotations/instances_val2017.json', 77 | img_prefix=data_root + 'val2017/', 78 | img_scale=(1333, 800), 79 | img_norm_cfg=img_norm_cfg, 80 | size_divisor=32, 81 | flip_ratio=0, 82 | with_mask=False, 83 | with_crowd=False, 84 | with_label=True), 85 | test=dict( 86 | type=dataset_type, 87 | ann_file=data_root + 'annotations/instances_val2017.json', 88 | img_prefix=data_root + 'val2017/', 89 | img_scale=(1333, 800), 90 | img_norm_cfg=img_norm_cfg, 91 | size_divisor=32, 92 | flip_ratio=0, 93 | with_mask=False, 94 | with_crowd=False, 95 | with_label=False, 96 | test_mode=True)) 97 | # optimizer 98 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) 99 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 100 | # learning policy 101 | lr_config = dict( 102 | policy='step', 103 | warmup='linear', 104 | warmup_iters=500, 105 | warmup_ratio=1.0 / 3, 106 | step=[8, 11]) 107 | checkpoint_config = dict(interval=1) 108 | # yapf:disable 109 | log_config = dict( 110 | interval=50, 111 | hooks=[ 112 | dict(type='TextLoggerHook'), 113 | # dict(type='TensorboardLoggerHook') 114 | ]) 115 | # yapf:enable 116 | # runtime settings 117 | total_epochs = 12 118 | device_ids = range(8) 119 | dist_params = dict(backend='nccl') 120 | log_level = 'INFO' 121 | work_dir = './work_dirs/retinanet_r50_fpn_1x' 122 | load_from = None 123 | resume_from = None 124 | workflow = [('train', 1)] 125 | -------------------------------------------------------------------------------- /configs/rpn_r50_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='modelzoo://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | style='pytorch'), 12 | neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=256, 20 | feat_channels=256, 21 | anchor_scales=[8], 22 | anchor_ratios=[0.5, 1.0, 2.0], 23 | anchor_strides=[4, 8, 16, 32, 64], 24 | target_means=[.0, .0, .0, .0], 25 | target_stds=[1.0, 1.0, 1.0, 1.0], 26 | loss_cls=dict( 27 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 28 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0))) 29 | # model training and testing settings 30 | train_cfg = dict( 31 | rpn=dict( 32 | assigner=dict( 33 | type='MaxIoUAssigner', 34 | pos_iou_thr=0.7, 35 | neg_iou_thr=0.3, 36 | min_pos_iou=0.3, 37 | ignore_iof_thr=-1), 38 | sampler=dict( 39 | type='RandomSampler', 40 | num=256, 41 | pos_fraction=0.5, 42 | neg_pos_ub=-1, 43 | add_gt_as_proposals=False), 44 | allowed_border=0, 45 | pos_weight=-1, 46 | debug=False)) 47 | test_cfg = dict( 48 | rpn=dict( 49 | nms_across_levels=False, 50 | nms_pre=2000, 51 | nms_post=2000, 52 | max_num=2000, 53 | nms_thr=0.7, 54 | min_bbox_size=0)) 55 | # dataset settings 56 | dataset_type = 'CocoDataset' 57 | data_root = 'data/coco/' 58 | img_norm_cfg = dict( 59 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 60 | data = dict( 61 | imgs_per_gpu=2, 62 | workers_per_gpu=2, 63 | train=dict( 64 | type=dataset_type, 65 | ann_file=data_root + 'annotations/instances_train2017.json', 66 | img_prefix=data_root + 'train2017/', 67 | img_scale=(1333, 800), 68 | img_norm_cfg=img_norm_cfg, 69 | size_divisor=32, 70 | flip_ratio=0.5, 71 | with_mask=False, 72 | with_crowd=False, 73 | with_label=False), 74 | 
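# val/test below reuse COCO val2017 with flipping disabled (flip_ratio=0);
# RPN needs no class labels, hence with_label=False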
val=dict( 75 | type=dataset_type, 76 | ann_file=data_root + 'annotations/instances_val2017.json', 77 | img_prefix=data_root + 'val2017/', 78 | img_scale=(1333, 800), 79 | img_norm_cfg=img_norm_cfg, 80 | size_divisor=32, 81 | flip_ratio=0, 82 | with_mask=False, 83 | with_crowd=False, 84 | with_label=False), 85 | test=dict( 86 | type=dataset_type, 87 | ann_file=data_root + 'annotations/instances_val2017.json', 88 | img_prefix=data_root + 'val2017/', 89 | img_scale=(1333, 800), 90 | img_norm_cfg=img_norm_cfg, 91 | size_divisor=32, 92 | flip_ratio=0, 93 | with_mask=False, 94 | with_label=False, 95 | test_mode=True)) 96 | # optimizer 97 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 98 | # runner configs 99 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 100 | lr_config = dict( 101 | policy='step', 102 | warmup='linear', 103 | warmup_iters=500, 104 | warmup_ratio=1.0 / 3, 105 | step=[8, 11]) 106 | checkpoint_config = dict(interval=1) 107 | # yapf:disable 108 | log_config = dict( 109 | interval=50, 110 | hooks=[ 111 | dict(type='TextLoggerHook'), 112 | # dict(type='TensorboardLoggerHook') 113 | ]) 114 | # yapf:enable 115 | # runtime settings 116 | total_epochs = 12 117 | dist_params = dict(backend='nccl') 118 | log_level = 'INFO' 119 | work_dir = './work_dirs/rpn_r50_fpn_1x' 120 | load_from = None 121 | resume_from = None 122 | workflow = [('train', 1)] 123 | -------------------------------------------------------------------------------- /configs/scratch/README.md: -------------------------------------------------------------------------------- 1 | # Rethinking ImageNet Pre-training 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @article{he2018rethinking, 7 | title={Rethinking imagenet pre-training}, 8 | author={He, Kaiming and Girshick, Ross and Doll{\'a}r, Piotr}, 9 | journal={arXiv preprint arXiv:1811.08883}, 10 | year={2018} 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Model | Backbone | Style | Lr schd | box AP | mask AP | Download | 17 | |:------------:|:---------:|:-------:|:-------:|:------:|:-------:|:--------:| 18 | | Faster R-CNN | R-50-FPN | pytorch | 6x | 40.1 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/scratch/scratch_faster_rcnn_r50_fpn_gn_6x-20190515-ff554978.pth) | 19 | | Mask R-CNN | R-50-FPN | pytorch | 6x | 41.0 | 37.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/scratch/scratch_mask_rcnn_r50_fpn_gn_6x_20190515-96743f5e.pth) | 20 | 21 | Note: 22 | - The above models are trained with 16 GPUs. -------------------------------------------------------------------------------- /configs/wider_face/README.md: -------------------------------------------------------------------------------- 1 | ## WIDER Face Dataset 2 | 3 | To use the WIDER Face dataset you need to download it 4 | and extract it to the `data/WIDERFace` folder. Annotations in VOC format 5 | can be found in this [repo](https://github.com/sovrasov/wider-face-pascal-voc-annotations.git). 6 | You should move the annotation files from the `WIDER_train_annotations` and `WIDER_val_annotations` folders 7 | to the `Annotations` folders inside the corresponding directories `WIDER_train` and `WIDER_val`. 8 | The annotation lists `train.txt` and `val.txt` should also be copied to `data/WIDERFace` from `WIDER_train_annotations` and `WIDER_val_annotations` respectively.
9 | The directory should be like this: 10 | 11 | ``` 12 | mmdetection 13 | ├── mmdet 14 | ├── tools 15 | ├── configs 16 | ├── data 17 | │ ├── WIDERFace 18 | │ │ ├── WIDER_train 19 | │ | │ ├──0--Parade 20 | │ | │ ├── ... 21 | │ | │ ├── Annotations 22 | │ │ ├── WIDER_val 23 | │ | │ ├──0--Parade 24 | │ | │ ├── ... 25 | │ | │ ├── Annotations 26 | │ │ ├── val.txt 27 | │ │ ├── train.txt 28 | 29 | ``` 30 | 31 | After that you can train the SSD300 on WIDER by launching training with the `ssd300_wider_face.py` config or 32 | create your own config based on the presented one. 33 | -------------------------------------------------------------------------------- /demo/coco_test_12510.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OceanPang/Libra_R-CNN/bd9a4f004dfdfcdc40cfc0d5a41af0bdacefba0d/demo/coco_test_12510.jpg -------------------------------------------------------------------------------- /mmdet/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__, short_version 2 | 3 | __all__ = ['__version__', 'short_version'] 4 | -------------------------------------------------------------------------------- /mmdet/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .env import init_dist, get_root_logger, set_random_seed 2 | from .train import train_detector 3 | from .inference import init_detector, inference_detector, show_result 4 | 5 | __all__ = [ 6 | 'init_dist', 'get_root_logger', 'set_random_seed', 'train_detector', 7 | 'init_detector', 'inference_detector', 'show_result' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/apis/env.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import random 4 | import subprocess 5 | 6 | import numpy as np 7 | import torch 8 | import torch.distributed as dist 9 | import torch.multiprocessing as mp 10 | from mmcv.runner import get_dist_info 11 | 12 | 13 | def init_dist(launcher, backend='nccl', **kwargs): 14 | if mp.get_start_method(allow_none=True) is None: 15 | mp.set_start_method('spawn') 16 | if launcher == 'pytorch': 17 | _init_dist_pytorch(backend, **kwargs) 18 | elif launcher == 'mpi': 19 | _init_dist_mpi(backend, **kwargs) 20 | elif launcher == 'slurm': 21 | _init_dist_slurm(backend, **kwargs) 22 | else: 23 | raise ValueError('Invalid launcher type: {}'.format(launcher)) 24 | 25 | 26 | def _init_dist_pytorch(backend, **kwargs): 27 | # TODO: use local_rank instead of rank % num_gpus 28 | rank = int(os.environ['RANK']) 29 | num_gpus = torch.cuda.device_count() 30 | torch.cuda.set_device(rank % num_gpus) 31 | dist.init_process_group(backend=backend, **kwargs) 32 | 33 | 34 | def _init_dist_mpi(backend, **kwargs): 35 | raise NotImplementedError 36 | 37 | 38 | def _init_dist_slurm(backend, port=29500, **kwargs): 39 | proc_id = int(os.environ['SLURM_PROCID']) 40 | ntasks = int(os.environ['SLURM_NTASKS']) 41 | node_list = os.environ['SLURM_NODELIST'] 42 | num_gpus = torch.cuda.device_count() 43 | torch.cuda.set_device(proc_id % num_gpus) 44 | addr = subprocess.getoutput( 45 | 'scontrol show hostname {} | head -n1'.format(node_list)) 46 | os.environ['MASTER_PORT'] = str(port) 47 | os.environ['MASTER_ADDR'] = addr 48 | os.environ['WORLD_SIZE'] = str(ntasks) 49 | os.environ['RANK'] = str(proc_id) 50 | dist.init_process_group(backend=backend) 51 | 52 
| 53 | def set_random_seed(seed): 54 | random.seed(seed) 55 | np.random.seed(seed) 56 | torch.manual_seed(seed) 57 | torch.cuda.manual_seed_all(seed) 58 | 59 | 60 | def get_root_logger(log_level=logging.INFO): 61 | logger = logging.getLogger() 62 | if not logger.hasHandlers(): 63 | logging.basicConfig( 64 | format='%(asctime)s - %(levelname)s - %(message)s', 65 | level=log_level) 66 | rank, _ = get_dist_info() 67 | if rank != 0: 68 | logger.setLevel('ERROR') 69 | return logger 70 | -------------------------------------------------------------------------------- /mmdet/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor import * # noqa: F401, F403 2 | from .bbox import * # noqa: F401, F403 3 | from .evaluation import * # noqa: F401, F403 4 | from .fp16 import * # noqa: F401, F403 5 | from .mask import * # noqa: F401, F403 6 | from .post_processing import * # noqa: F401, F403 7 | from .utils import * # noqa: F401, F403 8 | -------------------------------------------------------------------------------- /mmdet/core/anchor/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_generator import AnchorGenerator 2 | from .anchor_target import anchor_target, anchor_inside_flags 3 | from .guided_anchor_target import ga_loc_target, ga_shape_target 4 | 5 | __all__ = [ 6 | 'AnchorGenerator', 'anchor_target', 'anchor_inside_flags', 'ga_loc_target', 7 | 'ga_shape_target' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/core/anchor/anchor_generator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class AnchorGenerator(object): 5 | 6 | def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None): 7 | self.base_size = base_size 8 | self.scales = torch.Tensor(scales) 9 | self.ratios = torch.Tensor(ratios) 10 | self.scale_major = scale_major 11 | self.ctr = ctr 12 | self.base_anchors = self.gen_base_anchors() 13 | 14 | @property 15 | def num_base_anchors(self): 16 | return self.base_anchors.size(0) 17 | 18 | def gen_base_anchors(self): 19 | w = self.base_size 20 | h = self.base_size 21 | if self.ctr is None: 22 | x_ctr = 0.5 * (w - 1) 23 | y_ctr = 0.5 * (h - 1) 24 | else: 25 | x_ctr, y_ctr = self.ctr 26 | 27 | h_ratios = torch.sqrt(self.ratios) 28 | w_ratios = 1 / h_ratios 29 | if self.scale_major: 30 | ws = (w * w_ratios[:, None] * self.scales[None, :]).view(-1) 31 | hs = (h * h_ratios[:, None] * self.scales[None, :]).view(-1) 32 | else: 33 | ws = (w * self.scales[:, None] * w_ratios[None, :]).view(-1) 34 | hs = (h * self.scales[:, None] * h_ratios[None, :]).view(-1) 35 | 36 | base_anchors = torch.stack( 37 | [ 38 | x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1), 39 | x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1) 40 | ], 41 | dim=-1).round() 42 | 43 | return base_anchors 44 | 45 | def _meshgrid(self, x, y, row_major=True): 46 | xx = x.repeat(len(y)) 47 | yy = y.view(-1, 1).repeat(1, len(x)).view(-1) 48 | if row_major: 49 | return xx, yy 50 | else: 51 | return yy, xx 52 | 53 | def grid_anchors(self, featmap_size, stride=16, device='cuda'): 54 | base_anchors = self.base_anchors.to(device) 55 | 56 | feat_h, feat_w = featmap_size 57 | shift_x = torch.arange(0, feat_w, device=device) * stride 58 | shift_y = torch.arange(0, feat_h, device=device) * stride 59 | shift_xx, shift_yy = self._meshgrid(shift_x, shift_y) 60 | shifts = torch.stack([shift_xx, shift_yy, shift_xx, 
shift_yy], dim=-1) 61 | shifts = shifts.type_as(base_anchors) 62 | # first feat_w elements correspond to the first row of shifts 63 | # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get 64 | # shifted anchors (K, A, 4), reshape to (K*A, 4) 65 | 66 | all_anchors = base_anchors[None, :, :] + shifts[:, None, :] 67 | all_anchors = all_anchors.view(-1, 4) 68 | # first A rows correspond to A anchors of (0, 0) in feature map, 69 | # then (0, 1), (0, 2), ... 70 | return all_anchors 71 | 72 | def valid_flags(self, featmap_size, valid_size, device='cuda'): 73 | feat_h, feat_w = featmap_size 74 | valid_h, valid_w = valid_size 75 | assert valid_h <= feat_h and valid_w <= feat_w 76 | valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device) 77 | valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device) 78 | valid_x[:valid_w] = 1 79 | valid_y[:valid_h] = 1 80 | valid_xx, valid_yy = self._meshgrid(valid_x, valid_y) 81 | valid = valid_xx & valid_yy 82 | valid = valid[:, None].expand( 83 | valid.size(0), self.num_base_anchors).contiguous().view(-1) 84 | return valid 85 | -------------------------------------------------------------------------------- /mmdet/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | from .geometry import bbox_overlaps 2 | from .assigners import BaseAssigner, MaxIoUAssigner, AssignResult 3 | from .samplers import (BaseSampler, PseudoSampler, RandomSampler, 4 | InstanceBalancedPosSampler, IoUBalancedNegSampler, 5 | CombinedSampler, SamplingResult) 6 | from .assign_sampling import build_assigner, build_sampler, assign_and_sample 7 | from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping, 8 | bbox_mapping_back, bbox2roi, roi2bbox, bbox2result, 9 | distance2bbox) 10 | from .bbox_target import bbox_target 11 | 12 | __all__ = [ 13 | 'bbox_overlaps', 'BaseAssigner', 'MaxIoUAssigner', 'AssignResult', 14 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 15 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 16 | 'SamplingResult', 'build_assigner', 'build_sampler', 'assign_and_sample', 17 | 'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_mapping', 18 | 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', 19 | 'distance2bbox', 'bbox_target' 20 | ] 21 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assign_sampling.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from . 
import assigners, samplers 4 | 5 | 6 | def build_assigner(cfg, **kwargs): 7 | if isinstance(cfg, assigners.BaseAssigner): 8 | return cfg 9 | elif isinstance(cfg, dict): 10 | return mmcv.runner.obj_from_dict(cfg, assigners, default_args=kwargs) 11 | else: 12 | raise TypeError('Invalid type {} for building an assigner'.format( 13 | type(cfg))) 14 | 15 | 16 | def build_sampler(cfg, **kwargs): 17 | if isinstance(cfg, samplers.BaseSampler): 18 | return cfg 19 | elif isinstance(cfg, dict): 20 | return mmcv.runner.obj_from_dict(cfg, samplers, default_args=kwargs) 21 | else: 22 | raise TypeError('Invalid type {} for building a sampler'.format( 23 | type(cfg))) 24 | 25 | 26 | def assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg): 27 | bbox_assigner = build_assigner(cfg.assigner) 28 | bbox_sampler = build_sampler(cfg.sampler) 29 | assign_result = bbox_assigner.assign(bboxes, gt_bboxes, gt_bboxes_ignore, 30 | gt_labels) 31 | sampling_result = bbox_sampler.sample(assign_result, bboxes, gt_bboxes, 32 | gt_labels) 33 | return assign_result, sampling_result 34 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_assigner import BaseAssigner 2 | from .max_iou_assigner import MaxIoUAssigner 3 | from .approx_max_iou_assigner import ApproxMaxIoUAssigner 4 | from .assign_result import AssignResult 5 | 6 | __all__ = [ 7 | 'BaseAssigner', 'MaxIoUAssigner', 'ApproxMaxIoUAssigner', 'AssignResult' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/assign_result.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class AssignResult(object): 5 | 6 | def __init__(self, num_gts, gt_inds, max_overlaps, labels=None): 7 | self.num_gts = num_gts 8 | self.gt_inds = gt_inds 9 | self.max_overlaps = max_overlaps 10 | self.labels = labels 11 | 12 | def add_gt_(self, gt_labels): 13 | self_inds = torch.arange( 14 | 1, len(gt_labels) + 1, dtype=torch.long, device=gt_labels.device) 15 | self.gt_inds = torch.cat([self_inds, self.gt_inds]) 16 | self.max_overlaps = torch.cat( 17 | [self.max_overlaps.new_ones(self.num_gts), self.max_overlaps]) 18 | if self.labels is not None: 19 | self.labels = torch.cat([gt_labels, self.labels]) 20 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/base_assigner.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class BaseAssigner(metaclass=ABCMeta): 5 | 6 | @abstractmethod 7 | def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None): 8 | pass 9 | -------------------------------------------------------------------------------- /mmdet/core/bbox/bbox_target.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .transforms import bbox2delta 4 | from ..utils import multi_apply 5 | 6 | 7 | def bbox_target(pos_bboxes_list, 8 | neg_bboxes_list, 9 | pos_gt_bboxes_list, 10 | pos_gt_labels_list, 11 | cfg, 12 | reg_classes=1, 13 | target_means=[.0, .0, .0, .0], 14 | target_stds=[1.0, 1.0, 1.0, 1.0], 15 | concat=True): 16 | labels, label_weights, bbox_targets, bbox_weights = multi_apply( 17 | bbox_target_single, 18 | pos_bboxes_list, 19 | neg_bboxes_list, 20 | pos_gt_bboxes_list,
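# multi_apply maps bbox_target_single over these per-image lists and
# regroups the per-image tuples into one list per return value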
21 | pos_gt_labels_list, 22 | cfg=cfg, 23 | reg_classes=reg_classes, 24 | target_means=target_means, 25 | target_stds=target_stds) 26 | 27 | if concat: 28 | labels = torch.cat(labels, 0) 29 | label_weights = torch.cat(label_weights, 0) 30 | bbox_targets = torch.cat(bbox_targets, 0) 31 | bbox_weights = torch.cat(bbox_weights, 0) 32 | return labels, label_weights, bbox_targets, bbox_weights 33 | 34 | 35 | def bbox_target_single(pos_bboxes, 36 | neg_bboxes, 37 | pos_gt_bboxes, 38 | pos_gt_labels, 39 | cfg, 40 | reg_classes=1, 41 | target_means=[.0, .0, .0, .0], 42 | target_stds=[1.0, 1.0, 1.0, 1.0]): 43 | num_pos = pos_bboxes.size(0) 44 | num_neg = neg_bboxes.size(0) 45 | num_samples = num_pos + num_neg 46 | labels = pos_bboxes.new_zeros(num_samples, dtype=torch.long) 47 | label_weights = pos_bboxes.new_zeros(num_samples) 48 | bbox_targets = pos_bboxes.new_zeros(num_samples, 4) 49 | bbox_weights = pos_bboxes.new_zeros(num_samples, 4) 50 | if num_pos > 0: 51 | labels[:num_pos] = pos_gt_labels 52 | pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight 53 | label_weights[:num_pos] = pos_weight 54 | pos_bbox_targets = bbox2delta(pos_bboxes, pos_gt_bboxes, target_means, 55 | target_stds) 56 | bbox_targets[:num_pos, :] = pos_bbox_targets 57 | bbox_weights[:num_pos, :] = 1 58 | if num_neg > 0: 59 | label_weights[-num_neg:] = 1.0 60 | 61 | return labels, label_weights, bbox_targets, bbox_weights 62 | 63 | 64 | def expand_target(bbox_targets, bbox_weights, labels, num_classes): 65 | bbox_targets_expand = bbox_targets.new_zeros((bbox_targets.size(0), 66 | 4 * num_classes)) 67 | bbox_weights_expand = bbox_weights.new_zeros((bbox_weights.size(0), 68 | 4 * num_classes)) 69 | for i in torch.nonzero(labels > 0).squeeze(-1): 70 | start, end = labels[i] * 4, (labels[i] + 1) * 4 71 | bbox_targets_expand[i, start:end] = bbox_targets[i, :] 72 | bbox_weights_expand[i, start:end] = bbox_weights[i, :] 73 | return bbox_targets_expand, bbox_weights_expand 74 | -------------------------------------------------------------------------------- /mmdet/core/bbox/geometry.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False): 5 | """Calculate overlap between two sets of bboxes. 6 | 7 | If ``is_aligned`` is ``False``, then calculate the ious between each bbox 8 | of bboxes1 and bboxes2, otherwise the ious between each aligned pair of 9 | bboxes1 and bboxes2. 10 | 11 | Args: 12 | bboxes1 (Tensor): shape (m, 4) 13 | bboxes2 (Tensor): shape (n, 4), if is_aligned is ``True``, then m and n 14 | must be equal. 15 | mode (str): "iou" (intersection over union) or "iof" (intersection over 16 | foreground).
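Example (an illustrative call; values follow the inclusive +1 pixel
convention used in the computation below):

    >>> b1 = torch.FloatTensor([[0, 0, 10, 10]])
    >>> b2 = torch.FloatTensor([[0, 0, 10, 10], [5, 5, 15, 15]])
    >>> bbox_overlaps(b1, b2)
    tensor([[1.0000, 0.1748]])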
17 | 18 | Returns: 19 | ious(Tensor): shape (m, n) if is_aligned == False else shape (m, 1) 20 | """ 21 | 22 | assert mode in ['iou', 'iof'] 23 | 24 | rows = bboxes1.size(0) 25 | cols = bboxes2.size(0) 26 | if is_aligned: 27 | assert rows == cols 28 | 29 | if rows * cols == 0: 30 | return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols) 31 | 32 | if is_aligned: 33 | lt = torch.max(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2] 34 | rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2] 35 | 36 | wh = (rb - lt + 1).clamp(min=0) # [rows, 2] 37 | overlap = wh[:, 0] * wh[:, 1] 38 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 39 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 40 | 41 | if mode == 'iou': 42 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 43 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 44 | ious = overlap / (area1 + area2 - overlap) 45 | else: 46 | ious = overlap / area1 47 | else: 48 | lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2]) # [rows, cols, 2] 49 | rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:]) # [rows, cols, 2] 50 | 51 | wh = (rb - lt + 1).clamp(min=0) # [rows, cols, 2] 52 | overlap = wh[:, :, 0] * wh[:, :, 1] 53 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 54 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 55 | 56 | if mode == 'iou': 57 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 58 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 59 | ious = overlap / (area1[:, None] + area2 - overlap) 60 | else: 61 | ious = overlap / (area1[:, None]) 62 | 63 | return ious 64 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_sampler import BaseSampler 2 | from .pseudo_sampler import PseudoSampler 3 | from .random_sampler import RandomSampler 4 | from .instance_balanced_pos_sampler import InstanceBalancedPosSampler 5 | from .iou_balanced_neg_sampler import IoUBalancedNegSampler 6 | from .combined_sampler import CombinedSampler 7 | from .ohem_sampler import OHEMSampler 8 | from .sampling_result import SamplingResult 9 | 10 | __all__ = [ 11 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 12 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 13 | 'OHEMSampler', 'SamplingResult' 14 | ] 15 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/base_sampler.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch 4 | 5 | from .sampling_result import SamplingResult 6 | 7 | 8 | class BaseSampler(metaclass=ABCMeta): 9 | 10 | def __init__(self, 11 | num, 12 | pos_fraction, 13 | neg_pos_ub=-1, 14 | add_gt_as_proposals=True, 15 | **kwargs): 16 | self.num = num 17 | self.pos_fraction = pos_fraction 18 | self.neg_pos_ub = neg_pos_ub 19 | self.add_gt_as_proposals = add_gt_as_proposals 20 | self.pos_sampler = self 21 | self.neg_sampler = self 22 | 23 | @abstractmethod 24 | def _sample_pos(self, assign_result, num_expected, **kwargs): 25 | pass 26 | 27 | @abstractmethod 28 | def _sample_neg(self, assign_result, num_expected, **kwargs): 29 | pass 30 | 31 | def sample(self, 32 | assign_result, 33 | bboxes, 34 | gt_bboxes, 35 | gt_labels=None, 36 | **kwargs): 37 | """Sample positive and negative bboxes. 38 | 39 | This is a simple implementation of bbox sampling given candidates, 40 | assigning results and ground truth bboxes. 
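Up to ``num * pos_fraction`` positives are sampled first, and the
remaining quota is filled with negatives, optionally capped at
``neg_pos_ub`` times the number of sampled positives.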
41 | 42 | Args: 43 | assign_result (:obj:`AssignResult`): Bbox assigning results. 44 | bboxes (Tensor): Boxes to be sampled from. 45 | gt_bboxes (Tensor): Ground truth bboxes. 46 | gt_labels (Tensor, optional): Class labels of ground truth bboxes. 47 | 48 | Returns: 49 | :obj:`SamplingResult`: Sampling result. 50 | """ 51 | bboxes = bboxes[:, :4] 52 | 53 | gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8) 54 | if self.add_gt_as_proposals: 55 | bboxes = torch.cat([gt_bboxes, bboxes], dim=0) 56 | assign_result.add_gt_(gt_labels) 57 | gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8) 58 | gt_flags = torch.cat([gt_ones, gt_flags]) 59 | 60 | num_expected_pos = int(self.num * self.pos_fraction) 61 | pos_inds = self.pos_sampler._sample_pos( 62 | assign_result, num_expected_pos, bboxes=bboxes, **kwargs) 63 | # We found that sampled indices have duplicated items occasionally. 64 | # (may be a bug of PyTorch) 65 | pos_inds = pos_inds.unique() 66 | num_sampled_pos = pos_inds.numel() 67 | num_expected_neg = self.num - num_sampled_pos 68 | if self.neg_pos_ub >= 0: 69 | _pos = max(1, num_sampled_pos) 70 | neg_upper_bound = int(self.neg_pos_ub * _pos) 71 | if num_expected_neg > neg_upper_bound: 72 | num_expected_neg = neg_upper_bound 73 | neg_inds = self.neg_sampler._sample_neg( 74 | assign_result, num_expected_neg, bboxes=bboxes, **kwargs) 75 | neg_inds = neg_inds.unique() 76 | 77 | return SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 78 | assign_result, gt_flags) 79 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/combined_sampler.py: -------------------------------------------------------------------------------- 1 | from .base_sampler import BaseSampler 2 | from ..assign_sampling import build_sampler 3 | 4 | 5 | class CombinedSampler(BaseSampler): 6 | 7 | def __init__(self, pos_sampler, neg_sampler, **kwargs): 8 | super(CombinedSampler, self).__init__(**kwargs) 9 | self.pos_sampler = build_sampler(pos_sampler, **kwargs) 10 | self.neg_sampler = build_sampler(neg_sampler, **kwargs) 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .random_sampler import RandomSampler 5 | 6 | 7 | class InstanceBalancedPosSampler(RandomSampler): 8 | 9 | def _sample_pos(self, assign_result, num_expected, **kwargs): 10 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 11 | if pos_inds.numel() != 0: 12 | pos_inds = pos_inds.squeeze(1) 13 | if pos_inds.numel() <= num_expected: 14 | return pos_inds 15 | else: 16 | unique_gt_inds = assign_result.gt_inds[pos_inds].unique() 17 | num_gts = len(unique_gt_inds) 18 | num_per_gt = int(round(num_expected / float(num_gts)) + 1) 19 | sampled_inds = [] 20 | for i in unique_gt_inds: 21 | inds = torch.nonzero(assign_result.gt_inds == i.item()) 22 | if inds.numel() != 0: 23 | inds = inds.squeeze(1) 24 | else: 25 | continue 26 | if len(inds) > num_per_gt: 27 | inds = self.random_choice(inds, num_per_gt) 28 | sampled_inds.append(inds) 29 | sampled_inds = torch.cat(sampled_inds) 30 | if len(sampled_inds) < num_expected: 31 | num_extra = num_expected - len(sampled_inds) 32 | extra_inds = np.array( 33 | 
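# top up with positives that the per-gt round above did not pick,
# drawn uniformly at random from the remainder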
list(set(pos_inds.cpu()) - set(sampled_inds.cpu()))) 34 | if len(extra_inds) > num_extra: 35 | extra_inds = self.random_choice(extra_inds, num_extra) 36 | extra_inds = torch.from_numpy(extra_inds).to( 37 | assign_result.gt_inds.device).long() 38 | sampled_inds = torch.cat([sampled_inds, extra_inds]) 39 | elif len(sampled_inds) > num_expected: 40 | sampled_inds = self.random_choice(sampled_inds, num_expected) 41 | return sampled_inds 42 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/ohem_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sampler import BaseSampler 4 | from ..transforms import bbox2roi 5 | 6 | 7 | class OHEMSampler(BaseSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | context, 13 | neg_pos_ub=-1, 14 | add_gt_as_proposals=True, 15 | **kwargs): 16 | super(OHEMSampler, self).__init__(num, pos_fraction, neg_pos_ub, 17 | add_gt_as_proposals) 18 | if not hasattr(context, 'num_stages'): 19 | self.bbox_roi_extractor = context.bbox_roi_extractor 20 | self.bbox_head = context.bbox_head 21 | else: 22 | self.bbox_roi_extractor = context.bbox_roi_extractor[ 23 | context.current_stage] 24 | self.bbox_head = context.bbox_head[context.current_stage] 25 | 26 | def hard_mining(self, inds, num_expected, bboxes, labels, feats): 27 | with torch.no_grad(): 28 | rois = bbox2roi([bboxes]) 29 | bbox_feats = self.bbox_roi_extractor( 30 | feats[:self.bbox_roi_extractor.num_inputs], rois) 31 | cls_score, _ = self.bbox_head(bbox_feats) 32 | loss = self.bbox_head.loss( 33 | cls_score=cls_score, 34 | bbox_pred=None, 35 | labels=labels, 36 | label_weights=cls_score.new_ones(cls_score.size(0)), 37 | bbox_targets=None, 38 | bbox_weights=None, 39 | reduction_override='none')['loss_cls'] 40 | _, topk_loss_inds = loss.topk(num_expected) 41 | return inds[topk_loss_inds] 42 | 43 | def _sample_pos(self, 44 | assign_result, 45 | num_expected, 46 | bboxes=None, 47 | feats=None, 48 | **kwargs): 49 | # Sample some hard positive samples 50 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 51 | if pos_inds.numel() != 0: 52 | pos_inds = pos_inds.squeeze(1) 53 | if pos_inds.numel() <= num_expected: 54 | return pos_inds 55 | else: 56 | return self.hard_mining(pos_inds, num_expected, bboxes[pos_inds], 57 | assign_result.labels[pos_inds], feats) 58 | 59 | def _sample_neg(self, 60 | assign_result, 61 | num_expected, 62 | bboxes=None, 63 | feats=None, 64 | **kwargs): 65 | # Sample some hard negative samples 66 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 67 | if neg_inds.numel() != 0: 68 | neg_inds = neg_inds.squeeze(1) 69 | if len(neg_inds) <= num_expected: 70 | return neg_inds 71 | else: 72 | return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds], 73 | assign_result.labels[neg_inds], feats) 74 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/pseudo_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sampler import BaseSampler 4 | from .sampling_result import SamplingResult 5 | 6 | 7 | class PseudoSampler(BaseSampler): 8 | 9 | def __init__(self, **kwargs): 10 | pass 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | 18 | def sample(self, assign_result, bboxes, gt_bboxes, **kwargs): 19 | pos_inds = 
torch.nonzero( 20 | assign_result.gt_inds > 0).squeeze(-1).unique() 21 | neg_inds = torch.nonzero( 22 | assign_result.gt_inds == 0).squeeze(-1).unique() 23 | gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8) 24 | sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 25 | assign_result, gt_flags) 26 | return sampling_result 27 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/random_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .base_sampler import BaseSampler 5 | 6 | 7 | class RandomSampler(BaseSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | neg_pos_ub=-1, 13 | add_gt_as_proposals=True, 14 | **kwargs): 15 | super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub, 16 | add_gt_as_proposals) 17 | 18 | @staticmethod 19 | def random_choice(gallery, num): 20 | """Randomly select some elements from the gallery. 21 | 22 | It seems that PyTorch's implementation is slower than numpy so we use 23 | numpy to randperm the indices. 24 | """ 25 | assert len(gallery) >= num 26 | if isinstance(gallery, list): 27 | gallery = np.array(gallery) 28 | cands = np.arange(len(gallery)) 29 | np.random.shuffle(cands) 30 | rand_inds = cands[:num] 31 | if not isinstance(gallery, np.ndarray): 32 | rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device) 33 | return gallery[rand_inds] 34 | 35 | def _sample_pos(self, assign_result, num_expected, **kwargs): 36 | """Randomly sample some positive samples.""" 37 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 38 | if pos_inds.numel() != 0: 39 | pos_inds = pos_inds.squeeze(1) 40 | if pos_inds.numel() <= num_expected: 41 | return pos_inds 42 | else: 43 | return self.random_choice(pos_inds, num_expected) 44 | 45 | def _sample_neg(self, assign_result, num_expected, **kwargs): 46 | """Randomly sample some negative samples.""" 47 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 48 | if neg_inds.numel() != 0: 49 | neg_inds = neg_inds.squeeze(1) 50 | if len(neg_inds) <= num_expected: 51 | return neg_inds 52 | else: 53 | return self.random_choice(neg_inds, num_expected) 54 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/sampling_result.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class SamplingResult(object): 5 | 6 | def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result, 7 | gt_flags): 8 | self.pos_inds = pos_inds 9 | self.neg_inds = neg_inds 10 | self.pos_bboxes = bboxes[pos_inds] 11 | self.neg_bboxes = bboxes[neg_inds] 12 | self.pos_is_gt = gt_flags[pos_inds] 13 | 14 | self.num_gts = gt_bboxes.shape[0] 15 | self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1 16 | self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds, :] 17 | if assign_result.labels is not None: 18 | self.pos_gt_labels = assign_result.labels[pos_inds] 19 | else: 20 | self.pos_gt_labels = None 21 | 22 | @property 23 | def bboxes(self): 24 | return torch.cat([self.pos_bboxes, self.neg_bboxes]) 25 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .class_names import (voc_classes, imagenet_det_classes, 2 | imagenet_vid_classes, coco_classes, dataset_aliases, 3 |
get_classes) 4 | from .coco_utils import coco_eval, fast_eval_recall, results2json 5 | from .eval_hooks import (DistEvalHook, DistEvalmAPHook, CocoDistEvalRecallHook, 6 | CocoDistEvalmAPHook) 7 | from .mean_ap import average_precision, eval_map, print_map_summary 8 | from .recall import (eval_recalls, print_recall_summary, plot_num_recall, 9 | plot_iou_recall) 10 | 11 | __all__ = [ 12 | 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes', 13 | 'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval', 14 | 'fast_eval_recall', 'results2json', 'DistEvalHook', 'DistEvalmAPHook', 15 | 'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision', 16 | 'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary', 17 | 'plot_num_recall', 'plot_iou_recall' 18 | ] 19 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/bbox_overlaps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou'): 5 | """Calculate the ious between each bbox of bboxes1 and bboxes2. 6 | 7 | Args: 8 | bboxes1(ndarray): shape (n, 4) 9 | bboxes2(ndarray): shape (k, 4) 10 | mode(str): iou (intersection over union) or iof (intersection 11 | over foreground) 12 | 13 | Returns: 14 | ious(ndarray): shape (n, k) 15 | """ 16 | 17 | assert mode in ['iou', 'iof'] 18 | 19 | bboxes1 = bboxes1.astype(np.float32) 20 | bboxes2 = bboxes2.astype(np.float32) 21 | rows = bboxes1.shape[0] 22 | cols = bboxes2.shape[0] 23 | ious = np.zeros((rows, cols), dtype=np.float32) 24 | if rows * cols == 0: 25 | return ious 26 | exchange = False 27 | if bboxes1.shape[0] > bboxes2.shape[0]: 28 | bboxes1, bboxes2 = bboxes2, bboxes1 29 | ious = np.zeros((cols, rows), dtype=np.float32) 30 | exchange = True 31 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 32 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 33 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 34 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 35 | for i in range(bboxes1.shape[0]): 36 | x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0]) 37 | y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1]) 38 | x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2]) 39 | y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3]) 40 | overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum( 41 | y_end - y_start + 1, 0) 42 | if mode == 'iou': 43 | union = area1[i] + area2 - overlap 44 | else: 45 | union = area1[i] if not exchange else area2 46 | ious[i, :] = overlap / union 47 | if exchange: 48 | ious = ious.T 49 | return ious 50 | -------------------------------------------------------------------------------- /mmdet/core/fp16/__init__.py: -------------------------------------------------------------------------------- 1 | from .decorators import auto_fp16, force_fp32 2 | from .hooks import Fp16OptimizerHook, wrap_fp16_model 3 | 4 | __all__ = ['auto_fp16', 'force_fp32', 'Fp16OptimizerHook', 'wrap_fp16_model'] 5 | -------------------------------------------------------------------------------- /mmdet/core/fp16/utils.py: -------------------------------------------------------------------------------- 1 | from collections import abc 2 | 3 | import numpy as np 4 | import torch 5 | 6 | 7 | def cast_tensor_type(inputs, src_type, dst_type): 8 | if isinstance(inputs, torch.Tensor): 9 | return inputs.to(dst_type) 10 | elif isinstance(inputs, str): 11 | return inputs 12 | elif isinstance(inputs, np.ndarray): 13 | return inputs 14 | elif isinstance(inputs, 
abc.Mapping): 15 | return type(inputs)({ 16 | k: cast_tensor_type(v, src_type, dst_type) 17 | for k, v in inputs.items() 18 | }) 19 | elif isinstance(inputs, abc.Iterable): 20 | return type(inputs)( 21 | cast_tensor_type(item, src_type, dst_type) for item in inputs) 22 | else: 23 | return inputs 24 | -------------------------------------------------------------------------------- /mmdet/core/mask/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import split_combined_polys 2 | from .mask_target import mask_target 3 | 4 | __all__ = ['split_combined_polys', 'mask_target'] 5 | -------------------------------------------------------------------------------- /mmdet/core/mask/mask_target.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import mmcv 4 | 5 | 6 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list, 7 | cfg): 8 | cfg_list = [cfg for _ in range(len(pos_proposals_list))] 9 | mask_targets = map(mask_target_single, pos_proposals_list, 10 | pos_assigned_gt_inds_list, gt_masks_list, cfg_list) 11 | mask_targets = torch.cat(list(mask_targets)) 12 | return mask_targets 13 | 14 | 15 | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg): 16 | mask_size = cfg.mask_size 17 | num_pos = pos_proposals.size(0) 18 | mask_targets = [] 19 | if num_pos > 0: 20 | proposals_np = pos_proposals.cpu().numpy() 21 | pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy() 22 | for i in range(num_pos): 23 | gt_mask = gt_masks[pos_assigned_gt_inds[i]] 24 | bbox = proposals_np[i, :].astype(np.int32) 25 | x1, y1, x2, y2 = bbox 26 | w = np.maximum(x2 - x1 + 1, 1) 27 | h = np.maximum(y2 - y1 + 1, 1) 28 | # mask is uint8 both before and after resizing 29 | target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w], 30 | (mask_size, mask_size)) 31 | mask_targets.append(target) 32 | mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to( 33 | pos_proposals.device) 34 | else: 35 | mask_targets = pos_proposals.new_zeros((0, mask_size, mask_size)) 36 | return mask_targets 37 | -------------------------------------------------------------------------------- /mmdet/core/mask/utils.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | 4 | def split_combined_polys(polys, poly_lens, polys_per_mask): 5 | """Split the combined 1-D polys into masks. 6 | 7 | A mask is represented as a list of polys, and a poly is represented as 8 | a 1-D array. In dataset, all masks are concatenated into a single 1-D 9 | tensor. Here we need to split the tensor into original representations. 
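For example, an image whose flattened tensor holds two polygons of
lengths 4 and 6 that belong to a single mask would have poly_lens
[4, 6] and polys_per_mask [2] for that image.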
10 | 11 | Args: 12 | polys (list): a list (length = image num) of 1-D tensors 13 | poly_lens (list): a list (length = image num) of poly length 14 | polys_per_mask (list): a list (length = image num) of poly number 15 | of each mask 16 | 17 | Returns: 18 | list: a list (length = image num) of list (length = mask num) of 19 | list (length = poly num) of numpy array 20 | """ 21 | mask_polys_list = [] 22 | for img_id in range(len(polys)): 23 | polys_single = polys[img_id] 24 | polys_lens_single = poly_lens[img_id].tolist() 25 | polys_per_mask_single = polys_per_mask[img_id].tolist() 26 | 27 | split_polys = mmcv.slice_list(polys_single, polys_lens_single) 28 | mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single) 29 | mask_polys_list.append(mask_polys) 30 | return mask_polys_list 31 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_nms import multiclass_nms 2 | from .merge_augs import (merge_aug_proposals, merge_aug_bboxes, 3 | merge_aug_scores, merge_aug_masks) 4 | 5 | __all__ = [ 6 | 'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes', 7 | 'merge_aug_scores', 'merge_aug_masks' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/bbox_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.ops.nms import nms_wrapper 4 | 5 | 6 | def multiclass_nms(multi_bboxes, 7 | multi_scores, 8 | score_thr, 9 | nms_cfg, 10 | max_num=-1, 11 | score_factors=None): 12 | """NMS for multi-class bboxes. 13 | 14 | Args: 15 | multi_bboxes (Tensor): shape (n, #class*4) or (n, 4) 16 | multi_scores (Tensor): shape (n, #class) 17 | score_thr (float): bbox threshold, bboxes with scores lower than it 18 | will not be considered. 19 | nms_cfg (dict): NMS config, e.g. dict(type='nms', iou_thr=0.5) 20 | max_num (int): if there are more than max_num bboxes after NMS, 21 | only top max_num will be kept. 22 | score_factors (Tensor): The factors multiplied to scores before 23 | applying NMS 24 | 25 | Returns: 26 | tuple: (bboxes, labels), tensors of shape (k, 5) and (k, ). Labels 27 | are 0-based.
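Example (a sketch mirroring the test_cfg convention used in the
configs; ``bboxes`` and ``scores`` are placeholder tensors):

    >>> dets, labels = multiclass_nms(
    ...     bboxes, scores, score_thr=0.05,
    ...     nms_cfg=dict(type='nms', iou_thr=0.5), max_num=100)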
28 | """ 29 | num_classes = multi_scores.shape[1] 30 | bboxes, labels = [], [] 31 | nms_cfg_ = nms_cfg.copy() 32 | nms_type = nms_cfg_.pop('type', 'nms') 33 | nms_op = getattr(nms_wrapper, nms_type) 34 | for i in range(1, num_classes): 35 | cls_inds = multi_scores[:, i] > score_thr 36 | if not cls_inds.any(): 37 | continue 38 | # get bboxes and scores of this class 39 | if multi_bboxes.shape[1] == 4: 40 | _bboxes = multi_bboxes[cls_inds, :] 41 | else: 42 | _bboxes = multi_bboxes[cls_inds, i * 4:(i + 1) * 4] 43 | _scores = multi_scores[cls_inds, i] 44 | if score_factors is not None: 45 | _scores *= score_factors[cls_inds] 46 | cls_dets = torch.cat([_bboxes, _scores[:, None]], dim=1) 47 | cls_dets, _ = nms_op(cls_dets, **nms_cfg_) 48 | cls_labels = multi_bboxes.new_full( 49 | (cls_dets.shape[0], ), i - 1, dtype=torch.long) 50 | bboxes.append(cls_dets) 51 | labels.append(cls_labels) 52 | if bboxes: 53 | bboxes = torch.cat(bboxes) 54 | labels = torch.cat(labels) 55 | if bboxes.shape[0] > max_num: 56 | _, inds = bboxes[:, -1].sort(descending=True) 57 | inds = inds[:max_num] 58 | bboxes = bboxes[inds] 59 | labels = labels[inds] 60 | else: 61 | bboxes = multi_bboxes.new_zeros((0, 5)) 62 | labels = multi_bboxes.new_zeros((0, ), dtype=torch.long) 63 | 64 | return bboxes, labels 65 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/merge_augs.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import numpy as np 4 | 5 | from mmdet.ops import nms 6 | from ..bbox import bbox_mapping_back 7 | 8 | 9 | def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg): 10 | """Merge augmented proposals (multiscale, flip, etc.) 11 | 12 | Args: 13 | aug_proposals (list[Tensor]): proposals from different testing 14 | schemes, shape (n, 5). Note that they are not rescaled to the 15 | original image size. 16 | img_metas (list[dict]): image info including "shape_scale" and "flip". 17 | rpn_test_cfg (dict): rpn test config. 18 | 19 | Returns: 20 | Tensor: shape (n, 4), proposals corresponding to original image scale. 21 | """ 22 | recovered_proposals = [] 23 | for proposals, img_info in zip(aug_proposals, img_metas): 24 | img_shape = img_info['img_shape'] 25 | scale_factor = img_info['scale_factor'] 26 | flip = img_info['flip'] 27 | _proposals = proposals.clone() 28 | _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], img_shape, 29 | scale_factor, flip) 30 | recovered_proposals.append(_proposals) 31 | aug_proposals = torch.cat(recovered_proposals, dim=0) 32 | merged_proposals, _ = nms(aug_proposals, rpn_test_cfg.nms_thr) 33 | scores = merged_proposals[:, 4] 34 | _, order = scores.sort(0, descending=True) 35 | num = min(rpn_test_cfg.max_num, merged_proposals.shape[0]) 36 | order = order[:num] 37 | merged_proposals = merged_proposals[order, :] 38 | return merged_proposals 39 | 40 | 41 | def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg): 42 | """Merge augmented detection bboxes and scores. 43 | 44 | Args: 45 | aug_bboxes (list[Tensor]): shape (n, 4*#class) 46 | aug_scores (list[Tensor] or None): shape (n, #class) 47 | img_shapes (list[Tensor]): shape (3, ). 48 | rcnn_test_cfg (dict): rcnn test config. 
49 | 50 | Returns: 51 | tuple: (bboxes, scores), or bboxes alone if aug_scores is None. 52 | """ 53 | recovered_bboxes = [] 54 | for bboxes, img_info in zip(aug_bboxes, img_metas): 55 | img_shape = img_info[0]['img_shape'] 56 | scale_factor = img_info[0]['scale_factor'] 57 | flip = img_info[0]['flip'] 58 | bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip) 59 | recovered_bboxes.append(bboxes) 60 | bboxes = torch.stack(recovered_bboxes).mean(dim=0) 61 | if aug_scores is None: 62 | return bboxes 63 | else: 64 | scores = torch.stack(aug_scores).mean(dim=0) 65 | return bboxes, scores 66 | 67 | 68 | def merge_aug_scores(aug_scores): 69 | """Merge augmented bbox scores.""" 70 | if isinstance(aug_scores[0], torch.Tensor): 71 | return torch.mean(torch.stack(aug_scores), dim=0) 72 | else: 73 | return np.mean(aug_scores, axis=0) 74 | 75 | 76 | def merge_aug_masks(aug_masks, img_metas, rcnn_test_cfg, weights=None): 77 | """Merge augmented mask prediction. 78 | 79 | Args: 80 | aug_masks (list[ndarray]): shape (n, #class, h, w) 81 | img_metas (list[list[dict]]): image info including "flip". 82 | rcnn_test_cfg (dict): rcnn test config. 83 | 84 | Returns: 85 | ndarray: merged masks, with the same shape as a single input mask. 86 | """ 87 | recovered_masks = [ 88 | mask if not img_info[0]['flip'] else mask[..., ::-1] 89 | for mask, img_info in zip(aug_masks, img_metas) 90 | ] 91 | if weights is None: 92 | merged_masks = np.mean(recovered_masks, axis=0) 93 | else: 94 | merged_masks = np.average( 95 | np.array(recovered_masks), axis=0, weights=np.array(weights)) 96 | return merged_masks 97 | -------------------------------------------------------------------------------- /mmdet/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .dist_utils import allreduce_grads, DistOptimizerHook 2 | from .misc import tensor2imgs, unmap, multi_apply 3 | 4 | __all__ = [ 5 | 'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', 'unmap', 6 | 'multi_apply' 7 | ] 8 | -------------------------------------------------------------------------------- /mmdet/core/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch.distributed as dist 4 | from torch._utils import (_flatten_dense_tensors, _unflatten_dense_tensors, 5 | _take_tensors) 6 | from mmcv.runner import OptimizerHook 7 | 8 | 9 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): 10 | if bucket_size_mb > 0: 11 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 12 | buckets = _take_tensors(tensors, bucket_size_bytes) 13 | else: 14 | buckets = OrderedDict() 15 | for tensor in tensors: 16 | tp = tensor.type() 17 | if tp not in buckets: 18 | buckets[tp] = [] 19 | buckets[tp].append(tensor) 20 | buckets = buckets.values() 21 | 22 | for bucket in buckets: 23 | flat_tensors = _flatten_dense_tensors(bucket) 24 | dist.all_reduce(flat_tensors) 25 | flat_tensors.div_(world_size) 26 | for tensor, synced in zip( 27 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)): 28 | tensor.copy_(synced) 29 | 30 | 31 | def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): 32 | grads = [ 33 | param.grad.data for param in params 34 | if param.requires_grad and param.grad is not None 35 | ] 36 | world_size = dist.get_world_size() 37 | if coalesce: 38 | _allreduce_coalesced(grads, world_size, bucket_size_mb) 39 | else: 40 | for tensor in grads: 41 | dist.all_reduce(tensor.div_(world_size)) 42 | 43 | 44 | class DistOptimizerHook(OptimizerHook): 45 | 46 | def __init__(self, grad_clip=None,
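# a negative bucket_size_mb keeps the fallback behaviour of
# _allreduce_coalesced: gradients are bucketed by tensor type
# rather than by a fixed byte budget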
coalesce=True, bucket_size_mb=-1): 47 | self.grad_clip = grad_clip 48 | self.coalesce = coalesce 49 | self.bucket_size_mb = bucket_size_mb 50 | 51 | def after_train_iter(self, runner): 52 | runner.optimizer.zero_grad() 53 | runner.outputs['loss'].backward() 54 | allreduce_grads(runner.model.parameters(), self.coalesce, 55 | self.bucket_size_mb) 56 | if self.grad_clip is not None: 57 | self.clip_grads(runner.model.parameters()) 58 | runner.optimizer.step() 59 | -------------------------------------------------------------------------------- /mmdet/core/utils/misc.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import mmcv 4 | import numpy as np 5 | from six.moves import map, zip 6 | 7 | 8 | def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): 9 | num_imgs = tensor.size(0) 10 | mean = np.array(mean, dtype=np.float32) 11 | std = np.array(std, dtype=np.float32) 12 | imgs = [] 13 | for img_id in range(num_imgs): 14 | img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0) 15 | img = mmcv.imdenormalize( 16 | img, mean, std, to_bgr=to_rgb).astype(np.uint8) 17 | imgs.append(np.ascontiguousarray(img)) 18 | return imgs 19 | 20 | 21 | def multi_apply(func, *args, **kwargs): 22 | pfunc = partial(func, **kwargs) if kwargs else func 23 | map_results = map(pfunc, *args) 24 | return tuple(map(list, zip(*map_results))) 25 | 26 | 27 | def unmap(data, count, inds, fill=0): 28 | """ Unmap a subset of item (data) back to the original set of items (of 29 | size count) """ 30 | if data.dim() == 1: 31 | ret = data.new_full((count, ), fill) 32 | ret[inds] = data 33 | else: 34 | new_size = (count, ) + data.size()[1:] 35 | ret = data.new_full(new_size, fill) 36 | ret[inds, :] = data 37 | return ret 38 | -------------------------------------------------------------------------------- /mmdet/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .custom import CustomDataset 2 | from .xml_style import XMLDataset 3 | from .coco import CocoDataset 4 | from .voc import VOCDataset 5 | from .wider_face import WIDERFaceDataset 6 | from .loader import GroupSampler, DistributedGroupSampler, build_dataloader 7 | from .utils import to_tensor, random_scale, show_ann, get_dataset 8 | from .concat_dataset import ConcatDataset 9 | from .repeat_dataset import RepeatDataset 10 | from .extra_aug import ExtraAugmentation 11 | 12 | __all__ = [ 13 | 'CustomDataset', 'XMLDataset', 'CocoDataset', 'VOCDataset', 'GroupSampler', 14 | 'DistributedGroupSampler', 'build_dataloader', 'to_tensor', 'random_scale', 15 | 'show_ann', 'get_dataset', 'ConcatDataset', 'RepeatDataset', 16 | 'ExtraAugmentation', 'WIDERFaceDataset' 17 | ] 18 | -------------------------------------------------------------------------------- /mmdet/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 3 | 4 | 5 | class ConcatDataset(_ConcatDataset): 6 | """A wrapper of concatenated dataset. 7 | 8 | Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but 9 | concat the group flag for image aspect ratio. 10 | 11 | Args: 12 | datasets (list[:obj:`Dataset`]): A list of datasets. 
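Note that CLASSES is taken from the first dataset, and the per-image
aspect-ratio flags, when present, are concatenated in dataset order.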
13 | """ 14 | 15 | def __init__(self, datasets): 16 | super(ConcatDataset, self).__init__(datasets) 17 | self.CLASSES = datasets[0].CLASSES 18 | if hasattr(datasets[0], 'flag'): 19 | flags = [] 20 | for i in range(0, len(datasets)): 21 | flags.append(datasets[i].flag) 22 | self.flag = np.concatenate(flags) 23 | -------------------------------------------------------------------------------- /mmdet/datasets/loader/__init__.py: -------------------------------------------------------------------------------- 1 | from .build_loader import build_dataloader 2 | from .sampler import GroupSampler, DistributedGroupSampler 3 | 4 | __all__ = ['GroupSampler', 'DistributedGroupSampler', 'build_dataloader'] 5 | -------------------------------------------------------------------------------- /mmdet/datasets/loader/build_loader.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | from mmcv.runner import get_dist_info 4 | from mmcv.parallel import collate 5 | from torch.utils.data import DataLoader 6 | 7 | from .sampler import GroupSampler, DistributedGroupSampler, DistributedSampler 8 | 9 | # https://github.com/pytorch/pytorch/issues/973 10 | import resource 11 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 12 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 13 | 14 | 15 | def build_dataloader(dataset, 16 | imgs_per_gpu, 17 | workers_per_gpu, 18 | num_gpus=1, 19 | dist=True, 20 | **kwargs): 21 | shuffle = kwargs.get('shuffle', True) 22 | if dist: 23 | rank, world_size = get_dist_info() 24 | if shuffle: 25 | sampler = DistributedGroupSampler(dataset, imgs_per_gpu, 26 | world_size, rank) 27 | else: 28 | sampler = DistributedSampler( 29 | dataset, world_size, rank, shuffle=False) 30 | batch_size = imgs_per_gpu 31 | num_workers = workers_per_gpu 32 | else: 33 | sampler = GroupSampler(dataset, imgs_per_gpu) if shuffle else None 34 | batch_size = num_gpus * imgs_per_gpu 35 | num_workers = num_gpus * workers_per_gpu 36 | 37 | data_loader = DataLoader( 38 | dataset, 39 | batch_size=batch_size, 40 | sampler=sampler, 41 | num_workers=num_workers, 42 | collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu), 43 | pin_memory=False, 44 | **kwargs) 45 | 46 | return data_loader 47 | -------------------------------------------------------------------------------- /mmdet/datasets/repeat_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class RepeatDataset(object): 5 | 6 | def __init__(self, dataset, times): 7 | self.dataset = dataset 8 | self.times = times 9 | self.CLASSES = dataset.CLASSES 10 | if hasattr(self.dataset, 'flag'): 11 | self.flag = np.tile(self.dataset.flag, times) 12 | 13 | self._ori_len = len(self.dataset) 14 | 15 | def __getitem__(self, idx): 16 | return self.dataset[idx % self._ori_len] 17 | 18 | def __len__(self): 19 | return self.times * self._ori_len 20 | -------------------------------------------------------------------------------- /mmdet/datasets/voc.py: -------------------------------------------------------------------------------- 1 | from .xml_style import XMLDataset 2 | 3 | 4 | class VOCDataset(XMLDataset): 5 | 6 | CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 7 | 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 8 | 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 9 | 'tvmonitor') 10 | 11 | def __init__(self, **kwargs): 12 | super(VOCDataset, self).__init__(**kwargs) 13 | 
if 'VOC2007' in self.img_prefix: 14 | self.year = 2007 15 | elif 'VOC2012' in self.img_prefix: 16 | self.year = 2012 17 | else: 18 | raise ValueError('Cannot infer dataset year from img_prefix') 19 | -------------------------------------------------------------------------------- /mmdet/datasets/wider_face.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import xml.etree.ElementTree as ET 3 | 4 | import mmcv 5 | 6 | from .xml_style import XMLDataset 7 | 8 | 9 | class WIDERFaceDataset(XMLDataset): 10 | """ 11 | Reader for the WIDER Face dataset in PASCAL VOC format. 12 | Conversion scripts can be found in 13 | https://github.com/sovrasov/wider-face-pascal-voc-annotations 14 | """ 15 | CLASSES = ('face',) 16 | 17 | def __init__(self, **kwargs): 18 | super(WIDERFaceDataset, self).__init__(**kwargs) 19 | 20 | def load_annotations(self, ann_file): 21 | img_infos = [] 22 | img_ids = mmcv.list_from_file(ann_file) 23 | for img_id in img_ids: 24 | filename = '{}.jpg'.format(img_id) 25 | xml_path = osp.join(self.img_prefix, 'Annotations', 26 | '{}.xml'.format(img_id)) 27 | tree = ET.parse(xml_path) 28 | root = tree.getroot() 29 | size = root.find('size') 30 | width = int(size.find('width').text) 31 | height = int(size.find('height').text) 32 | folder = root.find('folder').text 33 | img_infos.append( 34 | dict(id=img_id, filename=osp.join(folder, filename), 35 | width=width, height=height)) 36 | 37 | return img_infos 38 | -------------------------------------------------------------------------------- /mmdet/datasets/xml_style.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import xml.etree.ElementTree as ET 3 | 4 | import mmcv 5 | import numpy as np 6 | 7 | from .custom import CustomDataset 8 | 9 | 10 | class XMLDataset(CustomDataset): 11 | 12 | def __init__(self, min_size=None, **kwargs): 13 | super(XMLDataset, self).__init__(**kwargs) 14 | self.cat2label = {cat: i + 1 for i, cat in enumerate(self.CLASSES)} 15 | self.min_size = min_size 16 | 17 | def load_annotations(self, ann_file): 18 | img_infos = [] 19 | img_ids = mmcv.list_from_file(ann_file) 20 | for img_id in img_ids: 21 | filename = 'JPEGImages/{}.jpg'.format(img_id) 22 | xml_path = osp.join(self.img_prefix, 'Annotations', 23 | '{}.xml'.format(img_id)) 24 | tree = ET.parse(xml_path) 25 | root = tree.getroot() 26 | size = root.find('size') 27 | width = int(size.find('width').text) 28 | height = int(size.find('height').text) 29 | img_infos.append( 30 | dict(id=img_id, filename=filename, width=width, height=height)) 31 | return img_infos 32 | 33 | def get_ann_info(self, idx): 34 | img_id = self.img_infos[idx]['id'] 35 | xml_path = osp.join(self.img_prefix, 'Annotations', 36 | '{}.xml'.format(img_id)) 37 | tree = ET.parse(xml_path) 38 | root = tree.getroot() 39 | bboxes = [] 40 | labels = [] 41 | bboxes_ignore = [] 42 | labels_ignore = [] 43 | for obj in root.findall('object'): 44 | name = obj.find('name').text 45 | label = self.cat2label[name] 46 | difficult = int(obj.find('difficult').text) 47 | bnd_box = obj.find('bndbox') 48 | bbox = [ 49 | int(bnd_box.find('xmin').text), 50 | int(bnd_box.find('ymin').text), 51 | int(bnd_box.find('xmax').text), 52 | int(bnd_box.find('ymax').text) 53 | ] 54 | ignore = False 55 | if self.min_size: 56 | assert not self.test_mode 57 | w = bbox[2] - bbox[0] 58 | h = bbox[3] - bbox[1] 59 | if w < self.min_size or h < self.min_size: 60 | ignore = True 61 | if difficult or ignore: 62 
| bboxes_ignore.append(bbox) 63 | labels_ignore.append(label) 64 | else: 65 | bboxes.append(bbox) 66 | labels.append(label) 67 | if not bboxes: 68 | bboxes = np.zeros((0, 4)) 69 | labels = np.zeros((0, )) 70 | else: 71 | bboxes = np.array(bboxes, ndmin=2) - 1 72 | labels = np.array(labels) 73 | if not bboxes_ignore: 74 | bboxes_ignore = np.zeros((0, 4)) 75 | labels_ignore = np.zeros((0, )) 76 | else: 77 | bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1 78 | labels_ignore = np.array(labels_ignore) 79 | ann = dict( 80 | bboxes=bboxes.astype(np.float32), 81 | labels=labels.astype(np.int64), 82 | bboxes_ignore=bboxes_ignore.astype(np.float32), 83 | labels_ignore=labels_ignore.astype(np.int64)) 84 | return ann 85 | -------------------------------------------------------------------------------- /mmdet/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import * # noqa: F401,F403 2 | from .necks import * # noqa: F401,F403 3 | from .roi_extractors import * # noqa: F401,F403 4 | from .anchor_heads import * # noqa: F401,F403 5 | from .shared_heads import * # noqa: F401,F403 6 | from .bbox_heads import * # noqa: F401,F403 7 | from .mask_heads import * # noqa: F401,F403 8 | from .losses import * # noqa: F401,F403 9 | from .detectors import * # noqa: F401,F403 10 | from .registry import (BACKBONES, NECKS, ROI_EXTRACTORS, SHARED_HEADS, HEADS, 11 | LOSSES, DETECTORS) 12 | from .builder import (build_backbone, build_neck, build_roi_extractor, 13 | build_shared_head, build_head, build_loss, 14 | build_detector) 15 | 16 | __all__ = [ 17 | 'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS', 'LOSSES', 18 | 'DETECTORS', 'build_backbone', 'build_neck', 'build_roi_extractor', 19 | 'build_shared_head', 'build_head', 'build_loss', 'build_detector' 20 | ] 21 | -------------------------------------------------------------------------------- /mmdet/models/anchor_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_head import AnchorHead 2 | from .guided_anchor_head import GuidedAnchorHead, FeatureAdaption 3 | from .fcos_head import FCOSHead 4 | from .rpn_head import RPNHead 5 | from .ga_rpn_head import GARPNHead 6 | from .retina_head import RetinaHead 7 | from .ga_retina_head import GARetinaHead 8 | from .ssd_head import SSDHead 9 | 10 | __all__ = [ 11 | 'AnchorHead', 'GuidedAnchorHead', 'FeatureAdaption', 'RPNHead', 12 | 'GARPNHead', 'RetinaHead', 'GARetinaHead', 'SSDHead', 'FCOSHead' 13 | ] 14 | -------------------------------------------------------------------------------- /mmdet/models/anchor_heads/retina_head.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | from mmcv.cnn import normal_init 4 | 5 | from .anchor_head import AnchorHead 6 | from ..registry import HEADS 7 | from ..utils import bias_init_with_prob, ConvModule 8 | 9 | 10 | @HEADS.register_module 11 | class RetinaHead(AnchorHead): 12 | 13 | def __init__(self, 14 | num_classes, 15 | in_channels, 16 | stacked_convs=4, 17 | octave_base_scale=4, 18 | scales_per_octave=3, 19 | conv_cfg=None, 20 | norm_cfg=None, 21 | **kwargs): 22 | self.stacked_convs = stacked_convs 23 | self.octave_base_scale = octave_base_scale 24 | self.scales_per_octave = scales_per_octave 25 | self.conv_cfg = conv_cfg 26 | self.norm_cfg = norm_cfg 27 | octave_scales = np.array( 28 | [2**(i / scales_per_octave) for i in range(scales_per_octave)]) 29 
| anchor_scales = octave_scales * octave_base_scale 30 | super(RetinaHead, self).__init__( 31 | num_classes, in_channels, anchor_scales=anchor_scales, **kwargs) 32 | 33 | def _init_layers(self): 34 | self.relu = nn.ReLU(inplace=True) 35 | self.cls_convs = nn.ModuleList() 36 | self.reg_convs = nn.ModuleList() 37 | for i in range(self.stacked_convs): 38 | chn = self.in_channels if i == 0 else self.feat_channels 39 | self.cls_convs.append( 40 | ConvModule( 41 | chn, 42 | self.feat_channels, 43 | 3, 44 | stride=1, 45 | padding=1, 46 | conv_cfg=self.conv_cfg, 47 | norm_cfg=self.norm_cfg)) 48 | self.reg_convs.append( 49 | ConvModule( 50 | chn, 51 | self.feat_channels, 52 | 3, 53 | stride=1, 54 | padding=1, 55 | conv_cfg=self.conv_cfg, 56 | norm_cfg=self.norm_cfg)) 57 | self.retina_cls = nn.Conv2d( 58 | self.feat_channels, 59 | self.num_anchors * self.cls_out_channels, 60 | 3, 61 | padding=1) 62 | self.retina_reg = nn.Conv2d( 63 | self.feat_channels, self.num_anchors * 4, 3, padding=1) 64 | 65 | def init_weights(self): 66 | for m in self.cls_convs: 67 | normal_init(m.conv, std=0.01) 68 | for m in self.reg_convs: 69 | normal_init(m.conv, std=0.01) 70 | bias_cls = bias_init_with_prob(0.01) 71 | normal_init(self.retina_cls, std=0.01, bias=bias_cls) 72 | normal_init(self.retina_reg, std=0.01) 73 | 74 | def forward_single(self, x): 75 | cls_feat = x 76 | reg_feat = x 77 | for cls_conv in self.cls_convs: 78 | cls_feat = cls_conv(cls_feat) 79 | for reg_conv in self.reg_convs: 80 | reg_feat = reg_conv(reg_feat) 81 | cls_score = self.retina_cls(cls_feat) 82 | bbox_pred = self.retina_reg(reg_feat) 83 | return cls_score, bbox_pred 84 | -------------------------------------------------------------------------------- /mmdet/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import ResNet, make_res_layer 2 | from .resnext import ResNeXt 3 | from .ssd_vgg import SSDVGG 4 | from .hrnet import HRNet 5 | 6 | __all__ = ['ResNet', 'make_res_layer', 'ResNeXt', 'SSDVGG', 'HRNet'] 7 | -------------------------------------------------------------------------------- /mmdet/models/bbox_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_head import BBoxHead 2 | from .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead 3 | 4 | __all__ = ['BBoxHead', 'ConvFCBBoxHead', 'SharedFCBBoxHead'] 5 | -------------------------------------------------------------------------------- /mmdet/models/builder.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | from torch import nn 3 | 4 | from .registry import (BACKBONES, NECKS, ROI_EXTRACTORS, SHARED_HEADS, HEADS, 5 | LOSSES, DETECTORS) 6 | 7 | 8 | def _build_module(cfg, registry, default_args): 9 | assert isinstance(cfg, dict) and 'type' in cfg 10 | assert isinstance(default_args, dict) or default_args is None 11 | args = cfg.copy() 12 | obj_type = args.pop('type') 13 | if mmcv.is_str(obj_type): 14 | if obj_type not in registry.module_dict: 15 | raise KeyError('{} is not in the {} registry'.format( 16 | obj_type, registry.name)) 17 | obj_type = registry.module_dict[obj_type] 18 | elif not isinstance(obj_type, type): 19 | raise TypeError('type must be a str or valid type, but got {}'.format( 20 | type(obj_type))) 21 | if default_args is not None: 22 | for name, value in default_args.items(): 23 | args.setdefault(name, value) 24 | return obj_type(**args) 25 | 26 | 27 | def build(cfg, registry, 
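# default_args threads shared kwargs (e.g. the train_cfg/test_cfg
# injected by build_detector below) into every constructed module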
default_args=None): 28 | if isinstance(cfg, list): 29 | modules = [_build_module(cfg_, registry, default_args) for cfg_ in cfg] 30 | return nn.Sequential(*modules) 31 | else: 32 | return _build_module(cfg, registry, default_args) 33 | 34 | 35 | def build_backbone(cfg): 36 | return build(cfg, BACKBONES) 37 | 38 | 39 | def build_neck(cfg): 40 | return build(cfg, NECKS) 41 | 42 | 43 | def build_roi_extractor(cfg): 44 | return build(cfg, ROI_EXTRACTORS) 45 | 46 | 47 | def build_shared_head(cfg): 48 | return build(cfg, SHARED_HEADS) 49 | 50 | 51 | def build_head(cfg): 52 | return build(cfg, HEADS) 53 | 54 | 55 | def build_loss(cfg): 56 | return build(cfg, LOSSES) 57 | 58 | 59 | def build_detector(cfg, train_cfg=None, test_cfg=None): 60 | return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg)) 61 | -------------------------------------------------------------------------------- /mmdet/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseDetector 2 | from .single_stage import SingleStageDetector 3 | from .two_stage import TwoStageDetector 4 | from .rpn import RPN 5 | from .fast_rcnn import FastRCNN 6 | from .faster_rcnn import FasterRCNN 7 | from .mask_rcnn import MaskRCNN 8 | from .cascade_rcnn import CascadeRCNN 9 | from .htc import HybridTaskCascade 10 | from .retinanet import RetinaNet 11 | from .fcos import FCOS 12 | from .grid_rcnn import GridRCNN 13 | from .mask_scoring_rcnn import MaskScoringRCNN 14 | 15 | __all__ = [ 16 | 'BaseDetector', 'SingleStageDetector', 'TwoStageDetector', 'RPN', 17 | 'FastRCNN', 'FasterRCNN', 'MaskRCNN', 'CascadeRCNN', 'HybridTaskCascade', 18 | 'RetinaNet', 'FCOS', 'GridRCNN', 'MaskScoringRCNN' 19 | ] 20 | -------------------------------------------------------------------------------- /mmdet/models/detectors/fast_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage import TwoStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class FastRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | bbox_roi_extractor, 11 | bbox_head, 12 | train_cfg, 13 | test_cfg, 14 | neck=None, 15 | shared_head=None, 16 | mask_roi_extractor=None, 17 | mask_head=None, 18 | pretrained=None): 19 | super(FastRCNN, self).__init__( 20 | backbone=backbone, 21 | neck=neck, 22 | shared_head=shared_head, 23 | bbox_roi_extractor=bbox_roi_extractor, 24 | bbox_head=bbox_head, 25 | train_cfg=train_cfg, 26 | test_cfg=test_cfg, 27 | mask_roi_extractor=mask_roi_extractor, 28 | mask_head=mask_head, 29 | pretrained=pretrained) 30 | 31 | def forward_test(self, imgs, img_metas, proposals, **kwargs): 32 | for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]: 33 | if not isinstance(var, list): 34 | raise TypeError('{} must be a list, but got {}'.format( 35 | name, type(var))) 36 | 37 | num_augs = len(imgs) 38 | if num_augs != len(img_metas): 39 | raise ValueError( 40 | 'num of augmentations ({}) != num of image meta ({})'.format( 41 | len(imgs), len(img_metas))) 42 | # TODO: remove the restriction of imgs_per_gpu == 1 when prepared 43 | imgs_per_gpu = imgs[0].size(0) 44 | assert imgs_per_gpu == 1 45 | 46 | if num_augs == 1: 47 | return self.simple_test(imgs[0], img_metas[0], proposals[0], 48 | **kwargs) 49 | else: 50 | return self.aug_test(imgs, img_metas, proposals, **kwargs) 51 | -------------------------------------------------------------------------------- 
/mmdet/models/detectors/faster_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage import TwoStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class FasterRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | rpn_head, 11 | bbox_roi_extractor, 12 | bbox_head, 13 | train_cfg, 14 | test_cfg, 15 | neck=None, 16 | shared_head=None, 17 | pretrained=None): 18 | super(FasterRCNN, self).__init__( 19 | backbone=backbone, 20 | neck=neck, 21 | shared_head=shared_head, 22 | rpn_head=rpn_head, 23 | bbox_roi_extractor=bbox_roi_extractor, 24 | bbox_head=bbox_head, 25 | train_cfg=train_cfg, 26 | test_cfg=test_cfg, 27 | pretrained=pretrained) 28 | -------------------------------------------------------------------------------- /mmdet/models/detectors/fcos.py: -------------------------------------------------------------------------------- 1 | from .single_stage import SingleStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class FCOS(SingleStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head, 12 | train_cfg=None, 13 | test_cfg=None, 14 | pretrained=None): 15 | super(FCOS, self).__init__(backbone, neck, bbox_head, train_cfg, 16 | test_cfg, pretrained) 17 | -------------------------------------------------------------------------------- /mmdet/models/detectors/mask_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage import TwoStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class MaskRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | rpn_head, 11 | bbox_roi_extractor, 12 | bbox_head, 13 | mask_roi_extractor, 14 | mask_head, 15 | train_cfg, 16 | test_cfg, 17 | neck=None, 18 | shared_head=None, 19 | pretrained=None): 20 | super(MaskRCNN, self).__init__( 21 | backbone=backbone, 22 | neck=neck, 23 | shared_head=shared_head, 24 | rpn_head=rpn_head, 25 | bbox_roi_extractor=bbox_roi_extractor, 26 | bbox_head=bbox_head, 27 | mask_roi_extractor=mask_roi_extractor, 28 | mask_head=mask_head, 29 | train_cfg=train_cfg, 30 | test_cfg=test_cfg, 31 | pretrained=pretrained) 32 | -------------------------------------------------------------------------------- /mmdet/models/detectors/retinanet.py: -------------------------------------------------------------------------------- 1 | from .single_stage import SingleStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class RetinaNet(SingleStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head, 12 | train_cfg=None, 13 | test_cfg=None, 14 | pretrained=None): 15 | super(RetinaNet, self).__init__(backbone, neck, bbox_head, train_cfg, 16 | test_cfg, pretrained) 17 | -------------------------------------------------------------------------------- /mmdet/models/detectors/rpn.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from mmdet.core import tensor2imgs, bbox_mapping 4 | from .base import BaseDetector 5 | from .test_mixins import RPNTestMixin 6 | from .. 
import builder 7 | from ..registry import DETECTORS 8 | 9 | 10 | @DETECTORS.register_module 11 | class RPN(BaseDetector, RPNTestMixin): 12 | 13 | def __init__(self, 14 | backbone, 15 | neck, 16 | rpn_head, 17 | train_cfg, 18 | test_cfg, 19 | pretrained=None): 20 | super(RPN, self).__init__() 21 | self.backbone = builder.build_backbone(backbone) 22 | self.neck = builder.build_neck(neck) if neck is not None else None 23 | self.rpn_head = builder.build_head(rpn_head) 24 | self.train_cfg = train_cfg 25 | self.test_cfg = test_cfg 26 | self.init_weights(pretrained=pretrained) 27 | 28 | def init_weights(self, pretrained=None): 29 | super(RPN, self).init_weights(pretrained) 30 | self.backbone.init_weights(pretrained=pretrained) 31 | if self.with_neck: 32 | self.neck.init_weights() 33 | self.rpn_head.init_weights() 34 | 35 | def extract_feat(self, img): 36 | x = self.backbone(img) 37 | if self.with_neck: 38 | x = self.neck(x) 39 | return x 40 | 41 | def forward_train(self, 42 | img, 43 | img_meta, 44 | gt_bboxes=None, 45 | gt_bboxes_ignore=None): 46 | if self.train_cfg.rpn.get('debug', False): 47 | self.rpn_head.debug_imgs = tensor2imgs(img) 48 | 49 | x = self.extract_feat(img) 50 | rpn_outs = self.rpn_head(x) 51 | 52 | rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta, self.train_cfg.rpn) 53 | losses = self.rpn_head.loss( 54 | *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) 55 | return losses 56 | 57 | def simple_test(self, img, img_meta, rescale=False): 58 | x = self.extract_feat(img) 59 | proposal_list = self.simple_test_rpn(x, img_meta, self.test_cfg.rpn) 60 | if rescale: 61 | for proposals, meta in zip(proposal_list, img_meta): 62 | proposals[:, :4] /= meta['scale_factor'] 63 | # TODO: remove this restriction 64 | return proposal_list[0].cpu().numpy() 65 | 66 | def aug_test(self, imgs, img_metas, rescale=False): 67 | proposal_list = self.aug_test_rpn( 68 | self.extract_feats(imgs), img_metas, self.test_cfg.rpn) 69 | if not rescale: 70 | for proposals, img_meta in zip(proposal_list, img_metas[0]): 71 | img_shape = img_meta['img_shape'] 72 | scale_factor = img_meta['scale_factor'] 73 | flip = img_meta['flip'] 74 | proposals[:, :4] = bbox_mapping(proposals[:, :4], img_shape, 75 | scale_factor, flip) 76 | # TODO: remove this restriction 77 | return proposal_list[0].cpu().numpy() 78 | 79 | def show_result(self, data, result, img_norm_cfg, dataset=None, top_k=20): 80 | """Show RPN proposals on the image. 81 | 82 | Although we assume batch size is 1, this method supports arbitrary 83 | batch size. 84 | """ 85 | img_tensor = data['img'][0] 86 | img_metas = data['img_meta'][0].data[0] 87 | imgs = tensor2imgs(img_tensor, **img_norm_cfg) 88 | assert len(imgs) == len(img_metas) 89 | for img, img_meta in zip(imgs, img_metas): 90 | h, w, _ = img_meta['img_shape'] 91 | img_show = img[:h, :w, :] 92 | mmcv.imshow_bboxes(img_show, result, top_k=top_k) 93 | -------------------------------------------------------------------------------- /mmdet/models/detectors/single_stage.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from .base import BaseDetector 4 | from .. 
import builder 5 | from ..registry import DETECTORS 6 | from mmdet.core import bbox2result 7 | 8 | 9 | @DETECTORS.register_module 10 | class SingleStageDetector(BaseDetector): 11 | 12 | def __init__(self, 13 | backbone, 14 | neck=None, 15 | bbox_head=None, 16 | train_cfg=None, 17 | test_cfg=None, 18 | pretrained=None): 19 | super(SingleStageDetector, self).__init__() 20 | self.backbone = builder.build_backbone(backbone) 21 | if neck is not None: 22 | self.neck = builder.build_neck(neck) 23 | self.bbox_head = builder.build_head(bbox_head) 24 | self.train_cfg = train_cfg 25 | self.test_cfg = test_cfg 26 | self.init_weights(pretrained=pretrained) 27 | 28 | def init_weights(self, pretrained=None): 29 | super(SingleStageDetector, self).init_weights(pretrained) 30 | self.backbone.init_weights(pretrained=pretrained) 31 | if self.with_neck: 32 | if isinstance(self.neck, nn.Sequential): 33 | for m in self.neck: 34 | m.init_weights() 35 | else: 36 | self.neck.init_weights() 37 | self.bbox_head.init_weights() 38 | 39 | def extract_feat(self, img): 40 | x = self.backbone(img) 41 | if self.with_neck: 42 | x = self.neck(x) 43 | return x 44 | 45 | def forward_train(self, 46 | img, 47 | img_metas, 48 | gt_bboxes, 49 | gt_labels, 50 | gt_bboxes_ignore=None): 51 | x = self.extract_feat(img) 52 | outs = self.bbox_head(x) 53 | loss_inputs = outs + (gt_bboxes, gt_labels, img_metas, self.train_cfg) 54 | losses = self.bbox_head.loss( 55 | *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) 56 | return losses 57 | 58 | def simple_test(self, img, img_meta, rescale=False): 59 | x = self.extract_feat(img) 60 | outs = self.bbox_head(x) 61 | bbox_inputs = outs + (img_meta, self.test_cfg, rescale) 62 | bbox_list = self.bbox_head.get_bboxes(*bbox_inputs) 63 | bbox_results = [ 64 | bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes) 65 | for det_bboxes, det_labels in bbox_list 66 | ] 67 | return bbox_results[0] 68 | 69 | def aug_test(self, imgs, img_metas, rescale=False): 70 | raise NotImplementedError 71 | -------------------------------------------------------------------------------- /mmdet/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .accuracy import accuracy, Accuracy 2 | from .cross_entropy_loss import (cross_entropy, binary_cross_entropy, 3 | mask_cross_entropy, CrossEntropyLoss) 4 | from .focal_loss import sigmoid_focal_loss, FocalLoss 5 | from .smooth_l1_loss import smooth_l1_loss, SmoothL1Loss 6 | from .ghm_loss import GHMC, GHMR 7 | from .balanced_l1_loss import balanced_l1_loss, BalancedL1Loss 8 | from .mse_loss import mse_loss, MSELoss 9 | from .iou_loss import iou_loss, bounded_iou_loss, IoULoss, BoundedIoULoss 10 | from .utils import reduce_loss, weight_reduce_loss, weighted_loss 11 | 12 | __all__ = [ 13 | 'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy', 14 | 'mask_cross_entropy', 'CrossEntropyLoss', 'sigmoid_focal_loss', 15 | 'FocalLoss', 'smooth_l1_loss', 'SmoothL1Loss', 'balanced_l1_loss', 16 | 'BalancedL1Loss', 'mse_loss', 'MSELoss', 'iou_loss', 'bounded_iou_loss', 17 | 'IoULoss', 'BoundedIoULoss', 'GHMC', 'GHMR', 'reduce_loss', 18 | 'weight_reduce_loss', 'weighted_loss' 19 | ] 20 | -------------------------------------------------------------------------------- /mmdet/models/losses/accuracy.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | def accuracy(pred, target, topk=1): 5 | assert isinstance(topk, (int, tuple)) 6 | if 
isinstance(topk, int): 7 | topk = (topk, ) 8 | return_single = True 9 | else: 10 | return_single = False 11 | 12 | maxk = max(topk) 13 | _, pred_label = pred.topk(maxk, dim=1) 14 | pred_label = pred_label.t() 15 | correct = pred_label.eq(target.view(1, -1).expand_as(pred_label)) 16 | 17 | res = [] 18 | for k in topk: 19 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 20 | res.append(correct_k.mul_(100.0 / pred.size(0))) 21 | return res[0] if return_single else res 22 | 23 | 24 | class Accuracy(nn.Module): 25 | 26 | def __init__(self, topk=(1, )): 27 | super().__init__() 28 | self.topk = topk 29 | 30 | def forward(self, pred, target): 31 | return accuracy(pred, target, self.topk) 32 | -------------------------------------------------------------------------------- /mmdet/models/losses/balanced_l1_loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | from .utils import weighted_loss 6 | from ..registry import LOSSES 7 | 8 | 9 | @weighted_loss 10 | def balanced_l1_loss(pred, 11 | target, 12 | beta=1.0, 13 | alpha=0.5, 14 | gamma=1.5, 15 | reduction='mean'): 16 | assert beta > 0 17 | assert pred.size() == target.size() and target.numel() > 0 18 | 19 | diff = torch.abs(pred - target) 20 | b = np.e**(gamma / alpha) - 1 21 | loss = torch.where( 22 | diff < beta, alpha / b * 23 | (b * diff + 1) * torch.log(b * diff / beta + 1) - alpha * diff, 24 | gamma * diff + gamma / b - alpha * beta) 25 | 26 | return loss 27 | 28 | 29 | @LOSSES.register_module 30 | class BalancedL1Loss(nn.Module): 31 | """Balanced L1 Loss 32 | 33 | arXiv: https://arxiv.org/pdf/1904.02701.pdf (CVPR 2019) 34 | """ 35 | 36 | def __init__(self, 37 | alpha=0.5, 38 | gamma=1.5, 39 | beta=1.0, 40 | reduction='mean', 41 | loss_weight=1.0): 42 | super(BalancedL1Loss, self).__init__() 43 | self.alpha = alpha 44 | self.gamma = gamma 45 | self.beta = beta 46 | self.reduction = reduction 47 | self.loss_weight = loss_weight 48 | 49 | def forward(self, 50 | pred, 51 | target, 52 | weight=None, 53 | avg_factor=None, 54 | reduction_override=None, 55 | **kwargs): 56 | assert reduction_override in (None, 'none', 'mean', 'sum') 57 | reduction = ( 58 | reduction_override if reduction_override else self.reduction) 59 | loss_bbox = self.loss_weight * balanced_l1_loss( 60 | pred, 61 | target, 62 | weight, 63 | alpha=self.alpha, 64 | gamma=self.gamma, 65 | beta=self.beta, 66 | reduction=reduction, 67 | avg_factor=avg_factor, 68 | **kwargs) 69 | return loss_bbox 70 | -------------------------------------------------------------------------------- /mmdet/models/losses/cross_entropy_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .utils import weight_reduce_loss 6 | from ..registry import LOSSES 7 | 8 | 9 | def cross_entropy(pred, label, weight=None, reduction='mean', avg_factor=None): 10 | # element-wise losses 11 | loss = F.cross_entropy(pred, label, reduction='none') 12 | 13 | # apply weights and do the reduction 14 | if weight is not None: 15 | weight = weight.float() 16 | loss = weight_reduce_loss( 17 | loss, weight=weight, reduction=reduction, avg_factor=avg_factor) 18 | 19 | return loss 20 | 21 | 22 | def _expand_binary_labels(labels, label_weights, label_channels): 23 | bin_labels = labels.new_full((labels.size(0), label_channels), 0) 24 | inds = torch.nonzero(labels >= 1).squeeze() 25 | if 
inds.numel() > 0: 26 | bin_labels[inds, labels[inds] - 1] = 1 27 | if label_weights is None: 28 | bin_label_weights = None 29 | else: 30 | bin_label_weights = label_weights.view(-1, 1).expand( 31 | label_weights.size(0), label_channels) 32 | return bin_labels, bin_label_weights 33 | 34 | 35 | def binary_cross_entropy(pred, 36 | label, 37 | weight=None, 38 | reduction='mean', 39 | avg_factor=None): 40 | if pred.dim() != label.dim(): 41 | label, weight = _expand_binary_labels(label, weight, pred.size(-1)) 42 | 43 | # weighted element-wise losses 44 | if weight is not None: 45 | weight = weight.float() 46 | loss = F.binary_cross_entropy_with_logits( 47 | pred, label.float(), weight, reduction='none') 48 | # do the reduction for the weighted loss 49 | loss = weight_reduce_loss(loss, reduction=reduction, avg_factor=avg_factor) 50 | 51 | return loss 52 | 53 | 54 | def mask_cross_entropy(pred, target, label, reduction='mean', avg_factor=None): 55 | # TODO: handle these two reserved arguments 56 | assert reduction == 'mean' and avg_factor is None 57 | num_rois = pred.size()[0] 58 | inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device) 59 | pred_slice = pred[inds, label].squeeze(1) 60 | return F.binary_cross_entropy_with_logits( 61 | pred_slice, target, reduction='mean')[None] 62 | 63 | 64 | @LOSSES.register_module 65 | class CrossEntropyLoss(nn.Module): 66 | 67 | def __init__(self, 68 | use_sigmoid=False, 69 | use_mask=False, 70 | reduction='mean', 71 | loss_weight=1.0): 72 | super(CrossEntropyLoss, self).__init__() 73 | assert (use_sigmoid is False) or (use_mask is False) 74 | self.use_sigmoid = use_sigmoid 75 | self.use_mask = use_mask 76 | self.reduction = reduction 77 | self.loss_weight = loss_weight 78 | 79 | if self.use_sigmoid: 80 | self.cls_criterion = binary_cross_entropy 81 | elif self.use_mask: 82 | self.cls_criterion = mask_cross_entropy 83 | else: 84 | self.cls_criterion = cross_entropy 85 | 86 | def forward(self, 87 | cls_score, 88 | label, 89 | weight=None, 90 | avg_factor=None, 91 | reduction_override=None, 92 | **kwargs): 93 | assert reduction_override in (None, 'none', 'mean', 'sum') 94 | reduction = ( 95 | reduction_override if reduction_override else self.reduction) 96 | loss_cls = self.loss_weight * self.cls_criterion( 97 | cls_score, 98 | label, 99 | weight, 100 | reduction=reduction, 101 | avg_factor=avg_factor, 102 | **kwargs) 103 | return loss_cls 104 | -------------------------------------------------------------------------------- /mmdet/models/losses/focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from mmdet.ops import sigmoid_focal_loss as _sigmoid_focal_loss 5 | from .utils import weight_reduce_loss 6 | from ..registry import LOSSES 7 | 8 | 9 | # This method is only for debugging 10 | def py_sigmoid_focal_loss(pred, 11 | target, 12 | weight=None, 13 | gamma=2.0, 14 | alpha=0.25, 15 | reduction='mean', 16 | avg_factor=None): 17 | pred_sigmoid = pred.sigmoid() 18 | target = target.type_as(pred) 19 | pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target) 20 | focal_weight = (alpha * target + (1 - alpha) * 21 | (1 - target)) * pt.pow(gamma) 22 | loss = F.binary_cross_entropy_with_logits( 23 | pred, target, reduction='none') * focal_weight 24 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 25 | return loss 26 | 27 | 28 | def sigmoid_focal_loss(pred, 29 | target, 30 | weight=None, 31 | gamma=2.0, 32 | alpha=0.25, 33 
| reduction='mean', 34 | avg_factor=None): 35 | # Function.apply does not accept keyword arguments, so the decorator 36 | # "weighted_loss" is not applicable 37 | loss = _sigmoid_focal_loss(pred, target, gamma, alpha) 38 | # TODO: find a proper way to handle the shape of weight 39 | if weight is not None: 40 | weight = weight.view(-1, 1) 41 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 42 | return loss 43 | 44 | 45 | @LOSSES.register_module 46 | class FocalLoss(nn.Module): 47 | 48 | def __init__(self, 49 | use_sigmoid=True, 50 | gamma=2.0, 51 | alpha=0.25, 52 | reduction='mean', 53 | loss_weight=1.0): 54 | super(FocalLoss, self).__init__() 55 | assert use_sigmoid is True, 'Only sigmoid focal loss supported now.' 56 | self.use_sigmoid = use_sigmoid 57 | self.gamma = gamma 58 | self.alpha = alpha 59 | self.reduction = reduction 60 | self.loss_weight = loss_weight 61 | 62 | def forward(self, 63 | pred, 64 | target, 65 | weight=None, 66 | avg_factor=None, 67 | reduction_override=None): 68 | assert reduction_override in (None, 'none', 'mean', 'sum') 69 | reduction = ( 70 | reduction_override if reduction_override else self.reduction) 71 | if self.use_sigmoid: 72 | loss_cls = self.loss_weight * sigmoid_focal_loss( 73 | pred, 74 | target, 75 | weight, 76 | gamma=self.gamma, 77 | alpha=self.alpha, 78 | reduction=reduction, 79 | avg_factor=avg_factor) 80 | else: 81 | raise NotImplementedError 82 | return loss_cls 83 | -------------------------------------------------------------------------------- /mmdet/models/losses/mse_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from .utils import weighted_loss 5 | from ..registry import LOSSES 6 | 7 | mse_loss = weighted_loss(F.mse_loss) 8 | 9 | 10 | @LOSSES.register_module 11 | class MSELoss(nn.Module): 12 | 13 | def __init__(self, reduction='mean', loss_weight=1.0): 14 | super().__init__() 15 | self.reduction = reduction 16 | self.loss_weight = loss_weight 17 | 18 | def forward(self, pred, target, weight=None, avg_factor=None): 19 | loss = self.loss_weight * mse_loss( 20 | pred, 21 | target, 22 | weight, 23 | reduction=self.reduction, 24 | avg_factor=avg_factor) 25 | return loss 26 | -------------------------------------------------------------------------------- /mmdet/models/losses/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .utils import weighted_loss 5 | from ..registry import LOSSES 6 | 7 | 8 | @weighted_loss 9 | def smooth_l1_loss(pred, target, beta=1.0): 10 | assert beta > 0 11 | assert pred.size() == target.size() and target.numel() > 0 12 | diff = torch.abs(pred - target) 13 | loss = torch.where(diff < beta, 0.5 * diff * diff / beta, 14 | diff - 0.5 * beta) 15 | return loss 16 | 17 | 18 | @LOSSES.register_module 19 | class SmoothL1Loss(nn.Module): 20 | 21 | def __init__(self, beta=1.0, reduction='mean', loss_weight=1.0): 22 | super(SmoothL1Loss, self).__init__() 23 | self.beta = beta 24 | self.reduction = reduction 25 | self.loss_weight = loss_weight 26 | 27 | def forward(self, 28 | pred, 29 | target, 30 | weight=None, 31 | avg_factor=None, 32 | reduction_override=None, 33 | **kwargs): 34 | assert reduction_override in (None, 'none', 'mean', 'sum') 35 | reduction = ( 36 | reduction_override if reduction_override else self.reduction) 37 | loss_bbox = self.loss_weight * smooth_l1_loss( 38 | pred, 39 | target, 
40 | weight, 41 | beta=self.beta, 42 | reduction=reduction, 43 | avg_factor=avg_factor, 44 | **kwargs) 45 | return loss_bbox 46 | -------------------------------------------------------------------------------- /mmdet/models/losses/utils.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import torch.nn.functional as F 4 | 5 | 6 | def reduce_loss(loss, reduction): 7 | """Reduce loss as specified. 8 | 9 | Args: 10 | loss (Tensor): Elementwise loss tensor. 11 | reduction (str): Options are "none", "mean" and "sum". 12 | 13 | Returns: 14 | Tensor: Reduced loss tensor. 15 | """ 16 | reduction_enum = F._Reduction.get_enum(reduction) 17 | # none: 0, elementwise_mean: 1, sum: 2 18 | if reduction_enum == 0: 19 | return loss 20 | elif reduction_enum == 1: 21 | return loss.mean() 22 | elif reduction_enum == 2: 23 | return loss.sum() 24 | 25 | 26 | def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): 27 | """Apply element-wise weight and reduce loss. 28 | 29 | Args: 30 | loss (Tensor): Element-wise loss. 31 | weight (Tensor): Element-wise weights. 32 | reduction (str): Same as built-in losses of PyTorch. 33 | avg_factor (float): Average factor when computing the mean of losses. 34 | 35 | Returns: 36 | Tensor: Processed loss values. 37 | """ 38 | # if weight is specified, apply element-wise weight 39 | if weight is not None: 40 | loss = loss * weight 41 | 42 | # if avg_factor is not specified, just reduce the loss 43 | if avg_factor is None: 44 | loss = reduce_loss(loss, reduction) 45 | else: 46 | # if reduction is mean, then average the loss by avg_factor 47 | if reduction == 'mean': 48 | loss = loss.sum() / avg_factor 49 | # if reduction is 'none', then do nothing, otherwise raise an error 50 | elif reduction != 'none': 51 | raise ValueError('avg_factor cannot be used with reduction="sum"') 52 | return loss 53 | 54 | 55 | def weighted_loss(loss_func): 56 | """Create a weighted version of a given loss function. 57 | 58 | To use this decorator, the loss function must have the signature like 59 | `loss_func(pred, target, **kwargs)`. The function only needs to compute 60 | element-wise loss without any reduction. This decorator will add weight 61 | and reduction arguments to the function. The decorated function will have 62 | the signature like `loss_func(pred, target, weight=None, reduction='mean', 63 | avg_factor=None, **kwargs)`. 64 | 65 | :Example: 66 | 67 | >>> @weighted_loss 68 | >>> def l1_loss(pred, target): 69 | >>> return (pred - target).abs() 70 | 71 | >>> pred = torch.Tensor([0, 2, 3]) 72 | >>> target = torch.Tensor([1, 1, 1]) 73 | >>> weight = torch.Tensor([1, 0, 1]) 74 | 75 | >>> l1_loss(pred, target) 76 | tensor(1.3333) 77 | >>> l1_loss(pred, target, weight) 78 | tensor(1.) 79 | >>> l1_loss(pred, target, reduction='none') 80 | tensor([1., 1., 2.]) 81 | >>> l1_loss(pred, target, weight, avg_factor=2) 82 | tensor(1.5000) 83 | """ 84 | 85 | @functools.wraps(loss_func) 86 | def wrapper(pred, 87 | target, 88 | weight=None, 89 | reduction='mean', 90 | avg_factor=None, 91 | **kwargs): 92 | # get element-wise loss 93 | loss = loss_func(pred, target, **kwargs) 94 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 95 | return loss 96 | 97 | return wrapper 98 |
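The decorator above is what gives every loss in this directory its uniform `weight`/`reduction`/`avg_factor` interface: `smooth_l1_loss` and `balanced_l1_loss` only compute element-wise values and inherit the rest from `weighted_loss`. A quick numeric check of the resulting call pattern (an editor's sketch, not repository code; the tensors are made up):

    import torch
    from mmdet.models.losses import smooth_l1_loss

    pred = torch.tensor([0.0, 2.0, 3.0])
    target = torch.tensor([1.0, 1.0, 1.0])
    # |diff| = [1, 1, 2]; with beta=1.0 the linear branch gives [0.5, 0.5, 1.5]
    loss = smooth_l1_loss(pred, target, reduction='mean')  # tensor(0.8333)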
-------------------------------------------------------------------------------- /mmdet/models/mask_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcn_mask_head import FCNMaskHead 2 | from .fused_semantic_head import FusedSemanticHead 3 | from .grid_head import GridHead 4 | from .htc_mask_head import HTCMaskHead 5 | from .maskiou_head import MaskIoUHead 6 | 7 | __all__ = [ 8 | 'FCNMaskHead', 'HTCMaskHead', 'FusedSemanticHead', 'GridHead', 9 | 'MaskIoUHead' 10 | ] 11 | -------------------------------------------------------------------------------- /mmdet/models/mask_heads/htc_mask_head.py: -------------------------------------------------------------------------------- 1 | from .fcn_mask_head import FCNMaskHead 2 | from ..registry import HEADS 3 | from ..utils import ConvModule 4 | 5 | 6 | @HEADS.register_module 7 | class HTCMaskHead(FCNMaskHead): 8 | 9 | def __init__(self, *args, **kwargs): 10 | super(HTCMaskHead, self).__init__(*args, **kwargs) 11 | self.conv_res = ConvModule( 12 | self.conv_out_channels, 13 | self.conv_out_channels, 14 | 1, 15 | conv_cfg=self.conv_cfg, 16 | norm_cfg=self.norm_cfg) 17 | 18 | def init_weights(self): 19 | super(HTCMaskHead, self).init_weights() 20 | self.conv_res.init_weights() 21 | 22 | def forward(self, x, res_feat=None, return_logits=True, return_feat=True): 23 | if res_feat is not None: 24 | res_feat = self.conv_res(res_feat) 25 | x = x + res_feat 26 | for conv in self.convs: 27 | x = conv(x) 28 | res_feat = x 29 | outs = [] 30 | if return_logits: 31 | x = self.upsample(x) 32 | if self.upsample_method == 'deconv': 33 | x = self.relu(x) 34 | mask_pred = self.conv_logits(x) 35 | outs.append(mask_pred) 36 | if return_feat: 37 | outs.append(res_feat) 38 | return outs if len(outs) > 1 else outs[0] 39 | -------------------------------------------------------------------------------- /mmdet/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .fpn import FPN 2 | from .bfp import BFP 3 | from .hrfpn import HRFPN 4 | 5 | __all__ = ['FPN', 'BFP', 'HRFPN'] 6 | -------------------------------------------------------------------------------- /mmdet/models/necks/hrfpn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.utils.checkpoint import checkpoint 5 | from mmcv.cnn.weight_init import caffe2_xavier_init 6 | 7 | from ..utils import ConvModule 8 | from ..registry import NECKS 9 | 10 | 11 | @NECKS.register_module 12 | class HRFPN(nn.Module): 13 | """HRFPN (High Resolution Feature Pyramids) 14 | 15 | arXiv: https://arxiv.org/abs/1904.04514 16 | 17 | Args: 18 | in_channels (list): number of channels for each branch. 19 | out_channels (int): output channels of feature pyramids. 20 | num_outs (int): number of output stages. 21 | pooling_type (str): pooling used to generate the feature pyramids, 22 | one of {MAX, AVG}. 23 | conv_cfg (dict): dictionary used to construct and configure conv layers. 24 | norm_cfg (dict): dictionary used to construct and configure norm layers. 25 | with_cp (bool): Use checkpoint or not. Using checkpoint will save some 26 | memory while slowing down the training speed. 27 | """ 28 | 29 | def __init__(self, 30 | in_channels, 31 | out_channels, 32 | num_outs=5, 33 | pooling_type='AVG', 34 | conv_cfg=None, 35 | norm_cfg=None, 36 | with_cp=False): 37 | super(HRFPN, self).__init__() 38 | assert isinstance(in_channels, list) 39 | self.in_channels = in_channels 40 | self.out_channels = out_channels 41 | self.num_ins = len(in_channels) 42 | self.num_outs = num_outs 43 | self.with_cp = with_cp 44 | self.conv_cfg = conv_cfg 45 | self.norm_cfg = norm_cfg 46 | 47 | self.reduction_conv = ConvModule( 48 | sum(in_channels), 49 | out_channels, 50 | kernel_size=1, 51 | conv_cfg=self.conv_cfg, 52 | activation=None) 53 | 54 | self.fpn_convs = nn.ModuleList() 55 | for i in range(self.num_outs): 56 | self.fpn_convs.append( 57 | ConvModule( 58 | out_channels, 59 | out_channels, 60 | kernel_size=3, 61 | padding=1, 62 | conv_cfg=self.conv_cfg, 63 | activation=None)) 64 | 65 | if pooling_type == 'MAX': 66 | self.pooling = F.max_pool2d 67 | else: 68 | self.pooling = F.avg_pool2d 69 | 70 | def init_weights(self): 71 | for m in self.modules(): 72 | if isinstance(m, nn.Conv2d): 73 | caffe2_xavier_init(m) 74 | 75 | def forward(self, inputs): 76 | assert len(inputs) == self.num_ins 77 | outs = [inputs[0]] 78 | for i in range(1, self.num_ins): 79 | outs.append( 80 | F.interpolate(inputs[i], scale_factor=2**i, mode='bilinear')) 81 | out = torch.cat(outs, dim=1) 82 | if out.requires_grad and self.with_cp: 83 | out = checkpoint(self.reduction_conv, out) 84 | else: 85 | out = self.reduction_conv(out) 86 | outs = [out] 87 | for i in range(1, self.num_outs): 88 | outs.append(self.pooling(out, kernel_size=2**i, stride=2**i)) 89 | outputs = [] 90 | 91 | for i in range(self.num_outs): 92 | if outs[i].requires_grad and self.with_cp: 93 | tmp_out = checkpoint(self.fpn_convs[i], outs[i]) 94 | else: 95 | tmp_out = self.fpn_convs[i](outs[i]) 96 | outputs.append(tmp_out) 97 | return tuple(outputs) 98 |
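Unlike FPN, HRFPN first upsamples every branch to the highest resolution, concatenates them, reduces the result to `out_channels` with a 1x1 conv, and then derives the lower pyramid levels by pooling that single map. A shape sketch (editor's example; the channel list matches HRNetV2p-W32 and the spatial sizes assume a 256x256 input):

    import torch
    from mmdet.models.necks import HRFPN

    neck = HRFPN(in_channels=[32, 64, 128, 256], out_channels=256, num_outs=5)
    feats = [torch.randn(1, c, s, s)
             for c, s in zip([32, 64, 128, 256], [64, 32, 16, 8])]
    outs = neck(feats)
    # len(outs) == 5, all with 256 channels, at sizes 64, 32, 16, 8 and 4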
-------------------------------------------------------------------------------- /mmdet/models/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | from .non_local import NonLocal2D 2 | from .generalized_attention import GeneralizedAttention 3 | 4 | __all__ = ['NonLocal2D', 'GeneralizedAttention'] 5 | -------------------------------------------------------------------------------- /mmdet/models/registry.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class Registry(object): 5 | 6 | def __init__(self, name): 7 | self._name = name 8 | self._module_dict = dict() 9 | 10 | @property 11 | def name(self): 12 | return self._name 13 | 14 | @property 15 | def module_dict(self): 16 | return self._module_dict 17 | 18 | def _register_module(self, module_class): 19 | """Register a module. 20 | 21 | Args: 22 | module_class (type): Module class (an nn.Module subclass) to be registered. 23 | """ 24 | if not issubclass(module_class, nn.Module): 25 | raise TypeError( 26 | 'module_class must be a subclass of nn.Module, but got {}'.format( 27 | module_class)) 28 | module_name = module_class.__name__ 29 | if module_name in self._module_dict: 30 | raise KeyError('{} is already registered in {}'.format( 31 | module_name, self.name)) 32 | self._module_dict[module_name] = module_class 33 | 34 | def register_module(self, cls): 35 | self._register_module(cls) 36 | return cls 37 | 38 | 39 | BACKBONES = Registry('backbone') 40 | NECKS = Registry('neck') 41 | ROI_EXTRACTORS = Registry('roi_extractor') 42 | SHARED_HEADS = Registry('shared_head') 43 | HEADS = Registry('head') 44 | LOSSES = Registry('loss') 45 | DETECTORS = Registry('detector') 46 |
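The registry itself is deliberately tiny: registering a class simply stores it in `module_dict` under its own `__name__`, which is why config files refer to components by class name. A hypothetical sketch (the class below is made up purely for illustration):

    import torch.nn as nn
    from mmdet.models.registry import HEADS

    @HEADS.register_module
    class MyToyHead(nn.Module):  # hypothetical example, not a real mmdet head
        def __init__(self, num_classes=81):
            super(MyToyHead, self).__init__()
            self.num_classes = num_classes

    assert HEADS.module_dict['MyToyHead'] is MyToyHead
    # build_head(dict(type='MyToyHead', num_classes=81)) would now resolve it.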
23 | """ 24 | if not issubclass(module_class, nn.Module): 25 | raise TypeError( 26 | 'module must be a child of nn.Module, but got {}'.format( 27 | module_class)) 28 | module_name = module_class.__name__ 29 | if module_name in self._module_dict: 30 | raise KeyError('{} is already registered in {}'.format( 31 | module_name, self.name)) 32 | self._module_dict[module_name] = module_class 33 | 34 | def register_module(self, cls): 35 | self._register_module(cls) 36 | return cls 37 | 38 | 39 | BACKBONES = Registry('backbone') 40 | NECKS = Registry('neck') 41 | ROI_EXTRACTORS = Registry('roi_extractor') 42 | SHARED_HEADS = Registry('shared_head') 43 | HEADS = Registry('head') 44 | LOSSES = Registry('loss') 45 | DETECTORS = Registry('detector') 46 | -------------------------------------------------------------------------------- /mmdet/models/roi_extractors/__init__.py: -------------------------------------------------------------------------------- 1 | from .single_level import SingleRoIExtractor 2 | 3 | __all__ = ['SingleRoIExtractor'] 4 | -------------------------------------------------------------------------------- /mmdet/models/roi_extractors/single_level.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from mmdet import ops 7 | from mmdet.core import force_fp32 8 | from ..registry import ROI_EXTRACTORS 9 | 10 | 11 | @ROI_EXTRACTORS.register_module 12 | class SingleRoIExtractor(nn.Module): 13 | """Extract RoI features from a single level feature map. 14 | 15 | If there are mulitple input feature levels, each RoI is mapped to a level 16 | according to its scale. 17 | 18 | Args: 19 | roi_layer (dict): Specify RoI layer type and arguments. 20 | out_channels (int): Output channels of RoI layers. 21 | featmap_strides (int): Strides of input feature maps. 22 | finest_scale (int): Scale threshold of mapping to level 0. 23 | """ 24 | 25 | def __init__(self, 26 | roi_layer, 27 | out_channels, 28 | featmap_strides, 29 | finest_scale=56): 30 | super(SingleRoIExtractor, self).__init__() 31 | self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides) 32 | self.out_channels = out_channels 33 | self.featmap_strides = featmap_strides 34 | self.finest_scale = finest_scale 35 | self.fp16_enabled = False 36 | 37 | @property 38 | def num_inputs(self): 39 | """int: Input feature map levels.""" 40 | return len(self.featmap_strides) 41 | 42 | def init_weights(self): 43 | pass 44 | 45 | def build_roi_layers(self, layer_cfg, featmap_strides): 46 | cfg = layer_cfg.copy() 47 | layer_type = cfg.pop('type') 48 | assert hasattr(ops, layer_type) 49 | layer_cls = getattr(ops, layer_type) 50 | roi_layers = nn.ModuleList( 51 | [layer_cls(spatial_scale=1 / s, **cfg) for s in featmap_strides]) 52 | return roi_layers 53 | 54 | def map_roi_levels(self, rois, num_levels): 55 | """Map rois to corresponding feature levels by scales. 56 | 57 | - scale < finest_scale: level 0 58 | - finest_scale <= scale < finest_scale * 2: level 1 59 | - finest_scale * 2 <= scale < finest_scale * 4: level 2 60 | - scale >= finest_scale * 4: level 3 61 | 62 | Args: 63 | rois (Tensor): Input RoIs, shape (k, 5). 64 | num_levels (int): Total level number. 
65 | 66 | Returns: 67 | Tensor: Level index (0-based) of each RoI, shape (k, ) 68 | """ 69 | scale = torch.sqrt( 70 | (rois[:, 3] - rois[:, 1] + 1) * (rois[:, 4] - rois[:, 2] + 1)) 71 | target_lvls = torch.floor(torch.log2(scale / self.finest_scale + 1e-6)) 72 | target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long() 73 | return target_lvls 74 | 75 | @force_fp32(apply_to=('feats',), out_fp16=True) 76 | def forward(self, feats, rois): 77 | if len(feats) == 1: 78 | return self.roi_layers[0](feats[0], rois) 79 | 80 | out_size = self.roi_layers[0].out_size 81 | num_levels = len(feats) 82 | target_lvls = self.map_roi_levels(rois, num_levels) 83 | roi_feats = feats[0].new_zeros(rois.size()[0], self.out_channels, 84 | out_size, out_size) 85 | for i in range(num_levels): 86 | inds = target_lvls == i 87 | if inds.any(): 88 | rois_ = rois[inds, :] 89 | roi_feats_t = self.roi_layers[i](feats[i], rois_) 90 | roi_feats[inds] += roi_feats_t 91 | return roi_feats 92 | -------------------------------------------------------------------------------- /mmdet/models/shared_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .res_layer import ResLayer 2 | 3 | __all__ = ['ResLayer'] 4 | -------------------------------------------------------------------------------- /mmdet/models/shared_heads/res_layer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import torch.nn as nn 4 | from mmcv.cnn import constant_init, kaiming_init 5 | from mmcv.runner import load_checkpoint 6 | 7 | from mmdet.core import auto_fp16 8 | from ..backbones import ResNet, make_res_layer 9 | from ..registry import SHARED_HEADS 10 | 11 | 12 | @SHARED_HEADS.register_module 13 | class ResLayer(nn.Module): 14 | 15 | def __init__(self, 16 | depth, 17 | stage=3, 18 | stride=2, 19 | dilation=1, 20 | style='pytorch', 21 | norm_cfg=dict(type='BN', requires_grad=True), 22 | norm_eval=True, 23 | with_cp=False, 24 | dcn=None): 25 | super(ResLayer, self).__init__() 26 | self.norm_eval = norm_eval 27 | self.norm_cfg = norm_cfg 28 | self.stage = stage 29 | self.fp16_enabled = False 30 | block, stage_blocks = ResNet.arch_settings[depth] 31 | stage_block = stage_blocks[stage] 32 | planes = 64 * 2**stage 33 | inplanes = 64 * 2**(stage - 1) * block.expansion 34 | 35 | res_layer = make_res_layer( 36 | block, 37 | inplanes, 38 | planes, 39 | stage_block, 40 | stride=stride, 41 | dilation=dilation, 42 | style=style, 43 | with_cp=with_cp, 44 | norm_cfg=self.norm_cfg, 45 | dcn=dcn) 46 | self.add_module('layer{}'.format(stage + 1), res_layer) 47 | 48 | def init_weights(self, pretrained=None): 49 | if isinstance(pretrained, str): 50 | logger = logging.getLogger() 51 | load_checkpoint(self, pretrained, strict=False, logger=logger) 52 | elif pretrained is None: 53 | for m in self.modules(): 54 | if isinstance(m, nn.Conv2d): 55 | kaiming_init(m) 56 | elif isinstance(m, nn.BatchNorm2d): 57 | constant_init(m, 1) 58 | else: 59 | raise TypeError('pretrained must be a str or None') 60 | 61 | @auto_fp16() 62 | def forward(self, x): 63 | res_layer = getattr(self, 'layer{}'.format(self.stage + 1)) 64 | out = res_layer(x) 65 | return out 66 | 67 | def train(self, mode=True): 68 | super(ResLayer, self).train(mode) 69 | if self.norm_eval: 70 | for m in self.modules(): 71 | if isinstance(m, nn.BatchNorm2d): 72 | m.eval() 73 | -------------------------------------------------------------------------------- /mmdet/models/utils/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .conv_ws import conv_ws_2d, ConvWS2d 2 | from .conv_module import build_conv_layer, ConvModule 3 | from .norm import build_norm_layer 4 | from .scale import Scale 5 | from .weight_init import (xavier_init, normal_init, uniform_init, kaiming_init, 6 | bias_init_with_prob) 7 | 8 | __all__ = [ 9 | 'conv_ws_2d', 'ConvWS2d', 'build_conv_layer', 'ConvModule', 10 | 'build_norm_layer', 'xavier_init', 'normal_init', 'uniform_init', 11 | 'kaiming_init', 'bias_init_with_prob', 'Scale' 12 | ] 13 | -------------------------------------------------------------------------------- /mmdet/models/utils/conv_ws.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | def conv_ws_2d(input, 6 | weight, 7 | bias=None, 8 | stride=1, 9 | padding=0, 10 | dilation=1, 11 | groups=1, 12 | eps=1e-5): 13 | c_in = weight.size(0) 14 | weight_flat = weight.view(c_in, -1) 15 | mean = weight_flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1) 16 | std = weight_flat.std(dim=1, keepdim=True).view(c_in, 1, 1, 1) 17 | weight = (weight - mean) / (std + eps) 18 | return F.conv2d(input, weight, bias, stride, padding, dilation, groups) 19 | 20 | 21 | class ConvWS2d(nn.Conv2d): 22 | 23 | def __init__(self, 24 | in_channels, 25 | out_channels, 26 | kernel_size, 27 | stride=1, 28 | padding=0, 29 | dilation=1, 30 | groups=1, 31 | bias=True, 32 | eps=1e-5): 33 | super(ConvWS2d, self).__init__( 34 | in_channels, 35 | out_channels, 36 | kernel_size, 37 | stride=stride, 38 | padding=padding, 39 | dilation=dilation, 40 | groups=groups, 41 | bias=bias) 42 | self.eps = eps 43 | 44 | def forward(self, x): 45 | return conv_ws_2d(x, self.weight, self.bias, self.stride, self.padding, 46 | self.dilation, self.groups, self.eps) 47 | -------------------------------------------------------------------------------- /mmdet/models/utils/norm.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | norm_cfg = { 4 | # format: layer_type: (abbreviation, module) 5 | 'BN': ('bn', nn.BatchNorm2d), 6 | 'SyncBN': ('bn', nn.SyncBatchNorm), 7 | 'GN': ('gn', nn.GroupNorm), 8 | # and potentially 'SN' 9 | } 10 | 11 | 12 | def build_norm_layer(cfg, num_features, postfix=''): 13 | """ Build normalization layer 14 | 15 | Args: 16 | cfg (dict): cfg should contain: 17 | type (str): identify norm layer type. 18 | layer args: args needed to instantiate a norm layer. 19 | requires_grad (bool): [optional] whether stop gradient updates 20 | num_features (int): number of channels from input. 21 | postfix (int, str): appended into norm abbreviation to 22 | create named layer. 
23 | 24 | Returns: 25 | name (str): abbreviation + postfix 26 | layer (nn.Module): created norm layer 27 | """ 28 | assert isinstance(cfg, dict) and 'type' in cfg 29 | cfg_ = cfg.copy() 30 | 31 | layer_type = cfg_.pop('type') 32 | if layer_type not in norm_cfg: 33 | raise KeyError('Unrecognized norm type {}'.format(layer_type)) 34 | else: 35 | abbr, norm_layer = norm_cfg[layer_type] 36 | if norm_layer is None: 37 | raise NotImplementedError 38 | 39 | assert isinstance(postfix, (int, str)) 40 | name = abbr + str(postfix) 41 | 42 | requires_grad = cfg_.pop('requires_grad', True) 43 | cfg_.setdefault('eps', 1e-5) 44 | if layer_type != 'GN': 45 | layer = norm_layer(num_features, **cfg_) 46 | if layer_type == 'SyncBN': 47 | layer._specify_ddp_gpu_num(1) 48 | else: 49 | assert 'num_groups' in cfg_ 50 | layer = norm_layer(num_channels=num_features, **cfg_) 51 | 52 | for param in layer.parameters(): 53 | param.requires_grad = requires_grad 54 | 55 | return name, layer 56 | -------------------------------------------------------------------------------- /mmdet/models/utils/scale.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Scale(nn.Module): 6 | 7 | def __init__(self, scale=1.0): 8 | super(Scale, self).__init__() 9 | self.scale = nn.Parameter(torch.tensor(scale, dtype=torch.float)) 10 | 11 | def forward(self, x): 12 | return x * self.scale 13 | -------------------------------------------------------------------------------- /mmdet/models/utils/weight_init.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | 4 | 5 | def xavier_init(module, gain=1, bias=0, distribution='normal'): 6 | assert distribution in ['uniform', 'normal'] 7 | if distribution == 'uniform': 8 | nn.init.xavier_uniform_(module.weight, gain=gain) 9 | else: 10 | nn.init.xavier_normal_(module.weight, gain=gain) 11 | if hasattr(module, 'bias') and module.bias is not None:  # bias may be None (bias=False) 12 | nn.init.constant_(module.bias, bias) 13 | 14 | 15 | def normal_init(module, mean=0, std=1, bias=0): 16 | nn.init.normal_(module.weight, mean, std) 17 | if hasattr(module, 'bias') and module.bias is not None: 18 | nn.init.constant_(module.bias, bias) 19 | 20 | 21 | def uniform_init(module, a=0, b=1, bias=0): 22 | nn.init.uniform_(module.weight, a, b) 23 | if hasattr(module, 'bias') and module.bias is not None: 24 | nn.init.constant_(module.bias, bias) 25 | 26 | 27 | def kaiming_init(module, 28 | mode='fan_out', 29 | nonlinearity='relu', 30 | bias=0, 31 | distribution='normal'): 32 | assert distribution in ['uniform', 'normal'] 33 | if distribution == 'uniform': 34 | nn.init.kaiming_uniform_( 35 | module.weight, mode=mode, nonlinearity=nonlinearity) 36 | else: 37 | nn.init.kaiming_normal_( 38 | module.weight, mode=mode, nonlinearity=nonlinearity) 39 | if hasattr(module, 'bias') and module.bias is not None: 40 | nn.init.constant_(module.bias, bias) 41 | 42 | 43 | def bias_init_with_prob(prior_prob): 44 | """Initialize conv/fc bias value according to a given probability.""" 45 | bias_init = float(-np.log((1 - prior_prob) / prior_prob)) 46 | return bias_init 47 | -------------------------------------------------------------------------------- /mmdet/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .dcn import (DeformConv, DeformConvPack, ModulatedDeformConv, 2 | ModulatedDeformConvPack, DeformRoIPooling, 3 | DeformRoIPoolingPack, ModulatedDeformRoIPoolingPack, 4 | deform_conv, modulated_deform_conv, deform_roi_pooling) 5 | from .gcb import
ContextBlock 6 | from .nms import nms, soft_nms 7 | from .roi_align import RoIAlign, roi_align 8 | from .roi_pool import RoIPool, roi_pool 9 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss 10 | from .masked_conv import MaskedConv2d 11 | 12 | __all__ = [ 13 | 'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool', 14 | 'DeformConv', 'DeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 15 | 'ModulatedDeformRoIPoolingPack', 'ModulatedDeformConv', 16 | 'ModulatedDeformConvPack', 'deform_conv', 'modulated_deform_conv', 17 | 'deform_roi_pooling', 'SigmoidFocalLoss', 'sigmoid_focal_loss', 18 | 'MaskedConv2d', 'ContextBlock' 19 | ] 20 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.deform_conv import deform_conv, modulated_deform_conv 2 | from .functions.deform_pool import deform_roi_pooling 3 | from .modules.deform_conv import (DeformConv, ModulatedDeformConv, 4 | DeformConvPack, ModulatedDeformConvPack) 5 | from .modules.deform_pool import (DeformRoIPooling, DeformRoIPoolingPack, 6 | ModulatedDeformRoIPoolingPack) 7 | 8 | __all__ = [ 9 | 'DeformConv', 'DeformConvPack', 'ModulatedDeformConv', 10 | 'ModulatedDeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 11 | 'ModulatedDeformRoIPoolingPack', 'deform_conv', 'modulated_deform_conv', 12 | 'deform_roi_pooling' 13 | ] 14 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OceanPang/Libra_R-CNN/bd9a4f004dfdfcdc40cfc0d5a41af0bdacefba0d/mmdet/ops/dcn/functions/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/dcn/functions/deform_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from .. 
import deform_pool_cuda 5 | 6 | 7 | class DeformRoIPoolingFunction(Function): 8 | 9 | @staticmethod 10 | def forward(ctx, 11 | data, 12 | rois, 13 | offset, 14 | spatial_scale, 15 | out_size, 16 | out_channels, 17 | no_trans, 18 | group_size=1, 19 | part_size=None, 20 | sample_per_part=4, 21 | trans_std=.0): 22 | ctx.spatial_scale = spatial_scale 23 | ctx.out_size = out_size 24 | ctx.out_channels = out_channels 25 | ctx.no_trans = no_trans 26 | ctx.group_size = group_size 27 | ctx.part_size = out_size if part_size is None else part_size 28 | ctx.sample_per_part = sample_per_part 29 | ctx.trans_std = trans_std 30 | 31 | assert 0.0 <= ctx.trans_std <= 1.0 32 | if not data.is_cuda: 33 | raise NotImplementedError 34 | 35 | n = rois.shape[0] 36 | output = data.new_empty(n, out_channels, out_size, out_size) 37 | output_count = data.new_empty(n, out_channels, out_size, out_size) 38 | deform_pool_cuda.deform_psroi_pooling_cuda_forward( 39 | data, rois, offset, output, output_count, ctx.no_trans, 40 | ctx.spatial_scale, ctx.out_channels, ctx.group_size, ctx.out_size, 41 | ctx.part_size, ctx.sample_per_part, ctx.trans_std) 42 | 43 | if data.requires_grad or rois.requires_grad or offset.requires_grad: 44 | ctx.save_for_backward(data, rois, offset) 45 | ctx.output_count = output_count 46 | 47 | return output 48 | 49 | @staticmethod 50 | def backward(ctx, grad_output): 51 | if not grad_output.is_cuda: 52 | raise NotImplementedError 53 | 54 | data, rois, offset = ctx.saved_tensors 55 | output_count = ctx.output_count 56 | grad_input = torch.zeros_like(data) 57 | grad_rois = None 58 | grad_offset = torch.zeros_like(offset) 59 | 60 | deform_pool_cuda.deform_psroi_pooling_cuda_backward( 61 | grad_output, data, rois, offset, output_count, grad_input, 62 | grad_offset, ctx.no_trans, ctx.spatial_scale, ctx.out_channels, 63 | ctx.group_size, ctx.out_size, ctx.part_size, ctx.sample_per_part, 64 | ctx.trans_std) 65 | return (grad_input, grad_rois, grad_offset, None, None, None, None, 66 | None, None, None, None) 67 | 68 | 69 | deform_roi_pooling = DeformRoIPoolingFunction.apply 70 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OceanPang/Libra_R-CNN/bd9a4f004dfdfcdc40cfc0d5a41af0bdacefba0d/mmdet/ops/dcn/modules/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/dcn/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='deform_conv', 6 | ext_modules=[ 7 | CUDAExtension('deform_conv_cuda', [ 8 | 'src/deform_conv_cuda.cpp', 9 | 'src/deform_conv_cuda_kernel.cu', 10 | ]), 11 | CUDAExtension( 12 | 'deform_pool_cuda', 13 | ['src/deform_pool_cuda.cpp', 'src/deform_pool_cuda_kernel.cu']), 14 | ], 15 | cmdclass={'build_ext': BuildExtension}) 16 | -------------------------------------------------------------------------------- /mmdet/ops/gcb/__init__.py: -------------------------------------------------------------------------------- 1 | from .context_block import ContextBlock 2 | 3 | __all__ = [ 4 | 'ContextBlock', 5 | ] 6 | -------------------------------------------------------------------------------- /mmdet/ops/masked_conv/__init__.py: -------------------------------------------------------------------------------- 1 | 
from .functions.masked_conv import masked_conv2d 2 | from .modules.masked_conv import MaskedConv2d 3 | 4 | __all__ = ['masked_conv2d', 'MaskedConv2d'] 5 | -------------------------------------------------------------------------------- /mmdet/ops/masked_conv/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OceanPang/Libra_R-CNN/bd9a4f004dfdfcdc40cfc0d5a41af0bdacefba0d/mmdet/ops/masked_conv/functions/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/masked_conv/functions/masked_conv.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from torch.autograd import Function 4 | from torch.nn.modules.utils import _pair 5 | from .. import masked_conv2d_cuda 6 | 7 | 8 | class MaskedConv2dFunction(Function): 9 | 10 | @staticmethod 11 | def forward(ctx, features, mask, weight, bias, padding=0, stride=1): 12 | assert mask.dim() == 3 and mask.size(0) == 1 13 | assert features.dim() == 4 and features.size(0) == 1 14 | assert features.size()[2:] == mask.size()[1:] 15 | pad_h, pad_w = _pair(padding) 16 | stride_h, stride_w = _pair(stride) 17 | if stride_h != 1 or stride_w != 1: 18 | raise ValueError( 19 | 'Stride can only be 1 in masked_conv2d currently.') 20 | if not features.is_cuda: 21 | raise NotImplementedError 22 | 23 | out_channel, in_channel, kernel_h, kernel_w = weight.size() 24 | 25 | batch_size = features.size(0) 26 | out_h = int( 27 | math.floor((features.size(2) + 2 * pad_h - 28 | (kernel_h - 1) - 1) / stride_h + 1)) 29 | out_w = int( 30 | math.floor((features.size(3) + 2 * pad_w - 31 | (kernel_w - 1) - 1) / stride_w + 1))  # width uses kernel_w 32 | mask_inds = torch.nonzero(mask[0] > 0) 33 | output = features.new_zeros(batch_size, out_channel, out_h, out_w) 34 | if mask_inds.numel() > 0: 35 | mask_h_idx = mask_inds[:, 0].contiguous() 36 | mask_w_idx = mask_inds[:, 1].contiguous() 37 | data_col = features.new_zeros(in_channel * kernel_h * kernel_w, 38 | mask_inds.size(0)) 39 | masked_conv2d_cuda.masked_im2col_forward(features, mask_h_idx, 40 | mask_w_idx, kernel_h, 41 | kernel_w, pad_h, pad_w, 42 | data_col) 43 | 44 | masked_output = torch.addmm(1, bias[:, None], 1, 45 | weight.view(out_channel, -1), data_col) 46 | masked_conv2d_cuda.masked_col2im_forward(masked_output, mask_h_idx, 47 | mask_w_idx, out_h, out_w, 48 | out_channel, output) 49 | return output 50 | 51 | @staticmethod 52 | def backward(ctx, grad_output): 53 | return (None, ) * 6  # one gradient (None) per input of forward 54 | 55 | 56 | masked_conv2d = MaskedConv2dFunction.apply 57 |
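The function above evaluates the convolution only at positions where `mask` is non-zero: `masked_im2col_forward` gathers the receptive fields of the masked positions into `data_col`, `torch.addmm` then computes the convolution as a single GEMM (bias plus `weight @ data_col`), and `masked_col2im_forward` scatters the results back into a dense output. This is what lets guided-anchoring heads run only on predicted anchor locations. A dense PyTorch equivalent of the GEMM step, for intuition (editor's sketch with made-up shapes; the CUDA kernels restrict the same computation to the masked columns):

    import torch
    import torch.nn.functional as F

    features = torch.randn(1, 16, 32, 32)  # hypothetical input
    weight = torch.randn(8, 16, 3, 3)      # (out_c, in_c, kh, kw)
    bias = torch.randn(8)
    cols = F.unfold(features, kernel_size=3, padding=1)  # im2col: (1, 16*9, 1024)
    out = torch.addmm(bias[:, None], weight.view(8, -1), cols[0])  # (8, 1024)
    out = out.view(1, 8, 32, 32)  # col2im; matches F.conv2d(features, weight, bias, padding=1)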
10 | """ 11 | 12 | def __init__(self, 13 | in_channels, 14 | out_channels, 15 | kernel_size, 16 | stride=1, 17 | padding=0, 18 | dilation=1, 19 | groups=1, 20 | bias=True): 21 | super(MaskedConv2d, 22 | self).__init__(in_channels, out_channels, kernel_size, stride, 23 | padding, dilation, groups, bias) 24 | 25 | def forward(self, input, mask=None): 26 | if mask is None: # fallback to the normal Conv2d 27 | return super(MaskedConv2d, self).forward(input) 28 | else: 29 | return masked_conv2d(input, mask, self.weight, self.bias, 30 | self.padding) 31 | -------------------------------------------------------------------------------- /mmdet/ops/masked_conv/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='masked_conv2d_cuda', 6 | ext_modules=[ 7 | CUDAExtension('masked_conv2d_cuda', [ 8 | 'src/masked_conv2d_cuda.cpp', 9 | 'src/masked_conv2d_kernel.cu', 10 | ]), 11 | ], 12 | cmdclass={'build_ext': BuildExtension}) 13 | -------------------------------------------------------------------------------- /mmdet/ops/masked_conv/src/masked_conv2d_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | int MaskedIm2colForwardLaucher(const at::Tensor im, const int height, 7 | const int width, const int channels, 8 | const int kernel_h, const int kernel_w, 9 | const int pad_h, const int pad_w, 10 | const at::Tensor mask_h_idx, 11 | const at::Tensor mask_w_idx, const int mask_cnt, 12 | at::Tensor col); 13 | 14 | int MaskedCol2imForwardLaucher(const at::Tensor col, const int height, 15 | const int width, const int channels, 16 | const at::Tensor mask_h_idx, 17 | const at::Tensor mask_w_idx, const int mask_cnt, 18 | at::Tensor im); 19 | 20 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 21 | #define CHECK_CONTIGUOUS(x) \ 22 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 23 | #define CHECK_INPUT(x) \ 24 | CHECK_CUDA(x); \ 25 | CHECK_CONTIGUOUS(x) 26 | 27 | int masked_im2col_forward_cuda(const at::Tensor im, const at::Tensor mask_h_idx, 28 | const at::Tensor mask_w_idx, const int kernel_h, 29 | const int kernel_w, const int pad_h, 30 | const int pad_w, at::Tensor col) { 31 | CHECK_INPUT(im); 32 | CHECK_INPUT(mask_h_idx); 33 | CHECK_INPUT(mask_w_idx); 34 | CHECK_INPUT(col); 35 | // im: (n, ic, h, w), kernel size (kh, kw) 36 | // kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh) 37 | 38 | int channels = im.size(1); 39 | int height = im.size(2); 40 | int width = im.size(3); 41 | int mask_cnt = mask_h_idx.size(0); 42 | 43 | MaskedIm2colForwardLaucher(im, height, width, channels, kernel_h, kernel_w, 44 | pad_h, pad_w, mask_h_idx, mask_w_idx, mask_cnt, 45 | col); 46 | 47 | return 1; 48 | } 49 | 50 | int masked_col2im_forward_cuda(const at::Tensor col, 51 | const at::Tensor mask_h_idx, 52 | const at::Tensor mask_w_idx, int height, 53 | int width, int channels, at::Tensor im) { 54 | CHECK_INPUT(col); 55 | CHECK_INPUT(mask_h_idx); 56 | CHECK_INPUT(mask_w_idx); 57 | CHECK_INPUT(im); 58 | // im: (n, ic, h, w), kernel size (kh, kw) 59 | // kernel: (oc, ic * kh * kh), col: (kh * kw * ic, ow * oh) 60 | 61 | int mask_cnt = mask_h_idx.size(0); 62 | 63 | MaskedCol2imForwardLaucher(col, height, width, channels, mask_h_idx, 64 | mask_w_idx, mask_cnt, im); 65 | 66 | return 1; 67 | } 68 | 69 | 
69 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 70 | m.def("masked_im2col_forward", &masked_im2col_forward_cuda, 71 | "masked_im2col forward (CUDA)"); 72 | m.def("masked_col2im_forward", &masked_col2im_forward_cuda, 73 | "masked_col2im forward (CUDA)"); 74 | } -------------------------------------------------------------------------------- /mmdet/ops/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_wrapper import nms, soft_nms 2 | 3 | __all__ = ['nms', 'soft_nms'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from . import nms_cuda, nms_cpu 5 | from .soft_nms_cpu import soft_nms_cpu 6 | 7 | 8 | def nms(dets, iou_thr, device_id=None): 9 | """Dispatch to either CPU or GPU NMS implementations. 10 | 11 | The input can be either a torch tensor or numpy array. GPU NMS will be used 12 | if the input is a gpu tensor or device_id is specified, otherwise CPU NMS 13 | will be used. The returned type will always be the same as inputs. 14 | 15 | Arguments: 16 | dets (torch.Tensor or np.ndarray): bboxes with scores. 17 | iou_thr (float): IoU threshold for NMS. 18 | device_id (int, optional): when `dets` is a numpy array, if `device_id` 19 | is None, then cpu nms is used, otherwise gpu_nms will be used. 20 | 21 | Returns: 22 | tuple: kept bboxes and indices, which are always the same data type as 23 | the input. 24 | """ 25 | # convert dets (tensor or numpy array) to tensor 26 | if isinstance(dets, torch.Tensor): 27 | is_numpy = False 28 | dets_th = dets 29 | elif isinstance(dets, np.ndarray): 30 | is_numpy = True 31 | device = 'cpu' if device_id is None else 'cuda:{}'.format(device_id) 32 | dets_th = torch.from_numpy(dets).to(device) 33 | else: 34 | raise TypeError( 35 | 'dets must be either a Tensor or numpy array, but got {}'.format( 36 | type(dets))) 37 | 38 | # execute cpu or cuda nms 39 | if dets_th.shape[0] == 0: 40 | inds = dets_th.new_zeros(0, dtype=torch.long) 41 | else: 42 | if dets_th.is_cuda: 43 | inds = nms_cuda.nms(dets_th, iou_thr) 44 | else: 45 | inds = nms_cpu.nms(dets_th, iou_thr) 46 | 47 | if is_numpy: 48 | inds = inds.cpu().numpy() 49 | return dets[inds, :], inds 50 | 51 | 52 | def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3): 53 | if isinstance(dets, torch.Tensor): 54 | is_tensor = True 55 | dets_np = dets.detach().cpu().numpy() 56 | elif isinstance(dets, np.ndarray): 57 | is_tensor = False 58 | dets_np = dets 59 | else: 60 | raise TypeError( 61 | 'dets must be either a Tensor or numpy array, but got {}'.format( 62 | type(dets))) 63 | 64 | method_codes = {'linear': 1, 'gaussian': 2} 65 | if method not in method_codes: 66 | raise ValueError('Invalid method for SoftNMS: {}'.format(method)) 67 | new_dets, inds = soft_nms_cpu( 68 | dets_np, 69 | iou_thr, 70 | method=method_codes[method], 71 | sigma=sigma, 72 | min_score=min_score) 73 | 74 | if is_tensor: 75 | return dets.new_tensor(new_dets), dets.new_tensor( 76 | inds, dtype=torch.long) 77 | else: 78 | return new_dets.astype(np.float32), inds.astype(np.int64) 79 |
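Both wrappers take an (N, 5) array of [x1, y1, x2, y2, score] rows and return results in the same type as the input. A quick usage sketch, assuming the extensions are compiled (values made up):

    import numpy as np
    from mmdet.ops import nms, soft_nms

    dets = np.array([[10, 10, 50, 50, 0.9],
                     [12, 12, 52, 52, 0.8],
                     [100, 100, 140, 140, 0.7]], dtype=np.float32)
    kept, inds = nms(dets, 0.5)  # drops the second box (IoU ~ 0.83 with the first)
    new_dets, new_inds = soft_nms(dets, 0.5, method='gaussian')  # decays scores instead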
-------------------------------------------------------------------------------- /mmdet/ops/nms/setup.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | from setuptools import setup, Extension 3 | 4 | import numpy as np 5 | from Cython.Build import cythonize 6 | from Cython.Distutils import build_ext 7 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 8 | 9 | ext_args = dict( 10 | include_dirs=[np.get_include()], 11 | language='c++', 12 | extra_compile_args={ 13 | 'cc': ['-Wno-unused-function', '-Wno-write-strings'], 14 | 'nvcc': ['-c', '--compiler-options', '-fPIC'], 15 | }, 16 | ) 17 | 18 | extensions = [ 19 | Extension('soft_nms_cpu', ['src/soft_nms_cpu.pyx'], **ext_args), 20 | ] 21 | 22 | 23 | def customize_compiler_for_nvcc(self): 24 | """Inject deep into distutils to customize how the dispatch 25 | to cc/nvcc works. 26 | If you subclass UnixCCompiler, it's not trivial to get your subclass 27 | injected in, and still have the right customizations (i.e. 28 | distutils.sysconfig.customize_compiler) run on it. So instead of going 29 | the OO route, I have this. Note, it's kind of like a weird functional 30 | subclassing going on.""" 31 | 32 | # tell the compiler it can process .cu files 33 | self.src_extensions.append('.cu') 34 | 35 | # save references to the default compiler_so and _compile methods 36 | default_compiler_so = self.compiler_so 37 | default_compile = self._compile 38 | 39 | # now redefine the _compile method. This gets executed for each 40 | # object but distutils doesn't have the ability to change compilers 41 | # based on source extension: we add it. 42 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 43 | if osp.splitext(src)[1] == '.cu': 44 | # use nvcc for .cu files 45 | self.set_executable('compiler_so', 'nvcc') 46 | # use only a subset of the extra_postargs, which are 1-1 translated 47 | # from the extra_compile_args in the Extension class 48 | postargs = extra_postargs['nvcc'] 49 | else: 50 | postargs = extra_postargs['cc'] 51 | 52 | default_compile(obj, src, ext, cc_args, postargs, pp_opts) 53 | # reset the default compiler_so, which we might have changed for cuda 54 | self.compiler_so = default_compiler_so 55 | 56 | # inject our redefined _compile method into the class 57 | self._compile = _compile 58 | 59 | 60 | class custom_build_ext(build_ext): 61 | 62 | def build_extensions(self): 63 | customize_compiler_for_nvcc(self.compiler) 64 | build_ext.build_extensions(self) 65 | 66 | 67 | setup( 68 | name='soft_nms', 69 | cmdclass={'build_ext': custom_build_ext}, 70 | ext_modules=cythonize(extensions), 71 | ) 72 | 73 | setup( 74 | name='nms_cuda', 75 | ext_modules=[ 76 | CUDAExtension('nms_cuda', [ 77 | 'src/nms_cuda.cpp', 78 | 'src/nms_kernel.cu', 79 | ]), 80 | CUDAExtension('nms_cpu', [ 81 | 'src/nms_cpu.cpp', 82 | ]), 83 | ], 84 | cmdclass={'build_ext': BuildExtension}) 85 | -------------------------------------------------------------------------------- /mmdet/ops/nms/src/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
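// Greedy NMS on CPU: nms_cpu_kernel below visits boxes in descending score
// order; each surviving box suppresses every later box whose IoU with it
// reaches the threshold, and the indices of the unsuppressed boxes are
// returned.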
2 | #include <torch/extension.h> 3 | 4 | template <typename scalar_t> 5 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) { 6 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 7 | 8 | if (dets.numel() == 0) { 9 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 10 | } 11 | 12 | auto x1_t = dets.select(1, 0).contiguous(); 13 | auto y1_t = dets.select(1, 1).contiguous(); 14 | auto x2_t = dets.select(1, 2).contiguous(); 15 | auto y2_t = dets.select(1, 3).contiguous(); 16 | auto scores = dets.select(1, 4).contiguous(); 17 | 18 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 19 | 20 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 21 | 22 | auto ndets = dets.size(0); 23 | at::Tensor suppressed_t = 24 | at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 25 | 26 | auto suppressed = suppressed_t.data<uint8_t>(); 27 | auto order = order_t.data<int64_t>(); 28 | auto x1 = x1_t.data<scalar_t>(); 29 | auto y1 = y1_t.data<scalar_t>(); 30 | auto x2 = x2_t.data<scalar_t>(); 31 | auto y2 = y2_t.data<scalar_t>(); 32 | auto areas = areas_t.data<scalar_t>(); 33 | 34 | for (int64_t _i = 0; _i < ndets; _i++) { 35 | auto i = order[_i]; 36 | if (suppressed[i] == 1) continue; 37 | auto ix1 = x1[i]; 38 | auto iy1 = y1[i]; 39 | auto ix2 = x2[i]; 40 | auto iy2 = y2[i]; 41 | auto iarea = areas[i]; 42 | 43 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 44 | auto j = order[_j]; 45 | if (suppressed[j] == 1) continue; 46 | auto xx1 = std::max(ix1, x1[j]); 47 | auto yy1 = std::max(iy1, y1[j]); 48 | auto xx2 = std::min(ix2, x2[j]); 49 | auto yy2 = std::min(iy2, y2[j]); 50 | 51 | auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1); 52 | auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1); 53 | auto inter = w * h; 54 | auto ovr = inter / (iarea + areas[j] - inter); 55 | if (ovr >= threshold) suppressed[j] = 1; 56 | } 57 | } 58 | return at::nonzero(suppressed_t == 0).squeeze(1); 59 | } 60 | 61 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 62 | at::Tensor result; 63 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 64 | result = nms_cpu_kernel<scalar_t>(dets, threshold); 65 | }); 66 | return result; 67 | } 68 | 69 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 70 | m.def("nms", &nms, "non-maximum suppression"); 71 | } -------------------------------------------------------------------------------- /mmdet/ops/nms/src/nms_cuda.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #include <torch/extension.h> 3 | 4 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDA tensor ") 5 | 6 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 7 | 8 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 9 | CHECK_CUDA(dets); 10 | if (dets.numel() == 0) 11 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 12 | return nms_cuda(dets, threshold); 13 | } 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("nms", &nms, "non-maximum suppression"); 17 | } -------------------------------------------------------------------------------- /mmdet/ops/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.roi_align import roi_align 2 | from .modules.roi_align import RoIAlign 3 | 4 | __all__ = ['roi_align', 'RoIAlign'] 5 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OceanPang/Libra_R-CNN/bd9a4f004dfdfcdc40cfc0d5a41af0bdacefba0d/mmdet/ops/roi_align/functions/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/roi_align/functions/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Function 2 | 3 | from .. import roi_align_cuda 4 | 5 | 6 | class RoIAlignFunction(Function): 7 | 8 | @staticmethod 9 | def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0): 10 | if isinstance(out_size, int): 11 | out_h = out_size 12 | out_w = out_size 13 | elif isinstance(out_size, tuple): 14 | assert len(out_size) == 2 15 | assert isinstance(out_size[0], int) 16 | assert isinstance(out_size[1], int) 17 | out_h, out_w = out_size 18 | else: 19 | raise TypeError( 20 | '"out_size" must be an integer or tuple of integers') 21 | ctx.spatial_scale = spatial_scale 22 | ctx.sample_num = sample_num 23 | ctx.save_for_backward(rois) 24 | ctx.feature_size = features.size() 25 | 26 | batch_size, num_channels, data_height, data_width = features.size() 27 | num_rois = rois.size(0) 28 | 29 | output = features.new_zeros(num_rois, num_channels, out_h, out_w) 30 | if features.is_cuda: 31 | roi_align_cuda.forward(features, rois, out_h, out_w, spatial_scale, 32 | sample_num, output) 33 | else: 34 | raise NotImplementedError 35 | 36 | return output 37 | 38 | @staticmethod 39 | def backward(ctx, grad_output): 40 | feature_size = ctx.feature_size 41 | spatial_scale = ctx.spatial_scale 42 | sample_num = ctx.sample_num 43 | rois = ctx.saved_tensors[0] 44 | assert (feature_size is not None and grad_output.is_cuda) 45 | 46 | batch_size, num_channels, data_height, data_width = feature_size 47 | out_w = grad_output.size(3) 48 | out_h = grad_output.size(2) 49 | 50 | grad_input = grad_rois = None 51 | if ctx.needs_input_grad[0]: 52 | grad_input = rois.new_zeros(batch_size, num_channels, data_height, 53 | data_width) 54 | roi_align_cuda.backward(grad_output.contiguous(), rois, out_h, 55 | out_w, spatial_scale, sample_num, 56 | grad_input) 57 | 58 | return grad_input, grad_rois, None, None, None 59 | 60 | 61 | roi_align = RoIAlignFunction.apply 62 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/gradcheck.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import
torch 3 | from torch.autograd import gradcheck 4 | 5 | import os.path as osp 6 | import sys 7 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 8 | from roi_align import RoIAlign # noqa: E402 9 | 10 | feat_size = 15 11 | spatial_scale = 1.0 / 8 12 | img_size = feat_size / spatial_scale 13 | num_imgs = 2 14 | num_rois = 20 15 | 16 | batch_ind = np.random.randint(num_imgs, size=(num_rois, 1)) 17 | rois = np.random.rand(num_rois, 4) * img_size * 0.5 18 | rois[:, 2:] += img_size * 0.5 19 | rois = np.hstack((batch_ind, rois)) 20 | 21 | feat = torch.randn( 22 | num_imgs, 16, feat_size, feat_size, requires_grad=True, device='cuda:0') 23 | rois = torch.from_numpy(rois).float().cuda() 24 | inputs = (feat, rois) 25 | print('Gradcheck for roi align...') 26 | test = gradcheck(RoIAlign(3, spatial_scale), inputs, atol=1e-3, eps=1e-3) 27 | print(test) 28 | test = gradcheck(RoIAlign(3, spatial_scale, 2), inputs, atol=1e-3, eps=1e-3) 29 | print(test) 30 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OceanPang/Libra_R-CNN/bd9a4f004dfdfcdc40cfc0d5a41af0bdacefba0d/mmdet/ops/roi_align/modules/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/roi_align/modules/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_align import RoIAlignFunction 3 | 4 | 5 | class RoIAlign(Module): 6 | 7 | def __init__(self, out_size, spatial_scale, sample_num=0): 8 | super(RoIAlign, self).__init__() 9 | 10 | self.out_size = out_size 11 | self.spatial_scale = float(spatial_scale) 12 | self.sample_num = int(sample_num) 13 | 14 | def forward(self, features, rois): 15 | return RoIAlignFunction.apply(features, rois, self.out_size, 16 | self.spatial_scale, self.sample_num) 17 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='roi_align_cuda', 6 | ext_modules=[ 7 | CUDAExtension('roi_align_cuda', [ 8 | 'src/roi_align_cuda.cpp', 9 | 'src/roi_align_kernel.cu', 10 | ]), 11 | ], 12 | cmdclass={'build_ext': BuildExtension}) 13 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/src/roi_align_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <cmath> 4 | #include <vector> 5 | 6 | int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int sample_num, 8 | const int channels, const int height, 9 | const int width, const int num_rois, 10 | const int pooled_height, const int pooled_width, 11 | at::Tensor output); 12 | 13 | int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 14 | const float spatial_scale, const int sample_num, 15 | const int channels, const int height, 16 | const int width, const int num_rois, 17 | const int pooled_height, const int pooled_width, 18 | at::Tensor bottom_grad); 19 | 20 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDA tensor ") 21 | #define CHECK_CONTIGUOUS(x) \ 22 |
AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 23 | #define CHECK_INPUT(x) \ 24 | CHECK_CUDA(x); \ 25 | CHECK_CONTIGUOUS(x) 26 | 27 | int roi_align_forward_cuda(at::Tensor features, at::Tensor rois, 28 | int pooled_height, int pooled_width, 29 | float spatial_scale, int sample_num, 30 | at::Tensor output) { 31 | CHECK_INPUT(features); 32 | CHECK_INPUT(rois); 33 | CHECK_INPUT(output); 34 | 35 | // Number of ROIs 36 | int num_rois = rois.size(0); 37 | int size_rois = rois.size(1); 38 | 39 | if (size_rois != 5) { 40 | printf("wrong roi size\n"); 41 | return 0; 42 | } 43 | 44 | int num_channels = features.size(1); 45 | int data_height = features.size(2); 46 | int data_width = features.size(3); 47 | 48 | ROIAlignForwardLaucher(features, rois, spatial_scale, sample_num, 49 | num_channels, data_height, data_width, num_rois, 50 | pooled_height, pooled_width, output); 51 | 52 | return 1; 53 | } 54 | 55 | int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois, 56 | int pooled_height, int pooled_width, 57 | float spatial_scale, int sample_num, 58 | at::Tensor bottom_grad) { 59 | CHECK_INPUT(top_grad); 60 | CHECK_INPUT(rois); 61 | CHECK_INPUT(bottom_grad); 62 | 63 | // Number of ROIs 64 | int num_rois = rois.size(0); 65 | int size_rois = rois.size(1); 66 | if (size_rois != 5) { 67 | printf("wrong roi size\n"); 68 | return 0; 69 | } 70 | 71 | int num_channels = bottom_grad.size(1); 72 | int data_height = bottom_grad.size(2); 73 | int data_width = bottom_grad.size(3); 74 | 75 | ROIAlignBackwardLaucher(top_grad, rois, spatial_scale, sample_num, 76 | num_channels, data_height, data_width, num_rois, 77 | pooled_height, pooled_width, bottom_grad); 78 | 79 | return 1; 80 | } 81 | 82 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 83 | m.def("forward", &roi_align_forward_cuda, "Roi_Align forward (CUDA)"); 84 | m.def("backward", &roi_align_backward_cuda, "Roi_Align backward (CUDA)"); 85 | } 86 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.roi_pool import roi_pool 2 | from .modules.roi_pool import RoIPool 3 | 4 | __all__ = ['roi_pool', 'RoIPool'] 5 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OceanPang/Libra_R-CNN/bd9a4f004dfdfcdc40cfc0d5a41af0bdacefba0d/mmdet/ops/roi_pool/functions/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/functions/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from .. 
import roi_pool_cuda 5 | 6 | 7 | class RoIPoolFunction(Function): 8 | 9 | @staticmethod 10 | def forward(ctx, features, rois, out_size, spatial_scale): 11 | if isinstance(out_size, int): 12 | out_h = out_size 13 | out_w = out_size 14 | elif isinstance(out_size, tuple): 15 | assert len(out_size) == 2 16 | assert isinstance(out_size[0], int) 17 | assert isinstance(out_size[1], int) 18 | out_h, out_w = out_size 19 | else: 20 | raise TypeError( 21 | '"out_size" must be an integer or tuple of integers') 22 | assert features.is_cuda 23 | ctx.save_for_backward(rois) 24 | num_channels = features.size(1) 25 | num_rois = rois.size(0) 26 | out_size = (num_rois, num_channels, out_h, out_w) 27 | output = features.new_zeros(out_size) 28 | argmax = features.new_zeros(out_size, dtype=torch.int) 29 | roi_pool_cuda.forward(features, rois, out_h, out_w, spatial_scale, 30 | output, argmax) 31 | ctx.spatial_scale = spatial_scale 32 | ctx.feature_size = features.size() 33 | ctx.argmax = argmax 34 | 35 | return output 36 | 37 | @staticmethod 38 | def backward(ctx, grad_output): 39 | assert grad_output.is_cuda 40 | spatial_scale = ctx.spatial_scale 41 | feature_size = ctx.feature_size 42 | argmax = ctx.argmax 43 | rois = ctx.saved_tensors[0] 44 | assert feature_size is not None 45 | 46 | grad_input = grad_rois = None 47 | if ctx.needs_input_grad[0]: 48 | grad_input = grad_output.new_zeros(feature_size) 49 | roi_pool_cuda.backward(grad_output.contiguous(), rois, argmax, 50 | spatial_scale, grad_input) 51 | 52 | return grad_input, grad_rois, None, None 53 | 54 | 55 | roi_pool = RoIPoolFunction.apply 56 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/gradcheck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import gradcheck 3 | 4 | import os.path as osp 5 | import sys 6 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 7 | from roi_pool import RoIPool # noqa: E402 8 | 9 | feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda() 10 | rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55], 11 | [1, 67, 40, 110, 120]]).cuda() 12 | inputs = (feat, rois) 13 | print('Gradcheck for roi pooling...') 14 | test = gradcheck(RoIPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3) 15 | print(test) 16 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OceanPang/Libra_R-CNN/bd9a4f004dfdfcdc40cfc0d5a41af0bdacefba0d/mmdet/ops/roi_pool/modules/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/modules/roi_pool.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_pool import roi_pool 3 | 4 | 5 | class RoIPool(Module): 6 | 7 | def __init__(self, out_size, spatial_scale): 8 | super(RoIPool, self).__init__() 9 | 10 | self.out_size = out_size 11 | self.spatial_scale = float(spatial_scale) 12 | 13 | def forward(self, features, rois): 14 | return roi_pool(features, rois, self.out_size, self.spatial_scale) 15 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from 
torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='roi_pool', 6 | ext_modules=[ 7 | CUDAExtension('roi_pool_cuda', [ 8 | 'src/roi_pool_cuda.cpp', 9 | 'src/roi_pool_kernel.cu', 10 | ]) 11 | ], 12 | cmdclass={'build_ext': BuildExtension}) 13 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/src/roi_pool_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <cmath> 4 | #include <vector> 5 | 6 | int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int channels, 8 | const int height, const int width, const int num_rois, 9 | const int pooled_h, const int pooled_w, 10 | at::Tensor output, at::Tensor argmax); 11 | 12 | int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 13 | const at::Tensor argmax, const float spatial_scale, 14 | const int batch_size, const int channels, 15 | const int height, const int width, 16 | const int num_rois, const int pooled_h, 17 | const int pooled_w, at::Tensor bottom_grad); 18 | 19 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDA tensor ") 20 | #define CHECK_CONTIGUOUS(x) \ 21 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 22 | #define CHECK_INPUT(x) \ 23 | CHECK_CUDA(x); \ 24 | CHECK_CONTIGUOUS(x) 25 | 26 | int roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois, 27 | int pooled_height, int pooled_width, 28 | float spatial_scale, at::Tensor output, 29 | at::Tensor argmax) { 30 | CHECK_INPUT(features); 31 | CHECK_INPUT(rois); 32 | CHECK_INPUT(output); 33 | CHECK_INPUT(argmax); 34 | 35 | // Number of ROIs 36 | int num_rois = rois.size(0); 37 | int size_rois = rois.size(1); 38 | 39 | if (size_rois != 5) { 40 | printf("wrong roi size\n"); 41 | return 0; 42 | } 43 | 44 | int channels = features.size(1); 45 | int height = features.size(2); 46 | int width = features.size(3); 47 | 48 | ROIPoolForwardLaucher(features, rois, spatial_scale, channels, height, width, 49 | num_rois, pooled_height, pooled_width, output, argmax); 50 | 51 | return 1; 52 | } 53 | 54 | int roi_pooling_backward_cuda(at::Tensor top_grad, at::Tensor rois, 55 | at::Tensor argmax, float spatial_scale, 56 | at::Tensor bottom_grad) { 57 | CHECK_INPUT(top_grad); 58 | CHECK_INPUT(rois); 59 | CHECK_INPUT(argmax); 60 | CHECK_INPUT(bottom_grad); 61 | 62 | int pooled_height = top_grad.size(2); 63 | int pooled_width = top_grad.size(3); 64 | int num_rois = rois.size(0); 65 | int size_rois = rois.size(1); 66 | 67 | if (size_rois != 5) { 68 | printf("wrong roi size\n"); 69 | return 0; 70 | } 71 | int batch_size = bottom_grad.size(0); 72 | int channels = bottom_grad.size(1); 73 | int height = bottom_grad.size(2); 74 | int width = bottom_grad.size(3); 75 | 76 | ROIPoolBackwardLaucher(top_grad, rois, argmax, spatial_scale, batch_size, 77 | channels, height, width, num_rois, pooled_height, 78 | pooled_width, bottom_grad); 79 | 80 | return 1; 81 | } 82 | 83 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 84 | m.def("forward", &roi_pooling_forward_cuda, "Roi_Pooling forward (CUDA)"); 85 | m.def("backward", &roi_pooling_backward_cuda, "Roi_Pooling backward (CUDA)"); 86 | } 87 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/__init__.py: -------------------------------------------------------------------------------- 1 | from .modules.sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss 2 |
3 | __all__ = ['SigmoidFocalLoss', 'sigmoid_focal_loss'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OceanPang/Libra_R-CNN/bd9a4f004dfdfcdc40cfc0d5a41af0bdacefba0d/mmdet/ops/sigmoid_focal_loss/functions/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/functions/sigmoid_focal_loss.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Function 2 | from torch.autograd.function import once_differentiable 3 | 4 | from .. import sigmoid_focal_loss_cuda 5 | 6 | 7 | class SigmoidFocalLossFunction(Function): 8 | 9 | @staticmethod 10 | def forward(ctx, input, target, gamma=2.0, alpha=0.25): 11 | ctx.save_for_backward(input, target) 12 | num_classes = input.shape[1] 13 | ctx.num_classes = num_classes 14 | ctx.gamma = gamma 15 | ctx.alpha = alpha 16 | 17 | loss = sigmoid_focal_loss_cuda.forward(input, target, num_classes, 18 | gamma, alpha) 19 | return loss 20 | 21 | @staticmethod 22 | @once_differentiable 23 | def backward(ctx, d_loss): 24 | input, target = ctx.saved_tensors 25 | num_classes = ctx.num_classes 26 | gamma = ctx.gamma 27 | alpha = ctx.alpha 28 | d_loss = d_loss.contiguous() 29 | d_input = sigmoid_focal_loss_cuda.backward(input, target, d_loss, 30 | num_classes, gamma, alpha) 31 | return d_input, None, None, None, None 32 | 33 | 34 | sigmoid_focal_loss = SigmoidFocalLossFunction.apply 35 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OceanPang/Libra_R-CNN/bd9a4f004dfdfcdc40cfc0d5a41af0bdacefba0d/mmdet/ops/sigmoid_focal_loss/modules/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/modules/sigmoid_focal_loss.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from ..functions.sigmoid_focal_loss import sigmoid_focal_loss 4 | 5 | 6 | # TODO: remove this module 7 | class SigmoidFocalLoss(nn.Module): 8 | 9 | def __init__(self, gamma, alpha): 10 | super(SigmoidFocalLoss, self).__init__() 11 | self.gamma = gamma 12 | self.alpha = alpha 13 | 14 | def forward(self, logits, targets): 15 | assert logits.is_cuda 16 | loss = sigmoid_focal_loss(logits, targets, self.gamma, self.alpha) 17 | return loss.sum() 18 | 19 | def __repr__(self): 20 | tmpstr = self.__class__.__name__ + "(" 21 | tmpstr += "gamma=" + str(self.gamma) 22 | tmpstr += ", alpha=" + str(self.alpha) 23 | tmpstr += ")" 24 | return tmpstr 25 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='SigmoidFocalLoss', 6 | ext_modules=[ 7 | CUDAExtension('sigmoid_focal_loss_cuda', [ 8 | 'src/sigmoid_focal_loss.cpp', 9 | 'src/sigmoid_focal_loss_cuda.cu', 10 | ]), 11 | ], 12 | cmdclass={'build_ext': BuildExtension}) 13 | 
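# Usage sketch (editorial example; not part of the original files). The
# module is CUDA-only -- its forward() asserts logits.is_cuda -- and returns
# the per-element focal losses summed into a scalar. Targets are integer
# class indices; this sketch assumes the 1-based convention with 0 reserved
# for background, inherited from the maskrcnn-benchmark kernel this op is
# modified from (worth verifying against the .cu source).
import torch
from mmdet.ops.sigmoid_focal_loss import SigmoidFocalLoss

criterion = SigmoidFocalLoss(gamma=2.0, alpha=0.25)
logits = torch.randn(8, 80, device='cuda', requires_grad=True)  # (N, num_classes)
targets = torch.randint(1, 81, (8, ), device='cuda')  # one class label per row
loss = criterion(logits, targets)  # scalar
loss.backward()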
-------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss.cpp: -------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h 3 | #include <torch/extension.h> 4 | 5 | at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits, 6 | const at::Tensor &targets, 7 | const int num_classes, 8 | const float gamma, const float alpha); 9 | 10 | at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits, 11 | const at::Tensor &targets, 12 | const at::Tensor &d_losses, 13 | const int num_classes, 14 | const float gamma, const float alpha); 15 | 16 | // Interface for Python 17 | at::Tensor SigmoidFocalLoss_forward(const at::Tensor &logits, 18 | const at::Tensor &targets, 19 | const int num_classes, const float gamma, 20 | const float alpha) { 21 | if (logits.type().is_cuda()) { 22 | return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, 23 | alpha); 24 | } 25 | } 26 | 27 | at::Tensor SigmoidFocalLoss_backward(const at::Tensor &logits, 28 | const at::Tensor &targets, 29 | const at::Tensor &d_losses, 30 | const int num_classes, const float gamma, 31 | const float alpha) { 32 | if (logits.type().is_cuda()) { 33 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, 34 | num_classes, gamma, alpha); 35 | } 36 | } 37 | 38 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 39 | m.def("forward", &SigmoidFocalLoss_forward, 40 | "SigmoidFocalLoss forward (CUDA)"); 41 | m.def("backward", &SigmoidFocalLoss_backward, 42 | "SigmoidFocalLoss backward (CUDA)"); 43 | } 44 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import time 4 | from setuptools import find_packages, setup 5 | 6 | 7 | def readme(): 8 | with open('README.md', encoding='utf-8') as f: 9 | content = f.read() 10 | return content 11 | 12 | 13 | MAJOR = 0 14 | MINOR = 6 15 | PATCH = 0 16 | SUFFIX = '' 17 | SHORT_VERSION = '{}.{}.{}{}'.format(MAJOR, MINOR, PATCH, SUFFIX) 18 | 19 | version_file = 'mmdet/version.py' 20 | 21 | 22 | def get_git_hash(): 23 | 24 | def _minimal_ext_cmd(cmd): 25 | # construct minimal environment 26 | env = {} 27 | for k in ['SYSTEMROOT', 'PATH', 'HOME']: 28 | v = os.environ.get(k) 29 | if v is not None: 30 | env[k] = v 31 | # LANGUAGE is used on win32 32 | env['LANGUAGE'] = 'C' 33 | env['LANG'] = 'C' 34 | env['LC_ALL'] = 'C' 35 | out = subprocess.Popen( 36 | cmd, stdout=subprocess.PIPE, env=env).communicate()[0] 37 | return out 38 | 39 | try: 40 | out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD']) 41 | sha = out.strip().decode('ascii') 42 | except OSError: 43 | sha = 'unknown' 44 | 45 | return sha 46 | 47 | 48 | def get_hash(): 49 | if os.path.exists('.git'): 50 | sha = get_git_hash()[:7] 51 | elif os.path.exists(version_file): 52 | try: 53 | from mmdet.version import __version__ 54 | sha = __version__.split('+')[-1] 55 | except ImportError: 56 | raise ImportError('Unable to get git version') 57 | else: 58 | sha = 'unknown' 59 | 60 | return sha 61 | 62 | 63 | def write_version_py(): 64 | content = """# GENERATED VERSION FILE 65 | # TIME: {} 66 | 67 | __version__ = '{}' 68 | short_version = '{}' 69 | """ 70 | sha = get_hash() 71 | VERSION = SHORT_VERSION + '+' + sha 72 | 73 | with open(version_file, 'w') as f: 74 |
f.write(content.format(time.asctime(), VERSION, SHORT_VERSION)) 75 | 76 | 77 | def get_version(): 78 | with open(version_file, 'r') as f: 79 | exec(compile(f.read(), version_file, 'exec')) 80 | return locals()['__version__'] 81 | 82 | 83 | if __name__ == '__main__': 84 | write_version_py() 85 | setup( 86 | name='mmdet', 87 | version=get_version(), 88 | description='Open MMLab Detection Toolbox', 89 | long_description=readme(), 90 | keywords='computer vision, object detection', 91 | url='https://github.com/open-mmlab/mmdetection', 92 | packages=find_packages(exclude=('configs', 'tools', 'demo')), 93 | package_data={'mmdet.ops': ['*/*.so']}, 94 | classifiers=[ 95 | 'Development Status :: 4 - Beta', 96 | 'License :: OSI Approved :: Apache Software License', 97 | 'Operating System :: OS Independent', 98 | 'Programming Language :: Python :: 2', 99 | 'Programming Language :: Python :: 2.7', 100 | 'Programming Language :: Python :: 3', 101 | 'Programming Language :: Python :: 3.4', 102 | 'Programming Language :: Python :: 3.5', 103 | 'Programming Language :: Python :: 3.6', 104 | ], 105 | license='Apache License 2.0', 106 | setup_requires=['pytest-runner'], 107 | tests_require=['pytest'], 108 | install_requires=[ 109 | 'mmcv>=0.2.6', 'numpy', 'matplotlib', 'six', 'terminaltables', 110 | 'pycocotools' 111 | ], 112 | zip_safe=False) 113 | -------------------------------------------------------------------------------- /tools/coco_eval.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | 3 | from mmdet.core import coco_eval 4 | 5 | 6 | def main(): 7 | parser = ArgumentParser(description='COCO Evaluation') 8 | parser.add_argument('result', help='result file path') 9 | parser.add_argument('--ann', help='annotation file path') 10 | parser.add_argument( 11 | '--types', 12 | type=str, 13 | nargs='+', 14 | choices=['proposal_fast', 'proposal', 'bbox', 'segm', 'keypoint'], 15 | default=['bbox'], 16 | help='result types') 17 | parser.add_argument( 18 | '--max-dets', 19 | type=int, 20 | nargs='+', 21 | default=[100, 300, 1000], 22 | help='proposal numbers, only used for recall evaluation') 23 | args = parser.parse_args() 24 | coco_eval(args.result, args.types, args.ann, args.max_dets) 25 | 26 | 27 | if __name__ == '__main__': 28 | main() 29 | -------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | CONFIG=$1 6 | CHECKPOINT=$2 7 | GPUS=$3 8 | 9 | $PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 11 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | CONFIG=$1 6 | GPUS=$2 7 | 8 | $PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} 10 | -------------------------------------------------------------------------------- /tools/publish_model.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import subprocess 3 | import torch 4 | 5 | 6 | def parse_args(): 7 | parser = argparse.ArgumentParser( 8 | description='Process a checkpoint to be 
published') 9 | parser.add_argument('in_file', help='input checkpoint filename') 10 | parser.add_argument('out_file', help='output checkpoint filename') 11 | args = parser.parse_args() 12 | return args 13 | 14 | 15 | def process_checkpoint(in_file, out_file): 16 | checkpoint = torch.load(in_file, map_location='cpu') 17 | # remove optimizer for smaller file size 18 | if 'optimizer' in checkpoint: 19 | del checkpoint['optimizer'] 20 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 21 | # add the code here. 22 | torch.save(checkpoint, out_file) 23 | sha = subprocess.check_output(['sha256sum', out_file]).decode() 24 | final_file = out_file[:-4] + '-{}.pth'.format(sha[:8])  # assumes a '.pth' suffix; str.rstrip('.pth') would strip characters, not the suffix 25 | subprocess.Popen(['mv', out_file, final_file]) 26 | 27 | 28 | def main(): 29 | args = parse_args() 30 | process_checkpoint(args.in_file, args.out_file) 31 | 32 | 33 | if __name__ == '__main__': 34 | main() 35 | -------------------------------------------------------------------------------- /tools/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | srun -p ${PARTITION} \ 16 | --job-name=${JOB_NAME} \ 17 | --gres=gpu:${GPUS_PER_NODE} \ 18 | --ntasks=${GPUS} \ 19 | --ntasks-per-node=${GPUS_PER_NODE} \ 20 | --cpus-per-task=${CPUS_PER_TASK} \ 21 | --kill-on-bad-exit=1 \ 22 | ${SRUN_ARGS} \ 23 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 24 | -------------------------------------------------------------------------------- /tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | WORK_DIR=$4 9 | GPUS=${5:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | PY_ARGS=${PY_ARGS:-"--validate"} 14 | 15 | srun -p ${PARTITION} \ 16 | --job-name=${JOB_NAME} \ 17 | --gres=gpu:${GPUS_PER_NODE} \ 18 | --ntasks=${GPUS} \ 19 | --ntasks-per-node=${GPUS_PER_NODE} \ 20 | --cpus-per-task=${CPUS_PER_TASK} \ 21 | --kill-on-bad-exit=1 \ 22 | ${SRUN_ARGS} \ 23 | python -u tools/train.py ${CONFIG} --work_dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} 24 | -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import argparse 4 | import os 5 | from mmcv import Config 6 | 7 | from mmdet import __version__ 8 | from mmdet.datasets import get_dataset 9 | from mmdet.apis import (train_detector, init_dist, get_root_logger, 10 | set_random_seed) 11 | from mmdet.models import build_detector 12 | import torch 13 | 14 | 15 | def parse_args(): 16 | parser = argparse.ArgumentParser(description='Train a detector') 17 | parser.add_argument('config', help='train config file path') 18 | parser.add_argument('--work_dir', help='the dir to save logs and models') 19 | parser.add_argument( 20 | '--resume_from', help='the checkpoint file to resume from') 21 | parser.add_argument( 22 | '--validate', 23 | action='store_true', 24 | help='whether to evaluate the checkpoint during training') 25 | parser.add_argument( 26 | '--gpus', 27
| type=int, 28 | default=1, 29 | help='number of gpus to use ' 30 | '(only applicable to non-distributed training)') 31 | parser.add_argument('--seed', type=int, default=None, help='random seed') 32 | parser.add_argument( 33 | '--launcher', 34 | choices=['none', 'pytorch', 'slurm', 'mpi'], 35 | default='none', 36 | help='job launcher') 37 | parser.add_argument('--local_rank', type=int, default=0) 38 | args = parser.parse_args() 39 | if 'LOCAL_RANK' not in os.environ: 40 | os.environ['LOCAL_RANK'] = str(args.local_rank) 41 | 42 | return args 43 | 44 | 45 | def main(): 46 | args = parse_args() 47 | 48 | cfg = Config.fromfile(args.config) 49 | # set cudnn_benchmark 50 | if cfg.get('cudnn_benchmark', False): 51 | torch.backends.cudnn.benchmark = True 52 | # update configs according to CLI args 53 | if args.work_dir is not None: 54 | cfg.work_dir = args.work_dir 55 | if args.resume_from is not None: 56 | cfg.resume_from = args.resume_from 57 | cfg.gpus = args.gpus 58 | 59 | # init distributed env first, since logger depends on the dist info. 60 | if args.launcher == 'none': 61 | distributed = False 62 | else: 63 | distributed = True 64 | init_dist(args.launcher, **cfg.dist_params) 65 | 66 | # init logger before other steps 67 | logger = get_root_logger(cfg.log_level) 68 | logger.info('Distributed training: {}'.format(distributed)) 69 | 70 | # set random seeds 71 | if args.seed is not None: 72 | logger.info('Set random seed to {}'.format(args.seed)) 73 | set_random_seed(args.seed) 74 | 75 | model = build_detector( 76 | cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg) 77 | 78 | train_dataset = get_dataset(cfg.data.train) 79 | if cfg.checkpoint_config is not None: 80 | # save mmdet version, config file content and class names in 81 | # checkpoints as meta data 82 | cfg.checkpoint_config.meta = dict( 83 | mmdet_version=__version__, 84 | config=cfg.text, 85 | CLASSES=train_dataset.CLASSES) 86 | # add an attribute for visualization convenience 87 | model.CLASSES = train_dataset.CLASSES 88 | train_detector( 89 | model, 90 | train_dataset, 91 | cfg, 92 | distributed=distributed, 93 | validate=args.validate, 94 | logger=logger) 95 | 96 | 97 | if __name__ == '__main__': 98 | main() 99 | -------------------------------------------------------------------------------- /tools/upgrade_model_version.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import re 3 | from collections import OrderedDict 4 | 5 | import torch 6 | 7 | 8 | def convert(in_file, out_file): 9 | """Convert keys in checkpoints. 10 | 11 | There can be some breaking changes during the development of mmdetection, 12 | and this tool is used for upgrading checkpoints trained with old versions 13 | to the latest one. 
14 | """ 15 | checkpoint = torch.load(in_file) 16 | in_state_dict = checkpoint.pop('state_dict') 17 | out_state_dict = OrderedDict() 18 | for key, val in in_state_dict.items(): 19 | # Use ConvModule instead of nn.Conv2d in RetinaNet 20 | # cls_convs.0.weight -> cls_convs.0.conv.weight 21 | m = re.search(r'(cls_convs|reg_convs).\d.(weight|bias)', key) 22 | if m is not None: 23 | param = m.groups()[1] 24 | new_key = key.replace(param, 'conv.{}'.format(param)) 25 | out_state_dict[new_key] = val 26 | continue 27 | 28 | out_state_dict[key] = val 29 | checkpoint['state_dict'] = out_state_dict 30 | torch.save(checkpoint, out_file) 31 | 32 | 33 | def main(): 34 | parser = argparse.ArgumentParser(description='Upgrade model version') 35 | parser.add_argument('in_file', help='input checkpoint file') 36 | parser.add_argument('out_file', help='output checkpoint file') 37 | args = parser.parse_args() 38 | convert(args.in_file, args.out_file) 39 | 40 | 41 | if __name__ == '__main__': 42 | main() 43 | -------------------------------------------------------------------------------- /tools/voc_eval.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | 3 | import mmcv 4 | import numpy as np 5 | 6 | from mmdet import datasets 7 | from mmdet.core import eval_map 8 | 9 | 10 | def voc_eval(result_file, dataset, iou_thr=0.5): 11 | det_results = mmcv.load(result_file) 12 | gt_bboxes = [] 13 | gt_labels = [] 14 | gt_ignore = [] 15 | for i in range(len(dataset)): 16 | ann = dataset.get_ann_info(i) 17 | bboxes = ann['bboxes'] 18 | labels = ann['labels'] 19 | if 'bboxes_ignore' in ann: 20 | ignore = np.concatenate([ 21 | np.zeros(bboxes.shape[0], dtype=np.bool), 22 | np.ones(ann['bboxes_ignore'].shape[0], dtype=np.bool) 23 | ]) 24 | gt_ignore.append(ignore) 25 | bboxes = np.vstack([bboxes, ann['bboxes_ignore']]) 26 | labels = np.concatenate([labels, ann['labels_ignore']]) 27 | gt_bboxes.append(bboxes) 28 | gt_labels.append(labels) 29 | if not gt_ignore: 30 | gt_ignore = gt_ignore 31 | if hasattr(dataset, 'year') and dataset.year == 2007: 32 | dataset_name = 'voc07' 33 | else: 34 | dataset_name = dataset.CLASSES 35 | eval_map( 36 | det_results, 37 | gt_bboxes, 38 | gt_labels, 39 | gt_ignore=gt_ignore, 40 | scale_ranges=None, 41 | iou_thr=iou_thr, 42 | dataset=dataset_name, 43 | print_summary=True) 44 | 45 | 46 | def main(): 47 | parser = ArgumentParser(description='VOC Evaluation') 48 | parser.add_argument('result', help='result file path') 49 | parser.add_argument('config', help='config file path') 50 | parser.add_argument( 51 | '--iou-thr', 52 | type=float, 53 | default=0.5, 54 | help='IoU threshold for evaluation') 55 | args = parser.parse_args() 56 | cfg = mmcv.Config.fromfile(args.config) 57 | test_dataset = mmcv.runner.obj_from_dict(cfg.data.test, datasets) 58 | voc_eval(args.result, test_dataset, args.iou_thr) 59 | 60 | 61 | if __name__ == '__main__': 62 | main() 63 | --------------------------------------------------------------------------------