├── README.md ├── mmdetection ├── .github │ └── ISSUE_TEMPLATE │ │ ├── bug_report.md │ │ ├── feature_request.md │ │ └── general_questions.md ├── .gitignore ├── .style.yapf ├── .travis.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── GETTING_STARTED.md ├── INSTALL.md ├── LICENSE ├── MODEL_ZOO.md ├── README.md ├── TECHNICAL_DETAILS.md ├── compile.sh ├── configs │ ├── cascade_mask_rcnn_dconv_c3-c5_r101_fpn_1x_colab.py │ ├── cascade_mask_rcnn_dconv_c3-c5_r101_fpn_1x_colab_lr15.py │ ├── cascade_mask_rcnn_dconv_c3-c5_r101_fpn_1x_colab_lr3.py │ ├── cascade_mask_rcnn_x101_64x4d_fpn_1x_colab.py │ ├── cascade_mask_rcnn_x101_64x4d_fpn_1x_colab_lr15.py │ ├── cascade_mask_rcnn_x101_64x4d_fpn_1x_colab_lr3.py │ ├── cascade_mask_rcnn_x101_64x4d_fpn_1x_colab_parent.py │ ├── cascade_mask_rcnn_x101_64x4d_fpn_1x_colab_parent_lr5.py │ ├── cascade_mask_rcnn_x101_64x4d_fpn_1x_colab_parent_lr50.py │ ├── cascade_mask_rcnn_x101_64x4d_fpn_1x_new_fp16_dcn.py │ ├── cascade_mask_rcnn_x101_64x4d_fpn_1x_new_fp16_dcn_lr5.py │ └── cascade_mask_rcnn_x101_64x4d_fpn_1x_new_fp16_dcn_lr50.py ├── mmdet │ ├── __init__.py │ ├── apis │ │ ├── __init__.py │ │ ├── env.py │ │ ├── inference.py │ │ └── train.py │ ├── core │ │ ├── __init__.py │ │ ├── anchor │ │ │ ├── __init__.py │ │ │ ├── anchor_generator.py │ │ │ ├── anchor_target.py │ │ │ └── guided_anchor_target.py │ │ ├── bbox │ │ │ ├── __init__.py │ │ │ ├── assign_sampling.py │ │ │ ├── assigners │ │ │ │ ├── __init__.py │ │ │ │ ├── approx_max_iou_assigner.py │ │ │ │ ├── assign_result.py │ │ │ │ ├── base_assigner.py │ │ │ │ └── max_iou_assigner.py │ │ │ ├── bbox_target.py │ │ │ ├── geometry.py │ │ │ ├── samplers │ │ │ │ ├── __init__.py │ │ │ │ ├── base_sampler.py │ │ │ │ ├── combined_sampler.py │ │ │ │ ├── instance_balanced_pos_sampler.py │ │ │ │ ├── iou_balanced_neg_sampler.py │ │ │ │ ├── ohem_sampler.py │ │ │ │ ├── pseudo_sampler.py │ │ │ │ ├── random_sampler.py │ │ │ │ └── sampling_result.py │ │ │ └── transforms.py │ │ ├── evaluation │ │ │ ├── __init__.py │ │ │ ├── bbox_overlaps.py │ │ │ ├── class_names.py │ │ │ ├── coco_utils.py │ │ │ ├── eval_hooks.py │ │ │ ├── mean_ap.py │ │ │ └── recall.py │ │ ├── fp16 │ │ │ ├── __init__.py │ │ │ ├── decorators.py │ │ │ ├── hooks.py │ │ │ └── utils.py │ │ ├── mask │ │ │ ├── __init__.py │ │ │ ├── mask_target.py │ │ │ └── utils.py │ │ ├── post_processing │ │ │ ├── __init__.py │ │ │ ├── bbox_nms.py │ │ │ └── merge_augs.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── dist_utils.py │ │ │ └── misc.py │ ├── datasets │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── coco.py │ │ ├── concat_dataset.py │ │ ├── custom.py │ │ ├── dataset_wrappers.py │ │ ├── extra_aug.py │ │ ├── loader │ │ │ ├── __init__.py │ │ │ ├── build_loader.py │ │ │ └── sampler.py │ │ ├── oid.py │ │ ├── oid_seg.py │ │ ├── registry.py │ │ ├── repeat_dataset.py │ │ ├── transforms.py │ │ ├── utils.py │ │ ├── voc.py │ │ ├── wider_face.py │ │ └── xml_style.py │ ├── models │ │ ├── __init__.py │ │ ├── anchor_heads │ │ │ ├── __init__.py │ │ │ ├── anchor_head.py │ │ │ ├── fcos_head.py │ │ │ ├── ga_retina_head.py │ │ │ ├── ga_rpn_head.py │ │ │ ├── guided_anchor_head.py │ │ │ ├── retina_head.py │ │ │ ├── rpn_head.py │ │ │ └── ssd_head.py │ │ ├── backbones │ │ │ ├── __init__.py │ │ │ ├── hrnet.py │ │ │ ├── resnet.py │ │ │ ├── resnext.py │ │ │ └── ssd_vgg.py │ │ ├── bbox_heads │ │ │ ├── __init__.py │ │ │ ├── bbox_head.py │ │ │ └── convfc_bbox_head.py │ │ ├── builder.py │ │ ├── detectors │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── cascade_rcnn.py │ │ │ ├── ensemble_model.py │ │ │ ├── 
fast_rcnn.py │ │ │ ├── faster_rcnn.py │ │ │ ├── fcos.py │ │ │ ├── grid_rcnn.py │ │ │ ├── htc.py │ │ │ ├── mask_rcnn.py │ │ │ ├── mask_scoring_rcnn.py │ │ │ ├── retinanet.py │ │ │ ├── rpn.py │ │ │ ├── single_stage.py │ │ │ ├── test_mixins.py │ │ │ └── two_stage.py │ │ ├── losses │ │ │ ├── __init__.py │ │ │ ├── accuracy.py │ │ │ ├── balanced_l1_loss.py │ │ │ ├── cross_entropy_loss.py │ │ │ ├── focal_loss.py │ │ │ ├── ghm_loss.py │ │ │ ├── iou_loss.py │ │ │ ├── mse_loss.py │ │ │ ├── smooth_l1_loss.py │ │ │ └── utils.py │ │ ├── mask_heads │ │ │ ├── __init__.py │ │ │ ├── fcn_mask_head.py │ │ │ ├── fused_semantic_head.py │ │ │ ├── grid_head.py │ │ │ ├── htc_mask_head.py │ │ │ └── maskiou_head.py │ │ ├── necks │ │ │ ├── __init__.py │ │ │ ├── bfp.py │ │ │ ├── fpn.py │ │ │ └── hrfpn.py │ │ ├── plugins │ │ │ ├── __init__.py │ │ │ ├── generalized_attention.py │ │ │ └── non_local.py │ │ ├── registry.py │ │ ├── roi_extractors │ │ │ ├── __init__.py │ │ │ └── single_level.py │ │ ├── shared_heads │ │ │ ├── __init__.py │ │ │ └── res_layer.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── conv_module.py │ │ │ ├── conv_ws.py │ │ │ ├── norm.py │ │ │ ├── scale.py │ │ │ └── weight_init.py │ ├── ops │ │ ├── __init__.py │ │ ├── dcn │ │ │ ├── __init__.py │ │ │ ├── functions │ │ │ │ ├── __init__.py │ │ │ │ ├── deform_conv.py │ │ │ │ └── deform_pool.py │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ ├── deform_conv.py │ │ │ │ └── deform_pool.py │ │ │ ├── setup.py │ │ │ └── src │ │ │ │ ├── deform_conv_cuda.cpp │ │ │ │ ├── deform_conv_cuda_kernel.cu │ │ │ │ ├── deform_pool_cuda.cpp │ │ │ │ └── deform_pool_cuda_kernel.cu │ │ ├── gcb │ │ │ ├── __init__.py │ │ │ └── context_block.py │ │ ├── masked_conv │ │ │ ├── __init__.py │ │ │ ├── functions │ │ │ │ ├── __init__.py │ │ │ │ └── masked_conv.py │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ └── masked_conv.py │ │ │ ├── setup.py │ │ │ └── src │ │ │ │ ├── masked_conv2d_cuda.cpp │ │ │ │ └── masked_conv2d_kernel.cu │ │ ├── nms │ │ │ ├── __init__.py │ │ │ ├── nms_wrapper.py │ │ │ ├── setup.py │ │ │ └── src │ │ │ │ ├── nms_cpu.cpp │ │ │ │ ├── nms_cuda.cpp │ │ │ │ ├── nms_kernel.cu │ │ │ │ └── soft_nms_cpu.pyx │ │ ├── roi_align │ │ │ ├── __init__.py │ │ │ ├── functions │ │ │ │ ├── __init__.py │ │ │ │ └── roi_align.py │ │ │ ├── gradcheck.py │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ └── roi_align.py │ │ │ ├── setup.py │ │ │ └── src │ │ │ │ ├── roi_align_cuda.cpp │ │ │ │ └── roi_align_kernel.cu │ │ ├── roi_pool │ │ │ ├── __init__.py │ │ │ ├── functions │ │ │ │ ├── __init__.py │ │ │ │ └── roi_pool.py │ │ │ ├── gradcheck.py │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ └── roi_pool.py │ │ │ ├── setup.py │ │ │ └── src │ │ │ │ ├── roi_pool_cuda.cpp │ │ │ │ └── roi_pool_kernel.cu │ │ └── sigmoid_focal_loss │ │ │ ├── __init__.py │ │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── sigmoid_focal_loss.py │ │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── sigmoid_focal_loss.py │ │ │ ├── setup.py │ │ │ └── src │ │ │ ├── sigmoid_focal_loss.cpp │ │ │ └── sigmoid_focal_loss_cuda.cu │ └── utils │ │ ├── __init__.py │ │ └── registry.py ├── setup.py └── tools │ ├── analyze_logs.py │ ├── coco_eval.py │ ├── convert_datasets │ └── pascal_voc.py │ ├── detectron2pytorch.py │ ├── dist_test.sh │ ├── dist_train.sh │ ├── ensemble_test.py │ ├── infer_test.py │ ├── publish_model.py │ ├── slurm_test.sh │ ├── slurm_train.sh │ ├── test.py │ ├── test_new.py │ ├── train.py │ ├── upgrade_model_version.py │ └── voc_eval.py └── util ├── combine_leaf_and_parent.py ├── convert_seg_results_to_sub_25.py ├── 
make_rebalanced_train_ann.py ├── make_rebalanced_train_ann_oversample_test.py ├── make_rebalanced_train_ann_parent.py ├── make_test_ann_pkl.py ├── make_train_leaf_ann_pkl.py ├── make_train_parent_ann_pkl.py ├── nms_on_csvs.py ├── seg_275_leave_classes.py └── seg_expand_and_adjust_thres_25.py /mmdetection/.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | Thanks for your bug report; we appreciate it a lot. 11 | 12 | **Checklist** 13 | - [ ] I have searched related issues but could not get the expected help. 14 | - [ ] The bug has not been fixed in the latest version. 15 | 16 | **Describe the bug** 17 | A clear and concise description of what the bug is. 18 | If there are any related issues or upstream bugs, please also refer to them. 19 | 20 | **Error traceback** 21 | 1. What command or script did you run? 22 | ``` 23 | A placeholder for the command. 24 | ``` 25 | 2. If applicable, paste the error traceback here using code blocks. 26 | ``` 27 | A placeholder for the traceback. 28 | ``` 29 | 30 | **Reproduction details** 31 | 1. Did you make any modifications to the code? Do you understand what you modified? 32 | 2. What dataset did you use? 33 | 34 | **Environment** 35 | - OS: [e.g., Ubuntu 16.04.6] 36 | - GCC [e.g., 5.4.0] 37 | - PyTorch version [e.g., 1.1.0] 38 | - How you installed PyTorch [e.g., pip, conda, source] 39 | - GPU model [e.g., 1080Ti, V100] 40 | - CUDA and CUDNN version 41 | - [optional] Other information that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.) 42 | 43 | **Bug fix** 44 | If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here and that would be much appreciated! 45 | -------------------------------------------------------------------------------- /mmdetection/.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the feature** 11 | 12 | **Motivation** 13 | A clear and concise description of the motivation for the feature. 14 | Ex1. It is inconvenient when [....]. 15 | Ex2. There is a recent paper [....], which is very helpful for [....]. 16 | 17 | **Related resources** 18 | If there is an official code release or third-party implementations, please also provide the information here, which would be very helpful. 19 | 20 | **Additional context** 21 | Add any other context or screenshots about the feature request here. 22 | If you would like to implement the feature and create a PR, please leave a comment here and that would be much appreciated.
23 | -------------------------------------------------------------------------------- /mmdetection/.github/ISSUE_TEMPLATE/general_questions.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: General questions 3 | about: Ask general questions to get help 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | -------------------------------------------------------------------------------- /mmdetection/.gitignore: -------------------------------------------------------------------------------- 1 | demo/*.jpg 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | .pytest_cache/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | db.sqlite3 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # Environments 87 | .env 88 | .venv 89 | env/ 90 | venv/ 91 | ENV/ 92 | env.bak/ 93 | venv.bak/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | 108 | # cython generated cpp 109 | mmdet/ops/nms/src/soft_nms_cpu.cpp 110 | mmdet/version.py 111 | data 112 | .vscode 113 | .idea 114 | 115 | # custom 116 | *.pkl 117 | *.pkl.json 118 | *.log.json 119 | work_dirs/ 120 | checkpoints 121 | *pkl.json.bbox.json 122 | -------------------------------------------------------------------------------- /mmdetection/.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | BASED_ON_STYLE = pep8 3 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true 4 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true 5 | -------------------------------------------------------------------------------- /mmdetection/.travis.yml: -------------------------------------------------------------------------------- 1 | dist: xenial 2 | language: python 3 | 4 | install: 5 | - pip install flake8 6 | 7 | python: 8 | - "3.5" 9 | - "3.6" 10 | - "3.7" 11 | 12 | script: 13 | - flake8 14 | -------------------------------------------------------------------------------- /mmdetection/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open 
and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at chenkaidev@gmail.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /mmdetection/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to mmdetection 2 | 3 | All kinds of contributions are welcome, including but not limited to the following. 4 | 5 | - Fixes (typos, bugs) 6 | - New features and components 7 | 8 | ## Workflow 9 | 10 | 1. fork and pull the latest mmdetection 11 | 2. checkout a new branch (do not use the master branch for PRs) 12 | 3. commit your changes 13 | 4. create a PR 14 | 15 | Note 16 | - If you plan to add some new features that involve large changes, it is encouraged to open an issue for discussion first. 17 | - If you are the author of some papers and would like to include your method in mmdetection, 18 | please contact Kai Chen (chenkaidev[at]gmail[dot]com). We would much appreciate your contribution. 19 | 20 | ## Code style 21 | 22 | ### Python 23 | We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style. 24 | We use [flake8](http://flake8.pycqa.org/en/latest/) as the linter and [yapf](https://github.com/google/yapf) as the formatter. 25 | Please upgrade to the latest yapf (>=0.27.0) and refer to the [configuration](.style.yapf). 26 | 27 | >Before you create a PR, make sure that your code lints and is formatted by yapf. 28 | 29 | ### C++ and CUDA 30 | We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html). -------------------------------------------------------------------------------- /mmdetection/INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Installation 2 | 3 | ### Requirements 4 | 5 | - Linux 6 | - Python 3.5+ ([Say goodbye to Python2](https://python3statement.org/)) 7 | - PyTorch 1.0+ or PyTorch-nightly 8 | - CUDA 9.0+ 9 | - NCCL 2+ 10 | - GCC 4.9+ 11 | - [mmcv](https://github.com/open-mmlab/mmcv) 12 | 13 | We have tested the following OS and software versions: 14 | 15 | - OS: Ubuntu 16.04/18.04 and CentOS 7.2 16 | - CUDA: 9.0/9.2/10.0 17 | - NCCL: 2.1.15/2.2.13/2.3.7/2.4.2 18 | - GCC: 4.9/5.3/5.4/7.3 19 | 20 | ### Install mmdetection 21 | 22 | a. Create a conda virtual environment and activate it. Then install Cython. 23 | 24 | ```shell 25 | conda create -n open-mmlab python=3.7 -y 26 | conda activate open-mmlab 27 | 28 | conda install cython 29 | ``` 30 | 31 | b. Install PyTorch stable or nightly and torchvision following the [official instructions](https://pytorch.org/). 32 | 33 | c. Clone the mmdetection repository. 34 | 35 | ```shell 36 | git clone https://github.com/open-mmlab/mmdetection.git 37 | cd mmdetection 38 | ``` 39 | 40 | d. Compile CUDA extensions. 41 | 42 | ```shell 43 | ./compile.sh 44 | ``` 45 | 46 | e. Install mmdetection (other dependencies will be installed automatically). 47 | 48 | ```shell 49 | python setup.py develop 50 | # or "pip install -e ." 51 | ``` 52 | 53 | Note: 54 | 55 | 1. It is recommended that you rerun step e each time you pull updates from GitHub. If the C/CUDA code has been updated, you also need to rerun step d. 56 | The git commit id will be written into the version number in step e, e.g. 0.6.0+2e7045c. The version will also be saved in trained models. 57 | 58 | 2. Following the above instructions, mmdetection is installed in `dev` mode; any modifications to the code take effect without reinstalling it. 59 |
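As a quick sanity check after step e, the version string can be inspected from Python. A minimal sketch, assuming the install succeeded (the exact version and commit id will differ per checkout):

```python
# mmdet exposes both version strings (see mmdet/__init__.py).
# The printed values below are illustrative only.
import mmdet

print(mmdet.__version__)    # e.g. '0.6.0+2e7045c' (includes the commit id)
print(mmdet.short_version)  # e.g. '0.6.0'
```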
60 | ### Prepare COCO dataset 61 | 62 | It is recommended to symlink the dataset root to `$MMDETECTION/data`. 63 | 64 | ``` 65 | mmdetection 66 | ├── mmdet 67 | ├── tools 68 | ├── configs 69 | ├── data 70 | │ ├── coco 71 | │ │ ├── annotations 72 | │ │ ├── train2017 73 | │ │ ├── val2017 74 | │ │ ├── test2017 75 | │ ├── VOCdevkit 76 | │ │ ├── VOC2007 77 | │ │ ├── VOC2012 78 | 79 | ``` 80 | 81 | ### Scripts 82 | [Here](https://gist.github.com/hellock/bf23cd7348c727d69d48682cb6909047) is 83 | a script for setting up mmdetection with conda. 84 | 85 | ### Notice 86 | You can run `python(3) setup.py develop` or `pip install -e .` to install mmdetection if you want to make modifications to it frequently. 87 | 88 | If there is more than one copy of mmdetection on your machine and you want to use them alternately, 89 | insert the following code into the main file 90 | ```python 91 | import os.path as osp 92 | import sys 93 | sys.path.insert(0, osp.join(osp.dirname(osp.abspath(__file__)), '../')) 94 | ``` 95 | or run the following command in a terminal from the corresponding folder. 96 | ```shell 97 | export PYTHONPATH=`pwd`:$PYTHONPATH 98 | ``` 99 |
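Both tricks work for the same reason: Python searches `sys.path` in order, so an entry at index 0 shadows an installed package of the same name. A minimal sketch for confirming which copy wins (the clone path is hypothetical):

```python
import sys

# Hypothetical local clone; in practice this is what the osp.join(...)
# expression above computes relative to the main file.
sys.path.insert(0, '/path/to/my/mmdetection')

import mmdet
print(mmdet.__file__)  # should point inside /path/to/my/mmdetection
```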
-------------------------------------------------------------------------------- /mmdetection/compile.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | echo "Building roi align op..." 6 | cd mmdet/ops/roi_align 7 | if [ -d "build" ]; then 8 | rm -r build 9 | fi 10 | $PYTHON setup.py build_ext --inplace 11 | 12 | echo "Building roi pool op..." 13 | cd ../roi_pool 14 | if [ -d "build" ]; then 15 | rm -r build 16 | fi 17 | $PYTHON setup.py build_ext --inplace 18 | 19 | echo "Building nms op..." 20 | cd ../nms 21 | if [ -d "build" ]; then 22 | rm -r build 23 | fi 24 | $PYTHON setup.py build_ext --inplace 25 | 26 | echo "Building dcn..." 27 | cd ../dcn 28 | if [ -d "build" ]; then 29 | rm -r build 30 | fi 31 | $PYTHON setup.py build_ext --inplace 32 | 33 | echo "Building sigmoid focal loss op..." 34 | cd ../sigmoid_focal_loss 35 | if [ -d "build" ]; then 36 | rm -r build 37 | fi 38 | $PYTHON setup.py build_ext --inplace 39 | 40 | echo "Building masked conv op..." 41 | cd ../masked_conv 42 | if [ -d "build" ]; then 43 | rm -r build 44 | fi 45 | $PYTHON setup.py build_ext --inplace 46 | -------------------------------------------------------------------------------- /mmdetection/mmdet/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__, short_version 2 | 3 | __all__ = ['__version__', 'short_version'] 4 | -------------------------------------------------------------------------------- /mmdetection/mmdet/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .env import init_dist, get_root_logger, set_random_seed 2 | from .train import train_detector 3 | from .inference import init_detector, inference_detector, show_result, show_gt 4 | 5 | __all__ = [ 6 | 'init_dist', 'get_root_logger', 'set_random_seed', 'train_detector', 7 | 'init_detector', 'inference_detector', 'show_result', 'show_gt' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdetection/mmdet/apis/env.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import random 4 | import subprocess 5 | 6 | import numpy as np 7 | import torch 8 | import torch.distributed as dist 9 | import torch.multiprocessing as mp 10 | from mmcv.runner import get_dist_info 11 | 12 | 13 | def init_dist(launcher, backend='nccl', **kwargs): 14 | if mp.get_start_method(allow_none=True) is None: 15 | mp.set_start_method('spawn') 16 | if launcher == 'pytorch': 17 | _init_dist_pytorch(backend, **kwargs) 18 | elif launcher == 'mpi': 19 | _init_dist_mpi(backend, **kwargs) 20 | elif launcher == 'slurm': 21 | _init_dist_slurm(backend, **kwargs) 22 | else: 23 | raise ValueError('Invalid launcher type: {}'.format(launcher)) 24 | 25 | 26 | def _init_dist_pytorch(backend, **kwargs): 27 | # TODO: use local_rank instead of rank % num_gpus 28 | rank = int(os.environ['RANK']) 29 | num_gpus = torch.cuda.device_count() 30 | torch.cuda.set_device(rank % num_gpus) 31 | dist.init_process_group(backend=backend, **kwargs) 32 | 33 | 34 | def _init_dist_mpi(backend, **kwargs): 35 | raise NotImplementedError 36 | 37 | 38 | def _init_dist_slurm(backend, port=29500, **kwargs): 39 | proc_id = int(os.environ['SLURM_PROCID']) 40 | ntasks = int(os.environ['SLURM_NTASKS']) 41 | node_list = os.environ['SLURM_NODELIST'] 42 | num_gpus = torch.cuda.device_count() 43 | torch.cuda.set_device(proc_id % num_gpus) 44 | addr = subprocess.getoutput( 45 | 'scontrol show hostname {} | head -n1'.format(node_list)) 46 | os.environ['MASTER_PORT'] = str(port) 47 | os.environ['MASTER_ADDR'] = addr 48 | os.environ['WORLD_SIZE'] = str(ntasks) 49 | os.environ['RANK'] = str(proc_id) 50 | dist.init_process_group(backend=backend) 51 | 52 | 53 | def set_random_seed(seed): 54 | random.seed(seed) 55 | np.random.seed(seed) 56 | torch.manual_seed(seed) 57 | torch.cuda.manual_seed_all(seed) 58 | 59 | 60 | def get_root_logger(log_level=logging.INFO): 61 | logger = logging.getLogger() 62 | if not logger.hasHandlers(): 63 | logging.basicConfig( 64 | format='%(asctime)s - %(levelname)s - %(message)s', 65 | level=log_level) 66 | rank, _ = get_dist_info() 67 | if rank != 0: 68 | logger.setLevel('ERROR') 69 | return logger 70 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor import * # noqa:
F401, F403 2 | from .bbox import * # noqa: F401, F403 3 | from .evaluation import * # noqa: F401, F403 4 | from .fp16 import * # noqa: F401, F403 5 | from .mask import * # noqa: F401, F403 6 | from .post_processing import * # noqa: F401, F403 7 | from .utils import * # noqa: F401, F403 8 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/anchor/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_generator import AnchorGenerator 2 | from .anchor_target import anchor_target, anchor_inside_flags 3 | from .guided_anchor_target import ga_loc_target, ga_shape_target 4 | 5 | __all__ = [ 6 | 'AnchorGenerator', 'anchor_target', 'anchor_inside_flags', 'ga_loc_target', 7 | 'ga_shape_target' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/anchor/anchor_generator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class AnchorGenerator(object): 5 | 6 | def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None): 7 | self.base_size = base_size 8 | self.scales = torch.Tensor(scales) 9 | self.ratios = torch.Tensor(ratios) 10 | self.scale_major = scale_major 11 | self.ctr = ctr 12 | self.base_anchors = self.gen_base_anchors() 13 | 14 | @property 15 | def num_base_anchors(self): 16 | return self.base_anchors.size(0) 17 | 18 | def gen_base_anchors(self): 19 | w = self.base_size 20 | h = self.base_size 21 | if self.ctr is None: 22 | x_ctr = 0.5 * (w - 1) 23 | y_ctr = 0.5 * (h - 1) 24 | else: 25 | x_ctr, y_ctr = self.ctr 26 | 27 | h_ratios = torch.sqrt(self.ratios) 28 | w_ratios = 1 / h_ratios 29 | if self.scale_major: 30 | ws = (w * w_ratios[:, None] * self.scales[None, :]).view(-1) 31 | hs = (h * h_ratios[:, None] * self.scales[None, :]).view(-1) 32 | else: 33 | ws = (w * self.scales[:, None] * w_ratios[None, :]).view(-1) 34 | hs = (h * self.scales[:, None] * h_ratios[None, :]).view(-1) 35 | 36 | base_anchors = torch.stack( 37 | [ 38 | x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1), 39 | x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1) 40 | ], 41 | dim=-1).round() 42 | 43 | return base_anchors 44 | 45 | def _meshgrid(self, x, y, row_major=True): 46 | xx = x.repeat(len(y)) 47 | yy = y.view(-1, 1).repeat(1, len(x)).view(-1) 48 | if row_major: 49 | return xx, yy 50 | else: 51 | return yy, xx 52 | 53 | def grid_anchors(self, featmap_size, stride=16, device='cuda'): 54 | base_anchors = self.base_anchors.to(device) 55 | 56 | feat_h, feat_w = featmap_size 57 | shift_x = torch.arange(0, feat_w, device=device) * stride 58 | shift_y = torch.arange(0, feat_h, device=device) * stride 59 | shift_xx, shift_yy = self._meshgrid(shift_x, shift_y) 60 | shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1) 61 | shifts = shifts.type_as(base_anchors) 62 | # first feat_w elements correspond to the first row of shifts 63 | # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get 64 | # shifted anchors (K, A, 4), reshape to (K*A, 4) 65 | 66 | all_anchors = base_anchors[None, :, :] + shifts[:, None, :] 67 | all_anchors = all_anchors.view(-1, 4) 68 | # first A rows correspond to A anchors of (0, 0) in feature map, 69 | # then (0, 1), (0, 2), ... 
70 | return all_anchors 71 | 72 | def valid_flags(self, featmap_size, valid_size, device='cuda'): 73 | feat_h, feat_w = featmap_size 74 | valid_h, valid_w = valid_size 75 | assert valid_h <= feat_h and valid_w <= feat_w 76 | valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device) 77 | valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device) 78 | valid_x[:valid_w] = 1 79 | valid_y[:valid_h] = 1 80 | valid_xx, valid_yy = self._meshgrid(valid_x, valid_y) 81 | valid = valid_xx & valid_yy 82 | valid = valid[:, None].expand( 83 | valid.size(0), self.num_base_anchors).contiguous().view(-1) 84 | return valid 85 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | from .geometry import bbox_overlaps 2 | from .assigners import BaseAssigner, MaxIoUAssigner, AssignResult 3 | from .samplers import (BaseSampler, PseudoSampler, RandomSampler, 4 | InstanceBalancedPosSampler, IoUBalancedNegSampler, 5 | CombinedSampler, SamplingResult) 6 | from .assign_sampling import build_assigner, build_sampler, assign_and_sample 7 | from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping, 8 | bbox_mapping_back, bbox2roi, roi2bbox, bbox2result, 9 | distance2bbox) 10 | from .bbox_target import bbox_target 11 | 12 | __all__ = [ 13 | 'bbox_overlaps', 'BaseAssigner', 'MaxIoUAssigner', 'AssignResult', 14 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 15 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 16 | 'SamplingResult', 'build_assigner', 'build_sampler', 'assign_and_sample', 17 | 'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_mapping', 18 | 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', 19 | 'distance2bbox', 'bbox_target' 20 | ] 21 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/assign_sampling.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from . 
import assigners, samplers 4 | 5 | 6 | def build_assigner(cfg, **kwargs): 7 | if isinstance(cfg, assigners.BaseAssigner): 8 | return cfg 9 | elif isinstance(cfg, dict): 10 | return mmcv.runner.obj_from_dict(cfg, assigners, default_args=kwargs) 11 | else: 12 | raise TypeError('Invalid type {} for building an assigner'.format( 13 | type(cfg))) 14 | 15 | 16 | def build_sampler(cfg, **kwargs): 17 | if isinstance(cfg, samplers.BaseSampler): 18 | return cfg 19 | elif isinstance(cfg, dict): 20 | return mmcv.runner.obj_from_dict(cfg, samplers, default_args=kwargs) 21 | else: 22 | raise TypeError('Invalid type {} for building a sampler'.format( 23 | type(cfg))) 24 | 25 | 26 | def assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg): 27 | bbox_assigner = build_assigner(cfg.assigner) 28 | bbox_sampler = build_sampler(cfg.sampler) 29 | assign_result = bbox_assigner.assign(bboxes, gt_bboxes, gt_bboxes_ignore, 30 | gt_labels) 31 | sampling_result = bbox_sampler.sample(assign_result, bboxes, gt_bboxes, 32 | gt_labels) 33 | return assign_result, sampling_result 34 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_assigner import BaseAssigner 2 | from .max_iou_assigner import MaxIoUAssigner 3 | from .approx_max_iou_assigner import ApproxMaxIoUAssigner 4 | from .assign_result import AssignResult 5 | 6 | __all__ = [ 7 | 'BaseAssigner', 'MaxIoUAssigner', 'ApproxMaxIoUAssigner', 'AssignResult' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/assigners/assign_result.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class AssignResult(object): 5 | 6 | def __init__(self, num_gts, gt_inds, max_overlaps, labels=None): 7 | self.num_gts = num_gts 8 | self.gt_inds = gt_inds 9 | self.max_overlaps = max_overlaps 10 | self.labels = labels 11 | 12 | def add_gt_(self, gt_labels): 13 | self_inds = torch.arange( 14 | 1, len(gt_labels) + 1, dtype=torch.long, device=gt_labels.device) 15 | self.gt_inds = torch.cat([self_inds, self.gt_inds]) 16 | self.max_overlaps = torch.cat( 17 | [self.max_overlaps.new_ones(self.num_gts), self.max_overlaps]) 18 | if self.labels is not None: 19 | self.labels = torch.cat([gt_labels, self.labels]) 20 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/assigners/base_assigner.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class BaseAssigner(metaclass=ABCMeta): 5 | 6 | @abstractmethod 7 | def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None): 8 | pass 9 |
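As a small illustration of how `build_assigner` and `build_sampler` (defined in assign_sampling.py above) turn config dicts into objects via `mmcv.runner.obj_from_dict`: the `type` key names a class exported by the corresponding package, and the remaining keys become constructor kwargs. A hedged sketch (the threshold values are illustrative, not taken from this repo's configs):

```python
from mmdet.core.bbox import build_assigner

assigner_cfg = dict(
    type='MaxIoUAssigner',  # resolved in mmdet.core.bbox.assigners
    pos_iou_thr=0.5,        # illustrative thresholds
    neg_iou_thr=0.5,
    min_pos_iou=0.5)
assigner = build_assigner(assigner_cfg)  # -> MaxIoUAssigner instance
```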
-------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/bbox_target.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .transforms import bbox2delta 4 | from ..utils import multi_apply 5 | 6 | 7 | def bbox_target(pos_bboxes_list, 8 | neg_bboxes_list, 9 | pos_gt_bboxes_list, 10 | pos_gt_labels_list, 11 | cfg, 12 | reg_classes=1, 13 | target_means=[.0, .0, .0, .0], 14 | target_stds=[1.0, 1.0, 1.0, 1.0], 15 | concat=True): 16 | labels, label_weights, bbox_targets, bbox_weights = multi_apply( 17 | bbox_target_single, 18 | pos_bboxes_list, 19 | neg_bboxes_list, 20 | pos_gt_bboxes_list, 21 | pos_gt_labels_list, 22 | cfg=cfg, 23 | reg_classes=reg_classes, 24 | target_means=target_means, 25 | target_stds=target_stds) 26 | 27 | if concat: 28 | labels = torch.cat(labels, 0) 29 | label_weights = torch.cat(label_weights, 0) 30 | bbox_targets = torch.cat(bbox_targets, 0) 31 | bbox_weights = torch.cat(bbox_weights, 0) 32 | return labels, label_weights, bbox_targets, bbox_weights 33 | 34 | 35 | def bbox_target_single(pos_bboxes, 36 | neg_bboxes, 37 | pos_gt_bboxes, 38 | pos_gt_labels, 39 | cfg, 40 | reg_classes=1, 41 | target_means=[.0, .0, .0, .0], 42 | target_stds=[1.0, 1.0, 1.0, 1.0]): 43 | num_pos = pos_bboxes.size(0) 44 | num_neg = neg_bboxes.size(0) 45 | num_samples = num_pos + num_neg 46 | labels = pos_bboxes.new_zeros(num_samples, dtype=torch.long) 47 | label_weights = pos_bboxes.new_zeros(num_samples) 48 | bbox_targets = pos_bboxes.new_zeros(num_samples, 4) 49 | bbox_weights = pos_bboxes.new_zeros(num_samples, 4) 50 | if num_pos > 0: 51 | labels[:num_pos] = pos_gt_labels 52 | pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight 53 | label_weights[:num_pos] = pos_weight 54 | pos_bbox_targets = bbox2delta(pos_bboxes, pos_gt_bboxes, target_means, 55 | target_stds) 56 | bbox_targets[:num_pos, :] = pos_bbox_targets 57 | bbox_weights[:num_pos, :] = 1 58 | if num_neg > 0: 59 | label_weights[-num_neg:] = 1.0 60 | 61 | return labels, label_weights, bbox_targets, bbox_weights 62 | 63 | 64 | def expand_target(bbox_targets, bbox_weights, labels, num_classes): 65 | bbox_targets_expand = bbox_targets.new_zeros((bbox_targets.size(0), 66 | 4 * num_classes)) 67 | bbox_weights_expand = bbox_weights.new_zeros((bbox_weights.size(0), 68 | 4 * num_classes)) 69 | for i in torch.nonzero(labels > 0).squeeze(-1): 70 | start, end = labels[i] * 4, (labels[i] + 1) * 4 71 | bbox_targets_expand[i, start:end] = bbox_targets[i, :] 72 | bbox_weights_expand[i, start:end] = bbox_weights[i, :] 73 | return bbox_targets_expand, bbox_weights_expand 74 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/geometry.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False): 5 | """Calculate overlap between two sets of bboxes. 6 | 7 | If ``is_aligned`` is ``False``, then calculate the ious between each bbox 8 | of bboxes1 and bboxes2, otherwise the ious between each aligned pair of 9 | bboxes1 and bboxes2. 10 | 11 | Args: 12 | bboxes1 (Tensor): shape (m, 4) 13 | bboxes2 (Tensor): shape (n, 4), if is_aligned is ``True``, then m and n 14 | must be equal. 15 | mode (str): "iou" (intersection over union) or "iof" (intersection over 16 | foreground).
17 | 18 | Returns: 19 | ious(Tensor): shape (m, n) if is_aligned == False else shape (m, 1) 20 | """ 21 | 22 | assert mode in ['iou', 'iof'] 23 | 24 | rows = bboxes1.size(0) 25 | cols = bboxes2.size(0) 26 | if is_aligned: 27 | assert rows == cols 28 | 29 | if rows * cols == 0: 30 | return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols) 31 | 32 | if is_aligned: 33 | lt = torch.max(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2] 34 | rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2] 35 | 36 | wh = (rb - lt + 1).clamp(min=0) # [rows, 2] 37 | overlap = wh[:, 0] * wh[:, 1] 38 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 39 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 40 | 41 | if mode == 'iou': 42 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 43 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 44 | ious = overlap / (area1 + area2 - overlap) 45 | else: 46 | ious = overlap / area1 47 | else: 48 | lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2]) # [rows, cols, 2] 49 | rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:]) # [rows, cols, 2] 50 | 51 | wh = (rb - lt + 1).clamp(min=0) # [rows, cols, 2] 52 | overlap = wh[:, :, 0] * wh[:, :, 1] 53 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 54 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 55 | 56 | if mode == 'iou': 57 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 58 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 59 | ious = overlap / (area1[:, None] + area2 - overlap) 60 | else: 61 | ious = overlap / (area1[:, None]) 62 | 63 | return ious 64 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_sampler import BaseSampler 2 | from .pseudo_sampler import PseudoSampler 3 | from .random_sampler import RandomSampler 4 | from .instance_balanced_pos_sampler import InstanceBalancedPosSampler 5 | from .iou_balanced_neg_sampler import IoUBalancedNegSampler 6 | from .combined_sampler import CombinedSampler 7 | from .ohem_sampler import OHEMSampler 8 | from .sampling_result import SamplingResult 9 | 10 | __all__ = [ 11 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 12 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 13 | 'OHEMSampler', 'SamplingResult' 14 | ] 15 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/samplers/base_sampler.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch 4 | 5 | from .sampling_result import SamplingResult 6 | 7 | 8 | class BaseSampler(metaclass=ABCMeta): 9 | 10 | def __init__(self, 11 | num, 12 | pos_fraction, 13 | neg_pos_ub=-1, 14 | add_gt_as_proposals=True, 15 | **kwargs): 16 | self.num = num 17 | self.pos_fraction = pos_fraction 18 | self.neg_pos_ub = neg_pos_ub 19 | self.add_gt_as_proposals = add_gt_as_proposals 20 | self.pos_sampler = self 21 | self.neg_sampler = self 22 | 23 | @abstractmethod 24 | def _sample_pos(self, assign_result, num_expected, **kwargs): 25 | pass 26 | 27 | @abstractmethod 28 | def _sample_neg(self, assign_result, num_expected, **kwargs): 29 | pass 30 | 31 | def sample(self, 32 | assign_result, 33 | bboxes, 34 | gt_bboxes, 35 | gt_labels=None, 36 | **kwargs): 37 | """Sample positive and negative bboxes. 38 | 39 | This is a simple implementation of bbox sampling given candidates, 40 | assigning results and ground truth bboxes. 
41 | 42 | Args: 43 | assign_result (:obj:`AssignResult`): Bbox assigning results. 44 | bboxes (Tensor): Boxes to be sampled from. 45 | gt_bboxes (Tensor): Ground truth bboxes. 46 | gt_labels (Tensor, optional): Class labels of ground truth bboxes. 47 | 48 | Returns: 49 | :obj:`SamplingResult`: Sampling result. 50 | """ 51 | bboxes = bboxes[:, :4] 52 | 53 | gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8) 54 | if self.add_gt_as_proposals: 55 | bboxes = torch.cat([gt_bboxes, bboxes], dim=0) 56 | assign_result.add_gt_(gt_labels) 57 | gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8) 58 | gt_flags = torch.cat([gt_ones, gt_flags]) 59 | 60 | num_expected_pos = int(self.num * self.pos_fraction) 61 | pos_inds = self.pos_sampler._sample_pos( 62 | assign_result, num_expected_pos, bboxes=bboxes, **kwargs) 63 | # We found that sampled indices have duplicated items occasionally. 64 | # (may be a bug of PyTorch) 65 | pos_inds = pos_inds.unique() 66 | num_sampled_pos = pos_inds.numel() 67 | num_expected_neg = self.num - num_sampled_pos 68 | if self.neg_pos_ub >= 0: 69 | _pos = max(1, num_sampled_pos) 70 | neg_upper_bound = int(self.neg_pos_ub * _pos) 71 | if num_expected_neg > neg_upper_bound: 72 | num_expected_neg = neg_upper_bound 73 | neg_inds = self.neg_sampler._sample_neg( 74 | assign_result, num_expected_neg, bboxes=bboxes, **kwargs) 75 | neg_inds = neg_inds.unique() 76 | 77 | return SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 78 | assign_result, gt_flags) 79 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/samplers/combined_sampler.py: -------------------------------------------------------------------------------- 1 | from .base_sampler import BaseSampler 2 | from ..assign_sampling import build_sampler 3 | 4 | 5 | class CombinedSampler(BaseSampler): 6 | 7 | def __init__(self, pos_sampler, neg_sampler, **kwargs): 8 | super(CombinedSampler, self).__init__(**kwargs) 9 | self.pos_sampler = build_sampler(pos_sampler, **kwargs) 10 | self.neg_sampler = build_sampler(neg_sampler, **kwargs) 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .random_sampler import RandomSampler 5 | 6 | 7 | class InstanceBalancedPosSampler(RandomSampler): 8 | 9 | def _sample_pos(self, assign_result, num_expected, **kwargs): 10 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 11 | if pos_inds.numel() != 0: 12 | pos_inds = pos_inds.squeeze(1) 13 | if pos_inds.numel() <= num_expected: 14 | return pos_inds 15 | else: 16 | unique_gt_inds = assign_result.gt_inds[pos_inds].unique() 17 | num_gts = len(unique_gt_inds) 18 | num_per_gt = int(round(num_expected / float(num_gts)) + 1) 19 | sampled_inds = [] 20 | for i in unique_gt_inds: 21 | inds = torch.nonzero(assign_result.gt_inds == i.item()) 22 | if inds.numel() != 0: 23 | inds = inds.squeeze(1) 24 | else: 25 | continue 26 | if len(inds) > num_per_gt: 27 | inds = self.random_choice(inds, num_per_gt) 28 | sampled_inds.append(inds) 29 | sampled_inds = torch.cat(sampled_inds) 30 | if len(sampled_inds) < num_expected: 31 | num_extra = num_expected - len(sampled_inds) 32 | extra_inds = np.array( 
33 | list(set(pos_inds.cpu()) - set(sampled_inds.cpu()))) 34 | if len(extra_inds) > num_extra: 35 | extra_inds = self.random_choice(extra_inds, num_extra) 36 | extra_inds = torch.from_numpy(extra_inds).to( 37 | assign_result.gt_inds.device).long() 38 | sampled_inds = torch.cat([sampled_inds, extra_inds]) 39 | elif len(sampled_inds) > num_expected: 40 | sampled_inds = self.random_choice(sampled_inds, num_expected) 41 | return sampled_inds 42 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/samplers/ohem_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sampler import BaseSampler 4 | from ..transforms import bbox2roi 5 | 6 | 7 | class OHEMSampler(BaseSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | context, 13 | neg_pos_ub=-1, 14 | add_gt_as_proposals=True, 15 | **kwargs): 16 | super(OHEMSampler, self).__init__(num, pos_fraction, neg_pos_ub, 17 | add_gt_as_proposals) 18 | if not hasattr(context, 'num_stages'): 19 | self.bbox_roi_extractor = context.bbox_roi_extractor 20 | self.bbox_head = context.bbox_head 21 | else: 22 | self.bbox_roi_extractor = context.bbox_roi_extractor[ 23 | context.current_stage] 24 | self.bbox_head = context.bbox_head[context.current_stage] 25 | 26 | def hard_mining(self, inds, num_expected, bboxes, labels, feats): 27 | with torch.no_grad(): 28 | rois = bbox2roi([bboxes]) 29 | bbox_feats = self.bbox_roi_extractor( 30 | feats[:self.bbox_roi_extractor.num_inputs], rois) 31 | cls_score, _ = self.bbox_head(bbox_feats) 32 | loss = self.bbox_head.loss( 33 | cls_score=cls_score, 34 | bbox_pred=None, 35 | labels=labels, 36 | label_weights=cls_score.new_ones(cls_score.size(0)), 37 | bbox_targets=None, 38 | bbox_weights=None, 39 | reduction_override='none')['loss_cls'] 40 | _, topk_loss_inds = loss.topk(num_expected) 41 | return inds[topk_loss_inds] 42 | 43 | def _sample_pos(self, 44 | assign_result, 45 | num_expected, 46 | bboxes=None, 47 | feats=None, 48 | **kwargs): 49 | # Sample some hard positive samples 50 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 51 | if pos_inds.numel() != 0: 52 | pos_inds = pos_inds.squeeze(1) 53 | if pos_inds.numel() <= num_expected: 54 | return pos_inds 55 | else: 56 | return self.hard_mining(pos_inds, num_expected, bboxes[pos_inds], 57 | assign_result.labels[pos_inds], feats) 58 | 59 | def _sample_neg(self, 60 | assign_result, 61 | num_expected, 62 | bboxes=None, 63 | feats=None, 64 | **kwargs): 65 | # Sample some hard negative samples 66 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 67 | if neg_inds.numel() != 0: 68 | neg_inds = neg_inds.squeeze(1) 69 | if len(neg_inds) <= num_expected: 70 | return neg_inds 71 | else: 72 | return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds], 73 | assign_result.labels[neg_inds], feats) 74 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/samplers/pseudo_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sampler import BaseSampler 4 | from .sampling_result import SamplingResult 5 | 6 | 7 | class PseudoSampler(BaseSampler): 8 | 9 | def __init__(self, **kwargs): 10 | pass 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | 18 | def sample(self, assign_result, bboxes, gt_bboxes, **kwargs): 
19 | pos_inds = torch.nonzero( 20 | assign_result.gt_inds > 0).squeeze(-1).unique() 21 | neg_inds = torch.nonzero( 22 | assign_result.gt_inds == 0).squeeze(-1).unique() 23 | gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8) 24 | sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 25 | assign_result, gt_flags) 26 | return sampling_result 27 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/samplers/random_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .base_sampler import BaseSampler 5 | 6 | 7 | class RandomSampler(BaseSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | neg_pos_ub=-1, 13 | add_gt_as_proposals=True, 14 | **kwargs): 15 | super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub, 16 | add_gt_as_proposals) 17 | 18 | @staticmethod 19 | def random_choice(gallery, num): 20 | """Randomly select some elements from the gallery. 21 | 22 | It seems that PyTorch's implementation is slower than numpy, so we use 23 | numpy to randperm the indices. 24 | """ 25 | assert len(gallery) >= num 26 | if isinstance(gallery, list): 27 | gallery = np.array(gallery) 28 | cands = np.arange(len(gallery)) 29 | np.random.shuffle(cands) 30 | rand_inds = cands[:num] 31 | if not isinstance(gallery, np.ndarray): 32 | rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device) 33 | return gallery[rand_inds] 34 | 35 | def _sample_pos(self, assign_result, num_expected, **kwargs): 36 | """Randomly sample some positive samples.""" 37 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 38 | if pos_inds.numel() != 0: 39 | pos_inds = pos_inds.squeeze(1) 40 | if pos_inds.numel() <= num_expected: 41 | return pos_inds 42 | else: 43 | return self.random_choice(pos_inds, num_expected) 44 | 45 | def _sample_neg(self, assign_result, num_expected, **kwargs): 46 | """Randomly sample some negative samples.""" 47 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 48 | if neg_inds.numel() != 0: 49 | neg_inds = neg_inds.squeeze(1) 50 | if len(neg_inds) <= num_expected: 51 | return neg_inds 52 | else: 53 | return self.random_choice(neg_inds, num_expected) 54 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/bbox/samplers/sampling_result.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class SamplingResult(object): 5 | 6 | def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result, 7 | gt_flags): 8 | self.pos_inds = pos_inds 9 | self.neg_inds = neg_inds 10 | self.pos_bboxes = bboxes[pos_inds] 11 | self.neg_bboxes = bboxes[neg_inds] 12 | self.pos_is_gt = gt_flags[pos_inds] 13 | 14 | self.num_gts = gt_bboxes.shape[0] 15 | self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1 16 | self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds, :] 17 | if assign_result.labels is not None: 18 | self.pos_gt_labels = assign_result.labels[pos_inds] 19 | else: 20 | self.pos_gt_labels = None 21 | 22 | @property 23 | def bboxes(self): 24 | return torch.cat([self.pos_bboxes, self.neg_bboxes]) 25 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .class_names import (voc_classes, imagenet_det_classes,
imagenet_vid_classes, coco_classes, dataset_aliases, 3 | get_classes) 4 | from .coco_utils import coco_eval, fast_eval_recall, results2json 5 | from .eval_hooks import (DistEvalHook, DistEvalmAPHook, CocoDistEvalRecallHook, 6 | CocoDistEvalmAPHook) 7 | from .mean_ap import average_precision, eval_map, print_map_summary 8 | from .recall import (eval_recalls, print_recall_summary, plot_num_recall, 9 | plot_iou_recall) 10 | 11 | __all__ = [ 12 | 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes', 13 | 'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval', 14 | 'fast_eval_recall', 'results2json', 'DistEvalHook', 'DistEvalmAPHook', 15 | 'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision', 16 | 'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary', 17 | 'plot_num_recall', 'plot_iou_recall' 18 | ] 19 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/evaluation/bbox_overlaps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou'): 5 | """Calculate the ious between each bbox of bboxes1 and bboxes2. 6 | 7 | Args: 8 | bboxes1(ndarray): shape (n, 4) 9 | bboxes2(ndarray): shape (k, 4) 10 | mode(str): iou (intersection over union) or iof (intersection 11 | over foreground) 12 | 13 | Returns: 14 | ious(ndarray): shape (n, k) 15 | """ 16 | 17 | assert mode in ['iou', 'iof'] 18 | 19 | bboxes1 = bboxes1.astype(np.float32) 20 | bboxes2 = bboxes2.astype(np.float32) 21 | rows = bboxes1.shape[0] 22 | cols = bboxes2.shape[0] 23 | ious = np.zeros((rows, cols), dtype=np.float32) 24 | if rows * cols == 0: 25 | return ious 26 | exchange = False 27 | if bboxes1.shape[0] > bboxes2.shape[0]: 28 | bboxes1, bboxes2 = bboxes2, bboxes1 29 | ious = np.zeros((cols, rows), dtype=np.float32) 30 | exchange = True 31 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 32 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 33 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 34 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 35 | for i in range(bboxes1.shape[0]): 36 | x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0]) 37 | y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1]) 38 | x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2]) 39 | y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3]) 40 | overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum( 41 | y_end - y_start + 1, 0) 42 | if mode == 'iou': 43 | union = area1[i] + area2 - overlap 44 | else: 45 | union = area1[i] if not exchange else area2 46 | ious[i, :] = overlap / union 47 | if exchange: 48 | ious = ious.T 49 | return ious 50 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/fp16/__init__.py: -------------------------------------------------------------------------------- 1 | from .decorators import auto_fp16, force_fp32 2 | from .hooks import Fp16OptimizerHook, wrap_fp16_model 3 | 4 | __all__ = ['auto_fp16', 'force_fp32', 'Fp16OptimizerHook', 'wrap_fp16_model'] 5 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/fp16/utils.py: -------------------------------------------------------------------------------- 1 | from collections import abc 2 | 3 | import numpy as np 4 | import torch 5 | 6 | 7 | def cast_tensor_type(inputs, src_type, dst_type): 8 | if isinstance(inputs, torch.Tensor): 9 | return inputs.to(dst_type) 10 | elif isinstance(inputs, str): 11 | return inputs 12 | 
elif isinstance(inputs, np.ndarray): 13 | return inputs 14 | elif isinstance(inputs, abc.Mapping): 15 | return type(inputs)({ 16 | k: cast_tensor_type(v, src_type, dst_type) 17 | for k, v in inputs.items() 18 | }) 19 | elif isinstance(inputs, abc.Iterable): 20 | return type(inputs)( 21 | cast_tensor_type(item, src_type, dst_type) for item in inputs) 22 | else: 23 | return inputs 24 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/mask/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import split_combined_polys 2 | from .mask_target import mask_target 3 | 4 | __all__ = ['split_combined_polys', 'mask_target'] 5 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/mask/mask_target.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import mmcv 4 | 5 | 6 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list, 7 | cfg): 8 | cfg_list = [cfg for _ in range(len(pos_proposals_list))] 9 | mask_targets = map(mask_target_single, pos_proposals_list, 10 | pos_assigned_gt_inds_list, gt_masks_list, cfg_list) 11 | mask_targets = torch.cat(list(mask_targets)) 12 | return mask_targets 13 | 14 | 15 | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg): 16 | mask_size = cfg.mask_size 17 | num_pos = pos_proposals.size(0) 18 | mask_targets = [] 19 | if num_pos > 0: 20 | proposals_np = pos_proposals.cpu().numpy() 21 | pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy() 22 | for i in range(num_pos): 23 | gt_mask = gt_masks[pos_assigned_gt_inds[i]] 24 | bbox = proposals_np[i, :].astype(np.int32) 25 | x1, y1, x2, y2 = bbox 26 | w = np.maximum(x2 - x1 + 1, 1) 27 | h = np.maximum(y2 - y1 + 1, 1) 28 | # mask is uint8 both before and after resizing 29 | target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w], 30 | (mask_size, mask_size)) 31 | mask_targets.append(target) 32 | mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to( 33 | pos_proposals.device) 34 | else: 35 | mask_targets = pos_proposals.new_zeros((0, mask_size, mask_size)) 36 | return mask_targets 37 | -------------------------------------------------------------------------------- /mmdetection/mmdet/core/mask/utils.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | 4 | def split_combined_polys(polys, poly_lens, polys_per_mask): 5 | """Split the combined 1-D polys into masks. 6 | 7 | A mask is represented as a list of polys, and a poly is represented as 8 | a 1-D array. In dataset, all masks are concatenated into a single 1-D 9 | tensor. Here we need to split the tensor into original representations. 
10 | 
11 |     Args:
12 |         polys (list): a list (length = image num) of 1-D tensors
13 |         poly_lens (list): a list (length = image num) of poly lengths
14 |         polys_per_mask (list): a list (length = image num) of poly number
15 |             of each mask
16 | 
17 |     Returns:
18 |         list: a list (length = image num) of list (length = mask num) of
19 |             list (length = poly num) of numpy array
20 |     """
21 |     mask_polys_list = []
22 |     for img_id in range(len(polys)):
23 |         polys_single = polys[img_id]
24 |         polys_lens_single = poly_lens[img_id].tolist()
25 |         polys_per_mask_single = polys_per_mask[img_id].tolist()
26 | 
27 |         split_polys = mmcv.slice_list(polys_single, polys_lens_single)
28 |         mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single)
29 |         mask_polys_list.append(mask_polys)
30 |     return mask_polys_list
31 | 
--------------------------------------------------------------------------------
/mmdetection/mmdet/core/post_processing/__init__.py:
--------------------------------------------------------------------------------
1 | from .bbox_nms import multiclass_nms
2 | from .merge_augs import (merge_aug_proposals, merge_aug_bboxes,
3 |                          merge_aug_scores, merge_aug_masks)
4 | 
5 | __all__ = [
6 |     'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes',
7 |     'merge_aug_scores', 'merge_aug_masks'
8 | ]
9 | 
--------------------------------------------------------------------------------
/mmdetection/mmdet/core/post_processing/bbox_nms.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | from mmdet.ops.nms import nms_wrapper
4 | 
5 | 
6 | def multiclass_nms(multi_bboxes,
7 |                    multi_scores,
8 |                    score_thr,
9 |                    nms_cfg,
10 |                    max_num=-1,
11 |                    score_factors=None):
12 |     """NMS for multi-class bboxes.
13 | 
14 |     Args:
15 |         multi_bboxes (Tensor): shape (n, #class*4) or (n, 4)
16 |         multi_scores (Tensor): shape (n, #class)
17 |         score_thr (float): bbox threshold, bboxes with scores lower than it
18 |             will not be considered.
19 |         nms_cfg (dict): NMS config, e.g. dict(type='nms', iou_thr=0.5)
20 |         max_num (int): if there are more than max_num bboxes after NMS,
21 |             only top max_num will be kept; -1 means keeping all of them.
22 |         score_factors (Tensor): The factors multiplied to scores before
23 |             applying NMS
24 | 
25 |     Returns:
26 |         tuple: (bboxes, labels), tensors of shape (k, 5) and (k, ). Labels
27 |             are 0-based.
28 |     """
29 |     num_classes = multi_scores.shape[1]
30 |     bboxes, labels = [], []
31 |     nms_cfg_ = nms_cfg.copy()
32 |     nms_type = nms_cfg_.pop('type', 'nms')
33 |     nms_op = getattr(nms_wrapper, nms_type)
34 |     for i in range(1, num_classes):
35 |         cls_inds = multi_scores[:, i] > score_thr
36 |         if not cls_inds.any():
37 |             continue
38 |         # get bboxes and scores of this class
39 |         if multi_bboxes.shape[1] == 4:
40 |             _bboxes = multi_bboxes[cls_inds, :]
41 |         else:
42 |             _bboxes = multi_bboxes[cls_inds, i * 4:(i + 1) * 4]
43 |         _scores = multi_scores[cls_inds, i]
44 |         if score_factors is not None:
45 |             _scores *= score_factors[cls_inds]
46 |         cls_dets = torch.cat([_bboxes, _scores[:, None]], dim=1)
47 |         cls_dets, _ = nms_op(cls_dets, **nms_cfg_)
48 |         cls_labels = multi_bboxes.new_full(
49 |             (cls_dets.shape[0], ), i - 1, dtype=torch.long)
50 |         bboxes.append(cls_dets)
51 |         labels.append(cls_labels)
52 |     if bboxes:
53 |         bboxes = torch.cat(bboxes)
54 |         labels = torch.cat(labels)
55 |         if max_num > 0 and bboxes.shape[0] > max_num:  # -1 keeps all boxes
56 |             _, inds = bboxes[:, -1].sort(descending=True)
57 |             inds = inds[:max_num]
58 |             bboxes = bboxes[inds]
59 |             labels = labels[inds]
60 |     else:
61 |         bboxes = multi_bboxes.new_zeros((0, 5))
62 |         labels = multi_bboxes.new_zeros((0, ), dtype=torch.long)
63 | 
64 |     return bboxes, labels
65 | 
--------------------------------------------------------------------------------
/mmdetection/mmdet/core/post_processing/merge_augs.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | import numpy as np
4 | 
5 | from mmdet.ops import nms
6 | from ..bbox import bbox_mapping_back
7 | 
8 | 
9 | def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg):
10 |     """Merge augmented proposals (multiscale, flip, etc.)
11 | 
12 |     Args:
13 |         aug_proposals (list[Tensor]): proposals from different testing
14 |             schemes, shape (n, 5). Note that they are not rescaled to the
15 |             original image size.
16 |         img_metas (list[dict]): image info including "img_shape", "scale_factor" and "flip".
17 |         rpn_test_cfg (dict): rpn test config.
18 | 
19 |     Returns:
20 |         Tensor: shape (n, 5), proposals corresponding to original image scale.
21 |     """
22 |     recovered_proposals = []
23 |     for proposals, img_info in zip(aug_proposals, img_metas):
24 |         img_shape = img_info['img_shape']
25 |         scale_factor = img_info['scale_factor']
26 |         flip = img_info['flip']
27 |         _proposals = proposals.clone()
28 |         _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], img_shape,
29 |                                               scale_factor, flip)
30 |         recovered_proposals.append(_proposals)
31 |     aug_proposals = torch.cat(recovered_proposals, dim=0)
32 |     merged_proposals, _ = nms(aug_proposals, rpn_test_cfg.nms_thr)
33 |     scores = merged_proposals[:, 4]
34 |     _, order = scores.sort(0, descending=True)
35 |     num = min(rpn_test_cfg.max_num, merged_proposals.shape[0])
36 |     order = order[:num]
37 |     merged_proposals = merged_proposals[order, :]
38 |     return merged_proposals
39 | 
40 | 
41 | def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg):
42 |     """Merge augmented detection bboxes and scores.
43 | 
44 |     Args:
45 |         aug_bboxes (list[Tensor]): shape (n, 4*#class)
46 |         aug_scores (list[Tensor] or None): shape (n, #class)
47 |         img_metas (list[list[dict]]): image info including "img_shape", "scale_factor" and "flip".
48 |         rcnn_test_cfg (dict): rcnn test config.
49 | 
50 |     Returns:
51 |         tuple: (bboxes, scores)
52 |     """
53 |     recovered_bboxes = []
54 |     for bboxes, img_info in zip(aug_bboxes, img_metas):
55 |         img_shape = img_info[0]['img_shape']
56 |         scale_factor = img_info[0]['scale_factor']
57 |         flip = img_info[0]['flip']
58 |         bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip)
59 |         recovered_bboxes.append(bboxes)
60 |     bboxes = torch.stack(recovered_bboxes).mean(dim=0)
61 |     if aug_scores is None:
62 |         return bboxes
63 |     else:
64 |         scores = torch.stack(aug_scores).mean(dim=0)
65 |         return bboxes, scores
66 | 
67 | 
68 | def merge_aug_scores(aug_scores):
69 |     """Merge augmented bbox scores."""
70 |     if isinstance(aug_scores[0], torch.Tensor):
71 |         return torch.mean(torch.stack(aug_scores), dim=0)
72 |     else:
73 |         return np.mean(aug_scores, axis=0)
74 | 
75 | 
76 | def merge_aug_masks(aug_masks, img_metas, rcnn_test_cfg, weights=None):
77 |     """Merge augmented mask prediction.
78 | 
79 |     Args:
80 |         aug_masks (list[ndarray]): shape (n, #class, h, w)
81 |         img_metas (list[list[dict]]): image info including "flip".
82 |         rcnn_test_cfg (dict): rcnn test config.
83 | 
84 |     Returns:
85 |         ndarray: merged masks of shape (n, #class, h, w).
86 |     """
87 |     recovered_masks = [
88 |         mask if not img_info[0]['flip'] else mask[..., ::-1]
89 |         for mask, img_info in zip(aug_masks, img_metas)
90 |     ]
91 |     if weights is None:
92 |         merged_masks = np.mean(recovered_masks, axis=0)
93 |     else:
94 |         merged_masks = np.average(
95 |             np.array(recovered_masks), axis=0, weights=np.array(weights))
96 |     return merged_masks
97 | 
--------------------------------------------------------------------------------
/mmdetection/mmdet/core/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .dist_utils import allreduce_grads, DistOptimizerHook
2 | from .misc import tensor2imgs, unmap, multi_apply
3 | 
4 | __all__ = [
5 |     'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', 'unmap',
6 |     'multi_apply'
7 | ]
8 | 
--------------------------------------------------------------------------------
/mmdetection/mmdet/core/utils/dist_utils.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 | 
3 | import torch.distributed as dist
4 | from torch._utils import (_flatten_dense_tensors, _unflatten_dense_tensors,
5 |                           _take_tensors)
6 | from mmcv.runner import OptimizerHook
7 | 
8 | 
9 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1):
10 |     if bucket_size_mb > 0:
11 |         bucket_size_bytes = bucket_size_mb * 1024 * 1024
12 |         buckets = _take_tensors(tensors, bucket_size_bytes)
13 |     else:
14 |         buckets = OrderedDict()
15 |         for tensor in tensors:
16 |             tp = tensor.type()
17 |             if tp not in buckets:
18 |                 buckets[tp] = []
19 |             buckets[tp].append(tensor)
20 |         buckets = buckets.values()
21 | 
22 |     for bucket in buckets:
23 |         flat_tensors = _flatten_dense_tensors(bucket)
24 |         dist.all_reduce(flat_tensors)
25 |         flat_tensors.div_(world_size)
26 |         for tensor, synced in zip(
27 |                 bucket, _unflatten_dense_tensors(flat_tensors, bucket)):
28 |             tensor.copy_(synced)
29 | 
30 | 
31 | def allreduce_grads(params, coalesce=True, bucket_size_mb=-1):
32 |     grads = [
33 |         param.grad.data for param in params
34 |         if param.requires_grad and param.grad is not None
35 |     ]
36 |     world_size = dist.get_world_size()
37 |     if coalesce:
38 |         _allreduce_coalesced(grads, world_size, bucket_size_mb)
39 |     else:
40 |         for tensor in grads:
41 |             dist.all_reduce(tensor.div_(world_size))
42 | 
43 | 
44 | class DistOptimizerHook(OptimizerHook):
45 | 
46 |     def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1):
47 |         self.grad_clip = grad_clip
48 |         self.coalesce = coalesce
49 |         self.bucket_size_mb = bucket_size_mb
50 | 
51 |     def after_train_iter(self, runner):
52 |         runner.optimizer.zero_grad()
53 |         runner.outputs['loss'].backward()
54 |         allreduce_grads(runner.model.parameters(), self.coalesce,
55 |                         self.bucket_size_mb)
56 |         if self.grad_clip is not None:
57 |             self.clip_grads(runner.model.parameters())
58 |         runner.optimizer.step()
59 | 
--------------------------------------------------------------------------------
/mmdetection/mmdet/core/utils/misc.py:
--------------------------------------------------------------------------------
1 | from functools import partial
2 | 
3 | import mmcv
4 | import numpy as np
5 | from six.moves import map, zip
6 | 
7 | 
8 | def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True):
9 |     num_imgs = tensor.size(0)
10 |     mean = np.array(mean, dtype=np.float32)
11 |     std = np.array(std, dtype=np.float32)
12 |     imgs = []
13 |     for img_id in range(num_imgs):
14 |         img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0)
15 |         img = mmcv.imdenormalize(
16 |             img, mean, std, to_bgr=to_rgb).astype(np.uint8)
17 |         imgs.append(np.ascontiguousarray(img))
18 |     return imgs
19 | 
20 | 
21 | def multi_apply(func, *args, **kwargs):
22 |     pfunc = partial(func, **kwargs) if kwargs else func
23 |     map_results = map(pfunc, *args)
24 |     return tuple(map(list, zip(*map_results)))
25 | 
26 | 
27 | def unmap(data, count, inds, fill=0):
28 |     """Unmap a subset of items (data) back to the original set of items (of
29 |     size count)."""
30 |     if data.dim() == 1:
31 |         ret = data.new_full((count, ), fill)
32 |         ret[inds] = data
33 |     else:
34 |         new_size = (count, ) + data.size()[1:]
35 |         ret = data.new_full(new_size, fill)
36 |         ret[inds, :] = data
37 |     return ret
38 | 
--------------------------------------------------------------------------------
/mmdetection/mmdet/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .custom import CustomDataset
2 | from .xml_style import XMLDataset
3 | from .coco import CocoDataset
4 | from .oid import OIDDataset
5 | from .oid_seg import OIDSegDataset, OIDSegParentDataset
6 | from .voc import VOCDataset
7 | from .loader import GroupSampler, DistributedGroupSampler, build_dataloader
8 | from .utils import to_tensor, random_scale, show_ann, get_dataset
9 | from .concat_dataset import ConcatDataset
10 | from .repeat_dataset import RepeatDataset
11 | from .extra_aug import ExtraAugmentation
12 | 
13 | __all__ = [
14 |     'CustomDataset', 'XMLDataset', 'CocoDataset', 'OIDDataset',
15 |     'OIDSegDataset', 'OIDSegParentDataset', 'VOCDataset', 'GroupSampler',
16 |     'DistributedGroupSampler', 'build_dataloader', 'to_tensor', 'random_scale',
17 |     'show_ann', 'get_dataset', 'ConcatDataset', 'RepeatDataset', 'ExtraAugmentation'
18 | ]
19 | 
--------------------------------------------------------------------------------
/mmdetection/mmdet/datasets/builder.py:
--------------------------------------------------------------------------------
1 | import copy
2 | 
3 | from mmdet.utils import build_from_cfg
4 | from .dataset_wrappers import ConcatDataset, RepeatDataset
5 | from .registry import DATASETS
6 | 
7 | 
8 | def _concat_dataset(cfg):
9 |     ann_files = cfg['ann_file']
10 |     img_prefixes = cfg.get('img_prefix', None)
11 |     seg_prefixes = cfg.get('seg_prefix', None)
12 |     proposal_files = cfg.get('proposal_file', None)
13 | 
14 |     datasets = []
15 |     num_dset = len(ann_files)
16 |     for i in range(num_dset):
17 |         data_cfg = copy.deepcopy(cfg)
18 |         data_cfg['ann_file'] = ann_files[i]
19 |         if isinstance(img_prefixes, (list, tuple)):
20 |             data_cfg['img_prefix'] = img_prefixes[i]
21 |         if isinstance(seg_prefixes, (list, tuple)):
22 |             data_cfg['seg_prefix'] = seg_prefixes[i]
23 |         if isinstance(proposal_files, (list, tuple)):
24 |             data_cfg['proposal_file'] = proposal_files[i]
25 |         datasets.append(build_dataset(data_cfg))
26 | 
27 |     return ConcatDataset(datasets)
28 | 
29 | 
30 | def build_dataset(cfg):
31 |     if cfg['type'] == 'RepeatDataset':
32 |         dataset = RepeatDataset(build_dataset(cfg['dataset']), cfg['times'])
33 |     elif isinstance(cfg['ann_file'], (list, tuple)):
34 |         dataset = _concat_dataset(cfg)
35 |     else:
36 |         dataset = build_from_cfg(cfg, DATASETS)
37 | 
38 |     return dataset
39 | 
--------------------------------------------------------------------------------
/mmdetection/mmdet/datasets/concat_dataset.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset
3 | 
4 | 
5 | class ConcatDataset(_ConcatDataset):
6 |     """A wrapper of concatenated dataset.
7 | 
8 |     Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but also
9 |     concatenates the group flag for image aspect ratio.
10 | 
11 |     Args:
12 |         datasets (list[:obj:`Dataset`]): A list of datasets.
13 |     """
14 | 
15 |     def __init__(self, datasets):
16 |         super(ConcatDataset, self).__init__(datasets)
17 |         self.CLASSES = datasets[0].CLASSES
18 |         if hasattr(datasets[0], 'flag'):
19 |             flags = []
20 |             for i in range(0, len(datasets)):
21 |                 flags.append(datasets[i].flag)
22 |             self.flag = np.concatenate(flags)
23 | 
--------------------------------------------------------------------------------
/mmdetection/mmdet/datasets/dataset_wrappers.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset
3 | 
4 | from .registry import DATASETS
5 | 
6 | 
7 | @DATASETS.register_module
8 | class ConcatDataset(_ConcatDataset):
9 |     """A wrapper of concatenated dataset.
10 | 
11 |     Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but also
12 |     concatenates the group flag for image aspect ratio.
13 | 
14 |     Args:
15 |         datasets (list[:obj:`Dataset`]): A list of datasets.
16 |     """
17 | 
18 |     def __init__(self, datasets):
19 |         super(ConcatDataset, self).__init__(datasets)
20 |         self.CLASSES = datasets[0].CLASSES
21 |         if hasattr(datasets[0], 'flag'):
22 |             flags = []
23 |             for i in range(0, len(datasets)):
24 |                 flags.append(datasets[i].flag)
25 |             self.flag = np.concatenate(flags)
26 | 
27 | 
28 | @DATASETS.register_module
29 | class RepeatDataset(object):
30 |     """A wrapper of repeated dataset.
31 | 
32 |     The length of repeated dataset will be `times` larger than the original
33 |     dataset. This is useful when the data loading time is long but the dataset
34 |     is small. Using RepeatDataset can reduce the data loading time between
35 |     epochs.
36 | 
37 |     Args:
38 |         dataset (:obj:`Dataset`): The dataset to be repeated.
39 |         times (int): Repeat times.
40 | """ 41 | 42 | def __init__(self, dataset, times): 43 | self.dataset = dataset 44 | self.times = times 45 | self.CLASSES = dataset.CLASSES 46 | if hasattr(self.dataset, 'flag'): 47 | self.flag = np.tile(self.dataset.flag, times) 48 | 49 | self._ori_len = len(self.dataset) 50 | 51 | def __getitem__(self, idx): 52 | return self.dataset[idx % self._ori_len] 53 | 54 | def __len__(self): 55 | return self.times * self._ori_len 56 | -------------------------------------------------------------------------------- /mmdetection/mmdet/datasets/extra_aug.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | import albumentations as A 5 | from mmcv.runner import obj_from_dict 6 | from . import transforms 7 | 8 | 9 | class ExtraAugmentation(object): 10 | 11 | def __init__(self, **kwargs): 12 | self.transform = self.transform_from_dict(**kwargs) 13 | 14 | def transform_from_dict(self, **kwargs): 15 | if 'transforms' in kwargs: 16 | kwargs['transforms'] = [self.transform_from_dict(**transform) for transform in kwargs['transforms']] 17 | try: 18 | return obj_from_dict(kwargs, transforms) 19 | except AttributeError: 20 | return obj_from_dict(kwargs, A) 21 | 22 | def __call__(self, img): 23 | data = self.transform( 24 | image=img, 25 | ) 26 | return data['image'] 27 | -------------------------------------------------------------------------------- /mmdetection/mmdet/datasets/loader/__init__.py: -------------------------------------------------------------------------------- 1 | from .build_loader import build_dataloader 2 | from .sampler import GroupSampler, DistributedGroupSampler 3 | 4 | __all__ = ['GroupSampler', 'DistributedGroupSampler', 'build_dataloader'] 5 | -------------------------------------------------------------------------------- /mmdetection/mmdet/datasets/loader/build_loader.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | from mmcv.runner import get_dist_info 4 | from mmcv.parallel import collate 5 | from torch.utils.data import DataLoader 6 | 7 | from .sampler import GroupSampler, DistributedGroupSampler, DistributedSampler 8 | 9 | # https://github.com/pytorch/pytorch/issues/973 10 | import resource 11 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 12 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 13 | 14 | 15 | def build_dataloader(dataset, 16 | imgs_per_gpu, 17 | workers_per_gpu, 18 | num_gpus=1, 19 | dist=True, 20 | **kwargs): 21 | shuffle = kwargs.get('shuffle', True) 22 | print(shuffle) 23 | if dist: 24 | rank, world_size = get_dist_info() 25 | print(rank, world_size) 26 | if shuffle: 27 | sampler = DistributedGroupSampler(dataset, imgs_per_gpu, 28 | world_size, rank) 29 | else: 30 | sampler = DistributedSampler( 31 | dataset, world_size, rank, shuffle=False) 32 | batch_size = imgs_per_gpu 33 | num_workers = workers_per_gpu 34 | else: 35 | sampler = GroupSampler(dataset, imgs_per_gpu) if shuffle else None 36 | batch_size = num_gpus * imgs_per_gpu 37 | num_workers = num_gpus * workers_per_gpu 38 | 39 | data_loader = DataLoader( 40 | dataset, 41 | batch_size=batch_size, 42 | sampler=sampler, 43 | num_workers=num_workers, 44 | collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu), 45 | pin_memory=False, 46 | **kwargs) 47 | 48 | return data_loader 49 | -------------------------------------------------------------------------------- /mmdetection/mmdet/datasets/registry.py: 
-------------------------------------------------------------------------------- 1 | from mmdet.utils import Registry 2 | 3 | DATASETS = Registry('dataset') 4 | -------------------------------------------------------------------------------- /mmdetection/mmdet/datasets/repeat_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class RepeatDataset(object): 5 | 6 | def __init__(self, dataset, times): 7 | self.dataset = dataset 8 | self.times = times 9 | self.CLASSES = dataset.CLASSES 10 | if hasattr(self.dataset, 'flag'): 11 | self.flag = np.tile(self.dataset.flag, times) 12 | 13 | self._ori_len = len(self.dataset) 14 | 15 | def __getitem__(self, idx): 16 | return self.dataset[idx % self._ori_len] 17 | 18 | def __len__(self): 19 | return self.times * self._ori_len 20 | -------------------------------------------------------------------------------- /mmdetection/mmdet/datasets/voc.py: -------------------------------------------------------------------------------- 1 | from .xml_style import XMLDataset 2 | 3 | 4 | class VOCDataset(XMLDataset): 5 | 6 | CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 7 | 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 8 | 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 9 | 'tvmonitor') 10 | 11 | def __init__(self, **kwargs): 12 | super(VOCDataset, self).__init__(**kwargs) 13 | if 'VOC2007' in self.img_prefix: 14 | self.year = 2007 15 | elif 'VOC2012' in self.img_prefix: 16 | self.year = 2012 17 | else: 18 | raise ValueError('Cannot infer dataset year from img_prefix') 19 | -------------------------------------------------------------------------------- /mmdetection/mmdet/datasets/wider_face.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import xml.etree.ElementTree as ET 3 | 4 | import mmcv 5 | 6 | from .registry import DATASETS 7 | from .xml_style import XMLDataset 8 | 9 | 10 | @DATASETS.register_module 11 | class WIDERFaceDataset(XMLDataset): 12 | """ 13 | Reader for the WIDER Face dataset in PASCAL VOC format. 
14 | Conversion scripts can be found in 15 | https://github.com/sovrasov/wider-face-pascal-voc-annotations 16 | """ 17 | CLASSES = ('face', ) 18 | 19 | def __init__(self, **kwargs): 20 | super(WIDERFaceDataset, self).__init__(**kwargs) 21 | 22 | def load_annotations(self, ann_file): 23 | img_infos = [] 24 | img_ids = mmcv.list_from_file(ann_file) 25 | for img_id in img_ids: 26 | filename = '{}.jpg'.format(img_id) 27 | xml_path = osp.join(self.img_prefix, 'Annotations', 28 | '{}.xml'.format(img_id)) 29 | tree = ET.parse(xml_path) 30 | root = tree.getroot() 31 | size = root.find('size') 32 | width = int(size.find('width').text) 33 | height = int(size.find('height').text) 34 | folder = root.find('folder').text 35 | img_infos.append( 36 | dict( 37 | id=img_id, 38 | filename=osp.join(folder, filename), 39 | width=width, 40 | height=height)) 41 | 42 | return img_infos 43 | -------------------------------------------------------------------------------- /mmdetection/mmdet/datasets/xml_style.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import xml.etree.ElementTree as ET 3 | 4 | import mmcv 5 | import numpy as np 6 | 7 | from .custom import CustomDataset 8 | 9 | 10 | class XMLDataset(CustomDataset): 11 | 12 | def __init__(self, **kwargs): 13 | super(XMLDataset, self).__init__(**kwargs) 14 | self.cat2label = {cat: i + 1 for i, cat in enumerate(self.CLASSES)} 15 | 16 | def load_annotations(self, ann_file): 17 | img_infos = [] 18 | img_ids = mmcv.list_from_file(ann_file) 19 | for img_id in img_ids: 20 | filename = 'JPEGImages/{}.jpg'.format(img_id) 21 | xml_path = osp.join(self.img_prefix, 'Annotations', 22 | '{}.xml'.format(img_id)) 23 | tree = ET.parse(xml_path) 24 | root = tree.getroot() 25 | size = root.find('size') 26 | width = int(size.find('width').text) 27 | height = int(size.find('height').text) 28 | img_infos.append( 29 | dict(id=img_id, filename=filename, width=width, height=height)) 30 | return img_infos 31 | 32 | def get_ann_info(self, idx): 33 | img_id = self.img_infos[idx]['id'] 34 | xml_path = osp.join(self.img_prefix, 'Annotations', 35 | '{}.xml'.format(img_id)) 36 | tree = ET.parse(xml_path) 37 | root = tree.getroot() 38 | bboxes = [] 39 | labels = [] 40 | bboxes_ignore = [] 41 | labels_ignore = [] 42 | for obj in root.findall('object'): 43 | name = obj.find('name').text 44 | label = self.cat2label[name] 45 | difficult = int(obj.find('difficult').text) 46 | bnd_box = obj.find('bndbox') 47 | bbox = [ 48 | int(bnd_box.find('xmin').text), 49 | int(bnd_box.find('ymin').text), 50 | int(bnd_box.find('xmax').text), 51 | int(bnd_box.find('ymax').text) 52 | ] 53 | if difficult: 54 | bboxes_ignore.append(bbox) 55 | labels_ignore.append(label) 56 | else: 57 | bboxes.append(bbox) 58 | labels.append(label) 59 | if not bboxes: 60 | bboxes = np.zeros((0, 4)) 61 | labels = np.zeros((0, )) 62 | else: 63 | bboxes = np.array(bboxes, ndmin=2) - 1 64 | labels = np.array(labels) 65 | if not bboxes_ignore: 66 | bboxes_ignore = np.zeros((0, 4)) 67 | labels_ignore = np.zeros((0, )) 68 | else: 69 | bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1 70 | labels_ignore = np.array(labels_ignore) 71 | ann = dict( 72 | bboxes=bboxes.astype(np.float32), 73 | labels=labels.astype(np.int64), 74 | bboxes_ignore=bboxes_ignore.astype(np.float32), 75 | labels_ignore=labels_ignore.astype(np.int64)) 76 | return ann 77 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .backbones import * # noqa: F401,F403 2 | from .necks import * # noqa: F401,F403 3 | from .roi_extractors import * # noqa: F401,F403 4 | from .anchor_heads import * # noqa: F401,F403 5 | from .shared_heads import * # noqa: F401,F403 6 | from .bbox_heads import * # noqa: F401,F403 7 | from .mask_heads import * # noqa: F401,F403 8 | from .losses import * # noqa: F401,F403 9 | from .detectors import * # noqa: F401,F403 10 | from .registry import (BACKBONES, NECKS, ROI_EXTRACTORS, SHARED_HEADS, HEADS, 11 | LOSSES, DETECTORS) 12 | from .builder import (build_backbone, build_neck, build_roi_extractor, 13 | build_shared_head, build_head, build_loss, 14 | build_detector) 15 | 16 | __all__ = [ 17 | 'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS', 'LOSSES', 18 | 'DETECTORS', 'build_backbone', 'build_neck', 'build_roi_extractor', 19 | 'build_shared_head', 'build_head', 'build_loss', 'build_detector' 20 | ] 21 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/anchor_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_head import AnchorHead 2 | from .guided_anchor_head import GuidedAnchorHead, FeatureAdaption 3 | from .fcos_head import FCOSHead 4 | from .rpn_head import RPNHead 5 | from .ga_rpn_head import GARPNHead 6 | from .retina_head import RetinaHead 7 | from .ga_retina_head import GARetinaHead 8 | from .ssd_head import SSDHead 9 | 10 | __all__ = [ 11 | 'AnchorHead', 'GuidedAnchorHead', 'FeatureAdaption', 'RPNHead', 12 | 'GARPNHead', 'RetinaHead', 'GARetinaHead', 'SSDHead', 'FCOSHead' 13 | ] 14 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/anchor_heads/retina_head.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | from mmcv.cnn import normal_init 4 | 5 | from .anchor_head import AnchorHead 6 | from ..registry import HEADS 7 | from ..utils import bias_init_with_prob, ConvModule 8 | 9 | 10 | @HEADS.register_module 11 | class RetinaHead(AnchorHead): 12 | 13 | def __init__(self, 14 | num_classes, 15 | in_channels, 16 | stacked_convs=4, 17 | octave_base_scale=4, 18 | scales_per_octave=3, 19 | conv_cfg=None, 20 | norm_cfg=None, 21 | **kwargs): 22 | self.stacked_convs = stacked_convs 23 | self.octave_base_scale = octave_base_scale 24 | self.scales_per_octave = scales_per_octave 25 | self.conv_cfg = conv_cfg 26 | self.norm_cfg = norm_cfg 27 | octave_scales = np.array( 28 | [2**(i / scales_per_octave) for i in range(scales_per_octave)]) 29 | anchor_scales = octave_scales * octave_base_scale 30 | super(RetinaHead, self).__init__( 31 | num_classes, in_channels, anchor_scales=anchor_scales, **kwargs) 32 | 33 | def _init_layers(self): 34 | self.relu = nn.ReLU(inplace=True) 35 | self.cls_convs = nn.ModuleList() 36 | self.reg_convs = nn.ModuleList() 37 | for i in range(self.stacked_convs): 38 | chn = self.in_channels if i == 0 else self.feat_channels 39 | self.cls_convs.append( 40 | ConvModule( 41 | chn, 42 | self.feat_channels, 43 | 3, 44 | stride=1, 45 | padding=1, 46 | conv_cfg=self.conv_cfg, 47 | norm_cfg=self.norm_cfg)) 48 | self.reg_convs.append( 49 | ConvModule( 50 | chn, 51 | self.feat_channels, 52 | 3, 53 | stride=1, 54 | padding=1, 55 | conv_cfg=self.conv_cfg, 56 | norm_cfg=self.norm_cfg)) 57 | self.retina_cls = nn.Conv2d( 58 | 
self.feat_channels, 59 | self.num_anchors * self.cls_out_channels, 60 | 3, 61 | padding=1) 62 | self.retina_reg = nn.Conv2d( 63 | self.feat_channels, self.num_anchors * 4, 3, padding=1) 64 | 65 | def init_weights(self): 66 | for m in self.cls_convs: 67 | normal_init(m.conv, std=0.01) 68 | for m in self.reg_convs: 69 | normal_init(m.conv, std=0.01) 70 | bias_cls = bias_init_with_prob(0.01) 71 | normal_init(self.retina_cls, std=0.01, bias=bias_cls) 72 | normal_init(self.retina_reg, std=0.01) 73 | 74 | def forward_single(self, x): 75 | cls_feat = x 76 | reg_feat = x 77 | for cls_conv in self.cls_convs: 78 | cls_feat = cls_conv(cls_feat) 79 | for reg_conv in self.reg_convs: 80 | reg_feat = reg_conv(reg_feat) 81 | cls_score = self.retina_cls(cls_feat) 82 | bbox_pred = self.retina_reg(reg_feat) 83 | return cls_score, bbox_pred 84 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import ResNet, make_res_layer 2 | from .resnext import ResNeXt 3 | from .ssd_vgg import SSDVGG 4 | from .hrnet import HRNet 5 | 6 | __all__ = ['ResNet', 'make_res_layer', 'ResNeXt', 'SSDVGG', 'HRNet'] 7 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/bbox_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_head import BBoxHead 2 | from .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead 3 | 4 | __all__ = ['BBoxHead', 'ConvFCBBoxHead', 'SharedFCBBoxHead'] 5 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/builder.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | from torch import nn 3 | 4 | from .registry import (BACKBONES, NECKS, ROI_EXTRACTORS, SHARED_HEADS, HEADS, 5 | LOSSES, DETECTORS) 6 | 7 | 8 | def _build_module(cfg, registry, default_args): 9 | assert isinstance(cfg, dict) and 'type' in cfg 10 | assert isinstance(default_args, dict) or default_args is None 11 | args = cfg.copy() 12 | obj_type = args.pop('type') 13 | if mmcv.is_str(obj_type): 14 | if obj_type not in registry.module_dict: 15 | raise KeyError('{} is not in the {} registry'.format( 16 | obj_type, registry.name)) 17 | obj_type = registry.module_dict[obj_type] 18 | elif not isinstance(obj_type, type): 19 | raise TypeError('type must be a str or valid type, but got {}'.format( 20 | type(obj_type))) 21 | if default_args is not None: 22 | for name, value in default_args.items(): 23 | args.setdefault(name, value) 24 | return obj_type(**args) 25 | 26 | 27 | def build(cfg, registry, default_args=None): 28 | if isinstance(cfg, list): 29 | modules = [_build_module(cfg_, registry, default_args) for cfg_ in cfg] 30 | return nn.Sequential(*modules) 31 | else: 32 | return _build_module(cfg, registry, default_args) 33 | 34 | 35 | def build_backbone(cfg): 36 | return build(cfg, BACKBONES) 37 | 38 | 39 | def build_neck(cfg): 40 | return build(cfg, NECKS) 41 | 42 | 43 | def build_roi_extractor(cfg): 44 | return build(cfg, ROI_EXTRACTORS) 45 | 46 | 47 | def build_shared_head(cfg): 48 | return build(cfg, SHARED_HEADS) 49 | 50 | 51 | def build_head(cfg): 52 | return build(cfg, HEADS) 53 | 54 | 55 | def build_loss(cfg): 56 | return build(cfg, LOSSES) 57 | 58 | 59 | def build_detector(cfg, train_cfg=None, test_cfg=None): 60 | return build(cfg, DETECTORS, 
dict(train_cfg=train_cfg, test_cfg=test_cfg)) 61 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseDetector 2 | from .single_stage import SingleStageDetector 3 | from .two_stage import TwoStageDetector 4 | from .rpn import RPN 5 | from .fast_rcnn import FastRCNN 6 | from .faster_rcnn import FasterRCNN 7 | from .mask_rcnn import MaskRCNN 8 | from .cascade_rcnn import CascadeRCNN 9 | from .htc import HybridTaskCascade 10 | from .retinanet import RetinaNet 11 | from .fcos import FCOS 12 | from .grid_rcnn import GridRCNN 13 | from .mask_scoring_rcnn import MaskScoringRCNN 14 | 15 | __all__ = [ 16 | 'BaseDetector', 'SingleStageDetector', 'TwoStageDetector', 'RPN', 17 | 'FastRCNN', 'FasterRCNN', 'MaskRCNN', 'CascadeRCNN', 'HybridTaskCascade', 18 | 'RetinaNet', 'FCOS', 'GridRCNN', 'MaskScoringRCNN' 19 | ] 20 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/detectors/fast_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage import TwoStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class FastRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | bbox_roi_extractor, 11 | bbox_head, 12 | train_cfg, 13 | test_cfg, 14 | neck=None, 15 | shared_head=None, 16 | mask_roi_extractor=None, 17 | mask_head=None, 18 | pretrained=None): 19 | super(FastRCNN, self).__init__( 20 | backbone=backbone, 21 | neck=neck, 22 | shared_head=shared_head, 23 | bbox_roi_extractor=bbox_roi_extractor, 24 | bbox_head=bbox_head, 25 | train_cfg=train_cfg, 26 | test_cfg=test_cfg, 27 | mask_roi_extractor=mask_roi_extractor, 28 | mask_head=mask_head, 29 | pretrained=pretrained) 30 | 31 | def forward_test(self, imgs, img_metas, proposals, **kwargs): 32 | for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]: 33 | if not isinstance(var, list): 34 | raise TypeError('{} must be a list, but got {}'.format( 35 | name, type(var))) 36 | 37 | num_augs = len(imgs) 38 | if num_augs != len(img_metas): 39 | raise ValueError( 40 | 'num of augmentations ({}) != num of image meta ({})'.format( 41 | len(imgs), len(img_metas))) 42 | # TODO: remove the restriction of imgs_per_gpu == 1 when prepared 43 | imgs_per_gpu = imgs[0].size(0) 44 | assert imgs_per_gpu == 1 45 | 46 | if num_augs == 1: 47 | return self.simple_test(imgs[0], img_metas[0], proposals[0], 48 | **kwargs) 49 | else: 50 | return self.aug_test(imgs, img_metas, proposals, **kwargs) 51 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/detectors/faster_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage import TwoStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class FasterRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | rpn_head, 11 | bbox_roi_extractor, 12 | bbox_head, 13 | train_cfg, 14 | test_cfg, 15 | neck=None, 16 | shared_head=None, 17 | pretrained=None): 18 | super(FasterRCNN, self).__init__( 19 | backbone=backbone, 20 | neck=neck, 21 | shared_head=shared_head, 22 | rpn_head=rpn_head, 23 | bbox_roi_extractor=bbox_roi_extractor, 24 | bbox_head=bbox_head, 25 | train_cfg=train_cfg, 26 | test_cfg=test_cfg, 27 | pretrained=pretrained) 28 | 
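build() in builder.py above is the single entry point that turns config dicts into model objects: it pops the 'type' key, looks the class up in the corresponding registry, and instantiates it with the remaining keys (train_cfg/test_cfg are injected as default args for detectors). A minimal usage sketch, assuming mmdet and its compiled ops are importable; the config values below are illustrative RetinaNet-style defaults, not taken from this repository's configs/ directory:

from mmdet.models import build_detector

# Illustrative config dict; every 'type' string is resolved against a registry.
model_cfg = dict(
    type='RetinaNet',  # looked up in DETECTORS
    pretrained=None,
    backbone=dict(
        type='ResNet',  # looked up in BACKBONES
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        style='pytorch'),
    neck=dict(
        type='FPN',  # looked up in NECKS
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs=True,
        num_outs=5),
    bbox_head=dict(
        type='RetinaHead',  # looked up in HEADS
        num_classes=81,
        in_channels=256))

# build() calls the registered class with the remaining keys as kwargs.
detector = build_detector(model_cfg, train_cfg=None, test_cfg=None)

Two-stage detectors such as FasterRCNN above are assembled the same way; their sub-configs (rpn_head, bbox_roi_extractor, bbox_head, ...) are resolved through the same registries inside the detector's constructor.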
-------------------------------------------------------------------------------- /mmdetection/mmdet/models/detectors/fcos.py: -------------------------------------------------------------------------------- 1 | from .single_stage import SingleStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class FCOS(SingleStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head, 12 | train_cfg=None, 13 | test_cfg=None, 14 | pretrained=None): 15 | super(FCOS, self).__init__(backbone, neck, bbox_head, train_cfg, 16 | test_cfg, pretrained) 17 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/detectors/mask_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage import TwoStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class MaskRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | rpn_head, 11 | bbox_roi_extractor, 12 | bbox_head, 13 | mask_roi_extractor, 14 | mask_head, 15 | train_cfg, 16 | test_cfg, 17 | neck=None, 18 | shared_head=None, 19 | pretrained=None): 20 | super(MaskRCNN, self).__init__( 21 | backbone=backbone, 22 | neck=neck, 23 | shared_head=shared_head, 24 | rpn_head=rpn_head, 25 | bbox_roi_extractor=bbox_roi_extractor, 26 | bbox_head=bbox_head, 27 | mask_roi_extractor=mask_roi_extractor, 28 | mask_head=mask_head, 29 | train_cfg=train_cfg, 30 | test_cfg=test_cfg, 31 | pretrained=pretrained) 32 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/detectors/retinanet.py: -------------------------------------------------------------------------------- 1 | from .single_stage import SingleStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class RetinaNet(SingleStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head, 12 | train_cfg=None, 13 | test_cfg=None, 14 | pretrained=None): 15 | super(RetinaNet, self).__init__(backbone, neck, bbox_head, train_cfg, 16 | test_cfg, pretrained) 17 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/detectors/rpn.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from mmdet.core import tensor2imgs, bbox_mapping 4 | from .base import BaseDetector 5 | from .test_mixins import RPNTestMixin 6 | from .. 
import builder 7 | from ..registry import DETECTORS 8 | 9 | 10 | @DETECTORS.register_module 11 | class RPN(BaseDetector, RPNTestMixin): 12 | 13 | def __init__(self, 14 | backbone, 15 | neck, 16 | rpn_head, 17 | train_cfg, 18 | test_cfg, 19 | pretrained=None): 20 | super(RPN, self).__init__() 21 | self.backbone = builder.build_backbone(backbone) 22 | self.neck = builder.build_neck(neck) if neck is not None else None 23 | self.rpn_head = builder.build_head(rpn_head) 24 | self.train_cfg = train_cfg 25 | self.test_cfg = test_cfg 26 | self.init_weights(pretrained=pretrained) 27 | 28 | def init_weights(self, pretrained=None): 29 | super(RPN, self).init_weights(pretrained) 30 | self.backbone.init_weights(pretrained=pretrained) 31 | if self.with_neck: 32 | self.neck.init_weights() 33 | self.rpn_head.init_weights() 34 | 35 | def extract_feat(self, img): 36 | x = self.backbone(img) 37 | if self.with_neck: 38 | x = self.neck(x) 39 | return x 40 | 41 | def forward_train(self, 42 | img, 43 | img_meta, 44 | gt_bboxes=None, 45 | gt_bboxes_ignore=None): 46 | if self.train_cfg.rpn.get('debug', False): 47 | self.rpn_head.debug_imgs = tensor2imgs(img) 48 | 49 | x = self.extract_feat(img) 50 | rpn_outs = self.rpn_head(x) 51 | 52 | rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta, self.train_cfg.rpn) 53 | losses = self.rpn_head.loss( 54 | *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) 55 | return losses 56 | 57 | def simple_test(self, img, img_meta, rescale=False): 58 | x = self.extract_feat(img) 59 | proposal_list = self.simple_test_rpn(x, img_meta, self.test_cfg.rpn) 60 | if rescale: 61 | for proposals, meta in zip(proposal_list, img_meta): 62 | proposals[:, :4] /= meta['scale_factor'] 63 | # TODO: remove this restriction 64 | return proposal_list[0].cpu().numpy() 65 | 66 | def aug_test(self, imgs, img_metas, rescale=False): 67 | proposal_list = self.aug_test_rpn( 68 | self.extract_feats(imgs), img_metas, self.test_cfg.rpn) 69 | if not rescale: 70 | for proposals, img_meta in zip(proposal_list, img_metas[0]): 71 | img_shape = img_meta['img_shape'] 72 | scale_factor = img_meta['scale_factor'] 73 | flip = img_meta['flip'] 74 | proposals[:, :4] = bbox_mapping(proposals[:, :4], img_shape, 75 | scale_factor, flip) 76 | # TODO: remove this restriction 77 | return proposal_list[0].cpu().numpy() 78 | 79 | def show_result(self, data, result, img_norm_cfg, dataset=None, top_k=20): 80 | """Show RPN proposals on the image. 81 | 82 | Although we assume batch size is 1, this method supports arbitrary 83 | batch size. 84 | """ 85 | img_tensor = data['img'][0] 86 | img_metas = data['img_meta'][0].data[0] 87 | imgs = tensor2imgs(img_tensor, **img_norm_cfg) 88 | assert len(imgs) == len(img_metas) 89 | for img, img_meta in zip(imgs, img_metas): 90 | h, w, _ = img_meta['img_shape'] 91 | img_show = img[:h, :w, :] 92 | mmcv.imshow_bboxes(img_show, result, top_k=top_k) 93 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/detectors/single_stage.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from .base import BaseDetector 4 | from .. 
import builder 5 | from ..registry import DETECTORS 6 | from mmdet.core import bbox2result 7 | 8 | 9 | @DETECTORS.register_module 10 | class SingleStageDetector(BaseDetector): 11 | 12 | def __init__(self, 13 | backbone, 14 | neck=None, 15 | bbox_head=None, 16 | train_cfg=None, 17 | test_cfg=None, 18 | pretrained=None): 19 | super(SingleStageDetector, self).__init__() 20 | self.backbone = builder.build_backbone(backbone) 21 | if neck is not None: 22 | self.neck = builder.build_neck(neck) 23 | self.bbox_head = builder.build_head(bbox_head) 24 | self.train_cfg = train_cfg 25 | self.test_cfg = test_cfg 26 | self.init_weights(pretrained=pretrained) 27 | 28 | def init_weights(self, pretrained=None): 29 | super(SingleStageDetector, self).init_weights(pretrained) 30 | self.backbone.init_weights(pretrained=pretrained) 31 | if self.with_neck: 32 | if isinstance(self.neck, nn.Sequential): 33 | for m in self.neck: 34 | m.init_weights() 35 | else: 36 | self.neck.init_weights() 37 | self.bbox_head.init_weights() 38 | 39 | def extract_feat(self, img): 40 | x = self.backbone(img) 41 | if self.with_neck: 42 | x = self.neck(x) 43 | return x 44 | 45 | def forward_train(self, 46 | img, 47 | img_metas, 48 | gt_bboxes, 49 | gt_labels, 50 | gt_bboxes_ignore=None): 51 | x = self.extract_feat(img) 52 | outs = self.bbox_head(x) 53 | loss_inputs = outs + (gt_bboxes, gt_labels, img_metas, self.train_cfg) 54 | losses = self.bbox_head.loss( 55 | *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) 56 | return losses 57 | 58 | def simple_test(self, img, img_meta, rescale=False): 59 | x = self.extract_feat(img) 60 | outs = self.bbox_head(x) 61 | bbox_inputs = outs + (img_meta, self.test_cfg, rescale) 62 | bbox_list = self.bbox_head.get_bboxes(*bbox_inputs) 63 | bbox_results = [ 64 | bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes) 65 | for det_bboxes, det_labels in bbox_list 66 | ] 67 | return bbox_results[0] 68 | 69 | def aug_test(self, imgs, img_metas, rescale=False): 70 | raise NotImplementedError 71 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .accuracy import accuracy, Accuracy 2 | from .cross_entropy_loss import (cross_entropy, binary_cross_entropy, 3 | mask_cross_entropy, CrossEntropyLoss) 4 | from .focal_loss import sigmoid_focal_loss, FocalLoss 5 | from .smooth_l1_loss import smooth_l1_loss, SmoothL1Loss 6 | from .ghm_loss import GHMC, GHMR 7 | from .balanced_l1_loss import balanced_l1_loss, BalancedL1Loss 8 | from .mse_loss import mse_loss, MSELoss 9 | from .iou_loss import iou_loss, bounded_iou_loss, IoULoss, BoundedIoULoss 10 | from .utils import reduce_loss, weight_reduce_loss, weighted_loss 11 | 12 | __all__ = [ 13 | 'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy', 14 | 'mask_cross_entropy', 'CrossEntropyLoss', 'sigmoid_focal_loss', 15 | 'FocalLoss', 'smooth_l1_loss', 'SmoothL1Loss', 'balanced_l1_loss', 16 | 'BalancedL1Loss', 'mse_loss', 'MSELoss', 'iou_loss', 'bounded_iou_loss', 17 | 'IoULoss', 'BoundedIoULoss', 'GHMC', 'GHMR', 'reduce_loss', 18 | 'weight_reduce_loss', 'weighted_loss' 19 | ] 20 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/losses/accuracy.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | def accuracy(pred, target, topk=1): 5 | assert isinstance(topk, (int, 
tuple)) 6 | if isinstance(topk, int): 7 | topk = (topk, ) 8 | return_single = True 9 | else: 10 | return_single = False 11 | 12 | maxk = max(topk) 13 | _, pred_label = pred.topk(maxk, dim=1) 14 | pred_label = pred_label.t() 15 | correct = pred_label.eq(target.view(1, -1).expand_as(pred_label)) 16 | 17 | res = [] 18 | for k in topk: 19 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 20 | res.append(correct_k.mul_(100.0 / pred.size(0))) 21 | return res[0] if return_single else res 22 | 23 | 24 | class Accuracy(nn.Module): 25 | 26 | def __init__(self, topk=(1, )): 27 | super().__init__() 28 | self.topk = topk 29 | 30 | def forward(self, pred, target): 31 | return accuracy(pred, target, self.topk) 32 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/losses/balanced_l1_loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | from .utils import weighted_loss 6 | from ..registry import LOSSES 7 | 8 | 9 | @weighted_loss 10 | def balanced_l1_loss(pred, 11 | target, 12 | beta=1.0, 13 | alpha=0.5, 14 | gamma=1.5, 15 | reduction='mean'): 16 | assert beta > 0 17 | assert pred.size() == target.size() and target.numel() > 0 18 | 19 | diff = torch.abs(pred - target) 20 | b = np.e**(gamma / alpha) - 1 21 | loss = torch.where( 22 | diff < beta, alpha / b * 23 | (b * diff + 1) * torch.log(b * diff / beta + 1) - alpha * diff, 24 | gamma * diff + gamma / b - alpha * beta) 25 | 26 | return loss 27 | 28 | 29 | @LOSSES.register_module 30 | class BalancedL1Loss(nn.Module): 31 | """Balanced L1 Loss 32 | 33 | arXiv: https://arxiv.org/pdf/1904.02701.pdf (CVPR 2019) 34 | """ 35 | 36 | def __init__(self, 37 | alpha=0.5, 38 | gamma=1.5, 39 | beta=1.0, 40 | reduction='mean', 41 | loss_weight=1.0): 42 | super(BalancedL1Loss, self).__init__() 43 | self.alpha = alpha 44 | self.gamma = gamma 45 | self.beta = beta 46 | self.reduction = reduction 47 | self.loss_weight = loss_weight 48 | 49 | def forward(self, 50 | pred, 51 | target, 52 | weight=None, 53 | avg_factor=None, 54 | reduction_override=None, 55 | **kwargs): 56 | assert reduction_override in (None, 'none', 'mean', 'sum') 57 | reduction = ( 58 | reduction_override if reduction_override else self.reduction) 59 | loss_bbox = self.loss_weight * balanced_l1_loss( 60 | pred, 61 | target, 62 | weight, 63 | alpha=self.alpha, 64 | gamma=self.gamma, 65 | beta=self.beta, 66 | reduction=reduction, 67 | avg_factor=avg_factor, 68 | **kwargs) 69 | return loss_bbox 70 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/losses/cross_entropy_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .utils import weight_reduce_loss 6 | from ..registry import LOSSES 7 | 8 | 9 | def cross_entropy(pred, label, weight=None, reduction='mean', avg_factor=None): 10 | # element-wise losses 11 | loss = F.cross_entropy(pred, label, reduction='none') 12 | 13 | # apply weights and do the reduction 14 | if weight is not None: 15 | weight = weight.float() 16 | loss = weight_reduce_loss( 17 | loss, weight=weight, reduction=reduction, avg_factor=avg_factor) 18 | 19 | return loss 20 | 21 | 22 | def _expand_binary_labels(labels, label_weights, label_channels): 23 | bin_labels = labels.new_full((labels.size(0), label_channels), 0) 24 | inds = 
torch.nonzero(labels >= 1).squeeze() 25 | if inds.numel() > 0: 26 | bin_labels[inds, labels[inds] - 1] = 1 27 | if label_weights is None: 28 | bin_label_weights = None 29 | else: 30 | bin_label_weights = label_weights.view(-1, 1).expand( 31 | label_weights.size(0), label_channels) 32 | return bin_labels, bin_label_weights 33 | 34 | 35 | def binary_cross_entropy(pred, 36 | label, 37 | weight=None, 38 | reduction='mean', 39 | avg_factor=None): 40 | if pred.dim() != label.dim(): 41 | label, weight = _expand_binary_labels(label, weight, pred.size(-1)) 42 | 43 | # weighted element-wise losses 44 | if weight is not None: 45 | weight = weight.float() 46 | loss = F.binary_cross_entropy_with_logits( 47 | pred, label.float(), weight, reduction='none') 48 | # do the reduction for the weighted loss 49 | loss = weight_reduce_loss(loss, reduction=reduction, avg_factor=avg_factor) 50 | 51 | return loss 52 | 53 | 54 | def mask_cross_entropy(pred, target, label, reduction='mean', avg_factor=None): 55 | # TODO: handle these two reserved arguments 56 | assert reduction == 'mean' and avg_factor is None 57 | num_rois = pred.size()[0] 58 | inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device) 59 | pred_slice = pred[inds, label].squeeze(1) 60 | return F.binary_cross_entropy_with_logits( 61 | pred_slice, target, reduction='mean')[None] 62 | 63 | 64 | @LOSSES.register_module 65 | class CrossEntropyLoss(nn.Module): 66 | 67 | def __init__(self, 68 | use_sigmoid=False, 69 | use_mask=False, 70 | reduction='mean', 71 | loss_weight=1.0): 72 | super(CrossEntropyLoss, self).__init__() 73 | assert (use_sigmoid is False) or (use_mask is False) 74 | self.use_sigmoid = use_sigmoid 75 | self.use_mask = use_mask 76 | self.reduction = reduction 77 | self.loss_weight = loss_weight 78 | 79 | if self.use_sigmoid: 80 | self.cls_criterion = binary_cross_entropy 81 | elif self.use_mask: 82 | self.cls_criterion = mask_cross_entropy 83 | else: 84 | self.cls_criterion = cross_entropy 85 | 86 | def forward(self, 87 | cls_score, 88 | label, 89 | weight=None, 90 | avg_factor=None, 91 | reduction_override=None, 92 | **kwargs): 93 | assert reduction_override in (None, 'none', 'mean', 'sum') 94 | reduction = ( 95 | reduction_override if reduction_override else self.reduction) 96 | loss_cls = self.loss_weight * self.cls_criterion( 97 | cls_score, 98 | label, 99 | weight, 100 | reduction=reduction, 101 | avg_factor=avg_factor, 102 | **kwargs) 103 | return loss_cls 104 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/losses/focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from mmdet.ops import sigmoid_focal_loss as _sigmoid_focal_loss 5 | from .utils import weight_reduce_loss 6 | from ..registry import LOSSES 7 | 8 | 9 | # This method is only for debugging 10 | def py_sigmoid_focal_loss(pred, 11 | target, 12 | weight=None, 13 | gamma=2.0, 14 | alpha=0.25, 15 | reduction='mean', 16 | avg_factor=None): 17 | pred_sigmoid = pred.sigmoid() 18 | target = target.type_as(pred) 19 | pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target) 20 | focal_weight = (alpha * target + (1 - alpha) * 21 | (1 - target)) * pt.pow(gamma) 22 | loss = F.binary_cross_entropy_with_logits( 23 | pred, target, reduction='none') * focal_weight 24 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 25 | return loss 26 | 27 | 28 | def sigmoid_focal_loss(pred, 29 | 
target, 30 | weight=None, 31 | gamma=2.0, 32 | alpha=0.25, 33 | reduction='mean', 34 | avg_factor=None): 35 | # Function.apply does not accept keyword arguments, so the decorator 36 | # "weighted_loss" is not applicable 37 | loss = _sigmoid_focal_loss(pred, target, gamma, alpha) 38 | # TODO: find a proper way to handle the shape of weight 39 | if weight is not None: 40 | weight = weight.view(-1, 1) 41 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 42 | return loss 43 | 44 | 45 | @LOSSES.register_module 46 | class FocalLoss(nn.Module): 47 | 48 | def __init__(self, 49 | use_sigmoid=True, 50 | gamma=2.0, 51 | alpha=0.25, 52 | reduction='mean', 53 | loss_weight=1.0): 54 | super(FocalLoss, self).__init__() 55 | assert use_sigmoid is True, 'Only sigmoid focal loss supported now.' 56 | self.use_sigmoid = use_sigmoid 57 | self.gamma = gamma 58 | self.alpha = alpha 59 | self.reduction = reduction 60 | self.loss_weight = loss_weight 61 | 62 | def forward(self, 63 | pred, 64 | target, 65 | weight=None, 66 | avg_factor=None, 67 | reduction_override=None): 68 | assert reduction_override in (None, 'none', 'mean', 'sum') 69 | reduction = ( 70 | reduction_override if reduction_override else self.reduction) 71 | if self.use_sigmoid: 72 | loss_cls = self.loss_weight * sigmoid_focal_loss( 73 | pred, 74 | target, 75 | weight, 76 | gamma=self.gamma, 77 | alpha=self.alpha, 78 | reduction=reduction, 79 | avg_factor=avg_factor) 80 | else: 81 | raise NotImplementedError 82 | return loss_cls 83 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/losses/mse_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from .utils import weighted_loss 5 | from ..registry import LOSSES 6 | 7 | mse_loss = weighted_loss(F.mse_loss) 8 | 9 | 10 | @LOSSES.register_module 11 | class MSELoss(nn.Module): 12 | 13 | def __init__(self, reduction='mean', loss_weight=1.0): 14 | super().__init__() 15 | self.reduction = reduction 16 | self.loss_weight = loss_weight 17 | 18 | def forward(self, pred, target, weight=None, avg_factor=None): 19 | loss = self.loss_weight * mse_loss( 20 | pred, 21 | target, 22 | weight, 23 | reduction=self.reduction, 24 | avg_factor=avg_factor) 25 | return loss 26 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/losses/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .utils import weighted_loss 5 | from ..registry import LOSSES 6 | 7 | 8 | @weighted_loss 9 | def smooth_l1_loss(pred, target, beta=1.0): 10 | assert beta > 0 11 | assert pred.size() == target.size() and target.numel() > 0 12 | diff = torch.abs(pred - target) 13 | loss = torch.where(diff < beta, 0.5 * diff * diff / beta, 14 | diff - 0.5 * beta) 15 | return loss 16 | 17 | 18 | @LOSSES.register_module 19 | class SmoothL1Loss(nn.Module): 20 | 21 | def __init__(self, beta=1.0, reduction='mean', loss_weight=1.0): 22 | super(SmoothL1Loss, self).__init__() 23 | self.beta = beta 24 | self.reduction = reduction 25 | self.loss_weight = loss_weight 26 | 27 | def forward(self, 28 | pred, 29 | target, 30 | weight=None, 31 | avg_factor=None, 32 | reduction_override=None, 33 | **kwargs): 34 | assert reduction_override in (None, 'none', 'mean', 'sum') 35 | reduction = ( 36 | reduction_override if reduction_override else 
self.reduction) 37 | loss_bbox = self.loss_weight * smooth_l1_loss( 38 | pred, 39 | target, 40 | weight, 41 | beta=self.beta, 42 | reduction=reduction, 43 | avg_factor=avg_factor, 44 | **kwargs) 45 | return loss_bbox 46 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/losses/utils.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import torch.nn.functional as F 4 | 5 | 6 | def reduce_loss(loss, reduction): 7 | """Reduce loss as specified. 8 | 9 | Args: 10 | loss (Tensor): Elementwise loss tensor. 11 | reduction (str): Options are "none", "mean" and "sum". 12 | 13 | Return: 14 | Tensor: Reduced loss tensor. 15 | """ 16 | reduction_enum = F._Reduction.get_enum(reduction) 17 | # none: 0, elementwise_mean:1, sum: 2 18 | if reduction_enum == 0: 19 | return loss 20 | elif reduction_enum == 1: 21 | return loss.mean() 22 | elif reduction_enum == 2: 23 | return loss.sum() 24 | 25 | 26 | def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): 27 | """Apply element-wise weight and reduce loss. 28 | 29 | Args: 30 | loss (Tensor): Element-wise loss. 31 | weight (Tensor): Element-wise weights. 32 | reduction (str): Same as built-in losses of PyTorch. 33 | avg_factor (float): Avarage factor when computing the mean of losses. 34 | 35 | Returns: 36 | Tensor: Processed loss values. 37 | """ 38 | # if weight is specified, apply element-wise weight 39 | if weight is not None: 40 | loss = loss * weight 41 | 42 | # if avg_factor is not specified, just reduce the loss 43 | if avg_factor is None: 44 | loss = reduce_loss(loss, reduction) 45 | else: 46 | # if reduction is mean, then average the loss by avg_factor 47 | if reduction == 'mean': 48 | loss = loss.sum() / avg_factor 49 | # if reduction is 'none', then do nothing, otherwise raise an error 50 | elif reduction != 'none': 51 | raise ValueError('avg_factor can not be used with reduction="sum"') 52 | return loss 53 | 54 | 55 | def weighted_loss(loss_func): 56 | """Create a weighted version of a given loss function. 57 | 58 | To use this decorator, the loss function must have the signature like 59 | `loss_func(pred, target, **kwargs)`. The function only needs to compute 60 | element-wise loss without any reduction. This decorator will add weight 61 | and reduction arguments to the function. The decorated function will have 62 | the signature like `loss_func(pred, target, weight=None, reduction='mean', 63 | avg_factor=None, **kwargs)`. 64 | 65 | :Example: 66 | 67 | >>> @weighted_loss 68 | >>> def l1_loss(pred, target): 69 | >>> return (pred - target).abs() 70 | 71 | >>> pred = torch.Tensor([0, 2, 3]) 72 | >>> target = torch.Tensor([1, 1, 1]) 73 | >>> weight = torch.Tensor([1, 0, 1]) 74 | 75 | >>> l1_loss(pred, target) 76 | tensor(1.3333) 77 | >>> l1_loss(pred, target, weight) 78 | tensor(1.) 
79 | >>> l1_loss(pred, target, reduction='none') 80 | tensor([1., 1., 2.]) 81 | >>> l1_loss(pred, target, weight, avg_factor=2) 82 | tensor(1.5000) 83 | """ 84 | 85 | @functools.wraps(loss_func) 86 | def wrapper(pred, 87 | target, 88 | weight=None, 89 | reduction='mean', 90 | avg_factor=None, 91 | **kwargs): 92 | # get element-wise loss 93 | loss = loss_func(pred, target, **kwargs) 94 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 95 | return loss 96 | 97 | return wrapper 98 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/mask_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcn_mask_head import FCNMaskHead 2 | from .fused_semantic_head import FusedSemanticHead 3 | from .grid_head import GridHead 4 | from .htc_mask_head import HTCMaskHead 5 | from .maskiou_head import MaskIoUHead 6 | 7 | __all__ = [ 8 | 'FCNMaskHead', 'HTCMaskHead', 'FusedSemanticHead', 'GridHead', 9 | 'MaskIoUHead' 10 | ] 11 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/mask_heads/fused_semantic_head.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from mmcv.cnn import kaiming_init 4 | 5 | from mmdet.core import auto_fp16, force_fp32 6 | from ..registry import HEADS 7 | from ..utils import ConvModule 8 | 9 | 10 | @HEADS.register_module 11 | class FusedSemanticHead(nn.Module): 12 | """Multi-level fused semantic segmentation head. 13 | 14 | in_1 -> 1x1 conv --- 15 | | 16 | in_2 -> 1x1 conv -- | 17 | || 18 | in_3 -> 1x1 conv - || 19 | ||| /-> 1x1 conv (mask prediction) 20 | in_4 -> 1x1 conv -----> 3x3 convs (*4) 21 | | \-> 1x1 conv (feature) 22 | in_5 -> 1x1 conv --- 23 | """ # noqa: W605 24 | 25 | def __init__(self, 26 | num_ins, 27 | fusion_level, 28 | num_convs=4, 29 | in_channels=256, 30 | conv_out_channels=256, 31 | num_classes=183, 32 | ignore_label=255, 33 | loss_weight=0.2, 34 | conv_cfg=None, 35 | norm_cfg=None): 36 | super(FusedSemanticHead, self).__init__() 37 | self.num_ins = num_ins 38 | self.fusion_level = fusion_level 39 | self.num_convs = num_convs 40 | self.in_channels = in_channels 41 | self.conv_out_channels = conv_out_channels 42 | self.num_classes = num_classes 43 | self.ignore_label = ignore_label 44 | self.loss_weight = loss_weight 45 | self.conv_cfg = conv_cfg 46 | self.norm_cfg = norm_cfg 47 | self.fp16_enabled = False 48 | 49 | self.lateral_convs = nn.ModuleList() 50 | for i in range(self.num_ins): 51 | self.lateral_convs.append( 52 | ConvModule( 53 | self.in_channels, 54 | self.in_channels, 55 | 1, 56 | conv_cfg=self.conv_cfg, 57 | norm_cfg=self.norm_cfg, 58 | inplace=False)) 59 | 60 | self.convs = nn.ModuleList() 61 | for i in range(self.num_convs): 62 | in_channels = self.in_channels if i == 0 else conv_out_channels 63 | self.convs.append( 64 | ConvModule( 65 | in_channels, 66 | conv_out_channels, 67 | 3, 68 | padding=1, 69 | conv_cfg=self.conv_cfg, 70 | norm_cfg=self.norm_cfg)) 71 | self.conv_embedding = ConvModule( 72 | conv_out_channels, 73 | conv_out_channels, 74 | 1, 75 | conv_cfg=self.conv_cfg, 76 | norm_cfg=self.norm_cfg) 77 | self.conv_logits = nn.Conv2d(conv_out_channels, self.num_classes, 1) 78 | 79 | self.criterion = nn.CrossEntropyLoss(ignore_index=ignore_label) 80 | 81 | def init_weights(self): 82 | kaiming_init(self.conv_logits) 83 | 84 | @auto_fp16() 85 | def forward(self, feats): 
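# The fusion below happens in three stages: every input level passes
# through its own 1x1 lateral conv; levels other than `fusion_level` are
# bilinearly resized to the fusion level's resolution and summed onto it;
# the fused map is then refined by `num_convs` 3x3 convs before branching
# into the mask logits (`conv_logits`) and the embedding (`conv_embedding`).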
86 | x = self.lateral_convs[self.fusion_level](feats[self.fusion_level]) 87 | fused_size = tuple(x.shape[-2:]) 88 | for i, feat in enumerate(feats): 89 | if i != self.fusion_level: 90 | feat = F.interpolate( 91 | feat, size=fused_size, mode='bilinear', align_corners=True) 92 | x += self.lateral_convs[i](feat) 93 | 94 | for i in range(self.num_convs): 95 | x = self.convs[i](x) 96 | 97 | mask_pred = self.conv_logits(x) 98 | x = self.conv_embedding(x) 99 | return mask_pred, x 100 | 101 | @force_fp32(apply_to=('mask_pred',)) 102 | def loss(self, mask_pred, labels): 103 | labels = labels.squeeze(1).long() 104 | loss_semantic_seg = self.criterion(mask_pred, labels) 105 | loss_semantic_seg *= self.loss_weight 106 | return loss_semantic_seg 107 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/mask_heads/htc_mask_head.py: -------------------------------------------------------------------------------- 1 | from .fcn_mask_head import FCNMaskHead 2 | from ..registry import HEADS 3 | from ..utils import ConvModule 4 | 5 | 6 | @HEADS.register_module 7 | class HTCMaskHead(FCNMaskHead): 8 | 9 | def __init__(self, *args, **kwargs): 10 | super(HTCMaskHead, self).__init__(*args, **kwargs) 11 | self.conv_res = ConvModule( 12 | self.conv_out_channels, 13 | self.conv_out_channels, 14 | 1, 15 | conv_cfg=self.conv_cfg, 16 | norm_cfg=self.norm_cfg) 17 | 18 | def init_weights(self): 19 | super(HTCMaskHead, self).init_weights() 20 | self.conv_res.init_weights() 21 | 22 | def forward(self, x, res_feat=None, return_logits=True, return_feat=True): 23 | if res_feat is not None: 24 | res_feat = self.conv_res(res_feat) 25 | x = x + res_feat 26 | for conv in self.convs: 27 | x = conv(x) 28 | res_feat = x 29 | outs = [] 30 | if return_logits: 31 | x = self.upsample(x) 32 | if self.upsample_method == 'deconv': 33 | x = self.relu(x) 34 | mask_pred = self.conv_logits(x) 35 | outs.append(mask_pred) 36 | if return_feat: 37 | outs.append(res_feat) 38 | return outs if len(outs) > 1 else outs[0] 39 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .fpn import FPN 2 | from .bfp import BFP 3 | from .hrfpn import HRFPN 4 | 5 | __all__ = ['FPN', 'BFP', 'HRFPN'] 6 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/necks/bfp.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from mmcv.cnn import xavier_init 4 | 5 | from ..plugins import NonLocal2D 6 | from ..registry import NECKS 7 | from ..utils import ConvModule 8 | 9 | 10 | @NECKS.register_module 11 | class BFP(nn.Module): 12 | """BFP (Balanced Feature Pyramids) 13 | 14 | BFP takes multi-level features as inputs and gathers them into a single one, 15 | then refines the gathered feature and scatters the refined results back to 16 | the multi-level features. This module is used in Libra R-CNN (CVPR 2019), see 17 | https://arxiv.org/pdf/1904.02701.pdf for details. 18 | 19 | Args: 20 | in_channels (int): Number of input channels (feature maps of all levels 21 | should have the same channels). 22 | num_levels (int): Number of input feature levels. 23 | conv_cfg (dict): The config dict for convolution layers. 24 | norm_cfg (dict): The config dict for normalization layers.
25 | refine_level (int): Index of the level (counting from bottom to top) at 26 | which the multi-level features are integrated and refined (the BSF). 27 | refine_type (str): Type of the refine op; currently supported: 28 | [None, 'conv', 'non_local']. 29 | """ 30 | 31 | def __init__(self, 32 | in_channels, 33 | num_levels, 34 | refine_level=2, 35 | refine_type=None, 36 | conv_cfg=None, 37 | norm_cfg=None): 38 | super(BFP, self).__init__() 39 | assert refine_type in [None, 'conv', 'non_local'] 40 | 41 | self.in_channels = in_channels 42 | self.num_levels = num_levels 43 | self.conv_cfg = conv_cfg 44 | self.norm_cfg = norm_cfg 45 | 46 | self.refine_level = refine_level 47 | self.refine_type = refine_type 48 | assert 0 <= self.refine_level < self.num_levels 49 | 50 | if self.refine_type == 'conv': 51 | self.refine = ConvModule( 52 | self.in_channels, 53 | self.in_channels, 54 | 3, 55 | padding=1, 56 | conv_cfg=self.conv_cfg, 57 | norm_cfg=self.norm_cfg) 58 | elif self.refine_type == 'non_local': 59 | self.refine = NonLocal2D( 60 | self.in_channels, 61 | reduction=1, 62 | use_scale=False, 63 | conv_cfg=self.conv_cfg, 64 | norm_cfg=self.norm_cfg) 65 | 66 | def init_weights(self): 67 | for m in self.modules(): 68 | if isinstance(m, nn.Conv2d): 69 | xavier_init(m, distribution='uniform') 70 | 71 | def forward(self, inputs): 72 | assert len(inputs) == self.num_levels 73 | 74 | # step 1: gather multi-level features by resize and average 75 | feats = [] 76 | gather_size = inputs[self.refine_level].size()[2:] 77 | for i in range(self.num_levels): 78 | if i < self.refine_level: 79 | gathered = F.adaptive_max_pool2d( 80 | inputs[i], output_size=gather_size) 81 | else: 82 | gathered = F.interpolate( 83 | inputs[i], size=gather_size, mode='nearest') 84 | feats.append(gathered) 85 | 86 | bsf = sum(feats) / len(feats) 87 | 88 | # step 2: refine gathered features 89 | if self.refine_type is not None: 90 | bsf = self.refine(bsf) 91 | 92 | # step 3: scatter refined features to multi-levels by a residual path 93 | outs = [] 94 | for i in range(self.num_levels): 95 | out_size = inputs[i].size()[2:] 96 | if i < self.refine_level: 97 | residual = F.interpolate(bsf, size=out_size, mode='nearest') 98 | else: 99 | residual = F.adaptive_max_pool2d(bsf, output_size=out_size) 100 | outs.append(residual + inputs[i]) 101 | 102 | return tuple(outs) 103 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/necks/hrfpn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.utils.checkpoint import checkpoint 5 | from mmcv.cnn.weight_init import caffe2_xavier_init 6 | 7 | from ..utils import ConvModule 8 | from ..registry import NECKS 9 | 10 | 11 | @NECKS.register_module 12 | class HRFPN(nn.Module): 13 | """HRFPN (High Resolution Feature Pyramids) 14 | 15 | arXiv: https://arxiv.org/abs/1904.04514 16 | 17 | Args: 18 | in_channels (list): number of channels for each branch. 19 | out_channels (int): output channels of feature pyramids. 20 | num_outs (int): number of output stages. 21 | pooling_type (str): pooling for generating feature pyramids 22 | from {MAX, AVG}. 23 | conv_cfg (dict): dictionary to construct and config conv layer. 24 | norm_cfg (dict): dictionary to construct and config norm layer. 25 | with_cp (bool): Use checkpoint or not. Using checkpoint will save some 26 | memory while slowing down the training speed.
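Example (illustrative; shapes and channel counts are assumed, not taken
from a real config):

    >>> import torch
    >>> feats = [torch.rand(1, c, 64 // 2**i, 64 // 2**i)
    ...          for i, c in enumerate([32, 64, 128, 256])]
    >>> neck = HRFPN(in_channels=[32, 64, 128, 256], out_channels=256)
    >>> outs = neck(feats)
    >>> [tuple(o.shape[-2:]) for o in outs]
    [(64, 64), (32, 32), (16, 16), (8, 8), (4, 4)]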
27 | """ 28 | 29 | def __init__(self, 30 | in_channels, 31 | out_channels, 32 | num_outs=5, 33 | pooling_type='AVG', 34 | conv_cfg=None, 35 | norm_cfg=None, 36 | with_cp=False): 37 | super(HRFPN, self).__init__() 38 | assert isinstance(in_channels, list) 39 | self.in_channels = in_channels 40 | self.out_channels = out_channels 41 | self.num_ins = len(in_channels) 42 | self.num_outs = num_outs 43 | self.with_cp = with_cp 44 | self.conv_cfg = conv_cfg 45 | self.norm_cfg = norm_cfg 46 | 47 | self.reduction_conv = ConvModule( 48 | sum(in_channels), 49 | out_channels, 50 | kernel_size=1, 51 | conv_cfg=self.conv_cfg, 52 | activation=None) 53 | 54 | self.fpn_convs = nn.ModuleList() 55 | for i in range(self.num_outs): 56 | self.fpn_convs.append( 57 | ConvModule( 58 | out_channels, 59 | out_channels, 60 | kernel_size=3, 61 | padding=1, 62 | conv_cfg=self.conv_cfg, 63 | activation=None)) 64 | 65 | if pooling_type == 'MAX': 66 | self.pooling = F.max_pool2d 67 | else: 68 | self.pooling = F.avg_pool2d 69 | 70 | def init_weights(self): 71 | for m in self.modules(): 72 | if isinstance(m, nn.Conv2d): 73 | caffe2_xavier_init(m) 74 | 75 | def forward(self, inputs): 76 | assert len(inputs) == self.num_ins 77 | outs = [inputs[0]] 78 | for i in range(1, self.num_ins): 79 | outs.append( 80 | F.interpolate(inputs[i], scale_factor=2**i, mode='bilinear')) 81 | out = torch.cat(outs, dim=1) 82 | if out.requires_grad and self.with_cp: 83 | out = checkpoint(self.reduction_conv, out) 84 | else: 85 | out = self.reduction_conv(out) 86 | outs = [out] 87 | for i in range(1, self.num_outs): 88 | outs.append(self.pooling(out, kernel_size=2**i, stride=2**i)) 89 | outputs = [] 90 | 91 | for i in range(self.num_outs): 92 | if outs[i].requires_grad and self.with_cp: 93 | tmp_out = checkpoint(self.fpn_convs[i], outs[i]) 94 | else: 95 | tmp_out = self.fpn_convs[i](outs[i]) 96 | outputs.append(tmp_out) 97 | return tuple(outputs) 98 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | from .non_local import NonLocal2D 2 | from .generalized_attention import GeneralizedAttention 3 | 4 | __all__ = ['NonLocal2D', 'GeneralizedAttention'] 5 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/registry.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class Registry(object): 5 | 6 | def __init__(self, name): 7 | self._name = name 8 | self._module_dict = dict() 9 | 10 | @property 11 | def name(self): 12 | return self._name 13 | 14 | @property 15 | def module_dict(self): 16 | return self._module_dict 17 | 18 | def _register_module(self, module_class): 19 | """Register a module. 20 | 21 | Args: 22 | module (:obj:`nn.Module`): Module to be registered. 
23 | """ 24 | if not issubclass(module_class, nn.Module): 25 | raise TypeError( 26 | 'module must be a child of nn.Module, but got {}'.format( 27 | module_class)) 28 | module_name = module_class.__name__ 29 | if module_name in self._module_dict: 30 | raise KeyError('{} is already registered in {}'.format( 31 | module_name, self.name)) 32 | self._module_dict[module_name] = module_class 33 | 34 | def register_module(self, cls): 35 | self._register_module(cls) 36 | return cls 37 | 38 | 39 | BACKBONES = Registry('backbone') 40 | NECKS = Registry('neck') 41 | ROI_EXTRACTORS = Registry('roi_extractor') 42 | SHARED_HEADS = Registry('shared_head') 43 | HEADS = Registry('head') 44 | LOSSES = Registry('loss') 45 | DETECTORS = Registry('detector') 46 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/roi_extractors/__init__.py: -------------------------------------------------------------------------------- 1 | from .single_level import SingleRoIExtractor 2 | 3 | __all__ = ['SingleRoIExtractor'] 4 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/roi_extractors/single_level.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from mmdet import ops 7 | from mmdet.core import force_fp32 8 | from ..registry import ROI_EXTRACTORS 9 | 10 | 11 | @ROI_EXTRACTORS.register_module 12 | class SingleRoIExtractor(nn.Module): 13 | """Extract RoI features from a single level feature map. 14 | 15 | If there are mulitple input feature levels, each RoI is mapped to a level 16 | according to its scale. 17 | 18 | Args: 19 | roi_layer (dict): Specify RoI layer type and arguments. 20 | out_channels (int): Output channels of RoI layers. 21 | featmap_strides (int): Strides of input feature maps. 22 | finest_scale (int): Scale threshold of mapping to level 0. 23 | """ 24 | 25 | def __init__(self, 26 | roi_layer, 27 | out_channels, 28 | featmap_strides, 29 | finest_scale=56): 30 | super(SingleRoIExtractor, self).__init__() 31 | self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides) 32 | self.out_channels = out_channels 33 | self.featmap_strides = featmap_strides 34 | self.finest_scale = finest_scale 35 | self.fp16_enabled = False 36 | 37 | @property 38 | def num_inputs(self): 39 | """int: Input feature map levels.""" 40 | return len(self.featmap_strides) 41 | 42 | def init_weights(self): 43 | pass 44 | 45 | def build_roi_layers(self, layer_cfg, featmap_strides): 46 | cfg = layer_cfg.copy() 47 | layer_type = cfg.pop('type') 48 | assert hasattr(ops, layer_type) 49 | layer_cls = getattr(ops, layer_type) 50 | roi_layers = nn.ModuleList( 51 | [layer_cls(spatial_scale=1 / s, **cfg) for s in featmap_strides]) 52 | return roi_layers 53 | 54 | def map_roi_levels(self, rois, num_levels): 55 | """Map rois to corresponding feature levels by scales. 56 | 57 | - scale < finest_scale: level 0 58 | - finest_scale <= scale < finest_scale * 2: level 1 59 | - finest_scale * 2 <= scale < finest_scale * 4: level 2 60 | - scale >= finest_scale * 4: level 3 61 | 62 | Args: 63 | rois (Tensor): Input RoIs, shape (k, 5). 64 | num_levels (int): Total level number. 
65 | 66 | Returns: 67 | Tensor: Level index (0-based) of each RoI, shape (k, ) 68 | """ 69 | scale = torch.sqrt( 70 | (rois[:, 3] - rois[:, 1] + 1) * (rois[:, 4] - rois[:, 2] + 1)) 71 | target_lvls = torch.floor(torch.log2(scale / self.finest_scale + 1e-6)) 72 | target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long() 73 | return target_lvls 74 | 75 | @force_fp32(apply_to=('feats',), out_fp16=True) 76 | def forward(self, feats, rois): 77 | if len(feats) == 1: 78 | return self.roi_layers[0](feats[0], rois) 79 | 80 | out_size = self.roi_layers[0].out_size 81 | num_levels = len(feats) 82 | target_lvls = self.map_roi_levels(rois, num_levels) 83 | roi_feats = feats[0].new_zeros(rois.size()[0], self.out_channels, 84 | out_size, out_size) 85 | for i in range(num_levels): 86 | inds = target_lvls == i 87 | if inds.any(): 88 | rois_ = rois[inds, :] 89 | roi_feats_t = self.roi_layers[i](feats[i], rois_) 90 | roi_feats[inds] += roi_feats_t 91 | return roi_feats 92 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/shared_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .res_layer import ResLayer 2 | 3 | __all__ = ['ResLayer'] 4 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/shared_heads/res_layer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import torch.nn as nn 4 | from mmcv.cnn import constant_init, kaiming_init 5 | from mmcv.runner import load_checkpoint 6 | 7 | from mmdet.core import auto_fp16 8 | from ..backbones import ResNet, make_res_layer 9 | from ..registry import SHARED_HEADS 10 | 11 | 12 | @SHARED_HEADS.register_module 13 | class ResLayer(nn.Module): 14 | 15 | def __init__(self, 16 | depth, 17 | stage=3, 18 | stride=2, 19 | dilation=1, 20 | style='pytorch', 21 | norm_cfg=dict(type='BN', requires_grad=True), 22 | norm_eval=True, 23 | with_cp=False, 24 | dcn=None): 25 | super(ResLayer, self).__init__() 26 | self.norm_eval = norm_eval 27 | self.norm_cfg = norm_cfg 28 | self.stage = stage 29 | self.fp16_enabled = False 30 | block, stage_blocks = ResNet.arch_settings[depth] 31 | stage_block = stage_blocks[stage] 32 | planes = 64 * 2**stage 33 | inplanes = 64 * 2**(stage - 1) * block.expansion 34 | 35 | res_layer = make_res_layer( 36 | block, 37 | inplanes, 38 | planes, 39 | stage_block, 40 | stride=stride, 41 | dilation=dilation, 42 | style=style, 43 | with_cp=with_cp, 44 | norm_cfg=self.norm_cfg, 45 | dcn=dcn) 46 | self.add_module('layer{}'.format(stage + 1), res_layer) 47 | 48 | def init_weights(self, pretrained=None): 49 | if isinstance(pretrained, str): 50 | logger = logging.getLogger() 51 | load_checkpoint(self, pretrained, strict=False, logger=logger) 52 | elif pretrained is None: 53 | for m in self.modules(): 54 | if isinstance(m, nn.Conv2d): 55 | kaiming_init(m) 56 | elif isinstance(m, nn.BatchNorm2d): 57 | constant_init(m, 1) 58 | else: 59 | raise TypeError('pretrained must be a str or None') 60 | 61 | @auto_fp16() 62 | def forward(self, x): 63 | res_layer = getattr(self, 'layer{}'.format(self.stage + 1)) 64 | out = res_layer(x) 65 | return out 66 | 67 | def train(self, mode=True): 68 | super(ResLayer, self).train(mode) 69 | if self.norm_eval: 70 | for m in self.modules(): 71 | if isinstance(m, nn.BatchNorm2d): 72 | m.eval() 73 | -------------------------------------------------------------------------------- 
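For reference, a minimal numeric sketch (not part of the repository) of the level-mapping rule in `SingleRoIExtractor.map_roi_levels` earlier in this section, using the default `finest_scale=56` and four levels:

import torch

# RoIs in (batch_ind, x1, y1, x2, y2) layout; box sizes chosen per level.
rois = torch.tensor([
    [0., 0., 0.,  63.,  63.],   # 64x64   -> level 0 (scale < 112)
    [0., 0., 0., 159., 159.],   # 160x160 -> level 1 (112 <= scale < 224)
    [0., 0., 0., 319., 319.],   # 320x320 -> level 2 (224 <= scale < 448)
    [0., 0., 0., 639., 639.],   # 640x640 -> level 3 (scale >= 448)
])
finest_scale, num_levels = 56, 4
scale = torch.sqrt((rois[:, 3] - rois[:, 1] + 1) * (rois[:, 4] - rois[:, 2] + 1))
target_lvls = torch.floor(torch.log2(scale / finest_scale + 1e-6))
target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long()
print(target_lvls)  # tensor([0, 1, 2, 3])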
/mmdetection/mmdet/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .conv_ws import conv_ws_2d, ConvWS2d 2 | from .conv_module import build_conv_layer, ConvModule 3 | from .norm import build_norm_layer 4 | from .scale import Scale 5 | from .weight_init import (xavier_init, normal_init, uniform_init, kaiming_init, 6 | bias_init_with_prob) 7 | 8 | __all__ = [ 9 | 'conv_ws_2d', 'ConvWS2d', 'build_conv_layer', 'ConvModule', 10 | 'build_norm_layer', 'xavier_init', 'normal_init', 'uniform_init', 11 | 'kaiming_init', 'bias_init_with_prob', 'Scale' 12 | ] 13 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/utils/conv_ws.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | def conv_ws_2d(input, 6 | weight, 7 | bias=None, 8 | stride=1, 9 | padding=0, 10 | dilation=1, 11 | groups=1, 12 | eps=1e-5): 13 | c_in = weight.size(0) 14 | weight_flat = weight.view(c_in, -1) 15 | mean = weight_flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1) 16 | std = weight_flat.std(dim=1, keepdim=True).view(c_in, 1, 1, 1) 17 | weight = (weight - mean) / (std + eps) 18 | return F.conv2d(input, weight, bias, stride, padding, dilation, groups) 19 | 20 | 21 | class ConvWS2d(nn.Conv2d): 22 | 23 | def __init__(self, 24 | in_channels, 25 | out_channels, 26 | kernel_size, 27 | stride=1, 28 | padding=0, 29 | dilation=1, 30 | groups=1, 31 | bias=True, 32 | eps=1e-5): 33 | super(ConvWS2d, self).__init__( 34 | in_channels, 35 | out_channels, 36 | kernel_size, 37 | stride=stride, 38 | padding=padding, 39 | dilation=dilation, 40 | groups=groups, 41 | bias=bias) 42 | self.eps = eps 43 | 44 | def forward(self, x): 45 | return conv_ws_2d(x, self.weight, self.bias, self.stride, self.padding, 46 | self.dilation, self.groups, self.eps) 47 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/utils/norm.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | norm_cfg = { 4 | # format: layer_type: (abbreviation, module) 5 | 'BN': ('bn', nn.BatchNorm2d), 6 | 'SyncBN': ('bn', nn.SyncBatchNorm), 7 | 'GN': ('gn', nn.GroupNorm), 8 | # and potentially 'SN' 9 | } 10 | 11 | 12 | def build_norm_layer(cfg, num_features, postfix=''): 13 | """Build a normalization layer. 14 | 15 | Args: 16 | cfg (dict): cfg should contain: 17 | type (str): identifies the norm layer type. 18 | layer args: args needed to instantiate a norm layer. 19 | requires_grad (bool): [optional] whether the layer's parameters 20 | require gradient updates. 20 | num_features (int): number of channels from input. 21 | postfix (int, str): appended to the norm abbreviation to 22 | create the named layer.
23 | 24 | Returns: 25 | name (str): abbreviation + postfix 26 | layer (nn.Module): created norm layer 27 | """ 28 | assert isinstance(cfg, dict) and 'type' in cfg 29 | cfg_ = cfg.copy() 30 | 31 | layer_type = cfg_.pop('type') 32 | if layer_type not in norm_cfg: 33 | raise KeyError('Unrecognized norm type {}'.format(layer_type)) 34 | else: 35 | abbr, norm_layer = norm_cfg[layer_type] 36 | if norm_layer is None: 37 | raise NotImplementedError 38 | 39 | assert isinstance(postfix, (int, str)) 40 | name = abbr + str(postfix) 41 | 42 | requires_grad = cfg_.pop('requires_grad', True) 43 | cfg_.setdefault('eps', 1e-5) 44 | if layer_type != 'GN': 45 | layer = norm_layer(num_features, **cfg_) 46 | if layer_type == 'SyncBN': 47 | layer._specify_ddp_gpu_num(1) 48 | else: 49 | assert 'num_groups' in cfg_ 50 | layer = norm_layer(num_channels=num_features, **cfg_) 51 | 52 | for param in layer.parameters(): 53 | param.requires_grad = requires_grad 54 | 55 | return name, layer 56 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/utils/scale.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Scale(nn.Module): 6 | 7 | def __init__(self, scale=1.0): 8 | super(Scale, self).__init__() 9 | self.scale = nn.Parameter(torch.tensor(scale, dtype=torch.float)) 10 | 11 | def forward(self, x): 12 | return x * self.scale 13 | -------------------------------------------------------------------------------- /mmdetection/mmdet/models/utils/weight_init.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | 4 | 5 | def xavier_init(module, gain=1, bias=0, distribution='normal'): 6 | assert distribution in ['uniform', 'normal'] 7 | if distribution == 'uniform': 8 | nn.init.xavier_uniform_(module.weight, gain=gain) 9 | else: 10 | nn.init.xavier_normal_(module.weight, gain=gain) 11 | if hasattr(module, 'bias'): 12 | nn.init.constant_(module.bias, bias) 13 | 14 | 15 | def normal_init(module, mean=0, std=1, bias=0): 16 | nn.init.normal_(module.weight, mean, std) 17 | if hasattr(module, 'bias'): 18 | nn.init.constant_(module.bias, bias) 19 | 20 | 21 | def uniform_init(module, a=0, b=1, bias=0): 22 | nn.init.uniform_(module.weight, a, b) 23 | if hasattr(module, 'bias'): 24 | nn.init.constant_(module.bias, bias) 25 | 26 | 27 | def kaiming_init(module, 28 | mode='fan_out', 29 | nonlinearity='relu', 30 | bias=0, 31 | distribution='normal'): 32 | assert distribution in ['uniform', 'normal'] 33 | if distribution == 'uniform': 34 | nn.init.kaiming_uniform_( 35 | module.weight, mode=mode, nonlinearity=nonlinearity) 36 | else: 37 | nn.init.kaiming_normal_( 38 | module.weight, mode=mode, nonlinearity=nonlinearity) 39 | if hasattr(module, 'bias'): 40 | nn.init.constant_(module.bias, bias) 41 | 42 | 43 | def bias_init_with_prob(prior_prob): 44 | """Initialize conv/fc bias value according to a given probability.""" 45 | bias_init = float(-np.log((1 - prior_prob) / prior_prob)) 46 | return bias_init 47 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .dcn import (DeformConv, DeformConvPack, ModulatedDeformConv, 2 | ModulatedDeformConvPack, DeformRoIPooling, 3 | DeformRoIPoolingPack, ModulatedDeformRoIPoolingPack, 4 | deform_conv, modulated_deform_conv,
deform_roi_pooling) 5 | from .gcb import ContextBlock 6 | from .nms import nms, soft_nms 7 | from .roi_align import RoIAlign, roi_align 8 | from .roi_pool import RoIPool, roi_pool 9 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss 10 | from .masked_conv import MaskedConv2d 11 | 12 | __all__ = [ 13 | 'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool', 14 | 'DeformConv', 'DeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 15 | 'ModulatedDeformRoIPoolingPack', 'ModulatedDeformConv', 16 | 'ModulatedDeformConvPack', 'deform_conv', 'modulated_deform_conv', 17 | 'deform_roi_pooling', 'SigmoidFocalLoss', 'sigmoid_focal_loss', 18 | 'MaskedConv2d', 'ContextBlock' 19 | ] 20 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.deform_conv import deform_conv, modulated_deform_conv 2 | from .functions.deform_pool import deform_roi_pooling 3 | from .modules.deform_conv import (DeformConv, ModulatedDeformConv, 4 | DeformConvPack, ModulatedDeformConvPack) 5 | from .modules.deform_pool import (DeformRoIPooling, DeformRoIPoolingPack, 6 | ModulatedDeformRoIPoolingPack) 7 | 8 | __all__ = [ 9 | 'DeformConv', 'DeformConvPack', 'ModulatedDeformConv', 10 | 'ModulatedDeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 11 | 'ModulatedDeformRoIPoolingPack', 'deform_conv', 'modulated_deform_conv', 12 | 'deform_roi_pooling' 13 | ] 14 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/dcn/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/boliu61/open-images-2019-instance-segmentation/52a7ec2c254deb7b702aa7a085855e31a5254624/mmdetection/mmdet/ops/dcn/functions/__init__.py -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/dcn/functions/deform_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from .. 
import deform_pool_cuda 5 | 6 | 7 | class DeformRoIPoolingFunction(Function): 8 | 9 | @staticmethod 10 | def forward(ctx, 11 | data, 12 | rois, 13 | offset, 14 | spatial_scale, 15 | out_size, 16 | out_channels, 17 | no_trans, 18 | group_size=1, 19 | part_size=None, 20 | sample_per_part=4, 21 | trans_std=.0): 22 | ctx.spatial_scale = spatial_scale 23 | ctx.out_size = out_size 24 | ctx.out_channels = out_channels 25 | ctx.no_trans = no_trans 26 | ctx.group_size = group_size 27 | ctx.part_size = out_size if part_size is None else part_size 28 | ctx.sample_per_part = sample_per_part 29 | ctx.trans_std = trans_std 30 | 31 | assert 0.0 <= ctx.trans_std <= 1.0 32 | if not data.is_cuda: 33 | raise NotImplementedError 34 | 35 | n = rois.shape[0] 36 | output = data.new_empty(n, out_channels, out_size, out_size) 37 | output_count = data.new_empty(n, out_channels, out_size, out_size) 38 | deform_pool_cuda.deform_psroi_pooling_cuda_forward( 39 | data, rois, offset, output, output_count, ctx.no_trans, 40 | ctx.spatial_scale, ctx.out_channels, ctx.group_size, ctx.out_size, 41 | ctx.part_size, ctx.sample_per_part, ctx.trans_std) 42 | 43 | if data.requires_grad or rois.requires_grad or offset.requires_grad: 44 | ctx.save_for_backward(data, rois, offset) 45 | ctx.output_count = output_count 46 | 47 | return output 48 | 49 | @staticmethod 50 | def backward(ctx, grad_output): 51 | if not grad_output.is_cuda: 52 | raise NotImplementedError 53 | 54 | data, rois, offset = ctx.saved_tensors 55 | output_count = ctx.output_count 56 | grad_input = torch.zeros_like(data) 57 | grad_rois = None 58 | grad_offset = torch.zeros_like(offset) 59 | 60 | deform_pool_cuda.deform_psroi_pooling_cuda_backward( 61 | grad_output, data, rois, offset, output_count, grad_input, 62 | grad_offset, ctx.no_trans, ctx.spatial_scale, ctx.out_channels, 63 | ctx.group_size, ctx.out_size, ctx.part_size, ctx.sample_per_part, 64 | ctx.trans_std) 65 | return (grad_input, grad_rois, grad_offset, None, None, None, None, 66 | None, None, None, None) 67 | 68 | 69 | deform_roi_pooling = DeformRoIPoolingFunction.apply 70 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/dcn/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/boliu61/open-images-2019-instance-segmentation/52a7ec2c254deb7b702aa7a085855e31a5254624/mmdetection/mmdet/ops/dcn/modules/__init__.py -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/dcn/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='deform_conv', 6 | ext_modules=[ 7 | CUDAExtension('deform_conv_cuda', [ 8 | 'src/deform_conv_cuda.cpp', 9 | 'src/deform_conv_cuda_kernel.cu', 10 | ]), 11 | CUDAExtension( 12 | 'deform_pool_cuda', 13 | ['src/deform_pool_cuda.cpp', 'src/deform_pool_cuda_kernel.cu']), 14 | ], 15 | cmdclass={'build_ext': BuildExtension}) 16 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/gcb/__init__.py: -------------------------------------------------------------------------------- 1 | from .context_block import ContextBlock 2 | 3 | __all__ = [ 4 | 'ContextBlock', 5 | ] 6 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/masked_conv/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .functions.masked_conv import masked_conv2d 2 | from .modules.masked_conv import MaskedConv2d 3 | 4 | __all__ = ['masked_conv2d', 'MaskedConv2d'] 5 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/masked_conv/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/boliu61/open-images-2019-instance-segmentation/52a7ec2c254deb7b702aa7a085855e31a5254624/mmdetection/mmdet/ops/masked_conv/functions/__init__.py -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/masked_conv/functions/masked_conv.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from torch.autograd import Function 4 | from torch.nn.modules.utils import _pair 5 | from .. import masked_conv2d_cuda 6 | 7 | 8 | class MaskedConv2dFunction(Function): 9 | 10 | @staticmethod 11 | def forward(ctx, features, mask, weight, bias, padding=0, stride=1): 12 | assert mask.dim() == 3 and mask.size(0) == 1 13 | assert features.dim() == 4 and features.size(0) == 1 14 | assert features.size()[2:] == mask.size()[1:] 15 | pad_h, pad_w = _pair(padding) 16 | stride_h, stride_w = _pair(stride) 17 | if stride_h != 1 or stride_w != 1: 18 | raise ValueError( 19 | 'Stride must be 1 in masked_conv2d currently.') 20 | if not features.is_cuda: 21 | raise NotImplementedError 22 | 23 | out_channel, in_channel, kernel_h, kernel_w = weight.size() 24 | 25 | batch_size = features.size(0) 26 | out_h = int( 27 | math.floor((features.size(2) + 2 * pad_h - 28 | (kernel_h - 1) - 1) / stride_h + 1)) 29 | out_w = int( 30 | math.floor((features.size(3) + 2 * pad_w - 31 | (kernel_w - 1) - 1) / stride_w + 1)) 32 | mask_inds = torch.nonzero(mask[0] > 0) 33 | output = features.new_zeros(batch_size, out_channel, out_h, out_w) 34 | if mask_inds.numel() > 0: 35 | mask_h_idx = mask_inds[:, 0].contiguous() 36 | mask_w_idx = mask_inds[:, 1].contiguous() 37 | data_col = features.new_zeros(in_channel * kernel_h * kernel_w, 38 | mask_inds.size(0)) 39 | masked_conv2d_cuda.masked_im2col_forward(features, mask_h_idx, 40 | mask_w_idx, kernel_h, 41 | kernel_w, pad_h, pad_w, 42 | data_col) 43 | 44 | masked_output = torch.addmm(1, bias[:, None], 1, 45 | weight.view(out_channel, -1), data_col) 46 | masked_conv2d_cuda.masked_col2im_forward(masked_output, mask_h_idx, 47 | mask_w_idx, out_h, out_w, 48 | out_channel, output) 49 | return output 50 | 51 | @staticmethod 52 | def backward(ctx, grad_output): 53 | # one gradient per forward input (features, mask, weight, bias, padding, stride) 53 | return (None, ) * 6 54 | 55 | 56 | masked_conv2d = MaskedConv2dFunction.apply 57 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/masked_conv/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/boliu61/open-images-2019-instance-segmentation/52a7ec2c254deb7b702aa7a085855e31a5254624/mmdetection/mmdet/ops/masked_conv/modules/__init__.py -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/masked_conv/modules/masked_conv.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from ..functions.masked_conv import masked_conv2d 3 | 4 | 5 | class MaskedConv2d(nn.Conv2d): 6 | """A MaskedConv2d which
inherits the official Conv2d. 7 | 8 | The masked forward doesn't implement the backward function and 9 | currently only supports stride 1. 10 | """ 11 | 12 | def __init__(self, 13 | in_channels, 14 | out_channels, 15 | kernel_size, 16 | stride=1, 17 | padding=0, 18 | dilation=1, 19 | groups=1, 20 | bias=True): 21 | super(MaskedConv2d, 22 | self).__init__(in_channels, out_channels, kernel_size, stride, 23 | padding, dilation, groups, bias) 24 | 25 | def forward(self, input, mask=None): 26 | if mask is None: # fallback to the normal Conv2d 27 | return super(MaskedConv2d, self).forward(input) 28 | else: 29 | return masked_conv2d(input, mask, self.weight, self.bias, 30 | self.padding) 31 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/masked_conv/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='masked_conv2d_cuda', 6 | ext_modules=[ 7 | CUDAExtension('masked_conv2d_cuda', [ 8 | 'src/masked_conv2d_cuda.cpp', 9 | 'src/masked_conv2d_kernel.cu', 10 | ]), 11 | ], 12 | cmdclass={'build_ext': BuildExtension}) 13 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/masked_conv/src/masked_conv2d_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <cmath> 4 | #include <vector> 5 | 6 | int MaskedIm2colForwardLaucher(const at::Tensor im, const int height, 7 | const int width, const int channels, 8 | const int kernel_h, const int kernel_w, 9 | const int pad_h, const int pad_w, 10 | const at::Tensor mask_h_idx, 11 | const at::Tensor mask_w_idx, const int mask_cnt, 12 | at::Tensor col); 13 | 14 | int MaskedCol2imForwardLaucher(const at::Tensor col, const int height, 15 | const int width, const int channels, 16 | const at::Tensor mask_h_idx, 17 | const at::Tensor mask_w_idx, const int mask_cnt, 18 | at::Tensor im); 19 | 20 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 21 | #define CHECK_CONTIGUOUS(x) \ 22 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 23 | #define CHECK_INPUT(x) \ 24 | CHECK_CUDA(x); \ 25 | CHECK_CONTIGUOUS(x) 26 | 27 | int masked_im2col_forward_cuda(const at::Tensor im, const at::Tensor mask_h_idx, 28 | const at::Tensor mask_w_idx, const int kernel_h, 29 | const int kernel_w, const int pad_h, 30 | const int pad_w, at::Tensor col) { 31 | CHECK_INPUT(im); 32 | CHECK_INPUT(mask_h_idx); 33 | CHECK_INPUT(mask_w_idx); 34 | CHECK_INPUT(col); 35 | // im: (n, ic, h, w), kernel size (kh, kw) 36 | // kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh) 37 | 38 | int channels = im.size(1); 39 | int height = im.size(2); 40 | int width = im.size(3); 41 | int mask_cnt = mask_h_idx.size(0); 42 | 43 | MaskedIm2colForwardLaucher(im, height, width, channels, kernel_h, kernel_w, 44 | pad_h, pad_w, mask_h_idx, mask_w_idx, mask_cnt, 45 | col); 46 | 47 | return 1; 48 | } 49 | 50 | int masked_col2im_forward_cuda(const at::Tensor col, 51 | const at::Tensor mask_h_idx, 52 | const at::Tensor mask_w_idx, int height, 53 | int width, int channels, at::Tensor im) { 54 | CHECK_INPUT(col); 55 | CHECK_INPUT(mask_h_idx); 56 | CHECK_INPUT(mask_w_idx); 57 | CHECK_INPUT(im); 58 | // im: (n, ic, h, w), kernel size (kh, kw) 59 | // kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh) 60 | 61 | int mask_cnt = mask_h_idx.size(0);
62 | 63 | MaskedCol2imForwardLaucher(col, height, width, channels, mask_h_idx, 64 | mask_w_idx, mask_cnt, im); 65 | 66 | return 1; 67 | } 68 | 69 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 70 | m.def("masked_im2col_forward", &masked_im2col_forward_cuda, 71 | "masked_im2col forward (CUDA)"); 72 | m.def("masked_col2im_forward", &masked_col2im_forward_cuda, 73 | "masked_col2im forward (CUDA)"); 74 | } -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_wrapper import nms, soft_nms 2 | 3 | __all__ = ['nms', 'soft_nms'] 4 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from . import nms_cuda, nms_cpu 5 | from .soft_nms_cpu import soft_nms_cpu 6 | 7 | 8 | def nms(dets, iou_thr, device_id=None): 9 | """Dispatch to either CPU or GPU NMS implementations. 10 | 11 | The input can be either a torch tensor or numpy array. GPU NMS will be used 12 | if the input is a gpu tensor or device_id is specified, otherwise CPU NMS 13 | will be used. The returned type will always be the same as the input's. 14 | 15 | Arguments: 16 | dets (torch.Tensor or np.ndarray): bboxes with scores. 17 | iou_thr (float): IoU threshold for NMS. 18 | device_id (int, optional): when `dets` is a numpy array, if `device_id` 19 | is None, then cpu nms is used, otherwise gpu_nms will be used. 20 | 21 | Returns: 22 | tuple: kept bboxes and indices, which are always of the same data type 23 | as the input. 24 | """ 25 | # convert dets (tensor or numpy array) to tensor 26 | if isinstance(dets, torch.Tensor): 27 | is_numpy = False 28 | dets_th = dets 29 | elif isinstance(dets, np.ndarray): 30 | is_numpy = True 31 | device = 'cpu' if device_id is None else 'cuda:{}'.format(device_id) 32 | dets_th = torch.from_numpy(dets).to(device) 33 | else: 34 | raise TypeError( 35 | 'dets must be either a Tensor or numpy array, but got {}'.format( 36 | type(dets))) 37 | 38 | # execute cpu or cuda nms 39 | if dets_th.shape[0] == 0: 40 | inds = dets_th.new_zeros(0, dtype=torch.long) 41 | else: 42 | if dets_th.is_cuda: 43 | inds = nms_cuda.nms(dets_th, iou_thr) 44 | else: 45 | inds = nms_cpu.nms(dets_th, iou_thr) 46 | 47 | if is_numpy: 48 | inds = inds.cpu().numpy() 49 | return dets[inds, :], inds 50 | 51 | 52 | def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3): 53 | if isinstance(dets, torch.Tensor): 54 | is_tensor = True 55 | dets_np = dets.detach().cpu().numpy() 56 | elif isinstance(dets, np.ndarray): 57 | is_tensor = False 58 | dets_np = dets 59 | else: 60 | raise TypeError( 61 | 'dets must be either a Tensor or numpy array, but got {}'.format( 62 | type(dets))) 63 | 64 | method_codes = {'linear': 1, 'gaussian': 2} 65 | if method not in method_codes: 66 | raise ValueError('Invalid method for SoftNMS: {}'.format(method)) 67 | new_dets, inds = soft_nms_cpu( 68 | dets_np, 69 | iou_thr, 70 | method=method_codes[method], 71 | sigma=sigma, 72 | min_score=min_score) 73 | 74 | if is_tensor: 75 | return dets.new_tensor(new_dets), dets.new_tensor( 76 | inds, dtype=torch.long) 77 | else: 78 | return new_dets.astype(np.float32), inds.astype(np.int64) 79 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/nms/setup.py:
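A usage sketch (not from the repository) for the `nms` wrapper above; it assumes the compiled `nms_cpu` extension is importable:

import numpy as np
from mmdet.ops import nms  # assumes the extensions in this package are built

dets = np.array([
    [10., 10., 50., 50., 0.9],
    [12., 12., 52., 52., 0.8],     # near-duplicate of the first box
    [100., 100., 140., 140., 0.7],
], dtype=np.float32)
kept, inds = nms(dets, iou_thr=0.5)  # numpy in -> CPU path, numpy out
print(inds)  # [0 2]; the near-duplicate is suppressed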
-------------------------------------------------------------------------------- 1 | import os.path as osp 2 | from setuptools import setup, Extension 3 | 4 | import numpy as np 5 | from Cython.Build import cythonize 6 | from Cython.Distutils import build_ext 7 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 8 | 9 | ext_args = dict( 10 | include_dirs=[np.get_include()], 11 | language='c++', 12 | extra_compile_args={ 13 | 'cc': ['-Wno-unused-function', '-Wno-write-strings'], 14 | 'nvcc': ['-c', '--compiler-options', '-fPIC'], 15 | }, 16 | ) 17 | 18 | extensions = [ 19 | Extension('soft_nms_cpu', ['src/soft_nms_cpu.pyx'], **ext_args), 20 | ] 21 | 22 | 23 | def customize_compiler_for_nvcc(self): 24 | """Inject deep into distutils to customize how the dispatch 25 | to cc/nvcc works. 26 | If you subclass UnixCCompiler, it's not trivial to get your subclass 27 | injected in, and still have the right customizations (i.e. 28 | distutils.sysconfig.customize_compiler) run on it. So instead of going 29 | the OO route, I have this. Note, it's kind of like a weird functional 30 | subclassing going on.""" 31 | 32 | # tell the compiler it can process .cu files 33 | self.src_extensions.append('.cu') 34 | 35 | # save references to the default compiler_so and _compile methods 36 | default_compiler_so = self.compiler_so 37 | super = self._compile 38 | 39 | # now redefine the _compile method. This gets executed for each 40 | # object but distutils doesn't have the ability to change compilers 41 | # based on source extension: we add it. 42 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 43 | if osp.splitext(src)[1] == '.cu': 44 | # use nvcc for .cu files 45 | self.set_executable('compiler_so', 'nvcc') 46 | # use only a subset of the extra_postargs, which are 1-1 translated 47 | # from the extra_compile_args in the Extension class 48 | postargs = extra_postargs['nvcc'] 49 | else: 50 | postargs = extra_postargs['cc'] 51 | 52 | super(obj, src, ext, cc_args, postargs, pp_opts) 53 | # reset the default compiler_so, which we might have changed for cuda 54 | self.compiler_so = default_compiler_so 55 | 56 | # inject our redefined _compile method into the class 57 | self._compile = _compile 58 | 59 | 60 | class custom_build_ext(build_ext): 61 | 62 | def build_extensions(self): 63 | customize_compiler_for_nvcc(self.compiler) 64 | build_ext.build_extensions(self) 65 | 66 | 67 | setup( 68 | name='soft_nms', 69 | cmdclass={'build_ext': custom_build_ext}, 70 | ext_modules=cythonize(extensions), 71 | ) 72 | 73 | setup( 74 | name='nms_cuda', 75 | ext_modules=[ 76 | CUDAExtension('nms_cuda', [ 77 | 'src/nms_cuda.cpp', 78 | 'src/nms_kernel.cu', 79 | ]), 80 | CUDAExtension('nms_cpu', [ 81 | 'src/nms_cpu.cpp', 82 | ]), 83 | ], 84 | cmdclass={'build_ext': BuildExtension}) 85 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/nms/src/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
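// Greedy CPU NMS: boxes are visited in descending score order; each kept
// box suppresses every remaining box whose IoU with it is >= threshold.
// Indices of the kept boxes are returned in ascending input order.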
2 | #include <torch/extension.h> 3 | 4 | template <typename scalar_t> 5 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) { 6 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 7 | 8 | if (dets.numel() == 0) { 9 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 10 | } 11 | 12 | auto x1_t = dets.select(1, 0).contiguous(); 13 | auto y1_t = dets.select(1, 1).contiguous(); 14 | auto x2_t = dets.select(1, 2).contiguous(); 15 | auto y2_t = dets.select(1, 3).contiguous(); 16 | auto scores = dets.select(1, 4).contiguous(); 17 | 18 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 19 | 20 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 21 | 22 | auto ndets = dets.size(0); 23 | at::Tensor suppressed_t = 24 | at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 25 | 26 | auto suppressed = suppressed_t.data<uint8_t>(); 27 | auto order = order_t.data<int64_t>(); 28 | auto x1 = x1_t.data<scalar_t>(); 29 | auto y1 = y1_t.data<scalar_t>(); 30 | auto x2 = x2_t.data<scalar_t>(); 31 | auto y2 = y2_t.data<scalar_t>(); 32 | auto areas = areas_t.data<scalar_t>(); 33 | 34 | for (int64_t _i = 0; _i < ndets; _i++) { 35 | auto i = order[_i]; 36 | if (suppressed[i] == 1) continue; 37 | auto ix1 = x1[i]; 38 | auto iy1 = y1[i]; 39 | auto ix2 = x2[i]; 40 | auto iy2 = y2[i]; 41 | auto iarea = areas[i]; 42 | 43 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 44 | auto j = order[_j]; 45 | if (suppressed[j] == 1) continue; 46 | auto xx1 = std::max(ix1, x1[j]); 47 | auto yy1 = std::max(iy1, y1[j]); 48 | auto xx2 = std::min(ix2, x2[j]); 49 | auto yy2 = std::min(iy2, y2[j]); 50 | 51 | auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1); 52 | auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1); 53 | auto inter = w * h; 54 | auto ovr = inter / (iarea + areas[j] - inter); 55 | if (ovr >= threshold) suppressed[j] = 1; 56 | } 57 | } 58 | return at::nonzero(suppressed_t == 0).squeeze(1); 59 | } 60 | 61 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 62 | at::Tensor result; 63 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 64 | result = nms_cpu_kernel<scalar_t>(dets, threshold); 65 | }); 66 | return result; 67 | } 68 | 69 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 70 | m.def("nms", &nms, "non-maximum suppression"); 71 | } -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/nms/src/nms_cuda.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #include <torch/extension.h> 3 | 4 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 5 | 6 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 7 | 8 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 9 | CHECK_CUDA(dets); 10 | if (dets.numel() == 0) 11 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 12 | return nms_cuda(dets, threshold); 13 | } 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("nms", &nms, "non-maximum suppression"); 17 | } -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.roi_align import roi_align 2 | from .modules.roi_align import RoIAlign 3 | 4 | __all__ = ['roi_align', 'RoIAlign'] 5 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_align/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/boliu61/open-images-2019-instance-segmentation/52a7ec2c254deb7b702aa7a085855e31a5254624/mmdetection/mmdet/ops/roi_align/functions/__init__.py -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_align/functions/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Function 2 | 3 | from .. import roi_align_cuda 4 | 5 | 6 | class RoIAlignFunction(Function): 7 | 8 | @staticmethod 9 | def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0): 10 | if isinstance(out_size, int): 11 | out_h = out_size 12 | out_w = out_size 13 | elif isinstance(out_size, tuple): 14 | assert len(out_size) == 2 15 | assert isinstance(out_size[0], int) 16 | assert isinstance(out_size[1], int) 17 | out_h, out_w = out_size 18 | else: 19 | raise TypeError( 20 | '"out_size" must be an integer or tuple of integers') 21 | ctx.spatial_scale = spatial_scale 22 | ctx.sample_num = sample_num 23 | ctx.save_for_backward(rois) 24 | ctx.feature_size = features.size() 25 | 26 | batch_size, num_channels, data_height, data_width = features.size() 27 | num_rois = rois.size(0) 28 | 29 | output = features.new_zeros(num_rois, num_channels, out_h, out_w) 30 | if features.is_cuda: 31 | roi_align_cuda.forward(features, rois, out_h, out_w, spatial_scale, 32 | sample_num, output) 33 | else: 34 | raise NotImplementedError 35 | 36 | return output 37 | 38 | @staticmethod 39 | def backward(ctx, grad_output): 40 | feature_size = ctx.feature_size 41 | spatial_scale = ctx.spatial_scale 42 | sample_num = ctx.sample_num 43 | rois = ctx.saved_tensors[0] 44 | assert (feature_size is not None and grad_output.is_cuda) 45 | 46 | batch_size, num_channels, data_height, data_width = feature_size 47 | out_w = grad_output.size(3) 48 | out_h = grad_output.size(2) 49 | 50 | grad_input = grad_rois = None 51 | if ctx.needs_input_grad[0]: 52 | grad_input = rois.new_zeros(batch_size, num_channels, data_height, 53 | data_width) 54 | roi_align_cuda.backward(grad_output.contiguous(), rois, out_h, 55 | out_w, spatial_scale, sample_num, 56 | grad_input) 57 | 58 | return grad_input, grad_rois, None, None, None 59 | 60 | 61 | roi_align = RoIAlignFunction.apply 62 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_align/gradcheck.py:
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.autograd import gradcheck 4 | 5 | import os.path as osp 6 | import sys 7 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 8 | from roi_align import RoIAlign # noqa: E402 9 | 10 | feat_size = 15 11 | spatial_scale = 1.0 / 8 12 | img_size = feat_size / spatial_scale 13 | num_imgs = 2 14 | num_rois = 20 15 | 16 | batch_ind = np.random.randint(num_imgs, size=(num_rois, 1)) 17 | rois = np.random.rand(num_rois, 4) * img_size * 0.5 18 | rois[:, 2:] += img_size * 0.5 19 | rois = np.hstack((batch_ind, rois)) 20 | 21 | feat = torch.randn( 22 | num_imgs, 16, feat_size, feat_size, requires_grad=True, device='cuda:0') 23 | rois = torch.from_numpy(rois).float().cuda() 24 | inputs = (feat, rois) 25 | print('Gradcheck for roi align...') 26 | test = gradcheck(RoIAlign(3, spatial_scale), inputs, atol=1e-3, eps=1e-3) 27 | print(test) 28 | test = gradcheck(RoIAlign(3, spatial_scale, 2), inputs, atol=1e-3, eps=1e-3) 29 | print(test) 30 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_align/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/boliu61/open-images-2019-instance-segmentation/52a7ec2c254deb7b702aa7a085855e31a5254624/mmdetection/mmdet/ops/roi_align/modules/__init__.py -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_align/modules/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_align import RoIAlignFunction 3 | 4 | 5 | class RoIAlign(Module): 6 | 7 | def __init__(self, out_size, spatial_scale, sample_num=0): 8 | super(RoIAlign, self).__init__() 9 | 10 | self.out_size = out_size 11 | self.spatial_scale = float(spatial_scale) 12 | self.sample_num = int(sample_num) 13 | 14 | def forward(self, features, rois): 15 | return RoIAlignFunction.apply(features, rois, self.out_size, 16 | self.spatial_scale, self.sample_num) 17 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_align/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='roi_align_cuda', 6 | ext_modules=[ 7 | CUDAExtension('roi_align_cuda', [ 8 | 'src/roi_align_cuda.cpp', 9 | 'src/roi_align_kernel.cu', 10 | ]), 11 | ], 12 | cmdclass={'build_ext': BuildExtension}) 13 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_align/src/roi_align_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <cmath> 4 | #include <vector> 5 | 6 | int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int sample_num, 8 | const int channels, const int height, 9 | const int width, const int num_rois, 10 | const int pooled_height, const int pooled_width, 11 | at::Tensor output); 12 | 13 | int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 14 | const float spatial_scale, const int sample_num, 15 | const int channels, const int height, 16 | const int width, const int num_rois, 17 | const int
pooled_height, const int pooled_width, 18 | at::Tensor bottom_grad); 19 | 20 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 21 | #define CHECK_CONTIGUOUS(x) \ 22 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 23 | #define CHECK_INPUT(x) \ 24 | CHECK_CUDA(x); \ 25 | CHECK_CONTIGUOUS(x) 26 | 27 | int roi_align_forward_cuda(at::Tensor features, at::Tensor rois, 28 | int pooled_height, int pooled_width, 29 | float spatial_scale, int sample_num, 30 | at::Tensor output) { 31 | CHECK_INPUT(features); 32 | CHECK_INPUT(rois); 33 | CHECK_INPUT(output); 34 | 35 | // Number of ROIs 36 | int num_rois = rois.size(0); 37 | int size_rois = rois.size(1); 38 | 39 | if (size_rois != 5) { 40 | printf("wrong roi size\n"); 41 | return 0; 42 | } 43 | 44 | int num_channels = features.size(1); 45 | int data_height = features.size(2); 46 | int data_width = features.size(3); 47 | 48 | ROIAlignForwardLaucher(features, rois, spatial_scale, sample_num, 49 | num_channels, data_height, data_width, num_rois, 50 | pooled_height, pooled_width, output); 51 | 52 | return 1; 53 | } 54 | 55 | int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois, 56 | int pooled_height, int pooled_width, 57 | float spatial_scale, int sample_num, 58 | at::Tensor bottom_grad) { 59 | CHECK_INPUT(top_grad); 60 | CHECK_INPUT(rois); 61 | CHECK_INPUT(bottom_grad); 62 | 63 | // Number of ROIs 64 | int num_rois = rois.size(0); 65 | int size_rois = rois.size(1); 66 | if (size_rois != 5) { 67 | printf("wrong roi size\n"); 68 | return 0; 69 | } 70 | 71 | int num_channels = bottom_grad.size(1); 72 | int data_height = bottom_grad.size(2); 73 | int data_width = bottom_grad.size(3); 74 | 75 | ROIAlignBackwardLaucher(top_grad, rois, spatial_scale, sample_num, 76 | num_channels, data_height, data_width, num_rois, 77 | pooled_height, pooled_width, bottom_grad); 78 | 79 | return 1; 80 | } 81 | 82 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 83 | m.def("forward", &roi_align_forward_cuda, "Roi_Align forward (CUDA)"); 84 | m.def("backward", &roi_align_backward_cuda, "Roi_Align backward (CUDA)"); 85 | } 86 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_pool/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.roi_pool import roi_pool 2 | from .modules.roi_pool import RoIPool 3 | 4 | __all__ = ['roi_pool', 'RoIPool'] 5 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_pool/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/boliu61/open-images-2019-instance-segmentation/52a7ec2c254deb7b702aa7a085855e31a5254624/mmdetection/mmdet/ops/roi_pool/functions/__init__.py -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_pool/functions/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from .. 
import roi_pool_cuda 5 | 6 | 7 | class RoIPoolFunction(Function): 8 | 9 | @staticmethod 10 | def forward(ctx, features, rois, out_size, spatial_scale): 11 | if isinstance(out_size, int): 12 | out_h = out_size 13 | out_w = out_size 14 | elif isinstance(out_size, tuple): 15 | assert len(out_size) == 2 16 | assert isinstance(out_size[0], int) 17 | assert isinstance(out_size[1], int) 18 | out_h, out_w = out_size 19 | else: 20 | raise TypeError( 21 | '"out_size" must be an integer or tuple of integers') 22 | assert features.is_cuda 23 | ctx.save_for_backward(rois) 24 | num_channels = features.size(1) 25 | num_rois = rois.size(0) 26 | out_size = (num_rois, num_channels, out_h, out_w) 27 | output = features.new_zeros(out_size) 28 | argmax = features.new_zeros(out_size, dtype=torch.int) 29 | roi_pool_cuda.forward(features, rois, out_h, out_w, spatial_scale, 30 | output, argmax) 31 | ctx.spatial_scale = spatial_scale 32 | ctx.feature_size = features.size() 33 | ctx.argmax = argmax 34 | 35 | return output 36 | 37 | @staticmethod 38 | def backward(ctx, grad_output): 39 | assert grad_output.is_cuda 40 | spatial_scale = ctx.spatial_scale 41 | feature_size = ctx.feature_size 42 | argmax = ctx.argmax 43 | rois = ctx.saved_tensors[0] 44 | assert feature_size is not None 45 | 46 | grad_input = grad_rois = None 47 | if ctx.needs_input_grad[0]: 48 | grad_input = grad_output.new_zeros(feature_size) 49 | roi_pool_cuda.backward(grad_output.contiguous(), rois, argmax, 50 | spatial_scale, grad_input) 51 | 52 | return grad_input, grad_rois, None, None 53 | 54 | 55 | roi_pool = RoIPoolFunction.apply 56 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_pool/gradcheck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import gradcheck 3 | 4 | import os.path as osp 5 | import sys 6 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 7 | from roi_pool import RoIPool # noqa: E402 8 | 9 | feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda() 10 | rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55], 11 | [1, 67, 40, 110, 120]]).cuda() 12 | inputs = (feat, rois) 13 | print('Gradcheck for roi pooling...') 14 | test = gradcheck(RoIPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3) 15 | print(test) 16 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_pool/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/boliu61/open-images-2019-instance-segmentation/52a7ec2c254deb7b702aa7a085855e31a5254624/mmdetection/mmdet/ops/roi_pool/modules/__init__.py -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_pool/modules/roi_pool.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_pool import roi_pool 3 | 4 | 5 | class RoIPool(Module): 6 | 7 | def __init__(self, out_size, spatial_scale): 8 | super(RoIPool, self).__init__() 9 | 10 | self.out_size = out_size 11 | self.spatial_scale = float(spatial_scale) 12 | 13 | def forward(self, features, rois): 14 | return roi_pool(features, rois, self.out_size, self.spatial_scale) 15 | -------------------------------------------------------------------------------- /mmdetection/mmdet/ops/roi_pool/setup.py: 
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
3 | 
4 | setup(
5 |     name='roi_pool',
6 |     ext_modules=[
7 |         CUDAExtension('roi_pool_cuda', [
8 |             'src/roi_pool_cuda.cpp',
9 |             'src/roi_pool_kernel.cu',
10 |         ])
11 |     ],
12 |     cmdclass={'build_ext': BuildExtension})
13 | 
--------------------------------------------------------------------------------
/mmdetection/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/extension.h>
2 | 
3 | #include <cmath>
4 | #include <vector>
5 | 
6 | int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois,
7 |                           const float spatial_scale, const int channels,
8 |                           const int height, const int width, const int num_rois,
9 |                           const int pooled_h, const int pooled_w,
10 |                           at::Tensor output, at::Tensor argmax);
11 | 
12 | int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
13 |                            const at::Tensor argmax, const float spatial_scale,
14 |                            const int batch_size, const int channels,
15 |                            const int height, const int width,
16 |                            const int num_rois, const int pooled_h,
17 |                            const int pooled_w, at::Tensor bottom_grad);
18 | 
19 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
20 | #define CHECK_CONTIGUOUS(x) \
21 |   AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
22 | #define CHECK_INPUT(x) \
23 |   CHECK_CUDA(x);       \
24 |   CHECK_CONTIGUOUS(x)
25 | 
26 | int roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois,
27 |                              int pooled_height, int pooled_width,
28 |                              float spatial_scale, at::Tensor output,
29 |                              at::Tensor argmax) {
30 |   CHECK_INPUT(features);
31 |   CHECK_INPUT(rois);
32 |   CHECK_INPUT(output);
33 |   CHECK_INPUT(argmax);
34 | 
35 |   // Number of ROIs
36 |   int num_rois = rois.size(0);
37 |   int size_rois = rois.size(1);
38 | 
39 |   if (size_rois != 5) {
40 |     printf("wrong roi size\n");
41 |     return 0;
42 |   }
43 | 
44 |   int channels = features.size(1);
45 |   int height = features.size(2);
46 |   int width = features.size(3);
47 | 
48 |   ROIPoolForwardLaucher(features, rois, spatial_scale, channels, height, width,
49 |                         num_rois, pooled_height, pooled_width, output, argmax);
50 | 
51 |   return 1;
52 | }
53 | 
54 | int roi_pooling_backward_cuda(at::Tensor top_grad, at::Tensor rois,
55 |                               at::Tensor argmax, float spatial_scale,
56 |                               at::Tensor bottom_grad) {
57 |   CHECK_INPUT(top_grad);
58 |   CHECK_INPUT(rois);
59 |   CHECK_INPUT(argmax);
60 |   CHECK_INPUT(bottom_grad);
61 | 
62 |   int pooled_height = top_grad.size(2);
63 |   int pooled_width = top_grad.size(3);
64 |   int num_rois = rois.size(0);
65 |   int size_rois = rois.size(1);
66 | 
67 |   if (size_rois != 5) {
68 |     printf("wrong roi size\n");
69 |     return 0;
70 |   }
71 |   int batch_size = bottom_grad.size(0);
72 |   int channels = bottom_grad.size(1);
73 |   int height = bottom_grad.size(2);
74 |   int width = bottom_grad.size(3);
75 | 
76 |   ROIPoolBackwardLaucher(top_grad, rois, argmax, spatial_scale, batch_size,
77 |                          channels, height, width, num_rois, pooled_height,
78 |                          pooled_width, bottom_grad);
79 | 
80 |   return 1;
81 | }
82 | 
83 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
84 |   m.def("forward", &roi_pooling_forward_cuda, "Roi_Pooling forward (CUDA)");
85 |   m.def("backward", &roi_pooling_backward_cuda, "Roi_Pooling backward (CUDA)");
86 | }
87 | 
--------------------------------------------------------------------------------
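The argmax tensor that RoIPoolFunction records in forward is what makes the backward pass above cheap: gradients are scattered only to the max location of each bin. A simplified CPU reference of the pooled values for a single feature map and box (a sketch for illustration only; the kernel's exact bin-boundary rounding is not reproduced):

import torch
import torch.nn.functional as F

def roi_max_pool_ref(feat, roi, out_h, out_w, spatial_scale):
    # feat: (C, H, W); roi: (x1, y1, x2, y2) in image coordinates
    x1, y1, x2, y2 = [int(round(float(c) * spatial_scale)) for c in roi]
    x2 = max(x2, x1 + 1)  # guarantee a region of at least 1x1
    y2 = max(y2, y1 + 1)
    region = feat[:, y1:y2, x1:x2]
    # adaptive max pooling splits the region into out_h x out_w bins and
    # takes each bin's max -- the quantity whose location argmax records
    return F.adaptive_max_pool2d(region.unsqueeze(0), (out_h, out_w)).squeeze(0)

--------------------------------------------------------------------------------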
/mmdetection/mmdet/ops/sigmoid_focal_loss/__init__.py:
--------------------------------------------------------------------------------
1 | from .modules.sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss
2 | 
3 | __all__ = ['SigmoidFocalLoss', 'sigmoid_focal_loss']
4 | 
--------------------------------------------------------------------------------
/mmdetection/mmdet/ops/sigmoid_focal_loss/functions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/boliu61/open-images-2019-instance-segmentation/52a7ec2c254deb7b702aa7a085855e31a5254624/mmdetection/mmdet/ops/sigmoid_focal_loss/functions/__init__.py
--------------------------------------------------------------------------------
/mmdetection/mmdet/ops/sigmoid_focal_loss/functions/sigmoid_focal_loss.py:
--------------------------------------------------------------------------------
1 | from torch.autograd import Function
2 | from torch.autograd.function import once_differentiable
3 | 
4 | from .. import sigmoid_focal_loss_cuda
5 | 
6 | 
7 | class SigmoidFocalLossFunction(Function):
8 | 
9 |     @staticmethod
10 |     def forward(ctx, input, target, gamma=2.0, alpha=0.25):
11 |         ctx.save_for_backward(input, target)
12 |         num_classes = input.shape[1]
13 |         ctx.num_classes = num_classes
14 |         ctx.gamma = gamma
15 |         ctx.alpha = alpha
16 | 
17 |         loss = sigmoid_focal_loss_cuda.forward(input, target, num_classes,
18 |                                                gamma, alpha)
19 |         return loss
20 | 
21 |     @staticmethod
22 |     @once_differentiable
23 |     def backward(ctx, d_loss):
24 |         input, target = ctx.saved_tensors
25 |         num_classes = ctx.num_classes
26 |         gamma = ctx.gamma
27 |         alpha = ctx.alpha
28 |         d_loss = d_loss.contiguous()
29 |         d_input = sigmoid_focal_loss_cuda.backward(input, target, d_loss,
30 |                                                    num_classes, gamma, alpha)
31 |         return d_input, None, None, None, None
32 | 
33 | 
34 | sigmoid_focal_loss = SigmoidFocalLossFunction.apply
35 | 
--------------------------------------------------------------------------------
/mmdetection/mmdet/ops/sigmoid_focal_loss/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/boliu61/open-images-2019-instance-segmentation/52a7ec2c254deb7b702aa7a085855e31a5254624/mmdetection/mmdet/ops/sigmoid_focal_loss/modules/__init__.py
--------------------------------------------------------------------------------
/mmdetection/mmdet/ops/sigmoid_focal_loss/modules/sigmoid_focal_loss.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | 
3 | from ..functions.sigmoid_focal_loss import sigmoid_focal_loss
4 | 
5 | 
6 | # TODO: remove this module
7 | class SigmoidFocalLoss(nn.Module):
8 | 
9 |     def __init__(self, gamma, alpha):
10 |         super(SigmoidFocalLoss, self).__init__()
11 |         self.gamma = gamma
12 |         self.alpha = alpha
13 | 
14 |     def forward(self, logits, targets):
15 |         assert logits.is_cuda
16 |         loss = sigmoid_focal_loss(logits, targets, self.gamma, self.alpha)
17 |         return loss.sum()
18 | 
19 |     def __repr__(self):
20 |         tmpstr = self.__class__.__name__ + "("
21 |         tmpstr += "gamma=" + str(self.gamma)
22 |         tmpstr += ", alpha=" + str(self.alpha)
23 |         tmpstr += ")"
24 |         return tmpstr
25 | 
--------------------------------------------------------------------------------
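For reference, the quantity the CUDA extension computes is the sigmoid focal loss of Lin et al.: FL(p_t) = -alpha_t * (1 - p_t)^gamma * log(p_t). A dense pure-PyTorch sketch of the forward computation (illustrative, not the repo's kernel; it assumes the convention that targets are 1-based class indices with 0 reserved for background):

import torch

def sigmoid_focal_loss_ref(logits, targets, gamma=2.0, alpha=0.25):
    """Reference forward pass. logits: (N, C); targets: (N,) int64 in
    [0, C], where 0 is background and c > 0 means class c - 1 (assumed
    convention). Returns per-element losses of shape (N, C)."""
    n, c = logits.shape
    p = logits.sigmoid()
    # one-hot positives; everything else (incl. background rows) is negative
    t = torch.zeros(n, c, dtype=torch.bool, device=logits.device)
    fg = targets > 0
    t[fg, targets[fg] - 1] = True
    pt = torch.where(t, p, 1 - p)  # p_t
    at = torch.where(t, torch.full_like(p, alpha),
                     torch.full_like(p, 1 - alpha))  # alpha_t
    return -at * (1 - pt) ** gamma * torch.log(pt.clamp(min=1e-12))

--------------------------------------------------------------------------------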
/mmdetection/mmdet/ops/sigmoid_focal_loss/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
3 | 
4 | setup(
5 |     name='SigmoidFocalLoss',
6 |     ext_modules=[
7 |         CUDAExtension('sigmoid_focal_loss_cuda', [
8 |             'src/sigmoid_focal_loss.cpp',
9 |             'src/sigmoid_focal_loss_cuda.cu',
10 |         ]),
11 |     ],
12 |     cmdclass={'build_ext': BuildExtension})
13 | 
--------------------------------------------------------------------------------
/mmdetection/mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss.cpp:
--------------------------------------------------------------------------------
1 | // modify from
2 | // https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h
3 | #include <torch/extension.h>
4 | 
5 | at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits,
6 |                                          const at::Tensor &targets,
7 |                                          const int num_classes,
8 |                                          const float gamma, const float alpha);
9 | 
10 | at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits,
11 |                                           const at::Tensor &targets,
12 |                                           const at::Tensor &d_losses,
13 |                                           const int num_classes,
14 |                                           const float gamma, const float alpha);
15 | 
16 | // Interface for Python
17 | at::Tensor SigmoidFocalLoss_forward(const at::Tensor &logits,
18 |                                     const at::Tensor &targets,
19 |                                     const int num_classes, const float gamma,
20 |                                     const float alpha) {
21 |   if (logits.type().is_cuda()) {
22 |     return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma,
23 |                                          alpha);
24 |   }
25 |   // CPU tensors are not supported by this extension
26 |   AT_ERROR("SigmoidFocalLoss_forward is not implemented on the CPU");
27 | }
28 | 
29 | at::Tensor SigmoidFocalLoss_backward(const at::Tensor &logits,
30 |                                      const at::Tensor &targets,
31 |                                      const at::Tensor &d_losses,
32 |                                      const int num_classes, const float gamma,
33 |                                      const float alpha) {
34 |   if (logits.type().is_cuda()) {
35 |     return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses,
36 |                                           num_classes, gamma, alpha);
37 |   }
38 |   // CPU tensors are not supported by this extension
39 |   AT_ERROR("SigmoidFocalLoss_backward is not implemented on the CPU");
40 | }
41 | 
42 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
43 |   m.def("forward", &SigmoidFocalLoss_forward,
44 |         "SigmoidFocalLoss forward (CUDA)");
45 |   m.def("backward", &SigmoidFocalLoss_backward,
46 |         "SigmoidFocalLoss backward (CUDA)");
47 | }
48 | 
--------------------------------------------------------------------------------
/mmdetection/mmdet/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .registry import Registry, build_from_cfg
2 | 
3 | __all__ = ['Registry', 'build_from_cfg']
4 | 
--------------------------------------------------------------------------------
/mmdetection/mmdet/utils/registry.py:
--------------------------------------------------------------------------------
1 | import inspect
2 | 
3 | import mmcv
4 | 
5 | 
6 | class Registry(object):
7 | 
8 |     def __init__(self, name):
9 |         self._name = name
10 |         self._module_dict = dict()
11 | 
12 |     def __repr__(self):
13 |         format_str = self.__class__.__name__ + '(name={}, items={})'.format(
14 |             self._name, list(self._module_dict.keys()))
15 |         return format_str
16 | 
17 |     @property
18 |     def name(self):
19 |         return self._name
20 | 
21 |     @property
22 |     def module_dict(self):
23 |         return self._module_dict
24 | 
25 |     def get(self, key):
26 |         return self._module_dict.get(key, None)
27 | 
28 |     def _register_module(self, module_class):
29 |         """Register a module.
30 | 
31 |         Args:
32 |             module_class (type): Module class to be registered.
33 |         """
34 |         if not inspect.isclass(module_class):
35 |             raise TypeError('module must be a class, but got {}'.format(
36 |                 type(module_class)))
37 |         module_name = module_class.__name__
38 |         if module_name in self._module_dict:
39 |             raise KeyError('{} is already registered in {}'.format(
40 |                 module_name, self.name))
41 |         self._module_dict[module_name] = module_class
42 | 
43 |     def register_module(self, cls):
44 |         self._register_module(cls)
45 |         return cls
46 | 
47 | 
48 | def build_from_cfg(cfg, registry, default_args=None):
49 |     """Build a module from config dict.
50 | 
51 |     Args:
52 |         cfg (dict): Config dict. It should at least contain the key "type".
53 |         registry (:obj:`Registry`): The registry to search the type from.
54 |         default_args (dict, optional): Default initialization arguments.
55 | 
56 |     Returns:
57 |         obj: The constructed object.
58 |     """
59 |     assert isinstance(cfg, dict) and 'type' in cfg
60 |     assert isinstance(default_args, dict) or default_args is None
61 |     args = cfg.copy()
62 |     obj_type = args.pop('type')
63 |     if mmcv.is_str(obj_type):
64 |         obj_type = registry.get(obj_type)
65 |         if obj_type is None:
66 |             raise KeyError('{} is not in the {} registry'.format(
67 |                 cfg['type'], registry.name))
68 |     elif not inspect.isclass(obj_type):
69 |         raise TypeError('type must be a str or valid type, but got {}'.format(
70 |             type(obj_type)))
71 |     if default_args is not None:
72 |         for name, value in default_args.items():
73 |             args.setdefault(name, value)
74 |     return obj_type(**args)
75 | 
--------------------------------------------------------------------------------
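The Registry/build_from_cfg pair above is the repo's dependency-injection mechanism: classes register themselves under their class name, and config dicts refer to them via the 'type' key. A minimal sketch of the round trip (the registry and class names here are illustrative, not from the repo):

from mmdet.utils import Registry, build_from_cfg

DETECTORS = Registry('detector')

@DETECTORS.register_module
class ToyDetector(object):  # hypothetical class for illustration
    def __init__(self, num_classes, with_mask=True):
        self.num_classes = num_classes
        self.with_mask = with_mask

cfg = dict(type='ToyDetector', num_classes=276)
# default_args fill in anything the cfg dict does not specify
model = build_from_cfg(cfg, DETECTORS, default_args=dict(with_mask=False))
assert isinstance(model, ToyDetector) and not model.with_mask

--------------------------------------------------------------------------------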
/mmdetection/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | import time
4 | from setuptools import find_packages, setup
5 | 
6 | 
7 | def readme():
8 |     with open('README.md', encoding='utf-8') as f:
9 |         content = f.read()
10 |     return content
11 | 
12 | 
13 | MAJOR = 0
14 | MINOR = 6
15 | PATCH = 0
16 | SUFFIX = ''
17 | SHORT_VERSION = '{}.{}.{}{}'.format(MAJOR, MINOR, PATCH, SUFFIX)
18 | 
19 | version_file = 'mmdet/version.py'
20 | 
21 | 
22 | def get_git_hash():
23 | 
24 |     def _minimal_ext_cmd(cmd):
25 |         # construct minimal environment
26 |         env = {}
27 |         for k in ['SYSTEMROOT', 'PATH', 'HOME']:
28 |             v = os.environ.get(k)
29 |             if v is not None:
30 |                 env[k] = v
31 |         # LANGUAGE is used on win32
32 |         env['LANGUAGE'] = 'C'
33 |         env['LANG'] = 'C'
34 |         env['LC_ALL'] = 'C'
35 |         out = subprocess.Popen(
36 |             cmd, stdout=subprocess.PIPE, env=env).communicate()[0]
37 |         return out
38 | 
39 |     try:
40 |         out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD'])
41 |         sha = out.strip().decode('ascii')
42 |     except OSError:
43 |         sha = 'unknown'
44 | 
45 |     return sha
46 | 
47 | 
48 | def get_hash():
49 |     if os.path.exists('.git'):
50 |         sha = get_git_hash()[:7]
51 |     elif os.path.exists(version_file):
52 |         try:
53 |             from mmdet.version import __version__
54 |             sha = __version__.split('+')[-1]
55 |         except ImportError:
56 |             raise ImportError('Unable to get git version')
57 |     else:
58 |         sha = 'unknown'
59 | 
60 |     return sha
61 | 
62 | 
63 | def write_version_py():
64 |     content = """# GENERATED VERSION FILE
65 | # TIME: {}
66 | 
67 | __version__ = '{}'
68 | short_version = '{}'
69 | """
70 |     sha = get_hash()
71 |     VERSION = SHORT_VERSION + '+' + sha
72 | 
73 |     with open(version_file, 'w') as f:
74 |         f.write(content.format(time.asctime(), VERSION, SHORT_VERSION))
75 | 
76 | 
77 | def get_version():
78 |     with open(version_file, 'r') as f:
79 |         exec(compile(f.read(), version_file, 'exec'))
80 |     return locals()['__version__']
81 | 
82 | 
83 | if __name__ == '__main__':
84 |     write_version_py()
85 |     setup(
86 |         name='mmdet',
87 |         version=get_version(),
88 |         description='Open MMLab Detection Toolbox',
89 |         long_description=readme(),
90 |         keywords='computer vision, object detection',
91 |         url='https://github.com/open-mmlab/mmdetection',
92 |         packages=find_packages(exclude=('configs', 'tools', 'demo')),
93 |         package_data={'mmdet.ops': ['*/*.so']},
94 |         classifiers=[
95 |             'Development Status :: 4 - Beta',
96 |             'License :: OSI Approved :: Apache Software License',
97 |             'Operating System :: OS Independent',
98 |             'Programming Language :: Python :: 2',
99 |             'Programming Language :: Python :: 2.7',
100 |             'Programming Language :: Python :: 3',
101 |             'Programming Language :: Python :: 3.4',
102 |             'Programming Language :: Python :: 3.5',
103 |             'Programming Language :: Python :: 3.6',
104 |         ],
105 |         license='Apache License 2.0',
106 |         setup_requires=['pytest-runner'],
107 |         tests_require=['pytest'],
108 |         install_requires=[
109 |             'mmcv>=0.2.6', 'numpy', 'matplotlib', 'six', 'terminaltables',
110 |             'pycocotools'
111 |         ],
112 |         zip_safe=False)
113 | 
--------------------------------------------------------------------------------
/mmdetection/tools/coco_eval.py:
--------------------------------------------------------------------------------
1 | from argparse import ArgumentParser
2 | 
3 | from mmdet.core import coco_eval
4 | 
5 | 
6 | def main():
7 |     parser = ArgumentParser(description='COCO Evaluation')
8 |     parser.add_argument('result', help='result file path')
9 |     parser.add_argument('--ann', help='annotation file path')
10 |     parser.add_argument(
11 |         '--types',
12 |         type=str,
13 |         nargs='+',
14 |         choices=['proposal_fast', 'proposal', 'bbox', 'segm', 'keypoint'],
15 |         default=['bbox'],
16 |         help='result types')
17 |     parser.add_argument(
18 |         '--max-dets',
19 |         type=int,
20 |         nargs='+',
21 |         default=[100, 300, 1000],
22 |         help='proposal numbers, only used for recall evaluation')
23 |     args = parser.parse_args()
24 |     coco_eval(args.result, args.types, args.ann, args.max_dets)
25 | 
26 | 
27 | if __name__ == '__main__':
28 |     main()
29 | 
--------------------------------------------------------------------------------
/mmdetection/tools/dist_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | PYTHON=${PYTHON:-"python"}
4 | 
5 | CONFIG=$1
6 | CHECKPOINT=$2
7 | GPUS=$3
8 | 
9 | $PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS \
10 |     $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4}
11 | 
--------------------------------------------------------------------------------
/mmdetection/tools/dist_train.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | PYTHON=${PYTHON:-"python"}
4 | 
5 | CONFIG=$1
6 | GPUS=$2
7 | 
8 | $PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3}
9 | 
--------------------------------------------------------------------------------
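Both dist_*.sh wrappers above are thin shells over torch.distributed.launch. For readers who prefer to stay in Python, a sketch of the equivalent call (the config path and GPU count are placeholders, not repo values):

import subprocess
import sys

# equivalent of: ./tools/dist_train.sh path/to/config.py 8 --validate
# torch.distributed.launch spawns 8 copies of train.py, each receiving a
# --local_rank argument that train.py's parser accepts
subprocess.check_call([
    sys.executable, '-m', 'torch.distributed.launch',
    '--nproc_per_node=8',
    'tools/train.py', 'path/to/config.py',
    '--launcher', 'pytorch', '--validate',
])

--------------------------------------------------------------------------------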
/mmdetection/tools/publish_model.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import subprocess
3 | import torch
4 | 
5 | 
6 | def parse_args():
7 |     parser = argparse.ArgumentParser(
8 |         description='Process a checkpoint to be published')
9 |     parser.add_argument('in_file', help='input checkpoint filename')
10 |     parser.add_argument('out_file', help='output checkpoint filename')
11 |     args = parser.parse_args()
12 |     return args
13 | 
14 | 
15 | def process_checkpoint(in_file, out_file):
16 |     checkpoint = torch.load(in_file, map_location='cpu')
17 |     # remove optimizer for smaller file size
18 |     if 'optimizer' in checkpoint:
19 |         del checkpoint['optimizer']
20 |     # if it is necessary to remove some sensitive data in checkpoint['meta'],
21 |     # add the code here.
22 |     torch.save(checkpoint, out_file)
23 |     sha = subprocess.check_output(['sha256sum', out_file]).decode()
24 |     final_file = out_file[:-len('.pth')] + '-{}.pth'.format(sha[:8])  # assumes a '.pth' suffix
25 |     subprocess.check_call(['mv', out_file, final_file])
26 | 
27 | 
28 | def main():
29 |     args = parse_args()
30 |     process_checkpoint(args.in_file, args.out_file)
31 | 
32 | 
33 | if __name__ == '__main__':
34 |     main()
35 | 
--------------------------------------------------------------------------------
/mmdetection/tools/slurm_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | set -x
4 | 
5 | PARTITION=$1
6 | JOB_NAME=$2
7 | CONFIG=$3
8 | CHECKPOINT=$4
9 | GPUS=${GPUS:-8}
10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}
12 | PY_ARGS=${@:5}
13 | SRUN_ARGS=${SRUN_ARGS:-""}
14 | 
15 | srun -p ${PARTITION} \
16 |     --job-name=${JOB_NAME} \
17 |     --gres=gpu:${GPUS_PER_NODE} \
18 |     --ntasks=${GPUS} \
19 |     --ntasks-per-node=${GPUS_PER_NODE} \
20 |     --cpus-per-task=${CPUS_PER_TASK} \
21 |     --kill-on-bad-exit=1 \
22 |     ${SRUN_ARGS} \
23 |     python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS}
24 | 
--------------------------------------------------------------------------------
/mmdetection/tools/slurm_train.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | set -x
4 | 
5 | PARTITION=$1
6 | JOB_NAME=$2
7 | CONFIG=$3
8 | WORK_DIR=$4
9 | GPUS=${5:-8}
10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}
12 | SRUN_ARGS=${SRUN_ARGS:-""}
13 | PY_ARGS=${PY_ARGS:-"--validate"}
14 | 
15 | srun -p ${PARTITION} \
16 |     --job-name=${JOB_NAME} \
17 |     --gres=gpu:${GPUS_PER_NODE} \
18 |     --ntasks=${GPUS} \
19 |     --ntasks-per-node=${GPUS_PER_NODE} \
20 |     --cpus-per-task=${CPUS_PER_TASK} \
21 |     --kill-on-bad-exit=1 \
22 |     ${SRUN_ARGS} \
23 |     python -u tools/train.py ${CONFIG} --work_dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS}
24 | 
--------------------------------------------------------------------------------
/mmdetection/tools/train.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | 
3 | import argparse
4 | import os
5 | from mmcv import Config
6 | 
7 | from mmdet import __version__
8 | from mmdet.datasets import get_dataset
9 | from mmdet.apis import (train_detector, init_dist, get_root_logger,
10 |                         set_random_seed)
11 | from mmdet.models import build_detector
12 | import torch
13 | 
14 | 
15 | def parse_args():
16 |     parser = argparse.ArgumentParser(description='Train a detector')
17 |     parser.add_argument('config', help='train config file path')
18 |     parser.add_argument('--work_dir', help='the dir to save logs and models')
19 |     parser.add_argument(
20 |         '--resume_from', help='the checkpoint file to resume from')
21 |     parser.add_argument(
22 |         '--validate',
23 |         action='store_true',
24 |         help='whether to evaluate the checkpoint during training')
25 |     parser.add_argument(
26 |         '--gpus',
27 |         type=int,
28 |         default=1,
29 |         help='number of gpus to use '
30 |         '(only applicable to non-distributed training)')
31 | 
parser.add_argument('--seed', type=int, default=None, help='random seed') 32 | parser.add_argument( 33 | '--launcher', 34 | choices=['none', 'pytorch', 'slurm', 'mpi'], 35 | default='none', 36 | help='job launcher') 37 | parser.add_argument('--local_rank', type=int, default=0) 38 | args = parser.parse_args() 39 | if 'LOCAL_RANK' not in os.environ: 40 | os.environ['LOCAL_RANK'] = str(args.local_rank) 41 | 42 | return args 43 | 44 | 45 | def main(): 46 | args = parse_args() 47 | 48 | cfg = Config.fromfile(args.config) 49 | # set cudnn_benchmark 50 | if cfg.get('cudnn_benchmark', False): 51 | torch.backends.cudnn.benchmark = True 52 | # update configs according to CLI args 53 | if args.work_dir is not None: 54 | cfg.work_dir = args.work_dir 55 | if args.resume_from is not None: 56 | cfg.resume_from = args.resume_from 57 | cfg.gpus = args.gpus 58 | 59 | # init distributed env first, since logger depends on the dist info. 60 | if args.launcher == 'none': 61 | distributed = False 62 | else: 63 | distributed = True 64 | init_dist(args.launcher, **cfg.dist_params) 65 | 66 | # init logger before other steps 67 | logger = get_root_logger(cfg.log_level) 68 | logger.info('Distributed training: {}'.format(distributed)) 69 | 70 | # set random seeds 71 | if args.seed is not None: 72 | logger.info('Set random seed to {}'.format(args.seed)) 73 | set_random_seed(args.seed) 74 | 75 | model = build_detector( 76 | cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg) 77 | 78 | train_dataset = get_dataset(cfg.data.train) 79 | if cfg.checkpoint_config is not None: 80 | # save mmdet version, config file content and class names in 81 | # checkpoints as meta data 82 | cfg.checkpoint_config.meta = dict( 83 | mmdet_version=__version__, 84 | config=cfg.text, 85 | CLASSES=train_dataset.CLASSES) 86 | # add an attribute for visualization convenience 87 | model.CLASSES = train_dataset.CLASSES 88 | train_detector( 89 | model, 90 | train_dataset, 91 | cfg, 92 | distributed=distributed, 93 | validate=args.validate, 94 | logger=logger) 95 | 96 | 97 | if __name__ == '__main__': 98 | main() 99 | -------------------------------------------------------------------------------- /mmdetection/tools/upgrade_model_version.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import re 3 | from collections import OrderedDict 4 | 5 | import torch 6 | 7 | 8 | def convert(in_file, out_file): 9 | """Convert keys in checkpoints. 10 | 11 | There can be some breaking changes during the development of mmdetection, 12 | and this tool is used for upgrading checkpoints trained with old versions 13 | to the latest one. 
14 |     """
15 |     checkpoint = torch.load(in_file)
16 |     in_state_dict = checkpoint.pop('state_dict')
17 |     out_state_dict = OrderedDict()
18 |     for key, val in in_state_dict.items():
19 |         # Use ConvModule instead of nn.Conv2d in RetinaNet
20 |         # cls_convs.0.weight -> cls_convs.0.conv.weight
21 |         m = re.search(r'(cls_convs|reg_convs).\d.(weight|bias)', key)
22 |         if m is not None:
23 |             param = m.groups()[1]
24 |             new_key = key.replace(param, 'conv.{}'.format(param))
25 |             out_state_dict[new_key] = val
26 |             continue
27 | 
28 |         out_state_dict[key] = val
29 |     checkpoint['state_dict'] = out_state_dict
30 |     torch.save(checkpoint, out_file)
31 | 
32 | 
33 | def main():
34 |     parser = argparse.ArgumentParser(description='Upgrade model version')
35 |     parser.add_argument('in_file', help='input checkpoint file')
36 |     parser.add_argument('out_file', help='output checkpoint file')
37 |     args = parser.parse_args()
38 |     convert(args.in_file, args.out_file)
39 | 
40 | 
41 | if __name__ == '__main__':
42 |     main()
43 | 
--------------------------------------------------------------------------------
/mmdetection/tools/voc_eval.py:
--------------------------------------------------------------------------------
1 | from argparse import ArgumentParser
2 | 
3 | import mmcv
4 | import numpy as np
5 | 
6 | from mmdet import datasets
7 | from mmdet.core import eval_map
8 | 
9 | 
10 | def voc_eval(result_file, dataset, iou_thr=0.5):
11 |     det_results = mmcv.load(result_file)
12 |     gt_bboxes = []
13 |     gt_labels = []
14 |     gt_ignore = []
15 |     for i in range(len(dataset)):
16 |         ann = dataset.get_ann_info(i)
17 |         bboxes = ann['bboxes']
18 |         labels = ann['labels']
19 |         if 'bboxes_ignore' in ann:
20 |             ignore = np.concatenate([
21 |                 np.zeros(bboxes.shape[0], dtype=np.bool),
22 |                 np.ones(ann['bboxes_ignore'].shape[0], dtype=np.bool)
23 |             ])
24 |             gt_ignore.append(ignore)
25 |             bboxes = np.vstack([bboxes, ann['bboxes_ignore']])
26 |             labels = np.concatenate([labels, ann['labels_ignore']])
27 |         gt_bboxes.append(bboxes)
28 |         gt_labels.append(labels)
29 |     if not gt_ignore:
30 |         gt_ignore = None
31 |     if hasattr(dataset, 'year') and dataset.year == 2007:
32 |         dataset_name = 'voc07'
33 |     else:
34 |         dataset_name = dataset.CLASSES
35 |     eval_map(
36 |         det_results,
37 |         gt_bboxes,
38 |         gt_labels,
39 |         gt_ignore=gt_ignore,
40 |         scale_ranges=None,
41 |         iou_thr=iou_thr,
42 |         dataset=dataset_name,
43 |         print_summary=True)
44 | 
45 | 
46 | def main():
47 |     parser = ArgumentParser(description='VOC Evaluation')
48 |     parser.add_argument('result', help='result file path')
49 |     parser.add_argument('config', help='config file path')
50 |     parser.add_argument(
51 |         '--iou-thr',
52 |         type=float,
53 |         default=0.5,
54 |         help='IoU threshold for evaluation')
55 |     args = parser.parse_args()
56 |     cfg = mmcv.Config.fromfile(args.config)
57 |     test_dataset = mmcv.runner.obj_from_dict(cfg.data.test, datasets)
58 |     voc_eval(args.result, test_dataset, args.iou_thr)
59 | 
60 | 
61 | if __name__ == '__main__':
62 |     main()
63 | 
--------------------------------------------------------------------------------
/util/combine_leaf_and_parent.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from glob import glob
3 | import os
4 | from tqdm import tqdm
5 | 
6 | pd.set_option('display.max_columns', 25)
7 | 
8 | if __name__ == '__main__':
9 | 
10 | 
11 |     csv_pattern1 = 'LB_csv/LB_avg3_2scale_flip_NMS_G8k_2scale_flip_thr0_0.5_*of25.csv'
12 |     csv_pattern2 = 'LB_csv/LB_avg3_2scale_flip_thr0_120_*of25.csv'
13 | 
14 | 
15 |     csv_lst1 =
sorted(glob(csv_pattern1)) 16 | csv_lst2 = sorted(glob(csv_pattern2)) 17 | 18 | assert len(csv_lst1)==25 and len(csv_lst2)==25 19 | 20 | for i in tqdm(range(25)): 21 | sub1 = pd.read_csv(csv_lst1[i]) 22 | sub2 = pd.read_csv(csv_lst2[i]) 23 | assert sub1.ImageID.equals(sub2.ImageID) 24 | assert sub2.PredictionString.count()==sub2.shape[0] 25 | sub2['tmp'] = sub1.PredictionString.fillna('') 26 | sub2.PredictionString = sub2.apply(lambda x: 27 | (x.PredictionString+' '+x.tmp).strip(' '),axis=1 ) 28 | 29 | if i==0: 30 | sub = sub2.copy() 31 | else: 32 | sub = pd.concat([sub,sub2.copy()]) 33 | 34 | print('-----sub.count()-----\n',sub.count()) 35 | 36 | sub_name = os.path.basename(csv_lst1[0].replace('_0of25','')).replace('.csv','').replace('LB_','') +\ 37 | '_AND_' +\ 38 | os.path.basename(csv_lst2[0].replace('_0of25','')).replace('.csv','').replace('LB_','') 39 | 40 | sub.drop(columns='tmp').to_csv(\ 41 | 'subs/' + \ 42 | sub_name + '.csv',chunksize=10000,index=False) 43 | 44 | -------------------------------------------------------------------------------- /util/make_test_ann_pkl.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import pandas as pd 3 | 4 | if __name__ == '__main__': 5 | 6 | data_dir = 'open-images/data/' 7 | 8 | ### test ann files, split into 25 chunks for parallel inference 9 | 10 | test_img_lst = pd.read_csv(data_dir+'sample_empty_submission_seg.csv').ImageID.values 11 | test_ann = [{'filename': img+'.jpg'} for img in test_img_lst] 12 | for i in range(25): 13 | mmcv.dump(test_ann[(4000*i):(4000*i+4000)], data_dir+f'mmdet_anno/test_ann_{i}_of_25.pkl') 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /util/make_train_leaf_ann_pkl.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import pandas as pd 4 | from tqdm import tqdm 5 | from multiprocessing import Pool 6 | import funcy 7 | 8 | if __name__ == '__main__': 9 | 10 | data_dir = 'open-images/data/' 11 | 12 | # 275 leaf classes 13 | classes = mmcv.load(data_dir+'seg_anno/list_of_275_leave_labels_seg.pkl') 14 | # official gt 15 | train = pd.read_csv(data_dir+'seg_anno/challenge-2019-train-segmentation-masks.csv').set_index('LabelName') 16 | train = train.loc[train.index.unique().intersection(classes)].reset_index() 17 | train.set_index('ImageID',inplace=True) 18 | train.drop(columns=['PredictedIoU','Clicks'],inplace=True) 19 | 20 | label_ids = {name: i + 1 for i, name in enumerate(classes)} 21 | 22 | # what Dun made for OD (incl all val width, height), to look up for w,d 23 | train_od_ann=mmcv.load(data_dir+'mmdet_anno/train_bbox.pkl') 24 | d_val_wh = {x['filename']: (x['width'],x['height']) for x in train_od_ann} 25 | 26 | # multi process 27 | img_lst = train.index.unique() 28 | i_lst = list(range(len(img_lst))) 29 | def process_img(i_sublst): 30 | d = {} 31 | for i in i_sublst: 32 | image_id = img_lst[i] 33 | ann_df = train.loc[[image_id]] 34 | w,h = d_val_wh[image_id+'.jpg'] 35 | bboxes = ann_df[['BoxXMin', 'BoxYMin', 'BoxXMax', 'BoxYMax']].values * [w,h,w,h] 36 | labels = ann_df.LabelName.map(label_ids).values 37 | annotation = { 38 | 'filename': image_id+'.jpg', 39 | 'width': w, 40 | 'height': h, 41 | 'ann': { 42 | 'bboxes': bboxes.astype(np.float32), 43 | 'labels': labels.astype(np.int64), 44 | 'MaskPath': ann_df.MaskPath.values 45 | } 46 | } 47 | d[i] = annotation 48 | return d 49 | 50 | chunks = funcy.lchunks(int(len(i_lst)/600), i_lst) 
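# Note on the chunking above (illustrative aside, not repo code):
# funcy.lchunks(size, seq) returns consecutive chunks of length `size`, e.g.
# funcy.lchunks(3, [0, 1, 2, 3, 4, 5, 6, 7]) == [[0, 1, 2], [3, 4, 5], [6, 7]],
# so passing int(len(i_lst) / 600) as the chunk size yields roughly 600 chunks
# for the 12-process pool below to consume at a reasonable task granularity.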
51 | num_processes = 12 52 | p = Pool(processes=num_processes) 53 | d_list = list(tqdm(p.imap(process_img, chunks, chunksize=1), total=len(chunks))) 54 | p.close() 55 | p.join() 56 | 57 | d_total = {} 58 | for d in d_list: 59 | d_total.update(d) 60 | 61 | train_annotations = [d_total[i] for i in range(649312)] 62 | 63 | mmcv.dump(train_annotations, data_dir+'mmdet_anno/seg_train_275_leave_cls_ann.pkl') 64 | 65 | -------------------------------------------------------------------------------- /util/seg_275_leave_classes.py: -------------------------------------------------------------------------------- 1 | # 275 leave classes 2 | CLASSES = ( 3 | '/m/0242l', '/m/03120', '/m/01j51', '/m/029b3', '/m/02zt3', '/m/0kmg4', '/m/0174k2', '/m/01k6s3', '/m/029bxz', 4 | '/m/03s_tn', '/m/0fx9l', '/m/02f9f_', '/m/02jz0l', '/m/09g1w', '/m/01lsmm', '/m/025dyy', '/m/02d9qx', 5 | '/m/03m3vtv', 6 | '/m/05gqfk', '/m/09gtd', '/m/0frqm', '/m/0k1tl', '/m/02w3r3', '/m/034c16', '/m/01_5g', '/m/02d1br', '/m/03v5tg', 7 | '/m/04ctx', '/m/0cmx8', '/m/01fh4r', '/m/02jvh9', '/m/02p5f1q', '/m/03q5c7', '/m/04dr76w', '/m/04kkgm', 8 | '/m/054fyh', 9 | '/m/058qzx', '/m/08hvt4', '/m/099ssp', '/m/04v6l4', '/m/084rd', '/m/02tsc9', '/m/03y6mg', '/m/0h8ntjv', 10 | '/m/0bt_c3', 11 | '/m/03m3pdh', '/m/0703r8', '/m/026qbn5', '/m/047j0r', '/m/05kyg_', '/m/0h8n6f9', '/m/046dlr', '/m/06_72j', 12 | '/m/025nd', 13 | '/m/02s195', '/m/04yqq2', '/m/01yx86', '/m/06z37_', '/m/0c06p', '/m/0fm3zh', '/m/0162_1', '/m/015qff', 14 | '/m/02pv19', 15 | '/m/01pns0', '/m/04h7h', '/m/079cl', '/m/04yx4', '/m/03bt1vf', '/m/01bl7v', '/m/05r655', '/m/01b9xk', 16 | '/m/01dwsz', 17 | '/m/01dwwc', '/m/01j3zr', '/m/01f91_', '/m/021mn', '/m/01tcjp', '/m/0fszt', '/m/02g30s', '/m/014j1m', 18 | '/m/0388q', 19 | '/m/043nyj', '/m/061_f', '/m/07fbm7', '/m/07j87', '/m/09k_b', '/m/09qck', '/m/0cyhj_', '/m/0dj6p', '/m/0fldg', 20 | '/m/0hqkz', '/m/0jwn_', '/m/0kpqd', '/m/01fb_0', '/m/09728', '/m/0jy4k', '/m/015wgc', '/m/02zvsm', '/m/052sf', 21 | '/m/0663v', '/m/0_cp5', '/m/015x4r', '/m/015x5n', '/m/05vtc', '/m/0cjs7', '/m/05zsy', '/m/027pcv', '/m/0fbw6', 22 | '/m/0fj52s', '/m/0hkxq', '/m/0jg57', '/m/02cvgx', '/m/0cdn1', '/m/06pcq', '/m/06m11', '/m/0ftb8', '/m/012n7d', 23 | '/m/01bjv', '/m/01x3jk', '/m/04_sv', '/m/076bq', '/m/07cmd', '/m/07jdr', '/m/07r04', '/m/01lcw4', '/m/0h2r6', 24 | '/m/0pg52', '/m/01btn', '/m/0ph39', '/m/01xs3r', '/m/0cmf2', '/m/09rvcxw', '/m/01bfm9', '/m/01d40f', 25 | '/m/01gkx_', 26 | '/m/01n4qj', '/m/01xyhv', '/m/025rp__', '/m/02fq_6', '/m/02jfl0', '/m/02wbtzl', '/m/02h19r', '/m/01cmb2', 27 | '/m/03grzl', 28 | '/m/0176mf', '/m/01nq26', '/m/01rkbr', '/m/0gjkl', '/m/04tn4x', '/m/0fly7', '/m/02p3w7d', '/m/01b638', 29 | '/m/06k2mb', 30 | '/m/01940j', '/m/01s55n', '/m/0584n8', '/m/080hkjn', '/m/01dy8n', '/m/01f8m5', '/m/05n4y', '/m/05z6w', 31 | '/m/06j2d', 32 | '/m/09b5t', '/m/09csl', '/m/09d5_', '/m/09ddx', '/m/0ccs93', '/m/0dbvp', '/m/0dftk', '/m/0f6wt', '/m/0gv1x', 33 | '/m/0h23m', '/m/0jly1', '/m/01h8tj', '/m/01h44', '/m/01dxs', '/m/0633h', '/m/01yrx', '/m/0306r', '/m/0449p', 34 | '/m/04g2r', '/m/07dm6', '/m/096mb', '/m/0bt9lr', '/m/0c29q', '/m/0cd4d', '/m/0cn6p', '/m/0dq75', '/m/01x_v', 35 | '/m/01xq0k1', '/m/03bk1', '/m/03d443', '/m/03fwl', '/m/03k3r', '/m/03qrc', '/m/04c0y', '/m/04rmv', '/m/068zj', 36 | '/m/06mf6', '/m/071qp', '/m/07bgp', '/m/0898b', '/m/08pbxl', '/m/0bwd_0j', '/m/0cnyhnx', '/m/0dbzx', '/m/02hj4', 37 | '/m/084zz', '/m/0gd36', '/m/02l8p9', '/m/0pcr', '/m/04m9y', '/m/078jl', '/m/011k07', '/m/0120dh', '/m/09f_2', 38 | '/m/09ld4', 
'/m/03fj2', '/m/0by6g', '/m/01xqw', '/m/0342h', '/m/03q5t', '/m/05r5c', '/m/06ncr', '/m/0l14j_', 39 | '/m/01226z', '/m/02rgn06', '/m/05ctyq', '/m/0wdt60w', '/m/019w40', '/m/03g8mr', '/m/0420v5', '/m/06_fw', 40 | '/m/04h8sr', 41 | '/m/0h8my_4', '/m/05_5p_0', '/m/04p0qw', '/m/02zn6n', '/m/0bjyj5', '/m/0d20w4', '/m/01bms0', '/m/01j5ks', 42 | '/m/01kb5b', 43 | '/m/05bm6', '/m/07dd4', '/m/0dv5r', '/m/0hdln', '/m/0lt4_', '/m/02gzp', '/m/0gxl3', '/m/06y5r', '/m/04ylt', 44 | '/m/01c648', '/m/01m2v', '/m/01m4t', '/m/020lf', '/m/03bbps', '/m/03jbxj', '/m/050k8', '/m/0h8lkj8', 45 | '/m/0bh9flk', 46 | '/m/01599', '/m/024g6', '/m/02vqfm', '/m/01z1kdw', '/m/07clx', '/m/081qc', '/m/01bqk0', '/m/03c7gz', 47 | '/m/016m2d', 48 | '/m/0283dt1', '/m/039xj_', '/m/01jfm_', '/m/083wq', '/m/0dkzw' 49 | ) -------------------------------------------------------------------------------- /util/seg_expand_and_adjust_thres_25.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import cv2 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | from tqdm import tqdm 7 | import pickle 8 | import mmcv 9 | import argparse 10 | import platform 11 | from glob import glob 12 | 13 | if __name__ == '__main__': 14 | 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--sub_csv_pattern') 17 | parser.add_argument('--parents_only', type=int,default=0) 18 | parser.add_argument('--no_expand',type=int,default=0) 19 | parser.add_argument('--thres', type=float) 20 | args = parser.parse_args() 21 | 22 | 23 | data_dir = '/Users/bo_liu/Documents/open-images/data/' 24 | repo_dir = '/Users/bo_liu/Documents/open-images/open-images/' 25 | sub_dir = '/Users/bo_liu/Documents/open-images/subs/' 26 | 27 | all_keyed_child = mmcv.load(data_dir+'seg_all_keyed_child.pkl') 28 | 29 | sub_csvs = sorted(glob(args.sub_csv_pattern)) 30 | 31 | for sub_csv in sub_csvs: 32 | assert 'of25.csv' in sub_csv or 'of25_msk_vote' in sub_csv 33 | if 'of25_msk_vote' in sub_csv: 34 | k = int(sub_csv.split('of25_msk_vote')[-2].split('_')[-1]) 35 | else: 36 | k = int(sub_csv.replace('of25.csv','').split('_')[-1]) 37 | assert k>=0 and k<=24 38 | 39 | import gc;gc.collect() 40 | sub=pd.read_csv(sub_csv) 41 | 42 | thres = args.thres #0.001659 43 | 44 | prob_lst = [] 45 | 46 | for i in tqdm(range(len(sub))): 47 | string = sub.loc[i,'PredictionString'] 48 | if type(string) is float and np.isnan(string): continue 49 | new_string = '' 50 | lst = string.split(' ') 51 | assert len(lst)%3==0 52 | for j in range(len(lst)//3): 53 | # prob_lst.append(float(lst[3*j+1])) 54 | if float(lst[3*j+1]) < thres: 55 | continue 56 | if not args.parents_only: new_string += (' ' + lst[3*j] + ' ' + lst[3*j+1] + ' ' + lst[3*j+2]) 57 | if not args.no_expand: 58 | for parent in all_keyed_child[lst[3*j]]: 59 | new_string += (' ' + parent + ' ' + lst[3*j+1] + ' ' + lst[3*j+2]) 60 | sub.loc[i,'PredictionString'] = new_string.strip(' ') 61 | 62 | sub_filename = sub_csv[:-4] + ('' if args.no_expand else '_expand') +f'_thr{args.thres}.csv' 63 | if args.parents_only: 64 | sub_filename = sub_filename.replace('.csv','_25cls.csv') 65 | sub.to_csv( sub_filename,index=False) 66 | 67 | 68 | ## combining 25 csv 69 | assert len(sub_csvs)==25 70 | if k==9: 71 | gc.collect() 72 | gc.collect() 73 | 74 | sub = None 75 | for sub_csv in sub_csvs: 76 | df = pd.read_csv(sub_csv[:-4] + ('' if args.no_expand else '_expand') + f'_thr{args.thres}' + ('_25cls.csv' if args.parents_only else '.csv' )) 77 | if sub is None: 78 | sub = 
df.copy() 79 | else: 80 | sub = pd.concat([sub,df]) 81 | 82 | sub_filename = os.path.basename(sub_csv).replace('_9of25','')[:-4] + ('' if args.no_expand else '_expand') + f'_thr{args.thres}.csv' 83 | if args.parents_only: 84 | sub_filename = sub_filename.replace('.csv','_25cls.csv') 85 | sub.to_csv(sub_dir + sub_filename,index=False) 86 | 87 | 88 | --------------------------------------------------------------------------------
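The per-chunk loop above implements the Open Images class-hierarchy expansion: each PredictionString is a whitespace-joined sequence of (LabelName, Confidence, EncodedMask) triplets (hence the len % 3 == 0 assertion), and every leaf prediction that survives the confidence threshold is copied to all of its ancestor classes with the same confidence and mask. A condensed sketch of that core transform (the function name is illustrative; the mapping argument corresponds to the seg_all_keyed_child.pkl dict loaded in the script, keyed by child label with a list of parent labels as values):

def expand_prediction_string(pred_str, keyed_ancestors, thres):
    """Drop triplets below `thres`, then duplicate each kept triplet to
    all ancestor labels, mirroring the loop in the script above."""
    tokens = pred_str.split(' ')
    assert len(tokens) % 3 == 0
    out = []
    for label, conf, mask in zip(tokens[0::3], tokens[1::3], tokens[2::3]):
        if float(conf) < thres:
            continue
        out.extend([label, conf, mask])
        for ancestor in keyed_ancestors[label]:
            out.extend([ancestor, conf, mask])
    return ' '.join(out)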