├── .gitignore
├── .travis.yml
├── INSTALL.md
├── LICENSE
├── MODEL_ZOO.md
├── README.md
├── TECHNICAL_DETAILS.md
├── compile.sh
├── configs
├── cascade_mask_rcnn_r101_fpn_1x.py
├── cascade_mask_rcnn_r50_c4_1x.py
├── cascade_mask_rcnn_r50_fpn_1x.py
├── cascade_mask_rcnn_x101_32x4d_fpn_1x.py
├── cascade_mask_rcnn_x101_64x4d_fpn_1x.py
├── cascade_rcnn_r101_fpn_1x.py
├── cascade_rcnn_r50_c4_1x.py
├── cascade_rcnn_r50_fpn_1x.py
├── cascade_rcnn_x101_32x4d_fpn_1x.py
├── cascade_rcnn_x101_64x4d_fpn_1x.py
├── dcn
│   ├── cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x.py
│   ├── cascade_rcnn_dconv_c3-c5_r50_fpn_1x.py
│   ├── faster_rcnn_dconv_c3-c5_r50_fpn_1x.py
│   ├── faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x.py
│   ├── faster_rcnn_dpool_r50_fpn_1x.py
│   ├── faster_rcnn_mdconv_c3-c5_r50_fpn_1x.py
│   ├── faster_rcnn_mdpool_r50_fpn_1x.py
│   └── mask_rcnn_dconv_c3-c5_r50_fpn_1x.py
├── fast_mask_rcnn_r101_fpn_1x.py
├── fast_mask_rcnn_r50_fpn_1x.py
├── fast_rcnn_r101_fpn_1x.py
├── fast_rcnn_r50_c4_1x.py
├── fast_rcnn_r50_fpn_1x.py
├── faster_rcnn_ohem_r50_fpn_1x.py
├── faster_rcnn_r101_fpn_1x.py
├── faster_rcnn_r50_c4_1x.py
├── faster_rcnn_r50_fpn_1x.py
├── faster_rcnn_x101_32x4d_fpn_1x.py
├── faster_rcnn_x101_64x4d_fpn_1x.py
├── gn+ws
│   ├── README.md
│   ├── faster_rcnn_r50_fpn_gn_ws_1x.py
│   ├── mask_rcnn_r50_fpn_gn_ws_20_23_24e.py
│   ├── mask_rcnn_r50_fpn_gn_ws_2x.py
│   └── mask_rcnn_x101_32x4d_fpn_gn_ws_2x.py
├── htc
│   ├── README.md
│   ├── htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e.py
│   ├── htc_r101_fpn_20e.py
│   ├── htc_r50_fpn_1x.py
│   ├── htc_r50_fpn_20e.py
│   ├── htc_without_semantic_r50_fpn_1x.py
│   ├── htc_x101_32x4d_fpn_20e_16gpu.py
│   └── htc_x101_64x4d_fpn_20e_16gpu.py
├── mask_rcnn_r101_fpn_1x.py
├── mask_rcnn_r101_fpn_gn_2x.py
├── mask_rcnn_r50_c4_1x.py
├── mask_rcnn_r50_fpn_1x.py
├── mask_rcnn_r50_fpn_gn_2x.py
├── mask_rcnn_r50_fpn_gn_contrib_2x.py
├── mask_rcnn_x101_32x4d_fpn_1x.py
├── mask_rcnn_x101_64x4d_fpn_1x.py
├── panoptic_net.py
├── pascal_voc
│   ├── faster_rcnn_r50_fpn_1x_voc0712.py
│   ├── ssd300_voc.py
│   └── ssd512_voc.py
├── retinanet_r101_fpn_1x.py
├── retinanet_r50_fpn_1x.py
├── retinanet_x101_32x4d_fpn_1x.py
├── retinanet_x101_64x4d_fpn_1x.py
├── rpn_r101_fpn_1x.py
├── rpn_r50_c4_1x.py
├── rpn_r50_fpn_1x.py
├── rpn_x101_32x4d_fpn_1x.py
├── rpn_x101_64x4d_fpn_1x.py
├── ssd300_coco.py
└── ssd512_coco.py
├── demo
├── coco_test_12510.jpg
└── coco_val_32901.png
├── init_coco.py
├── init_coco.sh
├── mmdet
├── __init__.py
├── apis
│   ├── __init__.py
│   ├── env.py
│   ├── inference.py
│   └── train.py
├── core
│   ├── __init__.py
│   ├── anchor
│   │   ├── __init__.py
│   │   ├── anchor_generator.py
│   │   └── anchor_target.py
│   ├── bbox
│   │   ├── __init__.py
│   │   ├── assign_sampling.py
│   │   ├── assigners
│   │   │   ├── __init__.py
│   │   │   ├── assign_result.py
│   │   │   ├── base_assigner.py
│   │   │   └── max_iou_assigner.py
│   │   ├── bbox_target.py
│   │   ├── geometry.py
│   │   ├── samplers
│   │   │   ├── __init__.py
│   │   │   ├── base_sampler.py
│   │   │   ├── combined_sampler.py
│   │   │   ├── instance_balanced_pos_sampler.py
│   │   │   ├── iou_balanced_neg_sampler.py
│   │   │   ├── ohem_sampler.py
│   │   │   ├── pseudo_sampler.py
│   │   │   ├── random_sampler.py
│   │   │   └── sampling_result.py
│   │   └── transforms.py
│   ├── evaluation
│   │   ├── __init__.py
│   │   ├── bbox_overlaps.py
│   │   ├── class_names.py
│   │   ├── coco_utils.py
│   │   ├── eval_hooks.py
│   │   ├── mean_ap.py
│   │   └── recall.py
│   ├── loss
│   │   ├── __init__.py
│   │   └── losses.py
│   ├── mask
│   │   ├── __init__.py
│   │   ├── mask_target.py
│   │   └── utils.py
│   ├── post_processing
│   │   ├── __init__.py
│   │   ├── bbox_nms.py
│   │   └── merge_augs.py
│   └── utils
│   │   ├── __init__.py
│   │   ├── dist_utils.py
│   │   └── misc.py
├── datasets
│   ├── __init__.py
│   ├── coco.py
│   ├── coco_panoptic.py
│   ├── concat_dataset.py
│   ├── custom.py
│   ├── custom_panoptic.py
│   ├── extra_aug.py
│   ├── loader
│   │   ├── __init__.py
│   │   ├── build_loader.py
│   │   └── sampler.py
│   ├── repeat_dataset.py
│   ├── transforms.py
│   ├── utils.py
│   ├── voc.py
│   └── xml_style.py
├── models
│   ├── __init__.py
│   ├── anchor_heads
│   │   ├── __init__.py
│   │   ├── anchor_head.py
│   │   ├── retina_head.py
│   │   ├── rpn_head.py
│   │   └── ssd_head.py
│   ├── backbones
│   │   ├── __init__.py
│   │   ├── resnet.py
│   │   ├── resnext.py
│   │   └── ssd_vgg.py
│   ├── bbox_heads
│   │   ├── __init__.py
│   │   ├── bbox_head.py
│   │   └── convfc_bbox_head.py
│   ├── builder.py
│   ├── detectors
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── cascade_rcnn.py
│   │   ├── fast_rcnn.py
│   │   ├── faster_rcnn.py
│   │   ├── htc.py
│   │   ├── mask_rcnn.py
│   │   ├── panotic_rcnn.py
│   │   ├── retinanet.py
│   │   ├── rpn.py
│   │   ├── single_stage.py
│   │   ├── test_mixins.py
│   │   ├── two_stage.py
│   │   └── two_stage_panotic.py
│   ├── mask_heads
│   │   ├── __init__.py
│   │   ├── fcn_mask_head.py
│   │   ├── fused_semantic_head.py
│   │   └── htc_mask_head.py
│   ├── necks
│   │   ├── __init__.py
│   │   └── fpn.py
│   ├── registry.py
│   ├── roi_extractors
│   │   ├── __init__.py
│   │   └── single_level.py
│   ├── seg_heads
│   │   ├── __init__.py
│   │   └── fcn_seg_head.py
│   ├── shared_heads
│   │   ├── __init__.py
│   │   └── res_layer.py
│   └── utils
│   │   ├── __init__.py
│   │   ├── conv_module.py
│   │   ├── conv_ws.py
│   │   ├── norm.py
│   │   └── weight_init.py
└── ops
│   ├── __init__.py
│   ├── dcn
│   ├── __init__.py
│   ├── functions
│   │   ├── __init__.py
│   │   ├── deform_conv.py
│   │   └── deform_pool.py
│   ├── modules
│   │   ├── __init__.py
│   │   ├── deform_conv.py
│   │   └── deform_pool.py
│   ├── setup.py
│   └── src
│   │   ├── deform_conv_cuda.cpp
│   │   ├── deform_conv_cuda_kernel.cu
│   │   ├── deform_pool_cuda.cpp
│   │   └── deform_pool_cuda_kernel.cu
│   ├── nms
│   ├── __init__.py
│   ├── nms_wrapper.py
│   ├── setup.py
│   └── src
│   │   ├── nms_cpu.cpp
│   │   ├── nms_cuda.cpp
│   │   ├── nms_kernel.cu
│   │   └── soft_nms_cpu.pyx
│   ├── roi_align
│   ├── __init__.py
│   ├── functions
│   │   ├── __init__.py
│   │   └── roi_align.py
│   ├── gradcheck.py
│   ├── modules
│   │   ├── __init__.py
│   │   └── roi_align.py
│   ├── setup.py
│   └── src
│   │   ├── roi_align_cuda.cpp
│   │   └── roi_align_kernel.cu
│   ├── roi_pool
│   ├── __init__.py
│   ├── functions
│   │   ├── __init__.py
│   │   └── roi_pool.py
│   ├── gradcheck.py
│   ├── modules
│   │   ├── __init__.py
│   │   └── roi_pool.py
│   ├── setup.py
│   └── src
│   │   ├── roi_pool_cuda.cpp
│   │   └── roi_pool_kernel.cu
│   └── sigmoid_focal_loss
│   ├── __init__.py
│   ├── functions
│   ├── __init__.py
│   └── sigmoid_focal_loss.py
│   ├── modules
│   ├── __init__.py
│   └── sigmoid_focal_loss.py
│   ├── setup.py
│   └── src
│   ├── sigmoid_focal_loss.cpp
│   └── sigmoid_focal_loss_cuda.cu
├── panopticapi
├── CONVERTERS.md
├── README.md
├── __init__.py
├── cityscapes_gt_converter
│   ├── __init__.py
│   └── cityscapes_panoptic_converter.py
├── combine_semantic_and_instance_predictions.py
├── converted_data
│   └── .gitignore
├── converters
│   ├── 2channels2panoptic_coco_format.py
│   ├── __init__.py
│   ├── detection2panoptic_coco_format.py
│   ├── panoptic2detection_coco_format.py
│   └── panoptic2semantic_segmentation.py
├── evaluation.py
├── license.txt
├── panoptic_coco_categories.json
├── sample_data
│   ├── images_info_examples.json
│   ├── input_images
│   │   ├── 000000142238.jpg
│   │   └── 000000439180.jpg
│   ├── panoptic_coco_detection_format.json
│   ├── panoptic_examples.json
│   ├── panoptic_examples
│   │   ├── 000000142238.png
│   │   └── 000000439180.png
│   └── panoptic_examples_2ch_format
│   │   ├── 000000142238.png
│   │   └── 000000439180.png
├── utils.py
└── visualization.py
├── setup.py
└── tools
├── coco_eval.py
├── convert_datasets
└── pascal_voc.py
├── dist_train.sh
├── panoptic_evaluate.py
├── slurm_test.sh
├── slurm_train.sh
├── test.py
├── train.py
└── voc_eval.py

/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 | 
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 | 
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 | 
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 | 
50 | # Translations
51 | *.mo
52 | *.pot
53 | 
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 | 
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 | 
63 | # Scrapy stuff:
64 | .scrapy
65 | 
66 | # Sphinx documentation
67 | docs/_build/
68 | 
69 | # PyBuilder
70 | target/
71 | 
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 | 
75 | # pyenv
76 | .python-version
77 | 
78 | # celery beat schedule file
79 | celerybeat-schedule
80 | 
81 | # SageMath parsed files
82 | *.sage.py
83 | 
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 | 
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 | 
97 | # Rope project settings
98 | .ropeproject
99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 
106 | # cython generated cpp
107 | mmdet/ops/nms/src/soft_nms_cpu.cpp
108 | mmdet/version.py
109 | data
110 | .vscode
111 | .idea
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | dist: trusty
2 | language: python
3 | 
4 | install:
5 |   - pip install flake8
6 | 
7 | python:
8 |   - "3.5"
9 |   - "3.6"
10 | 
11 | script:
12 |   - flake8
--------------------------------------------------------------------------------
/INSTALL.md:
--------------------------------------------------------------------------------
1 | ## Installation
2 | 
3 | ### Requirements
4 | 
5 | - Linux (tested on Ubuntu 16.04 and CentOS 7.2)
6 | - Python 3.4+
7 | - PyTorch 1.0
8 | - Cython
9 | - [mmcv](https://github.com/open-mmlab/mmcv)
10 | 
11 | ### Install mmdetection
12 | 
13 | a. Install PyTorch 1.0 and torchvision following the [official instructions](https://pytorch.org/).
14 | 
15 | b. Clone the mmdetection repository.
16 | 
17 | ```shell
18 | git clone https://github.com/open-mmlab/mmdetection.git
19 | ```
20 | 
21 | c. Compile the CUDA extensions.
22 | 
23 | ```shell
24 | cd mmdetection
25 | pip install cython # or "conda install cython" if you prefer conda
26 | ./compile.sh # or "PYTHON=python3 ./compile.sh" if you use system python3 without virtual environments
27 | ```
28 | 
29 | d. Install mmdetection (other dependencies will be installed automatically).
30 | 
31 | ```shell
32 | python(3) setup.py install # add --user if you want to install it locally
33 | # or "pip install ."
34 | ```
35 | 
36 | Note: you need to re-run the last step each time you pull updates from GitHub.
37 | The git commit id will be written to the version number and also saved in trained models.
38 | 
39 | ### Prepare the COCO dataset
40 | 
41 | It is recommended to symlink the dataset root to `$MMDETECTION/data`.
42 | 
43 | ```
44 | mmdetection
45 | ├── mmdet
46 | ├── tools
47 | ├── configs
48 | ├── data
49 | │   ├── coco
50 | │   │   ├── annotations
51 | │   │   ├── train2017
52 | │   │   ├── val2017
53 | │   │   ├── test2017
54 | │   ├── VOCdevkit
55 | │   │   ├── VOC2007
56 | │   │   ├── VOC2012
57 | 
58 | ```
59 | 
60 | ### Scripts
61 | Just for reference, [here](https://gist.github.com/hellock/bf23cd7348c727d69d48682cb6909047) is
62 | a script for setting up mmdetection with conda.
63 | 
64 | ### Notice
65 | You can run `python(3) setup.py develop` or `pip install -e .` to install mmdetection if you want to make frequent modifications to it.
66 | 
67 | If there is more than one copy of mmdetection on your machine and you want to switch between them,
68 | insert the following code into the main file
69 | ```python
70 | import os.path as osp
71 | import sys
72 | sys.path.insert(0, osp.join(osp.dirname(osp.abspath(__file__)), '../'))
73 | ```
74 | or run the following command in the terminal from the corresponding folder.
75 | ```shell
76 | export PYTHONPATH=`pwd`:$PYTHONPATH
77 | ```
78 | 
--------------------------------------------------------------------------------
/TECHNICAL_DETAILS.md:
--------------------------------------------------------------------------------
1 | ## Overview
2 | 
3 | In this section, we will introduce the main units of training a detector:
4 | data loading, the model, and the iteration pipeline.
5 | 
6 | ## Data loading
7 | 
8 | Following typical conventions, we use `Dataset` and `DataLoader` for data loading
9 | with multiple workers. `Dataset` returns a dict of data items corresponding
10 | to the arguments of the model's forward method.
11 | Since the data in object detection may not be the same size (image size, gt bbox size, etc.),
12 | we introduce a new `DataContainer` type in `mmcv` to help collect and distribute
13 | data of different sizes.
14 | See [here](https://github.com/open-mmlab/mmcv/blob/master/mmcv/parallel/data_container.py) for more details.
15 | 
16 | ## Model
17 | 
18 | In mmdetection, model components are categorized into four types.
19 | 
20 | - backbone: usually an FCN that extracts feature maps, e.g., ResNet.
21 | - neck: the part between backbones and heads, e.g., FPN, ASPP.
22 | - head: the part for specific tasks, e.g., bbox prediction and mask prediction.
23 | - roi extractor: the part for extracting features from feature maps, e.g., RoI Align.
24 | 
25 | We also implement some general detection pipelines with the above components,
26 | such as `SingleStageDetector` and `TwoStageDetector`.
27 | 
28 | ### Build a model with basic components
29 | 
30 | Following some basic pipelines (e.g., two-stage detectors), the model structure
31 | can be customized through config files with little effort.
32 | 
33 | If we want to implement some new components, e.g., the path aggregation
34 | FPN structure in [Path Aggregation Network for Instance Segmentation](https://arxiv.org/abs/1803.01534), there are two things to do.
35 | 
36 | 1. Create a new file, `mmdet/models/necks/pafpn.py`.
37 | 
38 | ```python
39 | class PAFPN(nn.Module):
40 | 
41 |     def __init__(self,
42 |                  in_channels,
43 |                  out_channels,
44 |                  num_outs,
45 |                  start_level=0,
46 |                  end_level=-1,
47 |                  add_extra_convs=False):
48 |         pass
49 | 
50 |     def forward(self, inputs):
51 |         # implementation is ignored
52 |         pass
53 | ```
54 | 
55 | 2. Modify the config file from
56 | 
57 | ```python
58 | neck=dict(
59 |     type='FPN',
60 |     in_channels=[256, 512, 1024, 2048],
61 |     out_channels=256,
62 |     num_outs=5)
63 | ```
64 | 
65 | to
66 | 
67 | ```python
68 | neck=dict(
69 |     type='PAFPN',
70 |     in_channels=[256, 512, 1024, 2048],
71 |     out_channels=256,
72 |     num_outs=5)
73 | ```
74 | 
75 | We will release more components (backbones, necks, heads) for research purposes.
76 | 
77 | ### Write a new model
78 | 
79 | To write a new detection pipeline, you need to inherit from `BaseDetector`,
80 | which defines the following abstract methods.
81 | 
82 | - `extract_feat()`: given an image batch of shape (n, c, h, w), extract the feature map(s).
83 | - `forward_train()`: forward method of the training mode
84 | - `simple_test()`: single scale testing without augmentation
85 | - `aug_test()`: testing with augmentation (multi-scale, flip, etc.)
86 | 
87 | [TwoStageDetector](https://github.com/hellock/mmdetection/blob/master/mmdet/models/detectors/two_stage.py)
88 | is a good example of how to do this.
89 | 
90 | ## Iteration pipeline
91 | 
92 | We adopt distributed training for both single-machine and multi-machine setups.
93 | If a server has 8 GPUs, 8 processes will be started, each running on a single GPU.
94 | 
95 | Each process keeps an isolated model, data loader, and optimizer.
96 | Model parameters are only synchronized once at the beginning.
97 | After a forward and backward pass, gradients will be allreduced among all GPUs,
98 | and the optimizer will update the model parameters.
99 | Since the gradients are allreduced, the model parameters stay the same across all processes after each iteration.
100 | 
--------------------------------------------------------------------------------
/compile.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | PYTHON=${PYTHON:-"python"}
4 | 
5 | echo "Building roi align op..."
6 | cd mmdet/ops/roi_align
7 | if [ -d "build" ]; then
8 |     rm -r build
9 | fi
10 | $PYTHON setup.py build_ext --inplace
11 | 
12 | echo "Building roi pool op..."
13 | cd ../roi_pool
14 | if [ -d "build" ]; then
15 |     rm -r build
16 | fi
17 | $PYTHON setup.py build_ext --inplace
18 | 
19 | echo "Building nms op..."
20 | cd ../nms
21 | if [ -d "build" ]; then
22 |     rm -r build
23 | fi
24 | $PYTHON setup.py build_ext --inplace
25 | 
26 | echo "Building dcn..."
27 | cd ../dcn
28 | if [ -d "build" ]; then
29 |     rm -r build
30 | fi
31 | $PYTHON setup.py build_ext --inplace
32 | 
33 | echo "Building sigmoid focal loss op..."
34 | cd ../sigmoid_focal_loss 35 | if [ -d "build" ]; then 36 | rm -r build 37 | fi 38 | $PYTHON setup.py build_ext --inplace 39 | -------------------------------------------------------------------------------- /configs/fast_rcnn_r101_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FastRCNN', 4 | pretrained='modelzoo://resnet101', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=101, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | style='pytorch'), 12 | neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | bbox_roi_extractor=dict( 18 | type='SingleRoIExtractor', 19 | roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), 20 | out_channels=256, 21 | featmap_strides=[4, 8, 16, 32]), 22 | bbox_head=dict( 23 | type='SharedFCBBoxHead', 24 | num_fcs=2, 25 | in_channels=256, 26 | fc_out_channels=1024, 27 | roi_feat_size=7, 28 | num_classes=81, 29 | target_means=[0., 0., 0., 0.], 30 | target_stds=[0.1, 0.1, 0.2, 0.2], 31 | reg_class_agnostic=False)) 32 | # model training and testing settings 33 | train_cfg = dict( 34 | rcnn=dict( 35 | assigner=dict( 36 | type='MaxIoUAssigner', 37 | pos_iou_thr=0.5, 38 | neg_iou_thr=0.5, 39 | min_pos_iou=0.5, 40 | ignore_iof_thr=-1), 41 | sampler=dict( 42 | type='RandomSampler', 43 | num=512, 44 | pos_fraction=0.25, 45 | neg_pos_ub=-1, 46 | add_gt_as_proposals=True), 47 | pos_weight=-1, 48 | debug=False)) 49 | test_cfg = dict( 50 | rcnn=dict( 51 | score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) 52 | # dataset settings 53 | dataset_type = 'CocoDataset' 54 | data_root = 'data/coco/' 55 | img_norm_cfg = dict( 56 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 57 | data = dict( 58 | imgs_per_gpu=2, 59 | workers_per_gpu=2, 60 | train=dict( 61 | type=dataset_type, 62 | ann_file=data_root + 'annotations/instances_train2017.json', 63 | img_prefix=data_root + 'train2017/', 64 | img_scale=(1333, 800), 65 | img_norm_cfg=img_norm_cfg, 66 | size_divisor=32, 67 | proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl', 68 | flip_ratio=0.5, 69 | with_mask=False, 70 | with_crowd=True, 71 | with_label=True), 72 | val=dict( 73 | type=dataset_type, 74 | ann_file=data_root + 'annotations/instances_val2017.json', 75 | img_prefix=data_root + 'val2017/', 76 | img_scale=(1333, 800), 77 | img_norm_cfg=img_norm_cfg, 78 | proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', 79 | size_divisor=32, 80 | flip_ratio=0, 81 | with_mask=False, 82 | with_crowd=True, 83 | with_label=True), 84 | test=dict( 85 | type=dataset_type, 86 | ann_file=data_root + 'annotations/instances_val2017.json', 87 | img_prefix=data_root + 'val2017/', 88 | img_scale=(1333, 800), 89 | img_norm_cfg=img_norm_cfg, 90 | proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', 91 | size_divisor=32, 92 | flip_ratio=0, 93 | with_mask=False, 94 | with_label=False, 95 | test_mode=True)) 96 | # optimizer 97 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 98 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 99 | # learning policy 100 | lr_config = dict( 101 | policy='step', 102 | warmup='linear', 103 | warmup_iters=500, 104 | warmup_ratio=1.0 / 3, 105 | step=[8, 11]) 106 | checkpoint_config = dict(interval=1) 107 | # yapf:disable 108 | log_config = dict( 109 | interval=50, 110 | hooks=[ 111 | dict(type='TextLoggerHook'), 112 | 
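# Uncommenting the TensorboardLoggerHook on the next line enables TensorBoard
# logging alongside the text logger (both hooks are provided by mmcv).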
# dict(type='TensorboardLoggerHook') 113 | ]) 114 | # yapf:enable 115 | # runtime settings 116 | total_epochs = 12 117 | dist_params = dict(backend='nccl') 118 | log_level = 'INFO' 119 | work_dir = './work_dirs/fast_rcnn_r101_fpn_1x' 120 | load_from = None 121 | resume_from = None 122 | workflow = [('train', 1)] 123 | -------------------------------------------------------------------------------- /configs/fast_rcnn_r50_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FastRCNN', 4 | pretrained='modelzoo://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | style='pytorch'), 12 | neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | bbox_roi_extractor=dict( 18 | type='SingleRoIExtractor', 19 | roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), 20 | out_channels=256, 21 | featmap_strides=[4, 8, 16, 32]), 22 | bbox_head=dict( 23 | type='SharedFCBBoxHead', 24 | num_fcs=2, 25 | in_channels=256, 26 | fc_out_channels=1024, 27 | roi_feat_size=7, 28 | num_classes=81, 29 | target_means=[0., 0., 0., 0.], 30 | target_stds=[0.1, 0.1, 0.2, 0.2], 31 | reg_class_agnostic=False)) 32 | # model training and testing settings 33 | train_cfg = dict( 34 | rcnn=dict( 35 | assigner=dict( 36 | type='MaxIoUAssigner', 37 | pos_iou_thr=0.5, 38 | neg_iou_thr=0.5, 39 | min_pos_iou=0.5, 40 | ignore_iof_thr=-1), 41 | sampler=dict( 42 | type='RandomSampler', 43 | num=512, 44 | pos_fraction=0.25, 45 | neg_pos_ub=-1, 46 | add_gt_as_proposals=True), 47 | pos_weight=-1, 48 | debug=False)) 49 | test_cfg = dict( 50 | rcnn=dict( 51 | score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) 52 | # dataset settings 53 | dataset_type = 'CocoDataset' 54 | data_root = 'data/coco/' 55 | img_norm_cfg = dict( 56 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 57 | data = dict( 58 | imgs_per_gpu=2, 59 | workers_per_gpu=2, 60 | train=dict( 61 | type=dataset_type, 62 | ann_file=data_root + 'annotations/instances_train2017.json', 63 | img_prefix=data_root + 'train2017/', 64 | img_scale=(1333, 800), 65 | img_norm_cfg=img_norm_cfg, 66 | size_divisor=32, 67 | proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl', 68 | flip_ratio=0.5, 69 | with_mask=False, 70 | with_crowd=True, 71 | with_label=True), 72 | val=dict( 73 | type=dataset_type, 74 | ann_file=data_root + 'annotations/instances_val2017.json', 75 | img_prefix=data_root + 'val2017/', 76 | img_scale=(1333, 800), 77 | img_norm_cfg=img_norm_cfg, 78 | proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', 79 | size_divisor=32, 80 | flip_ratio=0, 81 | with_mask=False, 82 | with_crowd=True, 83 | with_label=True), 84 | test=dict( 85 | type=dataset_type, 86 | ann_file=data_root + 'annotations/instances_val2017.json', 87 | img_prefix=data_root + 'val2017/', 88 | img_scale=(1333, 800), 89 | img_norm_cfg=img_norm_cfg, 90 | proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', 91 | size_divisor=32, 92 | flip_ratio=0, 93 | with_mask=False, 94 | with_label=False, 95 | test_mode=True)) 96 | # optimizer 97 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 98 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 99 | # learning policy 100 | lr_config = dict( 101 | policy='step', 102 | warmup='linear', 103 | warmup_iters=500, 104 | warmup_ratio=1.0 / 
3,
105 |     step=[8, 11])
106 | checkpoint_config = dict(interval=1)
107 | # yapf:disable
108 | log_config = dict(
109 |     interval=50,
110 |     hooks=[
111 |         dict(type='TextLoggerHook'),
112 |         # dict(type='TensorboardLoggerHook')
113 |     ])
114 | # yapf:enable
115 | # runtime settings
116 | total_epochs = 12
117 | dist_params = dict(backend='nccl')
118 | log_level = 'INFO'
119 | work_dir = './work_dirs/fast_rcnn_r50_fpn_1x'
120 | load_from = None
121 | resume_from = None
122 | workflow = [('train', 1)]
123 | 
--------------------------------------------------------------------------------
/configs/htc/README.md:
--------------------------------------------------------------------------------
1 | # Hybrid Task Cascade for Instance Segmentation
2 | 
3 | ## Introduction
4 | 
5 | We provide config files to reproduce the results of the CVPR 2019 paper [Hybrid Task Cascade](https://arxiv.org/abs/1901.07518).
6 | 
7 | ```
8 | @inproceedings{chen2019hybrid,
9 |   title={Hybrid task cascade for instance segmentation},
10 |   author={Chen, Kai and Pang, Jiangmiao and Wang, Jiaqi and Xiong, Yu and Li, Xiaoxiao and Sun, Shuyang and Feng, Wansen and Liu, Ziwei and Shi, Jianping and Ouyang, Wanli and Loy, Chen Change and Lin, Dahua},
11 |   booktitle={IEEE Conference on Computer Vision and Pattern Recognition},
12 |   year={2019}
13 | }
14 | ```
15 | 
16 | ## Dataset
17 | 
18 | HTC requires the COCO and COCO-stuff datasets for training. You need to download and extract them under the COCO dataset path.
19 | The directory structure should look like this.
20 | 
21 | ```
22 | mmdetection
23 | ├── mmdet
24 | ├── tools
25 | ├── configs
26 | ├── data
27 | │   ├── coco
28 | │   │   ├── annotations
29 | │   │   ├── train2017
30 | │   │   ├── val2017
31 | │   │   ├── test2017
32 | │   │   ├── stuffthingmaps
33 | ```
34 | 
35 | ## Results and Models
36 | 
37 | The results on COCO 2017val are shown in the table below (results on test-dev are usually slightly higher than on val).
38 | 
39 | | Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |
40 | |:---------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:|
41 | | R-50-FPN | pytorch | 1x | | | | 42.2 | 37.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_r50_fpn_1x_20190408-878c1712.pth) |
42 | | R-50-FPN | pytorch | 20e | | | | 43.2 | 38.0 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_r50_fpn_20e_20190408-c03b7015.pth) |
43 | | R-101-FPN | pytorch | 20e | | | | 44.9 | 39.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_r101_fpn_20e_20190408-a2e586db.pth) |
44 | | X-101-32x4d-FPN | pytorch | 20e | | | | 46.1 | 40.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_x101_32x4d_fpn_20e_20190408-9eae4d0b.pth) |
45 | | X-101-64x4d-FPN | pytorch | 20e | | | | 47.0 | 40.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_x101_64x4d_fpn_20e_20190408-497f2561.pth) |
46 | 
47 | - In the HTC paper and COCO 2018 Challenge, `score_thr` is set to 0.001 for both baselines and HTC.
48 | - We use 8 GPUs with 2 images/GPU for R-50 and R-101 models, and 16 GPUs with 1 image/GPU for X-101 models.
49 |   If you would like to train X-101 HTC with 8 GPUs, you need to change the lr from 0.02 to 0.01.
50 | 
51 | We also provide a stronger HTC model with DCN and multi-scale training. No test-time augmentation is used.
52 | 
53 | | Backbone | Style | DCN | training scales | Lr schd | box AP | mask AP | Download |
54 | |:----------------:|:-------:|:-----:|:---------------:|:-------:|:------:|:-------:|:--------:|
55 | | X-101-64x4d-FPN | pytorch | c3-c5 | 400~1400 | 20e | 50.7 | 43.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e_20190408-0e50669c.pth) |
--------------------------------------------------------------------------------
/configs/retinanet_r101_fpn_1x.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 |     type='RetinaNet',
4 |     pretrained='modelzoo://resnet101',
5 |     backbone=dict(
6 |         type='ResNet',
7 |         depth=101,
8 |         num_stages=4,
9 |         out_indices=(0, 1, 2, 3),
10 |         frozen_stages=1,
11 |         style='pytorch'),
12 |     neck=dict(
13 |         type='FPN',
14 |         in_channels=[256, 512, 1024, 2048],
15 |         out_channels=256,
16 |         start_level=1,
17 |         add_extra_convs=True,
18 |         num_outs=5),
19 |     bbox_head=dict(
20 |         type='RetinaHead',
21 |         num_classes=81,
22 |         in_channels=256,
23 |         stacked_convs=4,
24 |         feat_channels=256,
25 |         octave_base_scale=4,
26 |         scales_per_octave=3,
27 |         anchor_ratios=[0.5, 1.0, 2.0],
28 |         anchor_strides=[8, 16, 32, 64, 128],
29 |         target_means=[.0, .0, .0, .0],
30 |         target_stds=[1.0, 1.0, 1.0, 1.0]))
31 | # training and testing settings
32 | train_cfg = dict(
33 |     assigner=dict(
34 |         type='MaxIoUAssigner',
35 |         pos_iou_thr=0.5,
36 |         neg_iou_thr=0.4,
37 |         min_pos_iou=0,
38 |         ignore_iof_thr=-1),
39 |     smoothl1_beta=0.11,
40 |     gamma=2.0,
41 |     alpha=0.25,
42 |     allowed_border=-1,
43 |     pos_weight=-1,
44 |     debug=False)
45 | test_cfg = dict(
46 |     nms_pre=1000,
47 |     min_bbox_size=0,
48 |     score_thr=0.05,
49 |     nms=dict(type='nms', iou_thr=0.5),
50 |     max_per_img=100)
51 | # dataset settings
52 | dataset_type = 'CocoDataset'
53 | data_root = 'data/coco/'
54 | img_norm_cfg = dict(
55 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
56 | data = dict(
57 |     imgs_per_gpu=2,
58 |     workers_per_gpu=2,
59 |     train=dict(
60 |         type=dataset_type,
61 |         ann_file=data_root + 'annotations/instances_train2017.json',
62 |         img_prefix=data_root + 'train2017/',
63 |         img_scale=(1333, 800),
64 |         img_norm_cfg=img_norm_cfg,
65 |         size_divisor=32,
66 |         flip_ratio=0.5,
67 |         with_mask=False,
68 |         with_crowd=False,
69 |         with_label=True),
70 |     val=dict(
71 |         type=dataset_type,
72 |         ann_file=data_root + 'annotations/instances_val2017.json',
73 |         img_prefix=data_root + 'val2017/',
74 |         img_scale=(1333, 800),
75 |         img_norm_cfg=img_norm_cfg,
76 |         size_divisor=32,
77 |         flip_ratio=0,
78 |         with_mask=False,
79 |         with_crowd=False,
80 |         with_label=True),
81 |     test=dict(
82 |         type=dataset_type,
83 |         ann_file=data_root + 'annotations/instances_val2017.json',
84 |         img_prefix=data_root + 'val2017/',
85 |         img_scale=(1333, 800),
86 |         img_norm_cfg=img_norm_cfg,
87 |         size_divisor=32,
88 |         flip_ratio=0,
89 |         with_mask=False,
90 |         with_crowd=False,
91 |         with_label=False,
92 |         test_mode=True))
93 | # optimizer
94 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
95 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
96 | # learning policy
97 | lr_config = dict(
98 |     policy='step',
99 |     warmup='linear',
100 |     warmup_iters=500,
101 |     warmup_ratio=1.0 / 3,
102 |     step=[8, 11])
103 | checkpoint_config = dict(interval=1)
104 | # yapf:disable
105 | log_config = dict(
106 |     interval=50,
107 |     hooks=[
108 |         dict(type='TextLoggerHook'),
109 |         # dict(type='TensorboardLoggerHook')
110 |     ])
111 | # yapf:enable
112 | # runtime settings
113 | total_epochs = 12
114 | device_ids = range(8)
115 | dist_params = dict(backend='nccl')
116 | log_level = 'INFO'
117 | work_dir = './work_dirs/retinanet_r101_fpn_1x'
118 | load_from = None
119 | resume_from = None
120 | workflow = [('train', 1)]
121 | 
--------------------------------------------------------------------------------
/configs/retinanet_r50_fpn_1x.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 |     type='RetinaNet',
4 |     pretrained='modelzoo://resnet50',
5 |     backbone=dict(
6 |         type='ResNet',
7 |         depth=50,
8 |         num_stages=4,
9 |         out_indices=(0, 1, 2, 3),
10 |         frozen_stages=1,
11 |         style='pytorch'),
12 |     neck=dict(
13 |         type='FPN',
14 |         in_channels=[256, 512, 1024, 2048],
15 |         out_channels=256,
16 |         start_level=1,
17 |         add_extra_convs=True,
18 |         num_outs=5),
19 |     bbox_head=dict(
20 |         type='RetinaHead',
21 |         num_classes=81,
22 |         in_channels=256,
23 |         stacked_convs=4,
24 |         feat_channels=256,
25 |         octave_base_scale=4,
26 |         scales_per_octave=3,
27 |         anchor_ratios=[0.5, 1.0, 2.0],
28 |         anchor_strides=[8, 16, 32, 64, 128],
29 |         target_means=[.0, .0, .0, .0],
30 |         target_stds=[1.0, 1.0, 1.0, 1.0]))
31 | # training and testing settings
32 | train_cfg = dict(
33 |     assigner=dict(
34 |         type='MaxIoUAssigner',
35 |         pos_iou_thr=0.5,
36 |         neg_iou_thr=0.4,
37 |         min_pos_iou=0,
38 |         ignore_iof_thr=-1),
39 |     smoothl1_beta=0.11,
40 |     gamma=2.0,
41 |     alpha=0.25,
42 |     allowed_border=-1,
43 |     pos_weight=-1,
44 |     debug=False)
45 | test_cfg = dict(
46 |     nms_pre=1000,
47 |     min_bbox_size=0,
48 |     score_thr=0.05,
49 |     nms=dict(type='nms', iou_thr=0.5),
50 |     max_per_img=100)
51 | # dataset settings
52 | dataset_type = 'CocoDataset'
53 | data_root = 'data/coco/'
54 | img_norm_cfg = dict(
55 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
56 | data = dict(
57 |     imgs_per_gpu=2,
58 |     workers_per_gpu=2,
59 |     train=dict(
60 |         type=dataset_type,
61 |         ann_file=data_root + 'annotations/instances_train2017.json',
62 |         img_prefix=data_root + 'train2017/',
63 |         img_scale=(1333, 800),
64 |         img_norm_cfg=img_norm_cfg,
65 |         size_divisor=32,
66 |         flip_ratio=0.5,
67 |         with_mask=False,
68 |         with_crowd=False,
69 |         with_label=True),
70 |     val=dict(
71 |         type=dataset_type,
72 |         ann_file=data_root + 'annotations/instances_val2017.json',
73 |         img_prefix=data_root + 'val2017/',
74 |         img_scale=(1333, 800),
75 |         img_norm_cfg=img_norm_cfg,
76 |         size_divisor=32,
77 |         flip_ratio=0,
78 |         with_mask=False,
79 |         with_crowd=False,
80 |         with_label=True),
81 |     test=dict(
82 |         type=dataset_type,
83 |         ann_file=data_root + 'annotations/instances_val2017.json',
84 |         img_prefix=data_root + 'val2017/',
85 |         img_scale=(1333, 800),
86 |         img_norm_cfg=img_norm_cfg,
87 |         size_divisor=32,
88 |         flip_ratio=0,
89 |         with_mask=False,
90 |         with_crowd=False,
91 |         with_label=False,
92 |         test_mode=True))
93 | # optimizer
94 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
95 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
96 | # learning policy
97 | lr_config = dict(
98 |     policy='step',
99 |     warmup='linear',
100 |     warmup_iters=500,
101 |     warmup_ratio=1.0 / 3,
102 |     step=[8, 11])
103 | checkpoint_config = dict(interval=1)
104 | # yapf:disable
105 | log_config = dict(
106 |     interval=50,
107 |     hooks=[
108 |         dict(type='TextLoggerHook'),
109 |         # dict(type='TensorboardLoggerHook')
110 |     ])
111 | # yapf:enable
112 | # runtime settings
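# The '1x' schedule used throughout these configs means 12 epochs, with the
# learning rate stepped down after epochs 8 and 11 (see lr_config above).
# lr=0.01 for RetinaNet assumes the 8 GPUs x 2 imgs/GPU batch configured here;
# following the linear scaling rule, halve it for 4 GPUs (per the upstream
# mmdetection convention).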
113 | total_epochs = 12 114 | device_ids = range(8) 115 | dist_params = dict(backend='nccl') 116 | log_level = 'INFO' 117 | work_dir = './work_dirs/retinanet_r50_fpn_1x' 118 | load_from = None 119 | resume_from = None 120 | workflow = [('train', 1)] 121 | -------------------------------------------------------------------------------- /configs/retinanet_x101_32x4d_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='open-mmlab://resnext101_32x4d', 5 | backbone=dict( 6 | type='ResNeXt', 7 | depth=101, 8 | groups=32, 9 | base_width=4, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=1, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | start_level=1, 19 | add_extra_convs=True, 20 | num_outs=5), 21 | bbox_head=dict( 22 | type='RetinaHead', 23 | num_classes=81, 24 | in_channels=256, 25 | stacked_convs=4, 26 | feat_channels=256, 27 | octave_base_scale=4, 28 | scales_per_octave=3, 29 | anchor_ratios=[0.5, 1.0, 2.0], 30 | anchor_strides=[8, 16, 32, 64, 128], 31 | target_means=[.0, .0, .0, .0], 32 | target_stds=[1.0, 1.0, 1.0, 1.0])) 33 | # training and testing settings 34 | train_cfg = dict( 35 | assigner=dict( 36 | type='MaxIoUAssigner', 37 | pos_iou_thr=0.5, 38 | neg_iou_thr=0.4, 39 | min_pos_iou=0, 40 | ignore_iof_thr=-1), 41 | smoothl1_beta=0.11, 42 | gamma=2.0, 43 | alpha=0.25, 44 | allowed_border=-1, 45 | pos_weight=-1, 46 | debug=False) 47 | test_cfg = dict( 48 | nms_pre=1000, 49 | min_bbox_size=0, 50 | score_thr=0.05, 51 | nms=dict(type='nms', iou_thr=0.5), 52 | max_per_img=100) 53 | # dataset settings 54 | dataset_type = 'CocoDataset' 55 | data_root = 'data/coco/' 56 | img_norm_cfg = dict( 57 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 58 | data = dict( 59 | imgs_per_gpu=2, 60 | workers_per_gpu=2, 61 | train=dict( 62 | type=dataset_type, 63 | ann_file=data_root + 'annotations/instances_train2017.json', 64 | img_prefix=data_root + 'train2017/', 65 | img_scale=(1333, 800), 66 | img_norm_cfg=img_norm_cfg, 67 | size_divisor=32, 68 | flip_ratio=0.5, 69 | with_mask=False, 70 | with_crowd=False, 71 | with_label=True), 72 | val=dict( 73 | type=dataset_type, 74 | ann_file=data_root + 'annotations/instances_val2017.json', 75 | img_prefix=data_root + 'val2017/', 76 | img_scale=(1333, 800), 77 | img_norm_cfg=img_norm_cfg, 78 | size_divisor=32, 79 | flip_ratio=0, 80 | with_mask=False, 81 | with_crowd=False, 82 | with_label=True), 83 | test=dict( 84 | type=dataset_type, 85 | ann_file=data_root + 'annotations/instances_val2017.json', 86 | img_prefix=data_root + 'val2017/', 87 | img_scale=(1333, 800), 88 | img_norm_cfg=img_norm_cfg, 89 | size_divisor=32, 90 | flip_ratio=0, 91 | with_mask=False, 92 | with_crowd=False, 93 | with_label=False, 94 | test_mode=True)) 95 | # optimizer 96 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) 97 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 98 | # learning policy 99 | lr_config = dict( 100 | policy='step', 101 | warmup='linear', 102 | warmup_iters=500, 103 | warmup_ratio=1.0 / 3, 104 | step=[8, 11]) 105 | checkpoint_config = dict(interval=1) 106 | # yapf:disable 107 | log_config = dict( 108 | interval=50, 109 | hooks=[ 110 | dict(type='TextLoggerHook'), 111 | # dict(type='TensorboardLoggerHook') 112 | ]) 113 | # yapf:enable 114 | # runtime settings 115 | total_epochs = 12 116 | 
device_ids = range(8) 117 | dist_params = dict(backend='nccl') 118 | log_level = 'INFO' 119 | work_dir = './work_dirs/retinanet_r50_fpn_1x' 120 | load_from = None 121 | resume_from = None 122 | workflow = [('train', 1)] 123 | -------------------------------------------------------------------------------- /configs/retinanet_x101_64x4d_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='open-mmlab://resnext101_64x4d', 5 | backbone=dict( 6 | type='ResNeXt', 7 | depth=101, 8 | groups=64, 9 | base_width=4, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=1, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | start_level=1, 19 | add_extra_convs=True, 20 | num_outs=5), 21 | bbox_head=dict( 22 | type='RetinaHead', 23 | num_classes=81, 24 | in_channels=256, 25 | stacked_convs=4, 26 | feat_channels=256, 27 | octave_base_scale=4, 28 | scales_per_octave=3, 29 | anchor_ratios=[0.5, 1.0, 2.0], 30 | anchor_strides=[8, 16, 32, 64, 128], 31 | target_means=[.0, .0, .0, .0], 32 | target_stds=[1.0, 1.0, 1.0, 1.0])) 33 | # training and testing settings 34 | train_cfg = dict( 35 | assigner=dict( 36 | type='MaxIoUAssigner', 37 | pos_iou_thr=0.5, 38 | neg_iou_thr=0.4, 39 | min_pos_iou=0, 40 | ignore_iof_thr=-1), 41 | smoothl1_beta=0.11, 42 | gamma=2.0, 43 | alpha=0.25, 44 | allowed_border=-1, 45 | pos_weight=-1, 46 | debug=False) 47 | test_cfg = dict( 48 | nms_pre=1000, 49 | min_bbox_size=0, 50 | score_thr=0.05, 51 | nms=dict(type='nms', iou_thr=0.5), 52 | max_per_img=100) 53 | # dataset settings 54 | dataset_type = 'CocoDataset' 55 | data_root = 'data/coco/' 56 | img_norm_cfg = dict( 57 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 58 | data = dict( 59 | imgs_per_gpu=2, 60 | workers_per_gpu=2, 61 | train=dict( 62 | type=dataset_type, 63 | ann_file=data_root + 'annotations/instances_train2017.json', 64 | img_prefix=data_root + 'train2017/', 65 | img_scale=(1333, 800), 66 | img_norm_cfg=img_norm_cfg, 67 | size_divisor=32, 68 | flip_ratio=0.5, 69 | with_mask=False, 70 | with_crowd=False, 71 | with_label=True), 72 | val=dict( 73 | type=dataset_type, 74 | ann_file=data_root + 'annotations/instances_val2017.json', 75 | img_prefix=data_root + 'val2017/', 76 | img_scale=(1333, 800), 77 | img_norm_cfg=img_norm_cfg, 78 | size_divisor=32, 79 | flip_ratio=0, 80 | with_mask=False, 81 | with_crowd=False, 82 | with_label=True), 83 | test=dict( 84 | type=dataset_type, 85 | ann_file=data_root + 'annotations/instances_val2017.json', 86 | img_prefix=data_root + 'val2017/', 87 | img_scale=(1333, 800), 88 | img_norm_cfg=img_norm_cfg, 89 | size_divisor=32, 90 | flip_ratio=0, 91 | with_mask=False, 92 | with_crowd=False, 93 | with_label=False, 94 | test_mode=True)) 95 | # optimizer 96 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) 97 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 98 | # learning policy 99 | lr_config = dict( 100 | policy='step', 101 | warmup='linear', 102 | warmup_iters=500, 103 | warmup_ratio=1.0 / 3, 104 | step=[8, 11]) 105 | checkpoint_config = dict(interval=1) 106 | # yapf:disable 107 | log_config = dict( 108 | interval=50, 109 | hooks=[ 110 | dict(type='TextLoggerHook'), 111 | # dict(type='TensorboardLoggerHook') 112 | ]) 113 | # yapf:enable 114 | # runtime settings 115 | total_epochs = 12 116 | device_ids = range(8) 117 | 
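# NB: `work_dir` a few lines below still reads './work_dirs/retinanet_r50_fpn_1x'
# in both X-101 RetinaNet configs, apparently carried over from the R-50 file;
# point it at a distinct directory when training these variants.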
dist_params = dict(backend='nccl') 118 | log_level = 'INFO' 119 | work_dir = './work_dirs/retinanet_r50_fpn_1x' 120 | load_from = None 121 | resume_from = None 122 | workflow = [('train', 1)] 123 | -------------------------------------------------------------------------------- /configs/rpn_r101_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='modelzoo://resnet101', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=101, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | style='pytorch'), 12 | neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=256, 20 | feat_channels=256, 21 | anchor_scales=[8], 22 | anchor_ratios=[0.5, 1.0, 2.0], 23 | anchor_strides=[4, 8, 16, 32, 64], 24 | target_means=[.0, .0, .0, .0], 25 | target_stds=[1.0, 1.0, 1.0, 1.0], 26 | use_sigmoid_cls=True)) 27 | # model training and testing settings 28 | train_cfg = dict( 29 | rpn=dict( 30 | assigner=dict( 31 | type='MaxIoUAssigner', 32 | pos_iou_thr=0.7, 33 | neg_iou_thr=0.3, 34 | min_pos_iou=0.3, 35 | ignore_iof_thr=-1), 36 | sampler=dict( 37 | type='RandomSampler', 38 | num=256, 39 | pos_fraction=0.5, 40 | neg_pos_ub=-1, 41 | add_gt_as_proposals=False), 42 | allowed_border=0, 43 | pos_weight=-1, 44 | smoothl1_beta=1 / 9.0, 45 | debug=False)) 46 | test_cfg = dict( 47 | rpn=dict( 48 | nms_across_levels=False, 49 | nms_pre=2000, 50 | nms_post=2000, 51 | max_num=2000, 52 | nms_thr=0.7, 53 | min_bbox_size=0)) 54 | # dataset settings 55 | dataset_type = 'CocoDataset' 56 | data_root = 'data/coco/' 57 | img_norm_cfg = dict( 58 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 59 | data = dict( 60 | imgs_per_gpu=2, 61 | workers_per_gpu=2, 62 | train=dict( 63 | type=dataset_type, 64 | ann_file=data_root + 'annotations/instances_train2017.json', 65 | img_prefix=data_root + 'train2017/', 66 | img_scale=(1333, 800), 67 | img_norm_cfg=img_norm_cfg, 68 | size_divisor=32, 69 | flip_ratio=0.5, 70 | with_mask=False, 71 | with_crowd=False, 72 | with_label=False), 73 | val=dict( 74 | type=dataset_type, 75 | ann_file=data_root + 'annotations/instances_val2017.json', 76 | img_prefix=data_root + 'val2017/', 77 | img_scale=(1333, 800), 78 | img_norm_cfg=img_norm_cfg, 79 | size_divisor=32, 80 | flip_ratio=0, 81 | with_mask=False, 82 | with_crowd=False, 83 | with_label=False), 84 | test=dict( 85 | type=dataset_type, 86 | ann_file=data_root + 'annotations/instances_val2017.json', 87 | img_prefix=data_root + 'val2017/', 88 | img_scale=(1333, 800), 89 | img_norm_cfg=img_norm_cfg, 90 | size_divisor=32, 91 | flip_ratio=0, 92 | with_mask=False, 93 | with_label=False, 94 | test_mode=True)) 95 | # optimizer 96 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 97 | # runner configs 98 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 99 | lr_config = dict( 100 | policy='step', 101 | warmup='linear', 102 | warmup_iters=500, 103 | warmup_ratio=1.0 / 3, 104 | step=[8, 11]) 105 | checkpoint_config = dict(interval=1) 106 | # yapf:disable 107 | log_config = dict( 108 | interval=50, 109 | hooks=[ 110 | dict(type='TextLoggerHook'), 111 | # dict(type='TensorboardLoggerHook') 112 | ]) 113 | # yapf:enable 114 | # runtime settings 115 | total_epochs = 12 116 | dist_params = dict(backend='nccl') 117 | log_level = 'INFO' 118 | work_dir = 
'./work_dirs/rpn_r101_fpn_1x' 119 | load_from = None 120 | resume_from = None 121 | workflow = [('train', 1)] 122 | -------------------------------------------------------------------------------- /configs/rpn_r50_c4_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='open-mmlab://resnet50_caffe', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=3, 9 | strides=(1, 2, 2), 10 | dilations=(1, 1, 1), 11 | out_indices=(2, ), 12 | frozen_stages=1, 13 | normalize=dict(type='BN', frozen=True), 14 | norm_eval=True, 15 | style='caffe'), 16 | neck=None, 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=1024, 20 | feat_channels=1024, 21 | anchor_scales=[2, 4, 8, 16, 32], 22 | anchor_ratios=[0.5, 1.0, 2.0], 23 | anchor_strides=[16], 24 | target_means=[.0, .0, .0, .0], 25 | target_stds=[1.0, 1.0, 1.0, 1.0], 26 | use_sigmoid_cls=True)) 27 | # model training and testing settings 28 | train_cfg = dict( 29 | rpn=dict( 30 | assigner=dict( 31 | type='MaxIoUAssigner', 32 | pos_iou_thr=0.7, 33 | neg_iou_thr=0.3, 34 | min_pos_iou=0.3, 35 | ignore_iof_thr=-1), 36 | sampler=dict( 37 | type='RandomSampler', 38 | num=256, 39 | pos_fraction=0.5, 40 | neg_pos_ub=-1, 41 | add_gt_as_proposals=False), 42 | allowed_border=0, 43 | pos_weight=-1, 44 | smoothl1_beta=1 / 9.0, 45 | debug=False)) 46 | test_cfg = dict( 47 | rpn=dict( 48 | nms_across_levels=False, 49 | nms_pre=12000, 50 | nms_post=2000, 51 | max_num=2000, 52 | nms_thr=0.7, 53 | min_bbox_size=0)) 54 | # dataset settings 55 | dataset_type = 'CocoDataset' 56 | data_root = 'data/coco/' 57 | img_norm_cfg = dict( 58 | mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) 59 | data = dict( 60 | imgs_per_gpu=2, 61 | workers_per_gpu=2, 62 | train=dict( 63 | type=dataset_type, 64 | ann_file=data_root + 'annotations/instances_train2017.json', 65 | img_prefix=data_root + 'train2017/', 66 | img_scale=(1333, 800), 67 | img_norm_cfg=img_norm_cfg, 68 | size_divisor=32, 69 | flip_ratio=0.5, 70 | with_mask=False, 71 | with_crowd=False, 72 | with_label=False), 73 | val=dict( 74 | type=dataset_type, 75 | ann_file=data_root + 'annotations/instances_val2017.json', 76 | img_prefix=data_root + 'val2017/', 77 | img_scale=(1333, 800), 78 | img_norm_cfg=img_norm_cfg, 79 | size_divisor=32, 80 | flip_ratio=0, 81 | with_mask=False, 82 | with_crowd=False, 83 | with_label=False), 84 | test=dict( 85 | type=dataset_type, 86 | ann_file=data_root + 'annotations/instances_val2017.json', 87 | img_prefix=data_root + 'val2017/', 88 | img_scale=(1333, 800), 89 | img_norm_cfg=img_norm_cfg, 90 | size_divisor=32, 91 | flip_ratio=0, 92 | with_mask=False, 93 | with_label=False, 94 | test_mode=True)) 95 | # optimizer 96 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 97 | # runner configs 98 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 99 | lr_config = dict( 100 | policy='step', 101 | warmup='linear', 102 | warmup_iters=500, 103 | warmup_ratio=1.0 / 3, 104 | step=[8, 11]) 105 | checkpoint_config = dict(interval=1) 106 | # yapf:disable 107 | log_config = dict( 108 | interval=50, 109 | hooks=[ 110 | dict(type='TextLoggerHook'), 111 | # dict(type='TensorboardLoggerHook') 112 | ]) 113 | # yapf:enable 114 | # runtime settings 115 | total_epochs = 12 116 | dist_params = dict(backend='nccl') 117 | log_level = 'INFO' 118 | work_dir = './work_dirs/rpn_r50_c4_1x' 119 | load_from = None 120 | resume_from = None 121 | 
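# The workflow below is interpreted by the mmcv runner: [('train', 1)] runs one
# training epoch per cycle and no validation phase; [('train', 1), ('val', 1)]
# would interleave a validation epoch after each training epoch.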
workflow = [('train', 1)] 122 | -------------------------------------------------------------------------------- /configs/rpn_r50_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='modelzoo://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | style='pytorch'), 12 | neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=256, 20 | feat_channels=256, 21 | anchor_scales=[8], 22 | anchor_ratios=[0.5, 1.0, 2.0], 23 | anchor_strides=[4, 8, 16, 32, 64], 24 | target_means=[.0, .0, .0, .0], 25 | target_stds=[1.0, 1.0, 1.0, 1.0], 26 | use_sigmoid_cls=True)) 27 | # model training and testing settings 28 | train_cfg = dict( 29 | rpn=dict( 30 | assigner=dict( 31 | type='MaxIoUAssigner', 32 | pos_iou_thr=0.7, 33 | neg_iou_thr=0.3, 34 | min_pos_iou=0.3, 35 | ignore_iof_thr=-1), 36 | sampler=dict( 37 | type='RandomSampler', 38 | num=256, 39 | pos_fraction=0.5, 40 | neg_pos_ub=-1, 41 | add_gt_as_proposals=False), 42 | allowed_border=0, 43 | pos_weight=-1, 44 | smoothl1_beta=1 / 9.0, 45 | debug=False)) 46 | test_cfg = dict( 47 | rpn=dict( 48 | nms_across_levels=False, 49 | nms_pre=2000, 50 | nms_post=2000, 51 | max_num=2000, 52 | nms_thr=0.7, 53 | min_bbox_size=0)) 54 | # dataset settings 55 | dataset_type = 'CocoDataset' 56 | data_root = 'data/coco/' 57 | img_norm_cfg = dict( 58 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 59 | data = dict( 60 | imgs_per_gpu=2, 61 | workers_per_gpu=2, 62 | train=dict( 63 | type=dataset_type, 64 | ann_file=data_root + 'annotations/instances_train2017.json', 65 | img_prefix=data_root + 'train2017/', 66 | img_scale=(1333, 800), 67 | img_norm_cfg=img_norm_cfg, 68 | size_divisor=32, 69 | flip_ratio=0.5, 70 | with_mask=False, 71 | with_crowd=False, 72 | with_label=False), 73 | val=dict( 74 | type=dataset_type, 75 | ann_file=data_root + 'annotations/instances_val2017.json', 76 | img_prefix=data_root + 'val2017/', 77 | img_scale=(1333, 800), 78 | img_norm_cfg=img_norm_cfg, 79 | size_divisor=32, 80 | flip_ratio=0, 81 | with_mask=False, 82 | with_crowd=False, 83 | with_label=False), 84 | test=dict( 85 | type=dataset_type, 86 | ann_file=data_root + 'annotations/instances_val2017.json', 87 | img_prefix=data_root + 'val2017/', 88 | img_scale=(1333, 800), 89 | img_norm_cfg=img_norm_cfg, 90 | size_divisor=32, 91 | flip_ratio=0, 92 | with_mask=False, 93 | with_label=False, 94 | test_mode=True)) 95 | # optimizer 96 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 97 | # runner configs 98 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 99 | lr_config = dict( 100 | policy='step', 101 | warmup='linear', 102 | warmup_iters=500, 103 | warmup_ratio=1.0 / 3, 104 | step=[8, 11]) 105 | checkpoint_config = dict(interval=1) 106 | # yapf:disable 107 | log_config = dict( 108 | interval=50, 109 | hooks=[ 110 | dict(type='TextLoggerHook'), 111 | # dict(type='TensorboardLoggerHook') 112 | ]) 113 | # yapf:enable 114 | # runtime settings 115 | total_epochs = 12 116 | dist_params = dict(backend='nccl') 117 | log_level = 'INFO' 118 | work_dir = './work_dirs/rpn_r50_fpn_1x' 119 | load_from = None 120 | resume_from = None 121 | workflow = [('train', 1)] 122 | 
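All of these configs are launched with the scripts under `tools/` (listed in the tree at the top). A minimal usage sketch, assuming the standard upstream mmdetection entry points that this fork inherits; the argument names follow upstream `tools/train.py` and `tools/dist_train.sh` and should be verified against this fork's copies:

```shell
# distributed training on 8 GPUs (with imgs_per_gpu=2 this gives the total
# batch size of 16 that lr=0.02 assumes)
./tools/dist_train.sh configs/rpn_r50_fpn_1x.py 8

# non-distributed alternative
python tools/train.py configs/rpn_r50_fpn_1x.py --gpus 8 --work_dir ./work_dirs/rpn_r50_fpn_1x
```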
-------------------------------------------------------------------------------- /configs/rpn_x101_32x4d_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='open-mmlab://resnext101_32x4d', 5 | backbone=dict( 6 | type='ResNeXt', 7 | depth=101, 8 | groups=32, 9 | base_width=4, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=1, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_scales=[8], 24 | anchor_ratios=[0.5, 1.0, 2.0], 25 | anchor_strides=[4, 8, 16, 32, 64], 26 | target_means=[.0, .0, .0, .0], 27 | target_stds=[1.0, 1.0, 1.0, 1.0], 28 | use_sigmoid_cls=True)) 29 | # model training and testing settings 30 | train_cfg = dict( 31 | rpn=dict( 32 | assigner=dict( 33 | type='MaxIoUAssigner', 34 | pos_iou_thr=0.7, 35 | neg_iou_thr=0.3, 36 | min_pos_iou=0.3, 37 | ignore_iof_thr=-1), 38 | sampler=dict( 39 | type='RandomSampler', 40 | num=256, 41 | pos_fraction=0.5, 42 | neg_pos_ub=-1, 43 | add_gt_as_proposals=False), 44 | allowed_border=0, 45 | pos_weight=-1, 46 | smoothl1_beta=1 / 9.0, 47 | debug=False)) 48 | test_cfg = dict( 49 | rpn=dict( 50 | nms_across_levels=False, 51 | nms_pre=2000, 52 | nms_post=2000, 53 | max_num=2000, 54 | nms_thr=0.7, 55 | min_bbox_size=0)) 56 | # dataset settings 57 | dataset_type = 'CocoDataset' 58 | data_root = 'data/coco/' 59 | img_norm_cfg = dict( 60 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 61 | data = dict( 62 | imgs_per_gpu=2, 63 | workers_per_gpu=2, 64 | train=dict( 65 | type=dataset_type, 66 | ann_file=data_root + 'annotations/instances_train2017.json', 67 | img_prefix=data_root + 'train2017/', 68 | img_scale=(1333, 800), 69 | img_norm_cfg=img_norm_cfg, 70 | size_divisor=32, 71 | flip_ratio=0.5, 72 | with_mask=False, 73 | with_crowd=False, 74 | with_label=False), 75 | val=dict( 76 | type=dataset_type, 77 | ann_file=data_root + 'annotations/instances_val2017.json', 78 | img_prefix=data_root + 'val2017/', 79 | img_scale=(1333, 800), 80 | img_norm_cfg=img_norm_cfg, 81 | size_divisor=32, 82 | flip_ratio=0, 83 | with_mask=False, 84 | with_crowd=False, 85 | with_label=False), 86 | test=dict( 87 | type=dataset_type, 88 | ann_file=data_root + 'annotations/instances_val2017.json', 89 | img_prefix=data_root + 'val2017/', 90 | img_scale=(1333, 800), 91 | img_norm_cfg=img_norm_cfg, 92 | size_divisor=32, 93 | flip_ratio=0, 94 | with_mask=False, 95 | with_label=False, 96 | test_mode=True)) 97 | # optimizer 98 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 99 | # runner configs 100 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 101 | lr_config = dict( 102 | policy='step', 103 | warmup='linear', 104 | warmup_iters=500, 105 | warmup_ratio=1.0 / 3, 106 | step=[8, 11]) 107 | checkpoint_config = dict(interval=1) 108 | # yapf:disable 109 | log_config = dict( 110 | interval=50, 111 | hooks=[ 112 | dict(type='TextLoggerHook'), 113 | # dict(type='TensorboardLoggerHook') 114 | ]) 115 | # yapf:enable 116 | # runtime settings 117 | total_epochs = 12 118 | dist_params = dict(backend='nccl') 119 | log_level = 'INFO' 120 | work_dir = './work_dirs/rpn_r101_fpn_1x' 121 | load_from = None 122 | resume_from = None 123 | workflow = [('train', 1)] 124 | 
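RPN-only configs are evaluated on proposal recall rather than box AP. A hedged sketch of the evaluation flow, assuming upstream mmdetection's `tools/test.py` interface (`--eval proposal_fast` is the upstream option name and the checkpoint path is a placeholder; verify both against this fork):

```shell
python tools/test.py configs/rpn_x101_32x4d_fpn_1x.py <path/to/checkpoint.pth> \
    --gpus 8 --out results.pkl --eval proposal_fast
```

Note that `work_dir` in this config (and in the 64x4d variant below) still points at `./work_dirs/rpn_r101_fpn_1x`, apparently carried over from the R-101 config; use a distinct directory per experiment to avoid overwriting checkpoints.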
-------------------------------------------------------------------------------- /configs/rpn_x101_64x4d_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='open-mmlab://resnext101_64x4d', 5 | backbone=dict( 6 | type='ResNeXt', 7 | depth=101, 8 | groups=64, 9 | base_width=4, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=1, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_scales=[8], 24 | anchor_ratios=[0.5, 1.0, 2.0], 25 | anchor_strides=[4, 8, 16, 32, 64], 26 | target_means=[.0, .0, .0, .0], 27 | target_stds=[1.0, 1.0, 1.0, 1.0], 28 | use_sigmoid_cls=True)) 29 | # model training and testing settings 30 | train_cfg = dict( 31 | rpn=dict( 32 | assigner=dict( 33 | type='MaxIoUAssigner', 34 | pos_iou_thr=0.7, 35 | neg_iou_thr=0.3, 36 | min_pos_iou=0.3, 37 | ignore_iof_thr=-1), 38 | sampler=dict( 39 | type='RandomSampler', 40 | num=256, 41 | pos_fraction=0.5, 42 | neg_pos_ub=-1, 43 | add_gt_as_proposals=False), 44 | allowed_border=0, 45 | pos_weight=-1, 46 | smoothl1_beta=1 / 9.0, 47 | debug=False)) 48 | test_cfg = dict( 49 | rpn=dict( 50 | nms_across_levels=False, 51 | nms_pre=2000, 52 | nms_post=2000, 53 | max_num=2000, 54 | nms_thr=0.7, 55 | min_bbox_size=0)) 56 | # dataset settings 57 | dataset_type = 'CocoDataset' 58 | data_root = 'data/coco/' 59 | img_norm_cfg = dict( 60 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 61 | data = dict( 62 | imgs_per_gpu=2, 63 | workers_per_gpu=2, 64 | train=dict( 65 | type=dataset_type, 66 | ann_file=data_root + 'annotations/instances_train2017.json', 67 | img_prefix=data_root + 'train2017/', 68 | img_scale=(1333, 800), 69 | img_norm_cfg=img_norm_cfg, 70 | size_divisor=32, 71 | flip_ratio=0.5, 72 | with_mask=False, 73 | with_crowd=False, 74 | with_label=False), 75 | val=dict( 76 | type=dataset_type, 77 | ann_file=data_root + 'annotations/instances_val2017.json', 78 | img_prefix=data_root + 'val2017/', 79 | img_scale=(1333, 800), 80 | img_norm_cfg=img_norm_cfg, 81 | size_divisor=32, 82 | flip_ratio=0, 83 | with_mask=False, 84 | with_crowd=False, 85 | with_label=False), 86 | test=dict( 87 | type=dataset_type, 88 | ann_file=data_root + 'annotations/instances_val2017.json', 89 | img_prefix=data_root + 'val2017/', 90 | img_scale=(1333, 800), 91 | img_norm_cfg=img_norm_cfg, 92 | size_divisor=32, 93 | flip_ratio=0, 94 | with_mask=False, 95 | with_label=False, 96 | test_mode=True)) 97 | # optimizer 98 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 99 | # runner configs 100 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 101 | lr_config = dict( 102 | policy='step', 103 | warmup='linear', 104 | warmup_iters=500, 105 | warmup_ratio=1.0 / 3, 106 | step=[8, 11]) 107 | checkpoint_config = dict(interval=1) 108 | # yapf:disable 109 | log_config = dict( 110 | interval=50, 111 | hooks=[ 112 | dict(type='TextLoggerHook'), 113 | # dict(type='TensorboardLoggerHook') 114 | ]) 115 | # yapf:enable 116 | # runtime settings 117 | total_epochs = 12 118 | dist_params = dict(backend='nccl') 119 | log_level = 'INFO' 120 | work_dir = './work_dirs/rpn_r101_fpn_1x' 121 | load_from = None 122 | resume_from = None 123 | workflow = [('train', 1)] 124 | 
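The FPN-based RPN configs above differ only in the `backbone` block (`type`, `depth`, `groups`, `base_width`, and the pretrained weights); everything else is shared. One quick way to confirm this, assuming the `mmcv` dependency listed in INSTALL.md:

```shell
python -c "from mmcv import Config; print(Config.fromfile('configs/rpn_x101_64x4d_fpn_1x.py').model['backbone'])"
```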
--------------------------------------------------------------------------------
/demo/coco_test_12510.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/demo/coco_test_12510.jpg
--------------------------------------------------------------------------------
/demo/coco_val_32901.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/demo/coco_val_32901.png
--------------------------------------------------------------------------------
/init_coco.py:
--------------------------------------------------------------------------------
1 | import json
2 | import copy
3 | 
4 | if __name__ == "__main__":
5 | 
6 |     # identity mappings over the 133 panoptic category indices
7 |     idx_mapping = {i: i for i in range(133)}
8 |     inv_idx_mapping = {i: i for i in range(133)}
9 | 
10 |     cat_json = json.load(open('panopticapi/panoptic_coco_categories.json'))
11 |     cat_json_stff = copy.deepcopy(cat_json)
12 |     cat_idx_mapping = {}
13 |     for idx, k in enumerate(cat_json):
14 |         cat_idx_mapping[k['id']] = idx
15 |     for k, v in idx_mapping.items():
16 |         cat_json_stff[k] = cat_json[v]
17 |         cat_json_stff[k]['id'] = k
18 |     json.dump(cat_json_stff, open('data/coco/annotations/panoptic_coco_categories_Easystuff.json', 'w'))
19 | 
20 |     for s in ['train', 'val']:
21 | 
22 |         pano_json = json.load(open('data/coco/annotations/panoptic_{}2017.json'.format(s)))
23 | 
24 |         pano_json_stff = copy.deepcopy(pano_json)
25 | 
26 |         pano_json_stff['categories'] = cat_json_stff
27 | 
28 |         for anno in pano_json_stff['annotations']:
29 |             for segments_info in anno['segments_info']:
30 |                 segments_info['category_id'] = inv_idx_mapping[cat_idx_mapping[segments_info['category_id']]]
31 | 
32 |         for img in pano_json_stff['images']:
33 |             # panoptic ground truth images are PNGs, so only swap the extension
34 |             img['file_name'] = img['file_name'].replace('.jpg', '.png')
35 |         if s == 'val':
36 |             pano_json_stff['images'] = sorted(pano_json_stff['images'], key=lambda x: x['id'])
37 | 
38 |         json.dump(pano_json_stff, open('data/coco/annotations/panoptic_{}2017_Easystuff.json'.format(s), 'w'))
--------------------------------------------------------------------------------
/init_coco.sh:
--------------------------------------------------------------------------------
1 | python init_coco.py
2 | 
3 | PYTHONPATH=$(pwd):$PYTHONPATH python panopticapi/converters/panoptic2semantic_segmentation.py --input_json_file data/coco/annotations/panoptic_train2017_Easystuff.json --segmentations_folder data/coco/annotations/panoptic_train2017 --semantic_seg_folder data/coco/annotations/panoptic_train2017_semantic_Easystuff --categories_json_file data/coco/annotations/panoptic_coco_categories_Easystuff.json
4 | PYTHONPATH=$(pwd):$PYTHONPATH python panopticapi/converters/panoptic2semantic_segmentation.py --input_json_file data/coco/annotations/panoptic_val2017_Easystuff.json --segmentations_folder data/coco/annotations/panoptic_val2017 --semantic_seg_folder data/coco/annotations/panoptic_val2017_semantic_Easystuff --categories_json_file data/coco/annotations/panoptic_coco_categories_Easystuff.json
5 | 
--------------------------------------------------------------------------------
/mmdet/__init__.py:
--------------------------------------------------------------------------------
1 | from .version import __version__, short_version
2 | 
3 | __all__ = ['__version__', 'short_version']
4 | 
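The apis package that follows exposes a small single-image inference interface (inference_detector / show_result). A usage sketch against one of the demo images above; the checkpoint path is a placeholder:

import mmcv
from mmcv.runner import load_checkpoint
from mmdet.models import build_detector
from mmdet.apis import inference_detector, show_result

cfg = mmcv.Config.fromfile('configs/faster_rcnn_r50_fpn_1x.py')
cfg.model.pretrained = None  # weights come from the checkpoint, not ImageNet init
model = build_detector(cfg.model, test_cfg=cfg.test_cfg)
load_checkpoint(model, 'checkpoints/faster_rcnn_r50_fpn_1x.pth')  # placeholder path
result = inference_detector(model, 'demo/coco_test_12510.jpg', cfg, device='cuda:0')
show_result('demo/coco_test_12510.jpg', result, dataset='coco', score_thr=0.3)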
-------------------------------------------------------------------------------- /mmdet/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .env import init_dist, get_root_logger, set_random_seed 2 | from .train import train_detector 3 | from .inference import inference_detector, show_result 4 | 5 | __all__ = [ 6 | 'init_dist', 'get_root_logger', 'set_random_seed', 'train_detector', 7 | 'inference_detector', 'show_result' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/apis/env.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import random 4 | import subprocess 5 | 6 | import numpy as np 7 | import torch 8 | import torch.distributed as dist 9 | import torch.multiprocessing as mp 10 | from mmcv.runner import get_dist_info 11 | 12 | 13 | def init_dist(launcher, backend='nccl', **kwargs): 14 | if mp.get_start_method(allow_none=True) is None: 15 | mp.set_start_method('spawn') 16 | if launcher == 'pytorch': 17 | _init_dist_pytorch(backend, **kwargs) 18 | elif launcher == 'mpi': 19 | _init_dist_mpi(backend, **kwargs) 20 | elif launcher == 'slurm': 21 | _init_dist_slurm(backend, **kwargs) 22 | else: 23 | raise ValueError('Invalid launcher type: {}'.format(launcher)) 24 | 25 | 26 | def _init_dist_pytorch(backend, **kwargs): 27 | # TODO: use local_rank instead of rank % num_gpus 28 | rank = int(os.environ['RANK']) 29 | num_gpus = torch.cuda.device_count() 30 | torch.cuda.set_device(rank % num_gpus) 31 | dist.init_process_group(backend=backend, **kwargs) 32 | 33 | 34 | def _init_dist_mpi(backend, **kwargs): 35 | raise NotImplementedError 36 | 37 | 38 | def _init_dist_slurm(backend, port=29500, **kwargs): 39 | proc_id = int(os.environ['SLURM_PROCID']) 40 | ntasks = int(os.environ['SLURM_NTASKS']) 41 | node_list = os.environ['SLURM_NODELIST'] 42 | num_gpus = torch.cuda.device_count() 43 | torch.cuda.set_device(proc_id % num_gpus) 44 | addr = subprocess.getoutput( 45 | 'scontrol show hostname {} | head -n1'.format(node_list)) 46 | os.environ['MASTER_PORT'] = str(port) 47 | os.environ['MASTER_ADDR'] = addr 48 | os.environ['WORLD_SIZE'] = str(ntasks) 49 | os.environ['RANK'] = str(proc_id) 50 | dist.init_process_group(backend=backend) 51 | 52 | 53 | def set_random_seed(seed): 54 | random.seed(seed) 55 | np.random.seed(seed) 56 | torch.manual_seed(seed) 57 | torch.cuda.manual_seed_all(seed) 58 | 59 | 60 | def get_root_logger(log_level=logging.INFO): 61 | logger = logging.getLogger() 62 | if not logger.hasHandlers(): 63 | logging.basicConfig( 64 | format='%(asctime)s - %(levelname)s - %(message)s', 65 | level=log_level) 66 | rank, _ = get_dist_info() 67 | if rank != 0: 68 | logger.setLevel('ERROR') 69 | return logger 70 | -------------------------------------------------------------------------------- /mmdet/apis/inference.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import pycocotools.mask as maskUtils 4 | import torch 5 | 6 | from mmdet.core import get_classes 7 | from mmdet.datasets import to_tensor 8 | from mmdet.datasets.transforms import ImageTransform 9 | 10 | 11 | def _prepare_data(img, img_transform, cfg, device): 12 | ori_shape = img.shape 13 | img, img_shape, pad_shape, scale_factor = img_transform( 14 | img, 15 | scale=cfg.data.test.img_scale, 16 | keep_ratio=cfg.data.test.get('resize_keep_ratio', True)) 17 | img = 
to_tensor(img).to(device).unsqueeze(0)
18 |     img_meta = [
19 |         dict(
20 |             ori_shape=ori_shape,
21 |             img_shape=img_shape,
22 |             pad_shape=pad_shape,
23 |             scale_factor=scale_factor,
24 |             flip=False)
25 |     ]
26 |     return dict(img=[img], img_meta=[img_meta])
27 | 
28 | 
29 | def _inference_single(model, img, img_transform, cfg, device):
30 |     img = mmcv.imread(img)
31 |     data = _prepare_data(img, img_transform, cfg, device)
32 |     with torch.no_grad():
33 |         result = model(return_loss=False, rescale=True, **data)
34 |     return result
35 | 
36 | 
37 | def _inference_generator(model, imgs, img_transform, cfg, device):
38 |     for img in imgs:
39 |         yield _inference_single(model, img, img_transform, cfg, device)
40 | 
41 | 
42 | def inference_detector(model, imgs, cfg, device='cuda:0'):
43 |     img_transform = ImageTransform(
44 |         size_divisor=cfg.data.test.size_divisor, **cfg.img_norm_cfg)
45 |     model = model.to(device)
46 |     model.eval()
47 | 
48 |     if not isinstance(imgs, list):
49 |         return _inference_single(model, imgs, img_transform, cfg, device)
50 |     else:
51 |         return _inference_generator(model, imgs, img_transform, cfg, device)
52 | 
53 | 
54 | def show_result(img, result, dataset='coco', score_thr=0.3, out_file=None):
55 |     img = mmcv.imread(img)
56 |     class_names = get_classes(dataset)
57 |     if isinstance(result, tuple):
58 |         bbox_result, segm_result = result
59 |     else:
60 |         bbox_result, segm_result = result, None
61 |     bboxes = np.vstack(bbox_result)
62 |     # draw segmentation masks
63 |     if segm_result is not None:
64 |         segms = mmcv.concat_list(segm_result)
65 |         inds = np.where(bboxes[:, -1] > score_thr)[0]
66 |         for i in inds:
67 |             color_mask = np.random.randint(
68 |                 0, 256, (1, 3), dtype=np.uint8)
69 |             mask = maskUtils.decode(segms[i]).astype(np.bool)
70 |             img[mask] = img[mask] * 0.5 + color_mask * 0.5
71 |     # draw bounding boxes
72 |     labels = [
73 |         np.full(bbox.shape[0], i, dtype=np.int32)
74 |         for i, bbox in enumerate(bbox_result)
75 |     ]
76 |     labels = np.concatenate(labels)
77 |     mmcv.imshow_det_bboxes(
78 |         img.copy(),
79 |         bboxes,
80 |         labels,
81 |         class_names=class_names,
82 |         score_thr=score_thr,
83 |         show=out_file is None,
84 |         out_file=out_file)
--------------------------------------------------------------------------------
/mmdet/core/__init__.py:
--------------------------------------------------------------------------------
1 | from .anchor import *  # noqa: F401, F403
2 | from .bbox import *  # noqa: F401, F403
3 | from .mask import *  # noqa: F401, F403
4 | from .loss import *  # noqa: F401, F403
5 | from .evaluation import *  # noqa: F401, F403
6 | from .post_processing import *  # noqa: F401, F403
7 | from .utils import *  # noqa: F401, F403
--------------------------------------------------------------------------------
/mmdet/core/anchor/__init__.py:
--------------------------------------------------------------------------------
1 | from .anchor_generator import AnchorGenerator
2 | from .anchor_target import anchor_target
3 | 
4 | __all__ = ['AnchorGenerator', 'anchor_target']
--------------------------------------------------------------------------------
/mmdet/core/anchor/anchor_generator.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | 
4 | class AnchorGenerator(object):
5 | 
6 |     def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None):
7 |         self.base_size = base_size
8 |         self.scales = torch.Tensor(scales)
9 |         self.ratios = torch.Tensor(ratios)
10 |         self.scale_major = scale_major
11 |         self.ctr = ctr
12 |         self.base_anchors = 
self.gen_base_anchors() 13 | 14 | @property 15 | def num_base_anchors(self): 16 | return self.base_anchors.size(0) 17 | 18 | def gen_base_anchors(self): 19 | w = self.base_size 20 | h = self.base_size 21 | if self.ctr is None: 22 | x_ctr = 0.5 * (w - 1) 23 | y_ctr = 0.5 * (h - 1) 24 | else: 25 | x_ctr, y_ctr = self.ctr 26 | 27 | h_ratios = torch.sqrt(self.ratios) 28 | w_ratios = 1 / h_ratios 29 | if self.scale_major: 30 | ws = (w * w_ratios[:, None] * self.scales[None, :]).view(-1) 31 | hs = (h * h_ratios[:, None] * self.scales[None, :]).view(-1) 32 | else: 33 | ws = (w * self.scales[:, None] * w_ratios[None, :]).view(-1) 34 | hs = (h * self.scales[:, None] * h_ratios[None, :]).view(-1) 35 | 36 | base_anchors = torch.stack( 37 | [ 38 | x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1), 39 | x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1) 40 | ], 41 | dim=-1).round() 42 | 43 | return base_anchors 44 | 45 | def _meshgrid(self, x, y, row_major=True): 46 | xx = x.repeat(len(y)) 47 | yy = y.view(-1, 1).repeat(1, len(x)).view(-1) 48 | if row_major: 49 | return xx, yy 50 | else: 51 | return yy, xx 52 | 53 | def grid_anchors(self, featmap_size, stride=16, device='cuda'): 54 | base_anchors = self.base_anchors.to(device) 55 | 56 | feat_h, feat_w = featmap_size 57 | shift_x = torch.arange(0, feat_w, device=device) * stride 58 | shift_y = torch.arange(0, feat_h, device=device) * stride 59 | shift_xx, shift_yy = self._meshgrid(shift_x, shift_y) 60 | shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1) 61 | shifts = shifts.type_as(base_anchors) 62 | # first feat_w elements correspond to the first row of shifts 63 | # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get 64 | # shifted anchors (K, A, 4), reshape to (K*A, 4) 65 | 66 | all_anchors = base_anchors[None, :, :] + shifts[:, None, :] 67 | all_anchors = all_anchors.view(-1, 4) 68 | # first A rows correspond to A anchors of (0, 0) in feature map, 69 | # then (0, 1), (0, 2), ... 
70 | return all_anchors 71 | 72 | def valid_flags(self, featmap_size, valid_size, device='cuda'): 73 | feat_h, feat_w = featmap_size 74 | valid_h, valid_w = valid_size 75 | assert valid_h <= feat_h and valid_w <= feat_w 76 | valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device) 77 | valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device) 78 | valid_x[:valid_w] = 1 79 | valid_y[:valid_h] = 1 80 | valid_xx, valid_yy = self._meshgrid(valid_x, valid_y) 81 | valid = valid_xx & valid_yy 82 | valid = valid[:, None].expand( 83 | valid.size(0), self.num_base_anchors).contiguous().view(-1) 84 | return valid 85 | -------------------------------------------------------------------------------- /mmdet/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | from .geometry import bbox_overlaps 2 | from .assigners import BaseAssigner, MaxIoUAssigner, AssignResult 3 | from .samplers import (BaseSampler, PseudoSampler, RandomSampler, 4 | InstanceBalancedPosSampler, IoUBalancedNegSampler, 5 | CombinedSampler, SamplingResult) 6 | from .assign_sampling import build_assigner, build_sampler, assign_and_sample 7 | from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping, 8 | bbox_mapping_back, bbox2roi, roi2bbox, bbox2result) 9 | from .bbox_target import bbox_target 10 | 11 | __all__ = [ 12 | 'bbox_overlaps', 'BaseAssigner', 'MaxIoUAssigner', 'AssignResult', 13 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 14 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 15 | 'SamplingResult', 'build_assigner', 'build_sampler', 'assign_and_sample', 16 | 'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_mapping', 17 | 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', 'bbox_target' 18 | ] 19 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assign_sampling.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from . 
import assigners, samplers
4 | 
5 | 
6 | def build_assigner(cfg, **kwargs):
7 |     if isinstance(cfg, assigners.BaseAssigner):
8 |         return cfg
9 |     elif isinstance(cfg, dict):
10 |         return mmcv.runner.obj_from_dict(
11 |             cfg, assigners, default_args=kwargs)
12 |     else:
13 |         raise TypeError('Invalid type {} for building an assigner'.format(
14 |             type(cfg)))
15 | 
16 | 
17 | def build_sampler(cfg, **kwargs):
18 |     if isinstance(cfg, samplers.BaseSampler):
19 |         return cfg
20 |     elif isinstance(cfg, dict):
21 |         return mmcv.runner.obj_from_dict(
22 |             cfg, samplers, default_args=kwargs)
23 |     else:
24 |         raise TypeError('Invalid type {} for building a sampler'.format(
25 |             type(cfg)))
26 | 
27 | 
28 | def assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg):
29 |     bbox_assigner = build_assigner(cfg.assigner)
30 |     bbox_sampler = build_sampler(cfg.sampler)
31 |     assign_result = bbox_assigner.assign(bboxes, gt_bboxes, gt_bboxes_ignore,
32 |                                          gt_labels)
33 |     sampling_result = bbox_sampler.sample(assign_result, bboxes, gt_bboxes,
34 |                                           gt_labels)
35 |     return assign_result, sampling_result
--------------------------------------------------------------------------------
/mmdet/core/bbox/assigners/__init__.py:
--------------------------------------------------------------------------------
1 | from .base_assigner import BaseAssigner
2 | from .max_iou_assigner import MaxIoUAssigner
3 | from .assign_result import AssignResult
4 | 
5 | __all__ = ['BaseAssigner', 'MaxIoUAssigner', 'AssignResult']
--------------------------------------------------------------------------------
/mmdet/core/bbox/assigners/assign_result.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | 
4 | class AssignResult(object):
5 | 
6 |     def __init__(self, num_gts, gt_inds, max_overlaps, labels=None):
7 |         self.num_gts = num_gts
8 |         self.gt_inds = gt_inds
9 |         self.max_overlaps = max_overlaps
10 |         self.labels = labels
11 | 
12 |     def add_gt_(self, gt_labels):
13 |         self_inds = torch.arange(
14 |             1, len(gt_labels) + 1, dtype=torch.long, device=gt_labels.device)
15 |         self.gt_inds = torch.cat([self_inds, self.gt_inds])
16 |         self.max_overlaps = torch.cat(
17 |             [self.max_overlaps.new_ones(self.num_gts), self.max_overlaps])
18 |         if self.labels is not None:
19 |             self.labels = torch.cat([gt_labels, self.labels])
--------------------------------------------------------------------------------
/mmdet/core/bbox/assigners/base_assigner.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta, abstractmethod
2 | 
3 | 
4 | class BaseAssigner(metaclass=ABCMeta):
5 | 
6 |     @abstractmethod
7 |     def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
8 |         pass
--------------------------------------------------------------------------------
/mmdet/core/bbox/bbox_target.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | from .transforms import bbox2delta
4 | from ..utils import multi_apply
5 | 
6 | 
7 | def bbox_target(pos_bboxes_list,
8 |                 neg_bboxes_list,
9 |                 pos_gt_bboxes_list,
10 |                 pos_gt_labels_list,
11 |                 cfg,
12 |                 reg_classes=1,
13 |                 target_means=[.0, .0, .0, .0],
14 |                 target_stds=[1.0, 1.0, 1.0, 1.0],
15 |                 concat=True):
16 |     labels, label_weights, bbox_targets, bbox_weights = multi_apply(
17 |         bbox_target_single,
18 |         pos_bboxes_list,
19 |         neg_bboxes_list,
20 |         pos_gt_bboxes_list,
21 |         pos_gt_labels_list,
22 |         cfg=cfg,
23 |         reg_classes=reg_classes,
24 |         target_means=target_means,
25 |         target_stds=target_stds)
26 | 
27 |     if concat:
28 |         labels = torch.cat(labels, 0)
29 |         label_weights = torch.cat(label_weights, 0)
30 |         bbox_targets = torch.cat(bbox_targets, 0)
31 |         bbox_weights = torch.cat(bbox_weights, 0)
32 |     return labels, label_weights, bbox_targets, bbox_weights
33 | 
34 | 
35 | def bbox_target_single(pos_bboxes,
36 |                        neg_bboxes,
37 |                        pos_gt_bboxes,
38 |                        pos_gt_labels,
39 |                        cfg,
40 |                        reg_classes=1,
41 |                        target_means=[.0, .0, .0, .0],
42 |                        target_stds=[1.0, 1.0, 1.0, 1.0]):
43 |     num_pos = pos_bboxes.size(0)
44 |     num_neg = neg_bboxes.size(0)
45 |     num_samples = num_pos + num_neg
46 |     labels = pos_bboxes.new_zeros(num_samples, dtype=torch.long)
47 |     label_weights = pos_bboxes.new_zeros(num_samples)
48 |     bbox_targets = pos_bboxes.new_zeros(num_samples, 4)
49 |     bbox_weights = pos_bboxes.new_zeros(num_samples, 4)
50 |     if num_pos > 0:
51 |         labels[:num_pos] = pos_gt_labels
52 |         pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight
53 |         label_weights[:num_pos] = pos_weight
54 |         pos_bbox_targets = bbox2delta(pos_bboxes, pos_gt_bboxes, target_means,
55 |                                       target_stds)
56 |         bbox_targets[:num_pos, :] = pos_bbox_targets
57 |         bbox_weights[:num_pos, :] = 1
58 |     if num_neg > 0:
59 |         label_weights[-num_neg:] = 1.0
60 | 
61 |     return labels, label_weights, bbox_targets, bbox_weights
62 | 
63 | 
64 | def expand_target(bbox_targets, bbox_weights, labels, num_classes):
65 |     bbox_targets_expand = bbox_targets.new_zeros((bbox_targets.size(0),
66 |                                                   4 * num_classes))
67 |     bbox_weights_expand = bbox_weights.new_zeros((bbox_weights.size(0),
68 |                                                   4 * num_classes))
69 |     for i in torch.nonzero(labels > 0).squeeze(-1):
70 |         start, end = labels[i] * 4, (labels[i] + 1) * 4
71 |         bbox_targets_expand[i, start:end] = bbox_targets[i, :]
72 |         bbox_weights_expand[i, start:end] = bbox_weights[i, :]
73 |     return bbox_targets_expand, bbox_weights_expand
--------------------------------------------------------------------------------
/mmdet/core/bbox/geometry.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | 
4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False):
5 |     """Calculate overlaps between two sets of bboxes.
6 | 
7 |     If ``is_aligned`` is ``False``, then calculate the ious between each bbox
8 |     of bboxes1 and bboxes2, otherwise the ious between each aligned pair of
9 |     bboxes1 and bboxes2.
10 | 
11 |     Args:
12 |         bboxes1 (Tensor): shape (m, 4)
13 |         bboxes2 (Tensor): shape (n, 4), if is_aligned is ``True``, then m and n
14 |             must be equal.
15 |         mode (str): "iou" (intersection over union) or "iof" (intersection over
16 |             foreground).
17 | 18 | Returns: 19 | ious(Tensor): shape (m, n) if is_aligned == False else shape (m, 1) 20 | """ 21 | 22 | assert mode in ['iou', 'iof'] 23 | 24 | rows = bboxes1.size(0) 25 | cols = bboxes2.size(0) 26 | if is_aligned: 27 | assert rows == cols 28 | 29 | if rows * cols == 0: 30 | return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols) 31 | 32 | if is_aligned: 33 | lt = torch.max(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2] 34 | rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2] 35 | 36 | wh = (rb - lt + 1).clamp(min=0) # [rows, 2] 37 | overlap = wh[:, 0] * wh[:, 1] 38 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 39 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 40 | 41 | if mode == 'iou': 42 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 43 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 44 | ious = overlap / (area1 + area2 - overlap) 45 | else: 46 | ious = overlap / area1 47 | else: 48 | lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2]) # [rows, cols, 2] 49 | rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:]) # [rows, cols, 2] 50 | 51 | wh = (rb - lt + 1).clamp(min=0) # [rows, cols, 2] 52 | overlap = wh[:, :, 0] * wh[:, :, 1] 53 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 54 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 55 | 56 | if mode == 'iou': 57 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 58 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 59 | ious = overlap / (area1[:, None] + area2 - overlap) 60 | else: 61 | ious = overlap / (area1[:, None]) 62 | 63 | return ious 64 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_sampler import BaseSampler 2 | from .pseudo_sampler import PseudoSampler 3 | from .random_sampler import RandomSampler 4 | from .instance_balanced_pos_sampler import InstanceBalancedPosSampler 5 | from .iou_balanced_neg_sampler import IoUBalancedNegSampler 6 | from .combined_sampler import CombinedSampler 7 | from .ohem_sampler import OHEMSampler 8 | from .sampling_result import SamplingResult 9 | 10 | __all__ = [ 11 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 12 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 13 | 'OHEMSampler', 'SamplingResult' 14 | ] 15 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/base_sampler.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch 4 | 5 | from .sampling_result import SamplingResult 6 | 7 | 8 | class BaseSampler(metaclass=ABCMeta): 9 | 10 | def __init__(self, 11 | num, 12 | pos_fraction, 13 | neg_pos_ub=-1, 14 | add_gt_as_proposals=True, 15 | **kwargs): 16 | self.num = num 17 | self.pos_fraction = pos_fraction 18 | self.neg_pos_ub = neg_pos_ub 19 | self.add_gt_as_proposals = add_gt_as_proposals 20 | self.pos_sampler = self 21 | self.neg_sampler = self 22 | 23 | @abstractmethod 24 | def _sample_pos(self, assign_result, num_expected, **kwargs): 25 | pass 26 | 27 | @abstractmethod 28 | def _sample_neg(self, assign_result, num_expected, **kwargs): 29 | pass 30 | 31 | def sample(self, 32 | assign_result, 33 | bboxes, 34 | gt_bboxes, 35 | gt_labels=None, 36 | **kwargs): 37 | """Sample positive and negative bboxes. 38 | 39 | This is a simple implementation of bbox sampling given candidates, 40 | assigning results and ground truth bboxes. 
41 | 42 | Args: 43 | assign_result (:obj:`AssignResult`): Bbox assigning results. 44 | bboxes (Tensor): Boxes to be sampled from. 45 | gt_bboxes (Tensor): Ground truth bboxes. 46 | gt_labels (Tensor, optional): Class labels of ground truth bboxes. 47 | 48 | Returns: 49 | :obj:`SamplingResult`: Sampling result. 50 | """ 51 | bboxes = bboxes[:, :4] 52 | 53 | gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8) 54 | if self.add_gt_as_proposals: 55 | bboxes = torch.cat([gt_bboxes, bboxes], dim=0) 56 | assign_result.add_gt_(gt_labels) 57 | gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8) 58 | gt_flags = torch.cat([gt_ones, gt_flags]) 59 | 60 | num_expected_pos = int(self.num * self.pos_fraction) 61 | pos_inds = self.pos_sampler._sample_pos( 62 | assign_result, num_expected_pos, bboxes=bboxes, **kwargs) 63 | # We found that sampled indices have duplicated items occasionally. 64 | # (may be a bug of PyTorch) 65 | pos_inds = pos_inds.unique() 66 | num_sampled_pos = pos_inds.numel() 67 | num_expected_neg = self.num - num_sampled_pos 68 | if self.neg_pos_ub >= 0: 69 | _pos = max(1, num_sampled_pos) 70 | neg_upper_bound = int(self.neg_pos_ub * _pos) 71 | if num_expected_neg > neg_upper_bound: 72 | num_expected_neg = neg_upper_bound 73 | neg_inds = self.neg_sampler._sample_neg( 74 | assign_result, num_expected_neg, bboxes=bboxes, **kwargs) 75 | neg_inds = neg_inds.unique() 76 | 77 | return SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 78 | assign_result, gt_flags) 79 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/combined_sampler.py: -------------------------------------------------------------------------------- 1 | from .base_sampler import BaseSampler 2 | from ..assign_sampling import build_sampler 3 | 4 | 5 | class CombinedSampler(BaseSampler): 6 | 7 | def __init__(self, pos_sampler, neg_sampler, **kwargs): 8 | super(CombinedSampler, self).__init__(**kwargs) 9 | self.pos_sampler = build_sampler(pos_sampler, **kwargs) 10 | self.neg_sampler = build_sampler(neg_sampler, **kwargs) 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .random_sampler import RandomSampler 5 | 6 | 7 | class InstanceBalancedPosSampler(RandomSampler): 8 | 9 | def _sample_pos(self, assign_result, num_expected, **kwargs): 10 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 11 | if pos_inds.numel() != 0: 12 | pos_inds = pos_inds.squeeze(1) 13 | if pos_inds.numel() <= num_expected: 14 | return pos_inds 15 | else: 16 | unique_gt_inds = assign_result.gt_inds[pos_inds].unique() 17 | num_gts = len(unique_gt_inds) 18 | num_per_gt = int(round(num_expected / float(num_gts)) + 1) 19 | sampled_inds = [] 20 | for i in unique_gt_inds: 21 | inds = torch.nonzero(assign_result.gt_inds == i.item()) 22 | if inds.numel() != 0: 23 | inds = inds.squeeze(1) 24 | else: 25 | continue 26 | if len(inds) > num_per_gt: 27 | inds = self.random_choice(inds, num_per_gt) 28 | sampled_inds.append(inds) 29 | sampled_inds = torch.cat(sampled_inds) 30 | if len(sampled_inds) < num_expected: 31 | num_extra = num_expected - len(sampled_inds) 32 | extra_inds = np.array( 33 | 
list(set(pos_inds.cpu()) - set(sampled_inds.cpu()))) 34 | if len(extra_inds) > num_extra: 35 | extra_inds = self.random_choice(extra_inds, num_extra) 36 | extra_inds = torch.from_numpy(extra_inds).to( 37 | assign_result.gt_inds.device).long() 38 | sampled_inds = torch.cat([sampled_inds, extra_inds]) 39 | elif len(sampled_inds) > num_expected: 40 | sampled_inds = self.random_choice(sampled_inds, num_expected) 41 | return sampled_inds 42 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/iou_balanced_neg_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .random_sampler import RandomSampler 5 | 6 | 7 | class IoUBalancedNegSampler(RandomSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | hard_thr=0.1, 13 | hard_fraction=0.5, 14 | **kwargs): 15 | super(IoUBalancedNegSampler, self).__init__(num, pos_fraction, 16 | **kwargs) 17 | assert hard_thr > 0 18 | assert 0 < hard_fraction < 1 19 | self.hard_thr = hard_thr 20 | self.hard_fraction = hard_fraction 21 | 22 | def _sample_neg(self, assign_result, num_expected, **kwargs): 23 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 24 | if neg_inds.numel() != 0: 25 | neg_inds = neg_inds.squeeze(1) 26 | if len(neg_inds) <= num_expected: 27 | return neg_inds 28 | else: 29 | max_overlaps = assign_result.max_overlaps.cpu().numpy() 30 | # balance sampling for negative samples 31 | neg_set = set(neg_inds.cpu().numpy()) 32 | easy_set = set( 33 | np.where( 34 | np.logical_and(max_overlaps >= 0, 35 | max_overlaps < self.hard_thr))[0]) 36 | hard_set = set(np.where(max_overlaps >= self.hard_thr)[0]) 37 | easy_neg_inds = list(easy_set & neg_set) 38 | hard_neg_inds = list(hard_set & neg_set) 39 | 40 | num_expected_hard = int(num_expected * self.hard_fraction) 41 | if len(hard_neg_inds) > num_expected_hard: 42 | sampled_hard_inds = self.random_choice(hard_neg_inds, 43 | num_expected_hard) 44 | else: 45 | sampled_hard_inds = np.array(hard_neg_inds, dtype=np.int) 46 | num_expected_easy = num_expected - len(sampled_hard_inds) 47 | if len(easy_neg_inds) > num_expected_easy: 48 | sampled_easy_inds = self.random_choice(easy_neg_inds, 49 | num_expected_easy) 50 | else: 51 | sampled_easy_inds = np.array(easy_neg_inds, dtype=np.int) 52 | sampled_inds = np.concatenate((sampled_easy_inds, 53 | sampled_hard_inds)) 54 | if len(sampled_inds) < num_expected: 55 | num_extra = num_expected - len(sampled_inds) 56 | extra_inds = np.array(list(neg_set - set(sampled_inds))) 57 | if len(extra_inds) > num_extra: 58 | extra_inds = self.random_choice(extra_inds, num_extra) 59 | sampled_inds = np.concatenate((sampled_inds, extra_inds)) 60 | sampled_inds = torch.from_numpy(sampled_inds).long().to( 61 | assign_result.gt_inds.device) 62 | return sampled_inds 63 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/ohem_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sampler import BaseSampler 4 | from ..transforms import bbox2roi 5 | 6 | 7 | class OHEMSampler(BaseSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | context, 13 | neg_pos_ub=-1, 14 | add_gt_as_proposals=True, 15 | **kwargs): 16 | super(OHEMSampler, self).__init__(num, pos_fraction, neg_pos_ub, 17 | add_gt_as_proposals) 18 | if not hasattr(context, 'num_stages'): 19 | self.bbox_roi_extractor = 
context.bbox_roi_extractor 20 | self.bbox_head = context.bbox_head 21 | else: 22 | self.bbox_roi_extractor = context.bbox_roi_extractor[ 23 | context.current_stage] 24 | self.bbox_head = context.bbox_head[context.current_stage] 25 | 26 | def hard_mining(self, inds, num_expected, bboxes, labels, feats): 27 | with torch.no_grad(): 28 | rois = bbox2roi([bboxes]) 29 | bbox_feats = self.bbox_roi_extractor( 30 | feats[:self.bbox_roi_extractor.num_inputs], rois) 31 | cls_score, _ = self.bbox_head(bbox_feats) 32 | loss = self.bbox_head.loss( 33 | cls_score=cls_score, 34 | bbox_pred=None, 35 | labels=labels, 36 | label_weights=cls_score.new_ones(cls_score.size(0)), 37 | bbox_targets=None, 38 | bbox_weights=None, 39 | reduce=False)['loss_cls'] 40 | _, topk_loss_inds = loss.topk(num_expected) 41 | return inds[topk_loss_inds] 42 | 43 | def _sample_pos(self, 44 | assign_result, 45 | num_expected, 46 | bboxes=None, 47 | feats=None, 48 | **kwargs): 49 | # Sample some hard positive samples 50 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 51 | if pos_inds.numel() != 0: 52 | pos_inds = pos_inds.squeeze(1) 53 | if pos_inds.numel() <= num_expected: 54 | return pos_inds 55 | else: 56 | return self.hard_mining(pos_inds, num_expected, bboxes[pos_inds], 57 | assign_result.labels[pos_inds], feats) 58 | 59 | def _sample_neg(self, 60 | assign_result, 61 | num_expected, 62 | bboxes=None, 63 | feats=None, 64 | **kwargs): 65 | # Sample some hard negative samples 66 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 67 | if neg_inds.numel() != 0: 68 | neg_inds = neg_inds.squeeze(1) 69 | if len(neg_inds) <= num_expected: 70 | return neg_inds 71 | else: 72 | return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds], 73 | assign_result.labels[neg_inds], feats) 74 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/pseudo_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sampler import BaseSampler 4 | from .sampling_result import SamplingResult 5 | 6 | 7 | class PseudoSampler(BaseSampler): 8 | 9 | def __init__(self, **kwargs): 10 | pass 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | 18 | def sample(self, assign_result, bboxes, gt_bboxes, **kwargs): 19 | pos_inds = torch.nonzero( 20 | assign_result.gt_inds > 0).squeeze(-1).unique() 21 | neg_inds = torch.nonzero( 22 | assign_result.gt_inds == 0).squeeze(-1).unique() 23 | gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8) 24 | sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 25 | assign_result, gt_flags) 26 | return sampling_result 27 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/random_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .base_sampler import BaseSampler 5 | 6 | 7 | class RandomSampler(BaseSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | neg_pos_ub=-1, 13 | add_gt_as_proposals=True, 14 | **kwargs): 15 | super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub, 16 | add_gt_as_proposals) 17 | 18 | @staticmethod 19 | def random_choice(gallery, num): 20 | """Random select some elements from the gallery. 
21 | 22 | It seems that Pytorch's implementation is slower than numpy so we use 23 | numpy to randperm the indices. 24 | """ 25 | assert len(gallery) >= num 26 | if isinstance(gallery, list): 27 | gallery = np.array(gallery) 28 | cands = np.arange(len(gallery)) 29 | np.random.shuffle(cands) 30 | rand_inds = cands[:num] 31 | if not isinstance(gallery, np.ndarray): 32 | rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device) 33 | return gallery[rand_inds] 34 | 35 | def _sample_pos(self, assign_result, num_expected, **kwargs): 36 | """Randomly sample some positive samples.""" 37 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 38 | if pos_inds.numel() != 0: 39 | pos_inds = pos_inds.squeeze(1) 40 | if pos_inds.numel() <= num_expected: 41 | return pos_inds 42 | else: 43 | return self.random_choice(pos_inds, num_expected) 44 | 45 | def _sample_neg(self, assign_result, num_expected, **kwargs): 46 | """Randomly sample some negative samples.""" 47 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 48 | if neg_inds.numel() != 0: 49 | neg_inds = neg_inds.squeeze(1) 50 | if len(neg_inds) <= num_expected: 51 | return neg_inds 52 | else: 53 | return self.random_choice(neg_inds, num_expected) 54 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/sampling_result.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class SamplingResult(object): 5 | 6 | def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result, 7 | gt_flags): 8 | self.pos_inds = pos_inds 9 | self.neg_inds = neg_inds 10 | self.pos_bboxes = bboxes[pos_inds] 11 | self.neg_bboxes = bboxes[neg_inds] 12 | self.pos_is_gt = gt_flags[pos_inds] 13 | 14 | self.num_gts = gt_bboxes.shape[0] 15 | self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1 16 | self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds, :] 17 | if assign_result.labels is not None: 18 | self.pos_gt_labels = assign_result.labels[pos_inds] 19 | else: 20 | self.pos_gt_labels = None 21 | 22 | @property 23 | def bboxes(self): 24 | return torch.cat([self.pos_bboxes, self.neg_bboxes]) 25 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .class_names import (voc_classes, imagenet_det_classes, 2 | imagenet_vid_classes, coco_classes, dataset_aliases, 3 | get_classes) 4 | from .coco_utils import coco_eval, fast_eval_recall, results2json 5 | from .eval_hooks import (DistEvalHook, DistEvalmAPHook, CocoDistEvalRecallHook, 6 | CocoDistEvalmAPHook) 7 | from .mean_ap import average_precision, eval_map, print_map_summary 8 | from .recall import (eval_recalls, print_recall_summary, plot_num_recall, 9 | plot_iou_recall) 10 | 11 | __all__ = [ 12 | 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes', 13 | 'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval', 14 | 'fast_eval_recall', 'results2json', 'DistEvalHook', 'DistEvalmAPHook', 15 | 'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision', 16 | 'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary', 17 | 'plot_num_recall', 'plot_iou_recall' 18 | ] 19 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/bbox_overlaps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | 
def bbox_overlaps(bboxes1, bboxes2, mode='iou'): 5 | """Calculate the ious between each bbox of bboxes1 and bboxes2. 6 | 7 | Args: 8 | bboxes1(ndarray): shape (n, 4) 9 | bboxes2(ndarray): shape (k, 4) 10 | mode(str): iou (intersection over union) or iof (intersection 11 | over foreground) 12 | 13 | Returns: 14 | ious(ndarray): shape (n, k) 15 | """ 16 | 17 | assert mode in ['iou', 'iof'] 18 | 19 | bboxes1 = bboxes1.astype(np.float32) 20 | bboxes2 = bboxes2.astype(np.float32) 21 | rows = bboxes1.shape[0] 22 | cols = bboxes2.shape[0] 23 | ious = np.zeros((rows, cols), dtype=np.float32) 24 | if rows * cols == 0: 25 | return ious 26 | exchange = False 27 | if bboxes1.shape[0] > bboxes2.shape[0]: 28 | bboxes1, bboxes2 = bboxes2, bboxes1 29 | ious = np.zeros((cols, rows), dtype=np.float32) 30 | exchange = True 31 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 32 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 33 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 34 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 35 | for i in range(bboxes1.shape[0]): 36 | x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0]) 37 | y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1]) 38 | x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2]) 39 | y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3]) 40 | overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum( 41 | y_end - y_start + 1, 0) 42 | if mode == 'iou': 43 | union = area1[i] + area2 - overlap 44 | else: 45 | union = area1[i] if not exchange else area2 46 | ious[i, :] = overlap / union 47 | if exchange: 48 | ious = ious.T 49 | return ious 50 | -------------------------------------------------------------------------------- /mmdet/core/loss/__init__.py: -------------------------------------------------------------------------------- 1 | from .losses import ( 2 | weighted_nll_loss, weighted_cross_entropy, weighted_binary_cross_entropy, 3 | sigmoid_focal_loss, py_sigmoid_focal_loss, weighted_sigmoid_focal_loss, 4 | mask_cross_entropy, smooth_l1_loss, weighted_smoothl1, accuracy, seg_cross_entropy) 5 | 6 | __all__ = [ 7 | 'weighted_nll_loss', 'weighted_cross_entropy', 8 | 'weighted_binary_cross_entropy', 'sigmoid_focal_loss', 9 | 'py_sigmoid_focal_loss', 'weighted_sigmoid_focal_loss', 10 | 'mask_cross_entropy', 'smooth_l1_loss', 'weighted_smoothl1', 'accuracy', 'seg_cross_entropy' 11 | ] 12 | -------------------------------------------------------------------------------- /mmdet/core/mask/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import split_combined_polys 2 | from .mask_target import mask_target 3 | 4 | __all__ = ['split_combined_polys', 'mask_target'] 5 | -------------------------------------------------------------------------------- /mmdet/core/mask/mask_target.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import mmcv 4 | 5 | 6 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list, 7 | cfg): 8 | cfg_list = [cfg for _ in range(len(pos_proposals_list))] 9 | mask_targets = map(mask_target_single, pos_proposals_list, 10 | pos_assigned_gt_inds_list, gt_masks_list, cfg_list) 11 | mask_targets = torch.cat(list(mask_targets)) 12 | return mask_targets 13 | 14 | 15 | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg): 16 | mask_size = cfg.mask_size 17 | num_pos = pos_proposals.size(0) 18 | mask_targets = [] 19 | if num_pos > 0: 20 | proposals_np = pos_proposals.cpu().numpy() 21 | pos_assigned_gt_inds = 
pos_assigned_gt_inds.cpu().numpy()
22 |         for i in range(num_pos):
23 |             gt_mask = gt_masks[pos_assigned_gt_inds[i]]
24 |             bbox = proposals_np[i, :].astype(np.int32)
25 |             x1, y1, x2, y2 = bbox
26 |             w = np.maximum(x2 - x1 + 1, 1)
27 |             h = np.maximum(y2 - y1 + 1, 1)
28 |             # mask is uint8 both before and after resizing
29 |             target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w],
30 |                                    (mask_size, mask_size))
31 |             mask_targets.append(target)
32 |         mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to(
33 |             pos_proposals.device)
34 |     else:
35 |         mask_targets = pos_proposals.new_zeros((0, mask_size, mask_size))
36 |     return mask_targets
--------------------------------------------------------------------------------
/mmdet/core/mask/utils.py:
--------------------------------------------------------------------------------
1 | import mmcv
2 | 
3 | 
4 | def split_combined_polys(polys, poly_lens, polys_per_mask):
5 |     """Split the combined 1-D polys into masks.
6 | 
7 |     A mask is represented as a list of polys, and a poly is represented as
8 |     a 1-D array. In the dataset, all masks are concatenated into a single 1-D
9 |     tensor. Here we need to split the tensor into original representations.
10 | 
11 |     Args:
12 |         polys (list): a list (length = image num) of 1-D tensors
13 |         poly_lens (list): a list (length = image num) of poly length
14 |         polys_per_mask (list): a list (length = image num) of poly number
15 |             of each mask
16 | 
17 |     Returns:
18 |         list: a list (length = image num) of list (length = mask num) of
19 |             list (length = poly num) of numpy array
20 |     """
21 |     mask_polys_list = []
22 |     for img_id in range(len(polys)):
23 |         polys_single = polys[img_id]
24 |         polys_lens_single = poly_lens[img_id].tolist()
25 |         polys_per_mask_single = polys_per_mask[img_id].tolist()
26 | 
27 |         split_polys = mmcv.slice_list(polys_single, polys_lens_single)
28 |         mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single)
29 |         mask_polys_list.append(mask_polys)
30 |     return mask_polys_list
--------------------------------------------------------------------------------
/mmdet/core/post_processing/__init__.py:
--------------------------------------------------------------------------------
1 | from .bbox_nms import multiclass_nms
2 | from .merge_augs import (merge_aug_proposals, merge_aug_bboxes,
3 |                          merge_aug_scores, merge_aug_masks, merge_aug_segs)
4 | 
5 | __all__ = [
6 |     'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes',
7 |     'merge_aug_scores', 'merge_aug_masks', 'merge_aug_segs'
8 | ]
--------------------------------------------------------------------------------
/mmdet/core/post_processing/bbox_nms.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | from mmdet.ops.nms import nms_wrapper
4 | 
5 | 
6 | def multiclass_nms(multi_bboxes, multi_scores, score_thr, nms_cfg, max_num=-1):
7 |     """NMS for multi-class bboxes.
8 | 
9 |     Args:
10 |         multi_bboxes (Tensor): shape (n, #class*4) or (n, 4)
11 |         multi_scores (Tensor): shape (n, #class)
12 |         score_thr (float): bbox threshold, bboxes with scores lower than it
13 |             will not be considered.
14 |         nms_cfg (dict): NMS config, e.g. dict(type='nms', iou_thr=0.5).
15 |         max_num (int): if there are more than max_num bboxes after NMS,
16 |             only top max_num will be kept.
17 | 
18 |     Returns:
19 |         tuple: (bboxes, labels), tensors of shape (k, 5) and (k, ). Labels
20 |             are 0-based.
21 |     """
22 |     num_classes = multi_scores.shape[1]
23 |     bboxes, labels = [], []
24 |     nms_cfg_ = nms_cfg.copy()
25 |     nms_type = nms_cfg_.pop('type', 'nms')
26 |     nms_op = getattr(nms_wrapper, nms_type)
27 |     for i in range(1, num_classes):
28 |         cls_inds = multi_scores[:, i] > score_thr
29 |         if not cls_inds.any():
30 |             continue
31 |         # get bboxes and scores of this class
32 |         if multi_bboxes.shape[1] == 4:
33 |             _bboxes = multi_bboxes[cls_inds, :]
34 |         else:
35 |             _bboxes = multi_bboxes[cls_inds, i * 4:(i + 1) * 4]
36 |         _scores = multi_scores[cls_inds, i]
37 |         cls_dets = torch.cat([_bboxes, _scores[:, None]], dim=1)
38 |         cls_dets, _ = nms_op(cls_dets, **nms_cfg_)
39 |         cls_labels = multi_bboxes.new_full(
40 |             (cls_dets.shape[0], ), i - 1, dtype=torch.long)
41 |         bboxes.append(cls_dets)
42 |         labels.append(cls_labels)
43 |     if bboxes:
44 |         bboxes = torch.cat(bboxes)
45 |         labels = torch.cat(labels)
46 |         if max_num > 0 and bboxes.shape[0] > max_num:  # max_num < 0 means no cap
47 |             _, inds = bboxes[:, -1].sort(descending=True)
48 |             inds = inds[:max_num]
49 |             bboxes = bboxes[inds]
50 |             labels = labels[inds]
51 |     else:
52 |         bboxes = multi_bboxes.new_zeros((0, 5))
53 |         labels = multi_bboxes.new_zeros((0, ), dtype=torch.long)
54 | 
55 |     return bboxes, labels
--------------------------------------------------------------------------------
/mmdet/core/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .dist_utils import allreduce_grads, DistOptimizerHook
2 | from .misc import tensor2imgs, unmap, multi_apply
3 | 
4 | __all__ = [
5 |     'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', 'unmap',
6 |     'multi_apply'
7 | ]
--------------------------------------------------------------------------------
/mmdet/core/utils/dist_utils.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 | 
3 | import torch.distributed as dist
4 | from torch._utils import (_flatten_dense_tensors, _unflatten_dense_tensors,
5 |                           _take_tensors)
6 | from mmcv.runner import OptimizerHook
7 | 
8 | 
9 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1):
10 |     if bucket_size_mb > 0:
11 |         bucket_size_bytes = bucket_size_mb * 1024 * 1024
12 |         buckets = _take_tensors(tensors, bucket_size_bytes)
13 |     else:
14 |         buckets = OrderedDict()
15 |         for tensor in tensors:
16 |             tp = tensor.type()
17 |             if tp not in buckets:
18 |                 buckets[tp] = []
19 |             buckets[tp].append(tensor)
20 |         buckets = buckets.values()
21 | 
22 |     for bucket in buckets:
23 |         flat_tensors = _flatten_dense_tensors(bucket)
24 |         dist.all_reduce(flat_tensors)
25 |         flat_tensors.div_(world_size)
26 |         for tensor, synced in zip(
27 |                 bucket, _unflatten_dense_tensors(flat_tensors, bucket)):
28 |             tensor.copy_(synced)
29 | 
30 | 
31 | def allreduce_grads(model, coalesce=True, bucket_size_mb=-1):
32 |     grads = [
33 |         param.grad.data for param in model.parameters()
34 |         if param.requires_grad and param.grad is not None
35 |     ]
36 |     world_size = dist.get_world_size()
37 |     if coalesce:
38 |         _allreduce_coalesced(grads, world_size, bucket_size_mb)
39 |     else:
40 |         for tensor in grads:
41 |             dist.all_reduce(tensor.div_(world_size))
42 | 
43 | 
44 | class DistOptimizerHook(OptimizerHook):
45 | 
46 |     def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1):
47 |         self.grad_clip = grad_clip
48 |         self.coalesce = coalesce
49 |         self.bucket_size_mb = bucket_size_mb
50 | 
51 |     def after_train_iter(self, runner):
52 |         runner.optimizer.zero_grad()
53 |         runner.outputs['loss'].backward()
54 |         allreduce_grads(runner.model, self.coalesce, self.bucket_size_mb)
55 |         if self.grad_clip is not None:
56 |             self.clip_grads(runner.model.parameters())
57 |         runner.optimizer.step()
--------------------------------------------------------------------------------
/mmdet/core/utils/misc.py:
--------------------------------------------------------------------------------
1 | from functools import partial
2 | 
3 | import mmcv
4 | import numpy as np
5 | from six.moves import map, zip
6 | 
7 | 
8 | def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True):
9 |     num_imgs = tensor.size(0)
10 |     mean = np.array(mean, dtype=np.float32)
11 |     std = np.array(std, dtype=np.float32)
12 |     imgs = []
13 |     for img_id in range(num_imgs):
14 |         img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0)
15 |         img = mmcv.imdenormalize(
16 |             img, mean, std, to_bgr=to_rgb).astype(np.uint8)
17 |         imgs.append(np.ascontiguousarray(img))
18 |     return imgs
19 | 
20 | 
21 | def multi_apply(func, *args, **kwargs):
22 |     pfunc = partial(func, **kwargs) if kwargs else func
23 |     map_results = map(pfunc, *args)
24 |     return tuple(map(list, zip(*map_results)))
25 | 
26 | 
27 | def unmap(data, count, inds, fill=0):
28 |     """Unmap a subset of items (data) back to the original set of items (of
29 |     size count)."""
30 |     if data.dim() == 1:
31 |         ret = data.new_full((count, ), fill)
32 |         ret[inds] = data
33 |     else:
34 |         new_size = (count, ) + data.size()[1:]
35 |         ret = data.new_full(new_size, fill)
36 |         ret[inds, :] = data
37 |     return ret
--------------------------------------------------------------------------------
/mmdet/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .custom import CustomDataset
2 | from .xml_style import XMLDataset
3 | from .coco import CocoDataset
4 | from .voc import VOCDataset
5 | from .loader import GroupSampler, DistributedGroupSampler, build_dataloader
6 | from .utils import to_tensor, random_scale, show_ann, get_dataset
7 | from .concat_dataset import ConcatDataset
8 | from .repeat_dataset import RepeatDataset
9 | from .extra_aug import ExtraAugmentation
10 | from .custom_panoptic import CustomPanopticDataset
11 | from .coco_panoptic import CocoPanopticDataset
12 | 
13 | __all__ = [
14 |     'CustomDataset', 'XMLDataset', 'CocoDataset', 'VOCDataset', 'GroupSampler',
15 |     'DistributedGroupSampler', 'build_dataloader', 'to_tensor', 'random_scale',
16 |     'show_ann', 'get_dataset', 'ConcatDataset', 'RepeatDataset',
17 |     'ExtraAugmentation', 'CustomPanopticDataset', 'CocoPanopticDataset'
18 | ]
--------------------------------------------------------------------------------
/mmdet/datasets/concat_dataset.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset
3 | 
4 | 
5 | class ConcatDataset(_ConcatDataset):
6 |     """A wrapper of concatenated dataset.
7 | 
8 |     Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but also
9 |     concatenates the group flag for image aspect ratio.
10 | 
11 |     Args:
12 |         datasets (list[:obj:`Dataset`]): A list of datasets.
13 | """ 14 | 15 | def __init__(self, datasets): 16 | super(ConcatDataset, self).__init__(datasets) 17 | self.CLASSES = datasets[0].CLASSES 18 | if hasattr(datasets[0], 'flag'): 19 | flags = [] 20 | for i in range(0, len(datasets)): 21 | flags.append(datasets[i].flag) 22 | self.flag = np.concatenate(flags) 23 | -------------------------------------------------------------------------------- /mmdet/datasets/loader/__init__.py: -------------------------------------------------------------------------------- 1 | from .build_loader import build_dataloader 2 | from .sampler import GroupSampler, DistributedGroupSampler 3 | 4 | __all__ = [ 5 | 'GroupSampler', 'DistributedGroupSampler', 'build_dataloader' 6 | ] 7 | -------------------------------------------------------------------------------- /mmdet/datasets/loader/build_loader.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | from mmcv.runner import get_dist_info 4 | from mmcv.parallel import collate 5 | from torch.utils.data import DataLoader 6 | 7 | from .sampler import GroupSampler, DistributedGroupSampler 8 | 9 | # https://github.com/pytorch/pytorch/issues/973 10 | import resource 11 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 12 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 13 | 14 | 15 | def build_dataloader(dataset, 16 | imgs_per_gpu, 17 | workers_per_gpu, 18 | num_gpus=1, 19 | dist=True, 20 | **kwargs): 21 | if dist: 22 | rank, world_size = get_dist_info() 23 | sampler = DistributedGroupSampler(dataset, imgs_per_gpu, world_size, 24 | rank) 25 | batch_size = imgs_per_gpu 26 | num_workers = workers_per_gpu 27 | else: 28 | if not kwargs.get('shuffle', True): 29 | sampler = None 30 | else: 31 | sampler = GroupSampler(dataset, imgs_per_gpu) 32 | batch_size = num_gpus * imgs_per_gpu 33 | num_workers = num_gpus * workers_per_gpu 34 | 35 | data_loader = DataLoader( 36 | dataset, 37 | batch_size=batch_size, 38 | sampler=sampler, 39 | num_workers=num_workers, 40 | collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu), 41 | pin_memory=False, 42 | **kwargs) 43 | 44 | return data_loader 45 | -------------------------------------------------------------------------------- /mmdet/datasets/repeat_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class RepeatDataset(object): 5 | 6 | def __init__(self, dataset, times): 7 | self.dataset = dataset 8 | self.times = times 9 | self.CLASSES = dataset.CLASSES 10 | if hasattr(self.dataset, 'flag'): 11 | self.flag = np.tile(self.dataset.flag, times) 12 | 13 | self._ori_len = len(self.dataset) 14 | 15 | def __getitem__(self, idx): 16 | return self.dataset[idx % self._ori_len] 17 | 18 | def __len__(self): 19 | return self.times * self._ori_len 20 | -------------------------------------------------------------------------------- /mmdet/datasets/voc.py: -------------------------------------------------------------------------------- 1 | from .xml_style import XMLDataset 2 | 3 | 4 | class VOCDataset(XMLDataset): 5 | 6 | CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 7 | 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 8 | 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 9 | 'tvmonitor') 10 | 11 | def __init__(self, **kwargs): 12 | super(VOCDataset, self).__init__(**kwargs) 13 | if 'VOC2007' in self.img_prefix: 14 | self.year = 2007 15 | elif 'VOC2012' in self.img_prefix: 16 | self.year = 
2012 17 | else: 18 | raise ValueError('Cannot infer dataset year from img_prefix') 19 | -------------------------------------------------------------------------------- /mmdet/datasets/xml_style.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import xml.etree.ElementTree as ET 3 | 4 | import mmcv 5 | import numpy as np 6 | 7 | from .custom import CustomDataset 8 | 9 | 10 | class XMLDataset(CustomDataset): 11 | 12 | def __init__(self, **kwargs): 13 | super(XMLDataset, self).__init__(**kwargs) 14 | self.cat2label = {cat: i + 1 for i, cat in enumerate(self.CLASSES)} 15 | 16 | def load_annotations(self, ann_file): 17 | img_infos = [] 18 | img_ids = mmcv.list_from_file(ann_file) 19 | for img_id in img_ids: 20 | filename = 'JPEGImages/{}.jpg'.format(img_id) 21 | xml_path = osp.join(self.img_prefix, 'Annotations', 22 | '{}.xml'.format(img_id)) 23 | tree = ET.parse(xml_path) 24 | root = tree.getroot() 25 | size = root.find('size') 26 | width = int(size.find('width').text) 27 | height = int(size.find('height').text) 28 | img_infos.append( 29 | dict(id=img_id, filename=filename, width=width, height=height)) 30 | return img_infos 31 | 32 | def get_ann_info(self, idx): 33 | img_id = self.img_infos[idx]['id'] 34 | xml_path = osp.join(self.img_prefix, 'Annotations', 35 | '{}.xml'.format(img_id)) 36 | tree = ET.parse(xml_path) 37 | root = tree.getroot() 38 | bboxes = [] 39 | labels = [] 40 | bboxes_ignore = [] 41 | labels_ignore = [] 42 | for obj in root.findall('object'): 43 | name = obj.find('name').text 44 | label = self.cat2label[name] 45 | difficult = int(obj.find('difficult').text) 46 | bnd_box = obj.find('bndbox') 47 | bbox = [ 48 | int(bnd_box.find('xmin').text), 49 | int(bnd_box.find('ymin').text), 50 | int(bnd_box.find('xmax').text), 51 | int(bnd_box.find('ymax').text) 52 | ] 53 | if difficult: 54 | bboxes_ignore.append(bbox) 55 | labels_ignore.append(label) 56 | else: 57 | bboxes.append(bbox) 58 | labels.append(label) 59 | if not bboxes: 60 | bboxes = np.zeros((0, 4)) 61 | labels = np.zeros((0, )) 62 | else: 63 | bboxes = np.array(bboxes, ndmin=2) - 1 64 | labels = np.array(labels) 65 | if not bboxes_ignore: 66 | bboxes_ignore = np.zeros((0, 4)) 67 | labels_ignore = np.zeros((0, )) 68 | else: 69 | bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1 70 | labels_ignore = np.array(labels_ignore) 71 | ann = dict( 72 | bboxes=bboxes.astype(np.float32), 73 | labels=labels.astype(np.int64), 74 | bboxes_ignore=bboxes_ignore.astype(np.float32), 75 | labels_ignore=labels_ignore.astype(np.int64)) 76 | return ann 77 | -------------------------------------------------------------------------------- /mmdet/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import * # noqa: F401,F403 2 | from .necks import * # noqa: F401,F403 3 | from .roi_extractors import * # noqa: F401,F403 4 | from .anchor_heads import * # noqa: F401,F403 5 | from .shared_heads import * # noqa: F401,F403 6 | from .bbox_heads import * # noqa: F401,F403 7 | from .mask_heads import * # noqa: F401,F403 8 | from .seg_heads import * # noqa: F401,F403 9 | from .detectors import * # noqa: F401,F403 10 | from .registry import (BACKBONES, NECKS, ROI_EXTRACTORS, SHARED_HEADS, HEADS, 11 | DETECTORS) 12 | from .builder import (build_backbone, build_neck, build_roi_extractor, 13 | build_shared_head, build_head, build_detector) 14 | 15 | __all__ = [ 16 | 'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 
'HEADS', 17 | 'DETECTORS', 'build_backbone', 'build_neck', 'build_roi_extractor', 18 | 'build_shared_head', 'build_head', 'build_detector' 19 | ] 20 | -------------------------------------------------------------------------------- /mmdet/models/anchor_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_head import AnchorHead 2 | from .rpn_head import RPNHead 3 | from .retina_head import RetinaHead 4 | from .ssd_head import SSDHead 5 | 6 | __all__ = ['AnchorHead', 'RPNHead', 'RetinaHead', 'SSDHead'] 7 | -------------------------------------------------------------------------------- /mmdet/models/anchor_heads/retina_head.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | from mmcv.cnn import normal_init 4 | 5 | from .anchor_head import AnchorHead 6 | from ..registry import HEADS 7 | from ..utils import bias_init_with_prob 8 | 9 | 10 | @HEADS.register_module 11 | class RetinaHead(AnchorHead): 12 | 13 | def __init__(self, 14 | num_classes, 15 | in_channels, 16 | stacked_convs=4, 17 | octave_base_scale=4, 18 | scales_per_octave=3, 19 | **kwargs): 20 | self.stacked_convs = stacked_convs 21 | self.octave_base_scale = octave_base_scale 22 | self.scales_per_octave = scales_per_octave 23 | octave_scales = np.array( 24 | [2**(i / scales_per_octave) for i in range(scales_per_octave)]) 25 | anchor_scales = octave_scales * octave_base_scale 26 | super(RetinaHead, self).__init__( 27 | num_classes, 28 | in_channels, 29 | anchor_scales=anchor_scales, 30 | use_sigmoid_cls=True, 31 | use_focal_loss=True, 32 | **kwargs) 33 | 34 | def _init_layers(self): 35 | self.relu = nn.ReLU(inplace=True) 36 | self.cls_convs = nn.ModuleList() 37 | self.reg_convs = nn.ModuleList() 38 | for i in range(self.stacked_convs): 39 | chn = self.in_channels if i == 0 else self.feat_channels 40 | self.cls_convs.append( 41 | nn.Conv2d(chn, self.feat_channels, 3, stride=1, padding=1)) 42 | self.reg_convs.append( 43 | nn.Conv2d(chn, self.feat_channels, 3, stride=1, padding=1)) 44 | self.retina_cls = nn.Conv2d( 45 | self.feat_channels, 46 | self.num_anchors * self.cls_out_channels, 47 | 3, 48 | padding=1) 49 | self.retina_reg = nn.Conv2d( 50 | self.feat_channels, self.num_anchors * 4, 3, padding=1) 51 | 52 | def init_weights(self): 53 | for m in self.cls_convs: 54 | normal_init(m, std=0.01) 55 | for m in self.reg_convs: 56 | normal_init(m, std=0.01) 57 | bias_cls = bias_init_with_prob(0.01) 58 | normal_init(self.retina_cls, std=0.01, bias=bias_cls) 59 | normal_init(self.retina_reg, std=0.01) 60 | 61 | def forward_single(self, x): 62 | cls_feat = x 63 | reg_feat = x 64 | for cls_conv in self.cls_convs: 65 | cls_feat = self.relu(cls_conv(cls_feat)) 66 | for reg_conv in self.reg_convs: 67 | reg_feat = self.relu(reg_conv(reg_feat)) 68 | cls_score = self.retina_cls(cls_feat) 69 | bbox_pred = self.retina_reg(reg_feat) 70 | return cls_score, bbox_pred 71 | -------------------------------------------------------------------------------- /mmdet/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import ResNet, make_res_layer 2 | from .resnext import ResNeXt 3 | from .ssd_vgg import SSDVGG 4 | 5 | __all__ = ['ResNet', 'make_res_layer', 'ResNeXt', 'SSDVGG'] 6 | -------------------------------------------------------------------------------- /mmdet/models/bbox_heads/__init__.py: 
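A note on the anchor scales constructed in retina_head.py above: each feature level gets scales_per_octave anchor scales spaced geometrically within one octave, so with the defaults octave_base_scale=4 and scales_per_octave=3 every location receives anchors at 4 * 2**(0/3), 4 * 2**(1/3), and 4 * 2**(2/3) times the stride-derived base size. A standalone sketch of that computation (values follow from the defaults shown above, not from any config in this repo):

    import numpy as np

    # Mirror RetinaHead.__init__ with its default arguments.
    octave_base_scale = 4
    scales_per_octave = 3
    octave_scales = np.array(
        [2**(i / scales_per_octave) for i in range(scales_per_octave)])
    anchor_scales = octave_scales * octave_base_scale
    print(anchor_scales)  # [4.0, 5.0397, 6.3496]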
-------------------------------------------------------------------------------- 1 | from .bbox_head import BBoxHead 2 | from .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead 3 | 4 | __all__ = ['BBoxHead', 'ConvFCBBoxHead', 'SharedFCBBoxHead'] 5 | -------------------------------------------------------------------------------- /mmdet/models/builder.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | from torch import nn 3 | 4 | from .registry import (BACKBONES, NECKS, ROI_EXTRACTORS, SHARED_HEADS, HEADS, 5 | DETECTORS) 6 | 7 | 8 | def _build_module(cfg, registry, default_args): 9 | assert isinstance(cfg, dict) and 'type' in cfg 10 | assert isinstance(default_args, dict) or default_args is None 11 | args = cfg.copy() 12 | obj_type = args.pop('type') 13 | if mmcv.is_str(obj_type): 14 | if obj_type not in registry.module_dict: 15 | raise KeyError('{} is not in the {} registry'.format( 16 | obj_type, registry.name)) 17 | obj_type = registry.module_dict[obj_type] 18 | elif not isinstance(obj_type, type): 19 | raise TypeError('type must be a str or valid type, but got {}'.format( 20 | type(obj_type))) 21 | if default_args is not None: 22 | for name, value in default_args.items(): 23 | args.setdefault(name, value) 24 | return obj_type(**args) 25 | 26 | 27 | def build(cfg, registry, default_args=None): 28 | if isinstance(cfg, list): 29 | modules = [_build_module(cfg_, registry, default_args) for cfg_ in cfg] 30 | return nn.Sequential(*modules) 31 | else: 32 | return _build_module(cfg, registry, default_args) 33 | 34 | 35 | def build_backbone(cfg): 36 | return build(cfg, BACKBONES) 37 | 38 | 39 | def build_neck(cfg): 40 | return build(cfg, NECKS) 41 | 42 | 43 | def build_roi_extractor(cfg): 44 | return build(cfg, ROI_EXTRACTORS) 45 | 46 | 47 | def build_shared_head(cfg): 48 | return build(cfg, SHARED_HEADS) 49 | 50 | 51 | def build_head(cfg): 52 | return build(cfg, HEADS) 53 | 54 | 55 | def build_detector(cfg, train_cfg=None, test_cfg=None): 56 | return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg)) 57 | -------------------------------------------------------------------------------- /mmdet/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseDetector 2 | from .single_stage import SingleStageDetector 3 | from .two_stage import TwoStageDetector 4 | from .rpn import RPN 5 | from .fast_rcnn import FastRCNN 6 | from .faster_rcnn import FasterRCNN 7 | from .mask_rcnn import MaskRCNN 8 | from .cascade_rcnn import CascadeRCNN 9 | from .htc import HybridTaskCascade 10 | from .retinanet import RetinaNet 11 | from .panotic_rcnn import PanoticRCNN 12 | from .two_stage_panotic import TwoStagePanoticDetector 13 | 14 | 15 | __all__ = [ 16 | 'BaseDetector', 'SingleStageDetector', 'TwoStageDetector', 'RPN', 17 | 'FastRCNN', 'FasterRCNN', 'MaskRCNN', 'CascadeRCNN', 'HybridTaskCascade', 18 | 'RetinaNet', 'PanoticRCNN','TwoStagePanoticDetector' 19 | ] 20 | -------------------------------------------------------------------------------- /mmdet/models/detectors/fast_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage import TwoStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class FastRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | bbox_roi_extractor, 11 | bbox_head, 12 | train_cfg, 13 | test_cfg, 14 | neck=None, 15 | 
shared_head=None, 16 | mask_roi_extractor=None, 17 | mask_head=None, 18 | pretrained=None): 19 | super(FastRCNN, self).__init__( 20 | backbone=backbone, 21 | neck=neck, 22 | shared_head=shared_head, 23 | bbox_roi_extractor=bbox_roi_extractor, 24 | bbox_head=bbox_head, 25 | train_cfg=train_cfg, 26 | test_cfg=test_cfg, 27 | mask_roi_extractor=mask_roi_extractor, 28 | mask_head=mask_head, 29 | pretrained=pretrained) 30 | 31 | def forward_test(self, imgs, img_metas, proposals, **kwargs): 32 | for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]: 33 | if not isinstance(var, list): 34 | raise TypeError('{} must be a list, but got {}'.format( 35 | name, type(var))) 36 | 37 | num_augs = len(imgs) 38 | if num_augs != len(img_metas): 39 | raise ValueError( 40 | 'num of augmentations ({}) != num of image meta ({})'.format( 41 | len(imgs), len(img_metas))) 42 | # TODO: remove the restriction of imgs_per_gpu == 1 when prepared 43 | imgs_per_gpu = imgs[0].size(0) 44 | assert imgs_per_gpu == 1 45 | 46 | if num_augs == 1: 47 | return self.simple_test(imgs[0], img_metas[0], proposals[0], 48 | **kwargs) 49 | else: 50 | return self.aug_test(imgs, img_metas, proposals, **kwargs) 51 | -------------------------------------------------------------------------------- /mmdet/models/detectors/faster_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage import TwoStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class FasterRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | rpn_head, 11 | bbox_roi_extractor, 12 | bbox_head, 13 | train_cfg, 14 | test_cfg, 15 | neck=None, 16 | shared_head=None, 17 | pretrained=None): 18 | super(FasterRCNN, self).__init__( 19 | backbone=backbone, 20 | neck=neck, 21 | shared_head=shared_head, 22 | rpn_head=rpn_head, 23 | bbox_roi_extractor=bbox_roi_extractor, 24 | bbox_head=bbox_head, 25 | train_cfg=train_cfg, 26 | test_cfg=test_cfg, 27 | pretrained=pretrained) 28 | -------------------------------------------------------------------------------- /mmdet/models/detectors/mask_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage import TwoStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class MaskRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | rpn_head, 11 | bbox_roi_extractor, 12 | bbox_head, 13 | mask_roi_extractor, 14 | mask_head, 15 | train_cfg, 16 | test_cfg, 17 | neck=None, 18 | shared_head=None, 19 | pretrained=None): 20 | super(MaskRCNN, self).__init__( 21 | backbone=backbone, 22 | neck=neck, 23 | shared_head=shared_head, 24 | rpn_head=rpn_head, 25 | bbox_roi_extractor=bbox_roi_extractor, 26 | bbox_head=bbox_head, 27 | mask_roi_extractor=mask_roi_extractor, 28 | mask_head=mask_head, 29 | train_cfg=train_cfg, 30 | test_cfg=test_cfg, 31 | pretrained=pretrained) 32 | -------------------------------------------------------------------------------- /mmdet/models/detectors/panotic_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage_panotic import TwoStagePanoticDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class PanoticRCNN(TwoStagePanoticDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | rpn_head, 12 | bbox_roi_extractor, 13 | bbox_head, 14 | mask_roi_extractor, 15 | mask_head, 16 | seg_head, 17 | train_cfg, 18 | test_cfg, 19 
| pretrained=None): 20 | super(PanoticRCNN, self).__init__( 21 | backbone=backbone, 22 | neck=neck, 23 | rpn_head=rpn_head, 24 | bbox_roi_extractor=bbox_roi_extractor, 25 | bbox_head=bbox_head, 26 | mask_roi_extractor=mask_roi_extractor, 27 | mask_head=mask_head, 28 | seg_head=seg_head, 29 | train_cfg=train_cfg, 30 | test_cfg=test_cfg, 31 | pretrained=pretrained) 32 | -------------------------------------------------------------------------------- /mmdet/models/detectors/retinanet.py: -------------------------------------------------------------------------------- 1 | from .single_stage import SingleStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class RetinaNet(SingleStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head, 12 | train_cfg=None, 13 | test_cfg=None, 14 | pretrained=None): 15 | super(RetinaNet, self).__init__(backbone, neck, bbox_head, train_cfg, 16 | test_cfg, pretrained) 17 | -------------------------------------------------------------------------------- /mmdet/models/detectors/rpn.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from mmdet.core import tensor2imgs, bbox_mapping 4 | from .base import BaseDetector 5 | from .test_mixins import RPNTestMixin 6 | from .. import builder 7 | from ..registry import DETECTORS 8 | 9 | 10 | @DETECTORS.register_module 11 | class RPN(BaseDetector, RPNTestMixin): 12 | 13 | def __init__(self, 14 | backbone, 15 | neck, 16 | rpn_head, 17 | train_cfg, 18 | test_cfg, 19 | pretrained=None): 20 | super(RPN, self).__init__() 21 | self.backbone = builder.build_backbone(backbone) 22 | self.neck = builder.build_neck(neck) if neck is not None else None 23 | self.rpn_head = builder.build_head(rpn_head) 24 | self.train_cfg = train_cfg 25 | self.test_cfg = test_cfg 26 | self.init_weights(pretrained=pretrained) 27 | 28 | def init_weights(self, pretrained=None): 29 | super(RPN, self).init_weights(pretrained) 30 | self.backbone.init_weights(pretrained=pretrained) 31 | if self.with_neck: 32 | self.neck.init_weights() 33 | self.rpn_head.init_weights() 34 | 35 | def extract_feat(self, img): 36 | x = self.backbone(img) 37 | if self.with_neck: 38 | x = self.neck(x) 39 | return x 40 | 41 | def forward_train(self, 42 | img, 43 | img_meta, 44 | gt_bboxes=None, 45 | gt_bboxes_ignore=None): 46 | if self.train_cfg.rpn.get('debug', False): 47 | self.rpn_head.debug_imgs = tensor2imgs(img) 48 | 49 | x = self.extract_feat(img) 50 | rpn_outs = self.rpn_head(x) 51 | 52 | rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta, self.train_cfg.rpn) 53 | losses = self.rpn_head.loss( 54 | *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) 55 | return losses 56 | 57 | def simple_test(self, img, img_meta, rescale=False): 58 | x = self.extract_feat(img) 59 | proposal_list = self.simple_test_rpn(x, img_meta, self.test_cfg.rpn) 60 | if rescale: 61 | for proposals, meta in zip(proposal_list, img_meta): 62 | proposals[:, :4] /= meta['scale_factor'] 63 | # TODO: remove this restriction 64 | return proposal_list[0].cpu().numpy() 65 | 66 | def aug_test(self, imgs, img_metas, rescale=False): 67 | proposal_list = self.aug_test_rpn( 68 | self.extract_feats(imgs), img_metas, self.test_cfg.rpn) 69 | if not rescale: 70 | for proposals, img_meta in zip(proposal_list, img_metas[0]): 71 | img_shape = img_meta['img_shape'] 72 | scale_factor = img_meta['scale_factor'] 73 | flip = img_meta['flip'] 74 | proposals[:, :4] = bbox_mapping(proposals[:, :4], img_shape, 75 | 
scale_factor, flip) 76 | # TODO: remove this restriction 77 | return proposal_list[0].cpu().numpy() 78 | 79 | def show_result(self, data, result, img_norm_cfg, dataset=None, top_k=20): 80 | """Show RPN proposals on the image. 81 | 82 | Although we assume batch size is 1, this method supports arbitrary 83 | batch size. 84 | """ 85 | img_tensor = data['img'][0] 86 | img_metas = data['img_meta'][0].data[0] 87 | imgs = tensor2imgs(img_tensor, **img_norm_cfg) 88 | assert len(imgs) == len(img_metas) 89 | for img, img_meta in zip(imgs, img_metas): 90 | h, w, _ = img_meta['img_shape'] 91 | img_show = img[:h, :w, :] 92 | mmcv.imshow_bboxes(img_show, result, top_k=top_k) 93 | -------------------------------------------------------------------------------- /mmdet/models/detectors/single_stage.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from .base import BaseDetector 4 | from .. import builder 5 | from ..registry import DETECTORS 6 | from mmdet.core import bbox2result 7 | 8 | 9 | @DETECTORS.register_module 10 | class SingleStageDetector(BaseDetector): 11 | 12 | def __init__(self, 13 | backbone, 14 | neck=None, 15 | bbox_head=None, 16 | train_cfg=None, 17 | test_cfg=None, 18 | pretrained=None): 19 | super(SingleStageDetector, self).__init__() 20 | self.backbone = builder.build_backbone(backbone) 21 | if neck is not None: 22 | self.neck = builder.build_neck(neck) 23 | self.bbox_head = builder.build_head(bbox_head) 24 | self.train_cfg = train_cfg 25 | self.test_cfg = test_cfg 26 | self.init_weights(pretrained=pretrained) 27 | 28 | def init_weights(self, pretrained=None): 29 | super(SingleStageDetector, self).init_weights(pretrained) 30 | self.backbone.init_weights(pretrained=pretrained) 31 | if self.with_neck: 32 | if isinstance(self.neck, nn.Sequential): 33 | for m in self.neck: 34 | m.init_weights() 35 | else: 36 | self.neck.init_weights() 37 | self.bbox_head.init_weights() 38 | 39 | def extract_feat(self, img): 40 | x = self.backbone(img) 41 | if self.with_neck: 42 | x = self.neck(x) 43 | return x 44 | 45 | def forward_train(self, 46 | img, 47 | img_metas, 48 | gt_bboxes, 49 | gt_labels, 50 | gt_bboxes_ignore=None): 51 | x = self.extract_feat(img) 52 | outs = self.bbox_head(x) 53 | loss_inputs = outs + (gt_bboxes, gt_labels, img_metas, self.train_cfg) 54 | losses = self.bbox_head.loss( 55 | *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) 56 | return losses 57 | 58 | def simple_test(self, img, img_meta, rescale=False): 59 | x = self.extract_feat(img) 60 | outs = self.bbox_head(x) 61 | bbox_inputs = outs + (img_meta, self.test_cfg, rescale) 62 | bbox_list = self.bbox_head.get_bboxes(*bbox_inputs) 63 | bbox_results = [ 64 | bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes) 65 | for det_bboxes, det_labels in bbox_list 66 | ] 67 | return bbox_results[0] 68 | 69 | def aug_test(self, imgs, img_metas, rescale=False): 70 | raise NotImplementedError 71 | -------------------------------------------------------------------------------- /mmdet/models/mask_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcn_mask_head import FCNMaskHead 2 | from .htc_mask_head import HTCMaskHead 3 | from .fused_semantic_head import FusedSemanticHead 4 | 5 | __all__ = ['FCNMaskHead', 'HTCMaskHead', 'FusedSemanticHead'] 6 | -------------------------------------------------------------------------------- /mmdet/models/mask_heads/fused_semantic_head.py: 
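simple_test in single_stage.py above hands each (det_bboxes, det_labels) pair to mmdet.core.bbox2result to group detections by class. As a rough sketch of what that conversion does (a hypothetical reimplementation for illustration; the actual helper lives in mmdet/core and may differ in detail):

    import numpy as np

    def bbox2result_sketch(det_bboxes, det_labels, num_classes):
        # det_bboxes: (n, 5) tensor of [x1, y1, x2, y2, score]
        # det_labels: (n,) tensor of 0-based foreground class indices
        # num_classes includes background, hence num_classes - 1 output lists
        if det_bboxes.shape[0] == 0:
            return [np.zeros((0, 5), dtype=np.float32)
                    for _ in range(num_classes - 1)]
        det_bboxes = det_bboxes.cpu().numpy()
        det_labels = det_labels.cpu().numpy()
        return [det_bboxes[det_labels == i, :]
                for i in range(num_classes - 1)]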
-------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from mmcv.cnn import kaiming_init 4 | 5 | from ..registry import HEADS 6 | from ..utils import ConvModule 7 | 8 | 9 | @HEADS.register_module 10 | class FusedSemanticHead(nn.Module): 11 | """Multi-level fused semantic segmentation head. 12 | 13 | in_1 -> 1x1 conv --- 14 | | 15 | in_2 -> 1x1 conv -- | 16 | || 17 | in_3 -> 1x1 conv - || 18 | ||| /-> 1x1 conv (mask prediction) 19 | in_4 -> 1x1 conv -----> 3x3 convs (*4) 20 | | \-> 1x1 conv (feature) 21 | in_5 -> 1x1 conv --- 22 | """ # noqa: W605 23 | 24 | def __init__(self, 25 | num_ins, 26 | fusion_level, 27 | num_convs=4, 28 | in_channels=256, 29 | conv_out_channels=256, 30 | num_classes=183, 31 | ignore_label=255, 32 | loss_weight=0.2, 33 | conv_cfg=None, 34 | normalize=None): 35 | super(FusedSemanticHead, self).__init__() 36 | self.num_ins = num_ins 37 | self.fusion_level = fusion_level 38 | self.num_convs = num_convs 39 | self.in_channels = in_channels 40 | self.conv_out_channels = conv_out_channels 41 | self.num_classes = num_classes 42 | self.ignore_label = ignore_label 43 | self.loss_weight = loss_weight 44 | self.conv_cfg = conv_cfg 45 | self.normalize = normalize 46 | self.with_bias = normalize is None 47 | 48 | self.lateral_convs = nn.ModuleList() 49 | for i in range(self.num_ins): 50 | self.lateral_convs.append( 51 | ConvModule( 52 | self.in_channels, 53 | self.in_channels, 54 | 1, 55 | conv_cfg=self.conv_cfg, 56 | normalize=self.normalize, 57 | bias=self.with_bias, 58 | inplace=False)) 59 | 60 | self.convs = nn.ModuleList() 61 | for i in range(self.num_convs): 62 | in_channels = self.in_channels if i == 0 else conv_out_channels 63 | self.convs.append( 64 | ConvModule( 65 | in_channels, 66 | conv_out_channels, 67 | 3, 68 | padding=1, 69 | conv_cfg=self.conv_cfg, 70 | normalize=self.normalize, 71 | bias=self.with_bias)) 72 | self.conv_embedding = ConvModule( 73 | conv_out_channels, 74 | conv_out_channels, 75 | 1, 76 | conv_cfg=self.conv_cfg, 77 | normalize=self.normalize, 78 | bias=self.with_bias) 79 | self.conv_logits = nn.Conv2d(conv_out_channels, self.num_classes, 1) 80 | 81 | self.criterion = nn.CrossEntropyLoss(ignore_index=ignore_label) 82 | 83 | def init_weights(self): 84 | kaiming_init(self.conv_logits) 85 | 86 | def forward(self, feats): 87 | x = self.lateral_convs[self.fusion_level](feats[self.fusion_level]) 88 | fused_size = tuple(x.shape[-2:]) 89 | for i, feat in enumerate(feats): 90 | if i != self.fusion_level: 91 | feat = F.interpolate( 92 | feat, 93 | size=fused_size, 94 | mode='bilinear', 95 | align_corners=True) 96 | x += self.lateral_convs[i](feat) 97 | 98 | for i in range(self.num_convs): 99 | x = self.convs[i](x) 100 | 101 | mask_pred = self.conv_logits(x) 102 | x = self.conv_embedding(x) 103 | return mask_pred, x 104 | 105 | def loss(self, mask_pred, labels): 106 | labels = labels.squeeze(1).long() 107 | loss_semantic_seg = self.criterion(mask_pred, labels) 108 | loss_semantic_seg *= self.loss_weight 109 | return loss_semantic_seg 110 | -------------------------------------------------------------------------------- /mmdet/models/mask_heads/htc_mask_head.py: -------------------------------------------------------------------------------- 1 | from .fcn_mask_head import FCNMaskHead 2 | from ..registry import HEADS 3 | from ..utils import ConvModule 4 | 5 | 6 | @HEADS.register_module 7 | class HTCMaskHead(FCNMaskHead): 8 | 9 | def __init__(self, *args, **kwargs): 10 | 
super(HTCMaskHead, self).__init__(*args, **kwargs) 11 | self.conv_res = ConvModule( 12 | self.conv_out_channels, 13 | self.conv_out_channels, 14 | 1, 15 | conv_cfg=self.conv_cfg, 16 | normalize=self.normalize, 17 | bias=self.with_bias) 18 | 19 | def init_weights(self): 20 | super(HTCMaskHead, self).init_weights() 21 | self.conv_res.init_weights() 22 | 23 | def forward(self, x, res_feat=None, return_logits=True, return_feat=True): 24 | if res_feat is not None: 25 | res_feat = self.conv_res(res_feat) 26 | x = x + res_feat 27 | for conv in self.convs: 28 | x = conv(x) 29 | res_feat = x 30 | outs = [] 31 | if return_logits: 32 | x = self.upsample(x) 33 | if self.upsample_method == 'deconv': 34 | x = self.relu(x) 35 | mask_pred = self.conv_logits(x) 36 | outs.append(mask_pred) 37 | if return_feat: 38 | outs.append(res_feat) 39 | return outs if len(outs) > 1 else outs[0] 40 | -------------------------------------------------------------------------------- /mmdet/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .fpn import FPN 2 | 3 | __all__ = ['FPN'] 4 | -------------------------------------------------------------------------------- /mmdet/models/registry.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class Registry(object): 5 | 6 | def __init__(self, name): 7 | self._name = name 8 | self._module_dict = dict() 9 | 10 | @property 11 | def name(self): 12 | return self._name 13 | 14 | @property 15 | def module_dict(self): 16 | return self._module_dict 17 | 18 | def _register_module(self, module_class): 19 | """Register a module class. 20 | 21 | Args: 22 | module_class (type): Module class to be registered; must be a subclass of nn.Module. 23 | """ 24 | if not issubclass(module_class, nn.Module): 25 | raise TypeError('module_class must be a subclass of nn.Module, but got {}'. 26 | format(module_class)) 27 | module_name = module_class.__name__ 28 | if module_name in self._module_dict: 29 | raise KeyError('{} is already registered in {}'.format( 30 | module_name, self.name)) 31 | self._module_dict[module_name] = module_class 32 | 33 | def register_module(self, cls): 34 | self._register_module(cls) 35 | return cls 36 | 37 | 38 | BACKBONES = Registry('backbone') 39 | NECKS = Registry('neck') 40 | ROI_EXTRACTORS = Registry('roi_extractor') 41 | SHARED_HEADS = Registry('shared_head') 42 | HEADS = Registry('head') 43 | DETECTORS = Registry('detector') 44 | -------------------------------------------------------------------------------- /mmdet/models/roi_extractors/__init__.py: -------------------------------------------------------------------------------- 1 | from .single_level import SingleRoIExtractor 2 | 3 | __all__ = ['SingleRoIExtractor'] 4 | -------------------------------------------------------------------------------- /mmdet/models/roi_extractors/single_level.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from mmdet import ops 7 | from ..registry import ROI_EXTRACTORS 8 | 9 | 10 | @ROI_EXTRACTORS.register_module 11 | class SingleRoIExtractor(nn.Module): 12 | """Extract RoI features from a single level feature map. 13 | 14 | If there are multiple input feature levels, each RoI is mapped to a level 15 | according to its scale. 16 | 17 | Args: 18 | roi_layer (dict): Specify RoI layer type and arguments. 19 | out_channels (int): Output channels of RoI layers. 
20 | featmap_strides (int): Strides of input feature maps. 21 | finest_scale (int): Scale threshold of mapping to level 0. 22 | """ 23 | 24 | def __init__(self, 25 | roi_layer, 26 | out_channels, 27 | featmap_strides, 28 | finest_scale=56): 29 | super(SingleRoIExtractor, self).__init__() 30 | self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides) 31 | self.out_channels = out_channels 32 | self.featmap_strides = featmap_strides 33 | self.finest_scale = finest_scale 34 | 35 | @property 36 | def num_inputs(self): 37 | """int: Input feature map levels.""" 38 | return len(self.featmap_strides) 39 | 40 | def init_weights(self): 41 | pass 42 | 43 | def build_roi_layers(self, layer_cfg, featmap_strides): 44 | cfg = layer_cfg.copy() 45 | layer_type = cfg.pop('type') 46 | assert hasattr(ops, layer_type) 47 | layer_cls = getattr(ops, layer_type) 48 | roi_layers = nn.ModuleList( 49 | [layer_cls(spatial_scale=1 / s, **cfg) for s in featmap_strides]) 50 | return roi_layers 51 | 52 | def map_roi_levels(self, rois, num_levels): 53 | """Map rois to corresponding feature levels by scales. 54 | 55 | - scale < finest_scale: level 0 56 | - finest_scale <= scale < finest_scale * 2: level 1 57 | - finest_scale * 2 <= scale < finest_scale * 4: level 2 58 | - scale >= finest_scale * 4: level 3 59 | 60 | Args: 61 | rois (Tensor): Input RoIs, shape (k, 5). 62 | num_levels (int): Total level number. 63 | 64 | Returns: 65 | Tensor: Level index (0-based) of each RoI, shape (k, ) 66 | """ 67 | scale = torch.sqrt( 68 | (rois[:, 3] - rois[:, 1] + 1) * (rois[:, 4] - rois[:, 2] + 1)) 69 | target_lvls = torch.floor(torch.log2(scale / self.finest_scale + 1e-6)) 70 | target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long() 71 | return target_lvls 72 | 73 | def forward(self, feats, rois): 74 | if len(feats) == 1: 75 | return self.roi_layers[0](feats[0], rois) 76 | 77 | out_size = self.roi_layers[0].out_size 78 | num_levels = len(feats) 79 | target_lvls = self.map_roi_levels(rois, num_levels) 80 | roi_feats = torch.cuda.FloatTensor(rois.size()[0], self.out_channels, 81 | out_size, out_size).fill_(0) 82 | for i in range(num_levels): 83 | inds = target_lvls == i 84 | if inds.any(): 85 | rois_ = rois[inds, :] 86 | roi_feats_t = self.roi_layers[i](feats[i], rois_) 87 | roi_feats[inds] += roi_feats_t 88 | return roi_feats 89 | -------------------------------------------------------------------------------- /mmdet/models/seg_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcn_seg_head import FCNSegHead 2 | 3 | __all__ = ['FCNSegHead'] 4 | -------------------------------------------------------------------------------- /mmdet/models/shared_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .res_layer import ResLayer 2 | 3 | __all__ = ['ResLayer'] 4 | -------------------------------------------------------------------------------- /mmdet/models/shared_heads/res_layer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import torch.nn as nn 4 | from mmcv.cnn import constant_init, kaiming_init 5 | from mmcv.runner import load_checkpoint 6 | 7 | from ..backbones import ResNet, make_res_layer 8 | from ..registry import SHARED_HEADS 9 | 10 | 11 | @SHARED_HEADS.register_module 12 | class ResLayer(nn.Module): 13 | 14 | def __init__(self, 15 | depth, 16 | stage=3, 17 | stride=2, 18 | dilation=1, 19 | style='pytorch', 20 | 
normalize=dict(type='BN', frozen=False), 21 | norm_eval=True, 22 | with_cp=False, 23 | dcn=None): 24 | super(ResLayer, self).__init__() 25 | self.norm_eval = norm_eval 26 | self.normalize = normalize 27 | self.stage = stage 28 | block, stage_blocks = ResNet.arch_settings[depth] 29 | stage_block = stage_blocks[stage] 30 | planes = 64 * 2**stage 31 | inplanes = 64 * 2**(stage - 1) * block.expansion 32 | 33 | res_layer = make_res_layer( 34 | block, 35 | inplanes, 36 | planes, 37 | stage_block, 38 | stride=stride, 39 | dilation=dilation, 40 | style=style, 41 | with_cp=with_cp, 42 | normalize=self.normalize, 43 | dcn=dcn) 44 | self.add_module('layer{}'.format(stage + 1), res_layer) 45 | 46 | def init_weights(self, pretrained=None): 47 | if isinstance(pretrained, str): 48 | logger = logging.getLogger() 49 | load_checkpoint(self, pretrained, strict=False, logger=logger) 50 | elif pretrained is None: 51 | for m in self.modules(): 52 | if isinstance(m, nn.Conv2d): 53 | kaiming_init(m) 54 | elif isinstance(m, nn.BatchNorm2d): 55 | constant_init(m, 1) 56 | else: 57 | raise TypeError('pretrained must be a str or None') 58 | 59 | def forward(self, x): 60 | res_layer = getattr(self, 'layer{}'.format(self.stage + 1)) 61 | out = res_layer(x) 62 | return out 63 | 64 | def train(self, mode=True): 65 | super(ResLayer, self).train(mode) 66 | if self.norm_eval: 67 | for m in self.modules(): 68 | if isinstance(m, nn.BatchNorm2d): 69 | m.eval() 70 | -------------------------------------------------------------------------------- /mmdet/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .conv_ws import conv_ws_2d, ConvWS2d 2 | from .conv_module import build_conv_layer, ConvModule 3 | from .norm import build_norm_layer 4 | from .weight_init import (xavier_init, normal_init, uniform_init, kaiming_init, 5 | bias_init_with_prob) 6 | 7 | __all__ = [ 8 | 'conv_ws_2d', 'ConvWS2d', 'build_conv_layer', 'ConvModule', 9 | 'build_norm_layer', 'xavier_init', 'normal_init', 'uniform_init', 10 | 'kaiming_init', 'bias_init_with_prob' 11 | ] 12 | -------------------------------------------------------------------------------- /mmdet/models/utils/conv_ws.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | def conv_ws_2d(input, 6 | weight, 7 | bias=None, 8 | stride=1, 9 | padding=0, 10 | dilation=1, 11 | groups=1, 12 | eps=1e-5): 13 | c_in = weight.size(0) 14 | weight_flat = weight.view(c_in, -1) 15 | mean = weight_flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1) 16 | std = weight_flat.std(dim=1, keepdim=True).view(c_in, 1, 1, 1) 17 | weight = (weight - mean) / (std + eps) 18 | return F.conv2d(input, weight, bias, stride, padding, dilation, groups) 19 | 20 | 21 | class ConvWS2d(nn.Conv2d): 22 | 23 | def __init__(self, 24 | in_channels, 25 | out_channels, 26 | kernel_size, 27 | stride=1, 28 | padding=0, 29 | dilation=1, 30 | groups=1, 31 | bias=True, 32 | eps=1e-5): 33 | super(ConvWS2d, self).__init__( 34 | in_channels, 35 | out_channels, 36 | kernel_size, 37 | stride=stride, 38 | padding=padding, 39 | dilation=dilation, 40 | groups=groups, 41 | bias=bias) 42 | self.eps = eps 43 | 44 | def forward(self, x): 45 | return conv_ws_2d(x, self.weight, self.bias, self.stride, self.padding, 46 | self.dilation, self.groups, self.eps) 47 | -------------------------------------------------------------------------------- /mmdet/models/utils/norm.py: 
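conv_ws.py above implements Weight Standardization, the "ws" in the gn+ws configs: every output filter is normalized to zero mean and unit variance (plus eps) before the convolution runs, while the stored parameters keep their raw values. A quick standalone check of that property using conv_ws_2d as defined above (tensor shapes are arbitrary examples):

    import torch

    from mmdet.models.utils.conv_ws import conv_ws_2d

    x = torch.randn(2, 8, 16, 16)
    w = torch.randn(4, 8, 3, 3) * 5 + 3  # deliberately badly scaled weights
    y = conv_ws_2d(x, w, stride=1, padding=1)
    assert y.shape == (2, 4, 16, 16)

    # The convolution effectively sees a standardized copy of w:
    w_flat = w.view(4, -1)
    w_hat = (w - w_flat.mean(1).view(4, 1, 1, 1)) / (
        w_flat.std(1).view(4, 1, 1, 1) + 1e-5)
    print(w_hat.view(4, -1).mean(1))  # ~0 for each filter
    print(w_hat.view(4, -1).std(1))   # ~1 for each filter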
-------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | norm_cfg = { 5 | # format: layer_type: (abbreviation, module) 6 | 'BN': ('bn', nn.BatchNorm2d), 7 | 'SyncBN': ('bn', None), 8 | 'GN': ('gn', nn.GroupNorm), 9 | # and potentially 'SN' 10 | } 11 | 12 | 13 | def build_norm_layer(cfg, num_features, postfix=''): 14 | """Build a normalization layer. 15 | 16 | Args: 17 | cfg (dict): cfg should contain: 18 | type (str): identifies the norm layer type. 19 | layer args: args needed to instantiate a norm layer. 20 | frozen (bool, optional): whether to stop gradient updates 21 | of the norm layer; helpful for freezing norm layers 22 | in a backbone. 23 | num_features (int): number of input channels. 24 | postfix (int, str): appended to the norm abbreviation to 25 | create a named layer. 26 | 27 | Returns: 28 | name (str): abbreviation + postfix 29 | layer (nn.Module): created norm layer 30 | """ 31 | assert isinstance(cfg, dict) and 'type' in cfg 32 | cfg_ = cfg.copy() 33 | 34 | layer_type = cfg_.pop('type') 35 | if layer_type not in norm_cfg: 36 | raise KeyError('Unrecognized norm type {}'.format(layer_type)) 37 | else: 38 | abbr, norm_layer = norm_cfg[layer_type] 39 | if norm_layer is None: 40 | raise NotImplementedError 41 | 42 | assert isinstance(postfix, (int, str)) 43 | name = abbr + str(postfix) 44 | 45 | frozen = cfg_.pop('frozen', False) 46 | cfg_.setdefault('eps', 1e-5) 47 | if layer_type != 'GN': 48 | layer = norm_layer(num_features, **cfg_) 49 | else: 50 | assert 'num_groups' in cfg_ 51 | layer = norm_layer(num_channels=num_features, **cfg_) 52 | 53 | if frozen: 54 | for param in layer.parameters(): 55 | param.requires_grad = False 56 | 57 | return name, layer 58 | -------------------------------------------------------------------------------- /mmdet/models/utils/weight_init.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | 4 | 5 | def xavier_init(module, gain=1, bias=0, distribution='normal'): 6 | assert distribution in ['uniform', 'normal'] 7 | if distribution == 'uniform': 8 | nn.init.xavier_uniform_(module.weight, gain=gain) 9 | else: 10 | nn.init.xavier_normal_(module.weight, gain=gain) 11 | if hasattr(module, 'bias') and module.bias is not None: 12 | nn.init.constant_(module.bias, bias) 13 | 14 | 15 | def normal_init(module, mean=0, std=1, bias=0): 16 | nn.init.normal_(module.weight, mean, std) 17 | if hasattr(module, 'bias') and module.bias is not None: 18 | nn.init.constant_(module.bias, bias) 19 | 20 | 21 | def uniform_init(module, a=0, b=1, bias=0): 22 | nn.init.uniform_(module.weight, a, b) 23 | if hasattr(module, 'bias') and module.bias is not None: 24 | nn.init.constant_(module.bias, bias) 25 | 26 | 27 | def kaiming_init(module, 28 | mode='fan_out', 29 | nonlinearity='relu', 30 | bias=0, 31 | distribution='normal'): 32 | assert distribution in ['uniform', 'normal'] 33 | if distribution == 'uniform': 34 | nn.init.kaiming_uniform_( 35 | module.weight, mode=mode, nonlinearity=nonlinearity) 36 | else: 37 | nn.init.kaiming_normal_( 38 | module.weight, mode=mode, nonlinearity=nonlinearity) 39 | if hasattr(module, 'bias') and module.bias is not None: 40 | nn.init.constant_(module.bias, bias) 41 | 42 | 43 | def bias_init_with_prob(prior_prob): 44 | """Initialize conv/fc bias value according to a given probability.""" 45 | bias_init = float(-np.log((1 - prior_prob) / prior_prob)) 46 | return bias_init 47 | -------------------------------------------------------------------------------- /mmdet/ops/__init__.py: 
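bias_init_with_prob in weight_init.py above inverts the sigmoid so that a freshly initialized classification layer predicts a chosen prior probability; RetinaHead calls it with prior_prob=0.01 to realize the focal-loss prior. A short standalone check of the formula:

    import math

    prior_prob = 0.01
    # sigmoid(b) = p  =>  b = -log((1 - p) / p)
    b = -math.log((1 - prior_prob) / prior_prob)
    print(b)                       # ~ -4.595, matches bias_init_with_prob(0.01)
    print(1 / (1 + math.exp(-b)))  # ~ 0.01, the requested prior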
-------------------------------------------------------------------------------- 1 | from .dcn import (DeformConv, DeformConvPack, ModulatedDeformConv, 2 | ModulatedDeformConvPack, DeformRoIPooling, 3 | DeformRoIPoolingPack, ModulatedDeformRoIPoolingPack, 4 | deform_conv, modulated_deform_conv, deform_roi_pooling) 5 | from .nms import nms, soft_nms 6 | from .roi_align import RoIAlign, roi_align 7 | from .roi_pool import RoIPool, roi_pool 8 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss 9 | 10 | __all__ = [ 11 | 'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool', 12 | 'DeformConv', 'DeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 13 | 'ModulatedDeformRoIPoolingPack', 'ModulatedDeformConv', 14 | 'ModulatedDeformConvPack', 'deform_conv', 'modulated_deform_conv', 15 | 'deform_roi_pooling', 'SigmoidFocalLoss', 'sigmoid_focal_loss' 16 | ] 17 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.deform_conv import deform_conv, modulated_deform_conv 2 | from .functions.deform_pool import deform_roi_pooling 3 | from .modules.deform_conv import (DeformConv, ModulatedDeformConv, 4 | DeformConvPack, ModulatedDeformConvPack) 5 | from .modules.deform_pool import (DeformRoIPooling, DeformRoIPoolingPack, 6 | ModulatedDeformRoIPoolingPack) 7 | 8 | __all__ = [ 9 | 'DeformConv', 'DeformConvPack', 'ModulatedDeformConv', 10 | 'ModulatedDeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 11 | 'ModulatedDeformRoIPoolingPack', 'deform_conv', 'modulated_deform_conv', 12 | 'deform_roi_pooling' 13 | ] 14 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/mmdet/ops/dcn/functions/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/dcn/functions/deform_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from .. 
import deform_pool_cuda 5 | 6 | 7 | class DeformRoIPoolingFunction(Function): 8 | 9 | @staticmethod 10 | def forward(ctx, 11 | data, 12 | rois, 13 | offset, 14 | spatial_scale, 15 | out_size, 16 | out_channels, 17 | no_trans, 18 | group_size=1, 19 | part_size=None, 20 | sample_per_part=4, 21 | trans_std=.0): 22 | ctx.spatial_scale = spatial_scale 23 | ctx.out_size = out_size 24 | ctx.out_channels = out_channels 25 | ctx.no_trans = no_trans 26 | ctx.group_size = group_size 27 | ctx.part_size = out_size if part_size is None else part_size 28 | ctx.sample_per_part = sample_per_part 29 | ctx.trans_std = trans_std 30 | 31 | assert 0.0 <= ctx.trans_std <= 1.0 32 | if not data.is_cuda: 33 | raise NotImplementedError 34 | 35 | n = rois.shape[0] 36 | output = data.new_empty(n, out_channels, out_size, out_size) 37 | output_count = data.new_empty(n, out_channels, out_size, out_size) 38 | deform_pool_cuda.deform_psroi_pooling_cuda_forward( 39 | data, rois, offset, output, output_count, ctx.no_trans, 40 | ctx.spatial_scale, ctx.out_channels, ctx.group_size, ctx.out_size, 41 | ctx.part_size, ctx.sample_per_part, ctx.trans_std) 42 | 43 | if data.requires_grad or rois.requires_grad or offset.requires_grad: 44 | ctx.save_for_backward(data, rois, offset) 45 | ctx.output_count = output_count 46 | 47 | return output 48 | 49 | @staticmethod 50 | def backward(ctx, grad_output): 51 | if not grad_output.is_cuda: 52 | raise NotImplementedError 53 | 54 | data, rois, offset = ctx.saved_tensors 55 | output_count = ctx.output_count 56 | grad_input = torch.zeros_like(data) 57 | grad_rois = None 58 | grad_offset = torch.zeros_like(offset) 59 | 60 | deform_pool_cuda.deform_psroi_pooling_cuda_backward( 61 | grad_output, data, rois, offset, output_count, grad_input, 62 | grad_offset, ctx.no_trans, ctx.spatial_scale, ctx.out_channels, 63 | ctx.group_size, ctx.out_size, ctx.part_size, ctx.sample_per_part, 64 | ctx.trans_std) 65 | return (grad_input, grad_rois, grad_offset, None, None, None, None, 66 | None, None, None, None) 67 | 68 | 69 | deform_roi_pooling = DeformRoIPoolingFunction.apply 70 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/mmdet/ops/dcn/modules/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/dcn/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='deform_conv', 6 | ext_modules=[ 7 | CUDAExtension('deform_conv_cuda', [ 8 | 'src/deform_conv_cuda.cpp', 9 | 'src/deform_conv_cuda_kernel.cu', 10 | ]), 11 | CUDAExtension('deform_pool_cuda', [ 12 | 'src/deform_pool_cuda.cpp', 'src/deform_pool_cuda_kernel.cu' 13 | ]), 14 | ], 15 | cmdclass={'build_ext': BuildExtension}) 16 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/src/deform_pool_cuda.cpp: -------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c 3 | 4 | // based on 5 | // author: Charles Shang 6 | // 
https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | void DeformablePSROIPoolForward( 14 | const at::Tensor data, const at::Tensor bbox, const at::Tensor trans, 15 | at::Tensor out, at::Tensor top_count, const int batch, const int channels, 16 | const int height, const int width, const int num_bbox, 17 | const int channels_trans, const int no_trans, const float spatial_scale, 18 | const int output_dim, const int group_size, const int pooled_size, 19 | const int part_size, const int sample_per_part, const float trans_std); 20 | 21 | void DeformablePSROIPoolBackwardAcc( 22 | const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox, 23 | const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad, 24 | at::Tensor trans_grad, const int batch, const int channels, 25 | const int height, const int width, const int num_bbox, 26 | const int channels_trans, const int no_trans, const float spatial_scale, 27 | const int output_dim, const int group_size, const int pooled_size, 28 | const int part_size, const int sample_per_part, const float trans_std); 29 | 30 | void deform_psroi_pooling_cuda_forward( 31 | at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, 32 | at::Tensor top_count, const int no_trans, const float spatial_scale, 33 | const int output_dim, const int group_size, const int pooled_size, 34 | const int part_size, const int sample_per_part, const float trans_std) { 35 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 36 | 37 | const int batch = input.size(0); 38 | const int channels = input.size(1); 39 | const int height = input.size(2); 40 | const int width = input.size(3); 41 | const int channels_trans = no_trans ? 2 : trans.size(1); 42 | 43 | const int num_bbox = bbox.size(0); 44 | if (num_bbox != out.size(0)) 45 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 46 | out.size(0), num_bbox); 47 | 48 | DeformablePSROIPoolForward( 49 | input, bbox, trans, out, top_count, batch, channels, height, width, 50 | num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size, 51 | pooled_size, part_size, sample_per_part, trans_std); 52 | } 53 | 54 | void deform_psroi_pooling_cuda_backward( 55 | at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, 56 | at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, 57 | const int no_trans, const float spatial_scale, const int output_dim, 58 | const int group_size, const int pooled_size, const int part_size, 59 | const int sample_per_part, const float trans_std) { 60 | AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); 61 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 62 | 63 | const int batch = input.size(0); 64 | const int channels = input.size(1); 65 | const int height = input.size(2); 66 | const int width = input.size(3); 67 | const int channels_trans = no_trans ? 
2 : trans.size(1); 68 | 69 | const int num_bbox = bbox.size(0); 70 | if (num_bbox != out_grad.size(0)) 71 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 72 | out_grad.size(0), num_bbox); 73 | 74 | DeformablePSROIPoolBackwardAcc( 75 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch, 76 | channels, height, width, num_bbox, channels_trans, no_trans, 77 | spatial_scale, output_dim, group_size, pooled_size, part_size, 78 | sample_per_part, trans_std); 79 | } 80 | 81 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 82 | m.def("deform_psroi_pooling_cuda_forward", &deform_psroi_pooling_cuda_forward, 83 | "deform psroi pooling forward(CUDA)"); 84 | m.def("deform_psroi_pooling_cuda_backward", 85 | &deform_psroi_pooling_cuda_backward, 86 | "deform psroi pooling backward(CUDA)"); 87 | } -------------------------------------------------------------------------------- /mmdet/ops/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_wrapper import nms, soft_nms 2 | 3 | __all__ = ['nms', 'soft_nms'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from . import nms_cuda, nms_cpu 5 | from .soft_nms_cpu import soft_nms_cpu 6 | 7 | 8 | def nms(dets, iou_thr, device_id=None): 9 | """Dispatch to either CPU or GPU NMS implementations. 10 | 11 | The input can be either a torch tensor or numpy array. GPU NMS will be used 12 | if the input is a gpu tensor or device_id is specified, otherwise CPU NMS 13 | will be used. The returned type will always be the same as inputs. 14 | 15 | Arguments: 16 | dets (torch.Tensor or np.ndarray): bboxes with scores. 17 | iou_thr (float): IoU threshold for NMS. 18 | device_id (int, optional): when `dets` is a numpy array, if `device_id` 19 | is None, then cpu nms is used, otherwise gpu_nms will be used. 20 | 21 | Returns: 22 | tuple: kept bboxes and indice, which is always the same data type as 23 | the input. 
24 | """ 25 | # convert dets (tensor or numpy array) to tensor 26 | if isinstance(dets, torch.Tensor): 27 | is_numpy = False 28 | dets_th = dets 29 | elif isinstance(dets, np.ndarray): 30 | is_numpy = True 31 | device = 'cpu' if device_id is None else 'cuda:{}'.format(device_id) 32 | dets_th = torch.from_numpy(dets).to(device) 33 | else: 34 | raise TypeError( 35 | 'dets must be either a Tensor or numpy array, but got {}'.format( 36 | type(dets))) 37 | 38 | # execute cpu or cuda nms 39 | if dets_th.shape[0] == 0: 40 | inds = dets_th.new_zeros(0, dtype=torch.long) 41 | else: 42 | if dets_th.is_cuda: 43 | inds = nms_cuda.nms(dets_th, iou_thr) 44 | else: 45 | inds = nms_cpu.nms(dets_th, iou_thr) 46 | 47 | if is_numpy: 48 | inds = inds.cpu().numpy() 49 | return dets[inds, :], inds 50 | 51 | 52 | def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3): 53 | if isinstance(dets, torch.Tensor): 54 | is_tensor = True 55 | dets_np = dets.detach().cpu().numpy() 56 | elif isinstance(dets, np.ndarray): 57 | is_tensor = False 58 | dets_np = dets 59 | else: 60 | raise TypeError( 61 | 'dets must be either a Tensor or numpy array, but got {}'.format( 62 | type(dets))) 63 | 64 | method_codes = {'linear': 1, 'gaussian': 2} 65 | if method not in method_codes: 66 | raise ValueError('Invalid method for SoftNMS: {}'.format(method)) 67 | new_dets, inds = soft_nms_cpu( 68 | dets_np, 69 | iou_thr, 70 | method=method_codes[method], 71 | sigma=sigma, 72 | min_score=min_score) 73 | 74 | if is_tensor: 75 | return dets.new_tensor(new_dets), dets.new_tensor( 76 | inds, dtype=torch.long) 77 | else: 78 | return new_dets.astype(np.float32), inds.astype(np.int64) 79 | -------------------------------------------------------------------------------- /mmdet/ops/nms/setup.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | from setuptools import setup, Extension 3 | 4 | import numpy as np 5 | from Cython.Build import cythonize 6 | from Cython.Distutils import build_ext 7 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 8 | 9 | ext_args = dict( 10 | include_dirs=[np.get_include()], 11 | language='c++', 12 | extra_compile_args={ 13 | 'cc': ['-Wno-unused-function', '-Wno-write-strings'], 14 | 'nvcc': ['-c', '--compiler-options', '-fPIC'], 15 | }, 16 | ) 17 | 18 | extensions = [ 19 | Extension('soft_nms_cpu', ['src/soft_nms_cpu.pyx'], **ext_args), 20 | ] 21 | 22 | 23 | def customize_compiler_for_nvcc(self): 24 | """inject deep into distutils to customize how the dispatch 25 | to cc/nvcc works. 26 | If you subclass UnixCCompiler, it's not trivial to get your subclass 27 | injected in, and still have the right customizations (i.e. 28 | distutils.sysconfig.customize_compiler) run on it. So instead of going 29 | the OO route, I have this. Note, it's kindof like a wierd functional 30 | subclassing going on.""" 31 | 32 | # tell the compiler it can processes .cu 33 | self.src_extensions.append('.cu') 34 | 35 | # save references to the default compiler_so and _comple methods 36 | default_compiler_so = self.compiler_so 37 | super = self._compile 38 | 39 | # now redefine the _compile method. This gets executed for each 40 | # object but distutils doesn't have the ability to change compilers 41 | # based on source extension: we add it. 
42 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 43 | if osp.splitext(src)[1] == '.cu': 44 | # use the cuda for .cu files 45 | self.set_executable('compiler_so', 'nvcc') 46 | # use only a subset of the extra_postargs, which are 1-1 translated 47 | # from the extra_compile_args in the Extension class 48 | postargs = extra_postargs['nvcc'] 49 | else: 50 | postargs = extra_postargs['cc'] 51 | 52 | super(obj, src, ext, cc_args, postargs, pp_opts) 53 | # reset the default compiler_so, which we might have changed for cuda 54 | self.compiler_so = default_compiler_so 55 | 56 | # inject our redefined _compile method into the class 57 | self._compile = _compile 58 | 59 | 60 | class custom_build_ext(build_ext): 61 | 62 | def build_extensions(self): 63 | customize_compiler_for_nvcc(self.compiler) 64 | build_ext.build_extensions(self) 65 | 66 | 67 | setup( 68 | name='soft_nms', 69 | cmdclass={'build_ext': custom_build_ext}, 70 | ext_modules=cythonize(extensions), 71 | ) 72 | 73 | setup( 74 | name='nms_cuda', 75 | ext_modules=[ 76 | CUDAExtension('nms_cuda', [ 77 | 'src/nms_cuda.cpp', 78 | 'src/nms_kernel.cu', 79 | ]), 80 | CUDAExtension('nms_cpu', [ 81 | 'src/nms_cpu.cpp', 82 | ]), 83 | ], 84 | cmdclass={'build_ext': BuildExtension}) 85 | -------------------------------------------------------------------------------- /mmdet/ops/nms/src/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include 3 | 4 | template 5 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) { 6 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 7 | 8 | if (dets.numel() == 0) { 9 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 10 | } 11 | 12 | auto x1_t = dets.select(1, 0).contiguous(); 13 | auto y1_t = dets.select(1, 1).contiguous(); 14 | auto x2_t = dets.select(1, 2).contiguous(); 15 | auto y2_t = dets.select(1, 3).contiguous(); 16 | auto scores = dets.select(1, 4).contiguous(); 17 | 18 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 19 | 20 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 21 | 22 | auto ndets = dets.size(0); 23 | at::Tensor suppressed_t = 24 | at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 25 | 26 | auto suppressed = suppressed_t.data(); 27 | auto order = order_t.data(); 28 | auto x1 = x1_t.data(); 29 | auto y1 = y1_t.data(); 30 | auto x2 = x2_t.data(); 31 | auto y2 = y2_t.data(); 32 | auto areas = areas_t.data(); 33 | 34 | for (int64_t _i = 0; _i < ndets; _i++) { 35 | auto i = order[_i]; 36 | if (suppressed[i] == 1) continue; 37 | auto ix1 = x1[i]; 38 | auto iy1 = y1[i]; 39 | auto ix2 = x2[i]; 40 | auto iy2 = y2[i]; 41 | auto iarea = areas[i]; 42 | 43 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 44 | auto j = order[_j]; 45 | if (suppressed[j] == 1) continue; 46 | auto xx1 = std::max(ix1, x1[j]); 47 | auto yy1 = std::max(iy1, y1[j]); 48 | auto xx2 = std::min(ix2, x2[j]); 49 | auto yy2 = std::min(iy2, y2[j]); 50 | 51 | auto w = std::max(static_cast(0), xx2 - xx1 + 1); 52 | auto h = std::max(static_cast(0), yy2 - yy1 + 1); 53 | auto inter = w * h; 54 | auto ovr = inter / (iarea + areas[j] - inter); 55 | if (ovr >= threshold) suppressed[j] = 1; 56 | } 57 | } 58 | return at::nonzero(suppressed_t == 0).squeeze(1); 59 | } 60 | 61 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 62 | at::Tensor result; 63 | 
AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 64 | result = nms_cpu_kernel(dets, threshold); 65 | }); 66 | return result; 67 | } 68 | 69 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 70 | m.def("nms", &nms, "non-maximum suppression"); 71 | } -------------------------------------------------------------------------------- /mmdet/ops/nms/src/nms_cuda.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include 3 | 4 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 5 | 6 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 7 | 8 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 9 | CHECK_CUDA(dets); 10 | if (dets.numel() == 0) 11 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 12 | return nms_cuda(dets, threshold); 13 | } 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("nms", &nms, "non-maximum suppression"); 17 | } -------------------------------------------------------------------------------- /mmdet/ops/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.roi_align import roi_align 2 | from .modules.roi_align import RoIAlign 3 | 4 | __all__ = ['roi_align', 'RoIAlign'] 5 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/mmdet/ops/roi_align/functions/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/roi_align/functions/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Function 2 | 3 | from .. 
import roi_align_cuda 4 | 5 | 6 | class RoIAlignFunction(Function): 7 | 8 | @staticmethod 9 | def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0): 10 | if isinstance(out_size, int): 11 | out_h = out_size 12 | out_w = out_size 13 | elif isinstance(out_size, tuple): 14 | assert len(out_size) == 2 15 | assert isinstance(out_size[0], int) 16 | assert isinstance(out_size[1], int) 17 | out_h, out_w = out_size 18 | else: 19 | raise TypeError( 20 | '"out_size" must be an integer or tuple of integers') 21 | ctx.spatial_scale = spatial_scale 22 | ctx.sample_num = sample_num 23 | ctx.save_for_backward(rois) 24 | ctx.feature_size = features.size() 25 | 26 | batch_size, num_channels, data_height, data_width = features.size() 27 | num_rois = rois.size(0) 28 | 29 | output = features.new_zeros(num_rois, num_channels, out_h, out_w) 30 | if features.is_cuda: 31 | roi_align_cuda.forward(features, rois, out_h, out_w, spatial_scale, 32 | sample_num, output) 33 | else: 34 | raise NotImplementedError 35 | 36 | return output 37 | 38 | @staticmethod 39 | def backward(ctx, grad_output): 40 | feature_size = ctx.feature_size 41 | spatial_scale = ctx.spatial_scale 42 | sample_num = ctx.sample_num 43 | rois = ctx.saved_tensors[0] 44 | assert (feature_size is not None and grad_output.is_cuda) 45 | 46 | batch_size, num_channels, data_height, data_width = feature_size 47 | out_w = grad_output.size(3) 48 | out_h = grad_output.size(2) 49 | 50 | grad_input = grad_rois = None 51 | if ctx.needs_input_grad[0]: 52 | grad_input = rois.new_zeros(batch_size, num_channels, data_height, 53 | data_width) 54 | roi_align_cuda.backward(grad_output.contiguous(), rois, out_h, 55 | out_w, spatial_scale, sample_num, 56 | grad_input) 57 | 58 | return grad_input, grad_rois, None, None, None 59 | 60 | 61 | roi_align = RoIAlignFunction.apply 62 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/gradcheck.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.autograd import gradcheck 4 | 5 | import os.path as osp 6 | import sys 7 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 8 | from roi_align import RoIAlign # noqa: E402 9 | 10 | feat_size = 15 11 | spatial_scale = 1.0 / 8 12 | img_size = feat_size / spatial_scale 13 | num_imgs = 2 14 | num_rois = 20 15 | 16 | batch_ind = np.random.randint(num_imgs, size=(num_rois, 1)) 17 | rois = np.random.rand(num_rois, 4) * img_size * 0.5 18 | rois[:, 2:] += img_size * 0.5 19 | rois = np.hstack((batch_ind, rois)) 20 | 21 | feat = torch.randn( 22 | num_imgs, 16, feat_size, feat_size, requires_grad=True, device='cuda:0') 23 | rois = torch.from_numpy(rois).float().cuda() 24 | inputs = (feat, rois) 25 | print('Gradcheck for roi align...') 26 | test = gradcheck(RoIAlign(3, spatial_scale), inputs, atol=1e-3, eps=1e-3) 27 | print(test) 28 | test = gradcheck(RoIAlign(3, spatial_scale, 2), inputs, atol=1e-3, eps=1e-3) 29 | print(test) 30 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/mmdet/ops/roi_align/modules/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/roi_align/modules/roi_align.py: 
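The functional roi_align shown above expects each RoI as (batch_index, x1, y1, x2, y2), the same layout gradcheck.py assembles with np.hstack. A minimal usage sketch (shapes and values are illustrative; a CUDA device is required because the CPU branch raises NotImplementedError):

    import torch

    from mmdet.ops import roi_align  # exported from mmdet/ops/__init__.py

    feats = torch.randn(2, 256, 50, 68, device='cuda')  # e.g. one FPN level
    rois = torch.tensor([[0., 10., 10., 90., 120.],     # (batch_idx, x1, y1, x2, y2)
                         [1., 4., 8., 60., 40.]], device='cuda')
    # 7x7 output bins, 1/8 feature stride, 2 bilinear samples per bin
    out = roi_align(feats, rois, (7, 7), 1.0 / 8, 2)
    assert out.shape == (2, 256, 7, 7)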
-------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_align import RoIAlignFunction 3 | 4 | 5 | class RoIAlign(Module): 6 | 7 | def __init__(self, out_size, spatial_scale, sample_num=0): 8 | super(RoIAlign, self).__init__() 9 | 10 | self.out_size = out_size 11 | self.spatial_scale = float(spatial_scale) 12 | self.sample_num = int(sample_num) 13 | 14 | def forward(self, features, rois): 15 | return RoIAlignFunction.apply(features, rois, self.out_size, 16 | self.spatial_scale, self.sample_num) 17 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='roi_align_cuda', 6 | ext_modules=[ 7 | CUDAExtension('roi_align_cuda', [ 8 | 'src/roi_align_cuda.cpp', 9 | 'src/roi_align_kernel.cu', 10 | ]), 11 | ], 12 | cmdclass={'build_ext': BuildExtension}) 13 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/src/roi_align_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <cmath> 4 | #include <vector> 5 | 6 | int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int sample_num, 8 | const int channels, const int height, 9 | const int width, const int num_rois, 10 | const int pooled_height, const int pooled_width, 11 | at::Tensor output); 12 | 13 | int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 14 | const float spatial_scale, const int sample_num, 15 | const int channels, const int height, 16 | const int width, const int num_rois, 17 | const int pooled_height, const int pooled_width, 18 | at::Tensor bottom_grad); 19 | 20 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDA tensor ") 21 | #define CHECK_CONTIGUOUS(x) \ 22 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 23 | #define CHECK_INPUT(x) \ 24 | CHECK_CUDA(x); \ 25 | CHECK_CONTIGUOUS(x) 26 | 27 | int roi_align_forward_cuda(at::Tensor features, at::Tensor rois, 28 | int pooled_height, int pooled_width, 29 | float spatial_scale, int sample_num, 30 | at::Tensor output) { 31 | CHECK_INPUT(features); 32 | CHECK_INPUT(rois); 33 | CHECK_INPUT(output); 34 | 35 | // Number of ROIs 36 | int num_rois = rois.size(0); 37 | int size_rois = rois.size(1); 38 | 39 | if (size_rois != 5) { 40 | printf("wrong roi size\n"); 41 | return 0; 42 | } 43 | 44 | int num_channels = features.size(1); 45 | int data_height = features.size(2); 46 | int data_width = features.size(3); 47 | 48 | ROIAlignForwardLaucher(features, rois, spatial_scale, sample_num, 49 | num_channels, data_height, data_width, num_rois, 50 | pooled_height, pooled_width, output); 51 | 52 | return 1; 53 | } 54 | 55 | int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois, 56 | int pooled_height, int pooled_width, 57 | float spatial_scale, int sample_num, 58 | at::Tensor bottom_grad) { 59 | CHECK_INPUT(top_grad); 60 | CHECK_INPUT(rois); 61 | CHECK_INPUT(bottom_grad); 62 | 63 | // Number of ROIs 64 | int num_rois = rois.size(0); 65 | int size_rois = rois.size(1); 66 | if (size_rois != 5) { 67 | printf("wrong roi size\n"); 68 | return 0; 69 | } 70 | 71 | int num_channels = bottom_grad.size(1); 72 | int data_height =
bottom_grad.size(2); 73 | int data_width = bottom_grad.size(3); 74 | 75 | ROIAlignBackwardLaucher(top_grad, rois, spatial_scale, sample_num, 76 | num_channels, data_height, data_width, num_rois, 77 | pooled_height, pooled_width, bottom_grad); 78 | 79 | return 1; 80 | } 81 | 82 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 83 | m.def("forward", &roi_align_forward_cuda, "Roi_Align forward (CUDA)"); 84 | m.def("backward", &roi_align_backward_cuda, "Roi_Align backward (CUDA)"); 85 | } 86 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.roi_pool import roi_pool 2 | from .modules.roi_pool import RoIPool 3 | 4 | __all__ = ['roi_pool', 'RoIPool'] 5 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/mmdet/ops/roi_pool/functions/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/functions/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from .. import roi_pool_cuda 5 | 6 | 7 | class RoIPoolFunction(Function): 8 | 9 | @staticmethod 10 | def forward(ctx, features, rois, out_size, spatial_scale): 11 | if isinstance(out_size, int): 12 | out_h = out_size 13 | out_w = out_size 14 | elif isinstance(out_size, tuple): 15 | assert len(out_size) == 2 16 | assert isinstance(out_size[0], int) 17 | assert isinstance(out_size[1], int) 18 | out_h, out_w = out_size 19 | else: 20 | raise TypeError( 21 | '"out_size" must be an integer or tuple of integers') 22 | assert features.is_cuda 23 | ctx.save_for_backward(rois) 24 | num_channels = features.size(1) 25 | num_rois = rois.size(0) 26 | out_size = (num_rois, num_channels, out_h, out_w) 27 | output = features.new_zeros(out_size) 28 | argmax = features.new_zeros(out_size, dtype=torch.int) 29 | roi_pool_cuda.forward(features, rois, out_h, out_w, spatial_scale, 30 | output, argmax) 31 | ctx.spatial_scale = spatial_scale 32 | ctx.feature_size = features.size() 33 | ctx.argmax = argmax 34 | 35 | return output 36 | 37 | @staticmethod 38 | def backward(ctx, grad_output): 39 | assert grad_output.is_cuda 40 | spatial_scale = ctx.spatial_scale 41 | feature_size = ctx.feature_size 42 | argmax = ctx.argmax 43 | rois = ctx.saved_tensors[0] 44 | assert feature_size is not None 45 | 46 | grad_input = grad_rois = None 47 | if ctx.needs_input_grad[0]: 48 | grad_input = grad_output.new_zeros(feature_size) 49 | roi_pool_cuda.backward(grad_output.contiguous(), rois, argmax, 50 | spatial_scale, grad_input) 51 | 52 | return grad_input, grad_rois, None, None 53 | 54 | 55 | roi_pool = RoIPoolFunction.apply 56 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/gradcheck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import gradcheck 3 | 4 | import os.path as osp 5 | import sys 6 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 7 | from roi_pool import RoIPool # noqa: E402 8 | 9 | feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda() 10 | rois = torch.Tensor([[0, 0, 
0, 50, 50], [0, 10, 30, 43, 55], 11 | [1, 67, 40, 110, 120]]).cuda() 12 | inputs = (feat, rois) 13 | print('Gradcheck for roi pooling...') 14 | test = gradcheck(RoIPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3) 15 | print(test) 16 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/mmdet/ops/roi_pool/modules/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/modules/roi_pool.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_pool import roi_pool 3 | 4 | 5 | class RoIPool(Module): 6 | 7 | def __init__(self, out_size, spatial_scale): 8 | super(RoIPool, self).__init__() 9 | 10 | self.out_size = out_size 11 | self.spatial_scale = float(spatial_scale) 12 | 13 | def forward(self, features, rois): 14 | return roi_pool(features, rois, self.out_size, self.spatial_scale) 15 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='roi_pool', 6 | ext_modules=[ 7 | CUDAExtension('roi_pool_cuda', [ 8 | 'src/roi_pool_cuda.cpp', 9 | 'src/roi_pool_kernel.cu', 10 | ]) 11 | ], 12 | cmdclass={'build_ext': BuildExtension}) 13 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/src/roi_pool_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <cmath> 4 | #include <vector> 5 | 6 | int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int channels, 8 | const int height, const int width, const int num_rois, 9 | const int pooled_h, const int pooled_w, 10 | at::Tensor output, at::Tensor argmax); 11 | 12 | int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 13 | const at::Tensor argmax, const float spatial_scale, 14 | const int batch_size, const int channels, 15 | const int height, const int width, 16 | const int num_rois, const int pooled_h, 17 | const int pooled_w, at::Tensor bottom_grad); 18 | 19 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDA tensor ") 20 | #define CHECK_CONTIGUOUS(x) \ 21 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 22 | #define CHECK_INPUT(x) \ 23 | CHECK_CUDA(x); \ 24 | CHECK_CONTIGUOUS(x) 25 | 26 | int roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois, 27 | int pooled_height, int pooled_width, 28 | float spatial_scale, at::Tensor output, 29 | at::Tensor argmax) { 30 | CHECK_INPUT(features); 31 | CHECK_INPUT(rois); 32 | CHECK_INPUT(output); 33 | CHECK_INPUT(argmax); 34 | 35 | // Number of ROIs 36 | int num_rois = rois.size(0); 37 | int size_rois = rois.size(1); 38 | 39 | if (size_rois != 5) { 40 | printf("wrong roi size\n"); 41 | return 0; 42 | } 43 | 44 | int channels = features.size(1); 45 | int height = features.size(2); 46 | int width = features.size(3); 47 | 48 | ROIPoolForwardLaucher(features, rois, spatial_scale, channels, height, width, 49 | num_rois, pooled_height, pooled_width,
output, argmax); 50 | 51 | return 1; 52 | } 53 | 54 | int roi_pooling_backward_cuda(at::Tensor top_grad, at::Tensor rois, 55 | at::Tensor argmax, float spatial_scale, 56 | at::Tensor bottom_grad) { 57 | CHECK_INPUT(top_grad); 58 | CHECK_INPUT(rois); 59 | CHECK_INPUT(argmax); 60 | CHECK_INPUT(bottom_grad); 61 | 62 | int pooled_height = top_grad.size(2); 63 | int pooled_width = top_grad.size(3); 64 | int num_rois = rois.size(0); 65 | int size_rois = rois.size(1); 66 | 67 | if (size_rois != 5) { 68 | printf("wrong roi size\n"); 69 | return 0; 70 | } 71 | int batch_size = bottom_grad.size(0); 72 | int channels = bottom_grad.size(1); 73 | int height = bottom_grad.size(2); 74 | int width = bottom_grad.size(3); 75 | 76 | ROIPoolBackwardLaucher(top_grad, rois, argmax, spatial_scale, batch_size, 77 | channels, height, width, num_rois, pooled_height, 78 | pooled_width, bottom_grad); 79 | 80 | return 1; 81 | } 82 | 83 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 84 | m.def("forward", &roi_pooling_forward_cuda, "Roi_Pooling forward (CUDA)"); 85 | m.def("backward", &roi_pooling_backward_cuda, "Roi_Pooling backward (CUDA)"); 86 | } 87 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/__init__.py: -------------------------------------------------------------------------------- 1 | from .modules.sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss 2 | 3 | __all__ = ['SigmoidFocalLoss', 'sigmoid_focal_loss'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/mmdet/ops/sigmoid_focal_loss/functions/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/functions/sigmoid_focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | 5 | from .. 
import sigmoid_focal_loss_cuda 6 | 7 | 8 | class SigmoidFocalLossFunction(Function): 9 | 10 | @staticmethod 11 | def forward(ctx, input, target, gamma=2.0, alpha=0.25, reduction='mean'): 12 | ctx.save_for_backward(input, target) 13 | num_classes = input.shape[1] 14 | ctx.num_classes = num_classes 15 | ctx.gamma = gamma 16 | ctx.alpha = alpha 17 | 18 | loss = sigmoid_focal_loss_cuda.forward(input, target, num_classes, 19 | gamma, alpha) 20 | reduction_enum = F._Reduction.get_enum(reduction) 21 | # none: 0, mean: 1, sum: 2 22 | if reduction_enum == 0: 23 | return loss 24 | elif reduction_enum == 1: 25 | return loss.mean() 26 | elif reduction_enum == 2: 27 | return loss.sum() 28 | 29 | @staticmethod 30 | @once_differentiable 31 | def backward(ctx, d_loss): 32 | input, target = ctx.saved_tensors 33 | num_classes = ctx.num_classes 34 | gamma = ctx.gamma 35 | alpha = ctx.alpha 36 | d_loss = d_loss.contiguous() 37 | d_input = sigmoid_focal_loss_cuda.backward(input, target, d_loss, 38 | num_classes, gamma, alpha) 39 | return d_input, None, None, None, None 40 | 41 | 42 | sigmoid_focal_loss = SigmoidFocalLossFunction.apply 43 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/mmdet/ops/sigmoid_focal_loss/modules/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/modules/sigmoid_focal_loss.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from ..functions.sigmoid_focal_loss import sigmoid_focal_loss 4 | 5 | 6 | class SigmoidFocalLoss(nn.Module): 7 | 8 | def __init__(self, gamma, alpha): 9 | super(SigmoidFocalLoss, self).__init__() 10 | self.gamma = gamma 11 | self.alpha = alpha 12 | 13 | def forward(self, logits, targets): 14 | assert logits.is_cuda 15 | loss = sigmoid_focal_loss(logits, targets, self.gamma, self.alpha) 16 | return loss.sum() 17 | 18 | def __repr__(self): 19 | tmpstr = self.__class__.__name__ + "(" 20 | tmpstr += "gamma=" + str(self.gamma) 21 | tmpstr += ", alpha=" + str(self.alpha) 22 | tmpstr += ")" 23 | return tmpstr 24 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='SigmoidFocalLoss', 6 | ext_modules=[ 7 | CUDAExtension('sigmoid_focal_loss_cuda', [ 8 | 'src/sigmoid_focal_loss.cpp', 9 | 'src/sigmoid_focal_loss_cuda.cu', 10 | ]), 11 | ], 12 | cmdclass={'build_ext': BuildExtension}) 13 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss.cpp: -------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h 3 | #include <torch/extension.h> 4 | 5 | at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits, 6 | const at::Tensor &targets, 7 | const int num_classes, 8 | const float gamma, const float alpha); 9 | 10 | at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits, 11 | const
at::Tensor &targets, 12 | const at::Tensor &d_losses, 13 | const int num_classes, 14 | const float gamma, const float alpha); 15 | 16 | // Interface for Python 17 | at::Tensor SigmoidFocalLoss_forward(const at::Tensor &logits, 18 | const at::Tensor &targets, 19 | const int num_classes, const float gamma, 20 | const float alpha) { 21 | if (logits.type().is_cuda()) { 22 | return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, 23 | alpha); 24 | } AT_ERROR("SigmoidFocalLoss is not implemented on the CPU"); 25 | } 26 | 27 | at::Tensor SigmoidFocalLoss_backward(const at::Tensor &logits, 28 | const at::Tensor &targets, 29 | const at::Tensor &d_losses, 30 | const int num_classes, const float gamma, 31 | const float alpha) { 32 | if (logits.type().is_cuda()) { 33 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, 34 | num_classes, gamma, alpha); 35 | } AT_ERROR("SigmoidFocalLoss is not implemented on the CPU"); 36 | } 37 | 38 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 39 | m.def("forward", &SigmoidFocalLoss_forward, 40 | "SigmoidFocalLoss forward (CUDA)"); 41 | m.def("backward", &SigmoidFocalLoss_backward, 42 | "SigmoidFocalLoss backward (CUDA)"); 43 | } 44 | -------------------------------------------------------------------------------- /panopticapi/README.md: -------------------------------------------------------------------------------- 1 | # COCO 2018 Panoptic Segmentation Task API (Beta version) 2 | This API is an experimental version of [COCO 2018 Panoptic Segmentation Task API](http://cocodataset.org/#panoptic-2018). 3 | 4 | ## Summary 5 | **Evaluation script** 6 | 7 | *evaluation.py* calculates [PQ metrics](http://cocodataset.org/#panoptic-eval). For more information about the script usage, run: `python evaluation.py --help` 8 | 9 | **Format converters** 10 | 11 | COCO panoptic segmentation is stored in a new [format](http://cocodataset.org/#format-data). Unlike the COCO detection format, which stores each segment independently, the COCO panoptic format stores all segmentations for an image in a single PNG file. This compact representation naturally maintains the non-overlapping property of the panoptic segmentation. 12 | 13 | We provide several converters for the COCO panoptic format. Full description and usage examples are available [here](https://github.com/cocodataset/panopticapi/blob/master/CONVERTERS.md). 14 | 15 | **Semantic and instance segmentation heuristic combination** 16 | 17 | We provide a simple script that heuristically combines semantic and instance segmentation predictions into a panoptic segmentation prediction. 18 | 19 | The merging logic of the script is described in the panoptic segmentation [paper](https://arxiv.org/abs/1801.00868). In addition, this script can filter out predicted stuff segments whose area is below the threshold defined by the `--stuff_area_limit` parameter. For more information about the script logic and usage, run: `python combine_semantic_and_instance_predictions.py --help` 20 | 21 | **COCO panoptic segmentation challenge categories** 22 | 23 | The JSON file [panoptic_coco_categories.json](https://github.com/cocodataset/panopticapi/blob/master/panoptic_coco_categories.json) contains the list of all categories used in the COCO 2018 panoptic segmentation challenge. 24 | 25 | **Visualization** 26 | 27 | *visualization.py* provides an example of generating a visually appealing representation of the panoptic segmentation data. 28 | 29 | ## Contact 30 | If you have any questions regarding this API, please contact us at alexander.n.kirillov-at-gmail.com.
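To make the single-PNG encoding described above concrete, here is a minimal sketch (not part of the original API; the sample path is hypothetical, following the sample_data layout) that decodes a panoptic annotation with the same ID = R + 256 * G + 256 * 256 * B convention implemented by `rgb2id` in panopticapi/utils.py:

```python
import numpy as np
import PIL.Image as Image

# Hypothetical sample file from sample_data/panoptic_examples/.
png_path = './sample_data/panoptic_examples/000000142238.png'

# Each pixel's RGB triplet encodes one segment id:
#   ID = R + 256 * G + 256 * 256 * B
rgb = np.array(Image.open(png_path), dtype=np.uint32)
ids = rgb[:, :, 0] + 256 * rgb[:, :, 1] + 256 * 256 * rgb[:, :, 2]

# Every pixel carries exactly one id, so segments cannot overlap, and
# per-segment areas fall out of a single unique() pass.
segment_ids, areas = np.unique(ids, return_counts=True)
for seg_id, area in zip(segment_ids, areas):
    print('segment {}: {} pixels'.format(seg_id, area))
```

The `segments_info` records in the matching JSON then map each decoded id back to its category.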
31 | -------------------------------------------------------------------------------- /panopticapi/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/panopticapi/__init__.py -------------------------------------------------------------------------------- /panopticapi/cityscapes_gt_converter/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/panopticapi/cityscapes_gt_converter/__init__.py -------------------------------------------------------------------------------- /panopticapi/converted_data/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /panopticapi/converters/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/panopticapi/converters/__init__.py -------------------------------------------------------------------------------- /panopticapi/license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018, Alexander Kirillov 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | 24 | The views and conclusions contained in the software and documentation are those 25 | of the authors and should not be interpreted as representing official policies, 26 | either expressed or implied, of the FreeBSD Project. 
27 | -------------------------------------------------------------------------------- /panopticapi/sample_data/input_images/000000142238.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/panopticapi/sample_data/input_images/000000142238.jpg -------------------------------------------------------------------------------- /panopticapi/sample_data/input_images/000000439180.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/panopticapi/sample_data/input_images/000000439180.jpg -------------------------------------------------------------------------------- /panopticapi/sample_data/panoptic_examples/000000142238.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/panopticapi/sample_data/panoptic_examples/000000142238.png -------------------------------------------------------------------------------- /panopticapi/sample_data/panoptic_examples/000000439180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/panopticapi/sample_data/panoptic_examples/000000439180.png -------------------------------------------------------------------------------- /panopticapi/sample_data/panoptic_examples_2ch_format/000000142238.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/panopticapi/sample_data/panoptic_examples_2ch_format/000000142238.png -------------------------------------------------------------------------------- /panopticapi/sample_data/panoptic_examples_2ch_format/000000439180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/panopticapi/sample_data/panoptic_examples_2ch_format/000000439180.png -------------------------------------------------------------------------------- /panopticapi/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | import functools 6 | import traceback 7 | import json 8 | import numpy as np 9 | 10 | 11 | # This decorator is used to print an error thrown inside a worker process 12 | def get_traceback(f): 13 | @functools.wraps(f) 14 | def wrapper(*args, **kwargs): 15 | try: 16 | return f(*args, **kwargs) 17 | except Exception as e: 18 | print('Caught exception in worker thread:') 19 | traceback.print_exc() 20 | raise e 21 | 22 | return wrapper 23 | 24 | 25 | class IdGenerator(): 26 | ''' 27 | The class is designed to generate unique IDs that have a meaningful RGB encoding. 28 | Given a semantic category, a unique ID will be generated, and its RGB encoding will 29 | have a color close to the predefined semantic category color. 30 | The RGB encoding used is ID = R + 256 * G + 256 * 256 * B.
31 | The class constructor takes a dictionary {id: category_info}, in which all semantic 32 | class ids are present and each category_info record is a dict with the fields 33 | 'isthing' and 'color'. 34 | ''' 35 | def __init__(self, categories): 36 | self.taken_colors = {(0, 0, 0)} 37 | self.categories = categories 38 | for category in self.categories.values(): 39 | if category['isthing'] == 0: 40 | self.taken_colors.add(tuple(category['color'])) 41 | 42 | def get_color(self, cat_id): 43 | def random_color(base, max_dist=30): 44 | new_color = base + np.random.randint(low=-max_dist, 45 | high=max_dist+1, 46 | size=3) 47 | return tuple(np.maximum(0, np.minimum(255, new_color))) 48 | 49 | category = self.categories[cat_id] 50 | if category['isthing'] == 0: 51 | return category['color'] 52 | base_color_array = category['color'] 53 | base_color = tuple(base_color_array) 54 | if base_color not in self.taken_colors: 55 | self.taken_colors.add(base_color) 56 | return base_color 57 | else: 58 | while True: 59 | color = random_color(base_color_array) 60 | if color not in self.taken_colors: 61 | self.taken_colors.add(color) 62 | return color 63 | 64 | def get_id(self, cat_id): 65 | color = self.get_color(cat_id) 66 | return rgb2id(color) 67 | 68 | def get_id_and_color(self, cat_id): 69 | color = self.get_color(cat_id) 70 | return rgb2id(color), color 71 | 72 | 73 | def rgb2id(color): 74 | if isinstance(color, np.ndarray) and len(color.shape) == 3: 75 | if color.dtype == np.uint8: 76 | color = color.astype(np.int32) 77 | return color[:, :, 0] + 256 * color[:, :, 1] + 256 * 256 * color[:, :, 2] 78 | return int(color[0] + 256 * color[1] + 256 * 256 * color[2]) 79 | 80 | 81 | def id2rgb(id_map): 82 | if isinstance(id_map, np.ndarray): 83 | id_map_copy = id_map.copy() 84 | rgb_shape = tuple(list(id_map.shape) + [3]) 85 | rgb_map = np.zeros(rgb_shape, dtype=np.uint8) 86 | for i in range(3): 87 | rgb_map[..., i] = id_map_copy % 256 88 | id_map_copy //= 256 89 | return rgb_map 90 | color = [] 91 | for _ in range(3): 92 | color.append(id_map % 256) 93 | id_map //= 256 94 | return color 95 | 96 | 97 | def save_json(d, file): 98 | with open(file, 'w') as f: 99 | json.dump(d, f) 100 | -------------------------------------------------------------------------------- /panopticapi/visualization.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | ''' 3 | Visualization demo for panoptic COCO sample_data 4 | 5 | The code shows an example of color generation for panoptic data (with 6 | "generate_new_colors" set to True). For each segment a distinct color is used, 7 | chosen so that it is close to the color of the corresponding semantic class.
8 | ''' 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | from __future__ import unicode_literals 13 | import os, sys 14 | import numpy as np 15 | import json 16 | 17 | import PIL.Image as Image 18 | import matplotlib.pyplot as plt 19 | from skimage.segmentation import find_boundaries 20 | 21 | from utils import IdGenerator, rgb2id 22 | 23 | # whether colors stored in the PNG are used or new colors are generated 24 | generate_new_colors = True 25 | 26 | json_file = './sample_data/panoptic_examples.json' 27 | segmentations_folder = './sample_data/panoptic_examples/' 28 | img_folder = './sample_data/input_images/' 29 | panoptic_coco_categories = './panoptic_coco_categories.json' 30 | 31 | with open(json_file, 'r') as f: 32 | coco_d = json.load(f) 33 | 34 | ann = np.random.choice(coco_d['annotations']) 35 | 36 | with open(panoptic_coco_categories, 'r') as f: 37 | categories_list = json.load(f) 38 | categories = {category['id']: category for category in categories_list} 39 | 40 | # find the input img that corresponds to the annotation 41 | img = None 42 | for image_info in coco_d['images']: 43 | if image_info['id'] == ann['image_id']: 44 | try: 45 | img = np.array( 46 | Image.open(os.path.join(img_folder, image_info['file_name'])) 47 | ) 48 | except IOError: 49 | print("Unable to find the corresponding input image.") 50 | break 51 | 52 | segmentation = np.array( 53 | Image.open(os.path.join(segmentations_folder, ann['file_name'])), 54 | dtype=np.uint8 55 | ) 56 | segmentation_id = rgb2id(segmentation) 57 | # find segment boundaries 58 | boundaries = find_boundaries(segmentation_id, mode='thick') 59 | 60 | if generate_new_colors: 61 | segmentation[:, :, :] = 0 62 | color_generator = IdGenerator(categories) 63 | for segment_info in ann['segments_info']: 64 | color = color_generator.get_color(segment_info['category_id']) 65 | mask = segmentation_id == segment_info['id'] 66 | segmentation[mask] = color 67 | 68 | # depict boundaries 69 | segmentation[boundaries] = [0, 0, 0] 70 | 71 | if img is None: 72 | plt.figure() 73 | plt.imshow(segmentation) 74 | plt.axis('off') 75 | else: 76 | plt.figure(figsize=(9, 5)) 77 | plt.subplot(121) 78 | plt.imshow(img) 79 | plt.axis('off') 80 | plt.subplot(122) 81 | plt.imshow(segmentation) 82 | plt.axis('off') 83 | plt.tight_layout() 84 | plt.show() 85 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import time 4 | from setuptools import find_packages, setup 5 | 6 | 7 | def readme(): 8 | with open('README.md', encoding='utf-8') as f: 9 | content = f.read() 10 | return content 11 | 12 | 13 | MAJOR = 0 14 | MINOR = 6 15 | PATCH = 0 16 | SUFFIX = '' 17 | SHORT_VERSION = '{}.{}.{}{}'.format(MAJOR, MINOR, PATCH, SUFFIX) 18 | 19 | version_file = 'mmdet/version.py' 20 | 21 | 22 | def get_git_hash(): 23 | 24 | def _minimal_ext_cmd(cmd): 25 | # construct minimal environment 26 | env = {} 27 | for k in ['SYSTEMROOT', 'PATH', 'HOME']: 28 | v = os.environ.get(k) 29 | if v is not None: 30 | env[k] = v 31 | # LANGUAGE is used on win32 32 | env['LANGUAGE'] = 'C' 33 | env['LANG'] = 'C' 34 | env['LC_ALL'] = 'C' 35 | out = subprocess.Popen( 36 | cmd, stdout=subprocess.PIPE, env=env).communicate()[0] 37 | return out 38 | 39 | try: 40 | out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD']) 41 | sha = out.strip().decode('ascii') 42 | except OSError: 43 | sha = 'unknown'
44 | 45 | return sha 46 | 47 | 48 | def get_hash(): 49 | if os.path.exists('.git'): 50 | sha = get_git_hash()[:7] 51 | elif os.path.exists(version_file): 52 | try: 53 | from mmdet.version import __version__ 54 | sha = __version__.split('+')[-1] 55 | except ImportError: 56 | raise ImportError('Unable to get git version') 57 | else: 58 | sha = 'unknown' 59 | 60 | return sha 61 | 62 | 63 | def write_version_py(): 64 | content = """# GENERATED VERSION FILE 65 | # TIME: {} 66 | 67 | __version__ = '{}' 68 | short_version = '{}' 69 | """ 70 | sha = get_hash() 71 | VERSION = SHORT_VERSION + '+' + sha 72 | 73 | with open(version_file, 'w') as f: 74 | f.write(content.format(time.asctime(), VERSION, SHORT_VERSION)) 75 | 76 | 77 | def get_version(): 78 | with open(version_file, 'r') as f: 79 | exec(compile(f.read(), version_file, 'exec')) 80 | return locals()['__version__'] 81 | 82 | 83 | if __name__ == '__main__': 84 | write_version_py() 85 | setup( 86 | name='mmdet', 87 | version=get_version(), 88 | description='Open MMLab Detection Toolbox', 89 | long_description=readme(), 90 | keywords='computer vision, object detection', 91 | url='https://github.com/open-mmlab/mmdetection', 92 | packages=find_packages(exclude=('configs', 'tools', 'demo')), 93 | package_data={'mmdet.ops': ['*/*.so']}, 94 | classifiers=[ 95 | 'Development Status :: 4 - Beta', 96 | 'License :: OSI Approved :: Apache Software License', 97 | 'Operating System :: OS Independent', 98 | 'Programming Language :: Python :: 2', 99 | 'Programming Language :: Python :: 2.7', 100 | 'Programming Language :: Python :: 3', 101 | 'Programming Language :: Python :: 3.4', 102 | 'Programming Language :: Python :: 3.5', 103 | 'Programming Language :: Python :: 3.6', 104 | ], 105 | license='GPLv3', 106 | setup_requires=['pytest-runner'], 107 | tests_require=['pytest'], 108 | install_requires=[ 109 | 'mmcv>=0.2.6', 'numpy', 'matplotlib', 'six', 'terminaltables', 110 | 'pycocotools' 111 | ], 112 | zip_safe=False) 113 | -------------------------------------------------------------------------------- /tools/coco_eval.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | 3 | from mmdet.core import coco_eval 4 | 5 | 6 | def main(): 7 | parser = ArgumentParser(description='COCO Evaluation') 8 | parser.add_argument('result', help='result file path') 9 | parser.add_argument('--ann', help='annotation file path') 10 | parser.add_argument( 11 | '--types', 12 | type=str, 13 | nargs='+', 14 | choices=['proposal_fast', 'proposal', 'bbox', 'segm', 'keypoint'], 15 | default=['bbox'], 16 | help='result types') 17 | parser.add_argument( 18 | '--max-dets', 19 | type=int, 20 | nargs='+', 21 | default=[100, 300, 1000], 22 | help='proposal numbers, only used for recall evaluation') 23 | args = parser.parse_args() 24 | coco_eval(args.result, args.types, args.ann, args.max_dets) 25 | 26 | 27 | if __name__ == '__main__': 28 | main() 29 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | $PYTHON -m torch.distributed.launch --nproc_per_node=$2 $(dirname "$0")/train.py $1 --launcher pytorch ${@:3} 6 | -------------------------------------------------------------------------------- /tools/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 
3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-8} 10 | CPUS_PER_TASK=${CPUS_PER_TASK:-32} 11 | PY_ARGS=${@:5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | 14 | srun -p ${PARTITION} \ 15 | --job-name=${JOB_NAME} \ 16 | --gres=gpu:${GPUS} \ 17 | --ntasks=1 \ 18 | --ntasks-per-node=1 \ 19 | --cpus-per-task=${CPUS_PER_TASK} \ 20 | --kill-on-bad-exit=1 \ 21 | ${SRUN_ARGS} \ 22 | python tools/test.py ${CONFIG} ${CHECKPOINT} --gpus ${GPUS} ${PY_ARGS} 23 | -------------------------------------------------------------------------------- /tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | WORK_DIR=$4 9 | GPUS=${5:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | PY_ARGS=${PY_ARGS:-"--validate"} 14 | 15 | srun -p ${PARTITION} \ 16 | --job-name=${JOB_NAME} \ 17 | --gres=gpu:${GPUS_PER_NODE} \ 18 | --ntasks=${GPUS} \ 19 | --ntasks-per-node=${GPUS_PER_NODE} \ 20 | --cpus-per-task=${CPUS_PER_TASK} \ 21 | --kill-on-bad-exit=1 \ 22 | ${SRUN_ARGS} \ 23 | python -u tools/train.py ${CONFIG} --work_dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} 24 | -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import argparse 4 | from mmcv import Config 5 | 6 | from mmdet import __version__ 7 | from mmdet.datasets import get_dataset 8 | from mmdet.apis import (train_detector, init_dist, get_root_logger, 9 | set_random_seed) 10 | from mmdet.models import build_detector 11 | import torch 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser(description='Train a detector') 16 | parser.add_argument('config', help='train config file path') 17 | parser.add_argument('--work_dir', help='the dir to save logs and models') 18 | parser.add_argument( 19 | '--resume_from', help='the checkpoint file to resume from') 20 | parser.add_argument( 21 | '--validate', 22 | action='store_true', 23 | help='whether to evaluate the checkpoint during training') 24 | parser.add_argument( 25 | '--gpus', 26 | type=int, 27 | default=1, 28 | help='number of gpus to use ' 29 | '(only applicable to non-distributed training)') 30 | parser.add_argument('--seed', type=int, default=None, help='random seed') 31 | parser.add_argument( 32 | '--launcher', 33 | choices=['none', 'pytorch', 'slurm', 'mpi'], 34 | default='none', 35 | help='job launcher') 36 | parser.add_argument('--local_rank', type=int, default=0) 37 | args = parser.parse_args() 38 | 39 | return args 40 | 41 | 42 | def main(): 43 | args = parse_args() 44 | 45 | cfg = Config.fromfile(args.config) 46 | # set cudnn_benchmark 47 | if cfg.get('cudnn_benchmark', False): 48 | torch.backends.cudnn.benchmark = True 49 | # update configs according to CLI args 50 | if args.work_dir is not None: 51 | cfg.work_dir = args.work_dir 52 | if args.resume_from is not None: 53 | cfg.resume_from = args.resume_from 54 | cfg.gpus = args.gpus 55 | if cfg.checkpoint_config is not None: 56 | # save mmdet version in checkpoints as meta data 57 | cfg.checkpoint_config.meta = dict( 58 | mmdet_version=__version__, config=cfg.text) 59 | 60 | # init distributed env first, since logger depends on the dist info. 
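# ('none' runs plain single-machine training on args.gpus GPUs; the other
# launchers let init_dist derive rank and world size from the environment,
# e.g. variables set by torch.distributed.launch or the SLURM scheduler.)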
61 | if args.launcher == 'none': 62 | distributed = False 63 | else: 64 | distributed = True 65 | init_dist(args.launcher, **cfg.dist_params) 66 | 67 | # init logger before other steps 68 | logger = get_root_logger(cfg.log_level) 69 | logger.info('Distributed training: {}'.format(distributed)) 70 | 71 | # set random seeds 72 | if args.seed is not None: 73 | logger.info('Set random seed to {}'.format(args.seed)) 74 | set_random_seed(args.seed) 75 | 76 | model = build_detector( 77 | cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg) 78 | 79 | train_dataset = get_dataset(cfg.data.train) 80 | train_detector( 81 | model, 82 | train_dataset, 83 | cfg, 84 | distributed=distributed, 85 | validate=args.validate, 86 | logger=logger) 87 | 88 | 89 | if __name__ == '__main__': 90 | main() 91 | -------------------------------------------------------------------------------- /tools/voc_eval.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | 3 | import mmcv 4 | import numpy as np 5 | 6 | from mmdet import datasets 7 | from mmdet.core import eval_map 8 | 9 | 10 | def voc_eval(result_file, dataset, iou_thr=0.5): 11 | det_results = mmcv.load(result_file) 12 | gt_bboxes = [] 13 | gt_labels = [] 14 | gt_ignore = [] 15 | for i in range(len(dataset)): 16 | ann = dataset.get_ann_info(i) 17 | bboxes = ann['bboxes'] 18 | labels = ann['labels'] 19 | if 'bboxes_ignore' in ann: 20 | ignore = np.concatenate([ 21 | np.zeros(bboxes.shape[0], dtype=np.bool), 22 | np.ones(ann['bboxes_ignore'].shape[0], dtype=np.bool) 23 | ]) 24 | gt_ignore.append(ignore) 25 | bboxes = np.vstack([bboxes, ann['bboxes_ignore']]) 26 | labels = np.concatenate([labels, ann['labels_ignore']]) 27 | gt_bboxes.append(bboxes) 28 | gt_labels.append(labels) 29 | if not gt_ignore: 30 | gt_ignore = None  # eval_map expects None when no boxes are ignored 31 | if hasattr(dataset, 'year') and dataset.year == 2007: 32 | dataset_name = 'voc07' 33 | else: 34 | dataset_name = dataset.CLASSES 35 | eval_map( 36 | det_results, 37 | gt_bboxes, 38 | gt_labels, 39 | gt_ignore=gt_ignore, 40 | scale_ranges=None, 41 | iou_thr=iou_thr, 42 | dataset=dataset_name, 43 | print_summary=True) 44 | 45 | 46 | def main(): 47 | parser = ArgumentParser(description='VOC Evaluation') 48 | parser.add_argument('result', help='result file path') 49 | parser.add_argument('config', help='config file path') 50 | parser.add_argument( 51 | '--iou-thr', 52 | type=float, 53 | default=0.5, 54 | help='IoU threshold for evaluation') 55 | args = parser.parse_args() 56 | cfg = mmcv.Config.fromfile(args.config) 57 | test_dataset = mmcv.runner.obj_from_dict(cfg.data.test, datasets) 58 | voc_eval(args.result, test_dataset, args.iou_thr) 59 | 60 | 61 | if __name__ == '__main__': 62 | main() 63 | --------------------------------------------------------------------------------
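As a closing usage note, the evaluation entry point above can also be driven programmatically. A minimal sketch follows (the result path is hypothetical, and it assumes `voc_eval` from this script is in scope, e.g. appended below it), equivalent to `python tools/voc_eval.py results.pkl configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py`:

```python
import mmcv

from mmdet import datasets

# 'results.pkl' is a hypothetical pickled per-class detection list
# produced by the test script for the model named in the config.
cfg = mmcv.Config.fromfile('configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py')
test_dataset = mmcv.runner.obj_from_dict(cfg.data.test, datasets)
voc_eval('results.pkl', test_dataset, iou_thr=0.5)
```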