├── .gitignore
├── .travis.yml
├── INSTALL.md
├── LICENSE
├── MODEL_ZOO.md
├── README.md
├── TECHNICAL_DETAILS.md
├── compile.sh
├── configs
├── cascade_mask_rcnn_r101_fpn_1x.py
├── cascade_mask_rcnn_r50_c4_1x.py
├── cascade_mask_rcnn_r50_fpn_1x.py
├── cascade_mask_rcnn_x101_32x4d_fpn_1x.py
├── cascade_mask_rcnn_x101_64x4d_fpn_1x.py
├── cascade_rcnn_r101_fpn_1x.py
├── cascade_rcnn_r50_c4_1x.py
├── cascade_rcnn_r50_fpn_1x.py
├── cascade_rcnn_x101_32x4d_fpn_1x.py
├── cascade_rcnn_x101_64x4d_fpn_1x.py
├── dcn
│   ├── cascade_mask_rcnn_dconv_c3-c5_r50_fpn_1x.py
│   ├── cascade_rcnn_dconv_c3-c5_r50_fpn_1x.py
│   ├── faster_rcnn_dconv_c3-c5_r50_fpn_1x.py
│   ├── faster_rcnn_dconv_c3-c5_x101_32x4d_fpn_1x.py
│   ├── faster_rcnn_dpool_r50_fpn_1x.py
│   ├── faster_rcnn_mdconv_c3-c5_r50_fpn_1x.py
│   ├── faster_rcnn_mdpool_r50_fpn_1x.py
│   └── mask_rcnn_dconv_c3-c5_r50_fpn_1x.py
├── fast_mask_rcnn_r101_fpn_1x.py
├── fast_mask_rcnn_r50_fpn_1x.py
├── fast_rcnn_r101_fpn_1x.py
├── fast_rcnn_r50_c4_1x.py
├── fast_rcnn_r50_fpn_1x.py
├── faster_rcnn_ohem_r50_fpn_1x.py
├── faster_rcnn_r101_fpn_1x.py
├── faster_rcnn_r50_c4_1x.py
├── faster_rcnn_r50_fpn_1x.py
├── faster_rcnn_x101_32x4d_fpn_1x.py
├── faster_rcnn_x101_64x4d_fpn_1x.py
├── gn+ws
│   ├── README.md
│   ├── faster_rcnn_r50_fpn_gn_ws_1x.py
│   ├── mask_rcnn_r50_fpn_gn_ws_20_23_24e.py
│   ├── mask_rcnn_r50_fpn_gn_ws_2x.py
│   └── mask_rcnn_x101_32x4d_fpn_gn_ws_2x.py
├── htc
│   ├── README.md
│   ├── htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e.py
│   ├── htc_r101_fpn_20e.py
│   ├── htc_r50_fpn_1x.py
│   ├── htc_r50_fpn_20e.py
│   ├── htc_without_semantic_r50_fpn_1x.py
│   ├── htc_x101_32x4d_fpn_20e_16gpu.py
│   └── htc_x101_64x4d_fpn_20e_16gpu.py
├── mask_rcnn_r101_fpn_1x.py
├── mask_rcnn_r101_fpn_gn_2x.py
├── mask_rcnn_r50_c4_1x.py
├── mask_rcnn_r50_fpn_1x.py
├── mask_rcnn_r50_fpn_gn_2x.py
├── mask_rcnn_r50_fpn_gn_contrib_2x.py
├── mask_rcnn_x101_32x4d_fpn_1x.py
├── mask_rcnn_x101_64x4d_fpn_1x.py
├── panoptic_net.py
├── pascal_voc
│   ├── faster_rcnn_r50_fpn_1x_voc0712.py
│   ├── ssd300_voc.py
│   └── ssd512_voc.py
├── retinanet_r101_fpn_1x.py
├── retinanet_r50_fpn_1x.py
├── retinanet_x101_32x4d_fpn_1x.py
├── retinanet_x101_64x4d_fpn_1x.py
├── rpn_r101_fpn_1x.py
├── rpn_r50_c4_1x.py
├── rpn_r50_fpn_1x.py
├── rpn_x101_32x4d_fpn_1x.py
├── rpn_x101_64x4d_fpn_1x.py
├── ssd300_coco.py
└── ssd512_coco.py
├── demo
├── coco_test_12510.jpg
└── coco_val_32901.png
├── init_coco.py
├── init_coco.sh
├── mmdet
├── __init__.py
├── apis
│   ├── __init__.py
│   ├── env.py
│   ├── inference.py
│   └── train.py
├── core
│   ├── __init__.py
│   ├── anchor
│   │   ├── __init__.py
│   │   ├── anchor_generator.py
│   │   └── anchor_target.py
│   ├── bbox
│   │   ├── __init__.py
│   │   ├── assign_sampling.py
│   │   ├── assigners
│   │   │   ├── __init__.py
│   │   │   ├── assign_result.py
│   │   │   ├── base_assigner.py
│   │   │   └── max_iou_assigner.py
│   │   ├── bbox_target.py
│   │   ├── geometry.py
│   │   ├── samplers
│   │   │   ├── __init__.py
│   │   │   ├── base_sampler.py
│   │   │   ├── combined_sampler.py
│   │   │   ├── instance_balanced_pos_sampler.py
│   │   │   ├── iou_balanced_neg_sampler.py
│   │   │   ├── ohem_sampler.py
│   │   │   ├── pseudo_sampler.py
│   │   │   ├── random_sampler.py
│   │   │   └── sampling_result.py
│   │   └── transforms.py
│   ├── evaluation
│   │   ├── __init__.py
│   │   ├── bbox_overlaps.py
│   │   ├── class_names.py
│   │   ├── coco_utils.py
│   │   ├── eval_hooks.py
│   │   ├── mean_ap.py
│   │   └── recall.py
│   ├── loss
│   │   ├── __init__.py
│   │   └── losses.py
│   ├── mask
│   │   ├── __init__.py
│   │   ├── mask_target.py
│   │   └── utils.py
│   ├── post_processing
│   │   ├── __init__.py
│   │   ├── bbox_nms.py
│   │   └── merge_augs.py
│   └── utils
│   │   ├── __init__.py
│   │   ├── dist_utils.py
│   │   └── misc.py
├── datasets
│   ├── __init__.py
│   ├── coco.py
│   ├── coco_panoptic.py
│   ├── concat_dataset.py
│   ├── custom.py
│   ├── custom_panoptic.py
│   ├── extra_aug.py
│   ├── loader
│   │   ├── __init__.py
│   │   ├── build_loader.py
│   │   └── sampler.py
│   ├── repeat_dataset.py
│   ├── transforms.py
│   ├── utils.py
│   ├── voc.py
│   └── xml_style.py
├── models
│   ├── __init__.py
│   ├── anchor_heads
│   │   ├── __init__.py
│   │   ├── anchor_head.py
│   │   ├── retina_head.py
│   │   ├── rpn_head.py
│   │   └── ssd_head.py
│   ├── backbones
│   │   ├── __init__.py
│   │   ├── resnet.py
│   │   ├── resnext.py
│   │   └── ssd_vgg.py
│   ├── bbox_heads
│   │   ├── __init__.py
│   │   ├── bbox_head.py
│   │   └── convfc_bbox_head.py
│   ├── builder.py
│   ├── detectors
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── cascade_rcnn.py
│   │   ├── fast_rcnn.py
│   │   ├── faster_rcnn.py
│   │   ├── htc.py
│   │   ├── mask_rcnn.py
│   │   ├── panotic_rcnn.py
│   │   ├── retinanet.py
│   │   ├── rpn.py
│   │   ├── single_stage.py
│   │   ├── test_mixins.py
│   │   ├── two_stage.py
│   │   └── two_stage_panotic.py
│   ├── mask_heads
│   │   ├── __init__.py
│   │   ├── fcn_mask_head.py
│   │   ├── fused_semantic_head.py
│   │   └── htc_mask_head.py
│   ├── necks
│   │   ├── __init__.py
│   │   └── fpn.py
│   ├── registry.py
│   ├── roi_extractors
│   │   ├── __init__.py
│   │   └── single_level.py
│   ├── seg_heads
│   │   ├── __init__.py
│   │   └── fcn_seg_head.py
│   ├── shared_heads
│   │   ├── __init__.py
│   │   └── res_layer.py
│   └── utils
│   │   ├── __init__.py
│   │   ├── conv_module.py
│   │   ├── conv_ws.py
│   │   ├── norm.py
│   │   └── weight_init.py
└── ops
│   ├── __init__.py
│   ├── dcn
│   ├── __init__.py
│   ├── functions
│   │   ├── __init__.py
│   │   ├── deform_conv.py
│   │   └── deform_pool.py
│   ├── modules
│   │   ├── __init__.py
│   │   ├── deform_conv.py
│   │   └── deform_pool.py
│   ├── setup.py
│   └── src
│   │   ├── deform_conv_cuda.cpp
│   │   ├── deform_conv_cuda_kernel.cu
│   │   ├── deform_pool_cuda.cpp
│   │   └── deform_pool_cuda_kernel.cu
│   ├── nms
│   ├── __init__.py
│   ├── nms_wrapper.py
│   ├── setup.py
│   └── src
│   │   ├── nms_cpu.cpp
│   │   ├── nms_cuda.cpp
│   │   ├── nms_kernel.cu
│   │   └── soft_nms_cpu.pyx
│   ├── roi_align
│   ├── __init__.py
│   ├── functions
│   │   ├── __init__.py
│   │   └── roi_align.py
│   ├── gradcheck.py
│   ├── modules
│   │   ├── __init__.py
│   │   └── roi_align.py
│   ├── setup.py
│   └── src
│   │   ├── roi_align_cuda.cpp
│   │   └── roi_align_kernel.cu
│   ├── roi_pool
│   ├── __init__.py
│   ├── functions
│   │   ├── __init__.py
│   │   └── roi_pool.py
│   ├── gradcheck.py
│   ├── modules
│   │   ├── __init__.py
│   │   └── roi_pool.py
│   ├── setup.py
│   └── src
│   │   ├── roi_pool_cuda.cpp
│   │   └── roi_pool_kernel.cu
│   └── sigmoid_focal_loss
│   ├── __init__.py
│   ├── functions
│   ├── __init__.py
│   └── sigmoid_focal_loss.py
│   ├── modules
│   ├── __init__.py
│   └── sigmoid_focal_loss.py
│   ├── setup.py
│   └── src
│   ├── sigmoid_focal_loss.cpp
│   └── sigmoid_focal_loss_cuda.cu
├── panopticapi
├── CONVERTERS.md
├── README.md
├── __init__.py
├── cityscapes_gt_converter
│   ├── __init__.py
│   └── cityscapes_panoptic_converter.py
├── combine_semantic_and_instance_predictions.py
├── converted_data
│   └── .gitignore
├── converters
│   ├── 2channels2panoptic_coco_format.py
│   ├── __init__.py
│   ├── detection2panoptic_coco_format.py
│   ├── panoptic2detection_coco_format.py
│   └── panoptic2semantic_segmentation.py
├── evaluation.py
├── license.txt
├── panoptic_coco_categories.json
├── sample_data
│   ├── images_info_examples.json
│   ├── input_images
│   │   ├── 000000142238.jpg
│   │   └── 000000439180.jpg
│   ├── panoptic_coco_detection_format.json
│   ├── panoptic_examples.json
│   ├── panoptic_examples
│   │   ├── 000000142238.png
│   │   └── 000000439180.png
│   └── panoptic_examples_2ch_format
│   │   ├── 000000142238.png
│   │   └── 000000439180.png
├── utils.py
└── visualization.py
├── setup.py
└── tools
├── coco_eval.py
├── convert_datasets
└── pascal_voc.py
├── dist_train.sh
├── panoptic_evaluate.py
├── slurm_test.sh
├── slurm_train.sh
├── test.py
├── train.py
└── voc_eval.py

/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 | 
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 | 
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 | 
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 | 
50 | # Translations
51 | *.mo
52 | *.pot
53 | 
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 | 
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 | 
63 | # Scrapy stuff:
64 | .scrapy
65 | 
66 | # Sphinx documentation
67 | docs/_build/
68 | 
69 | # PyBuilder
70 | target/
71 | 
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 | 
75 | # pyenv
76 | .python-version
77 | 
78 | # celery beat schedule file
79 | celerybeat-schedule
80 | 
81 | # SageMath parsed files
82 | *.sage.py
83 | 
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 | 
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 | 
97 | # Rope project settings
98 | .ropeproject
99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 
106 | # cython generated cpp
107 | mmdet/ops/nms/src/soft_nms_cpu.cpp
108 | mmdet/version.py
109 | data
110 | .vscode
111 | .idea
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | dist: trusty
2 | language: python
3 | 
4 | install:
5 |   - pip install flake8
6 | 
7 | python:
8 |   - "3.5"
9 |   - "3.6"
10 | 
11 | script:
12 |   - flake8
--------------------------------------------------------------------------------
/INSTALL.md:
--------------------------------------------------------------------------------
1 | ## Installation
2 | 
3 | ### Requirements
4 | 
5 | - Linux (tested on Ubuntu 16.04 and CentOS 7.2)
6 | - Python 3.4+
7 | - PyTorch 1.0
8 | - Cython
9 | - [mmcv](https://github.com/open-mmlab/mmcv)
10 | 
11 | ### Install mmdetection
12 | 
13 | a. Install PyTorch 1.0 and torchvision following the [official instructions](https://pytorch.org/).
14 | 
15 | b. Clone the mmdetection repository.
16 | 
17 | ```shell
18 | git clone https://github.com/open-mmlab/mmdetection.git
19 | ```
20 | 
21 | c. Compile the CUDA extensions.
22 | 
23 | ```shell
24 | cd mmdetection
25 | pip install cython # or "conda install cython" if you prefer conda
26 | ./compile.sh # or "PYTHON=python3 ./compile.sh" if you use system python3 without virtual environments
27 | ```
28 | 
29 | d. Install mmdetection (other dependencies will be installed automatically).
30 | 
31 | ```shell
32 | python(3) setup.py install # add --user if you want to install it locally
33 | # or "pip install ."
34 | ```
35 | 
36 | Note: you need to re-run the last step each time you pull updates from GitHub.
37 | The git commit id will be written to the version number and also saved in trained models.
38 | 
39 | ### Prepare the COCO dataset
40 | 
41 | It is recommended to symlink the dataset root to `$MMDETECTION/data`.
42 | 
43 | ```
44 | mmdetection
45 | ├── mmdet
46 | ├── tools
47 | ├── configs
48 | ├── data
49 | │   ├── coco
50 | │   │   ├── annotations
51 | │   │   ├── train2017
52 | │   │   ├── val2017
53 | │   │   ├── test2017
54 | │   ├── VOCdevkit
55 | │   │   ├── VOC2007
56 | │   │   ├── VOC2012
57 | 
58 | ```
59 | 
60 | ### Scripts
61 | Just for reference, [here](https://gist.github.com/hellock/bf23cd7348c727d69d48682cb6909047) is
62 | a script for setting up mmdetection with conda.
63 | 
64 | ### Notice
65 | You can run `python(3) setup.py develop` or `pip install -e .` to install mmdetection if you want to make frequent modifications to it.
66 | 
67 | If there is more than one copy of mmdetection on your machine and you want to switch between them,
68 | insert the following code into the main file
69 | ```python
70 | import os.path as osp
71 | import sys
72 | sys.path.insert(0, osp.join(osp.dirname(osp.abspath(__file__)), '../'))
73 | ```
74 | or run the following command in the terminal from the corresponding folder.
75 | ```shell
76 | export PYTHONPATH=`pwd`:$PYTHONPATH
77 | ```
78 | 
--------------------------------------------------------------------------------
/TECHNICAL_DETAILS.md:
--------------------------------------------------------------------------------
1 | ## Overview
2 | 
3 | In this section, we will introduce the main units of training a detector:
4 | data loading, the model, and the iteration pipeline.
5 | 
6 | ## Data loading
7 | 
8 | Following typical conventions, we use `Dataset` and `DataLoader` for data loading
9 | with multiple workers. `Dataset` returns a dict of data items corresponding
10 | to the arguments of the model's forward method.
11 | Since the data in object detection may not be the same size (image size, gt bbox size, etc.),
12 | we introduce a new `DataContainer` type in `mmcv` to help collect and distribute
13 | data of different sizes.
14 | See [here](https://github.com/open-mmlab/mmcv/blob/master/mmcv/parallel/data_container.py) for more details.
15 | 
16 | ## Model
17 | 
18 | In mmdetection, model components are categorized into four types.
19 | 
20 | - backbone: usually an FCN that extracts feature maps, e.g., ResNet.
21 | - neck: the part between backbones and heads, e.g., FPN, ASPP.
22 | - head: the part for specific tasks, e.g., bbox prediction and mask prediction.
23 | - roi extractor: the part for extracting features from feature maps, e.g., RoI Align.
24 | 
25 | We also implement some general detection pipelines with the above components,
26 | such as `SingleStageDetector` and `TwoStageDetector`.
27 | 
28 | ### Build a model with basic components
29 | 
30 | Following some basic pipelines (e.g., two-stage detectors), the model structure
31 | can be customized through config files with little effort.
32 | 
33 | If we want to implement some new components, e.g., the path aggregation
34 | FPN structure in [Path Aggregation Network for Instance Segmentation](https://arxiv.org/abs/1803.01534), there are two things to do.
35 | 
36 | 1. Create a new file, `mmdet/models/necks/pafpn.py`.
37 | 
38 | ```python
39 | class PAFPN(nn.Module):
40 | 
41 |     def __init__(self,
42 |                  in_channels,
43 |                  out_channels,
44 |                  num_outs,
45 |                  start_level=0,
46 |                  end_level=-1,
47 |                  add_extra_convs=False):
48 |         pass
49 | 
50 |     def forward(self, inputs):
51 |         # implementation is ignored
52 |         pass
53 | ```
54 | 
55 | 2. Modify the config file from
56 | 
57 | ```python
58 | neck=dict(
59 |     type='FPN',
60 |     in_channels=[256, 512, 1024, 2048],
61 |     out_channels=256,
62 |     num_outs=5)
63 | ```
64 | 
65 | to
66 | 
67 | ```python
68 | neck=dict(
69 |     type='PAFPN',
70 |     in_channels=[256, 512, 1024, 2048],
71 |     out_channels=256,
72 |     num_outs=5)
73 | ```
74 | 
75 | We will release more components (backbones, necks, heads) for research purposes.
76 | 
77 | ### Write a new model
78 | 
79 | To write a new detection pipeline, you need to inherit from `BaseDetector`,
80 | which defines the following abstract methods.
81 | 
82 | - `extract_feat()`: given an image batch of shape (n, c, h, w), extract the feature map(s).
83 | - `forward_train()`: forward method of the training mode
84 | - `simple_test()`: single scale testing without augmentation
85 | - `aug_test()`: testing with augmentation (multi-scale, flip, etc.)
86 | 
87 | [TwoStageDetector](https://github.com/hellock/mmdetection/blob/master/mmdet/models/detectors/two_stage.py)
88 | is a good example of how to do this.
89 | 
90 | ## Iteration pipeline
91 | 
92 | We adopt distributed training for both single-machine and multi-machine setups.
93 | If a server has 8 GPUs, 8 processes will be started, each running on a single GPU.
94 | 
95 | Each process keeps an isolated model, data loader, and optimizer.
96 | Model parameters are only synchronized once at the beginning.
97 | After a forward and backward pass, gradients will be allreduced among all GPUs,
98 | and the optimizer will update the model parameters.
99 | Since the gradients are allreduced, the model parameters stay the same across all processes after each iteration.
100 | 
--------------------------------------------------------------------------------
/compile.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | PYTHON=${PYTHON:-"python"}
4 | 
5 | echo "Building roi align op..."
6 | cd mmdet/ops/roi_align
7 | if [ -d "build" ]; then
8 |     rm -r build
9 | fi
10 | $PYTHON setup.py build_ext --inplace
11 | 
12 | echo "Building roi pool op..."
13 | cd ../roi_pool
14 | if [ -d "build" ]; then
15 |     rm -r build
16 | fi
17 | $PYTHON setup.py build_ext --inplace
18 | 
19 | echo "Building nms op..."
20 | cd ../nms
21 | if [ -d "build" ]; then
22 |     rm -r build
23 | fi
24 | $PYTHON setup.py build_ext --inplace
25 | 
26 | echo "Building dcn..."
27 | cd ../dcn
28 | if [ -d "build" ]; then
29 |     rm -r build
30 | fi
31 | $PYTHON setup.py build_ext --inplace
32 | 
33 | echo "Building sigmoid focal loss op..."
34 | cd ../sigmoid_focal_loss 35 | if [ -d "build" ]; then 36 | rm -r build 37 | fi 38 | $PYTHON setup.py build_ext --inplace 39 | -------------------------------------------------------------------------------- /configs/fast_rcnn_r101_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FastRCNN', 4 | pretrained='modelzoo://resnet101', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=101, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | style='pytorch'), 12 | neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | bbox_roi_extractor=dict( 18 | type='SingleRoIExtractor', 19 | roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), 20 | out_channels=256, 21 | featmap_strides=[4, 8, 16, 32]), 22 | bbox_head=dict( 23 | type='SharedFCBBoxHead', 24 | num_fcs=2, 25 | in_channels=256, 26 | fc_out_channels=1024, 27 | roi_feat_size=7, 28 | num_classes=81, 29 | target_means=[0., 0., 0., 0.], 30 | target_stds=[0.1, 0.1, 0.2, 0.2], 31 | reg_class_agnostic=False)) 32 | # model training and testing settings 33 | train_cfg = dict( 34 | rcnn=dict( 35 | assigner=dict( 36 | type='MaxIoUAssigner', 37 | pos_iou_thr=0.5, 38 | neg_iou_thr=0.5, 39 | min_pos_iou=0.5, 40 | ignore_iof_thr=-1), 41 | sampler=dict( 42 | type='RandomSampler', 43 | num=512, 44 | pos_fraction=0.25, 45 | neg_pos_ub=-1, 46 | add_gt_as_proposals=True), 47 | pos_weight=-1, 48 | debug=False)) 49 | test_cfg = dict( 50 | rcnn=dict( 51 | score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) 52 | # dataset settings 53 | dataset_type = 'CocoDataset' 54 | data_root = 'data/coco/' 55 | img_norm_cfg = dict( 56 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 57 | data = dict( 58 | imgs_per_gpu=2, 59 | workers_per_gpu=2, 60 | train=dict( 61 | type=dataset_type, 62 | ann_file=data_root + 'annotations/instances_train2017.json', 63 | img_prefix=data_root + 'train2017/', 64 | img_scale=(1333, 800), 65 | img_norm_cfg=img_norm_cfg, 66 | size_divisor=32, 67 | proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl', 68 | flip_ratio=0.5, 69 | with_mask=False, 70 | with_crowd=True, 71 | with_label=True), 72 | val=dict( 73 | type=dataset_type, 74 | ann_file=data_root + 'annotations/instances_val2017.json', 75 | img_prefix=data_root + 'val2017/', 76 | img_scale=(1333, 800), 77 | img_norm_cfg=img_norm_cfg, 78 | proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', 79 | size_divisor=32, 80 | flip_ratio=0, 81 | with_mask=False, 82 | with_crowd=True, 83 | with_label=True), 84 | test=dict( 85 | type=dataset_type, 86 | ann_file=data_root + 'annotations/instances_val2017.json', 87 | img_prefix=data_root + 'val2017/', 88 | img_scale=(1333, 800), 89 | img_norm_cfg=img_norm_cfg, 90 | proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', 91 | size_divisor=32, 92 | flip_ratio=0, 93 | with_mask=False, 94 | with_label=False, 95 | test_mode=True)) 96 | # optimizer 97 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 98 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 99 | # learning policy 100 | lr_config = dict( 101 | policy='step', 102 | warmup='linear', 103 | warmup_iters=500, 104 | warmup_ratio=1.0 / 3, 105 | step=[8, 11]) 106 | checkpoint_config = dict(interval=1) 107 | # yapf:disable 108 | log_config = dict( 109 | interval=50, 110 | hooks=[ 111 | dict(type='TextLoggerHook'), 112 | 
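# Uncommenting the TensorboardLoggerHook on the next line enables TensorBoard
# logging alongside the text logger (both hooks are provided by mmcv).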
# dict(type='TensorboardLoggerHook') 113 | ]) 114 | # yapf:enable 115 | # runtime settings 116 | total_epochs = 12 117 | dist_params = dict(backend='nccl') 118 | log_level = 'INFO' 119 | work_dir = './work_dirs/fast_rcnn_r101_fpn_1x' 120 | load_from = None 121 | resume_from = None 122 | workflow = [('train', 1)] 123 | -------------------------------------------------------------------------------- /configs/fast_rcnn_r50_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FastRCNN', 4 | pretrained='modelzoo://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | style='pytorch'), 12 | neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | bbox_roi_extractor=dict( 18 | type='SingleRoIExtractor', 19 | roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), 20 | out_channels=256, 21 | featmap_strides=[4, 8, 16, 32]), 22 | bbox_head=dict( 23 | type='SharedFCBBoxHead', 24 | num_fcs=2, 25 | in_channels=256, 26 | fc_out_channels=1024, 27 | roi_feat_size=7, 28 | num_classes=81, 29 | target_means=[0., 0., 0., 0.], 30 | target_stds=[0.1, 0.1, 0.2, 0.2], 31 | reg_class_agnostic=False)) 32 | # model training and testing settings 33 | train_cfg = dict( 34 | rcnn=dict( 35 | assigner=dict( 36 | type='MaxIoUAssigner', 37 | pos_iou_thr=0.5, 38 | neg_iou_thr=0.5, 39 | min_pos_iou=0.5, 40 | ignore_iof_thr=-1), 41 | sampler=dict( 42 | type='RandomSampler', 43 | num=512, 44 | pos_fraction=0.25, 45 | neg_pos_ub=-1, 46 | add_gt_as_proposals=True), 47 | pos_weight=-1, 48 | debug=False)) 49 | test_cfg = dict( 50 | rcnn=dict( 51 | score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) 52 | # dataset settings 53 | dataset_type = 'CocoDataset' 54 | data_root = 'data/coco/' 55 | img_norm_cfg = dict( 56 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 57 | data = dict( 58 | imgs_per_gpu=2, 59 | workers_per_gpu=2, 60 | train=dict( 61 | type=dataset_type, 62 | ann_file=data_root + 'annotations/instances_train2017.json', 63 | img_prefix=data_root + 'train2017/', 64 | img_scale=(1333, 800), 65 | img_norm_cfg=img_norm_cfg, 66 | size_divisor=32, 67 | proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl', 68 | flip_ratio=0.5, 69 | with_mask=False, 70 | with_crowd=True, 71 | with_label=True), 72 | val=dict( 73 | type=dataset_type, 74 | ann_file=data_root + 'annotations/instances_val2017.json', 75 | img_prefix=data_root + 'val2017/', 76 | img_scale=(1333, 800), 77 | img_norm_cfg=img_norm_cfg, 78 | proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', 79 | size_divisor=32, 80 | flip_ratio=0, 81 | with_mask=False, 82 | with_crowd=True, 83 | with_label=True), 84 | test=dict( 85 | type=dataset_type, 86 | ann_file=data_root + 'annotations/instances_val2017.json', 87 | img_prefix=data_root + 'val2017/', 88 | img_scale=(1333, 800), 89 | img_norm_cfg=img_norm_cfg, 90 | proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', 91 | size_divisor=32, 92 | flip_ratio=0, 93 | with_mask=False, 94 | with_label=False, 95 | test_mode=True)) 96 | # optimizer 97 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 98 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 99 | # learning policy 100 | lr_config = dict( 101 | policy='step', 102 | warmup='linear', 103 | warmup_iters=500, 104 | warmup_ratio=1.0 / 
3,
105 |     step=[8, 11])
106 | checkpoint_config = dict(interval=1)
107 | # yapf:disable
108 | log_config = dict(
109 |     interval=50,
110 |     hooks=[
111 |         dict(type='TextLoggerHook'),
112 |         # dict(type='TensorboardLoggerHook')
113 |     ])
114 | # yapf:enable
115 | # runtime settings
116 | total_epochs = 12
117 | dist_params = dict(backend='nccl')
118 | log_level = 'INFO'
119 | work_dir = './work_dirs/fast_rcnn_r50_fpn_1x'
120 | load_from = None
121 | resume_from = None
122 | workflow = [('train', 1)]
123 | 
--------------------------------------------------------------------------------
/configs/htc/README.md:
--------------------------------------------------------------------------------
1 | # Hybrid Task Cascade for Instance Segmentation
2 | 
3 | ## Introduction
4 | 
5 | We provide config files to reproduce the results of the CVPR 2019 paper [Hybrid Task Cascade](https://arxiv.org/abs/1901.07518).
6 | 
7 | ```
8 | @inproceedings{chen2019hybrid,
9 |   title={Hybrid task cascade for instance segmentation},
10 |   author={Chen, Kai and Pang, Jiangmiao and Wang, Jiaqi and Xiong, Yu and Li, Xiaoxiao and Sun, Shuyang and Feng, Wansen and Liu, Ziwei and Shi, Jianping and Ouyang, Wanli and Loy, Chen Change and Lin, Dahua},
11 |   booktitle={IEEE Conference on Computer Vision and Pattern Recognition},
12 |   year={2019}
13 | }
14 | ```
15 | 
16 | ## Dataset
17 | 
18 | HTC requires the COCO and COCO-stuff datasets for training. You need to download and extract them under the COCO dataset path.
19 | The directory structure should look like this.
20 | 
21 | ```
22 | mmdetection
23 | ├── mmdet
24 | ├── tools
25 | ├── configs
26 | ├── data
27 | │   ├── coco
28 | │   │   ├── annotations
29 | │   │   ├── train2017
30 | │   │   ├── val2017
31 | │   │   ├── test2017
32 | │   │   ├── stuffthingmaps
33 | ```
34 | 
35 | ## Results and Models
36 | 
37 | The results on COCO 2017val are shown in the table below (results on test-dev are usually slightly higher than on val).
38 | 
39 | | Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download |
40 | |:---------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:|
41 | | R-50-FPN | pytorch | 1x | | | | 42.2 | 37.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_r50_fpn_1x_20190408-878c1712.pth) |
42 | | R-50-FPN | pytorch | 20e | | | | 43.2 | 38.0 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_r50_fpn_20e_20190408-c03b7015.pth) |
43 | | R-101-FPN | pytorch | 20e | | | | 44.9 | 39.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_r101_fpn_20e_20190408-a2e586db.pth) |
44 | | X-101-32x4d-FPN | pytorch | 20e | | | | 46.1 | 40.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_x101_32x4d_fpn_20e_20190408-9eae4d0b.pth) |
45 | | X-101-64x4d-FPN | pytorch | 20e | | | | 47.0 | 40.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_x101_64x4d_fpn_20e_20190408-497f2561.pth) |
46 | 
47 | - In the HTC paper and COCO 2018 Challenge, `score_thr` is set to 0.001 for both baselines and HTC.
48 | - We use 8 GPUs with 2 images/GPU for R-50 and R-101 models, and 16 GPUs with 1 image/GPU for X-101 models.
49 |   If you would like to train X-101 HTC with 8 GPUs, you need to change the lr from 0.02 to 0.01.
50 | 
51 | We also provide a stronger HTC model with DCN and multi-scale training. No test-time augmentation is used.
52 | 
53 | | Backbone | Style | DCN | training scales | Lr schd | box AP | mask AP | Download |
54 | |:----------------:|:-------:|:-----:|:---------------:|:-------:|:------:|:-------:|:--------:|
55 | | X-101-64x4d-FPN | pytorch | c3-c5 | 400~1400 | 20e | 50.7 | 43.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e_20190408-0e50669c.pth) |
--------------------------------------------------------------------------------
/configs/retinanet_r101_fpn_1x.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 |     type='RetinaNet',
4 |     pretrained='modelzoo://resnet101',
5 |     backbone=dict(
6 |         type='ResNet',
7 |         depth=101,
8 |         num_stages=4,
9 |         out_indices=(0, 1, 2, 3),
10 |         frozen_stages=1,
11 |         style='pytorch'),
12 |     neck=dict(
13 |         type='FPN',
14 |         in_channels=[256, 512, 1024, 2048],
15 |         out_channels=256,
16 |         start_level=1,
17 |         add_extra_convs=True,
18 |         num_outs=5),
19 |     bbox_head=dict(
20 |         type='RetinaHead',
21 |         num_classes=81,
22 |         in_channels=256,
23 |         stacked_convs=4,
24 |         feat_channels=256,
25 |         octave_base_scale=4,
26 |         scales_per_octave=3,
27 |         anchor_ratios=[0.5, 1.0, 2.0],
28 |         anchor_strides=[8, 16, 32, 64, 128],
29 |         target_means=[.0, .0, .0, .0],
30 |         target_stds=[1.0, 1.0, 1.0, 1.0]))
31 | # training and testing settings
32 | train_cfg = dict(
33 |     assigner=dict(
34 |         type='MaxIoUAssigner',
35 |         pos_iou_thr=0.5,
36 |         neg_iou_thr=0.4,
37 |         min_pos_iou=0,
38 |         ignore_iof_thr=-1),
39 |     smoothl1_beta=0.11,
40 |     gamma=2.0,
41 |     alpha=0.25,
42 |     allowed_border=-1,
43 |     pos_weight=-1,
44 |     debug=False)
45 | test_cfg = dict(
46 |     nms_pre=1000,
47 |     min_bbox_size=0,
48 |     score_thr=0.05,
49 |     nms=dict(type='nms', iou_thr=0.5),
50 |     max_per_img=100)
51 | # dataset settings
52 | dataset_type = 'CocoDataset'
53 | data_root = 'data/coco/'
54 | img_norm_cfg = dict(
55 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
56 | data = dict(
57 |     imgs_per_gpu=2,
58 |     workers_per_gpu=2,
59 |     train=dict(
60 |         type=dataset_type,
61 |         ann_file=data_root + 'annotations/instances_train2017.json',
62 |         img_prefix=data_root + 'train2017/',
63 |         img_scale=(1333, 800),
64 |         img_norm_cfg=img_norm_cfg,
65 |         size_divisor=32,
66 |         flip_ratio=0.5,
67 |         with_mask=False,
68 |         with_crowd=False,
69 |         with_label=True),
70 |     val=dict(
71 |         type=dataset_type,
72 |         ann_file=data_root + 'annotations/instances_val2017.json',
73 |         img_prefix=data_root + 'val2017/',
74 |         img_scale=(1333, 800),
75 |         img_norm_cfg=img_norm_cfg,
76 |         size_divisor=32,
77 |         flip_ratio=0,
78 |         with_mask=False,
79 |         with_crowd=False,
80 |         with_label=True),
81 |     test=dict(
82 |         type=dataset_type,
83 |         ann_file=data_root + 'annotations/instances_val2017.json',
84 |         img_prefix=data_root + 'val2017/',
85 |         img_scale=(1333, 800),
86 |         img_norm_cfg=img_norm_cfg,
87 |         size_divisor=32,
88 |         flip_ratio=0,
89 |         with_mask=False,
90 |         with_crowd=False,
91 |         with_label=False,
92 |         test_mode=True))
93 | # optimizer
94 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
95 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
96 | # learning policy
97 | lr_config = dict(
98 |     policy='step',
99 |     warmup='linear',
100 |     warmup_iters=500,
101 |     warmup_ratio=1.0 / 3,
102 |     step=[8, 11])
103 | checkpoint_config = dict(interval=1)
104 | # yapf:disable
105 | log_config = dict(
106 |     interval=50,
107 |     hooks=[
108 |         dict(type='TextLoggerHook'),
109 |         # dict(type='TensorboardLoggerHook')
110 |     ])
111 | # yapf:enable
112 | # runtime settings
113 | total_epochs = 12
114 | device_ids = range(8)
115 | dist_params = dict(backend='nccl')
116 | log_level = 'INFO'
117 | work_dir = './work_dirs/retinanet_r101_fpn_1x'
118 | load_from = None
119 | resume_from = None
120 | workflow = [('train', 1)]
121 | 
--------------------------------------------------------------------------------
/configs/retinanet_r50_fpn_1x.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 |     type='RetinaNet',
4 |     pretrained='modelzoo://resnet50',
5 |     backbone=dict(
6 |         type='ResNet',
7 |         depth=50,
8 |         num_stages=4,
9 |         out_indices=(0, 1, 2, 3),
10 |         frozen_stages=1,
11 |         style='pytorch'),
12 |     neck=dict(
13 |         type='FPN',
14 |         in_channels=[256, 512, 1024, 2048],
15 |         out_channels=256,
16 |         start_level=1,
17 |         add_extra_convs=True,
18 |         num_outs=5),
19 |     bbox_head=dict(
20 |         type='RetinaHead',
21 |         num_classes=81,
22 |         in_channels=256,
23 |         stacked_convs=4,
24 |         feat_channels=256,
25 |         octave_base_scale=4,
26 |         scales_per_octave=3,
27 |         anchor_ratios=[0.5, 1.0, 2.0],
28 |         anchor_strides=[8, 16, 32, 64, 128],
29 |         target_means=[.0, .0, .0, .0],
30 |         target_stds=[1.0, 1.0, 1.0, 1.0]))
31 | # training and testing settings
32 | train_cfg = dict(
33 |     assigner=dict(
34 |         type='MaxIoUAssigner',
35 |         pos_iou_thr=0.5,
36 |         neg_iou_thr=0.4,
37 |         min_pos_iou=0,
38 |         ignore_iof_thr=-1),
39 |     smoothl1_beta=0.11,
40 |     gamma=2.0,
41 |     alpha=0.25,
42 |     allowed_border=-1,
43 |     pos_weight=-1,
44 |     debug=False)
45 | test_cfg = dict(
46 |     nms_pre=1000,
47 |     min_bbox_size=0,
48 |     score_thr=0.05,
49 |     nms=dict(type='nms', iou_thr=0.5),
50 |     max_per_img=100)
51 | # dataset settings
52 | dataset_type = 'CocoDataset'
53 | data_root = 'data/coco/'
54 | img_norm_cfg = dict(
55 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
56 | data = dict(
57 |     imgs_per_gpu=2,
58 |     workers_per_gpu=2,
59 |     train=dict(
60 |         type=dataset_type,
61 |         ann_file=data_root + 'annotations/instances_train2017.json',
62 |         img_prefix=data_root + 'train2017/',
63 |         img_scale=(1333, 800),
64 |         img_norm_cfg=img_norm_cfg,
65 |         size_divisor=32,
66 |         flip_ratio=0.5,
67 |         with_mask=False,
68 |         with_crowd=False,
69 |         with_label=True),
70 |     val=dict(
71 |         type=dataset_type,
72 |         ann_file=data_root + 'annotations/instances_val2017.json',
73 |         img_prefix=data_root + 'val2017/',
74 |         img_scale=(1333, 800),
75 |         img_norm_cfg=img_norm_cfg,
76 |         size_divisor=32,
77 |         flip_ratio=0,
78 |         with_mask=False,
79 |         with_crowd=False,
80 |         with_label=True),
81 |     test=dict(
82 |         type=dataset_type,
83 |         ann_file=data_root + 'annotations/instances_val2017.json',
84 |         img_prefix=data_root + 'val2017/',
85 |         img_scale=(1333, 800),
86 |         img_norm_cfg=img_norm_cfg,
87 |         size_divisor=32,
88 |         flip_ratio=0,
89 |         with_mask=False,
90 |         with_crowd=False,
91 |         with_label=False,
92 |         test_mode=True))
93 | # optimizer
94 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
95 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
96 | # learning policy
97 | lr_config = dict(
98 |     policy='step',
99 |     warmup='linear',
100 |     warmup_iters=500,
101 |     warmup_ratio=1.0 / 3,
102 |     step=[8, 11])
103 | checkpoint_config = dict(interval=1)
104 | # yapf:disable
105 | log_config = dict(
106 |     interval=50,
107 |     hooks=[
108 |         dict(type='TextLoggerHook'),
109 |         # dict(type='TensorboardLoggerHook')
110 |     ])
111 | # yapf:enable
112 | # runtime settings
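# The '1x' schedule used throughout these configs means 12 epochs, with the
# learning rate stepped down after epochs 8 and 11 (see lr_config above).
# lr=0.01 for RetinaNet assumes the 8 GPUs x 2 imgs/GPU batch configured here;
# following the linear scaling rule, halve it for 4 GPUs (per the upstream
# mmdetection convention).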
113 | total_epochs = 12 114 | device_ids = range(8) 115 | dist_params = dict(backend='nccl') 116 | log_level = 'INFO' 117 | work_dir = './work_dirs/retinanet_r50_fpn_1x' 118 | load_from = None 119 | resume_from = None 120 | workflow = [('train', 1)] 121 | -------------------------------------------------------------------------------- /configs/retinanet_x101_32x4d_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='open-mmlab://resnext101_32x4d', 5 | backbone=dict( 6 | type='ResNeXt', 7 | depth=101, 8 | groups=32, 9 | base_width=4, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=1, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | start_level=1, 19 | add_extra_convs=True, 20 | num_outs=5), 21 | bbox_head=dict( 22 | type='RetinaHead', 23 | num_classes=81, 24 | in_channels=256, 25 | stacked_convs=4, 26 | feat_channels=256, 27 | octave_base_scale=4, 28 | scales_per_octave=3, 29 | anchor_ratios=[0.5, 1.0, 2.0], 30 | anchor_strides=[8, 16, 32, 64, 128], 31 | target_means=[.0, .0, .0, .0], 32 | target_stds=[1.0, 1.0, 1.0, 1.0])) 33 | # training and testing settings 34 | train_cfg = dict( 35 | assigner=dict( 36 | type='MaxIoUAssigner', 37 | pos_iou_thr=0.5, 38 | neg_iou_thr=0.4, 39 | min_pos_iou=0, 40 | ignore_iof_thr=-1), 41 | smoothl1_beta=0.11, 42 | gamma=2.0, 43 | alpha=0.25, 44 | allowed_border=-1, 45 | pos_weight=-1, 46 | debug=False) 47 | test_cfg = dict( 48 | nms_pre=1000, 49 | min_bbox_size=0, 50 | score_thr=0.05, 51 | nms=dict(type='nms', iou_thr=0.5), 52 | max_per_img=100) 53 | # dataset settings 54 | dataset_type = 'CocoDataset' 55 | data_root = 'data/coco/' 56 | img_norm_cfg = dict( 57 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 58 | data = dict( 59 | imgs_per_gpu=2, 60 | workers_per_gpu=2, 61 | train=dict( 62 | type=dataset_type, 63 | ann_file=data_root + 'annotations/instances_train2017.json', 64 | img_prefix=data_root + 'train2017/', 65 | img_scale=(1333, 800), 66 | img_norm_cfg=img_norm_cfg, 67 | size_divisor=32, 68 | flip_ratio=0.5, 69 | with_mask=False, 70 | with_crowd=False, 71 | with_label=True), 72 | val=dict( 73 | type=dataset_type, 74 | ann_file=data_root + 'annotations/instances_val2017.json', 75 | img_prefix=data_root + 'val2017/', 76 | img_scale=(1333, 800), 77 | img_norm_cfg=img_norm_cfg, 78 | size_divisor=32, 79 | flip_ratio=0, 80 | with_mask=False, 81 | with_crowd=False, 82 | with_label=True), 83 | test=dict( 84 | type=dataset_type, 85 | ann_file=data_root + 'annotations/instances_val2017.json', 86 | img_prefix=data_root + 'val2017/', 87 | img_scale=(1333, 800), 88 | img_norm_cfg=img_norm_cfg, 89 | size_divisor=32, 90 | flip_ratio=0, 91 | with_mask=False, 92 | with_crowd=False, 93 | with_label=False, 94 | test_mode=True)) 95 | # optimizer 96 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) 97 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 98 | # learning policy 99 | lr_config = dict( 100 | policy='step', 101 | warmup='linear', 102 | warmup_iters=500, 103 | warmup_ratio=1.0 / 3, 104 | step=[8, 11]) 105 | checkpoint_config = dict(interval=1) 106 | # yapf:disable 107 | log_config = dict( 108 | interval=50, 109 | hooks=[ 110 | dict(type='TextLoggerHook'), 111 | # dict(type='TensorboardLoggerHook') 112 | ]) 113 | # yapf:enable 114 | # runtime settings 115 | total_epochs = 12 116 | 
device_ids = range(8) 117 | dist_params = dict(backend='nccl') 118 | log_level = 'INFO' 119 | work_dir = './work_dirs/retinanet_r50_fpn_1x' 120 | load_from = None 121 | resume_from = None 122 | workflow = [('train', 1)] 123 | -------------------------------------------------------------------------------- /configs/retinanet_x101_64x4d_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='open-mmlab://resnext101_64x4d', 5 | backbone=dict( 6 | type='ResNeXt', 7 | depth=101, 8 | groups=64, 9 | base_width=4, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=1, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | start_level=1, 19 | add_extra_convs=True, 20 | num_outs=5), 21 | bbox_head=dict( 22 | type='RetinaHead', 23 | num_classes=81, 24 | in_channels=256, 25 | stacked_convs=4, 26 | feat_channels=256, 27 | octave_base_scale=4, 28 | scales_per_octave=3, 29 | anchor_ratios=[0.5, 1.0, 2.0], 30 | anchor_strides=[8, 16, 32, 64, 128], 31 | target_means=[.0, .0, .0, .0], 32 | target_stds=[1.0, 1.0, 1.0, 1.0])) 33 | # training and testing settings 34 | train_cfg = dict( 35 | assigner=dict( 36 | type='MaxIoUAssigner', 37 | pos_iou_thr=0.5, 38 | neg_iou_thr=0.4, 39 | min_pos_iou=0, 40 | ignore_iof_thr=-1), 41 | smoothl1_beta=0.11, 42 | gamma=2.0, 43 | alpha=0.25, 44 | allowed_border=-1, 45 | pos_weight=-1, 46 | debug=False) 47 | test_cfg = dict( 48 | nms_pre=1000, 49 | min_bbox_size=0, 50 | score_thr=0.05, 51 | nms=dict(type='nms', iou_thr=0.5), 52 | max_per_img=100) 53 | # dataset settings 54 | dataset_type = 'CocoDataset' 55 | data_root = 'data/coco/' 56 | img_norm_cfg = dict( 57 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 58 | data = dict( 59 | imgs_per_gpu=2, 60 | workers_per_gpu=2, 61 | train=dict( 62 | type=dataset_type, 63 | ann_file=data_root + 'annotations/instances_train2017.json', 64 | img_prefix=data_root + 'train2017/', 65 | img_scale=(1333, 800), 66 | img_norm_cfg=img_norm_cfg, 67 | size_divisor=32, 68 | flip_ratio=0.5, 69 | with_mask=False, 70 | with_crowd=False, 71 | with_label=True), 72 | val=dict( 73 | type=dataset_type, 74 | ann_file=data_root + 'annotations/instances_val2017.json', 75 | img_prefix=data_root + 'val2017/', 76 | img_scale=(1333, 800), 77 | img_norm_cfg=img_norm_cfg, 78 | size_divisor=32, 79 | flip_ratio=0, 80 | with_mask=False, 81 | with_crowd=False, 82 | with_label=True), 83 | test=dict( 84 | type=dataset_type, 85 | ann_file=data_root + 'annotations/instances_val2017.json', 86 | img_prefix=data_root + 'val2017/', 87 | img_scale=(1333, 800), 88 | img_norm_cfg=img_norm_cfg, 89 | size_divisor=32, 90 | flip_ratio=0, 91 | with_mask=False, 92 | with_crowd=False, 93 | with_label=False, 94 | test_mode=True)) 95 | # optimizer 96 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) 97 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 98 | # learning policy 99 | lr_config = dict( 100 | policy='step', 101 | warmup='linear', 102 | warmup_iters=500, 103 | warmup_ratio=1.0 / 3, 104 | step=[8, 11]) 105 | checkpoint_config = dict(interval=1) 106 | # yapf:disable 107 | log_config = dict( 108 | interval=50, 109 | hooks=[ 110 | dict(type='TextLoggerHook'), 111 | # dict(type='TensorboardLoggerHook') 112 | ]) 113 | # yapf:enable 114 | # runtime settings 115 | total_epochs = 12 116 | device_ids = range(8) 117 | 
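# NB: `work_dir` a few lines below still reads './work_dirs/retinanet_r50_fpn_1x'
# in both X-101 RetinaNet configs, apparently carried over from the R-50 file;
# point it at a distinct directory when training these variants.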
dist_params = dict(backend='nccl') 118 | log_level = 'INFO' 119 | work_dir = './work_dirs/retinanet_r50_fpn_1x' 120 | load_from = None 121 | resume_from = None 122 | workflow = [('train', 1)] 123 | -------------------------------------------------------------------------------- /configs/rpn_r101_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='modelzoo://resnet101', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=101, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | style='pytorch'), 12 | neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=256, 20 | feat_channels=256, 21 | anchor_scales=[8], 22 | anchor_ratios=[0.5, 1.0, 2.0], 23 | anchor_strides=[4, 8, 16, 32, 64], 24 | target_means=[.0, .0, .0, .0], 25 | target_stds=[1.0, 1.0, 1.0, 1.0], 26 | use_sigmoid_cls=True)) 27 | # model training and testing settings 28 | train_cfg = dict( 29 | rpn=dict( 30 | assigner=dict( 31 | type='MaxIoUAssigner', 32 | pos_iou_thr=0.7, 33 | neg_iou_thr=0.3, 34 | min_pos_iou=0.3, 35 | ignore_iof_thr=-1), 36 | sampler=dict( 37 | type='RandomSampler', 38 | num=256, 39 | pos_fraction=0.5, 40 | neg_pos_ub=-1, 41 | add_gt_as_proposals=False), 42 | allowed_border=0, 43 | pos_weight=-1, 44 | smoothl1_beta=1 / 9.0, 45 | debug=False)) 46 | test_cfg = dict( 47 | rpn=dict( 48 | nms_across_levels=False, 49 | nms_pre=2000, 50 | nms_post=2000, 51 | max_num=2000, 52 | nms_thr=0.7, 53 | min_bbox_size=0)) 54 | # dataset settings 55 | dataset_type = 'CocoDataset' 56 | data_root = 'data/coco/' 57 | img_norm_cfg = dict( 58 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 59 | data = dict( 60 | imgs_per_gpu=2, 61 | workers_per_gpu=2, 62 | train=dict( 63 | type=dataset_type, 64 | ann_file=data_root + 'annotations/instances_train2017.json', 65 | img_prefix=data_root + 'train2017/', 66 | img_scale=(1333, 800), 67 | img_norm_cfg=img_norm_cfg, 68 | size_divisor=32, 69 | flip_ratio=0.5, 70 | with_mask=False, 71 | with_crowd=False, 72 | with_label=False), 73 | val=dict( 74 | type=dataset_type, 75 | ann_file=data_root + 'annotations/instances_val2017.json', 76 | img_prefix=data_root + 'val2017/', 77 | img_scale=(1333, 800), 78 | img_norm_cfg=img_norm_cfg, 79 | size_divisor=32, 80 | flip_ratio=0, 81 | with_mask=False, 82 | with_crowd=False, 83 | with_label=False), 84 | test=dict( 85 | type=dataset_type, 86 | ann_file=data_root + 'annotations/instances_val2017.json', 87 | img_prefix=data_root + 'val2017/', 88 | img_scale=(1333, 800), 89 | img_norm_cfg=img_norm_cfg, 90 | size_divisor=32, 91 | flip_ratio=0, 92 | with_mask=False, 93 | with_label=False, 94 | test_mode=True)) 95 | # optimizer 96 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 97 | # runner configs 98 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 99 | lr_config = dict( 100 | policy='step', 101 | warmup='linear', 102 | warmup_iters=500, 103 | warmup_ratio=1.0 / 3, 104 | step=[8, 11]) 105 | checkpoint_config = dict(interval=1) 106 | # yapf:disable 107 | log_config = dict( 108 | interval=50, 109 | hooks=[ 110 | dict(type='TextLoggerHook'), 111 | # dict(type='TensorboardLoggerHook') 112 | ]) 113 | # yapf:enable 114 | # runtime settings 115 | total_epochs = 12 116 | dist_params = dict(backend='nccl') 117 | log_level = 'INFO' 118 | work_dir = 
'./work_dirs/rpn_r101_fpn_1x' 119 | load_from = None 120 | resume_from = None 121 | workflow = [('train', 1)] 122 | -------------------------------------------------------------------------------- /configs/rpn_r50_c4_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='open-mmlab://resnet50_caffe', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=3, 9 | strides=(1, 2, 2), 10 | dilations=(1, 1, 1), 11 | out_indices=(2, ), 12 | frozen_stages=1, 13 | normalize=dict(type='BN', frozen=True), 14 | norm_eval=True, 15 | style='caffe'), 16 | neck=None, 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=1024, 20 | feat_channels=1024, 21 | anchor_scales=[2, 4, 8, 16, 32], 22 | anchor_ratios=[0.5, 1.0, 2.0], 23 | anchor_strides=[16], 24 | target_means=[.0, .0, .0, .0], 25 | target_stds=[1.0, 1.0, 1.0, 1.0], 26 | use_sigmoid_cls=True)) 27 | # model training and testing settings 28 | train_cfg = dict( 29 | rpn=dict( 30 | assigner=dict( 31 | type='MaxIoUAssigner', 32 | pos_iou_thr=0.7, 33 | neg_iou_thr=0.3, 34 | min_pos_iou=0.3, 35 | ignore_iof_thr=-1), 36 | sampler=dict( 37 | type='RandomSampler', 38 | num=256, 39 | pos_fraction=0.5, 40 | neg_pos_ub=-1, 41 | add_gt_as_proposals=False), 42 | allowed_border=0, 43 | pos_weight=-1, 44 | smoothl1_beta=1 / 9.0, 45 | debug=False)) 46 | test_cfg = dict( 47 | rpn=dict( 48 | nms_across_levels=False, 49 | nms_pre=12000, 50 | nms_post=2000, 51 | max_num=2000, 52 | nms_thr=0.7, 53 | min_bbox_size=0)) 54 | # dataset settings 55 | dataset_type = 'CocoDataset' 56 | data_root = 'data/coco/' 57 | img_norm_cfg = dict( 58 | mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) 59 | data = dict( 60 | imgs_per_gpu=2, 61 | workers_per_gpu=2, 62 | train=dict( 63 | type=dataset_type, 64 | ann_file=data_root + 'annotations/instances_train2017.json', 65 | img_prefix=data_root + 'train2017/', 66 | img_scale=(1333, 800), 67 | img_norm_cfg=img_norm_cfg, 68 | size_divisor=32, 69 | flip_ratio=0.5, 70 | with_mask=False, 71 | with_crowd=False, 72 | with_label=False), 73 | val=dict( 74 | type=dataset_type, 75 | ann_file=data_root + 'annotations/instances_val2017.json', 76 | img_prefix=data_root + 'val2017/', 77 | img_scale=(1333, 800), 78 | img_norm_cfg=img_norm_cfg, 79 | size_divisor=32, 80 | flip_ratio=0, 81 | with_mask=False, 82 | with_crowd=False, 83 | with_label=False), 84 | test=dict( 85 | type=dataset_type, 86 | ann_file=data_root + 'annotations/instances_val2017.json', 87 | img_prefix=data_root + 'val2017/', 88 | img_scale=(1333, 800), 89 | img_norm_cfg=img_norm_cfg, 90 | size_divisor=32, 91 | flip_ratio=0, 92 | with_mask=False, 93 | with_label=False, 94 | test_mode=True)) 95 | # optimizer 96 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 97 | # runner configs 98 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 99 | lr_config = dict( 100 | policy='step', 101 | warmup='linear', 102 | warmup_iters=500, 103 | warmup_ratio=1.0 / 3, 104 | step=[8, 11]) 105 | checkpoint_config = dict(interval=1) 106 | # yapf:disable 107 | log_config = dict( 108 | interval=50, 109 | hooks=[ 110 | dict(type='TextLoggerHook'), 111 | # dict(type='TensorboardLoggerHook') 112 | ]) 113 | # yapf:enable 114 | # runtime settings 115 | total_epochs = 12 116 | dist_params = dict(backend='nccl') 117 | log_level = 'INFO' 118 | work_dir = './work_dirs/rpn_r50_c4_1x' 119 | load_from = None 120 | resume_from = None 121 | 
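# The workflow below is interpreted by the mmcv runner: [('train', 1)] runs one
# training epoch per cycle and no validation phase; [('train', 1), ('val', 1)]
# would interleave a validation epoch after each training epoch.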
workflow = [('train', 1)] 122 | -------------------------------------------------------------------------------- /configs/rpn_r50_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='modelzoo://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | style='pytorch'), 12 | neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=256, 20 | feat_channels=256, 21 | anchor_scales=[8], 22 | anchor_ratios=[0.5, 1.0, 2.0], 23 | anchor_strides=[4, 8, 16, 32, 64], 24 | target_means=[.0, .0, .0, .0], 25 | target_stds=[1.0, 1.0, 1.0, 1.0], 26 | use_sigmoid_cls=True)) 27 | # model training and testing settings 28 | train_cfg = dict( 29 | rpn=dict( 30 | assigner=dict( 31 | type='MaxIoUAssigner', 32 | pos_iou_thr=0.7, 33 | neg_iou_thr=0.3, 34 | min_pos_iou=0.3, 35 | ignore_iof_thr=-1), 36 | sampler=dict( 37 | type='RandomSampler', 38 | num=256, 39 | pos_fraction=0.5, 40 | neg_pos_ub=-1, 41 | add_gt_as_proposals=False), 42 | allowed_border=0, 43 | pos_weight=-1, 44 | smoothl1_beta=1 / 9.0, 45 | debug=False)) 46 | test_cfg = dict( 47 | rpn=dict( 48 | nms_across_levels=False, 49 | nms_pre=2000, 50 | nms_post=2000, 51 | max_num=2000, 52 | nms_thr=0.7, 53 | min_bbox_size=0)) 54 | # dataset settings 55 | dataset_type = 'CocoDataset' 56 | data_root = 'data/coco/' 57 | img_norm_cfg = dict( 58 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 59 | data = dict( 60 | imgs_per_gpu=2, 61 | workers_per_gpu=2, 62 | train=dict( 63 | type=dataset_type, 64 | ann_file=data_root + 'annotations/instances_train2017.json', 65 | img_prefix=data_root + 'train2017/', 66 | img_scale=(1333, 800), 67 | img_norm_cfg=img_norm_cfg, 68 | size_divisor=32, 69 | flip_ratio=0.5, 70 | with_mask=False, 71 | with_crowd=False, 72 | with_label=False), 73 | val=dict( 74 | type=dataset_type, 75 | ann_file=data_root + 'annotations/instances_val2017.json', 76 | img_prefix=data_root + 'val2017/', 77 | img_scale=(1333, 800), 78 | img_norm_cfg=img_norm_cfg, 79 | size_divisor=32, 80 | flip_ratio=0, 81 | with_mask=False, 82 | with_crowd=False, 83 | with_label=False), 84 | test=dict( 85 | type=dataset_type, 86 | ann_file=data_root + 'annotations/instances_val2017.json', 87 | img_prefix=data_root + 'val2017/', 88 | img_scale=(1333, 800), 89 | img_norm_cfg=img_norm_cfg, 90 | size_divisor=32, 91 | flip_ratio=0, 92 | with_mask=False, 93 | with_label=False, 94 | test_mode=True)) 95 | # optimizer 96 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 97 | # runner configs 98 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 99 | lr_config = dict( 100 | policy='step', 101 | warmup='linear', 102 | warmup_iters=500, 103 | warmup_ratio=1.0 / 3, 104 | step=[8, 11]) 105 | checkpoint_config = dict(interval=1) 106 | # yapf:disable 107 | log_config = dict( 108 | interval=50, 109 | hooks=[ 110 | dict(type='TextLoggerHook'), 111 | # dict(type='TensorboardLoggerHook') 112 | ]) 113 | # yapf:enable 114 | # runtime settings 115 | total_epochs = 12 116 | dist_params = dict(backend='nccl') 117 | log_level = 'INFO' 118 | work_dir = './work_dirs/rpn_r50_fpn_1x' 119 | load_from = None 120 | resume_from = None 121 | workflow = [('train', 1)] 122 | 
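All of these configs are launched with the scripts under `tools/` (listed in the tree at the top). A minimal usage sketch, assuming the standard upstream mmdetection entry points that this fork inherits; the argument names follow upstream `tools/train.py` and `tools/dist_train.sh` and should be verified against this fork's copies:

```shell
# distributed training on 8 GPUs (with imgs_per_gpu=2 this gives the total
# batch size of 16 that lr=0.02 assumes)
./tools/dist_train.sh configs/rpn_r50_fpn_1x.py 8

# non-distributed alternative
python tools/train.py configs/rpn_r50_fpn_1x.py --gpus 8 --work_dir ./work_dirs/rpn_r50_fpn_1x
```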
-------------------------------------------------------------------------------- /configs/rpn_x101_32x4d_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='open-mmlab://resnext101_32x4d', 5 | backbone=dict( 6 | type='ResNeXt', 7 | depth=101, 8 | groups=32, 9 | base_width=4, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=1, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_scales=[8], 24 | anchor_ratios=[0.5, 1.0, 2.0], 25 | anchor_strides=[4, 8, 16, 32, 64], 26 | target_means=[.0, .0, .0, .0], 27 | target_stds=[1.0, 1.0, 1.0, 1.0], 28 | use_sigmoid_cls=True)) 29 | # model training and testing settings 30 | train_cfg = dict( 31 | rpn=dict( 32 | assigner=dict( 33 | type='MaxIoUAssigner', 34 | pos_iou_thr=0.7, 35 | neg_iou_thr=0.3, 36 | min_pos_iou=0.3, 37 | ignore_iof_thr=-1), 38 | sampler=dict( 39 | type='RandomSampler', 40 | num=256, 41 | pos_fraction=0.5, 42 | neg_pos_ub=-1, 43 | add_gt_as_proposals=False), 44 | allowed_border=0, 45 | pos_weight=-1, 46 | smoothl1_beta=1 / 9.0, 47 | debug=False)) 48 | test_cfg = dict( 49 | rpn=dict( 50 | nms_across_levels=False, 51 | nms_pre=2000, 52 | nms_post=2000, 53 | max_num=2000, 54 | nms_thr=0.7, 55 | min_bbox_size=0)) 56 | # dataset settings 57 | dataset_type = 'CocoDataset' 58 | data_root = 'data/coco/' 59 | img_norm_cfg = dict( 60 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 61 | data = dict( 62 | imgs_per_gpu=2, 63 | workers_per_gpu=2, 64 | train=dict( 65 | type=dataset_type, 66 | ann_file=data_root + 'annotations/instances_train2017.json', 67 | img_prefix=data_root + 'train2017/', 68 | img_scale=(1333, 800), 69 | img_norm_cfg=img_norm_cfg, 70 | size_divisor=32, 71 | flip_ratio=0.5, 72 | with_mask=False, 73 | with_crowd=False, 74 | with_label=False), 75 | val=dict( 76 | type=dataset_type, 77 | ann_file=data_root + 'annotations/instances_val2017.json', 78 | img_prefix=data_root + 'val2017/', 79 | img_scale=(1333, 800), 80 | img_norm_cfg=img_norm_cfg, 81 | size_divisor=32, 82 | flip_ratio=0, 83 | with_mask=False, 84 | with_crowd=False, 85 | with_label=False), 86 | test=dict( 87 | type=dataset_type, 88 | ann_file=data_root + 'annotations/instances_val2017.json', 89 | img_prefix=data_root + 'val2017/', 90 | img_scale=(1333, 800), 91 | img_norm_cfg=img_norm_cfg, 92 | size_divisor=32, 93 | flip_ratio=0, 94 | with_mask=False, 95 | with_label=False, 96 | test_mode=True)) 97 | # optimizer 98 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 99 | # runner configs 100 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 101 | lr_config = dict( 102 | policy='step', 103 | warmup='linear', 104 | warmup_iters=500, 105 | warmup_ratio=1.0 / 3, 106 | step=[8, 11]) 107 | checkpoint_config = dict(interval=1) 108 | # yapf:disable 109 | log_config = dict( 110 | interval=50, 111 | hooks=[ 112 | dict(type='TextLoggerHook'), 113 | # dict(type='TensorboardLoggerHook') 114 | ]) 115 | # yapf:enable 116 | # runtime settings 117 | total_epochs = 12 118 | dist_params = dict(backend='nccl') 119 | log_level = 'INFO' 120 | work_dir = './work_dirs/rpn_r101_fpn_1x' 121 | load_from = None 122 | resume_from = None 123 | workflow = [('train', 1)] 124 | 
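RPN-only configs are evaluated on proposal recall rather than box AP. A hedged sketch of the evaluation flow, assuming upstream mmdetection's `tools/test.py` interface (`--eval proposal_fast` is the upstream option name and the checkpoint path is a placeholder; verify both against this fork):

```shell
python tools/test.py configs/rpn_x101_32x4d_fpn_1x.py <path/to/checkpoint.pth> \
    --gpus 8 --out results.pkl --eval proposal_fast
```

Note that `work_dir` in this config (and in the 64x4d variant below) still points at `./work_dirs/rpn_r101_fpn_1x`, apparently carried over from the R-101 config; use a distinct directory per experiment to avoid overwriting checkpoints.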
-------------------------------------------------------------------------------- /configs/rpn_x101_64x4d_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='open-mmlab://resnext101_64x4d', 5 | backbone=dict( 6 | type='ResNeXt', 7 | depth=101, 8 | groups=64, 9 | base_width=4, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=1, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_scales=[8], 24 | anchor_ratios=[0.5, 1.0, 2.0], 25 | anchor_strides=[4, 8, 16, 32, 64], 26 | target_means=[.0, .0, .0, .0], 27 | target_stds=[1.0, 1.0, 1.0, 1.0], 28 | use_sigmoid_cls=True)) 29 | # model training and testing settings 30 | train_cfg = dict( 31 | rpn=dict( 32 | assigner=dict( 33 | type='MaxIoUAssigner', 34 | pos_iou_thr=0.7, 35 | neg_iou_thr=0.3, 36 | min_pos_iou=0.3, 37 | ignore_iof_thr=-1), 38 | sampler=dict( 39 | type='RandomSampler', 40 | num=256, 41 | pos_fraction=0.5, 42 | neg_pos_ub=-1, 43 | add_gt_as_proposals=False), 44 | allowed_border=0, 45 | pos_weight=-1, 46 | smoothl1_beta=1 / 9.0, 47 | debug=False)) 48 | test_cfg = dict( 49 | rpn=dict( 50 | nms_across_levels=False, 51 | nms_pre=2000, 52 | nms_post=2000, 53 | max_num=2000, 54 | nms_thr=0.7, 55 | min_bbox_size=0)) 56 | # dataset settings 57 | dataset_type = 'CocoDataset' 58 | data_root = 'data/coco/' 59 | img_norm_cfg = dict( 60 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 61 | data = dict( 62 | imgs_per_gpu=2, 63 | workers_per_gpu=2, 64 | train=dict( 65 | type=dataset_type, 66 | ann_file=data_root + 'annotations/instances_train2017.json', 67 | img_prefix=data_root + 'train2017/', 68 | img_scale=(1333, 800), 69 | img_norm_cfg=img_norm_cfg, 70 | size_divisor=32, 71 | flip_ratio=0.5, 72 | with_mask=False, 73 | with_crowd=False, 74 | with_label=False), 75 | val=dict( 76 | type=dataset_type, 77 | ann_file=data_root + 'annotations/instances_val2017.json', 78 | img_prefix=data_root + 'val2017/', 79 | img_scale=(1333, 800), 80 | img_norm_cfg=img_norm_cfg, 81 | size_divisor=32, 82 | flip_ratio=0, 83 | with_mask=False, 84 | with_crowd=False, 85 | with_label=False), 86 | test=dict( 87 | type=dataset_type, 88 | ann_file=data_root + 'annotations/instances_val2017.json', 89 | img_prefix=data_root + 'val2017/', 90 | img_scale=(1333, 800), 91 | img_norm_cfg=img_norm_cfg, 92 | size_divisor=32, 93 | flip_ratio=0, 94 | with_mask=False, 95 | with_label=False, 96 | test_mode=True)) 97 | # optimizer 98 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 99 | # runner configs 100 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 101 | lr_config = dict( 102 | policy='step', 103 | warmup='linear', 104 | warmup_iters=500, 105 | warmup_ratio=1.0 / 3, 106 | step=[8, 11]) 107 | checkpoint_config = dict(interval=1) 108 | # yapf:disable 109 | log_config = dict( 110 | interval=50, 111 | hooks=[ 112 | dict(type='TextLoggerHook'), 113 | # dict(type='TensorboardLoggerHook') 114 | ]) 115 | # yapf:enable 116 | # runtime settings 117 | total_epochs = 12 118 | dist_params = dict(backend='nccl') 119 | log_level = 'INFO' 120 | work_dir = './work_dirs/rpn_r101_fpn_1x' 121 | load_from = None 122 | resume_from = None 123 | workflow = [('train', 1)] 124 | 
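The FPN-based RPN configs above differ only in the `backbone` block (`type`, `depth`, `groups`, `base_width`, and the pretrained weights); everything else is shared. One quick way to confirm this, assuming the `mmcv` dependency listed in INSTALL.md:

```shell
python -c "from mmcv import Config; print(Config.fromfile('configs/rpn_x101_64x4d_fpn_1x.py').model['backbone'])"
```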
--------------------------------------------------------------------------------
/demo/coco_test_12510.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/demo/coco_test_12510.jpg
--------------------------------------------------------------------------------
/demo/coco_val_32901.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/demo/coco_val_32901.png
--------------------------------------------------------------------------------
/init_coco.py:
--------------------------------------------------------------------------------
1 | import json
2 | import copy
3 | 
4 | if __name__ == "__main__":
5 | 
6 |     # identity mappings over the 133 panoptic category indices
7 |     idx_mapping = {i: i for i in range(133)}
8 |     inv_idx_mapping = {i: i for i in range(133)}
9 | 
10 |     cat_json = json.load(open('panopticapi/panoptic_coco_categories.json'))
11 |     cat_json_stff = copy.deepcopy(cat_json)
12 |     cat_idx_mapping = {}
13 |     for idx, k in enumerate(cat_json):
14 |         cat_idx_mapping[k['id']] = idx
15 |     for k, v in idx_mapping.items():
16 |         cat_json_stff[k] = cat_json[v]
17 |         cat_json_stff[k]['id'] = k
18 |     json.dump(cat_json_stff, open('data/coco/annotations/panoptic_coco_categories_Easystuff.json', 'w'))
19 | 
20 |     for s in ['train', 'val']:
21 | 
22 |         pano_json = json.load(open('data/coco/annotations/panoptic_{}2017.json'.format(s)))
23 | 
24 |         pano_json_stff = copy.deepcopy(pano_json)
25 | 
26 |         pano_json_stff['categories'] = cat_json_stff
27 | 
28 |         for anno in pano_json_stff['annotations']:
29 |             for segments_info in anno['segments_info']:
30 |                 segments_info['category_id'] = inv_idx_mapping[cat_idx_mapping[segments_info['category_id']]]
31 | 
32 |         for img in pano_json_stff['images']:
33 |             # panoptic ground truth images are PNGs, so only swap the extension
34 |             img['file_name'] = img['file_name'].replace('.jpg', '.png')
35 |         if s == 'val':
36 |             pano_json_stff['images'] = sorted(pano_json_stff['images'], key=lambda x: x['id'])
37 | 
38 |         json.dump(pano_json_stff, open('data/coco/annotations/panoptic_{}2017_Easystuff.json'.format(s), 'w'))
--------------------------------------------------------------------------------
/init_coco.sh:
--------------------------------------------------------------------------------
1 | python init_coco.py
2 | 
3 | PYTHONPATH=$(pwd):$PYTHONPATH python panopticapi/converters/panoptic2semantic_segmentation.py --input_json_file data/coco/annotations/panoptic_train2017_Easystuff.json --segmentations_folder data/coco/annotations/panoptic_train2017 --semantic_seg_folder data/coco/annotations/panoptic_train2017_semantic_Easystuff --categories_json_file data/coco/annotations/panoptic_coco_categories_Easystuff.json
4 | PYTHONPATH=$(pwd):$PYTHONPATH python panopticapi/converters/panoptic2semantic_segmentation.py --input_json_file data/coco/annotations/panoptic_val2017_Easystuff.json --segmentations_folder data/coco/annotations/panoptic_val2017 --semantic_seg_folder data/coco/annotations/panoptic_val2017_semantic_Easystuff --categories_json_file data/coco/annotations/panoptic_coco_categories_Easystuff.json
5 | 
--------------------------------------------------------------------------------
/mmdet/__init__.py:
--------------------------------------------------------------------------------
1 | from .version import __version__, short_version
2 | 
3 | __all__ = ['__version__', 'short_version']
4 | 
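The apis package that follows exposes a small single-image inference interface (inference_detector / show_result). A usage sketch against one of the demo images above; the checkpoint path is a placeholder:

import mmcv
from mmcv.runner import load_checkpoint
from mmdet.models import build_detector
from mmdet.apis import inference_detector, show_result

cfg = mmcv.Config.fromfile('configs/faster_rcnn_r50_fpn_1x.py')
cfg.model.pretrained = None  # weights come from the checkpoint, not ImageNet init
model = build_detector(cfg.model, test_cfg=cfg.test_cfg)
load_checkpoint(model, 'checkpoints/faster_rcnn_r50_fpn_1x.pth')  # placeholder path
result = inference_detector(model, 'demo/coco_test_12510.jpg', cfg, device='cuda:0')
show_result('demo/coco_test_12510.jpg', result, dataset='coco', score_thr=0.3)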
-------------------------------------------------------------------------------- /mmdet/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .env import init_dist, get_root_logger, set_random_seed 2 | from .train import train_detector 3 | from .inference import inference_detector, show_result 4 | 5 | __all__ = [ 6 | 'init_dist', 'get_root_logger', 'set_random_seed', 'train_detector', 7 | 'inference_detector', 'show_result' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/apis/env.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import random 4 | import subprocess 5 | 6 | import numpy as np 7 | import torch 8 | import torch.distributed as dist 9 | import torch.multiprocessing as mp 10 | from mmcv.runner import get_dist_info 11 | 12 | 13 | def init_dist(launcher, backend='nccl', **kwargs): 14 | if mp.get_start_method(allow_none=True) is None: 15 | mp.set_start_method('spawn') 16 | if launcher == 'pytorch': 17 | _init_dist_pytorch(backend, **kwargs) 18 | elif launcher == 'mpi': 19 | _init_dist_mpi(backend, **kwargs) 20 | elif launcher == 'slurm': 21 | _init_dist_slurm(backend, **kwargs) 22 | else: 23 | raise ValueError('Invalid launcher type: {}'.format(launcher)) 24 | 25 | 26 | def _init_dist_pytorch(backend, **kwargs): 27 | # TODO: use local_rank instead of rank % num_gpus 28 | rank = int(os.environ['RANK']) 29 | num_gpus = torch.cuda.device_count() 30 | torch.cuda.set_device(rank % num_gpus) 31 | dist.init_process_group(backend=backend, **kwargs) 32 | 33 | 34 | def _init_dist_mpi(backend, **kwargs): 35 | raise NotImplementedError 36 | 37 | 38 | def _init_dist_slurm(backend, port=29500, **kwargs): 39 | proc_id = int(os.environ['SLURM_PROCID']) 40 | ntasks = int(os.environ['SLURM_NTASKS']) 41 | node_list = os.environ['SLURM_NODELIST'] 42 | num_gpus = torch.cuda.device_count() 43 | torch.cuda.set_device(proc_id % num_gpus) 44 | addr = subprocess.getoutput( 45 | 'scontrol show hostname {} | head -n1'.format(node_list)) 46 | os.environ['MASTER_PORT'] = str(port) 47 | os.environ['MASTER_ADDR'] = addr 48 | os.environ['WORLD_SIZE'] = str(ntasks) 49 | os.environ['RANK'] = str(proc_id) 50 | dist.init_process_group(backend=backend) 51 | 52 | 53 | def set_random_seed(seed): 54 | random.seed(seed) 55 | np.random.seed(seed) 56 | torch.manual_seed(seed) 57 | torch.cuda.manual_seed_all(seed) 58 | 59 | 60 | def get_root_logger(log_level=logging.INFO): 61 | logger = logging.getLogger() 62 | if not logger.hasHandlers(): 63 | logging.basicConfig( 64 | format='%(asctime)s - %(levelname)s - %(message)s', 65 | level=log_level) 66 | rank, _ = get_dist_info() 67 | if rank != 0: 68 | logger.setLevel('ERROR') 69 | return logger 70 | -------------------------------------------------------------------------------- /mmdet/apis/inference.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import pycocotools.mask as maskUtils 4 | import torch 5 | 6 | from mmdet.core import get_classes 7 | from mmdet.datasets import to_tensor 8 | from mmdet.datasets.transforms import ImageTransform 9 | 10 | 11 | def _prepare_data(img, img_transform, cfg, device): 12 | ori_shape = img.shape 13 | img, img_shape, pad_shape, scale_factor = img_transform( 14 | img, 15 | scale=cfg.data.test.img_scale, 16 | keep_ratio=cfg.data.test.get('resize_keep_ratio', True)) 17 | img = 
to_tensor(img).to(device).unsqueeze(0)
18 |     img_meta = [
19 |         dict(
20 |             ori_shape=ori_shape,
21 |             img_shape=img_shape,
22 |             pad_shape=pad_shape,
23 |             scale_factor=scale_factor,
24 |             flip=False)
25 |     ]
26 |     return dict(img=[img], img_meta=[img_meta])
27 | 
28 | 
29 | def _inference_single(model, img, img_transform, cfg, device):
30 |     img = mmcv.imread(img)
31 |     data = _prepare_data(img, img_transform, cfg, device)
32 |     with torch.no_grad():
33 |         result = model(return_loss=False, rescale=True, **data)
34 |     return result
35 | 
36 | 
37 | def _inference_generator(model, imgs, img_transform, cfg, device):
38 |     for img in imgs:
39 |         yield _inference_single(model, img, img_transform, cfg, device)
40 | 
41 | 
42 | def inference_detector(model, imgs, cfg, device='cuda:0'):
43 |     img_transform = ImageTransform(
44 |         size_divisor=cfg.data.test.size_divisor, **cfg.img_norm_cfg)
45 |     model = model.to(device)
46 |     model.eval()
47 | 
48 |     if not isinstance(imgs, list):
49 |         return _inference_single(model, imgs, img_transform, cfg, device)
50 |     else:
51 |         return _inference_generator(model, imgs, img_transform, cfg, device)
52 | 
53 | 
54 | def show_result(img, result, dataset='coco', score_thr=0.3, out_file=None):
55 |     img = mmcv.imread(img)
56 |     class_names = get_classes(dataset)
57 |     if isinstance(result, tuple):
58 |         bbox_result, segm_result = result
59 |     else:
60 |         bbox_result, segm_result = result, None
61 |     bboxes = np.vstack(bbox_result)
62 |     # draw segmentation masks
63 |     if segm_result is not None:
64 |         segms = mmcv.concat_list(segm_result)
65 |         inds = np.where(bboxes[:, -1] > score_thr)[0]
66 |         for i in inds:
67 |             color_mask = np.random.randint(
68 |                 0, 256, (1, 3), dtype=np.uint8)
69 |             mask = maskUtils.decode(segms[i]).astype(np.bool)
70 |             img[mask] = img[mask] * 0.5 + color_mask * 0.5
71 |     # draw bounding boxes
72 |     labels = [
73 |         np.full(bbox.shape[0], i, dtype=np.int32)
74 |         for i, bbox in enumerate(bbox_result)
75 |     ]
76 |     labels = np.concatenate(labels)
77 |     mmcv.imshow_det_bboxes(
78 |         img.copy(),
79 |         bboxes,
80 |         labels,
81 |         class_names=class_names,
82 |         score_thr=score_thr,
83 |         show=out_file is None,
84 |         out_file=out_file)
--------------------------------------------------------------------------------
/mmdet/core/__init__.py:
--------------------------------------------------------------------------------
1 | from .anchor import *  # noqa: F401, F403
2 | from .bbox import *  # noqa: F401, F403
3 | from .mask import *  # noqa: F401, F403
4 | from .loss import *  # noqa: F401, F403
5 | from .evaluation import *  # noqa: F401, F403
6 | from .post_processing import *  # noqa: F401, F403
7 | from .utils import *  # noqa: F401, F403
--------------------------------------------------------------------------------
/mmdet/core/anchor/__init__.py:
--------------------------------------------------------------------------------
1 | from .anchor_generator import AnchorGenerator
2 | from .anchor_target import anchor_target
3 | 
4 | __all__ = ['AnchorGenerator', 'anchor_target']
--------------------------------------------------------------------------------
/mmdet/core/anchor/anchor_generator.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | 
4 | class AnchorGenerator(object):
5 | 
6 |     def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None):
7 |         self.base_size = base_size
8 |         self.scales = torch.Tensor(scales)
9 |         self.ratios = torch.Tensor(ratios)
10 |         self.scale_major = scale_major
11 |         self.ctr = ctr
12 |         self.base_anchors = 
self.gen_base_anchors() 13 | 14 | @property 15 | def num_base_anchors(self): 16 | return self.base_anchors.size(0) 17 | 18 | def gen_base_anchors(self): 19 | w = self.base_size 20 | h = self.base_size 21 | if self.ctr is None: 22 | x_ctr = 0.5 * (w - 1) 23 | y_ctr = 0.5 * (h - 1) 24 | else: 25 | x_ctr, y_ctr = self.ctr 26 | 27 | h_ratios = torch.sqrt(self.ratios) 28 | w_ratios = 1 / h_ratios 29 | if self.scale_major: 30 | ws = (w * w_ratios[:, None] * self.scales[None, :]).view(-1) 31 | hs = (h * h_ratios[:, None] * self.scales[None, :]).view(-1) 32 | else: 33 | ws = (w * self.scales[:, None] * w_ratios[None, :]).view(-1) 34 | hs = (h * self.scales[:, None] * h_ratios[None, :]).view(-1) 35 | 36 | base_anchors = torch.stack( 37 | [ 38 | x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1), 39 | x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1) 40 | ], 41 | dim=-1).round() 42 | 43 | return base_anchors 44 | 45 | def _meshgrid(self, x, y, row_major=True): 46 | xx = x.repeat(len(y)) 47 | yy = y.view(-1, 1).repeat(1, len(x)).view(-1) 48 | if row_major: 49 | return xx, yy 50 | else: 51 | return yy, xx 52 | 53 | def grid_anchors(self, featmap_size, stride=16, device='cuda'): 54 | base_anchors = self.base_anchors.to(device) 55 | 56 | feat_h, feat_w = featmap_size 57 | shift_x = torch.arange(0, feat_w, device=device) * stride 58 | shift_y = torch.arange(0, feat_h, device=device) * stride 59 | shift_xx, shift_yy = self._meshgrid(shift_x, shift_y) 60 | shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1) 61 | shifts = shifts.type_as(base_anchors) 62 | # first feat_w elements correspond to the first row of shifts 63 | # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get 64 | # shifted anchors (K, A, 4), reshape to (K*A, 4) 65 | 66 | all_anchors = base_anchors[None, :, :] + shifts[:, None, :] 67 | all_anchors = all_anchors.view(-1, 4) 68 | # first A rows correspond to A anchors of (0, 0) in feature map, 69 | # then (0, 1), (0, 2), ... 
70 | return all_anchors 71 | 72 | def valid_flags(self, featmap_size, valid_size, device='cuda'): 73 | feat_h, feat_w = featmap_size 74 | valid_h, valid_w = valid_size 75 | assert valid_h <= feat_h and valid_w <= feat_w 76 | valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device) 77 | valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device) 78 | valid_x[:valid_w] = 1 79 | valid_y[:valid_h] = 1 80 | valid_xx, valid_yy = self._meshgrid(valid_x, valid_y) 81 | valid = valid_xx & valid_yy 82 | valid = valid[:, None].expand( 83 | valid.size(0), self.num_base_anchors).contiguous().view(-1) 84 | return valid 85 | -------------------------------------------------------------------------------- /mmdet/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | from .geometry import bbox_overlaps 2 | from .assigners import BaseAssigner, MaxIoUAssigner, AssignResult 3 | from .samplers import (BaseSampler, PseudoSampler, RandomSampler, 4 | InstanceBalancedPosSampler, IoUBalancedNegSampler, 5 | CombinedSampler, SamplingResult) 6 | from .assign_sampling import build_assigner, build_sampler, assign_and_sample 7 | from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping, 8 | bbox_mapping_back, bbox2roi, roi2bbox, bbox2result) 9 | from .bbox_target import bbox_target 10 | 11 | __all__ = [ 12 | 'bbox_overlaps', 'BaseAssigner', 'MaxIoUAssigner', 'AssignResult', 13 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 14 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 15 | 'SamplingResult', 'build_assigner', 'build_sampler', 'assign_and_sample', 16 | 'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_mapping', 17 | 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', 'bbox_target' 18 | ] 19 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assign_sampling.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from . 
import assigners, samplers
4 | 
5 | 
6 | def build_assigner(cfg, **kwargs):
7 |     if isinstance(cfg, assigners.BaseAssigner):
8 |         return cfg
9 |     elif isinstance(cfg, dict):
10 |         return mmcv.runner.obj_from_dict(
11 |             cfg, assigners, default_args=kwargs)
12 |     else:
13 |         raise TypeError('Invalid type {} for building an assigner'.format(
14 |             type(cfg)))
15 | 
16 | 
17 | def build_sampler(cfg, **kwargs):
18 |     if isinstance(cfg, samplers.BaseSampler):
19 |         return cfg
20 |     elif isinstance(cfg, dict):
21 |         return mmcv.runner.obj_from_dict(
22 |             cfg, samplers, default_args=kwargs)
23 |     else:
24 |         raise TypeError('Invalid type {} for building a sampler'.format(
25 |             type(cfg)))
26 | 
27 | 
28 | def assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg):
29 |     bbox_assigner = build_assigner(cfg.assigner)
30 |     bbox_sampler = build_sampler(cfg.sampler)
31 |     assign_result = bbox_assigner.assign(bboxes, gt_bboxes, gt_bboxes_ignore,
32 |                                          gt_labels)
33 |     sampling_result = bbox_sampler.sample(assign_result, bboxes, gt_bboxes,
34 |                                           gt_labels)
35 |     return assign_result, sampling_result
--------------------------------------------------------------------------------
/mmdet/core/bbox/assigners/__init__.py:
--------------------------------------------------------------------------------
1 | from .base_assigner import BaseAssigner
2 | from .max_iou_assigner import MaxIoUAssigner
3 | from .assign_result import AssignResult
4 | 
5 | __all__ = ['BaseAssigner', 'MaxIoUAssigner', 'AssignResult']
--------------------------------------------------------------------------------
/mmdet/core/bbox/assigners/assign_result.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | 
4 | class AssignResult(object):
5 | 
6 |     def __init__(self, num_gts, gt_inds, max_overlaps, labels=None):
7 |         self.num_gts = num_gts
8 |         self.gt_inds = gt_inds
9 |         self.max_overlaps = max_overlaps
10 |         self.labels = labels
11 | 
12 |     def add_gt_(self, gt_labels):
13 |         self_inds = torch.arange(
14 |             1, len(gt_labels) + 1, dtype=torch.long, device=gt_labels.device)
15 |         self.gt_inds = torch.cat([self_inds, self.gt_inds])
16 |         self.max_overlaps = torch.cat(
17 |             [self.max_overlaps.new_ones(self.num_gts), self.max_overlaps])
18 |         if self.labels is not None:
19 |             self.labels = torch.cat([gt_labels, self.labels])
--------------------------------------------------------------------------------
/mmdet/core/bbox/assigners/base_assigner.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta, abstractmethod
2 | 
3 | 
4 | class BaseAssigner(metaclass=ABCMeta):
5 | 
6 |     @abstractmethod
7 |     def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
8 |         pass
--------------------------------------------------------------------------------
/mmdet/core/bbox/bbox_target.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | from .transforms import bbox2delta
4 | from ..utils import multi_apply
5 | 
6 | 
7 | def bbox_target(pos_bboxes_list,
8 |                 neg_bboxes_list,
9 |                 pos_gt_bboxes_list,
10 |                 pos_gt_labels_list,
11 |                 cfg,
12 |                 reg_classes=1,
13 |                 target_means=[.0, .0, .0, .0],
14 |                 target_stds=[1.0, 1.0, 1.0, 1.0],
15 |                 concat=True):
16 |     labels, label_weights, bbox_targets, bbox_weights = multi_apply(
17 |         bbox_target_single,
18 |         pos_bboxes_list,
19 |         neg_bboxes_list,
20 |         pos_gt_bboxes_list,
21 |         pos_gt_labels_list,
22 |         cfg=cfg,
23 |         reg_classes=reg_classes,
24 |         target_means=target_means,
25 |         target_stds=target_stds)
26 | 
27 |     if concat:
28 |         labels = torch.cat(labels, 0)
29 |         label_weights = torch.cat(label_weights, 0)
30 |         bbox_targets = torch.cat(bbox_targets, 0)
31 |         bbox_weights = torch.cat(bbox_weights, 0)
32 |     return labels, label_weights, bbox_targets, bbox_weights
33 | 
34 | 
35 | def bbox_target_single(pos_bboxes,
36 |                        neg_bboxes,
37 |                        pos_gt_bboxes,
38 |                        pos_gt_labels,
39 |                        cfg,
40 |                        reg_classes=1,
41 |                        target_means=[.0, .0, .0, .0],
42 |                        target_stds=[1.0, 1.0, 1.0, 1.0]):
43 |     num_pos = pos_bboxes.size(0)
44 |     num_neg = neg_bboxes.size(0)
45 |     num_samples = num_pos + num_neg
46 |     labels = pos_bboxes.new_zeros(num_samples, dtype=torch.long)
47 |     label_weights = pos_bboxes.new_zeros(num_samples)
48 |     bbox_targets = pos_bboxes.new_zeros(num_samples, 4)
49 |     bbox_weights = pos_bboxes.new_zeros(num_samples, 4)
50 |     if num_pos > 0:
51 |         labels[:num_pos] = pos_gt_labels
52 |         pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight
53 |         label_weights[:num_pos] = pos_weight
54 |         pos_bbox_targets = bbox2delta(pos_bboxes, pos_gt_bboxes, target_means,
55 |                                       target_stds)
56 |         bbox_targets[:num_pos, :] = pos_bbox_targets
57 |         bbox_weights[:num_pos, :] = 1
58 |     if num_neg > 0:
59 |         label_weights[-num_neg:] = 1.0
60 | 
61 |     return labels, label_weights, bbox_targets, bbox_weights
62 | 
63 | 
64 | def expand_target(bbox_targets, bbox_weights, labels, num_classes):
65 |     bbox_targets_expand = bbox_targets.new_zeros((bbox_targets.size(0),
66 |                                                   4 * num_classes))
67 |     bbox_weights_expand = bbox_weights.new_zeros((bbox_weights.size(0),
68 |                                                   4 * num_classes))
69 |     for i in torch.nonzero(labels > 0).squeeze(-1):
70 |         start, end = labels[i] * 4, (labels[i] + 1) * 4
71 |         bbox_targets_expand[i, start:end] = bbox_targets[i, :]
72 |         bbox_weights_expand[i, start:end] = bbox_weights[i, :]
73 |     return bbox_targets_expand, bbox_weights_expand
--------------------------------------------------------------------------------
/mmdet/core/bbox/geometry.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | 
4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False):
5 |     """Calculate overlaps between two sets of bboxes.
6 | 
7 |     If ``is_aligned`` is ``False``, then calculate the ious between each bbox
8 |     of bboxes1 and bboxes2, otherwise the ious between each aligned pair of
9 |     bboxes1 and bboxes2.
10 | 
11 |     Args:
12 |         bboxes1 (Tensor): shape (m, 4)
13 |         bboxes2 (Tensor): shape (n, 4), if is_aligned is ``True``, then m and n
14 |             must be equal.
15 |         mode (str): "iou" (intersection over union) or "iof" (intersection over
16 |             foreground).
17 | 18 | Returns: 19 | ious(Tensor): shape (m, n) if is_aligned == False else shape (m, 1) 20 | """ 21 | 22 | assert mode in ['iou', 'iof'] 23 | 24 | rows = bboxes1.size(0) 25 | cols = bboxes2.size(0) 26 | if is_aligned: 27 | assert rows == cols 28 | 29 | if rows * cols == 0: 30 | return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols) 31 | 32 | if is_aligned: 33 | lt = torch.max(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2] 34 | rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2] 35 | 36 | wh = (rb - lt + 1).clamp(min=0) # [rows, 2] 37 | overlap = wh[:, 0] * wh[:, 1] 38 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 39 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 40 | 41 | if mode == 'iou': 42 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 43 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 44 | ious = overlap / (area1 + area2 - overlap) 45 | else: 46 | ious = overlap / area1 47 | else: 48 | lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2]) # [rows, cols, 2] 49 | rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:]) # [rows, cols, 2] 50 | 51 | wh = (rb - lt + 1).clamp(min=0) # [rows, cols, 2] 52 | overlap = wh[:, :, 0] * wh[:, :, 1] 53 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 54 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 55 | 56 | if mode == 'iou': 57 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 58 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 59 | ious = overlap / (area1[:, None] + area2 - overlap) 60 | else: 61 | ious = overlap / (area1[:, None]) 62 | 63 | return ious 64 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_sampler import BaseSampler 2 | from .pseudo_sampler import PseudoSampler 3 | from .random_sampler import RandomSampler 4 | from .instance_balanced_pos_sampler import InstanceBalancedPosSampler 5 | from .iou_balanced_neg_sampler import IoUBalancedNegSampler 6 | from .combined_sampler import CombinedSampler 7 | from .ohem_sampler import OHEMSampler 8 | from .sampling_result import SamplingResult 9 | 10 | __all__ = [ 11 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 12 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 13 | 'OHEMSampler', 'SamplingResult' 14 | ] 15 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/base_sampler.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch 4 | 5 | from .sampling_result import SamplingResult 6 | 7 | 8 | class BaseSampler(metaclass=ABCMeta): 9 | 10 | def __init__(self, 11 | num, 12 | pos_fraction, 13 | neg_pos_ub=-1, 14 | add_gt_as_proposals=True, 15 | **kwargs): 16 | self.num = num 17 | self.pos_fraction = pos_fraction 18 | self.neg_pos_ub = neg_pos_ub 19 | self.add_gt_as_proposals = add_gt_as_proposals 20 | self.pos_sampler = self 21 | self.neg_sampler = self 22 | 23 | @abstractmethod 24 | def _sample_pos(self, assign_result, num_expected, **kwargs): 25 | pass 26 | 27 | @abstractmethod 28 | def _sample_neg(self, assign_result, num_expected, **kwargs): 29 | pass 30 | 31 | def sample(self, 32 | assign_result, 33 | bboxes, 34 | gt_bboxes, 35 | gt_labels=None, 36 | **kwargs): 37 | """Sample positive and negative bboxes. 38 | 39 | This is a simple implementation of bbox sampling given candidates, 40 | assigning results and ground truth bboxes. 
41 | 42 | Args: 43 | assign_result (:obj:`AssignResult`): Bbox assigning results. 44 | bboxes (Tensor): Boxes to be sampled from. 45 | gt_bboxes (Tensor): Ground truth bboxes. 46 | gt_labels (Tensor, optional): Class labels of ground truth bboxes. 47 | 48 | Returns: 49 | :obj:`SamplingResult`: Sampling result. 50 | """ 51 | bboxes = bboxes[:, :4] 52 | 53 | gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8) 54 | if self.add_gt_as_proposals: 55 | bboxes = torch.cat([gt_bboxes, bboxes], dim=0) 56 | assign_result.add_gt_(gt_labels) 57 | gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8) 58 | gt_flags = torch.cat([gt_ones, gt_flags]) 59 | 60 | num_expected_pos = int(self.num * self.pos_fraction) 61 | pos_inds = self.pos_sampler._sample_pos( 62 | assign_result, num_expected_pos, bboxes=bboxes, **kwargs) 63 | # We found that sampled indices have duplicated items occasionally. 64 | # (may be a bug of PyTorch) 65 | pos_inds = pos_inds.unique() 66 | num_sampled_pos = pos_inds.numel() 67 | num_expected_neg = self.num - num_sampled_pos 68 | if self.neg_pos_ub >= 0: 69 | _pos = max(1, num_sampled_pos) 70 | neg_upper_bound = int(self.neg_pos_ub * _pos) 71 | if num_expected_neg > neg_upper_bound: 72 | num_expected_neg = neg_upper_bound 73 | neg_inds = self.neg_sampler._sample_neg( 74 | assign_result, num_expected_neg, bboxes=bboxes, **kwargs) 75 | neg_inds = neg_inds.unique() 76 | 77 | return SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 78 | assign_result, gt_flags) 79 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/combined_sampler.py: -------------------------------------------------------------------------------- 1 | from .base_sampler import BaseSampler 2 | from ..assign_sampling import build_sampler 3 | 4 | 5 | class CombinedSampler(BaseSampler): 6 | 7 | def __init__(self, pos_sampler, neg_sampler, **kwargs): 8 | super(CombinedSampler, self).__init__(**kwargs) 9 | self.pos_sampler = build_sampler(pos_sampler, **kwargs) 10 | self.neg_sampler = build_sampler(neg_sampler, **kwargs) 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .random_sampler import RandomSampler 5 | 6 | 7 | class InstanceBalancedPosSampler(RandomSampler): 8 | 9 | def _sample_pos(self, assign_result, num_expected, **kwargs): 10 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 11 | if pos_inds.numel() != 0: 12 | pos_inds = pos_inds.squeeze(1) 13 | if pos_inds.numel() <= num_expected: 14 | return pos_inds 15 | else: 16 | unique_gt_inds = assign_result.gt_inds[pos_inds].unique() 17 | num_gts = len(unique_gt_inds) 18 | num_per_gt = int(round(num_expected / float(num_gts)) + 1) 19 | sampled_inds = [] 20 | for i in unique_gt_inds: 21 | inds = torch.nonzero(assign_result.gt_inds == i.item()) 22 | if inds.numel() != 0: 23 | inds = inds.squeeze(1) 24 | else: 25 | continue 26 | if len(inds) > num_per_gt: 27 | inds = self.random_choice(inds, num_per_gt) 28 | sampled_inds.append(inds) 29 | sampled_inds = torch.cat(sampled_inds) 30 | if len(sampled_inds) < num_expected: 31 | num_extra = num_expected - len(sampled_inds) 32 | extra_inds = np.array( 33 | 
list(set(pos_inds.cpu()) - set(sampled_inds.cpu()))) 34 | if len(extra_inds) > num_extra: 35 | extra_inds = self.random_choice(extra_inds, num_extra) 36 | extra_inds = torch.from_numpy(extra_inds).to( 37 | assign_result.gt_inds.device).long() 38 | sampled_inds = torch.cat([sampled_inds, extra_inds]) 39 | elif len(sampled_inds) > num_expected: 40 | sampled_inds = self.random_choice(sampled_inds, num_expected) 41 | return sampled_inds 42 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/iou_balanced_neg_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .random_sampler import RandomSampler 5 | 6 | 7 | class IoUBalancedNegSampler(RandomSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | hard_thr=0.1, 13 | hard_fraction=0.5, 14 | **kwargs): 15 | super(IoUBalancedNegSampler, self).__init__(num, pos_fraction, 16 | **kwargs) 17 | assert hard_thr > 0 18 | assert 0 < hard_fraction < 1 19 | self.hard_thr = hard_thr 20 | self.hard_fraction = hard_fraction 21 | 22 | def _sample_neg(self, assign_result, num_expected, **kwargs): 23 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 24 | if neg_inds.numel() != 0: 25 | neg_inds = neg_inds.squeeze(1) 26 | if len(neg_inds) <= num_expected: 27 | return neg_inds 28 | else: 29 | max_overlaps = assign_result.max_overlaps.cpu().numpy() 30 | # balance sampling for negative samples 31 | neg_set = set(neg_inds.cpu().numpy()) 32 | easy_set = set( 33 | np.where( 34 | np.logical_and(max_overlaps >= 0, 35 | max_overlaps < self.hard_thr))[0]) 36 | hard_set = set(np.where(max_overlaps >= self.hard_thr)[0]) 37 | easy_neg_inds = list(easy_set & neg_set) 38 | hard_neg_inds = list(hard_set & neg_set) 39 | 40 | num_expected_hard = int(num_expected * self.hard_fraction) 41 | if len(hard_neg_inds) > num_expected_hard: 42 | sampled_hard_inds = self.random_choice(hard_neg_inds, 43 | num_expected_hard) 44 | else: 45 | sampled_hard_inds = np.array(hard_neg_inds, dtype=np.int) 46 | num_expected_easy = num_expected - len(sampled_hard_inds) 47 | if len(easy_neg_inds) > num_expected_easy: 48 | sampled_easy_inds = self.random_choice(easy_neg_inds, 49 | num_expected_easy) 50 | else: 51 | sampled_easy_inds = np.array(easy_neg_inds, dtype=np.int) 52 | sampled_inds = np.concatenate((sampled_easy_inds, 53 | sampled_hard_inds)) 54 | if len(sampled_inds) < num_expected: 55 | num_extra = num_expected - len(sampled_inds) 56 | extra_inds = np.array(list(neg_set - set(sampled_inds))) 57 | if len(extra_inds) > num_extra: 58 | extra_inds = self.random_choice(extra_inds, num_extra) 59 | sampled_inds = np.concatenate((sampled_inds, extra_inds)) 60 | sampled_inds = torch.from_numpy(sampled_inds).long().to( 61 | assign_result.gt_inds.device) 62 | return sampled_inds 63 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/ohem_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sampler import BaseSampler 4 | from ..transforms import bbox2roi 5 | 6 | 7 | class OHEMSampler(BaseSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | context, 13 | neg_pos_ub=-1, 14 | add_gt_as_proposals=True, 15 | **kwargs): 16 | super(OHEMSampler, self).__init__(num, pos_fraction, neg_pos_ub, 17 | add_gt_as_proposals) 18 | if not hasattr(context, 'num_stages'): 19 | self.bbox_roi_extractor = 
context.bbox_roi_extractor 20 | self.bbox_head = context.bbox_head 21 | else: 22 | self.bbox_roi_extractor = context.bbox_roi_extractor[ 23 | context.current_stage] 24 | self.bbox_head = context.bbox_head[context.current_stage] 25 | 26 | def hard_mining(self, inds, num_expected, bboxes, labels, feats): 27 | with torch.no_grad(): 28 | rois = bbox2roi([bboxes]) 29 | bbox_feats = self.bbox_roi_extractor( 30 | feats[:self.bbox_roi_extractor.num_inputs], rois) 31 | cls_score, _ = self.bbox_head(bbox_feats) 32 | loss = self.bbox_head.loss( 33 | cls_score=cls_score, 34 | bbox_pred=None, 35 | labels=labels, 36 | label_weights=cls_score.new_ones(cls_score.size(0)), 37 | bbox_targets=None, 38 | bbox_weights=None, 39 | reduce=False)['loss_cls'] 40 | _, topk_loss_inds = loss.topk(num_expected) 41 | return inds[topk_loss_inds] 42 | 43 | def _sample_pos(self, 44 | assign_result, 45 | num_expected, 46 | bboxes=None, 47 | feats=None, 48 | **kwargs): 49 | # Sample some hard positive samples 50 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 51 | if pos_inds.numel() != 0: 52 | pos_inds = pos_inds.squeeze(1) 53 | if pos_inds.numel() <= num_expected: 54 | return pos_inds 55 | else: 56 | return self.hard_mining(pos_inds, num_expected, bboxes[pos_inds], 57 | assign_result.labels[pos_inds], feats) 58 | 59 | def _sample_neg(self, 60 | assign_result, 61 | num_expected, 62 | bboxes=None, 63 | feats=None, 64 | **kwargs): 65 | # Sample some hard negative samples 66 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 67 | if neg_inds.numel() != 0: 68 | neg_inds = neg_inds.squeeze(1) 69 | if len(neg_inds) <= num_expected: 70 | return neg_inds 71 | else: 72 | return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds], 73 | assign_result.labels[neg_inds], feats) 74 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/pseudo_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sampler import BaseSampler 4 | from .sampling_result import SamplingResult 5 | 6 | 7 | class PseudoSampler(BaseSampler): 8 | 9 | def __init__(self, **kwargs): 10 | pass 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | 18 | def sample(self, assign_result, bboxes, gt_bboxes, **kwargs): 19 | pos_inds = torch.nonzero( 20 | assign_result.gt_inds > 0).squeeze(-1).unique() 21 | neg_inds = torch.nonzero( 22 | assign_result.gt_inds == 0).squeeze(-1).unique() 23 | gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8) 24 | sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 25 | assign_result, gt_flags) 26 | return sampling_result 27 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/random_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .base_sampler import BaseSampler 5 | 6 | 7 | class RandomSampler(BaseSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | neg_pos_ub=-1, 13 | add_gt_as_proposals=True, 14 | **kwargs): 15 | super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub, 16 | add_gt_as_proposals) 17 | 18 | @staticmethod 19 | def random_choice(gallery, num): 20 | """Random select some elements from the gallery. 
21 | 22 | It seems that Pytorch's implementation is slower than numpy so we use 23 | numpy to randperm the indices. 24 | """ 25 | assert len(gallery) >= num 26 | if isinstance(gallery, list): 27 | gallery = np.array(gallery) 28 | cands = np.arange(len(gallery)) 29 | np.random.shuffle(cands) 30 | rand_inds = cands[:num] 31 | if not isinstance(gallery, np.ndarray): 32 | rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device) 33 | return gallery[rand_inds] 34 | 35 | def _sample_pos(self, assign_result, num_expected, **kwargs): 36 | """Randomly sample some positive samples.""" 37 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 38 | if pos_inds.numel() != 0: 39 | pos_inds = pos_inds.squeeze(1) 40 | if pos_inds.numel() <= num_expected: 41 | return pos_inds 42 | else: 43 | return self.random_choice(pos_inds, num_expected) 44 | 45 | def _sample_neg(self, assign_result, num_expected, **kwargs): 46 | """Randomly sample some negative samples.""" 47 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 48 | if neg_inds.numel() != 0: 49 | neg_inds = neg_inds.squeeze(1) 50 | if len(neg_inds) <= num_expected: 51 | return neg_inds 52 | else: 53 | return self.random_choice(neg_inds, num_expected) 54 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/sampling_result.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class SamplingResult(object): 5 | 6 | def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result, 7 | gt_flags): 8 | self.pos_inds = pos_inds 9 | self.neg_inds = neg_inds 10 | self.pos_bboxes = bboxes[pos_inds] 11 | self.neg_bboxes = bboxes[neg_inds] 12 | self.pos_is_gt = gt_flags[pos_inds] 13 | 14 | self.num_gts = gt_bboxes.shape[0] 15 | self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1 16 | self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds, :] 17 | if assign_result.labels is not None: 18 | self.pos_gt_labels = assign_result.labels[pos_inds] 19 | else: 20 | self.pos_gt_labels = None 21 | 22 | @property 23 | def bboxes(self): 24 | return torch.cat([self.pos_bboxes, self.neg_bboxes]) 25 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .class_names import (voc_classes, imagenet_det_classes, 2 | imagenet_vid_classes, coco_classes, dataset_aliases, 3 | get_classes) 4 | from .coco_utils import coco_eval, fast_eval_recall, results2json 5 | from .eval_hooks import (DistEvalHook, DistEvalmAPHook, CocoDistEvalRecallHook, 6 | CocoDistEvalmAPHook) 7 | from .mean_ap import average_precision, eval_map, print_map_summary 8 | from .recall import (eval_recalls, print_recall_summary, plot_num_recall, 9 | plot_iou_recall) 10 | 11 | __all__ = [ 12 | 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes', 13 | 'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval', 14 | 'fast_eval_recall', 'results2json', 'DistEvalHook', 'DistEvalmAPHook', 15 | 'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision', 16 | 'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary', 17 | 'plot_num_recall', 'plot_iou_recall' 18 | ] 19 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/bbox_overlaps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | 
def bbox_overlaps(bboxes1, bboxes2, mode='iou'): 5 | """Calculate the ious between each bbox of bboxes1 and bboxes2. 6 | 7 | Args: 8 | bboxes1(ndarray): shape (n, 4) 9 | bboxes2(ndarray): shape (k, 4) 10 | mode(str): iou (intersection over union) or iof (intersection 11 | over foreground) 12 | 13 | Returns: 14 | ious(ndarray): shape (n, k) 15 | """ 16 | 17 | assert mode in ['iou', 'iof'] 18 | 19 | bboxes1 = bboxes1.astype(np.float32) 20 | bboxes2 = bboxes2.astype(np.float32) 21 | rows = bboxes1.shape[0] 22 | cols = bboxes2.shape[0] 23 | ious = np.zeros((rows, cols), dtype=np.float32) 24 | if rows * cols == 0: 25 | return ious 26 | exchange = False 27 | if bboxes1.shape[0] > bboxes2.shape[0]: 28 | bboxes1, bboxes2 = bboxes2, bboxes1 29 | ious = np.zeros((cols, rows), dtype=np.float32) 30 | exchange = True 31 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 32 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 33 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 34 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 35 | for i in range(bboxes1.shape[0]): 36 | x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0]) 37 | y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1]) 38 | x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2]) 39 | y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3]) 40 | overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum( 41 | y_end - y_start + 1, 0) 42 | if mode == 'iou': 43 | union = area1[i] + area2 - overlap 44 | else: 45 | union = area1[i] if not exchange else area2 46 | ious[i, :] = overlap / union 47 | if exchange: 48 | ious = ious.T 49 | return ious 50 | -------------------------------------------------------------------------------- /mmdet/core/loss/__init__.py: -------------------------------------------------------------------------------- 1 | from .losses import ( 2 | weighted_nll_loss, weighted_cross_entropy, weighted_binary_cross_entropy, 3 | sigmoid_focal_loss, py_sigmoid_focal_loss, weighted_sigmoid_focal_loss, 4 | mask_cross_entropy, smooth_l1_loss, weighted_smoothl1, accuracy, seg_cross_entropy) 5 | 6 | __all__ = [ 7 | 'weighted_nll_loss', 'weighted_cross_entropy', 8 | 'weighted_binary_cross_entropy', 'sigmoid_focal_loss', 9 | 'py_sigmoid_focal_loss', 'weighted_sigmoid_focal_loss', 10 | 'mask_cross_entropy', 'smooth_l1_loss', 'weighted_smoothl1', 'accuracy', 'seg_cross_entropy' 11 | ] 12 | -------------------------------------------------------------------------------- /mmdet/core/mask/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import split_combined_polys 2 | from .mask_target import mask_target 3 | 4 | __all__ = ['split_combined_polys', 'mask_target'] 5 | -------------------------------------------------------------------------------- /mmdet/core/mask/mask_target.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import mmcv 4 | 5 | 6 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list, 7 | cfg): 8 | cfg_list = [cfg for _ in range(len(pos_proposals_list))] 9 | mask_targets = map(mask_target_single, pos_proposals_list, 10 | pos_assigned_gt_inds_list, gt_masks_list, cfg_list) 11 | mask_targets = torch.cat(list(mask_targets)) 12 | return mask_targets 13 | 14 | 15 | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg): 16 | mask_size = cfg.mask_size 17 | num_pos = pos_proposals.size(0) 18 | mask_targets = [] 19 | if num_pos > 0: 20 | proposals_np = pos_proposals.cpu().numpy() 21 | pos_assigned_gt_inds = 
pos_assigned_gt_inds.cpu().numpy()
22 |         for i in range(num_pos):
23 |             gt_mask = gt_masks[pos_assigned_gt_inds[i]]
24 |             bbox = proposals_np[i, :].astype(np.int32)
25 |             x1, y1, x2, y2 = bbox
26 |             w = np.maximum(x2 - x1 + 1, 1)
27 |             h = np.maximum(y2 - y1 + 1, 1)
28 |             # mask is uint8 both before and after resizing
29 |             target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w],
30 |                                    (mask_size, mask_size))
31 |             mask_targets.append(target)
32 |         mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to(
33 |             pos_proposals.device)
34 |     else:
35 |         mask_targets = pos_proposals.new_zeros((0, mask_size, mask_size))
36 |     return mask_targets
--------------------------------------------------------------------------------
/mmdet/core/mask/utils.py:
--------------------------------------------------------------------------------
1 | import mmcv
2 | 
3 | 
4 | def split_combined_polys(polys, poly_lens, polys_per_mask):
5 |     """Split the combined 1-D polys into masks.
6 | 
7 |     A mask is represented as a list of polys, and a poly is represented as
8 |     a 1-D array. In the dataset, all masks are concatenated into a single 1-D
9 |     tensor. Here we need to split the tensor into original representations.
10 | 
11 |     Args:
12 |         polys (list): a list (length = image num) of 1-D tensors
13 |         poly_lens (list): a list (length = image num) of poly length
14 |         polys_per_mask (list): a list (length = image num) of poly number
15 |             of each mask
16 | 
17 |     Returns:
18 |         list: a list (length = image num) of list (length = mask num) of
19 |             list (length = poly num) of numpy array
20 |     """
21 |     mask_polys_list = []
22 |     for img_id in range(len(polys)):
23 |         polys_single = polys[img_id]
24 |         polys_lens_single = poly_lens[img_id].tolist()
25 |         polys_per_mask_single = polys_per_mask[img_id].tolist()
26 | 
27 |         split_polys = mmcv.slice_list(polys_single, polys_lens_single)
28 |         mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single)
29 |         mask_polys_list.append(mask_polys)
30 |     return mask_polys_list
--------------------------------------------------------------------------------
/mmdet/core/post_processing/__init__.py:
--------------------------------------------------------------------------------
1 | from .bbox_nms import multiclass_nms
2 | from .merge_augs import (merge_aug_proposals, merge_aug_bboxes,
3 |                          merge_aug_scores, merge_aug_masks, merge_aug_segs)
4 | 
5 | __all__ = [
6 |     'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes',
7 |     'merge_aug_scores', 'merge_aug_masks', 'merge_aug_segs'
8 | ]
--------------------------------------------------------------------------------
/mmdet/core/post_processing/bbox_nms.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | from mmdet.ops.nms import nms_wrapper
4 | 
5 | 
6 | def multiclass_nms(multi_bboxes, multi_scores, score_thr, nms_cfg, max_num=-1):
7 |     """NMS for multi-class bboxes.
8 | 
9 |     Args:
10 |         multi_bboxes (Tensor): shape (n, #class*4) or (n, 4)
11 |         multi_scores (Tensor): shape (n, #class)
12 |         score_thr (float): bbox threshold, bboxes with scores lower than it
13 |             will not be considered.
14 |         nms_cfg (dict): NMS config, e.g. dict(type='nms', iou_thr=0.5).
15 |         max_num (int): if there are more than max_num bboxes after NMS,
16 |             only top max_num will be kept.
17 | 
18 |     Returns:
19 |         tuple: (bboxes, labels), tensors of shape (k, 5) and (k, ). Labels
20 |             are 0-based.
21 |     """
22 |     num_classes = multi_scores.shape[1]
23 |     bboxes, labels = [], []
24 |     nms_cfg_ = nms_cfg.copy()
25 |     nms_type = nms_cfg_.pop('type', 'nms')
26 |     nms_op = getattr(nms_wrapper, nms_type)
27 |     for i in range(1, num_classes):
28 |         cls_inds = multi_scores[:, i] > score_thr
29 |         if not cls_inds.any():
30 |             continue
31 |         # get bboxes and scores of this class
32 |         if multi_bboxes.shape[1] == 4:
33 |             _bboxes = multi_bboxes[cls_inds, :]
34 |         else:
35 |             _bboxes = multi_bboxes[cls_inds, i * 4:(i + 1) * 4]
36 |         _scores = multi_scores[cls_inds, i]
37 |         cls_dets = torch.cat([_bboxes, _scores[:, None]], dim=1)
38 |         cls_dets, _ = nms_op(cls_dets, **nms_cfg_)
39 |         cls_labels = multi_bboxes.new_full(
40 |             (cls_dets.shape[0], ), i - 1, dtype=torch.long)
41 |         bboxes.append(cls_dets)
42 |         labels.append(cls_labels)
43 |     if bboxes:
44 |         bboxes = torch.cat(bboxes)
45 |         labels = torch.cat(labels)
46 |         if max_num > 0 and bboxes.shape[0] > max_num:  # max_num < 0 means no cap
47 |             _, inds = bboxes[:, -1].sort(descending=True)
48 |             inds = inds[:max_num]
49 |             bboxes = bboxes[inds]
50 |             labels = labels[inds]
51 |     else:
52 |         bboxes = multi_bboxes.new_zeros((0, 5))
53 |         labels = multi_bboxes.new_zeros((0, ), dtype=torch.long)
54 | 
55 |     return bboxes, labels
--------------------------------------------------------------------------------
/mmdet/core/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .dist_utils import allreduce_grads, DistOptimizerHook
2 | from .misc import tensor2imgs, unmap, multi_apply
3 | 
4 | __all__ = [
5 |     'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', 'unmap',
6 |     'multi_apply'
7 | ]
--------------------------------------------------------------------------------
/mmdet/core/utils/dist_utils.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 | 
3 | import torch.distributed as dist
4 | from torch._utils import (_flatten_dense_tensors, _unflatten_dense_tensors,
5 |                           _take_tensors)
6 | from mmcv.runner import OptimizerHook
7 | 
8 | 
9 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1):
10 |     if bucket_size_mb > 0:
11 |         bucket_size_bytes = bucket_size_mb * 1024 * 1024
12 |         buckets = _take_tensors(tensors, bucket_size_bytes)
13 |     else:
14 |         buckets = OrderedDict()
15 |         for tensor in tensors:
16 |             tp = tensor.type()
17 |             if tp not in buckets:
18 |                 buckets[tp] = []
19 |             buckets[tp].append(tensor)
20 |         buckets = buckets.values()
21 | 
22 |     for bucket in buckets:
23 |         flat_tensors = _flatten_dense_tensors(bucket)
24 |         dist.all_reduce(flat_tensors)
25 |         flat_tensors.div_(world_size)
26 |         for tensor, synced in zip(
27 |                 bucket, _unflatten_dense_tensors(flat_tensors, bucket)):
28 |             tensor.copy_(synced)
29 | 
30 | 
31 | def allreduce_grads(model, coalesce=True, bucket_size_mb=-1):
32 |     grads = [
33 |         param.grad.data for param in model.parameters()
34 |         if param.requires_grad and param.grad is not None
35 |     ]
36 |     world_size = dist.get_world_size()
37 |     if coalesce:
38 |         _allreduce_coalesced(grads, world_size, bucket_size_mb)
39 |     else:
40 |         for tensor in grads:
41 |             dist.all_reduce(tensor.div_(world_size))
42 | 
43 | 
44 | class DistOptimizerHook(OptimizerHook):
45 | 
46 |     def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1):
47 |         self.grad_clip = grad_clip
48 |         self.coalesce = coalesce
49 |         self.bucket_size_mb = bucket_size_mb
50 | 
51 |     def after_train_iter(self, runner):
52 |         runner.optimizer.zero_grad()
53 |         runner.outputs['loss'].backward()
54 |         allreduce_grads(runner.model, self.coalesce, self.bucket_size_mb)
55 |         if self.grad_clip is not None:
56 |             self.clip_grads(runner.model.parameters())
57 |         runner.optimizer.step()
--------------------------------------------------------------------------------
/mmdet/core/utils/misc.py:
--------------------------------------------------------------------------------
1 | from functools import partial
2 | 
3 | import mmcv
4 | import numpy as np
5 | from six.moves import map, zip
6 | 
7 | 
8 | def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True):
9 |     num_imgs = tensor.size(0)
10 |     mean = np.array(mean, dtype=np.float32)
11 |     std = np.array(std, dtype=np.float32)
12 |     imgs = []
13 |     for img_id in range(num_imgs):
14 |         img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0)
15 |         img = mmcv.imdenormalize(
16 |             img, mean, std, to_bgr=to_rgb).astype(np.uint8)
17 |         imgs.append(np.ascontiguousarray(img))
18 |     return imgs
19 | 
20 | 
21 | def multi_apply(func, *args, **kwargs):
22 |     pfunc = partial(func, **kwargs) if kwargs else func
23 |     map_results = map(pfunc, *args)
24 |     return tuple(map(list, zip(*map_results)))
25 | 
26 | 
27 | def unmap(data, count, inds, fill=0):
28 |     """Unmap a subset of items (data) back to the original set of items (of
29 |     size count)."""
30 |     if data.dim() == 1:
31 |         ret = data.new_full((count, ), fill)
32 |         ret[inds] = data
33 |     else:
34 |         new_size = (count, ) + data.size()[1:]
35 |         ret = data.new_full(new_size, fill)
36 |         ret[inds, :] = data
37 |     return ret
--------------------------------------------------------------------------------
/mmdet/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .custom import CustomDataset
2 | from .xml_style import XMLDataset
3 | from .coco import CocoDataset
4 | from .voc import VOCDataset
5 | from .loader import GroupSampler, DistributedGroupSampler, build_dataloader
6 | from .utils import to_tensor, random_scale, show_ann, get_dataset
7 | from .concat_dataset import ConcatDataset
8 | from .repeat_dataset import RepeatDataset
9 | from .extra_aug import ExtraAugmentation
10 | from .custom_panoptic import CustomPanopticDataset
11 | from .coco_panoptic import CocoPanopticDataset
12 | 
13 | __all__ = [
14 |     'CustomDataset', 'XMLDataset', 'CocoDataset', 'VOCDataset', 'GroupSampler',
15 |     'DistributedGroupSampler', 'build_dataloader', 'to_tensor', 'random_scale',
16 |     'show_ann', 'get_dataset', 'ConcatDataset', 'RepeatDataset',
17 |     'ExtraAugmentation', 'CustomPanopticDataset', 'CocoPanopticDataset'
18 | ]
--------------------------------------------------------------------------------
/mmdet/datasets/concat_dataset.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset
3 | 
4 | 
5 | class ConcatDataset(_ConcatDataset):
6 |     """A wrapper of concatenated dataset.
7 | 
8 |     Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but also
9 |     concatenates the group flag for image aspect ratio.
10 | 
11 |     Args:
12 |         datasets (list[:obj:`Dataset`]): A list of datasets.
13 | """ 14 | 15 | def __init__(self, datasets): 16 | super(ConcatDataset, self).__init__(datasets) 17 | self.CLASSES = datasets[0].CLASSES 18 | if hasattr(datasets[0], 'flag'): 19 | flags = [] 20 | for i in range(0, len(datasets)): 21 | flags.append(datasets[i].flag) 22 | self.flag = np.concatenate(flags) 23 | -------------------------------------------------------------------------------- /mmdet/datasets/loader/__init__.py: -------------------------------------------------------------------------------- 1 | from .build_loader import build_dataloader 2 | from .sampler import GroupSampler, DistributedGroupSampler 3 | 4 | __all__ = [ 5 | 'GroupSampler', 'DistributedGroupSampler', 'build_dataloader' 6 | ] 7 | -------------------------------------------------------------------------------- /mmdet/datasets/loader/build_loader.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | from mmcv.runner import get_dist_info 4 | from mmcv.parallel import collate 5 | from torch.utils.data import DataLoader 6 | 7 | from .sampler import GroupSampler, DistributedGroupSampler 8 | 9 | # https://github.com/pytorch/pytorch/issues/973 10 | import resource 11 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 12 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 13 | 14 | 15 | def build_dataloader(dataset, 16 | imgs_per_gpu, 17 | workers_per_gpu, 18 | num_gpus=1, 19 | dist=True, 20 | **kwargs): 21 | if dist: 22 | rank, world_size = get_dist_info() 23 | sampler = DistributedGroupSampler(dataset, imgs_per_gpu, world_size, 24 | rank) 25 | batch_size = imgs_per_gpu 26 | num_workers = workers_per_gpu 27 | else: 28 | if not kwargs.get('shuffle', True): 29 | sampler = None 30 | else: 31 | sampler = GroupSampler(dataset, imgs_per_gpu) 32 | batch_size = num_gpus * imgs_per_gpu 33 | num_workers = num_gpus * workers_per_gpu 34 | 35 | data_loader = DataLoader( 36 | dataset, 37 | batch_size=batch_size, 38 | sampler=sampler, 39 | num_workers=num_workers, 40 | collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu), 41 | pin_memory=False, 42 | **kwargs) 43 | 44 | return data_loader 45 | -------------------------------------------------------------------------------- /mmdet/datasets/repeat_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class RepeatDataset(object): 5 | 6 | def __init__(self, dataset, times): 7 | self.dataset = dataset 8 | self.times = times 9 | self.CLASSES = dataset.CLASSES 10 | if hasattr(self.dataset, 'flag'): 11 | self.flag = np.tile(self.dataset.flag, times) 12 | 13 | self._ori_len = len(self.dataset) 14 | 15 | def __getitem__(self, idx): 16 | return self.dataset[idx % self._ori_len] 17 | 18 | def __len__(self): 19 | return self.times * self._ori_len 20 | -------------------------------------------------------------------------------- /mmdet/datasets/voc.py: -------------------------------------------------------------------------------- 1 | from .xml_style import XMLDataset 2 | 3 | 4 | class VOCDataset(XMLDataset): 5 | 6 | CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 7 | 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 8 | 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 9 | 'tvmonitor') 10 | 11 | def __init__(self, **kwargs): 12 | super(VOCDataset, self).__init__(**kwargs) 13 | if 'VOC2007' in self.img_prefix: 14 | self.year = 2007 15 | elif 'VOC2012' in self.img_prefix: 16 | self.year = 
2012 17 | else: 18 | raise ValueError('Cannot infer dataset year from img_prefix') 19 | -------------------------------------------------------------------------------- /mmdet/datasets/xml_style.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import xml.etree.ElementTree as ET 3 | 4 | import mmcv 5 | import numpy as np 6 | 7 | from .custom import CustomDataset 8 | 9 | 10 | class XMLDataset(CustomDataset): 11 | 12 | def __init__(self, **kwargs): 13 | super(XMLDataset, self).__init__(**kwargs) 14 | self.cat2label = {cat: i + 1 for i, cat in enumerate(self.CLASSES)} 15 | 16 | def load_annotations(self, ann_file): 17 | img_infos = [] 18 | img_ids = mmcv.list_from_file(ann_file) 19 | for img_id in img_ids: 20 | filename = 'JPEGImages/{}.jpg'.format(img_id) 21 | xml_path = osp.join(self.img_prefix, 'Annotations', 22 | '{}.xml'.format(img_id)) 23 | tree = ET.parse(xml_path) 24 | root = tree.getroot() 25 | size = root.find('size') 26 | width = int(size.find('width').text) 27 | height = int(size.find('height').text) 28 | img_infos.append( 29 | dict(id=img_id, filename=filename, width=width, height=height)) 30 | return img_infos 31 | 32 | def get_ann_info(self, idx): 33 | img_id = self.img_infos[idx]['id'] 34 | xml_path = osp.join(self.img_prefix, 'Annotations', 35 | '{}.xml'.format(img_id)) 36 | tree = ET.parse(xml_path) 37 | root = tree.getroot() 38 | bboxes = [] 39 | labels = [] 40 | bboxes_ignore = [] 41 | labels_ignore = [] 42 | for obj in root.findall('object'): 43 | name = obj.find('name').text 44 | label = self.cat2label[name] 45 | difficult = int(obj.find('difficult').text) 46 | bnd_box = obj.find('bndbox') 47 | bbox = [ 48 | int(bnd_box.find('xmin').text), 49 | int(bnd_box.find('ymin').text), 50 | int(bnd_box.find('xmax').text), 51 | int(bnd_box.find('ymax').text) 52 | ] 53 | if difficult: 54 | bboxes_ignore.append(bbox) 55 | labels_ignore.append(label) 56 | else: 57 | bboxes.append(bbox) 58 | labels.append(label) 59 | if not bboxes: 60 | bboxes = np.zeros((0, 4)) 61 | labels = np.zeros((0, )) 62 | else: 63 | bboxes = np.array(bboxes, ndmin=2) - 1 64 | labels = np.array(labels) 65 | if not bboxes_ignore: 66 | bboxes_ignore = np.zeros((0, 4)) 67 | labels_ignore = np.zeros((0, )) 68 | else: 69 | bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1 70 | labels_ignore = np.array(labels_ignore) 71 | ann = dict( 72 | bboxes=bboxes.astype(np.float32), 73 | labels=labels.astype(np.int64), 74 | bboxes_ignore=bboxes_ignore.astype(np.float32), 75 | labels_ignore=labels_ignore.astype(np.int64)) 76 | return ann 77 | -------------------------------------------------------------------------------- /mmdet/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import * # noqa: F401,F403 2 | from .necks import * # noqa: F401,F403 3 | from .roi_extractors import * # noqa: F401,F403 4 | from .anchor_heads import * # noqa: F401,F403 5 | from .shared_heads import * # noqa: F401,F403 6 | from .bbox_heads import * # noqa: F401,F403 7 | from .mask_heads import * # noqa: F401,F403 8 | from .seg_heads import * # noqa: F401,F403 9 | from .detectors import * # noqa: F401,F403 10 | from .registry import (BACKBONES, NECKS, ROI_EXTRACTORS, SHARED_HEADS, HEADS, 11 | DETECTORS) 12 | from .builder import (build_backbone, build_neck, build_roi_extractor, 13 | build_shared_head, build_head, build_detector) 14 | 15 | __all__ = [ 16 | 'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 
'HEADS', 17 | 'DETECTORS', 'build_backbone', 'build_neck', 'build_roi_extractor', 18 | 'build_shared_head', 'build_head', 'build_detector' 19 | ] 20 | -------------------------------------------------------------------------------- /mmdet/models/anchor_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_head import AnchorHead 2 | from .rpn_head import RPNHead 3 | from .retina_head import RetinaHead 4 | from .ssd_head import SSDHead 5 | 6 | __all__ = ['AnchorHead', 'RPNHead', 'RetinaHead', 'SSDHead'] 7 | -------------------------------------------------------------------------------- /mmdet/models/anchor_heads/retina_head.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | from mmcv.cnn import normal_init 4 | 5 | from .anchor_head import AnchorHead 6 | from ..registry import HEADS 7 | from ..utils import bias_init_with_prob 8 | 9 | 10 | @HEADS.register_module 11 | class RetinaHead(AnchorHead): 12 | 13 | def __init__(self, 14 | num_classes, 15 | in_channels, 16 | stacked_convs=4, 17 | octave_base_scale=4, 18 | scales_per_octave=3, 19 | **kwargs): 20 | self.stacked_convs = stacked_convs 21 | self.octave_base_scale = octave_base_scale 22 | self.scales_per_octave = scales_per_octave 23 | octave_scales = np.array( 24 | [2**(i / scales_per_octave) for i in range(scales_per_octave)]) 25 | anchor_scales = octave_scales * octave_base_scale 26 | super(RetinaHead, self).__init__( 27 | num_classes, 28 | in_channels, 29 | anchor_scales=anchor_scales, 30 | use_sigmoid_cls=True, 31 | use_focal_loss=True, 32 | **kwargs) 33 | 34 | def _init_layers(self): 35 | self.relu = nn.ReLU(inplace=True) 36 | self.cls_convs = nn.ModuleList() 37 | self.reg_convs = nn.ModuleList() 38 | for i in range(self.stacked_convs): 39 | chn = self.in_channels if i == 0 else self.feat_channels 40 | self.cls_convs.append( 41 | nn.Conv2d(chn, self.feat_channels, 3, stride=1, padding=1)) 42 | self.reg_convs.append( 43 | nn.Conv2d(chn, self.feat_channels, 3, stride=1, padding=1)) 44 | self.retina_cls = nn.Conv2d( 45 | self.feat_channels, 46 | self.num_anchors * self.cls_out_channels, 47 | 3, 48 | padding=1) 49 | self.retina_reg = nn.Conv2d( 50 | self.feat_channels, self.num_anchors * 4, 3, padding=1) 51 | 52 | def init_weights(self): 53 | for m in self.cls_convs: 54 | normal_init(m, std=0.01) 55 | for m in self.reg_convs: 56 | normal_init(m, std=0.01) 57 | bias_cls = bias_init_with_prob(0.01) 58 | normal_init(self.retina_cls, std=0.01, bias=bias_cls) 59 | normal_init(self.retina_reg, std=0.01) 60 | 61 | def forward_single(self, x): 62 | cls_feat = x 63 | reg_feat = x 64 | for cls_conv in self.cls_convs: 65 | cls_feat = self.relu(cls_conv(cls_feat)) 66 | for reg_conv in self.reg_convs: 67 | reg_feat = self.relu(reg_conv(reg_feat)) 68 | cls_score = self.retina_cls(cls_feat) 69 | bbox_pred = self.retina_reg(reg_feat) 70 | return cls_score, bbox_pred 71 | -------------------------------------------------------------------------------- /mmdet/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import ResNet, make_res_layer 2 | from .resnext import ResNeXt 3 | from .ssd_vgg import SSDVGG 4 | 5 | __all__ = ['ResNet', 'make_res_layer', 'ResNeXt', 'SSDVGG'] 6 | -------------------------------------------------------------------------------- /mmdet/models/bbox_heads/__init__.py: 
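A note on the anchor scales constructed in retina_head.py above: each feature level gets scales_per_octave anchor scales spaced geometrically within one octave, so with the defaults octave_base_scale=4 and scales_per_octave=3 every location receives anchors at 4 * 2**(0/3), 4 * 2**(1/3), and 4 * 2**(2/3) times the stride-derived base size. A standalone sketch of that computation (values follow from the defaults shown above, not from any config in this repo):

    import numpy as np

    # Mirror RetinaHead.__init__ with its default arguments.
    octave_base_scale = 4
    scales_per_octave = 3
    octave_scales = np.array(
        [2**(i / scales_per_octave) for i in range(scales_per_octave)])
    anchor_scales = octave_scales * octave_base_scale
    print(anchor_scales)  # [4.0, 5.0397, 6.3496]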
-------------------------------------------------------------------------------- 1 | from .bbox_head import BBoxHead 2 | from .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead 3 | 4 | __all__ = ['BBoxHead', 'ConvFCBBoxHead', 'SharedFCBBoxHead'] 5 | -------------------------------------------------------------------------------- /mmdet/models/builder.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | from torch import nn 3 | 4 | from .registry import (BACKBONES, NECKS, ROI_EXTRACTORS, SHARED_HEADS, HEADS, 5 | DETECTORS) 6 | 7 | 8 | def _build_module(cfg, registry, default_args): 9 | assert isinstance(cfg, dict) and 'type' in cfg 10 | assert isinstance(default_args, dict) or default_args is None 11 | args = cfg.copy() 12 | obj_type = args.pop('type') 13 | if mmcv.is_str(obj_type): 14 | if obj_type not in registry.module_dict: 15 | raise KeyError('{} is not in the {} registry'.format( 16 | obj_type, registry.name)) 17 | obj_type = registry.module_dict[obj_type] 18 | elif not isinstance(obj_type, type): 19 | raise TypeError('type must be a str or valid type, but got {}'.format( 20 | type(obj_type))) 21 | if default_args is not None: 22 | for name, value in default_args.items(): 23 | args.setdefault(name, value) 24 | return obj_type(**args) 25 | 26 | 27 | def build(cfg, registry, default_args=None): 28 | if isinstance(cfg, list): 29 | modules = [_build_module(cfg_, registry, default_args) for cfg_ in cfg] 30 | return nn.Sequential(*modules) 31 | else: 32 | return _build_module(cfg, registry, default_args) 33 | 34 | 35 | def build_backbone(cfg): 36 | return build(cfg, BACKBONES) 37 | 38 | 39 | def build_neck(cfg): 40 | return build(cfg, NECKS) 41 | 42 | 43 | def build_roi_extractor(cfg): 44 | return build(cfg, ROI_EXTRACTORS) 45 | 46 | 47 | def build_shared_head(cfg): 48 | return build(cfg, SHARED_HEADS) 49 | 50 | 51 | def build_head(cfg): 52 | return build(cfg, HEADS) 53 | 54 | 55 | def build_detector(cfg, train_cfg=None, test_cfg=None): 56 | return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg)) 57 | -------------------------------------------------------------------------------- /mmdet/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseDetector 2 | from .single_stage import SingleStageDetector 3 | from .two_stage import TwoStageDetector 4 | from .rpn import RPN 5 | from .fast_rcnn import FastRCNN 6 | from .faster_rcnn import FasterRCNN 7 | from .mask_rcnn import MaskRCNN 8 | from .cascade_rcnn import CascadeRCNN 9 | from .htc import HybridTaskCascade 10 | from .retinanet import RetinaNet 11 | from .panotic_rcnn import PanoticRCNN 12 | from .two_stage_panotic import TwoStagePanoticDetector 13 | 14 | 15 | __all__ = [ 16 | 'BaseDetector', 'SingleStageDetector', 'TwoStageDetector', 'RPN', 17 | 'FastRCNN', 'FasterRCNN', 'MaskRCNN', 'CascadeRCNN', 'HybridTaskCascade', 18 | 'RetinaNet', 'PanoticRCNN','TwoStagePanoticDetector' 19 | ] 20 | -------------------------------------------------------------------------------- /mmdet/models/detectors/fast_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage import TwoStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class FastRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | bbox_roi_extractor, 11 | bbox_head, 12 | train_cfg, 13 | test_cfg, 14 | neck=None, 15 | 
shared_head=None, 16 | mask_roi_extractor=None, 17 | mask_head=None, 18 | pretrained=None): 19 | super(FastRCNN, self).__init__( 20 | backbone=backbone, 21 | neck=neck, 22 | shared_head=shared_head, 23 | bbox_roi_extractor=bbox_roi_extractor, 24 | bbox_head=bbox_head, 25 | train_cfg=train_cfg, 26 | test_cfg=test_cfg, 27 | mask_roi_extractor=mask_roi_extractor, 28 | mask_head=mask_head, 29 | pretrained=pretrained) 30 | 31 | def forward_test(self, imgs, img_metas, proposals, **kwargs): 32 | for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]: 33 | if not isinstance(var, list): 34 | raise TypeError('{} must be a list, but got {}'.format( 35 | name, type(var))) 36 | 37 | num_augs = len(imgs) 38 | if num_augs != len(img_metas): 39 | raise ValueError( 40 | 'num of augmentations ({}) != num of image meta ({})'.format( 41 | len(imgs), len(img_metas))) 42 | # TODO: remove the restriction of imgs_per_gpu == 1 when prepared 43 | imgs_per_gpu = imgs[0].size(0) 44 | assert imgs_per_gpu == 1 45 | 46 | if num_augs == 1: 47 | return self.simple_test(imgs[0], img_metas[0], proposals[0], 48 | **kwargs) 49 | else: 50 | return self.aug_test(imgs, img_metas, proposals, **kwargs) 51 | -------------------------------------------------------------------------------- /mmdet/models/detectors/faster_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage import TwoStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class FasterRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | rpn_head, 11 | bbox_roi_extractor, 12 | bbox_head, 13 | train_cfg, 14 | test_cfg, 15 | neck=None, 16 | shared_head=None, 17 | pretrained=None): 18 | super(FasterRCNN, self).__init__( 19 | backbone=backbone, 20 | neck=neck, 21 | shared_head=shared_head, 22 | rpn_head=rpn_head, 23 | bbox_roi_extractor=bbox_roi_extractor, 24 | bbox_head=bbox_head, 25 | train_cfg=train_cfg, 26 | test_cfg=test_cfg, 27 | pretrained=pretrained) 28 | -------------------------------------------------------------------------------- /mmdet/models/detectors/mask_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage import TwoStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class MaskRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | rpn_head, 11 | bbox_roi_extractor, 12 | bbox_head, 13 | mask_roi_extractor, 14 | mask_head, 15 | train_cfg, 16 | test_cfg, 17 | neck=None, 18 | shared_head=None, 19 | pretrained=None): 20 | super(MaskRCNN, self).__init__( 21 | backbone=backbone, 22 | neck=neck, 23 | shared_head=shared_head, 24 | rpn_head=rpn_head, 25 | bbox_roi_extractor=bbox_roi_extractor, 26 | bbox_head=bbox_head, 27 | mask_roi_extractor=mask_roi_extractor, 28 | mask_head=mask_head, 29 | train_cfg=train_cfg, 30 | test_cfg=test_cfg, 31 | pretrained=pretrained) 32 | -------------------------------------------------------------------------------- /mmdet/models/detectors/panotic_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage_panotic import TwoStagePanoticDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class PanoticRCNN(TwoStagePanoticDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | rpn_head, 12 | bbox_roi_extractor, 13 | bbox_head, 14 | mask_roi_extractor, 15 | mask_head, 16 | seg_head, 17 | train_cfg, 18 | test_cfg, 19 
| pretrained=None): 20 | super(PanoticRCNN, self).__init__( 21 | backbone=backbone, 22 | neck=neck, 23 | rpn_head=rpn_head, 24 | bbox_roi_extractor=bbox_roi_extractor, 25 | bbox_head=bbox_head, 26 | mask_roi_extractor=mask_roi_extractor, 27 | mask_head=mask_head, 28 | seg_head=seg_head, 29 | train_cfg=train_cfg, 30 | test_cfg=test_cfg, 31 | pretrained=pretrained) 32 | -------------------------------------------------------------------------------- /mmdet/models/detectors/retinanet.py: -------------------------------------------------------------------------------- 1 | from .single_stage import SingleStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class RetinaNet(SingleStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head, 12 | train_cfg=None, 13 | test_cfg=None, 14 | pretrained=None): 15 | super(RetinaNet, self).__init__(backbone, neck, bbox_head, train_cfg, 16 | test_cfg, pretrained) 17 | -------------------------------------------------------------------------------- /mmdet/models/detectors/rpn.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from mmdet.core import tensor2imgs, bbox_mapping 4 | from .base import BaseDetector 5 | from .test_mixins import RPNTestMixin 6 | from .. import builder 7 | from ..registry import DETECTORS 8 | 9 | 10 | @DETECTORS.register_module 11 | class RPN(BaseDetector, RPNTestMixin): 12 | 13 | def __init__(self, 14 | backbone, 15 | neck, 16 | rpn_head, 17 | train_cfg, 18 | test_cfg, 19 | pretrained=None): 20 | super(RPN, self).__init__() 21 | self.backbone = builder.build_backbone(backbone) 22 | self.neck = builder.build_neck(neck) if neck is not None else None 23 | self.rpn_head = builder.build_head(rpn_head) 24 | self.train_cfg = train_cfg 25 | self.test_cfg = test_cfg 26 | self.init_weights(pretrained=pretrained) 27 | 28 | def init_weights(self, pretrained=None): 29 | super(RPN, self).init_weights(pretrained) 30 | self.backbone.init_weights(pretrained=pretrained) 31 | if self.with_neck: 32 | self.neck.init_weights() 33 | self.rpn_head.init_weights() 34 | 35 | def extract_feat(self, img): 36 | x = self.backbone(img) 37 | if self.with_neck: 38 | x = self.neck(x) 39 | return x 40 | 41 | def forward_train(self, 42 | img, 43 | img_meta, 44 | gt_bboxes=None, 45 | gt_bboxes_ignore=None): 46 | if self.train_cfg.rpn.get('debug', False): 47 | self.rpn_head.debug_imgs = tensor2imgs(img) 48 | 49 | x = self.extract_feat(img) 50 | rpn_outs = self.rpn_head(x) 51 | 52 | rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta, self.train_cfg.rpn) 53 | losses = self.rpn_head.loss( 54 | *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) 55 | return losses 56 | 57 | def simple_test(self, img, img_meta, rescale=False): 58 | x = self.extract_feat(img) 59 | proposal_list = self.simple_test_rpn(x, img_meta, self.test_cfg.rpn) 60 | if rescale: 61 | for proposals, meta in zip(proposal_list, img_meta): 62 | proposals[:, :4] /= meta['scale_factor'] 63 | # TODO: remove this restriction 64 | return proposal_list[0].cpu().numpy() 65 | 66 | def aug_test(self, imgs, img_metas, rescale=False): 67 | proposal_list = self.aug_test_rpn( 68 | self.extract_feats(imgs), img_metas, self.test_cfg.rpn) 69 | if not rescale: 70 | for proposals, img_meta in zip(proposal_list, img_metas[0]): 71 | img_shape = img_meta['img_shape'] 72 | scale_factor = img_meta['scale_factor'] 73 | flip = img_meta['flip'] 74 | proposals[:, :4] = bbox_mapping(proposals[:, :4], img_shape, 75 | 
scale_factor, flip) 76 | # TODO: remove this restriction 77 | return proposal_list[0].cpu().numpy() 78 | 79 | def show_result(self, data, result, img_norm_cfg, dataset=None, top_k=20): 80 | """Show RPN proposals on the image. 81 | 82 | Although we assume batch size is 1, this method supports arbitrary 83 | batch size. 84 | """ 85 | img_tensor = data['img'][0] 86 | img_metas = data['img_meta'][0].data[0] 87 | imgs = tensor2imgs(img_tensor, **img_norm_cfg) 88 | assert len(imgs) == len(img_metas) 89 | for img, img_meta in zip(imgs, img_metas): 90 | h, w, _ = img_meta['img_shape'] 91 | img_show = img[:h, :w, :] 92 | mmcv.imshow_bboxes(img_show, result, top_k=top_k) 93 | -------------------------------------------------------------------------------- /mmdet/models/detectors/single_stage.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from .base import BaseDetector 4 | from .. import builder 5 | from ..registry import DETECTORS 6 | from mmdet.core import bbox2result 7 | 8 | 9 | @DETECTORS.register_module 10 | class SingleStageDetector(BaseDetector): 11 | 12 | def __init__(self, 13 | backbone, 14 | neck=None, 15 | bbox_head=None, 16 | train_cfg=None, 17 | test_cfg=None, 18 | pretrained=None): 19 | super(SingleStageDetector, self).__init__() 20 | self.backbone = builder.build_backbone(backbone) 21 | if neck is not None: 22 | self.neck = builder.build_neck(neck) 23 | self.bbox_head = builder.build_head(bbox_head) 24 | self.train_cfg = train_cfg 25 | self.test_cfg = test_cfg 26 | self.init_weights(pretrained=pretrained) 27 | 28 | def init_weights(self, pretrained=None): 29 | super(SingleStageDetector, self).init_weights(pretrained) 30 | self.backbone.init_weights(pretrained=pretrained) 31 | if self.with_neck: 32 | if isinstance(self.neck, nn.Sequential): 33 | for m in self.neck: 34 | m.init_weights() 35 | else: 36 | self.neck.init_weights() 37 | self.bbox_head.init_weights() 38 | 39 | def extract_feat(self, img): 40 | x = self.backbone(img) 41 | if self.with_neck: 42 | x = self.neck(x) 43 | return x 44 | 45 | def forward_train(self, 46 | img, 47 | img_metas, 48 | gt_bboxes, 49 | gt_labels, 50 | gt_bboxes_ignore=None): 51 | x = self.extract_feat(img) 52 | outs = self.bbox_head(x) 53 | loss_inputs = outs + (gt_bboxes, gt_labels, img_metas, self.train_cfg) 54 | losses = self.bbox_head.loss( 55 | *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) 56 | return losses 57 | 58 | def simple_test(self, img, img_meta, rescale=False): 59 | x = self.extract_feat(img) 60 | outs = self.bbox_head(x) 61 | bbox_inputs = outs + (img_meta, self.test_cfg, rescale) 62 | bbox_list = self.bbox_head.get_bboxes(*bbox_inputs) 63 | bbox_results = [ 64 | bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes) 65 | for det_bboxes, det_labels in bbox_list 66 | ] 67 | return bbox_results[0] 68 | 69 | def aug_test(self, imgs, img_metas, rescale=False): 70 | raise NotImplementedError 71 | -------------------------------------------------------------------------------- /mmdet/models/mask_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcn_mask_head import FCNMaskHead 2 | from .htc_mask_head import HTCMaskHead 3 | from .fused_semantic_head import FusedSemanticHead 4 | 5 | __all__ = ['FCNMaskHead', 'HTCMaskHead', 'FusedSemanticHead'] 6 | -------------------------------------------------------------------------------- /mmdet/models/mask_heads/fused_semantic_head.py: 
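simple_test in single_stage.py above hands each (det_bboxes, det_labels) pair to mmdet.core.bbox2result to group detections by class. As a rough sketch of what that conversion does (a hypothetical reimplementation for illustration; the actual helper lives in mmdet/core and may differ in detail):

    import numpy as np

    def bbox2result_sketch(det_bboxes, det_labels, num_classes):
        # det_bboxes: (n, 5) tensor of [x1, y1, x2, y2, score]
        # det_labels: (n,) tensor of 0-based foreground class indices
        # num_classes includes background, hence num_classes - 1 output lists
        if det_bboxes.shape[0] == 0:
            return [np.zeros((0, 5), dtype=np.float32)
                    for _ in range(num_classes - 1)]
        det_bboxes = det_bboxes.cpu().numpy()
        det_labels = det_labels.cpu().numpy()
        return [det_bboxes[det_labels == i, :]
                for i in range(num_classes - 1)]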
-------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from mmcv.cnn import kaiming_init 4 | 5 | from ..registry import HEADS 6 | from ..utils import ConvModule 7 | 8 | 9 | @HEADS.register_module 10 | class FusedSemanticHead(nn.Module): 11 | """Multi-level fused semantic segmentation head. 12 | 13 | in_1 -> 1x1 conv --- 14 | | 15 | in_2 -> 1x1 conv -- | 16 | || 17 | in_3 -> 1x1 conv - || 18 | ||| /-> 1x1 conv (mask prediction) 19 | in_4 -> 1x1 conv -----> 3x3 convs (*4) 20 | | \-> 1x1 conv (feature) 21 | in_5 -> 1x1 conv --- 22 | """ # noqa: W605 23 | 24 | def __init__(self, 25 | num_ins, 26 | fusion_level, 27 | num_convs=4, 28 | in_channels=256, 29 | conv_out_channels=256, 30 | num_classes=183, 31 | ignore_label=255, 32 | loss_weight=0.2, 33 | conv_cfg=None, 34 | normalize=None): 35 | super(FusedSemanticHead, self).__init__() 36 | self.num_ins = num_ins 37 | self.fusion_level = fusion_level 38 | self.num_convs = num_convs 39 | self.in_channels = in_channels 40 | self.conv_out_channels = conv_out_channels 41 | self.num_classes = num_classes 42 | self.ignore_label = ignore_label 43 | self.loss_weight = loss_weight 44 | self.conv_cfg = conv_cfg 45 | self.normalize = normalize 46 | self.with_bias = normalize is None 47 | 48 | self.lateral_convs = nn.ModuleList() 49 | for i in range(self.num_ins): 50 | self.lateral_convs.append( 51 | ConvModule( 52 | self.in_channels, 53 | self.in_channels, 54 | 1, 55 | conv_cfg=self.conv_cfg, 56 | normalize=self.normalize, 57 | bias=self.with_bias, 58 | inplace=False)) 59 | 60 | self.convs = nn.ModuleList() 61 | for i in range(self.num_convs): 62 | in_channels = self.in_channels if i == 0 else conv_out_channels 63 | self.convs.append( 64 | ConvModule( 65 | in_channels, 66 | conv_out_channels, 67 | 3, 68 | padding=1, 69 | conv_cfg=self.conv_cfg, 70 | normalize=self.normalize, 71 | bias=self.with_bias)) 72 | self.conv_embedding = ConvModule( 73 | conv_out_channels, 74 | conv_out_channels, 75 | 1, 76 | conv_cfg=self.conv_cfg, 77 | normalize=self.normalize, 78 | bias=self.with_bias) 79 | self.conv_logits = nn.Conv2d(conv_out_channels, self.num_classes, 1) 80 | 81 | self.criterion = nn.CrossEntropyLoss(ignore_index=ignore_label) 82 | 83 | def init_weights(self): 84 | kaiming_init(self.conv_logits) 85 | 86 | def forward(self, feats): 87 | x = self.lateral_convs[self.fusion_level](feats[self.fusion_level]) 88 | fused_size = tuple(x.shape[-2:]) 89 | for i, feat in enumerate(feats): 90 | if i != self.fusion_level: 91 | feat = F.interpolate( 92 | feat, 93 | size=fused_size, 94 | mode='bilinear', 95 | align_corners=True) 96 | x += self.lateral_convs[i](feat) 97 | 98 | for i in range(self.num_convs): 99 | x = self.convs[i](x) 100 | 101 | mask_pred = self.conv_logits(x) 102 | x = self.conv_embedding(x) 103 | return mask_pred, x 104 | 105 | def loss(self, mask_pred, labels): 106 | labels = labels.squeeze(1).long() 107 | loss_semantic_seg = self.criterion(mask_pred, labels) 108 | loss_semantic_seg *= self.loss_weight 109 | return loss_semantic_seg 110 | -------------------------------------------------------------------------------- /mmdet/models/mask_heads/htc_mask_head.py: -------------------------------------------------------------------------------- 1 | from .fcn_mask_head import FCNMaskHead 2 | from ..registry import HEADS 3 | from ..utils import ConvModule 4 | 5 | 6 | @HEADS.register_module 7 | class HTCMaskHead(FCNMaskHead): 8 | 9 | def __init__(self, *args, **kwargs): 10 | 
super(HTCMaskHead, self).__init__(*args, **kwargs) 11 | self.conv_res = ConvModule( 12 | self.conv_out_channels, 13 | self.conv_out_channels, 14 | 1, 15 | conv_cfg=self.conv_cfg, 16 | normalize=self.normalize, 17 | bias=self.with_bias) 18 | 19 | def init_weights(self): 20 | super(HTCMaskHead, self).init_weights() 21 | self.conv_res.init_weights() 22 | 23 | def forward(self, x, res_feat=None, return_logits=True, return_feat=True): 24 | if res_feat is not None: 25 | res_feat = self.conv_res(res_feat) 26 | x = x + res_feat 27 | for conv in self.convs: 28 | x = conv(x) 29 | res_feat = x 30 | outs = [] 31 | if return_logits: 32 | x = self.upsample(x) 33 | if self.upsample_method == 'deconv': 34 | x = self.relu(x) 35 | mask_pred = self.conv_logits(x) 36 | outs.append(mask_pred) 37 | if return_feat: 38 | outs.append(res_feat) 39 | return outs if len(outs) > 1 else outs[0] 40 | -------------------------------------------------------------------------------- /mmdet/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .fpn import FPN 2 | 3 | __all__ = ['FPN'] 4 | -------------------------------------------------------------------------------- /mmdet/models/registry.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class Registry(object): 5 | 6 | def __init__(self, name): 7 | self._name = name 8 | self._module_dict = dict() 9 | 10 | @property 11 | def name(self): 12 | return self._name 13 | 14 | @property 15 | def module_dict(self): 16 | return self._module_dict 17 | 18 | def _register_module(self, module_class): 19 | """Register a module class. 20 | 21 | Args: 22 | module_class (type): Module class to be registered; must be a subclass of nn.Module. 23 | """ 24 | if not issubclass(module_class, nn.Module): 25 | raise TypeError('module_class must be a subclass of nn.Module, but got {}'. 26 | format(module_class)) 27 | module_name = module_class.__name__ 28 | if module_name in self._module_dict: 29 | raise KeyError('{} is already registered in {}'.format( 30 | module_name, self.name)) 31 | self._module_dict[module_name] = module_class 32 | 33 | def register_module(self, cls): 34 | self._register_module(cls) 35 | return cls 36 | 37 | 38 | BACKBONES = Registry('backbone') 39 | NECKS = Registry('neck') 40 | ROI_EXTRACTORS = Registry('roi_extractor') 41 | SHARED_HEADS = Registry('shared_head') 42 | HEADS = Registry('head') 43 | DETECTORS = Registry('detector') 44 | -------------------------------------------------------------------------------- /mmdet/models/roi_extractors/__init__.py: -------------------------------------------------------------------------------- 1 | from .single_level import SingleRoIExtractor 2 | 3 | __all__ = ['SingleRoIExtractor'] 4 | -------------------------------------------------------------------------------- /mmdet/models/roi_extractors/single_level.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from mmdet import ops 7 | from ..registry import ROI_EXTRACTORS 8 | 9 | 10 | @ROI_EXTRACTORS.register_module 11 | class SingleRoIExtractor(nn.Module): 12 | """Extract RoI features from a single level feature map. 13 | 14 | If there are multiple input feature levels, each RoI is mapped to a level 15 | according to its scale. 16 | 17 | Args: 18 | roi_layer (dict): Specify RoI layer type and arguments. 19 | out_channels (int): Output channels of RoI layers. 
20 | featmap_strides (int): Strides of input feature maps. 21 | finest_scale (int): Scale threshold of mapping to level 0. 22 | """ 23 | 24 | def __init__(self, 25 | roi_layer, 26 | out_channels, 27 | featmap_strides, 28 | finest_scale=56): 29 | super(SingleRoIExtractor, self).__init__() 30 | self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides) 31 | self.out_channels = out_channels 32 | self.featmap_strides = featmap_strides 33 | self.finest_scale = finest_scale 34 | 35 | @property 36 | def num_inputs(self): 37 | """int: Input feature map levels.""" 38 | return len(self.featmap_strides) 39 | 40 | def init_weights(self): 41 | pass 42 | 43 | def build_roi_layers(self, layer_cfg, featmap_strides): 44 | cfg = layer_cfg.copy() 45 | layer_type = cfg.pop('type') 46 | assert hasattr(ops, layer_type) 47 | layer_cls = getattr(ops, layer_type) 48 | roi_layers = nn.ModuleList( 49 | [layer_cls(spatial_scale=1 / s, **cfg) for s in featmap_strides]) 50 | return roi_layers 51 | 52 | def map_roi_levels(self, rois, num_levels): 53 | """Map rois to corresponding feature levels by scales. 54 | 55 | - scale < finest_scale: level 0 56 | - finest_scale <= scale < finest_scale * 2: level 1 57 | - finest_scale * 2 <= scale < finest_scale * 4: level 2 58 | - scale >= finest_scale * 4: level 3 59 | 60 | Args: 61 | rois (Tensor): Input RoIs, shape (k, 5). 62 | num_levels (int): Total level number. 63 | 64 | Returns: 65 | Tensor: Level index (0-based) of each RoI, shape (k, ) 66 | """ 67 | scale = torch.sqrt( 68 | (rois[:, 3] - rois[:, 1] + 1) * (rois[:, 4] - rois[:, 2] + 1)) 69 | target_lvls = torch.floor(torch.log2(scale / self.finest_scale + 1e-6)) 70 | target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long() 71 | return target_lvls 72 | 73 | def forward(self, feats, rois): 74 | if len(feats) == 1: 75 | return self.roi_layers[0](feats[0], rois) 76 | 77 | out_size = self.roi_layers[0].out_size 78 | num_levels = len(feats) 79 | target_lvls = self.map_roi_levels(rois, num_levels) 80 | roi_feats = torch.cuda.FloatTensor(rois.size()[0], self.out_channels, 81 | out_size, out_size).fill_(0) 82 | for i in range(num_levels): 83 | inds = target_lvls == i 84 | if inds.any(): 85 | rois_ = rois[inds, :] 86 | roi_feats_t = self.roi_layers[i](feats[i], rois_) 87 | roi_feats[inds] += roi_feats_t 88 | return roi_feats 89 | -------------------------------------------------------------------------------- /mmdet/models/seg_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcn_seg_head import FCNSegHead 2 | 3 | __all__ = ['FCNSegHead'] 4 | -------------------------------------------------------------------------------- /mmdet/models/shared_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .res_layer import ResLayer 2 | 3 | __all__ = ['ResLayer'] 4 | -------------------------------------------------------------------------------- /mmdet/models/shared_heads/res_layer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import torch.nn as nn 4 | from mmcv.cnn import constant_init, kaiming_init 5 | from mmcv.runner import load_checkpoint 6 | 7 | from ..backbones import ResNet, make_res_layer 8 | from ..registry import SHARED_HEADS 9 | 10 | 11 | @SHARED_HEADS.register_module 12 | class ResLayer(nn.Module): 13 | 14 | def __init__(self, 15 | depth, 16 | stage=3, 17 | stride=2, 18 | dilation=1, 19 | style='pytorch', 20 | 
normalize=dict(type='BN', frozen=False), 21 | norm_eval=True, 22 | with_cp=False, 23 | dcn=None): 24 | super(ResLayer, self).__init__() 25 | self.norm_eval = norm_eval 26 | self.normalize = normalize 27 | self.stage = stage 28 | block, stage_blocks = ResNet.arch_settings[depth] 29 | stage_block = stage_blocks[stage] 30 | planes = 64 * 2**stage 31 | inplanes = 64 * 2**(stage - 1) * block.expansion 32 | 33 | res_layer = make_res_layer( 34 | block, 35 | inplanes, 36 | planes, 37 | stage_block, 38 | stride=stride, 39 | dilation=dilation, 40 | style=style, 41 | with_cp=with_cp, 42 | normalize=self.normalize, 43 | dcn=dcn) 44 | self.add_module('layer{}'.format(stage + 1), res_layer) 45 | 46 | def init_weights(self, pretrained=None): 47 | if isinstance(pretrained, str): 48 | logger = logging.getLogger() 49 | load_checkpoint(self, pretrained, strict=False, logger=logger) 50 | elif pretrained is None: 51 | for m in self.modules(): 52 | if isinstance(m, nn.Conv2d): 53 | kaiming_init(m) 54 | elif isinstance(m, nn.BatchNorm2d): 55 | constant_init(m, 1) 56 | else: 57 | raise TypeError('pretrained must be a str or None') 58 | 59 | def forward(self, x): 60 | res_layer = getattr(self, 'layer{}'.format(self.stage + 1)) 61 | out = res_layer(x) 62 | return out 63 | 64 | def train(self, mode=True): 65 | super(ResLayer, self).train(mode) 66 | if self.norm_eval: 67 | for m in self.modules(): 68 | if isinstance(m, nn.BatchNorm2d): 69 | m.eval() 70 | -------------------------------------------------------------------------------- /mmdet/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .conv_ws import conv_ws_2d, ConvWS2d 2 | from .conv_module import build_conv_layer, ConvModule 3 | from .norm import build_norm_layer 4 | from .weight_init import (xavier_init, normal_init, uniform_init, kaiming_init, 5 | bias_init_with_prob) 6 | 7 | __all__ = [ 8 | 'conv_ws_2d', 'ConvWS2d', 'build_conv_layer', 'ConvModule', 9 | 'build_norm_layer', 'xavier_init', 'normal_init', 'uniform_init', 10 | 'kaiming_init', 'bias_init_with_prob' 11 | ] 12 | -------------------------------------------------------------------------------- /mmdet/models/utils/conv_ws.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | def conv_ws_2d(input, 6 | weight, 7 | bias=None, 8 | stride=1, 9 | padding=0, 10 | dilation=1, 11 | groups=1, 12 | eps=1e-5): 13 | c_in = weight.size(0) 14 | weight_flat = weight.view(c_in, -1) 15 | mean = weight_flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1) 16 | std = weight_flat.std(dim=1, keepdim=True).view(c_in, 1, 1, 1) 17 | weight = (weight - mean) / (std + eps) 18 | return F.conv2d(input, weight, bias, stride, padding, dilation, groups) 19 | 20 | 21 | class ConvWS2d(nn.Conv2d): 22 | 23 | def __init__(self, 24 | in_channels, 25 | out_channels, 26 | kernel_size, 27 | stride=1, 28 | padding=0, 29 | dilation=1, 30 | groups=1, 31 | bias=True, 32 | eps=1e-5): 33 | super(ConvWS2d, self).__init__( 34 | in_channels, 35 | out_channels, 36 | kernel_size, 37 | stride=stride, 38 | padding=padding, 39 | dilation=dilation, 40 | groups=groups, 41 | bias=bias) 42 | self.eps = eps 43 | 44 | def forward(self, x): 45 | return conv_ws_2d(x, self.weight, self.bias, self.stride, self.padding, 46 | self.dilation, self.groups, self.eps) 47 | -------------------------------------------------------------------------------- /mmdet/models/utils/norm.py: 
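conv_ws.py above implements Weight Standardization, the "ws" in the gn+ws configs: every output filter is normalized to zero mean and unit variance (plus eps) before the convolution runs, while the stored parameters keep their raw values. A quick standalone check of that property using conv_ws_2d as defined above (tensor shapes are arbitrary examples):

    import torch

    from mmdet.models.utils.conv_ws import conv_ws_2d

    x = torch.randn(2, 8, 16, 16)
    w = torch.randn(4, 8, 3, 3) * 5 + 3  # deliberately badly scaled weights
    y = conv_ws_2d(x, w, stride=1, padding=1)
    assert y.shape == (2, 4, 16, 16)

    # The convolution effectively sees a standardized copy of w:
    w_flat = w.view(4, -1)
    w_hat = (w - w_flat.mean(1).view(4, 1, 1, 1)) / (
        w_flat.std(1).view(4, 1, 1, 1) + 1e-5)
    print(w_hat.view(4, -1).mean(1))  # ~0 for each filter
    print(w_hat.view(4, -1).std(1))   # ~1 for each filter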
-------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | norm_cfg = { 5 | # format: layer_type: (abbreviation, module) 6 | 'BN': ('bn', nn.BatchNorm2d), 7 | 'SyncBN': ('bn', None), 8 | 'GN': ('gn', nn.GroupNorm), 9 | # and potentially 'SN' 10 | } 11 | 12 | 13 | def build_norm_layer(cfg, num_features, postfix=''): 14 | """Build a normalization layer. 15 | 16 | Args: 17 | cfg (dict): cfg should contain: 18 | type (str): identifies the norm layer type. 19 | layer args: args needed to instantiate a norm layer. 20 | frozen (bool, optional): whether to stop gradient updates 21 | of the norm layer; helpful for freezing norm layers 22 | in a backbone. 23 | num_features (int): number of input channels. 24 | postfix (int, str): appended to the norm abbreviation to 25 | create a named layer. 26 | 27 | Returns: 28 | name (str): abbreviation + postfix 29 | layer (nn.Module): created norm layer 30 | """ 31 | assert isinstance(cfg, dict) and 'type' in cfg 32 | cfg_ = cfg.copy() 33 | 34 | layer_type = cfg_.pop('type') 35 | if layer_type not in norm_cfg: 36 | raise KeyError('Unrecognized norm type {}'.format(layer_type)) 37 | else: 38 | abbr, norm_layer = norm_cfg[layer_type] 39 | if norm_layer is None: 40 | raise NotImplementedError 41 | 42 | assert isinstance(postfix, (int, str)) 43 | name = abbr + str(postfix) 44 | 45 | frozen = cfg_.pop('frozen', False) 46 | cfg_.setdefault('eps', 1e-5) 47 | if layer_type != 'GN': 48 | layer = norm_layer(num_features, **cfg_) 49 | else: 50 | assert 'num_groups' in cfg_ 51 | layer = norm_layer(num_channels=num_features, **cfg_) 52 | 53 | if frozen: 54 | for param in layer.parameters(): 55 | param.requires_grad = False 56 | 57 | return name, layer 58 | -------------------------------------------------------------------------------- /mmdet/models/utils/weight_init.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | 4 | 5 | def xavier_init(module, gain=1, bias=0, distribution='normal'): 6 | assert distribution in ['uniform', 'normal'] 7 | if distribution == 'uniform': 8 | nn.init.xavier_uniform_(module.weight, gain=gain) 9 | else: 10 | nn.init.xavier_normal_(module.weight, gain=gain) 11 | if hasattr(module, 'bias') and module.bias is not None: 12 | nn.init.constant_(module.bias, bias) 13 | 14 | 15 | def normal_init(module, mean=0, std=1, bias=0): 16 | nn.init.normal_(module.weight, mean, std) 17 | if hasattr(module, 'bias') and module.bias is not None: 18 | nn.init.constant_(module.bias, bias) 19 | 20 | 21 | def uniform_init(module, a=0, b=1, bias=0): 22 | nn.init.uniform_(module.weight, a, b) 23 | if hasattr(module, 'bias') and module.bias is not None: 24 | nn.init.constant_(module.bias, bias) 25 | 26 | 27 | def kaiming_init(module, 28 | mode='fan_out', 29 | nonlinearity='relu', 30 | bias=0, 31 | distribution='normal'): 32 | assert distribution in ['uniform', 'normal'] 33 | if distribution == 'uniform': 34 | nn.init.kaiming_uniform_( 35 | module.weight, mode=mode, nonlinearity=nonlinearity) 36 | else: 37 | nn.init.kaiming_normal_( 38 | module.weight, mode=mode, nonlinearity=nonlinearity) 39 | if hasattr(module, 'bias') and module.bias is not None: 40 | nn.init.constant_(module.bias, bias) 41 | 42 | 43 | def bias_init_with_prob(prior_prob): 44 | """Initialize conv/fc bias value according to a given probability.""" 45 | bias_init = float(-np.log((1 - prior_prob) / prior_prob)) 46 | return bias_init 47 | -------------------------------------------------------------------------------- /mmdet/ops/__init__.py: 
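bias_init_with_prob in weight_init.py above inverts the sigmoid so that a freshly initialized classification layer predicts a chosen prior probability; RetinaHead calls it with prior_prob=0.01 to realize the focal-loss prior. A short standalone check of the formula:

    import math

    prior_prob = 0.01
    # sigmoid(b) = p  =>  b = -log((1 - p) / p)
    b = -math.log((1 - prior_prob) / prior_prob)
    print(b)                       # ~ -4.595, matches bias_init_with_prob(0.01)
    print(1 / (1 + math.exp(-b)))  # ~ 0.01, the requested prior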
-------------------------------------------------------------------------------- 1 | from .dcn import (DeformConv, DeformConvPack, ModulatedDeformConv, 2 | ModulatedDeformConvPack, DeformRoIPooling, 3 | DeformRoIPoolingPack, ModulatedDeformRoIPoolingPack, 4 | deform_conv, modulated_deform_conv, deform_roi_pooling) 5 | from .nms import nms, soft_nms 6 | from .roi_align import RoIAlign, roi_align 7 | from .roi_pool import RoIPool, roi_pool 8 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss 9 | 10 | __all__ = [ 11 | 'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool', 12 | 'DeformConv', 'DeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 13 | 'ModulatedDeformRoIPoolingPack', 'ModulatedDeformConv', 14 | 'ModulatedDeformConvPack', 'deform_conv', 'modulated_deform_conv', 15 | 'deform_roi_pooling', 'SigmoidFocalLoss', 'sigmoid_focal_loss' 16 | ] 17 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.deform_conv import deform_conv, modulated_deform_conv 2 | from .functions.deform_pool import deform_roi_pooling 3 | from .modules.deform_conv import (DeformConv, ModulatedDeformConv, 4 | DeformConvPack, ModulatedDeformConvPack) 5 | from .modules.deform_pool import (DeformRoIPooling, DeformRoIPoolingPack, 6 | ModulatedDeformRoIPoolingPack) 7 | 8 | __all__ = [ 9 | 'DeformConv', 'DeformConvPack', 'ModulatedDeformConv', 10 | 'ModulatedDeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 11 | 'ModulatedDeformRoIPoolingPack', 'deform_conv', 'modulated_deform_conv', 12 | 'deform_roi_pooling' 13 | ] 14 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/mmdet/ops/dcn/functions/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/dcn/functions/deform_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from .. 
import deform_pool_cuda 5 | 6 | 7 | class DeformRoIPoolingFunction(Function): 8 | 9 | @staticmethod 10 | def forward(ctx, 11 | data, 12 | rois, 13 | offset, 14 | spatial_scale, 15 | out_size, 16 | out_channels, 17 | no_trans, 18 | group_size=1, 19 | part_size=None, 20 | sample_per_part=4, 21 | trans_std=.0): 22 | ctx.spatial_scale = spatial_scale 23 | ctx.out_size = out_size 24 | ctx.out_channels = out_channels 25 | ctx.no_trans = no_trans 26 | ctx.group_size = group_size 27 | ctx.part_size = out_size if part_size is None else part_size 28 | ctx.sample_per_part = sample_per_part 29 | ctx.trans_std = trans_std 30 | 31 | assert 0.0 <= ctx.trans_std <= 1.0 32 | if not data.is_cuda: 33 | raise NotImplementedError 34 | 35 | n = rois.shape[0] 36 | output = data.new_empty(n, out_channels, out_size, out_size) 37 | output_count = data.new_empty(n, out_channels, out_size, out_size) 38 | deform_pool_cuda.deform_psroi_pooling_cuda_forward( 39 | data, rois, offset, output, output_count, ctx.no_trans, 40 | ctx.spatial_scale, ctx.out_channels, ctx.group_size, ctx.out_size, 41 | ctx.part_size, ctx.sample_per_part, ctx.trans_std) 42 | 43 | if data.requires_grad or rois.requires_grad or offset.requires_grad: 44 | ctx.save_for_backward(data, rois, offset) 45 | ctx.output_count = output_count 46 | 47 | return output 48 | 49 | @staticmethod 50 | def backward(ctx, grad_output): 51 | if not grad_output.is_cuda: 52 | raise NotImplementedError 53 | 54 | data, rois, offset = ctx.saved_tensors 55 | output_count = ctx.output_count 56 | grad_input = torch.zeros_like(data) 57 | grad_rois = None 58 | grad_offset = torch.zeros_like(offset) 59 | 60 | deform_pool_cuda.deform_psroi_pooling_cuda_backward( 61 | grad_output, data, rois, offset, output_count, grad_input, 62 | grad_offset, ctx.no_trans, ctx.spatial_scale, ctx.out_channels, 63 | ctx.group_size, ctx.out_size, ctx.part_size, ctx.sample_per_part, 64 | ctx.trans_std) 65 | return (grad_input, grad_rois, grad_offset, None, None, None, None, 66 | None, None, None, None) 67 | 68 | 69 | deform_roi_pooling = DeformRoIPoolingFunction.apply 70 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/mmdet/ops/dcn/modules/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/dcn/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='deform_conv', 6 | ext_modules=[ 7 | CUDAExtension('deform_conv_cuda', [ 8 | 'src/deform_conv_cuda.cpp', 9 | 'src/deform_conv_cuda_kernel.cu', 10 | ]), 11 | CUDAExtension('deform_pool_cuda', [ 12 | 'src/deform_pool_cuda.cpp', 'src/deform_pool_cuda_kernel.cu' 13 | ]), 14 | ], 15 | cmdclass={'build_ext': BuildExtension}) 16 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/src/deform_pool_cuda.cpp: -------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c 3 | 4 | // based on 5 | // author: Charles Shang 6 | // 
https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | void DeformablePSROIPoolForward( 14 | const at::Tensor data, const at::Tensor bbox, const at::Tensor trans, 15 | at::Tensor out, at::Tensor top_count, const int batch, const int channels, 16 | const int height, const int width, const int num_bbox, 17 | const int channels_trans, const int no_trans, const float spatial_scale, 18 | const int output_dim, const int group_size, const int pooled_size, 19 | const int part_size, const int sample_per_part, const float trans_std); 20 | 21 | void DeformablePSROIPoolBackwardAcc( 22 | const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox, 23 | const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad, 24 | at::Tensor trans_grad, const int batch, const int channels, 25 | const int height, const int width, const int num_bbox, 26 | const int channels_trans, const int no_trans, const float spatial_scale, 27 | const int output_dim, const int group_size, const int pooled_size, 28 | const int part_size, const int sample_per_part, const float trans_std); 29 | 30 | void deform_psroi_pooling_cuda_forward( 31 | at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, 32 | at::Tensor top_count, const int no_trans, const float spatial_scale, 33 | const int output_dim, const int group_size, const int pooled_size, 34 | const int part_size, const int sample_per_part, const float trans_std) { 35 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 36 | 37 | const int batch = input.size(0); 38 | const int channels = input.size(1); 39 | const int height = input.size(2); 40 | const int width = input.size(3); 41 | const int channels_trans = no_trans ? 2 : trans.size(1); 42 | 43 | const int num_bbox = bbox.size(0); 44 | if (num_bbox != out.size(0)) 45 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 46 | out.size(0), num_bbox); 47 | 48 | DeformablePSROIPoolForward( 49 | input, bbox, trans, out, top_count, batch, channels, height, width, 50 | num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size, 51 | pooled_size, part_size, sample_per_part, trans_std); 52 | } 53 | 54 | void deform_psroi_pooling_cuda_backward( 55 | at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, 56 | at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, 57 | const int no_trans, const float spatial_scale, const int output_dim, 58 | const int group_size, const int pooled_size, const int part_size, 59 | const int sample_per_part, const float trans_std) { 60 | AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); 61 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 62 | 63 | const int batch = input.size(0); 64 | const int channels = input.size(1); 65 | const int height = input.size(2); 66 | const int width = input.size(3); 67 | const int channels_trans = no_trans ? 
2 : trans.size(1); 68 | 69 | const int num_bbox = bbox.size(0); 70 | if (num_bbox != out_grad.size(0)) 71 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 72 | out_grad.size(0), num_bbox); 73 | 74 | DeformablePSROIPoolBackwardAcc( 75 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch, 76 | channels, height, width, num_bbox, channels_trans, no_trans, 77 | spatial_scale, output_dim, group_size, pooled_size, part_size, 78 | sample_per_part, trans_std); 79 | } 80 | 81 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 82 | m.def("deform_psroi_pooling_cuda_forward", &deform_psroi_pooling_cuda_forward, 83 | "deform psroi pooling forward(CUDA)"); 84 | m.def("deform_psroi_pooling_cuda_backward", 85 | &deform_psroi_pooling_cuda_backward, 86 | "deform psroi pooling backward(CUDA)"); 87 | } -------------------------------------------------------------------------------- /mmdet/ops/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_wrapper import nms, soft_nms 2 | 3 | __all__ = ['nms', 'soft_nms'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from . import nms_cuda, nms_cpu 5 | from .soft_nms_cpu import soft_nms_cpu 6 | 7 | 8 | def nms(dets, iou_thr, device_id=None): 9 | """Dispatch to either CPU or GPU NMS implementations. 10 | 11 | The input can be either a torch tensor or numpy array. GPU NMS will be used 12 | if the input is a gpu tensor or device_id is specified, otherwise CPU NMS 13 | will be used. The returned type will always be the same as inputs. 14 | 15 | Arguments: 16 | dets (torch.Tensor or np.ndarray): bboxes with scores. 17 | iou_thr (float): IoU threshold for NMS. 18 | device_id (int, optional): when `dets` is a numpy array, if `device_id` 19 | is None, then cpu nms is used, otherwise gpu_nms will be used. 20 | 21 | Returns: 22 | tuple: kept bboxes and indice, which is always the same data type as 23 | the input. 
24 | """ 25 | # convert dets (tensor or numpy array) to tensor 26 | if isinstance(dets, torch.Tensor): 27 | is_numpy = False 28 | dets_th = dets 29 | elif isinstance(dets, np.ndarray): 30 | is_numpy = True 31 | device = 'cpu' if device_id is None else 'cuda:{}'.format(device_id) 32 | dets_th = torch.from_numpy(dets).to(device) 33 | else: 34 | raise TypeError( 35 | 'dets must be either a Tensor or numpy array, but got {}'.format( 36 | type(dets))) 37 | 38 | # execute cpu or cuda nms 39 | if dets_th.shape[0] == 0: 40 | inds = dets_th.new_zeros(0, dtype=torch.long) 41 | else: 42 | if dets_th.is_cuda: 43 | inds = nms_cuda.nms(dets_th, iou_thr) 44 | else: 45 | inds = nms_cpu.nms(dets_th, iou_thr) 46 | 47 | if is_numpy: 48 | inds = inds.cpu().numpy() 49 | return dets[inds, :], inds 50 | 51 | 52 | def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3): 53 | if isinstance(dets, torch.Tensor): 54 | is_tensor = True 55 | dets_np = dets.detach().cpu().numpy() 56 | elif isinstance(dets, np.ndarray): 57 | is_tensor = False 58 | dets_np = dets 59 | else: 60 | raise TypeError( 61 | 'dets must be either a Tensor or numpy array, but got {}'.format( 62 | type(dets))) 63 | 64 | method_codes = {'linear': 1, 'gaussian': 2} 65 | if method not in method_codes: 66 | raise ValueError('Invalid method for SoftNMS: {}'.format(method)) 67 | new_dets, inds = soft_nms_cpu( 68 | dets_np, 69 | iou_thr, 70 | method=method_codes[method], 71 | sigma=sigma, 72 | min_score=min_score) 73 | 74 | if is_tensor: 75 | return dets.new_tensor(new_dets), dets.new_tensor( 76 | inds, dtype=torch.long) 77 | else: 78 | return new_dets.astype(np.float32), inds.astype(np.int64) 79 | -------------------------------------------------------------------------------- /mmdet/ops/nms/setup.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | from setuptools import setup, Extension 3 | 4 | import numpy as np 5 | from Cython.Build import cythonize 6 | from Cython.Distutils import build_ext 7 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 8 | 9 | ext_args = dict( 10 | include_dirs=[np.get_include()], 11 | language='c++', 12 | extra_compile_args={ 13 | 'cc': ['-Wno-unused-function', '-Wno-write-strings'], 14 | 'nvcc': ['-c', '--compiler-options', '-fPIC'], 15 | }, 16 | ) 17 | 18 | extensions = [ 19 | Extension('soft_nms_cpu', ['src/soft_nms_cpu.pyx'], **ext_args), 20 | ] 21 | 22 | 23 | def customize_compiler_for_nvcc(self): 24 | """inject deep into distutils to customize how the dispatch 25 | to cc/nvcc works. 26 | If you subclass UnixCCompiler, it's not trivial to get your subclass 27 | injected in, and still have the right customizations (i.e. 28 | distutils.sysconfig.customize_compiler) run on it. So instead of going 29 | the OO route, I have this. Note, it's kindof like a wierd functional 30 | subclassing going on.""" 31 | 32 | # tell the compiler it can processes .cu 33 | self.src_extensions.append('.cu') 34 | 35 | # save references to the default compiler_so and _comple methods 36 | default_compiler_so = self.compiler_so 37 | super = self._compile 38 | 39 | # now redefine the _compile method. This gets executed for each 40 | # object but distutils doesn't have the ability to change compilers 41 | # based on source extension: we add it. 
42 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 43 | if osp.splitext(src)[1] == '.cu': 44 | # use the cuda for .cu files 45 | self.set_executable('compiler_so', 'nvcc') 46 | # use only a subset of the extra_postargs, which are 1-1 translated 47 | # from the extra_compile_args in the Extension class 48 | postargs = extra_postargs['nvcc'] 49 | else: 50 | postargs = extra_postargs['cc'] 51 | 52 | super(obj, src, ext, cc_args, postargs, pp_opts) 53 | # reset the default compiler_so, which we might have changed for cuda 54 | self.compiler_so = default_compiler_so 55 | 56 | # inject our redefined _compile method into the class 57 | self._compile = _compile 58 | 59 | 60 | class custom_build_ext(build_ext): 61 | 62 | def build_extensions(self): 63 | customize_compiler_for_nvcc(self.compiler) 64 | build_ext.build_extensions(self) 65 | 66 | 67 | setup( 68 | name='soft_nms', 69 | cmdclass={'build_ext': custom_build_ext}, 70 | ext_modules=cythonize(extensions), 71 | ) 72 | 73 | setup( 74 | name='nms_cuda', 75 | ext_modules=[ 76 | CUDAExtension('nms_cuda', [ 77 | 'src/nms_cuda.cpp', 78 | 'src/nms_kernel.cu', 79 | ]), 80 | CUDAExtension('nms_cpu', [ 81 | 'src/nms_cpu.cpp', 82 | ]), 83 | ], 84 | cmdclass={'build_ext': BuildExtension}) 85 | -------------------------------------------------------------------------------- /mmdet/ops/nms/src/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include 3 | 4 | template 5 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) { 6 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 7 | 8 | if (dets.numel() == 0) { 9 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 10 | } 11 | 12 | auto x1_t = dets.select(1, 0).contiguous(); 13 | auto y1_t = dets.select(1, 1).contiguous(); 14 | auto x2_t = dets.select(1, 2).contiguous(); 15 | auto y2_t = dets.select(1, 3).contiguous(); 16 | auto scores = dets.select(1, 4).contiguous(); 17 | 18 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 19 | 20 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 21 | 22 | auto ndets = dets.size(0); 23 | at::Tensor suppressed_t = 24 | at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 25 | 26 | auto suppressed = suppressed_t.data(); 27 | auto order = order_t.data(); 28 | auto x1 = x1_t.data(); 29 | auto y1 = y1_t.data(); 30 | auto x2 = x2_t.data(); 31 | auto y2 = y2_t.data(); 32 | auto areas = areas_t.data(); 33 | 34 | for (int64_t _i = 0; _i < ndets; _i++) { 35 | auto i = order[_i]; 36 | if (suppressed[i] == 1) continue; 37 | auto ix1 = x1[i]; 38 | auto iy1 = y1[i]; 39 | auto ix2 = x2[i]; 40 | auto iy2 = y2[i]; 41 | auto iarea = areas[i]; 42 | 43 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 44 | auto j = order[_j]; 45 | if (suppressed[j] == 1) continue; 46 | auto xx1 = std::max(ix1, x1[j]); 47 | auto yy1 = std::max(iy1, y1[j]); 48 | auto xx2 = std::min(ix2, x2[j]); 49 | auto yy2 = std::min(iy2, y2[j]); 50 | 51 | auto w = std::max(static_cast(0), xx2 - xx1 + 1); 52 | auto h = std::max(static_cast(0), yy2 - yy1 + 1); 53 | auto inter = w * h; 54 | auto ovr = inter / (iarea + areas[j] - inter); 55 | if (ovr >= threshold) suppressed[j] = 1; 56 | } 57 | } 58 | return at::nonzero(suppressed_t == 0).squeeze(1); 59 | } 60 | 61 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 62 | at::Tensor result; 63 | 
AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 64 | result = nms_cpu_kernel(dets, threshold); 65 | }); 66 | return result; 67 | } 68 | 69 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 70 | m.def("nms", &nms, "non-maximum suppression"); 71 | } -------------------------------------------------------------------------------- /mmdet/ops/nms/src/nms_cuda.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include 3 | 4 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 5 | 6 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 7 | 8 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 9 | CHECK_CUDA(dets); 10 | if (dets.numel() == 0) 11 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 12 | return nms_cuda(dets, threshold); 13 | } 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("nms", &nms, "non-maximum suppression"); 17 | } -------------------------------------------------------------------------------- /mmdet/ops/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.roi_align import roi_align 2 | from .modules.roi_align import RoIAlign 3 | 4 | __all__ = ['roi_align', 'RoIAlign'] 5 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/mmdet/ops/roi_align/functions/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/roi_align/functions/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Function 2 | 3 | from .. 
import roi_align_cuda 4 | 5 | 6 | class RoIAlignFunction(Function): 7 | 8 | @staticmethod 9 | def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0): 10 | if isinstance(out_size, int): 11 | out_h = out_size 12 | out_w = out_size 13 | elif isinstance(out_size, tuple): 14 | assert len(out_size) == 2 15 | assert isinstance(out_size[0], int) 16 | assert isinstance(out_size[1], int) 17 | out_h, out_w = out_size 18 | else: 19 | raise TypeError( 20 | '"out_size" must be an integer or tuple of integers') 21 | ctx.spatial_scale = spatial_scale 22 | ctx.sample_num = sample_num 23 | ctx.save_for_backward(rois) 24 | ctx.feature_size = features.size() 25 | 26 | batch_size, num_channels, data_height, data_width = features.size() 27 | num_rois = rois.size(0) 28 | 29 | output = features.new_zeros(num_rois, num_channels, out_h, out_w) 30 | if features.is_cuda: 31 | roi_align_cuda.forward(features, rois, out_h, out_w, spatial_scale, 32 | sample_num, output) 33 | else: 34 | raise NotImplementedError 35 | 36 | return output 37 | 38 | @staticmethod 39 | def backward(ctx, grad_output): 40 | feature_size = ctx.feature_size 41 | spatial_scale = ctx.spatial_scale 42 | sample_num = ctx.sample_num 43 | rois = ctx.saved_tensors[0] 44 | assert (feature_size is not None and grad_output.is_cuda) 45 | 46 | batch_size, num_channels, data_height, data_width = feature_size 47 | out_w = grad_output.size(3) 48 | out_h = grad_output.size(2) 49 | 50 | grad_input = grad_rois = None 51 | if ctx.needs_input_grad[0]: 52 | grad_input = rois.new_zeros(batch_size, num_channels, data_height, 53 | data_width) 54 | roi_align_cuda.backward(grad_output.contiguous(), rois, out_h, 55 | out_w, spatial_scale, sample_num, 56 | grad_input) 57 | 58 | return grad_input, grad_rois, None, None, None 59 | 60 | 61 | roi_align = RoIAlignFunction.apply 62 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/gradcheck.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.autograd import gradcheck 4 | 5 | import os.path as osp 6 | import sys 7 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 8 | from roi_align import RoIAlign # noqa: E402 9 | 10 | feat_size = 15 11 | spatial_scale = 1.0 / 8 12 | img_size = feat_size / spatial_scale 13 | num_imgs = 2 14 | num_rois = 20 15 | 16 | batch_ind = np.random.randint(num_imgs, size=(num_rois, 1)) 17 | rois = np.random.rand(num_rois, 4) * img_size * 0.5 18 | rois[:, 2:] += img_size * 0.5 19 | rois = np.hstack((batch_ind, rois)) 20 | 21 | feat = torch.randn( 22 | num_imgs, 16, feat_size, feat_size, requires_grad=True, device='cuda:0') 23 | rois = torch.from_numpy(rois).float().cuda() 24 | inputs = (feat, rois) 25 | print('Gradcheck for roi align...') 26 | test = gradcheck(RoIAlign(3, spatial_scale), inputs, atol=1e-3, eps=1e-3) 27 | print(test) 28 | test = gradcheck(RoIAlign(3, spatial_scale, 2), inputs, atol=1e-3, eps=1e-3) 29 | print(test) 30 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/mmdet/ops/roi_align/modules/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/roi_align/modules/roi_align.py: 
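The functional roi_align shown above expects each RoI as (batch_index, x1, y1, x2, y2), the same layout gradcheck.py assembles with np.hstack. A minimal usage sketch (shapes and values are illustrative; a CUDA device is required because the CPU branch raises NotImplementedError):

    import torch

    from mmdet.ops import roi_align  # exported from mmdet/ops/__init__.py

    feats = torch.randn(2, 256, 50, 68, device='cuda')  # e.g. one FPN level
    rois = torch.tensor([[0., 10., 10., 90., 120.],     # (batch_idx, x1, y1, x2, y2)
                         [1., 4., 8., 60., 40.]], device='cuda')
    # 7x7 output bins, 1/8 feature stride, 2 bilinear samples per bin
    out = roi_align(feats, rois, (7, 7), 1.0 / 8, 2)
    assert out.shape == (2, 256, 7, 7)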
-------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_align import RoIAlignFunction 3 | 4 | 5 | class RoIAlign(Module): 6 | 7 | def __init__(self, out_size, spatial_scale, sample_num=0): 8 | super(RoIAlign, self).__init__() 9 | 10 | self.out_size = out_size 11 | self.spatial_scale = float(spatial_scale) 12 | self.sample_num = int(sample_num) 13 | 14 | def forward(self, features, rois): 15 | return RoIAlignFunction.apply(features, rois, self.out_size, 16 | self.spatial_scale, self.sample_num) 17 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='roi_align_cuda', 6 | ext_modules=[ 7 | CUDAExtension('roi_align_cuda', [ 8 | 'src/roi_align_cuda.cpp', 9 | 'src/roi_align_kernel.cu', 10 | ]), 11 | ], 12 | cmdclass={'build_ext': BuildExtension}) 13 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/src/roi_align_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <cmath> 4 | #include <vector> 5 | 6 | int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int sample_num, 8 | const int channels, const int height, 9 | const int width, const int num_rois, 10 | const int pooled_height, const int pooled_width, 11 | at::Tensor output); 12 | 13 | int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 14 | const float spatial_scale, const int sample_num, 15 | const int channels, const int height, 16 | const int width, const int num_rois, 17 | const int pooled_height, const int pooled_width, 18 | at::Tensor bottom_grad); 19 | 20 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDA tensor ") 21 | #define CHECK_CONTIGUOUS(x) \ 22 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 23 | #define CHECK_INPUT(x) \ 24 | CHECK_CUDA(x); \ 25 | CHECK_CONTIGUOUS(x) 26 | 27 | int roi_align_forward_cuda(at::Tensor features, at::Tensor rois, 28 | int pooled_height, int pooled_width, 29 | float spatial_scale, int sample_num, 30 | at::Tensor output) { 31 | CHECK_INPUT(features); 32 | CHECK_INPUT(rois); 33 | CHECK_INPUT(output); 34 | 35 | // Number of ROIs 36 | int num_rois = rois.size(0); 37 | int size_rois = rois.size(1); 38 | 39 | if (size_rois != 5) { 40 | printf("wrong roi size\n"); 41 | return 0; 42 | } 43 | 44 | int num_channels = features.size(1); 45 | int data_height = features.size(2); 46 | int data_width = features.size(3); 47 | 48 | ROIAlignForwardLaucher(features, rois, spatial_scale, sample_num, 49 | num_channels, data_height, data_width, num_rois, 50 | pooled_height, pooled_width, output); 51 | 52 | return 1; 53 | } 54 | 55 | int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois, 56 | int pooled_height, int pooled_width, 57 | float spatial_scale, int sample_num, 58 | at::Tensor bottom_grad) { 59 | CHECK_INPUT(top_grad); 60 | CHECK_INPUT(rois); 61 | CHECK_INPUT(bottom_grad); 62 | 63 | // Number of ROIs 64 | int num_rois = rois.size(0); 65 | int size_rois = rois.size(1); 66 | if (size_rois != 5) { 67 | printf("wrong roi size\n"); 68 | return 0; 69 | } 70 | 71 | int num_channels = bottom_grad.size(1); 72 | int data_height =
bottom_grad.size(2); 73 | int data_width = bottom_grad.size(3); 74 | 75 | ROIAlignBackwardLaucher(top_grad, rois, spatial_scale, sample_num, 76 | num_channels, data_height, data_width, num_rois, 77 | pooled_height, pooled_width, bottom_grad); 78 | 79 | return 1; 80 | } 81 | 82 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 83 | m.def("forward", &roi_align_forward_cuda, "Roi_Align forward (CUDA)"); 84 | m.def("backward", &roi_align_backward_cuda, "Roi_Align backward (CUDA)"); 85 | } 86 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.roi_pool import roi_pool 2 | from .modules.roi_pool import RoIPool 3 | 4 | __all__ = ['roi_pool', 'RoIPool'] 5 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/mmdet/ops/roi_pool/functions/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/functions/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from .. import roi_pool_cuda 5 | 6 | 7 | class RoIPoolFunction(Function): 8 | 9 | @staticmethod 10 | def forward(ctx, features, rois, out_size, spatial_scale): 11 | if isinstance(out_size, int): 12 | out_h = out_size 13 | out_w = out_size 14 | elif isinstance(out_size, tuple): 15 | assert len(out_size) == 2 16 | assert isinstance(out_size[0], int) 17 | assert isinstance(out_size[1], int) 18 | out_h, out_w = out_size 19 | else: 20 | raise TypeError( 21 | '"out_size" must be an integer or tuple of integers') 22 | assert features.is_cuda 23 | ctx.save_for_backward(rois) 24 | num_channels = features.size(1) 25 | num_rois = rois.size(0) 26 | out_size = (num_rois, num_channels, out_h, out_w) 27 | output = features.new_zeros(out_size) 28 | argmax = features.new_zeros(out_size, dtype=torch.int) 29 | roi_pool_cuda.forward(features, rois, out_h, out_w, spatial_scale, 30 | output, argmax) 31 | ctx.spatial_scale = spatial_scale 32 | ctx.feature_size = features.size() 33 | ctx.argmax = argmax 34 | 35 | return output 36 | 37 | @staticmethod 38 | def backward(ctx, grad_output): 39 | assert grad_output.is_cuda 40 | spatial_scale = ctx.spatial_scale 41 | feature_size = ctx.feature_size 42 | argmax = ctx.argmax 43 | rois = ctx.saved_tensors[0] 44 | assert feature_size is not None 45 | 46 | grad_input = grad_rois = None 47 | if ctx.needs_input_grad[0]: 48 | grad_input = grad_output.new_zeros(feature_size) 49 | roi_pool_cuda.backward(grad_output.contiguous(), rois, argmax, 50 | spatial_scale, grad_input) 51 | 52 | return grad_input, grad_rois, None, None 53 | 54 | 55 | roi_pool = RoIPoolFunction.apply 56 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/gradcheck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import gradcheck 3 | 4 | import os.path as osp 5 | import sys 6 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 7 | from roi_pool import RoIPool # noqa: E402 8 | 9 | feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda() 10 | rois = torch.Tensor([[0, 0, 
0, 50, 50], [0, 10, 30, 43, 55], 11 | [1, 67, 40, 110, 120]]).cuda() 12 | inputs = (feat, rois) 13 | print('Gradcheck for roi pooling...') 14 | test = gradcheck(RoIPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3) 15 | print(test) 16 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/mmdet/ops/roi_pool/modules/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/modules/roi_pool.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_pool import roi_pool 3 | 4 | 5 | class RoIPool(Module): 6 | 7 | def __init__(self, out_size, spatial_scale): 8 | super(RoIPool, self).__init__() 9 | 10 | self.out_size = out_size 11 | self.spatial_scale = float(spatial_scale) 12 | 13 | def forward(self, features, rois): 14 | return roi_pool(features, rois, self.out_size, self.spatial_scale) 15 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='roi_pool', 6 | ext_modules=[ 7 | CUDAExtension('roi_pool_cuda', [ 8 | 'src/roi_pool_cuda.cpp', 9 | 'src/roi_pool_kernel.cu', 10 | ]) 11 | ], 12 | cmdclass={'build_ext': BuildExtension}) 13 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/src/roi_pool_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <cmath> 4 | #include <vector> 5 | 6 | int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int channels, 8 | const int height, const int width, const int num_rois, 9 | const int pooled_h, const int pooled_w, 10 | at::Tensor output, at::Tensor argmax); 11 | 12 | int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 13 | const at::Tensor argmax, const float spatial_scale, 14 | const int batch_size, const int channels, 15 | const int height, const int width, 16 | const int num_rois, const int pooled_h, 17 | const int pooled_w, at::Tensor bottom_grad); 18 | 19 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDA tensor ") 20 | #define CHECK_CONTIGUOUS(x) \ 21 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 22 | #define CHECK_INPUT(x) \ 23 | CHECK_CUDA(x); \ 24 | CHECK_CONTIGUOUS(x) 25 | 26 | int roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois, 27 | int pooled_height, int pooled_width, 28 | float spatial_scale, at::Tensor output, 29 | at::Tensor argmax) { 30 | CHECK_INPUT(features); 31 | CHECK_INPUT(rois); 32 | CHECK_INPUT(output); 33 | CHECK_INPUT(argmax); 34 | 35 | // Number of ROIs 36 | int num_rois = rois.size(0); 37 | int size_rois = rois.size(1); 38 | 39 | if (size_rois != 5) { 40 | printf("wrong roi size\n"); 41 | return 0; 42 | } 43 | 44 | int channels = features.size(1); 45 | int height = features.size(2); 46 | int width = features.size(3); 47 | 48 | ROIPoolForwardLaucher(features, rois, spatial_scale, channels, height, width, 49 | num_rois, pooled_height, pooled_width,
output, argmax); 50 | 51 | return 1; 52 | } 53 | 54 | int roi_pooling_backward_cuda(at::Tensor top_grad, at::Tensor rois, 55 | at::Tensor argmax, float spatial_scale, 56 | at::Tensor bottom_grad) { 57 | CHECK_INPUT(top_grad); 58 | CHECK_INPUT(rois); 59 | CHECK_INPUT(argmax); 60 | CHECK_INPUT(bottom_grad); 61 | 62 | int pooled_height = top_grad.size(2); 63 | int pooled_width = top_grad.size(3); 64 | int num_rois = rois.size(0); 65 | int size_rois = rois.size(1); 66 | 67 | if (size_rois != 5) { 68 | printf("wrong roi size\n"); 69 | return 0; 70 | } 71 | int batch_size = bottom_grad.size(0); 72 | int channels = bottom_grad.size(1); 73 | int height = bottom_grad.size(2); 74 | int width = bottom_grad.size(3); 75 | 76 | ROIPoolBackwardLaucher(top_grad, rois, argmax, spatial_scale, batch_size, 77 | channels, height, width, num_rois, pooled_height, 78 | pooled_width, bottom_grad); 79 | 80 | return 1; 81 | } 82 | 83 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 84 | m.def("forward", &roi_pooling_forward_cuda, "Roi_Pooling forward (CUDA)"); 85 | m.def("backward", &roi_pooling_backward_cuda, "Roi_Pooling backward (CUDA)"); 86 | } 87 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/__init__.py: -------------------------------------------------------------------------------- 1 | from .modules.sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss 2 | 3 | __all__ = ['SigmoidFocalLoss', 'sigmoid_focal_loss'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/mmdet/ops/sigmoid_focal_loss/functions/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/functions/sigmoid_focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | 5 | from .. 
import sigmoid_focal_loss_cuda 6 | 7 | 8 | class SigmoidFocalLossFunction(Function): 9 | 10 | @staticmethod 11 | def forward(ctx, input, target, gamma=2.0, alpha=0.25, reduction='mean'): 12 | ctx.save_for_backward(input, target) 13 | num_classes = input.shape[1] 14 | ctx.num_classes = num_classes 15 | ctx.gamma = gamma 16 | ctx.alpha = alpha 17 | 18 | loss = sigmoid_focal_loss_cuda.forward(input, target, num_classes, 19 | gamma, alpha) 20 | reduction_enum = F._Reduction.get_enum(reduction) 21 | # none: 0, mean: 1, sum: 2 22 | if reduction_enum == 0: 23 | return loss 24 | elif reduction_enum == 1: 25 | return loss.mean() 26 | elif reduction_enum == 2: 27 | return loss.sum() 28 | 29 | @staticmethod 30 | @once_differentiable 31 | def backward(ctx, d_loss): 32 | input, target = ctx.saved_tensors 33 | num_classes = ctx.num_classes 34 | gamma = ctx.gamma 35 | alpha = ctx.alpha 36 | d_loss = d_loss.contiguous() 37 | d_input = sigmoid_focal_loss_cuda.backward(input, target, d_loss, 38 | num_classes, gamma, alpha) 39 | return d_input, None, None, None, None 40 | 41 | 42 | sigmoid_focal_loss = SigmoidFocalLossFunction.apply 43 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/mmdet/ops/sigmoid_focal_loss/modules/__init__.py -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/modules/sigmoid_focal_loss.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from ..functions.sigmoid_focal_loss import sigmoid_focal_loss 4 | 5 | 6 | class SigmoidFocalLoss(nn.Module): 7 | 8 | def __init__(self, gamma, alpha): 9 | super(SigmoidFocalLoss, self).__init__() 10 | self.gamma = gamma 11 | self.alpha = alpha 12 | 13 | def forward(self, logits, targets): 14 | assert logits.is_cuda 15 | loss = sigmoid_focal_loss(logits, targets, self.gamma, self.alpha) 16 | return loss.sum() 17 | 18 | def __repr__(self): 19 | tmpstr = self.__class__.__name__ + "(" 20 | tmpstr += "gamma=" + str(self.gamma) 21 | tmpstr += ", alpha=" + str(self.alpha) 22 | tmpstr += ")" 23 | return tmpstr 24 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='SigmoidFocalLoss', 6 | ext_modules=[ 7 | CUDAExtension('sigmoid_focal_loss_cuda', [ 8 | 'src/sigmoid_focal_loss.cpp', 9 | 'src/sigmoid_focal_loss_cuda.cu', 10 | ]), 11 | ], 12 | cmdclass={'build_ext': BuildExtension}) 13 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss.cpp: -------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h 3 | #include <torch/extension.h> 4 | 5 | at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits, 6 | const at::Tensor &targets, 7 | const int num_classes, 8 | const float gamma, const float alpha); 9 | 10 | at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits, 11 | const
at::Tensor &targets, 12 | const at::Tensor &d_losses, 13 | const int num_classes, 14 | const float gamma, const float alpha); 15 | 16 | // Interface for Python 17 | at::Tensor SigmoidFocalLoss_forward(const at::Tensor &logits, 18 | const at::Tensor &targets, 19 | const int num_classes, const float gamma, 20 | const float alpha) { 21 | if (logits.type().is_cuda()) { 22 | return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, 23 | alpha); 24 | } AT_ERROR("SigmoidFocalLoss is not implemented on the CPU"); 25 | } 26 | 27 | at::Tensor SigmoidFocalLoss_backward(const at::Tensor &logits, 28 | const at::Tensor &targets, 29 | const at::Tensor &d_losses, 30 | const int num_classes, const float gamma, 31 | const float alpha) { 32 | if (logits.type().is_cuda()) { 33 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, 34 | num_classes, gamma, alpha); 35 | } AT_ERROR("SigmoidFocalLoss is not implemented on the CPU"); 36 | } 37 | 38 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 39 | m.def("forward", &SigmoidFocalLoss_forward, 40 | "SigmoidFocalLoss forward (CUDA)"); 41 | m.def("backward", &SigmoidFocalLoss_backward, 42 | "SigmoidFocalLoss backward (CUDA)"); 43 | } 44 | -------------------------------------------------------------------------------- /panopticapi/README.md: -------------------------------------------------------------------------------- 1 | # COCO 2018 Panoptic Segmentation Task API (Beta version) 2 | This API is an experimental version of [COCO 2018 Panoptic Segmentation Task API](http://cocodataset.org/#panoptic-2018). 3 | 4 | ## Summary 5 | **Evaluation script** 6 | 7 | *evaluation.py* calculates [PQ metrics](http://cocodataset.org/#panoptic-eval). For more information about the script usage, run: `python evaluation.py --help` 8 | 9 | **Format converters** 10 | 11 | COCO panoptic segmentation is stored in a new [format](http://cocodataset.org/#format-data). Unlike the COCO detection format, which stores each segment independently, the COCO panoptic format stores all segmentations for an image in a single PNG file. This compact representation naturally maintains the non-overlapping property of the panoptic segmentation. 12 | 13 | We provide several converters for the COCO panoptic format. Full description and usage examples are available [here](https://github.com/cocodataset/panopticapi/blob/master/CONVERTERS.md). 14 | 15 | **Semantic and instance segmentation heuristic combination** 16 | 17 | We provide a simple script that heuristically combines semantic and instance segmentation predictions into a panoptic segmentation prediction. 18 | 19 | The merging logic of the script is described in the panoptic segmentation [paper](https://arxiv.org/abs/1801.00868). In addition, this script can filter out predicted stuff segments whose area is below the threshold defined by the `--stuff_area_limit` parameter. For more information about the script logic and usage, run: `python combine_semantic_and_instance_predictions.py --help` 20 | 21 | **COCO panoptic segmentation challenge categories** 22 | 23 | The JSON file [panoptic_coco_categories.json](https://github.com/cocodataset/panopticapi/blob/master/panoptic_coco_categories.json) contains the list of all categories used in the COCO 2018 panoptic segmentation challenge. 24 | 25 | **Visualization** 26 | 27 | *visualization.py* provides an example of generating a visually appealing representation of the panoptic segmentation data. 28 | 29 | ## Contact 30 | If you have any questions regarding this API, please contact us at alexander.n.kirillov-at-gmail.com.
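To make the single-PNG encoding described above concrete, here is a minimal sketch (not part of the original API; the sample path is hypothetical, following the sample_data layout) that decodes a panoptic annotation with the same ID = R + 256 * G + 256 * 256 * B convention implemented by `rgb2id` in panopticapi/utils.py:

```python
import numpy as np
import PIL.Image as Image

# Hypothetical sample file from sample_data/panoptic_examples/.
png_path = './sample_data/panoptic_examples/000000142238.png'

# Each pixel's RGB triplet encodes one segment id:
#   ID = R + 256 * G + 256 * 256 * B
rgb = np.array(Image.open(png_path), dtype=np.uint32)
ids = rgb[:, :, 0] + 256 * rgb[:, :, 1] + 256 * 256 * rgb[:, :, 2]

# Every pixel carries exactly one id, so segments cannot overlap, and
# per-segment areas fall out of a single unique() pass.
segment_ids, areas = np.unique(ids, return_counts=True)
for seg_id, area in zip(segment_ids, areas):
    print('segment {}: {} pixels'.format(seg_id, area))
```

The `segments_info` records in the matching JSON then map each decoded id back to its category.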
31 | -------------------------------------------------------------------------------- /panopticapi/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/panopticapi/__init__.py -------------------------------------------------------------------------------- /panopticapi/cityscapes_gt_converter/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/panopticapi/cityscapes_gt_converter/__init__.py -------------------------------------------------------------------------------- /panopticapi/converted_data/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /panopticapi/converters/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/panopticapi/converters/__init__.py -------------------------------------------------------------------------------- /panopticapi/license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018, Alexander Kirillov 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | 24 | The views and conclusions contained in the software and documentation are those 25 | of the authors and should not be interpreted as representing official policies, 26 | either expressed or implied, of the FreeBSD Project. 
27 | -------------------------------------------------------------------------------- /panopticapi/sample_data/input_images/000000142238.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/panopticapi/sample_data/input_images/000000142238.jpg -------------------------------------------------------------------------------- /panopticapi/sample_data/input_images/000000439180.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/panopticapi/sample_data/input_images/000000439180.jpg -------------------------------------------------------------------------------- /panopticapi/sample_data/panoptic_examples/000000142238.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/panopticapi/sample_data/panoptic_examples/000000142238.png -------------------------------------------------------------------------------- /panopticapi/sample_data/panoptic_examples/000000439180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/panopticapi/sample_data/panoptic_examples/000000439180.png -------------------------------------------------------------------------------- /panopticapi/sample_data/panoptic_examples_2ch_format/000000142238.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/panopticapi/sample_data/panoptic_examples_2ch_format/000000142238.png -------------------------------------------------------------------------------- /panopticapi/sample_data/panoptic_examples_2ch_format/000000439180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dingguo1996/EasyPSNet/5905a9adab15cd5937a1821a67cda1813175a528/panopticapi/sample_data/panoptic_examples_2ch_format/000000439180.png -------------------------------------------------------------------------------- /panopticapi/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | import functools 6 | import traceback 7 | import json 8 | import numpy as np 9 | 10 | 11 | # This decorator is used to print an error thrown inside a worker process 12 | def get_traceback(f): 13 | @functools.wraps(f) 14 | def wrapper(*args, **kwargs): 15 | try: 16 | return f(*args, **kwargs) 17 | except Exception as e: 18 | print('Caught exception in worker thread:') 19 | traceback.print_exc() 20 | raise e 21 | 22 | return wrapper 23 | 24 | 25 | class IdGenerator(): 26 | ''' 27 | The class is designed to generate unique IDs that have a meaningful RGB encoding. 28 | Given a semantic category, a unique ID will be generated, and its RGB encoding will 29 | have a color close to the predefined semantic category color. 30 | The RGB encoding used is ID = R + 256 * G + 256 * 256 * B.
31 | The class constructor takes a dictionary {id: category_info}, in which all semantic 32 | class ids are present and each category_info record is a dict with the fields 33 | 'isthing' and 'color'. 34 | ''' 35 | def __init__(self, categories): 36 | self.taken_colors = {(0, 0, 0)} 37 | self.categories = categories 38 | for category in self.categories.values(): 39 | if category['isthing'] == 0: 40 | self.taken_colors.add(tuple(category['color'])) 41 | 42 | def get_color(self, cat_id): 43 | def random_color(base, max_dist=30): 44 | new_color = base + np.random.randint(low=-max_dist, 45 | high=max_dist+1, 46 | size=3) 47 | return tuple(np.maximum(0, np.minimum(255, new_color))) 48 | 49 | category = self.categories[cat_id] 50 | if category['isthing'] == 0: 51 | return category['color'] 52 | base_color_array = category['color'] 53 | base_color = tuple(base_color_array) 54 | if base_color not in self.taken_colors: 55 | self.taken_colors.add(base_color) 56 | return base_color 57 | else: 58 | while True: 59 | color = random_color(base_color_array) 60 | if color not in self.taken_colors: 61 | self.taken_colors.add(color) 62 | return color 63 | 64 | def get_id(self, cat_id): 65 | color = self.get_color(cat_id) 66 | return rgb2id(color) 67 | 68 | def get_id_and_color(self, cat_id): 69 | color = self.get_color(cat_id) 70 | return rgb2id(color), color 71 | 72 | 73 | def rgb2id(color): 74 | if isinstance(color, np.ndarray) and len(color.shape) == 3: 75 | if color.dtype == np.uint8: 76 | color = color.astype(np.int32) 77 | return color[:, :, 0] + 256 * color[:, :, 1] + 256 * 256 * color[:, :, 2] 78 | return int(color[0] + 256 * color[1] + 256 * 256 * color[2]) 79 | 80 | 81 | def id2rgb(id_map): 82 | if isinstance(id_map, np.ndarray): 83 | id_map_copy = id_map.copy() 84 | rgb_shape = tuple(list(id_map.shape) + [3]) 85 | rgb_map = np.zeros(rgb_shape, dtype=np.uint8) 86 | for i in range(3): 87 | rgb_map[..., i] = id_map_copy % 256 88 | id_map_copy //= 256 89 | return rgb_map 90 | color = [] 91 | for _ in range(3): 92 | color.append(id_map % 256) 93 | id_map //= 256 94 | return color 95 | 96 | 97 | def save_json(d, file): 98 | with open(file, 'w') as f: 99 | json.dump(d, f) 100 | -------------------------------------------------------------------------------- /panopticapi/visualization.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | ''' 3 | Visualization demo for panoptic COCO sample_data 4 | 5 | The code shows an example of color generation for panoptic data (with 6 | "generate_new_colors" set to True). For each segment a distinct color is used, 7 | chosen so that it is close to the color of the corresponding semantic class.
8 | ''' 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | from __future__ import unicode_literals 13 | import os, sys 14 | import numpy as np 15 | import json 16 | 17 | import PIL.Image as Image 18 | import matplotlib.pyplot as plt 19 | from skimage.segmentation import find_boundaries 20 | 21 | from utils import IdGenerator, rgb2id 22 | 23 | # whether colors stored in the PNG are used or new colors are generated 24 | generate_new_colors = True 25 | 26 | json_file = './sample_data/panoptic_examples.json' 27 | segmentations_folder = './sample_data/panoptic_examples/' 28 | img_folder = './sample_data/input_images/' 29 | panoptic_coco_categories = './panoptic_coco_categories.json' 30 | 31 | with open(json_file, 'r') as f: 32 | coco_d = json.load(f) 33 | 34 | ann = np.random.choice(coco_d['annotations']) 35 | 36 | with open(panoptic_coco_categories, 'r') as f: 37 | categories_list = json.load(f) 38 | categories = {category['id']: category for category in categories_list} 39 | 40 | # find the input img that corresponds to the annotation 41 | img = None 42 | for image_info in coco_d['images']: 43 | if image_info['id'] == ann['image_id']: 44 | try: 45 | img = np.array( 46 | Image.open(os.path.join(img_folder, image_info['file_name'])) 47 | ) 48 | except IOError: 49 | print("Unable to find the corresponding input image.") 50 | break 51 | 52 | segmentation = np.array( 53 | Image.open(os.path.join(segmentations_folder, ann['file_name'])), 54 | dtype=np.uint8 55 | ) 56 | segmentation_id = rgb2id(segmentation) 57 | # find segment boundaries 58 | boundaries = find_boundaries(segmentation_id, mode='thick') 59 | 60 | if generate_new_colors: 61 | segmentation[:, :, :] = 0 62 | color_generator = IdGenerator(categories) 63 | for segment_info in ann['segments_info']: 64 | color = color_generator.get_color(segment_info['category_id']) 65 | mask = segmentation_id == segment_info['id'] 66 | segmentation[mask] = color 67 | 68 | # depict boundaries 69 | segmentation[boundaries] = [0, 0, 0] 70 | 71 | if img is None: 72 | plt.figure() 73 | plt.imshow(segmentation) 74 | plt.axis('off') 75 | else: 76 | plt.figure(figsize=(9, 5)) 77 | plt.subplot(121) 78 | plt.imshow(img) 79 | plt.axis('off') 80 | plt.subplot(122) 81 | plt.imshow(segmentation) 82 | plt.axis('off') 83 | plt.tight_layout() 84 | plt.show() 85 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import time 4 | from setuptools import find_packages, setup 5 | 6 | 7 | def readme(): 8 | with open('README.md', encoding='utf-8') as f: 9 | content = f.read() 10 | return content 11 | 12 | 13 | MAJOR = 0 14 | MINOR = 6 15 | PATCH = 0 16 | SUFFIX = '' 17 | SHORT_VERSION = '{}.{}.{}{}'.format(MAJOR, MINOR, PATCH, SUFFIX) 18 | 19 | version_file = 'mmdet/version.py' 20 | 21 | 22 | def get_git_hash(): 23 | 24 | def _minimal_ext_cmd(cmd): 25 | # construct minimal environment 26 | env = {} 27 | for k in ['SYSTEMROOT', 'PATH', 'HOME']: 28 | v = os.environ.get(k) 29 | if v is not None: 30 | env[k] = v 31 | # LANGUAGE is used on win32 32 | env['LANGUAGE'] = 'C' 33 | env['LANG'] = 'C' 34 | env['LC_ALL'] = 'C' 35 | out = subprocess.Popen( 36 | cmd, stdout=subprocess.PIPE, env=env).communicate()[0] 37 | return out 38 | 39 | try: 40 | out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD']) 41 | sha = out.strip().decode('ascii') 42 | except OSError: 43 | sha = 'unknown'
44 | 45 | return sha 46 | 47 | 48 | def get_hash(): 49 | if os.path.exists('.git'): 50 | sha = get_git_hash()[:7] 51 | elif os.path.exists(version_file): 52 | try: 53 | from mmdet.version import __version__ 54 | sha = __version__.split('+')[-1] 55 | except ImportError: 56 | raise ImportError('Unable to get git version') 57 | else: 58 | sha = 'unknown' 59 | 60 | return sha 61 | 62 | 63 | def write_version_py(): 64 | content = """# GENERATED VERSION FILE 65 | # TIME: {} 66 | 67 | __version__ = '{}' 68 | short_version = '{}' 69 | """ 70 | sha = get_hash() 71 | VERSION = SHORT_VERSION + '+' + sha 72 | 73 | with open(version_file, 'w') as f: 74 | f.write(content.format(time.asctime(), VERSION, SHORT_VERSION)) 75 | 76 | 77 | def get_version(): 78 | with open(version_file, 'r') as f: 79 | exec(compile(f.read(), version_file, 'exec')) 80 | return locals()['__version__'] 81 | 82 | 83 | if __name__ == '__main__': 84 | write_version_py() 85 | setup( 86 | name='mmdet', 87 | version=get_version(), 88 | description='Open MMLab Detection Toolbox', 89 | long_description=readme(), 90 | keywords='computer vision, object detection', 91 | url='https://github.com/open-mmlab/mmdetection', 92 | packages=find_packages(exclude=('configs', 'tools', 'demo')), 93 | package_data={'mmdet.ops': ['*/*.so']}, 94 | classifiers=[ 95 | 'Development Status :: 4 - Beta', 96 | 'License :: OSI Approved :: Apache Software License', 97 | 'Operating System :: OS Independent', 98 | 'Programming Language :: Python :: 2', 99 | 'Programming Language :: Python :: 2.7', 100 | 'Programming Language :: Python :: 3', 101 | 'Programming Language :: Python :: 3.4', 102 | 'Programming Language :: Python :: 3.5', 103 | 'Programming Language :: Python :: 3.6', 104 | ], 105 | license='GPLv3', 106 | setup_requires=['pytest-runner'], 107 | tests_require=['pytest'], 108 | install_requires=[ 109 | 'mmcv>=0.2.6', 'numpy', 'matplotlib', 'six', 'terminaltables', 110 | 'pycocotools' 111 | ], 112 | zip_safe=False) 113 | -------------------------------------------------------------------------------- /tools/coco_eval.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | 3 | from mmdet.core import coco_eval 4 | 5 | 6 | def main(): 7 | parser = ArgumentParser(description='COCO Evaluation') 8 | parser.add_argument('result', help='result file path') 9 | parser.add_argument('--ann', help='annotation file path') 10 | parser.add_argument( 11 | '--types', 12 | type=str, 13 | nargs='+', 14 | choices=['proposal_fast', 'proposal', 'bbox', 'segm', 'keypoint'], 15 | default=['bbox'], 16 | help='result types') 17 | parser.add_argument( 18 | '--max-dets', 19 | type=int, 20 | nargs='+', 21 | default=[100, 300, 1000], 22 | help='proposal numbers, only used for recall evaluation') 23 | args = parser.parse_args() 24 | coco_eval(args.result, args.types, args.ann, args.max_dets) 25 | 26 | 27 | if __name__ == '__main__': 28 | main() 29 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | $PYTHON -m torch.distributed.launch --nproc_per_node=$2 $(dirname "$0")/train.py $1 --launcher pytorch ${@:3} 6 | -------------------------------------------------------------------------------- /tools/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 
3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-8} 10 | CPUS_PER_TASK=${CPUS_PER_TASK:-32} 11 | PY_ARGS=${@:5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | 14 | srun -p ${PARTITION} \ 15 | --job-name=${JOB_NAME} \ 16 | --gres=gpu:${GPUS} \ 17 | --ntasks=1 \ 18 | --ntasks-per-node=1 \ 19 | --cpus-per-task=${CPUS_PER_TASK} \ 20 | --kill-on-bad-exit=1 \ 21 | ${SRUN_ARGS} \ 22 | python tools/test.py ${CONFIG} ${CHECKPOINT} --gpus ${GPUS} ${PY_ARGS} 23 | -------------------------------------------------------------------------------- /tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | WORK_DIR=$4 9 | GPUS=${5:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | PY_ARGS=${PY_ARGS:-"--validate"} 14 | 15 | srun -p ${PARTITION} \ 16 | --job-name=${JOB_NAME} \ 17 | --gres=gpu:${GPUS_PER_NODE} \ 18 | --ntasks=${GPUS} \ 19 | --ntasks-per-node=${GPUS_PER_NODE} \ 20 | --cpus-per-task=${CPUS_PER_TASK} \ 21 | --kill-on-bad-exit=1 \ 22 | ${SRUN_ARGS} \ 23 | python -u tools/train.py ${CONFIG} --work_dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} 24 | -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import argparse 4 | from mmcv import Config 5 | 6 | from mmdet import __version__ 7 | from mmdet.datasets import get_dataset 8 | from mmdet.apis import (train_detector, init_dist, get_root_logger, 9 | set_random_seed) 10 | from mmdet.models import build_detector 11 | import torch 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser(description='Train a detector') 16 | parser.add_argument('config', help='train config file path') 17 | parser.add_argument('--work_dir', help='the dir to save logs and models') 18 | parser.add_argument( 19 | '--resume_from', help='the checkpoint file to resume from') 20 | parser.add_argument( 21 | '--validate', 22 | action='store_true', 23 | help='whether to evaluate the checkpoint during training') 24 | parser.add_argument( 25 | '--gpus', 26 | type=int, 27 | default=1, 28 | help='number of gpus to use ' 29 | '(only applicable to non-distributed training)') 30 | parser.add_argument('--seed', type=int, default=None, help='random seed') 31 | parser.add_argument( 32 | '--launcher', 33 | choices=['none', 'pytorch', 'slurm', 'mpi'], 34 | default='none', 35 | help='job launcher') 36 | parser.add_argument('--local_rank', type=int, default=0) 37 | args = parser.parse_args() 38 | 39 | return args 40 | 41 | 42 | def main(): 43 | args = parse_args() 44 | 45 | cfg = Config.fromfile(args.config) 46 | # set cudnn_benchmark 47 | if cfg.get('cudnn_benchmark', False): 48 | torch.backends.cudnn.benchmark = True 49 | # update configs according to CLI args 50 | if args.work_dir is not None: 51 | cfg.work_dir = args.work_dir 52 | if args.resume_from is not None: 53 | cfg.resume_from = args.resume_from 54 | cfg.gpus = args.gpus 55 | if cfg.checkpoint_config is not None: 56 | # save mmdet version in checkpoints as meta data 57 | cfg.checkpoint_config.meta = dict( 58 | mmdet_version=__version__, config=cfg.text) 59 | 60 | # init distributed env first, since logger depends on the dist info. 
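# ('none' runs plain single-machine training on args.gpus GPUs; the other
# launchers let init_dist derive rank and world size from the environment,
# e.g. variables set by torch.distributed.launch or the SLURM scheduler.)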
61 | if args.launcher == 'none': 62 | distributed = False 63 | else: 64 | distributed = True 65 | init_dist(args.launcher, **cfg.dist_params) 66 | 67 | # init logger before other steps 68 | logger = get_root_logger(cfg.log_level) 69 | logger.info('Distributed training: {}'.format(distributed)) 70 | 71 | # set random seeds 72 | if args.seed is not None: 73 | logger.info('Set random seed to {}'.format(args.seed)) 74 | set_random_seed(args.seed) 75 | 76 | model = build_detector( 77 | cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg) 78 | 79 | train_dataset = get_dataset(cfg.data.train) 80 | train_detector( 81 | model, 82 | train_dataset, 83 | cfg, 84 | distributed=distributed, 85 | validate=args.validate, 86 | logger=logger) 87 | 88 | 89 | if __name__ == '__main__': 90 | main() 91 | -------------------------------------------------------------------------------- /tools/voc_eval.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | 3 | import mmcv 4 | import numpy as np 5 | 6 | from mmdet import datasets 7 | from mmdet.core import eval_map 8 | 9 | 10 | def voc_eval(result_file, dataset, iou_thr=0.5): 11 | det_results = mmcv.load(result_file) 12 | gt_bboxes = [] 13 | gt_labels = [] 14 | gt_ignore = [] 15 | for i in range(len(dataset)): 16 | ann = dataset.get_ann_info(i) 17 | bboxes = ann['bboxes'] 18 | labels = ann['labels'] 19 | if 'bboxes_ignore' in ann: 20 | ignore = np.concatenate([ 21 | np.zeros(bboxes.shape[0], dtype=np.bool), 22 | np.ones(ann['bboxes_ignore'].shape[0], dtype=np.bool) 23 | ]) 24 | gt_ignore.append(ignore) 25 | bboxes = np.vstack([bboxes, ann['bboxes_ignore']]) 26 | labels = np.concatenate([labels, ann['labels_ignore']]) 27 | gt_bboxes.append(bboxes) 28 | gt_labels.append(labels) 29 | if not gt_ignore: 30 | gt_ignore = None  # eval_map expects None when no boxes are ignored 31 | if hasattr(dataset, 'year') and dataset.year == 2007: 32 | dataset_name = 'voc07' 33 | else: 34 | dataset_name = dataset.CLASSES 35 | eval_map( 36 | det_results, 37 | gt_bboxes, 38 | gt_labels, 39 | gt_ignore=gt_ignore, 40 | scale_ranges=None, 41 | iou_thr=iou_thr, 42 | dataset=dataset_name, 43 | print_summary=True) 44 | 45 | 46 | def main(): 47 | parser = ArgumentParser(description='VOC Evaluation') 48 | parser.add_argument('result', help='result file path') 49 | parser.add_argument('config', help='config file path') 50 | parser.add_argument( 51 | '--iou-thr', 52 | type=float, 53 | default=0.5, 54 | help='IoU threshold for evaluation') 55 | args = parser.parse_args() 56 | cfg = mmcv.Config.fromfile(args.config) 57 | test_dataset = mmcv.runner.obj_from_dict(cfg.data.test, datasets) 58 | voc_eval(args.result, test_dataset, args.iou_thr) 59 | 60 | 61 | if __name__ == '__main__': 62 | main() 63 | --------------------------------------------------------------------------------
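As a closing usage note, the evaluation entry point above can also be driven programmatically. A minimal sketch follows (the result path is hypothetical, and it assumes `voc_eval` from this script is in scope, e.g. appended below it), equivalent to `python tools/voc_eval.py results.pkl configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py`:

```python
import mmcv

from mmdet import datasets

# 'results.pkl' is a hypothetical pickled per-class detection list
# produced by the test script for the model named in the config.
cfg = mmcv.Config.fromfile('configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py')
test_dataset = mmcv.runner.obj_from_dict(cfg.data.test, datasets)
voc_eval('results.pkl', test_dataset, iou_thr=0.5)
```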