├── mmdet ├── ops │ ├── dcn │ │ ├── modules │ │ │ └── __init__.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── deform_pool.py │ │ ├── setup.py │ │ ├── __init__.py │ │ └── src │ │ │ └── deform_pool_cuda.cpp │ ├── nms │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── Makefile │ │ ├── gpu_nms.hpp │ │ ├── gpu_nms.pyx │ │ ├── nms_wrapper.py │ │ ├── cpu_nms.pyx │ │ └── setup.py │ ├── roi_pool │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_pool.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── roi_pool.py │ │ ├── __init__.py │ │ ├── setup.py │ │ ├── gradcheck.py │ │ └── src │ │ │ └── roi_pool_cuda.cpp │ ├── roi_align │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ ├── __init__.py │ │ ├── setup.py │ │ ├── gradcheck.py │ │ └── src │ │ │ └── roi_align_cuda.cpp │ └── __init__.py ├── models │ ├── necks │ │ └── __init__.py │ ├── mask_heads │ │ └── __init__.py │ ├── roi_extractors │ │ ├── __init__.py │ │ └── single_level.py │ ├── bbox_heads │ │ └── __init__.py │ ├── anchor_heads │ │ ├── __init__.py │ │ ├── retina_head.py │ │ └── rpn_head.py │ ├── backbones │ │ └── __init__.py │ ├── utils │ │ ├── __init__.py │ │ ├── weight_init.py │ │ ├── norm.py │ │ └── conv_module.py │ ├── detectors │ │ ├── __init__.py │ │ ├── retinanet.py │ │ ├── faster_rcnn.py │ │ ├── mask_rcnn.py │ │ ├── fast_rcnn.py │ │ ├── single_stage.py │ │ └── rpn.py │ ├── __init__.py │ ├── registry.py │ └── builder.py ├── __init__.py ├── core │ ├── mask │ │ ├── __init__.py │ │ ├── utils.py │ │ └── mask_target.py │ ├── anchor │ │ └── __init__.py │ ├── bbox │ │ ├── assigners │ │ │ ├── __init__.py │ │ │ ├── base_assigner.py │ │ │ └── assign_result.py │ │ ├── samplers │ │ │ ├── combined_sampler.py │ │ │ ├── __init__.py │ │ │ ├── sampling_result.py │ │ │ ├── pseudo_sampler.py │ │ │ ├── instance_balanced_pos_sampler.py │ │ │ ├── random_sampler.py │ │ │ ├── ohem_sampler.py │ │ │ ├── iou_balanced_neg_sampler.py │ │ │ └── base_sampler.py │ │ ├── __init__.py │ │ ├── assign_sampling.py │ │ ├── geometry.py │ │ └── bbox_target.py │ ├── utils │ │ ├── __init__.py │ │ ├── misc.py │ │ └── dist_utils.py │ ├── __init__.py │ ├── post_processing │ │ ├── __init__.py │ │ ├── bbox_nms.py │ │ └── merge_augs.py │ ├── loss │ │ └── __init__.py │ └── evaluation │ │ ├── __init__.py │ │ └── bbox_overlaps.py ├── datasets │ ├── loader │ │ ├── __init__.py │ │ └── build_loader.py │ ├── SAR.py │ ├── repeat_dataset.py │ ├── voc.py │ ├── __init__.py │ ├── concat_dataset.py │ ├── xml_style.py │ └── transforms.py └── apis │ ├── __init__.py │ ├── env.py │ ├── inference.py │ └── train.py ├── demo ├── 1.png ├── 2.png ├── 3.png ├── 4.png ├── V2.png ├── V3.png ├── V4.png ├── v1.png └── coco_test_12510.jpg ├── .travis.yml ├── tools ├── dist_train.sh ├── coco_eval.py ├── voc_eval.py └── train.py ├── compile.sh ├── ssd_debug ├── compute_mean.py ├── assign_sample_debug.py └── test_ssd流程.py ├── test_images.py ├── INSTALL.md ├── .gitignore ├── README.md ├── RetinaNet_debug └── compute_Receptive_field.py ├── setup.py ├── configs ├── retinanet_r101_fpn_1x.py ├── retinanet_r50_fpn_1x.py ├── retinanet_mobileV2_fpn_1x.py ├── retinanet_x101_32x4d_fpn_1x.py ├── retinanet_x101_64x4d_fpn_1x.py ├── rpn_r50_fpn_1x.py ├── rpn_r101_fpn_1x.py ├── rpn_x101_32x4d_fpn_1x.py ├── rpn_x101_64x4d_fpn_1x.py ├── fast_rcnn_r50_fpn_1x.py ├── fast_rcnn_r101_fpn_1x.py └── ssd300_coco.py └── TECHNICAL_DETAILS.md /mmdet/ops/dcn/modules/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /mmdet/ops/dcn/functions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mmdet/ops/nms/.gitignore: -------------------------------------------------------------------------------- 1 | *.cpp 2 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/functions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/functions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mmdet/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .fpn import FPN 2 | 3 | __all__ = ['FPN'] 4 | -------------------------------------------------------------------------------- /demo/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/237014845/MobilenetV2-Retina-Pytorch/HEAD/demo/1.png -------------------------------------------------------------------------------- /demo/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/237014845/MobilenetV2-Retina-Pytorch/HEAD/demo/2.png -------------------------------------------------------------------------------- /demo/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/237014845/MobilenetV2-Retina-Pytorch/HEAD/demo/3.png -------------------------------------------------------------------------------- /demo/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/237014845/MobilenetV2-Retina-Pytorch/HEAD/demo/4.png -------------------------------------------------------------------------------- /demo/V2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/237014845/MobilenetV2-Retina-Pytorch/HEAD/demo/V2.png -------------------------------------------------------------------------------- /demo/V3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/237014845/MobilenetV2-Retina-Pytorch/HEAD/demo/V3.png -------------------------------------------------------------------------------- /demo/V4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/237014845/MobilenetV2-Retina-Pytorch/HEAD/demo/V4.png -------------------------------------------------------------------------------- /demo/v1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/237014845/MobilenetV2-Retina-Pytorch/HEAD/demo/v1.png -------------------------------------------------------------------------------- /mmdet/ops/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_wrapper import nms, soft_nms 2 | 3 | __all__ = ['nms', 'soft_nms'] 4 | -------------------------------------------------------------------------------- /mmdet/models/mask_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcn_mask_head import FCNMaskHead 2 | 3 | __all__ = ['FCNMaskHead'] 4 | -------------------------------------------------------------------------------- /demo/coco_test_12510.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/237014845/MobilenetV2-Retina-Pytorch/HEAD/demo/coco_test_12510.jpg -------------------------------------------------------------------------------- /mmdet/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__, short_version 2 | 3 | __all__ = ['__version__', 'short_version'] 4 | -------------------------------------------------------------------------------- /mmdet/models/roi_extractors/__init__.py: -------------------------------------------------------------------------------- 1 | from .single_level import SingleRoIExtractor 2 | 3 | __all__ = ['SingleRoIExtractor'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.roi_pool import roi_pool 2 | from .modules.roi_pool import RoIPool 3 | 4 | __all__ = ['roi_pool', 'RoIPool'] 5 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.roi_align import roi_align 2 | from .modules.roi_align import RoIAlign 3 | 4 | __all__ = ['roi_align', 'RoIAlign'] 5 | -------------------------------------------------------------------------------- /mmdet/core/mask/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import split_combined_polys 2 | from .mask_target import mask_target 3 | 4 | __all__ = ['split_combined_polys', 'mask_target'] 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: trusty 2 | language: python 3 | 4 | install: 5 | - pip install flake8 6 | 7 | python: 8 | - "3.5" 9 | - "3.6" 10 | 11 | script: 12 | - flake8 -------------------------------------------------------------------------------- /mmdet/ops/nms/Makefile: -------------------------------------------------------------------------------- 1 | PYTHON=${PYTHON:-python} 2 | 3 | all: 4 | echo "Compiling nms kernels..." 
5 | $(PYTHON) setup.py build_ext --inplace 6 | 7 | clean: 8 | rm -f *.so 9 | -------------------------------------------------------------------------------- /mmdet/core/anchor/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_generator import AnchorGenerator 2 | from .anchor_target import anchor_target 3 | 4 | __all__ = ['AnchorGenerator', 'anchor_target'] 5 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | $PYTHON -m torch.distributed.launch --nproc_per_node=$2 $(dirname "$0")/train.py $1 --launcher pytorch ${@:3} 6 | -------------------------------------------------------------------------------- /mmdet/models/bbox_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_head import BBoxHead 2 | from .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead 3 | 4 | __all__ = ['BBoxHead', 'ConvFCBBoxHead', 'SharedFCBBoxHead'] 5 | -------------------------------------------------------------------------------- /mmdet/ops/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id, size_t base); 3 | size_t nms_Malloc(); 4 | -------------------------------------------------------------------------------- /mmdet/datasets/loader/__init__.py: -------------------------------------------------------------------------------- 1 | from .build_loader import build_dataloader 2 | from .sampler import GroupSampler, DistributedGroupSampler 3 | 4 | __all__ = [ 5 | 'GroupSampler', 'DistributedGroupSampler', 'build_dataloader' 6 | ] 7 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_assigner import BaseAssigner 2 | from .max_iou_assigner import MaxIoUAssigner 3 | from .assign_result import AssignResult 4 | 5 | __all__ = ['BaseAssigner', 'MaxIoUAssigner', 'AssignResult'] 6 | -------------------------------------------------------------------------------- /mmdet/models/anchor_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_head import AnchorHead 2 | from .rpn_head import RPNHead 3 | from .retina_head import RetinaHead 4 | from .ssd_head import SSDHead 5 | 6 | __all__ = ['AnchorHead', 'RPNHead', 'RetinaHead', 'SSDHead'] 7 | -------------------------------------------------------------------------------- /mmdet/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .dist_utils import allreduce_grads, DistOptimizerHook 2 | from .misc import tensor2imgs, unmap, multi_apply 3 | 4 | __all__ = [ 5 | 'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', 'unmap', 6 | 'multi_apply' 7 | ] 8 | -------------------------------------------------------------------------------- /mmdet/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import ResNet 2 | from .resnext import ResNeXt 3 | from .ssd_vgg import SSDVGG 4 | from .mobilenet import MobileNetV2 5 | from 
.shufflenet import ShuffleNetV2 6 | 7 | __all__ = ['ResNet', 'ResNeXt', 'SSDVGG', 'ShuffleNetV2', 'MobileNetV2'] 8 | -------------------------------------------------------------------------------- /mmdet/datasets/SAR.py: -------------------------------------------------------------------------------- 1 | from .xml_style import XMLDataset 2 | # from .voc import VOCDataset 3 | 4 | 5 | class SARDataset(XMLDataset): 6 | 7 | CLASSES = ('ship',) 8 | 9 | def __init__(self, **kwargs): 10 | super(SARDataset, self).__init__(**kwargs) 11 | self.abc = 1 -------------------------------------------------------------------------------- /mmdet/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .env import init_dist, get_root_logger, set_random_seed 2 | from .train import train_detector 3 | from .inference import inference_detector, show_result 4 | 5 | __all__ = [ 6 | 'init_dist', 'get_root_logger', 'set_random_seed', 'train_detector', 7 | 'inference_detector', 'show_result' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor import * # noqa: F401, F403 2 | from .bbox import * # noqa: F401, F403 3 | from .mask import * # noqa: F401, F403 4 | from .loss import * # noqa: F401, F403 5 | from .evaluation import * # noqa: F401, F403 6 | from .post_processing import * # noqa: F401, F403 7 | from .utils import * # noqa: F401, F403 8 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_nms import multiclass_nms 2 | from .merge_augs import (merge_aug_proposals, merge_aug_bboxes, 3 | merge_aug_scores, merge_aug_masks) 4 | 5 | __all__ = [ 6 | 'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes', 7 | 'merge_aug_scores', 'merge_aug_masks' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/base_assigner.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | # A class is made an abstract base class by setting its metaclass to abc.ABCMeta 4 | class BaseAssigner(metaclass=ABCMeta): 5 | # @abstractmethod declares a base-class method as abstract, so every subclass must override it; similarly, @abstractproperty declares an abstract property. 6 | @abstractmethod 7 | def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None): 8 | pass 9 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='roi_pool', 6 | ext_modules=[ 7 | CUDAExtension('roi_pool_cuda', [ 8 | 'src/roi_pool_cuda.cpp', 9 | 'src/roi_pool_kernel.cu', 10 | ]) 11 | ], 12 | cmdclass={'build_ext': BuildExtension}) 13 | -------------------------------------------------------------------------------- /mmdet/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .conv_module import ConvModule 2 | from .norm import build_norm_layer 3 | from .weight_init import (xavier_init, normal_init, uniform_init, kaiming_init, 4 | bias_init_with_prob) 5 | 6 | __all__ = [ 7 | 'ConvModule', 'build_norm_layer', 'xavier_init', 'normal_init', 8 | 
'uniform_init', 'kaiming_init', 'bias_init_with_prob' 9 | ] 10 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='roi_align_cuda', 6 | ext_modules=[ 7 | CUDAExtension('roi_align_cuda', [ 8 | 'src/roi_align_cuda.cpp', 9 | 'src/roi_align_kernel.cu', 10 | ]), 11 | ], 12 | cmdclass={'build_ext': BuildExtension}) 13 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/modules/roi_pool.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_pool import roi_pool 3 | 4 | 5 | class RoIPool(Module): 6 | 7 | def __init__(self, out_size, spatial_scale): 8 | super(RoIPool, self).__init__() 9 | 10 | self.out_size = out_size 11 | self.spatial_scale = float(spatial_scale) 12 | 13 | def forward(self, features, rois): 14 | return roi_pool(features, rois, self.out_size, self.spatial_scale) 15 | -------------------------------------------------------------------------------- /mmdet/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseDetector 2 | from .single_stage import SingleStageDetector 3 | from .two_stage import TwoStageDetector 4 | from .rpn import RPN 5 | from .fast_rcnn import FastRCNN 6 | from .faster_rcnn import FasterRCNN 7 | from .mask_rcnn import MaskRCNN 8 | from .cascade_rcnn import CascadeRCNN 9 | from .retinanet import RetinaNet 10 | 11 | __all__ = [ 12 | 'BaseDetector', 'SingleStageDetector', 'TwoStageDetector', 'RPN', 13 | 'FastRCNN', 'FasterRCNN', 'MaskRCNN', 'CascadeRCNN', 'RetinaNet' 14 | ] 15 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='deform_conv', 6 | ext_modules=[ 7 | CUDAExtension('deform_conv_cuda', [ 8 | 'src/deform_conv_cuda.cpp', 9 | 'src/deform_conv_cuda_kernel.cu', 10 | ]), 11 | CUDAExtension('deform_pool_cuda', [ 12 | 'src/deform_pool_cuda.cpp', 'src/deform_pool_cuda_kernel.cu' 13 | ]), 14 | ], 15 | cmdclass={'build_ext': BuildExtension}) 16 | -------------------------------------------------------------------------------- /mmdet/core/loss/__init__.py: -------------------------------------------------------------------------------- 1 | from .losses import (weighted_nll_loss, weighted_cross_entropy, 2 | weighted_binary_cross_entropy, sigmoid_focal_loss, 3 | weighted_sigmoid_focal_loss, mask_cross_entropy, 4 | smooth_l1_loss, weighted_smoothl1, accuracy) 5 | 6 | __all__ = [ 7 | 'weighted_nll_loss', 'weighted_cross_entropy', 8 | 'weighted_binary_cross_entropy', 'sigmoid_focal_loss', 9 | 'weighted_sigmoid_focal_loss', 'mask_cross_entropy', 'smooth_l1_loss', 10 | 'weighted_smoothl1', 'accuracy' 11 | ] 12 | -------------------------------------------------------------------------------- /mmdet/models/detectors/retinanet.py: -------------------------------------------------------------------------------- 1 | from .single_stage import SingleStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | 
@DETECTORS.register_module 6 | class RetinaNet(SingleStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head, 12 | train_cfg=None, 13 | test_cfg=None, 14 | pretrained=None): 15 | super(RetinaNet, self).__init__(backbone, neck, bbox_head, train_cfg, 16 | test_cfg, pretrained) 17 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/gradcheck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import gradcheck 3 | 4 | import os.path as osp 5 | import sys 6 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 7 | from roi_pool import RoIPool # noqa: E402 8 | 9 | feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda() 10 | rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55], 11 | [1, 67, 40, 110, 120]]).cuda() 12 | inputs = (feat, rois) 13 | print('Gradcheck for roi pooling...') 14 | test = gradcheck(RoIPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3) 15 | print(test) 16 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/combined_sampler.py: -------------------------------------------------------------------------------- 1 | from .base_sampler import BaseSampler 2 | from ..assign_sampling import build_sampler 3 | 4 | 5 | class CombinedSampler(BaseSampler): 6 | 7 | def __init__(self, pos_sampler, neg_sampler, **kwargs): 8 | super(CombinedSampler, self).__init__(**kwargs) 9 | self.pos_sampler = build_sampler(pos_sampler, **kwargs) 10 | self.neg_sampler = build_sampler(neg_sampler, **kwargs) 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | -------------------------------------------------------------------------------- /mmdet/datasets/repeat_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class RepeatDataset(object): 5 | 6 | def __init__(self, dataset, times): 7 | self.dataset = dataset 8 | self.times = times 9 | self.CLASSES = dataset.CLASSES 10 | if hasattr(self.dataset, 'flag'): 11 | # np.tile replicates an array along each axis; here the group flags are repeated 'times' times 12 | self.flag = np.tile(self.dataset.flag, times) 13 | 14 | self._ori_len = len(self.dataset) 15 | 16 | def __getitem__(self, idx): 17 | return self.dataset[idx % self._ori_len] 18 | 19 | def __len__(self): 20 | return self.times * self._ori_len 21 | -------------------------------------------------------------------------------- /compile.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | echo "Building roi align op..." 6 | cd mmdet/ops/roi_align 7 | if [ -d "build" ]; then 8 | rm -r build 9 | fi 10 | $PYTHON setup.py build_ext --inplace 11 | 12 | echo "Building roi pool op..." 13 | cd ../roi_pool 14 | if [ -d "build" ]; then 15 | rm -r build 16 | fi 17 | $PYTHON setup.py build_ext --inplace 18 | 19 | echo "Building nms op..." 20 | cd ../nms 21 | make clean 22 | make PYTHON=${PYTHON} 23 | 24 | echo "Building dcn..." 
25 | cd ../dcn 26 | if [ -d "build" ]; then 27 | rm -r build 28 | fi 29 | $PYTHON setup.py build_ext --inplace 30 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/modules/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_align import RoIAlignFunction 3 | 4 | 5 | class RoIAlign(Module): 6 | 7 | def __init__(self, out_size, spatial_scale, sample_num=0): 8 | super(RoIAlign, self).__init__() 9 | 10 | self.out_size = out_size 11 | self.spatial_scale = float(spatial_scale) 12 | self.sample_num = int(sample_num) 13 | 14 | def forward(self, features, rois): 15 | return RoIAlignFunction.apply(features, rois, self.out_size, 16 | self.spatial_scale, self.sample_num) 17 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_sampler import BaseSampler 2 | from .pseudo_sampler import PseudoSampler 3 | from .random_sampler import RandomSampler 4 | from .instance_balanced_pos_sampler import InstanceBalancedPosSampler 5 | from .iou_balanced_neg_sampler import IoUBalancedNegSampler 6 | from .combined_sampler import CombinedSampler 7 | from .ohem_sampler import OHEMSampler 8 | from .sampling_result import SamplingResult 9 | 10 | __all__ = [ 11 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 12 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 13 | 'OHEMSampler', 'SamplingResult' 14 | ] 15 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.deform_conv import deform_conv, modulated_deform_conv 2 | from .functions.deform_pool import deform_roi_pooling 3 | from .modules.deform_conv import (DeformConv, ModulatedDeformConv, 4 | ModulatedDeformConvPack) 5 | from .modules.deform_pool import (DeformRoIPooling, DeformRoIPoolingPack, 6 | ModulatedDeformRoIPoolingPack) 7 | 8 | __all__ = [ 9 | 'DeformConv', 'DeformRoIPooling', 'DeformRoIPoolingPack', 10 | 'ModulatedDeformRoIPoolingPack', 'ModulatedDeformConv', 11 | 'ModulatedDeformConvPack', 'deform_conv', 12 | 'modulated_deform_conv', 'deform_roi_pooling' 13 | ] 14 | -------------------------------------------------------------------------------- /mmdet/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .dcn import (DeformConv, DeformRoIPooling, DeformRoIPoolingPack, 2 | ModulatedDeformRoIPoolingPack, ModulatedDeformConv, 3 | ModulatedDeformConvPack, deform_conv, modulated_deform_conv, 4 | deform_roi_pooling) 5 | from .nms import nms, soft_nms 6 | from .roi_align import RoIAlign, roi_align 7 | from .roi_pool import RoIPool, roi_pool 8 | 9 | __all__ = [ 10 | 'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool', 11 | 'DeformConv', 'DeformRoIPooling', 'DeformRoIPoolingPack', 12 | 'ModulatedDeformRoIPoolingPack', 'ModulatedDeformConv', 13 | 'ModulatedDeformConvPack', 'deform_conv', 'modulated_deform_conv', 14 | 'deform_roi_pooling' 15 | ] 16 | -------------------------------------------------------------------------------- /mmdet/datasets/voc.py: -------------------------------------------------------------------------------- 1 | from .xml_style import XMLDataset 2 | 3 | 4 | class 
VOCDataset(XMLDataset): 5 | 6 | CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 7 | 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 8 | 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 9 | 'tvmonitor') 10 | # CLASSES = ('ship',) 11 | 12 | 13 | def __init__(self, **kwargs): 14 | super(VOCDataset, self).__init__(**kwargs) 15 | if 'VOC2007' in self.img_prefix: 16 | self.year = 2007 17 | elif 'VOC2012' in self.img_prefix: 18 | self.year = 2012 19 | else: 20 | raise ValueError('Cannot infer dataset year from img_prefix') 21 | -------------------------------------------------------------------------------- /mmdet/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import * # noqa: F401,F403 2 | from .necks import * # noqa: F401,F403 3 | from .roi_extractors import * # noqa: F401,F403 4 | from .anchor_heads import * # noqa: F401,F403 5 | from .bbox_heads import * # noqa: F401,F403 6 | from .mask_heads import * # noqa: F401,F403 7 | from .detectors import * # noqa: F401,F403 8 | from .registry import BACKBONES, NECKS, ROI_EXTRACTORS, HEADS, DETECTORS 9 | from .builder import (build_backbone, build_neck, build_roi_extractor, 10 | build_head, build_detector) 11 | 12 | __all__ = [ 13 | 'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'HEADS', 'DETECTORS', 14 | 'build_backbone', 'build_neck', 'build_roi_extractor', 'build_head', 15 | 'build_detector' 16 | ] 17 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/assign_result.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class AssignResult(object): 5 | 6 | def __init__(self, num_gts, gt_inds, max_overlaps, labels=None): 7 | self.num_gts = num_gts 8 | self.gt_inds = gt_inds 9 | self.max_overlaps = max_overlaps 10 | self.labels = labels 11 | 12 | def add_gt_(self, gt_labels): 13 | self_inds = torch.arange( 14 | 1, len(gt_labels) + 1, dtype=torch.long, device=gt_labels.device) 15 | self.gt_inds = torch.cat([self_inds, self.gt_inds]) 16 | self.max_overlaps = torch.cat( 17 | [self.max_overlaps.new_ones(self.num_gts), self.max_overlaps]) 18 | if self.labels is not None: 19 | self.labels = torch.cat([gt_labels, self.labels]) 20 | -------------------------------------------------------------------------------- /mmdet/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .custom import CustomDataset 2 | from .xml_style import XMLDataset 3 | from .coco import CocoDataset 4 | from .voc import VOCDataset 5 | from .loader import GroupSampler, DistributedGroupSampler, build_dataloader 6 | from .utils import to_tensor, random_scale, show_ann, get_dataset 7 | from .concat_dataset import ConcatDataset 8 | from .repeat_dataset import RepeatDataset 9 | from .extra_aug import ExtraAugmentation 10 | from .SAR import SARDataset 11 | 12 | __all__ = [ 13 | 'CustomDataset', 'XMLDataset', 'CocoDataset', 'SARDataset', 'VOCDataset', 'GroupSampler', 14 | 'DistributedGroupSampler', 'build_dataloader', 'to_tensor', 'random_scale', 15 | 'show_ann', 'get_dataset', 'ConcatDataset', 'RepeatDataset', 16 | 'ExtraAugmentation', 17 | ] 18 | -------------------------------------------------------------------------------- /ssd_debug/compute_mean.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | 5 | path = 
'/home/hs/data/SAR/JPEGImages' 6 | 7 | 8 | def compute(path): 9 | file_names = os.listdir(path) 10 | per_image_Rmean = [] 11 | per_image_Gmean = [] 12 | per_image_Bmean = [] 13 | for file_name in file_names: 14 | img = cv2.imread(os.path.join(path, file_name), 1) 15 | per_image_Bmean.append(np.mean(img[:, :, 0])) 16 | per_image_Gmean.append(np.mean(img[:, :, 1])) 17 | per_image_Rmean.append(np.mean(img[:, :, 2])) 18 | R_mean = np.mean(per_image_Rmean) 19 | G_mean = np.mean(per_image_Gmean) 20 | B_mean = np.mean(per_image_Bmean) 21 | return R_mean, G_mean, B_mean 22 | 23 | 24 | if __name__ == '__main__': 25 | R, G, B = compute(path) 26 | print(R, G, B) 27 | 28 | -------------------------------------------------------------------------------- /mmdet/models/detectors/faster_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage import TwoStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class FasterRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | rpn_head, 12 | bbox_roi_extractor, 13 | bbox_head, 14 | train_cfg, 15 | test_cfg, 16 | pretrained=None): 17 | super(FasterRCNN, self).__init__( 18 | backbone=backbone, 19 | neck=neck, 20 | rpn_head=rpn_head, 21 | bbox_roi_extractor=bbox_roi_extractor, 22 | bbox_head=bbox_head, 23 | train_cfg=train_cfg, 24 | test_cfg=test_cfg, 25 | pretrained=pretrained) 26 | -------------------------------------------------------------------------------- /test_images.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | from mmcv.runner import load_checkpoint 3 | from mmdet.models import build_detector 4 | from mmdet.apis import inference_detector, show_result 5 | 6 | cfg = mmcv.Config.fromfile('configs/faster_rcnn_r50_fpn_1x.py') 7 | cfg.model.pretrained = None 8 | 9 | # construct the model and load checkpoint 10 | model = build_detector(cfg.model, test_cfg=cfg.test_cfg) 11 | _ = load_checkpoint(model, 'https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r50_fpn_1x_20181010-3d1b3351.pth') 12 | 13 | # test a single image 14 | img = mmcv.imread('test.jpg') 15 | result = inference_detector(model, img, cfg) 16 | show_result(img, result) 17 | 18 | # test a list of images 19 | imgs = ['test1.jpg', 'test2.jpg'] 20 | for i, result in enumerate(inference_detector(model, imgs, cfg, device='cuda:0')): 21 | print(i, imgs[i]) 22 | show_result(imgs[i], result) -------------------------------------------------------------------------------- /tools/coco_eval.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | 3 | from mmdet.core import coco_eval 4 | 5 | 6 | def main(): 7 | parser = ArgumentParser(description='COCO Evaluation') 8 | parser.add_argument('result', help='result file path') 9 | parser.add_argument('--ann', help='annotation file path') 10 | parser.add_argument( 11 | '--types', 12 | type=str, 13 | nargs='+', 14 | choices=['proposal_fast', 'proposal', 'bbox', 'segm', 'keypoint'], 15 | default=['bbox'], 16 | help='result types') 17 | parser.add_argument( 18 | '--max-dets', 19 | type=int, 20 | nargs='+', 21 | default=[100, 300, 1000], 22 | help='proposal numbers, only used for recall evaluation') 23 | args = parser.parse_args() 24 | coco_eval(args.result, args.types, args.ann, args.max_dets) 25 | 26 | 27 | if __name__ == '__main__': 28 | main() 29 | 
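A usage sketch for `tools/coco_eval.py` above — a minimal invocation under assumed paths (`results.pkl` stands in for whatever result file your test run dumps; the annotation path follows the `data/coco` layout from INSTALL.md):

```shell
# Hypothetical file names; --types may be any of proposal_fast, proposal, bbox, segm, keypoint.
python tools/coco_eval.py results.pkl --ann data/coco/annotations/instances_val2017.json --types bbox
```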
-------------------------------------------------------------------------------- /mmdet/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 3 | 4 | 5 | class ConcatDataset(_ConcatDataset): 6 | """A wrapper of concatenated datasets. 7 | 8 | Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but 9 | also concatenates the group flags for image aspect ratio. 10 | 11 | Args: 12 | datasets (list[:obj:`Dataset`]): A list of datasets. 13 | """ 14 | 15 | def __init__(self, datasets): 16 | super(ConcatDataset, self).__init__(datasets) 17 | self.CLASSES = datasets[0].CLASSES 18 | # hasattr checks whether datasets[0] has a 'flag' attribute and returns True if it does 19 | # 'flag' records, for each image in the dataset, whether its width is greater than its height 20 | # (in this training set, every image is wider than it is tall) 21 | if hasattr(datasets[0], 'flag'): 22 | flags = [] 23 | for i in range(0, len(datasets)): 24 | flags.append(datasets[i].flag) 25 | self.flag = np.concatenate(flags) 26 | -------------------------------------------------------------------------------- /mmdet/models/detectors/mask_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage import TwoStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class MaskRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | rpn_head, 12 | bbox_roi_extractor, 13 | bbox_head, 14 | mask_roi_extractor, 15 | mask_head, 16 | train_cfg, 17 | test_cfg, 18 | pretrained=None): 19 | super(MaskRCNN, self).__init__( 20 | backbone=backbone, 21 | neck=neck, 22 | rpn_head=rpn_head, 23 | bbox_roi_extractor=bbox_roi_extractor, 24 | bbox_head=bbox_head, 25 | mask_roi_extractor=mask_roi_extractor, 26 | mask_head=mask_head, 27 | train_cfg=train_cfg, 28 | test_cfg=test_cfg, 29 | pretrained=pretrained) 30 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/gradcheck.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.autograd import gradcheck 4 | 5 | import os.path as osp 6 | import sys 7 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 8 | from roi_align import RoIAlign # noqa: E402 9 | 10 | feat_size = 15 11 | spatial_scale = 1.0 / 8 12 | img_size = feat_size / spatial_scale 13 | num_imgs = 2 14 | num_rois = 20 15 | 16 | batch_ind = np.random.randint(num_imgs, size=(num_rois, 1)) 17 | rois = np.random.rand(num_rois, 4) * img_size * 0.5 18 | rois[:, 2:] += img_size * 0.5 19 | rois = np.hstack((batch_ind, rois)) 20 | 21 | feat = torch.randn( 22 | num_imgs, 16, feat_size, feat_size, requires_grad=True, device='cuda:0') 23 | rois = torch.from_numpy(rois).float().cuda() 24 | inputs = (feat, rois) 25 | print('Gradcheck for roi align...') 26 | test = gradcheck(RoIAlign(3, spatial_scale), inputs, atol=1e-3, eps=1e-3) 27 | print(test) 28 | test = gradcheck(RoIAlign(3, spatial_scale, 2), inputs, atol=1e-3, eps=1e-3) 29 | print(test) 30 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .class_names import (voc_classes, imagenet_det_classes, 2 | imagenet_vid_classes, coco_classes, dataset_aliases, 3 | get_classes) 4 | from .coco_utils import coco_eval, fast_eval_recall, results2json 5 | from .eval_hooks import (DistEvalHook, DistEvalmAPHook, 
CocoDistEvalRecallHook, 6 | CocoDistEvalmAPHook) 7 | from .mean_ap import average_precision, eval_map, print_map_summary 8 | from .recall import (eval_recalls, print_recall_summary, plot_num_recall, 9 | plot_iou_recall) 10 | 11 | __all__ = [ 12 | 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes', 13 | 'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval', 14 | 'fast_eval_recall', 'results2json', 'DistEvalHook', 'DistEvalmAPHook', 15 | 'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision', 16 | 'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary', 17 | 'plot_num_recall', 'plot_iou_recall' 18 | ] 19 | -------------------------------------------------------------------------------- /mmdet/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | from .geometry import bbox_overlaps 2 | from .assigners import BaseAssigner, MaxIoUAssigner, AssignResult 3 | from .samplers import (BaseSampler, PseudoSampler, RandomSampler, 4 | InstanceBalancedPosSampler, IoUBalancedNegSampler, 5 | CombinedSampler, SamplingResult) 6 | from .assign_sampling import build_assigner, build_sampler, assign_and_sample 7 | from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping, 8 | bbox_mapping_back, bbox2roi, roi2bbox, bbox2result) 9 | from .bbox_target import bbox_target 10 | 11 | __all__ = [ 12 | 'bbox_overlaps', 'BaseAssigner', 'MaxIoUAssigner', 'AssignResult', 13 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 14 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 15 | 'SamplingResult', 'build_assigner', 'build_sampler', 'assign_and_sample', 16 | 'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_mapping', 17 | 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', 'bbox_target' 18 | ] 19 | -------------------------------------------------------------------------------- /mmdet/core/mask/utils.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | 4 | def split_combined_polys(polys, poly_lens, polys_per_mask): 5 | """Split the combined 1-D polys into masks. 6 | 7 | A mask is represented as a list of polys, and a poly is represented as 8 | a 1-D array. In dataset, all masks are concatenated into a single 1-D 9 | tensor. Here we need to split the tensor into original representations. 10 | 11 | Args: 12 | polys (list): a list (length = image num) of 1-D tensors 13 | poly_lens (list): a list (length = image num) of poly length 14 | polys_per_mask (list): a list (length = image num) of poly number 15 | of each mask 16 | 17 | Returns: 18 | list: a list (length = image num) of list (length = mask num) of 19 | list (length = poly num) of numpy array 20 | """ 21 | mask_polys_list = [] 22 | for img_id in range(len(polys)): 23 | polys_single = polys[img_id] 24 | polys_lens_single = poly_lens[img_id].tolist() 25 | polys_per_mask_single = polys_per_mask[img_id].tolist() 26 | 27 | split_polys = mmcv.slice_list(polys_single, polys_lens_single) 28 | mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single) 29 | mask_polys_list.append(mask_polys) 30 | return mask_polys_list 31 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assign_sampling.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from . 
import assigners, samplers 4 | 5 | 6 | def build_assigner(cfg, **kwargs): 7 | if isinstance(cfg, assigners.BaseAssigner): 8 | return cfg 9 | elif isinstance(cfg, dict): 10 | return mmcv.runner.obj_from_dict( 11 | cfg, assigners, default_args=kwargs) 12 | else: 13 | raise TypeError('Invalid type {} for building an assigner'.format( 14 | type(cfg))) 15 | 16 | 17 | def build_sampler(cfg, **kwargs): 18 | if isinstance(cfg, samplers.BaseSampler): 19 | return cfg 20 | elif isinstance(cfg, dict): 21 | return mmcv.runner.obj_from_dict( 22 | cfg, samplers, default_args=kwargs) 23 | else: 24 | raise TypeError('Invalid type {} for building a sampler'.format( 25 | type(cfg))) 26 | 27 | # cfg = train_cfg 28 | def assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg): 29 | bbox_assigner = build_assigner(cfg.assigner) 30 | bbox_sampler = build_sampler(cfg.sampler) 31 | assign_result = bbox_assigner.assign(bboxes, gt_bboxes, gt_bboxes_ignore, 32 | gt_labels) 33 | sampling_result = bbox_sampler.sample(assign_result, bboxes, gt_bboxes, 34 | gt_labels) 35 | return assign_result, sampling_result 36 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/sampling_result.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class SamplingResult(object): 5 | 6 | def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result, 7 | gt_flags): 8 | # indices (into the anchor list) of the positive samples in assigned_gt_inds 9 | self.pos_inds = pos_inds 10 | # indices (into the anchor list) of the negative samples in assigned_gt_inds 11 | self.neg_inds = neg_inds 12 | # coordinates of the anchors sampled as positives 13 | self.pos_bboxes = bboxes[pos_inds] 14 | # coordinates of the anchors sampled as negatives 15 | self.neg_bboxes = bboxes[neg_inds] 16 | # self.pos_is_gt: gt_flags of the sampled positive anchors (0 for ordinary anchors) 17 | self.pos_is_gt = gt_flags[pos_inds] 18 | 19 | self.num_gts = gt_bboxes.shape[0] 20 | # self.pos_assigned_gt_inds: index of the gt box matched to each positive anchor, 21 | # i.e. the value assigned in gt_inds minus 1 22 | self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1 23 | # self.pos_gt_bboxes: the [xmin, ymin, xmax, ymax] of the gt box matched to each positive anchor 24 | self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds, :] 25 | if assign_result.labels is not None: 26 | # self.pos_gt_labels: the ground-truth label matched to each positive anchor 27 | self.pos_gt_labels = assign_result.labels[pos_inds] 28 | else: 29 | self.pos_gt_labels = None 30 | 31 | @property 32 | def bboxes(self): 33 | return torch.cat([self.pos_bboxes, self.neg_bboxes]) 34 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/pseudo_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sampler import BaseSampler 4 | from .sampling_result import SamplingResult 5 | 6 | 7 | class PseudoSampler(BaseSampler): 8 | 9 | def __init__(self, **kwargs): 10 | pass 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | 18 | def sample(self, assign_result, bboxes, gt_bboxes, **kwargs): 19 | # torch.nonzero(input) returns the indices of the elements of input that are nonzero (here, > 0) 20 | # indices of the anchors assigned as positive samples 21 | pos_inds = torch.nonzero( 22 | assign_result.gt_inds > 0).squeeze(-1).unique() 23 | # indices of the anchors assigned as negative samples 24 | neg_inds = torch.nonzero( 25 | assign_result.gt_inds == 0).squeeze(-1).unique() 26 | # gt_flags.shape: [8732] (one flag per anchor; 8732 is the SSD300 anchor count) 27 | gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8) 28 | sampling_result = 
SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 29 | assign_result, gt_flags) 30 | return sampling_result 31 | 32 | # bboxes = torch.randn(8, 4) 33 | # gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8) 34 | # pos_inds = torch.LongTensor([7, 5, 3, 1]) 35 | # print(gt_flags, gt_flags.shape) 36 | # print(gt_flags[pos_inds]) -------------------------------------------------------------------------------- /mmdet/models/registry.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class Registry(object): 5 | 6 | def __init__(self, name): 7 | self._name = name 8 | self._module_dict = dict() 9 | 10 | @property 11 | def name(self): 12 | return self._name 13 | 14 | @property 15 | def module_dict(self): 16 | return self._module_dict 17 | 18 | def _register_module(self, module_class): 19 | """Register a module. 20 | 21 | Args: 22 | module (:obj:`nn.Module`): Module to be registered. 23 | """ 24 | if not issubclass(module_class, nn.Module): 25 | raise TypeError( 26 | 'module must be a child of nn.Module, but got {}'.format( 27 | module_class)) 28 | module_name = module_class.__name__ 29 | if module_name in self._module_dict: 30 | raise KeyError('{} is already registered in {}'.format( 31 | module_name, self.name)) 32 | self._module_dict[module_name] = module_class 33 | 34 | def register_module(self, cls): 35 | self._register_module(cls) 36 | return cls 37 | 38 | 39 | BACKBONES = Registry('backbone') 40 | NECKS = Registry('neck') 41 | ROI_EXTRACTORS = Registry('roi_extractor') 42 | HEADS = Registry('head') 43 | DETECTORS = Registry('detector') 44 | 45 | # print(DETECTORS.module_dict) 46 | # print(HEADS.module_dict) 47 | 48 | -------------------------------------------------------------------------------- /mmdet/datasets/loader/build_loader.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | from mmcv.runner import get_dist_info 4 | from mmcv.parallel import collate 5 | from torch.utils.data import DataLoader 6 | 7 | from .sampler import GroupSampler, DistributedGroupSampler 8 | 9 | # https://github.com/pytorch/pytorch/issues/973 10 | import resource 11 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 12 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 13 | 14 | 15 | def build_dataloader(dataset, 16 | imgs_per_gpu, 17 | workers_per_gpu, 18 | num_gpus=1, 19 | dist=True, 20 | **kwargs): 21 | if dist: 22 | rank, world_size = get_dist_info() 23 | sampler = DistributedGroupSampler(dataset, imgs_per_gpu, world_size, 24 | rank) 25 | batch_size = imgs_per_gpu 26 | num_workers = workers_per_gpu 27 | else: 28 | if not kwargs.get('shuffle', True): 29 | sampler = None 30 | else: 31 | sampler = GroupSampler(dataset, imgs_per_gpu) 32 | batch_size = num_gpus * imgs_per_gpu 33 | num_workers = num_gpus * workers_per_gpu 34 | 35 | data_loader = DataLoader( 36 | dataset, 37 | batch_size=batch_size, 38 | sampler=sampler, 39 | num_workers=num_workers, 40 | collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu), 41 | pin_memory=False, 42 | **kwargs) 43 | 44 | return data_loader 45 | -------------------------------------------------------------------------------- /mmdet/core/mask/mask_target.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import mmcv 4 | 5 | 6 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list, 7 | cfg): 8 | cfg_list = 
[cfg for _ in range(len(pos_proposals_list))] 9 | mask_targets = map(mask_target_single, pos_proposals_list, 10 | pos_assigned_gt_inds_list, gt_masks_list, cfg_list) 11 | mask_targets = torch.cat(list(mask_targets)) 12 | return mask_targets 13 | 14 | 15 | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg): 16 | mask_size = cfg.mask_size 17 | num_pos = pos_proposals.size(0) 18 | mask_targets = [] 19 | if num_pos > 0: 20 | proposals_np = pos_proposals.cpu().numpy() 21 | pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy() 22 | for i in range(num_pos): 23 | gt_mask = gt_masks[pos_assigned_gt_inds[i]] 24 | bbox = proposals_np[i, :].astype(np.int32) 25 | x1, y1, x2, y2 = bbox 26 | w = np.maximum(x2 - x1 + 1, 1) 27 | h = np.maximum(y2 - y1 + 1, 1) 28 | # mask is uint8 both before and after resizing 29 | target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w], 30 | (mask_size, mask_size)) 31 | mask_targets.append(target) 32 | mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to( 33 | pos_proposals.device) 34 | else: 35 | mask_targets = pos_proposals.new_zeros((0, mask_size, mask_size)) 36 | return mask_targets 37 | -------------------------------------------------------------------------------- /mmdet/core/utils/misc.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import mmcv 4 | import numpy as np 5 | from six.moves import map, zip 6 | 7 | 8 | def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): 9 | num_imgs = tensor.size(0) 10 | mean = np.array(mean, dtype=np.float32) 11 | std = np.array(std, dtype=np.float32) 12 | imgs = [] 13 | for img_id in range(num_imgs): 14 | img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0) 15 | img = mmcv.imdenormalize( 16 | img, mean, std, to_bgr=to_rgb).astype(np.uint8) 17 | imgs.append(np.ascontiguousarray(img)) 18 | return imgs 19 | 20 | 21 | def multi_apply(func, *args, **kwargs): 22 | pfunc = partial(func, **kwargs) if kwargs else func 23 | # map() is a Python built-in higher-order function: it takes a function f and a list, 24 | # applies f to every element of the list in turn, and returns the results as a new list 25 | map_results = map(pfunc, *args) 26 | # >>> a = [1, 2, 3] 27 | # >>> b = [4, 5, 6] 28 | # >>> zipped = zip(a, b) # pairs up elements as a list of tuples 29 | # [(1, 4), (2, 5), (3, 6)] 30 | # i.e. the per-image outputs (all_labels, all_label_weights, all_bbox_targets, all_bbox_weights, 31 | # pos_inds_list, neg_inds_list) are zipped into tuples, then gathered into a tuple of lists 32 | return tuple(map(list, zip(*map_results))) 33 | 34 | 35 | def unmap(data, count, inds, fill=0): 36 | """ Unmap a subset of items (data) back to the original set of items (of 37 | size count) """ 38 | if data.dim() == 1: 39 | ret = data.new_full((count, ), fill) 40 | ret[inds] = data 41 | else: 42 | new_size = (count, ) + data.size()[1:] 43 | ret = data.new_full(new_size, fill) 44 | ret[inds, :] = data 45 | return ret 46 | 47 | -------------------------------------------------------------------------------- /mmdet/models/utils/weight_init.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | 4 | 5 | def xavier_init(module, gain=1, bias=0, distribution='normal'): 6 | assert distribution in ['uniform', 'normal'] 7 | if distribution == 'uniform': 8 | nn.init.xavier_uniform_(module.weight, gain=gain) 9 | else: 10 | nn.init.xavier_normal_(module.weight, gain=gain) 11 | if hasattr(module, 'bias'): 12 | nn.init.constant_(module.bias, bias) 13 | 14 | 15 | def normal_init(module, mean=0, std=1, bias=0): 16 | 
nn.init.normal_(module.weight, mean, std) 17 | if hasattr(module, 'bias'): 18 | nn.init.constant_(module.bias, bias) 19 | 20 | 21 | def uniform_init(module, a=0, b=1, bias=0): 22 | nn.init.uniform_(module.weight, a, b) 23 | if hasattr(module, 'bias'): 24 | nn.init.constant_(module.bias, bias) 25 | 26 | 27 | def kaiming_init(module, 28 | mode='fan_out', 29 | nonlinearity='relu', 30 | bias=0, 31 | distribution='normal'): 32 | assert distribution in ['uniform', 'normal'] 33 | if distribution == 'uniform': 34 | nn.init.kaiming_uniform_( 35 | module.weight, mode=mode, nonlinearity=nonlinearity) 36 | else: 37 | nn.init.kaiming_normal_( 38 | module.weight, mode=mode, nonlinearity=nonlinearity) 39 | if hasattr(module, 'bias'): 40 | nn.init.constant_(module.bias, bias) 41 | 42 | 43 | def bias_init_with_prob(prior_prob): 44 | """Initialize conv/fc bias value according to a given probability.""" 45 | bias_init = float(-np.log((1 - prior_prob) / prior_prob)) 46 | return bias_init 47 | -------------------------------------------------------------------------------- /mmdet/ops/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | # cython: language_level=3, boundscheck=False 9 | 10 | import numpy as np 11 | cimport numpy as np 12 | 13 | assert sizeof(int) == sizeof(np.int32_t) 14 | 15 | cdef extern from "gpu_nms.hpp": 16 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int, size_t) nogil 17 | size_t nms_Malloc() nogil 18 | 19 | memory_pool = {} 20 | 21 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 22 | np.int32_t device_id=0): 23 | cdef int boxes_num = dets.shape[0] 24 | cdef int boxes_dim = 5 25 | cdef int num_out 26 | cdef size_t base 27 | cdef np.ndarray[np.int32_t, ndim=1] \ 28 | keep = np.zeros(boxes_num, dtype=np.int32) 29 | cdef np.ndarray[np.float32_t, ndim=1] \ 30 | scores = dets[:, 4] 31 | cdef np.ndarray[np.int_t, ndim=1] \ 32 | order = scores.argsort()[::-1] 33 | cdef np.ndarray[np.float32_t, ndim=2] \ 34 | sorted_dets = dets[order, :5] 35 | cdef float cthresh = thresh 36 | if device_id not in memory_pool: 37 | with nogil: 38 | base = nms_Malloc() 39 | memory_pool[device_id] = base 40 | # print "malloc", base 41 | base = memory_pool[device_id] 42 | with nogil: 43 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, cthresh, device_id, base) 44 | keep = keep[:num_out] 45 | return list(order[keep]) 46 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Installation 2 | 3 | ### Requirements 4 | 5 | - Linux (tested on Ubuntu 16.04 and CentOS 7.2) 6 | - Python 3.4+ 7 | - PyTorch 1.0 8 | - Cython 9 | - [mmcv](https://github.com/open-mmlab/mmcv) >= 0.2.2 10 | 11 | ### Install mmdetection 12 | 13 | a. Install PyTorch 1.0 and torchvision following the [official instructions](https://pytorch.org/). 14 | 15 | b. Clone the mmdetection repository. 16 | 17 | ```shell 18 | git clone https://github.com/open-mmlab/mmdetection.git 19 | ``` 20 | 21 | c. Compile CUDA extensions. 
22 | 23 | ```shell 24 | cd mmdetection 25 | pip install cython # or "conda install cython" if you prefer conda 26 | ./compile.sh # or "PYTHON=python3 ./compile.sh" if you use system python3 without virtual environments 27 | ``` 28 | 29 | d. Install mmdetection (other dependencies will be installed automatically). 30 | 31 | ```shell 32 | python(3) setup.py install # add --user if you want to install it locally 33 | # or "pip install ." 34 | ``` 35 | 36 | Note: You need to run the last step each time you pull updates from GitHub. 37 | The git commit id will be written to the version number and also saved in trained models. 38 | 39 | ### Prepare COCO dataset 40 | 41 | It is recommended to symlink the dataset root to `$MMDETECTION/data`. 42 | 43 | ``` 44 | mmdetection 45 | ├── mmdet 46 | ├── tools 47 | ├── configs 48 | ├── data 49 | │ ├── coco 50 | │ │ ├── annotations 51 | │ │ ├── train2017 52 | │ │ ├── val2017 53 | │ │ ├── test2017 54 | │ ├── VOCdevkit 55 | │ │ ├── VOC2007 56 | │ │ ├── VOC2012 57 | 58 | ``` 59 | 60 | ### Scripts 61 | Just for reference, [here](https://gist.github.com/hellock/bf23cd7348c727d69d48682cb6909047) is 62 | a script for setting up mmdetection with conda. 63 | -------------------------------------------------------------------------------- /mmdet/models/builder.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | from torch import nn 3 | 4 | from .registry import BACKBONES, NECKS, ROI_EXTRACTORS, HEADS, DETECTORS 5 | 6 | 7 | def _build_module(cfg, registry, default_args): 8 | assert isinstance(cfg, dict) and 'type' in cfg 9 | assert isinstance(default_args, dict) or default_args is None 10 | args = cfg.copy() 11 | obj_type = args.pop('type') 12 | if mmcv.is_str(obj_type): 13 | if obj_type not in registry.module_dict: 14 | raise KeyError('{} is not in the {} registry'.format( 15 | obj_type, registry.name)) 16 | obj_type = registry.module_dict[obj_type] 17 | elif not isinstance(obj_type, type): 18 | raise TypeError('type must be a str or valid type, but got {}'.format( 19 | type(obj_type))) 20 | if default_args is not None: 21 | for name, value in default_args.items(): 22 | args.setdefault(name, value) 23 | return obj_type(**args) 24 | 25 | 26 | def build(cfg, registry, default_args=None): 27 | if isinstance(cfg, list): 28 | modules = [_build_module(cfg_, registry, default_args) for cfg_ in cfg] 29 | return nn.Sequential(*modules) 30 | else: 31 | return _build_module(cfg, registry, default_args) 32 | 33 | 34 | def build_backbone(cfg): 35 | return build(cfg, BACKBONES) 36 | 37 | 38 | def build_neck(cfg): 39 | return build(cfg, NECKS) 40 | 41 | 42 | def build_roi_extractor(cfg): 43 | return build(cfg, ROI_EXTRACTORS) 44 | 45 | 46 | def build_head(cfg): 47 | return build(cfg, HEADS) 48 | 49 | 50 | def build_detector(cfg, train_cfg=None, test_cfg=None): 51 | return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg)) 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /mmdet/apis/env.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import random 4 | 5 | import numpy as np 6 | import torch 7 | import torch.distributed as dist 8 | import torch.multiprocessing as mp 9 | from mmcv.runner import get_dist_info 10 | 11 | 12 | def init_dist(launcher, backend='nccl', **kwargs): 13 | if mp.get_start_method(allow_none=True) is None: 14 | mp.set_start_method('spawn') 15 | if launcher == 'pytorch': 
16 | _init_dist_pytorch(backend, **kwargs) 17 | elif launcher == 'mpi': 18 | _init_dist_mpi(backend, **kwargs) 19 | elif launcher == 'slurm': 20 | _init_dist_slurm(backend, **kwargs) 21 | else: 22 | raise ValueError('Invalid launcher type: {}'.format(launcher)) 23 | 24 | 25 | def _init_dist_pytorch(backend, **kwargs): 26 | # TODO: use local_rank instead of rank % num_gpus 27 | rank = int(os.environ['RANK']) 28 | num_gpus = torch.cuda.device_count() 29 | torch.cuda.set_device(rank % num_gpus) 30 | dist.init_process_group(backend=backend, **kwargs) 31 | 32 | 33 | def _init_dist_mpi(backend, **kwargs): 34 | raise NotImplementedError 35 | 36 | 37 | def _init_dist_slurm(backend, **kwargs): 38 | raise NotImplementedError 39 | 40 | 41 | def set_random_seed(seed): 42 | random.seed(seed) 43 | np.random.seed(seed) 44 | torch.manual_seed(seed) 45 | torch.cuda.manual_seed_all(seed) 46 | 47 | 48 | def get_root_logger(log_level=logging.INFO): 49 | logger = logging.getLogger() 50 | if not logger.hasHandlers(): 51 | logging.basicConfig( 52 | format='%(asctime)s - %(levelname)s - %(message)s', 53 | level=log_level) 54 | rank, _ = get_dist_info() 55 | if rank != 0: 56 | logger.setLevel('ERROR') 57 | return logger 58 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/bbox_overlaps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou'): 5 | """Calculate the ious between each bbox of bboxes1 and bboxes2. 6 | 7 | Args: 8 | bboxes1(ndarray): shape (n, 4) 9 | bboxes2(ndarray): shape (k, 4) 10 | mode(str): iou (intersection over union) or iof (intersection 11 | over foreground) 12 | 13 | Returns: 14 | ious(ndarray): shape (n, k) 15 | """ 16 | 17 | assert mode in ['iou', 'iof'] 18 | 19 | bboxes1 = bboxes1.astype(np.float32) 20 | bboxes2 = bboxes2.astype(np.float32) 21 | rows = bboxes1.shape[0] 22 | cols = bboxes2.shape[0] 23 | ious = np.zeros((rows, cols), dtype=np.float32) 24 | if rows * cols == 0: 25 | return ious 26 | exchange = False 27 | if bboxes1.shape[0] > bboxes2.shape[0]: 28 | bboxes1, bboxes2 = bboxes2, bboxes1 29 | ious = np.zeros((cols, rows), dtype=np.float32) 30 | exchange = True 31 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 32 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 33 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 34 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 35 | for i in range(bboxes1.shape[0]): 36 | x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0]) 37 | y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1]) 38 | x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2]) 39 | y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3]) 40 | overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum( 41 | y_end - y_start + 1, 0) 42 | if mode == 'iou': 43 | union = area1[i] + area2 - overlap 44 | else: 45 | union = area1[i] if not exchange else area2 46 | ious[i, :] = overlap / union 47 | if exchange: 48 | ious = ious.T 49 | return ious 50 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | 
.installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # cython generated cpp 107 | mmdet/ops/nms/*.cpp 108 | mmdet/version.py 109 | data 110 | .vscode 111 | .idea 112 | -------------------------------------------------------------------------------- /mmdet/models/detectors/fast_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage import TwoStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class FastRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_roi_extractor, 12 | bbox_head, 13 | train_cfg, 14 | test_cfg, 15 | mask_roi_extractor=None, 16 | mask_head=None, 17 | pretrained=None): 18 | super(FastRCNN, self).__init__( 19 | backbone=backbone, 20 | neck=neck, 21 | bbox_roi_extractor=bbox_roi_extractor, 22 | bbox_head=bbox_head, 23 | train_cfg=train_cfg, 24 | test_cfg=test_cfg, 25 | mask_roi_extractor=mask_roi_extractor, 26 | mask_head=mask_head, 27 | pretrained=pretrained) 28 | 29 | def forward_test(self, imgs, img_metas, proposals, **kwargs): 30 | for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]: 31 | if not isinstance(var, list): 32 | raise TypeError('{} must be a list, but got {}'.format( 33 | name, type(var))) 34 | 35 | num_augs = len(imgs) 36 | if num_augs != len(img_metas): 37 | raise ValueError( 38 | 'num of augmentations ({}) != num of image meta ({})'.format( 39 | len(imgs), len(img_metas))) 40 | # TODO: remove the restriction of imgs_per_gpu == 1 when prepared 41 | imgs_per_gpu = imgs[0].size(0) 42 | assert imgs_per_gpu == 1 43 | 44 | if num_augs == 1: 45 | return self.simple_test(imgs[0], img_metas[0], proposals[0], 46 | **kwargs) 47 | else: 48 | return self.aug_test(imgs, img_metas, proposals, **kwargs) 49 | -------------------------------------------------------------------------------- /mmdet/models/utils/norm.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | norm_cfg = { 5 | # format: layer_type: (abbreviation, module) 6 | 'BN': ('bn', nn.BatchNorm2d), 7 | 'SyncBN': ('bn', None), 8 | 'GN': ('gn', nn.GroupNorm), 9 | # and potentially 'SN' 10 | } 11 | 12 | 13 | def 
build_norm_layer(cfg, num_features, postfix=''): 14 | """ Build normalization layer 15 | 16 | Args: 17 | cfg (dict): cfg should contain: 18 | type (str): identify norm layer type. 19 | layer args: args needed to instantiate a norm layer. 20 | frozen (bool): [optional] whether stop gradient updates 21 | of norm layer, it is helpful to set frozen mode 22 | in backbone's norms. 23 | num_features (int): number of channels from input 24 | postfix (int, str): appended into norm abbreation to 25 | create named layer. 26 | 27 | Returns: 28 | name (str): abbreation + postfix 29 | layer (nn.Module): created norm layer 30 | """ 31 | assert isinstance(cfg, dict) and 'type' in cfg 32 | cfg_ = cfg.copy() 33 | 34 | layer_type = cfg_.pop('type') 35 | if layer_type not in norm_cfg: 36 | raise KeyError('Unrecognized norm type {}'.format(layer_type)) 37 | else: 38 | abbr, norm_layer = norm_cfg[layer_type] 39 | if norm_layer is None: 40 | raise NotImplementedError 41 | 42 | assert isinstance(postfix, (int, str)) 43 | name = abbr + str(postfix) 44 | 45 | frozen = cfg_.pop('frozen', False) 46 | cfg_.setdefault('eps', 1e-5) 47 | if layer_type != 'GN': 48 | layer = norm_layer(num_features, **cfg_) 49 | else: 50 | assert 'num_groups' in cfg_ 51 | layer = norm_layer(num_channels=num_features, **cfg_) 52 | 53 | if frozen: 54 | for param in layer.parameters(): 55 | param.requires_grad = False 56 | 57 | return name, layer 58 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .random_sampler import RandomSampler 5 | 6 | 7 | class InstanceBalancedPosSampler(RandomSampler): 8 | 9 | def _sample_pos(self, assign_result, num_expected, **kwargs): 10 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 11 | if pos_inds.numel() != 0: 12 | pos_inds = pos_inds.squeeze(1) 13 | if pos_inds.numel() <= num_expected: 14 | return pos_inds 15 | else: 16 | unique_gt_inds = assign_result.gt_inds[pos_inds].unique() 17 | num_gts = len(unique_gt_inds) 18 | num_per_gt = int(round(num_expected / float(num_gts)) + 1) 19 | sampled_inds = [] 20 | for i in unique_gt_inds: 21 | inds = torch.nonzero(assign_result.gt_inds == i.item()) 22 | if inds.numel() != 0: 23 | inds = inds.squeeze(1) 24 | else: 25 | continue 26 | if len(inds) > num_per_gt: 27 | inds = self.random_choice(inds, num_per_gt) 28 | sampled_inds.append(inds) 29 | sampled_inds = torch.cat(sampled_inds) 30 | if len(sampled_inds) < num_expected: 31 | num_extra = num_expected - len(sampled_inds) 32 | extra_inds = np.array( 33 | list(set(pos_inds.cpu()) - set(sampled_inds.cpu()))) 34 | if len(extra_inds) > num_extra: 35 | extra_inds = self.random_choice(extra_inds, num_extra) 36 | extra_inds = torch.from_numpy(extra_inds).to( 37 | assign_result.gt_inds.device).long() 38 | sampled_inds = torch.cat([sampled_inds, extra_inds]) 39 | elif len(sampled_inds) > num_expected: 40 | sampled_inds = self.random_choice(sampled_inds, num_expected) 41 | return sampled_inds 42 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/functions/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from .. 
import roi_pool_cuda 5 | 6 | 7 | class RoIPoolFunction(Function): 8 | 9 | @staticmethod 10 | def forward(ctx, features, rois, out_size, spatial_scale): 11 | if isinstance(out_size, int): 12 | out_h = out_size 13 | out_w = out_size 14 | elif isinstance(out_size, tuple): 15 | assert len(out_size) == 2 16 | assert isinstance(out_size[0], int) 17 | assert isinstance(out_size[1], int) 18 | out_h, out_w = out_size 19 | else: 20 | raise TypeError( 21 | '"out_size" must be an integer or tuple of integers') 22 | assert features.is_cuda 23 | ctx.save_for_backward(rois) 24 | num_channels = features.size(1) 25 | num_rois = rois.size(0) 26 | out_size = (num_rois, num_channels, out_h, out_w) 27 | output = features.new_zeros(out_size) 28 | argmax = features.new_zeros(out_size, dtype=torch.int) 29 | roi_pool_cuda.forward(features, rois, out_h, out_w, spatial_scale, 30 | output, argmax) 31 | ctx.spatial_scale = spatial_scale 32 | ctx.feature_size = features.size() 33 | ctx.argmax = argmax 34 | 35 | return output 36 | 37 | @staticmethod 38 | def backward(ctx, grad_output): 39 | assert grad_output.is_cuda 40 | spatial_scale = ctx.spatial_scale 41 | feature_size = ctx.feature_size 42 | argmax = ctx.argmax 43 | rois = ctx.saved_tensors[0] 44 | assert feature_size is not None 45 | 46 | grad_input = grad_rois = None 47 | if ctx.needs_input_grad[0]: 48 | grad_input = grad_output.new_zeros(feature_size) 49 | roi_pool_cuda.backward(grad_output.contiguous(), rois, argmax, 50 | spatial_scale, grad_input) 51 | 52 | return grad_input, grad_rois, None, None 53 | 54 | 55 | roi_pool = RoIPoolFunction.apply 56 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/random_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .base_sampler import BaseSampler 5 | 6 | 7 | class RandomSampler(BaseSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | neg_pos_ub=-1, 13 | add_gt_as_proposals=True, 14 | **kwargs): 15 | super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub, 16 | add_gt_as_proposals) 17 | 18 | @staticmethod 19 | def random_choice(gallery, num): 20 | """Random select some elements from the gallery. 21 | 22 | It seems that Pytorch's implementation is slower than numpy so we use 23 | numpy to randperm the indices. 
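        If ``gallery`` is a Tensor, the sampled indices are moved to
        ``gallery``'s device before indexing, so the returned elements
        stay on the same device as the input.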
24 | """ 25 | assert len(gallery) >= num 26 | if isinstance(gallery, list): 27 | gallery = np.array(gallery) 28 | cands = np.arange(len(gallery)) 29 | np.random.shuffle(cands) 30 | rand_inds = cands[:num] 31 | if not isinstance(gallery, np.ndarray): 32 | rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device) 33 | return gallery[rand_inds] 34 | 35 | def _sample_pos(self, assign_result, num_expected, **kwargs): 36 | """Randomly sample some positive samples.""" 37 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 38 | if pos_inds.numel() != 0: 39 | pos_inds = pos_inds.squeeze(1) 40 | if pos_inds.numel() <= num_expected: 41 | return pos_inds 42 | else: 43 | return self.random_choice(pos_inds, num_expected) 44 | 45 | def _sample_neg(self, assign_result, num_expected, **kwargs): 46 | """Randomly sample some negative samples.""" 47 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 48 | if neg_inds.numel() != 0: 49 | neg_inds = neg_inds.squeeze(1) 50 | if len(neg_inds) <= num_expected: 51 | return neg_inds 52 | else: 53 | return self.random_choice(neg_inds, num_expected) 54 | -------------------------------------------------------------------------------- /mmdet/core/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch.distributed as dist 4 | from torch._utils import (_flatten_dense_tensors, _unflatten_dense_tensors, 5 | _take_tensors) 6 | from mmcv.runner import OptimizerHook 7 | 8 | 9 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): 10 | if bucket_size_mb > 0: 11 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 12 | buckets = _take_tensors(tensors, bucket_size_bytes) 13 | else: 14 | buckets = OrderedDict() 15 | for tensor in tensors: 16 | tp = tensor.type() 17 | if tp not in buckets: 18 | buckets[tp] = [] 19 | buckets[tp].append(tensor) 20 | buckets = buckets.values() 21 | 22 | for bucket in buckets: 23 | flat_tensors = _flatten_dense_tensors(bucket) 24 | dist.all_reduce(flat_tensors) 25 | flat_tensors.div_(world_size) 26 | for tensor, synced in zip( 27 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)): 28 | tensor.copy_(synced) 29 | 30 | 31 | def allreduce_grads(model, coalesce=True, bucket_size_mb=-1): 32 | grads = [ 33 | param.grad.data for param in model.parameters() 34 | if param.requires_grad and param.grad is not None 35 | ] 36 | world_size = dist.get_world_size() 37 | if coalesce: 38 | _allreduce_coalesced(grads, world_size, bucket_size_mb) 39 | else: 40 | for tensor in grads: 41 | dist.all_reduce(tensor.div_(world_size)) 42 | 43 | 44 | class DistOptimizerHook(OptimizerHook): 45 | 46 | def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1): 47 | self.grad_clip = grad_clip 48 | self.coalesce = coalesce 49 | self.bucket_size_mb = bucket_size_mb 50 | 51 | def after_train_iter(self, runner): 52 | runner.optimizer.zero_grad() 53 | runner.outputs['loss'].backward() 54 | allreduce_grads(runner.model, self.coalesce, self.bucket_size_mb) 55 | if self.grad_clip is not None: 56 | self.clip_grads(runner.model.parameters()) 57 | runner.optimizer.step() 58 | -------------------------------------------------------------------------------- /mmdet/ops/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .gpu_nms import gpu_nms 5 | from .cpu_nms import cpu_nms 6 | from .cpu_soft_nms import cpu_soft_nms 7 | 8 | 9 | def nms(dets, iou_thr, 
device_id=None): 10 | """Dispatch to either CPU or GPU NMS implementations.""" 11 | if isinstance(dets, torch.Tensor): 12 | is_tensor = True 13 | if dets.is_cuda: 14 | device_id = dets.get_device() 15 | dets_np = dets.detach().cpu().numpy() 16 | elif isinstance(dets, np.ndarray): 17 | is_tensor = False 18 | dets_np = dets 19 | else: 20 | raise TypeError( 21 | 'dets must be either a Tensor or numpy array, but got {}'.format( 22 | type(dets))) 23 | 24 | if dets_np.shape[0] == 0: 25 | inds = [] 26 | else: 27 | inds = (gpu_nms(dets_np, iou_thr, device_id=device_id) 28 | if device_id is not None else cpu_nms(dets_np, iou_thr)) 29 | 30 | if is_tensor: 31 | inds = dets.new_tensor(inds, dtype=torch.long) 32 | else: 33 | inds = np.array(inds, dtype=np.int64) 34 | return dets[inds, :], inds 35 | 36 | 37 | def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3): 38 | if isinstance(dets, torch.Tensor): 39 | is_tensor = True 40 | dets_np = dets.detach().cpu().numpy() 41 | elif isinstance(dets, np.ndarray): 42 | is_tensor = False 43 | dets_np = dets 44 | else: 45 | raise TypeError( 46 | 'dets must be either a Tensor or numpy array, but got {}'.format( 47 | type(dets))) 48 | 49 | method_codes = {'linear': 1, 'gaussian': 2} 50 | if method not in method_codes: 51 | raise ValueError('Invalid method for SoftNMS: {}'.format(method)) 52 | new_dets, inds = cpu_soft_nms( 53 | dets_np, 54 | iou_thr, 55 | method=method_codes[method], 56 | sigma=sigma, 57 | min_score=min_score) 58 | 59 | if is_tensor: 60 | return dets.new_tensor(new_dets), dets.new_tensor( 61 | inds, dtype=torch.long) 62 | else: 63 | return new_dets.astype(np.float32), inds.astype(np.int64) 64 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/bbox_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.ops.nms import nms_wrapper 4 | 5 | 6 | def multiclass_nms(multi_bboxes, multi_scores, score_thr, nms_cfg, max_num=-1): 7 | """NMS for multi-class bboxes. 8 | 9 | Args: 10 | multi_bboxes (Tensor): shape (n, #class*4) or (n, 4) 11 | multi_scores (Tensor): shape (n, #class) 12 | score_thr (float): bbox threshold, bboxes with scores lower than it 13 | will not be considered. 14 | nms_thr (float): NMS IoU threshold 15 | max_num (int): if there are more than max_num bboxes after NMS, 16 | only top max_num will be kept. 17 | 18 | Returns: 19 | tuple: (bboxes, labels), tensors of shape (k, 5) and (k, 1). Labels 20 | are 0-based. 
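        Note: NMS parameters come from ``nms_cfg``, e.g.
        ``dict(type='nms', iou_thr=0.5)``; ``type`` selects the function in
        ``nms_wrapper`` and the remaining keys are forwarded to it.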
21 | """ 22 | num_classes = multi_scores.shape[1] 23 | bboxes, labels = [], [] 24 | nms_cfg_ = nms_cfg.copy() 25 | nms_type = nms_cfg_.pop('type', 'nms') 26 | nms_op = getattr(nms_wrapper, nms_type) 27 | for i in range(1, num_classes): 28 | cls_inds = multi_scores[:, i] > score_thr 29 | if not cls_inds.any(): 30 | continue 31 | # get bboxes and scores of this class 32 | if multi_bboxes.shape[1] == 4: 33 | _bboxes = multi_bboxes[cls_inds, :] 34 | else: 35 | _bboxes = multi_bboxes[cls_inds, i * 4:(i + 1) * 4] 36 | _scores = multi_scores[cls_inds, i] 37 | cls_dets = torch.cat([_bboxes, _scores[:, None]], dim=1) 38 | cls_dets, _ = nms_op(cls_dets, **nms_cfg_) 39 | cls_labels = multi_bboxes.new_full( 40 | (cls_dets.shape[0], ), i - 1, dtype=torch.long) 41 | bboxes.append(cls_dets) 42 | labels.append(cls_labels) 43 | if bboxes: 44 | bboxes = torch.cat(bboxes) 45 | labels = torch.cat(labels) 46 | if bboxes.shape[0] > max_num: 47 | _, inds = bboxes[:, -1].sort(descending=True) 48 | inds = inds[:max_num] 49 | bboxes = bboxes[inds] 50 | labels = labels[inds] 51 | else: 52 | bboxes = multi_bboxes.new_zeros((0, 5)) 53 | labels = multi_bboxes.new_zeros((0, ), dtype=torch.long) 54 | 55 | return bboxes, labels 56 | -------------------------------------------------------------------------------- /tools/voc_eval.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | 3 | import mmcv 4 | import numpy as np 5 | 6 | from mmdet import datasets 7 | from mmdet.core import eval_map 8 | 9 | 10 | def voc_eval(result_file, dataset, iou_thr=0.5): 11 | det_results = mmcv.load(result_file) 12 | gt_bboxes = [] 13 | gt_labels = [] 14 | gt_ignore = [] 15 | for i in range(len(dataset)): 16 | ann = dataset.get_ann_info(i) 17 | bboxes = ann['bboxes'] 18 | labels = ann['labels'] 19 | if 'bboxes_ignore' in ann: 20 | ignore = np.concatenate([ 21 | np.zeros(bboxes.shape[0], dtype=np.bool), 22 | np.ones(ann['bboxes_ignore'].shape[0], dtype=np.bool) 23 | ]) 24 | gt_ignore.append(ignore) 25 | bboxes = np.vstack([bboxes, ann['bboxes_ignore']]) 26 | labels = np.concatenate([labels, ann['labels_ignore']]) 27 | gt_bboxes.append(bboxes) 28 | gt_labels.append(labels) 29 | if not gt_ignore: 30 | gt_ignore = gt_ignore 31 | if hasattr(dataset, 'year') and dataset.year == 2007: 32 | # dataset_name = 'voc07' 33 | dataset_name = '2class' 34 | # sar 图像 35 | elif hasattr(dataset, 'abc') and dataset.abc == 1: 36 | dataset_name = 'sar' 37 | else: 38 | dataset_name = dataset.CLASSES 39 | eval_map( 40 | det_results, 41 | gt_bboxes, 42 | gt_labels, 43 | gt_ignore=gt_ignore, 44 | scale_ranges=None, 45 | iou_thr=iou_thr, 46 | dataset=dataset_name, 47 | print_summary=True) 48 | 49 | 50 | def main(): 51 | parser = ArgumentParser(description='VOC Evaluation') 52 | parser.add_argument('result', help='result file path') 53 | parser.add_argument('config', help='config file path') 54 | parser.add_argument( 55 | '--iou-thr', 56 | type=float, 57 | default=0.5, 58 | help='IoU threshold for evaluation') 59 | args = parser.parse_args() 60 | cfg = mmcv.Config.fromfile(args.config) 61 | test_dataset = mmcv.runner.obj_from_dict(cfg.data.test, datasets) 62 | voc_eval(args.result, test_dataset, args.iou_thr) 63 | 64 | 65 | if __name__ == '__main__': 66 | main() 67 | -------------------------------------------------------------------------------- /mmdet/apis/inference.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | 
import torch 4 | 5 | from mmdet.datasets import to_tensor 6 | from mmdet.datasets.transforms import ImageTransform 7 | from mmdet.core import get_classes 8 | 9 | 10 | def _prepare_data(img, img_transform, cfg, device): 11 | ori_shape = img.shape 12 | img, img_shape, pad_shape, scale_factor = img_transform( 13 | img, 14 | scale=cfg.data.test.img_scale, 15 | keep_ratio=cfg.data.test.get('resize_keep_ratio', True)) 16 | img = to_tensor(img).to(device).unsqueeze(0) 17 | img_meta = [ 18 | dict( 19 | ori_shape=ori_shape, 20 | img_shape=img_shape, 21 | pad_shape=pad_shape, 22 | scale_factor=scale_factor, 23 | flip=False) 24 | ] 25 | return dict(img=[img], img_meta=[img_meta]) 26 | 27 | 28 | def _inference_single(model, img, img_transform, cfg, device): 29 | img = mmcv.imread(img) 30 | data = _prepare_data(img, img_transform, cfg, device) 31 | with torch.no_grad(): 32 | result = model(return_loss=False, rescale=True, **data) 33 | return result 34 | 35 | 36 | def _inference_generator(model, imgs, img_transform, cfg, device): 37 | for img in imgs: 38 | yield _inference_single(model, img, img_transform, cfg, device) 39 | 40 | 41 | def inference_detector(model, imgs, cfg, device='cuda:0'): 42 | img_transform = ImageTransform( 43 | size_divisor=cfg.data.test.size_divisor, **cfg.img_norm_cfg) 44 | model = model.to(device) 45 | model.eval() 46 | 47 | if not isinstance(imgs, list): 48 | return _inference_single(model, imgs, img_transform, cfg, device) 49 | else: 50 | return _inference_generator(model, imgs, img_transform, cfg, device) 51 | 52 | 53 | def show_result(img, result, dataset='coco', score_thr=0.3): 54 | class_names = get_classes(dataset) 55 | labels = [ 56 | np.full(bbox.shape[0], i, dtype=np.int32) 57 | for i, bbox in enumerate(result) 58 | ] 59 | labels = np.concatenate(labels) 60 | bboxes = np.vstack(result) 61 | img = mmcv.imread(img) 62 | mmcv.imshow_det_bboxes( 63 | img.copy(), 64 | bboxes, 65 | labels, 66 | class_names=class_names, 67 | score_thr=score_thr) 68 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/functions/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Function 2 | 3 | from .. 
import roi_align_cuda 4 | 5 | 6 | class RoIAlignFunction(Function): 7 | 8 | @staticmethod 9 | def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0): 10 | if isinstance(out_size, int): 11 | out_h = out_size 12 | out_w = out_size 13 | elif isinstance(out_size, tuple): 14 | assert len(out_size) == 2 15 | assert isinstance(out_size[0], int) 16 | assert isinstance(out_size[1], int) 17 | out_h, out_w = out_size 18 | else: 19 | raise TypeError( 20 | '"out_size" must be an integer or tuple of integers') 21 | ctx.spatial_scale = spatial_scale 22 | ctx.sample_num = sample_num 23 | ctx.save_for_backward(rois) 24 | ctx.feature_size = features.size() 25 | 26 | batch_size, num_channels, data_height, data_width = features.size() 27 | num_rois = rois.size(0) 28 | 29 | output = features.new_zeros(num_rois, num_channels, out_h, out_w) 30 | if features.is_cuda: 31 | roi_align_cuda.forward(features, rois, out_h, out_w, spatial_scale, 32 | sample_num, output) 33 | else: 34 | raise NotImplementedError 35 | 36 | return output 37 | 38 | @staticmethod 39 | def backward(ctx, grad_output): 40 | feature_size = ctx.feature_size 41 | spatial_scale = ctx.spatial_scale 42 | sample_num = ctx.sample_num 43 | rois = ctx.saved_tensors[0] 44 | assert (feature_size is not None and grad_output.is_cuda) 45 | 46 | batch_size, num_channels, data_height, data_width = feature_size 47 | out_w = grad_output.size(3) 48 | out_h = grad_output.size(2) 49 | 50 | grad_input = grad_rois = None 51 | if ctx.needs_input_grad[0]: 52 | grad_input = rois.new_zeros(batch_size, num_channels, data_height, 53 | data_width) 54 | roi_align_cuda.backward(grad_output.contiguous(), rois, out_h, 55 | out_w, spatial_scale, sample_num, 56 | grad_input) 57 | 58 | return grad_input, grad_rois, None, None, None 59 | 60 | 61 | roi_align = RoIAlignFunction.apply 62 | -------------------------------------------------------------------------------- /mmdet/core/bbox/geometry.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False): 5 | """Calculate overlap between two set of bboxes. 6 | 7 | If ``is_aligned`` is ``False``, then calculate the ious between each bbox 8 | of bboxes1 and bboxes2, otherwise the ious between each aligned pair of 9 | bboxes1 and bboxes2. 10 | 11 | Args: 12 | bboxes1 (Tensor): shape (m, 4) 13 | bboxes2 (Tensor): shape (n, 4), if is_aligned is ``True``, then m and n 14 | must be equal. 15 | mode (str): "iou" (intersection over union) or iof (intersection over 16 | foreground). 
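        is_aligned (bool): If True, ``m`` must equal ``n`` and the overlaps
            are computed element-wise for each aligned pair instead of over
            the full m x n matrix.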
17 | 18 | Returns: 19 | ious(Tensor): shape (m, n) if is_aligned == False else shape (m, 1) 20 | """ 21 | 22 | assert mode in ['iou', 'iof'] 23 | 24 | rows = bboxes1.size(0) 25 | cols = bboxes2.size(0) 26 | if is_aligned: 27 | assert rows == cols 28 | 29 | if rows * cols == 0: 30 | return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols) 31 | 32 | if is_aligned: 33 | lt = torch.max(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2] 34 | rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2] 35 | 36 | wh = (rb - lt + 1).clamp(min=0) # [rows, 2] 37 | overlap = wh[:, 0] * wh[:, 1] 38 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 39 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 40 | 41 | if mode == 'iou': 42 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 43 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 44 | ious = overlap / (area1 + area2 - overlap) 45 | else: 46 | ious = overlap / area1 47 | else: 48 | lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2]) # [rows, cols, 2] 49 | rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:]) # [rows, cols, 2] 50 | 51 | wh = (rb - lt + 1).clamp(min=0) # [rows, cols, 2] 52 | overlap = wh[:, :, 0] * wh[:, :, 1] 53 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 54 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 55 | 56 | if mode == 'iou': 57 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 58 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 59 | ious = overlap / (area1[:, None] + area2 - overlap) 60 | else: 61 | ious = overlap / (area1[:, None]) 62 | 63 | return ious 64 | -------------------------------------------------------------------------------- /mmdet/ops/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | # cython: language_level=3, boundscheck=False 9 | 10 | import numpy as np 11 | cimport numpy as np 12 | 13 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 14 | return a if a >= b else b 15 | 16 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 17 | return a if a <= b else b 18 | 19 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 20 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 21 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 22 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 23 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 24 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 25 | 26 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 27 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 28 | 29 | cdef int ndets = dets.shape[0] 30 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 31 | np.zeros((ndets), dtype=np.int) 32 | 33 | # nominal indices 34 | cdef int _i, _j 35 | # sorted indices 36 | cdef int i, j 37 | # temp variables for box i's (the box currently under consideration) 38 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 39 | # variables for computing overlap with box j (lower scoring box) 40 | cdef np.float32_t xx1, yy1, xx2, yy2 41 | cdef np.float32_t w, h 42 | cdef np.float32_t inter, ovr 43 | 44 | keep = [] 45 | for _i in range(ndets): 46 | i = order[_i] 47 | if suppressed[i] == 1: 48 | continue 49 | keep.append(i) 50 | ix1 = x1[i] 51 | iy1 = y1[i] 52 | ix2 = x2[i] 53 | iy2 = y2[i] 54 | iarea = 
areas[i] 55 | for _j in range(_i + 1, ndets): 56 | j = order[_j] 57 | if suppressed[j] == 1: 58 | continue 59 | xx1 = max(ix1, x1[j]) 60 | yy1 = max(iy1, y1[j]) 61 | xx2 = min(ix2, x2[j]) 62 | yy2 = min(iy2, y2[j]) 63 | w = max(0.0, xx2 - xx1 + 1) 64 | h = max(0.0, yy2 - yy1 + 1) 65 | inter = w * h 66 | ovr = inter / (iarea + areas[j] - inter) 67 | if ovr >= thresh: 68 | suppressed[j] = 1 69 | 70 | return keep 71 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/functions/deform_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from .. import deform_pool_cuda 5 | 6 | 7 | class DeformRoIPoolingFunction(Function): 8 | 9 | @staticmethod 10 | def forward(ctx, 11 | data, 12 | rois, 13 | offset, 14 | spatial_scale, 15 | out_size, 16 | out_channels, 17 | no_trans, 18 | group_size=1, 19 | part_size=None, 20 | sample_per_part=4, 21 | trans_std=.0): 22 | ctx.spatial_scale = spatial_scale 23 | ctx.out_size = out_size 24 | ctx.out_channels = out_channels 25 | ctx.no_trans = no_trans 26 | ctx.group_size = group_size 27 | ctx.part_size = out_size if part_size is None else part_size 28 | ctx.sample_per_part = sample_per_part 29 | ctx.trans_std = trans_std 30 | 31 | assert 0.0 <= ctx.trans_std <= 1.0 32 | if not data.is_cuda: 33 | raise NotImplementedError 34 | 35 | n = rois.shape[0] 36 | output = data.new_empty(n, out_channels, out_size, out_size) 37 | output_count = data.new_empty(n, out_channels, out_size, out_size) 38 | deform_pool_cuda.deform_psroi_pooling_cuda_forward( 39 | data, rois, offset, output, output_count, ctx.no_trans, 40 | ctx.spatial_scale, ctx.out_channels, ctx.group_size, ctx.out_size, 41 | ctx.part_size, ctx.sample_per_part, ctx.trans_std) 42 | 43 | if data.requires_grad or rois.requires_grad or offset.requires_grad: 44 | ctx.save_for_backward(data, rois, offset) 45 | ctx.output_count = output_count 46 | 47 | return output 48 | 49 | @staticmethod 50 | def backward(ctx, grad_output): 51 | if not grad_output.is_cuda: 52 | raise NotImplementedError 53 | 54 | data, rois, offset = ctx.saved_tensors 55 | output_count = ctx.output_count 56 | grad_input = torch.zeros_like(data) 57 | grad_rois = None 58 | grad_offset = torch.zeros_like(offset) 59 | 60 | deform_pool_cuda.deform_psroi_pooling_cuda_backward( 61 | grad_output, data, rois, offset, output_count, grad_input, 62 | grad_offset, ctx.no_trans, ctx.spatial_scale, ctx.out_channels, 63 | ctx.group_size, ctx.out_size, ctx.part_size, ctx.sample_per_part, 64 | ctx.trans_std) 65 | return (grad_input, grad_rois, grad_offset, None, None, None, None, 66 | None, None, None, None) 67 | 68 | 69 | deform_roi_pooling = DeformRoIPoolingFunction.apply 70 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Lightweight Object Detection(One-Stage) 3 | 4 | ## Introduction 5 | 6 | The code is based on the mmdetection. 7 | 8 | mmdetection is an open source object detection toolbox based on PyTorch. It is 9 | a part of the open-mmlab project developed by [Multimedia Laboratory, CUHK](http://mmlab.ie.cuhk.edu.hk/). 
10 | 
11 | Currently, it contains these features:
12 | - **Multiple Base Networks**: MobileNet V2, ShuffleNet V2
13 | - **One-Stage Lightweight Detectors**: MobileV2-SSD, MobileV2-RetinaNet
14 | 
15 | 
16 | ## Performance
17 | 
18 | | VOC2007      | SSD   | RetinaNet |
19 | |--------------|-------|-----------|
20 | | MobilenetV2  |       | 81.9%     |
21 | | ShufflenetV2 |       |           |
22 | 
23 | 
24 | 
25 | | SAR (SSDD)   | SSD   | RetinaNet |
26 | |--------------|-------|-----------|
27 | | MobilenetV2  | 90.4% | 91.7%     |
28 | | ShufflenetV2 |       |           |
29 | 
30 | 
31 | | COCO2017     | SSD   | RetinaNet |
32 | |--------------|-------|-----------|
33 | | MobilenetV2  |       | 31.7      |
34 | | ShufflenetV2 |       |           |
35 | 
36 | ## Demo
37 | ![demo image](demo/V3.png)
38 | ![demo image](demo/V4.png)
39 | ![demo image](demo/1.png)
40 | 
41 | ## TODO
42 | 
--------------------------------------------------------------------------------
/mmdet/ops/nms/setup.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | from distutils.core import setup, Extension
 3 | 
 4 | import numpy as np
 5 | from Cython.Build import cythonize
 6 | from Cython.Distutils import build_ext
 7 | 
 8 | # extensions
 9 | ext_args = dict(
10 |     include_dirs=[np.get_include()],
11 |     language='c++',
12 |     extra_compile_args={
13 |         'cc': ['-Wno-unused-function', '-Wno-write-strings'],
14 |         'nvcc': ['-c', '--compiler-options', '-fPIC'],
15 |     },
16 | )
17 | 
18 | extensions = [
19 |     Extension('cpu_nms', ['cpu_nms.pyx'], **ext_args),
20 |     Extension('cpu_soft_nms', ['cpu_soft_nms.pyx'], **ext_args),
21 |     Extension('gpu_nms', ['gpu_nms.pyx', 'nms_kernel.cu'], **ext_args),
22 | ]
23 | 
24 | 
25 | def customize_compiler_for_nvcc(self):
26 |     """Inject deep into distutils to customize how the dispatch
27 |     to cc/nvcc works.
28 |     If you subclass UnixCCompiler, it's not trivial to get your subclass
29 |     injected in, and still have the right customizations (i.e.
30 |     distutils.sysconfig.customize_compiler) run on it. So instead of going
31 |     the OO route, I have this. Note, it's kind of like a weird functional
32 |     subclassing going on."""
33 | 
34 |     # tell the compiler it can process .cu files
35 |     self.src_extensions.append('.cu')
36 | 
37 |     # save references to the default compiler_so and _compile methods
38 |     default_compiler_so = self.compiler_so
39 |     super = self._compile
40 | 
41 |     # now redefine the _compile method. This gets executed for each
42 |     # object but distutils doesn't have the ability to change compilers
43 |     # based on source extension: we add it.
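    # extra_postargs receives the dict built in ext_args above; _compile
    # picks the 'nvcc' list for .cu sources and the 'cc' list otherwise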
44 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 45 | if osp.splitext(src)[1] == '.cu': 46 | # use the cuda for .cu files 47 | self.set_executable('compiler_so', 'nvcc') 48 | # use only a subset of the extra_postargs, which are 1-1 translated 49 | # from the extra_compile_args in the Extension class 50 | postargs = extra_postargs['nvcc'] 51 | else: 52 | postargs = extra_postargs['cc'] 53 | 54 | super(obj, src, ext, cc_args, postargs, pp_opts) 55 | # reset the default compiler_so, which we might have changed for cuda 56 | self.compiler_so = default_compiler_so 57 | 58 | # inject our redefined _compile method into the class 59 | self._compile = _compile 60 | 61 | 62 | # run the customize_compiler 63 | class custom_build_ext(build_ext): 64 | 65 | def build_extensions(self): 66 | customize_compiler_for_nvcc(self.compiler) 67 | build_ext.build_extensions(self) 68 | 69 | 70 | setup( 71 | name='nms', 72 | cmdclass={'build_ext': custom_build_ext}, 73 | ext_modules=cythonize(extensions), 74 | ) 75 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/ohem_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sampler import BaseSampler 4 | from ..transforms import bbox2roi 5 | 6 | 7 | class OHEMSampler(BaseSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | context, 13 | neg_pos_ub=-1, 14 | add_gt_as_proposals=True, 15 | **kwargs): 16 | super(OHEMSampler, self).__init__(num, pos_fraction, neg_pos_ub, 17 | add_gt_as_proposals) 18 | self.bbox_roi_extractor = context.bbox_roi_extractor 19 | self.bbox_head = context.bbox_head 20 | 21 | def hard_mining(self, inds, num_expected, bboxes, labels, feats): 22 | with torch.no_grad(): 23 | rois = bbox2roi([bboxes]) 24 | bbox_feats = self.bbox_roi_extractor( 25 | feats[:self.bbox_roi_extractor.num_inputs], rois) 26 | cls_score, _ = self.bbox_head(bbox_feats) 27 | loss = self.bbox_head.loss( 28 | cls_score=cls_score, 29 | bbox_pred=None, 30 | labels=labels, 31 | label_weights=cls_score.new_ones(cls_score.size(0)), 32 | bbox_targets=None, 33 | bbox_weights=None, 34 | reduce=False)['loss_cls'] 35 | _, topk_loss_inds = loss.topk(num_expected) 36 | return inds[topk_loss_inds] 37 | 38 | def _sample_pos(self, 39 | assign_result, 40 | num_expected, 41 | bboxes=None, 42 | feats=None, 43 | **kwargs): 44 | # Sample some hard positive samples 45 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 46 | if pos_inds.numel() != 0: 47 | pos_inds = pos_inds.squeeze(1) 48 | if pos_inds.numel() <= num_expected: 49 | return pos_inds 50 | else: 51 | return self.hard_mining(pos_inds, num_expected, bboxes[pos_inds], 52 | assign_result.labels[pos_inds], feats) 53 | 54 | def _sample_neg(self, 55 | assign_result, 56 | num_expected, 57 | bboxes=None, 58 | feats=None, 59 | **kwargs): 60 | # Sample some hard negative samples 61 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 62 | if neg_inds.numel() != 0: 63 | neg_inds = neg_inds.squeeze(1) 64 | if len(neg_inds) <= num_expected: 65 | return neg_inds 66 | else: 67 | return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds], 68 | assign_result.labels[neg_inds], feats) 69 | -------------------------------------------------------------------------------- /ssd_debug/assign_sample_debug.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | # 5 | # base_anchors = torch.Tensor([[-11., -11., 
18., 18.], 6 | # [-17., -17., 24., 24.], 7 | # [-17., -7., 24., 14.], 8 | # [-7., -17., 14., 24.]]) 9 | # num_base_anchors = base_anchors.size(0) 10 | # print(num_base_anchors) 11 | # 12 | # def meshgrid(x, y, row_major=True): 13 | # xx = x.repeat(len(y)) 14 | # yy = y.view(-1, 1).repeat(1, len(x)).view(-1) 15 | # if row_major: 16 | # return xx, yy 17 | # else: 18 | # return yy, xx 19 | # 20 | # featmap_size = (38, 38) 21 | # valid_size = (37, 37) 22 | # 23 | # feat_h, feat_w = featmap_size 24 | # valid_h, valid_w = valid_size 25 | # assert valid_h <= feat_h and valid_w <= feat_w 26 | # valid_x = torch.zeros(feat_w, dtype=torch.uint8) 27 | # valid_y = torch.zeros(feat_h, dtype=torch.uint8) 28 | # # print(valid_x) 29 | # valid_x[:valid_w] = 1 30 | # valid_y[:valid_h] = 1 31 | # valid_xx, valid_yy = meshgrid(valid_x, valid_y) 32 | # # print(valid_xx[:100]) 33 | # # print(valid_yy[-100:]) 34 | # valid = valid_xx & valid_yy 35 | # print(valid.shape) 36 | # valid = valid[:, None].expand( 37 | # valid.size(0), num_base_anchors).contiguous().view(-1) 38 | # print(valid[:200], valid.shape) 39 | 40 | 41 | # x = torch.randn(8,4) 42 | # print(x) 43 | # a = torch.tensor([0, 1, 1, 1, 0, 1, 1, 1], dtype=torch.uint8) 44 | # print(a.shape) 45 | # c = x[a,:] 46 | # print(c, c.shape) 47 | torch.manual_seed(1314) 48 | x = torch.rand(4, 8) 49 | print(x) 50 | assigned_gt_inds = x.new_full((8,), -1, dtype=torch.long) 51 | print(assigned_gt_inds) 52 | max_overlaps, argmax_overlaps = x.max(dim=0) 53 | print(max_overlaps, argmax_overlaps) 54 | assigned_gt_inds[(max_overlaps >= 0) & (max_overlaps < 0.5)] = 0 55 | print(assigned_gt_inds) 56 | pos_inds = max_overlaps >= 0.5 57 | print(pos_inds) 58 | print(assigned_gt_inds[pos_inds]) 59 | print(argmax_overlaps[pos_inds]) 60 | assigned_gt_inds[pos_inds] = argmax_overlaps[pos_inds] + 1 61 | print(assigned_gt_inds[pos_inds]) 62 | gt_max_overlaps, gt_argmax_overlaps = x.max(dim=1) 63 | print(gt_max_overlaps, gt_argmax_overlaps) 64 | print() 65 | for i in range(4): 66 | if gt_max_overlaps[i] >= 0.: 67 | assigned_gt_inds[gt_argmax_overlaps[i]] = i + 1 68 | print(assigned_gt_inds) 69 | assigned_labels = assigned_gt_inds.new_zeros((8, )) 70 | print(assigned_labels) 71 | pos_inds = torch.nonzero(assigned_gt_inds > 0).squeeze(-1).unique() 72 | print(pos_inds, pos_inds.shape) 73 | gt_labels = torch.LongTensor([1, 1, 1, 1]) 74 | if pos_inds.numel() > 0: 75 | assigned_labels[pos_inds] = gt_labels[ 76 | assigned_gt_inds[pos_inds] - 1] 77 | print(assigned_labels) 78 | pos_assigned_gt_inds = assigned_gt_inds[pos_inds]-1 79 | print(pos_assigned_gt_inds) 80 | -------------------------------------------------------------------------------- /mmdet/models/detectors/single_stage.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from .base import BaseDetector 4 | from .. 
import builder
 5 | from ..registry import DETECTORS
 6 | from mmdet.core import bbox2result
 7 | 
 8 | 
 9 | @DETECTORS.register_module
10 | class SingleStageDetector(BaseDetector):
11 | 
12 |     def __init__(self,
13 |                  backbone,
14 |                  neck=None,
15 |                  bbox_head=None,
16 |                  train_cfg=None,
17 |                  test_cfg=None,
18 |                  pretrained=None):
19 |         super(SingleStageDetector, self).__init__()
20 |         # equivalent to SSDVGG(cfg.model.backbone)
21 |         # or ResNet(cfg.model.backbone)
22 |         self.backbone = builder.build_backbone(backbone)
23 |         if neck is not None:
24 |             # equivalent to the corresponding NECK(cfg.model.neck), e.g.
25 |             # FPN(cfg.model.neck)
26 |             self.neck = builder.build_neck(neck)
27 |         # equivalent to SSDHead(cfg.model.bbox_head)
28 |         # or RetinaHead(cfg.model.bbox_head)
29 |         self.bbox_head = builder.build_head(bbox_head)
30 |         self.train_cfg = train_cfg
31 |         self.test_cfg = test_cfg
32 |         self.init_weights(pretrained=pretrained)
33 | 
34 |     def init_weights(self, pretrained=None):
35 |         super(SingleStageDetector, self).init_weights(pretrained)
36 |         self.backbone.init_weights(pretrained=pretrained)
37 |         if self.with_neck:
38 |             if isinstance(self.neck, nn.Sequential):
39 |                 for m in self.neck:
40 |                     m.init_weights()
41 |             else:
42 |                 self.neck.init_weights()
43 |         self.bbox_head.init_weights()
44 | 
45 |     def extract_feat(self, img):
46 |         x = self.backbone(img)
47 |         if self.with_neck:
48 |             x = self.neck(x)
49 |         return x
50 | 
51 |     def forward_train(self,
52 |                       img,
53 |                       img_metas,
54 |                       gt_bboxes,
55 |                       gt_labels,
56 |                       gt_bboxes_ignore=None):
57 |         x = self.extract_feat(img)
58 |         outs = self.bbox_head(x)
59 |         loss_inputs = outs + (gt_bboxes, gt_labels, img_metas, self.train_cfg)
60 |         losses = self.bbox_head.loss(
61 |             *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
62 |         return losses
63 | 
64 |     def simple_test(self, img, img_meta, rescale=False):
65 |         x = self.extract_feat(img)
66 |         outs = self.bbox_head(x)
67 |         bbox_inputs = outs + (img_meta, self.test_cfg, rescale)
68 |         bbox_list = self.bbox_head.get_bboxes(*bbox_inputs)
69 |         bbox_results = [
70 |             bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes)
71 |             for det_bboxes, det_labels in bbox_list
72 |         ]
73 |         return bbox_results[0]
74 | 
75 |     def aug_test(self, imgs, img_metas, rescale=False):
76 |         raise NotImplementedError
77 | 
78 | 
--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/iou_balanced_neg_sampler.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | 
 4 | from .random_sampler import RandomSampler
 5 | 
 6 | 
 7 | class IoUBalancedNegSampler(RandomSampler):
 8 | 
 9 |     def __init__(self,
10 |                  num,
11 |                  pos_fraction,
12 |                  hard_thr=0.1,
13 |                  hard_fraction=0.5,
14 |                  **kwargs):
15 |         super(IoUBalancedNegSampler, self).__init__(num, pos_fraction,
16 |                                                     **kwargs)
17 |         assert hard_thr > 0
18 |         assert 0 < hard_fraction < 1
19 |         self.hard_thr = hard_thr
20 |         self.hard_fraction = hard_fraction
21 | 
22 |     def _sample_neg(self, assign_result, num_expected, **kwargs):
23 |         neg_inds = torch.nonzero(assign_result.gt_inds == 0)
24 |         if neg_inds.numel() != 0:
25 |             neg_inds = neg_inds.squeeze(1)
26 |         if len(neg_inds) <= num_expected:
27 |             return neg_inds
28 |         else:
29 |             max_overlaps = assign_result.max_overlaps.cpu().numpy()
30 |             # balance sampling for negative samples
31 |             neg_set = set(neg_inds.cpu().numpy())
32 |             easy_set = set(
33 |                 np.where(
34 |                     np.logical_and(max_overlaps >= 0,
35 |                                    max_overlaps < self.hard_thr))[0])
36 |             hard_set = set(np.where(max_overlaps >= self.hard_thr)[0])
37 |             easy_neg_inds =
list(easy_set & neg_set) 38 | hard_neg_inds = list(hard_set & neg_set) 39 | 40 | num_expected_hard = int(num_expected * self.hard_fraction) 41 | if len(hard_neg_inds) > num_expected_hard: 42 | sampled_hard_inds = self.random_choice(hard_neg_inds, 43 | num_expected_hard) 44 | else: 45 | sampled_hard_inds = np.array(hard_neg_inds, dtype=np.int) 46 | num_expected_easy = num_expected - len(sampled_hard_inds) 47 | if len(easy_neg_inds) > num_expected_easy: 48 | sampled_easy_inds = self.random_choice(easy_neg_inds, 49 | num_expected_easy) 50 | else: 51 | sampled_easy_inds = np.array(easy_neg_inds, dtype=np.int) 52 | sampled_inds = np.concatenate((sampled_easy_inds, 53 | sampled_hard_inds)) 54 | if len(sampled_inds) < num_expected: 55 | num_extra = num_expected - len(sampled_inds) 56 | extra_inds = np.array(list(neg_set - set(sampled_inds))) 57 | if len(extra_inds) > num_extra: 58 | extra_inds = self.random_choice(extra_inds, num_extra) 59 | sampled_inds = np.concatenate((sampled_inds, extra_inds)) 60 | sampled_inds = torch.from_numpy(sampled_inds).long().to( 61 | assign_result.gt_inds.device) 62 | return sampled_inds 63 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/base_sampler.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch 4 | 5 | from .sampling_result import SamplingResult 6 | 7 | 8 | class BaseSampler(metaclass=ABCMeta): 9 | 10 | def __init__(self, 11 | num, 12 | pos_fraction, 13 | neg_pos_ub=-1, 14 | add_gt_as_proposals=True, 15 | **kwargs): 16 | self.num = num 17 | self.pos_fraction = pos_fraction 18 | self.neg_pos_ub = neg_pos_ub 19 | self.add_gt_as_proposals = add_gt_as_proposals 20 | self.pos_sampler = self 21 | self.neg_sampler = self 22 | 23 | @abstractmethod 24 | def _sample_pos(self, assign_result, num_expected, **kwargs): 25 | pass 26 | 27 | @abstractmethod 28 | def _sample_neg(self, assign_result, num_expected, **kwargs): 29 | pass 30 | 31 | def sample(self, 32 | assign_result, 33 | bboxes, 34 | gt_bboxes, 35 | gt_labels=None, 36 | **kwargs): 37 | """Sample positive and negative bboxes. 38 | 39 | This is a simple implementation of bbox sampling given candidates, 40 | assigning results and ground truth bboxes. 41 | 42 | Args: 43 | assign_result (:obj:`AssignResult`): Bbox assigning results. 44 | bboxes (Tensor): Boxes to be sampled from. 45 | gt_bboxes (Tensor): Ground truth bboxes. 46 | gt_labels (Tensor, optional): Class labels of ground truth bboxes. 47 | 48 | Returns: 49 | :obj:`SamplingResult`: Sampling result. 50 | """ 51 | bboxes = bboxes[:, :4] 52 | 53 | gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8) 54 | if self.add_gt_as_proposals: 55 | bboxes = torch.cat([gt_bboxes, bboxes], dim=0) 56 | assign_result.add_gt_(gt_labels) 57 | gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8) 58 | gt_flags = torch.cat([gt_ones, gt_flags]) 59 | 60 | num_expected_pos = int(self.num * self.pos_fraction) 61 | pos_inds = self.pos_sampler._sample_pos( 62 | assign_result, num_expected_pos, bboxes=bboxes, **kwargs) 63 | # We found that sampled indices have duplicated items occasionally. 
64 |         # (may be a bug of PyTorch)
65 |         # unique() removes the duplicated indices
66 |         pos_inds = pos_inds.unique()
67 |         # numel() returns the number of elements in the tensor
68 |         num_sampled_pos = pos_inds.numel()
69 |         num_expected_neg = self.num - num_sampled_pos
70 |         if self.neg_pos_ub >= 0:
71 |             _pos = max(1, num_sampled_pos)
72 |             neg_upper_bound = int(self.neg_pos_ub * _pos)
73 |             if num_expected_neg > neg_upper_bound:
74 |                 num_expected_neg = neg_upper_bound
75 |         neg_inds = self.neg_sampler._sample_neg(
76 |             assign_result, num_expected_neg, bboxes=bboxes, **kwargs)
77 |         neg_inds = neg_inds.unique()
78 | 
79 |         return SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
80 |                               assign_result, gt_flags)
81 | 
--------------------------------------------------------------------------------
/mmdet/datasets/xml_style.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | import xml.etree.ElementTree as ET
 3 | 
 4 | import mmcv
 5 | import numpy as np
 6 | 
 7 | from .custom import CustomDataset
 8 | 
 9 | 
10 | class XMLDataset(CustomDataset):
11 | 
12 |     def __init__(self, **kwargs):
13 |         super(XMLDataset, self).__init__(**kwargs)
14 |         # self.cat2label = {cat: i + 1 for i, cat in enumerate(self.CLASSES)}
15 |         self.cat2label = {cat: 1 for i, cat in enumerate(self.CLASSES)}  # collapse all classes to one foreground label
16 | 
17 |     def load_annotations(self, ann_file):
18 |         img_infos = []
19 |         img_ids = mmcv.list_from_file(ann_file)
20 |         for img_id in img_ids:
21 |             filename = 'JPEGImages/{}.jpg'.format(img_id)
22 |             xml_path = osp.join(self.img_prefix, 'Annotations',
23 |                                 '{}.xml'.format(img_id))
24 |             tree = ET.parse(xml_path)
25 |             root = tree.getroot()
26 |             size = root.find('size')
27 |             width = int(size.find('width').text)
28 |             height = int(size.find('height').text)
29 |             img_infos.append(
30 |                 dict(id=img_id, filename=filename, width=width, height=height))
31 |         return img_infos
32 | 
33 |     def get_ann_info(self, idx):
34 |         img_id = self.img_infos[idx]['id']
35 |         xml_path = osp.join(self.img_prefix, 'Annotations',
36 |                             '{}.xml'.format(img_id))
37 |         tree = ET.parse(xml_path)
38 |         root = tree.getroot()
39 |         # gt boxes [xmin, ymin, xmax, ymax]
40 |         bboxes = []
41 |         # numeric labels of the gt classes
42 |         labels = []
43 |         # difficult gt boxes [xmin, ymin, xmax, ymax]
44 |         bboxes_ignore = []
45 |         # numeric labels of the difficult gt classes
46 |         labels_ignore = []
47 |         for obj in root.findall('object'):
48 |             name = obj.find('name').text
49 |             label = self.cat2label[name]
50 |             difficult = int(obj.find('difficult').text)
51 |             bnd_box = obj.find('bndbox')
52 |             bbox = [
53 |                 int(bnd_box.find('xmin').text),
54 |                 int(bnd_box.find('ymin').text),
55 |                 int(bnd_box.find('xmax').text),
56 |                 int(bnd_box.find('ymax').text)
57 |             ]
58 |             if difficult:
59 |                 bboxes_ignore.append(bbox)
60 |                 labels_ignore.append(label)
61 |             else:
62 |                 bboxes.append(bbox)
63 |                 labels.append(label)
64 |         if not bboxes:
65 |             bboxes = np.zeros((0, 4))
66 |             labels = np.zeros((0, ))
67 |         else:
68 |             bboxes = np.array(bboxes, ndmin=2) - 1
69 |             labels = np.array(labels)
70 |         if not bboxes_ignore:
71 |             bboxes_ignore = np.zeros((0, 4))
72 |             labels_ignore = np.zeros((0, ))
73 |         else:
74 |             bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1
75 |             labels_ignore = np.array(labels_ignore)
76 |         ann = dict(
77 |             bboxes=bboxes.astype(np.float32),
78 |             labels=labels.astype(np.int64),
79 |             bboxes_ignore=bboxes_ignore.astype(np.float32),
80 |             labels_ignore=labels_ignore.astype(np.int64))
81 |         return ann
82 | 
--------------------------------------------------------------------------------
/mmdet/models/utils/conv_module.py:
-------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import torch.nn as nn 4 | from mmcv.cnn import kaiming_init, constant_init 5 | 6 | from .norm import build_norm_layer 7 | 8 | 9 | class ConvModule(nn.Module): 10 | 11 | def __init__(self, 12 | in_channels, 13 | out_channels, 14 | kernel_size, 15 | stride=1, 16 | padding=0, 17 | dilation=1, 18 | groups=1, 19 | bias=True, 20 | normalize=None, 21 | activation='relu', 22 | inplace=True, 23 | activate_last=True): 24 | super(ConvModule, self).__init__() 25 | self.with_norm = normalize is not None 26 | self.with_activatation = activation is not None 27 | self.with_bias = bias 28 | self.activation = activation 29 | self.activate_last = activate_last 30 | 31 | if self.with_norm and self.with_bias: 32 | warnings.warn('ConvModule has norm and bias at the same time') 33 | 34 | self.conv = nn.Conv2d( 35 | in_channels, 36 | out_channels, 37 | kernel_size, 38 | stride, 39 | padding, 40 | dilation, 41 | groups, 42 | bias=bias) 43 | 44 | self.in_channels = self.conv.in_channels 45 | self.out_channels = self.conv.out_channels 46 | self.kernel_size = self.conv.kernel_size 47 | self.stride = self.conv.stride 48 | self.padding = self.conv.padding 49 | self.dilation = self.conv.dilation 50 | self.transposed = self.conv.transposed 51 | self.output_padding = self.conv.output_padding 52 | self.groups = self.conv.groups 53 | 54 | if self.with_norm: 55 | norm_channels = out_channels if self.activate_last else in_channels 56 | self.norm_name, norm = build_norm_layer(normalize, norm_channels) 57 | self.add_module(self.norm_name, norm) 58 | 59 | if self.with_activatation: 60 | assert activation in ['relu'], 'Only ReLU supported.' 61 | if self.activation == 'relu': 62 | self.activate = nn.ReLU(inplace=inplace) 63 | 64 | # Default using msra init 65 | self.init_weights() 66 | 67 | @property 68 | def norm(self): 69 | return getattr(self, self.norm_name) 70 | 71 | def init_weights(self): 72 | nonlinearity = 'relu' if self.activation is None else self.activation 73 | kaiming_init(self.conv, nonlinearity=nonlinearity) 74 | if self.with_norm: 75 | constant_init(self.norm, 1, bias=0) 76 | 77 | def forward(self, x, activate=True, norm=True): 78 | if self.activate_last: 79 | x = self.conv(x) 80 | if norm and self.with_norm: 81 | x = self.norm(x) 82 | if activate and self.with_activatation: 83 | x = self.activate(x) 84 | else: 85 | if norm and self.with_norm: 86 | x = self.norm(x) 87 | if activate and self.with_activatation: 88 | x = self.activate(x) 89 | x = self.conv(x) 90 | return x 91 | -------------------------------------------------------------------------------- /mmdet/core/bbox/bbox_target.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .transforms import bbox2delta 4 | from ..utils import multi_apply 5 | 6 | 7 | def bbox_target(pos_bboxes_list, 8 | neg_bboxes_list, 9 | pos_gt_bboxes_list, 10 | pos_gt_labels_list, 11 | cfg, 12 | reg_classes=1, 13 | target_means=[.0, .0, .0, .0], 14 | target_stds=[1.0, 1.0, 1.0, 1.0], 15 | concat=True): 16 | labels, label_weights, bbox_targets, bbox_weights = multi_apply( 17 | bbox_target_single, 18 | pos_bboxes_list, 19 | neg_bboxes_list, 20 | pos_gt_bboxes_list, 21 | pos_gt_labels_list, 22 | cfg=cfg, 23 | reg_classes=reg_classes, 24 | target_means=target_means, 25 | target_stds=target_stds) 26 | 27 | if concat: 28 | labels = torch.cat(labels, 0) 29 | label_weights = torch.cat(label_weights, 0) 30 | bbox_targets = 
torch.cat(bbox_targets, 0) 31 | bbox_weights = torch.cat(bbox_weights, 0) 32 | return labels, label_weights, bbox_targets, bbox_weights 33 | 34 | 35 | def bbox_target_single(pos_bboxes, 36 | neg_bboxes, 37 | pos_gt_bboxes, 38 | pos_gt_labels, 39 | cfg, 40 | reg_classes=1, 41 | target_means=[.0, .0, .0, .0], 42 | target_stds=[1.0, 1.0, 1.0, 1.0]): 43 | num_pos = pos_bboxes.size(0) 44 | num_neg = neg_bboxes.size(0) 45 | num_samples = num_pos + num_neg 46 | labels = pos_bboxes.new_zeros(num_samples, dtype=torch.long) 47 | label_weights = pos_bboxes.new_zeros(num_samples) 48 | bbox_targets = pos_bboxes.new_zeros(num_samples, 4) 49 | bbox_weights = pos_bboxes.new_zeros(num_samples, 4) 50 | if num_pos > 0: 51 | labels[:num_pos] = pos_gt_labels 52 | pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight 53 | label_weights[:num_pos] = pos_weight 54 | pos_bbox_targets = bbox2delta(pos_bboxes, pos_gt_bboxes, target_means, 55 | target_stds) 56 | bbox_targets[:num_pos, :] = pos_bbox_targets 57 | bbox_weights[:num_pos, :] = 1 58 | if num_neg > 0: 59 | label_weights[-num_neg:] = 1.0 60 | if reg_classes > 1: 61 | bbox_targets, bbox_weights = expand_target(bbox_targets, bbox_weights, 62 | labels, reg_classes) 63 | 64 | return labels, label_weights, bbox_targets, bbox_weights 65 | 66 | 67 | def expand_target(bbox_targets, bbox_weights, labels, num_classes): 68 | bbox_targets_expand = bbox_targets.new_zeros((bbox_targets.size(0), 69 | 4 * num_classes)) 70 | bbox_weights_expand = bbox_weights.new_zeros((bbox_weights.size(0), 71 | 4 * num_classes)) 72 | for i in torch.nonzero(labels > 0).squeeze(-1): 73 | start, end = labels[i] * 4, (labels[i] + 1) * 4 74 | bbox_targets_expand[i, start:end] = bbox_targets[i, :] 75 | bbox_weights_expand[i, start:end] = bbox_weights[i, :] 76 | return bbox_targets_expand, bbox_weights_expand 77 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/src/roi_pool_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int channels, 8 | const int height, const int width, const int num_rois, 9 | const int pooled_h, const int pooled_w, 10 | at::Tensor output, at::Tensor argmax); 11 | 12 | int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 13 | const at::Tensor argmax, const float spatial_scale, 14 | const int batch_size, const int channels, 15 | const int height, const int width, 16 | const int num_rois, const int pooled_h, 17 | const int pooled_w, at::Tensor bottom_grad); 18 | 19 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 20 | #define CHECK_CONTIGUOUS(x) \ 21 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 22 | #define CHECK_INPUT(x) \ 23 | CHECK_CUDA(x); \ 24 | CHECK_CONTIGUOUS(x) 25 | 26 | int roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois, 27 | int pooled_height, int pooled_width, 28 | float spatial_scale, at::Tensor output, 29 | at::Tensor argmax) { 30 | CHECK_INPUT(features); 31 | CHECK_INPUT(rois); 32 | CHECK_INPUT(output); 33 | CHECK_INPUT(argmax); 34 | 35 | // Number of ROIs 36 | int num_rois = rois.size(0); 37 | int size_rois = rois.size(1); 38 | 39 | if (size_rois != 5) { 40 | printf("wrong roi size\n"); 41 | return 0; 42 | } 43 | 44 | int channels = features.size(1); 45 | int height = features.size(2); 46 | int width = 
features.size(3); 47 | 48 | ROIPoolForwardLaucher(features, rois, spatial_scale, channels, height, width, 49 | num_rois, pooled_height, pooled_width, output, argmax); 50 | 51 | return 1; 52 | } 53 | 54 | int roi_pooling_backward_cuda(at::Tensor top_grad, at::Tensor rois, 55 | at::Tensor argmax, float spatial_scale, 56 | at::Tensor bottom_grad) { 57 | CHECK_INPUT(top_grad); 58 | CHECK_INPUT(rois); 59 | CHECK_INPUT(argmax); 60 | CHECK_INPUT(bottom_grad); 61 | 62 | int pooled_height = top_grad.size(2); 63 | int pooled_width = top_grad.size(3); 64 | int num_rois = rois.size(0); 65 | int size_rois = rois.size(1); 66 | 67 | if (size_rois != 5) { 68 | printf("wrong roi size\n"); 69 | return 0; 70 | } 71 | int batch_size = bottom_grad.size(0); 72 | int channels = bottom_grad.size(1); 73 | int height = bottom_grad.size(2); 74 | int width = bottom_grad.size(3); 75 | 76 | ROIPoolBackwardLaucher(top_grad, rois, argmax, spatial_scale, batch_size, 77 | channels, height, width, num_rois, pooled_height, 78 | pooled_width, bottom_grad); 79 | 80 | return 1; 81 | } 82 | 83 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 84 | m.def("forward", &roi_pooling_forward_cuda, "Roi_Pooling forward (CUDA)"); 85 | m.def("backward", &roi_pooling_backward_cuda, "Roi_Pooling backward (CUDA)"); 86 | } 87 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/src/roi_align_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int sample_num, 8 | const int channels, const int height, 9 | const int width, const int num_rois, 10 | const int pooled_height, const int pooled_width, 11 | at::Tensor output); 12 | 13 | int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 14 | const float spatial_scale, const int sample_num, 15 | const int channels, const int height, 16 | const int width, const int num_rois, 17 | const int pooled_height, const int pooled_width, 18 | at::Tensor bottom_grad); 19 | 20 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 21 | #define CHECK_CONTIGUOUS(x) \ 22 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 23 | #define CHECK_INPUT(x) \ 24 | CHECK_CUDA(x); \ 25 | CHECK_CONTIGUOUS(x) 26 | 27 | int roi_align_forward_cuda(at::Tensor features, at::Tensor rois, 28 | int pooled_height, int pooled_width, 29 | float spatial_scale, int sample_num, 30 | at::Tensor output) { 31 | CHECK_INPUT(features); 32 | CHECK_INPUT(rois); 33 | CHECK_INPUT(output); 34 | 35 | // Number of ROIs 36 | int num_rois = rois.size(0); 37 | int size_rois = rois.size(1); 38 | 39 | if (size_rois != 5) { 40 | printf("wrong roi size\n"); 41 | return 0; 42 | } 43 | 44 | int num_channels = features.size(1); 45 | int data_height = features.size(2); 46 | int data_width = features.size(3); 47 | 48 | ROIAlignForwardLaucher(features, rois, spatial_scale, sample_num, 49 | num_channels, data_height, data_width, num_rois, 50 | pooled_height, pooled_width, output); 51 | 52 | return 1; 53 | } 54 | 55 | int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois, 56 | int pooled_height, int pooled_width, 57 | float spatial_scale, int sample_num, 58 | at::Tensor bottom_grad) { 59 | CHECK_INPUT(top_grad); 60 | CHECK_INPUT(rois); 61 | CHECK_INPUT(bottom_grad); 62 | 63 | // Number of ROIs 64 | int num_rois = rois.size(0); 65 | int size_rois = 
rois.size(1); 66 | if (size_rois != 5) { 67 | printf("wrong roi size\n"); 68 | return 0; 69 | } 70 | 71 | int num_channels = bottom_grad.size(1); 72 | int data_height = bottom_grad.size(2); 73 | int data_width = bottom_grad.size(3); 74 | 75 | ROIAlignBackwardLaucher(top_grad, rois, spatial_scale, sample_num, 76 | num_channels, data_height, data_width, num_rois, 77 | pooled_height, pooled_width, bottom_grad); 78 | 79 | return 1; 80 | } 81 | 82 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 83 | m.def("forward", &roi_align_forward_cuda, "Roi_Align forward (CUDA)"); 84 | m.def("backward", &roi_align_backward_cuda, "Roi_Align backward (CUDA)"); 85 | } 86 | -------------------------------------------------------------------------------- /mmdet/models/anchor_heads/retina_head.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | from mmcv.cnn import normal_init 4 | 5 | from .anchor_head import AnchorHead 6 | from ..registry import HEADS 7 | from ..utils import bias_init_with_prob 8 | 9 | 10 | @HEADS.register_module 11 | class RetinaHead(AnchorHead): 12 | # num_classes=2, 13 | # in_channels=256, 14 | # stacked_convs=4, 15 | # feat_channels=256, 16 | # octave_base_scale=4, 17 | # scales_per_octave=3, 18 | # anchor_ratios=[0.5, 1.0, 2.0], 19 | # anchor_strides=[8, 16, 32, 64, 128], 20 | # target_means=[.0, .0, .0, .0], 21 | # target_stds=[1.0, 1.0, 1.0, 1.0] 22 | def __init__(self, 23 | num_classes, 24 | in_channels, 25 | stacked_convs=4, 26 | octave_base_scale=4, 27 | scales_per_octave=3, 28 | **kwargs): 29 | self.stacked_convs = stacked_convs # 4 30 | self.octave_base_scale = octave_base_scale # 4 31 | self.scales_per_octave = scales_per_octave # 3 32 | # octave_scales = [1, 2^(1/3), 2^(2/3)] 33 | octave_scales = np.array( 34 | [2**(i / scales_per_octave) for i in range(scales_per_octave)]) 35 | # anchor_scales=[4, 4*2^(1/3), 4*2^(2/3)] 36 | anchor_scales = octave_scales * octave_base_scale 37 | super(RetinaHead, self).__init__( 38 | num_classes, 39 | in_channels, 40 | anchor_scales=anchor_scales, 41 | use_sigmoid_cls=True, 42 | use_focal_loss=True, 43 | **kwargs) 44 | 45 | def _init_layers(self): 46 | self.relu = nn.ReLU(inplace=True) 47 | self.cls_convs = nn.ModuleList() 48 | self.reg_convs = nn.ModuleList() 49 | # two parallel subnets, each a stack of 4 conv(256, 256, 3, s=1, p=1) layers 50 | # for i in range(4) 51 | for i in range(self.stacked_convs): 52 | # self.in_channels = self.feat_channels = 256 53 | chn = self.in_channels if i == 0 else self.feat_channels 54 | self.cls_convs.append( 55 | nn.Conv2d(chn, self.feat_channels, 3, stride=1, padding=1)) 56 | self.reg_convs.append( 57 | nn.Conv2d(chn, self.feat_channels, 3, stride=1, padding=1)) 58 | self.retina_cls = nn.Conv2d( 59 | self.feat_channels, 60 | self.num_anchors * self.cls_out_channels, 61 | 3, 62 | padding=1) 63 | self.retina_reg = nn.Conv2d( 64 | self.feat_channels, self.num_anchors * 4, 3, padding=1) 65 | 66 | def init_weights(self): 67 | for m in self.cls_convs: 68 | normal_init(m, std=0.01) 69 | for m in self.reg_convs: 70 | normal_init(m, std=0.01) 71 | bias_cls = bias_init_with_prob(0.01) 72 | normal_init(self.retina_cls, std=0.01, bias=bias_cls) 73 | normal_init(self.retina_reg, std=0.01) 74 | 75 | def forward_single(self, x): 76 | cls_feat = x 77 | reg_feat = x 78 | for cls_conv in self.cls_convs: 79 | cls_feat = self.relu(cls_conv(cls_feat)) 80 | for reg_conv in self.reg_convs: 81 | reg_feat = self.relu(reg_conv(reg_feat)) 82 | cls_score = self.retina_cls(cls_feat) 
83 | bbox_pred = self.retina_reg(reg_feat) 84 | return cls_score, bbox_pred 85 | -------------------------------------------------------------------------------- /RetinaNet_debug/compute_Receptive_field.py: -------------------------------------------------------------------------------- 1 | net_struct = { 2 | 'alexnet': {'net': [[11, 4, 0], [3, 2, 0], [5, 1, 2], [3, 2, 0], [3, 1, 1], [3, 1, 1], [3, 1, 1], [3, 2, 0]], 3 | 'name': ['conv1', 'pool1', 'conv2', 'pool2', 'conv3', 'conv4', 'conv5', 'pool5']}, 4 | 'vgg16': {'net': [[3, 1, 1], [3, 1, 1], [2, 2, 0], [3, 1, 1], [3, 1, 1], [2, 2, 0], [3, 1, 1], [3, 1, 1], [3, 1, 1], 5 | [2, 2, 0], [3, 1, 1], [3, 1, 1], [3, 1, 1], [2, 2, 0], [3, 1, 1], [3, 1, 1], [3, 1, 1], 6 | [2, 2, 0]], 7 | 'name': ['conv1_1', 'conv1_2', 'pool1', 'conv2_1', 'conv2_2', 'pool2', 'conv3_1', 'conv3_2', 8 | 'conv3_3', 'pool3', 'conv4_1', 'conv4_2', 'conv4_3', 'pool4', 'conv5_1', 'conv5_2', 'conv5_3', 9 | 'pool5']}, 10 | 'resnet50': {'net': [[7,2,3], [3,2,1], [1,1,0], [3,1,1], [1,1,0], [1,1,0], [3,1,1], [1,1,0], [1,1,0], [3,1,1], [1,1,0], 11 | [1,1,0], [3,2,1], [1,1,0],[1,1,0], [3,2,1], [1,1,0],[1,1,0], [3,2,1], [1,1,0],[1,1,0], [3,2,1], [1,1,0], 12 | [1, 1, 0], [3, 2, 1], [1, 1, 0],[1,1,0], [3,2,1], [1,1,0],[1,1,0], [3,2,1], [1,1,0],[1,1,0], [3,2,1], [1,1,0],[1,1,0], [3,2,1], [1,1,0],[1,1,0], [3,2,1], [1,1,0], 13 | [1, 1, 0], [3, 2, 1], [1, 1, 0],[1,1,0], [3,2,1], [1,1,0],[1,1,0], [3,2,1], [1,1,0], [3,2,1], [3,2,1]], 14 | 'name':['conv1', 'pool', 'conv1_1', 'conv1_2', 'conv1_3', 'conv2_1', 'conv2_2', 'conv2_3', 'conv3_1', 'conv3_2', 'conv3_3', 15 | 'conv4_1', 'conv4_2', 'conv4_3', 'conv5_1', 'conv5_2', 'conv5_3', 'conv6_1', 'conv6_2', 'conv6_3', 'conv7_1', 'conv7_2', 'conv7_3', 16 | 'conv8_1', 'conv8_2', 'conv8_3', 'conv9_1', 'conv9_2', 'conv9_3', 'conv10_1', 'conv10_2', 'conv10_3', 'conv11_1', 'conv11_2', 'conv11_3', 'conv12_1', 'conv12_2', 'conv12_3', 'conv13_1', 'conv13_2', 'conv13_3', 17 | 'conv14_1', 'conv14_2', 'conv14_3', 'conv15_1', 'conv15_2', 'conv15_3', 'conv16_1', 'conv16_2', 'conv16_3', 'conv17', 'conv18' 18 | ]}, 19 | 20 | 'zf-5': {'net': [[7, 2, 3], [3, 2, 1], [5, 2, 2], [3, 2, 1], [3, 1, 1], [3, 1, 1], [3, 1, 1]], 21 | 'name': ['conv1', 'pool1', 'conv2', 'pool2', 'conv3', 'conv4', 'conv5']}} 22 | imsize = 640 23 | 24 | 25 | def outFromIn(isz, net, layernum): 26 | totstride = 1 27 | insize = isz 28 | for layer in range(layernum): 29 | fsize, stride, pad = net[layer] 30 | outsize = (insize - fsize + 2 * pad) // stride + 1 # floor division keeps sizes integral on Python 3 31 | insize = outsize 32 | totstride = totstride * stride 33 | return outsize, totstride 34 | 35 | 36 | def inFromOut(net, layernum): 37 | RF = 1 38 | for layer in reversed(range(layernum)): 39 | fsize, stride, pad = net[layer] 40 | RF = ((RF - 1) * stride) + fsize 41 | return RF 42 | 43 | 44 | if __name__ == '__main__': 45 | print("layer output sizes given image = %dx%d" % (imsize, imsize)) 46 | 47 | for net in net_struct.keys(): 48 | print('************net structure name is %s**************' % net) 49 | for i in range(len(net_struct[net]['net'])): 50 | p = outFromIn(imsize, net_struct[net]['net'], i + 1) 51 | rf = inFromOut(net_struct[net]['net'], i + 1) 52 | print("Layer Name = %s, Output size = %3d, Stride = %3d, RF size = %3d" % ( 53 | net_struct[net]['name'][i], p[0], p[1], rf)) -------------------------------------------------------------------------------- /mmdet/models/roi_extractors/single_level.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import 
torch 4 | import torch.nn as nn 5 | 6 | from mmdet import ops 7 | from ..registry import ROI_EXTRACTORS 8 | 9 | 10 | @ROI_EXTRACTORS.register_module 11 | class SingleRoIExtractor(nn.Module): 12 | """Extract RoI features from a single level feature map. 13 | 14 | If there are multiple input feature levels, each RoI is mapped to a level 15 | according to its scale. 16 | 17 | Args: 18 | roi_layer (dict): Specify RoI layer type and arguments. 19 | out_channels (int): Output channels of RoI layers. 20 | featmap_strides (list[int]): Strides of input feature maps. 21 | finest_scale (int): Scale threshold of mapping to level 0. 22 | """ 23 | 24 | def __init__(self, 25 | roi_layer, 26 | out_channels, 27 | featmap_strides, 28 | finest_scale=56): 29 | super(SingleRoIExtractor, self).__init__() 30 | self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides) 31 | self.out_channels = out_channels 32 | self.featmap_strides = featmap_strides 33 | self.finest_scale = finest_scale 34 | 35 | @property 36 | def num_inputs(self): 37 | """int: Input feature map levels.""" 38 | return len(self.featmap_strides) 39 | 40 | def init_weights(self): 41 | pass 42 | 43 | def build_roi_layers(self, layer_cfg, featmap_strides): 44 | cfg = layer_cfg.copy() 45 | layer_type = cfg.pop('type') 46 | assert hasattr(ops, layer_type) 47 | layer_cls = getattr(ops, layer_type) 48 | roi_layers = nn.ModuleList( 49 | [layer_cls(spatial_scale=1 / s, **cfg) for s in featmap_strides]) 50 | return roi_layers 51 | 52 | def map_roi_levels(self, rois, num_levels): 53 | """Map rois to corresponding feature levels by scales. 54 | 55 | - scale < finest_scale: level 0 56 | - finest_scale <= scale < finest_scale * 2: level 1 57 | - finest_scale * 2 <= scale < finest_scale * 4: level 2 58 | - scale >= finest_scale * 4: level 3 59 | 60 | Args: 61 | rois (Tensor): Input RoIs, shape (k, 5). 62 | num_levels (int): Total level number. 
63 | 64 | Returns: 65 | Tensor: Level index (0-based) of each RoI, shape (k, ) 66 | """ 67 | scale = torch.sqrt( 68 | (rois[:, 3] - rois[:, 1] + 1) * (rois[:, 4] - rois[:, 2] + 1)) 69 | target_lvls = torch.floor(torch.log2(scale / self.finest_scale + 1e-6)) 70 | target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long() 71 | return target_lvls 72 | 73 | def forward(self, feats, rois): 74 | if len(feats) == 1: 75 | return self.roi_layers[0](feats[0], rois) 76 | 77 | out_size = self.roi_layers[0].out_size 78 | num_levels = len(feats) 79 | target_lvls = self.map_roi_levels(rois, num_levels) 80 | roi_feats = torch.cuda.FloatTensor(rois.size()[0], self.out_channels, 81 | out_size, out_size).fill_(0) 82 | for i in range(num_levels): 83 | inds = target_lvls == i 84 | if inds.any(): 85 | rois_ = rois[inds, :] 86 | roi_feats_t = self.roi_layers[i](feats[i], rois_) 87 | roi_feats[inds] += roi_feats_t 88 | return roi_feats 89 | -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import argparse 4 | from mmcv import Config 5 | 6 | from mmdet import __version__ 7 | from mmdet.datasets import get_dataset 8 | from mmdet.apis import (train_detector, init_dist, get_root_logger, 9 | set_random_seed) 10 | from mmdet.models import build_detector 11 | import torch 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser(description='Train a detector') 16 | parser.add_argument('config', help='train config file path') 17 | parser.add_argument('--work_dir', help='the dir to save logs and models') 18 | parser.add_argument( 19 | '--resume_from', help='the checkpoint file to resume from') 20 | parser.add_argument( 21 | '--validate', 22 | action='store_true', 23 | help='whether to evaluate the checkpoint during training') 24 | parser.add_argument( 25 | '--gpus', 26 | type=int, 27 | default=1, 28 | help='number of gpus to use ' 29 | '(only applicable to non-distributed training)') 30 | parser.add_argument('--seed', type=int, default=None, help='random seed') 31 | parser.add_argument( 32 | '--launcher', 33 | choices=['none', 'pytorch', 'slurm', 'mpi'], 34 | default='none', 35 | help='job launcher') 36 | parser.add_argument('--local_rank', type=int, default=0) 37 | args = parser.parse_args() 38 | 39 | return args 40 | 41 | 42 | def main(): 43 | args = parse_args() 44 | 45 | cfg = Config.fromfile(args.config) 46 | # set cudnn_benchmark 47 | if cfg.get('cudnn_benchmark', False): 48 | torch.backends.cudnn.benchmark = True 49 | # update configs according to CLI args 50 | if args.work_dir is not None: 51 | cfg.work_dir = args.work_dir 52 | if args.resume_from is not None: 53 | cfg.resume_from = args.resume_from 54 | cfg.gpus = args.gpus 55 | if cfg.checkpoint_config is not None: 56 | # save mmdet version in checkpoints as meta data 57 | cfg.checkpoint_config.meta = dict( 58 | mmdet_version=__version__, config=cfg.text) 59 | 60 | # init distributed env first, since logger depends on the dist info. 
61 | if args.launcher == 'none': 62 | distributed = False 63 | else: 64 | distributed = True 65 | init_dist(args.launcher, **cfg.dist_params) 66 | 67 | # init logger before other steps 68 | logger = get_root_logger(cfg.log_level) 69 | logger.info('Distributed training: {}'.format(distributed)) 70 | 71 | # set random seeds 72 | if args.seed is not None: 73 | logger.info('Set random seed to {}'.format(args.seed)) 74 | set_random_seed(args.seed) 75 | # First, the registries BACKBONES, NECKS, ROI_EXTRACTORS, HEADS and DETECTORS are created. 76 | # Then classes register themselves, e.g. BACKBONES.register_module(class SSDVGG), @HEADS.register_module(class AnchorHead), 77 | # @HEADS.register_module(class SSDHead), @DETECTORS.register_module(class SingleStageDetector). 78 | # Finally, build_detector() is equivalent to calling SingleStageDetector(**args). 79 | 80 | model = build_detector( 81 | cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg) 82 | 83 | train_dataset = get_dataset(cfg.data.train) 84 | train_detector( 85 | model, 86 | train_dataset, 87 | cfg, 88 | distributed=distributed, 89 | validate=args.validate, 90 | logger=logger) 91 | 92 | 93 | if __name__ == '__main__': 94 | main() 95 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import time 4 | from setuptools import find_packages, setup 5 | 6 | 7 | def readme(): 8 | with open('README.md', encoding='utf-8') as f: 9 | content = f.read() 10 | return content 11 | 12 | 13 | MAJOR = 0 14 | MINOR = 6 15 | PATCH = 'rc0' 16 | SUFFIX = '' 17 | SHORT_VERSION = '{}.{}.{}{}'.format(MAJOR, MINOR, PATCH, SUFFIX) 18 | 19 | version_file = 'mmdet/version.py' 20 | 21 | 22 | def get_git_hash(): 23 | 24 | def _minimal_ext_cmd(cmd): 25 | # construct minimal environment 26 | env = {} 27 | for k in ['SYSTEMROOT', 'PATH', 'HOME']: 28 | v = os.environ.get(k) 29 | if v is not None: 30 | env[k] = v 31 | # LANGUAGE is used on win32 32 | env['LANGUAGE'] = 'C' 33 | env['LANG'] = 'C' 34 | env['LC_ALL'] = 'C' 35 | out = subprocess.Popen( 36 | cmd, stdout=subprocess.PIPE, env=env).communicate()[0] 37 | return out 38 | 39 | try: 40 | out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD']) 41 | sha = out.strip().decode('ascii') 42 | except OSError: 43 | sha = 'unknown' 44 | 45 | return sha 46 | 47 | 48 | def get_hash(): 49 | if os.path.exists('.git'): 50 | sha = get_git_hash()[:7] 51 | elif os.path.exists(version_file): 52 | try: 53 | from mmdet.version import __version__ 54 | sha = __version__.split('+')[-1] 55 | except ImportError: 56 | raise ImportError('Unable to get git version') 57 | else: 58 | sha = 'unknown' 59 | 60 | return sha 61 | 62 | 63 | def write_version_py(): 64 | content = """# GENERATED VERSION FILE 65 | # TIME: {} 66 | 67 | __version__ = '{}' 68 | short_version = '{}' 69 | """ 70 | sha = get_hash() 71 | VERSION = SHORT_VERSION + '+' + sha 72 | 73 | with open(version_file, 'w') as f: 74 | f.write(content.format(time.asctime(), VERSION, SHORT_VERSION)) 75 | 76 | 77 | def get_version(): 78 | with open(version_file, 'r') as f: 79 | exec(compile(f.read(), version_file, 'exec')) 80 | return locals()['__version__'] 81 | 82 | 83 | if __name__ == '__main__': 84 | write_version_py() 85 | setup( 86 | name='mmdet', 87 | version=get_version(), 88 | description='Open MMLab Detection Toolbox', 89 | long_description=readme(), 90 | keywords='computer vision, object detection', 91 | url='https://github.com/open-mmlab/mmdetection', 92 | packages=find_packages(exclude=('configs', 'tools', 'demo')), 93 
| package_data={'mmdet.ops': ['*/*.so']}, 94 | classifiers=[ 95 | 'Development Status :: 4 - Beta', 96 | 'License :: OSI Approved :: Apache Software License', 97 | 'Operating System :: OS Independent', 98 | 'Programming Language :: Python :: 2', 99 | 'Programming Language :: Python :: 2.7', 100 | 'Programming Language :: Python :: 3', 101 | 'Programming Language :: Python :: 3.4', 102 | 'Programming Language :: Python :: 3.5', 103 | 'Programming Language :: Python :: 3.6', 104 | ], 105 | license='GPLv3', 106 | setup_requires=['pytest-runner'], 107 | tests_require=['pytest'], 108 | install_requires=[ 109 | 'mmcv', 'numpy', 'matplotlib', 'six', 'terminaltables', 110 | 'pycocotools' 111 | ], 112 | zip_safe=False) 113 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/merge_augs.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import numpy as np 4 | 5 | from mmdet.ops import nms 6 | from ..bbox import bbox_mapping_back 7 | 8 | 9 | def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg): 10 | """Merge augmented proposals (multiscale, flip, etc.) 11 | 12 | Args: 13 | aug_proposals (list[Tensor]): proposals from different testing 14 | schemes, shape (n, 5). Note that they are not rescaled to the 15 | original image size. 16 | img_metas (list[dict]): image info including "shape_scale" and "flip". 17 | rpn_test_cfg (dict): rpn test config. 18 | 19 | Returns: 20 | Tensor: shape (n, 4), proposals corresponding to original image scale. 21 | """ 22 | recovered_proposals = [] 23 | for proposals, img_info in zip(aug_proposals, img_metas): 24 | img_shape = img_info['img_shape'] 25 | scale_factor = img_info['scale_factor'] 26 | flip = img_info['flip'] 27 | _proposals = proposals.clone() 28 | _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], img_shape, 29 | scale_factor, flip) 30 | recovered_proposals.append(_proposals) 31 | aug_proposals = torch.cat(recovered_proposals, dim=0) 32 | merged_proposals, _ = nms(aug_proposals, rpn_test_cfg.nms_thr) 33 | scores = merged_proposals[:, 4] 34 | _, order = scores.sort(0, descending=True) 35 | num = min(rpn_test_cfg.max_num, merged_proposals.shape[0]) 36 | order = order[:num] 37 | merged_proposals = merged_proposals[order, :] 38 | return merged_proposals 39 | 40 | 41 | def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg): 42 | """Merge augmented detection bboxes and scores. 43 | 44 | Args: 45 | aug_bboxes (list[Tensor]): shape (n, 4*#class) 46 | aug_scores (list[Tensor] or None): shape (n, #class) 47 | img_shapes (list[Tensor]): shape (3, ). 48 | rcnn_test_cfg (dict): rcnn test config. 
49 | 50 | Returns: 51 | tuple: (bboxes, scores) 52 | """ 53 | recovered_bboxes = [] 54 | for bboxes, img_info in zip(aug_bboxes, img_metas): 55 | img_shape = img_info[0]['img_shape'] 56 | scale_factor = img_info[0]['scale_factor'] 57 | flip = img_info[0]['flip'] 58 | bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip) 59 | recovered_bboxes.append(bboxes) 60 | bboxes = torch.stack(recovered_bboxes).mean(dim=0) 61 | if aug_scores is None: 62 | return bboxes 63 | else: 64 | scores = torch.stack(aug_scores).mean(dim=0) 65 | return bboxes, scores 66 | 67 | 68 | def merge_aug_scores(aug_scores): 69 | """Merge augmented bbox scores.""" 70 | if isinstance(aug_scores[0], torch.Tensor): 71 | return torch.mean(torch.stack(aug_scores), dim=0) 72 | else: 73 | return np.mean(aug_scores, axis=0) 74 | 75 | 76 | def merge_aug_masks(aug_masks, img_metas, rcnn_test_cfg, weights=None): 77 | """Merge augmented mask prediction. 78 | 79 | Args: 80 | aug_masks (list[ndarray]): shape (n, #class, h, w) 81 | img_shapes (list[ndarray]): shape (3, ). 82 | rcnn_test_cfg (dict): rcnn test config. 83 | 84 | Returns: 85 | tuple: (bboxes, scores) 86 | """ 87 | recovered_masks = [ 88 | mask if not img_info[0]['flip'] else mask[..., ::-1] 89 | for mask, img_info in zip(aug_masks, img_metas) 90 | ] 91 | if weights is None: 92 | merged_masks = np.mean(recovered_masks, axis=0) 93 | else: 94 | merged_masks = np.average( 95 | np.array(recovered_masks), axis=0, weights=np.array(weights)) 96 | return merged_masks 97 | -------------------------------------------------------------------------------- /mmdet/models/detectors/rpn.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from mmdet.core import tensor2imgs, bbox_mapping 4 | from .base import BaseDetector 5 | from .test_mixins import RPNTestMixin 6 | from .. 
import builder 7 | from ..registry import DETECTORS 8 | 9 | 10 | @DETECTORS.register_module 11 | class RPN(BaseDetector, RPNTestMixin): 12 | 13 | def __init__(self, 14 | backbone, 15 | neck, 16 | rpn_head, 17 | train_cfg, 18 | test_cfg, 19 | pretrained=None): 20 | super(RPN, self).__init__() 21 | self.backbone = builder.build_backbone(backbone) 22 | self.neck = builder.build_neck(neck) if neck is not None else None 23 | self.rpn_head = builder.build_head(rpn_head) 24 | self.train_cfg = train_cfg 25 | self.test_cfg = test_cfg 26 | self.init_weights(pretrained=pretrained) 27 | 28 | def init_weights(self, pretrained=None): 29 | super(RPN, self).init_weights(pretrained) 30 | self.backbone.init_weights(pretrained=pretrained) 31 | if self.with_neck: 32 | self.neck.init_weights() 33 | self.rpn_head.init_weights() 34 | 35 | def extract_feat(self, img): 36 | x = self.backbone(img) 37 | if self.with_neck: 38 | x = self.neck(x) 39 | return x 40 | 41 | def forward_train(self, 42 | img, 43 | img_meta, 44 | gt_bboxes=None, 45 | gt_bboxes_ignore=None): 46 | if self.train_cfg.rpn.get('debug', False): 47 | self.rpn_head.debug_imgs = tensor2imgs(img) 48 | 49 | x = self.extract_feat(img) 50 | rpn_outs = self.rpn_head(x) 51 | 52 | rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta, self.train_cfg.rpn) 53 | losses = self.rpn_head.loss( 54 | *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) 55 | return losses 56 | 57 | def simple_test(self, img, img_meta, rescale=False): 58 | x = self.extract_feat(img) 59 | proposal_list = self.simple_test_rpn(x, img_meta, self.test_cfg.rpn) 60 | if rescale: 61 | for proposals, meta in zip(proposal_list, img_meta): 62 | proposals[:, :4] /= meta['scale_factor'] 63 | # TODO: remove this restriction 64 | return proposal_list[0].cpu().numpy() 65 | 66 | def aug_test(self, imgs, img_metas, rescale=False): 67 | proposal_list = self.aug_test_rpn( 68 | self.extract_feats(imgs), img_metas, self.test_cfg.rpn) 69 | if not rescale: 70 | for proposals, img_meta in zip(proposal_list, img_metas[0]): 71 | img_shape = img_meta['img_shape'] 72 | scale_factor = img_meta['scale_factor'] 73 | flip = img_meta['flip'] 74 | proposals[:, :4] = bbox_mapping(proposals[:, :4], img_shape, 75 | scale_factor, flip) 76 | # TODO: remove this restriction 77 | return proposal_list[0].cpu().numpy() 78 | 79 | def show_result(self, data, result, img_norm_cfg, dataset=None, top_k=20): 80 | """Show RPN proposals on the image. 81 | 82 | Although we assume batch size is 1, this method supports arbitrary 83 | batch size. 
84 | """ 85 | img_tensor = data['img'][0] 86 | img_metas = data['img_meta'][0].data[0] 87 | imgs = tensor2imgs(img_tensor, **img_norm_cfg) 88 | assert len(imgs) == len(img_metas) 89 | for img, img_meta in zip(imgs, img_metas): 90 | h, w, _ = img_meta['img_shape'] 91 | img_show = img[:h, :w, :] 92 | mmcv.imshow_bboxes(img_show, result, top_k=top_k) 93 | -------------------------------------------------------------------------------- /configs/retinanet_r101_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='modelzoo://resnet101', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=101, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | style='pytorch'), 12 | neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | start_level=1, 17 | add_extra_convs=True, 18 | num_outs=5), 19 | bbox_head=dict( 20 | type='RetinaHead', 21 | num_classes=81, 22 | in_channels=256, 23 | stacked_convs=4, 24 | feat_channels=256, 25 | octave_base_scale=4, 26 | scales_per_octave=3, 27 | anchor_ratios=[0.5, 1.0, 2.0], 28 | anchor_strides=[8, 16, 32, 64, 128], 29 | target_means=[.0, .0, .0, .0], 30 | target_stds=[1.0, 1.0, 1.0, 1.0])) 31 | # training and testing settings 32 | train_cfg = dict( 33 | assigner=dict( 34 | type='MaxIoUAssigner', 35 | pos_iou_thr=0.5, 36 | neg_iou_thr=0.4, 37 | min_pos_iou=0, 38 | ignore_iof_thr=-1), 39 | smoothl1_beta=0.11, 40 | gamma=2.0, 41 | alpha=0.25, 42 | allowed_border=-1, 43 | pos_weight=-1, 44 | debug=False) 45 | test_cfg = dict( 46 | nms_pre=1000, 47 | min_bbox_size=0, 48 | score_thr=0.05, 49 | nms=dict(type='nms', iou_thr=0.5), 50 | max_per_img=100) 51 | # dataset settings 52 | dataset_type = 'CocoDataset' 53 | data_root = 'data/coco/' 54 | img_norm_cfg = dict( 55 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 56 | data = dict( 57 | imgs_per_gpu=2, 58 | workers_per_gpu=2, 59 | train=dict( 60 | type=dataset_type, 61 | ann_file=data_root + 'annotations/instances_train2017.json', 62 | img_prefix=data_root + 'train2017/', 63 | img_scale=(1333, 800), 64 | img_norm_cfg=img_norm_cfg, 65 | size_divisor=32, 66 | flip_ratio=0.5, 67 | with_mask=False, 68 | with_crowd=False, 69 | with_label=True), 70 | val=dict( 71 | type=dataset_type, 72 | ann_file=data_root + 'annotations/instances_val2017.json', 73 | img_prefix=data_root + 'val2017/', 74 | img_scale=(1333, 800), 75 | img_norm_cfg=img_norm_cfg, 76 | size_divisor=32, 77 | flip_ratio=0, 78 | with_mask=False, 79 | with_crowd=False, 80 | with_label=True), 81 | test=dict( 82 | type=dataset_type, 83 | ann_file=data_root + 'annotations/instances_val2017.json', 84 | img_prefix=data_root + 'val2017/', 85 | img_scale=(1333, 800), 86 | img_norm_cfg=img_norm_cfg, 87 | size_divisor=32, 88 | flip_ratio=0, 89 | with_mask=False, 90 | with_crowd=False, 91 | with_label=False, 92 | test_mode=True)) 93 | # optimizer 94 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) 95 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 96 | # learning policy 97 | lr_config = dict( 98 | policy='step', 99 | warmup='linear', 100 | warmup_iters=500, 101 | warmup_ratio=1.0 / 3, 102 | step=[8, 11]) 103 | checkpoint_config = dict(interval=1) 104 | # yapf:disable 105 | log_config = dict( 106 | interval=50, 107 | hooks=[ 108 | dict(type='TextLoggerHook'), 109 | # dict(type='TensorboardLoggerHook') 110 | ]) 111 | # yapf:enable 112 | # 
runtime settings 113 | total_epochs = 12 114 | device_ids = range(8) 115 | dist_params = dict(backend='nccl') 116 | log_level = 'INFO' 117 | work_dir = './work_dirs/retinanet_r101_fpn_1x' 118 | load_from = None 119 | resume_from = None 120 | workflow = [('train', 1)] 121 | -------------------------------------------------------------------------------- /configs/retinanet_r50_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='modelzoo://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | style='pytorch'), 12 | neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | start_level=1, 17 | add_extra_convs=True, 18 | num_outs=5), 19 | bbox_head=dict( 20 | type='RetinaHead', 21 | num_classes=81, 22 | in_channels=256, 23 | stacked_convs=4, 24 | feat_channels=256, 25 | octave_base_scale=4, 26 | scales_per_octave=3, 27 | anchor_ratios=[0.5, 1.0, 2.0], 28 | anchor_strides=[8, 16, 32, 64, 128], 29 | target_means=[.0, .0, .0, .0], 30 | target_stds=[1.0, 1.0, 1.0, 1.0])) 31 | # training and testing settings 32 | train_cfg = dict( 33 | assigner=dict( 34 | type='MaxIoUAssigner', 35 | pos_iou_thr=0.5, 36 | neg_iou_thr=0.4, 37 | min_pos_iou=0, 38 | ignore_iof_thr=-1), 39 | smoothl1_beta=0.11, 40 | gamma=2.0, 41 | alpha=0.25, 42 | allowed_border=-1, 43 | pos_weight=-1, 44 | debug=False) 45 | test_cfg = dict( 46 | nms_pre=1000, 47 | min_bbox_size=0, 48 | score_thr=0.05, 49 | nms=dict(type='nms', iou_thr=0.5), 50 | max_per_img=100) 51 | # dataset settings 52 | dataset_type = 'CocoDataset' 53 | data_root = '/home/hs/data/COCO/coco2017/' 54 | img_norm_cfg = dict( 55 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 56 | data = dict( 57 | imgs_per_gpu=4, 58 | workers_per_gpu=8, 59 | train=dict( 60 | type=dataset_type, 61 | ann_file=data_root + 'annotations/instances_train2017.json', 62 | img_prefix=data_root + 'train2017/', 63 | img_scale=(1333, 800), 64 | img_norm_cfg=img_norm_cfg, 65 | size_divisor=32, 66 | flip_ratio=0.5, 67 | with_mask=False, 68 | with_crowd=False, 69 | with_label=True), 70 | val=dict( 71 | type=dataset_type, 72 | ann_file=data_root + 'annotations/instances_val2017.json', 73 | img_prefix=data_root + 'val2017/', 74 | img_scale=(1333, 800), 75 | img_norm_cfg=img_norm_cfg, 76 | size_divisor=32, 77 | flip_ratio=0, 78 | with_mask=False, 79 | with_crowd=False, 80 | with_label=True), 81 | test=dict( 82 | type=dataset_type, 83 | ann_file=data_root + 'annotations/instances_val2017.json', 84 | img_prefix=data_root + 'val2017/', 85 | img_scale=(1333, 800), 86 | img_norm_cfg=img_norm_cfg, 87 | size_divisor=32, 88 | flip_ratio=0, 89 | with_mask=False, 90 | with_crowd=False, 91 | with_label=False, 92 | test_mode=True)) 93 | # optimizer 94 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) 95 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 96 | # learning policy 97 | lr_config = dict( 98 | policy='step', 99 | warmup='linear', 100 | warmup_iters=500, 101 | warmup_ratio=1.0 / 3, 102 | step=[8, 11]) 103 | checkpoint_config = dict(interval=1) 104 | # yapf:disable 105 | log_config = dict( 106 | interval=50, 107 | hooks=[ 108 | dict(type='TextLoggerHook'), 109 | # dict(type='TensorboardLoggerHook') 110 | ]) 111 | # yapf:enable 112 | # runtime settings 113 | total_epochs = 12 114 | device_ids = range(8) 
115 | dist_params = dict(backend='nccl') 116 | log_level = 'INFO' 117 | work_dir = './work_dirs/retinanet_r50_fpn_1x' 118 | load_from = None 119 | resume_from = None 120 | workflow = [('train', 1)] 121 | -------------------------------------------------------------------------------- /configs/retinanet_mobileV2_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='/home/hs/hs/237014845/HuaWei/mmdetection-master/weights/mobileV2_retina_coco/mobilev2/mobilenet_v2.pth', 5 | backbone=dict( 6 | type='MobileNetV2', 7 | out_indices=(3, 6, 13, 17), 8 | width_mult=1., 9 | ), 10 | neck=dict( 11 | type='FPN', 12 | in_channels=[56, 32, 96, 320], 13 | out_channels=256, 14 | start_level=1, 15 | add_extra_convs=True, 16 | num_outs=5), 17 | bbox_head=dict( 18 | type='RetinaHead', 19 | num_classes=2, 20 | in_channels=256, 21 | stacked_convs=4, 22 | feat_channels=256, 23 | octave_base_scale=4, 24 | scales_per_octave=3, 25 | anchor_ratios=[0.5, 1.0, 2.0], 26 | anchor_strides=[8, 16, 32, 64, 128], 27 | target_means=[.0, .0, .0, .0], 28 | target_stds=[1.0, 1.0, 1.0, 1.0])) 29 | # training and testing settings 30 | train_cfg = dict( 31 | assigner=dict( 32 | type='MaxIoUAssigner', 33 | pos_iou_thr=0.5, 34 | neg_iou_thr=0.4, 35 | min_pos_iou=0, 36 | ignore_iof_thr=-1), 37 | smoothl1_beta=0.11, 38 | gamma=2.0, 39 | alpha=0.25, 40 | allowed_border=-1, 41 | pos_weight=-1, 42 | debug=False) 43 | test_cfg = dict( 44 | nms_pre=1000, 45 | min_bbox_size=0, 46 | score_thr=0.05, 47 | nms=dict(type='nms', iou_thr=0.5), 48 | max_per_img=100) 49 | # dataset settings 50 | dataset_type = 'CocoDataset' 51 | data_root = '/home/hs/data/COCO/coco2017/' 52 | img_norm_cfg = dict( 53 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 54 | data = dict( 55 | imgs_per_gpu=16, 56 | workers_per_gpu=8, 57 | train=dict( 58 | type=dataset_type, 59 | ann_file=data_root + 'annotations/instances_train2017.json', 60 | img_prefix=data_root + 'train2017/', 61 | img_scale=(640, 640), 62 | img_norm_cfg=img_norm_cfg, 63 | size_divisor=32, 64 | flip_ratio=0.5, 65 | with_mask=False, 66 | with_crowd=False, 67 | with_label=True), 68 | val=dict( 69 | type=dataset_type, 70 | ann_file=data_root + 'annotations/instances_val2017.json', 71 | img_prefix=data_root + 'val2017/', 72 | img_scale=(640, 640), 73 | img_norm_cfg=img_norm_cfg, 74 | size_divisor=32, 75 | flip_ratio=0, 76 | with_mask=False, 77 | with_crowd=False, 78 | with_label=True), 79 | test=dict( 80 | type=dataset_type, 81 | ann_file=data_root + 'annotations/instances_val2017.json', 82 | img_prefix=data_root + 'val2017/', 83 | img_scale=(640, 640), 84 | img_norm_cfg=img_norm_cfg, 85 | size_divisor=32, 86 | flip_ratio=0, 87 | with_mask=False, 88 | with_crowd=False, 89 | with_label=False, 90 | test_mode=True)) 91 | # optimizer 92 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) 93 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 94 | # learning policy 95 | lr_config = dict( 96 | policy='step', 97 | warmup='linear', 98 | warmup_iters=500, 99 | warmup_ratio=1.0 / 3, 100 | step=[8, 11]) 101 | checkpoint_config = dict(interval=1) 102 | # yapf:disable 103 | log_config = dict( 104 | interval=50, 105 | hooks=[ 106 | dict(type='TextLoggerHook'), 107 | # dict(type='TensorboardLoggerHook') 108 | ]) 109 | # yapf:enable 110 | # runtime settings 111 | total_epochs = 12 112 | device_ids = range(8) 113 | dist_params = 
dict(backend='nccl') 114 | log_level = 'INFO' 115 | work_dir = './work_dirs/retinanet_r50_fpn_1x' 116 | load_from = None 117 | resume_from = None 118 | workflow = [('train', 1)] 119 | -------------------------------------------------------------------------------- /configs/retinanet_x101_32x4d_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='open-mmlab://resnext101_32x4d', 5 | backbone=dict( 6 | type='ResNeXt', 7 | depth=101, 8 | groups=32, 9 | base_width=4, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=1, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | start_level=1, 19 | add_extra_convs=True, 20 | num_outs=5), 21 | bbox_head=dict( 22 | type='RetinaHead', 23 | num_classes=81, 24 | in_channels=256, 25 | stacked_convs=4, 26 | feat_channels=256, 27 | octave_base_scale=4, 28 | scales_per_octave=3, 29 | anchor_ratios=[0.5, 1.0, 2.0], 30 | anchor_strides=[8, 16, 32, 64, 128], 31 | target_means=[.0, .0, .0, .0], 32 | target_stds=[1.0, 1.0, 1.0, 1.0])) 33 | # training and testing settings 34 | train_cfg = dict( 35 | assigner=dict( 36 | type='MaxIoUAssigner', 37 | pos_iou_thr=0.5, 38 | neg_iou_thr=0.4, 39 | min_pos_iou=0, 40 | ignore_iof_thr=-1), 41 | smoothl1_beta=0.11, 42 | gamma=2.0, 43 | alpha=0.25, 44 | allowed_border=-1, 45 | pos_weight=-1, 46 | debug=False) 47 | test_cfg = dict( 48 | nms_pre=1000, 49 | min_bbox_size=0, 50 | score_thr=0.05, 51 | nms=dict(type='nms', iou_thr=0.5), 52 | max_per_img=100) 53 | # dataset settings 54 | dataset_type = 'CocoDataset' 55 | data_root = 'data/coco/' 56 | img_norm_cfg = dict( 57 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 58 | data = dict( 59 | imgs_per_gpu=2, 60 | workers_per_gpu=2, 61 | train=dict( 62 | type=dataset_type, 63 | ann_file=data_root + 'annotations/instances_train2017.json', 64 | img_prefix=data_root + 'train2017/', 65 | img_scale=(1333, 800), 66 | img_norm_cfg=img_norm_cfg, 67 | size_divisor=32, 68 | flip_ratio=0.5, 69 | with_mask=False, 70 | with_crowd=False, 71 | with_label=True), 72 | val=dict( 73 | type=dataset_type, 74 | ann_file=data_root + 'annotations/instances_val2017.json', 75 | img_prefix=data_root + 'val2017/', 76 | img_scale=(1333, 800), 77 | img_norm_cfg=img_norm_cfg, 78 | size_divisor=32, 79 | flip_ratio=0, 80 | with_mask=False, 81 | with_crowd=False, 82 | with_label=True), 83 | test=dict( 84 | type=dataset_type, 85 | ann_file=data_root + 'annotations/instances_val2017.json', 86 | img_prefix=data_root + 'val2017/', 87 | img_scale=(1333, 800), 88 | img_norm_cfg=img_norm_cfg, 89 | size_divisor=32, 90 | flip_ratio=0, 91 | with_mask=False, 92 | with_crowd=False, 93 | with_label=False, 94 | test_mode=True)) 95 | # optimizer 96 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) 97 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 98 | # learning policy 99 | lr_config = dict( 100 | policy='step', 101 | warmup='linear', 102 | warmup_iters=500, 103 | warmup_ratio=1.0 / 3, 104 | step=[8, 11]) 105 | checkpoint_config = dict(interval=1) 106 | # yapf:disable 107 | log_config = dict( 108 | interval=50, 109 | hooks=[ 110 | dict(type='TextLoggerHook'), 111 | # dict(type='TensorboardLoggerHook') 112 | ]) 113 | # yapf:enable 114 | # runtime settings 115 | total_epochs = 12 116 | device_ids = range(8) 117 | dist_params = dict(backend='nccl') 118 | 
log_level = 'INFO' 119 | work_dir = './work_dirs/retinanet_r50_fpn_1x' 120 | load_from = None 121 | resume_from = None 122 | workflow = [('train', 1)] 123 | -------------------------------------------------------------------------------- /configs/retinanet_x101_64x4d_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='open-mmlab://resnext101_64x4d', 5 | backbone=dict( 6 | type='ResNeXt', 7 | depth=101, 8 | groups=64, 9 | base_width=4, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=1, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | start_level=1, 19 | add_extra_convs=True, 20 | num_outs=5), 21 | bbox_head=dict( 22 | type='RetinaHead', 23 | num_classes=2, 24 | in_channels=256, 25 | stacked_convs=4, 26 | feat_channels=256, 27 | octave_base_scale=4, 28 | scales_per_octave=3, 29 | anchor_ratios=[0.5, 1.0, 2.0], 30 | anchor_strides=[8, 16, 32, 64, 128], 31 | target_means=[.0, .0, .0, .0], 32 | target_stds=[1.0, 1.0, 1.0, 1.0])) 33 | # training and testing settings 34 | train_cfg = dict( 35 | assigner=dict( 36 | type='MaxIoUAssigner', 37 | pos_iou_thr=0.5, 38 | neg_iou_thr=0.4, 39 | min_pos_iou=0, 40 | ignore_iof_thr=-1), 41 | smoothl1_beta=0.11, 42 | gamma=2.0, 43 | alpha=0.25, 44 | allowed_border=-1, 45 | pos_weight=-1, 46 | debug=False) 47 | test_cfg = dict( 48 | nms_pre=1000, 49 | min_bbox_size=0, 50 | score_thr=0.05, 51 | nms=dict(type='nms', iou_thr=0.5), 52 | max_per_img=100) 53 | # dataset settings 54 | dataset_type = 'CocoDataset' 55 | data_root = 'data/coco/' 56 | img_norm_cfg = dict( 57 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 58 | data = dict( 59 | imgs_per_gpu=2, 60 | workers_per_gpu=2, 61 | train=dict( 62 | type=dataset_type, 63 | ann_file=data_root + 'annotations/instances_train2017.json', 64 | img_prefix=data_root + 'train2017/', 65 | img_scale=(1333, 800), 66 | img_norm_cfg=img_norm_cfg, 67 | size_divisor=32, 68 | flip_ratio=0.5, 69 | with_mask=False, 70 | with_crowd=False, 71 | with_label=True), 72 | val=dict( 73 | type=dataset_type, 74 | ann_file=data_root + 'annotations/instances_val2017.json', 75 | img_prefix=data_root + 'val2017/', 76 | img_scale=(1333, 800), 77 | img_norm_cfg=img_norm_cfg, 78 | size_divisor=32, 79 | flip_ratio=0, 80 | with_mask=False, 81 | with_crowd=False, 82 | with_label=True), 83 | test=dict( 84 | type=dataset_type, 85 | ann_file=data_root + 'annotations/instances_val2017.json', 86 | img_prefix=data_root + 'val2017/', 87 | img_scale=(1333, 800), 88 | img_norm_cfg=img_norm_cfg, 89 | size_divisor=32, 90 | flip_ratio=0, 91 | with_mask=False, 92 | with_crowd=False, 93 | with_label=False, 94 | test_mode=True)) 95 | # optimizer 96 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) 97 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 98 | # learning policy 99 | lr_config = dict( 100 | policy='step', 101 | warmup='linear', 102 | warmup_iters=500, 103 | warmup_ratio=1.0 / 3, 104 | step=[8, 11]) 105 | checkpoint_config = dict(interval=1) 106 | # yapf:disable 107 | log_config = dict( 108 | interval=50, 109 | hooks=[ 110 | dict(type='TextLoggerHook'), 111 | # dict(type='TensorboardLoggerHook') 112 | ]) 113 | # yapf:enable 114 | # runtime settings 115 | total_epochs = 12 116 | device_ids = range(8) 117 | dist_params = dict(backend='nccl') 118 | log_level = 'INFO' 119 | work_dir = 
'./work_dirs/retinanet_r50_fpn_1x' 120 | load_from = None 121 | resume_from = None 122 | workflow = [('train', 1)] 123 | -------------------------------------------------------------------------------- /configs/rpn_r50_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='modelzoo://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | style='pytorch'), 12 | neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=256, 20 | feat_channels=256, 21 | anchor_scales=[8], 22 | anchor_ratios=[0.5, 1.0, 2.0], 23 | anchor_strides=[4, 8, 16, 32, 64], 24 | target_means=[.0, .0, .0, .0], 25 | target_stds=[1.0, 1.0, 1.0, 1.0], 26 | use_sigmoid_cls=True)) 27 | # model training and testing settings 28 | train_cfg = dict( 29 | rpn=dict( 30 | assigner=dict( 31 | type='MaxIoUAssigner', 32 | pos_iou_thr=0.7, 33 | neg_iou_thr=0.3, 34 | min_pos_iou=0.3, 35 | ignore_iof_thr=-1), 36 | sampler=dict( 37 | type='RandomSampler', 38 | num=256, 39 | pos_fraction=0.5, 40 | neg_pos_ub=-1, 41 | add_gt_as_proposals=False), 42 | allowed_border=0, 43 | pos_weight=-1, 44 | smoothl1_beta=1 / 9.0, 45 | debug=False)) 46 | test_cfg = dict( 47 | rpn=dict( 48 | nms_across_levels=False, 49 | nms_pre=2000, 50 | nms_post=2000, 51 | max_num=2000, 52 | nms_thr=0.7, 53 | min_bbox_size=0)) 54 | # dataset settings 55 | dataset_type = 'CocoDataset' 56 | data_root = 'data/coco/' 57 | img_norm_cfg = dict( 58 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 59 | data = dict( 60 | imgs_per_gpu=2, 61 | workers_per_gpu=2, 62 | train=dict( 63 | type=dataset_type, 64 | ann_file=data_root + 'annotations/instances_train2017.json', 65 | img_prefix=data_root + 'train2017/', 66 | img_scale=(1333, 800), 67 | img_norm_cfg=img_norm_cfg, 68 | size_divisor=32, 69 | flip_ratio=0.5, 70 | with_mask=False, 71 | with_crowd=False, 72 | with_label=False), 73 | val=dict( 74 | type=dataset_type, 75 | ann_file=data_root + 'annotations/instances_val2017.json', 76 | img_prefix=data_root + 'val2017/', 77 | img_scale=(1333, 800), 78 | img_norm_cfg=img_norm_cfg, 79 | size_divisor=32, 80 | flip_ratio=0, 81 | with_mask=False, 82 | with_crowd=False, 83 | with_label=False), 84 | test=dict( 85 | type=dataset_type, 86 | ann_file=data_root + 'annotations/instances_val2017.json', 87 | img_prefix=data_root + 'val2017/', 88 | img_scale=(1333, 800), 89 | img_norm_cfg=img_norm_cfg, 90 | size_divisor=32, 91 | flip_ratio=0, 92 | with_mask=False, 93 | with_label=False, 94 | test_mode=True)) 95 | # optimizer 96 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 97 | # runner configs 98 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 99 | lr_config = dict( 100 | policy='step', 101 | warmup='linear', 102 | warmup_iters=500, 103 | warmup_ratio=1.0 / 3, 104 | step=[8, 11]) 105 | checkpoint_config = dict(interval=1) 106 | # yapf:disable 107 | log_config = dict( 108 | interval=50, 109 | hooks=[ 110 | dict(type='TextLoggerHook'), 111 | # dict(type='TensorboardLoggerHook') 112 | ]) 113 | # yapf:enable 114 | # runtime settings 115 | total_epochs = 12 116 | dist_params = dict(backend='nccl') 117 | log_level = 'INFO' 118 | work_dir = './work_dirs/rpn_r50_fpn_1x' 119 | load_from = None 120 | resume_from = None 121 | workflow = 
[('train', 1)] 122 | -------------------------------------------------------------------------------- /configs/rpn_r101_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='modelzoo://resnet101', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=101, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | style='pytorch'), 12 | neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=256, 20 | feat_channels=256, 21 | anchor_scales=[8], 22 | anchor_ratios=[0.5, 1.0, 2.0], 23 | anchor_strides=[4, 8, 16, 32, 64], 24 | target_means=[.0, .0, .0, .0], 25 | target_stds=[1.0, 1.0, 1.0, 1.0], 26 | use_sigmoid_cls=True)) 27 | # model training and testing settings 28 | train_cfg = dict( 29 | rpn=dict( 30 | assigner=dict( 31 | type='MaxIoUAssigner', 32 | pos_iou_thr=0.7, 33 | neg_iou_thr=0.3, 34 | min_pos_iou=0.3, 35 | ignore_iof_thr=-1), 36 | sampler=dict( 37 | type='RandomSampler', 38 | num=256, 39 | pos_fraction=0.5, 40 | neg_pos_ub=-1, 41 | add_gt_as_proposals=False), 42 | allowed_border=0, 43 | pos_weight=-1, 44 | smoothl1_beta=1 / 9.0, 45 | debug=False)) 46 | test_cfg = dict( 47 | rpn=dict( 48 | nms_across_levels=False, 49 | nms_pre=2000, 50 | nms_post=2000, 51 | max_num=2000, 52 | nms_thr=0.7, 53 | min_bbox_size=0)) 54 | # dataset settings 55 | dataset_type = 'CocoDataset' 56 | data_root = 'data/coco/' 57 | img_norm_cfg = dict( 58 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 59 | data = dict( 60 | imgs_per_gpu=2, 61 | workers_per_gpu=2, 62 | train=dict( 63 | type=dataset_type, 64 | ann_file=data_root + 'annotations/instances_train2017.json', 65 | img_prefix=data_root + 'train2017/', 66 | img_scale=(1333, 800), 67 | img_norm_cfg=img_norm_cfg, 68 | size_divisor=32, 69 | flip_ratio=0.5, 70 | with_mask=False, 71 | with_crowd=False, 72 | with_label=False), 73 | val=dict( 74 | type=dataset_type, 75 | ann_file=data_root + 'annotations/instances_val2017.json', 76 | img_prefix=data_root + 'val2017/', 77 | img_scale=(1333, 800), 78 | img_norm_cfg=img_norm_cfg, 79 | size_divisor=32, 80 | flip_ratio=0, 81 | with_mask=False, 82 | with_crowd=False, 83 | with_label=False), 84 | test=dict( 85 | type=dataset_type, 86 | ann_file=data_root + 'annotations/instances_val2017.json', 87 | img_prefix=data_root + 'val2017/', 88 | img_scale=(1333, 800), 89 | img_norm_cfg=img_norm_cfg, 90 | size_divisor=32, 91 | flip_ratio=0, 92 | with_mask=False, 93 | with_label=False, 94 | test_mode=True)) 95 | # optimizer 96 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 97 | # runner configs 98 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 99 | lr_config = dict( 100 | policy='step', 101 | warmup='linear', 102 | warmup_iters=500, 103 | warmup_ratio=1.0 / 3, 104 | step=[8, 11]) 105 | checkpoint_config = dict(interval=1) 106 | # yapf:disable 107 | log_config = dict( 108 | interval=50, 109 | hooks=[ 110 | dict(type='TextLoggerHook'), 111 | # dict(type='TensorboardLoggerHook') 112 | ]) 113 | # yapf:enable 114 | # runtime settings 115 | total_epochs = 12 116 | dist_params = dict(backend='nccl') 117 | log_level = 'INFO' 118 | work_dir = './work_dirs/rpn_r101_fpn_1x' 119 | load_from = None 120 | resume_from = None 121 | workflow = [('train', 1)] 122 | 
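The RPN configs above are plain Python files that `tools/train.py` (shown earlier) turns into a model and a training run. Below is a minimal sketch of that flow, assuming a single GPU and non-distributed training; the config path is just an example:

```python
from mmcv import Config

from mmdet.apis import train_detector
from mmdet.datasets import get_dataset
from mmdet.models import build_detector

cfg = Config.fromfile('configs/rpn_r101_fpn_1x.py')
cfg.gpus = 1  # non-distributed, single GPU

# build_detector() looks up type='RPN' in the DETECTORS registry and
# instantiates it with the backbone/neck/rpn_head sub-dicts plus the
# train/test settings from the same config file.
model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)

train_dataset = get_dataset(cfg.data.train)
train_detector(model, train_dataset, cfg, distributed=False, validate=False)
```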
-------------------------------------------------------------------------------- /configs/rpn_x101_32x4d_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='open-mmlab://resnext101_32x4d', 5 | backbone=dict( 6 | type='ResNeXt', 7 | depth=101, 8 | groups=32, 9 | base_width=4, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=1, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_scales=[8], 24 | anchor_ratios=[0.5, 1.0, 2.0], 25 | anchor_strides=[4, 8, 16, 32, 64], 26 | target_means=[.0, .0, .0, .0], 27 | target_stds=[1.0, 1.0, 1.0, 1.0], 28 | use_sigmoid_cls=True)) 29 | # model training and testing settings 30 | train_cfg = dict( 31 | rpn=dict( 32 | assigner=dict( 33 | type='MaxIoUAssigner', 34 | pos_iou_thr=0.7, 35 | neg_iou_thr=0.3, 36 | min_pos_iou=0.3, 37 | ignore_iof_thr=-1), 38 | sampler=dict( 39 | type='RandomSampler', 40 | num=256, 41 | pos_fraction=0.5, 42 | neg_pos_ub=-1, 43 | add_gt_as_proposals=False), 44 | allowed_border=0, 45 | pos_weight=-1, 46 | smoothl1_beta=1 / 9.0, 47 | debug=False)) 48 | test_cfg = dict( 49 | rpn=dict( 50 | nms_across_levels=False, 51 | nms_pre=2000, 52 | nms_post=2000, 53 | max_num=2000, 54 | nms_thr=0.7, 55 | min_bbox_size=0)) 56 | # dataset settings 57 | dataset_type = 'CocoDataset' 58 | data_root = 'data/coco/' 59 | img_norm_cfg = dict( 60 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 61 | data = dict( 62 | imgs_per_gpu=2, 63 | workers_per_gpu=2, 64 | train=dict( 65 | type=dataset_type, 66 | ann_file=data_root + 'annotations/instances_train2017.json', 67 | img_prefix=data_root + 'train2017/', 68 | img_scale=(1333, 800), 69 | img_norm_cfg=img_norm_cfg, 70 | size_divisor=32, 71 | flip_ratio=0.5, 72 | with_mask=False, 73 | with_crowd=False, 74 | with_label=False), 75 | val=dict( 76 | type=dataset_type, 77 | ann_file=data_root + 'annotations/instances_val2017.json', 78 | img_prefix=data_root + 'val2017/', 79 | img_scale=(1333, 800), 80 | img_norm_cfg=img_norm_cfg, 81 | size_divisor=32, 82 | flip_ratio=0, 83 | with_mask=False, 84 | with_crowd=False, 85 | with_label=False), 86 | test=dict( 87 | type=dataset_type, 88 | ann_file=data_root + 'annotations/instances_val2017.json', 89 | img_prefix=data_root + 'val2017/', 90 | img_scale=(1333, 800), 91 | img_norm_cfg=img_norm_cfg, 92 | size_divisor=32, 93 | flip_ratio=0, 94 | with_mask=False, 95 | with_label=False, 96 | test_mode=True)) 97 | # optimizer 98 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 99 | # runner configs 100 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 101 | lr_config = dict( 102 | policy='step', 103 | warmup='linear', 104 | warmup_iters=500, 105 | warmup_ratio=1.0 / 3, 106 | step=[8, 11]) 107 | checkpoint_config = dict(interval=1) 108 | # yapf:disable 109 | log_config = dict( 110 | interval=50, 111 | hooks=[ 112 | dict(type='TextLoggerHook'), 113 | # dict(type='TensorboardLoggerHook') 114 | ]) 115 | # yapf:enable 116 | # runtime settings 117 | total_epochs = 12 118 | dist_params = dict(backend='nccl') 119 | log_level = 'INFO' 120 | work_dir = './work_dirs/rpn_r101_fpn_1x' 121 | load_from = None 122 | resume_from = None 123 | workflow = [('train', 1)] 124 | 
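Every config in this family shares the same optimization recipe: SGD, a linear warmup over the first 500 iterations, then step decay at epochs 8 and 11. The sketch below (not mmcv code) computes the learning rate this implies; the warmup formula follows mmcv's linear rule, and the decay factor gamma=0.1 is an assumed default since the configs do not set it explicitly.

```python
def lr_at(epoch, cur_iter, base_lr=0.02, warmup_iters=500,
          warmup_ratio=1.0 / 3, steps=(8, 11), gamma=0.1):
    # step policy: decay by gamma at each milestone epoch passed
    regular = base_lr * gamma ** sum(epoch >= s for s in steps)
    if cur_iter >= warmup_iters:
        return regular
    # linear warmup: ramp from base_lr * warmup_ratio up to the regular lr
    k = (1 - cur_iter / warmup_iters) * (1 - warmup_ratio)
    return regular * (1 - k)

print(lr_at(0, 0))       # 0.00667 -> base_lr / 3 at the first iteration
print(lr_at(0, 500))     # 0.02    -> warmup finished
print(lr_at(8, 50000))   # 0.002   -> after the first decay step
print(lr_at(11, 70000))  # 0.0002  -> after the second decay step
```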
-------------------------------------------------------------------------------- /configs/rpn_x101_64x4d_fpn_1x.py: --------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 |     type='RPN',
4 |     pretrained='open-mmlab://resnext101_64x4d',
5 |     backbone=dict(
6 |         type='ResNeXt',
7 |         depth=101,
8 |         groups=64,
9 |         base_width=4,
10 |         num_stages=4,
11 |         out_indices=(0, 1, 2, 3),
12 |         frozen_stages=1,
13 |         style='pytorch'),
14 |     neck=dict(
15 |         type='FPN',
16 |         in_channels=[256, 512, 1024, 2048],
17 |         out_channels=256,
18 |         num_outs=5),
19 |     rpn_head=dict(
20 |         type='RPNHead',
21 |         in_channels=256,
22 |         feat_channels=256,
23 |         anchor_scales=[8],
24 |         anchor_ratios=[0.5, 1.0, 2.0],
25 |         anchor_strides=[4, 8, 16, 32, 64],
26 |         target_means=[.0, .0, .0, .0],
27 |         target_stds=[1.0, 1.0, 1.0, 1.0],
28 |         use_sigmoid_cls=True))
29 | # model training and testing settings
30 | train_cfg = dict(
31 |     rpn=dict(
32 |         assigner=dict(
33 |             type='MaxIoUAssigner',
34 |             pos_iou_thr=0.7,
35 |             neg_iou_thr=0.3,
36 |             min_pos_iou=0.3,
37 |             ignore_iof_thr=-1),
38 |         sampler=dict(
39 |             type='RandomSampler',
40 |             num=256,
41 |             pos_fraction=0.5,
42 |             neg_pos_ub=-1,
43 |             add_gt_as_proposals=False),
44 |         allowed_border=0,
45 |         pos_weight=-1,
46 |         smoothl1_beta=1 / 9.0,
47 |         debug=False))
48 | test_cfg = dict(
49 |     rpn=dict(
50 |         nms_across_levels=False,
51 |         nms_pre=2000,
52 |         nms_post=2000,
53 |         max_num=2000,
54 |         nms_thr=0.7,
55 |         min_bbox_size=0))
56 | # dataset settings
57 | dataset_type = 'CocoDataset'
58 | data_root = 'data/coco/'
59 | img_norm_cfg = dict(
60 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
61 | data = dict(
62 |     imgs_per_gpu=2,
63 |     workers_per_gpu=2,
64 |     train=dict(
65 |         type=dataset_type,
66 |         ann_file=data_root + 'annotations/instances_train2017.json',
67 |         img_prefix=data_root + 'train2017/',
68 |         img_scale=(1333, 800),
69 |         img_norm_cfg=img_norm_cfg,
70 |         size_divisor=32,
71 |         flip_ratio=0.5,
72 |         with_mask=False,
73 |         with_crowd=False,
74 |         with_label=False),
75 |     val=dict(
76 |         type=dataset_type,
77 |         ann_file=data_root + 'annotations/instances_val2017.json',
78 |         img_prefix=data_root + 'val2017/',
79 |         img_scale=(1333, 800),
80 |         img_norm_cfg=img_norm_cfg,
81 |         size_divisor=32,
82 |         flip_ratio=0,
83 |         with_mask=False,
84 |         with_crowd=False,
85 |         with_label=False),
86 |     test=dict(
87 |         type=dataset_type,
88 |         ann_file=data_root + 'annotations/instances_val2017.json',
89 |         img_prefix=data_root + 'val2017/',
90 |         img_scale=(1333, 800),
91 |         img_norm_cfg=img_norm_cfg,
92 |         size_divisor=32,
93 |         flip_ratio=0,
94 |         with_mask=False,
95 |         with_label=False,
96 |         test_mode=True))
97 | # optimizer
98 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
99 | # runner configs
100 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
101 | lr_config = dict(
102 |     policy='step',
103 |     warmup='linear',
104 |     warmup_iters=500,
105 |     warmup_ratio=1.0 / 3,
106 |     step=[8, 11])
107 | checkpoint_config = dict(interval=1)
108 | # yapf:disable
109 | log_config = dict(
110 |     interval=50,
111 |     hooks=[
112 |         dict(type='TextLoggerHook'),
113 |         # dict(type='TensorboardLoggerHook')
114 |     ])
115 | # yapf:enable
116 | # runtime settings
117 | total_epochs = 12
118 | dist_params = dict(backend='nccl')
119 | log_level = 'INFO'
120 | work_dir = './work_dirs/rpn_x101_64x4d_fpn_1x'
121 | load_from = None
122 | resume_from = None
123 | workflow = [('train', 1)]
124 | 
-------------------------------------------------------------------------------- /TECHNICAL_DETAILS.md: --------------------------------------------------------------------------------
1 | ## Overview
2 | 
3 | In this section, we will introduce the main units of training a detector:
4 | data loading, the model, and the iteration pipeline.
5 | 
6 | ## Data loading
7 | 
8 | Following typical conventions, we use `Dataset` and `DataLoader` for data loading
9 | with multiple workers. `Dataset` returns a dict of data items corresponding
10 | to the arguments of the model's forward method.
11 | Since the data in object detection may not be the same size (image size, gt bbox size, etc.),
12 | we introduce a new `DataContainer` type in `mmcv` to help collect and distribute
13 | data of different sizes.
14 | See [here](https://github.com/open-mmlab/mmcv/blob/master/mmcv/parallel/data_container.py) for more details.
15 | 
16 | ## Model
17 | 
18 | In mmdetection, model components are basically categorized into 4 types.
19 | 
20 | - backbone: usually a fully convolutional network (FCN) that extracts feature maps, e.g., ResNet.
21 | - neck: the part between backbones and heads, e.g., FPN, ASPP.
22 | - head: the part for specific tasks, e.g., bbox prediction and mask prediction.
23 | - roi extractor: the part for extracting features from feature maps, e.g., RoI Align.
24 | 
25 | We also implement some general detection pipelines with the above components,
26 | such as `SingleStageDetector` and `TwoStageDetector`.
27 | 
28 | ### Build a model with basic components
29 | 
30 | Following some basic pipelines (e.g., two-stage detectors), the model structure
31 | can be customized through config files with little effort.
32 | 
33 | If we want to implement some new components, e.g., the path aggregation
34 | FPN structure in [Path Aggregation Network for Instance Segmentation](https://arxiv.org/abs/1803.01534), there are two things to do.
35 | 
36 | 1. create a new file `mmdet/models/necks/pafpn.py`.
37 | 
38 | ```python
39 | class PAFPN(nn.Module):
40 | 
41 |     def __init__(self,
42 |                  in_channels,
43 |                  out_channels,
44 |                  num_outs,
45 |                  start_level=0,
46 |                  end_level=-1,
47 |                  add_extra_convs=False):
48 |         pass
49 | 
50 |     def forward(self, inputs):
51 |         # implementation is ignored
52 |         pass
53 | ```
54 | 
55 | 2. modify the config file from
56 | 
57 | ```python
58 | neck=dict(
59 |     type='FPN',
60 |     in_channels=[256, 512, 1024, 2048],
61 |     out_channels=256,
62 |     num_outs=5)
63 | ```
64 | 
65 | to
66 | 
67 | ```python
68 | neck=dict(
69 |     type='PAFPN',
70 |     in_channels=[256, 512, 1024, 2048],
71 |     out_channels=256,
72 |     num_outs=5)
73 | ```
74 | 
75 | We will release more components (backbones, necks, heads) for research purposes.
76 | 
77 | ### Write a new model
78 | 
79 | To write a new detection pipeline, you need to inherit from `BaseDetector`,
80 | which defines the following abstract methods.
81 | 
82 | - `extract_feat()`: given an image batch of shape (n, c, h, w), extract the feature map(s).
83 | - `forward_train()`: forward method for training mode
84 | - `simple_test()`: single-scale testing without augmentation
85 | - `aug_test()`: testing with augmentation (multi-scale, flip, etc.)
86 | 
87 | [TwoStageDetector](https://github.com/hellock/mmdetection/blob/master/mmdet/models/detectors/two_stage.py)
88 | is a good example which shows how to do that; a minimal skeleton is sketched below.
89 | 
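For orientation only, here is a minimal sketch of such a subclass. Method bodies, the registry decorator, and loss computation are omitted, and `ToyDetector` plus its argument names are illustrative, not part of the codebase:

```python
import torch.nn as nn


class ToyDetector(nn.Module):  # a real pipeline inherits from BaseDetector

    def extract_feat(self, img):
        # img: (n, c, h, w) -> feature map(s), usually backbone + neck
        raise NotImplementedError

    def forward_train(self, img, img_meta, gt_bboxes, gt_labels, **kwargs):
        # returns a dict of losses, e.g. dict(loss_cls=..., loss_reg=...)
        raise NotImplementedError

    def simple_test(self, img, img_meta, **kwargs):
        # single-scale testing without augmentation
        raise NotImplementedError

    def aug_test(self, imgs, img_metas, **kwargs):
        # testing with augmentation (multi-scale, flip, etc.)
        raise NotImplementedError
```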
90 | ## Iteration pipeline
91 | 
92 | We adopt distributed training for both single-machine and multi-machine setups.
93 | Supposing that the server has 8 GPUs, 8 processes will be started and each process runs on a single GPU.
94 | 
95 | Each process keeps an isolated model, data loader, and optimizer.
96 | Model parameters are only synchronized once at the beginning.
97 | After a forward and backward pass, gradients will be allreduced among all GPUs,
98 | and the optimizer will update model parameters.
99 | Since the gradients are allreduced, the model parameters stay the same for all processes after the iteration.
100 | 
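One iteration of this scheme looks roughly like the sketch below. It uses raw `torch.distributed` calls for clarity; the actual implementation delegates the allreduce to `MMDistributedDataParallel` and the loop to the mmcv `Runner`, and the loss extraction here is a simplifying assumption:

```python
import torch.distributed as dist


def train_iteration(model, data, optimizer):
    # forward + backward on this process's own mini-batch
    loss = model(**data)['loss']  # assume losses already reduced to a scalar
    optimizer.zero_grad()
    loss.backward()
    # sum gradients over all processes, then divide by the world size;
    # afterwards every process holds identical (averaged) gradients
    world_size = dist.get_world_size()
    for param in model.parameters():
        if param.grad is not None:
            dist.all_reduce(param.grad.data)  # default op is SUM
            param.grad.data.div_(world_size)
    # identical gradients -> identical parameter updates on every process
    optimizer.step()
```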
-------------------------------------------------------------------------------- /mmdet/ops/dcn/src/deform_pool_cuda.cpp: --------------------------------------------------------------------------------
1 | // modify from
2 | // https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c
3 | 
4 | // based on
5 | // author: Charles Shang
6 | // https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu
7 | 
8 | #include <torch/extension.h>
9 | 
10 | #include <cmath>
11 | #include <vector>
12 | 
13 | void DeformablePSROIPoolForward(
14 |     const at::Tensor data, const at::Tensor bbox, const at::Tensor trans,
15 |     at::Tensor out, at::Tensor top_count, const int batch, const int channels,
16 |     const int height, const int width, const int num_bbox,
17 |     const int channels_trans, const int no_trans, const float spatial_scale,
18 |     const int output_dim, const int group_size, const int pooled_size,
19 |     const int part_size, const int sample_per_part, const float trans_std);
20 | 
21 | void DeformablePSROIPoolBackwardAcc(
22 |     const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox,
23 |     const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad,
24 |     at::Tensor trans_grad, const int batch, const int channels,
25 |     const int height, const int width, const int num_bbox,
26 |     const int channels_trans, const int no_trans, const float spatial_scale,
27 |     const int output_dim, const int group_size, const int pooled_size,
28 |     const int part_size, const int sample_per_part, const float trans_std);
29 | 
30 | void deform_psroi_pooling_cuda_forward(
31 |     at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out,
32 |     at::Tensor top_count, const int no_trans, const float spatial_scale,
33 |     const int output_dim, const int group_size, const int pooled_size,
34 |     const int part_size, const int sample_per_part, const float trans_std) {
35 |   AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
36 | 
37 |   const int batch = input.size(0);
38 |   const int channels = input.size(1);
39 |   const int height = input.size(2);
40 |   const int width = input.size(3);
41 |   const int channels_trans = no_trans ? 2 : trans.size(1);
42 | 
43 |   const int num_bbox = bbox.size(0);
44 |   if (num_bbox != out.size(0))
45 |     AT_ERROR("Output shape and bbox number won't match: (%d vs %d).",
46 |              out.size(0), num_bbox);
47 | 
48 |   DeformablePSROIPoolForward(
49 |       input, bbox, trans, out, top_count, batch, channels, height, width,
50 |       num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size,
51 |       pooled_size, part_size, sample_per_part, trans_std);
52 | }
53 | 
54 | void deform_psroi_pooling_cuda_backward(
55 |     at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans,
56 |     at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad,
57 |     const int no_trans, const float spatial_scale, const int output_dim,
58 |     const int group_size, const int pooled_size, const int part_size,
59 |     const int sample_per_part, const float trans_std) {
60 |   AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous");
61 |   AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
62 | 
63 |   const int batch = input.size(0);
64 |   const int channels = input.size(1);
65 |   const int height = input.size(2);
66 |   const int width = input.size(3);
67 |   const int channels_trans = no_trans ? 2 : trans.size(1);
68 | 
69 |   const int num_bbox = bbox.size(0);
70 |   if (num_bbox != out_grad.size(0))
71 |     AT_ERROR("Output shape and bbox number won't match: (%d vs %d).",
72 |              out_grad.size(0), num_bbox);
73 | 
74 |   DeformablePSROIPoolBackwardAcc(
75 |       out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch,
76 |       channels, height, width, num_bbox, channels_trans, no_trans,
77 |       spatial_scale, output_dim, group_size, pooled_size, part_size,
78 |       sample_per_part, trans_std);
79 | }
80 | 
81 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
82 |   m.def("deform_psroi_pooling_cuda_forward", &deform_psroi_pooling_cuda_forward,
83 |         "deform psroi pooling forward(CUDA)");
84 |   m.def("deform_psroi_pooling_cuda_backward",
85 |         &deform_psroi_pooling_cuda_backward,
86 |         "deform psroi pooling backward(CUDA)");
87 | }
-------------------------------------------------------------------------------- /ssd_debug/test_ssd流程.py: --------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import mmcv
4 | 
5 | class Registry(object):
6 | 
7 |     def __init__(self, name):
8 |         self._name = name
9 |         self._module_dict = dict()
10 | 
11 |     @property
12 |     def name(self):
13 |         return self._name
14 | 
15 |     @property
16 |     def module_dict(self):
17 |         return self._module_dict
18 | 
19 |     def _register_module(self, module_class):
20 |         """Register a module.
21 | 
22 |         Args:
23 |             module_class (class): Module class to be registered; must subclass :obj:`nn.Module`.
24 |         """
25 |         if not issubclass(module_class, nn.Module):
26 |             raise TypeError(
27 |                 'module must be a child of nn.Module, but got {}'.format(
28 |                     module_class))
29 |         module_name = module_class.__name__
30 |         if module_name in self._module_dict:
31 |             raise KeyError('{} is already registered in {}'.format(
32 |                 module_name, self.name))
33 |         self._module_dict[module_name] = module_class
34 | 
35 |     def register_module(self, cls):
36 |         self._register_module(cls)
37 |         return cls
38 | 
39 | 
40 | BACKBONES = Registry('backbone')
41 | NECKS = Registry('neck')
42 | ROI_EXTRACTORS = Registry('roi_extractor')
43 | HEADS = Registry('head')
44 | DETECTORS = Registry('detector')
45 | 
46 | 
47 | # same as obj_from_dict
48 | def build_module(cfg, registry, default_args):
49 |     assert isinstance(cfg, dict) and 'type' in cfg
50 |     assert isinstance(default_args, dict) or default_args is None
51 |     args = cfg.copy()
52 |     obj_type = args.pop('type')
53 |     if mmcv.is_str(obj_type):
54 |         if obj_type not in registry.module_dict:
55 |             raise KeyError('{} is not in the {} registry'.format(
56 |                 obj_type, registry.name))
57 |         obj_type = registry.module_dict[obj_type]
58 |     elif not isinstance(obj_type, type):
59 |         raise TypeError('type must be a str or valid type, but got {}'.format(
60 |             type(obj_type)))
61 |     if default_args is not None:
62 |         for name, value in default_args.items():
63 |             args.setdefault(name, value)
64 |     return obj_type(**args)
65 | 
66 | 
67 | def build(cfg, registry, default_args=None):
68 |     if isinstance(cfg, list):
69 |         modules = [build_module(cfg_, registry, default_args) for cfg_ in cfg]
70 |         return nn.Sequential(*modules)
71 |     else:
72 |         return build_module(cfg, registry, default_args)
73 | 
74 | cfg = dict(
75 |     type='SingleStageDetector',
76 |     pretrained='open-mmlab://vgg16_caffe',
77 |     backbone=dict(
78 |         type='SSDVGG',
79 |         input_size=300,
80 |         depth=16,
81 |         with_last_pool=False,
82 |         ceil_mode=True,
83 |         out_indices=(3, 4),
84 |         out_feature_indices=(22, 34),
85 |         l2_norm_scale=20),
86 |     neck=None,
87 |     bbox_head=dict(
88 |         type='SSDHead',
89 |         input_size=300,
90 |         in_channels=(512, 1024, 512, 256, 256, 256),
91 |         num_classes=2,
92 |         anchor_strides=(8, 16, 32, 64, 100, 300),
93 |         basesize_ratio_range=(0.2, 0.9),
94 |         anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]),
95 |         target_means=(.0, .0, .0, .0),
96 |         target_stds=(0.1, 0.1, 0.2, 0.2)))
97 | 
98 | default_args = dict(
99 |     train_cfg=dict(
100 |         assigner=dict(
101 |             type='MaxIoUAssigner',
102 |             pos_iou_thr=0.5,
103 |             neg_iou_thr=0.5,
104 |             min_pos_iou=0.,
105 |             ignore_iof_thr=-1,
106 |             gt_max_assign_all=False),
107 |         smoothl1_beta=1.,
108 |         allowed_border=-1,
109 |         pos_weight=-1,
110 |         neg_pos_ratio=3,
111 |         debug=False),
112 |     test_cfg=dict(
113 |         nms=dict(type='nms', iou_thr=0.45),
114 |         min_bbox_size=0,
115 |         score_thr=0.02,
116 |         max_per_img=200))
117 | 
118 | args = cfg.copy()
119 | obj_type = args.pop('type')
120 | print(obj_type)
121 | 
122 | if default_args is not None:
123 |     for name, value in default_args.items():
124 |         args.setdefault(name, value)
125 | print(args)
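
# --- minimal usage sketch (added for illustration; the decorator and
#     build_module behave exactly as defined above, but 'DummyBackbone'
#     is a made-up class, not part of mmdet) ---
@BACKBONES.register_module
class DummyBackbone(nn.Module):

    def __init__(self, depth):
        super(DummyBackbone, self).__init__()
        self.depth = depth

    def forward(self, x):
        return x


# build_module looks the 'type' string up in the registry, merges in
# default_args, and instantiates the class with the remaining kwargs
backbone = build_module(dict(type='DummyBackbone', depth=50), BACKBONES, None)
print(backbone.depth)  # -> 50
-------------------------------------------------------------------------------- /configs/fast_rcnn_r50_fpn_1x.py: --------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 |     type='FastRCNN',
4 |     pretrained='modelzoo://resnet50',
5 |     backbone=dict(
6 |         type='ResNet',
7 |         depth=50,
8 |         num_stages=4,
9 |         out_indices=(0, 1, 2, 3),
10 |         frozen_stages=1,
11 |         style='pytorch'),
12 |     neck=dict(
13 |         type='FPN',
14 | 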
in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | bbox_roi_extractor=dict( 18 | type='SingleRoIExtractor', 19 | roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), 20 | out_channels=256, 21 | featmap_strides=[4, 8, 16, 32]), 22 | bbox_head=dict( 23 | type='SharedFCBBoxHead', 24 | num_fcs=2, 25 | in_channels=256, 26 | fc_out_channels=1024, 27 | roi_feat_size=7, 28 | num_classes=81, 29 | target_means=[0., 0., 0., 0.], 30 | target_stds=[0.1, 0.1, 0.2, 0.2], 31 | reg_class_agnostic=False)) 32 | # model training and testing settings 33 | train_cfg = dict( 34 | rcnn=dict( 35 | assigner=dict( 36 | type='MaxIoUAssigner', 37 | pos_iou_thr=0.5, 38 | neg_iou_thr=0.5, 39 | min_pos_iou=0.5, 40 | ignore_iof_thr=-1), 41 | sampler=dict( 42 | type='RandomSampler', 43 | num=512, 44 | pos_fraction=0.25, 45 | neg_pos_ub=-1, 46 | add_gt_as_proposals=True), 47 | pos_weight=-1, 48 | debug=False)) 49 | test_cfg = dict( 50 | rcnn=dict( 51 | score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) 52 | # dataset settings 53 | dataset_type = 'CocoDataset' 54 | data_root = 'data/coco/' 55 | img_norm_cfg = dict( 56 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 57 | data = dict( 58 | imgs_per_gpu=2, 59 | workers_per_gpu=2, 60 | train=dict( 61 | type=dataset_type, 62 | ann_file=data_root + 'annotations/instances_train2017.json', 63 | img_prefix=data_root + 'train2017/', 64 | img_scale=(1333, 800), 65 | img_norm_cfg=img_norm_cfg, 66 | size_divisor=32, 67 | proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl', 68 | flip_ratio=0.5, 69 | with_mask=False, 70 | with_crowd=True, 71 | with_label=True), 72 | val=dict( 73 | type=dataset_type, 74 | ann_file=data_root + 'annotations/instances_val2017.json', 75 | img_prefix=data_root + 'val2017/', 76 | img_scale=(1333, 800), 77 | img_norm_cfg=img_norm_cfg, 78 | proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', 79 | size_divisor=32, 80 | flip_ratio=0, 81 | with_mask=False, 82 | with_crowd=True, 83 | with_label=True), 84 | test=dict( 85 | type=dataset_type, 86 | ann_file=data_root + 'annotations/instances_val2017.json', 87 | img_prefix=data_root + 'val2017/', 88 | img_scale=(1333, 800), 89 | img_norm_cfg=img_norm_cfg, 90 | proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', 91 | size_divisor=32, 92 | flip_ratio=0, 93 | with_mask=False, 94 | with_label=False, 95 | test_mode=True)) 96 | # optimizer 97 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 98 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 99 | # learning policy 100 | lr_config = dict( 101 | policy='step', 102 | warmup='linear', 103 | warmup_iters=500, 104 | warmup_ratio=1.0 / 3, 105 | step=[8, 11]) 106 | checkpoint_config = dict(interval=1) 107 | # yapf:disable 108 | log_config = dict( 109 | interval=50, 110 | hooks=[ 111 | dict(type='TextLoggerHook'), 112 | # dict(type='TensorboardLoggerHook') 113 | ]) 114 | # yapf:enable 115 | # runtime settings 116 | total_epochs = 12 117 | dist_params = dict(backend='nccl') 118 | log_level = 'INFO' 119 | work_dir = './work_dirs/fast_rcnn_r50_fpn_1x' 120 | load_from = None 121 | resume_from = None 122 | workflow = [('train', 1)] 123 | -------------------------------------------------------------------------------- /configs/fast_rcnn_r101_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FastRCNN', 4 | 
pretrained='modelzoo://resnet101', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=101, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | style='pytorch'), 12 | neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | bbox_roi_extractor=dict( 18 | type='SingleRoIExtractor', 19 | roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), 20 | out_channels=256, 21 | featmap_strides=[4, 8, 16, 32]), 22 | bbox_head=dict( 23 | type='SharedFCBBoxHead', 24 | num_fcs=2, 25 | in_channels=256, 26 | fc_out_channels=1024, 27 | roi_feat_size=7, 28 | num_classes=81, 29 | target_means=[0., 0., 0., 0.], 30 | target_stds=[0.1, 0.1, 0.2, 0.2], 31 | reg_class_agnostic=False)) 32 | # model training and testing settings 33 | train_cfg = dict( 34 | rcnn=dict( 35 | assigner=dict( 36 | type='MaxIoUAssigner', 37 | pos_iou_thr=0.5, 38 | neg_iou_thr=0.5, 39 | min_pos_iou=0.5, 40 | ignore_iof_thr=-1), 41 | sampler=dict( 42 | type='RandomSampler', 43 | num=512, 44 | pos_fraction=0.25, 45 | neg_pos_ub=-1, 46 | add_gt_as_proposals=True), 47 | pos_weight=-1, 48 | debug=False)) 49 | test_cfg = dict( 50 | rcnn=dict( 51 | score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) 52 | # dataset settings 53 | dataset_type = 'CocoDataset' 54 | data_root = 'data/coco/' 55 | img_norm_cfg = dict( 56 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 57 | data = dict( 58 | imgs_per_gpu=2, 59 | workers_per_gpu=2, 60 | train=dict( 61 | type=dataset_type, 62 | ann_file=data_root + 'annotations/instances_train2017.json', 63 | img_prefix=data_root + 'train2017/', 64 | img_scale=(1333, 800), 65 | img_norm_cfg=img_norm_cfg, 66 | size_divisor=32, 67 | proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl', 68 | flip_ratio=0.5, 69 | with_mask=False, 70 | with_crowd=True, 71 | with_label=True), 72 | val=dict( 73 | type=dataset_type, 74 | ann_file=data_root + 'annotations/instances_val2017.json', 75 | img_prefix=data_root + 'val2017/', 76 | img_scale=(1333, 800), 77 | img_norm_cfg=img_norm_cfg, 78 | proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', 79 | size_divisor=32, 80 | flip_ratio=0, 81 | with_mask=False, 82 | with_crowd=True, 83 | with_label=True), 84 | test=dict( 85 | type=dataset_type, 86 | ann_file=data_root + 'annotations/instances_val2017.json', 87 | img_prefix=data_root + 'val2017/', 88 | img_scale=(1333, 800), 89 | img_norm_cfg=img_norm_cfg, 90 | proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', 91 | size_divisor=32, 92 | flip_ratio=0, 93 | with_mask=False, 94 | with_label=False, 95 | test_mode=True)) 96 | # optimizer 97 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 98 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 99 | # learning policy 100 | lr_config = dict( 101 | policy='step', 102 | warmup='linear', 103 | warmup_iters=500, 104 | warmup_ratio=1.0 / 3, 105 | step=[8, 11]) 106 | checkpoint_config = dict(interval=1) 107 | # yapf:disable 108 | log_config = dict( 109 | interval=50, 110 | hooks=[ 111 | dict(type='TextLoggerHook'), 112 | # dict(type='TensorboardLoggerHook') 113 | ]) 114 | # yapf:enable 115 | # runtime settings 116 | total_epochs = 12 117 | dist_params = dict(backend='nccl') 118 | log_level = 'INFO' 119 | work_dir = './work_dirs/fast_rcnn_r101_fpn_1x' 120 | load_from = None 121 | resume_from = None 122 | workflow = [('train', 1)] 123 | 
-------------------------------------------------------------------------------- /mmdet/datasets/transforms.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import torch 4 | 5 | __all__ = ['ImageTransform', 'BboxTransform', 'MaskTransform', 'Numpy2Tensor'] 6 | 7 | 8 | class ImageTransform(object): 9 | """Preprocess an image. 10 | 11 | 1. rescale the image to expected size 12 | 2. normalize the image 13 | 3. flip the image (if needed) 14 | 4. pad the image (if needed) 15 | 5. transpose to (c, h, w) 16 | """ 17 | 18 | def __init__(self, 19 | mean=(0, 0, 0), 20 | std=(1, 1, 1), 21 | to_rgb=True, 22 | size_divisor=None): 23 | self.mean = np.array(mean, dtype=np.float32) 24 | self.std = np.array(std, dtype=np.float32) 25 | self.to_rgb = to_rgb 26 | self.size_divisor = size_divisor 27 | 28 | def __call__(self, img, scale, flip=False, keep_ratio=True): 29 | if keep_ratio: 30 | img, scale_factor = mmcv.imrescale(img, scale, return_scale=True) 31 | else: 32 | img, w_scale, h_scale = mmcv.imresize( 33 | img, scale, return_scale=True) 34 | scale_factor = np.array([w_scale, h_scale, w_scale, h_scale], 35 | dtype=np.float32) 36 | img_shape = img.shape 37 | img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb) 38 | if flip: 39 | img = mmcv.imflip(img) 40 | if self.size_divisor is not None: 41 | img = mmcv.impad_to_multiple(img, self.size_divisor) 42 | pad_shape = img.shape 43 | else: 44 | pad_shape = img_shape 45 | img = img.transpose(2, 0, 1) 46 | return img, img_shape, pad_shape, scale_factor 47 | 48 | 49 | def bbox_flip(bboxes, img_shape): 50 | """Flip bboxes horizontally. 51 | 52 | Args: 53 | bboxes(ndarray): shape (..., 4*k) 54 | img_shape(tuple): (height, width) 55 | """ 56 | assert bboxes.shape[-1] % 4 == 0 57 | w = img_shape[1] 58 | flipped = bboxes.copy() 59 | flipped[..., 0::4] = w - bboxes[..., 2::4] - 1 60 | flipped[..., 2::4] = w - bboxes[..., 0::4] - 1 61 | return flipped 62 | 63 | 64 | class BboxTransform(object): 65 | """Preprocess gt bboxes. 66 | 67 | 1. rescale bboxes according to image size 68 | 2. flip bboxes (if needed) 69 | 3. pad the first dimension to `max_num_gts` 70 | """ 71 | 72 | def __init__(self, max_num_gts=None): 73 | self.max_num_gts = max_num_gts 74 | 75 | def __call__(self, bboxes, img_shape, scale_factor, flip=False): 76 | gt_bboxes = bboxes * scale_factor 77 | if flip: 78 | gt_bboxes = bbox_flip(gt_bboxes, img_shape) 79 | gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1]) 80 | gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0]) 81 | if self.max_num_gts is None: 82 | return gt_bboxes 83 | else: 84 | num_gts = gt_bboxes.shape[0] 85 | padded_bboxes = np.zeros((self.max_num_gts, 4), dtype=np.float32) 86 | padded_bboxes[:num_gts, :] = gt_bboxes 87 | return padded_bboxes 88 | 89 | 90 | class MaskTransform(object): 91 | """Preprocess masks. 92 | 93 | 1. resize masks to expected size and stack to a single array 94 | 2. flip the masks (if needed) 95 | 3. 
pad the masks (if needed) 96 | """ 97 | 98 | def __call__(self, masks, pad_shape, scale_factor, flip=False): 99 | masks = [ 100 | mmcv.imrescale(mask, scale_factor, interpolation='nearest') 101 | for mask in masks 102 | ] 103 | if flip: 104 | masks = [mask[:, ::-1] for mask in masks] 105 | padded_masks = [ 106 | mmcv.impad(mask, pad_shape[:2], pad_val=0) for mask in masks 107 | ] 108 | padded_masks = np.stack(padded_masks, axis=0) 109 | return padded_masks 110 | 111 | 112 | class Numpy2Tensor(object): 113 | 114 | def __init__(self): 115 | pass 116 | 117 | def __call__(self, *args): 118 | if len(args) == 1: 119 | return torch.from_numpy(args[0]) 120 | else: 121 | return tuple([torch.from_numpy(np.array(array)) for array in args]) 122 | -------------------------------------------------------------------------------- /mmdet/apis/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | from collections import OrderedDict 4 | 5 | import torch 6 | from mmcv.runner import Runner, DistSamplerSeedHook 7 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel 8 | 9 | from mmdet.core import (DistOptimizerHook, DistEvalmAPHook, 10 | CocoDistEvalRecallHook, CocoDistEvalmAPHook) 11 | from mmdet.datasets import build_dataloader 12 | from mmdet.models import RPN 13 | from .env import get_root_logger 14 | 15 | 16 | def parse_losses(losses): 17 | log_vars = OrderedDict() 18 | for loss_name, loss_value in losses.items(): 19 | if isinstance(loss_value, torch.Tensor): 20 | log_vars[loss_name] = loss_value.mean() 21 | elif isinstance(loss_value, list): 22 | log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value) 23 | else: 24 | raise TypeError( 25 | '{} is not a tensor or list of tensors'.format(loss_name)) 26 | 27 | loss = sum(_value for _key, _value in log_vars.items() if 'loss' in _key) 28 | 29 | log_vars['loss'] = loss 30 | for name in log_vars: 31 | log_vars[name] = log_vars[name].item() 32 | 33 | return loss, log_vars 34 | 35 | 36 | def batch_processor(model, data, train_mode): 37 | losses = model(**data) 38 | loss, log_vars = parse_losses(losses) 39 | 40 | outputs = dict( 41 | loss=loss, log_vars=log_vars, num_samples=len(data['img'].data)) 42 | 43 | return outputs 44 | 45 | 46 | def train_detector(model, 47 | dataset, 48 | cfg, 49 | distributed=False, 50 | validate=False, 51 | logger=None): 52 | if logger is None: 53 | logger = get_root_logger(cfg.log_level) 54 | 55 | # start training 56 | if distributed: 57 | _dist_train(model, dataset, cfg, validate=validate) 58 | else: 59 | _non_dist_train(model, dataset, cfg, validate=validate) 60 | 61 | 62 | def _dist_train(model, dataset, cfg, validate=False): 63 | # prepare data loaders 64 | data_loaders = [ 65 | build_dataloader( 66 | dataset, 67 | cfg.data.imgs_per_gpu, 68 | cfg.data.workers_per_gpu, 69 | dist=True) 70 | ] 71 | # put model on gpus 72 | model = MMDistributedDataParallel(model.cuda()) 73 | # build runner 74 | runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir, 75 | cfg.log_level) 76 | # register hooks 77 | optimizer_config = DistOptimizerHook(**cfg.optimizer_config) 78 | runner.register_training_hooks(cfg.lr_config, optimizer_config, 79 | cfg.checkpoint_config, cfg.log_config) 80 | runner.register_hook(DistSamplerSeedHook()) 81 | # register eval hooks 82 | if validate: 83 | if isinstance(model.module, RPN): 84 | # TODO: implement recall hooks for other datasets 85 | runner.register_hook(CocoDistEvalRecallHook(cfg.data.val)) 86 | 
else: 87 | if cfg.data.val.type == 'CocoDataset': 88 | runner.register_hook(CocoDistEvalmAPHook(cfg.data.val)) 89 | else: 90 | runner.register_hook(DistEvalmAPHook(cfg.data.val)) 91 | 92 | if cfg.resume_from: 93 | runner.resume(cfg.resume_from) 94 | elif cfg.load_from: 95 | runner.load_checkpoint(cfg.load_from) 96 | runner.run(data_loaders, cfg.workflow, cfg.total_epochs) 97 | 98 | 99 | def _non_dist_train(model, dataset, cfg, validate=False): 100 | # prepare data loaders 101 | data_loaders = [ 102 | build_dataloader( 103 | dataset, 104 | cfg.data.imgs_per_gpu, 105 | cfg.data.workers_per_gpu, 106 | cfg.gpus, 107 | dist=False) 108 | ] 109 | # put model on gpus 110 | model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda() 111 | # build runner 112 | runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir, 113 | cfg.log_level) 114 | runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config, 115 | cfg.checkpoint_config, cfg.log_config) 116 | 117 | if cfg.resume_from: 118 | runner.resume(cfg.resume_from) 119 | elif cfg.load_from: 120 | runner.load_checkpoint(cfg.load_from) 121 | runner.run(data_loaders, cfg.workflow, cfg.total_epochs) 122 | -------------------------------------------------------------------------------- /mmdet/models/anchor_heads/rpn_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from mmcv.cnn import normal_init 5 | 6 | from mmdet.core import delta2bbox 7 | from mmdet.ops import nms 8 | from .anchor_head import AnchorHead 9 | from ..registry import HEADS 10 | 11 | 12 | @HEADS.register_module 13 | class RPNHead(AnchorHead): 14 | 15 | def __init__(self, in_channels, **kwargs): 16 | super(RPNHead, self).__init__(2, in_channels, **kwargs) 17 | 18 | def _init_layers(self): 19 | self.rpn_conv = nn.Conv2d( 20 | self.in_channels, self.feat_channels, 3, padding=1) 21 | self.rpn_cls = nn.Conv2d(self.feat_channels, 22 | self.num_anchors * self.cls_out_channels, 1) 23 | self.rpn_reg = nn.Conv2d(self.feat_channels, self.num_anchors * 4, 1) 24 | 25 | def init_weights(self): 26 | normal_init(self.rpn_conv, std=0.01) 27 | normal_init(self.rpn_cls, std=0.01) 28 | normal_init(self.rpn_reg, std=0.01) 29 | 30 | def forward_single(self, x): 31 | x = self.rpn_conv(x) 32 | x = F.relu(x, inplace=True) 33 | rpn_cls_score = self.rpn_cls(x) 34 | rpn_bbox_pred = self.rpn_reg(x) 35 | return rpn_cls_score, rpn_bbox_pred 36 | 37 | def loss(self, 38 | cls_scores, 39 | bbox_preds, 40 | gt_bboxes, 41 | img_metas, 42 | cfg, 43 | gt_bboxes_ignore=None): 44 | losses = super(RPNHead, self).loss( 45 | cls_scores, 46 | bbox_preds, 47 | gt_bboxes, 48 | None, 49 | img_metas, 50 | cfg, 51 | gt_bboxes_ignore=gt_bboxes_ignore) 52 | return dict( 53 | loss_rpn_cls=losses['loss_cls'], loss_rpn_reg=losses['loss_reg']) 54 | 55 | def get_bboxes_single(self, 56 | cls_scores, 57 | bbox_preds, 58 | mlvl_anchors, 59 | img_shape, 60 | scale_factor, 61 | cfg, 62 | rescale=False): 63 | mlvl_proposals = [] 64 | for idx in range(len(cls_scores)): 65 | rpn_cls_score = cls_scores[idx] 66 | rpn_bbox_pred = bbox_preds[idx] 67 | assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:] 68 | anchors = mlvl_anchors[idx] 69 | rpn_cls_score = rpn_cls_score.permute(1, 2, 0) 70 | if self.use_sigmoid_cls: 71 | rpn_cls_score = rpn_cls_score.reshape(-1) 72 | scores = rpn_cls_score.sigmoid() 73 | else: 74 | rpn_cls_score = rpn_cls_score.reshape(-1, 2) 75 | scores = 
rpn_cls_score.softmax(dim=1)[:, 1] 76 | rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4) 77 | if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre: 78 | _, topk_inds = scores.topk(cfg.nms_pre) 79 | rpn_bbox_pred = rpn_bbox_pred[topk_inds, :] 80 | anchors = anchors[topk_inds, :] 81 | scores = scores[topk_inds] 82 | proposals = delta2bbox(anchors, rpn_bbox_pred, self.target_means, 83 | self.target_stds, img_shape) 84 | if cfg.min_bbox_size > 0: 85 | w = proposals[:, 2] - proposals[:, 0] + 1 86 | h = proposals[:, 3] - proposals[:, 1] + 1 87 | valid_inds = torch.nonzero((w >= cfg.min_bbox_size) & 88 | (h >= cfg.min_bbox_size)).squeeze() 89 | proposals = proposals[valid_inds, :] 90 | scores = scores[valid_inds] 91 | proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1) 92 | proposals, _ = nms(proposals, cfg.nms_thr) 93 | proposals = proposals[:cfg.nms_post, :] 94 | mlvl_proposals.append(proposals) 95 | proposals = torch.cat(mlvl_proposals, 0) 96 | if cfg.nms_across_levels: 97 | proposals, _ = nms(proposals, cfg.nms_thr) 98 | proposals = proposals[:cfg.max_num, :] 99 | else: 100 | scores = proposals[:, 4] 101 | num = min(cfg.max_num, proposals.shape[0]) 102 | _, topk_inds = scores.topk(num) 103 | proposals = proposals[topk_inds, :] 104 | return proposals 105 | -------------------------------------------------------------------------------- /configs/ssd300_coco.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | input_size = 300 3 | model = dict( 4 | type='SingleStageDetector', 5 | pretrained='open-mmlab://vgg16_caffe', 6 | backbone=dict( 7 | type='SSDVGG', 8 | input_size=input_size, 9 | depth=16, 10 | with_last_pool=False, 11 | ceil_mode=True, 12 | out_indices=(3, 4), 13 | out_feature_indices=(22, 34), 14 | l2_norm_scale=20), 15 | neck=None, 16 | bbox_head=dict( 17 | type='SSDHead', 18 | input_size=input_size, 19 | in_channels=(512, 1024, 512, 256, 256, 256), 20 | num_classes=81, 21 | anchor_strides=(8, 16, 32, 64, 100, 300), 22 | basesize_ratio_range=(0.15, 0.9), 23 | anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]), 24 | target_means=(.0, .0, .0, .0), 25 | target_stds=(0.1, 0.1, 0.2, 0.2))) 26 | cudnn_benchmark = True 27 | train_cfg = dict( 28 | assigner=dict( 29 | type='MaxIoUAssigner', 30 | pos_iou_thr=0.5, 31 | neg_iou_thr=0.5, 32 | min_pos_iou=0., 33 | ignore_iof_thr=-1, 34 | gt_max_assign_all=False), 35 | smoothl1_beta=1., 36 | allowed_border=-1, 37 | pos_weight=-1, 38 | neg_pos_ratio=3, 39 | debug=False) 40 | test_cfg = dict( 41 | nms=dict(type='nms', iou_thr=0.45), 42 | min_bbox_size=0, 43 | score_thr=0.02, 44 | max_per_img=200) 45 | # model training and testing settings 46 | # dataset settings 47 | dataset_type = 'CocoDataset' 48 | data_root = 'data/coco/' 49 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) 50 | data = dict( 51 | imgs_per_gpu=8, 52 | workers_per_gpu=3, 53 | train=dict( 54 | type='RepeatDataset', 55 | times=5, 56 | dataset=dict( 57 | type=dataset_type, 58 | ann_file=data_root + 'annotations/instances_train2017.json', 59 | img_prefix=data_root + 'train2017/', 60 | img_scale=(300, 300), 61 | img_norm_cfg=img_norm_cfg, 62 | size_divisor=None, 63 | flip_ratio=0.5, 64 | with_mask=False, 65 | with_crowd=False, 66 | with_label=True, 67 | test_mode=False, 68 | extra_aug=dict( 69 | photo_metric_distortion=dict( 70 | brightness_delta=32, 71 | contrast_range=(0.5, 1.5), 72 | saturation_range=(0.5, 1.5), 73 | hue_delta=18), 74 | expand=dict( 75 | 
mean=img_norm_cfg['mean'], 76 | to_rgb=img_norm_cfg['to_rgb'], 77 | ratio_range=(1, 4)), 78 | random_crop=dict( 79 | min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3)), 80 | resize_keep_ratio=False)), 81 | val=dict( 82 | type=dataset_type, 83 | ann_file=data_root + 'annotations/instances_val2017.json', 84 | img_prefix=data_root + 'val2017/', 85 | img_scale=(300, 300), 86 | img_norm_cfg=img_norm_cfg, 87 | size_divisor=None, 88 | flip_ratio=0, 89 | with_mask=False, 90 | with_label=False, 91 | test_mode=True, 92 | resize_keep_ratio=False), 93 | test=dict( 94 | type=dataset_type, 95 | ann_file=data_root + 'annotations/instances_val2017.json', 96 | img_prefix=data_root + 'val2017/', 97 | img_scale=(300, 300), 98 | img_norm_cfg=img_norm_cfg, 99 | size_divisor=None, 100 | flip_ratio=0, 101 | with_mask=False, 102 | with_label=False, 103 | test_mode=True, 104 | resize_keep_ratio=False)) 105 | # optimizer 106 | optimizer = dict(type='SGD', lr=2e-3, momentum=0.9, weight_decay=5e-4) 107 | optimizer_config = dict() 108 | # learning policy 109 | lr_config = dict( 110 | policy='step', 111 | warmup='linear', 112 | warmup_iters=500, 113 | warmup_ratio=1.0 / 3, 114 | step=[16, 22]) 115 | checkpoint_config = dict(interval=1) 116 | # yapf:disable 117 | log_config = dict( 118 | interval=50, 119 | hooks=[ 120 | dict(type='TextLoggerHook'), 121 | # dict(type='TensorboardLoggerHook') 122 | ]) 123 | # yapf:enable 124 | # runtime settings 125 | total_epochs = 24 126 | dist_params = dict(backend='nccl') 127 | log_level = 'INFO' 128 | work_dir = './work_dirs/ssd300_coco' 129 | load_from = None 130 | resume_from = None 131 | workflow = [('train', 1)] 132 | --------------------------------------------------------------------------------
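As a closing pointer, the configs above are consumed by `tools/train.py` (see the repository tree). A minimal sketch of that flow follows; `build_detector` and `get_dataset` are assumed entry points (only `train_detector` appears verbatim in `mmdet/apis/train.py` above), so check the repo's `mmdet/models/builder.py` and `mmdet/datasets` for the exact names:

```python
import mmcv
from mmdet.apis import train_detector      # defined in mmdet/apis/train.py
from mmdet.models import build_detector    # assumed export of mmdet/models/builder.py
from mmdet.datasets import get_dataset     # assumed dataset-construction helper

cfg = mmcv.Config.fromfile('configs/ssd300_coco.py')
cfg.gpus = 1  # tools/train.py normally sets this from the command line

model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
dataset = get_dataset(cfg.data.train)
train_detector(model, dataset, cfg, distributed=False, validate=False)
```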