├── mmdet ├── ops │ ├── dcn │ │ ├── modules │ │ │ └── __init__.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── deform_pool.py │ │ ├── setup.py │ │ ├── __init__.py │ │ └── src │ │ │ └── deform_pool_cuda.cpp │ ├── roi_pool │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_pool.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── roi_pool.py │ │ ├── __init__.py │ │ ├── setup.py │ │ ├── gradcheck.py │ │ └── src │ │ │ └── roi_pool_cuda.cpp │ ├── roi_align │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ ├── __init__.py │ │ ├── setup.py │ │ ├── gradcheck.py │ │ └── src │ │ │ └── roi_align_cuda.cpp │ ├── nms │ │ ├── __init__.py │ │ ├── src │ │ │ ├── nms_cuda.cpp │ │ │ ├── nms_cpu.cpp │ │ │ └── soft_nms_cpu.pyx │ │ ├── nms_wrapper.py │ │ └── setup.py │ └── __init__.py ├── models │ ├── mask_heads │ │ └── __init__.py │ ├── necks │ │ ├── __init__.py │ │ └── hrfpn.py │ ├── roi_extractors │ │ ├── __init__.py │ │ └── single_level.py │ ├── bbox_heads │ │ └── __init__.py │ ├── anchor_heads │ │ ├── __init__.py │ │ ├── retina_head.py │ │ └── rpn_head.py │ ├── backbones │ │ └── __init__.py │ ├── utils │ │ ├── __init__.py │ │ ├── weight_init.py │ │ ├── norm.py │ │ └── conv_module.py │ ├── detectors │ │ ├── __init__.py │ │ ├── retinanet.py │ │ ├── faster_rcnn.py │ │ ├── mask_rcnn.py │ │ ├── fast_rcnn.py │ │ ├── single_stage.py │ │ └── rpn.py │ ├── __init__.py │ ├── registry.py │ └── builder.py ├── __init__.py ├── core │ ├── mask │ │ ├── __init__.py │ │ ├── utils.py │ │ └── mask_target.py │ ├── anchor │ │ ├── __init__.py │ │ └── anchor_generator.py │ ├── bbox │ │ ├── assigners │ │ │ ├── __init__.py │ │ │ ├── base_assigner.py │ │ │ └── assign_result.py │ │ ├── samplers │ │ │ ├── combined_sampler.py │ │ │ ├── __init__.py │ │ │ ├── sampling_result.py │ │ │ ├── pseudo_sampler.py │ │ │ ├── instance_balanced_pos_sampler.py │ │ │ ├── random_sampler.py │ │ │ ├── iou_balanced_neg_sampler.py │ │ │ ├── ohem_sampler.py │ │ │ └── base_sampler.py │ │ ├── __init__.py │ │ ├── assign_sampling.py │ │ ├── geometry.py │ │ └── bbox_target.py │ ├── utils │ │ ├── __init__.py │ │ ├── misc.py │ │ └── dist_utils.py │ ├── __init__.py │ ├── post_processing │ │ ├── __init__.py │ │ ├── bbox_nms.py │ │ └── merge_augs.py │ ├── loss │ │ ├── __init__.py │ │ └── losses.py │ └── evaluation │ │ ├── __init__.py │ │ └── bbox_overlaps.py ├── datasets │ ├── loader │ │ ├── __init__.py │ │ ├── build_loader.py │ │ └── collate.py │ ├── repeat_dataset.py │ ├── voc.py │ ├── concat_dataset.py │ ├── __init__.py │ ├── xml_style.py │ ├── utils.py │ └── transforms.py └── apis │ ├── __init__.py │ ├── env.py │ └── inference.py ├── images └── hrnetv2p.png ├── demo └── coco_test_12510.jpg ├── tools ├── dist_train.sh ├── coco_eval.py ├── voc_eval.py └── train.py ├── compile.sh ├── .gitignore ├── INSTALL.md ├── setup.py ├── configs ├── retinanet_r50_fpn_1x.py ├── retinanet_r101_fpn_1x.py ├── retinanet_x101_32x4d_fpn_1x.py ├── retinanet_x101_64x4d_fpn_1x.py ├── rpn_r50_fpn_1x.py ├── rpn_r101_fpn_1x.py ├── rpn_x101_32x4d_fpn_1x.py ├── rpn_x101_64x4d_fpn_1x.py ├── fast_rcnn_r50_fpn_1x.py ├── fast_rcnn_r101_fpn_1x.py ├── ssd300_coco.py └── ssd512_coco.py └── TECHNICAL_DETAILS.md /mmdet/ops/dcn/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/functions/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/functions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/functions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /images/hrnetv2p.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HRNet/HRNet-Object-Detection/HEAD/images/hrnetv2p.png -------------------------------------------------------------------------------- /mmdet/ops/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_wrapper import nms, soft_nms 2 | 3 | __all__ = ['nms', 'soft_nms'] 4 | -------------------------------------------------------------------------------- /demo/coco_test_12510.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HRNet/HRNet-Object-Detection/HEAD/demo/coco_test_12510.jpg -------------------------------------------------------------------------------- /mmdet/models/mask_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcn_mask_head import FCNMaskHead 2 | 3 | __all__ = ['FCNMaskHead'] 4 | -------------------------------------------------------------------------------- /mmdet/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__, short_version 2 | 3 | __all__ = ['__version__', 'short_version'] 4 | -------------------------------------------------------------------------------- /mmdet/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .fpn import FPN 2 | from .hrfpn import HRFPN 3 | 4 | __all__ = ['FPN', 'HRFPN'] 5 | -------------------------------------------------------------------------------- /mmdet/models/roi_extractors/__init__.py: -------------------------------------------------------------------------------- 1 | from .single_level import SingleRoIExtractor 2 | 3 | __all__ = ['SingleRoIExtractor'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.roi_pool import roi_pool 2 | from .modules.roi_pool import RoIPool 3 | 4 | __all__ = ['roi_pool', 'RoIPool'] 5 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.roi_align import roi_align 2 | from .modules.roi_align import RoIAlign 3 | 4 | __all__ = ['roi_align', 
'RoIAlign'] 5 | -------------------------------------------------------------------------------- /mmdet/core/mask/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import split_combined_polys 2 | from .mask_target import mask_target 3 | 4 | __all__ = ['split_combined_polys', 'mask_target'] 5 | -------------------------------------------------------------------------------- /mmdet/core/anchor/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_generator import AnchorGenerator 2 | from .anchor_target import anchor_target 3 | 4 | __all__ = ['AnchorGenerator', 'anchor_target'] 5 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | $PYTHON -m torch.distributed.launch --nproc_per_node=$2 $(dirname "$0")/train.py $1 --launcher pytorch ${@:3} 6 | -------------------------------------------------------------------------------- /mmdet/models/bbox_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_head import BBoxHead 2 | from .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead 3 | 4 | __all__ = ['BBoxHead', 'ConvFCBBoxHead', 'SharedFCBBoxHead'] 5 | -------------------------------------------------------------------------------- /mmdet/datasets/loader/__init__.py: -------------------------------------------------------------------------------- 1 | from .build_loader import build_dataloader 2 | from .sampler import GroupSampler, DistributedGroupSampler 3 | 4 | __all__ = [ 5 | 'GroupSampler', 'DistributedGroupSampler', 'build_dataloader' 6 | ] 7 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_assigner import BaseAssigner 2 | from .max_iou_assigner import MaxIoUAssigner 3 | from .assign_result import AssignResult 4 | 5 | __all__ = ['BaseAssigner', 'MaxIoUAssigner', 'AssignResult'] 6 | -------------------------------------------------------------------------------- /mmdet/models/anchor_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_head import AnchorHead 2 | from .rpn_head import RPNHead 3 | from .retina_head import RetinaHead 4 | from .ssd_head import SSDHead 5 | 6 | __all__ = ['AnchorHead', 'RPNHead', 'RetinaHead', 'SSDHead'] 7 | -------------------------------------------------------------------------------- /mmdet/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .dist_utils import allreduce_grads, DistOptimizerHook 2 | from .misc import tensor2imgs, unmap, multi_apply 3 | 4 | __all__ = [ 5 | 'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', 'unmap', 6 | 'multi_apply' 7 | ] 8 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/base_assigner.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class BaseAssigner(metaclass=ABCMeta): 5 | 6 | @abstractmethod 7 | def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None): 8 | pass 9 | 
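# Hedged illustration (not part of the original file): a minimal concrete
# assigner showing the contract that `assign` must fulfil. MaxIoUAssigner
# (exported from assigners/__init__.py) is the real implementation; the
# gt_inds convention assumed here is 0 = negative, i > 0 = assigned to
# ground truth i - 1.
class _TrivialAssigner(BaseAssigner):

    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
        import torch
        from .assign_result import AssignResult
        # mark every bbox as negative with zero overlap
        gt_inds = bboxes.new_zeros((bboxes.size(0), ), dtype=torch.long)
        max_overlaps = bboxes.new_zeros((bboxes.size(0), ))
        return AssignResult(gt_bboxes.size(0), gt_inds, max_overlaps)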
-------------------------------------------------------------------------------- /mmdet/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import ResNet 2 | from .resnext import ResNeXt 3 | from .ssd_vgg import SSDVGG 4 | from .hrnet import HighResolutionNet 5 | from .synchrnet import SyncHighResolutionNet 6 | 7 | __all__ = ['ResNet', 'ResNeXt', 'SSDVGG', 'HighResolutionNet', 'SyncHighResolutionNet'] 8 | -------------------------------------------------------------------------------- /mmdet/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .env import init_dist, get_root_logger, set_random_seed 2 | from .train import train_detector 3 | from .inference import inference_detector, show_result 4 | 5 | __all__ = [ 6 | 'init_dist', 'get_root_logger', 'set_random_seed', 'train_detector', 7 | 'inference_detector', 'show_result' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor import * # noqa: F401, F403 2 | from .bbox import * # noqa: F401, F403 3 | from .mask import * # noqa: F401, F403 4 | from .loss import * # noqa: F401, F403 5 | from .evaluation import * # noqa: F401, F403 6 | from .post_processing import * # noqa: F401, F403 7 | from .utils import * # noqa: F401, F403 8 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_nms import multiclass_nms 2 | from .merge_augs import (merge_aug_proposals, merge_aug_bboxes, 3 | merge_aug_scores, merge_aug_masks) 4 | 5 | __all__ = [ 6 | 'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes', 7 | 'merge_aug_scores', 'merge_aug_masks' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='roi_pool', 6 | ext_modules=[ 7 | CUDAExtension('roi_pool_cuda', [ 8 | 'src/roi_pool_cuda.cpp', 9 | 'src/roi_pool_kernel.cu', 10 | ]) 11 | ], 12 | cmdclass={'build_ext': BuildExtension}) 13 | -------------------------------------------------------------------------------- /mmdet/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .conv_module import ConvModule 2 | from .norm import build_norm_layer 3 | from .weight_init import (xavier_init, normal_init, uniform_init, kaiming_init, 4 | bias_init_with_prob) 5 | 6 | __all__ = [ 7 | 'ConvModule', 'build_norm_layer', 'xavier_init', 'normal_init', 8 | 'uniform_init', 'kaiming_init', 'bias_init_with_prob' 9 | ] 10 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='roi_align_cuda', 6 | ext_modules=[ 7 | CUDAExtension('roi_align_cuda', [ 8 | 'src/roi_align_cuda.cpp', 9 | 'src/roi_align_kernel.cu', 10 | ]), 11 | ], 12 | cmdclass={'build_ext': BuildExtension}) 13 | 
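# Note: this extension (like roi_pool, nms and dcn) is built in place by the
# repository's compile.sh, which runs `python setup.py build_ext --inplace`
# in each op directory.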
-------------------------------------------------------------------------------- /mmdet/ops/roi_pool/modules/roi_pool.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_pool import roi_pool 3 | 4 | 5 | class RoIPool(Module): 6 | 7 | def __init__(self, out_size, spatial_scale): 8 | super(RoIPool, self).__init__() 9 | 10 | self.out_size = out_size 11 | self.spatial_scale = float(spatial_scale) 12 | 13 | def forward(self, features, rois): 14 | return roi_pool(features, rois, self.out_size, self.spatial_scale) 15 | -------------------------------------------------------------------------------- /mmdet/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseDetector 2 | from .single_stage import SingleStageDetector 3 | from .two_stage import TwoStageDetector 4 | from .rpn import RPN 5 | from .fast_rcnn import FastRCNN 6 | from .faster_rcnn import FasterRCNN 7 | from .mask_rcnn import MaskRCNN 8 | from .cascade_rcnn import CascadeRCNN 9 | from .retinanet import RetinaNet 10 | 11 | __all__ = [ 12 | 'BaseDetector', 'SingleStageDetector', 'TwoStageDetector', 'RPN', 13 | 'FastRCNN', 'FasterRCNN', 'MaskRCNN', 'CascadeRCNN', 'RetinaNet' 14 | ] 15 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='deform_conv', 6 | ext_modules=[ 7 | CUDAExtension('deform_conv_cuda', [ 8 | 'src/deform_conv_cuda.cpp', 9 | 'src/deform_conv_cuda_kernel.cu', 10 | ]), 11 | CUDAExtension('deform_pool_cuda', [ 12 | 'src/deform_pool_cuda.cpp', 'src/deform_pool_cuda_kernel.cu' 13 | ]), 14 | ], 15 | cmdclass={'build_ext': BuildExtension}) 16 | -------------------------------------------------------------------------------- /mmdet/core/loss/__init__.py: -------------------------------------------------------------------------------- 1 | from .losses import (weighted_nll_loss, weighted_cross_entropy, 2 | weighted_binary_cross_entropy, sigmoid_focal_loss, 3 | weighted_sigmoid_focal_loss, mask_cross_entropy, 4 | smooth_l1_loss, weighted_smoothl1, accuracy) 5 | 6 | __all__ = [ 7 | 'weighted_nll_loss', 'weighted_cross_entropy', 8 | 'weighted_binary_cross_entropy', 'sigmoid_focal_loss', 9 | 'weighted_sigmoid_focal_loss', 'mask_cross_entropy', 'smooth_l1_loss', 10 | 'weighted_smoothl1', 'accuracy' 11 | ] 12 | -------------------------------------------------------------------------------- /mmdet/datasets/repeat_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class RepeatDataset(object): 5 | 6 | def __init__(self, dataset, times): 7 | self.dataset = dataset 8 | self.times = times 9 | self.CLASSES = dataset.CLASSES 10 | if hasattr(self.dataset, 'flag'): 11 | self.flag = np.tile(self.dataset.flag, times) 12 | 13 | self._ori_len = len(self.dataset) 14 | 15 | def __getitem__(self, idx): 16 | return self.dataset[idx % self._ori_len] 17 | 18 | def __len__(self): 19 | return self.times * self._ori_len 20 | -------------------------------------------------------------------------------- /mmdet/models/detectors/retinanet.py: -------------------------------------------------------------------------------- 1 | from 
.single_stage import SingleStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class RetinaNet(SingleStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head, 12 | train_cfg=None, 13 | test_cfg=None, 14 | pretrained=None): 15 | super(RetinaNet, self).__init__(backbone, neck, bbox_head, train_cfg, 16 | test_cfg, pretrained) 17 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/gradcheck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import gradcheck 3 | 4 | import os.path as osp 5 | import sys 6 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 7 | from roi_pool import RoIPool # noqa: E402 8 | 9 | feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda() 10 | rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55], 11 | [1, 67, 40, 110, 120]]).cuda() 12 | inputs = (feat, rois) 13 | print('Gradcheck for roi pooling...') 14 | test = gradcheck(RoIPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3) 15 | print(test) 16 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/combined_sampler.py: -------------------------------------------------------------------------------- 1 | from .base_sampler import BaseSampler 2 | from ..assign_sampling import build_sampler 3 | 4 | 5 | class CombinedSampler(BaseSampler): 6 | 7 | def __init__(self, pos_sampler, neg_sampler, **kwargs): 8 | super(CombinedSampler, self).__init__(**kwargs) 9 | self.pos_sampler = build_sampler(pos_sampler, **kwargs) 10 | self.neg_sampler = build_sampler(neg_sampler, **kwargs) 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/modules/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_align import RoIAlignFunction 3 | 4 | 5 | class RoIAlign(Module): 6 | 7 | def __init__(self, out_size, spatial_scale, sample_num=0): 8 | super(RoIAlign, self).__init__() 9 | 10 | self.out_size = out_size 11 | self.spatial_scale = float(spatial_scale) 12 | self.sample_num = int(sample_num) 13 | 14 | def forward(self, features, rois): 15 | return RoIAlignFunction.apply(features, rois, self.out_size, 16 | self.spatial_scale, self.sample_num) 17 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_sampler import BaseSampler 2 | from .pseudo_sampler import PseudoSampler 3 | from .random_sampler import RandomSampler 4 | from .instance_balanced_pos_sampler import InstanceBalancedPosSampler 5 | from .iou_balanced_neg_sampler import IoUBalancedNegSampler 6 | from .combined_sampler import CombinedSampler 7 | from .ohem_sampler import OHEMSampler 8 | from .sampling_result import SamplingResult 9 | 10 | __all__ = [ 11 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 12 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 13 | 'OHEMSampler', 'SamplingResult' 14 | ] 15 | -------------------------------------------------------------------------------- /mmdet/ops/nms/src/nms_cuda.cpp: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include <torch/extension.h> 3 | 4 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 5 | 6 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 7 | 8 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 9 | CHECK_CUDA(dets); 10 | if (dets.numel() == 0) 11 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 12 | return nms_cuda(dets, threshold); 13 | } 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("nms", &nms, "non-maximum suppression"); 17 | } -------------------------------------------------------------------------------- /compile.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | echo "Building roi align op..." 6 | cd mmdet/ops/roi_align 7 | if [ -d "build" ]; then 8 | rm -r build 9 | fi 10 | $PYTHON setup.py build_ext --inplace 11 | 12 | echo "Building roi pool op..." 13 | cd ../roi_pool 14 | if [ -d "build" ]; then 15 | rm -r build 16 | fi 17 | $PYTHON setup.py build_ext --inplace 18 | 19 | echo "Building nms op..." 20 | cd ../nms 21 | if [ -d "build" ]; then 22 | rm -r build 23 | fi 24 | $PYTHON setup.py build_ext --inplace 25 | 26 | echo "Building dcn..." 27 | cd ../dcn 28 | if [ -d "build" ]; then 29 | rm -r build 30 | fi 31 | $PYTHON setup.py build_ext --inplace 32 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.deform_conv import deform_conv, modulated_deform_conv 2 | from .functions.deform_pool import deform_roi_pooling 3 | from .modules.deform_conv import (DeformConv, ModulatedDeformConv, 4 | ModulatedDeformConvPack) 5 | from .modules.deform_pool import (DeformRoIPooling, DeformRoIPoolingPack, 6 | ModulatedDeformRoIPoolingPack) 7 | 8 | __all__ = [ 9 | 'DeformConv', 'DeformRoIPooling', 'DeformRoIPoolingPack', 10 | 'ModulatedDeformRoIPoolingPack', 'ModulatedDeformConv', 11 | 'ModulatedDeformConvPack', 'deform_conv', 12 | 'modulated_deform_conv', 'deform_roi_pooling' 13 | ] 14 | -------------------------------------------------------------------------------- /mmdet/datasets/voc.py: -------------------------------------------------------------------------------- 1 | from .xml_style import XMLDataset 2 | 3 | 4 | class VOCDataset(XMLDataset): 5 | 6 | CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 7 | 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 8 | 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 9 | 'tvmonitor') 10 | 11 | def __init__(self, **kwargs): 12 | super(VOCDataset, self).__init__(**kwargs) 13 | if 'VOC2007' in self.img_prefix: 14 | self.year = 2007 15 | elif 'VOC2012' in self.img_prefix: 16 | self.year = 2012 17 | else: 18 | raise ValueError('Cannot infer dataset year from img_prefix') 19 | -------------------------------------------------------------------------------- /mmdet/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .dcn import (DeformConv, DeformRoIPooling, DeformRoIPoolingPack, 2 | ModulatedDeformRoIPoolingPack, ModulatedDeformConv, 3 | ModulatedDeformConvPack, deform_conv, modulated_deform_conv, 4 | deform_roi_pooling) 5 | from .nms import nms,
soft_nms 6 | from .roi_align import RoIAlign, roi_align 7 | from .roi_pool import RoIPool, roi_pool 8 | 9 | __all__ = [ 10 | 'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool', 11 | 'DeformConv', 'DeformRoIPooling', 'DeformRoIPoolingPack', 12 | 'ModulatedDeformRoIPoolingPack', 'ModulatedDeformConv', 13 | 'ModulatedDeformConvPack', 'deform_conv', 'modulated_deform_conv', 14 | 'deform_roi_pooling' 15 | ] 16 | -------------------------------------------------------------------------------- /mmdet/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import * # noqa: F401,F403 2 | from .necks import * # noqa: F401,F403 3 | from .roi_extractors import * # noqa: F401,F403 4 | from .anchor_heads import * # noqa: F401,F403 5 | from .bbox_heads import * # noqa: F401,F403 6 | from .mask_heads import * # noqa: F401,F403 7 | from .detectors import * # noqa: F401,F403 8 | from .registry import BACKBONES, NECKS, ROI_EXTRACTORS, HEADS, DETECTORS 9 | from .builder import (build_backbone, build_neck, build_roi_extractor, 10 | build_head, build_detector) 11 | 12 | __all__ = [ 13 | 'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'HEADS', 'DETECTORS', 14 | 'build_backbone', 'build_neck', 'build_roi_extractor', 'build_head', 15 | 'build_detector' 16 | ] 17 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/assign_result.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class AssignResult(object): 5 | 6 | def __init__(self, num_gts, gt_inds, max_overlaps, labels=None): 7 | self.num_gts = num_gts 8 | self.gt_inds = gt_inds 9 | self.max_overlaps = max_overlaps 10 | self.labels = labels 11 | 12 | def add_gt_(self, gt_labels): 13 | self_inds = torch.arange( 14 | 1, len(gt_labels) + 1, dtype=torch.long, device=gt_labels.device) 15 | self.gt_inds = torch.cat([self_inds, self.gt_inds]) 16 | self.max_overlaps = torch.cat( 17 | [self.max_overlaps.new_ones(self.num_gts), self.max_overlaps]) 18 | if self.labels is not None: 19 | self.labels = torch.cat([gt_labels, self.labels]) 20 | -------------------------------------------------------------------------------- /mmdet/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 3 | 4 | 5 | class ConcatDataset(_ConcatDataset): 6 | """A wrapper of concatenated dataset. 7 | 8 | Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but 9 | concat the group flag for image aspect ratio. 10 | 11 | Args: 12 | datasets (list[:obj:`Dataset`]): A list of datasets. 
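    Example (a sketch; ds_a and ds_b stand for any two dataset instances
    with the same CLASSES):
        combined = ConcatDataset([ds_a, ds_b])  # flags are concatenated too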
13 | """ 14 | 15 | def __init__(self, datasets): 16 | super(ConcatDataset, self).__init__(datasets) 17 | self.CLASSES = datasets[0].CLASSES 18 | if hasattr(datasets[0], 'flag'): 19 | flags = [] 20 | for i in range(0, len(datasets)): 21 | flags.append(datasets[i].flag) 22 | self.flag = np.concatenate(flags) 23 | -------------------------------------------------------------------------------- /mmdet/models/detectors/faster_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage import TwoStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class FasterRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | rpn_head, 12 | bbox_roi_extractor, 13 | bbox_head, 14 | train_cfg, 15 | test_cfg, 16 | pretrained=None): 17 | super(FasterRCNN, self).__init__( 18 | backbone=backbone, 19 | neck=neck, 20 | rpn_head=rpn_head, 21 | bbox_roi_extractor=bbox_roi_extractor, 22 | bbox_head=bbox_head, 23 | train_cfg=train_cfg, 24 | test_cfg=test_cfg, 25 | pretrained=pretrained) 26 | -------------------------------------------------------------------------------- /mmdet/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .custom import CustomDataset 2 | from .xml_style import XMLDataset 3 | from .coco import CocoDataset 4 | from .voc import VOCDataset 5 | from .loader import GroupSampler, DistributedGroupSampler, build_dataloader 6 | from .utils import to_tensor, random_scale, show_ann, get_dataset 7 | from .concat_dataset import ConcatDataset 8 | from .repeat_dataset import RepeatDataset 9 | from .extra_aug import ExtraAugmentation 10 | from .zip_dataset import ZipDataset 11 | from .coco_zip import CocoZipDataset 12 | 13 | __all__ = [ 14 | 'CustomDataset', 'XMLDataset', 'CocoDataset', 'VOCDataset', 'GroupSampler', 15 | 'DistributedGroupSampler', 'build_dataloader', 'to_tensor', 'random_scale', 16 | 'show_ann', 'get_dataset', 'ConcatDataset', 'RepeatDataset', 'ZipDataset', 17 | 'ExtraAugmentation', 'CocoZipDataset' 18 | ] -------------------------------------------------------------------------------- /tools/coco_eval.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | 3 | from mmdet.core import coco_eval 4 | 5 | 6 | def main(): 7 | parser = ArgumentParser(description='COCO Evaluation') 8 | parser.add_argument('result', help='result file path') 9 | parser.add_argument('--ann', help='annotation file path') 10 | parser.add_argument( 11 | '--types', 12 | type=str, 13 | nargs='+', 14 | choices=['proposal_fast', 'proposal', 'bbox', 'segm', 'keypoint'], 15 | default=['bbox'], 16 | help='result types') 17 | parser.add_argument( 18 | '--max-dets', 19 | type=int, 20 | nargs='+', 21 | default=[100, 300, 1000], 22 | help='proposal numbers, only used for recall evaluation') 23 | args = parser.parse_args() 24 | coco_eval(args.result, args.types, args.ann, args.max_dets) 25 | 26 | 27 | if __name__ == '__main__': 28 | main() 29 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/sampling_result.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class SamplingResult(object): 5 | 6 | def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result, 7 | gt_flags): 8 | self.pos_inds = pos_inds 9 | self.neg_inds = neg_inds 10 | self.pos_bboxes = 
bboxes[pos_inds] 11 | self.neg_bboxes = bboxes[neg_inds] 12 | self.pos_is_gt = gt_flags[pos_inds] 13 | 14 | self.num_gts = gt_bboxes.shape[0] 15 | self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1 16 | self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds, :] 17 | if assign_result.labels is not None: 18 | self.pos_gt_labels = assign_result.labels[pos_inds] 19 | else: 20 | self.pos_gt_labels = None 21 | 22 | @property 23 | def bboxes(self): 24 | return torch.cat([self.pos_bboxes, self.neg_bboxes]) 25 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/pseudo_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sampler import BaseSampler 4 | from .sampling_result import SamplingResult 5 | 6 | 7 | class PseudoSampler(BaseSampler): 8 | 9 | def __init__(self, **kwargs): 10 | pass 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | 18 | def sample(self, assign_result, bboxes, gt_bboxes, **kwargs): 19 | pos_inds = torch.nonzero( 20 | assign_result.gt_inds > 0).squeeze(-1).unique() 21 | neg_inds = torch.nonzero( 22 | assign_result.gt_inds == 0).squeeze(-1).unique() 23 | gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8) 24 | sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 25 | assign_result, gt_flags) 26 | return sampling_result 27 | -------------------------------------------------------------------------------- /mmdet/models/detectors/mask_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage import TwoStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class MaskRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | rpn_head, 12 | bbox_roi_extractor, 13 | bbox_head, 14 | mask_roi_extractor, 15 | mask_head, 16 | train_cfg, 17 | test_cfg, 18 | pretrained=None): 19 | super(MaskRCNN, self).__init__( 20 | backbone=backbone, 21 | neck=neck, 22 | rpn_head=rpn_head, 23 | bbox_roi_extractor=bbox_roi_extractor, 24 | bbox_head=bbox_head, 25 | mask_roi_extractor=mask_roi_extractor, 26 | mask_head=mask_head, 27 | train_cfg=train_cfg, 28 | test_cfg=test_cfg, 29 | pretrained=pretrained) 30 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/gradcheck.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.autograd import gradcheck 4 | 5 | import os.path as osp 6 | import sys 7 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 8 | from roi_align import RoIAlign # noqa: E402 9 | 10 | feat_size = 15 11 | spatial_scale = 1.0 / 8 12 | img_size = feat_size / spatial_scale 13 | num_imgs = 2 14 | num_rois = 20 15 | 16 | batch_ind = np.random.randint(num_imgs, size=(num_rois, 1)) 17 | rois = np.random.rand(num_rois, 4) * img_size * 0.5 18 | rois[:, 2:] += img_size * 0.5 19 | rois = np.hstack((batch_ind, rois)) 20 | 21 | feat = torch.randn( 22 | num_imgs, 16, feat_size, feat_size, requires_grad=True, device='cuda:0') 23 | rois = torch.from_numpy(rois).float().cuda() 24 | inputs = (feat, rois) 25 | print('Gradcheck for roi align...') 26 | test = gradcheck(RoIAlign(3, spatial_scale), inputs, atol=1e-3, eps=1e-3) 27 | print(test) 28 | test = gradcheck(RoIAlign(3, 
spatial_scale, 2), inputs, atol=1e-3, eps=1e-3) 29 | print(test) 30 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .class_names import (voc_classes, imagenet_det_classes, 2 | imagenet_vid_classes, coco_classes, dataset_aliases, 3 | get_classes) 4 | from .coco_utils import coco_eval, fast_eval_recall, results2json 5 | from .eval_hooks import (DistEvalHook, DistEvalmAPHook, CocoDistEvalRecallHook, 6 | CocoDistEvalmAPHook) 7 | from .mean_ap import average_precision, eval_map, print_map_summary 8 | from .recall import (eval_recalls, print_recall_summary, plot_num_recall, 9 | plot_iou_recall) 10 | 11 | __all__ = [ 12 | 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes', 13 | 'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval', 14 | 'fast_eval_recall', 'results2json', 'DistEvalHook', 'DistEvalmAPHook', 15 | 'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision', 16 | 'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary', 17 | 'plot_num_recall', 'plot_iou_recall' 18 | ] 19 | -------------------------------------------------------------------------------- /mmdet/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | from .geometry import bbox_overlaps 2 | from .assigners import BaseAssigner, MaxIoUAssigner, AssignResult 3 | from .samplers import (BaseSampler, PseudoSampler, RandomSampler, 4 | InstanceBalancedPosSampler, IoUBalancedNegSampler, 5 | CombinedSampler, SamplingResult) 6 | from .assign_sampling import build_assigner, build_sampler, assign_and_sample 7 | from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping, 8 | bbox_mapping_back, bbox2roi, roi2bbox, bbox2result) 9 | from .bbox_target import bbox_target 10 | 11 | __all__ = [ 12 | 'bbox_overlaps', 'BaseAssigner', 'MaxIoUAssigner', 'AssignResult', 13 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 14 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 15 | 'SamplingResult', 'build_assigner', 'build_sampler', 'assign_and_sample', 16 | 'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_mapping', 17 | 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', 'bbox_target' 18 | ] 19 | -------------------------------------------------------------------------------- /mmdet/core/utils/misc.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import mmcv 4 | import numpy as np 5 | from six.moves import map, zip 6 | 7 | 8 | def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): 9 | num_imgs = tensor.size(0) 10 | mean = np.array(mean, dtype=np.float32) 11 | std = np.array(std, dtype=np.float32) 12 | imgs = [] 13 | for img_id in range(num_imgs): 14 | img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0) 15 | img = mmcv.imdenormalize( 16 | img, mean, std, to_bgr=to_rgb).astype(np.uint8) 17 | imgs.append(np.ascontiguousarray(img)) 18 | return imgs 19 | 20 | 21 | def multi_apply(func, *args, **kwargs): 22 | pfunc = partial(func, **kwargs) if kwargs else func 23 | map_results = map(pfunc, *args) 24 | return tuple(map(list, zip(*map_results))) 25 | 26 | 27 | def unmap(data, count, inds, fill=0): 28 | """ Unmap a subset of items (data) back to the original set of items (of 29 | size count) """ 30 | if data.dim() == 1: 31 | ret = data.new_full((count, ), fill)
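# the kept subset is written back at its original indices below; all
# other slots retain the fill value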
32 | ret[inds] = data 33 | else: 34 | new_size = (count, ) + data.size()[1:] 35 | ret = data.new_full(new_size, fill) 36 | ret[inds, :] = data 37 | return ret 38 | -------------------------------------------------------------------------------- /mmdet/core/mask/utils.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | 4 | def split_combined_polys(polys, poly_lens, polys_per_mask): 5 | """Split the combined 1-D polys into masks. 6 | 7 | A mask is represented as a list of polys, and a poly is represented as 8 | a 1-D array. In the dataset, all masks are concatenated into a single 1-D 9 | tensor. Here we need to split the tensor into original representations. 10 | 11 | Args: 12 | polys (list): a list (length = image num) of 1-D tensors 13 | poly_lens (list): a list (length = image num) of poly length 14 | polys_per_mask (list): a list (length = image num) of poly number 15 | of each mask 16 | 17 | Returns: 18 | list: a list (length = image num) of list (length = mask num) of 19 | list (length = poly num) of numpy array 20 | """ 21 | mask_polys_list = [] 22 | for img_id in range(len(polys)): 23 | polys_single = polys[img_id] 24 | polys_lens_single = poly_lens[img_id].tolist() 25 | polys_per_mask_single = polys_per_mask[img_id].tolist() 26 | 27 | split_polys = mmcv.slice_list(polys_single, polys_lens_single) 28 | mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single) 29 | mask_polys_list.append(mask_polys) 30 | return mask_polys_list 31 | -------------------------------------------------------------------------------- /mmdet/models/registry.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class Registry(object): 5 | 6 | def __init__(self, name): 7 | self._name = name 8 | self._module_dict = dict() 9 | 10 | @property 11 | def name(self): 12 | return self._name 13 | 14 | @property 15 | def module_dict(self): 16 | return self._module_dict 17 | 18 | def _register_module(self, module_class): 19 | """Register a module. 20 | 21 | Args: 22 | module_class (:obj:`nn.Module` subclass): Module class to be registered. 23 | """ 24 | if not issubclass(module_class, nn.Module): 25 | raise TypeError( 26 | 'module must be a child of nn.Module, but got {}'.format( 27 | module_class)) 28 | module_name = module_class.__name__ 29 | if module_name in self._module_dict: 30 | raise KeyError('{} is already registered in {}'.format( 31 | module_name, self.name)) 32 | self._module_dict[module_name] = module_class 33 | 34 | def register_module(self, cls): 35 | self._register_module(cls) 36 | return cls 37 | 38 | 39 | BACKBONES = Registry('backbone') 40 | NECKS = Registry('neck') 41 | ROI_EXTRACTORS = Registry('roi_extractor') 42 | HEADS = Registry('head') 43 | DETECTORS = Registry('detector') 44 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assign_sampling.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from .
import assigners, samplers 4 | 5 | 6 | def build_assigner(cfg, **kwargs): 7 | if isinstance(cfg, assigners.BaseAssigner): 8 | return cfg 9 | elif isinstance(cfg, dict): 10 | return mmcv.runner.obj_from_dict( 11 | cfg, assigners, default_args=kwargs) 12 | else: 13 | raise TypeError('Invalid type {} for building an assigner'.format( 14 | type(cfg))) 15 | 16 | 17 | def build_sampler(cfg, **kwargs): 18 | if isinstance(cfg, samplers.BaseSampler): 19 | return cfg 20 | elif isinstance(cfg, dict): 21 | return mmcv.runner.obj_from_dict( 22 | cfg, samplers, default_args=kwargs) 23 | else: 24 | raise TypeError('Invalid type {} for building a sampler'.format( 25 | type(cfg))) 26 | 27 | 28 | def assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg): 29 | bbox_assigner = build_assigner(cfg.assigner) 30 | bbox_sampler = build_sampler(cfg.sampler) 31 | assign_result = bbox_assigner.assign(bboxes, gt_bboxes, gt_bboxes_ignore, 32 | gt_labels) 33 | sampling_result = bbox_sampler.sample(assign_result, bboxes, gt_bboxes, 34 | gt_labels) 35 | return assign_result, sampling_result 36 | -------------------------------------------------------------------------------- /mmdet/core/mask/mask_target.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import mmcv 4 | 5 | 6 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list, 7 | cfg): 8 | cfg_list = [cfg for _ in range(len(pos_proposals_list))] 9 | mask_targets = map(mask_target_single, pos_proposals_list, 10 | pos_assigned_gt_inds_list, gt_masks_list, cfg_list) 11 | mask_targets = torch.cat(list(mask_targets)) 12 | return mask_targets 13 | 14 | 15 | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg): 16 | mask_size = cfg.mask_size 17 | num_pos = pos_proposals.size(0) 18 | mask_targets = [] 19 | if num_pos > 0: 20 | proposals_np = pos_proposals.cpu().numpy() 21 | pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy() 22 | for i in range(num_pos): 23 | gt_mask = gt_masks[pos_assigned_gt_inds[i]] 24 | bbox = proposals_np[i, :].astype(np.int32) 25 | x1, y1, x2, y2 = bbox 26 | w = np.maximum(x2 - x1 + 1, 1) 27 | h = np.maximum(y2 - y1 + 1, 1) 28 | # mask is uint8 both before and after resizing 29 | target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w], 30 | (mask_size, mask_size)) 31 | mask_targets.append(target) 32 | mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to( 33 | pos_proposals.device) 34 | else: 35 | mask_targets = pos_proposals.new_zeros((0, mask_size, mask_size)) 36 | return mask_targets 37 | -------------------------------------------------------------------------------- /mmdet/datasets/loader/build_loader.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | from mmcv.runner import get_dist_info 4 | from .collate import collate 5 | from torch.utils.data import DataLoader 6 | 7 | from .sampler import GroupSampler, DistributedGroupSampler 8 | 9 | # https://github.com/pytorch/pytorch/issues/973 10 | import resource 11 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 12 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 13 | 14 | 15 | def build_dataloader(dataset, 16 | imgs_per_gpu, 17 | workers_per_gpu, 18 | num_gpus=1, 19 | dist=True, 20 | pad_size=None, 21 | **kwargs): 22 | if dist: 23 | rank, world_size = get_dist_info() 24 | sampler = DistributedGroupSampler(dataset, imgs_per_gpu, world_size, 25 | rank) 26 |
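# distributed mode runs one process per GPU, so the per-process batch
# size below is simply imgs_per_gpu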
batch_size = imgs_per_gpu 27 | num_workers = workers_per_gpu 28 | else: 29 | if not kwargs.get('shuffle', True): 30 | sampler = None 31 | else: 32 | sampler = GroupSampler(dataset, imgs_per_gpu) 33 | batch_size = num_gpus * imgs_per_gpu 34 | num_workers = num_gpus * workers_per_gpu 35 | 36 | data_loader = DataLoader( 37 | dataset, 38 | batch_size=batch_size, 39 | sampler=sampler, 40 | num_workers=num_workers, 41 | collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu, pad_size=pad_size), 42 | pin_memory=False, 43 | **kwargs) 44 | 45 | return data_loader 46 | -------------------------------------------------------------------------------- /mmdet/models/utils/weight_init.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | 4 | 5 | def xavier_init(module, gain=1, bias=0, distribution='normal'): 6 | assert distribution in ['uniform', 'normal'] 7 | if distribution == 'uniform': 8 | nn.init.xavier_uniform_(module.weight, gain=gain) 9 | else: 10 | nn.init.xavier_normal_(module.weight, gain=gain) 11 | if hasattr(module, 'bias'): 12 | nn.init.constant_(module.bias, bias) 13 | 14 | 15 | def normal_init(module, mean=0, std=1, bias=0): 16 | nn.init.normal_(module.weight, mean, std) 17 | if hasattr(module, 'bias'): 18 | nn.init.constant_(module.bias, bias) 19 | 20 | 21 | def uniform_init(module, a=0, b=1, bias=0): 22 | nn.init.uniform_(module.weight, a, b) 23 | if hasattr(module, 'bias'): 24 | nn.init.constant_(module.bias, bias) 25 | 26 | 27 | def kaiming_init(module, 28 | mode='fan_out', 29 | nonlinearity='relu', 30 | bias=0, 31 | distribution='normal'): 32 | assert distribution in ['uniform', 'normal'] 33 | if distribution == 'uniform': 34 | nn.init.kaiming_uniform_( 35 | module.weight, mode=mode, nonlinearity=nonlinearity) 36 | else: 37 | nn.init.kaiming_normal_( 38 | module.weight, mode=mode, nonlinearity=nonlinearity) 39 | if hasattr(module, 'bias'): 40 | nn.init.constant_(module.bias, bias) 41 | 42 | 43 | def bias_init_with_prob(prior_prob): 44 | """ initialize conv/fc bias value according to a given probability""" 45 | bias_init = float(-np.log((1 - prior_prob) / prior_prob)) 46 | return bias_init 47 | -------------------------------------------------------------------------------- /mmdet/models/builder.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | from torch import nn 3 | 4 | from .registry import BACKBONES, NECKS, ROI_EXTRACTORS, HEADS, DETECTORS 5 | 6 | 7 | def _build_module(cfg, registry, default_args): 8 | assert isinstance(cfg, dict) and 'type' in cfg 9 | assert isinstance(default_args, dict) or default_args is None 10 | args = cfg.copy() 11 | obj_type = args.pop('type') 12 | if mmcv.is_str(obj_type): 13 | if obj_type not in registry.module_dict: 14 | raise KeyError('{} is not in the {} registry'.format( 15 | obj_type, registry.name)) 16 | obj_type = registry.module_dict[obj_type] 17 | elif not isinstance(obj_type, type): 18 | raise TypeError('type must be a str or valid type, but got {}'.format( 19 | type(obj_type))) 20 | if default_args is not None: 21 | for name, value in default_args.items(): 22 | args.setdefault(name, value) 23 | return obj_type(**args) 24 | 25 | 26 | def build(cfg, registry, default_args=None): 27 | if isinstance(cfg, list): 28 | modules = [_build_module(cfg_, registry, default_args) for cfg_ in cfg] 29 | return nn.Sequential(*modules) 30 | else: 31 | return _build_module(cfg, registry, default_args) 32 | 33 | 34 | def
build_backbone(cfg): 35 | return build(cfg, BACKBONES) 36 | 37 | 38 | def build_neck(cfg): 39 | return build(cfg, NECKS) 40 | 41 | 42 | def build_roi_extractor(cfg): 43 | return build(cfg, ROI_EXTRACTORS) 44 | 45 | 46 | def build_head(cfg): 47 | return build(cfg, HEADS) 48 | 49 | 50 | def build_detector(cfg, train_cfg=None, test_cfg=None): 51 | return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg)) 52 | -------------------------------------------------------------------------------- /mmdet/apis/env.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import random 4 | 5 | import numpy as np 6 | import torch 7 | import torch.distributed as dist 8 | import torch.multiprocessing as mp 9 | from mmcv.runner import get_dist_info 10 | 11 | 12 | def init_dist(launcher, backend='nccl', **kwargs): 13 | #if mp.get_start_method(allow_none=True) is None: 14 | # mp.set_start_method('spawn') 15 | if launcher == 'pytorch': 16 | _init_dist_pytorch(backend, **kwargs) 17 | elif launcher == 'mpi': 18 | _init_dist_mpi(backend, **kwargs) 19 | elif launcher == 'slurm': 20 | _init_dist_slurm(backend, **kwargs) 21 | else: 22 | raise ValueError('Invalid launcher type: {}'.format(launcher)) 23 | 24 | 25 | def _init_dist_pytorch(backend, **kwargs): 26 | # TODO: use local_rank instead of rank % num_gpus 27 | rank = int(os.environ['RANK']) 28 | num_gpus = torch.cuda.device_count() 29 | torch.cuda.set_device(rank % num_gpus) 30 | dist.init_process_group(backend=backend, **kwargs) 31 | 32 | 33 | def _init_dist_mpi(backend, **kwargs): 34 | raise NotImplementedError 35 | 36 | 37 | def _init_dist_slurm(backend, **kwargs): 38 | raise NotImplementedError 39 | 40 | 41 | def set_random_seed(seed): 42 | random.seed(seed) 43 | np.random.seed(seed) 44 | torch.manual_seed(seed) 45 | torch.cuda.manual_seed_all(seed) 46 | 47 | 48 | def get_root_logger(log_level=logging.INFO): 49 | logger = logging.getLogger() 50 | if not logger.hasHandlers(): 51 | logging.basicConfig( 52 | format='%(asctime)s - %(levelname)s - %(message)s', 53 | level=log_level) 54 | rank, _ = get_dist_info() 55 | if rank != 0: 56 | logger.setLevel('ERROR') 57 | return logger 58 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/bbox_overlaps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou'): 5 | """Calculate the ious between each bbox of bboxes1 and bboxes2. 
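    For mode 'iou' the result is overlap / (area1 + area2 - overlap); for
    'iof' the denominator is the foreground box area alone. Areas use the
    inclusive-pixel convention (x2 - x1 + 1) * (y2 - y1 + 1), matching the
    implementation below.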
6 | 7 | Args: 8 | bboxes1(ndarray): shape (n, 4) 9 | bboxes2(ndarray): shape (k, 4) 10 | mode(str): iou (intersection over union) or iof (intersection 11 | over foreground) 12 | 13 | Returns: 14 | ious(ndarray): shape (n, k) 15 | """ 16 | 17 | assert mode in ['iou', 'iof'] 18 | 19 | bboxes1 = bboxes1.astype(np.float32) 20 | bboxes2 = bboxes2.astype(np.float32) 21 | rows = bboxes1.shape[0] 22 | cols = bboxes2.shape[0] 23 | ious = np.zeros((rows, cols), dtype=np.float32) 24 | if rows * cols == 0: 25 | return ious 26 | exchange = False 27 | if bboxes1.shape[0] > bboxes2.shape[0]: 28 | bboxes1, bboxes2 = bboxes2, bboxes1 29 | ious = np.zeros((cols, rows), dtype=np.float32) 30 | exchange = True 31 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 32 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 33 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 34 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 35 | for i in range(bboxes1.shape[0]): 36 | x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0]) 37 | y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1]) 38 | x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2]) 39 | y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3]) 40 | overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum( 41 | y_end - y_start + 1, 0) 42 | if mode == 'iou': 43 | union = area1[i] + area2 - overlap 44 | else: 45 | union = area1[i] if not exchange else area2 46 | ious[i, :] = overlap / union 47 | if exchange: 48 | ious = ious.T 49 | return ious 50 | -------------------------------------------------------------------------------- /mmdet/models/detectors/fast_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage import TwoStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class FastRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_roi_extractor, 12 | bbox_head, 13 | train_cfg, 14 | test_cfg, 15 | mask_roi_extractor=None, 16 | mask_head=None, 17 | pretrained=None): 18 | super(FastRCNN, self).__init__( 19 | backbone=backbone, 20 | neck=neck, 21 | bbox_roi_extractor=bbox_roi_extractor, 22 | bbox_head=bbox_head, 23 | train_cfg=train_cfg, 24 | test_cfg=test_cfg, 25 | mask_roi_extractor=mask_roi_extractor, 26 | mask_head=mask_head, 27 | pretrained=pretrained) 28 | 29 | def forward_test(self, imgs, img_metas, proposals, **kwargs): 30 | for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]: 31 | if not isinstance(var, list): 32 | raise TypeError('{} must be a list, but got {}'.format( 33 | name, type(var))) 34 | 35 | num_augs = len(imgs) 36 | if num_augs != len(img_metas): 37 | raise ValueError( 38 | 'num of augmentations ({}) != num of image meta ({})'.format( 39 | len(imgs), len(img_metas))) 40 | # TODO: remove the restriction of imgs_per_gpu == 1 when prepared 41 | imgs_per_gpu = imgs[0].size(0) 42 | assert imgs_per_gpu == 1 43 | 44 | if num_augs == 1: 45 | return self.simple_test(imgs[0], img_metas[0], proposals[0], 46 | **kwargs) 47 | else: 48 | return self.aug_test(imgs, img_metas, proposals, **kwargs) 49 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | 
var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # cython generated cpp 107 | mmdet/ops/nms/src/soft_nms_cpu.cpp 108 | mmdet/version.py 109 | data 110 | .vscode 111 | .idea 112 | -------------------------------------------------------------------------------- /mmdet/models/utils/norm.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | norm_cfg = { 5 | # format: layer_type: (abbreviation, module) 6 | 'BN': ('bn', nn.BatchNorm2d), 7 | 'SyncBN': ('bn', None), 8 | 'GN': ('gn', nn.GroupNorm), 9 | # and potentially 'SN' 10 | } 11 | 12 | 13 | def build_norm_layer(cfg, num_features, postfix=''): 14 | """ Build normalization layer 15 | 16 | Args: 17 | cfg (dict): cfg should contain: 18 | type (str): identify norm layer type. 19 | layer args: args needed to instantiate a norm layer. 20 | frozen (bool): [optional] whether to stop gradient updates 21 | of norm layer, it is helpful to set frozen mode 22 | in backbone's norms. 23 | num_features (int): number of channels from input 24 | postfix (int, str): appended to the norm abbreviation to 25 | create the layer name.
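        Example (a sketch of the default path through the code below):
            build_norm_layer(dict(type='BN'), 64) returns
            ('bn', nn.BatchNorm2d(64, eps=1e-5))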
26 | 27 | Returns: 28 | name (str): abbreviation + postfix 29 | layer (nn.Module): the created norm layer 30 | """ 31 | assert isinstance(cfg, dict) and 'type' in cfg 32 | cfg_ = cfg.copy() 33 | 34 | layer_type = cfg_.pop('type') 35 | if layer_type not in norm_cfg: 36 | raise KeyError('Unrecognized norm type {}'.format(layer_type)) 37 | else: 38 | abbr, norm_layer = norm_cfg[layer_type] 39 | if norm_layer is None: 40 | raise NotImplementedError 41 | 42 | assert isinstance(postfix, (int, str)) 43 | name = abbr + str(postfix) 44 | 45 | frozen = cfg_.pop('frozen', False) 46 | cfg_.setdefault('eps', 1e-5) 47 | if layer_type != 'GN': 48 | layer = norm_layer(num_features, **cfg_) 49 | else: 50 | assert 'num_groups' in cfg_ 51 | layer = norm_layer(num_channels=num_features, **cfg_) 52 | 53 | if frozen: 54 | for param in layer.parameters(): 55 | param.requires_grad = False 56 | 57 | return name, layer 58 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .random_sampler import RandomSampler 5 | 6 | 7 | class InstanceBalancedPosSampler(RandomSampler): 8 | 9 | def _sample_pos(self, assign_result, num_expected, **kwargs): 10 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 11 | if pos_inds.numel() != 0: 12 | pos_inds = pos_inds.squeeze(1) 13 | if pos_inds.numel() <= num_expected: 14 | return pos_inds 15 | else: 16 | unique_gt_inds = assign_result.gt_inds[pos_inds].unique() 17 | num_gts = len(unique_gt_inds) 18 | num_per_gt = int(round(num_expected / float(num_gts)) + 1) 19 | sampled_inds = [] 20 | for i in unique_gt_inds: 21 | inds = torch.nonzero(assign_result.gt_inds == i.item()) 22 | if inds.numel() != 0: 23 | inds = inds.squeeze(1) 24 | else: 25 | continue 26 | if len(inds) > num_per_gt: 27 | inds = self.random_choice(inds, num_per_gt) 28 | sampled_inds.append(inds) 29 | sampled_inds = torch.cat(sampled_inds) 30 | if len(sampled_inds) < num_expected: 31 | num_extra = num_expected - len(sampled_inds) 32 | extra_inds = np.array( 33 | list(set(pos_inds.cpu()) - set(sampled_inds.cpu()))) 34 | if len(extra_inds) > num_extra: 35 | extra_inds = self.random_choice(extra_inds, num_extra) 36 | extra_inds = torch.from_numpy(extra_inds).to( 37 | assign_result.gt_inds.device).long() 38 | sampled_inds = torch.cat([sampled_inds, extra_inds]) 39 | elif len(sampled_inds) > num_expected: 40 | sampled_inds = self.random_choice(sampled_inds, num_expected) 41 | return sampled_inds 42 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/functions/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from ..
import roi_pool_cuda 5 | 6 | 7 | class RoIPoolFunction(Function): 8 | 9 | @staticmethod 10 | def forward(ctx, features, rois, out_size, spatial_scale): 11 | if isinstance(out_size, int): 12 | out_h = out_size 13 | out_w = out_size 14 | elif isinstance(out_size, tuple): 15 | assert len(out_size) == 2 16 | assert isinstance(out_size[0], int) 17 | assert isinstance(out_size[1], int) 18 | out_h, out_w = out_size 19 | else: 20 | raise TypeError( 21 | '"out_size" must be an integer or tuple of integers') 22 | assert features.is_cuda 23 | ctx.save_for_backward(rois) 24 | num_channels = features.size(1) 25 | num_rois = rois.size(0) 26 | out_size = (num_rois, num_channels, out_h, out_w) 27 | output = features.new_zeros(out_size) 28 | argmax = features.new_zeros(out_size, dtype=torch.int) 29 | roi_pool_cuda.forward(features, rois, out_h, out_w, spatial_scale, 30 | output, argmax) 31 | ctx.spatial_scale = spatial_scale 32 | ctx.feature_size = features.size() 33 | ctx.argmax = argmax 34 | 35 | return output 36 | 37 | @staticmethod 38 | def backward(ctx, grad_output): 39 | assert grad_output.is_cuda 40 | spatial_scale = ctx.spatial_scale 41 | feature_size = ctx.feature_size 42 | argmax = ctx.argmax 43 | rois = ctx.saved_tensors[0] 44 | assert feature_size is not None 45 | 46 | grad_input = grad_rois = None 47 | if ctx.needs_input_grad[0]: 48 | grad_input = grad_output.new_zeros(feature_size) 49 | roi_pool_cuda.backward(grad_output.contiguous(), rois, argmax, 50 | spatial_scale, grad_input) 51 | 52 | return grad_input, grad_rois, None, None 53 | 54 | 55 | roi_pool = RoIPoolFunction.apply 56 | -------------------------------------------------------------------------------- /tools/voc_eval.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | 3 | import mmcv 4 | import numpy as np 5 | 6 | from mmdet import datasets 7 | from mmdet.core import eval_map 8 | 9 | 10 | def voc_eval(result_file, dataset, iou_thr=0.5): 11 | det_results = mmcv.load(result_file) 12 | gt_bboxes = [] 13 | gt_labels = [] 14 | gt_ignore = [] 15 | for i in range(len(dataset)): 16 | ann = dataset.get_ann_info(i) 17 | bboxes = ann['bboxes'] 18 | labels = ann['labels'] 19 | if 'bboxes_ignore' in ann: 20 | ignore = np.concatenate([ 21 | np.zeros(bboxes.shape[0], dtype=np.bool), 22 | np.ones(ann['bboxes_ignore'].shape[0], dtype=np.bool) 23 | ]) 24 | gt_ignore.append(ignore) 25 | bboxes = np.vstack([bboxes, ann['bboxes_ignore']]) 26 | labels = np.concatenate([labels, ann['labels_ignore']]) 27 | gt_bboxes.append(bboxes) 28 | gt_labels.append(labels) 29 | if not gt_ignore: 30 | gt_ignore = None 31 | if hasattr(dataset, 'year') and dataset.year == 2007: 32 | dataset_name = 'voc07' 33 | else: 34 | dataset_name = dataset.CLASSES 35 | eval_map( 36 | det_results, 37 | gt_bboxes, 38 | gt_labels, 39 | gt_ignore=gt_ignore, 40 | scale_ranges=None, 41 | iou_thr=iou_thr, 42 | dataset=dataset_name, 43 | print_summary=True) 44 | 45 | 46 | def main(): 47 | parser = ArgumentParser(description='VOC Evaluation') 48 | parser.add_argument('result', help='result file path') 49 | parser.add_argument('config', help='config file path') 50 | parser.add_argument( 51 | '--iou-thr', 52 | type=float, 53 | default=0.5, 54 | help='IoU threshold for evaluation') 55 | args = parser.parse_args() 56 | cfg = mmcv.Config.fromfile(args.config) 57 | test_dataset = mmcv.runner.obj_from_dict(cfg.data.test, datasets) 58 | voc_eval(args.result, test_dataset, args.iou_thr) 59 | 60 | 61 | if __name__ ==
'__main__': 62 | main() 63 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/random_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .base_sampler import BaseSampler 5 | 6 | 7 | class RandomSampler(BaseSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | neg_pos_ub=-1, 13 | add_gt_as_proposals=True, 14 | **kwargs): 15 | super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub, 16 | add_gt_as_proposals) 17 | 18 | @staticmethod 19 | def random_choice(gallery, num): 20 | """Randomly select some elements from the gallery. 21 | 22 | It seems that PyTorch's implementation is slower than numpy, so we use 23 | numpy to randperm the indices. 24 | """ 25 | assert len(gallery) >= num 26 | if isinstance(gallery, list): 27 | gallery = np.array(gallery) 28 | cands = np.arange(len(gallery)) 29 | np.random.shuffle(cands) 30 | rand_inds = cands[:num] 31 | if not isinstance(gallery, np.ndarray): 32 | rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device) 33 | return gallery[rand_inds] 34 | 35 | def _sample_pos(self, assign_result, num_expected, **kwargs): 36 | """Randomly sample some positive samples.""" 37 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 38 | if pos_inds.numel() != 0: 39 | pos_inds = pos_inds.squeeze(1) 40 | if pos_inds.numel() <= num_expected: 41 | return pos_inds 42 | else: 43 | return self.random_choice(pos_inds, num_expected) 44 | 45 | def _sample_neg(self, assign_result, num_expected, **kwargs): 46 | """Randomly sample some negative samples.""" 47 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 48 | if neg_inds.numel() != 0: 49 | neg_inds = neg_inds.squeeze(1) 50 | if len(neg_inds) <= num_expected: 51 | return neg_inds 52 | else: 53 | return self.random_choice(neg_inds, num_expected) 54 | -------------------------------------------------------------------------------- /mmdet/core/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch.distributed as dist 4 | from torch._utils import (_flatten_dense_tensors, _unflatten_dense_tensors, 5 | _take_tensors) 6 | from mmcv.runner import OptimizerHook 7 | 8 | 9 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): 10 | if bucket_size_mb > 0: 11 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 12 | buckets = _take_tensors(tensors, bucket_size_bytes) 13 | else: 14 | buckets = OrderedDict() 15 | for tensor in tensors: 16 | tp = tensor.type() 17 | if tp not in buckets: 18 | buckets[tp] = [] 19 | buckets[tp].append(tensor) 20 | buckets = buckets.values() 21 | 22 | for bucket in buckets: 23 | flat_tensors = _flatten_dense_tensors(bucket) 24 | dist.all_reduce(flat_tensors) 25 | flat_tensors.div_(world_size) 26 | for tensor, synced in zip( 27 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)): 28 | tensor.copy_(synced) 29 | 30 | 31 | def allreduce_grads(model, coalesce=True, bucket_size_mb=-1): 32 | grads = [ 33 | param.grad.data for param in model.parameters() 34 | if param.requires_grad and param.grad is not None 35 | ] 36 | world_size = dist.get_world_size() 37 | if coalesce: 38 | _allreduce_coalesced(grads, world_size, bucket_size_mb) 39 | else: 40 | for tensor in grads: 41 | dist.all_reduce(tensor.div_(world_size)) 42 | 43 | 44 | class DistOptimizerHook(OptimizerHook): 45 | 46 | def __init__(self, grad_clip=None,
coalesce=True, bucket_size_mb=-1): 47 | self.grad_clip = grad_clip 48 | self.coalesce = coalesce 49 | self.bucket_size_mb = bucket_size_mb 50 | 51 | def after_train_iter(self, runner): 52 | runner.optimizer.zero_grad() 53 | runner.outputs['loss'].backward() 54 | allreduce_grads(runner.model, self.coalesce, self.bucket_size_mb) 55 | if self.grad_clip is not None: 56 | self.clip_grads(runner.model.parameters()) 57 | runner.optimizer.step() 58 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/bbox_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.ops.nms import nms_wrapper 4 | 5 | 6 | def multiclass_nms(multi_bboxes, multi_scores, score_thr, nms_cfg, max_num=-1): 7 | """NMS for multi-class bboxes. 8 | 9 | Args: 10 | multi_bboxes (Tensor): shape (n, #class*4) or (n, 4) 11 | multi_scores (Tensor): shape (n, #class) 12 | score_thr (float): bbox threshold, bboxes with scores lower than it 13 | will not be considered. 14 | nms_cfg (dict): NMS config; 'type' selects the op from nms_wrapper (default 'nms') and the remaining keys are passed to it. 15 | max_num (int): if there are more than max_num bboxes after NMS, 16 | only top max_num will be kept. 17 | 18 | Returns: 19 | tuple: (bboxes, labels), tensors of shape (k, 5) and (k, ). Labels 20 | are 0-based. 21 | """ 22 | num_classes = multi_scores.shape[1] 23 | bboxes, labels = [], [] 24 | nms_cfg_ = nms_cfg.copy() 25 | nms_type = nms_cfg_.pop('type', 'nms') 26 | nms_op = getattr(nms_wrapper, nms_type) 27 | for i in range(1, num_classes): 28 | cls_inds = multi_scores[:, i] > score_thr 29 | if not cls_inds.any(): 30 | continue 31 | # get bboxes and scores of this class 32 | if multi_bboxes.shape[1] == 4: 33 | _bboxes = multi_bboxes[cls_inds, :] 34 | else: 35 | _bboxes = multi_bboxes[cls_inds, i * 4:(i + 1) * 4] 36 | _scores = multi_scores[cls_inds, i] 37 | cls_dets = torch.cat([_bboxes, _scores[:, None]], dim=1) 38 | cls_dets, _ = nms_op(cls_dets, **nms_cfg_) 39 | cls_labels = multi_bboxes.new_full( 40 | (cls_dets.shape[0], ), i - 1, dtype=torch.long) 41 | bboxes.append(cls_dets) 42 | labels.append(cls_labels) 43 | if bboxes: 44 | bboxes = torch.cat(bboxes) 45 | labels = torch.cat(labels) 46 | if max_num > 0 and bboxes.shape[0] > max_num: 47 | _, inds = bboxes[:, -1].sort(descending=True) 48 | inds = inds[:max_num] 49 | bboxes = bboxes[inds] 50 | labels = labels[inds] 51 | else: 52 | bboxes = multi_bboxes.new_zeros((0, 5)) 53 | labels = multi_bboxes.new_zeros((0, ), dtype=torch.long) 54 | 55 | return bboxes, labels 56 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Installation 2 | 3 | ### Requirements 4 | 5 | - Linux (tested on Ubuntu 16.04 and CentOS 7.2) 6 | - Python 3.4+ 7 | - PyTorch 1.0 8 | - Cython 9 | - [mmcv](https://github.com/open-mmlab/mmcv) >= 0.2.2 10 | 11 | ### Install mmdetection 12 | 13 | a. Install PyTorch 1.0 and torchvision following the [official instructions](https://pytorch.org/). 14 | 15 | b. Clone the mmdetection repository. 16 | 17 | ```shell 18 | git clone https://github.com/open-mmlab/mmdetection.git 19 | ``` 20 | 21 | c. Compile CUDA extensions. 22 | 23 | ```shell 24 | cd mmdetection 25 | pip install cython # or "conda install cython" if you prefer conda 26 | ./compile.sh # or "PYTHON=python3 ./compile.sh" if you use system python3 without virtual environments 27 | ``` 28 | 29 | d. Install mmdetection (other dependencies will be installed automatically).
30 | 31 | ```shell 32 | python(3) setup.py install # add --user if you want to install it locally 33 | # or "pip install ." 34 | ``` 35 | 36 | Note: You need to run the last step each time you pull updates from GitHub. 37 | The git commit id will be written to the version number and also saved in trained models. 38 | 39 | ### Prepare COCO and VOC datasets 40 | 41 | It is recommended to symlink the dataset root to `$MMDETECTION/data`. 42 | 43 | ``` 44 | mmdetection 45 | ├── mmdet 46 | ├── tools 47 | ├── configs 48 | ├── data 49 | │ ├── coco 50 | │ │ ├── annotations 51 | │ │ ├── train2017 52 | │ │ ├── val2017 53 | │ │ ├── test2017 54 | │ ├── VOCdevkit 55 | │ │ ├── VOC2007 56 | │ │ ├── VOC2012 57 | 58 | ``` 59 | 60 | ### Scripts 61 | For reference, [here](https://gist.github.com/hellock/bf23cd7348c727d69d48682cb6909047) is 62 | a script for setting up mmdetection with conda. 63 | 64 | ### Notice 65 | You can run `python(3) setup.py develop` or `pip install -e .` to install mmdetection if you want to make modifications to it frequently. 66 | 67 | If there is more than one copy of mmdetection on your machine and you want to use them alternately, 68 | insert the following code into the main script 69 | ```python 70 | import os.path as osp 71 | import sys 72 | sys.path.insert(0, osp.join(osp.dirname(osp.abspath(__file__)), '../')) 73 | ``` 74 | or run the following command in a terminal from the corresponding folder. 75 | ```shell 76 | export PYTHONPATH=`pwd`:$PYTHONPATH 77 | ``` 78 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/functions/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Function 2 | 3 | from .. import roi_align_cuda 4 | 5 | 6 | class RoIAlignFunction(Function): 7 | 8 | @staticmethod 9 | def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0): 10 | if isinstance(out_size, int): 11 | out_h = out_size 12 | out_w = out_size 13 | elif isinstance(out_size, tuple): 14 | assert len(out_size) == 2 15 | assert isinstance(out_size[0], int) 16 | assert isinstance(out_size[1], int) 17 | out_h, out_w = out_size 18 | else: 19 | raise TypeError( 20 | '"out_size" must be an integer or tuple of integers') 21 | ctx.spatial_scale = spatial_scale 22 | ctx.sample_num = sample_num 23 | ctx.save_for_backward(rois) 24 | ctx.feature_size = features.size() 25 | 26 | batch_size, num_channels, data_height, data_width = features.size() 27 | num_rois = rois.size(0) 28 | 29 | output = features.new_zeros(num_rois, num_channels, out_h, out_w) 30 | if features.is_cuda: 31 | roi_align_cuda.forward(features, rois, out_h, out_w, spatial_scale, 32 | sample_num, output) 33 | else: 34 | raise NotImplementedError 35 | 36 | return output 37 | 38 | @staticmethod 39 | def backward(ctx, grad_output): 40 | feature_size = ctx.feature_size 41 | spatial_scale = ctx.spatial_scale 42 | sample_num = ctx.sample_num 43 | rois = ctx.saved_tensors[0] 44 | assert (feature_size is not None and grad_output.is_cuda) 45 | 46 | batch_size, num_channels, data_height, data_width = feature_size 47 | out_w = grad_output.size(3) 48 | out_h = grad_output.size(2) 49 | 50 | grad_input = grad_rois = None 51 | if ctx.needs_input_grad[0]: 52 | grad_input = rois.new_zeros(batch_size, num_channels, data_height, 53 | data_width) 54 | roi_align_cuda.backward(grad_output.contiguous(), rois, out_h, 55 | out_w, spatial_scale, sample_num, 56 | grad_input) 57 | 58 | return grad_input, grad_rois, None, None, None 59 | 60 |
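# Usage sketch (illustrative, not from the original file): ``Function``
# subclasses are invoked through ``apply`` rather than instantiated, so the
# module-level alias below gives the op a functional interface, e.g.
#     output = roi_align(feats, rois, (7, 7), 1.0 / 16, 2)
# with rois of shape (n, 5) as (batch_idx, x1, y1, x2, y2) and arguments
# matching ``forward`` above: features, rois, out_size, spatial_scale,
# sample_num.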
61 | roi_align = RoIAlignFunction.apply 62 | -------------------------------------------------------------------------------- /mmdet/core/bbox/geometry.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False): 5 | """Calculate overlap between two sets of bboxes. 6 | 7 | If ``is_aligned`` is ``False``, then calculate the ious between each bbox 8 | of bboxes1 and bboxes2, otherwise the ious between each aligned pair of 9 | bboxes1 and bboxes2. 10 | 11 | Args: 12 | bboxes1 (Tensor): shape (m, 4) 13 | bboxes2 (Tensor): shape (n, 4), if is_aligned is ``True``, then m and n 14 | must be equal. 15 | mode (str): "iou" (intersection over union) or "iof" (intersection over 16 | foreground). 17 | 18 | Returns: 19 | ious(Tensor): shape (m, n) if is_aligned == False else shape (m, 1) 20 | """ 21 | 22 | assert mode in ['iou', 'iof'] 23 | 24 | rows = bboxes1.size(0) 25 | cols = bboxes2.size(0) 26 | if is_aligned: 27 | assert rows == cols 28 | 29 | if rows * cols == 0: 30 | return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols) 31 | 32 | if is_aligned: 33 | lt = torch.max(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2] 34 | rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2] 35 | 36 | wh = (rb - lt + 1).clamp(min=0) # [rows, 2] 37 | overlap = wh[:, 0] * wh[:, 1] 38 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 39 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 40 | 41 | if mode == 'iou': 42 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 43 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 44 | ious = overlap / (area1 + area2 - overlap) 45 | else: 46 | ious = overlap / area1 47 | else: 48 | lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2]) # [rows, cols, 2] 49 | rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:]) # [rows, cols, 2] 50 | 51 | wh = (rb - lt + 1).clamp(min=0) # [rows, cols, 2] 52 | overlap = wh[:, :, 0] * wh[:, :, 1] 53 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 54 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 55 | 56 | if mode == 'iou': 57 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 58 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 59 | ious = overlap / (area1[:, None] + area2 - overlap) 60 | else: 61 | ious = overlap / (area1[:, None]) 62 | 63 | return ious 64 | -------------------------------------------------------------------------------- /mmdet/ops/nms/src/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
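// The kernel below is classic greedy NMS: boxes are visited in descending
// score order, and each surviving box suppresses every later box whose IoU
// with it reaches the threshold. The "+ 1" in the width/height arithmetic
// follows the integer-coordinate box convention used throughout this
// codebase (compare bbox_overlaps in mmdet/core/bbox/geometry.py).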
2 | #include <torch/extension.h> 3 | 4 | template <typename scalar_t> 5 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) { 6 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 7 | 8 | if (dets.numel() == 0) { 9 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 10 | } 11 | 12 | auto x1_t = dets.select(1, 0).contiguous(); 13 | auto y1_t = dets.select(1, 1).contiguous(); 14 | auto x2_t = dets.select(1, 2).contiguous(); 15 | auto y2_t = dets.select(1, 3).contiguous(); 16 | auto scores = dets.select(1, 4).contiguous(); 17 | 18 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 19 | 20 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 21 | 22 | auto ndets = dets.size(0); 23 | at::Tensor suppressed_t = 24 | at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 25 | 26 | auto suppressed = suppressed_t.data<uint8_t>(); 27 | auto order = order_t.data<int64_t>(); 28 | auto x1 = x1_t.data<scalar_t>(); 29 | auto y1 = y1_t.data<scalar_t>(); 30 | auto x2 = x2_t.data<scalar_t>(); 31 | auto y2 = y2_t.data<scalar_t>(); 32 | auto areas = areas_t.data<scalar_t>(); 33 | 34 | for (int64_t _i = 0; _i < ndets; _i++) { 35 | auto i = order[_i]; 36 | if (suppressed[i] == 1) continue; 37 | auto ix1 = x1[i]; 38 | auto iy1 = y1[i]; 39 | auto ix2 = x2[i]; 40 | auto iy2 = y2[i]; 41 | auto iarea = areas[i]; 42 | 43 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 44 | auto j = order[_j]; 45 | if (suppressed[j] == 1) continue; 46 | auto xx1 = std::max(ix1, x1[j]); 47 | auto yy1 = std::max(iy1, y1[j]); 48 | auto xx2 = std::min(ix2, x2[j]); 49 | auto yy2 = std::min(iy2, y2[j]); 50 | 51 | auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1); 52 | auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1); 53 | auto inter = w * h; 54 | auto ovr = inter / (iarea + areas[j] - inter); 55 | if (ovr >= threshold) suppressed[j] = 1; 56 | } 57 | } 58 | return at::nonzero(suppressed_t == 0).squeeze(1); 59 | } 60 | 61 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 62 | at::Tensor result; 63 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 64 | result = nms_cpu_kernel<scalar_t>(dets, threshold); 65 | }); 66 | return result; 67 | } 68 | 69 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 70 | m.def("nms", &nms, "non-maximum suppression"); 71 | } -------------------------------------------------------------------------------- /mmdet/models/detectors/single_stage.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from .base import BaseDetector 4 | from ..
import builder 5 | from ..registry import DETECTORS 6 | from mmdet.core import bbox2result 7 | 8 | 9 | @DETECTORS.register_module 10 | class SingleStageDetector(BaseDetector): 11 | 12 | def __init__(self, 13 | backbone, 14 | neck=None, 15 | bbox_head=None, 16 | train_cfg=None, 17 | test_cfg=None, 18 | pretrained=None): 19 | super(SingleStageDetector, self).__init__() 20 | self.backbone = builder.build_backbone(backbone) 21 | if neck is not None: 22 | self.neck = builder.build_neck(neck) 23 | self.bbox_head = builder.build_head(bbox_head) 24 | self.train_cfg = train_cfg 25 | self.test_cfg = test_cfg 26 | self.init_weights(pretrained=pretrained) 27 | 28 | def init_weights(self, pretrained=None): 29 | super(SingleStageDetector, self).init_weights(pretrained) 30 | self.backbone.init_weights(pretrained=pretrained) 31 | if self.with_neck: 32 | if isinstance(self.neck, nn.Sequential): 33 | for m in self.neck: 34 | m.init_weights() 35 | else: 36 | self.neck.init_weights() 37 | self.bbox_head.init_weights() 38 | 39 | def extract_feat(self, img): 40 | x = self.backbone(img) 41 | if self.with_neck: 42 | x = self.neck(x) 43 | return x 44 | 45 | def forward_train(self, 46 | img, 47 | img_metas, 48 | gt_bboxes, 49 | gt_labels, 50 | gt_bboxes_ignore=None): 51 | x = self.extract_feat(img) 52 | outs = self.bbox_head(x) 53 | loss_inputs = outs + (gt_bboxes, gt_labels, img_metas, self.train_cfg) 54 | losses = self.bbox_head.loss( 55 | *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) 56 | return losses 57 | 58 | def simple_test(self, img, img_meta, rescale=False): 59 | x = self.extract_feat(img) 60 | outs = self.bbox_head(x) 61 | bbox_inputs = outs + (img_meta, self.test_cfg, rescale) 62 | bbox_list = self.bbox_head.get_bboxes(*bbox_inputs) 63 | bbox_results = [ 64 | bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes) 65 | for det_bboxes, det_labels in bbox_list 66 | ] 67 | return bbox_results[0] 68 | 69 | def aug_test(self, imgs, img_metas, rescale=False): 70 | raise NotImplementedError 71 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/functions/deform_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from .. 
import deform_pool_cuda 5 | 6 | 7 | class DeformRoIPoolingFunction(Function): 8 | 9 | @staticmethod 10 | def forward(ctx, 11 | data, 12 | rois, 13 | offset, 14 | spatial_scale, 15 | out_size, 16 | out_channels, 17 | no_trans, 18 | group_size=1, 19 | part_size=None, 20 | sample_per_part=4, 21 | trans_std=.0): 22 | ctx.spatial_scale = spatial_scale 23 | ctx.out_size = out_size 24 | ctx.out_channels = out_channels 25 | ctx.no_trans = no_trans 26 | ctx.group_size = group_size 27 | ctx.part_size = out_size if part_size is None else part_size 28 | ctx.sample_per_part = sample_per_part 29 | ctx.trans_std = trans_std 30 | 31 | assert 0.0 <= ctx.trans_std <= 1.0 32 | if not data.is_cuda: 33 | raise NotImplementedError 34 | 35 | n = rois.shape[0] 36 | output = data.new_empty(n, out_channels, out_size, out_size) 37 | output_count = data.new_empty(n, out_channels, out_size, out_size) 38 | deform_pool_cuda.deform_psroi_pooling_cuda_forward( 39 | data, rois, offset, output, output_count, ctx.no_trans, 40 | ctx.spatial_scale, ctx.out_channels, ctx.group_size, ctx.out_size, 41 | ctx.part_size, ctx.sample_per_part, ctx.trans_std) 42 | 43 | if data.requires_grad or rois.requires_grad or offset.requires_grad: 44 | ctx.save_for_backward(data, rois, offset) 45 | ctx.output_count = output_count 46 | 47 | return output 48 | 49 | @staticmethod 50 | def backward(ctx, grad_output): 51 | if not grad_output.is_cuda: 52 | raise NotImplementedError 53 | 54 | data, rois, offset = ctx.saved_tensors 55 | output_count = ctx.output_count 56 | grad_input = torch.zeros_like(data) 57 | grad_rois = None 58 | grad_offset = torch.zeros_like(offset) 59 | 60 | deform_pool_cuda.deform_psroi_pooling_cuda_backward( 61 | grad_output, data, rois, offset, output_count, grad_input, 62 | grad_offset, ctx.no_trans, ctx.spatial_scale, ctx.out_channels, 63 | ctx.group_size, ctx.out_size, ctx.part_size, ctx.sample_per_part, 64 | ctx.trans_std) 65 | return (grad_input, grad_rois, grad_offset, None, None, None, None, 66 | None, None, None, None) 67 | 68 | 69 | deform_roi_pooling = DeformRoIPoolingFunction.apply 70 | -------------------------------------------------------------------------------- /mmdet/models/anchor_heads/retina_head.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | from mmcv.cnn import normal_init 4 | 5 | from .anchor_head import AnchorHead 6 | from ..registry import HEADS 7 | from ..utils import bias_init_with_prob 8 | 9 | 10 | @HEADS.register_module 11 | class RetinaHead(AnchorHead): 12 | 13 | def __init__(self, 14 | num_classes, 15 | in_channels, 16 | stacked_convs=4, 17 | octave_base_scale=4, 18 | scales_per_octave=3, 19 | **kwargs): 20 | self.stacked_convs = stacked_convs 21 | self.octave_base_scale = octave_base_scale 22 | self.scales_per_octave = scales_per_octave 23 | octave_scales = np.array( 24 | [2**(i / scales_per_octave) for i in range(scales_per_octave)]) 25 | anchor_scales = octave_scales * octave_base_scale 26 | super(RetinaHead, self).__init__( 27 | num_classes, 28 | in_channels, 29 | anchor_scales=anchor_scales, 30 | use_sigmoid_cls=True, 31 | use_focal_loss=True, 32 | **kwargs) 33 | 34 | def _init_layers(self): 35 | self.relu = nn.ReLU(inplace=True) 36 | self.cls_convs = nn.ModuleList() 37 | self.reg_convs = nn.ModuleList() 38 | for i in range(self.stacked_convs): 39 | chn = self.in_channels if i == 0 else self.feat_channels 40 | self.cls_convs.append( 41 | nn.Conv2d(chn, self.feat_channels, 3, stride=1, padding=1)) 42 | 
self.reg_convs.append( 43 | nn.Conv2d(chn, self.feat_channels, 3, stride=1, padding=1)) 44 | self.retina_cls = nn.Conv2d( 45 | self.feat_channels, 46 | self.num_anchors * self.cls_out_channels, 47 | 3, 48 | padding=1) 49 | self.retina_reg = nn.Conv2d( 50 | self.feat_channels, self.num_anchors * 4, 3, padding=1) 51 | 52 | def init_weights(self): 53 | for m in self.cls_convs: 54 | normal_init(m, std=0.01) 55 | for m in self.reg_convs: 56 | normal_init(m, std=0.01) 57 | bias_cls = bias_init_with_prob(0.01) 58 | normal_init(self.retina_cls, std=0.01, bias=bias_cls) 59 | normal_init(self.retina_reg, std=0.01) 60 | 61 | def forward_single(self, x): 62 | cls_feat = x 63 | reg_feat = x 64 | for cls_conv in self.cls_convs: 65 | cls_feat = self.relu(cls_conv(cls_feat)) 66 | for reg_conv in self.reg_convs: 67 | reg_feat = self.relu(reg_conv(reg_feat)) 68 | cls_score = self.retina_cls(cls_feat) 69 | bbox_pred = self.retina_reg(reg_feat) 70 | return cls_score, bbox_pred 71 | -------------------------------------------------------------------------------- /mmdet/ops/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from . import nms_cuda, nms_cpu 5 | from .soft_nms_cpu import soft_nms_cpu 6 | 7 | 8 | def nms(dets, iou_thr, device_id=None): 9 | """Dispatch to either CPU or GPU NMS implementations. 10 | 11 | The input can be either a torch tensor or numpy array. GPU NMS will be used 12 | if the input is a gpu tensor or device_id is specified, otherwise CPU NMS 13 | will be used. The returned type will always be the same as the inputs. 14 | 15 | Arguments: 16 | dets (torch.Tensor or np.ndarray): bboxes with scores. 17 | iou_thr (float): IoU threshold for NMS. 18 | device_id (int, optional): when `dets` is a numpy array, if `device_id` 19 | is None, then cpu nms is used, otherwise gpu_nms will be used. 20 | 21 | Returns: 22 | tuple: kept bboxes and indices, which is always the same data type as 23 | the input.
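    Example (an illustrative sketch; boxes and scores are made up):
        >>> import numpy as np
        >>> dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.9],
        ...                  [49.3, 32.9, 51.0, 35.3, 0.8],
        ...                  [35.3, 11.5, 39.9, 14.5, 0.6]], dtype=np.float32)
        >>> keep, inds = nms(dets, iou_thr=0.5)
        >>> # `keep` holds the surviving rows of `dets`; `inds` indexes them.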
24 | """ 25 | # convert dets (tensor or numpy array) to tensor 26 | if isinstance(dets, torch.Tensor): 27 | is_numpy = False 28 | dets_th = dets 29 | elif isinstance(dets, np.ndarray): 30 | is_numpy = True 31 | device = 'cpu' if device_id is None else 'cuda:{}'.format(device_id) 32 | dets_th = torch.from_numpy(dets).to(device) 33 | else: 34 | raise TypeError( 35 | 'dets must be either a Tensor or numpy array, but got {}'.format( 36 | type(dets))) 37 | 38 | # execute cpu or cuda nms 39 | if dets_th.shape[0] == 0: 40 | inds = dets_th.new_zeros(0, dtype=torch.long) 41 | else: 42 | if dets_th.is_cuda: 43 | inds = nms_cuda.nms(dets_th, iou_thr) 44 | else: 45 | inds = nms_cpu.nms(dets_th, iou_thr) 46 | 47 | if is_numpy: 48 | inds = inds.cpu().numpy() 49 | return dets[inds, :], inds 50 | 51 | 52 | def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3): 53 | if isinstance(dets, torch.Tensor): 54 | is_tensor = True 55 | dets_np = dets.detach().cpu().numpy() 56 | elif isinstance(dets, np.ndarray): 57 | is_tensor = False 58 | dets_np = dets 59 | else: 60 | raise TypeError( 61 | 'dets must be either a Tensor or numpy array, but got {}'.format( 62 | type(dets))) 63 | 64 | method_codes = {'linear': 1, 'gaussian': 2} 65 | if method not in method_codes: 66 | raise ValueError('Invalid method for SoftNMS: {}'.format(method)) 67 | new_dets, inds = soft_nms_cpu( 68 | dets_np, 69 | iou_thr, 70 | method=method_codes[method], 71 | sigma=sigma, 72 | min_score=min_score) 73 | 74 | if is_tensor: 75 | return dets.new_tensor(new_dets), dets.new_tensor( 76 | inds, dtype=torch.long) 77 | else: 78 | return new_dets.astype(np.float32), inds.astype(np.int64) 79 | -------------------------------------------------------------------------------- /mmdet/apis/inference.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import pycocotools.mask as maskUtils 4 | import torch 5 | 6 | from mmdet.core import get_classes 7 | from mmdet.datasets import to_tensor 8 | from mmdet.datasets.transforms import ImageTransform 9 | 10 | 11 | def _prepare_data(img, img_transform, cfg, device): 12 | ori_shape = img.shape 13 | img, img_shape, pad_shape, scale_factor = img_transform( 14 | img, 15 | scale=cfg.data.test.img_scale, 16 | keep_ratio=cfg.data.test.get('resize_keep_ratio', True)) 17 | img = to_tensor(img).to(device).unsqueeze(0) 18 | img_meta = [ 19 | dict( 20 | ori_shape=ori_shape, 21 | img_shape=img_shape, 22 | pad_shape=pad_shape, 23 | scale_factor=scale_factor, 24 | flip=False) 25 | ] 26 | return dict(img=[img], img_meta=[img_meta]) 27 | 28 | 29 | def _inference_single(model, img, img_transform, cfg, device): 30 | img = mmcv.imread(img) 31 | data = _prepare_data(img, img_transform, cfg, device) 32 | with torch.no_grad(): 33 | result = model(return_loss=False, rescale=True, **data) 34 | return result 35 | 36 | 37 | def _inference_generator(model, imgs, img_transform, cfg, device): 38 | for img in imgs: 39 | yield _inference_single(model, img, img_transform, cfg, device) 40 | 41 | 42 | def inference_detector(model, imgs, cfg, device='cuda:0'): 43 | img_transform = ImageTransform( 44 | size_divisor=cfg.data.test.size_divisor, **cfg.img_norm_cfg) 45 | model = model.to(device) 46 | model.eval() 47 | 48 | if not isinstance(imgs, list): 49 | return _inference_single(model, imgs, img_transform, cfg, device) 50 | else: 51 | return _inference_generator(model, imgs, img_transform, cfg, device) 52 | 53 | 54 | def show_result(img, result, dataset='coco', 
score_thr=0.3, out_file=None): 55 | img = mmcv.imread(img) 56 | class_names = get_classes(dataset) 57 | if isinstance(result, tuple): 58 | bbox_result, segm_result = result 59 | else: 60 | bbox_result, segm_result = result, None 61 | bboxes = np.vstack(bbox_result) 62 | # draw segmentation masks 63 | if segm_result is not None: 64 | segms = mmcv.concat_list(segm_result) 65 | inds = np.where(bboxes[:, -1] > score_thr)[0] 66 | for i in inds: 67 | color_mask = np.random.randint( 68 | 0, 256, (1, 3), dtype=np.uint8) 69 | mask = maskUtils.decode(segms[i]).astype(np.bool) 70 | img[mask] = img[mask] * 0.5 + color_mask * 0.5 71 | # draw bounding boxes 72 | labels = [ 73 | np.full(bbox.shape[0], i, dtype=np.int32) 74 | for i, bbox in enumerate(bbox_result) 75 | ] 76 | labels = np.concatenate(labels) 77 | mmcv.imshow_det_bboxes( 78 | img.copy(), 79 | bboxes, 80 | labels, 81 | class_names=class_names, 82 | score_thr=score_thr, 83 | show=out_file is None, out_file=out_file) 84 | -------------------------------------------------------------------------------- /mmdet/datasets/xml_style.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import xml.etree.ElementTree as ET 3 | 4 | import mmcv 5 | import numpy as np 6 | 7 | from .custom import CustomDataset 8 | 9 | 10 | class XMLDataset(CustomDataset): 11 | 12 | def __init__(self, **kwargs): 13 | super(XMLDataset, self).__init__(**kwargs) 14 | self.cat2label = {cat: i + 1 for i, cat in enumerate(self.CLASSES)} 15 | 16 | def load_annotations(self, ann_file): 17 | img_infos = [] 18 | img_ids = mmcv.list_from_file(ann_file) 19 | for img_id in img_ids: 20 | filename = 'JPEGImages/{}.jpg'.format(img_id) 21 | xml_path = osp.join(self.img_prefix, 'Annotations', 22 | '{}.xml'.format(img_id)) 23 | tree = ET.parse(xml_path) 24 | root = tree.getroot() 25 | size = root.find('size') 26 | width = int(size.find('width').text) 27 | height = int(size.find('height').text) 28 | img_infos.append( 29 | dict(id=img_id, filename=filename, width=width, height=height)) 30 | return img_infos 31 | 32 | def get_ann_info(self, idx): 33 | img_id = self.img_infos[idx]['id'] 34 | xml_path = osp.join(self.img_prefix, 'Annotations', 35 | '{}.xml'.format(img_id)) 36 | tree = ET.parse(xml_path) 37 | root = tree.getroot() 38 | bboxes = [] 39 | labels = [] 40 | bboxes_ignore = [] 41 | labels_ignore = [] 42 | for obj in root.findall('object'): 43 | name = obj.find('name').text 44 | label = self.cat2label[name] 45 | difficult = int(obj.find('difficult').text) 46 | bnd_box = obj.find('bndbox') 47 | bbox = [ 48 | int(bnd_box.find('xmin').text), 49 | int(bnd_box.find('ymin').text), 50 | int(bnd_box.find('xmax').text), 51 | int(bnd_box.find('ymax').text) 52 | ] 53 | if difficult: 54 | bboxes_ignore.append(bbox) 55 | labels_ignore.append(label) 56 | else: 57 | bboxes.append(bbox) 58 | labels.append(label) 59 | if not bboxes: 60 | bboxes = np.zeros((0, 4)) 61 | labels = np.zeros((0, )) 62 | else: 63 | bboxes = np.array(bboxes, ndmin=2) - 1 64 | labels = np.array(labels) 65 | if not bboxes_ignore: 66 | bboxes_ignore = np.zeros((0, 4)) 67 | labels_ignore = np.zeros((0, )) 68 | else: 69 | bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1 70 | labels_ignore = np.array(labels_ignore) 71 | ann = dict( 72 | bboxes=bboxes.astype(np.float32), 73 | labels=labels.astype(np.int64), 74 | bboxes_ignore=bboxes_ignore.astype(np.float32), 75 | labels_ignore=labels_ignore.astype(np.int64)) 76 | return ann 77 |
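A note on the annotation contract: every dataset returns the same `ann` dict from `get_ann_info`, and `tools/voc_eval.py` consumes it directly. A minimal sketch with made-up values, mirroring the shapes and dtypes produced by `XMLDataset.get_ann_info` above:

```python
import numpy as np

# Illustrative values only; shapes/dtypes follow XMLDataset.get_ann_info.
ann = dict(
    bboxes=np.array([[47., 239., 194., 370.]], dtype=np.float32),  # (n, 4), x1/y1/x2/y2
    labels=np.array([12], dtype=np.int64),                         # (n,), 1-based class ids
    bboxes_ignore=np.zeros((0, 4), dtype=np.float32),              # 'difficult' objects
    labels_ignore=np.zeros((0, ), dtype=np.int64))

# tools/voc_eval.py stacks bboxes with bboxes_ignore and flags the ignored
# tail with a boolean mask, which is the layout eval_map expects.
ignore = np.concatenate([
    np.zeros(ann['bboxes'].shape[0], dtype=bool),
    np.ones(ann['bboxes_ignore'].shape[0], dtype=bool)
])
all_bboxes = np.vstack([ann['bboxes'], ann['bboxes_ignore']])
print(all_bboxes.shape, ignore.sum())
```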
-------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/iou_balanced_neg_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .random_sampler import RandomSampler 5 | 6 | 7 | class IoUBalancedNegSampler(RandomSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | hard_thr=0.1, 13 | hard_fraction=0.5, 14 | **kwargs): 15 | super(IoUBalancedNegSampler, self).__init__(num, pos_fraction, 16 | **kwargs) 17 | assert hard_thr > 0 18 | assert 0 < hard_fraction < 1 19 | self.hard_thr = hard_thr 20 | self.hard_fraction = hard_fraction 21 | 22 | def _sample_neg(self, assign_result, num_expected, **kwargs): 23 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 24 | if neg_inds.numel() != 0: 25 | neg_inds = neg_inds.squeeze(1) 26 | if len(neg_inds) <= num_expected: 27 | return neg_inds 28 | else: 29 | max_overlaps = assign_result.max_overlaps.cpu().numpy() 30 | # balance sampling for negative samples 31 | neg_set = set(neg_inds.cpu().numpy()) 32 | easy_set = set( 33 | np.where( 34 | np.logical_and(max_overlaps >= 0, 35 | max_overlaps < self.hard_thr))[0]) 36 | hard_set = set(np.where(max_overlaps >= self.hard_thr)[0]) 37 | easy_neg_inds = list(easy_set & neg_set) 38 | hard_neg_inds = list(hard_set & neg_set) 39 | 40 | num_expected_hard = int(num_expected * self.hard_fraction) 41 | if len(hard_neg_inds) > num_expected_hard: 42 | sampled_hard_inds = self.random_choice(hard_neg_inds, 43 | num_expected_hard) 44 | else: 45 | sampled_hard_inds = np.array(hard_neg_inds, dtype=np.int) 46 | num_expected_easy = num_expected - len(sampled_hard_inds) 47 | if len(easy_neg_inds) > num_expected_easy: 48 | sampled_easy_inds = self.random_choice(easy_neg_inds, 49 | num_expected_easy) 50 | else: 51 | sampled_easy_inds = np.array(easy_neg_inds, dtype=np.int) 52 | sampled_inds = np.concatenate((sampled_easy_inds, 53 | sampled_hard_inds)) 54 | if len(sampled_inds) < num_expected: 55 | num_extra = num_expected - len(sampled_inds) 56 | extra_inds = np.array(list(neg_set - set(sampled_inds))) 57 | if len(extra_inds) > num_extra: 58 | extra_inds = self.random_choice(extra_inds, num_extra) 59 | sampled_inds = np.concatenate((sampled_inds, extra_inds)) 60 | sampled_inds = torch.from_numpy(sampled_inds).long().to( 61 | assign_result.gt_inds.device) 62 | return sampled_inds 63 | -------------------------------------------------------------------------------- /mmdet/ops/nms/setup.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | from setuptools import setup, Extension 3 | 4 | import numpy as np 5 | from Cython.Build import cythonize 6 | from Cython.Distutils import build_ext 7 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 8 | 9 | ext_args = dict( 10 | include_dirs=[np.get_include()], 11 | language='c++', 12 | extra_compile_args={ 13 | 'cc': ['-Wno-unused-function', '-Wno-write-strings'], 14 | 'nvcc': ['-c', '--compiler-options', '-fPIC'], 15 | }, 16 | ) 17 | 18 | extensions = [ 19 | Extension('soft_nms_cpu', ['src/soft_nms_cpu.pyx'], **ext_args), 20 | ] 21 | 22 | 23 | def customize_compiler_for_nvcc(self): 24 | """inject deep into distutils to customize how the dispatch 25 | to cc/nvcc works. 26 | If you subclass UnixCCompiler, it's not trivial to get your subclass 27 | injected in, and still have the right customizations (i.e. 28 | distutils.sysconfig.customize_compiler) run on it. 
So instead of going 29 | the OO route, I have this. Note, it's kind of like a weird functional 30 | subclassing going on.""" 31 | 32 | # tell the compiler it can process .cu 33 | self.src_extensions.append('.cu') 34 | 35 | # save references to the default compiler_so and _compile methods 36 | default_compiler_so = self.compiler_so 37 | super = self._compile 38 | 39 | # now redefine the _compile method. This gets executed for each 40 | # object but distutils doesn't have the ability to change compilers 41 | # based on source extension: we add it. 42 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 43 | if osp.splitext(src)[1] == '.cu': 44 | # use the cuda for .cu files 45 | self.set_executable('compiler_so', 'nvcc') 46 | # use only a subset of the extra_postargs, which are 1-1 translated 47 | # from the extra_compile_args in the Extension class 48 | postargs = extra_postargs['nvcc'] 49 | else: 50 | postargs = extra_postargs['cc'] 51 | 52 | super(obj, src, ext, cc_args, postargs, pp_opts) 53 | # reset the default compiler_so, which we might have changed for cuda 54 | self.compiler_so = default_compiler_so 55 | 56 | # inject our redefined _compile method into the class 57 | self._compile = _compile 58 | 59 | 60 | class custom_build_ext(build_ext): 61 | 62 | def build_extensions(self): 63 | customize_compiler_for_nvcc(self.compiler) 64 | build_ext.build_extensions(self) 65 | 66 | 67 | setup( 68 | name='soft_nms', 69 | cmdclass={'build_ext': custom_build_ext}, 70 | ext_modules=cythonize(extensions), 71 | ) 72 | 73 | setup( 74 | name='nms_cuda', 75 | ext_modules=[ 76 | CUDAExtension('nms_cuda', [ 77 | 'src/nms_cuda.cpp', 78 | 'src/nms_kernel.cu', 79 | ]), 80 | CUDAExtension('nms_cpu', [ 81 | 'src/nms_cpu.cpp', 82 | ]), 83 | ], 84 | cmdclass={'build_ext': BuildExtension}) 85 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/ohem_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sampler import BaseSampler 4 | from ..transforms import bbox2roi 5 | 6 | 7 | class OHEMSampler(BaseSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | context, 13 | neg_pos_ub=-1, 14 | add_gt_as_proposals=True, 15 | **kwargs): 16 | super(OHEMSampler, self).__init__(num, pos_fraction, neg_pos_ub, 17 | add_gt_as_proposals) 18 | if not hasattr(context, 'num_stages'): 19 | self.bbox_roi_extractor = context.bbox_roi_extractor 20 | self.bbox_head = context.bbox_head 21 | else: 22 | self.bbox_roi_extractor = context.bbox_roi_extractor[ 23 | context.current_stage] 24 | self.bbox_head = context.bbox_head[context.current_stage] 25 | 26 | def hard_mining(self, inds, num_expected, bboxes, labels, feats): 27 | with torch.no_grad(): 28 | rois = bbox2roi([bboxes]) 29 | bbox_feats = self.bbox_roi_extractor( 30 | feats[:self.bbox_roi_extractor.num_inputs], rois) 31 | cls_score, _ = self.bbox_head(bbox_feats) 32 | loss = self.bbox_head.loss( 33 | cls_score=cls_score, 34 | bbox_pred=None, 35 | labels=labels, 36 | label_weights=cls_score.new_ones(cls_score.size(0)), 37 | bbox_targets=None, 38 | bbox_weights=None, 39 | reduce=False)['loss_cls'] 40 | _, topk_loss_inds = loss.topk(num_expected) 41 | return inds[topk_loss_inds] 42 | 43 | def _sample_pos(self, 44 | assign_result, 45 | num_expected, 46 | bboxes=None, 47 | feats=None, 48 | **kwargs): 49 | # Sample some hard positive samples 50 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 51 | if pos_inds.numel() != 0: 52
| pos_inds = pos_inds.squeeze(1) 53 | if pos_inds.numel() <= num_expected: 54 | return pos_inds 55 | else: 56 | return self.hard_mining(pos_inds, num_expected, bboxes[pos_inds], 57 | assign_result.labels[pos_inds], feats) 58 | 59 | def _sample_neg(self, 60 | assign_result, 61 | num_expected, 62 | bboxes=None, 63 | feats=None, 64 | **kwargs): 65 | # Sample some hard negative samples 66 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 67 | if neg_inds.numel() != 0: 68 | neg_inds = neg_inds.squeeze(1) 69 | if len(neg_inds) <= num_expected: 70 | return neg_inds 71 | else: 72 | return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds], 73 | assign_result.labels[neg_inds], feats) 74 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/base_sampler.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch 4 | 5 | from .sampling_result import SamplingResult 6 | 7 | 8 | class BaseSampler(metaclass=ABCMeta): 9 | 10 | def __init__(self, 11 | num, 12 | pos_fraction, 13 | neg_pos_ub=-1, 14 | add_gt_as_proposals=True, 15 | **kwargs): 16 | self.num = num 17 | self.pos_fraction = pos_fraction 18 | self.neg_pos_ub = neg_pos_ub 19 | self.add_gt_as_proposals = add_gt_as_proposals 20 | self.pos_sampler = self 21 | self.neg_sampler = self 22 | 23 | @abstractmethod 24 | def _sample_pos(self, assign_result, num_expected, **kwargs): 25 | pass 26 | 27 | @abstractmethod 28 | def _sample_neg(self, assign_result, num_expected, **kwargs): 29 | pass 30 | 31 | def sample(self, 32 | assign_result, 33 | bboxes, 34 | gt_bboxes, 35 | gt_labels=None, 36 | **kwargs): 37 | """Sample positive and negative bboxes. 38 | 39 | This is a simple implementation of bbox sampling given candidates, 40 | assigning results and ground truth bboxes. 41 | 42 | Args: 43 | assign_result (:obj:`AssignResult`): Bbox assigning results. 44 | bboxes (Tensor): Boxes to be sampled from. 45 | gt_bboxes (Tensor): Ground truth bboxes. 46 | gt_labels (Tensor, optional): Class labels of ground truth bboxes. 47 | 48 | Returns: 49 | :obj:`SamplingResult`: Sampling result. 50 | """ 51 | bboxes = bboxes[:, :4] 52 | 53 | gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8) 54 | if self.add_gt_as_proposals: 55 | bboxes = torch.cat([gt_bboxes, bboxes], dim=0) 56 | assign_result.add_gt_(gt_labels) 57 | gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8) 58 | gt_flags = torch.cat([gt_ones, gt_flags]) 59 | 60 | num_expected_pos = int(self.num * self.pos_fraction) 61 | pos_inds = self.pos_sampler._sample_pos( 62 | assign_result, num_expected_pos, bboxes=bboxes, **kwargs) 63 | # We found that sampled indices have duplicated items occasionally. 
64 | # (may be a bug of PyTorch) 65 | pos_inds = pos_inds.unique() 66 | num_sampled_pos = pos_inds.numel() 67 | num_expected_neg = self.num - num_sampled_pos 68 | if self.neg_pos_ub >= 0: 69 | _pos = max(1, num_sampled_pos) 70 | neg_upper_bound = int(self.neg_pos_ub * _pos) 71 | if num_expected_neg > neg_upper_bound: 72 | num_expected_neg = neg_upper_bound 73 | neg_inds = self.neg_sampler._sample_neg( 74 | assign_result, num_expected_neg, bboxes=bboxes, **kwargs) 75 | neg_inds = neg_inds.unique() 76 | 77 | return SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 78 | assign_result, gt_flags) 79 | -------------------------------------------------------------------------------- /mmdet/models/utils/conv_module.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import torch.nn as nn 4 | from mmcv.cnn import kaiming_init, constant_init 5 | 6 | from .norm import build_norm_layer 7 | 8 | 9 | class ConvModule(nn.Module): 10 | 11 | def __init__(self, 12 | in_channels, 13 | out_channels, 14 | kernel_size, 15 | stride=1, 16 | padding=0, 17 | dilation=1, 18 | groups=1, 19 | bias=True, 20 | normalize=None, 21 | activation='relu', 22 | inplace=True, 23 | activate_last=True): 24 | super(ConvModule, self).__init__() 25 | self.with_norm = normalize is not None 26 | self.with_activation = activation is not None 27 | self.with_bias = bias 28 | self.activation = activation 29 | self.activate_last = activate_last 30 | 31 | if self.with_norm and self.with_bias: 32 | warnings.warn('ConvModule has norm and bias at the same time') 33 | 34 | self.conv = nn.Conv2d( 35 | in_channels, 36 | out_channels, 37 | kernel_size, 38 | stride, 39 | padding, 40 | dilation, 41 | groups, 42 | bias=bias) 43 | 44 | self.in_channels = self.conv.in_channels 45 | self.out_channels = self.conv.out_channels 46 | self.kernel_size = self.conv.kernel_size 47 | self.stride = self.conv.stride 48 | self.padding = self.conv.padding 49 | self.dilation = self.conv.dilation 50 | self.transposed = self.conv.transposed 51 | self.output_padding = self.conv.output_padding 52 | self.groups = self.conv.groups 53 | 54 | if self.with_norm: 55 | norm_channels = out_channels if self.activate_last else in_channels 56 | self.norm_name, norm = build_norm_layer(normalize, norm_channels) 57 | self.add_module(self.norm_name, norm) 58 | 59 | if self.with_activation: 60 | assert activation in ['relu'], 'Only ReLU supported.'
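# 'relu' is the only activation wired up here; supporting another one would
# mean extending both the assert above and the branch below.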
61 | if self.activation == 'relu': 62 | self.activate = nn.ReLU(inplace=inplace) 63 | 64 | # Use MSRA (Kaiming) init by default 65 | self.init_weights() 66 | 67 | @property 68 | def norm(self): 69 | return getattr(self, self.norm_name) 70 | 71 | def init_weights(self): 72 | nonlinearity = 'relu' if self.activation is None else self.activation 73 | kaiming_init(self.conv, nonlinearity=nonlinearity) 74 | if self.with_norm: 75 | constant_init(self.norm, 1, bias=0) 76 | 77 | def forward(self, x, activate=True, norm=True): 78 | if self.activate_last: 79 | x = self.conv(x) 80 | if norm and self.with_norm: 81 | x = self.norm(x) 82 | if activate and self.with_activation: 83 | x = self.activate(x) 84 | else: 85 | if norm and self.with_norm: 86 | x = self.norm(x) 87 | if activate and self.with_activation: 88 | x = self.activate(x) 89 | x = self.conv(x) 90 | return x 91 | -------------------------------------------------------------------------------- /mmdet/core/bbox/bbox_target.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .transforms import bbox2delta 4 | from ..utils import multi_apply 5 | 6 | 7 | def bbox_target(pos_bboxes_list, 8 | neg_bboxes_list, 9 | pos_gt_bboxes_list, 10 | pos_gt_labels_list, 11 | cfg, 12 | reg_classes=1, 13 | target_means=[.0, .0, .0, .0], 14 | target_stds=[1.0, 1.0, 1.0, 1.0], 15 | concat=True): 16 | labels, label_weights, bbox_targets, bbox_weights = multi_apply( 17 | bbox_target_single, 18 | pos_bboxes_list, 19 | neg_bboxes_list, 20 | pos_gt_bboxes_list, 21 | pos_gt_labels_list, 22 | cfg=cfg, 23 | reg_classes=reg_classes, 24 | target_means=target_means, 25 | target_stds=target_stds) 26 | 27 | if concat: 28 | labels = torch.cat(labels, 0) 29 | label_weights = torch.cat(label_weights, 0) 30 | bbox_targets = torch.cat(bbox_targets, 0) 31 | bbox_weights = torch.cat(bbox_weights, 0) 32 | return labels, label_weights, bbox_targets, bbox_weights 33 | 34 | 35 | def bbox_target_single(pos_bboxes, 36 | neg_bboxes, 37 | pos_gt_bboxes, 38 | pos_gt_labels, 39 | cfg, 40 | reg_classes=1, 41 | target_means=[.0, .0, .0, .0], 42 | target_stds=[1.0, 1.0, 1.0, 1.0]): 43 | num_pos = pos_bboxes.size(0) 44 | num_neg = neg_bboxes.size(0) 45 | num_samples = num_pos + num_neg 46 | labels = pos_bboxes.new_zeros(num_samples, dtype=torch.long) 47 | label_weights = pos_bboxes.new_zeros(num_samples) 48 | bbox_targets = pos_bboxes.new_zeros(num_samples, 4) 49 | bbox_weights = pos_bboxes.new_zeros(num_samples, 4) 50 | if num_pos > 0: 51 | labels[:num_pos] = pos_gt_labels 52 | pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight 53 | label_weights[:num_pos] = pos_weight 54 | pos_bbox_targets = bbox2delta(pos_bboxes, pos_gt_bboxes, target_means, 55 | target_stds) 56 | bbox_targets[:num_pos, :] = pos_bbox_targets 57 | bbox_weights[:num_pos, :] = 1 58 | if num_neg > 0: 59 | label_weights[-num_neg:] = 1.0 60 | if reg_classes > 1: 61 | bbox_targets, bbox_weights = expand_target(bbox_targets, bbox_weights, 62 | labels, reg_classes) 63 | 64 | return labels, label_weights, bbox_targets, bbox_weights 65 | 66 | 67 | def expand_target(bbox_targets, bbox_weights, labels, num_classes): 68 | bbox_targets_expand = bbox_targets.new_zeros((bbox_targets.size(0), 69 | 4 * num_classes)) 70 | bbox_weights_expand = bbox_weights.new_zeros((bbox_weights.size(0), 71 | 4 * num_classes)) 72 | for i in torch.nonzero(labels > 0).squeeze(-1): 73 | start, end = labels[i] * 4, (labels[i] + 1) * 4 74 | bbox_targets_expand[i, start:end] = bbox_targets[i, :] 75 |
bbox_weights_expand[i, start:end] = bbox_weights[i, :] 76 | return bbox_targets_expand, bbox_weights_expand 77 | -------------------------------------------------------------------------------- /mmdet/datasets/loader/collate.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | from torch.utils.data.dataloader import default_collate 6 | from mmcv.parallel import DataContainer 7 | 8 | 9 | def collate(batch, samples_per_gpu=1, pad_size=None): 10 | """Puts each data field into a tensor/DataContainer with outer dimension 11 | batch size. 12 | 13 | Extend default_collate to add support for 14 | :type:`~mmcv.parallel.DataContainer`. There are 3 cases. 15 | 16 | 1. cpu_only = True, e.g., meta data 17 | 2. cpu_only = False, stack = True, e.g., images tensors 18 | 3. cpu_only = False, stack = False, e.g., gt bboxes 19 | """ 20 | 21 | if not isinstance(batch, collections.Sequence): 22 | raise TypeError("{} is not supported.".format(batch.dtype)) 23 | 24 | if isinstance(batch[0], DataContainer): 25 | assert len(batch) % samples_per_gpu == 0 26 | stacked = [] 27 | if batch[0].cpu_only: 28 | for i in range(0, len(batch), samples_per_gpu): 29 | stacked.append( 30 | [sample.data for sample in batch[i:i + samples_per_gpu]]) 31 | return DataContainer( 32 | stacked, batch[0].stack, batch[0].padding_value, cpu_only=True) 33 | elif batch[0].stack: 34 | for i in range(0, len(batch), samples_per_gpu): 35 | assert isinstance(batch[i].data, torch.Tensor) 36 | # TODO: handle tensors other than 3d 37 | assert batch[i].dim() == 3 38 | c, h, w = batch[0].size() 39 | for sample in batch[i:i + samples_per_gpu]: 40 | assert c == sample.size(0) 41 | h = max(h, sample.size(1)) 42 | w = max(w, sample.size(2)) 43 | if pad_size is not None: 44 | aspect_ratio = h / w 45 | if aspect_ratio >= 1.0: 46 | h = pad_size[0] 47 | w = pad_size[1] 48 | else: 49 | h = pad_size[1] 50 | w = pad_size[0] 51 | padded_samples = [ 52 | F.pad( 53 | sample.data, 54 | (0, w - sample.size(2), 0, h - sample.size(1)), 55 | value=sample.padding_value) 56 | for sample in batch[i:i + samples_per_gpu] 57 | ] 58 | stacked.append(default_collate(padded_samples)) 59 | else: 60 | for i in range(0, len(batch), samples_per_gpu): 61 | stacked.append( 62 | [sample.data for sample in batch[i:i + samples_per_gpu]]) 63 | return DataContainer(stacked, batch[0].stack, batch[0].padding_value) 64 | elif isinstance(batch[0], collections.Sequence): 65 | transposed = zip(*batch) 66 | return [collate(samples, samples_per_gpu, pad_size=pad_size) for samples in transposed] 67 | elif isinstance(batch[0], collections.Mapping): 68 | return { 69 | key: collate([d[key] for d in batch], samples_per_gpu, pad_size=pad_size) 70 | for key in batch[0] 71 | } 72 | else: 73 | return default_collate(batch) 74 | -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import os 4 | import argparse 5 | from mmcv import Config 6 | 7 | from mmdet import __version__ 8 | from mmdet.datasets import get_dataset 9 | from mmdet.apis import (train_detector, init_dist, get_root_logger, 10 | set_random_seed) 11 | from mmdet.models import build_detector 12 | import torch 13 | 14 | 15 | def parse_args(): 16 | parser = argparse.ArgumentParser(description='Train a detector') 17 | parser.add_argument('config', help='train config 
file path') 18 | parser.add_argument('--work_dir', help='the dir to save logs and models') 19 | parser.add_argument( 20 | '--resume_from', help='the checkpoint file to resume from') 21 | parser.add_argument( 22 | '--validate', 23 | action='store_true', 24 | default=True, 25 | help='whether to evaluate the checkpoint during training') 26 | parser.add_argument( 27 | '--gpus', 28 | type=int, 29 | default=1, 30 | help='number of gpus to use ' 31 | '(only applicable to non-distributed training)') 32 | parser.add_argument('--seed', type=int, default=None, help='random seed') 33 | parser.add_argument( 34 | '--launcher', 35 | choices=['none', 'pytorch', 'slurm', 'mpi'], 36 | default='none', 37 | help='job launcher') 38 | parser.add_argument('--local_rank', type=int, default=0) 39 | args = parser.parse_args() 40 | 41 | return args 42 | 43 | 44 | def main(): 45 | args = parse_args() 46 | 47 | cfg = Config.fromfile(args.config) 48 | # set cudnn_benchmark 49 | if cfg.get('cudnn_benchmark', False): 50 | torch.backends.cudnn.benchmark = True 51 | # update configs according to CLI args 52 | if args.work_dir is not None: 53 | cfg.work_dir = args.work_dir 54 | if args.resume_from is not None: 55 | cfg.resume_from = args.resume_from 56 | latest_path = os.path.join(cfg.work_dir, 'latest.pth') 57 | if os.path.exists(latest_path): 58 | cfg.resume_from = latest_path 59 | cfg.gpus = args.gpus 60 | if cfg.checkpoint_config is not None: 61 | # save mmdet version in checkpoints as meta data 62 | cfg.checkpoint_config.meta = dict( 63 | mmdet_version=__version__, config=cfg.text) 64 | 65 | # init distributed env first, since logger depends on the dist info. 66 | if args.launcher == 'none': 67 | distributed = False 68 | else: 69 | distributed = True 70 | init_dist(args.launcher, **cfg.dist_params) 71 | 72 | # init logger before other steps 73 | logger = get_root_logger(cfg.log_level) 74 | logger.info('Distributed training: {}'.format(distributed)) 75 | 76 | # set random seeds 77 | if args.seed is not None: 78 | logger.info('Set random seed to {}'.format(args.seed)) 79 | set_random_seed(args.seed) 80 | 81 | model = build_detector( 82 | cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg) 83 | 84 | train_dataset = get_dataset(cfg.data.train) 85 | train_detector( 86 | model, 87 | train_dataset, 88 | cfg, 89 | distributed=distributed, 90 | validate=args.validate, 91 | logger=logger) 92 | 93 | 94 | if __name__ == '__main__': 95 | main() 96 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/src/roi_pool_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <cmath> 4 | #include <vector> 5 | 6 | int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int channels, 8 | const int height, const int width, const int num_rois, 9 | const int pooled_h, const int pooled_w, 10 | at::Tensor output, at::Tensor argmax); 11 | 12 | int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 13 | const at::Tensor argmax, const float spatial_scale, 14 | const int batch_size, const int channels, 15 | const int height, const int width, 16 | const int num_rois, const int pooled_h, 17 | const int pooled_w, at::Tensor bottom_grad); 18 | 19 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 20 | #define CHECK_CONTIGUOUS(x) \ 21 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 22 | #define CHECK_INPUT(x) \ 23 | CHECK_CUDA(x); \
24 | CHECK_CONTIGUOUS(x) 25 | 26 | int roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois, 27 | int pooled_height, int pooled_width, 28 | float spatial_scale, at::Tensor output, 29 | at::Tensor argmax) { 30 | CHECK_INPUT(features); 31 | CHECK_INPUT(rois); 32 | CHECK_INPUT(output); 33 | CHECK_INPUT(argmax); 34 | 35 | // Number of ROIs 36 | int num_rois = rois.size(0); 37 | int size_rois = rois.size(1); 38 | 39 | if (size_rois != 5) { 40 | printf("wrong roi size\n"); 41 | return 0; 42 | } 43 | 44 | int channels = features.size(1); 45 | int height = features.size(2); 46 | int width = features.size(3); 47 | 48 | ROIPoolForwardLaucher(features, rois, spatial_scale, channels, height, width, 49 | num_rois, pooled_height, pooled_width, output, argmax); 50 | 51 | return 1; 52 | } 53 | 54 | int roi_pooling_backward_cuda(at::Tensor top_grad, at::Tensor rois, 55 | at::Tensor argmax, float spatial_scale, 56 | at::Tensor bottom_grad) { 57 | CHECK_INPUT(top_grad); 58 | CHECK_INPUT(rois); 59 | CHECK_INPUT(argmax); 60 | CHECK_INPUT(bottom_grad); 61 | 62 | int pooled_height = top_grad.size(2); 63 | int pooled_width = top_grad.size(3); 64 | int num_rois = rois.size(0); 65 | int size_rois = rois.size(1); 66 | 67 | if (size_rois != 5) { 68 | printf("wrong roi size\n"); 69 | return 0; 70 | } 71 | int batch_size = bottom_grad.size(0); 72 | int channels = bottom_grad.size(1); 73 | int height = bottom_grad.size(2); 74 | int width = bottom_grad.size(3); 75 | 76 | ROIPoolBackwardLaucher(top_grad, rois, argmax, spatial_scale, batch_size, 77 | channels, height, width, num_rois, pooled_height, 78 | pooled_width, bottom_grad); 79 | 80 | return 1; 81 | } 82 | 83 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 84 | m.def("forward", &roi_pooling_forward_cuda, "Roi_Pooling forward (CUDA)"); 85 | m.def("backward", &roi_pooling_backward_cuda, "Roi_Pooling backward (CUDA)"); 86 | } 87 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/src/roi_align_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <cmath> 4 | #include <vector> 5 | 6 | int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int sample_num, 8 | const int channels, const int height, 9 | const int width, const int num_rois, 10 | const int pooled_height, const int pooled_width, 11 | at::Tensor output); 12 | 13 | int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 14 | const float spatial_scale, const int sample_num, 15 | const int channels, const int height, 16 | const int width, const int num_rois, 17 | const int pooled_height, const int pooled_width, 18 | at::Tensor bottom_grad); 19 | 20 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 21 | #define CHECK_CONTIGUOUS(x) \ 22 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 23 | #define CHECK_INPUT(x) \ 24 | CHECK_CUDA(x); \ 25 | CHECK_CONTIGUOUS(x) 26 | 27 | int roi_align_forward_cuda(at::Tensor features, at::Tensor rois, 28 | int pooled_height, int pooled_width, 29 | float spatial_scale, int sample_num, 30 | at::Tensor output) { 31 | CHECK_INPUT(features); 32 | CHECK_INPUT(rois); 33 | CHECK_INPUT(output); 34 | 35 | // Number of ROIs 36 | int num_rois = rois.size(0); 37 | int size_rois = rois.size(1); 38 | 39 | if (size_rois != 5) { 40 | printf("wrong roi size\n"); 41 | return 0; 42 | } 43 | 44 | int num_channels = features.size(1); 45 | int data_height = features.size(2); 46 |
int data_width = features.size(3); 47 | 48 | ROIAlignForwardLaucher(features, rois, spatial_scale, sample_num, 49 | num_channels, data_height, data_width, num_rois, 50 | pooled_height, pooled_width, output); 51 | 52 | return 1; 53 | } 54 | 55 | int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois, 56 | int pooled_height, int pooled_width, 57 | float spatial_scale, int sample_num, 58 | at::Tensor bottom_grad) { 59 | CHECK_INPUT(top_grad); 60 | CHECK_INPUT(rois); 61 | CHECK_INPUT(bottom_grad); 62 | 63 | // Number of ROIs 64 | int num_rois = rois.size(0); 65 | int size_rois = rois.size(1); 66 | if (size_rois != 5) { 67 | printf("wrong roi size\n"); 68 | return 0; 69 | } 70 | 71 | int num_channels = bottom_grad.size(1); 72 | int data_height = bottom_grad.size(2); 73 | int data_width = bottom_grad.size(3); 74 | 75 | ROIAlignBackwardLaucher(top_grad, rois, spatial_scale, sample_num, 76 | num_channels, data_height, data_width, num_rois, 77 | pooled_height, pooled_width, bottom_grad); 78 | 79 | return 1; 80 | } 81 | 82 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 83 | m.def("forward", &roi_align_forward_cuda, "Roi_Align forward (CUDA)"); 84 | m.def("backward", &roi_align_backward_cuda, "Roi_Align backward (CUDA)"); 85 | } 86 | -------------------------------------------------------------------------------- /mmdet/models/roi_extractors/single_level.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from mmdet import ops 7 | from ..registry import ROI_EXTRACTORS 8 | 9 | 10 | @ROI_EXTRACTORS.register_module 11 | class SingleRoIExtractor(nn.Module): 12 | """Extract RoI features from a single level feature map. 13 | 14 | If there are multiple input feature levels, each RoI is mapped to a level 15 | according to its scale. 16 | 17 | Args: 18 | roi_layer (dict): Specify RoI layer type and arguments. 19 | out_channels (int): Output channels of RoI layers. 20 | featmap_strides (list[int]): Strides of input feature maps. 21 | finest_scale (int): Scale threshold for mapping to level 0. 22 | """ 23 | 24 | def __init__(self, 25 | roi_layer, 26 | out_channels, 27 | featmap_strides, 28 | finest_scale=56): 29 | super(SingleRoIExtractor, self).__init__() 30 | self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides) 31 | self.out_channels = out_channels 32 | self.featmap_strides = featmap_strides 33 | self.finest_scale = finest_scale 34 | 35 | @property 36 | def num_inputs(self): 37 | """int: Input feature map levels.""" 38 | return len(self.featmap_strides) 39 | 40 | def init_weights(self): 41 | pass 42 | 43 | def build_roi_layers(self, layer_cfg, featmap_strides): 44 | cfg = layer_cfg.copy() 45 | layer_type = cfg.pop('type') 46 | assert hasattr(ops, layer_type) 47 | layer_cls = getattr(ops, layer_type) 48 | roi_layers = nn.ModuleList( 49 | [layer_cls(spatial_scale=1 / s, **cfg) for s in featmap_strides]) 50 | return roi_layers 51 | 52 | def map_roi_levels(self, rois, num_levels): 53 | """Map rois to corresponding feature levels by scales. 54 | 55 | - scale < finest_scale: level 0 56 | - finest_scale <= scale < finest_scale * 2: level 1 57 | - finest_scale * 2 <= scale < finest_scale * 4: level 2 58 | - scale >= finest_scale * 4: level 3 59 | 60 | Args: 61 | rois (Tensor): Input RoIs, shape (k, 5). 62 | num_levels (int): Total level number.
63 | 64 | Returns: 65 | Tensor: Level index (0-based) of each RoI, shape (k, ) 66 | """ 67 | scale = torch.sqrt( 68 | (rois[:, 3] - rois[:, 1] + 1) * (rois[:, 4] - rois[:, 2] + 1)) 69 | target_lvls = torch.floor(torch.log2(scale / self.finest_scale + 1e-6)) 70 | target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long() 71 | return target_lvls 72 | 73 | def forward(self, feats, rois): 74 | if len(feats) == 1: 75 | return self.roi_layers[0](feats[0], rois) 76 | 77 | out_size = self.roi_layers[0].out_size 78 | num_levels = len(feats) 79 | target_lvls = self.map_roi_levels(rois, num_levels) 80 | roi_feats = torch.cuda.FloatTensor(rois.size()[0], self.out_channels, 81 | out_size, out_size).fill_(0) 82 | for i in range(num_levels): 83 | inds = target_lvls == i 84 | if inds.any(): 85 | rois_ = rois[inds, :] 86 | roi_feats_t = self.roi_layers[i](feats[i], rois_) 87 | roi_feats[inds] += roi_feats_t 88 | return roi_feats 89 | -------------------------------------------------------------------------------- /mmdet/core/anchor/anchor_generator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class AnchorGenerator(object): 5 | 6 | def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None): 7 | self.base_size = base_size 8 | self.scales = torch.Tensor(scales) 9 | self.ratios = torch.Tensor(ratios) 10 | self.scale_major = scale_major 11 | self.ctr = ctr 12 | self.base_anchors = self.gen_base_anchors() 13 | 14 | @property 15 | def num_base_anchors(self): 16 | return self.base_anchors.size(0) 17 | 18 | def gen_base_anchors(self): 19 | w = self.base_size 20 | h = self.base_size 21 | if self.ctr is None: 22 | x_ctr = 0.5 * (w - 1) 23 | y_ctr = 0.5 * (h - 1) 24 | else: 25 | x_ctr, y_ctr = self.ctr 26 | 27 | h_ratios = torch.sqrt(self.ratios) 28 | w_ratios = 1 / h_ratios 29 | if self.scale_major: 30 | ws = (w * w_ratios[:, None] * self.scales[None, :]).view(-1) 31 | hs = (h * h_ratios[:, None] * self.scales[None, :]).view(-1) 32 | else: 33 | ws = (w * self.scales[:, None] * w_ratios[None, :]).view(-1) 34 | hs = (h * self.scales[:, None] * h_ratios[None, :]).view(-1) 35 | 36 | base_anchors = torch.stack( 37 | [ 38 | x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1), 39 | x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1) 40 | ], 41 | dim=-1).round() 42 | 43 | return base_anchors 44 | 45 | def _meshgrid(self, x, y, row_major=True): 46 | xx = x.repeat(len(y)) 47 | yy = y.view(-1, 1).repeat(1, len(x)).view(-1) 48 | if row_major: 49 | return xx, yy 50 | else: 51 | return yy, xx 52 | 53 | def grid_anchors(self, featmap_size, stride=16, device='cuda'): 54 | base_anchors = self.base_anchors.to(device) 55 | 56 | feat_h, feat_w = featmap_size 57 | shift_x = torch.arange(0, feat_w, device=device) * stride 58 | shift_y = torch.arange(0, feat_h, device=device) * stride 59 | shift_xx, shift_yy = self._meshgrid(shift_x, shift_y) 60 | shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1) 61 | shifts = shifts.type_as(base_anchors) 62 | # first feat_w elements correspond to the first row of shifts 63 | # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get 64 | # shifted anchors (K, A, 4), reshape to (K*A, 4) 65 | 66 | all_anchors = base_anchors[None, :, :] + shifts[:, None, :] 67 | all_anchors = all_anchors.view(-1, 4) 68 | # first A rows correspond to A anchors of (0, 0) in feature map, 69 | # then (0, 1), (0, 2), ... 
70 | return all_anchors 71 | 72 | def valid_flags(self, featmap_size, valid_size, device='cuda'): 73 | feat_h, feat_w = featmap_size 74 | valid_h, valid_w = valid_size 75 | assert valid_h <= feat_h and valid_w <= feat_w 76 | valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device) 77 | valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device) 78 | valid_x[:valid_w] = 1 79 | valid_y[:valid_h] = 1 80 | valid_xx, valid_yy = self._meshgrid(valid_x, valid_y) 81 | valid = valid_xx & valid_yy 82 | valid = valid[:, None].expand( 83 | valid.size(0), self.num_base_anchors).contiguous().view(-1) 84 | return valid 85 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import time 4 | from setuptools import find_packages, setup 5 | 6 | 7 | def readme(): 8 | with open('README.md', encoding='utf-8') as f: 9 | content = f.read() 10 | return content 11 | 12 | 13 | MAJOR = 0 14 | MINOR = 6 15 | PATCH = 'rc0' 16 | SUFFIX = '' 17 | SHORT_VERSION = '{}.{}.{}{}'.format(MAJOR, MINOR, PATCH, SUFFIX) 18 | 19 | version_file = 'mmdet/version.py' 20 | 21 | 22 | def get_git_hash(): 23 | 24 | def _minimal_ext_cmd(cmd): 25 | # construct minimal environment 26 | env = {} 27 | for k in ['SYSTEMROOT', 'PATH', 'HOME']: 28 | v = os.environ.get(k) 29 | if v is not None: 30 | env[k] = v 31 | # LANGUAGE is used on win32 32 | env['LANGUAGE'] = 'C' 33 | env['LANG'] = 'C' 34 | env['LC_ALL'] = 'C' 35 | out = subprocess.Popen( 36 | cmd, stdout=subprocess.PIPE, env=env).communicate()[0] 37 | return out 38 | 39 | try: 40 | out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD']) 41 | sha = out.strip().decode('ascii') 42 | except OSError: 43 | sha = 'unknown' 44 | 45 | return sha 46 | 47 | 48 | def get_hash(): 49 | if os.path.exists('.git'): 50 | sha = get_git_hash()[:7] 51 | elif os.path.exists(version_file): 52 | try: 53 | from mmdet.version import __version__ 54 | sha = __version__.split('+')[-1] 55 | except ImportError: 56 | raise ImportError('Unable to get git version') 57 | else: 58 | sha = 'unknown' 59 | 60 | return sha 61 | 62 | 63 | def write_version_py(): 64 | content = """# GENERATED VERSION FILE 65 | # TIME: {} 66 | 67 | __version__ = '{}' 68 | short_version = '{}' 69 | """ 70 | sha = get_hash() 71 | VERSION = SHORT_VERSION + '+' + sha 72 | 73 | with open(version_file, 'w') as f: 74 | f.write(content.format(time.asctime(), VERSION, SHORT_VERSION)) 75 | 76 | 77 | def get_version(): 78 | with open(version_file, 'r') as f: 79 | exec(compile(f.read(), version_file, 'exec')) 80 | return locals()['__version__'] 81 | 82 | 83 | if __name__ == '__main__': 84 | write_version_py() 85 | setup( 86 | name='mmdet', 87 | version=get_version(), 88 | description='Open MMLab Detection Toolbox', 89 | long_description=readme(), 90 | keywords='computer vision, object detection', 91 | url='https://github.com/open-mmlab/mmdetection', 92 | packages=find_packages(exclude=('configs', 'tools', 'demo')), 93 | package_data={'mmdet.ops': ['*/*.so']}, 94 | classifiers=[ 95 | 'Development Status :: 4 - Beta', 96 | 'License :: OSI Approved :: Apache Software License', 97 | 'Operating System :: OS Independent', 98 | 'Programming Language :: Python :: 2', 99 | 'Programming Language :: Python :: 2.7', 100 | 'Programming Language :: Python :: 3', 101 | 'Programming Language :: Python :: 3.4', 102 | 'Programming Language :: Python :: 3.5', 103 | 'Programming Language :: Python :: 3.6', 104 | 
], 105 | license='GPLv3', 106 | setup_requires=['pytest-runner'], 107 | tests_require=['pytest'], 108 | install_requires=[ 109 | 'mmcv', 'numpy', 'matplotlib', 'six', 'terminaltables', 110 | 'pycocotools' 111 | ], 112 | zip_safe=False) 113 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/merge_augs.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import numpy as np 4 | 5 | from mmdet.ops import nms 6 | from ..bbox import bbox_mapping_back 7 | 8 | 9 | def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg): 10 | """Merge augmented proposals (multiscale, flip, etc.) 11 | 12 | Args: 13 | aug_proposals (list[Tensor]): proposals from different testing 14 | schemes, shape (n, 5). Note that they are not rescaled to the 15 | original image size. 16 | img_metas (list[dict]): image info including "img_shape", "scale_factor" and "flip". 17 | rpn_test_cfg (dict): rpn test config. 18 | 19 | Returns: 20 | Tensor: shape (n, 5), proposals (with scores) corresponding to the original image scale. 21 | """ 22 | recovered_proposals = [] 23 | for proposals, img_info in zip(aug_proposals, img_metas): 24 | img_shape = img_info['img_shape'] 25 | scale_factor = img_info['scale_factor'] 26 | flip = img_info['flip'] 27 | _proposals = proposals.clone() 28 | _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], img_shape, 29 | scale_factor, flip) 30 | recovered_proposals.append(_proposals) 31 | aug_proposals = torch.cat(recovered_proposals, dim=0) 32 | merged_proposals, _ = nms(aug_proposals, rpn_test_cfg.nms_thr) 33 | scores = merged_proposals[:, 4] 34 | _, order = scores.sort(0, descending=True) 35 | num = min(rpn_test_cfg.max_num, merged_proposals.shape[0]) 36 | order = order[:num] 37 | merged_proposals = merged_proposals[order, :] 38 | return merged_proposals 39 | 40 | 41 | def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg): 42 | """Merge augmented detection bboxes and scores. 43 | 44 | Args: 45 | aug_bboxes (list[Tensor]): shape (n, 4*#class) 46 | aug_scores (list[Tensor] or None): shape (n, #class) 47 | img_metas (list[list[dict]]): image info including "img_shape", "scale_factor" and "flip". 48 | rcnn_test_cfg (dict): rcnn test config. 49 | 50 | Returns: 51 | tuple: (bboxes, scores) 52 | """ 53 | recovered_bboxes = [] 54 | for bboxes, img_info in zip(aug_bboxes, img_metas): 55 | img_shape = img_info[0]['img_shape'] 56 | scale_factor = img_info[0]['scale_factor'] 57 | flip = img_info[0]['flip'] 58 | bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip) 59 | recovered_bboxes.append(bboxes) 60 | bboxes = torch.stack(recovered_bboxes).mean(dim=0) 61 | if aug_scores is None: 62 | return bboxes 63 | else: 64 | scores = torch.stack(aug_scores).mean(dim=0) 65 | return bboxes, scores 66 | 67 | 68 | def merge_aug_scores(aug_scores): 69 | """Merge augmented bbox scores.""" 70 | if isinstance(aug_scores[0], torch.Tensor): 71 | return torch.mean(torch.stack(aug_scores), dim=0) 72 | else: 73 | return np.mean(aug_scores, axis=0) 74 | 75 | 76 | def merge_aug_masks(aug_masks, img_metas, rcnn_test_cfg, weights=None): 77 | """Merge augmented mask prediction. 78 | 79 | Args: 80 | aug_masks (list[ndarray]): shape (n, #class, h, w) 81 | img_metas (list[list[dict]]): image info including "img_shape", "scale_factor" and "flip". 82 | rcnn_test_cfg (dict): rcnn test config.
83 | 84 | Returns: 85 | ndarray: merged masks of shape (n, #class, h, w). 86 | """ 87 | recovered_masks = [ 88 | mask if not img_info[0]['flip'] else mask[..., ::-1] 89 | for mask, img_info in zip(aug_masks, img_metas) 90 | ] 91 | if weights is None: 92 | merged_masks = np.mean(recovered_masks, axis=0) 93 | else: 94 | merged_masks = np.average( 95 | np.array(recovered_masks), axis=0, weights=np.array(weights)) 96 | return merged_masks 97 | -------------------------------------------------------------------------------- /mmdet/models/detectors/rpn.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from mmdet.core import tensor2imgs, bbox_mapping 4 | from .base import BaseDetector 5 | from .test_mixins import RPNTestMixin 6 | from .. import builder 7 | from ..registry import DETECTORS 8 | 9 | 10 | @DETECTORS.register_module 11 | class RPN(BaseDetector, RPNTestMixin): 12 | 13 | def __init__(self, 14 | backbone, 15 | neck, 16 | rpn_head, 17 | train_cfg, 18 | test_cfg, 19 | pretrained=None): 20 | super(RPN, self).__init__() 21 | self.backbone = builder.build_backbone(backbone) 22 | self.neck = builder.build_neck(neck) if neck is not None else None 23 | self.rpn_head = builder.build_head(rpn_head) 24 | self.train_cfg = train_cfg 25 | self.test_cfg = test_cfg 26 | self.init_weights(pretrained=pretrained) 27 | 28 | def init_weights(self, pretrained=None): 29 | super(RPN, self).init_weights(pretrained) 30 | self.backbone.init_weights(pretrained=pretrained) 31 | if self.with_neck: 32 | self.neck.init_weights() 33 | self.rpn_head.init_weights() 34 | 35 | def extract_feat(self, img): 36 | x = self.backbone(img) 37 | if self.with_neck: 38 | x = self.neck(x) 39 | return x 40 | 41 | def forward_train(self, 42 | img, 43 | img_meta, 44 | gt_bboxes=None, 45 | gt_bboxes_ignore=None): 46 | if self.train_cfg.rpn.get('debug', False): 47 | self.rpn_head.debug_imgs = tensor2imgs(img) 48 | 49 | x = self.extract_feat(img) 50 | rpn_outs = self.rpn_head(x) 51 | 52 | rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta, self.train_cfg.rpn) 53 | losses = self.rpn_head.loss( 54 | *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) 55 | return losses 56 | 57 | def simple_test(self, img, img_meta, rescale=False): 58 | x = self.extract_feat(img) 59 | proposal_list = self.simple_test_rpn(x, img_meta, self.test_cfg.rpn) 60 | if rescale: 61 | for proposals, meta in zip(proposal_list, img_meta): 62 | proposals[:, :4] /= meta['scale_factor'] 63 | # TODO: remove this restriction 64 | return proposal_list[0].cpu().numpy() 65 | 66 | def aug_test(self, imgs, img_metas, rescale=False): 67 | proposal_list = self.aug_test_rpn( 68 | self.extract_feats(imgs), img_metas, self.test_cfg.rpn) 69 | if not rescale: 70 | for proposals, img_meta in zip(proposal_list, img_metas[0]): 71 | img_shape = img_meta['img_shape'] 72 | scale_factor = img_meta['scale_factor'] 73 | flip = img_meta['flip'] 74 | proposals[:, :4] = bbox_mapping(proposals[:, :4], img_shape, 75 | scale_factor, flip) 76 | # TODO: remove this restriction 77 | return proposal_list[0].cpu().numpy() 78 | 79 | def show_result(self, data, result, img_norm_cfg, dataset=None, top_k=20): 80 | """Show RPN proposals on the image. 81 | 82 | Although we assume batch size is 1, this method supports arbitrary 83 | batch size.
84 | """ 85 | img_tensor = data['img'][0] 86 | img_metas = data['img_meta'][0].data[0] 87 | imgs = tensor2imgs(img_tensor, **img_norm_cfg) 88 | assert len(imgs) == len(img_metas) 89 | for img, img_meta in zip(imgs, img_metas): 90 | h, w, _ = img_meta['img_shape'] 91 | img_show = img[:h, :w, :] 92 | mmcv.imshow_bboxes(img_show, result, top_k=top_k) 93 | -------------------------------------------------------------------------------- /configs/retinanet_r50_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='modelzoo://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | style='pytorch'), 12 | neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | start_level=1, 17 | add_extra_convs=True, 18 | num_outs=5), 19 | bbox_head=dict( 20 | type='RetinaHead', 21 | num_classes=81, 22 | in_channels=256, 23 | stacked_convs=4, 24 | feat_channels=256, 25 | octave_base_scale=4, 26 | scales_per_octave=3, 27 | anchor_ratios=[0.5, 1.0, 2.0], 28 | anchor_strides=[8, 16, 32, 64, 128], 29 | target_means=[.0, .0, .0, .0], 30 | target_stds=[1.0, 1.0, 1.0, 1.0])) 31 | # training and testing settings 32 | train_cfg = dict( 33 | assigner=dict( 34 | type='MaxIoUAssigner', 35 | pos_iou_thr=0.5, 36 | neg_iou_thr=0.4, 37 | min_pos_iou=0, 38 | ignore_iof_thr=-1), 39 | smoothl1_beta=0.11, 40 | gamma=2.0, 41 | alpha=0.25, 42 | allowed_border=-1, 43 | pos_weight=-1, 44 | debug=False) 45 | test_cfg = dict( 46 | nms_pre=1000, 47 | min_bbox_size=0, 48 | score_thr=0.05, 49 | nms=dict(type='nms', iou_thr=0.5), 50 | max_per_img=100) 51 | # dataset settings 52 | dataset_type = 'CocoDataset' 53 | data_root = 'data/coco/' 54 | img_norm_cfg = dict( 55 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 56 | data = dict( 57 | imgs_per_gpu=2, 58 | workers_per_gpu=2, 59 | train=dict( 60 | type=dataset_type, 61 | ann_file=data_root + 'annotations/instances_train2017.json', 62 | img_prefix=data_root + 'train2017/', 63 | img_scale=(1333, 800), 64 | img_norm_cfg=img_norm_cfg, 65 | size_divisor=32, 66 | flip_ratio=0.5, 67 | with_mask=False, 68 | with_crowd=False, 69 | with_label=True), 70 | val=dict( 71 | type=dataset_type, 72 | ann_file=data_root + 'annotations/instances_val2017.json', 73 | img_prefix=data_root + 'val2017/', 74 | img_scale=(1333, 800), 75 | img_norm_cfg=img_norm_cfg, 76 | size_divisor=32, 77 | flip_ratio=0, 78 | with_mask=False, 79 | with_crowd=False, 80 | with_label=True), 81 | test=dict( 82 | type=dataset_type, 83 | ann_file=data_root + 'annotations/instances_val2017.json', 84 | img_prefix=data_root + 'val2017/', 85 | img_scale=(1333, 800), 86 | img_norm_cfg=img_norm_cfg, 87 | size_divisor=32, 88 | flip_ratio=0, 89 | with_mask=False, 90 | with_crowd=False, 91 | with_label=False, 92 | test_mode=True)) 93 | # optimizer 94 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) 95 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 96 | # learning policy 97 | lr_config = dict( 98 | policy='step', 99 | warmup='linear', 100 | warmup_iters=500, 101 | warmup_ratio=1.0 / 3, 102 | step=[8, 11]) 103 | checkpoint_config = dict(interval=1) 104 | # yapf:disable 105 | log_config = dict( 106 | interval=50, 107 | hooks=[ 108 | dict(type='TextLoggerHook'), 109 | # dict(type='TensorboardLoggerHook') 110 | ]) 111 | # yapf:enable 112 | # runtime 
settings 113 | total_epochs = 12 114 | device_ids = range(8) 115 | dist_params = dict(backend='nccl') 116 | log_level = 'INFO' 117 | work_dir = './work_dirs/retinanet_r50_fpn_1x' 118 | load_from = None 119 | resume_from = None 120 | workflow = [('train', 1)] 121 | -------------------------------------------------------------------------------- /configs/retinanet_r101_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='modelzoo://resnet101', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=101, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | style='pytorch'), 12 | neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | start_level=1, 17 | add_extra_convs=True, 18 | num_outs=5), 19 | bbox_head=dict( 20 | type='RetinaHead', 21 | num_classes=81, 22 | in_channels=256, 23 | stacked_convs=4, 24 | feat_channels=256, 25 | octave_base_scale=4, 26 | scales_per_octave=3, 27 | anchor_ratios=[0.5, 1.0, 2.0], 28 | anchor_strides=[8, 16, 32, 64, 128], 29 | target_means=[.0, .0, .0, .0], 30 | target_stds=[1.0, 1.0, 1.0, 1.0])) 31 | # training and testing settings 32 | train_cfg = dict( 33 | assigner=dict( 34 | type='MaxIoUAssigner', 35 | pos_iou_thr=0.5, 36 | neg_iou_thr=0.4, 37 | min_pos_iou=0, 38 | ignore_iof_thr=-1), 39 | smoothl1_beta=0.11, 40 | gamma=2.0, 41 | alpha=0.25, 42 | allowed_border=-1, 43 | pos_weight=-1, 44 | debug=False) 45 | test_cfg = dict( 46 | nms_pre=1000, 47 | min_bbox_size=0, 48 | score_thr=0.05, 49 | nms=dict(type='nms', iou_thr=0.5), 50 | max_per_img=100) 51 | # dataset settings 52 | dataset_type = 'CocoDataset' 53 | data_root = 'data/coco/' 54 | img_norm_cfg = dict( 55 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 56 | data = dict( 57 | imgs_per_gpu=2, 58 | workers_per_gpu=2, 59 | train=dict( 60 | type=dataset_type, 61 | ann_file=data_root + 'annotations/instances_train2017.json', 62 | img_prefix=data_root + 'train2017/', 63 | img_scale=(1333, 800), 64 | img_norm_cfg=img_norm_cfg, 65 | size_divisor=32, 66 | flip_ratio=0.5, 67 | with_mask=False, 68 | with_crowd=False, 69 | with_label=True), 70 | val=dict( 71 | type=dataset_type, 72 | ann_file=data_root + 'annotations/instances_val2017.json', 73 | img_prefix=data_root + 'val2017/', 74 | img_scale=(1333, 800), 75 | img_norm_cfg=img_norm_cfg, 76 | size_divisor=32, 77 | flip_ratio=0, 78 | with_mask=False, 79 | with_crowd=False, 80 | with_label=True), 81 | test=dict( 82 | type=dataset_type, 83 | ann_file=data_root + 'annotations/instances_val2017.json', 84 | img_prefix=data_root + 'val2017/', 85 | img_scale=(1333, 800), 86 | img_norm_cfg=img_norm_cfg, 87 | size_divisor=32, 88 | flip_ratio=0, 89 | with_mask=False, 90 | with_crowd=False, 91 | with_label=False, 92 | test_mode=True)) 93 | # optimizer 94 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) 95 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 96 | # learning policy 97 | lr_config = dict( 98 | policy='step', 99 | warmup='linear', 100 | warmup_iters=500, 101 | warmup_ratio=1.0 / 3, 102 | step=[8, 11]) 103 | checkpoint_config = dict(interval=1) 104 | # yapf:disable 105 | log_config = dict( 106 | interval=50, 107 | hooks=[ 108 | dict(type='TextLoggerHook'), 109 | # dict(type='TensorboardLoggerHook') 110 | ]) 111 | # yapf:enable 112 | # runtime settings 113 | total_epochs = 12 114 | device_ids = range(8) 115 | dist_params = 
dict(backend='nccl') 116 | log_level = 'INFO' 117 | work_dir = './work_dirs/retinanet_r101_fpn_1x' 118 | load_from = None 119 | resume_from = None 120 | workflow = [('train', 1)] 121 | -------------------------------------------------------------------------------- /configs/retinanet_x101_32x4d_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='open-mmlab://resnext101_32x4d', 5 | backbone=dict( 6 | type='ResNeXt', 7 | depth=101, 8 | groups=32, 9 | base_width=4, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=1, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | start_level=1, 19 | add_extra_convs=True, 20 | num_outs=5), 21 | bbox_head=dict( 22 | type='RetinaHead', 23 | num_classes=81, 24 | in_channels=256, 25 | stacked_convs=4, 26 | feat_channels=256, 27 | octave_base_scale=4, 28 | scales_per_octave=3, 29 | anchor_ratios=[0.5, 1.0, 2.0], 30 | anchor_strides=[8, 16, 32, 64, 128], 31 | target_means=[.0, .0, .0, .0], 32 | target_stds=[1.0, 1.0, 1.0, 1.0])) 33 | # training and testing settings 34 | train_cfg = dict( 35 | assigner=dict( 36 | type='MaxIoUAssigner', 37 | pos_iou_thr=0.5, 38 | neg_iou_thr=0.4, 39 | min_pos_iou=0, 40 | ignore_iof_thr=-1), 41 | smoothl1_beta=0.11, 42 | gamma=2.0, 43 | alpha=0.25, 44 | allowed_border=-1, 45 | pos_weight=-1, 46 | debug=False) 47 | test_cfg = dict( 48 | nms_pre=1000, 49 | min_bbox_size=0, 50 | score_thr=0.05, 51 | nms=dict(type='nms', iou_thr=0.5), 52 | max_per_img=100) 53 | # dataset settings 54 | dataset_type = 'CocoDataset' 55 | data_root = 'data/coco/' 56 | img_norm_cfg = dict( 57 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 58 | data = dict( 59 | imgs_per_gpu=2, 60 | workers_per_gpu=2, 61 | train=dict( 62 | type=dataset_type, 63 | ann_file=data_root + 'annotations/instances_train2017.json', 64 | img_prefix=data_root + 'train2017/', 65 | img_scale=(1333, 800), 66 | img_norm_cfg=img_norm_cfg, 67 | size_divisor=32, 68 | flip_ratio=0.5, 69 | with_mask=False, 70 | with_crowd=False, 71 | with_label=True), 72 | val=dict( 73 | type=dataset_type, 74 | ann_file=data_root + 'annotations/instances_val2017.json', 75 | img_prefix=data_root + 'val2017/', 76 | img_scale=(1333, 800), 77 | img_norm_cfg=img_norm_cfg, 78 | size_divisor=32, 79 | flip_ratio=0, 80 | with_mask=False, 81 | with_crowd=False, 82 | with_label=True), 83 | test=dict( 84 | type=dataset_type, 85 | ann_file=data_root + 'annotations/instances_val2017.json', 86 | img_prefix=data_root + 'val2017/', 87 | img_scale=(1333, 800), 88 | img_norm_cfg=img_norm_cfg, 89 | size_divisor=32, 90 | flip_ratio=0, 91 | with_mask=False, 92 | with_crowd=False, 93 | with_label=False, 94 | test_mode=True)) 95 | # optimizer 96 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) 97 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 98 | # learning policy 99 | lr_config = dict( 100 | policy='step', 101 | warmup='linear', 102 | warmup_iters=500, 103 | warmup_ratio=1.0 / 3, 104 | step=[8, 11]) 105 | checkpoint_config = dict(interval=1) 106 | # yapf:disable 107 | log_config = dict( 108 | interval=50, 109 | hooks=[ 110 | dict(type='TextLoggerHook'), 111 | # dict(type='TensorboardLoggerHook') 112 | ]) 113 | # yapf:enable 114 | # runtime settings 115 | total_epochs = 12 116 | device_ids = range(8) 117 | dist_params = dict(backend='nccl') 118 | 
log_level = 'INFO' 119 | work_dir = './work_dirs/retinanet_x101_32x4d_fpn_1x' 120 | load_from = None 121 | resume_from = None 122 | workflow = [('train', 1)] 123 | -------------------------------------------------------------------------------- /configs/retinanet_x101_64x4d_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='open-mmlab://resnext101_64x4d', 5 | backbone=dict( 6 | type='ResNeXt', 7 | depth=101, 8 | groups=64, 9 | base_width=4, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=1, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | start_level=1, 19 | add_extra_convs=True, 20 | num_outs=5), 21 | bbox_head=dict( 22 | type='RetinaHead', 23 | num_classes=81, 24 | in_channels=256, 25 | stacked_convs=4, 26 | feat_channels=256, 27 | octave_base_scale=4, 28 | scales_per_octave=3, 29 | anchor_ratios=[0.5, 1.0, 2.0], 30 | anchor_strides=[8, 16, 32, 64, 128], 31 | target_means=[.0, .0, .0, .0], 32 | target_stds=[1.0, 1.0, 1.0, 1.0])) 33 | # training and testing settings 34 | train_cfg = dict( 35 | assigner=dict( 36 | type='MaxIoUAssigner', 37 | pos_iou_thr=0.5, 38 | neg_iou_thr=0.4, 39 | min_pos_iou=0, 40 | ignore_iof_thr=-1), 41 | smoothl1_beta=0.11, 42 | gamma=2.0, 43 | alpha=0.25, 44 | allowed_border=-1, 45 | pos_weight=-1, 46 | debug=False) 47 | test_cfg = dict( 48 | nms_pre=1000, 49 | min_bbox_size=0, 50 | score_thr=0.05, 51 | nms=dict(type='nms', iou_thr=0.5), 52 | max_per_img=100) 53 | # dataset settings 54 | dataset_type = 'CocoDataset' 55 | data_root = 'data/coco/' 56 | img_norm_cfg = dict( 57 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 58 | data = dict( 59 | imgs_per_gpu=2, 60 | workers_per_gpu=2, 61 | train=dict( 62 | type=dataset_type, 63 | ann_file=data_root + 'annotations/instances_train2017.json', 64 | img_prefix=data_root + 'train2017/', 65 | img_scale=(1333, 800), 66 | img_norm_cfg=img_norm_cfg, 67 | size_divisor=32, 68 | flip_ratio=0.5, 69 | with_mask=False, 70 | with_crowd=False, 71 | with_label=True), 72 | val=dict( 73 | type=dataset_type, 74 | ann_file=data_root + 'annotations/instances_val2017.json', 75 | img_prefix=data_root + 'val2017/', 76 | img_scale=(1333, 800), 77 | img_norm_cfg=img_norm_cfg, 78 | size_divisor=32, 79 | flip_ratio=0, 80 | with_mask=False, 81 | with_crowd=False, 82 | with_label=True), 83 | test=dict( 84 | type=dataset_type, 85 | ann_file=data_root + 'annotations/instances_val2017.json', 86 | img_prefix=data_root + 'val2017/', 87 | img_scale=(1333, 800), 88 | img_norm_cfg=img_norm_cfg, 89 | size_divisor=32, 90 | flip_ratio=0, 91 | with_mask=False, 92 | with_crowd=False, 93 | with_label=False, 94 | test_mode=True)) 95 | # optimizer 96 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) 97 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 98 | # learning policy 99 | lr_config = dict( 100 | policy='step', 101 | warmup='linear', 102 | warmup_iters=500, 103 | warmup_ratio=1.0 / 3, 104 | step=[8, 11]) 105 | checkpoint_config = dict(interval=1) 106 | # yapf:disable 107 | log_config = dict( 108 | interval=50, 109 | hooks=[ 110 | dict(type='TextLoggerHook'), 111 | # dict(type='TensorboardLoggerHook') 112 | ]) 113 | # yapf:enable 114 | # runtime settings 115 | total_epochs = 12 116 | device_ids = range(8) 117 | dist_params = dict(backend='nccl') 118 | log_level = 'INFO' 119 | work_dir
= './work_dirs/retinanet_x101_64x4d_fpn_1x' 120 | load_from = None 121 | resume_from = None 122 | workflow = [('train', 1)] 123 | -------------------------------------------------------------------------------- /configs/rpn_r50_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='modelzoo://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | style='pytorch'), 12 | neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=256, 20 | feat_channels=256, 21 | anchor_scales=[8], 22 | anchor_ratios=[0.5, 1.0, 2.0], 23 | anchor_strides=[4, 8, 16, 32, 64], 24 | target_means=[.0, .0, .0, .0], 25 | target_stds=[1.0, 1.0, 1.0, 1.0], 26 | use_sigmoid_cls=True)) 27 | # model training and testing settings 28 | train_cfg = dict( 29 | rpn=dict( 30 | assigner=dict( 31 | type='MaxIoUAssigner', 32 | pos_iou_thr=0.7, 33 | neg_iou_thr=0.3, 34 | min_pos_iou=0.3, 35 | ignore_iof_thr=-1), 36 | sampler=dict( 37 | type='RandomSampler', 38 | num=256, 39 | pos_fraction=0.5, 40 | neg_pos_ub=-1, 41 | add_gt_as_proposals=False), 42 | allowed_border=0, 43 | pos_weight=-1, 44 | smoothl1_beta=1 / 9.0, 45 | debug=False)) 46 | test_cfg = dict( 47 | rpn=dict( 48 | nms_across_levels=False, 49 | nms_pre=2000, 50 | nms_post=2000, 51 | max_num=2000, 52 | nms_thr=0.7, 53 | min_bbox_size=0)) 54 | # dataset settings 55 | dataset_type = 'CocoDataset' 56 | data_root = 'data/coco/' 57 | img_norm_cfg = dict( 58 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 59 | data = dict( 60 | imgs_per_gpu=2, 61 | workers_per_gpu=2, 62 | train=dict( 63 | type=dataset_type, 64 | ann_file=data_root + 'annotations/instances_train2017.json', 65 | img_prefix=data_root + 'train2017/', 66 | img_scale=(1333, 800), 67 | img_norm_cfg=img_norm_cfg, 68 | size_divisor=32, 69 | flip_ratio=0.5, 70 | with_mask=False, 71 | with_crowd=False, 72 | with_label=False), 73 | val=dict( 74 | type=dataset_type, 75 | ann_file=data_root + 'annotations/instances_val2017.json', 76 | img_prefix=data_root + 'val2017/', 77 | img_scale=(1333, 800), 78 | img_norm_cfg=img_norm_cfg, 79 | size_divisor=32, 80 | flip_ratio=0, 81 | with_mask=False, 82 | with_crowd=False, 83 | with_label=False), 84 | test=dict( 85 | type=dataset_type, 86 | ann_file=data_root + 'annotations/instances_val2017.json', 87 | img_prefix=data_root + 'val2017/', 88 | img_scale=(1333, 800), 89 | img_norm_cfg=img_norm_cfg, 90 | size_divisor=32, 91 | flip_ratio=0, 92 | with_mask=False, 93 | with_label=False, 94 | test_mode=True)) 95 | # optimizer 96 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 97 | # runner configs 98 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 99 | lr_config = dict( 100 | policy='step', 101 | warmup='linear', 102 | warmup_iters=500, 103 | warmup_ratio=1.0 / 3, 104 | step=[8, 11]) 105 | checkpoint_config = dict(interval=1) 106 | # yapf:disable 107 | log_config = dict( 108 | interval=50, 109 | hooks=[ 110 | dict(type='TextLoggerHook'), 111 | # dict(type='TensorboardLoggerHook') 112 | ]) 113 | # yapf:enable 114 | # runtime settings 115 | total_epochs = 12 116 | dist_params = dict(backend='nccl') 117 | log_level = 'INFO' 118 | work_dir = './work_dirs/rpn_r50_fpn_1x' 119 | load_from = None 120 | resume_from = None 121 | workflow =
[('train', 1)] 122 | -------------------------------------------------------------------------------- /configs/rpn_r101_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='modelzoo://resnet101', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=101, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | style='pytorch'), 12 | neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=256, 20 | feat_channels=256, 21 | anchor_scales=[8], 22 | anchor_ratios=[0.5, 1.0, 2.0], 23 | anchor_strides=[4, 8, 16, 32, 64], 24 | target_means=[.0, .0, .0, .0], 25 | target_stds=[1.0, 1.0, 1.0, 1.0], 26 | use_sigmoid_cls=True)) 27 | # model training and testing settings 28 | train_cfg = dict( 29 | rpn=dict( 30 | assigner=dict( 31 | type='MaxIoUAssigner', 32 | pos_iou_thr=0.7, 33 | neg_iou_thr=0.3, 34 | min_pos_iou=0.3, 35 | ignore_iof_thr=-1), 36 | sampler=dict( 37 | type='RandomSampler', 38 | num=256, 39 | pos_fraction=0.5, 40 | neg_pos_ub=-1, 41 | add_gt_as_proposals=False), 42 | allowed_border=0, 43 | pos_weight=-1, 44 | smoothl1_beta=1 / 9.0, 45 | debug=False)) 46 | test_cfg = dict( 47 | rpn=dict( 48 | nms_across_levels=False, 49 | nms_pre=2000, 50 | nms_post=2000, 51 | max_num=2000, 52 | nms_thr=0.7, 53 | min_bbox_size=0)) 54 | # dataset settings 55 | dataset_type = 'CocoDataset' 56 | data_root = 'data/coco/' 57 | img_norm_cfg = dict( 58 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 59 | data = dict( 60 | imgs_per_gpu=2, 61 | workers_per_gpu=2, 62 | train=dict( 63 | type=dataset_type, 64 | ann_file=data_root + 'annotations/instances_train2017.json', 65 | img_prefix=data_root + 'train2017/', 66 | img_scale=(1333, 800), 67 | img_norm_cfg=img_norm_cfg, 68 | size_divisor=32, 69 | flip_ratio=0.5, 70 | with_mask=False, 71 | with_crowd=False, 72 | with_label=False), 73 | val=dict( 74 | type=dataset_type, 75 | ann_file=data_root + 'annotations/instances_val2017.json', 76 | img_prefix=data_root + 'val2017/', 77 | img_scale=(1333, 800), 78 | img_norm_cfg=img_norm_cfg, 79 | size_divisor=32, 80 | flip_ratio=0, 81 | with_mask=False, 82 | with_crowd=False, 83 | with_label=False), 84 | test=dict( 85 | type=dataset_type, 86 | ann_file=data_root + 'annotations/instances_val2017.json', 87 | img_prefix=data_root + 'val2017/', 88 | img_scale=(1333, 800), 89 | img_norm_cfg=img_norm_cfg, 90 | size_divisor=32, 91 | flip_ratio=0, 92 | with_mask=False, 93 | with_label=False, 94 | test_mode=True)) 95 | # optimizer 96 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 97 | # runner configs 98 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 99 | lr_config = dict( 100 | policy='step', 101 | warmup='linear', 102 | warmup_iters=500, 103 | warmup_ratio=1.0 / 3, 104 | step=[8, 11]) 105 | checkpoint_config = dict(interval=1) 106 | # yapf:disable 107 | log_config = dict( 108 | interval=50, 109 | hooks=[ 110 | dict(type='TextLoggerHook'), 111 | # dict(type='TensorboardLoggerHook') 112 | ]) 113 | # yapf:enable 114 | # runtime settings 115 | total_epochs = 12 116 | dist_params = dict(backend='nccl') 117 | log_level = 'INFO' 118 | work_dir = './work_dirs/rpn_r101_fpn_1x' 119 | load_from = None 120 | resume_from = None 121 | workflow = [('train', 1)] 122 | 
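The config files in this directory are plain Python modules. A minimal sketch of how `tools/train.py` consumes one of them (hypothetical standalone usage, not a file in this repo; the `work_dir` value below is made up):

```python
# Load a config such as configs/rpn_r101_fpn_1x.py and override fields,
# mirroring what tools/train.py does after argument parsing.
from mmcv import Config

cfg = Config.fromfile('configs/rpn_r101_fpn_1x.py')
# Every top-level assignment in the config file becomes an attribute.
assert cfg.model.type == 'RPN'
assert cfg.optimizer.lr == 0.02
assert cfg.data.imgs_per_gpu == 2
# CLI flags in tools/train.py override fields by simple reassignment.
cfg.work_dir = './work_dirs/my_rpn_experiment'
cfg.gpus = 4
```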
-------------------------------------------------------------------------------- /configs/rpn_x101_32x4d_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='open-mmlab://resnext101_32x4d', 5 | backbone=dict( 6 | type='ResNeXt', 7 | depth=101, 8 | groups=32, 9 | base_width=4, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=1, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_scales=[8], 24 | anchor_ratios=[0.5, 1.0, 2.0], 25 | anchor_strides=[4, 8, 16, 32, 64], 26 | target_means=[.0, .0, .0, .0], 27 | target_stds=[1.0, 1.0, 1.0, 1.0], 28 | use_sigmoid_cls=True)) 29 | # model training and testing settings 30 | train_cfg = dict( 31 | rpn=dict( 32 | assigner=dict( 33 | type='MaxIoUAssigner', 34 | pos_iou_thr=0.7, 35 | neg_iou_thr=0.3, 36 | min_pos_iou=0.3, 37 | ignore_iof_thr=-1), 38 | sampler=dict( 39 | type='RandomSampler', 40 | num=256, 41 | pos_fraction=0.5, 42 | neg_pos_ub=-1, 43 | add_gt_as_proposals=False), 44 | allowed_border=0, 45 | pos_weight=-1, 46 | smoothl1_beta=1 / 9.0, 47 | debug=False)) 48 | test_cfg = dict( 49 | rpn=dict( 50 | nms_across_levels=False, 51 | nms_pre=2000, 52 | nms_post=2000, 53 | max_num=2000, 54 | nms_thr=0.7, 55 | min_bbox_size=0)) 56 | # dataset settings 57 | dataset_type = 'CocoDataset' 58 | data_root = 'data/coco/' 59 | img_norm_cfg = dict( 60 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 61 | data = dict( 62 | imgs_per_gpu=2, 63 | workers_per_gpu=2, 64 | train=dict( 65 | type=dataset_type, 66 | ann_file=data_root + 'annotations/instances_train2017.json', 67 | img_prefix=data_root + 'train2017/', 68 | img_scale=(1333, 800), 69 | img_norm_cfg=img_norm_cfg, 70 | size_divisor=32, 71 | flip_ratio=0.5, 72 | with_mask=False, 73 | with_crowd=False, 74 | with_label=False), 75 | val=dict( 76 | type=dataset_type, 77 | ann_file=data_root + 'annotations/instances_val2017.json', 78 | img_prefix=data_root + 'val2017/', 79 | img_scale=(1333, 800), 80 | img_norm_cfg=img_norm_cfg, 81 | size_divisor=32, 82 | flip_ratio=0, 83 | with_mask=False, 84 | with_crowd=False, 85 | with_label=False), 86 | test=dict( 87 | type=dataset_type, 88 | ann_file=data_root + 'annotations/instances_val2017.json', 89 | img_prefix=data_root + 'val2017/', 90 | img_scale=(1333, 800), 91 | img_norm_cfg=img_norm_cfg, 92 | size_divisor=32, 93 | flip_ratio=0, 94 | with_mask=False, 95 | with_label=False, 96 | test_mode=True)) 97 | # optimizer 98 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 99 | # runner configs 100 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 101 | lr_config = dict( 102 | policy='step', 103 | warmup='linear', 104 | warmup_iters=500, 105 | warmup_ratio=1.0 / 3, 106 | step=[8, 11]) 107 | checkpoint_config = dict(interval=1) 108 | # yapf:disable 109 | log_config = dict( 110 | interval=50, 111 | hooks=[ 112 | dict(type='TextLoggerHook'), 113 | # dict(type='TensorboardLoggerHook') 114 | ]) 115 | # yapf:enable 116 | # runtime settings 117 | total_epochs = 12 118 | dist_params = dict(backend='nccl') 119 | log_level = 'INFO' 120 | work_dir = './work_dirs/rpn_x101_32x4d_fpn_1x' 121 | load_from = None 122 | resume_from = None 123 | workflow = [('train', 1)] 124 |
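The `anchor_scales`, `anchor_ratios` and `anchor_strides` fields above drive the `AnchorGenerator` from `mmdet/core/anchor/anchor_generator.py` shown earlier. A small usage sketch (assumed, not repo code), with the base size set equal to the finest stride of 4 for illustration:

```python
from mmdet.core.anchor.anchor_generator import AnchorGenerator

# One scale (8) x three ratios (0.5, 1.0, 2.0) -> 3 base anchors per cell.
gen = AnchorGenerator(base_size=4, scales=[8], ratios=[0.5, 1.0, 2.0])
print(gen.num_base_anchors)  # 3

# Tile the base anchors over a 200 x 336 feature map at stride 4.
anchors = gen.grid_anchors((200, 336), stride=4, device='cpu')
print(anchors.shape)  # torch.Size([201600, 4]): 200 * 336 * 3 rows of (x1, y1, x2, y2)
```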
-------------------------------------------------------------------------------- /configs/rpn_x101_64x4d_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='open-mmlab://resnext101_64x4d', 5 | backbone=dict( 6 | type='ResNeXt', 7 | depth=101, 8 | groups=64, 9 | base_width=4, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=1, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_scales=[8], 24 | anchor_ratios=[0.5, 1.0, 2.0], 25 | anchor_strides=[4, 8, 16, 32, 64], 26 | target_means=[.0, .0, .0, .0], 27 | target_stds=[1.0, 1.0, 1.0, 1.0], 28 | use_sigmoid_cls=True)) 29 | # model training and testing settings 30 | train_cfg = dict( 31 | rpn=dict( 32 | assigner=dict( 33 | type='MaxIoUAssigner', 34 | pos_iou_thr=0.7, 35 | neg_iou_thr=0.3, 36 | min_pos_iou=0.3, 37 | ignore_iof_thr=-1), 38 | sampler=dict( 39 | type='RandomSampler', 40 | num=256, 41 | pos_fraction=0.5, 42 | neg_pos_ub=-1, 43 | add_gt_as_proposals=False), 44 | allowed_border=0, 45 | pos_weight=-1, 46 | smoothl1_beta=1 / 9.0, 47 | debug=False)) 48 | test_cfg = dict( 49 | rpn=dict( 50 | nms_across_levels=False, 51 | nms_pre=2000, 52 | nms_post=2000, 53 | max_num=2000, 54 | nms_thr=0.7, 55 | min_bbox_size=0)) 56 | # dataset settings 57 | dataset_type = 'CocoDataset' 58 | data_root = 'data/coco/' 59 | img_norm_cfg = dict( 60 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 61 | data = dict( 62 | imgs_per_gpu=2, 63 | workers_per_gpu=2, 64 | train=dict( 65 | type=dataset_type, 66 | ann_file=data_root + 'annotations/instances_train2017.json', 67 | img_prefix=data_root + 'train2017/', 68 | img_scale=(1333, 800), 69 | img_norm_cfg=img_norm_cfg, 70 | size_divisor=32, 71 | flip_ratio=0.5, 72 | with_mask=False, 73 | with_crowd=False, 74 | with_label=False), 75 | val=dict( 76 | type=dataset_type, 77 | ann_file=data_root + 'annotations/instances_val2017.json', 78 | img_prefix=data_root + 'val2017/', 79 | img_scale=(1333, 800), 80 | img_norm_cfg=img_norm_cfg, 81 | size_divisor=32, 82 | flip_ratio=0, 83 | with_mask=False, 84 | with_crowd=False, 85 | with_label=False), 86 | test=dict( 87 | type=dataset_type, 88 | ann_file=data_root + 'annotations/instances_val2017.json', 89 | img_prefix=data_root + 'val2017/', 90 | img_scale=(1333, 800), 91 | img_norm_cfg=img_norm_cfg, 92 | size_divisor=32, 93 | flip_ratio=0, 94 | with_mask=False, 95 | with_label=False, 96 | test_mode=True)) 97 | # optimizer 98 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 99 | # runner configs 100 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 101 | lr_config = dict( 102 | policy='step', 103 | warmup='linear', 104 | warmup_iters=500, 105 | warmup_ratio=1.0 / 3, 106 | step=[8, 11]) 107 | checkpoint_config = dict(interval=1) 108 | # yapf:disable 109 | log_config = dict( 110 | interval=50, 111 | hooks=[ 112 | dict(type='TextLoggerHook'), 113 | # dict(type='TensorboardLoggerHook') 114 | ]) 115 | # yapf:enable 116 | # runtime settings 117 | total_epochs = 12 118 | dist_params = dict(backend='nccl') 119 | log_level = 'INFO' 120 | work_dir = './work_dirs/rpn_x101_64x4d_fpn_1x' 121 | load_from = None 122 | resume_from = None 123 | workflow = [('train', 1)] 124 |
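Downstream two-stage configs (e.g. the Fast R-CNN ones below) consume RPN proposals through `SingleRoIExtractor`, whose `map_roi_levels` was shown earlier. A worked example reproducing that formula (a sketch, not repo code) with the default `finest_scale=56` and 4 feature levels:

```python
import torch

finest_scale = 56  # default in SingleRoIExtractor
# RoIs are (batch_idx, x1, y1, x2, y2); side lengths here are 32, 112 and 448.
rois = torch.tensor([[0., 0., 0., 31., 31.],
                     [0., 0., 0., 111., 111.],
                     [0., 0., 0., 447., 447.]])
scale = torch.sqrt((rois[:, 3] - rois[:, 1] + 1) * (rois[:, 4] - rois[:, 2] + 1))
target_lvls = torch.floor(torch.log2(scale / finest_scale + 1e-6))
target_lvls = target_lvls.clamp(min=0, max=3).long()
print(target_lvls)  # tensor([0, 1, 3]): small RoIs map to fine levels,
                    # large RoIs to coarse levels
```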
-------------------------------------------------------------------------------- /TECHNICAL_DETAILS.md: -------------------------------------------------------------------------------- 1 | ## Overview 2 | 3 | In this section, we will introduce the main components of training a detector: 4 | data loading, the model, and the iteration pipeline. 5 | 6 | ## Data loading 7 | 8 | Following typical conventions, we use `Dataset` and `DataLoader` for data loading 9 | with multiple workers. `Dataset` returns a dict of data items corresponding to 10 | the arguments of the model's forward method. 11 | Since the data in object detection may not be of the same size (image size, gt bbox size, etc.), 12 | we introduce a new `DataContainer` type in `mmcv` to help collect and distribute 13 | data of different sizes. 14 | See [here](https://github.com/open-mmlab/mmcv/blob/master/mmcv/parallel/data_container.py) for more details. 15 | 16 | ## Model 17 | 18 | In mmdetection, model components are categorized into 4 types. 19 | 20 | - backbone: usually an FCN to extract feature maps, e.g., ResNet. 21 | - neck: the part between backbones and heads, e.g., FPN, ASPP. 22 | - head: the part for specific tasks, e.g., bbox prediction and mask prediction. 23 | - roi extractor: the part for extracting features from feature maps, e.g., RoI Align. 24 | 25 | We also implement some general detection pipelines with the above components, 26 | such as `SingleStageDetector` and `TwoStageDetector`. 27 | 28 | ### Build a model with basic components 29 | 30 | Following some basic pipelines (e.g., two-stage detectors), the model structure 31 | can be customized through config files with little effort. 32 | 33 | If we want to implement some new components, e.g., the path aggregation 34 | FPN structure in [Path Aggregation Network for Instance Segmentation](https://arxiv.org/abs/1803.01534), there are two things to do. 35 | 36 | 1. Create a new file `mmdet/models/necks/pafpn.py`. 37 | 38 | ```python 39 | class PAFPN(nn.Module): 40 | 41 | def __init__(self, 42 | in_channels, 43 | out_channels, 44 | num_outs, 45 | start_level=0, 46 | end_level=-1, 47 | add_extra_convs=False): 48 | pass 49 | 50 | def forward(self, inputs): 51 | # implementation is ignored 52 | pass 53 | ``` 54 | 55 | 2. Modify the neck field of the config file from 56 | 57 | ```python 58 | neck=dict( 59 | type='FPN', 60 | in_channels=[256, 512, 1024, 2048], 61 | out_channels=256, 62 | num_outs=5) 63 | ``` 64 | 65 | to 66 | 67 | ```python 68 | neck=dict( 69 | type='PAFPN', 70 | in_channels=[256, 512, 1024, 2048], 71 | out_channels=256, 72 | num_outs=5) 73 | ``` 74 | 75 | We will release more components (backbones, necks, heads) for research purposes. 76 | 77 | ### Write a new model 78 | 79 | To write a new detection pipeline, you need to inherit from `BaseDetector`, 80 | which defines the following abstract methods. 81 | 82 | - `extract_feat()`: given an image batch of shape (n, c, h, w), extract the feature map(s). 83 | - `forward_train()`: the forward method for training mode 84 | - `simple_test()`: single-scale testing without augmentation 85 | - `aug_test()`: testing with augmentation (multi-scale, flip, etc.) 86 | 87 | [TwoStageDetector](https://github.com/hellock/mmdetection/blob/master/mmdet/models/detectors/two_stage.py) 88 | is a good example which shows how to do that. 89 | 90 | ## Iteration pipeline 91 | 92 | We adopt distributed training for both single-machine and multi-machine setups.
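A minimal, hand-rolled sketch of one training iteration under this scheme (in practice the distributed model wrapper performs the allreduce automatically; the function below is illustrative, not the repo's implementation):

```python
import torch.distributed as dist

def train_step(model, data_batch, optimizer):
    loss = model(**data_batch)        # forward pass on this process's data shard
    optimizer.zero_grad()
    loss.backward()                   # backward pass computes local gradients
    world_size = dist.get_world_size()
    for p in model.parameters():      # sum gradients over all GPUs, then average
        if p.grad is not None:
            dist.all_reduce(p.grad.data)
            p.grad.data.div_(world_size)
    optimizer.step()                  # identical update on every process
```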
93 | Supposing that the server has 8 GPUs, 8 processes will be started and each process runs on a single GPU. 94 | 95 | Each process keeps an isolated model, data loader, and optimizer. 96 | Model parameters are only synchronized once at the beginning. 97 | After a forward and backward pass, gradients will be allreduced among all GPUs, 98 | and the optimizer will update the model parameters. 99 | Since the gradients are allreduced, the model parameters stay the same for all processes after each iteration. 100 | -------------------------------------------------------------------------------- /mmdet/datasets/utils.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from collections import Sequence 3 | 4 | import mmcv 5 | from mmcv.runner import obj_from_dict 6 | import torch 7 | 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | from .concat_dataset import ConcatDataset 11 | from .repeat_dataset import RepeatDataset 12 | from .. import datasets 13 | 14 | 15 | def to_tensor(data): 16 | """Convert objects of various Python types to :obj:`torch.Tensor`. 17 | 18 | Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`, 19 | :class:`Sequence`, :class:`int` and :class:`float`. 20 | """ 21 | if isinstance(data, torch.Tensor): 22 | return data 23 | elif isinstance(data, np.ndarray): 24 | return torch.from_numpy(data) 25 | elif isinstance(data, Sequence) and not mmcv.is_str(data): 26 | return torch.tensor(data) 27 | elif isinstance(data, int): 28 | return torch.LongTensor([data]) 29 | elif isinstance(data, float): 30 | return torch.FloatTensor([data]) 31 | else: 32 | raise TypeError('type {} cannot be converted to tensor.'.format( 33 | type(data))) 34 | 35 | 36 | def random_scale(img_scales, mode='range'): 37 | """Randomly select a scale from a list of scales or scale ranges. 38 | 39 | Args: 40 | img_scales (list[tuple]): Image scale or scale range. 41 | mode (str): "range" or "value". 42 | 43 | Returns: 44 | tuple: Sampled image scale.
45 | """ 46 | num_scales = len(img_scales) 47 | if num_scales == 1:  # fixed scale is specified 48 | img_scale = img_scales[0] 49 | elif num_scales == 2:  # randomly sample a scale 50 | if mode == 'range': 51 | img_scale_long = [max(s) for s in img_scales] 52 | img_scale_short = [min(s) for s in img_scales] 53 | long_edge = np.random.randint( 54 | min(img_scale_long), 55 | max(img_scale_long) + 1) 56 | short_edge = np.random.randint( 57 | min(img_scale_short), 58 | max(img_scale_short) + 1) 59 | img_scale = (long_edge, short_edge) 60 | elif mode == 'value': 61 | img_scale = img_scales[np.random.randint(num_scales)] 62 | else: 63 | img_scale = img_scales[np.random.randint(num_scales)] 64 | return img_scale 65 | 66 | 67 | def show_ann(coco, img, ann_info): 68 | plt.imshow(mmcv.bgr2rgb(img)) 69 | plt.axis('off') 70 | coco.showAnns(ann_info) 71 | plt.show() 72 | 73 | 74 | def get_dataset(data_cfg): 75 | if data_cfg['type'] == 'RepeatDataset': 76 | return RepeatDataset( 77 | get_dataset(data_cfg['dataset']), data_cfg['times']) 78 | 79 | if isinstance(data_cfg['ann_file'], (list, tuple)): 80 | ann_files = data_cfg['ann_file'] 81 | num_dset = len(ann_files) 82 | else: 83 | ann_files = [data_cfg['ann_file']] 84 | num_dset = 1 85 | 86 | if 'proposal_file' in data_cfg.keys(): 87 | if isinstance(data_cfg['proposal_file'], (list, tuple)): 88 | proposal_files = data_cfg['proposal_file'] 89 | else: 90 | proposal_files = [data_cfg['proposal_file']] 91 | else: 92 | proposal_files = [None] * num_dset 93 | assert len(proposal_files) == num_dset 94 | 95 | if isinstance(data_cfg['img_prefix'], (list, tuple)): 96 | img_prefixes = data_cfg['img_prefix'] 97 | else: 98 | img_prefixes = [data_cfg['img_prefix']] * num_dset 99 | assert len(img_prefixes) == num_dset 100 | 101 | dsets = [] 102 | for i in range(num_dset): 103 | data_info = copy.deepcopy(data_cfg) 104 | data_info['ann_file'] = ann_files[i] 105 | data_info['proposal_file'] = proposal_files[i] 106 | data_info['img_prefix'] = img_prefixes[i] 107 | dset = obj_from_dict(data_info, datasets) 108 | dsets.append(dset) 109 | if len(dsets) > 1: 110 | dset = ConcatDataset(dsets) 111 | else: 112 | dset = dsets[0] 113 | return dset 114 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/src/deform_pool_cuda.cpp: -------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c 3 | 4 | // based on 5 | // author: Charles Shang 6 | // https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu 7 | 8 | #include <torch/extension.h> 9 | 10 | #include <cmath> 11 | #include <vector> 12 | 13 | void DeformablePSROIPoolForward( 14 | const at::Tensor data, const at::Tensor bbox, const at::Tensor trans, 15 | at::Tensor out, at::Tensor top_count, const int batch, const int channels, 16 | const int height, const int width, const int num_bbox, 17 | const int channels_trans, const int no_trans, const float spatial_scale, 18 | const int output_dim, const int group_size, const int pooled_size, 19 | const int part_size, const int sample_per_part, const float trans_std); 20 | 21 | void DeformablePSROIPoolBackwardAcc( 22 | const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox, 23 | const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad, 24 | at::Tensor trans_grad, const int batch, const int channels, 25 | const int height, const int width, const int num_bbox, 26 |
--------------------------------------------------------------------------------
/mmdet/ops/dcn/src/deform_pool_cuda.cpp:
--------------------------------------------------------------------------------
1 | // modify from
2 | // https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c
3 | 
4 | // based on
5 | // author: Charles Shang
6 | // https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu
7 | 
8 | #include <torch/extension.h>
9 | 
10 | #include <cmath>
11 | #include <vector>
12 | 
13 | void DeformablePSROIPoolForward(
14 |     const at::Tensor data, const at::Tensor bbox, const at::Tensor trans,
15 |     at::Tensor out, at::Tensor top_count, const int batch, const int channels,
16 |     const int height, const int width, const int num_bbox,
17 |     const int channels_trans, const int no_trans, const float spatial_scale,
18 |     const int output_dim, const int group_size, const int pooled_size,
19 |     const int part_size, const int sample_per_part, const float trans_std);
20 | 
21 | void DeformablePSROIPoolBackwardAcc(
22 |     const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox,
23 |     const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad,
24 |     at::Tensor trans_grad, const int batch, const int channels,
25 |     const int height, const int width, const int num_bbox,
26 |     const int channels_trans, const int no_trans, const float spatial_scale,
27 |     const int output_dim, const int group_size, const int pooled_size,
28 |     const int part_size, const int sample_per_part, const float trans_std);
29 | 
30 | void deform_psroi_pooling_cuda_forward(
31 |     at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out,
32 |     at::Tensor top_count, const int no_trans, const float spatial_scale,
33 |     const int output_dim, const int group_size, const int pooled_size,
34 |     const int part_size, const int sample_per_part, const float trans_std) {
35 |   AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
36 | 
37 |   const int batch = input.size(0);
38 |   const int channels = input.size(1);
39 |   const int height = input.size(2);
40 |   const int width = input.size(3);
41 |   const int channels_trans = no_trans ? 2 : trans.size(1);
42 | 
43 |   const int num_bbox = bbox.size(0);
44 |   if (num_bbox != out.size(0))
45 |     AT_ERROR("Output shape and bbox number won't match: (%d vs %d).",
46 |              out.size(0), num_bbox);
47 | 
48 |   DeformablePSROIPoolForward(
49 |       input, bbox, trans, out, top_count, batch, channels, height, width,
50 |       num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size,
51 |       pooled_size, part_size, sample_per_part, trans_std);
52 | }
53 | 
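// Editorial note (not in the original source): the wrapper above checks
// contiguity and shape agreement on the host, derives the tensor geometry,
// and dispatches to the CUDA kernel declared at the top of this file. With
// no_trans set, the offset tensor `trans` is ignored and plain PS-RoI pooling
// is performed; `spatial_scale` maps RoI coordinates from image space onto
// the feature map. The backward wrapper below mirrors the same checks before
// accumulating gradients.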
54 | void deform_psroi_pooling_cuda_backward(
55 |     at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans,
56 |     at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad,
57 |     const int no_trans, const float spatial_scale, const int output_dim,
58 |     const int group_size, const int pooled_size, const int part_size,
59 |     const int sample_per_part, const float trans_std) {
60 |   AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous");
61 |   AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
62 | 
63 |   const int batch = input.size(0);
64 |   const int channels = input.size(1);
65 |   const int height = input.size(2);
66 |   const int width = input.size(3);
67 |   const int channels_trans = no_trans ? 2 : trans.size(1);
68 | 
69 |   const int num_bbox = bbox.size(0);
70 |   if (num_bbox != out_grad.size(0))
71 |     AT_ERROR("Output shape and bbox number won't match: (%d vs %d).",
72 |              out_grad.size(0), num_bbox);
73 | 
74 |   DeformablePSROIPoolBackwardAcc(
75 |       out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch,
76 |       channels, height, width, num_bbox, channels_trans, no_trans,
77 |       spatial_scale, output_dim, group_size, pooled_size, part_size,
78 |       sample_per_part, trans_std);
79 | }
80 | 
81 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
82 |   m.def("deform_psroi_pooling_cuda_forward", &deform_psroi_pooling_cuda_forward,
83 |         "deform psroi pooling forward(CUDA)");
84 |   m.def("deform_psroi_pooling_cuda_backward",
85 |         &deform_psroi_pooling_cuda_backward,
86 |         "deform psroi pooling backward(CUDA)");
87 | }
--------------------------------------------------------------------------------
/configs/fast_rcnn_r50_fpn_1x.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 |     type='FastRCNN',
4 |     pretrained='modelzoo://resnet50',
5 |     backbone=dict(
6 |         type='ResNet',
7 |         depth=50,
8 |         num_stages=4,
9 |         out_indices=(0, 1, 2, 3),
10 |         frozen_stages=1,
11 |         style='pytorch'),
12 |     neck=dict(
13 |         type='FPN',
14 |         in_channels=[256, 512, 1024, 2048],
15 |         out_channels=256,
16 |         num_outs=5),
17 |     bbox_roi_extractor=dict(
18 |         type='SingleRoIExtractor',
19 |         roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
20 |         out_channels=256,
21 |         featmap_strides=[4, 8, 16, 32]),
22 |     bbox_head=dict(
23 |         type='SharedFCBBoxHead',
24 |         num_fcs=2,
25 |         in_channels=256,
26 |         fc_out_channels=1024,
27 |         roi_feat_size=7,
28 |         num_classes=81,
29 |         target_means=[0., 0., 0., 0.],
30 |         target_stds=[0.1, 0.1, 0.2, 0.2],
31 |         reg_class_agnostic=False))
32 | # model training and testing settings
33 | train_cfg = dict(
34 |     rcnn=dict(
35 |         assigner=dict(
36 |             type='MaxIoUAssigner',
37 |             pos_iou_thr=0.5,
38 |             neg_iou_thr=0.5,
39 |             min_pos_iou=0.5,
40 |             ignore_iof_thr=-1),
41 |         sampler=dict(
42 |             type='RandomSampler',
43 |             num=512,
44 |             pos_fraction=0.25,
45 |             neg_pos_ub=-1,
46 |             add_gt_as_proposals=True),
47 |         pos_weight=-1,
48 |         debug=False))
49 | test_cfg = dict(
50 |     rcnn=dict(
51 |         score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100))
52 | # dataset settings
53 | dataset_type = 'CocoDataset'
54 | data_root = 'data/coco/'
55 | img_norm_cfg = dict(
56 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
57 | data = dict(
58 |     imgs_per_gpu=2,
59 |     workers_per_gpu=2,
60 |     train=dict(
61 |         type=dataset_type,
62 |         ann_file=data_root + 'annotations/instances_train2017.json',
63 |         img_prefix=data_root + 'train2017/',
64 |         img_scale=(1333, 800),
65 |         img_norm_cfg=img_norm_cfg,
66 |         size_divisor=32,
67 |         proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl',
68 |         flip_ratio=0.5,
69 |         with_mask=False,
70 |         with_crowd=True,
71 |         with_label=True),
72 |     val=dict(
73 |         type=dataset_type,
74 |         ann_file=data_root + 'annotations/instances_val2017.json',
75 |         img_prefix=data_root + 'val2017/',
76 |         img_scale=(1333, 800),
77 |         img_norm_cfg=img_norm_cfg,
78 |         proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl',
79 |         size_divisor=32,
80 |         flip_ratio=0,
81 |         with_mask=False,
82 |         with_crowd=True,
83 |         with_label=True),
84 |     test=dict(
85 |         type=dataset_type,
86 |         ann_file=data_root + 'annotations/instances_val2017.json',
87 |         img_prefix=data_root + 'val2017/',
88 |         img_scale=(1333, 800),
89 | 
img_norm_cfg=img_norm_cfg, 90 | proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', 91 | size_divisor=32, 92 | flip_ratio=0, 93 | with_mask=False, 94 | with_label=False, 95 | test_mode=True)) 96 | # optimizer 97 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 98 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 99 | # learning policy 100 | lr_config = dict( 101 | policy='step', 102 | warmup='linear', 103 | warmup_iters=500, 104 | warmup_ratio=1.0 / 3, 105 | step=[8, 11]) 106 | checkpoint_config = dict(interval=1) 107 | # yapf:disable 108 | log_config = dict( 109 | interval=50, 110 | hooks=[ 111 | dict(type='TextLoggerHook'), 112 | # dict(type='TensorboardLoggerHook') 113 | ]) 114 | # yapf:enable 115 | # runtime settings 116 | total_epochs = 12 117 | dist_params = dict(backend='nccl') 118 | log_level = 'INFO' 119 | work_dir = './work_dirs/fast_rcnn_r50_fpn_1x' 120 | load_from = None 121 | resume_from = None 122 | workflow = [('train', 1)] 123 | -------------------------------------------------------------------------------- /configs/fast_rcnn_r101_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FastRCNN', 4 | pretrained='modelzoo://resnet101', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=101, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | style='pytorch'), 12 | neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | bbox_roi_extractor=dict( 18 | type='SingleRoIExtractor', 19 | roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), 20 | out_channels=256, 21 | featmap_strides=[4, 8, 16, 32]), 22 | bbox_head=dict( 23 | type='SharedFCBBoxHead', 24 | num_fcs=2, 25 | in_channels=256, 26 | fc_out_channels=1024, 27 | roi_feat_size=7, 28 | num_classes=81, 29 | target_means=[0., 0., 0., 0.], 30 | target_stds=[0.1, 0.1, 0.2, 0.2], 31 | reg_class_agnostic=False)) 32 | # model training and testing settings 33 | train_cfg = dict( 34 | rcnn=dict( 35 | assigner=dict( 36 | type='MaxIoUAssigner', 37 | pos_iou_thr=0.5, 38 | neg_iou_thr=0.5, 39 | min_pos_iou=0.5, 40 | ignore_iof_thr=-1), 41 | sampler=dict( 42 | type='RandomSampler', 43 | num=512, 44 | pos_fraction=0.25, 45 | neg_pos_ub=-1, 46 | add_gt_as_proposals=True), 47 | pos_weight=-1, 48 | debug=False)) 49 | test_cfg = dict( 50 | rcnn=dict( 51 | score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) 52 | # dataset settings 53 | dataset_type = 'CocoDataset' 54 | data_root = 'data/coco/' 55 | img_norm_cfg = dict( 56 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 57 | data = dict( 58 | imgs_per_gpu=2, 59 | workers_per_gpu=2, 60 | train=dict( 61 | type=dataset_type, 62 | ann_file=data_root + 'annotations/instances_train2017.json', 63 | img_prefix=data_root + 'train2017/', 64 | img_scale=(1333, 800), 65 | img_norm_cfg=img_norm_cfg, 66 | size_divisor=32, 67 | proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl', 68 | flip_ratio=0.5, 69 | with_mask=False, 70 | with_crowd=True, 71 | with_label=True), 72 | val=dict( 73 | type=dataset_type, 74 | ann_file=data_root + 'annotations/instances_val2017.json', 75 | img_prefix=data_root + 'val2017/', 76 | img_scale=(1333, 800), 77 | img_norm_cfg=img_norm_cfg, 78 | proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', 79 | size_divisor=32, 80 | flip_ratio=0, 81 | with_mask=False, 82 | with_crowd=True, 83 | 
with_label=True), 84 | test=dict( 85 | type=dataset_type, 86 | ann_file=data_root + 'annotations/instances_val2017.json', 87 | img_prefix=data_root + 'val2017/', 88 | img_scale=(1333, 800), 89 | img_norm_cfg=img_norm_cfg, 90 | proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', 91 | size_divisor=32, 92 | flip_ratio=0, 93 | with_mask=False, 94 | with_label=False, 95 | test_mode=True)) 96 | # optimizer 97 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 98 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 99 | # learning policy 100 | lr_config = dict( 101 | policy='step', 102 | warmup='linear', 103 | warmup_iters=500, 104 | warmup_ratio=1.0 / 3, 105 | step=[8, 11]) 106 | checkpoint_config = dict(interval=1) 107 | # yapf:disable 108 | log_config = dict( 109 | interval=50, 110 | hooks=[ 111 | dict(type='TextLoggerHook'), 112 | # dict(type='TensorboardLoggerHook') 113 | ]) 114 | # yapf:enable 115 | # runtime settings 116 | total_epochs = 12 117 | dist_params = dict(backend='nccl') 118 | log_level = 'INFO' 119 | work_dir = './work_dirs/fast_rcnn_r101_fpn_1x' 120 | load_from = None 121 | resume_from = None 122 | workflow = [('train', 1)] 123 | -------------------------------------------------------------------------------- /mmdet/datasets/transforms.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import torch 4 | 5 | __all__ = ['ImageTransform', 'BboxTransform', 'MaskTransform', 'Numpy2Tensor'] 6 | 7 | 8 | class ImageTransform(object): 9 | """Preprocess an image. 10 | 11 | 1. rescale the image to expected size 12 | 2. normalize the image 13 | 3. flip the image (if needed) 14 | 4. pad the image (if needed) 15 | 5. transpose to (c, h, w) 16 | """ 17 | 18 | def __init__(self, 19 | mean=(0, 0, 0), 20 | std=(1, 1, 1), 21 | to_rgb=True, 22 | size_divisor=None): 23 | self.mean = np.array(mean, dtype=np.float32) 24 | self.std = np.array(std, dtype=np.float32) 25 | self.to_rgb = to_rgb 26 | self.size_divisor = size_divisor 27 | 28 | def __call__(self, img, scale, flip=False, keep_ratio=True): 29 | if keep_ratio: 30 | img, scale_factor = mmcv.imrescale(img, scale, return_scale=True) 31 | else: 32 | img, w_scale, h_scale = mmcv.imresize( 33 | img, scale, return_scale=True) 34 | scale_factor = np.array([w_scale, h_scale, w_scale, h_scale], 35 | dtype=np.float32) 36 | img_shape = img.shape 37 | img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb) 38 | if flip: 39 | img = mmcv.imflip(img) 40 | if self.size_divisor is not None: 41 | img = mmcv.impad_to_multiple(img, self.size_divisor) 42 | pad_shape = img.shape 43 | else: 44 | pad_shape = img_shape 45 | img = img.transpose(2, 0, 1) 46 | return img, img_shape, pad_shape, scale_factor 47 | 48 | 49 | def bbox_flip(bboxes, img_shape): 50 | """Flip bboxes horizontally. 51 | 52 | Args: 53 | bboxes(ndarray): shape (..., 4*k) 54 | img_shape(tuple): (height, width) 55 | """ 56 | assert bboxes.shape[-1] % 4 == 0 57 | w = img_shape[1] 58 | flipped = bboxes.copy() 59 | flipped[..., 0::4] = w - bboxes[..., 2::4] - 1 60 | flipped[..., 2::4] = w - bboxes[..., 0::4] - 1 61 | return flipped 62 | 63 | 64 | class BboxTransform(object): 65 | """Preprocess gt bboxes. 66 | 67 | 1. rescale bboxes according to image size 68 | 2. flip bboxes (if needed) 69 | 3. 
pad the first dimension to `max_num_gts` 70 | """ 71 | 72 | def __init__(self, max_num_gts=None): 73 | self.max_num_gts = max_num_gts 74 | 75 | def __call__(self, bboxes, img_shape, scale_factor, flip=False): 76 | gt_bboxes = bboxes * scale_factor 77 | if flip: 78 | gt_bboxes = bbox_flip(gt_bboxes, img_shape) 79 | gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1) 80 | gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1) 81 | if self.max_num_gts is None: 82 | return gt_bboxes 83 | else: 84 | num_gts = gt_bboxes.shape[0] 85 | padded_bboxes = np.zeros((self.max_num_gts, 4), dtype=np.float32) 86 | padded_bboxes[:num_gts, :] = gt_bboxes 87 | return padded_bboxes 88 | 89 | 90 | class MaskTransform(object): 91 | """Preprocess masks. 92 | 93 | 1. resize masks to expected size and stack to a single array 94 | 2. flip the masks (if needed) 95 | 3. pad the masks (if needed) 96 | """ 97 | 98 | def __call__(self, masks, pad_shape, scale_factor, flip=False): 99 | masks = [ 100 | mmcv.imrescale(mask, scale_factor, interpolation='nearest') 101 | for mask in masks 102 | ] 103 | if flip: 104 | masks = [mask[:, ::-1] for mask in masks] 105 | padded_masks = [ 106 | mmcv.impad(mask, pad_shape[:2], pad_val=0) for mask in masks 107 | ] 108 | padded_masks = np.stack(padded_masks, axis=0) 109 | return padded_masks 110 | 111 | 112 | class Numpy2Tensor(object): 113 | 114 | def __init__(self): 115 | pass 116 | 117 | def __call__(self, *args): 118 | if len(args) == 1: 119 | return torch.from_numpy(args[0]) 120 | else: 121 | return tuple([torch.from_numpy(np.array(array)) for array in args]) 122 | -------------------------------------------------------------------------------- /mmdet/models/anchor_heads/rpn_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from mmcv.cnn import normal_init 5 | 6 | from mmdet.core import delta2bbox 7 | from mmdet.ops import nms 8 | from .anchor_head import AnchorHead 9 | from ..registry import HEADS 10 | 11 | 12 | @HEADS.register_module 13 | class RPNHead(AnchorHead): 14 | 15 | def __init__(self, in_channels, **kwargs): 16 | super(RPNHead, self).__init__(2, in_channels, **kwargs) 17 | 18 | def _init_layers(self): 19 | self.rpn_conv = nn.Conv2d( 20 | self.in_channels, self.feat_channels, 3, padding=1) 21 | self.rpn_cls = nn.Conv2d(self.feat_channels, 22 | self.num_anchors * self.cls_out_channels, 1) 23 | self.rpn_reg = nn.Conv2d(self.feat_channels, self.num_anchors * 4, 1) 24 | 25 | def init_weights(self): 26 | normal_init(self.rpn_conv, std=0.01) 27 | normal_init(self.rpn_cls, std=0.01) 28 | normal_init(self.rpn_reg, std=0.01) 29 | 30 | def forward_single(self, x): 31 | x = self.rpn_conv(x) 32 | x = F.relu(x, inplace=True) 33 | rpn_cls_score = self.rpn_cls(x) 34 | rpn_bbox_pred = self.rpn_reg(x) 35 | return rpn_cls_score, rpn_bbox_pred 36 | 37 | def loss(self, 38 | cls_scores, 39 | bbox_preds, 40 | gt_bboxes, 41 | img_metas, 42 | cfg, 43 | gt_bboxes_ignore=None): 44 | losses = super(RPNHead, self).loss( 45 | cls_scores, 46 | bbox_preds, 47 | gt_bboxes, 48 | None, 49 | img_metas, 50 | cfg, 51 | gt_bboxes_ignore=gt_bboxes_ignore) 52 | return dict( 53 | loss_rpn_cls=losses['loss_cls'], loss_rpn_reg=losses['loss_reg']) 54 | 55 | def get_bboxes_single(self, 56 | cls_scores, 57 | bbox_preds, 58 | mlvl_anchors, 59 | img_shape, 60 | scale_factor, 61 | cfg, 62 | rescale=False): 63 | mlvl_proposals = [] 64 | for idx in 
range(len(cls_scores)): 65 | rpn_cls_score = cls_scores[idx] 66 | rpn_bbox_pred = bbox_preds[idx] 67 | assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:] 68 | anchors = mlvl_anchors[idx] 69 | rpn_cls_score = rpn_cls_score.permute(1, 2, 0) 70 | if self.use_sigmoid_cls: 71 | rpn_cls_score = rpn_cls_score.reshape(-1) 72 | scores = rpn_cls_score.sigmoid() 73 | else: 74 | rpn_cls_score = rpn_cls_score.reshape(-1, 2) 75 | scores = rpn_cls_score.softmax(dim=1)[:, 1] 76 | rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4) 77 | if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre: 78 | _, topk_inds = scores.topk(cfg.nms_pre) 79 | rpn_bbox_pred = rpn_bbox_pred[topk_inds, :] 80 | anchors = anchors[topk_inds, :] 81 | scores = scores[topk_inds] 82 | proposals = delta2bbox(anchors, rpn_bbox_pred, self.target_means, 83 | self.target_stds, img_shape) 84 | if cfg.min_bbox_size > 0: 85 | w = proposals[:, 2] - proposals[:, 0] + 1 86 | h = proposals[:, 3] - proposals[:, 1] + 1 87 | valid_inds = torch.nonzero((w >= cfg.min_bbox_size) & 88 | (h >= cfg.min_bbox_size)).squeeze() 89 | proposals = proposals[valid_inds, :] 90 | scores = scores[valid_inds] 91 | proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1) 92 | proposals, _ = nms(proposals, cfg.nms_thr) 93 | proposals = proposals[:cfg.nms_post, :] 94 | mlvl_proposals.append(proposals) 95 | proposals = torch.cat(mlvl_proposals, 0) 96 | if cfg.nms_across_levels: 97 | proposals, _ = nms(proposals, cfg.nms_thr) 98 | proposals = proposals[:cfg.max_num, :] 99 | else: 100 | scores = proposals[:, 4] 101 | num = min(cfg.max_num, proposals.shape[0]) 102 | _, topk_inds = scores.topk(num) 103 | proposals = proposals[topk_inds, :] 104 | return proposals 105 | -------------------------------------------------------------------------------- /mmdet/models/necks/hrfpn.py: -------------------------------------------------------------------------------- 1 | """ 2 | MIT License 3 | 4 | Copyright (c) 2019 Microsoft 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | """ 24 | 25 | import torch 26 | import torch.nn as nn 27 | import torch.nn.functional as F 28 | from torch.utils.checkpoint import checkpoint 29 | 30 | from ..registry import NECKS 31 | 32 | 33 | @NECKS.register_module 34 | class HRFPN(nn.Module): 35 | 36 | def __init__(self, 37 | in_channels, 38 | out_channels, 39 | normalize=None, 40 | pooling='AVG', 41 | share_conv=False, 42 | with_checkpoint=False): 43 | super(HRFPN, self).__init__() 44 | assert isinstance(in_channels, list) 45 | self.in_channels = in_channels 46 | self.out_channels = out_channels 47 | self.num_ins = len(in_channels) 48 | self.with_bias = normalize is None 49 | self.share_conv = share_conv 50 | self.reduction_conv = nn.Sequential( 51 | nn.Conv2d(in_channels=sum(in_channels), 52 | out_channels=out_channels, 53 | kernel_size=1), 54 | ) 55 | 56 | if self.share_conv: 57 | self.fpn_conv = nn.Conv2d(in_channels=out_channels, 58 | out_channels=out_channels, 59 | kernel_size=3, padding=1) 60 | else: 61 | self.fpn_conv = nn.ModuleList() 62 | for i in range(5): 63 | self.fpn_conv.append(nn.Conv2d( 64 | in_channels=out_channels, 65 | out_channels=out_channels, 66 | kernel_size=3, 67 | padding=1 68 | )) 69 | if pooling == 'MAX': 70 | print("Using AVG Pooling") 71 | self.pooling = F.max_pool2d 72 | else: 73 | self.pooling = F.avg_pool2d 74 | self.with_checkpoint = with_checkpoint 75 | 76 | # default init_weights for conv(msra) and norm in ConvModule 77 | def init_weights(self): 78 | for m in self.modules(): 79 | if isinstance(m, nn.Conv2d): 80 | nn.init.kaiming_normal_(m.weight, a=1) 81 | nn.init.constant_(m.bias, 0) 82 | 83 | def forward(self, inputs): 84 | assert len(inputs) == len(self.in_channels) 85 | outs = [] 86 | outs.append(inputs[0]) 87 | for i in range(1, len(inputs)): 88 | outs.append(F.interpolate(inputs[i], scale_factor=2**i, mode='bilinear')) 89 | out = torch.cat(outs, dim=1) 90 | if out.requires_grad and self.with_checkpoint: 91 | out = checkpoint(self.reduction_conv, out) 92 | else: 93 | out = self.reduction_conv(out) 94 | outs = [out] 95 | for i in range(1, 5): 96 | outs.append(self.pooling(out, kernel_size=2**i, stride=2**i)) 97 | outputs = [] 98 | if self.share_conv: 99 | for i in range(5): 100 | outputs.append(self.fpn_conv(outs[i])) 101 | else: 102 | for i in range(5): 103 | if outs[i].requires_grad and self.with_checkpoint: 104 | tmp_out = checkpoint(self.fpn_conv[i], outs[i]) 105 | else: 106 | tmp_out = self.fpn_conv[i](outs[i]) 107 | outputs.append(tmp_out) 108 | return tuple(outputs) 109 | -------------------------------------------------------------------------------- /configs/ssd300_coco.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | input_size = 300 3 | model = dict( 4 | type='SingleStageDetector', 5 | pretrained='open-mmlab://vgg16_caffe', 6 | backbone=dict( 7 | type='SSDVGG', 8 | input_size=input_size, 9 | depth=16, 10 | with_last_pool=False, 11 | ceil_mode=True, 12 | out_indices=(3, 4), 13 | out_feature_indices=(22, 34), 14 | l2_norm_scale=20), 15 | neck=None, 16 | bbox_head=dict( 17 | type='SSDHead', 18 | input_size=input_size, 19 | in_channels=(512, 1024, 512, 256, 256, 256), 20 | num_classes=81, 21 | anchor_strides=(8, 16, 32, 64, 100, 300), 22 | basesize_ratio_range=(0.15, 0.9), 23 | anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]), 24 | target_means=(.0, .0, .0, .0), 25 | target_stds=(0.1, 0.1, 0.2, 0.2))) 26 | cudnn_benchmark = True 27 | train_cfg = dict( 28 | assigner=dict( 29 | type='MaxIoUAssigner', 30 | 
pos_iou_thr=0.5, 31 | neg_iou_thr=0.5, 32 | min_pos_iou=0., 33 | ignore_iof_thr=-1, 34 | gt_max_assign_all=False), 35 | smoothl1_beta=1., 36 | allowed_border=-1, 37 | pos_weight=-1, 38 | neg_pos_ratio=3, 39 | debug=False) 40 | test_cfg = dict( 41 | nms=dict(type='nms', iou_thr=0.45), 42 | min_bbox_size=0, 43 | score_thr=0.02, 44 | max_per_img=200) 45 | # model training and testing settings 46 | # dataset settings 47 | dataset_type = 'CocoDataset' 48 | data_root = 'data/coco/' 49 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) 50 | data = dict( 51 | imgs_per_gpu=8, 52 | workers_per_gpu=3, 53 | train=dict( 54 | type='RepeatDataset', 55 | times=5, 56 | dataset=dict( 57 | type=dataset_type, 58 | ann_file=data_root + 'annotations/instances_train2017.json', 59 | img_prefix=data_root + 'train2017/', 60 | img_scale=(300, 300), 61 | img_norm_cfg=img_norm_cfg, 62 | size_divisor=None, 63 | flip_ratio=0.5, 64 | with_mask=False, 65 | with_crowd=False, 66 | with_label=True, 67 | test_mode=False, 68 | extra_aug=dict( 69 | photo_metric_distortion=dict( 70 | brightness_delta=32, 71 | contrast_range=(0.5, 1.5), 72 | saturation_range=(0.5, 1.5), 73 | hue_delta=18), 74 | expand=dict( 75 | mean=img_norm_cfg['mean'], 76 | to_rgb=img_norm_cfg['to_rgb'], 77 | ratio_range=(1, 4)), 78 | random_crop=dict( 79 | min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3)), 80 | resize_keep_ratio=False)), 81 | val=dict( 82 | type=dataset_type, 83 | ann_file=data_root + 'annotations/instances_val2017.json', 84 | img_prefix=data_root + 'val2017/', 85 | img_scale=(300, 300), 86 | img_norm_cfg=img_norm_cfg, 87 | size_divisor=None, 88 | flip_ratio=0, 89 | with_mask=False, 90 | with_label=False, 91 | test_mode=True, 92 | resize_keep_ratio=False), 93 | test=dict( 94 | type=dataset_type, 95 | ann_file=data_root + 'annotations/instances_val2017.json', 96 | img_prefix=data_root + 'val2017/', 97 | img_scale=(300, 300), 98 | img_norm_cfg=img_norm_cfg, 99 | size_divisor=None, 100 | flip_ratio=0, 101 | with_mask=False, 102 | with_label=False, 103 | test_mode=True, 104 | resize_keep_ratio=False)) 105 | # optimizer 106 | optimizer = dict(type='SGD', lr=2e-3, momentum=0.9, weight_decay=5e-4) 107 | optimizer_config = dict() 108 | # learning policy 109 | lr_config = dict( 110 | policy='step', 111 | warmup='linear', 112 | warmup_iters=500, 113 | warmup_ratio=1.0 / 3, 114 | step=[16, 22]) 115 | checkpoint_config = dict(interval=1) 116 | # yapf:disable 117 | log_config = dict( 118 | interval=50, 119 | hooks=[ 120 | dict(type='TextLoggerHook'), 121 | # dict(type='TensorboardLoggerHook') 122 | ]) 123 | # yapf:enable 124 | # runtime settings 125 | total_epochs = 24 126 | dist_params = dict(backend='nccl') 127 | log_level = 'INFO' 128 | work_dir = './work_dirs/ssd300_coco' 129 | load_from = None 130 | resume_from = None 131 | workflow = [('train', 1)] 132 | -------------------------------------------------------------------------------- /mmdet/core/loss/losses.py: -------------------------------------------------------------------------------- 1 | # TODO merge naive and weighted loss. 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | 6 | def weighted_nll_loss(pred, label, weight, avg_factor=None): 7 | if avg_factor is None: 8 | avg_factor = max(torch.sum(weight > 0).float().item(), 1.) 
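    # Editorial note (not in the original source): when avg_factor is not
    # given, the loss is averaged over the number of positively weighted
    # samples, floored at 1 so an all-zero weight tensor cannot cause a
    # division by zero.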
9 | raw = F.nll_loss(pred, label, reduction='none') 10 | return torch.sum(raw * weight)[None] / avg_factor 11 | 12 | 13 | def weighted_cross_entropy(pred, label, weight, avg_factor=None, reduce=True): 14 | if avg_factor is None: 15 | avg_factor = max(torch.sum(weight > 0).float().item(), 1.) 16 | raw = F.cross_entropy(pred, label, reduction='none') 17 | if reduce: 18 | return torch.sum(raw * weight)[None] / avg_factor 19 | else: 20 | return raw * weight / avg_factor 21 | 22 | 23 | def weighted_binary_cross_entropy(pred, label, weight, avg_factor=None): 24 | if avg_factor is None: 25 | avg_factor = max(torch.sum(weight > 0).float().item(), 1.) 26 | return F.binary_cross_entropy_with_logits( 27 | pred, label.float(), weight.float(), 28 | reduction='sum')[None] / avg_factor 29 | 30 | 31 | def sigmoid_focal_loss(pred, 32 | target, 33 | weight, 34 | gamma=2.0, 35 | alpha=0.25, 36 | reduction='mean'): 37 | pred_sigmoid = pred.sigmoid() 38 | target = target.type_as(pred) 39 | pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target) 40 | weight = (alpha * target + (1 - alpha) * (1 - target)) * weight 41 | weight = weight * pt.pow(gamma) 42 | loss = F.binary_cross_entropy_with_logits( 43 | pred, target, reduction='none') * weight 44 | reduction_enum = F._Reduction.get_enum(reduction) 45 | # none: 0, mean:1, sum: 2 46 | if reduction_enum == 0: 47 | return loss 48 | elif reduction_enum == 1: 49 | return loss.mean() 50 | elif reduction_enum == 2: 51 | return loss.sum() 52 | 53 | 54 | def weighted_sigmoid_focal_loss(pred, 55 | target, 56 | weight, 57 | gamma=2.0, 58 | alpha=0.25, 59 | avg_factor=None, 60 | num_classes=80): 61 | if avg_factor is None: 62 | avg_factor = torch.sum(weight > 0).float().item() / num_classes + 1e-6 63 | return sigmoid_focal_loss( 64 | pred, target, weight, gamma=gamma, alpha=alpha, 65 | reduction='sum')[None] / avg_factor 66 | 67 | 68 | def mask_cross_entropy(pred, target, label): 69 | num_rois = pred.size()[0] 70 | inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device) 71 | pred_slice = pred[inds, label].squeeze(1) 72 | return F.binary_cross_entropy_with_logits( 73 | pred_slice, target, reduction='mean')[None] 74 | 75 | 76 | def smooth_l1_loss(pred, target, beta=1.0, reduction='mean'): 77 | assert beta > 0 78 | assert pred.size() == target.size() and target.numel() > 0 79 | diff = torch.abs(pred - target) 80 | loss = torch.where(diff < beta, 0.5 * diff * diff / beta, 81 | diff - 0.5 * beta) 82 | reduction_enum = F._Reduction.get_enum(reduction) 83 | # none: 0, mean:1, sum: 2 84 | if reduction_enum == 0: 85 | return loss 86 | elif reduction_enum == 1: 87 | return loss.sum() / pred.numel() 88 | elif reduction_enum == 2: 89 | return loss.sum() 90 | 91 | 92 | def weighted_smoothl1(pred, target, weight, beta=1.0, avg_factor=None): 93 | if avg_factor is None: 94 | avg_factor = torch.sum(weight > 0).float().item() / 4 + 1e-6 95 | loss = smooth_l1_loss(pred, target, beta, reduction='none') 96 | return torch.sum(loss * weight)[None] / avg_factor 97 | 98 | 99 | def accuracy(pred, target, topk=1): 100 | if isinstance(topk, int): 101 | topk = (topk, ) 102 | return_single = True 103 | else: 104 | return_single = False 105 | 106 | maxk = max(topk) 107 | _, pred_label = pred.topk(maxk, 1, True, True) 108 | pred_label = pred_label.t() 109 | correct = pred_label.eq(target.view(1, -1).expand_as(pred_label)) 110 | 111 | res = [] 112 | for k in topk: 113 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 114 | res.append(correct_k.mul_(100.0 / pred.size(0))) 
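    # Editorial note (not in the original source): each correct_k counts the
    # top-k hits over the batch; mul_(100.0 / pred.size(0)) converts that
    # count into a percentage in place.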
115 | return res[0] if return_single else res 116 | -------------------------------------------------------------------------------- /configs/ssd512_coco.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | input_size = 512 3 | model = dict( 4 | type='SingleStageDetector', 5 | pretrained='open-mmlab://vgg16_caffe', 6 | backbone=dict( 7 | type='SSDVGG', 8 | input_size=input_size, 9 | depth=16, 10 | with_last_pool=False, 11 | ceil_mode=True, 12 | out_indices=(3, 4), 13 | out_feature_indices=(22, 34), 14 | l2_norm_scale=20), 15 | neck=None, 16 | bbox_head=dict( 17 | type='SSDHead', 18 | input_size=input_size, 19 | in_channels=(512, 1024, 512, 256, 256, 256, 256), 20 | num_classes=81, 21 | anchor_strides=(8, 16, 32, 64, 128, 256, 512), 22 | basesize_ratio_range=(0.1, 0.9), 23 | anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]), 24 | target_means=(.0, .0, .0, .0), 25 | target_stds=(0.1, 0.1, 0.2, 0.2))) 26 | cudnn_benchmark = True 27 | train_cfg = dict( 28 | assigner=dict( 29 | type='MaxIoUAssigner', 30 | pos_iou_thr=0.5, 31 | neg_iou_thr=0.5, 32 | min_pos_iou=0., 33 | ignore_iof_thr=-1, 34 | gt_max_assign_all=False), 35 | smoothl1_beta=1., 36 | allowed_border=-1, 37 | pos_weight=-1, 38 | neg_pos_ratio=3, 39 | debug=False) 40 | test_cfg = dict( 41 | nms=dict(type='nms', iou_thr=0.45), 42 | min_bbox_size=0, 43 | score_thr=0.02, 44 | max_per_img=200) 45 | # model training and testing settings 46 | # dataset settings 47 | dataset_type = 'CocoDataset' 48 | data_root = 'data/coco/' 49 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) 50 | data = dict( 51 | imgs_per_gpu=8, 52 | workers_per_gpu=3, 53 | train=dict( 54 | type='RepeatDataset', 55 | times=5, 56 | dataset=dict( 57 | type=dataset_type, 58 | ann_file=data_root + 'annotations/instances_train2017.json', 59 | img_prefix=data_root + 'train2017/', 60 | img_scale=(512, 512), 61 | img_norm_cfg=img_norm_cfg, 62 | size_divisor=None, 63 | flip_ratio=0.5, 64 | with_mask=False, 65 | with_crowd=False, 66 | with_label=True, 67 | test_mode=False, 68 | extra_aug=dict( 69 | photo_metric_distortion=dict( 70 | brightness_delta=32, 71 | contrast_range=(0.5, 1.5), 72 | saturation_range=(0.5, 1.5), 73 | hue_delta=18), 74 | expand=dict( 75 | mean=img_norm_cfg['mean'], 76 | to_rgb=img_norm_cfg['to_rgb'], 77 | ratio_range=(1, 4)), 78 | random_crop=dict( 79 | min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3)), 80 | resize_keep_ratio=False)), 81 | val=dict( 82 | type=dataset_type, 83 | ann_file=data_root + 'annotations/instances_val2017.json', 84 | img_prefix=data_root + 'val2017/', 85 | img_scale=(512, 512), 86 | img_norm_cfg=img_norm_cfg, 87 | size_divisor=None, 88 | flip_ratio=0, 89 | with_mask=False, 90 | with_label=False, 91 | test_mode=True, 92 | resize_keep_ratio=False), 93 | test=dict( 94 | type=dataset_type, 95 | ann_file=data_root + 'annotations/instances_val2017.json', 96 | img_prefix=data_root + 'val2017/', 97 | img_scale=(512, 512), 98 | img_norm_cfg=img_norm_cfg, 99 | size_divisor=None, 100 | flip_ratio=0, 101 | with_mask=False, 102 | with_label=False, 103 | test_mode=True, 104 | resize_keep_ratio=False)) 105 | # optimizer 106 | optimizer = dict(type='SGD', lr=2e-3, momentum=0.9, weight_decay=5e-4) 107 | optimizer_config = dict() 108 | # learning policy 109 | lr_config = dict( 110 | policy='step', 111 | warmup='linear', 112 | warmup_iters=500, 113 | warmup_ratio=1.0 / 3, 114 | step=[16, 22]) 115 | checkpoint_config = dict(interval=1) 116 | # 
yapf:disable 117 | log_config = dict( 118 | interval=50, 119 | hooks=[ 120 | dict(type='TextLoggerHook'), 121 | # dict(type='TensorboardLoggerHook') 122 | ]) 123 | # yapf:enable 124 | # runtime settings 125 | total_epochs = 24 126 | dist_params = dict(backend='nccl') 127 | log_level = 'INFO' 128 | work_dir = './work_dirs/ssd512_coco' 129 | load_from = None 130 | resume_from = None 131 | workflow = [('train', 1)] 132 | -------------------------------------------------------------------------------- /mmdet/ops/nms/src/soft_nms_cpu.pyx: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------- 2 | # Soft-NMS: Improving Object Detection With One Line of Code 3 | # Copyright (c) University of Maryland, College Park 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Navaneeth Bodla and Bharat Singh 6 | # Modified by Kai Chen 7 | # ---------------------------------------------------------- 8 | 9 | # cython: language_level=3, boundscheck=False 10 | 11 | import numpy as np 12 | cimport numpy as np 13 | 14 | 15 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 16 | return a if a >= b else b 17 | 18 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 19 | return a if a <= b else b 20 | 21 | 22 | def soft_nms_cpu( 23 | np.ndarray[float, ndim=2] boxes_in, 24 | float iou_thr, 25 | unsigned int method=1, 26 | float sigma=0.5, 27 | float min_score=0.001, 28 | ): 29 | boxes = boxes_in.copy() 30 | cdef unsigned int N = boxes.shape[0] 31 | cdef float iw, ih, box_area 32 | cdef float ua 33 | cdef int pos = 0 34 | cdef float maxscore = 0 35 | cdef int maxpos = 0 36 | cdef float x1, x2, y1, y2, tx1, tx2, ty1, ty2, ts, area, weight, ov 37 | inds = np.arange(N) 38 | 39 | for i in range(N): 40 | maxscore = boxes[i, 4] 41 | maxpos = i 42 | 43 | tx1 = boxes[i, 0] 44 | ty1 = boxes[i, 1] 45 | tx2 = boxes[i, 2] 46 | ty2 = boxes[i, 3] 47 | ts = boxes[i, 4] 48 | ti = inds[i] 49 | 50 | pos = i + 1 51 | # get max box 52 | while pos < N: 53 | if maxscore < boxes[pos, 4]: 54 | maxscore = boxes[pos, 4] 55 | maxpos = pos 56 | pos = pos + 1 57 | 58 | # add max box as a detection 59 | boxes[i, 0] = boxes[maxpos, 0] 60 | boxes[i, 1] = boxes[maxpos, 1] 61 | boxes[i, 2] = boxes[maxpos, 2] 62 | boxes[i, 3] = boxes[maxpos, 3] 63 | boxes[i, 4] = boxes[maxpos, 4] 64 | inds[i] = inds[maxpos] 65 | 66 | # swap ith box with position of max box 67 | boxes[maxpos, 0] = tx1 68 | boxes[maxpos, 1] = ty1 69 | boxes[maxpos, 2] = tx2 70 | boxes[maxpos, 3] = ty2 71 | boxes[maxpos, 4] = ts 72 | inds[maxpos] = ti 73 | 74 | tx1 = boxes[i, 0] 75 | ty1 = boxes[i, 1] 76 | tx2 = boxes[i, 2] 77 | ty2 = boxes[i, 3] 78 | ts = boxes[i, 4] 79 | 80 | pos = i + 1 81 | # NMS iterations, note that N changes if detection boxes fall below 82 | # threshold 83 | while pos < N: 84 | x1 = boxes[pos, 0] 85 | y1 = boxes[pos, 1] 86 | x2 = boxes[pos, 2] 87 | y2 = boxes[pos, 3] 88 | s = boxes[pos, 4] 89 | 90 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 91 | iw = (min(tx2, x2) - max(tx1, x1) + 1) 92 | if iw > 0: 93 | ih = (min(ty2, y2) - max(ty1, y1) + 1) 94 | if ih > 0: 95 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih) 96 | ov = iw * ih / ua # iou between max box and detection box 97 | 98 | if method == 1: # linear 99 | if ov > iou_thr: 100 | weight = 1 - ov 101 | else: 102 | weight = 1 103 | elif method == 2: # gaussian 104 | weight = np.exp(-(ov * ov) / sigma) 105 | else: # original NMS 106 | if ov > iou_thr: 107 | 
weight = 0
108 |                         else:
109 |                             weight = 1
110 | 
111 |                     boxes[pos, 4] = weight * boxes[pos, 4]
112 | 
113 |                     # if box score falls below threshold, discard the box by
114 |                     # swapping with last box and update N
115 |                     if boxes[pos, 4] < min_score:
116 |                         boxes[pos, 0] = boxes[N-1, 0]
117 |                         boxes[pos, 1] = boxes[N-1, 1]
118 |                         boxes[pos, 2] = boxes[N-1, 2]
119 |                         boxes[pos, 3] = boxes[N-1, 3]
120 |                         boxes[pos, 4] = boxes[N-1, 4]
121 |                         inds[pos] = inds[N - 1]
122 |                         N = N - 1
123 |                         pos = pos - 1
124 | 
125 |             pos = pos + 1
126 | 
127 |     return boxes[:N], inds[:N]
128 | 
--------------------------------------------------------------------------------
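[Editor's example] A short usage sketch for soft_nms_cpu above; it is not part
of the original repo. It assumes the Cython extension has been built (e.g. via
compile.sh), and the module path is an assumption.

    import numpy as np
    from mmdet.ops.nms.soft_nms_cpu import soft_nms_cpu  # assumed module path

    boxes = np.array([
        [10, 10, 50, 50, 0.9],      # x1, y1, x2, y2, score
        [12, 12, 52, 52, 0.8],      # overlaps the first box heavily
        [100, 100, 150, 150, 0.7],  # disjoint box, left untouched
    ], dtype=np.float32)

    # method=1: linear decay; method=2: gaussian decay; any other value: hard NMS
    new_boxes, keep_inds = soft_nms_cpu(boxes, iou_thr=0.3, method=1,
                                        sigma=0.5, min_score=0.001)
    # Overlapping boxes survive with decayed scores unless they fall below
    # min_score; keep_inds maps each output row back to the input order.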